func BleveMetaExtra(m map[string]interface{}) { br := make(map[string]map[string][]string) t, i := bleveRegistry.AnalyzerTypesAndInstances() br["Analyzer"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.ByteArrayConverterTypesAndInstances() br["ByteArrayConverter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.CharFilterTypesAndInstances() br["CharFilter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.DateTimeParserTypesAndInstances() br["DateTimeParser"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.FragmentFormatterTypesAndInstances() br["FragmentFormatter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.FragmenterTypesAndInstances() br["Fragmenter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.HighlighterTypesAndInstances() br["Highlighter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.KVStoreTypesAndInstances() br["KVStore"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.TokenFilterTypesAndInstances() br["TokenFilter"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.TokenMapTypesAndInstances() br["TokenMap"] = map[string][]string{"types": t, "instances": i} t, i = bleveRegistry.TokenizerTypesAndInstances() br["Tokenizer"] = map[string][]string{"types": t, "instances": i} m["regBleve"] = br }
func printRegistry() { types, instances := registry.CharFilterTypesAndInstances() printType("Char Filter", types, instances) types, instances = registry.TokenizerTypesAndInstances() printType("Tokenizer", types, instances) types, instances = registry.TokenMapTypesAndInstances() printType("Token Map", types, instances) types, instances = registry.TokenFilterTypesAndInstances() printType("Token Filter", types, instances) types, instances = registry.AnalyzerTypesAndInstances() printType("Analyzer", types, instances) types, instances = registry.DateTimeParserTypesAndInstances() printType("Date Time Parser", types, instances) types, instances = registry.KVStoreTypesAndInstances() printType("KV Store", types, instances) types, instances = registry.ByteArrayConverterTypesAndInstances() printType("ByteArrayConverter", types, instances) types, instances = registry.FragmentFormatterTypesAndInstances() printType("Fragment Formatter", types, instances) types, instances = registry.FragmenterTypesAndInstances() printType("Fragmenter", types, instances) types, instances = registry.HighlighterTypesAndInstances() printType("Highlighter", types, instances) }
func ListAnalyzerNames(w http.ResponseWriter, req *http.Request) { indexMapping := bleve.NewIndexMapping() // read the request body requestBody, err := ioutil.ReadAll(req.Body) if err != nil { showError(w, req, fmt.Sprintf("error reading request body: %v", err), 400) return } // interpret request body as index mapping if len(requestBody) > 0 { requestBody, err = CleanseJSON(requestBody) if err != nil { showError(w, req, fmt.Sprintf("error preparing index mapping: %v", err), 400) return } err = json.Unmarshal(requestBody, &indexMapping) if err != nil { showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), 400) return } } // built in analyzer names _, analyzerNames := registry.AnalyzerTypesAndInstances() // add custom analyzer names for name := range indexMapping.CustomAnalysis.Analyzers { analyzerNames = append(analyzerNames, name) } sort.Strings(analyzerNames) rv := struct { Status string `json:"status"` Analyzers []string `json:"analyzers"` }{ Status: "ok", Analyzers: analyzerNames, } mustEncode(w, rv) }
return nil }, Run: func(cmd *cobra.Command, args []string) { types, instances := registry.CharFilterTypesAndInstances() printType("Char Filter", types, instances) types, instances = registry.TokenizerTypesAndInstances() printType("Tokenizer", types, instances) types, instances = registry.TokenMapTypesAndInstances() printType("Token Map", types, instances) types, instances = registry.TokenFilterTypesAndInstances() printType("Token Filter", types, instances) types, instances = registry.AnalyzerTypesAndInstances() printType("Analyzer", types, instances) types, instances = registry.DateTimeParserTypesAndInstances() printType("Date Time Parser", types, instances) types, instances = registry.KVStoreTypesAndInstances() printType("KV Store", types, instances) types, instances = registry.FragmentFormatterTypesAndInstances() printType("Fragment Formatter", types, instances) types, instances = registry.FragmenterTypesAndInstances() printType("Fragmenter", types, instances) types, instances = registry.HighlighterTypesAndInstances()
func main() { flag.Parse() if *path == "" { log.Fatal("provide a zim file path") } z, err := zim.NewReader(*path, false) if err != nil { log.Fatal(err) } if *indexPath == "" { log.Fatal("Please provide a path for the index") } switch *lang { case "en": //TODO: create a simple language support for stop word default: log.Fatal("unsupported language") } mapping := bleve.NewIndexMapping() mapping.DefaultType = "Article" articleMapping := bleve.NewDocumentMapping() mapping.AddDocumentMapping("Article", articleMapping) titleMapping := bleve.NewTextFieldMapping() titleMapping.Store = false titleMapping.Index = true titleMapping.Analyzer = "standard" articleMapping.AddFieldMappingsAt("Title", titleMapping) fmt.Println(registry.AnalyzerTypesAndInstances()) index, err := bleve.New(*indexPath, mapping) if err != nil { log.Fatal(err) } i := 0 batch := index.NewBatch() batchCount := 0 idoc := ArticleIndex{} divisor := float64(z.ArticleCount) / 100 z.ListTitlesPtrIterator(func(idx uint32) { if i%*batchSize == 0 { fmt.Printf("%.2f%% done\n", float64(i)/divisor) } a, err := z.ArticleAtURLIdx(idx) if err != nil || a.EntryType == zim.DeletedEntry { i++ return } if a.Namespace == 'A' { idoc.Title = a.Title // index the idoc with the idx as key batch.Index(fmt.Sprint(idx), idoc) batchCount++ } i++ // send a batch to bleve if batchCount >= *batchSize { err = index.Batch(batch) if err != nil { log.Fatal(err.Error()) } batch = index.NewBatch() batchCount = 0 } }) // batch the rest if batchCount > 0 { err = index.Batch(batch) if err != nil { log.Fatal(err.Error()) } } index.Close() }