func main() { flag.Parse() if *indexPath == "" { log.Fatal("must specify index path") } // create a new default mapping mapping := bleve.NewIndexMapping() if *mappingFile != "" { mappingBytes, err := ioutil.ReadFile(*mappingFile) if err != nil { log.Fatal(err) } err = json.Unmarshal(mappingBytes, &mapping) if err != nil { log.Fatal(err) } } // create the index index, err := bleve.NewUsing(*indexPath, mapping, *indexType, *storeType, nil) if err != nil { log.Fatal(err) } defer func() { cerr := index.Close() if cerr != nil { log.Fatalf("error closing index: %v", err) } }() log.Printf("Created bleve index at: %s", *indexPath) }
func init() { bleve.Config.DefaultKVStore = gtreap.Name bleveHttp.SetLog(log.New(os.Stderr, "bleve.http ", log.LstdFlags)) router := mux.NewRouter() router.StrictSlash(true) listIndexesHandler := bleveHttp.NewListIndexesHandler() router.Handle("/api", listIndexesHandler).Methods("GET") docCountHandler := bleveHttp.NewDocCountHandler("") docCountHandler.IndexNameLookup = indexNameLookup router.Handle("/api/{indexName}/_count", docCountHandler).Methods("GET") searchHandler := bleveHttp.NewSearchHandler("") searchHandler.IndexNameLookup = indexNameLookup router.Handle("/api/{indexName}/_search", searchHandler).Methods("POST") http.Handle("/", &CORSWrapper{router}) log.Printf("opening indexes") // walk the data dir and register index names dirEntries, err := ioutil.ReadDir(indexDir) if err != nil { log.Printf("error reading data dir: %v", err) return } for _, dirInfo := range dirEntries { indexPath := indexDir + string(os.PathSeparator) + dirInfo.Name() if dirInfo.IsDir() { log.Printf("see directory %s, this is not supported in the appengine environment", dirInfo.Name()) } else { log.Printf("preloading index export %s", dirInfo.Name()) // assume file in this dir is actually a bleve export i, err := bleve.NewUsing( "", bleve.NewIndexMapping(), bleve.Config.DefaultIndexType, preload.Name, map[string]interface{}{ "kvStoreName_actual": gtreap.Name, "preloadpath": indexPath, }) if err != nil { log.Printf("error preloading index %s: %v", indexPath, err) } else { i.SetName(dirInfo.Name()) log.Printf("registered index: %s", dirInfo.Name()) bleveHttp.RegisterIndexName(dirInfo.Name(), i) } } } }
func NewBlevePIndexImpl(indexType, indexParams, path string, restart func()) (cbgt.PIndexImpl, cbgt.Dest, error) { bleveParams := NewBleveParams() if len(indexParams) > 0 { err := json.Unmarshal([]byte(indexParams), bleveParams) if err != nil { return nil, nil, fmt.Errorf("bleve: parse params, err: %v", err) } } kvStoreName, ok := bleveParams.Store["kvStoreName"].(string) if !ok || kvStoreName == "" { kvStoreName = bleve.Config.DefaultKVStore } kvConfig := map[string]interface{}{ "create_if_missing": true, "error_if_exists": true, } for k, v := range bleveParams.Store { kvConfig[k] = v } // Always use the "metrics" wrapper KVStore if it's available and // also not already configured. _, exists := kvConfig["kvStoreName_actual"] if !exists && kvStoreName != "metrics" && bleveRegistry.KVStoreConstructorByName("metrics") != nil { kvConfig["kvStoreName_actual"] = kvStoreName kvStoreName = "metrics" } bleveIndexType, ok := bleveParams.Store["indexType"].(string) if !ok || bleveIndexType == "" { bleveIndexType = bleve.Config.DefaultIndexType } bindex, err := bleve.NewUsing(path, &bleveParams.Mapping, bleveIndexType, kvStoreName, kvConfig) if err != nil { return nil, nil, fmt.Errorf("bleve: new index, path: %s,"+ " kvStoreName: %s, kvConfig: %#v, err: %s", path, kvStoreName, kvConfig, err) } pathMeta := path + string(os.PathSeparator) + "PINDEX_BLEVE_META" err = ioutil.WriteFile(pathMeta, []byte(indexParams), 0600) if err != nil { return nil, nil, err } return bindex, &cbgt.DestForwarder{ DestProvider: NewBleveDest(path, bindex, restart), }, nil }
func createOpenIndex(path string) (bleve.Index, error) { index, err := bleve.Open(path) if err == bleve.ErrorIndexPathDoesNotExist { jww.INFO.Println("Creating Index: ", path) indexMapping, err := buildIndexMapping() if err != nil { return nil, err } index, err = bleve.NewUsing(path, indexMapping, bleve.Config.DefaultIndexType, goleveldb.Name, nil) if err != nil { return nil, err } } else if err != nil { return nil, err } else { jww.INFO.Println("Opening Index: ", path) } return index, nil }
func NewBleve(path string, size int64, logger webfw.Logger) (content.SearchProvider, error) { var err error var exists bool var index bleve.Index _, err = os.Stat(path) if err == nil { logger.Infoln("Opening search index " + path) index, err = bleve.Open(path) if err != nil { return nil, errors.New(fmt.Sprintf("Error opening search index: %v\n", err)) } exists = true } else if os.IsNotExist(err) { mapping := bleve.NewIndexMapping() docMapping := bleve.NewDocumentMapping() idfieldmapping := bleve.NewTextFieldMapping() idfieldmapping.IncludeInAll = false docMapping.AddFieldMappingsAt("FeedId", idfieldmapping) docMapping.AddFieldMappingsAt("ArticleId", idfieldmapping) mapping.AddDocumentMapping(mapping.DefaultType, docMapping) logger.Infoln("Creating search index " + path) index, err = bleve.NewUsing(path, mapping, upside_down.Name, goleveldb.Name, nil) if err != nil { return nil, errors.New(fmt.Sprintf("Error creating search index: %v\n", err)) } } else { return nil, errors.New( fmt.Sprintf("Error getting stat of '%s': %v\n", path, err)) } return &Bleve{logger: logger, index: index, batchSize: size, newIndex: !exists}, nil }
func runConfig(conf string, tar string, cpu string) []*Line { if cpu != "" { f, err := os.Create(cpu) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } start := time.Now() wikiReader, err := blevebench.NewWikiReader(*source) if err != nil { log.Fatal(err) } defer wikiReader.Close() mapping := blevebench.BuildArticleMapping() benchConfig := blevebench.LoadConfigFile(conf) fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType) fmt.Printf("Using KV store: %s\n", benchConfig.KVStore) fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig) index, err := bleve.NewUsing(tar, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig) if err != nil { log.Fatal(err) } _, store, err := index.Advanced() if err != nil { log.Fatal(err) } itr := *count / (*level) lines := NewLines(itr, len(typename), conf, typename) tot := 0 // print header fmt.Printf("elapsed,docs,avg_single_doc_ms,avg_batched_doc_ms,query_water_matches,first_query_water_ms,avg_repeated%d_query_water_ms", *qrepeat) printOtherHeader(store) fmt.Printf("\n") singleCount := 0 var singleTime time.Duration batchCount := 0 var batchTime time.Duration batch := index.NewBatch() for i := 1; i < (*count)+1; i++ { leveli := i % *level a, err := wikiReader.Next() if err != nil { log.Fatal(err) } if leveli < *batchSize { // index single singleStart := time.Now() err = index.Index(a.Title, a) if err != nil { log.Fatalf("error indexing: %v", err) } duration := time.Since(singleStart) singleCount++ singleTime += duration } else { // add to batch batch.Index(a.Title, a) // if batch is full index it if batch.Size() == *batchSize { batchStart := time.Now() err := index.Batch(batch) if err != nil { log.Fatalf("error executing batch: %v", err) } duration := time.Since(batchStart) batchCount++ batchTime += duration // reset batch batch = index.NewBatch() } } if leveli == 0 { // run some queries termQueryCount := 0 termQueryStart := time.Now() termQuery := bleve.NewTermQuery("water") termQuery.SetField("text") termSearch := bleve.NewSearchRequest(termQuery) searchResults, err := index.Search(termSearch) if err != nil { log.Fatalf("error searching: %v", err) } termQueryCount++ termQueryTime := time.Since(termQueryStart) firstQueryTime := float64(termQueryTime) for termQueryCount < *qrepeat { termQueryStart = time.Now() searchResults, err = index.Search(termSearch) if err != nil { log.Fatal(err) } termQueryCount++ termQueryTime += time.Since(termQueryStart) } // print stats avgSingleDocTime := float64(singleTime) / float64(singleCount) avgBatchTime := float64(batchTime) / float64(batchCount) avgBatchDocTime := float64(avgBatchTime) / float64(*batchSize) avgQueryTime := float64(termQueryTime) / float64(termQueryCount) elapsedTime := time.Since(start) / time.Millisecond fmt.Printf("%d,%d,%f,%f,%d,%f,%f", elapsedTime, i, avgSingleDocTime/float64(time.Millisecond), avgBatchDocTime/float64(time.Millisecond), searchResults.Total, firstQueryTime/float64(time.Millisecond), avgQueryTime/float64(time.Millisecond)) printOther(store) lines[0].Pt[tot].Y = avgSingleDocTime / float64(time.Millisecond) lines[0].Pt[tot].X = float64(i) lines[1].Pt[tot].Y = avgBatchDocTime / float64(time.Millisecond) lines[1].Pt[tot].X = float64(i) lines[2].Pt[tot].Y = firstQueryTime / float64(time.Millisecond) lines[2].Pt[tot].X = float64(i) lines[3].Pt[tot].Y = avgQueryTime / float64(time.Millisecond) lines[3].Pt[tot].X = float64(i) tot++ fmt.Printf("\n") // reset stats singleCount = 0 singleTime = 0 batchCount = 0 batchTime = 0 // dump mem stats if requested if *memprofile != "" { f, err := os.Create(strconv.Itoa(i) + "-" + *memprofile) if err != nil { log.Fatal(err) } pprof.WriteHeapProfile(f) } } } return lines }
Long: `The create command will create a new empty index.`, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { // override RootCmd version which opens existing index if len(args) < 1 { return fmt.Errorf("must specify path to index") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { var mapping mapping.IndexMapping var err error mapping, err = buildMapping() if err != nil { return fmt.Errorf("error building mapping: %v", err) } idx, err = bleve.NewUsing(args[0], mapping, indexType, storeType, nil) if err != nil { return fmt.Errorf("error creating index: %v", err) } // the inheritted Post action will close the index return nil }, } func buildMapping() (mapping.IndexMapping, error) { mapping := mapping.NewIndexMapping() if mappingPath != "" { mappingBytes, err := ioutil.ReadFile(mappingPath) if err != nil { return nil, err }
func main() { flag.Parse() go http.ListenAndServe(*bindHttp, nil) if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } bleve.Config.SetAnalysisQueueSize(*numAnalyzers) mapping := blevebench.BuildArticleMapping() benchConfig := blevebench.LoadConfigFile(*config) fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType) fmt.Printf("Using KV store: %s\n", benchConfig.KVStore) fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig) index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig) if err != nil { log.Fatal(err) } start = time.Now() work := make(chan *Work) // start reading worker go readingWorker(index, work) // start print time worker if *printTime > 0 { go printTimeWorker() } //start workers var wg sync.WaitGroup for i := 0; i < *numIndexers; i++ { wg.Add(1) go func() { batchIndexingWorker(index, work, start) wg.Done() }() } wg.Wait() end := time.Now() timeTaken := end.Sub(start) mb := float64(totalPlainTextIndexed) / 1000000.0 seconds := float64(timeTaken) / float64(time.Second) log.Printf("Result: %d bytes in %d seconds = %fMB/s", totalPlainTextIndexed, timeTaken/time.Second, mb/seconds) s := index.Stats() statsBytes, err := json.Marshal(s) if err != nil { log.Fatal(err) } log.Printf("stats: %s", string(statsBytes)) }
func main() { flag.Parse() go http.ListenAndServe(*bindHttp, nil) if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } bleve.Config.SetAnalysisQueueSize(8) mapping := blevebench.BuildArticleMapping() benchConfig := blevebench.LoadConfigFile(*config) fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType) fmt.Printf("Using KV store: %s\n", benchConfig.KVStore) fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig) index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig) if err != nil { log.Fatal(err) } // start reading worker indexWorker(index) resetChan := make(chan bool) if *printTime > 0 { go printTimeWorker(resetChan) } for s, h := range queryType { log.Println("running", s, "query") var wg sync.WaitGroup q := h() start := time.Now() // Start the query after indexing for i := 0; i < *numQueryThreads; i++ { wg.Add(1) go func() { docs := *qcount / (*numQueryThreads) if i == *numQueryThreads-1 { docs = docs + *qcount%(*numQueryThreads) } queryWorker(index, q, docs) wg.Done() }() } wg.Wait() end := time.Now() timeTaken := end.Sub(start) seconds := float64(timeTaken) / float64(time.Second) mb := int(float64(*qcount) / float64(seconds)) log.Println("Result:", s, "query - queries per second", mb) resetChan <- true } s := index.Stats() statsBytes, err := json.Marshal(s) if err != nil { log.Fatal(err) } log.Printf("stats: %s", string(statsBytes)) }
func main() { flag.Parse() go http.ListenAndServe(*bindHttp, nil) // For expvar. if *statsFile != "" { // create all parents if necessary dir := path.Dir(*statsFile) os.MkdirAll(dir, 0755) var err error statsWriter, err = os.Create(*statsFile) if err != nil { log.Fatal(err) } } if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } defer f.Close() pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } bleve.Config.SetAnalysisQueueSize(*numAnalyzers) mapping := blevebench.BuildArticleMapping() benchConfig := blevebench.LoadConfigFile(*config) fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType) fmt.Printf("Using KV store: %s\n", benchConfig.KVStore) fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig) index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig) if err != nil { log.Fatal(err) } printHeader() timeStart = time.Now() timeLast = timeStart printLine() work := make(chan *Work) // start reading worker go readingWorker(index, work) // start print time worker if *printTime > 0 { go printTimeWorker() } // start workers var wg sync.WaitGroup for i := 0; i < *numIndexers; i++ { wg.Add(1) go func() { batchIndexingWorker(index, work, timeStart) wg.Done() }() } wg.Wait() // print final stats printLine() }
func NewOfferIndex(dir string) (bleve.Index, error) { err := os.RemoveAll(dir) if err != nil && !os.IsNotExist(err) { return nil, err } parts := []string{} for _, exc := range indexExceptions { parts = append(parts, regexp.QuoteMeta(exc)) } pattern := strings.Join(parts, "|") pattern = "(?i)(?:" + pattern + ")" m := bleve.NewIndexMapping() apecTokenizer := "apec" err = m.AddCustomTokenizer(apecTokenizer, map[string]interface{}{ "type": exception.Name, "exceptions": []string{pattern}, "tokenizer": bleveuni.Name, }) if err != nil { return nil, err } apecTokens := "apec_tokens" err = m.AddCustomTokenMap(apecTokens, map[string]interface{}{ "type": tokenmap.Name, "tokens": stopWords, }) if err != nil { return nil, err } apecStop := "apec_stop" err = m.AddCustomTokenFilter(apecStop, map[string]interface{}{ "type": stop.Name, "stop_token_map": apecTokens, }) if err != nil { return nil, err } frTokens := []string{ lowercase.Name, fr.ElisionName, fr.StopName, fr.LightStemmerName, apecStop, } fr := map[string]interface{}{ "type": custom.Name, "tokenizer": apecTokenizer, "token_filters": frTokens, } frHtml := map[string]interface{}{ "type": custom.Name, "char_filters": []string{ html.Name, }, "tokenizer": apecTokenizer, "token_filters": frTokens, } err = m.AddCustomAnalyzer("fr", fr) if err != nil { return nil, fmt.Errorf("failed to register analyzer fr: %s", err) } err = m.AddCustomAnalyzer("fr_html", frHtml) if err != nil { return nil, fmt.Errorf("failed to register analyzer fr_html: %s", err) } htmlFr := bleve.NewTextFieldMapping() htmlFr.Store = false htmlFr.IncludeInAll = false htmlFr.IncludeTermVectors = false htmlFr.Analyzer = "fr_html" textFr := bleve.NewTextFieldMapping() textFr.Store = false textFr.IncludeInAll = false textFr.IncludeTermVectors = false textFr.Analyzer = "fr" textAll := bleve.NewTextFieldMapping() textAll.Store = false textAll.IncludeInAll = true textAll.IncludeTermVectors = false date := bleve.NewDateTimeFieldMapping() date.Index = false date.Store = true date.IncludeInAll = false date.IncludeTermVectors = false offer := bleve.NewDocumentStaticMapping() offer.Dynamic = false offer.AddFieldMappingsAt("html", htmlFr) offer.AddFieldMappingsAt("title", textFr) offer.AddFieldMappingsAt("date", date) m.AddDocumentMapping("offer", offer) m.DefaultMapping = offer index, err := bleve.NewUsing(dir, m, upsidedown.Name, boltdb.Name, map[string]interface{}{ "nosync": true, }) if err != nil { return nil, err } return index, nil }