Пример #1
0
// main creates a new, empty bleve index at *indexPath.
//
// When *mappingFile is non-empty, its contents are decoded as JSON on top of
// the default index mapping before the index is created. Any failure aborts
// the program through log.Fatal.
func main() {

	flag.Parse()

	if *indexPath == "" {
		log.Fatal("must specify index path")
	}

	// create a new default mapping
	mapping := bleve.NewIndexMapping()
	if *mappingFile != "" {
		mappingBytes, err := ioutil.ReadFile(*mappingFile)
		if err != nil {
			log.Fatal(err)
		}
		err = json.Unmarshal(mappingBytes, &mapping)
		if err != nil {
			log.Fatal(err)
		}
	}

	// create the index
	index, err := bleve.NewUsing(*indexPath, mapping, *indexType, *storeType, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		// Report the error returned by Close itself; the outer err is
		// necessarily nil once execution reaches this point.
		if cerr := index.Close(); cerr != nil {
			log.Fatalf("error closing index: %v", cerr)
		}
	}()

	log.Printf("Created bleve index at: %s", *indexPath)
}
Пример #2
0
// init registers the HTTP API routes on the default serve mux and then
// preloads every regular file found in indexDir as a bleve index export,
// registering each one under its file name.
//
// Sub-directories of indexDir are only logged and otherwise ignored. A failure
// to read indexDir or to preload a single export is logged and does not abort
// start-up.
func init() {
	bleve.Config.DefaultKVStore = gtreap.Name
	bleveHttp.SetLog(log.New(os.Stderr, "bleve.http ", log.LstdFlags))

	r := mux.NewRouter()
	r.StrictSlash(true)

	// index listing
	r.Handle("/api", bleveHttp.NewListIndexesHandler()).Methods("GET")

	// document count for one index
	countHandler := bleveHttp.NewDocCountHandler("")
	countHandler.IndexNameLookup = indexNameLookup
	r.Handle("/api/{indexName}/_count", countHandler).Methods("GET")

	// search against one index
	queryHandler := bleveHttp.NewSearchHandler("")
	queryHandler.IndexNameLookup = indexNameLookup
	r.Handle("/api/{indexName}/_search", queryHandler).Methods("POST")

	http.Handle("/", &CORSWrapper{r})

	log.Printf("opening indexes")
	// every entry of the data dir is a candidate index
	entries, readErr := ioutil.ReadDir(indexDir)
	if readErr != nil {
		log.Printf("error reading data dir: %v", readErr)
		return
	}

	for _, entry := range entries {
		name := entry.Name()

		if entry.IsDir() {
			log.Printf("see directory %s, this is not supported in the appengine environment", name)
			continue
		}

		exportPath := indexDir + string(os.PathSeparator) + name
		log.Printf("preloading index export %s", name)

		// a plain file in this dir is assumed to be a bleve export
		storeConfig := map[string]interface{}{
			"kvStoreName_actual": gtreap.Name,
			"preloadpath":        exportPath,
		}
		idx, err := bleve.NewUsing(
			"",
			bleve.NewIndexMapping(),
			bleve.Config.DefaultIndexType,
			preload.Name,
			storeConfig)
		if err != nil {
			log.Printf("error preloading index %s: %v", exportPath, err)
			continue
		}

		idx.SetName(name)
		log.Printf("registered index: %s", name)
		bleveHttp.RegisterIndexName(name, idx)
	}
}
Пример #3
0
// NewBlevePIndexImpl creates a new bleve index at path and returns it together
// with a cbgt.Dest that forwards to a bleve destination built by NewBleveDest.
//
// indexParams, when non-empty, is JSON that is decoded over the defaults from
// NewBleveParams; its raw text is also persisted to the PINDEX_BLEVE_META file
// inside path. The Store section of the params may select the KV store
// ("kvStoreName") and the index type ("indexType"); both fall back to the
// bleve.Config defaults. restart is handed through to NewBleveDest. indexType
// is not consulted by this function.
//
// An error is returned when the params cannot be parsed, the index cannot be
// created, or the meta file cannot be written. In the last case the index that
// was just created is closed again before returning.
func NewBlevePIndexImpl(indexType, indexParams, path string,
	restart func()) (cbgt.PIndexImpl, cbgt.Dest, error) {
	bleveParams := NewBleveParams()
	if len(indexParams) > 0 {
		err := json.Unmarshal([]byte(indexParams), bleveParams)
		if err != nil {
			return nil, nil, fmt.Errorf("bleve: parse params, err: %v", err)
		}
	}

	kvStoreName, ok := bleveParams.Store["kvStoreName"].(string)
	if !ok || kvStoreName == "" {
		kvStoreName = bleve.Config.DefaultKVStore
	}

	// Defaults first, so that entries from the params' Store section win.
	kvConfig := map[string]interface{}{
		"create_if_missing": true,
		"error_if_exists":   true,
	}
	for k, v := range bleveParams.Store {
		kvConfig[k] = v
	}

	// Always use the "metrics" wrapper KVStore if it's available and
	// also not already configured.
	_, exists := kvConfig["kvStoreName_actual"]
	if !exists &&
		kvStoreName != "metrics" &&
		bleveRegistry.KVStoreConstructorByName("metrics") != nil {
		kvConfig["kvStoreName_actual"] = kvStoreName
		kvStoreName = "metrics"
	}

	bleveIndexType, ok := bleveParams.Store["indexType"].(string)
	if !ok || bleveIndexType == "" {
		bleveIndexType = bleve.Config.DefaultIndexType
	}

	bindex, err := bleve.NewUsing(path, &bleveParams.Mapping,
		bleveIndexType, kvStoreName, kvConfig)
	if err != nil {
		return nil, nil, fmt.Errorf("bleve: new index, path: %s,"+
			" kvStoreName: %s, kvConfig: %#v, err: %s",
			path, kvStoreName, kvConfig, err)
	}

	pathMeta := path + string(os.PathSeparator) + "PINDEX_BLEVE_META"
	err = ioutil.WriteFile(pathMeta, []byte(indexParams), 0600)
	if err != nil {
		// The caller receives no handle on this path, so release the open
		// index here. The close error is dropped on purpose: the write
		// failure is the one worth reporting.
		_ = bindex.Close()
		return nil, nil, err
	}

	return bindex, &cbgt.DestForwarder{
		DestProvider: NewBleveDest(path, bindex, restart),
	}, nil
}
Пример #4
0
// createOpenIndex returns the bleve index stored at path.
//
// An existing index is opened as-is. When bleve reports that the path does not
// exist, a new index is created there with the mapping from buildIndexMapping,
// the default index type and the goleveldb store. Any other error from opening,
// and any error while building the mapping or creating the index, is returned
// unchanged.
func createOpenIndex(path string) (bleve.Index, error) {
	existing, openErr := bleve.Open(path)
	switch {
	case openErr == nil:
		jww.INFO.Println("Opening Index: ", path)
		return existing, nil
	case openErr != bleve.ErrorIndexPathDoesNotExist:
		return nil, openErr
	}

	// Nothing at path yet: build the mapping and create the index.
	jww.INFO.Println("Creating Index: ", path)
	mapping, err := buildIndexMapping()
	if err != nil {
		return nil, err
	}

	created, err := bleve.NewUsing(path, mapping, bleve.Config.DefaultIndexType, goleveldb.Name, nil)
	if err != nil {
		return nil, err
	}
	return created, nil
}
Пример #5
0
// NewBleve returns a bleve-backed search provider whose index lives at path.
//
// If path already exists the index is opened; if it does not exist a new index
// is created in which the "FeedId" and "ArticleId" fields are excluded from
// the composite "_all" field. size becomes the provider's batch size and
// logger receives progress messages. The returned provider records whether
// the index was newly created.
//
// An error is returned when path cannot be stat'ed for a reason other than
// non-existence, or when opening or creating the index fails.
func NewBleve(path string, size int64, logger webfw.Logger) (content.SearchProvider, error) {
	var idx bleve.Index
	// Stays true unless an existing index is opened below.
	fresh := true

	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		logger.Infoln("Opening search index " + path)

		opened, openErr := bleve.Open(path)
		if openErr != nil {
			return nil, errors.New(fmt.Sprintf("Error opening search index: %v\n", openErr))
		}
		idx, fresh = opened, false

	case os.IsNotExist(statErr):
		// One field mapping is shared by both identifier fields.
		idField := bleve.NewTextFieldMapping()
		idField.IncludeInAll = false

		doc := bleve.NewDocumentMapping()
		doc.AddFieldMappingsAt("FeedId", idField)
		doc.AddFieldMappingsAt("ArticleId", idField)

		im := bleve.NewIndexMapping()
		im.AddDocumentMapping(im.DefaultType, doc)

		logger.Infoln("Creating search index " + path)

		created, createErr := bleve.NewUsing(path, im, upside_down.Name, goleveldb.Name, nil)
		if createErr != nil {
			return nil, errors.New(fmt.Sprintf("Error creating search index: %v\n", createErr))
		}
		idx = created

	default:
		return nil, errors.New(
			fmt.Sprintf("Error getting stat of '%s': %v\n", path, statErr))
	}

	return &Bleve{logger: logger, index: idx, batchSize: size, newIndex: fresh}, nil
}
Пример #6
0
// runConfig runs one indexing/query benchmark pass and returns the collected
// data series.
//
// conf is the benchmark configuration file handed to
// blevebench.LoadConfigFile, tar is the path at which the index is created,
// and cpu, when non-empty, names a file that receives a CPU profile of the
// whole pass.
//
// *count articles are read from *source. Within every window of *level
// articles, those whose position modulo *level is below *batchSize are indexed
// one by one and the rest go through batches of *batchSize. Each time the
// article counter reaches a multiple of *level, a term query for "water" on
// the "text" field is run *qrepeat times, a CSV line of statistics is printed
// and one point is appended to each of the four returned lines:
//
//	lines[0] average single-document index time (ms)
//	lines[1] average per-document batch index time (ms)
//	lines[2] first query time (ms)
//	lines[3] average query time (ms)
//
// When *memprofile is set, a heap profile named "<docs>-<memprofile>" is
// written at each of those points. Any error aborts the program via log.Fatal.
func runConfig(conf string, tar string, cpu string) []*Line {
	if cpu != "" {
		f, err := os.Create(cpu)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred calls run last-in-first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	start := time.Now()

	wikiReader, err := blevebench.NewWikiReader(*source)
	if err != nil {
		log.Fatal(err)
	}
	defer wikiReader.Close()

	mapping := blevebench.BuildArticleMapping()
	benchConfig := blevebench.LoadConfigFile(conf)

	fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType)
	fmt.Printf("Using KV store: %s\n", benchConfig.KVStore)
	fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig)
	index, err := bleve.NewUsing(tar, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig)
	if err != nil {
		log.Fatal(err)
	}
	_, store, err := index.Advanced()
	if err != nil {
		log.Fatal(err)
	}

	// One data point per completed level.
	itr := *count / (*level)
	lines := NewLines(itr, len(typename), conf, typename)
	tot := 0
	// print header
	fmt.Printf("elapsed,docs,avg_single_doc_ms,avg_batched_doc_ms,query_water_matches,first_query_water_ms,avg_repeated%d_query_water_ms", *qrepeat)
	printOtherHeader(store)
	fmt.Printf("\n")

	singleCount := 0
	var singleTime time.Duration
	batchCount := 0
	var batchTime time.Duration
	batch := index.NewBatch()
	for i := 1; i < (*count)+1; i++ {

		leveli := i % *level

		a, err := wikiReader.Next()
		if err != nil {
			log.Fatal(err)
		}
		if leveli < *batchSize {
			// index single
			singleStart := time.Now()
			err = index.Index(a.Title, a)
			if err != nil {
				log.Fatalf("error indexing: %v", err)
			}
			duration := time.Since(singleStart)
			singleCount++
			singleTime += duration
		} else {
			// add to batch
			batch.Index(a.Title, a)
			// if batch is full index it
			if batch.Size() == *batchSize {
				batchStart := time.Now()
				err := index.Batch(batch)
				if err != nil {
					log.Fatalf("error executing batch: %v", err)
				}
				duration := time.Since(batchStart)
				batchCount++
				batchTime += duration
				// reset batch
				batch = index.NewBatch()
			}
		}

		if leveli == 0 {

			// run some queries
			termQueryCount := 0
			termQueryStart := time.Now()
			termQuery := bleve.NewTermQuery("water")
			termQuery.SetField("text")
			termSearch := bleve.NewSearchRequest(termQuery)
			searchResults, err := index.Search(termSearch)
			if err != nil {
				log.Fatalf("error searching: %v", err)
			}
			termQueryCount++
			termQueryTime := time.Since(termQueryStart)

			// The first run is reported separately from the repeated runs.
			firstQueryTime := float64(termQueryTime)

			for termQueryCount < *qrepeat {
				termQueryStart = time.Now()
				searchResults, err = index.Search(termSearch)
				if err != nil {
					log.Fatal(err)
				}
				termQueryCount++
				termQueryTime += time.Since(termQueryStart)
			}

			// print stats
			// These are float divisions: a zero count yields NaN, not a panic.
			avgSingleDocTime := float64(singleTime) / float64(singleCount)
			avgBatchTime := float64(batchTime) / float64(batchCount)
			avgBatchDocTime := float64(avgBatchTime) / float64(*batchSize)
			avgQueryTime := float64(termQueryTime) / float64(termQueryCount)
			elapsedTime := time.Since(start) / time.Millisecond
			fmt.Printf("%d,%d,%f,%f,%d,%f,%f", elapsedTime, i, avgSingleDocTime/float64(time.Millisecond), avgBatchDocTime/float64(time.Millisecond), searchResults.Total, firstQueryTime/float64(time.Millisecond), avgQueryTime/float64(time.Millisecond))
			printOther(store)
			lines[0].Pt[tot].Y = avgSingleDocTime / float64(time.Millisecond)
			lines[0].Pt[tot].X = float64(i)
			lines[1].Pt[tot].Y = avgBatchDocTime / float64(time.Millisecond)
			lines[1].Pt[tot].X = float64(i)
			lines[2].Pt[tot].Y = firstQueryTime / float64(time.Millisecond)
			lines[2].Pt[tot].X = float64(i)
			lines[3].Pt[tot].Y = avgQueryTime / float64(time.Millisecond)
			lines[3].Pt[tot].X = float64(i)
			tot++

			fmt.Printf("\n")

			// reset stats
			singleCount = 0
			singleTime = 0
			batchCount = 0
			batchTime = 0

			// dump mem stats if requested
			if *memprofile != "" {
				f, err := os.Create(strconv.Itoa(i) + "-" + *memprofile)
				if err != nil {
					log.Fatal(err)
				}
				werr := pprof.WriteHeapProfile(f)
				// Close immediately rather than deferring: a defer inside
				// this loop would keep every snapshot file open until the
				// function returns.
				cerr := f.Close()
				if werr != nil {
					log.Fatal(werr)
				}
				if cerr != nil {
					log.Fatal(cerr)
				}
			}
		}

	}
	return lines
}
Пример #7
0
	Long:  `The create command will create a new empty index.`,
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		// override RootCmd version which opens existing index
		if len(args) < 1 {
			return fmt.Errorf("must specify path to index")
		}
		return nil
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		var mapping mapping.IndexMapping
		var err error
		mapping, err = buildMapping()
		if err != nil {
			return fmt.Errorf("error building mapping: %v", err)
		}
		idx, err = bleve.NewUsing(args[0], mapping, indexType, storeType, nil)
		if err != nil {
			return fmt.Errorf("error creating index: %v", err)
		}
		// the inheritted Post action will close the index
		return nil
	},
}

func buildMapping() (mapping.IndexMapping, error) {
	mapping := mapping.NewIndexMapping()
	if mappingPath != "" {
		mappingBytes, err := ioutil.ReadFile(mappingPath)
		if err != nil {
			return nil, err
		}
Пример #8
0
// main runs a concurrent indexing benchmark.
//
// It serves the default HTTP mux on *bindHttp in the background, optionally
// records a CPU profile to *cpuprofile, creates a new bleve index at *target
// using the index type and KV store from the *config file, and then starts
// readingWorker plus *numIndexers batchIndexingWorker goroutines that share
// one work channel. Once all indexing workers have returned it logs the
// throughput and the index statistics as JSON.
func main() {
	flag.Parse()

	go http.ListenAndServe(*bindHttp, nil)

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred calls run last-in-first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	bleve.Config.SetAnalysisQueueSize(*numAnalyzers)

	mapping := blevebench.BuildArticleMapping()
	benchConfig := blevebench.LoadConfigFile(*config)

	fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType)
	fmt.Printf("Using KV store: %s\n", benchConfig.KVStore)
	fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig)
	index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig)
	if err != nil {
		log.Fatal(err)
	}

	// start is assigned, not declared, here: it lives outside this function.
	start = time.Now()
	work := make(chan *Work)

	// start reading worker
	go readingWorker(index, work)

	// start print time worker
	if *printTime > 0 {
		go printTimeWorker()
	}

	//start workers
	var wg sync.WaitGroup
	for i := 0; i < *numIndexers; i++ {
		wg.Add(1)
		go func() {
			batchIndexingWorker(index, work, start)
			wg.Done()
		}()
	}

	wg.Wait()
	end := time.Now()
	timeTaken := end.Sub(start)
	mb := float64(totalPlainTextIndexed) / 1000000.0
	seconds := float64(timeTaken) / float64(time.Second)
	log.Printf("Result: %d bytes in %d seconds = %fMB/s", totalPlainTextIndexed, timeTaken/time.Second, mb/seconds)

	s := index.Stats()
	statsBytes, err := json.Marshal(s)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("stats: %s", string(statsBytes))
}
Пример #9
0
// main runs a query benchmark against a freshly built index.
//
// It serves the default HTTP mux on *bindHttp in the background, optionally
// records a CPU profile to *cpuprofile, creates a new bleve index at *target
// from the *config file and calls indexWorker on it. Then, for every entry of
// queryType, it builds the query once and runs it from *numQueryThreads
// goroutines, splitting *qcount executions between them (the last goroutine
// also takes the remainder), and logs the achieved queries per second.
// Finally the index statistics are logged as JSON.
func main() {
	flag.Parse()

	go http.ListenAndServe(*bindHttp, nil)

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred calls run last-in-first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	bleve.Config.SetAnalysisQueueSize(8)

	mapping := blevebench.BuildArticleMapping()
	benchConfig := blevebench.LoadConfigFile(*config)

	fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType)
	fmt.Printf("Using KV store: %s\n", benchConfig.KVStore)
	fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig)
	index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig)
	if err != nil {
		log.Fatal(err)
	}

	// Plain (non-goroutine) call: it returns before any query is started.
	indexWorker(index)

	resetChan := make(chan bool)
	if *printTime > 0 {
		go printTimeWorker(resetChan)
	}

	for s, h := range queryType {
		log.Println("running", s, "query")
		var wg sync.WaitGroup
		q := h()
		start := time.Now()
		// Start the query after indexing
		for i := 0; i < *numQueryThreads; i++ {
			wg.Add(1)
			// The worker number is passed by value. Capturing the loop
			// variable would make every goroutine observe whatever value it
			// has when the goroutine runs (before Go 1.22), so the remainder
			// could be assigned to several workers or to none.
			go func(worker int) {
				defer wg.Done()
				docs := *qcount / (*numQueryThreads)
				if worker == *numQueryThreads-1 {
					docs = docs + *qcount%(*numQueryThreads)
				}
				queryWorker(index, q, docs)
			}(i)
		}
		wg.Wait()
		end := time.Now()
		timeTaken := end.Sub(start)
		seconds := float64(timeTaken) / float64(time.Second)
		mb := int(float64(*qcount) / float64(seconds))
		log.Println("Result:", s, "query - queries per second", mb)
		// resetChan is unbuffered and printTimeWorker is only started when
		// *printTime > 0; sending without it would block forever.
		if *printTime > 0 {
			resetChan <- true
		}
	}
	s := index.Stats()
	statsBytes, err := json.Marshal(s)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("stats: %s", string(statsBytes))
}
Пример #10
0
// main runs a concurrent indexing benchmark that reports progress through
// printHeader/printLine.
//
// It serves the default HTTP mux on *bindHttp in the background, opens
// *statsFile (creating parent directories) as statsWriter when requested,
// optionally records a CPU profile to *cpuprofile, creates a new bleve index
// at *target from the *config file, and then starts readingWorker plus
// *numIndexers batchIndexingWorker goroutines that share one work channel.
// A final stats line is printed after all indexing workers have returned.
func main() {
	flag.Parse()

	go http.ListenAndServe(*bindHttp, nil) // For expvar.

	if *statsFile != "" {
		// create all parents if necessary
		// NOTE(review): path.Dir only understands forward slashes;
		// filepath.Dir would be the OS-aware choice - confirm before changing.
		dir := path.Dir(*statsFile)
		if err := os.MkdirAll(dir, 0755); err != nil {
			log.Fatal(err)
		}

		var err error
		statsWriter, err = os.Create(*statsFile)
		if err != nil {
			log.Fatal(err)
		}
	}

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred calls run last-in-first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	bleve.Config.SetAnalysisQueueSize(*numAnalyzers)

	mapping := blevebench.BuildArticleMapping()
	benchConfig := blevebench.LoadConfigFile(*config)

	fmt.Printf("Using Index Type: %s\n", benchConfig.IndexType)
	fmt.Printf("Using KV store: %s\n", benchConfig.KVStore)
	fmt.Printf("Using KV config: %#v\n", benchConfig.KVConfig)
	index, err := bleve.NewUsing(*target, mapping, benchConfig.IndexType, benchConfig.KVStore, benchConfig.KVConfig)
	if err != nil {
		log.Fatal(err)
	}

	// Emit the header and an initial line before any work starts.
	printHeader()
	timeStart = time.Now()
	timeLast = timeStart
	printLine()

	work := make(chan *Work)

	// start reading worker
	go readingWorker(index, work)

	// start print time worker
	if *printTime > 0 {
		go printTimeWorker()
	}

	// start workers
	var wg sync.WaitGroup
	for i := 0; i < *numIndexers; i++ {
		wg.Add(1)
		go func() {
			batchIndexingWorker(index, work, timeStart)
			wg.Done()
		}()
	}

	wg.Wait()

	// print final stats
	printLine()
}
Пример #11
0
// NewOfferIndex deletes whatever exists at dir and creates a new bleve index
// there for "offer" documents.
//
// The index mapping registers:
//   - a tokenizer "apec" that wraps the unicode tokenizer and keeps every entry
//     of indexExceptions (matched case-insensitively) as a single token,
//   - a token map "apec_tokens" built from stopWords and a stop filter
//     "apec_stop" based on it,
//   - the analyzers "fr" and "fr_html", which share the same token filter chain;
//     "fr_html" additionally applies the HTML character filter.
//
// The static "offer" document mapping, also installed as the default mapping,
// indexes "html" with "fr_html" and "title" with "fr" without storing them,
// and stores "date" without indexing it.
//
// The index uses the upsidedown index type on boltdb with "nosync" enabled.
// Any error from removing dir, registering analysis components or creating
// the index is returned.
func NewOfferIndex(dir string) (bleve.Index, error) {
	err := os.RemoveAll(dir)
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}

	// Build one case-insensitive alternation out of the literal exceptions.
	parts := make([]string, 0, len(indexExceptions))
	for _, exc := range indexExceptions {
		parts = append(parts, regexp.QuoteMeta(exc))
	}
	pattern := strings.Join(parts, "|")
	pattern = "(?i)(?:" + pattern + ")"

	m := bleve.NewIndexMapping()
	apecTokenizer := "apec"
	err = m.AddCustomTokenizer(apecTokenizer, map[string]interface{}{
		"type":       exception.Name,
		"exceptions": []string{pattern},
		"tokenizer":  bleveuni.Name,
	})
	if err != nil {
		return nil, err
	}

	apecTokens := "apec_tokens"
	err = m.AddCustomTokenMap(apecTokens, map[string]interface{}{
		"type":   tokenmap.Name,
		"tokens": stopWords,
	})
	if err != nil {
		return nil, err
	}

	apecStop := "apec_stop"
	err = m.AddCustomTokenFilter(apecStop, map[string]interface{}{
		"type":           stop.Name,
		"stop_token_map": apecTokens,
	})
	if err != nil {
		return nil, err
	}

	frTokens := []string{
		lowercase.Name,
		fr.ElisionName,
		fr.StopName,
		fr.LightStemmerName,
		apecStop,
	}
	// Named so that the local does not shadow the imported fr package.
	frAnalyzer := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     apecTokenizer,
		"token_filters": frTokens,
	}
	frHTMLAnalyzer := map[string]interface{}{
		"type": custom.Name,
		"char_filters": []string{
			html.Name,
		},
		"tokenizer":     apecTokenizer,
		"token_filters": frTokens,
	}
	err = m.AddCustomAnalyzer("fr", frAnalyzer)
	if err != nil {
		return nil, fmt.Errorf("failed to register analyzer fr: %s", err)
	}
	err = m.AddCustomAnalyzer("fr_html", frHTMLAnalyzer)
	if err != nil {
		return nil, fmt.Errorf("failed to register analyzer fr_html: %s", err)
	}

	htmlFr := bleve.NewTextFieldMapping()
	htmlFr.Store = false
	htmlFr.IncludeInAll = false
	htmlFr.IncludeTermVectors = false
	htmlFr.Analyzer = "fr_html"

	textFr := bleve.NewTextFieldMapping()
	textFr.Store = false
	textFr.IncludeInAll = false
	textFr.IncludeTermVectors = false
	textFr.Analyzer = "fr"

	date := bleve.NewDateTimeFieldMapping()
	date.Index = false
	date.Store = true
	date.IncludeInAll = false
	date.IncludeTermVectors = false

	offer := bleve.NewDocumentStaticMapping()
	offer.Dynamic = false
	offer.AddFieldMappingsAt("html", htmlFr)
	offer.AddFieldMappingsAt("title", textFr)
	offer.AddFieldMappingsAt("date", date)

	m.AddDocumentMapping("offer", offer)
	m.DefaultMapping = offer

	index, err := bleve.NewUsing(dir, m, upsidedown.Name, boltdb.Name,
		map[string]interface{}{
			"nosync": true,
		})
	if err != nil {
		return nil, err
	}
	return index, nil
}