Exemple #1
0
// indexBeer reads every entry of the directory named by *jsonDir and adds its
// raw bytes to the index i, using the file name with its extension stripped as
// the document ID.
//
// Documents are submitted in batches: a batch is executed as soon as it holds
// *batchSize documents, and whatever remains after the last directory entry is
// flushed before returning so that a trailing partial batch is not lost.
// Progress is logged every 1000 documents and once more when finished.
//
// It returns the first error hit while listing the directory, reading a file,
// or executing a batch.
func indexBeer(i bleve.Index) error {
	// open the directory
	dirEntries, err := ioutil.ReadDir(*jsonDir)
	if err != nil {
		return err
	}

	// walk the directory entries for indexing
	log.Printf("Indexing...")
	count := 0
	startTime := time.Now()

	// logProgress reports the running document count, the elapsed time and
	// the mean time spent per document.
	logProgress := func() {
		indexDuration := time.Since(startTime)
		indexDurationSeconds := float64(indexDuration) / float64(time.Second)
		// Guard the division so an empty directory logs 0.00 rather than NaN.
		timePerDoc := 0.0
		if count > 0 {
			timePerDoc = float64(indexDuration) / float64(count)
		}
		log.Printf("Indexed %d documents, in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, timePerDoc/float64(time.Millisecond))
	}

	batch := bleve.NewBatch()
	batchCount := 0
	for _, dirEntry := range dirEntries {
		filename := dirEntry.Name()

		// read the bytes
		jsonBytes, err := ioutil.ReadFile(filepath.Join(*jsonDir, filename))
		if err != nil {
			return err
		}

		// the document ID is the file name without its extension
		ext := filepath.Ext(filename)
		docID := filename[:len(filename)-len(ext)]
		batch.Index(docID, jsonBytes)
		batchCount++

		// execute the batch once it is full and start a fresh one
		if batchCount >= *batchSize {
			if err := i.Batch(batch); err != nil {
				return err
			}
			batch = bleve.NewBatch()
			batchCount = 0
		}

		count++
		if count%1000 == 0 {
			logProgress()
		}
	}

	// flush the last, partially filled batch; without this the final
	// documents would be counted but never written to the index
	if batchCount > 0 {
		if err := i.Batch(batch); err != nil {
			return err
		}
	}

	logProgress()
	return nil
}
Exemple #2
0
// indexTPB streams the gzip-compressed, '|'-separated dump file named by *dump
// and indexes one tpbDoc per 7-field record into i.
//
// For each record, field 0 becomes the name, field 1 the size, field 2 both
// the document ID and the hash, and field 4 the category. A size that does not
// parse as an integer is reported and stored as 0. Records rejected by the CSV
// parser are skipped.
//
// Documents are submitted in batches of *batchSize, with the remaining partial
// batch flushed at the end. Reading stops at end of input or, when *indexLimit
// is positive, once that many documents have been queued. Progress is logged
// every 1000 documents and once more when finished.
//
// It returns an error if the dump cannot be opened or decompressed, if reading
// fails for a reason other than a malformed record, or if a batch fails.
func indexTPB(i bleve.Index) error {
	batch := bleve.NewBatch()
	batchCount := 0

	gzDumpFile, err := os.Open(*dump)
	if err != nil {
		return err
	}
	defer gzDumpFile.Close()

	dumpFile, err := gzip.NewReader(gzDumpFile)
	if err != nil {
		return err
	}
	defer dumpFile.Close()

	reader := csv.NewReader(dumpFile)
	reader.FieldsPerRecord = 7
	reader.Comma = '|'

	count := 0
	startTime := time.Now()

	// logProgress reports the running document count, the elapsed time and
	// the mean time spent per document, using the supplied message format.
	logProgress := func(format string) {
		indexDuration := time.Since(startTime)
		indexDurationSeconds := float64(indexDuration) / float64(time.Second)
		// Guard the division so an empty dump logs 0.00 rather than NaN.
		timePerDoc := 0.0
		if count > 0 {
			timePerDoc = float64(indexDuration) / float64(count)
		}
		log.Printf(format, count, indexDurationSeconds, timePerDoc/float64(time.Millisecond))
	}

	log.Printf("Indexing...")

	for {
		r, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// A malformed record only affects that record, so skip it and
			// carry on. Any other failure (I/O, corrupt or truncated gzip
			// stream) would be returned again on the next Read, so stop
			// instead of looping forever.
			if _, ok := err.(*csv.ParseError); ok {
				continue
			}
			return err
		}

		size, err := strconv.ParseInt(r[1], 10, 0)
		if err != nil {
			// Keep indexing the record, but with an explicit zero size.
			fmt.Printf("invalid size %q: %v\n", r[1], err)
			size = 0
		}

		batch.Index(r[2], tpbDoc{
			Name:     r[0],
			Size:     size,
			Hash:     r[2],
			Category: r[4],
			Type:     "torrent",
		})
		batchCount++

		// execute the batch once it is full and start a fresh one
		if batchCount >= *batchSize {
			if err := i.Batch(batch); err != nil {
				return err
			}
			batch = bleve.NewBatch()
			batchCount = 0
		}

		count++

		if count%1000 == 0 {
			logProgress("Indexed %d documents in %.2fs (average %.2fms/doc)")
		}

		if *indexLimit > 0 && count >= *indexLimit {
			break
		}
	}

	// flush the last batch; report failure to the caller like every other
	// batch error so the deferred closes still run
	if batchCount > 0 {
		if err := i.Batch(batch); err != nil {
			return err
		}
	}

	logProgress("Finished indexing %d documents in %.2fs (average %.2fms/doc)")

	// NOTE(review): the other flags in this function are dereferenced
	// (*dump, *batchSize, *indexLimit) but bind is not; if bind is a flag
	// pointer this prints an address rather than the listen address — confirm
	// against its declaration.
	log.Printf("Still listening on http://%v", bind)

	return nil
}