func indexBeer(i bleve.Index) error { // open the directory dirEntries, err := ioutil.ReadDir(*jsonDir) if err != nil { return err } // walk the directory entries for indexing log.Printf("Indexing...") count := 0 startTime := time.Now() batch := bleve.NewBatch() batchCount := 0 for _, dirEntry := range dirEntries { filename := dirEntry.Name() // read the bytes jsonBytes, err := ioutil.ReadFile(*jsonDir + "/" + filename) if err != nil { return err } // // shred them into a document ext := filepath.Ext(filename) docId := filename[:(len(filename) - len(ext))] batch.Index(docId, jsonBytes) batchCount++ if batchCount >= *batchSize { err = i.Batch(batch) if err != nil { return err } batch = bleve.NewBatch() batchCount = 0 } count++ if count%1000 == 0 { indexDuration := time.Since(startTime) indexDurationSeconds := float64(indexDuration) / float64(time.Second) timePerDoc := float64(indexDuration) / float64(count) log.Printf("Indexed %d documents, in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, timePerDoc/float64(time.Millisecond)) } } indexDuration := time.Since(startTime) indexDurationSeconds := float64(indexDuration) / float64(time.Second) timePerDoc := float64(indexDuration) / float64(count) log.Printf("Indexed %d documents, in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, timePerDoc/float64(time.Millisecond)) return nil }
func indexTPB(i bleve.Index) error { batch := bleve.NewBatch() batchCount := 0 gzDumpFile, err := os.Open(*dump) if err != nil { return err } defer gzDumpFile.Close() dumpFile, err := gzip.NewReader(gzDumpFile) if err != nil { return err } reader := csv.NewReader(dumpFile) reader.FieldsPerRecord = 7 reader.Comma = '|' count := 0 startTime := time.Now() log.Printf("Indexing...") for { r, err := reader.Read() if err == io.EOF { break } else if err != nil { continue } size, err := strconv.ParseInt(r[1], 10, 0) if err != nil { fmt.Println("%#v", size) size = 0 } batch.Index(r[2], tpbDoc{ Name: r[0], Size: size, Hash: r[2], Category: r[4], Type: "torrent", }) batchCount++ if batchCount >= *batchSize { err = i.Batch(batch) if err != nil { return err } batch = bleve.NewBatch() batchCount = 0 } count++ if count%1000 == 0 { indexDuration := time.Since(startTime) indexDurationSeconds := float64(indexDuration) / float64(time.Second) timePerDoc := float64(indexDuration) / float64(count) log.Printf("Indexed %d documents in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, timePerDoc/float64(time.Millisecond)) } if *indexLimit > 0 && count >= *indexLimit { break } } // flush the last batch if batchCount > 0 { err := i.Batch(batch) if err != nil { log.Fatal(err) } } indexDuration := time.Since(startTime) indexDurationSeconds := float64(indexDuration) / float64(time.Second) timePerDoc := float64(indexDuration) / float64(count) log.Printf("Finished indexing %d documents in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, timePerDoc/float64(time.Millisecond)) log.Printf("Still listening on http://%v", bind) return nil }