func main() { util.SetDefaultInfoStream(util.NewPrintStreamInfoStream(os.Stdout)) index.DefaultSimilarity = func() index.Similarity { return search.NewDefaultSimilarity() } directory, _ := store.OpenFSDirectory("test_index") analyzer := std.NewStandardAnalyzer() conf := index.NewIndexWriterConfig(util.VERSION_LATEST, analyzer) writer, _ := index.NewIndexWriter(directory, conf) d := document.NewDocument() d.Add(document.NewTextFieldFromString("foo", "bar", document.STORE_YES)) writer.AddDocument(d.Fields()) writer.Close() // ensure index is written reader, _ := index.OpenDirectoryReader(directory) searcher := search.NewIndexSearcher(reader) q := search.NewTermQuery(index.NewTerm("foo", "bar")) res, _ := searcher.Search(q, nil, 1000) fmt.Printf("Found %v hit(s).\n", res.TotalHits) for _, hit := range res.ScoreDocs { fmt.Printf("Doc %v score: %v\n", hit.Doc, hit.Score) doc, _ := reader.Document(hit.Doc) fmt.Printf("foo -> %v\n", doc.Get("foo")) } }
// newRandomIndexWriteConfig creates an IndexWriterConfig with randomized
// defaults drawn from r, so repeated test runs exercise different writer
// configurations (merge scheduler, max buffered docs, merge policy, compound
// files, reader pooling, terms-index divisor).
//
// NOTE(review): each branch below consumes values from the RNG in a fixed
// order; reordering statements would change which configuration a given seed
// produces, so the sequence must be preserved for test reproducibility.
//
// Create a new index write config with random defaults using the specified random
func newRandomIndexWriteConfig(r *rand.Rand, v util.Version, a analysis.Analyzer) *index.IndexWriterConfig {
	c := index.NewIndexWriterConfig(v, a)
	// Share the class-level similarity so all writers in a test run agree.
	c.SetSimilarity(ClassEnvRule.similarity)
	if VERBOSE {
		// Even through TestRuleSetupAndRestoreClassEnv calls
		// infoStream.SetDefault, we do it again here so that the
		// PrintStreamInfoStream.messageID increments so that when there
		// are separate instance of IndexWriter created we see "IW 0",
		// "IW 1", "IW 2", ... instead of just always "IW 0":
		c.SetInfoStream(newThreadNameFixingPrintStreamInfoStream(os.Stdout))
	}
	// 50/50 coin flip: serial merge scheduler; otherwise, rarely, a
	// concurrent scheduler with small random routine/merge limits.
	if r.Intn(2) == 0 {
		c.SetMergeScheduler(index.NewSerialMergeScheduler())
	} else if Rarely(r) {
		log.Println("Use ConcurrentMergeScheduler")
		// NOTE(review): these two draws use the global Random() rather than
		// the supplied r — presumably intentional (extra entropy is fine
		// here), but verify against the upstream port.
		maxRoutineCount := NextInt(Random(), 1, 4)
		maxMergeCount := NextInt(Random(), maxRoutineCount, maxRoutineCount+4)
		cms := index.NewConcurrentMergeScheduler()
		cms.SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount)
		c.SetMergeScheduler(cms)
	}
	// Half the time, override the buffered-docs limit with either an
	// intentionally tiny ("crazy") value or a reasonable one.
	if r.Intn(2) == 0 {
		if Rarely(r) {
			log.Println("Use crazy value for buffered docs")
			// crazy value
			c.SetMaxBufferedDocs(NextInt(r, 2, 15))
		} else {
			// reasonable value
			c.SetMaxBufferedDocs(NextInt(r, 16, 1000))
		}
	}
	// Go doesn't need thread-affinity state.
	// if r.Intn(2) == 0 {
	// 	maxNumRoutineState := either(Rarely(r),
	// 		NextInt(r, 5, 20), // crazy value
	// 		NextInt(r, 1, 4)) // reasonable value
	// 	if Rarely(r) {
	// 		// reandom thread pool
	// 		c.SetIndexerThreadPool(newRandomDocumentsWriterPerThreadPool(maxNumRoutineState, r))
	// 	} else {
	// 		// random thread pool
	// 		c.SetMaxThreadStates(maxNumRoutineState)
	// 	}
	// }
	c.SetMergePolicy(newMergePolicy(r))
	if Rarely(r) {
		log.Println("Use SimpleMergedSegmentWarmer")
		c.SetMergedSegmentWarmer(index.NewSimpleMergedSegmentWarmer(c.InfoStream()))
	}
	c.SetUseCompoundFile(r.Intn(2) == 0)
	// c.SetUseCompoundFile(false)
	c.SetReaderPooling(r.Intn(2) == 0)
	c.SetReaderTermsIndexDivisor(NextInt(r, 1, 4))
	return c
}
func TestBasicIndexAndSearch(t *testing.T) { q := search.NewTermQuery(index.NewTerm("foo", "bar")) q.SetBoost(-42) os.RemoveAll(".gltest") directory, err := store.OpenFSDirectory(".gltest") It(t).Should("has no error: %v", err).Assert(err == nil) It(t).Should("has valid directory").Assert(directory != nil) fmt.Println("Directory", directory) defer directory.Close() analyzer := std.NewStandardAnalyzer() conf := index.NewIndexWriterConfig(util.VERSION_LATEST, analyzer) writer, err := index.NewIndexWriter(directory, conf) It(t).Should("has no error: %v", err).Assert(err == nil) d := docu.NewDocument() d.Add(docu.NewTextFieldFromString("foo", "bar", docu.STORE_YES)) err = writer.AddDocument(d.Fields()) It(t).Should("has no error: %v", err).Assert(err == nil) err = writer.Close() // ensure index is written It(t).Should("has no error: %v", err).Assert(err == nil) reader, err := index.OpenDirectoryReader(directory) It(t).Should("has no error: %v", err).Assert(err == nil) defer reader.Close() searcher := search.NewIndexSearcher(reader) res, err := searcher.Search(q, nil, 1000) It(t).Should("has no error: %v", err).Assert(err == nil) hits := res.ScoreDocs It(t).Should("expect 1 hits, but %v only.", len(hits)).Assert(len(hits) == 1) It(t).Should("expect score to be negative (got %v)", hits[0].Score).Verify(hits[0].Score < 0) explain, err := searcher.Explain(q, hits[0].Doc) It(t).Should("has no error: %v", err).Assert(err == nil) It(t).Should("score doesn't match explanation (%v vs %v)", hits[0].Score, explain.Value()).Verify(isSimilar(hits[0].Score, explain.Value(), 0.001)) It(t).Should("explain doesn't think doc is a match").Verify(explain.IsMatch()) }
func (w *MockDirectoryWrapper) Close() error { w.Lock() defer w.Unlock() // files that we tried to delete, but couldn't because reader were open // all that matters is that we tried! (they will eventually go away) pendingDeletions := make(map[string]bool) for k, v := range w.openFilesDeleted { pendingDeletions[k] = v } w.maybeYield() if w.openFiles == nil { w.openFiles = make(map[string]int) w.openFilesDeleted = make(map[string]bool) } nOpenFiles := len(w.openFiles) if w.noDeleteOpenFile && nOpenFiles > 0 { // print the first one as its very verbose otherwise var cause error for _, v := range w.openFileHandles { cause = v break } panic(mergeError(errors.New(fmt.Sprintf( "MockDirectoryWrapper: cannot close: there are still open files: %v", w.openFiles)), cause).Error()) } nOpenLocks := func() int { w.openLocksLock.Lock() defer w.openLocksLock.Unlock() return len(w.openLocks) }() if w.noDeleteOpenFile && nOpenLocks > 0 { panic(fmt.Sprintf("MockDirectoryWrapper: cannot close: there are still open locks: %v", w.openLocks)) } w.isOpen = false if w.checkIndexOnClose { w.randomErrorRate = 0 w.randomErrorRateOnOpen = 0 files, err := w.listAll() if err != nil { return err } if index.IsIndexFileExists(files) { log.Println("\nNOTE: MockDirectoryWrapper: now crash") err = w._crash() // corrupt any unsynced-files if err != nil { return err } log.Println("\nNOTE: MockDirectoryWrapper: now run CheckIndex") w.Unlock() // CheckIndex may access synchronized method CheckIndex(w, w.crossCheckTermVectorsOnClose) w.Lock() // CheckIndex may access synchronized method // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles if w.assertNoUnreferencedFilesOnClose { // now look for unreferenced files: discount ones that we tried to delete but could not all, err := w.ListAll() if err != nil { return err } allFiles := make(map[string]bool) for _, name := range all { allFiles[name] = true } for name, _ := range pendingDeletions { delete(allFiles, name) } startFiles := make([]string, 
0, len(allFiles)) for k, _ := range allFiles { startFiles = append(startFiles, k) } iwc := index.NewIndexWriterConfig(TEST_VERSION_CURRENT, nil) iwc.SetIndexDeletionPolicy(index.NO_DELETION_POLICY) iw, err := index.NewIndexWriter(w.Directory, iwc) if err != nil { return err } err = iw.Rollback() if err != nil { return err } endFiles, err := w.Directory.ListAll() if err != nil { return err } hasSegmentsGenFile := sort.SearchStrings(endFiles, index.INDEX_FILENAME_SEGMENTS_GEN) >= 0 if pendingDeletions["segments.gen"] && hasSegmentsGenFile { panic("not implemented yet") } // its possible we cannot delete the segments_N on windows if someone has it open and // maybe other files too, depending on timing. normally someone on windows wouldnt have // an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy... for _, file := range pendingDeletions { log.Println(file) panic("not implemented yet") } sort.Strings(startFiles) startFiles = uniqueStrings(startFiles) sort.Strings(endFiles) endFiles = uniqueStrings(endFiles) if !reflect.DeepEqual(startFiles, endFiles) { panic("not implemented") } ir1, err := index.OpenDirectoryReader(w) if err != nil { return err } numDocs1 := ir1.NumDocs() err = ir1.Close() if err != nil { return err } iw, err = index.NewIndexWriter(w, index.NewIndexWriterConfig(TEST_VERSION_CURRENT, nil)) if err != nil { return err } err = iw.Close() if err != nil { return err } ir2, err := index.OpenDirectoryReader(w) if err != nil { return err } numDocs2 := ir2.NumDocs() err = ir2.Close() if err != nil { return err } assert2(numDocs1 == numDocs2, fmt.Sprintf("numDocs changed after opening/closing IW: before=%v after=%v", numDocs1, numDocs2)) } } } return w.Directory.Close() }