Example #1
0
// main is a minimal end-to-end demo: it writes a single document with a
// stored text field foo=bar into the "test_index" directory, reopens the
// index for reading, runs a term query for foo:bar and prints each hit
// together with the stored value of its "foo" field.
//
// Every step that can fail is checked. On the first failure the error is
// printed to stderr and the process exits with status 1.
func main() {
	util.SetDefaultInfoStream(util.NewPrintStreamInfoStream(os.Stdout))
	index.DefaultSimilarity = func() index.Similarity {
		return search.NewDefaultSimilarity()
	}

	// check aborts the program when a step reports an error, so that later
	// steps never operate on a nil directory, writer, reader or result.
	check := func(step string, err error) {
		if err != nil {
			fmt.Fprintf(os.Stderr, "%s: %v\n", step, err)
			os.Exit(1)
		}
	}

	directory, err := store.OpenFSDirectory("test_index")
	check("open directory", err)

	analyzer := std.NewStandardAnalyzer()
	conf := index.NewIndexWriterConfig(util.VERSION_LATEST, analyzer)
	writer, err := index.NewIndexWriter(directory, conf)
	check("create index writer", err)

	d := document.NewDocument()
	d.Add(document.NewTextFieldFromString("foo", "bar", document.STORE_YES))
	check("add document", writer.AddDocument(d.Fields()))
	check("close index writer", writer.Close()) // ensure index is written

	reader, err := index.OpenDirectoryReader(directory)
	check("open index reader", err)
	searcher := search.NewIndexSearcher(reader)

	q := search.NewTermQuery(index.NewTerm("foo", "bar"))
	res, err := searcher.Search(q, nil, 1000)
	check("search", err)

	fmt.Printf("Found %v hit(s).\n", res.TotalHits)
	for _, hit := range res.ScoreDocs {
		fmt.Printf("Doc %v score: %v\n", hit.Doc, hit.Score)
		doc, err := reader.Document(hit.Doc)
		check("load document", err)
		fmt.Printf("foo -> %v\n", doc.Get("foo"))
	}

	check("close index reader", reader.Close())
}
Example #2
0
// newRandomIndexWriteConfig creates an index writer config for version v and
// analyzer a, then randomizes its settings using the supplied random source r.
//
// The similarity is taken from ClassEnvRule. The randomized settings are the
// merge scheduler (serial, rarely a concurrent one with random limits, or
// otherwise whatever the config already had), max buffered docs, merge
// policy, merged segment warmer, compound-file use, reader pooling and the
// reader terms index divisor.
//
// All random choices are drawn from r, so a caller that passes an identically
// seeded r draws the same sequence of values from it.
func newRandomIndexWriteConfig(r *rand.Rand, v util.Version, a analysis.Analyzer) *index.IndexWriterConfig {
	c := index.NewIndexWriterConfig(v, a)
	c.SetSimilarity(ClassEnvRule.similarity)
	if VERBOSE {
		// Even though TestRuleSetupAndRestoreClassEnv calls
		// infoStream.SetDefault, we do it again here so that the
		// PrintStreamInfoStream.messageID increments so that when there
		// are separate instances of IndexWriter created we see "IW 0",
		// "IW 1", "IW 2", ... instead of just always "IW 0":
		c.SetInfoStream(newThreadNameFixingPrintStreamInfoStream(os.Stdout))
	}

	if r.Intn(2) == 0 {
		c.SetMergeScheduler(index.NewSerialMergeScheduler())
	} else if Rarely(r) {
		log.Println("Use ConcurrentMergeScheduler")
		// Draw the limits from r (not the global Random()) so this function
		// honors its contract of using only the specified random source.
		maxRoutineCount := NextInt(r, 1, 4)
		maxMergeCount := NextInt(r, maxRoutineCount, maxRoutineCount+4)
		cms := index.NewConcurrentMergeScheduler()
		cms.SetMaxMergesAndRoutines(maxMergeCount, maxRoutineCount)
		c.SetMergeScheduler(cms)
	}
	if r.Intn(2) == 0 {
		if Rarely(r) {
			log.Println("Use crazy value for buffered docs")
			// crazy value
			c.SetMaxBufferedDocs(NextInt(r, 2, 15))
		} else {
			// reasonable value
			c.SetMaxBufferedDocs(NextInt(r, 16, 1000))
		}
	}
	// Go doesn't need thread-affinity state.
	// if r.Intn(2) == 0 {
	// 	maxNumRoutineState := either(Rarely(r),
	// 		NextInt(r, 5, 20), // crazy value
	// 		NextInt(r, 1, 4))  // reasonable value

	// 	if Rarely(r) {
	// 		// random thread pool
	// 		c.SetIndexerThreadPool(newRandomDocumentsWriterPerThreadPool(maxNumRoutineState, r))
	// 	} else {
	// 		// random thread pool
	// 		c.SetMaxThreadStates(maxNumRoutineState)
	// 	}
	// }

	c.SetMergePolicy(newMergePolicy(r))

	if Rarely(r) {
		log.Println("Use SimpleMergedSegmentWarmer")
		c.SetMergedSegmentWarmer(index.NewSimpleMergedSegmentWarmer(c.InfoStream()))
	}
	c.SetUseCompoundFile(r.Intn(2) == 0)
	// c.SetUseCompoundFile(false)
	c.SetReaderPooling(r.Intn(2) == 0)
	c.SetReaderTermsIndexDivisor(NextInt(r, 1, 4))
	return c
}
Example #3
0
// TestBasicIndexAndSearch indexes a single foo=bar document into an on-disk
// index at ".gltest", searches for it with a term query carrying a negative
// boost, and checks that the single hit has a negative score which agrees
// with the searcher's explanation for that document.
func TestBasicIndexAndSearch(t *testing.T) {
	// The query under test: foo:bar with a negative boost.
	query := search.NewTermQuery(index.NewTerm("foo", "bar"))
	query.SetBoost(-42)

	// Drop whatever a previous run left in the index directory.
	os.RemoveAll(".gltest")

	dir, err := store.OpenFSDirectory(".gltest")
	It(t).Should("has no error: %v", err).Assert(err == nil)
	It(t).Should("has valid directory").Assert(dir != nil)
	fmt.Println("Directory", dir)
	defer dir.Close()

	// Index exactly one document.
	stdAnalyzer := std.NewStandardAnalyzer()
	iwc := index.NewIndexWriterConfig(util.VERSION_LATEST, stdAnalyzer)

	iw, err := index.NewIndexWriter(dir, iwc)
	It(t).Should("has no error: %v", err).Assert(err == nil)

	oneDoc := docu.NewDocument()
	oneDoc.Add(docu.NewTextFieldFromString("foo", "bar", docu.STORE_YES))
	err = iw.AddDocument(oneDoc.Fields())
	It(t).Should("has no error: %v", err).Assert(err == nil)
	// Closing the writer makes sure the index is written before reading it.
	err = iw.Close()
	It(t).Should("has no error: %v", err).Assert(err == nil)

	// Reopen the index for reading and run the query.
	ir, err := index.OpenDirectoryReader(dir)
	It(t).Should("has no error: %v", err).Assert(err == nil)
	defer ir.Close()

	idxSearcher := search.NewIndexSearcher(ir)
	topDocs, err := idxSearcher.Search(query, nil, 1000)
	It(t).Should("has no error: %v", err).Assert(err == nil)
	found := topDocs.ScoreDocs
	It(t).Should("expect 1 hits, but %v only.", len(found)).Assert(len(found) == 1)
	It(t).Should("expect score to be negative (got %v)", found[0].Score).Verify(found[0].Score < 0)

	// The explanation must match the hit and report (nearly) the same score.
	explanation, err := idxSearcher.Explain(query, found[0].Doc)
	It(t).Should("has no error: %v", err).Assert(err == nil)
	It(t).Should("score doesn't match explanation (%v vs %v)", found[0].Score, explanation.Value()).Verify(isSimilar(found[0].Score, explanation.Value(), 0.001))
	It(t).Should("explain doesn't think doc is a match").Verify(explanation.IsMatch())
}
// Close shuts the wrapper down and closes the wrapped directory.
//
// In order, it:
//  1. Snapshots openFilesDeleted as the set of pending deletions.
//  2. Panics if noDeleteOpenFile is set and files or locks are still open.
//  3. Marks the wrapper as closed.
//  4. If checkIndexOnClose is set and the directory contains an index:
//     disables random errors, corrupts unsynced files via _crash, runs
//     CheckIndex, and, when assertNoUnreferencedFilesOnClose is set, checks
//     that opening and rolling back an IndexWriter leaves the file listing
//     unchanged and that opening/closing an IndexWriter does not change the
//     document count.
//  5. Closes the wrapped directory and returns its error.
//
// Any error from listing files, crashing, or opening/closing writers and
// readers is returned immediately; in that case the wrapped directory is
// not closed. Several mismatch cases are not ported yet and panic with
// "not implemented".
func (w *MockDirectoryWrapper) Close() error {
	w.Lock()
	defer w.Unlock()

	// files that we tried to delete, but couldn't because readers were open
	// all that matters is that we tried! (they will eventually go away)
	pendingDeletions := make(map[string]bool)
	for k, v := range w.openFilesDeleted {
		pendingDeletions[k] = v
	}

	w.maybeYield()

	if w.openFiles == nil {
		w.openFiles = make(map[string]int)
		w.openFilesDeleted = make(map[string]bool)
	}
	nOpenFiles := len(w.openFiles)

	if w.noDeleteOpenFile && nOpenFiles > 0 {
		// print the first one as it's very verbose otherwise
		var cause error
		for _, v := range w.openFileHandles {
			cause = v
			break
		}
		panic(mergeError(errors.New(fmt.Sprintf(
			"MockDirectoryWrapper: cannot close: there are still open files: %v",
			w.openFiles)), cause).Error())
	}

	nOpenLocks := func() int {
		w.openLocksLock.Lock()
		defer w.openLocksLock.Unlock()
		return len(w.openLocks)
	}()
	if w.noDeleteOpenFile && nOpenLocks > 0 {
		panic(fmt.Sprintf("MockDirectoryWrapper: cannot close: there are still open locks: %v", w.openLocks))
	}

	w.isOpen = false
	if w.checkIndexOnClose {
		w.randomErrorRate = 0
		w.randomErrorRateOnOpen = 0
		files, err := w.listAll()
		if err != nil {
			return err
		}
		if index.IsIndexFileExists(files) {
			log.Println("\nNOTE: MockDirectoryWrapper: now crash")
			err = w._crash() // corrupt any unsynced-files
			if err != nil {
				return err
			}
			log.Println("\nNOTE: MockDirectoryWrapper: now run CheckIndex")
			func() {
				// CheckIndex may access synchronized methods, so release the
				// lock while it runs. Re-acquire it via defer so that, even
				// if CheckIndex panics, the outer deferred Unlock still has a
				// held lock to release.
				w.Unlock()
				defer w.Lock()
				CheckIndex(w, w.crossCheckTermVectorsOnClose)
			}()

			// TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles
			if w.assertNoUnreferencedFilesOnClose {
				// now look for unreferenced files: discount ones that we tried to delete but could not
				// NOTE(review): this calls the exported ListAll while the
				// wrapper lock is held, whereas the listing above used
				// listAll; confirm ListAll does not try to take the lock.
				all, err := w.ListAll()
				if err != nil {
					return err
				}
				allFiles := make(map[string]bool)
				for _, name := range all {
					allFiles[name] = true
				}
				for name := range pendingDeletions {
					delete(allFiles, name)
				}
				startFiles := make([]string, 0, len(allFiles))
				for k := range allFiles {
					startFiles = append(startFiles, k)
				}
				iwc := index.NewIndexWriterConfig(TEST_VERSION_CURRENT, nil)
				iwc.SetIndexDeletionPolicy(index.NO_DELETION_POLICY)
				iw, err := index.NewIndexWriter(w.Directory, iwc)
				if err != nil {
					return err
				}
				err = iw.Rollback()
				if err != nil {
					return err
				}
				endFiles, err := w.Directory.ListAll()
				if err != nil {
					return err
				}

				// Plain membership scan: sort.SearchStrings returns an
				// insertion index (never negative) and needs sorted input,
				// so it cannot be used as a "contains" test here.
				hasSegmentsGenFile := false
				for _, name := range endFiles {
					if name == index.INDEX_FILENAME_SEGMENTS_GEN {
						hasSegmentsGenFile = true
						break
					}
				}
				if pendingDeletions["segments.gen"] && hasSegmentsGenFile {
					panic("not implemented yet")
				}

				// its possible we cannot delete the segments_N on windows if someone has it open and
				// maybe other files too, depending on timing. normally someone on windows wouldnt have
				// an issue (IFD would nuke this stuff eventually), but we pass NoDeletionPolicy...
				// Range over the keys so the log shows the file name rather
				// than the map's boolean value.
				for file := range pendingDeletions {
					log.Println(file)
					panic("not implemented yet")
				}

				sort.Strings(startFiles)
				startFiles = uniqueStrings(startFiles)
				sort.Strings(endFiles)
				endFiles = uniqueStrings(endFiles)

				if !reflect.DeepEqual(startFiles, endFiles) {
					panic("not implemented")
				}

				// Opening and closing an IndexWriter must not change the
				// number of documents visible to a reader.
				ir1, err := index.OpenDirectoryReader(w)
				if err != nil {
					return err
				}
				numDocs1 := ir1.NumDocs()
				err = ir1.Close()
				if err != nil {
					return err
				}
				iw, err = index.NewIndexWriter(w, index.NewIndexWriterConfig(TEST_VERSION_CURRENT, nil))
				if err != nil {
					return err
				}
				err = iw.Close()
				if err != nil {
					return err
				}
				ir2, err := index.OpenDirectoryReader(w)
				if err != nil {
					return err
				}
				numDocs2 := ir2.NumDocs()
				err = ir2.Close()
				if err != nil {
					return err
				}
				assert2(numDocs1 == numDocs2, fmt.Sprintf("numDocs changed after opening/closing IW: before=%v after=%v", numDocs1, numDocs2))
			}
		}
	}
	return w.Directory.Close()
}