func TestConstantScorerWithQueryNorm(t *testing.T) { scorer := NewConstantScorer(1, 1, true) scorer.SetQueryNorm(2.0) tests := []struct { termMatch *index.TermFieldDoc result *search.DocumentMatch }{ { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 1, Norm: 1.0, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: 2.0, Sort: []string{}, Expl: &search.Explanation{ Value: 2.0, Message: "weight(^1.000000), product of:", Children: []*search.Explanation{ { Value: 2.0, Message: "ConstantScore()^1.000000, product of:", Children: []*search.Explanation{ { Value: 1, Message: "boost", }, { Value: 2, Message: "queryNorm", }, }, }, { Value: 1.0, Message: "ConstantScore()", }, }, }, }, }, } for _, test := range tests { ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch.ID) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) } } }
func TestIndexTermReaderCompositeFields(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() doc := document.NewDocument("1") doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)) doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors)) doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.IndexField|document.IncludeTermVectors)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() termFieldReader, err := indexReader.TermFieldReader([]byte("mister"), "_all", true, true, true) if err != nil { t.Error(err) } tfd, err := termFieldReader.Next(nil) for tfd != nil && err == nil { if !tfd.ID.Equals(index.IndexInternalID("1")) { t.Errorf("expected to find document id 1") } tfd, err = termFieldReader.Next(nil) } if err != nil { t.Error(err) } }
func TestConstantScorer(t *testing.T) { scorer := NewConstantScorer(1, 1, true) tests := []struct { termMatch *index.TermFieldDoc result *search.DocumentMatch }{ // test some simple math { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 1, Norm: 1.0, Vectors: []*index.TermFieldVector{ { Field: "desc", Pos: 1, Start: 0, End: 4, }, }, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: 1.0, Expl: &search.Explanation{ Value: 1.0, Message: "ConstantScore()", }, Sort: []string{}, }, }, } for _, test := range tests { ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch.ID) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) } } }
func TestIndexDocumentFieldTerms(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() doc := document.NewDocument("1") doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors)) doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}) if err != nil { t.Error(err) } expectedFieldTerms := index.FieldTerms{ "name": []string{"test"}, "title": []string{"mister"}, } if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) { t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms) } }
func benchHelper(numOfMatches int, cc createCollector, b *testing.B) { matches := make([]*search.DocumentMatch, 0, numOfMatches) for i := 0; i < numOfMatches; i++ { matches = append(matches, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID(strconv.Itoa(i)), Score: rand.Float64(), }) } b.ResetTimer() for run := 0; run < b.N; run++ { searcher := &stubSearcher{ matches: matches, } collector := cc() err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { b.Fatal(err) } } }
func TestDisjunctionAdvance(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) if err != nil { t.Fatal(err) } ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize(), 0), } match, err := martyOrDustinSearcher.Advance(ctx, index.IndexInternalID("3")) if err != nil { t.Errorf("unexpected error: %v", err) } if match == nil { t.Errorf("expected 3, got nil") } }
func TestIndexDocIdReader(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() var expectedCount uint64 doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test"))) doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Error(err) } }() // first get all doc ids reader, err := indexReader.DocIDReaderAll() if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader.Close() if err != nil { t.Fatal(err) } }() id, err := reader.Next() count := uint64(0) for id != nil { count++ id, err = reader.Next() } if count != expectedCount { t.Errorf("expected %d, got %d", expectedCount, count) } // try it again, but jump to the second doc this time reader2, err := indexReader.DocIDReaderAll() if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader2.Close() if err != nil { t.Error(err) } }() id, err = reader2.Advance(index.IndexInternalID("2")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("2")) { t.Errorf("expected to find id '2', got '%s'", id) } id, err = 
reader2.Advance(index.IndexInternalID("3")) if err != nil { t.Error(err) } if id != nil { t.Errorf("expected to find id '', got '%s'", id) } }
func TestTermSearcher(t *testing.T) { var queryTerm = "beer" var queryField = "desc" var queryBoost = 3.0 var queryExplain = true analysisQueue := index.NewAnalysisQueue(1) i, err := upsidedown.NewUpsideDownCouch( gtreap.Name, map[string]interface{}{ "path": "", }, analysisQueue) if err != nil { t.Fatal(err) } err = i.Open() if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "a", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "b", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "c", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "d", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "e", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "f", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "g", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "h", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "i", Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } err = i.Update(&document.Document{ ID: "j", Fields: []document.Field{ document.NewTextField("title", []uint64{}, []byte("cat")), }, }) if err != 
nil { t.Fatal(err) } indexReader, err := i.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() searcher, err := NewTermSearcher(indexReader, queryTerm, queryField, queryBoost, queryExplain) if err != nil { t.Fatal(err) } defer func() { err := searcher.Close() if err != nil { t.Fatal(err) } }() searcher.SetQueryNorm(2.0) docCount, err := indexReader.DocCount() if err != nil { t.Fatal(err) } idf := 1.0 + math.Log(float64(docCount)/float64(searcher.Count()+1.0)) expectedQueryWeight := 3 * idf * 3 * idf if expectedQueryWeight != searcher.Weight() { t.Errorf("expected weight %v got %v", expectedQueryWeight, searcher.Weight()) } if searcher.Count() != 9 { t.Errorf("expected count of 9, got %d", searcher.Count()) } ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } docMatch, err := searcher.Next(ctx) if err != nil { t.Errorf("expected result, got %v", err) } if !docMatch.IndexInternalID.Equals(index.IndexInternalID("a")) { t.Errorf("expected result ID to be 'a', got '%s", docMatch.IndexInternalID) } ctx.DocumentMatchPool.Put(docMatch) docMatch, err = searcher.Advance(ctx, index.IndexInternalID("c")) if err != nil { t.Errorf("expected result, got %v", err) } if !docMatch.IndexInternalID.Equals(index.IndexInternalID("c")) { t.Errorf("expected result ID to be 'c' got '%s'", docMatch.IndexInternalID) } // try advancing past end ctx.DocumentMatchPool.Put(docMatch) docMatch, err = searcher.Advance(ctx, index.IndexInternalID("z")) if err != nil { t.Fatal(err) } if docMatch != nil { t.Errorf("expected nil, got %v", docMatch) } // try pushing next past end ctx.DocumentMatchPool.Put(docMatch) docMatch, err = searcher.Next(ctx) if err != nil { t.Fatal(err) } if docMatch != nil { t.Errorf("expected nil, got %v", docMatch) } }
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) var numUpdates uint64 var numPlainTextBytes uint64 for _, doc := range batch.IndexOps { if doc != nil { numUpdates++ numPlainTextBytes += doc.NumPlainTextBytes() } } go func() { for _, doc := range batch.IndexOps { if doc != nil { aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue udc.analysisQueue.Queue(aw) } } }() // retrieve back index rows concurrent with analysis docBackIndexRowErr := error(nil) docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps)) udc.writeMutex.Lock() defer udc.writeMutex.Unlock() go func() { defer close(docBackIndexRowCh) // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { docBackIndexRowErr = err return } for docID, doc := range batch.IndexOps { backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID)) if err != nil { docBackIndexRowErr = err return } docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow} } err = kvreader.Close() if err != nil { docBackIndexRowErr = err return } }() // wait for analysis result newRowsMap := make(map[string][]index.IndexRow) var itemsDeQueued uint64 for itemsDeQueued < numUpdates { result := <-resultChan newRowsMap[result.DocID] = result.Rows itemsDeQueued++ } close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) docsAdded := uint64(0) docsDeleted := uint64(0) indexStart := time.Now() // prepare a list of rows var addRowsAll [][]UpsideDownCouchRow var updateRowsAll [][]UpsideDownCouchRow var deleteRowsAll [][]UpsideDownCouchRow // add the internal ops var updateRows []UpsideDownCouchRow var deleteRows []UpsideDownCouchRow for internalKey, internalValue := range batch.InternalOps { if internalValue == nil { // delete deleteInternalRow := 
NewInternalRow([]byte(internalKey), nil) deleteRows = append(deleteRows, deleteInternalRow) } else { updateInternalRow := NewInternalRow([]byte(internalKey), internalValue) updateRows = append(updateRows, updateInternalRow) } } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } // process back index rows as they arrive for dbir := range docBackIndexRowCh { if dbir.doc == nil && dbir.backIndexRow != nil { // delete deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil) if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } docsDeleted++ } else if dbir.doc != nil { addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID]) if len(addRows) > 0 { addRowsAll = append(addRowsAll, addRows) } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } if dbir.backIndexRow == nil { docsAdded++ } } } if docBackIndexRowErr != nil { return docBackIndexRowErr } // start a writer for this batch var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll) if err != nil { _ = kvwriter.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvwriter.Close() atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { udc.m.Lock() udc.docCount += docsAdded udc.docCount -= docsDeleted udc.m.Unlock() atomic.AddUint64(&udc.stats.updates, numUpdates) atomic.AddUint64(&udc.stats.deletes, docsDeleted) atomic.AddUint64(&udc.stats.batches, 1) atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func (udc *UpsideDownCouch) Delete(id string) (err error) { indexStart := time.Now() udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } // first we lookup the backindex row for the doc id if it exists // lookup the back index row var backIndexRow *BackIndexRow backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(id)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvreader.Close() if err != nil { return } if backIndexRow == nil { atomic.AddUint64(&udc.stats.deletes, 1) return } // start a writer for this delete var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } defer func() { if cerr := kvwriter.Close(); err == nil && cerr != nil { err = cerr } }() var deleteRowsAll [][]UpsideDownCouchRow deleteRows := udc.deleteSingle(id, backIndexRow, nil) if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll) if err == nil { udc.m.Lock() udc.docCount-- udc.m.Unlock() } atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.deletes, 1) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func TestIndexBatch(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() var expectedCount uint64 // first create 2 docs the old fashioned way doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ // now create a batch which does 3 things // insert new doc // update existing doc // delete existing doc // net document count change 0 batch := index.NewBatch() doc = document.NewDocument("3") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3"))) batch.Update(doc) doc = document.NewDocument("2") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated"))) batch.Update(doc) batch.Delete("1") err = idx.Batch(batch) if err != nil { t.Error(err) } indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() docCount, err := indexReader.DocCount() if err != nil { t.Fatal(err) } if docCount != expectedCount { t.Errorf("Expected document count to be %d got %d", expectedCount, docCount) } docIDReader, err := indexReader.DocIDReaderAll() if err != nil { t.Error(err) } var docIds []index.IndexInternalID docID, err := docIDReader.Next() for docID != nil && err == nil { docIds = append(docIds, docID) docID, err = docIDReader.Next() } if err != nil { t.Error(err) } expectedDocIds := 
[]index.IndexInternalID{index.IndexInternalID("2"), index.IndexInternalID("3")} if !reflect.DeepEqual(docIds, expectedDocIds) { t.Errorf("expected ids: %v, got ids: %v", expectedDocIds, docIds) } }
func TestPaginationSameScores(t *testing.T) { // a stub search with more than 10 matches // all documents have the same score searcher := &stubSearcher{ matches: []*search.DocumentMatch{ &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("a"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("b"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("c"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("d"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("e"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("f"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("g"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("h"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("i"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("j"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("k"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("l"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("m"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("n"), Score: 5, }, }, } // first get first 5 hits collector := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } total := collector.Total() if total != 14 { t.Errorf("expected 14 total results, got %d", total) } results := collector.Results() if len(results) != 5 { t.Fatalf("expected 5 results, got %d", len(results)) } firstResults := make(map[string]struct{}) for _, hit := range results { firstResults[hit.ID] = struct{}{} } // a stub search with more than 10 matches // all documents have the same score searcher = &stubSearcher{ matches: []*search.DocumentMatch{ 
&search.DocumentMatch{ IndexInternalID: index.IndexInternalID("a"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("b"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("c"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("d"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("e"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("f"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("g"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("h"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("i"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("j"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("k"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("l"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("m"), Score: 5, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("n"), Score: 5, }, }, } // now get next 5 hits collector = NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}}) err = collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } total = collector.Total() if total != 14 { t.Errorf("expected 14 total results, got %d", total) } results = collector.Results() if len(results) != 5 { t.Fatalf("expected 5 results, got %d", len(results)) } // make sure that none of these hits repeat ones we saw in the top 5 for _, hit := range results { if _, ok := firstResults[hit.ID]; ok { t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID) } } }
func TestPhraseSearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true) if err != nil { t.Fatal(err) } beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, true) if err != nil { t.Fatal(err) } phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"}) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: phraseSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("2"), Score: 1.0807601687084403, }, }, }, } for testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if next.Score != test.results[i].Score { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestRegexpSearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() pattern, err := regexp.Compile("ma.*") if err != nil { t.Fatal(err) } regexpSearcher, err := NewRegexpSearcher(twoDocIndexReader, pattern, "name", 1.0, true) if err != nil { t.Fatal(err) } patternCo, err := regexp.Compile("co.*") if err != nil { t.Fatal(err) } regexpSearcherCo, err := NewRegexpSearcher(twoDocIndexReader, patternCo, "desc", 1.0, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: regexpSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 1.916290731874155, }, }, }, { searcher: regexpSearcherCo, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("2"), Score: 0.33875554280828685, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.33875554280828685, }, }, }, } for testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if next.Score != test.results[i].Score { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d 
results got %d for test %d", len(test.results), i, testIndex) } } }
func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { analysisQueue := index.NewAnalysisQueue(1) i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue) if err != nil { t.Fatal(err) } err = i.Open() if err != nil { t.Fatal(err) } for _, id := range indexed { err = i.Update(&document.Document{ ID: id, Fields: []document.Field{ document.NewTextField("desc", []uint64{}, []byte("beer")), }, }) if err != nil { t.Fatal(err) } } indexReader, err := i.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() searcher, err := NewDocIDSearcher(indexReader, searched, 1.0, false) if err != nil { t.Fatal(err) } defer func() { err := searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0), } // Check the sequence for i, id := range wanted { m, err := searcher.Next(ctx) if err != nil { t.Fatal(err) } if !index.IndexInternalID(id).Equals(m.IndexInternalID) { t.Fatalf("expected %v at position %v, got %v", id, i, m.IndexInternalID) } ctx.DocumentMatchPool.Put(m) } m, err := searcher.Next(ctx) if err != nil { t.Fatal(err) } if m != nil { t.Fatalf("expected nil past the end of the sequence, got %v", m.IndexInternalID) } ctx.DocumentMatchPool.Put(m) // Check seeking for _, id := range wanted { if len(id) != 2 { t.Fatalf("expected identifier must be 2 characters long, got %v", id) } before := id[:1] for _, target := range []string{before, id} { m, err := searcher.Advance(ctx, index.IndexInternalID(target)) if err != nil { t.Fatal(err) } if m == nil || !m.IndexInternalID.Equals(index.IndexInternalID(id)) { t.Fatalf("advancing to %v returned %v instead of %v", before, m, id) } ctx.DocumentMatchPool.Put(m) } } // Seek after the end of the sequence after := "zzz" m, err = searcher.Advance(ctx, index.IndexInternalID(after)) if err != nil { t.Fatal(err) } if m != nil { t.Fatalf("advancing 
past the end of the sequence should return nil, got %v", m) } ctx.DocumentMatchPool.Put(m) }
func TestConjunctionSearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() // test 0 beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, true) if err != nil { t.Fatal(err) } beerAndMartySearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher, martyTermSearcher}, true) if err != nil { t.Fatal(err) } // test 1 angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, true) if err != nil { t.Fatal(err) } beerTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } angstAndBeerSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher2}, true) if err != nil { t.Fatal(err) } // test 2 beerTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } jackTermSearcher, err := NewTermSearcher(twoDocIndexReader, "jack", "name", 5.0, true) if err != nil { t.Fatal(err) } beerAndJackSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher3, jackTermSearcher}, true) if err != nil { t.Fatal(err) } // test 3 beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } misterTermSearcher, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true) if err != nil { t.Fatal(err) } beerAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4, misterTermSearcher}, true) if err != nil { t.Fatal(err) } // test 4 couchbaseTermSearcher, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, true) if err != nil { t.Fatal(err) } 
misterTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true) if err != nil { t.Fatal(err) } couchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher, misterTermSearcher2}, true) if err != nil { t.Fatal(err) } // test 5 beerTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 5.0, true) if err != nil { t.Fatal(err) } couchbaseTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "couchbase", "street", 1.0, true) if err != nil { t.Fatal(err) } misterTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "mister", "title", 5.0, true) if err != nil { t.Fatal(err) } couchbaseAndMisterSearcher2, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{couchbaseTermSearcher2, misterTermSearcher3}, true) if err != nil { t.Fatal(err) } beerAndCouchbaseAndMisterSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: beerAndMartySearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 2.0097428702814377, }, }, }, { searcher: angstAndBeerSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("2"), Score: 1.0807601687084403, }, }, }, { searcher: beerAndJackSearcher, results: []*search.DocumentMatch{}, }, { searcher: beerAndMisterSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("2"), Score: 1.2877980334016337, }, { IndexInternalID: index.IndexInternalID("3"), Score: 1.2877980334016337, }, }, }, { searcher: couchbaseAndMisterSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("2"), Score: 1.4436599157093672, }, }, }, { searcher: beerAndCouchbaseAndMisterSearcher, results: []*search.DocumentMatch{ { IndexInternalID: 
index.IndexInternalID("2"), Score: 1.441614953806971, }, }, }, } for testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(10, 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestTop10ScoresSkip10Only9Hits(t *testing.T) { // a stub search with only 10 matches searcher := &stubSearcher{ matches: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("a"), Score: 11, }, { IndexInternalID: index.IndexInternalID("c"), Score: 11, }, { IndexInternalID: index.IndexInternalID("e"), Score: 11, }, { IndexInternalID: index.IndexInternalID("g"), Score: 11, }, { IndexInternalID: index.IndexInternalID("i"), Score: 11, }, { IndexInternalID: index.IndexInternalID("j"), Score: 11, }, { IndexInternalID: index.IndexInternalID("k"), Score: 11, }, { IndexInternalID: index.IndexInternalID("m"), Score: 11, }, { IndexInternalID: index.IndexInternalID("n"), Score: 11, }, }, } collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } total := collector.Total() if total != 9 { t.Errorf("expected 9 total results, got %d", total) } results := collector.Results() if len(results) != 0 { t.Fatalf("expected 0 results, got %d", len(results)) } }
func TestDisjunctionSearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } martyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) if err != nil { t.Fatal(err) } martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } martyOrDustinSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true) if err != nil { t.Fatal(err) } raviTermSearcher, err := NewTermSearcher(twoDocIndexReader, "ravi", "name", 1.0, true) if err != nil { t.Fatal(err) } nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: martyOrDustinSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 0.6775110856165737, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.6775110856165737, }, }, }, // test a nested disjunction { searcher: nestedRaviOrMartyOrDustinSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 0.2765927424732821, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.2765927424732821, }, { IndexInternalID: index.IndexInternalID("4"), Score: 0.5531854849465642, }, }, }, } for 
testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestIndexReader(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() var expectedCount uint64 doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() // first look for a term that doesn't exist reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } count := reader.Count() if count != 0 { t.Errorf("Expected doc count to be: %d got: %d", 0, count) } err = reader.Close() if err != nil { t.Fatal(err) } reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } expectedCount = 2 count = reader.Count() if count != expectedCount { t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count) } var match *index.TermFieldDoc var actualCount uint64 match, err = reader.Next(nil) for err == nil && match != nil { match, err = reader.Next(nil) if err != nil { t.Errorf("unexpected 
error reading next") } actualCount++ } if actualCount != count { t.Errorf("count was 2, but only saw %d", actualCount) } expectedMatch := &index.TermFieldDoc{ ID: index.IndexInternalID("2"), Freq: 1, Norm: 0.5773502588272095, Vectors: []*index.TermFieldVector{ { Field: "desc", Pos: 3, Start: 9, End: 13, }, }, } tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true) if err != nil { t.Errorf("unexpected error: %v", err) } match, err = tfr.Next(nil) if err != nil { t.Errorf("unexpected error: %v", err) } if !reflect.DeepEqual(expectedMatch, match) { t.Errorf("got %#v, expected %#v", match, expectedMatch) } err = reader.Close() if err != nil { t.Fatal(err) } // now test usage of advance reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } match, err = reader.Advance(index.IndexInternalID("2"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } if match == nil { t.Fatalf("Expected match, got nil") } if !match.ID.Equals(index.IndexInternalID("2")) { t.Errorf("Expected ID '2', got '%s'", match.ID) } match, err = reader.Advance(index.IndexInternalID("3"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { t.Errorf("expected nil, got %v", match) } err = reader.Close() if err != nil { t.Fatal(err) } // now test creating a reader for a field that doesn't exist reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true) if err != nil { t.Errorf("Error accessing term field reader: %v", err) } count = reader.Count() if count != 0 { t.Errorf("expected count 0 for reader of non-existent field") } match, err = reader.Next(nil) if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { t.Errorf("expected nil, got %v", match) } match, err = reader.Advance(index.IndexInternalID("anywhere"), nil) if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { 
t.Errorf("expected nil, got %v", match) } }
func TestBooleanSearch(t *testing.T) { if twoDocIndex == nil { t.Fatal("its null") } twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() // test 0 beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher}, true) if err != nil { t.Fatal(err) } martyTermSearcher, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } shouldSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, true) if err != nil { t.Fatal(err) } steveTermSearcher, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher}, 0, true) if err != nil { t.Fatal(err) } booleanSearcher, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher, shouldSearcher, mustNotSearcher, true) if err != nil { t.Fatal(err) } // test 1 martyTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } shouldSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, true) if err != nil { t.Fatal(err) } steveTermSearcher2, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher2, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher2}, 0, true) if err != nil { 
t.Fatal(err) } booleanSearcher2, err := NewBooleanSearcher(twoDocIndexReader, nil, shouldSearcher2, mustNotSearcher2, true) if err != nil { t.Fatal(err) } // test 2 steveTermSearcher3, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher3, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher3}, 0, true) if err != nil { t.Fatal(err) } booleanSearcher3, err := NewBooleanSearcher(twoDocIndexReader, nil, nil, mustNotSearcher3, true) if err != nil { t.Fatal(err) } // test 3 beerTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher4, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher4}, true) if err != nil { t.Fatal(err) } steveTermSearcher4, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher4, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher4}, 0, true) if err != nil { t.Fatal(err) } booleanSearcher4, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher4, nil, mustNotSearcher4, true) if err != nil { t.Fatal(err) } // test 4 beerTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher5, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher5}, true) if err != nil { t.Fatal(err) } steveTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } martyTermSearcher5, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher5, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher5, martyTermSearcher5}, 0, true) if err != nil { t.Fatal(err) } booleanSearcher5, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher5, nil, 
mustNotSearcher5, true) if err != nil { t.Fatal(err) } // test 5 beerTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher6, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher6}, true) if err != nil { t.Fatal(err) } martyTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher6, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) if err != nil { t.Fatal(err) } shouldSearcher6, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, true) if err != nil { t.Fatal(err) } booleanSearcher6, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher6, shouldSearcher6, nil, true) if err != nil { t.Fatal(err) } // test 6 beerTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher7}, true) if err != nil { t.Fatal(err) } booleanSearcher7, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher7, nil, nil, true) if err != nil { t.Fatal(err) } martyTermSearcher7, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 5.0, true) if err != nil { t.Fatal(err) } conjunctionSearcher7, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher7, booleanSearcher7}, true) // test 7 beerTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, true) if err != nil { t.Fatal(err) } mustSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{beerTermSearcher8}, true) if err != nil { t.Fatal(err) } martyTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "marty", "name", 1.0, true) if err != nil { t.Fatal(err) } dustinTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 1.0, true) 
if err != nil { t.Fatal(err) } shouldSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, true) if err != nil { t.Fatal(err) } steveTermSearcher8, err := NewTermSearcher(twoDocIndexReader, "steve", "name", 1.0, true) if err != nil { t.Fatal(err) } mustNotSearcher8, err := NewDisjunctionSearcher(twoDocIndexReader, []search.Searcher{steveTermSearcher8}, 0, true) if err != nil { t.Fatal(err) } booleanSearcher8, err := NewBooleanSearcher(twoDocIndexReader, mustSearcher8, shouldSearcher8, mustNotSearcher8, true) if err != nil { t.Fatal(err) } dustinTermSearcher8a, err := NewTermSearcher(twoDocIndexReader, "dustin", "name", 5.0, true) if err != nil { t.Fatal(err) } conjunctionSearcher8, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{booleanSearcher8, dustinTermSearcher8a}, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: booleanSearcher, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 0.9818005051949021, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.808709699395535, }, { IndexInternalID: index.IndexInternalID("4"), Score: 0.34618161159873423, }, }, }, { searcher: booleanSearcher2, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 0.6775110856165737, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.6775110856165737, }, }, }, // no MUST or SHOULD clauses yields no results { searcher: booleanSearcher3, results: []*search.DocumentMatch{}, }, { searcher: booleanSearcher4, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.5, }, { IndexInternalID: index.IndexInternalID("4"), Score: 1.0, }, }, }, { searcher: booleanSearcher5, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("3"), Score: 
0.5, }, { IndexInternalID: index.IndexInternalID("4"), Score: 1.0, }, }, }, { searcher: booleanSearcher6, results: []*search.DocumentMatch{}, }, // test a conjunction query with a nested boolean { searcher: conjunctionSearcher7, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 2.0097428702814377, }, }, }, { searcher: conjunctionSearcher8, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("3"), Score: 2.0681575785068107, }, }, }, } for testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestIndexDocIdOnlyReader(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() doc := document.NewDocument("1") err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } doc = document.NewDocument("3") err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } doc = document.NewDocument("5") err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } doc = document.NewDocument("7") err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } doc = document.NewDocument("9") err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Error(err) } }() onlyIds := []string{"1", "5", "9"} reader, err := indexReader.DocIDReaderOnly(onlyIds) if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader.Close() if err != nil { t.Fatal(err) } }() id, err := reader.Next() count := uint64(0) for id != nil { count++ id, err = reader.Next() } if count != 3 { t.Errorf("expected 3, got %d", count) } // try it again, but jump reader2, err := indexReader.DocIDReaderOnly(onlyIds) if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader2.Close() if err != nil { t.Error(err) } }() id, err = reader2.Advance(index.IndexInternalID("5")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("5")) { t.Errorf("expected to find id '5', got '%s'", id) } id, err = reader2.Advance(index.IndexInternalID("a")) if err != nil { t.Error(err) } if id != 
nil { t.Errorf("expected to find id '', got '%s'", id) } // some keys aren't actually there onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"} reader3, err := indexReader.DocIDReaderOnly(onlyIds) if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader3.Close() if err != nil { t.Error(err) } }() id, err = reader3.Next() count = uint64(0) for id != nil { count++ id, err = reader3.Next() } if count != 1 { t.Errorf("expected 1, got %d", count) } // mix advance and next onlyIds = []string{"0", "1", "3", "5", "6", "9"} reader4, err := indexReader.DocIDReaderOnly(onlyIds) if err != nil { t.Errorf("Error accessing doc id reader: %v", err) } defer func() { err := reader4.Close() if err != nil { t.Error(err) } }() // first key is "1" id, err = reader4.Next() if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("1")) { t.Errorf("expected to find id '1', got '%s'", id) } // advancing to key we dont have gives next id, err = reader4.Advance(index.IndexInternalID("2")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("3")) { t.Errorf("expected to find id '3', got '%s'", id) } // next after advance works id, err = reader4.Next() if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("5")) { t.Errorf("expected to find id '5', got '%s'", id) } // advancing to key we do have works id, err = reader4.Advance(index.IndexInternalID("9")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("9")) { t.Errorf("expected to find id '9', got '%s'", id) } // advance backwards at end id, err = reader4.Advance(index.IndexInternalID("4")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("5")) { t.Errorf("expected to find id '5', got '%s'", id) } // next after advance works id, err = reader4.Next() if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("9")) { t.Errorf("expected to find id '9', got '%s'", id) } // advance backwards to key that exists, but not in only 
set id, err = reader4.Advance(index.IndexInternalID("7")) if err != nil { t.Error(err) } if !id.Equals(index.IndexInternalID("9")) { t.Errorf("expected to find id '9', got '%s'", id) } }
func TestMatchAllSearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() allSearcher, err := NewMatchAllSearcher(twoDocIndexReader, 1.0, true) if err != nil { t.Fatal(err) } allSearcher2, err := NewMatchAllSearcher(twoDocIndexReader, 1.2, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher queryNorm float64 results []*search.DocumentMatch }{ { searcher: allSearcher, queryNorm: 1.0, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("2"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("3"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("4"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("5"), Score: 1.0, }, }, }, { searcher: allSearcher2, queryNorm: 0.8333333, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("2"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("3"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("4"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("5"), Score: 1.0, }, }, }, } for testIndex, test := range tests { if test.queryNorm != 1.0 { test.searcher.SetQueryNorm(test.queryNorm) } defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if !scoresCloseEnough(next.Score, test.results[i].Score) { t.Errorf("expected result %d to 
have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestTop10Scores(t *testing.T) { // a stub search with more than 10 matches // the top-10 scores are > 10 // everything else is less than 10 searcher := &stubSearcher{ matches: []*search.DocumentMatch{ &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("a"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("b"), Score: 9, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("c"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("d"), Score: 9, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("e"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("f"), Score: 9, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("g"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("h"), Score: 9, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("i"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("j"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("k"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("l"), Score: 99, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("m"), Score: 11, }, &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("n"), Score: 11, }, }, } collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) } maxScore := collector.MaxScore() if maxScore != 99.0 { t.Errorf("expected max score 99.0, got %f", maxScore) } total := collector.Total() if total != 14 { t.Errorf("expected 14 total results, got %d", total) } results := collector.Results() if len(results) != 10 { t.Logf("results: %v", results) t.Fatalf("expected 10 results, got %d", len(results)) } if results[0].ID != "l" { t.Errorf("expected first result to have ID 'l', got %s", results[0].ID) } if 
results[0].Score != 99.0 { t.Errorf("expected highest score to be 99.0, got %f", results[0].Score) } minScore := 1000.0 for _, result := range results { if result.Score < minScore { minScore = result.Score } } if minScore < 10 { t.Errorf("expected minimum score to be higher than 10, got %f", minScore) } }
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue udc.analysisQueue.Queue(aw) // wait for the result result := <-resultChan close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } // first we lookup the backindex row for the doc id if it exists // lookup the back index row var backIndexRow *BackIndexRow backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(doc.ID)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvreader.Close() if err != nil { return } // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } defer func() { if cerr := kvwriter.Close(); err == nil && cerr != nil { err = cerr } }() // prepare a list of rows var addRowsAll [][]UpsideDownCouchRow var updateRowsAll [][]UpsideDownCouchRow var deleteRowsAll [][]UpsideDownCouchRow addRows, updateRows, deleteRows := udc.mergeOldAndNew(backIndexRow, result.Rows) if len(addRows) > 0 { addRowsAll = append(addRowsAll, addRows) } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll) if err == nil && backIndexRow == nil { udc.m.Lock() udc.docCount++ udc.m.Unlock() } atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) 
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func TestTermScorerWithQueryNorm(t *testing.T) { var docTotal uint64 = 100 var docTerm uint64 = 9 var queryTerm = "beer" var queryField = "desc" var queryBoost = 3.0 scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true) idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)) scorer.SetQueryNorm(2.0) expectedQueryWeight := 3 * idf * 3 * idf actualQueryWeight := scorer.Weight() if expectedQueryWeight != actualQueryWeight { t.Errorf("expected query weight %f, got %f", expectedQueryWeight, actualQueryWeight) } tests := []struct { termMatch *index.TermFieldDoc result *search.DocumentMatch }{ { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 1, Norm: 1.0, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Message: "weight(desc:beer^3.000000 in one), product of:", Children: []*search.Explanation{ { Value: 2.0 * idf * 3.0, Message: "queryWeight(desc:beer^3.000000), product of:", Children: []*search.Explanation{ { Value: 3, Message: "boost", }, { Value: idf, Message: "idf(docFreq=9, maxDocs=100)", }, { Value: 2, Message: "queryNorm", }, }, }, { Value: math.Sqrt(1.0) * idf, Message: "fieldWeight(desc:beer in one), product of:", Children: []*search.Explanation{ { Value: 1, Message: "tf(termFreq(desc:beer)=1", }, { Value: 1, Message: "fieldNorm(field=desc, doc=one)", }, { Value: idf, Message: "idf(docFreq=9, maxDocs=100)", }, }, }, }, }, }, }, } for _, test := range tests { ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) } } }
func TestFuzzySearch(t *testing.T) { twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) } defer func() { err := twoDocIndexReader.Close() if err != nil { t.Fatal(err) } }() fuzzySearcherbeet, err := NewFuzzySearcher(twoDocIndexReader, "beet", 0, 1, "desc", 1.0, true) if err != nil { t.Fatal(err) } fuzzySearcherdouches, err := NewFuzzySearcher(twoDocIndexReader, "douches", 0, 2, "desc", 1.0, true) if err != nil { t.Fatal(err) } fuzzySearcheraplee, err := NewFuzzySearcher(twoDocIndexReader, "aplee", 0, 2, "desc", 1.0, true) if err != nil { t.Fatal(err) } fuzzySearcherprefix, err := NewFuzzySearcher(twoDocIndexReader, "water", 3, 2, "desc", 1.0, true) if err != nil { t.Fatal(err) } tests := []struct { searcher search.Searcher results []*search.DocumentMatch }{ { searcher: fuzzySearcherbeet, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("1"), Score: 1.0, }, { IndexInternalID: index.IndexInternalID("2"), Score: 0.5, }, { IndexInternalID: index.IndexInternalID("3"), Score: 0.5, }, { IndexInternalID: index.IndexInternalID("4"), Score: 0.9999999838027345, }, }, }, { searcher: fuzzySearcherdouches, results: []*search.DocumentMatch{}, }, { searcher: fuzzySearcheraplee, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("3"), Score: 0.9581453659370776, }, }, }, { searcher: fuzzySearcherprefix, results: []*search.DocumentMatch{ { IndexInternalID: index.IndexInternalID("5"), Score: 1.916290731874155, }, }, }, } for testIndex, test := range tests { defer func() { err := test.searcher.Close() if err != nil { t.Fatal(err) } }() ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 for err == nil && next != nil { if i < len(test.results) { if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { t.Errorf("expected result %d to have id %s got %s for test %d", i, 
test.results[i].IndexInternalID, next.IndexInternalID, testIndex) } if next.Score != test.results[i].Score { t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex) t.Logf("scoring explanation: %s", next.Expl) } } ctx.DocumentMatchPool.Put(next) next, err = test.searcher.Next(ctx) i++ } if err != nil { t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) } if len(test.results) != i { t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) } } }
func TestTermScorer(t *testing.T) { var docTotal uint64 = 100 var docTerm uint64 = 9 var queryTerm = "beer" var queryField = "desc" var queryBoost = 1.0 scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, true) idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)) tests := []struct { termMatch *index.TermFieldDoc result *search.DocumentMatch }{ // test some simple math { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 1, Norm: 1.0, Vectors: []*index.TermFieldVector{ { Field: "desc", Pos: 1, Start: 0, End: 4, }, }, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf, Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf, Message: "fieldWeight(desc:beer in one), product of:", Children: []*search.Explanation{ { Value: 1, Message: "tf(termFreq(desc:beer)=1", }, { Value: 1, Message: "fieldNorm(field=desc, doc=one)", }, { Value: idf, Message: "idf(docFreq=9, maxDocs=100)", }, }, }, Locations: search.FieldTermLocationMap{ "desc": search.TermLocationMap{ "beer": []*search.Location{ { Pos: 1, Start: 0, End: 4, }, }, }, }, }, }, // test the same thing again (score should be cached this time) { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 1, Norm: 1.0, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf, Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf, Message: "fieldWeight(desc:beer in one), product of:", Children: []*search.Explanation{ { Value: 1, Message: "tf(termFreq(desc:beer)=1", }, { Value: 1, Message: "fieldNorm(field=desc, doc=one)", }, { Value: idf, Message: "idf(docFreq=9, maxDocs=100)", }, }, }, }, }, // test a case where the sqrt isn't precalculated { termMatch: &index.TermFieldDoc{ ID: index.IndexInternalID("one"), Freq: 65, Norm: 1.0, }, result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), 
Score: math.Sqrt(65) * idf, Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(65) * idf, Message: "fieldWeight(desc:beer in one), product of:", Children: []*search.Explanation{ { Value: math.Sqrt(65), Message: "tf(termFreq(desc:beer)=65", }, { Value: 1, Message: "fieldNorm(field=desc, doc=one)", }, { Value: idf, Message: "idf(docFreq=9, maxDocs=100)", }, }, }, }, }, } for _, test := range tests { ctx := &search.SearchContext{ DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) } } }
// InternalID returns the internal identifier for the given document id. The
// identifier is produced by converting the id string directly to
// index.IndexInternalID; the returned error is always nil.
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
	internalID := index.IndexInternalID(id)
	return internalID, nil
}