func srAsIntSet(sr *index.SearchResults) *intset.HashSet { s := intset.NewHashSet(100) for _, h := range sr.Hits { s.Add(h.ID) } return s }
// Add indexes tokens by a give id in the index. func (i *MapIndex) Add(doc int, tokens []string) { i.Lock() defer i.Unlock() for _, t := range tokens { if _, ok := i.index[t]; !ok { i.index[t] = intset.NewHashSet(999) } i.index[t].Add(doc) } }
func TestNGramAnalyzer(t *testing.T) { a := NewNGramAnalyzer(2, 10) a.Index("Bokstavlig talt!", 2) a.Index("KAKE-BOKSEN-SMULDRER-SMULER", 4) a.Index("Krepsens vendekrets", 8) a.Index("krapyl", 10) q := index.NewQuery().Should([]string{"oks", "kr"}) res := a.Idx.Query(q) if !srAsIntSet(res).Contains(2, 4, 8, 10) { t.Fatal("documents not indexed/not queryable") } q2 := index.NewQuery().Must([]string{"bok"}).Not([]string{"smuler"}) res2 := a.Idx.Query(q2) if !srAsIntSet(res2).Equal(intset.NewHashSet(10).Add(2)) { t.Error("ngramanalyzer: must+not query fails") } }
// Query the MapIndex for search hits. func (i *MapIndex) Query(q *Query) *SearchResults { res := SearchResults{} var and, not, or, setRes *intset.HashSet i.RLock() for _, t := range q.MustMatch { if _, ok := i.index[t]; ok { if and == nil { and = i.index[t].Clone() } else { and = and.Intersection(i.index[t]) } } else { and = intset.NewHashSet(10) break } } for _, t := range q.MustNotMatch { if _, ok := i.index[t]; ok { if not == nil { not = i.index[t].Clone() } else { not = not.Intersection(i.index[t]) } } else { not = intset.NewHashSet(10) break } } if not == nil { not = intset.NewHashSet(10) } // Ignore q.ShouldMatch if q.MustMatch has any entries if and == nil { for _, t := range q.ShouldMatch { if _, ok := i.index[t]; ok { if or == nil { or = i.index[t].Clone() } else { or = or.Union(i.index[t]) } } } setRes = or.Difference(not) } else { setRes = and.Difference(not) } i.RUnlock() // done reading from the index for _, i := range setRes.All() { res.Hits = append(res.Hits, searchHit{i, 0}) } res.Count = setRes.Size() return &res }