func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
	// find the terms with this prefix
	fieldReader, err := indexReader.FieldReader(field, []byte(prefix), []byte(prefix))
	if err != nil {
		return nil, err
	}

	// enumerate all the terms in the range and build a term searcher for each
	// note: err must not be shadowed in the loop body, or the loop condition
	// would keep testing a stale value
	qsearchers := make([]search.Searcher, 0, 25)
	tfd, err := fieldReader.Next()
	for err == nil && tfd != nil {
		var qsearcher *TermSearcher
		qsearcher, err = NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
		tfd, err = fieldReader.Next()
	}
	if err != nil {
		return nil, err
	}

	// build a disjunction searcher over these term searchers
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &TermPrefixSearcher{
		indexReader: indexReader,
		prefix:      prefix,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate the terms and check each against the regexp
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		if pattern.MatchString(tfd.Term) {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}
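// For reference, the clause-limit helpers used above (tooManyClauses and
// tooManyClausesErr) could look roughly like the sketch below. The variable
// name DisjunctionMaxClauseCount and the error text are assumptions for
// illustration (and "fmt" must be imported), not confirmed details of this
// codebase.

// DisjunctionMaxClauseCount, if non-zero, caps how many candidate terms a
// multi-term searcher may expand into.
var DisjunctionMaxClauseCount = 0

func tooManyClauses(count int) bool {
	return DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount
}

func tooManyClausesErr() error {
	return fmt.Errorf("TooManyClauses[maximum is %d]", DisjunctionMaxClauseCount)
}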
func findFuzzyCandidateTerms(indexReader index.IndexReader, term *string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate terms and keep those within the Levenshtein distance bound
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
	// take the first `prefix` characters of the term as the required prefix
	// (note: i is a byte offset, so this under-counts for multi-byte runes)
	prefixTerm := ""
	for i, r := range term {
		if i < prefix {
			prefixTerm += string(r)
		} else {
			break
		}
	}

	// find the terms with this prefix
	var fieldDict index.FieldDict
	var err error
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = fieldDict.Close()
	}()

	// enumerate terms and keep those within the Levenshtein distance bound
	candidateTerms := make([]string, 0)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	// build a term searcher for each candidate term
	qsearchers := make([]search.Searcher, 0, 25)
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build a disjunction searcher over these term searchers
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &FuzzySearcher{
		indexReader: indexReader,
		term:        term,
		prefix:      prefix,
		fuzziness:   fuzziness,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
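// The bound-aware distance check is what keeps the dictionary scan above
// cheap. A minimal sketch of how such a function can work is below; it is
// not the library's search.LevenshteinDistanceMax, and it compares bytes
// rather than runes for brevity. The DP row can be abandoned as soon as
// every cell exceeds max, because the distance only grows from there.
func levenshteinDistanceMaxSketch(a, b string, max int) (int, bool) {
	// cheap lower bound: distance is at least the length difference
	if d := len(a) - len(b); d > max || -d > max {
		return max + 1, true
	}
	prev := make([]int, len(b)+1)
	curr := make([]int, len(b)+1)
	for j := range prev {
		prev[j] = j
	}
	for i := 1; i <= len(a); i++ {
		curr[0] = i
		rowMin := i
		for j := 1; j <= len(b); j++ {
			cost := 1
			if a[i-1] == b[j-1] {
				cost = 0
			}
			// min of delete, insert, substitute
			v := prev[j] + 1
			if curr[j-1]+1 < v {
				v = curr[j-1] + 1
			}
			if prev[j-1]+cost < v {
				v = prev[j-1] + cost
			}
			curr[j] = v
			if v < rowMin {
				rowMin = v
			}
		}
		if rowMin > max {
			// every continuation is already over the bound
			return max + 1, true
		}
		prev, curr = curr, prev
	}
	return prev[len(b)], false
}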
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
	prefixTerm, complete := pattern.LiteralPrefix()
	candidateTerms := make([]string, 0)
	if complete {
		// the pattern is a pure literal, so its prefix is the only candidate
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}
		if err != nil {
			return nil, err
		}
		defer func() {
			_ = fieldDict.Close()
		}()

		// enumerate the terms and check each against the regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// build a term searcher for each candidate term
	qsearchers := make([]search.Searcher, 0, 25)
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build a disjunction searcher over these term searchers
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
	reader, err := indexReader.DocIDReader("", "")
	if err != nil {
		return nil, err
	}
	scorer := scorers.NewConstantScorer(1.0, boost, explain)
	return &MatchAllSearcher{
		indexReader: indexReader,
		reader:      reader,
		scorer:      scorer,
	}, nil
}
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (searcher *DocIDSearcher, err error) {
	reader, err := indexReader.DocIDReaderOnly(ids)
	if err != nil {
		return nil, err
	}
	scorer := scorers.NewConstantScorer(1.0, boost, explain)
	return &DocIDSearcher{
		scorer: scorer,
		reader: reader,
		count:  len(ids),
	}, nil
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
	reader, err := indexReader.TermFieldReader([]byte(term), field)
	if err != nil {
		return nil, err
	}
	scorer := scorers.NewTermQueryScorer(term, field, boost, indexReader.DocCount(), reader.Count(), explain)
	return &TermSearcher{
		indexReader: indexReader,
		term:        term,
		field:       field,
		explain:     explain,
		reader:      reader,
		scorer:      scorer,
	}, nil
}
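// A minimal usage sketch for the constructors above, assuming an
// index.IndexReader is already open and that this vintage of the Searcher
// interface exposes Next() (*search.DocumentMatch, error) and Close() error;
// treat the helper name and that method set as illustrative assumptions.
func collectTermMatches(indexReader index.IndexReader, term, field string) ([]string, error) {
	searcher, err := NewTermSearcher(indexReader, term, field, 1.0, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = searcher.Close()
	}()

	var ids []string
	dm, err := searcher.Next()
	for err == nil && dm != nil {
		ids = append(ids, dm.ID) // external doc ID in this vintage
		dm, err = searcher.Next()
	}
	return ids, err
}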
// finalizeResults starts from the heap holding the final top size+skip hits:
// it discards the hits to be skipped and resolves external doc IDs where
// that hasn't happened yet
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	var err error
	hc.results, err = hc.store.Final(hc.skip, func(doc *search.DocumentMatch) error {
		if doc.ID == "" {
			// the external ID was never loaded during collection, look it up now
			var err error
			doc.ID, err = r.ExternalID(doc.IndexInternalID)
			if err != nil {
				return err
			}
		}
		return nil
	})
	return err
}
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
	reader, err := indexReader.DocIDReaderAll()
	if err != nil {
		return nil, err
	}
	count, err := indexReader.DocCount()
	if err != nil {
		_ = reader.Close()
		return nil, err
	}
	scorer := scorer.NewConstantScorer(1.0, boost, explain)
	return &MatchAllSearcher{
		indexReader: indexReader,
		reader:      reader,
		scorer:      scorer,
		count:       count,
	}, nil
}
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, explain bool) (searcher *DocIDSearcher, err error) {
	kept := make([]string, len(ids))
	copy(kept, ids)
	sort.Strings(kept)

	if len(ids) > 0 {
		var idReader index.DocIDReader
		// the end term is an exclusive upper bound: the largest requested id,
		// incremented by one byte
		endTerm := string(incrementBytes([]byte(kept[len(kept)-1])))
		idReader, err = indexReader.DocIDReader(kept[0], endTerm)
		if err != nil {
			return nil, err
		}
		defer func() {
			if cerr := idReader.Close(); err == nil && cerr != nil {
				err = cerr
			}
		}()

		// keep only the requested ids that actually exist in the index,
		// compacting kept in place and dropping duplicates
		j := 0
		for _, id := range kept {
			doc, err := idReader.Advance(id)
			if err != nil {
				return nil, err
			}
			// non-duplicate match
			if doc == id && (j == 0 || kept[j-1] != id) {
				kept[j] = id
				j++
			}
		}
		kept = kept[:j]
	}

	scorer := scorers.NewConstantScorer(1.0, boost, explain)
	return &DocIDSearcher{
		ids:    kept,
		scorer: scorer,
	}, nil
}
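// incrementBytes turns the largest requested ID into the exclusive upper
// bound for the DocIDReader range above. A plausible implementation is
// sketched below (treating the ID as a big-endian byte string and adding one,
// carrying leftward on overflow); this is an assumption, not the confirmed
// helper from this codebase.
func incrementBytes(in []byte) []byte {
	rv := make([]byte, len(in))
	copy(rv, in)
	for i := len(rv) - 1; i >= 0; i-- {
		rv[i]++
		if rv[i] != 0 {
			// no overflow in this byte, the carry stops here
			break
		}
	}
	return rv
}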
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
	// increment total hits
	hc.total++
	d.HitNumber = hc.total

	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}

	var err error
	// see if we need to load the ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
		d.ID, err = reader.ExternalID(d.IndexInternalID)
		if err != nil {
			return err
		}
	}

	// see if we need to load the stored fields
	if len(hc.neededFields) > 0 {
		// find out which fields haven't been loaded yet
		fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
		// look them up
		fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
		if err != nil {
			return err
		}
		// cache these as well
		if d.CachedFieldTerms == nil {
			d.CachedFieldTerms = make(map[string][]string)
		}
		d.CachedFieldTerms.Merge(fieldTerms)
	}

	// compute this hit's sort value
	if len(hc.sort) == 1 && hc.cachedScoring[0] {
		d.Sort = sortByScoreOpt
	} else {
		hc.sort.Value(d)
	}

	// optimization: we track the lowest-sorting hit already removed from the
	// heap; with this one comparison we can avoid all heap operations if this
	// hit would have been added and then immediately removed
	if hc.lowestMatchOutsideResults != nil {
		cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
		if cmp >= 0 {
			// this hit can't possibly be in the result set, so avoid heap ops
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}

	hc.store.Add(d)
	if hc.store.Len() > hc.size+hc.skip {
		removed := hc.store.RemoveLast()
		if hc.lowestMatchOutsideResults == nil {
			hc.lowestMatchOutsideResults = removed
		} else {
			cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
			if cmp < 0 {
				tmp := hc.lowestMatchOutsideResults
				hc.lowestMatchOutsideResults = removed
				ctx.DocumentMatchPool.Put(tmp)
			}
		}
	}

	return nil
}
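// For context, the hc.store consulted by collectSingle and finalizeResults
// only needs a small contract, reconstructed here from the call sites above;
// the interface name and exact signatures are assumptions for illustration.
type collectorStore interface {
	// Add pushes a hit into the backing heap/ordered structure.
	Add(doc *search.DocumentMatch)
	// Len reports how many hits are currently held.
	Len() int
	// RemoveLast pops and returns the worst-sorting hit.
	RemoveLast() *search.DocumentMatch
	// Final drops the first skip hits, runs fixup on each survivor
	// (e.g. to resolve external IDs), and returns the rest best-first.
	Final(skip int, fixup func(*search.DocumentMatch) error) (search.DocumentMatchCollection, error)
}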