Пример #1
0
// NewTermPrefixSearcher builds a searcher over every term that the index
// yields for the given prefix in the given field.
//
// It opens a field reader bounded by prefix on both ends, creates one
// TermSearcher per term the reader yields, and wraps them in a disjunction
// searcher (constructed with 0 as its third argument).
//
// An error from opening the field reader, from iterating it, or from
// constructing any child searcher is returned and no searcher is produced.
//
// NOTE(review): boost is accepted but not used here; every term searcher is
// created with a fixed 1.0 — confirm this is intended.
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
	// find the terms with this prefix
	fieldReader, err := indexReader.FieldReader(field, []byte(prefix), []byte(prefix))
	if err != nil {
		return nil, err
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, 25)
	tfd, err := fieldReader.Next()
	for err == nil && tfd != nil {
		// A distinct name keeps the loop's err from being shadowed; with a
		// shadowed err the result of Next below would never reach the loop
		// condition and iteration errors would be lost.
		qsearcher, serr := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
		if serr != nil {
			return nil, serr
		}
		qsearchers = append(qsearchers, qsearcher)
		tfd, err = fieldReader.Next()
	}
	if err != nil {
		return nil, err
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &TermPrefixSearcher{
		indexReader: indexReader,
		prefix:      prefix,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Пример #2
0
// findRegexpCandidateTerms collects the dictionary terms of field that match
// pattern.
//
// When prefixTerm is non-empty the dictionary is obtained with
// FieldDictPrefix for that prefix; otherwise the field's full dictionary is
// used. Collection stops early, returning the terms gathered so far together
// with tooManyClausesErr(), as soon as tooManyClauses(len(rv)) reports true.
//
// The dictionary is closed before returning; a Close error is reported only
// when no other error occurred. If the dictionary cannot be opened, that
// error is returned with a nil slice.
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		// Bail out before touching fieldDict: otherwise the open error is
		// overwritten by the first Next call, and both Next and the deferred
		// Close run on a dictionary that was never opened.
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	rv = make([]string, 0)

	// enumerate the terms and check against regexp
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		if pattern.MatchString(tfd.Term) {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}
Пример #3
0
// findFuzzyCandidateTerms collects the dictionary terms of field for which
// search.LevenshteinDistanceMax(term, candidate, fuzziness) reports a
// distance of at most fuzziness without exceeding the maximum.
//
// When prefixTerm is non-empty the dictionary is obtained with
// FieldDictPrefix for that prefix; otherwise the field's full dictionary is
// used. Collection stops early, returning the terms gathered so far together
// with tooManyClausesErr(), as soon as tooManyClauses(len(rv)) reports true.
//
// The dictionary is closed before returning; a Close error is reported only
// when no other error occurred. If the dictionary cannot be opened, that
// error is returned with a nil slice.
func findFuzzyCandidateTerms(indexReader index.IndexReader, term *string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		// Bail out before touching fieldDict: otherwise the open error is
		// overwritten by the first Next call, and both Next and the deferred
		// Close run on a dictionary that was never opened.
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	rv = make([]string, 0)

	// enumerate terms and check levenshtein distance
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}
Пример #4
0
// NewFuzzySearcher builds a searcher over the dictionary terms of field that
// are within fuzziness of term according to search.LevenshteinDistanceMax.
//
// A literal prefix is taken from the start of term (see the note in the body
// about how prefix is measured). When that prefix is non-empty only the
// dictionary returned by FieldDictPrefix is scanned; otherwise the field's
// full dictionary is scanned. One TermSearcher is created per matching term,
// each with the supplied boost, and they are combined in a disjunction
// searcher (constructed with 0 as its third argument).
//
// Any error from opening or iterating the dictionary, or from constructing a
// child searcher, is returned and no searcher is produced.
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
	// Keep the leading runes of term whose starting byte offset is below
	// prefix. range over a string yields byte offsets, so the offsets only
	// grow and the loop can stop at the first one that is out of range.
	// NOTE(review): for multi-byte text this measures prefix in bytes, not
	// runes — confirm that is the intended unit.
	prefixTerm := ""
	for i, r := range term {
		if i >= prefix {
			break
		}
		prefixTerm += string(r)
	}

	// find the terms with this prefix
	var fieldDict index.FieldDict
	var err error
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		// Without this check the open error is overwritten by the first
		// Next call below, which would also run on an unopened dictionary.
		return nil, err
	}

	// enumerate terms and check levenshtein distance
	candidateTerms := make([]string, 0)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	// one term searcher per candidate term
	qsearchers := make([]search.Searcher, 0, 25)

	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &FuzzySearcher{
		indexReader: indexReader,
		term:        term,
		prefix:      prefix,
		fuzziness:   fuzziness,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Пример #5
0
// NewRegexpSearcher builds a searcher over the dictionary terms of field
// that match pattern.
//
// If pattern.LiteralPrefix reports the pattern is a complete literal, that
// literal is used as the single candidate term and the dictionary is not
// consulted. Otherwise the dictionary is scanned (restricted with
// FieldDictPrefix when the literal prefix is non-empty) and every term
// accepted by pattern.MatchString becomes a candidate. One TermSearcher is
// created per candidate, each with the supplied boost, and they are combined
// in a disjunction searcher (constructed with 0 as its third argument).
//
// Any error from opening or iterating the dictionary, or from constructing a
// child searcher, is returned and no searcher is produced.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {

	prefixTerm, complete := pattern.LiteralPrefix()
	candidateTerms := make([]string, 0)
	if complete {
		// there is no pattern
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}
		if err != nil {
			// Without this check the open error is overwritten by the first
			// Next call below, which would also run on an unopened dictionary.
			return nil, err
		}

		// enumerate the terms and check against regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// one term searcher per candidate term
	qsearchers := make([]search.Searcher, 0, 25)

	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Пример #6
0
// NewMatchAllSearcher creates a MatchAllSearcher backed by a doc ID reader
// opened with empty start and end bounds, scoring hits with a constant
// scorer built from 1.0, boost and explain.
//
// The error from opening the doc ID reader, if any, is returned unchanged.
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
	docIDs, err := indexReader.DocIDReader("", "")
	if err != nil {
		return nil, err
	}

	return &MatchAllSearcher{
		indexReader: indexReader,
		reader:      docIDs,
		scorer:      scorers.NewConstantScorer(1.0, boost, explain),
	}, nil
}
Пример #7
0
// NewDocIDSearcher creates a DocIDSearcher over the given ids.
//
// The searcher reads from the reader returned by
// indexReader.DocIDReaderOnly(ids), scores with a constant scorer built from
// 1.0, boost and explain, and records len(ids) as its count. The error from
// opening the reader, if any, is returned unchanged.
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
	explain bool) (searcher *DocIDSearcher, err error) {

	idReader, err := indexReader.DocIDReaderOnly(ids)
	if err != nil {
		return nil, err
	}

	return &DocIDSearcher{
		scorer: scorers.NewConstantScorer(1.0, boost, explain),
		reader: idReader,
		count:  len(ids),
	}, nil
}
Пример #8
0
// NewTermSearcher creates a TermSearcher for term in field.
//
// It opens a term field reader for the term and builds a term query scorer
// from the term, field, boost, the index's DocCount, the reader's Count and
// the explain flag. The error from opening the reader, if any, is returned
// unchanged.
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {
	tfr, err := indexReader.TermFieldReader([]byte(term), field)
	if err != nil {
		return nil, err
	}

	// Gather the two counts the scorer needs, index-wide first.
	docTotal := indexReader.DocCount()
	docTerm := tfr.Count()

	return &TermSearcher{
		indexReader: indexReader,
		term:        term,
		field:       field,
		explain:     explain,
		reader:      tfr,
		scorer:      scorers.NewTermQueryScorer(term, field, boost, docTotal, docTerm, explain),
	}, nil
}
Пример #9
0
// finalizeResults asks the store for its final results, passing hc.skip and
// a per-document fixup, and saves whatever the store returns in hc.results.
//
// The fixup fills in the external ID of any document whose ID is still
// empty, using r.ExternalID on the document's internal ID. The error
// returned by the store's Final call is returned unchanged.
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	// fillID resolves a missing external ID; documents that already carry
	// one are left untouched.
	fillID := func(doc *search.DocumentMatch) error {
		if doc.ID != "" {
			return nil
		}
		var lookupErr error
		doc.ID, lookupErr = r.ExternalID(doc.IndexInternalID)
		return lookupErr
	}

	results, err := hc.store.Final(hc.skip, fillID)
	hc.results = results
	return err
}
Пример #10
0
// NewMatchAllSearcher creates a MatchAllSearcher backed by the reader from
// indexReader.DocIDReaderAll, scoring hits with a constant scorer built from
// 1.0, boost and explain, and recording the index's DocCount as its count.
//
// If DocCount fails, the already-opened reader is closed (its Close error is
// deliberately discarded) and the DocCount error is returned.
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, explain bool) (*MatchAllSearcher, error) {
	allDocs, err := indexReader.DocIDReaderAll()
	if err != nil {
		return nil, err
	}

	total, err := indexReader.DocCount()
	if err != nil {
		// Best-effort cleanup; the DocCount failure is the error reported.
		_ = allDocs.Close()
		return nil, err
	}

	return &MatchAllSearcher{
		indexReader: indexReader,
		reader:      allDocs,
		scorer:      scorer.NewConstantScorer(1.0, boost, explain),
		count:       total,
	}, nil
}
Пример #11
0
// NewDocIDSearcher creates a DocIDSearcher restricted to those of the given
// ids that the index's doc ID reader reports back, sorted and de-duplicated.
//
// The ids are copied and sorted so the caller's slice is not modified. A doc
// ID reader is opened from the smallest id up to incrementBytes of the
// largest, and each id is kept only if Advance(id) returns exactly that id
// and it differs from the previously kept one. The reader is closed before
// returning; a Close error is reported only when no other error occurred.
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
	explain bool) (searcher *DocIDSearcher, err error) {

	sorted := make([]string, len(ids))
	copy(sorted, ids)
	sort.Strings(sorted)

	if len(ids) > 0 {
		first, last := sorted[0], sorted[len(sorted)-1]
		upper := string(incrementBytes([]byte(last)))

		var idReader index.DocIDReader
		idReader, err = indexReader.DocIDReader(first, upper)
		if err != nil {
			return nil, err
		}
		defer func() {
			cerr := idReader.Close()
			if cerr != nil && err == nil {
				err = cerr
			}
		}()

		// Compact the survivors into the front of sorted; n is both the
		// number kept so far and the next write position.
		n := 0
		for _, id := range sorted {
			found, advErr := idReader.Advance(id)
			if advErr != nil {
				return nil, advErr
			}
			if found != id {
				// not present in the index
				continue
			}
			if n > 0 && sorted[n-1] == id {
				// duplicate of the id kept just before
				continue
			}
			sorted[n] = id
			n++
		}
		sorted = sorted[:n]
	}

	return &DocIDSearcher{
		ids:    sorted,
		scorer: scorers.NewConstantScorer(1.0, boost, explain),
	}, nil
}
Пример #12
0
// collectSingle processes one hit d for the collector.
//
// It bumps the running hit total and stamps it on d as HitNumber, raises
// hc.maxScore if d scores higher, optionally loads d's external ID and any
// needed field terms from reader, computes d's sort value, and then either
// discards d back into ctx.DocumentMatchPool or adds it to hc.store.
//
// The store is kept at no more than hc.size+hc.skip entries: when adding d
// pushes it over, the last entry is removed and considered as the new
// hc.lowestMatchOutsideResults.
//
// Errors from reader.ExternalID and reader.DocumentFieldTerms are returned
// unchanged; otherwise the result is nil.
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
	// increment total hits
	hc.total++
	d.HitNumber = hc.total

	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}

	var err error
	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
		d.ID, err = reader.ExternalID(d.IndexInternalID)
		if err != nil {
			return err
		}
	}

	// see if we need to load the stored fields
	if len(hc.neededFields) > 0 {
		// find out which fields haven't been loaded yet
		// (called before the nil check below, so d.CachedFieldTerms may
		// still be nil at this point)
		fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
		// look them up
		fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
		if err != nil {
			return err
		}
		// cache these as well, creating the cache map on first use
		if d.CachedFieldTerms == nil {
			d.CachedFieldTerms = make(map[string][]string)
		}
		d.CachedFieldTerms.Merge(fieldTerms)
	}

	// compute this hit's sort value; when there is exactly one sort key and
	// hc.cachedScoring[0] is set, the shared sortByScoreOpt value is used
	// instead of calling hc.sort.Value
	if len(hc.sort) == 1 && hc.cachedScoring[0] {
		d.Sort = sortByScoreOpt
	} else {
		hc.sort.Value(d)
	}

	// optimization, we track lowest sorting hit already removed from heap
	// with this one comparison, we can avoid all heap operations if
	// this hit would have been added and then immediately removed
	if hc.lowestMatchOutsideResults != nil {
		cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
		if cmp >= 0 {
			// this hit can't possibly be in the result set, so avoid heap ops
			// and hand d back to the pool
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}

	hc.store.Add(d)
	// keep at most size+skip entries in the store
	if hc.store.Len() > hc.size+hc.skip {
		removed := hc.store.RemoveLast()
		if hc.lowestMatchOutsideResults == nil {
			// first removal: remember it as the reference for the shortcut above
			hc.lowestMatchOutsideResults = removed
		} else {
			cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
			if cmp < 0 {
				// removed compares lower than the current reference: make it
				// the new reference and return the old one to the pool
				tmp := hc.lowestMatchOutsideResults
				hc.lowestMatchOutsideResults = removed
				ctx.DocumentMatchPool.Put(tmp)
			}
		}
	}

	return nil
}