Exemple #1
0
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
	prefixTerm := ""
	for i, r := range term {
		if i < prefix {
			prefixTerm += string(r)
		}
	}

	// find the terms with this prefix
	var fieldDict index.FieldDict
	var err error
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}

	// enumerate terms and check levenshtein distance
	candidateTerms := make([]string, 0)
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, 25)

	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &FuzzySearcher{
		indexReader: indexReader,
		term:        term,
		prefix:      prefix,
		fuzziness:   fuzziness,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Exemple #2
0
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {

	prefixTerm, complete := pattern.LiteralPrefix()
	candidateTerms := make([]string, 0)
	if complete {
		// there is no pattern
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}

		// enumerate the terms and check against regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, 25)

	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Exemple #3
0
func findFuzzyCandidateTerms(indexReader index.IndexReader, term *string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate terms and check levenshtein distance
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		ld, exceeded := search.LevenshteinDistanceMax(term, &tfd.Term, fuzziness)
		if !exceeded && ld <= fuzziness {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}
Exemple #4
0
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate the terms and check against regexp
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		if pattern.MatchString(tfd.Term) {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr()
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}