func newSubsetLike(expr expression.BinaryFunction, re *regexp.Regexp) expression.Visitor { if re == nil { // Pattern is not a constant return newSubsetDefault(expr) } prefix, complete := re.LiteralPrefix() if complete { eq := expression.NewEq(expr.First(), expression.NewConstant(prefix)) return newSubsetEq(eq.(*expression.Eq)) } if prefix == "" { return newSubsetDefault(expr) } var and expression.Expression le := expression.NewLE(expression.NewConstant(prefix), expr.First()) last := len(prefix) - 1 if prefix[last] < math.MaxUint8 { bytes := []byte(prefix) bytes[last]++ and = expression.NewAnd(le, expression.NewLT( expr.First(), expression.NewConstant(string(bytes)))) } else { and = expression.NewAnd(le, expression.NewLT( expr.First(), expression.EMPTY_ARRAY_EXPR)) } return newSubsetAnd(and.(*expression.And)) }
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) { prefixTerm, complete := pattern.LiteralPrefix() candidateTerms := make([]string, 0) if complete { // there is no pattern candidateTerms = append(candidateTerms, prefixTerm) } else { var fieldDict index.FieldDict var err error if len(prefixTerm) > 0 { fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) } else { fieldDict, err = indexReader.FieldDict(field) } // enumerate the terms and check against regexp tfd, err := fieldDict.Next() for err == nil && tfd != nil { if pattern.MatchString(tfd.Term) { candidateTerms = append(candidateTerms, tfd.Term) } tfd, err = fieldDict.Next() } if err != nil { return nil, err } } // enumerate all the terms in the range qsearchers := make([]search.Searcher, 0, 25) for _, cterm := range candidateTerms { qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain) if err != nil { return nil, err } qsearchers = append(qsearchers, qsearcher) } // build disjunction searcher of these ranges searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain) if err != nil { return nil, err } return &RegexpSearcher{ indexReader: indexReader, pattern: pattern, field: field, explain: explain, searcher: searcher, }, nil }
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) { prefixTerm, complete := pattern.LiteralPrefix() var candidateTerms []string if complete { // there is no pattern candidateTerms = []string{prefixTerm} } else { var err error candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm) if err != nil { return nil, err } } // enumerate all the terms in the range qsearchers := make([]search.Searcher, 0, len(candidateTerms)) qsearchersClose := func() { for _, searcher := range qsearchers { _ = searcher.Close() } } for _, cterm := range candidateTerms { qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain) if err != nil { qsearchersClose() return nil, err } qsearchers = append(qsearchers, qsearcher) } // build disjunction searcher of these ranges searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain) if err != nil { qsearchersClose() return nil, err } return &RegexpSearcher{ indexReader: indexReader, pattern: pattern, field: field, explain: explain, searcher: searcher, }, nil }
func newSargLike(pred expression.BinaryFunction, re *regexp.Regexp) expression.Visitor { prefix := "" if re != nil { var complete bool prefix, complete = re.LiteralPrefix() if complete { eq := expression.NewEq(pred.First(), expression.NewConstant(prefix)) return newSargEq(eq.(*expression.Eq)) } } rv := &sargLike{} rv.sarger = func(expr2 expression.Expression) (plan.Spans, error) { if SubsetOf(pred, expr2) { return _SELF_SPANS, nil } if !pred.First().EquivalentTo(expr2) { return nil, nil } span := &plan.Span{} span.Range.Low = expression.Expressions{expression.NewConstant(prefix)} last := len(prefix) - 1 if last >= 0 && prefix[last] < math.MaxUint8 { bytes := []byte(prefix) bytes[last]++ span.Range.High = expression.Expressions{expression.NewConstant(string(bytes))} } else { span.Range.High = _EMPTY_ARRAY } span.Range.Inclusion = datastore.LOW return plan.Spans{span}, nil } return rv }
// FindAllIndex returns a sorted list of non-overlapping matches of the // regular expression r, where a match is a pair of indices specifying // the matched slice of x.Bytes(). If n < 0, all matches are returned // in successive order. Otherwise, at most n matches are returned and // they may not be successive. The result is nil if there are no matches, // or if n == 0. // func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) { // a non-empty literal prefix is used to determine possible // match start indices with Lookup prefix, complete := r.LiteralPrefix() lit := []byte(prefix) // worst-case scenario: no literal prefix if prefix == "" { return r.FindAllIndex(x.data, n) } // if regexp is a literal just use Lookup and convert its // result into match pairs if complete { // Lookup returns indices that may belong to overlapping matches. // After eliminating them, we may end up with fewer than n matches. // If we don't have enough at the end, redo the search with an // increased value n1, but only if Lookup returned all the requested // indices in the first place (if it returned fewer than that then // there cannot be more). for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ { indices := x.Lookup(lit, n1) if len(indices) == 0 { return } sort.Ints(indices) pairs := make([]int, 2*len(indices)) result = make([][]int, len(indices)) count := 0 prev := 0 for _, i := range indices { if count == n { break } // ignore indices leading to overlapping matches if prev <= i { j := 2 * count pairs[j+0] = i pairs[j+1] = i + len(lit) result[count] = pairs[j : j+2] count++ prev = i + len(lit) } } result = result[0:count] if len(result) >= n || len(indices) != n1 { // found all matches or there's no chance to find more // (n and n1 can be negative) break } } if len(result) == 0 { result = nil } return } // regexp has a non-empty literal prefix; Lookup(lit) computes // the indices of possible complete matches; use these as starting // points for anchored searches // (regexp "^" matches beginning of input, not beginning of line) r = regexp.MustCompile("^" + r.String()) // compiles because r compiled // same comment about Lookup applies here as in the loop above for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ { indices := x.Lookup(lit, n1) if len(indices) == 0 { return } sort.Ints(indices) result = result[0:0] prev := 0 for _, i := range indices { if len(result) == n { break } m := r.FindIndex(x.data[i:]) // anchored search - will not run off // ignore indices leading to overlapping matches if m != nil && prev <= i { m[0] = i // correct m m[1] += i result = append(result, m) prev = m[1] } } if len(result) >= n || len(indices) != n1 { // found all matches or there's no chance to find more // (n and n1 can be negative) break } } if len(result) == 0 { result = nil } return }