Beispiel #1
0
func newSubsetLike(expr expression.BinaryFunction, re *regexp.Regexp) expression.Visitor {
	if re == nil {
		// Pattern is not a constant
		return newSubsetDefault(expr)
	}

	prefix, complete := re.LiteralPrefix()
	if complete {
		eq := expression.NewEq(expr.First(), expression.NewConstant(prefix))
		return newSubsetEq(eq.(*expression.Eq))
	}

	if prefix == "" {
		return newSubsetDefault(expr)
	}

	var and expression.Expression
	le := expression.NewLE(expression.NewConstant(prefix), expr.First())
	last := len(prefix) - 1
	if prefix[last] < math.MaxUint8 {
		bytes := []byte(prefix)
		bytes[last]++
		and = expression.NewAnd(le, expression.NewLT(
			expr.First(),
			expression.NewConstant(string(bytes))))
	} else {
		and = expression.NewAnd(le, expression.NewLT(
			expr.First(),
			expression.EMPTY_ARRAY_EXPR))
	}

	return newSubsetAnd(and.(*expression.And))
}
Beispiel #2
0
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {

	prefixTerm, complete := pattern.LiteralPrefix()
	candidateTerms := make([]string, 0)
	if complete {
		// there is no pattern
		candidateTerms = append(candidateTerms, prefixTerm)
	} else {
		var fieldDict index.FieldDict
		var err error
		if len(prefixTerm) > 0 {
			fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
		} else {
			fieldDict, err = indexReader.FieldDict(field)
		}

		// enumerate the terms and check against regexp
		tfd, err := fieldDict.Next()
		for err == nil && tfd != nil {
			if pattern.MatchString(tfd.Term) {
				candidateTerms = append(candidateTerms, tfd.Term)
			}
			tfd, err = fieldDict.Next()
		}
		if err != nil {
			return nil, err
		}
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, 25)

	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, 1.0, explain)
		if err != nil {
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Beispiel #3
0
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {

	prefixTerm, complete := pattern.LiteralPrefix()
	var candidateTerms []string
	if complete {
		// there is no pattern
		candidateTerms = []string{prefixTerm}
	} else {
		var err error
		candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm)
		if err != nil {
			return nil, err
		}
	}

	// enumerate all the terms in the range
	qsearchers := make([]search.Searcher, 0, len(candidateTerms))
	qsearchersClose := func() {
		for _, searcher := range qsearchers {
			_ = searcher.Close()
		}
	}
	for _, cterm := range candidateTerms {
		qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
		if err != nil {
			qsearchersClose()
			return nil, err
		}
		qsearchers = append(qsearchers, qsearcher)
	}

	// build disjunction searcher of these ranges
	searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
	if err != nil {
		qsearchersClose()
		return nil, err
	}

	return &RegexpSearcher{
		indexReader: indexReader,
		pattern:     pattern,
		field:       field,
		explain:     explain,
		searcher:    searcher,
	}, nil
}
Beispiel #4
0
func newSargLike(pred expression.BinaryFunction, re *regexp.Regexp) expression.Visitor {
	prefix := ""

	if re != nil {
		var complete bool
		prefix, complete = re.LiteralPrefix()
		if complete {
			eq := expression.NewEq(pred.First(), expression.NewConstant(prefix))
			return newSargEq(eq.(*expression.Eq))
		}
	}

	rv := &sargLike{}
	rv.sarger = func(expr2 expression.Expression) (plan.Spans, error) {
		if SubsetOf(pred, expr2) {
			return _SELF_SPANS, nil
		}

		if !pred.First().EquivalentTo(expr2) {
			return nil, nil
		}

		span := &plan.Span{}
		span.Range.Low = expression.Expressions{expression.NewConstant(prefix)}

		last := len(prefix) - 1
		if last >= 0 && prefix[last] < math.MaxUint8 {
			bytes := []byte(prefix)
			bytes[last]++
			span.Range.High = expression.Expressions{expression.NewConstant(string(bytes))}
		} else {
			span.Range.High = _EMPTY_ARRAY
		}

		span.Range.Inclusion = datastore.LOW
		return plan.Spans{span}, nil
	}

	return rv
}
Beispiel #5
0
// FindAllIndex returns a sorted list of non-overlapping matches of the
// regular expression r, where a match is a pair of indices specifying
// the matched slice of x.Bytes(). If n < 0, all matches are returned
// in successive order. Otherwise, at most n matches are returned and
// they may not be successive. The result is nil if there are no matches,
// or if n == 0.
//
func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) {
	// a non-empty literal prefix is used to determine possible
	// match start indices with Lookup
	prefix, complete := r.LiteralPrefix()
	lit := []byte(prefix)

	// worst-case scenario: no literal prefix
	if prefix == "" {
		return r.FindAllIndex(x.data, n)
	}

	// if regexp is a literal just use Lookup and convert its
	// result into match pairs
	if complete {
		// Lookup returns indices that may belong to overlapping matches.
		// After eliminating them, we may end up with fewer than n matches.
		// If we don't have enough at the end, redo the search with an
		// increased value n1, but only if Lookup returned all the requested
		// indices in the first place (if it returned fewer than that then
		// there cannot be more).
		for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
			indices := x.Lookup(lit, n1)
			if len(indices) == 0 {
				return
			}
			sort.Ints(indices)
			pairs := make([]int, 2*len(indices))
			result = make([][]int, len(indices))
			count := 0
			prev := 0
			for _, i := range indices {
				if count == n {
					break
				}
				// ignore indices leading to overlapping matches
				if prev <= i {
					j := 2 * count
					pairs[j+0] = i
					pairs[j+1] = i + len(lit)
					result[count] = pairs[j : j+2]
					count++
					prev = i + len(lit)
				}
			}
			result = result[0:count]
			if len(result) >= n || len(indices) != n1 {
				// found all matches or there's no chance to find more
				// (n and n1 can be negative)
				break
			}
		}
		if len(result) == 0 {
			result = nil
		}
		return
	}

	// regexp has a non-empty literal prefix; Lookup(lit) computes
	// the indices of possible complete matches; use these as starting
	// points for anchored searches
	// (regexp "^" matches beginning of input, not beginning of line)
	r = regexp.MustCompile("^" + r.String()) // compiles because r compiled

	// same comment about Lookup applies here as in the loop above
	for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
		indices := x.Lookup(lit, n1)
		if len(indices) == 0 {
			return
		}
		sort.Ints(indices)
		result = result[0:0]
		prev := 0
		for _, i := range indices {
			if len(result) == n {
				break
			}
			m := r.FindIndex(x.data[i:]) // anchored search - will not run off
			// ignore indices leading to overlapping matches
			if m != nil && prev <= i {
				m[0] = i // correct m
				m[1] += i
				result = append(result, m)
				prev = m[1]
			}
		}
		if len(result) >= n || len(indices) != n1 {
			// found all matches or there's no chance to find more
			// (n and n1 can be negative)
			break
		}
	}
	if len(result) == 0 {
		result = nil
	}
	return
}