Exemple #1
0
// TODO(knorton): This is still being tested. This is a grep that supports context lines. Unlike the version
// in codesearch, this one does not operate on chunks. The downside is that we have to have the whole file
// in memory to do the grep. Fortunately, we limit the size of files that get indexed anyway. 10M files tend
// to not be source code.
//
// grep2 loads r through g.fillFrom and then repeatedly asks re for the next match in the
// not-yet-scanned part of the buffer. For every match it calls fn with:
//
//   - line:   the matched line with trailing "\n" bytes trimmed,
//   - lineno: the line number of the match (lineno+1 of the running count, so presumably
//     1-based provided countLines counts completed lines — confirm against countLines),
//   - before: lastNLines of everything that precedes the matched line in the whole buffer,
//   - after:  firstNLines of everything that follows the matched line.
//
// Scanning stops when fn returns false or a non-nil error (the error is returned as-is), when
// re reports no further match, or when the buffer is exhausted. All slices handed to fn are
// views into the loaded buffer.
func (g *grepper) grep2(
	r io.Reader,
	re *regexp.Regexp,
	nctx int,
	fn func(line []byte, lineno int, before [][]byte, after [][]byte) (bool, error)) error {

	buf, err := g.fillFrom(r)
	if err != nil {
		return err
	}

	// off is the absolute offset in buf at which the next search begins. The buffer itself is
	// never re-sliced so that the "before" context of a match can reach back past the previous
	// match; trimming the buffer would silently drop those lines from the context.
	off := 0
	lineno := 0
	for off < len(buf) {
		rest := buf[off:]

		// NOTE(review): m is used below as the offset of the end of the matching line within
		// rest (negative when there is no match) — confirm against the regexp package in use.
		m := re.Match(rest, true, true)
		if m < 0 {
			return nil
		}

		// Start of the matched line, relative to rest.
		start := bytes.LastIndex(rest[:m], nl) + 1

		// One past the end of the matched line (relative to rest), clamped for a final line
		// that has no terminating newline.
		end := m + 1
		if end > len(rest) {
			end = len(rest)
		}

		// Absolute offset of the newline that ends the previous line; clamped to 0 when the
		// match is on the very first line of the buffer.
		prevEnd := off + start - 1
		if prevEnd < 0 {
			prevEnd = 0
		}

		// Account for the lines skipped between the previous match and this one.
		lineno += countLines(rest[:start])

		more, err := fn(
			bytes.TrimRight(rest[start:end], "\n"),
			lineno+1,
			lastNLines(buf[:prevEnd], nctx),
			firstNLines(rest[end:], nctx))
		if err != nil {
			return err
		}
		if !more {
			return nil
		}

		// Account for the matched line itself and resume right after it.
		lineno++
		off += end
	}

	return nil
}
Exemple #2
0
// This nonsense is adapted from https://code.google.com/p/codesearch/source/browse/regexp/match.go#399
// and I assume it is a mess to make it faster, but I would like to try a much simpler cleaner version.
//
// grep streams r through a reusable buffer and calls fn for every line matched by re, passing
// the line and its line number (counting starts at 1). The line slice covers the bytes up to
// and including offset m1 reported by re.Match (it is not trimmed) and points into the internal
// buffer, which is overwritten as reading continues, so fn must copy anything it wants to keep.
//
// Scanning stops early when fn returns false (grep returns nil) or a non-nil error (returned
// as-is). A read error other than io.EOF / io.ErrUnexpectedEOF is returned after the data read
// so far has been scanned.
func (g *grepper) grep(r io.Reader, re *regexp.Regexp, fn func(line []byte, lineno int) (bool, error)) error {
	if g.buf == nil {
		g.buf = make([]byte, 1<<20)
	}

	var (
		buf       = g.buf[:0]
		lineno    = 1
		beginText = true
		endText   = false
	)

	for {
		n, err := io.ReadFull(r, buf[len(buf):cap(buf)])
		buf = buf[:len(buf)+n]
		end := len(buf)
		if err == nil {
			// More input may follow: only scan up to the last complete line and carry the
			// partial tail over to the next round.
			end = bytes.LastIndex(buf, nl) + 1
			if end == 0 {
				// The buffer is full but holds no newline. Nothing could be consumed, and
				// io.ReadFull on a zero-length slice returns (0, nil), so without growing
				// the buffer this loop would spin forever on an over-long line.
				size := 2 * cap(buf)
				if size == 0 {
					size = 1 << 20
				}
				grown := make([]byte, len(buf), size)
				copy(grown, buf)
				buf = grown
				continue
			}
		} else {
			// EOF or a read failure: whatever is buffered is the end of the text.
			endText = true
		}
		chunkStart := 0
		for chunkStart < end {
			m1 := re.Match(buf[chunkStart:end], beginText, endText) + chunkStart
			beginText = false
			if m1 < chunkStart {
				// Negative result from Match: no further match in this chunk.
				break
			}
			lineStart := bytes.LastIndex(buf[chunkStart:m1], nl) + 1 + chunkStart
			lineEnd := m1 + 1
			if lineEnd > end {
				lineEnd = end
			}
			lineno += countLines(buf[chunkStart:lineStart])
			line := buf[lineStart:lineEnd]
			more, fnErr := fn(line, lineno)
			if fnErr != nil {
				return fnErr
			}
			if !more {
				return nil
			}
			lineno++
			chunkStart = lineEnd
		}
		if err == nil {
			// Count the unmatched lines left in this chunk before it is discarded.
			lineno += countLines(buf[chunkStart:end])
		}

		// Move the unscanned tail to the front of the buffer for the next read.
		n = copy(buf, buf[end:])
		buf = buf[:n]
		if len(buf) == 0 && err != nil {
			if err != io.EOF && err != io.ErrUnexpectedEOF {
				return err
			}
			return nil
		}
	}
}
Exemple #3
0
// Search looks for pat in the indexed files and returns the matching lines grouped by file.
//
// The pattern is turned into a regexp via GetRegexpPattern(pat, opt.IgnoreCase). Candidate files
// come from the index posting query for that regexp and, when opt.FileRegexp is non-empty, are
// additionally filtered by file name. Each remaining candidate is grepped from the "raw"
// directory under n.Ref.dir with opt.LinesOfContext lines of context.
//
// opt.Offset skips collecting matches until that many files with a match have been seen, and a
// positive opt.Limit caps the number of files whose matches are collected; skipped files still
// count toward FilesWithMatch. Once more than 500 files have produced a match, further
// candidates are no longer grepped and receive a single placeholder match instead.
//
// An error is returned if either regexp fails to compile, if grepping a file fails, or if more
// than matchLimit matches are collected. The index read lock is held for the whole call.
func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error) {
	startedAt := time.Now()

	n.lck.RLock()
	defer n.lck.RUnlock()

	re, err := regexp.Compile(GetRegexpPattern(pat, opt.IgnoreCase))
	if err != nil {
		return nil, err
	}

	var (
		g                grepper
		results          []*FileMatch
		filesOpened      int
		filesFound       int
		filesCollected   int
		matchesCollected int
	)

	var fre *regexp.Regexp
	if opt.FileRegexp != "" {
		fre, err = regexp.Compile(opt.FileRegexp)
		if err != nil {
			return nil, err
		}
	}

	// counter is the number of files in which grep has reported at least one match; it drives
	// the "skip grepping" shortcut below.
	var counter int
	files := n.idx.PostingQuery(index.RegexpQuery(re.Syntax))
	for _, file := range files {
		var matches []*Match
		name := n.idx.Name(file)
		hasMatch := false

		// reject files that do not match the file pattern
		if fre != nil && fre.MatchString(name, true, true) < 0 {
			continue
		}

		filesOpened++

		if counter > 500 {
			// Too many files already matched: do not grep this one, report a placeholder hit.
			hasMatch = true
			matches = append(matches, &Match{
				Line:       "This hit has been skipped due to performance optimizations",
				LineNumber: 1337,
				Before:     []string{},
				After:      []string{},
			})
			matchesCollected++
		} else {
			if err := g.grep2File(filepath.Join(n.Ref.dir, "raw", name), re, int(opt.LinesOfContext),
				func(line []byte, lineno int, before [][]byte, after [][]byte) (bool, error) {

					// Count each matching file exactly once, on its first reported match.
					// (Tracking the last seen file ID instead would miss file ID 0, because
					// the tracker's zero value is itself a valid ID.)
					if !hasMatch {
						hasMatch = true
						counter++
					}
					if filesFound < opt.Offset || (opt.Limit > 0 && filesCollected >= opt.Limit) {
						// Outside the requested window: knowing the file matches is enough.
						return false, nil
					}

					matchesCollected++

					matches = append(matches, &Match{
						Line:       string(line),
						LineNumber: lineno,
						Before:     toStrings(before),
						After:      toStrings(after),
					})

					if matchesCollected > matchLimit {
						return false, fmt.Errorf("search exceeds limit on matches: %d", matchLimit)
					}

					return true, nil
				}); err != nil {
				return nil, err
			}
		}

		if !hasMatch {
			continue
		}

		filesFound++
		if len(matches) > 0 {
			filesCollected++
			results = append(results, &FileMatch{
				Filename: name,
				Matches:  matches,
			})
		}
	}

	return &SearchResponse{
		Matches:        results,
		FilesWithMatch: filesFound,
		FilesOpened:    filesOpened,
		Duration:       time.Since(startedAt),
		Revision:       n.Ref.Rev,
	}, nil
}