// TODO(knorton): This is still being tested. This is a grep that supports context lines. Unlike the version
// in codesearch, this one does not operate on chunks. The downside is that we have to have the whole file
// in memory to do the grep. Fortunately, we limit the size of files that get indexed anyway. 10M files tend
// to not be source code.
func (g *grepper) grep2(
	r io.Reader,
	re *regexp.Regexp,
	nctx int,
	fn func(line []byte, lineno int, before [][]byte, after [][]byte) (bool, error)) error {

	buf, err := g.fillFrom(r)
	if err != nil {
		return err
	}

	lineno := 0
	for {
		if len(buf) == 0 {
			return nil
		}

		m := re.Match(buf, true, true)
		if m < 0 {
			return nil
		}

		// start of matched line
		str := bytes.LastIndex(buf[:m], nl) + 1

		// end of previous line
		endl := str - 1
		if endl < 0 {
			endl = 0
		}

		// end of current line
		end := m + 1
		if end > len(buf) {
			end = len(buf)
		}

		lineno += countLines(buf[:str])
		more, err := fn(
			bytes.TrimRight(buf[str:end], "\n"),
			lineno+1,
			lastNLines(buf[:endl], nctx),
			firstNLines(buf[end:], nctx))
		if err != nil {
			return err
		}

		if !more {
			return nil
		}

		lineno++
		buf = buf[end:]
	}
}
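// Illustrative sketch only (hypothetical helper, not part of the package):
// one way grep2 might be driven, collecting each matching line together with
// up to two lines of surrounding context. The function name and result shape
// are assumptions made for the example.
func exampleGrep2Usage(r io.Reader, re *regexp.Regexp) ([]string, error) {
	var (
		g     grepper
		found []string
	)
	err := g.grep2(r, re, 2, func(line []byte, lineno int, before, after [][]byte) (bool, error) {
		for _, b := range before {
			found = append(found, string(b))
		}
		found = append(found, string(line))
		for _, a := range after {
			found = append(found, string(a))
		}
		// Returning true asks grep2 to keep scanning for further matches.
		return true, nil
	})
	return found, err
}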
// This nonsense is adapted from https://code.google.com/p/codesearch/source/browse/regexp/match.go#399
// and I assume it is a mess to make it faster, but I would like to try a much simpler, cleaner version.
func (g *grepper) grep(
	r io.Reader,
	re *regexp.Regexp,
	fn func(line []byte, lineno int) (bool, error)) error {

	if g.buf == nil {
		g.buf = make([]byte, 1<<20)
	}

	var (
		buf       = g.buf[:0]
		lineno    = 1
		beginText = true
		endText   = false
	)

	for {
		// Fill the free tail of the buffer from the reader.
		n, err := io.ReadFull(r, buf[len(buf):cap(buf)])
		buf = buf[:len(buf)+n]

		// Only scan complete lines; once the reader is exhausted, scan
		// everything that remains.
		end := len(buf)
		if err == nil {
			end = bytes.LastIndex(buf, nl) + 1
		} else {
			endText = true
		}

		chunkStart := 0
		for chunkStart < end {
			m1 := re.Match(buf[chunkStart:end], beginText, endText) + chunkStart
			beginText = false
			if m1 < chunkStart {
				break
			}

			// Expand the match to the boundaries of its line.
			lineStart := bytes.LastIndex(buf[chunkStart:m1], nl) + 1 + chunkStart
			lineEnd := m1 + 1
			if lineEnd > end {
				lineEnd = end
			}

			lineno += countLines(buf[chunkStart:lineStart])
			line := buf[lineStart:lineEnd]
			more, err := fn(line, lineno)
			if err != nil {
				return err
			}

			if !more {
				return nil
			}

			lineno++
			chunkStart = lineEnd
		}

		if err == nil {
			lineno += countLines(buf[chunkStart:end])
		}

		// Carry any partial final line over to the front of the buffer
		// for the next read.
		n = copy(buf, buf[end:])
		buf = buf[:n]

		if len(buf) == 0 && err != nil {
			if err != io.EOF && err != io.ErrUnexpectedEOF {
				return err
			}
			return nil
		}
	}
}
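// Illustrative sketch only (hypothetical helper, not part of the package):
// the chunked grep above is driven the same way, minus the context slices.
// This example stops at the first match by returning false from the callback.
func exampleGrepFirstMatch(r io.Reader, re *regexp.Regexp) (string, int, error) {
	var (
		g      grepper
		text   string
		number int
	)
	err := g.grep(r, re, func(line []byte, lineno int) (bool, error) {
		text, number = string(line), lineno
		// Returning false tells grep to stop after the first match.
		return false, nil
	})
	return text, number, err
}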
func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error) {
	startedAt := time.Now()

	n.lck.RLock()
	defer n.lck.RUnlock()

	re, err := regexp.Compile(GetRegexpPattern(pat, opt.IgnoreCase))
	if err != nil {
		return nil, err
	}

	var (
		g                grepper
		results          []*FileMatch
		filesOpened      int
		filesFound       int
		filesCollected   int
		matchesCollected int
	)

	// Compile the optional file-name filter.
	var fre *regexp.Regexp
	if opt.FileRegexp != "" {
		fre, err = regexp.Compile(opt.FileRegexp)
		if err != nil {
			return nil, err
		}
	}

	files := n.idx.PostingQuery(index.RegexpQuery(re.Syntax))
	for _, file := range files {
		var matches []*Match
		name := n.idx.Name(file)
		hasMatch := false

		// reject files that do not match the file pattern
		if fre != nil && fre.MatchString(name, true, true) < 0 {
			continue
		}

		filesOpened++
		if err := g.grep2File(
			filepath.Join(n.Ref.dir, "raw", name),
			re,
			int(opt.LinesOfContext),
			func(line []byte, lineno int, before [][]byte, after [][]byte) (bool, error) {
				hasMatch = true

				// Matching files before the offset or beyond the limit still
				// count toward filesFound, but their matches are not collected.
				if filesFound < opt.Offset || (opt.Limit > 0 && filesCollected >= opt.Limit) {
					return false, nil
				}

				matchesCollected++
				matches = append(matches, &Match{
					Line:       string(line),
					LineNumber: lineno,
					Before:     toStrings(before),
					After:      toStrings(after),
				})

				if matchesCollected > matchLimit {
					return false, fmt.Errorf("search exceeds limit on matches: %d", matchLimit)
				}

				return true, nil
			}); err != nil {
			return nil, err
		}

		if !hasMatch {
			continue
		}

		filesFound++
		if len(matches) > 0 {
			filesCollected++
			results = append(results, &FileMatch{
				Filename: name,
				Matches:  matches,
			})
		}
	}

	return &SearchResponse{
		Matches:        results,
		FilesWithMatch: filesFound,
		FilesOpened:    filesOpened,
		Duration:       time.Now().Sub(startedAt),
		Revision:       n.Ref.Rev,
	}, nil
}
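// Illustrative sketch only (hypothetical caller, not part of the package API):
// how Search might be invoked. The pattern and option values are assumptions
// made for the example; the SearchOptions fields are the ones referenced above
// (IgnoreCase, FileRegexp, LinesOfContext, Offset, Limit).
func exampleSearchUsage(idx *Index) ([]string, error) {
	res, err := idx.Search("TODO", &SearchOptions{
		IgnoreCase:     true,
		FileRegexp:     `\.go$`,
		LinesOfContext: 2,
		Limit:          20,
	})
	if err != nil {
		return nil, err
	}

	var lines []string
	for _, fm := range res.Matches {
		for _, m := range fm.Matches {
			// Each Match carries the matched line, its line number, and its
			// before/after context lines.
			lines = append(lines, fmt.Sprintf("%s:%d: %s", fm.Filename, m.LineNumber, m.Line))
		}
	}
	return lines, nil
}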