// ReplaceAll replaces every match of rgx in *src with the literal bytes rpl.
//
// Matches are located once, against the contents *src has on entry, so text
// introduced by a replacement is never rescanned. rpl is copied verbatim:
// expansion templates such as "$1" are not interpreted.
//
// When rgx does not match, *src is left untouched. Otherwise *src is set to a
// newly allocated slice holding the result; the previous backing array is not
// modified. The result is built in a single pass, so the cost is linear in the
// size of the input plus the size of the output.
func ReplaceAll(src *[]byte, rgx *regexp.Regexp, rpl []byte) {
	matches := rgx.FindAllIndex(*src, -1)
	if len(matches) == 0 {
		return
	}

	// Compute the final size up front so the output is allocated exactly once.
	size := len(*src)
	for _, m := range matches {
		size += len(rpl) - (m[1] - m[0])
	}

	out := make([]byte, 0, size)
	prev := 0 // end of the previous match in the original buffer
	for _, m := range matches {
		out = append(out, (*src)[prev:m[0]]...) // unmatched text before the match
		out = append(out, rpl...)
		prev = m[1]
	}
	out = append(out, (*src)[prev:]...) // unmatched tail

	*src = out
}
// ParseRegexpAddress parses a regular expression address. func (f *Samfile) parseRegexpAddress(re *regexp.Regexp, dir int, dot Address) (newdot Address, err error) { if dir == 1 { // In normal forward searching, find next occurance starting after dot. idx := re.FindIndex(f.b[dot.to:]) if idx != nil { newdot.from = idx[0] + dot.to newdot.to = idx[1] + dot.to return newdot, nil } // If there is none, restart from the beginning of the file to the dot. idx = re.FindIndex(f.b[0:dot.to]) if idx != nil { newdot.from = idx[0] newdot.to = idx[1] return newdot, nil } // Still no match: return dot unchanged. return dot, nil } // Backward searching must be implemented with FindAll. idxs := re.FindAllIndex(f.b, -1) if idxs == nil { // No matches, return dot unchanged. return dot, nil } // Look for last match before dot. for i := 0; i < len(idxs); i++ { if idxs[i][1] > dot.from { if i > 0 { newdot.from = idxs[i-1][0] newdot.to = idxs[i-1][1] return newdot, nil } } } // No match before the dot, restart from the end. for i := len(idxs) - 1; i >= 0; i-- { if idxs[i][0] < dot.to { if i != len(idxs)-1 { newdot.from = idxs[i+1][0] newdot.to = idxs[i+1][1] return newdot, nil } } } return dot, errors.New("implementation error: cannot reverse find") }
// splitb cuts b into the pieces that lie between successive matches of re.
// It closely follows regexp.Split(s, -1) but works on, and returns, byte
// slices. The returned pieces alias b; nothing is copied.
func splitb(re *regexp.Regexp, b []byte) [][]byte {
	locs := re.FindAllIndex(b, -1)
	pieces := make([][]byte, 0, len(locs))

	pieceStart := 0     // where the next piece begins
	lastMatchStart := 0 // start offset of the most recent match seen
	for i := range locs {
		lo, hi := locs[i][0], locs[i][1]
		lastMatchStart = lo
		if hi == 0 {
			// An empty match at the very start of b produces no piece.
			continue
		}
		pieces = append(pieces, b[pieceStart:lo])
		pieceStart = hi
	}

	// Emit the trailing piece unless the last match started exactly at the
	// end of b.
	if lastMatchStart == len(b) {
		return pieces
	}
	return append(pieces, b[pieceStart:])
}
// SearchLine runs Regexp.FindAllIndex on the given line, providing the same // return value. func (l *LineReader) SearchLine(r *regexp.Regexp, line int64) ([][]int, error) { start, end, err := l.findLineRange(line) if err != nil { return nil, err } size := end - start + 1 buf := make([]byte, size) _, err = l.src.ReadAt(buf, start) // TODO(prattmic): support partial reads if err != nil { return nil, err } return r.FindAllIndex(buf, -1), nil }
func markAndPrint(ln int, re *regexp.Regexp, line []byte) { locs := re.FindAllIndex(line, -1) if len(locs) > 0 { fmt.Printf("%4d: ", ln) } else { fmt.Print(" ") } p := 0 for _, loc := range locs { if loc[0] > p { os.Stdout.Write(line[p:loc[0]]) } ct.ChangeColor(ct.Green, true, ct.None, false) os.Stdout.Write(line[loc[0]:loc[1]]) ct.ResetColor() p = loc[1] } if p < len(line) { os.Stdout.Write(line[p:]) } fmt.Println() }
// getMatches collects all matches of regex in the provided data; it is used
// for normal and condition matches.
//
// data contains the original data.
// testBuffer contains the data to test the regex against (potentially
// modified, e.g. to support the ignore case option). Positions found in
// testBuffer are used to index data directly.
// offset is added to every buffer-relative position stored in a Match and is
// passed on to the context helpers.
// length contains the length of the provided data.
// Matches are only valid if they start within the validMatchRange.
// conditionID is stored unchanged in every Match.
// target is passed to getBeforeContextFromFile / getAfterContextFromFile
// (presumably the path of the file being searched; verify against callers).
//
// The result is the zero value of Matches when nothing matched.
func getMatches(regex *regexp.Regexp, data []byte, testBuffer []byte, offset int64, length int, validMatchRange int, conditionID int, target string) Matches {
	var matches Matches
	if allIndex := regex.FindAllIndex(testBuffer, -1); allIndex != nil {
		// An index-based loop is used instead of range because the
		// non-multiline branch below appends re-queued matches to allIndex
		// while iterating; len(allIndex) is re-evaluated on every iteration.
		// for _, index := range allindex {
		for mi := 0; mi < len(allIndex); mi++ {
			index := allIndex[mi]
			start := index[0]
			end := index[1]
			// \s always matches newline, leading to incorrect matches in non-multiline mode
			// analyze match and reject false matches
			if !options.Multiline {
				// remove newlines (0x0a) at the beginning of the match
				for ; start < length && end > start && data[start] == 0x0a; start++ {
				}
				// remove newlines at the end of the match
				for ; end > 0 && end > start && data[end-1] == 0x0a; end-- {
				}
				// check if the corrected match is still valid
				if !regex.Match(testBuffer[start:end]) {
					continue
				}
				// check if the match contains newlines
				if bytes.Contains(data[start:end], []byte{0x0a}) {
					// Rebuild the complete lines to check whether these contain valid matches.
					// In very rare cases, multiple lines may contain a valid match. As multiple
					// matches cannot be processed correctly here, requeue them to be processed again.
					lineStart := start
					lineEnd := end
					// Extend [lineStart, lineEnd) outwards to the surrounding line boundaries.
					for lineStart > 0 && data[lineStart-1] != 0x0a {
						lineStart--
					}
					for lineEnd < length && data[lineEnd] != 0x0a {
						lineEnd++
					}
					// Test each covered line on its own; lastStart marks the
					// beginning of the line currently being examined.
					lastStart := lineStart
					for pos := lastStart + 1; pos < lineEnd; pos++ {
						if data[pos] == 0x0a || pos == lineEnd-1 {
							// On the last byte of the range, step past it so
							// that the final line includes that byte.
							if pos == lineEnd-1 && data[pos] != 0x0a {
								pos++
							}
							if idx := regex.FindIndex(testBuffer[lastStart:pos]); idx != nil {
								// NOTE(review): the next two assignments are
								// overwritten immediately below.
								start = lastStart
								end = pos
								start = lastStart + idx[0]
								end = lastStart + idx[1]
								// Requeue the single-line match; the outer
								// loop will process it in a later iteration.
								allIndex = append(allIndex, []int{start, end})
							}
							lastStart = pos + 1
						}
					}
					// The multi-line match itself is dropped.
					continue
				}
			}

			lineStart := start
			lineEnd := end
			// In multiline mode, skip matches that start at or beyond validMatchRange.
			if options.Multiline && start >= validMatchRange {
				continue
			}
			// Extend [lineStart, lineEnd) to the boundaries of the line(s)
			// containing the match; lineEnd stops on the newline or at length.
			for lineStart > 0 && data[lineStart-1] != 0x0a {
				lineStart--
			}
			for lineEnd < length && data[lineEnd] != 0x0a {
				lineEnd++
			}

			var contextBefore *string
			var contextAfter *string

			if options.ContextBefore > 0 {
				var contextBeforeStart int
				if lineStart > 0 {
					// Walk backwards from the newline preceding the matched
					// line, counting newlines until options.ContextBefore
					// lines were found or the start of data is reached.
					contextBeforeStart = lineStart - 1
					precedingLinesFound := 0
					for contextBeforeStart > 0 {
						if data[contextBeforeStart-1] == 0x0a {
							precedingLinesFound++
							if precedingLinesFound == options.ContextBefore {
								break
							}
						}
						contextBeforeStart--
					}
					if precedingLinesFound < options.ContextBefore && contextBeforeStart == 0 && offset > 0 {
						// Not enough lines in data and data does not begin at
						// offset 0: delegate to the file-based helper.
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						tmp := string(data[contextBeforeStart : lineStart-1])
						contextBefore = &tmp
					}
				} else {
					// The match is on the first line of data.
					if offset > 0 {
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						contextBefore = nil
					}
				}
			}

			if options.ContextAfter > 0 {
				var contextAfterEnd int
				if lineEnd < length-1 {
					// Walk forwards from the end of the matched line, counting
					// newlines until options.ContextAfter lines were found or
					// the end of data is reached.
					contextAfterEnd = lineEnd
					followingLinesFound := 0
					for contextAfterEnd < length-1 {
						if data[contextAfterEnd+1] == 0x0a {
							followingLinesFound++
							if followingLinesFound == options.ContextAfter {
								contextAfterEnd++
								break
							}
						}
						contextAfterEnd++
					}
					if followingLinesFound < options.ContextAfter && contextAfterEnd == length-1 {
						// Not enough lines left in data: delegate to the
						// file-based helper.
						contextAfter = getAfterContextFromFile(target, offset, end)
					} else {
						tmp := string(data[lineEnd+1 : contextAfterEnd])
						contextAfter = &tmp
					}
				} else {
					contextAfter = getAfterContextFromFile(target, offset, end)
				}
			}

			// Positions are stored relative to offset; the match and line
			// texts are taken from the original data, not from testBuffer.
			m := Match{
				conditionID:   conditionID,
				start:         offset + int64(start),
				end:           offset + int64(end),
				lineStart:     offset + int64(lineStart),
				lineEnd:       offset + int64(lineEnd),
				match:         string(data[start:end]),
				line:          string(data[lineStart:lineEnd]),
				contextBefore: contextBefore,
				contextAfter:  contextAfter,
			}

			// handle special case where '^' matches after the last newline
			if int(lineStart) != validMatchRange {
				matches = append(matches, m)
			}
		}
	}
	return matches
}
// FindAllIndex returns a sorted list of non-overlapping matches of the // regular expression r, where a match is a pair of indices specifying // the matched slice of x.Bytes(). If n < 0, all matches are returned // in successive order. Otherwise, at most n matches are returned and // they may not be successive. The result is nil if there are no matches, // or if n == 0. // func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) { // a non-empty literal prefix is used to determine possible // match start indices with Lookup prefix, complete := r.LiteralPrefix() lit := []byte(prefix) // worst-case scenario: no literal prefix if prefix == "" { return r.FindAllIndex(x.data, n) } // if regexp is a literal just use Lookup and convert its // result into match pairs if complete { // Lookup returns indices that may belong to overlapping matches. // After eliminating them, we may end up with fewer than n matches. // If we don't have enough at the end, redo the search with an // increased value n1, but only if Lookup returned all the requested // indices in the first place (if it returned fewer than that then // there cannot be more). 
for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ { indices := x.Lookup(lit, n1) if len(indices) == 0 { return } sort.Ints(indices) pairs := make([]int, 2*len(indices)) result = make([][]int, len(indices)) count := 0 prev := 0 for _, i := range indices { if count == n { break } // ignore indices leading to overlapping matches if prev <= i { j := 2 * count pairs[j+0] = i pairs[j+1] = i + len(lit) result[count] = pairs[j : j+2] count++ prev = i + len(lit) } } result = result[0:count] if len(result) >= n || len(indices) != n1 { // found all matches or there's no chance to find more // (n and n1 can be negative) break } } if len(result) == 0 { result = nil } return } // regexp has a non-empty literal prefix; Lookup(lit) computes // the indices of possible complete matches; use these as starting // points for anchored searches // (regexp "^" matches beginning of input, not beginning of line) r = regexp.MustCompile("^" + r.String()) // compiles because r compiled // same comment about Lookup applies here as in the loop above for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ { indices := x.Lookup(lit, n1) if len(indices) == 0 { return } sort.Ints(indices) result = result[0:0] prev := 0 for _, i := range indices { if len(result) == n { break } m := r.FindIndex(x.data[i:]) // anchored search - will not run off // ignore indices leading to overlapping matches if m != nil && prev <= i { m[0] = i // correct m m[1] += i result = append(result, m) prev = m[1] } } if len(result) >= n || len(indices) != n1 { // found all matches or there's no chance to find more // (n and n1 can be negative) break } } if len(result) == 0 { result = nil } return }
//FindAllIndex returns the inverse of Regexp.FindAllIndex. func FindAllIndex(r *regexp.Regexp, b []byte, n int) [][]int { is := r.FindAllIndex(b, n) return Indicies(is, len(b)) }