Пример #1
0
// ReplaceAll rewrites *src so that every non-overlapping match of rgx is
// replaced by the literal bytes of rpl. No "$1"-style template expansion is
// performed on rpl.
//
// All match positions are computed once, against the contents *src has on
// entry. While the buffer is being edited, a running shift maps those original
// positions onto the current (already grown or shrunk) buffer.
func ReplaceAll(src *[]byte, rgx *regexp.Regexp, rpl []byte) {
	buf := *src
	rplLen := len(rpl)
	shift := 0 // net number of bytes added to buf so far (negative if removed)

	for _, loc := range rgx.FindAllIndex(buf, -1) {
		from := loc[0] + shift
		grow := rplLen - (loc[1] - loc[0]) // how much longer rpl is than the match
		shift += grow

		switch {
		case grow > 0:
			// Replacement is longer than the match: open a gap of grow bytes
			// at from. The tail is copied first so it is not overwritten.
			tail := append(make([]byte, grow), buf[from:]...)
			buf = append(buf[:from], tail...)
		case grow < 0:
			// Replacement is shorter than the match: drop -grow bytes at from.
			buf = append(buf[:from], buf[from-grow:]...)
		}

		// The region buf[from:from+rplLen] now has exactly the right size.
		for i, c := range rpl {
			buf[from+i] = c
		}
	}

	*src = buf
}
Пример #2
0
// parseRegexpAddress resolves a regular-expression address relative to dot.
//
// When dir == 1 the search runs forward: the first match found in f.b[dot.to:]
// is returned; if there is none, the search wraps around and the first match
// found in f.b[:dot.to] is returned.
//
// Any other dir searches backward: the last match that ends at or before
// dot.from is returned; if there is none, the search wraps around to the end
// of the buffer and the last match that starts at or after dot.from is
// returned.
//
// If re does not match at all, dot is returned unchanged with a nil error.
// The only error case is a backward search whose sole candidate match
// straddles dot.from (starts before it and ends after it); dot is then
// returned unchanged together with the error.
func (f *Samfile) parseRegexpAddress(re *regexp.Regexp, dir int, dot Address) (newdot Address, err error) {
	if dir == 1 {
		// Forward: look for the next occurrence starting after dot.
		if idx := re.FindIndex(f.b[dot.to:]); idx != nil {
			// idx is relative to the sub-slice; shift it back to buffer offsets.
			newdot.from = idx[0] + dot.to
			newdot.to = idx[1] + dot.to
			return newdot, nil
		}
		// Nothing after dot: wrap around and search from the beginning up to dot.
		if idx := re.FindIndex(f.b[:dot.to]); idx != nil {
			newdot.from = idx[0]
			newdot.to = idx[1]
			return newdot, nil
		}
		// Still no match: return dot unchanged.
		return dot, nil
	}

	// Backward: regexp cannot search in reverse, so collect every match
	// (sorted, non-overlapping) and pick the right one.
	idxs := re.FindAllIndex(f.b, -1)
	if len(idxs) == 0 {
		// No matches, return dot unchanged.
		return dot, nil
	}

	// The closest match lying entirely before dot is the last one that ends
	// at or before dot.from.
	for i := len(idxs) - 1; i >= 0; i-- {
		if idxs[i][1] <= dot.from {
			newdot.from = idxs[i][0]
			newdot.to = idxs[i][1]
			return newdot, nil
		}
	}

	// No match before dot: wrap around and search backward from the end of
	// the buffer. Matches are sorted, so the last one is the first hit.
	if last := idxs[len(idxs)-1]; last[0] >= dot.from {
		newdot.from = last[0]
		newdot.to = last[1]
		return newdot, nil
	}

	// The only match straddles dot.from; neither search direction covers it.
	return dot, errors.New("implementation error: cannot reverse find")
}
Пример #3
0
Файл: lib.go Проект: gaal/shstat
// splitb cuts b into the pieces that lie between matches of re, much like
// regexp.Split(s, -1) does for strings, but the pieces are returned as
// sub-slices of b (no copying).
//
// A match that ends at offset 0 (an empty match at the very start) does not
// produce a piece. No trailing piece is added when the last match starts at
// len(b); in particular an empty b yields an empty, non-nil result.
func splitb(re *regexp.Regexp, b []byte) [][]byte {
	locs := re.FindAllIndex(b, -1)
	parts := make([][]byte, 0, len(locs))

	prev := 0      // start of the piece currently being collected
	lastStart := 0 // start offset of the most recent match seen
	for i := range locs {
		lo, hi := locs[i][0], locs[i][1]
		lastStart = lo
		if hi == 0 {
			// Empty match at the beginning of the input: nothing to cut off.
			continue
		}
		parts = append(parts, b[prev:lo])
		prev = hi
	}

	if lastStart == len(b) {
		return parts
	}
	// Everything after the final separator (or all of b if nothing matched).
	return append(parts, b[prev:])
}
Пример #4
0
// SearchLine loads the bytes of the given line from l.src and returns the
// result of r.FindAllIndex(buf, -1) on them. The returned index pairs are
// therefore relative to the start of the line buffer, not to the underlying
// source.
//
// An error from findLineRange or from ReadAt is returned as-is with a nil
// result.
func (l *LineReader) SearchLine(r *regexp.Regexp, line int64) ([][]int, error) {
	first, last, err := l.findLineRange(line)
	if err != nil {
		return nil, err
	}

	// The buffer spans first..last inclusive, hence the +1.
	// NOTE(review): presumably last is the offset of the line's final byte — confirm against findLineRange.
	data := make([]byte, last-first+1)

	// TODO(prattmic): support partial reads
	if _, err := l.src.ReadAt(data, first); err != nil {
		return nil, err
	}

	return r.FindAllIndex(data, -1), nil
}
Пример #5
0
// markAndPrint writes line to standard output with every match of re
// highlighted, followed by a newline.
//
// Lines containing at least one match are prefixed with the line number ln
// (formatted as "%4d: "); other lines get six spaces instead so the text stays
// aligned. Highlighting is done through the ct package (green foreground,
// reset after each match).
func markAndPrint(ln int, re *regexp.Regexp, line []byte) {
	matches := re.FindAllIndex(line, -1)
	if len(matches) == 0 {
		fmt.Print("      ")
	} else {
		fmt.Printf("%4d: ", ln)
	}

	cursor := 0 // offset of the first byte of line not yet written
	for _, m := range matches {
		from, to := m[0], m[1]

		// Plain text between the previous match and this one.
		if cursor < from {
			os.Stdout.Write(line[cursor:from])
		}

		// The match itself, in colour.
		ct.ChangeColor(ct.Green, true, ct.None, false)
		os.Stdout.Write(line[from:to])
		ct.ResetColor()

		cursor = to
	}

	// Whatever follows the last match (or the whole line if nothing matched).
	if cursor < len(line) {
		os.Stdout.Write(line[cursor:])
	}
	fmt.Println()
}
Пример #6
0
// getMatches collects all matches of regex in the provided data; it is used
// for normal and condition matches.
//
// Parameters:
//   - regex: the expression to search for.
//   - data: the original data; match text, line text and context are cut from it.
//   - testBuffer: the data the regex is actually run against (potentially
//     modified, e.g. to support the ignore case option). Offsets found in
//     testBuffer are used directly as offsets into data.
//   - offset: added to every position stored in a Match, and passed on to the
//     file-based context helpers.
//     NOTE(review): presumably the position of data within the target file — confirm.
//   - length: the length of the provided data.
//   - validMatchRange: in multiline mode, matches starting at or beyond this
//     offset are skipped; in any mode, a match whose line starts exactly at
//     this offset is dropped.
//   - conditionID: stored unchanged in every Match.
//   - target: passed to getBeforeContextFromFile / getAfterContextFromFile
//     when the context lines are not fully contained in data.
//
// The behaviour depends on the options value (Multiline, ContextBefore,
// ContextAfter), which is declared outside this function.
func getMatches(regex *regexp.Regexp, data []byte, testBuffer []byte, offset int64, length int, validMatchRange int, conditionID int, target string) Matches {
	var matches Matches
	if allIndex := regex.FindAllIndex(testBuffer, -1); allIndex != nil {
		// An index-based loop is used instead of range because allIndex may
		// grow while iterating (see the requeue logic below); len(allIndex) is
		// re-evaluated on every iteration so appended entries are visited too.
		// for _, index := range allindex {
		for mi := 0; mi < len(allIndex); mi++ {
			index := allIndex[mi]
			start := index[0]
			end := index[1]
			// \s always matches newline, leading to incorrect matches in non-multiline mode
			// analyze match and reject false matches
			if !options.Multiline {
				// remove newlines at the beginning of the match
				for ; start < length && end > start && data[start] == 0x0a; start++ {
				}
				// remove newlines at the end of the match
				for ; end > 0 && end > start && data[end-1] == 0x0a; end-- {
				}
				// check if the corrected match is still valid
				if !regex.Match(testBuffer[start:end]) {
					continue
				}
				// check if the match contains newlines
				if bytes.Contains(data[start:end], []byte{0x0a}) {
					// Rebuild the complete lines to check whether these contain valid matches.
					// In very rare cases, multiple lines may contain a valid match. As multiple
					// matches cannot be processed correctly here, requeue them to be processed again.
					lineStart := start
					lineEnd := end
					// Extend the range to the start of the first and the end
					// of the last line touched by the match.
					for lineStart > 0 && data[lineStart-1] != 0x0a {
						lineStart--
					}
					for lineEnd < length && data[lineEnd] != 0x0a {
						lineEnd++
					}

					// Walk the range line by line; [lastStart, pos) is the
					// current line without its terminating newline.
					lastStart := lineStart
					for pos := lastStart + 1; pos < lineEnd; pos++ {
						if data[pos] == 0x0a || pos == lineEnd-1 {
							// The final line has no newline inside the range,
							// so include its last byte.
							if pos == lineEnd-1 && data[pos] != 0x0a {
								pos++
							}
							if idx := regex.FindIndex(testBuffer[lastStart:pos]); idx != nil {
								// NOTE(review): the next two assignments are
								// immediately overwritten by the two that follow.
								start = lastStart
								end = pos
								// idx is relative to lastStart; convert to buffer offsets.
								start = lastStart + idx[0]
								end = lastStart + idx[1]
								// Requeue the single-line match for normal processing.
								allIndex = append(allIndex, []int{start, end})
							}
							lastStart = pos + 1
						}
					}
					// The original multi-line match itself is discarded.
					continue
				}
			}

			lineStart := start
			lineEnd := end
			if options.Multiline && start >= validMatchRange {
				continue
			}
			// Expand to the boundaries of the line(s) containing the match;
			// lineEnd stops on the newline (or at length), so it is exclusive.
			for lineStart > 0 && data[lineStart-1] != 0x0a {
				lineStart--
			}
			for lineEnd < length && data[lineEnd] != 0x0a {
				lineEnd++
			}

			var contextBefore *string
			var contextAfter *string

			if options.ContextBefore > 0 {
				var contextBeforeStart int
				if lineStart > 0 {
					// Start on the newline that terminates the previous line
					// and walk backwards, counting line starts.
					contextBeforeStart = lineStart - 1
					precedingLinesFound := 0
					for contextBeforeStart > 0 {
						if data[contextBeforeStart-1] == 0x0a {
							precedingLinesFound++
							if precedingLinesFound == options.ContextBefore {
								break
							}
						}
						contextBeforeStart--
					}
					if precedingLinesFound < options.ContextBefore && contextBeforeStart == 0 && offset > 0 {
						// Ran into the start of data before finding enough
						// lines, and data does not begin at offset 0: let the
						// file-based helper supply the context.
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						// lineStart-1 excludes the newline before the match line.
						tmp := string(data[contextBeforeStart : lineStart-1])
						contextBefore = &tmp
					}
				} else {
					// The match line starts at the very beginning of data.
					if offset > 0 {
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						contextBefore = nil
					}
				}
			}

			if options.ContextAfter > 0 {
				var contextAfterEnd int
				if lineEnd < length-1 {
					// Walk forward from the end of the match line, counting
					// newlines that terminate following lines.
					contextAfterEnd = lineEnd
					followingLinesFound := 0
					for contextAfterEnd < length-1 {
						if data[contextAfterEnd+1] == 0x0a {
							followingLinesFound++
							if followingLinesFound == options.ContextAfter {
								contextAfterEnd++
								break
							}
						}
						contextAfterEnd++
					}
					if followingLinesFound < options.ContextAfter && contextAfterEnd == length-1 {
						// Ran into the end of data before finding enough
						// lines: let the file-based helper supply the context.
						contextAfter = getAfterContextFromFile(target, offset, end)
					} else {
						// lineEnd+1 skips the newline that ends the match line.
						tmp := string(data[lineEnd+1 : contextAfterEnd])
						contextAfter = &tmp
					}
				} else {
					// The match line ends at (or one byte before) the end of data.
					contextAfter = getAfterContextFromFile(target, offset, end)
				}
			}

			// Positions stored in the Match are shifted by offset; the text
			// fields are cut from data using the unshifted positions.
			m := Match{
				conditionID:   conditionID,
				start:         offset + int64(start),
				end:           offset + int64(end),
				lineStart:     offset + int64(lineStart),
				lineEnd:       offset + int64(lineEnd),
				match:         string(data[start:end]),
				line:          string(data[lineStart:lineEnd]),
				contextBefore: contextBefore,
				contextAfter:  contextAfter,
			}

			// handle special case where '^' matches after the last newline
			if int(lineStart) != validMatchRange {
				matches = append(matches, m)
			}
		}
	}
	return matches
}
Пример #7
0
// FindAllIndex returns a sorted list of non-overlapping matches of the
// regular expression r, where a match is a pair of indices specifying
// the matched slice of x.Bytes(). If n < 0, all matches are returned
// in successive order. Otherwise, at most n matches are returned and
// they may not be successive. The result is nil if there are no matches,
// or if n == 0.
//
// Three strategies are used, depending on r.LiteralPrefix():
//   - no literal prefix: fall back to r.FindAllIndex on the indexed data;
//   - r is entirely a literal: convert the indices returned by x.Lookup
//     directly into match pairs;
//   - r starts with a literal: use the indices returned by x.Lookup as
//     candidate start positions for an anchored regexp search.
func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) {
	// a non-empty literal prefix is used to determine possible
	// match start indices with Lookup
	prefix, complete := r.LiteralPrefix()
	lit := []byte(prefix)

	// worst-case scenario: no literal prefix
	if prefix == "" {
		return r.FindAllIndex(x.data, n)
	}

	// if regexp is a literal just use Lookup and convert its
	// result into match pairs
	if complete {
		// Lookup returns indices that may belong to overlapping matches.
		// After eliminating them, we may end up with fewer than n matches.
		// If we don't have enough at the end, redo the search with an
		// increased value n1, but only if Lookup returned all the requested
		// indices in the first place (if it returned fewer than that then
		// there cannot be more).
		for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
			indices := x.Lookup(lit, n1)
			if len(indices) == 0 {
				// naked return: result is still nil on the first pass
				return
			}
			// indices are sorted here so overlaps can be detected in one pass
			sort.Ints(indices)
			// pairs is a single backing array; each result entry is a
			// two-element window into it, avoiding one allocation per match
			pairs := make([]int, 2*len(indices))
			result = make([][]int, len(indices))
			count := 0
			prev := 0 // end offset of the last accepted match
			for _, i := range indices {
				if count == n {
					break
				}
				// ignore indices leading to overlapping matches
				if prev <= i {
					j := 2 * count
					pairs[j+0] = i
					pairs[j+1] = i + len(lit)
					result[count] = pairs[j : j+2]
					count++
					prev = i + len(lit)
				}
			}
			result = result[0:count]
			if len(result) >= n || len(indices) != n1 {
				// found all matches or there's no chance to find more
				// (n and n1 can be negative)
				break
			}
		}
		// normalize an empty result to nil, as documented
		if len(result) == 0 {
			result = nil
		}
		return
	}

	// regexp has a non-empty literal prefix; Lookup(lit) computes
	// the indices of possible complete matches; use these as starting
	// points for anchored searches
	// (regexp "^" matches beginning of input, not beginning of line)
	// NOTE(review): this recompiles the anchored regexp on every call.
	r = regexp.MustCompile("^" + r.String()) // compiles because r compiled

	// same comment about Lookup applies here as in the loop above
	for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
		indices := x.Lookup(lit, n1)
		if len(indices) == 0 {
			return
		}
		sort.Ints(indices)
		// start over on every pass, reusing the slice's storage
		result = result[0:0]
		prev := 0 // end offset of the last accepted match
		for _, i := range indices {
			if len(result) == n {
				break
			}
			m := r.FindIndex(x.data[i:]) // anchored search - will not run off
			// ignore indices leading to overlapping matches
			if m != nil && prev <= i {
				// m is relative to x.data[i:]; convert to offsets into x.data
				m[0] = i // correct m
				m[1] += i
				result = append(result, m)
				prev = m[1]
			}
		}
		if len(result) >= n || len(indices) != n1 {
			// found all matches or there's no chance to find more
			// (n and n1 can be negative)
			break
		}
	}
	// normalize an empty result to nil, as documented
	if len(result) == 0 {
		result = nil
	}
	return
}
Пример #8
0
// FindAllIndex returns the inverse of Regexp.FindAllIndex: the match index
// pairs that r.FindAllIndex(b, n) produces are handed to Indicies together
// with len(b), and its result is returned.
// NOTE(review): presumably the result lists the spans of b not covered by any match — confirm against Indicies.
func FindAllIndex(r *regexp.Regexp, b []byte, n int) [][]int {
	return Indicies(r.FindAllIndex(b, n), len(b))
}