func testFindAllIndex(t *testing.T, tc *testCase, x *Index, rx *regexp.Regexp, n int) { res := x.FindAllIndex(rx, n) exp := rx.FindAllStringIndex(tc.source, n) // check that the lengths match if len(res) != len(exp) { t.Errorf("test %q, FindAllIndex %q (n = %d): expected %d results; got %d", tc.name, rx, n, len(exp), len(res)) } // if n >= 0 the number of results is limited --- unless n >= all results, // we may obtain different positions from the Index and from regexp (because // Index may not find the results in the same order as regexp) => in general // we cannot simply check that the res and exp lists are equal // check that each result is in fact a correct match and the result is sorted for i, r := range res { if r[0] < 0 || r[0] > r[1] || len(tc.source) < r[1] { t.Errorf("test %q, FindAllIndex %q, result %d (n == %d): illegal match [%d, %d]", tc.name, rx, i, n, r[0], r[1]) } else if !rx.MatchString(tc.source[r[0]:r[1]]) { t.Errorf("test %q, FindAllIndex %q, result %d (n = %d): [%d, %d] not a match", tc.name, rx, i, n, r[0], r[1]) } } if n < 0 { // all results computed - sorted res and exp must be equal for i, r := range res { e := exp[i] if r[0] != e[0] || r[1] != e[1] { t.Errorf("test %q, FindAllIndex %q, result %d: expected match [%d, %d]; got [%d, %d]", tc.name, rx, i, e[0], e[1], r[0], r[1]) } } } }
// adapted from http://codereview.appspot.com/6846048/
//
// re_split cuts s at every match of re and returns the substrings that lie
// between those matches.
//
// The returned slice consists of all the substrings of s that are not
// contained in the slice returned by FindAllString(). When called on an
// expression that contains no metacharacters, it is equivalent to
// strings.SplitN().
//
// Example:
//
//	s := re_split(regexp.MustCompile("a*"), "abaabaccadaaae", 5)
//	// s: ["", "b", "b", "c", "cadaaae"]
//
// The count n determines the number of substrings to return:
//
//	n > 0: at most n substrings; the last substring is the unsplit remainder.
//	n == 0: the result is nil (zero substrings)
//	n < 0: all substrings
//
// An empty s always yields a slice holding one empty string (unless n == 0).
func re_split(re *regexp.Regexp, s string, n int) []string {
	switch {
	case n == 0:
		return nil
	case len(s) == 0:
		return []string{""}
	}

	locs := re.FindAllStringIndex(s, n)
	parts := make([]string, 0, len(locs))

	start, lastMatchStart := 0, 0
	for i := 0; i < len(locs); i++ {
		// Stop as soon as only the slot for the unsplit remainder is left.
		if n > 0 && len(parts) >= n-1 {
			break
		}

		lo, hi := locs[i][0], locs[i][1]
		lastMatchStart = lo
		// An empty match at the very beginning of s produces no substring.
		if hi != 0 {
			parts = append(parts, s[start:lo])
		}
		start = hi
	}

	// The remainder is skipped only when the last processed match starts at
	// the end of s.
	if lastMatchStart != len(s) {
		parts = append(parts, s[start:])
	}
	return parts
}
// genericSplit is the shared implementation behind Split and SplitAfter.
// With includeSep set to true each field keeps the separator that ends it;
// otherwise the separators are dropped.
//
// numFields == 0 returns an empty (non-nil) slice. Otherwise at most
// numFields-1 separators are looked up, so a positive numFields bounds the
// number of returned fields and a negative numFields returns all of them.
// The last field is always the remainder of s after the final separator.
func genericSplit(re *regexp.Regexp, s string, numFields int, includeSep bool) []string {
	if numFields == 0 {
		return make([]string, 0)
	}

	// Every match is a [start, end] pair. A field that should include its
	// separator is cut at the separator's end (index 1), one that should not
	// is cut at the separator's start (index 0).
	cutAt := 0
	if includeSep {
		cutAt = 1
	}

	seps := re.FindAllStringIndex(s, numFields-1)
	fields := make([]string, 0, len(seps)+1)

	from := 0
	for _, sep := range seps {
		fields = append(fields, s[from:sep[cutAt]])
		from = sep[1]
	}
	return append(fields, s[from:])
}
// RegSplit cuts text at every match of reg and returns the pieces between the
// matches, in order. The result always holds one element more than there are
// matches: text without any match comes back as a single-element slice, and
// matches at the very start or end of text produce empty strings.
func RegSplit(text string, reg *regexp.Regexp) []string {
	locs := reg.FindAllStringIndex(text, -1)
	pieces := make([]string, 0, len(locs)+1)

	cursor := 0
	for _, loc := range locs {
		pieces = append(pieces, text[cursor:loc[0]])
		cursor = loc[1]
	}
	// Whatever follows the last match (possibly nothing) is the final piece.
	return append(pieces, text[cursor:])
}
// Highlight colorifies all occurences of pattern inside of needle // and returns a string. func highlight(pattern *regexp.Regexp, needle string) string { matches := pattern.FindAllStringIndex(needle, -1) out := "" prev := 0 printer := color.New(color.FgRed).SprintFunc() for _, locs := range matches { txt := needle[locs[0]:locs[1]] out += needle[prev:locs[0]] out += printer(txt) prev = locs[1] } out += needle[prev:] return out }
// regexCustom takes in a string of text and a regular expression statement // returning a MatchResult object. It is the barebones implementation of what // processors do. func regexCustom(s string, re *regexp.Regexp) *MatchResult { var res []*Match idxs := re.FindAllStringIndex(s, -1) if len(idxs) == 0 { return &MatchResult{Text: s, Matches: res} } for _, m := range idxs { res = append(res, &Match{Match: s[m[0]:m[1]], Indices: m}) } return &MatchResult{Text: s, Matches: res} }
func fullMatchString(re *regexp.Regexp, s string) []string { var rs = re.FindAllStringIndex(s, -1) var cur int for _, r := range rs { if notWhiteSpace(s, cur, r[0]) { return nil } if cur > 0 && cur == r[0] { return nil } cur = r[1] } if notWhiteSpace(s, cur, len(s)) { return nil } return re.FindAllString(s, -1) }
// splitWithRegexp cuts s at every match of re and returns the substrings
// between the matches. This helper is missing in Go 1.0 (String type).
//
// An empty s combined with a non-empty expression yields a slice holding one
// empty string. An empty match at the very start of s produces no substring,
// and the trailing remainder is omitted only when the last match starts at
// the end of s.
func splitWithRegexp(s string, re *regexp.Regexp) []string {
	if len(s) == 0 && len(re.String()) > 0 {
		return []string{""}
	}

	locs := re.FindAllStringIndex(s, -1)
	pieces := make([]string, 0, len(locs))

	from, lastMatchStart := 0, 0
	for _, loc := range locs {
		lastMatchStart = loc[0]
		if loc[1] != 0 {
			pieces = append(pieces, s[from:lastMatchStart])
		}
		from = loc[1]
	}

	if lastMatchStart == len(s) {
		return pieces
	}
	return append(pieces, s[from:])
}
// replaceParameterNameInSubstring appends s to buf with every match of
// paramRegExp rewritten: the first and the last byte of each match are kept
// and everything between them is replaced by new. Text outside the matches is
// copied unchanged; without any match s is appended as is.
//
// The old parameter is not used by this function. The slicing assumes every
// match is at least two bytes long (one delimiter byte on each side of the
// name) — verify against the expressions passed by callers.
func replaceParameterNameInSubstring(s, old, new string, buf *bytes.Buffer, paramRegExp *regexp.Regexp) {
	// cursor is the end of the previous match; cursor-1 is therefore the
	// index of that match's last byte, which has to be preserved. It starts
	// at 1 so that the first copy begins at the start of s.
	cursor := 1
	for _, span := range paramRegExp.FindAllStringIndex(s, -1) {
		// Copy up to and including the first byte of the match.
		buf.WriteString(s[cursor-1 : span[0]+1])
		buf.WriteString(new)
		cursor = span[1]
	}

	if cursor <= 1 {
		buf.WriteString(s)
		return
	}
	buf.WriteString(s[cursor-1:])
}
/*
RegexpSplit slices s into substrings separated by the expression and returns
a slice of the substrings between those expression matches.

If capturing parentheses are used in the expression, then the text of all
groups in the expression is also returned as part of the resulting slice,
directly after the substring that precedes the match. A group that did not
take part in a match contributes an empty string.

This function acts consistent with Python's re.split function.

The count n limits the number of substrings between matches:

	n > 0: at most n substrings; the last substring is the unsplit remainder.
	n == 0: the result is nil (zero substrings)
	n < 0: all substrings

Group texts are added on top of that and do not count towards n.
*/
func RegexpSplit(re *regexp.Regexp, s string, n int) []string {
	if n == 0 {
		return nil
	}
	if len(re.String()) > 0 && len(s) == 0 {
		return []string{""}
	}

	groups := re.NumSubexp()

	var matches [][]int
	if groups > 0 {
		matches = re.FindAllStringSubmatchIndex(s, n)
	} else {
		matches = re.FindAllStringIndex(s, n)
	}

	parts := make([]string, 0, len(matches)*(groups+1)+1)

	// pieces counts only the substrings between matches, so that appended
	// group texts do not eat into the n limit.
	pieces := 0
	beg, end := 0, 0
	for _, match := range matches {
		if n > 0 && pieces >= n-1 {
			break
		}

		end = match[0]
		// An empty match at the very start of s produces no substring.
		if match[1] != 0 {
			parts = append(parts, s[beg:end])
			pieces++
		}
		beg = match[1]

		// match[2*g] and match[2*g+1] delimit capture group g; both are -1
		// when the group did not participate in the match.
		for g := 1; g <= groups; g++ {
			lo, hi := match[2*g], match[2*g+1]
			if lo < 0 {
				parts = append(parts, "")
				continue
			}
			parts = append(parts, s[lo:hi])
		}
	}

	if end != len(s) {
		parts = append(parts, s[beg:])
	}
	return parts
}
// RegexpSplit breaks sentence into a sequence of tokens that alternate
// between the text outside the matches of r and the matches themselves, in
// their original order. Matches are kept in the result.
//
// No empty token is emitted for text between two adjacent matches or before a
// match at the very start, and nothing is appended after a match that ends
// the sentence. When r does not match at all, the result holds sentence as
// its only element.
func RegexpSplit(r *regexp.Regexp, sentence string) []string {
	locs := r.FindAllStringIndex(sentence, -1)
	if len(locs) == 0 {
		return []string{sentence}
	}

	tokens := make([]string, 0)
	cursor := 0
	for _, loc := range locs {
		start, stop := loc[0], loc[1]
		// text between the previous match and this one, if there is any
		if start != cursor {
			tokens = append(tokens, sentence[cursor:start])
		}
		tokens = append(tokens, sentence[start:stop])
		cursor = stop
	}

	if cursor < len(sentence) {
		tokens = append(tokens, sentence[cursor:])
	}
	return tokens
}
func hiliteMatches(c *ansi.Colorer, p *regexp.Regexp, line string) string { // find the indexes for all matches idxs := p.FindAllStringIndex(line, -1) var buf bytes.Buffer beg := 0 for _, idx := range idxs { // for each match add the contents before the match ... buf.WriteString(line[beg:idx[0]]) // and the highlighted version of the match buf.WriteString(c.FgBg(line[idx[0]:idx[1]], ansi.Black, ansi.Bold, ansi.Yellow, ansi.Intense)) beg = idx[1] } buf.WriteString(line[beg:]) return buf.String() }
// Tokenize works just like strings.Split() except that the resulting slice
// also includes the delimiters. For example, the string
// "<green>Hello, <red>world!</>" tokenized by tags produces the following:
//
//	[0] "<green>"
//	[1] "Hello, "
//	[2] "<red>"
//	[3] "world!"
//	[4] "</>"
//
// No empty token is emitted in front of a delimiter that starts the string,
// while two adjacent delimiters further in are separated by an empty token.
// Text after the last delimiter, if any, becomes the final token.
func Tokenize(str string, regex *regexp.Regexp) []string {
	locs := regex.FindAllStringIndex(str, -1)
	tokens := make([]string, 0, len(locs))

	// head is the end of the previous delimiter, tail the start of the
	// current one; str[head:tail] is the text between the two.
	head, tail := 0, 0
	for _, loc := range locs {
		start, stop := loc[0], loc[1]
		tail = start
		if stop != 0 {
			if !(head == 0 && tail == 0) {
				// Append the text between the delimiters.
				tokens = append(tokens, str[head:tail])
			}
			// Append the delimiter itself.
			tokens = append(tokens, str[start:stop])
		}
		head = stop
	}

	if head == len(str) || tail == len(str) {
		return tokens
	}
	return append(tokens, str[head:])
}
//FindAllStringIndex returns the inverse of Regexp.FindAllStringIndex. func FindAllStringIndex(r *regexp.Regexp, s string, n int) [][]int { is := r.FindAllStringIndex(s, n) return Indicies(is, len(s)) }