func buildLineFromText(linePattern *regexp.Regexp, lineno int, offset int64, lineLength int64, text string) *Line { var line *Line = nil match := linePattern.FindAllStringSubmatchIndex(text, -1) if len(match) > 0 && len(match[0]) > 1 { line = &Line{LineData{lineno, offset, lineLength}, text[match[0][0]:match[0][1]], text} } return line }
// splitInclusive splits on a regexp, but the separators are included within the output strings func splitInclusive(txt string, sep *regexp.Regexp) []string { matches := sep.FindAllStringSubmatchIndex(txt, -1) used := 0 parts := make([]string, 0, len(matches)+1) for _, m := range matches { if used < m[0] { parts = append(parts, txt[used:m[0]]) } used = m[0] } if used < len(txt) { parts = append(parts, txt[used:]) } return parts }
// Apply the regular expression and return the list of all sub-matches // and a list of the positions. The positions are unique, and calculated // doing an average of the positions of all sub-matches. func (r *Response) ReList(re *regexp.Regexp) ([][]string, []int) { matchs := re.FindAllStringSubmatch(r.Body, -1) pos := re.FindAllStringSubmatchIndex(r.Body, -1) // Merge positions into a single value (the start one) newpos := make([]int, len(pos)) for i, p := range pos { sum := 0 items := 0 for _, n := range p { sum += n items++ } newpos[i] = sum / items } return matchs, newpos }
// splitAllByRegexp split a string by regexp, and return two string slices: // parts outside of regexp matches, and $1 for matches. func SplitAllByRegexp(s string, rx *regexp.Regexp) ([]string, []string) { indexes := rx.FindAllStringSubmatchIndex(s, -1) if indexes == nil { return []string{s}, nil } var parts, matches []string for i, ind := range indexes { if i == 0 { parts = append(parts, s[:ind[0]]) } else { parts = append(parts, s[indexes[i-1][1]:ind[0]]) } if len(ind) >= 4 { matches = append(matches, s[ind[2]:ind[3]]) } if i == len(indexes)-1 { parts = append(parts, s[ind[1]:]) } } return parts, matches }
/* RegexpSplit split slices s into substrings separated by the expression and returns a slice of the substrings between those expression matches. If capturing parentheses are used in expression, then the text of all groups in the expression are also returned as part of the resulting slice. This function acts consistent with Python's re.split function. */ func RegexpSplit(re *regexp.Regexp, s string, n int) []string { if n == 0 { return nil } if len(re.String()) > 0 && len(s) == 0 { return []string{""} } var matches [][]int if len(re.SubexpNames()) > 1 { matches = re.FindAllStringSubmatchIndex(s, n) } else { matches = re.FindAllStringIndex(s, n) } strings := make([]string, 0, len(matches)) beg := 0 end := 0 for _, match := range matches { if n > 0 && len(strings) >= n-1 { break } end = match[0] if match[1] != 0 { strings = append(strings, s[beg:end]) } beg = match[1] if len(re.SubexpNames()) > 1 { strings = append(strings, s[match[0]:match[1]]) } } if end != len(s) { strings = append(strings, s[beg:]) } return strings }
func (p NumericParser) parse(s string) (*Numeric, error) { var ( n *Numeric err error sign string reStr string re *regexp.Regexp parseErr = errors.New(ParseNumericError) ) // Record whether the input string has a currency symbol. // If so, it can only be a monetary value. hasCurrency := p.currencyRegex.MatchString(s) if hasCurrency { s = p.removeCurrencySymbol(s) } // Now determine whether the string's initial character is a + or -. // If so, strip it away and record the sign. sign = "" re = regexp.MustCompile("^[\\+-]") if re.MatchString(s) { if re.FindString(s) == "-" { sign = "-" } s = s[1:] } // Since currency and sign symbols have been stripped, we now check that the // expression begins with a decimal separator (possibly) and digit. // Valid strings thus look like either: .x* or x*. reStr = "^" + p.decimalReStr + "?" + "[0-9]" re = regexp.MustCompile(reStr) if !re.MatchString(s) { return nil, parseErr } // Prepend a 0 if the string begins with a decimal separator. reStr = "^" + p.decimalReStr re = regexp.MustCompile(reStr) if re.MatchString(s) { s = "0" + s } // If the input ends with the decimal separator, remove it. re = regexp.MustCompile(p.decimalReStr + "$") if re.MatchString(s) { s = re.ReplaceAllString(s, "") } // Create the main validating regex. reStr = "^\\d+" + "(" + p.digitReStr + "\\d{3})*" + p.decimalReStr + "?\\d*$" re = regexp.MustCompile(reStr) if !re.MatchString(s) { return nil, parseErr } // We can now assume that the string is valid except for // intermediate delimiters. // Before attempting to parse the string further, we (possibly) perform // some basic sanitization. var parsed string tmp, err := p.sanitize(s) if err == nil { parsed = tmp } else { // Probably the parser cannot distinguish between decimal and digit // separators. So we handle this case separately. re = regexp.MustCompile(p.digitReStr + "|" + p.decimalReStr) locs := re.FindAllStringSubmatchIndex(s, -1) switch len(locs) { case 0: // The number is an integer. No additional parsing needed. parsed = s err = nil case 1: // Need to deal with 1,234 vs 123,456 vs 12.345, etc. parsed, err = p.parseOneUnknownSeparator(s, locs[0][0]) default: // Try to find the last separator and determine its type. parsed, err = p.parseManyUnknownSeparators(s, locs) } } parsed = sign + parsed f, ferr := strconv.ParseFloat(parsed, 64) if err != nil || ferr != nil { return nil, err } // We now know that the parsed string correctly parses as a float. n = &Numeric{ isFloat: true, f: f, } if hasCurrency { n.isMoney = true } _, err = strconv.Atoi(parsed) if err == nil { n.isInt = true } return n, nil }