예제 #1
0
파일: bigwig.go 프로젝트: csurface/bigwig
func buildLineFromText(linePattern *regexp.Regexp, lineno int, offset int64, lineLength int64, text string) *Line {
	var line *Line = nil
	match := linePattern.FindAllStringSubmatchIndex(text, -1)
	if len(match) > 0 && len(match[0]) > 1 {
		line = &Line{LineData{lineno, offset, lineLength}, text[match[0][0]:match[0][1]], text}
	}
	return line
}
예제 #2
0
파일: byline.go 프로젝트: bcampbell/arts
// splitInclusive splits on a regexp, but the separators are included within the output strings
func splitInclusive(txt string, sep *regexp.Regexp) []string {
	matches := sep.FindAllStringSubmatchIndex(txt, -1)
	used := 0
	parts := make([]string, 0, len(matches)+1)
	for _, m := range matches {
		if used < m[0] {
			parts = append(parts, txt[used:m[0]])
		}
		used = m[0]
	}

	if used < len(txt) {
		parts = append(parts, txt[used:])
	}
	return parts
}
예제 #3
0
// Apply the regular expression and return the list of all sub-matches
// and a list of the positions. The positions are unique, and calculated
// doing an average of the positions of all sub-matches.
func (r *Response) ReList(re *regexp.Regexp) ([][]string, []int) {
	matchs := re.FindAllStringSubmatch(r.Body, -1)
	pos := re.FindAllStringSubmatchIndex(r.Body, -1)

	// Merge positions into a single value (the start one)
	newpos := make([]int, len(pos))
	for i, p := range pos {
		sum := 0
		items := 0
		for _, n := range p {
			sum += n
			items++
		}
		newpos[i] = sum / items
	}

	return matchs, newpos
}
예제 #4
0
// splitAllByRegexp split a string by regexp, and return two string slices:
// parts outside of regexp matches, and $1 for matches.
func SplitAllByRegexp(s string, rx *regexp.Regexp) ([]string, []string) {
	indexes := rx.FindAllStringSubmatchIndex(s, -1)
	if indexes == nil {
		return []string{s}, nil
	}
	var parts, matches []string
	for i, ind := range indexes {
		if i == 0 {
			parts = append(parts, s[:ind[0]])
		} else {
			parts = append(parts, s[indexes[i-1][1]:ind[0]])
		}
		if len(ind) >= 4 {
			matches = append(matches, s[ind[2]:ind[3]])
		}
		if i == len(indexes)-1 {
			parts = append(parts, s[ind[1]:])
		}
	}
	return parts, matches
}
예제 #5
0
파일: util.go 프로젝트: 9466/jiebago
/*
RegexpSplit split slices s into substrings separated by the expression and
returns a slice of the substrings between those expression matches.
If capturing parentheses are used in expression, then the text of all groups
in the expression are also returned as part of the resulting slice.

This function acts consistent with Python's re.split function.
*/
func RegexpSplit(re *regexp.Regexp, s string, n int) []string {
	if n == 0 {
		return nil
	}

	if len(re.String()) > 0 && len(s) == 0 {
		return []string{""}
	}

	var matches [][]int
	if len(re.SubexpNames()) > 1 {
		matches = re.FindAllStringSubmatchIndex(s, n)
	} else {
		matches = re.FindAllStringIndex(s, n)
	}
	strings := make([]string, 0, len(matches))

	beg := 0
	end := 0
	for _, match := range matches {
		if n > 0 && len(strings) >= n-1 {
			break
		}

		end = match[0]
		if match[1] != 0 {
			strings = append(strings, s[beg:end])
		}
		beg = match[1]
		if len(re.SubexpNames()) > 1 {
			strings = append(strings, s[match[0]:match[1]])
		}
	}

	if end != len(s) {
		strings = append(strings, s[beg:])
	}

	return strings
}
예제 #6
0
func (p NumericParser) parse(s string) (*Numeric, error) {
	var (
		n        *Numeric
		err      error
		sign     string
		reStr    string
		re       *regexp.Regexp
		parseErr = errors.New(ParseNumericError)
	)

	// Record whether the input string has a currency symbol.
	// If so, it can only be a monetary value.
	hasCurrency := p.currencyRegex.MatchString(s)
	if hasCurrency {
		s = p.removeCurrencySymbol(s)
	}

	// Now determine whether the string's initial character is a + or -.
	// If so, strip it away and record the sign.
	sign = ""
	re = regexp.MustCompile("^[\\+-]")
	if re.MatchString(s) {
		if re.FindString(s) == "-" {
			sign = "-"
		}
		s = s[1:]
	}

	// Since currency and sign symbols have been stripped, we now check that the
	// expression begins with a decimal separator (possibly) and digit.
	// Valid strings thus look like either: .x* or x*.
	reStr = "^" + p.decimalReStr + "?" + "[0-9]"
	re = regexp.MustCompile(reStr)
	if !re.MatchString(s) {
		return nil, parseErr
	}

	// Prepend a 0 if the string begins with a decimal separator.
	reStr = "^" + p.decimalReStr
	re = regexp.MustCompile(reStr)
	if re.MatchString(s) {
		s = "0" + s
	}

	// If the input ends with the decimal separator, remove it.
	re = regexp.MustCompile(p.decimalReStr + "$")
	if re.MatchString(s) {
		s = re.ReplaceAllString(s, "")
	}

	// Create the main validating regex.
	reStr = "^\\d+" + "(" + p.digitReStr + "\\d{3})*" + p.decimalReStr + "?\\d*$"
	re = regexp.MustCompile(reStr)
	if !re.MatchString(s) {
		return nil, parseErr
	}

	// We can now assume that the string is valid except for
	// intermediate delimiters.
	// Before attempting to parse the string further, we (possibly) perform
	// some basic sanitization.
	var parsed string
	tmp, err := p.sanitize(s)
	if err == nil {
		parsed = tmp
	} else {
		// Probably the parser cannot distinguish between decimal and digit
		// separators.  So we handle this case separately.
		re = regexp.MustCompile(p.digitReStr + "|" + p.decimalReStr)
		locs := re.FindAllStringSubmatchIndex(s, -1)
		switch len(locs) {
		case 0: // The number is an integer.  No additional parsing needed.
			parsed = s
			err = nil
		case 1: // Need to deal with 1,234 vs 123,456 vs 12.345, etc.
			parsed, err = p.parseOneUnknownSeparator(s, locs[0][0])
		default: // Try to find the last separator and determine its type.
			parsed, err = p.parseManyUnknownSeparators(s, locs)
		}

	}

	parsed = sign + parsed
	f, ferr := strconv.ParseFloat(parsed, 64)
	if err != nil || ferr != nil {
		return nil, err
	}

	// We now know that the parsed string correctly parses as a float.
	n = &Numeric{
		isFloat: true,
		f:       f,
	}
	if hasCurrency {
		n.isMoney = true
	}
	_, err = strconv.Atoi(parsed)
	if err == nil {
		n.isInt = true
	}

	return n, nil
}