Пример #1
0
// Tokenize tokenizes the given string with the delimiter
func Tokenize(text util.Chars, delimiter Delimiter) []Token {
	if delimiter.str == nil && delimiter.regex == nil {
		// AWK-style (\S+\s*)
		tokens, prefixLength := awkTokenizer(text)
		return withPrefixLengths(tokens, prefixLength)
	}

	if delimiter.str != nil {
		return withPrefixLengths(text.Split(*delimiter.str), 0)
	}

	// FIXME performance
	var tokens []string
	if delimiter.regex != nil {
		str := text.ToString()
		for len(str) > 0 {
			loc := delimiter.regex.FindStringIndex(str)
			if loc == nil {
				loc = []int{0, len(str)}
			}
			last := util.Max(loc[1], 1)
			tokens = append(tokens, str[:last])
			str = str[last:]
		}
	}
	asRunes := make([]util.Chars, len(tokens))
	for i, token := range tokens {
		asRunes[i] = util.RunesToChars([]rune(token))
	}
	return withPrefixLengths(asRunes, 0)
}