// NextEntry takes a buffer of []byte a Regular expression for splitting the plain text entries // and returns the next entry and a remainder buffer both of type []byte func NextEntry(buf []byte, re *regexp.Regexp) ([]byte, []byte) { loc := re.FindIndex(buf) if loc == nil { return buf, nil } return buf[0:loc[0]], buf[loc[1]:] }
// strip text between given markers func stripLiteral(wikitext []byte, start *regexp.Regexp, end *regexp.Regexp) (out []byte) { var loc []int out = make([]byte, 0, len(wikitext)) top: loc = start.FindIndex(wikitext) if loc != nil { // match? goto strip } out = append(out, wikitext...) // add what's left return strip: out = append(out, wikitext[:loc[0]]...) wikitext = wikitext[loc[1]:] loc = end.FindIndex(wikitext) if loc != nil { // match? goto endstrip } return // assume end at EOF if no match endstrip: wikitext = wikitext[loc[1]:] goto top panic("unreachable") // please the compiler }
// MakeResult builds a Result for path from a slice of grep.Match, rendering
// each match into line-number and before/match/after snippet fields.
func MakeResult(path string, re *regexp.Regexp, grepMatches []grep.Match) Result {
	var matches []Match
	for _, m := range grepMatches {
		// The reported start line is the first context line, not the
		// matching line itself.
		start := m.LineNum - len(m.ContextBefore)
		snippetBefore := string(bytes.Join(m.ContextBefore, []byte{'\n'}))
		if len(m.ContextBefore) > 0 {
			snippetBefore += "\n"
		}
		// Find the exact match on the matching line.
		// NOTE(review): assumes re always matches m.FullLine; a nil result
		// from FindIndex would panic on i[0] below — confirm the grep
		// package guarantees this.
		i := re.FindIndex(m.FullLine)
		snippetBefore += string(m.FullLine[:i[0]])
		snippetMatch := string(m.FullLine[i[0]:i[1]])
		snippetAfter := string(m.FullLine[i[1]:])
		if len(m.ContextAfter) > 0 {
			snippetAfter += "\n" + string(bytes.Join(m.ContextAfter, []byte{'\n'}))
		}
		matches = append(matches, Match{
			Start:         start,
			SnippetBefore: snippetBefore,
			SnippetMatch:  snippetMatch,
			SnippetAfter:  snippetAfter,
		})
	}
	return Result{
		Path:    path,
		Matches: matches,
	}
}
func try(re *regexp.Regexp, s []byte) int { is := re.FindIndex(s) if is == nil { return 0 } if is[0] != 0 { return 0 } return is[1] }
// parseRegexpAddress resolves a regular expression address to a new dot
// (selection) within the file buffer. dir == 1 searches forward from the end
// of dot; any other value searches backward from the start of dot. Both
// directions wrap around the buffer; if there is no match anywhere, the
// current dot is returned unchanged.
func (f *Samfile) parseRegexpAddress(re *regexp.Regexp, dir int, dot Address) (newdot Address, err error) {
	if dir == 1 {
		// In normal forward searching, find next occurrence starting after dot.
		idx := re.FindIndex(f.b[dot.to:])
		if idx != nil {
			newdot.from = idx[0] + dot.to
			newdot.to = idx[1] + dot.to
			return newdot, nil
		}
		// If there is none, restart from the beginning of the file to the dot.
		idx = re.FindIndex(f.b[0:dot.to])
		if idx != nil {
			newdot.from = idx[0]
			newdot.to = idx[1]
			return newdot, nil
		}
		// Still no match: return dot unchanged.
		return dot, nil
	}
	// Backward searching must be implemented with FindAll: the regexp
	// package has no reverse search, so enumerate all matches and pick one.
	idxs := re.FindAllIndex(f.b, -1)
	if idxs == nil {
		// No matches, return dot unchanged.
		return dot, nil
	}
	// Look for last match before dot: the first match that ends past dot's
	// start means its predecessor (if any) is the last match wholly before dot.
	for i := 0; i < len(idxs); i++ {
		if idxs[i][1] > dot.from {
			if i > 0 {
				newdot.from = idxs[i-1][0]
				newdot.to = idxs[i-1][1]
				return newdot, nil
			}
		}
	}
	// No match before the dot, restart from the end: find the last match
	// starting before dot's end; wrapping backward selects the match after it.
	for i := len(idxs) - 1; i >= 0; i-- {
		if idxs[i][0] < dot.to {
			if i != len(idxs)-1 {
				newdot.from = idxs[i+1][0]
				newdot.to = idxs[i+1][1]
				return newdot, nil
			}
		}
	}
	return dot, errors.New("implementation error: cannot reverse find")
}
func (b *Buffer) didSay(re *regexp.Regexp) (bool, []byte) { b.lock.Lock() defer b.lock.Unlock() unreadBytes := b.contents[b.readCursor:] copyOfUnreadBytes := make([]byte, len(unreadBytes)) copy(copyOfUnreadBytes, unreadBytes) loc := re.FindIndex(unreadBytes) if loc != nil { b.readCursor += uint64(loc[1]) return true, copyOfUnreadBytes } else { return false, copyOfUnreadBytes } }
func (e *Expector) matchOutput(stop chan bool, pattern *regexp.Regexp) bool { for { found := pattern.FindIndex(e.nextOutput()) if found != nil { e.forwardOutput(found[1]) return true } if e.isClosed() { return false } select { case <-time.After(100 * time.Millisecond): case <-stop: return false } } }
// makeREFieldSplitter returns a splitter that returns the next field by
// splitting on a regular expression (the script's FS).
func (s *Script) makeREFieldSplitter() func([]byte, bool) (int, []byte, error) {
	// Ensure that the regular expression is valid.
	var sepRegexp *regexp.Regexp
	var err error
	if s.rs == "" {
		// A special case in AWK is that if the record terminator is
		// empty (implying a blank line) then newlines are accepted as
		// a field separator in addition to whatever is specified for
		// FS.
		sepRegexp, err = s.compileRegexp(`(` + s.fs + `)|(\r?\n)`)
	} else {
		sepRegexp, err = s.compileRegexp(s.fs)
	}
	if err != nil {
		// Defer reporting the compile error until the splitter is
		// actually invoked.
		return func(data []byte, atEOF bool) (int, []byte, error) {
			return 0, nil, err
		}
	}

	// The regular expression is valid. Return a splitter customized to
	// that regular expression. The closure keeps per-scan state so the
	// trailing token is emitted at most once.
	returnedFinalToken := false // true=already returned a final, non-terminated token; false=didn't
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		// If we match the regular expression, return everything up to
		// the match.
		loc := sepRegexp.FindIndex(data)
		if loc != nil {
			return loc[1], data[:loc[0]], nil
		}

		// We didn't see a separator. If we're at EOF, we have a
		// final, non-terminated token. Return it (unless we already
		// did).
		if atEOF && !returnedFinalToken {
			returnedFinalToken = true
			return len(data), data, nil
		}

		// Request more data.
		return 0, nil, nil
	}
}
// extractMethodInfo locates where the implementation of methodName lives (or
// where a new one should be inserted) within implBytes, using
// methodSignatureRegexp to find an existing implementation. The returned
// positions are made absolute by adding implBytesOffset.
//
// NOTE(review): className is currently unused by this function.
func extractMethodInfo(methodName, className string, methodSignatureRegexp *regexp.Regexp, implBytes []byte, implBytesOffset int) (methodInfo MethodInfo) {
	matchedMethod := methodSignatureRegexp.FindIndex(implBytes)
	if matchedMethod == nil {
		// There is no previous method, the position for the new one will be just before @end
		// NOTE(review): assumes implBytes always contains a match for
		// endRegexp; a nil result here would panic on matchedEnd[0] —
		// confirm callers guarantee an @end marker.
		matchedEnd := endRegexp.FindIndex(implBytes)
		methodInfo.PosStart, methodInfo.PosEnd = matchedEnd[0], matchedEnd[0]
	} else {
		// Existing method: span runs from the signature to the end of
		// its body.
		methodInfo.PosStart = matchedMethod[0]
		bodyStart := matchedMethod[1]
		relativeBodyEnd := relativeEndOfMethodBody(implBytes[bodyStart:])
		methodInfo.PosEnd = bodyStart + relativeBodyEnd
	}
	methodInfo.Name = methodName
	// Convert buffer-relative positions to file-absolute positions.
	methodInfo.PosStart += implBytesOffset
	methodInfo.PosEnd += implBytesOffset
	return
}
// FindRegexpMatch finds the first match of r in the process memory. This function works as FindBytesSequence
// but instead of searching for a literal bytes sequence it uses a regexp. It tries to match the regexp in the memory
// as is, not interpreting it as any charset in particular.
func FindRegexpMatch(p process.Process, address uintptr, r *regexp.Regexp) (found bool, foundAddress uintptr, harderror error, softerrors []error) {
	const buffer_size = uint(4096)
	foundAddress = uintptr(0)
	found = false
	// Walk memory in overlapping windows; the callback returns false to
	// stop the walk once a match is found.
	harderror, softerrors = memaccess.SlidingWalkMemory(p, address, buffer_size,
		func(address uintptr, buf []byte) (keepSearching bool) {
			loc := r.FindIndex(buf)
			if loc == nil {
				return true
			}
			// Translate the buffer-relative match offset into an
			// absolute address.
			foundAddress = address + uintptr(loc[0])
			found = true
			return false
		})
	return
}
// getMatches gets all matches in the provided data, it is used for normal and condition matches.
//
// data contains the original data.
// testBuffer contains the data to test the regex against (potentially modified, e.g. to support the ignore case option).
// offset is the file offset of data's first byte; it is added to all reported positions.
// length contains the length of the provided data.
// matches are only valid if they start within the validMatchRange.
func getMatches(regex *regexp.Regexp, data []byte, testBuffer []byte, offset int64, length int, validMatchRange int, conditionID int, target string) Matches {
	var matches Matches
	if allIndex := regex.FindAllIndex(testBuffer, -1); allIndex != nil {
		// for _, index := range allindex {
		// Indexed loop (not range) because requeued matches are appended
		// to allIndex below and must also be processed.
		for mi := 0; mi < len(allIndex); mi++ {
			index := allIndex[mi]
			start := index[0]
			end := index[1]
			// \s always matches newline, leading to incorrect matches in non-multiline mode
			// analyze match and reject false matches
			if !options.Multiline {
				// remove newlines at the beginning of the match
				for ; start < length && end > start && data[start] == 0x0a; start++ {
				}
				// remove newlines at the end of the match
				for ; end > 0 && end > start && data[end-1] == 0x0a; end-- {
				}
				// check if the corrected match is still valid
				if !regex.Match(testBuffer[start:end]) {
					continue
				}
				// check if the match contains newlines
				if bytes.Contains(data[start:end], []byte{0x0a}) {
					// Rebuild the complete lines to check whether these contain valid matches.
					// In very rare cases, multiple lines may contain a valid match. As multiple
					// matches cannot be processed correctly here, requeue them to be processed again.
					lineStart := start
					lineEnd := end
					for lineStart > 0 && data[lineStart-1] != 0x0a {
						lineStart--
					}
					for lineEnd < length && data[lineEnd] != 0x0a {
						lineEnd++
					}
					// Re-test each individual line of the multi-line span
					// and requeue any per-line match for normal processing.
					lastStart := lineStart
					for pos := lastStart + 1; pos < lineEnd; pos++ {
						if data[pos] == 0x0a || pos == lineEnd-1 {
							if pos == lineEnd-1 && data[pos] != 0x0a {
								pos++
							}
							if idx := regex.FindIndex(testBuffer[lastStart:pos]); idx != nil {
								start = lastStart
								end = pos
								start = lastStart + idx[0]
								end = lastStart + idx[1]
								allIndex = append(allIndex, []int{start, end})
							}
							lastStart = pos + 1
						}
					}
					continue
				}
			}
			lineStart := start
			lineEnd := end
			if options.Multiline && start >= validMatchRange {
				continue
			}
			// Expand to the full line(s) containing the match.
			for lineStart > 0 && data[lineStart-1] != 0x0a {
				lineStart--
			}
			for lineEnd < length && data[lineEnd] != 0x0a {
				lineEnd++
			}
			var contextBefore *string
			var contextAfter *string
			if options.ContextBefore > 0 {
				var contextBeforeStart int
				if lineStart > 0 {
					// Scan backward for up to ContextBefore preceding lines.
					contextBeforeStart = lineStart - 1
					precedingLinesFound := 0
					for contextBeforeStart > 0 {
						if data[contextBeforeStart-1] == 0x0a {
							precedingLinesFound++
							if precedingLinesFound == options.ContextBefore {
								break
							}
						}
						contextBeforeStart--
					}
					// If the buffer doesn't hold enough preceding lines and
					// more data exists before it, read the context from file.
					if precedingLinesFound < options.ContextBefore && contextBeforeStart == 0 && offset > 0 {
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						tmp := string(data[contextBeforeStart : lineStart-1])
						contextBefore = &tmp
					}
				} else {
					if offset > 0 {
						contextBefore = getBeforeContextFromFile(target, offset, start)
					} else {
						contextBefore = nil
					}
				}
			}
			if options.ContextAfter > 0 {
				var contextAfterEnd int
				if lineEnd < length-1 {
					// Scan forward for up to ContextAfter following lines.
					contextAfterEnd = lineEnd
					followingLinesFound := 0
					for contextAfterEnd < length-1 {
						if data[contextAfterEnd+1] == 0x0a {
							followingLinesFound++
							if followingLinesFound == options.ContextAfter {
								contextAfterEnd++
								break
							}
						}
						contextAfterEnd++
					}
					// If the buffer ends before enough lines were found,
					// read the context from file instead.
					if followingLinesFound < options.ContextAfter && contextAfterEnd == length-1 {
						contextAfter = getAfterContextFromFile(target, offset, end)
					} else {
						tmp := string(data[lineEnd+1 : contextAfterEnd])
						contextAfter = &tmp
					}
				} else {
					contextAfter = getAfterContextFromFile(target, offset, end)
				}
			}
			m := Match{
				conditionID:   conditionID,
				start:         offset + int64(start),
				end:           offset + int64(end),
				lineStart:     offset + int64(lineStart),
				lineEnd:       offset + int64(lineEnd),
				match:         string(data[start:end]),
				line:          string(data[lineStart:lineEnd]),
				contextBefore: contextBefore,
				contextAfter:  contextAfter,
			}
			// handle special case where '^' matches after the last newline
			if int(lineStart) != validMatchRange {
				matches = append(matches, m)
			}
		}
	}
	return matches
}
// FindAllIndex returns a sorted list of non-overlapping matches of the
// regular expression r, where a match is a pair of indices specifying
// the matched slice of x.Bytes(). If n < 0, all matches are returned
// in successive order. Otherwise, at most n matches are returned and
// they may not be successive. The result is nil if there are no matches,
// or if n == 0.
//
func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) {
	// a non-empty literal prefix is used to determine possible
	// match start indices with Lookup
	prefix, complete := r.LiteralPrefix()
	lit := []byte(prefix)

	// worst-case scenario: no literal prefix
	if prefix == "" {
		return r.FindAllIndex(x.data, n)
	}

	// if regexp is a literal just use Lookup and convert its
	// result into match pairs
	if complete {
		// Lookup returns indices that may belong to overlapping matches.
		// After eliminating them, we may end up with fewer than n matches.
		// If we don't have enough at the end, redo the search with an
		// increased value n1, but only if Lookup returned all the requested
		// indices in the first place (if it returned fewer than that then
		// there cannot be more).
		for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
			indices := x.Lookup(lit, n1)
			if len(indices) == 0 {
				return
			}
			// Lookup results are unordered, hence the sort.
			sort.Ints(indices)
			pairs := make([]int, 2*len(indices))
			result = make([][]int, len(indices))
			count := 0
			prev := 0
			for _, i := range indices {
				if count == n {
					break
				}
				// ignore indices leading to overlapping matches
				if prev <= i {
					j := 2 * count
					pairs[j+0] = i
					pairs[j+1] = i + len(lit)
					result[count] = pairs[j : j+2]
					count++
					prev = i + len(lit)
				}
			}
			result = result[0:count]
			if len(result) >= n || len(indices) != n1 {
				// found all matches or there's no chance to find more
				// (n and n1 can be negative)
				break
			}
		}
		if len(result) == 0 {
			result = nil
		}
		return
	}

	// regexp has a non-empty literal prefix; Lookup(lit) computes
	// the indices of possible complete matches; use these as starting
	// points for anchored searches
	// (regexp "^" matches beginning of input, not beginning of line)
	r = regexp.MustCompile("^" + r.String()) // compiles because r compiled

	// same comment about Lookup applies here as in the loop above
	for n1 := n; ; n1 += 2 * (n - len(result)) /* overflow ok */ {
		indices := x.Lookup(lit, n1)
		if len(indices) == 0 {
			return
		}
		sort.Ints(indices)
		result = result[0:0]
		prev := 0
		for _, i := range indices {
			if len(result) == n {
				break
			}
			m := r.FindIndex(x.data[i:]) // anchored search - will not run off
			// ignore indices leading to overlapping matches
			if m != nil && prev <= i {
				m[0] = i // correct m
				m[1] += i
				result = append(result, m)
				prev = m[1]
			}
		}
		if len(result) >= n || len(indices) != n1 {
			// found all matches or there's no chance to find more
			// (n and n1 can be negative)
			break
		}
	}
	if len(result) == 0 {
		result = nil
	}
	return
}
// performGeneralCleanup applies the main sequence of YUI-compressor-style CSS
// minification passes to c.Css: whitespace trimming around punctuation,
// lowercasing of directives/pseudo-classes/functions, zero-value and color
// shortening, and removal of empty rules. The passes are order-sensitive.
func (c *CssCompressor) performGeneralCleanup() {
	// This function does a lot, ok?
	var sb bytes.Buffer
	var previousIndex int
	var re *regexp.Regexp

	// Remove the spaces before the things that should not have spaces before them.
	// But, be careful not to turn "p :link {...}" into "p:link{...}"
	// Swap out any pseudo-class colons with the token, and then swap back.
	c.Css = RegexFindReplace(c.Css,
		"(^|\\})(([^\\{:])+:)+([^\\{]*\\{)",
		func(groups []string) string {
			s := groups[0]
			s = strings.Replace(s, ":", "___YUICSSMIN_PSEUDOCLASSCOLON___", -1)
			s = strings.Replace(s, "\\\\", "\\\\\\\\", -1)
			s = strings.Replace(s, "\\$", "\\\\\\$", -1)
			return s
		})

	// Remove spaces before the things that should not have spaces before them.
	re, _ = regexp.Compile("\\s+([!{};:>+\\(\\)\\],])")
	c.Css = re.ReplaceAll(c.Css, []byte("$1"))
	// Restore spaces for !important
	c.Css = bytes.Replace(c.Css, []byte("!important"), []byte(" !important"), -1)
	// bring back the colon
	c.Css = bytes.Replace(c.Css, []byte("___YUICSSMIN_PSEUDOCLASSCOLON___"), []byte(":"), -1)

	// retain space for special IE6 cases
	c.Css = RegexFindReplace(c.Css,
		"(?i):first\\-(line|letter)(\\{|,)",
		func(groups []string) string {
			return strings.ToLower(":first-"+groups[1]) + " " + groups[2]
		})

	// no space after the end of a preserved comment
	c.Css = bytes.Replace(c.Css, []byte("*/ "), []byte("*/"), -1)

	// If there are multiple @charset directives, push them to the top of the file.
	c.Css = RegexFindReplace(c.Css,
		"(?i)^(.*)(@charset)( \"[^\"]*\";)",
		func(groups []string) string {
			return strings.ToLower(groups[2]) + groups[3] + groups[1]
		})

	// When all @charset are at the top, remove the second and after (as they are completely ignored).
	c.Css = RegexFindReplace(c.Css,
		"(?i)^((\\s*)(@charset)( [^;]+;\\s*))+",
		func(groups []string) string {
			return groups[2] + strings.ToLower(groups[3]) + groups[4]
		})

	// lowercase some popular @directives
	c.Css = RegexFindReplace(c.Css,
		"(?i)@(charset|font-face|import|(?:-(?:atsc|khtml|moz|ms|o|wap|webkit)-)?keyframe|media|page|namespace)",
		func(groups []string) string {
			return "@" + strings.ToLower(groups[1])
		})

	// lowercase some more common pseudo-elements
	c.Css = RegexFindReplace(c.Css,
		"(?i):(active|after|before|checked|disabled|empty|enabled|first-(?:child|of-type)|focus|hover|last-(?:child|of-type)|link|only-(?:child|of-type)|root|:selection|target|visited)",
		func(groups []string) string {
			return ":" + strings.ToLower(groups[1])
		})

	// lowercase some more common functions
	c.Css = RegexFindReplace(c.Css,
		"(?i):(lang|not|nth-child|nth-last-child|nth-last-of-type|nth-of-type|(?:-(?:moz|webkit)-)?any)\\(",
		func(groups []string) string {
			return ":" + strings.ToLower(groups[1]) + "("
		})

	// lower case some common function that can be values
	// NOTE: rgb() isn't useful as we replace with #hex later, as well as and() is already done for us right after this
	c.Css = RegexFindReplace(c.Css,
		"(?i)([:,\\( ]\\s*)(attr|color-stop|from|rgba|to|url|(?:-(?:atsc|khtml|moz|ms|o|wap|webkit)-)?(?:calc|max|min|(?:repeating-)?(?:linear|radial)-gradient)|-webkit-gradient)",
		func(groups []string) string {
			return groups[1] + strings.ToLower(groups[2])
		})

	// Put the space back in some cases, to support stuff like
	// @media screen and (-webkit-min-device-pixel-ratio:0){
	re, _ = regexp.Compile("(?i)\\band\\(")
	c.Css = re.ReplaceAll(c.Css, []byte("and ("))

	// Remove the spaces after the things that should not have spaces after them.
	re, _ = regexp.Compile("([!{}:;>+\\(\\[,])\\s+")
	c.Css = re.ReplaceAll(c.Css, []byte("$1"))

	// remove unnecessary semicolons
	re, _ = regexp.Compile(";+}")
	c.Css = re.ReplaceAll(c.Css, []byte("}"))

	// Replace 0(px,em,%) with 0.
	re, _ = regexp.Compile("(?i)(^|[^0-9])(?:0?\\.)?0(?:px|em|%|in|cm|mm|pc|pt|ex|deg|g?rad|m?s|k?hz)")
	c.Css = re.ReplaceAll(c.Css, []byte("${1}0"))

	// Replace 0 0 0 0; with 0.
	re, _ = regexp.Compile(":0 0 0 0(;|})")
	re2, _ := regexp.Compile(":0 0 0(;|})")
	re3, _ := regexp.Compile(":0 0(;|})")
	c.Css = re.ReplaceAll(c.Css, []byte(":0$1"))
	c.Css = re2.ReplaceAll(c.Css, []byte(":0$1"))
	c.Css = re3.ReplaceAll(c.Css, []byte(":0$1"))

	// Replace background-position:0; with background-position:0 0;
	// same for transform-origin
	c.Css = RegexFindReplace(c.Css,
		"(?i)(background-position|webkit-mask-position|transform-origin|webkit-transform-origin|moz-transform-origin|o-transform-origin|ms-transform-origin):0(;|})",
		func(groups []string) string {
			return strings.ToLower(groups[1]) + ":0 0" + groups[2]
		})

	// Replace 0.6 to .6, but only when preceded by : or a white-space
	re, _ = regexp.Compile("(:|\\s)0+\\.(\\d+)")
	c.Css = re.ReplaceAll(c.Css, []byte("$1.$2"))

	// Shorten colors from rgb(51,102,153) to #336699
	// This makes it more likely that it'll get further compressed in the next step.
	c.Css = RegexFindReplace(c.Css,
		"rgb\\s*\\(\\s*([0-9,\\s]+)\\s*\\)",
		func(groups []string) string {
			rgbcolors := strings.Split(groups[1], ",")
			var hexcolor bytes.Buffer
			hexcolor.WriteString("#")
			for _, colour := range rgbcolors {
				val, _ := strconv.Atoi(colour)
				if val < 16 {
					// Pad single hex digits to two characters.
					hexcolor.WriteString("0")
				}
				// If someone passes an RGB value that's too big to express in two characters, round down.
				// Probably should throw out a warning here, but generating valid CSS is a bigger concern.
				if val > 255 {
					val = 255
				}
				hexcolor.WriteString(fmt.Sprintf("%x", val))
			}
			return hexcolor.String()
		})

	// Shorten colors from #AABBCC to #ABC. Note that we want to make sure
	// the color is not preceded by either ", " or =. Indeed, the property
	//     filter: chroma(color="#FFFFFF");
	// would become
	//     filter: chroma(color="#FFF");
	// which makes the filter break in IE.
	// We also want to make sure we're only compressing #AABBCC patterns inside { }, not id selectors ( #FAABAC {} )
	// We also want to avoid compressing invalid values (e.g. #AABBCCD to #ABCD)
	sb.Reset()
	re, _ = regexp.Compile("(\\=\\s*?[\"']?)?" + "#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])" + "(:?\\}|[^0-9a-fA-F{][^{]*?\\})")
	previousIndex = 0
	// Manual scan-and-rebuild loop: each iteration re-searches the
	// remainder of the buffer starting at previousIndex.
	for match := re.Find(c.Css[previousIndex:]); match != nil; match = re.Find(c.Css[previousIndex:]) {
		index := re.FindIndex(c.Css[previousIndex:])
		submatches := re.FindStringSubmatch(string(c.Css[previousIndex:]))
		submatchIndexes := re.FindSubmatchIndex(c.Css[previousIndex:])

		sb.WriteString(string(c.Css[previousIndex : index[0]+len(c.Css[:previousIndex])]))

		//boolean isFilter = (m.group(1) != null && !"".equals(m.group(1)));
		// I hope the below is the equivalent of the above :P
		isFilter := submatches[1] != "" && submatchIndexes[1] != -1

		if isFilter {
			// Restore, as is. Compression will break filters
			sb.WriteString(submatches[1] + "#" + submatches[2] + submatches[3] + submatches[4] + submatches[5] + submatches[6] + submatches[7])
		} else {
			if strings.ToLower(submatches[2]) == strings.ToLower(submatches[3]) &&
				strings.ToLower(submatches[4]) == strings.ToLower(submatches[5]) &&
				strings.ToLower(submatches[6]) == strings.ToLower(submatches[7]) {
				// #AABBCC pattern
				sb.WriteString("#" + strings.ToLower(submatches[3]+submatches[5]+submatches[7]))
			} else {
				// Non-compressible color, restore, but lower case.
				sb.WriteString("#" + strings.ToLower(submatches[2]+submatches[3]+submatches[4]+submatches[5]+submatches[6]+submatches[7]))
			}
		}

		// The "+ 4" below is a crazy hack which will come back to haunt me later.
		// For now, it makes everything work 100%.
		previousIndex = submatchIndexes[7] + len(c.Css[:previousIndex]) + 4
	}
	if previousIndex > 0 {
		sb.WriteString(string(c.Css[previousIndex:]))
	}
	if sb.Len() > 0 {
		c.Css = sb.Bytes()
	}

	// Save a few chars by utilizing short colour keywords.
	// https://github.com/yui/yuicompressor/commit/fe8cf35d3693910103d65bf465d33b0d602dcfea
	colours := map[string]string{
		"#f00":    "red",
		"#000080": "navy",
		"#808080": "gray",
		"#808000": "olive",
		"#800080": "purple",
		"#c0c0c0": "silver",
		"#008080": "teal",
		"#ffa500": "orange",
		"#800000": "maroon",
	}
	for k, v := range colours {
		re, _ = regexp.Compile("(:|\\s)" + k + "(;|})")
		c.Css = re.ReplaceAll(c.Css, []byte("${1}"+v+"${2}"))
	}

	// border: none -> border:0
	c.Css = RegexFindReplace(c.Css,
		"(?i)(border|border-top|border-right|border-bottom|border-left|outline|background):none(;|})",
		func(groups []string) string {
			return strings.ToLower(groups[1]) + ":0" + groups[2]
		})

	// shorter opacity IE filter
	re, _ = regexp.Compile("(?i)progid:DXImageTransform.Microsoft.Alpha\\(Opacity=")
	c.Css = re.ReplaceAll(c.Css, []byte("alpha(opacity="))

	// Find a fraction that is used for Opera's -o-device-pixel-ratio query
	// Add token to add the "\" back in later
	re, _ = regexp.Compile("\\(([\\-A-Za-z]+):([0-9]+)\\/([0-9]+)\\)")
	c.Css = re.ReplaceAll(c.Css, []byte("(${1}:${2}___YUI_QUERY_FRACTION___${3})"))

	// Remove empty rules.
	re, _ = regexp.Compile("[^\\}\\{/;]+\\{\\}")
	c.Css = re.ReplaceAll(c.Css, []byte(""))

	// Add "\" back to fix Opera -o-device-pixel-ratio query
	c.Css = bytes.Replace(c.Css, []byte("___YUI_QUERY_FRACTION___"), []byte("/"), -1)
}
// makeRecordSplitter returns a splitter that returns the next record.
// Although all the AWK documentation I've read define RS as a record
// separator, as far as I can tell, AWK in fact treats it as a record
// *terminator* so we do, too.
func (s *Script) makeRecordSplitter() func([]byte, bool) (int, []byte, error) {
	// If the terminator is a single character, scan based on that. This
	// code is derived from the bufio.ScanWords source.
	if utf8.RuneCountInString(s.rs) == 1 {
		// Ensure the terminator character is valid.
		firstRune, _ := utf8.DecodeRuneInString(s.rs)
		if firstRune == utf8.RuneError {
			return func(data []byte, atEOF bool) (int, []byte, error) {
				return 0, nil, errors.New("Invalid rune in terminator")
			}
		}

		// The terminator is valid. Return a splitter customized to
		// that terminator.
		return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
			// Scan until we see a terminator or run out of data.
			// RT records the terminator text actually consumed.
			s.RT = string(firstRune)
			for width, i := 0, 0; i < len(data); i += width {
				var r rune
				r, width = utf8.DecodeRune(data[i:])
				if r == utf8.RuneError {
					return 0, nil, errors.New("Invalid rune in input data")
				}
				if r == firstRune {
					return i + width, data[:i], nil
				}
			}

			// We didn't see a terminator. If we're at EOF, we
			// have a final, non-terminated token. Return it if
			// it's nonempty.
			if atEOF && len(data) > 0 {
				return len(data), data, nil
			}

			// Request more data.
			return 0, nil, nil
		}
	}

	// If the terminator is multiple characters, treat it as a regular
	// expression, and scan based on that. Or, as a special case, if the
	// terminator is empty, we treat it as a regular expression
	// representing one or more blank lines.
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		// Generate a regular expression based on the current RS and
		// IgnoreCase. (Compiled on every call so changes to RS take
		// effect between records.)
		var termRegexp *regexp.Regexp
		if s.rs == "" {
			termRegexp, err = s.compileRegexp(`\r?\n(\r?\n)+`)
		} else {
			termRegexp, err = s.compileRegexp(s.rs)
		}
		if err != nil {
			return 0, nil, err
		}

		// If we match the regular expression, return everything up to
		// the match. RT records the terminator text actually matched.
		loc := termRegexp.FindIndex(data)
		if loc != nil {
			s.RT = string(data[loc[0]:loc[1]])
			return loc[1], data[:loc[0]], nil
		}

		// We didn't see a terminator. If we're at EOF, we have a
		// final, non-terminated token. Return it if it's nonempty.
		if atEOF && len(data) > 0 {
			s.RT = ""
			return len(data), data, nil
		}

		// Request more data.
		return 0, nil, nil
	}
}
func findInBuffer(re *regexp.Regexp, buffer []byte, r sparser.Range) bool { if r.IsEmpty() { return false } return re.FindIndex(buffer[r.MinOffs:r.MaxOffs+1]) != nil }