// Append returns f(append(out, b...)). // The buffer out must be nil, empty, or equal to f(out). func (f Form) Append(out []byte, src ...byte) []byte { if len(src) == 0 { return out } fd := formTable[f] rb := &reorderBuffer{f: *fd} doMerge := len(out) > 0 p := 0 if !utf8.RuneStart(src[0]) { // Move leading non-starters to destination. for p++; p < len(src) && !utf8.RuneStart(src[p]); p++ { } out = append(out, src[:p]...) buf, ndropped := patchTail(rb, out) if ndropped > 0 { out = append(buf, src[p-ndropped:p]...) doMerge = false // no need to merge, ends with illegal UTF-8 } else { out = decomposeToLastBoundary(rb, buf) // force decomposition } } if doMerge { var info runeInfo if p < len(src[p:]) { info = fd.info(src[p:]) if p == 0 && !fd.boundaryBefore(fd, info) { out = decomposeToLastBoundary(rb, out) } } if info.size == 0 { if fd.composing { rb.compose() } // Append incomplete UTF-8 encoding. return append(rb.flush(out), src[p:]...) } } if rb.nrune == 0 { src = src[p:] out, p = appendQuick(fd, out, src) } for n := 0; p < len(src); p += n { p += decomposeSegment(rb, src[p:]) if fd.composing { rb.compose() } out = rb.flush(out) out, n = appendQuick(fd, out, src[p:]) } return out }
// TrimSpace returns a slice of the string s, with all leading and trailing
// white space removed, as defined by Unicode.
//
// Bytes that are not valid UTF-8 decode as utf8.RuneError, which is not white
// space, so trimming stops at them on either side.
func TrimSpace(s string) string {
	start, end := 0, len(s)

	// Advance start past leading white space.
	for start < end {
		r, wid := utf8.DecodeRuneInString(s[start:end])
		if !unicode.IsSpace(r) {
			break
		}
		start += wid
	}

	// Pull end back over trailing white space. Decoding from the end keeps
	// the rune and its width tied to the final bytes of the window; backing
	// up to a start byte and decoding forward from there can report a rune
	// that does not end at end when the tail is malformed.
	for start < end {
		r, wid := utf8.DecodeLastRuneInString(s[start:end])
		if !unicode.IsSpace(r) {
			break
		}
		end -= wid
	}
	return s[start:end]
}
// charToByteOffset converts offsetC, an offset counted in characters, into
// the matching byte offset within s.
//
// A character is counted at every byte for which utf8.RuneStart reports true.
// The result is len(s) when s holds fewer than offsetC characters and 0 when
// offsetC is not positive.
func charToByteOffset(s []byte, offsetC int) (offsetB int) {
	if offsetC <= 0 {
		return 0
	}
	// Walk forward until offsetC rune-start bytes have been passed.
	for offsetB < len(s) && offsetC > 0 {
		if utf8.RuneStart(s[offsetB]) {
			offsetC--
		}
		offsetB++
	}
	// The loop stops right behind the start byte of the last counted rune.
	// Step over that rune's continuation bytes so the result never points
	// into the middle of a multi-byte encoding.
	for offsetB < len(s) && !utf8.RuneStart(s[offsetB]) {
		offsetB++
	}
	return offsetB
}
// lastRuneStart returns the runeInfo and position of the last // rune in buf or the zero runeInfo and -1 if no rune was found. func lastRuneStart(fd *formInfo, buf []byte) (runeInfo, int) { p := len(buf) - 1 for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { } if p < 0 { return runeInfo{0, 0, 0, 0}, -1 } return fd.info(inputBytes(buf), p), p }
// utf8MoveBackwards returns the index of the nearest byte before cursor that
// starts a rune in file.
//
// It returns 0 when the search reaches the beginning of file; index 0 is
// reported without inspecting file[0]. A cursor beyond len(file) is treated
// as len(file), and a cursor that is not positive yields 0.
func utf8MoveBackwards(file []byte, cursor int) int {
	// Clamp so that every byte inspected below lies inside file.
	if cursor > len(file) {
		cursor = len(file)
	}
	for cursor--; cursor > 0; cursor-- {
		if utf8.RuneStart(file[cursor]) {
			return cursor
		}
	}
	return 0
}
// TrimRightFunc returns a slice of the string s with all trailing
// Unicode code points c satisfying f(c) removed.
//
// Trailing bytes that are not valid UTF-8 are presented to f one byte at a
// time as utf8.RuneError.
func TrimRightFunc(s string, f func(r int) bool) string {
	end := len(s)
	for end > 0 {
		// Decode from the end so that the rune handed to f and the width
		// removed always describe the final bytes of s[:end]; backing up to
		// a start byte and decoding forward can report a rune that does not
		// end at end when the tail is malformed.
		r, wid := utf8.DecodeLastRuneInString(s[:end])
		if !f(int(r)) {
			break
		}
		end -= wid
	}
	return s[:end]
}
// FirstBoundary returns the position i of the first boundary in b // or -1 if b contains no boundary. func (f Form) FirstBoundary(b []byte) int { i := 0 for ; i < len(b) && !utf8.RuneStart(b[i]); i++ { } if i >= len(b) { return -1 } fd := formTable[f] info := fd.info(b[i:]) for info.size != 0 && !fd.boundaryBefore(fd, info) { i += int(info.size) if i >= len(b) { if !fd.boundaryAfter(fd, info) { return -1 } return len(b) } info = fd.info(b[i:]) } if info.size == 0 { return -1 } return i }
func (s inputBytes) skipNonStarter() int { p := 0 for ; p < len(s) && !utf8.RuneStart(s[p]); p++ { } return p }