// Is the item exported from the package? func (n *Named) Exported() bool { if n.Imported() { return false } r, _ := utf8.DecodeRuneInString(n.Name) return unicode.IsUpper(r) }
// TrimSpace returns a slice of the string s, with all leading and trailing
// white space removed, as defined by Unicode.
//
// Bytes that do not form valid UTF-8 are never white space, so trimming
// stops at them and they are returned untouched.
func TrimSpace(s string) string {
	start, end := 0, len(s)

	// Advance start past leading white space.
	for start < end {
		r, wid := rune(s[start]), 1
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRuneInString(s[start:end])
		}
		if !unicode.IsSpace(r) {
			break
		}
		start += wid
	}

	// Pull end back over trailing white space. DecodeLastRuneInString
	// reports the width of the rune that actually ends the slice, so only
	// the bytes belonging to that rune are removed; a stray continuation
	// byte decodes as (RuneError, 1), which is not white space.
	for start < end {
		r, wid := rune(s[end-1]), 1
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeLastRuneInString(s[start:end])
		}
		if !unicode.IsSpace(r) {
			break
		}
		end -= wid
	}
	return s[start:end]
}
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t string) bool {
	for len(s) > 0 && len(t) > 0 {
		// Pull the leading rune off each string, with a one-byte fast path.
		var a, b rune
		if c := s[0]; c < utf8.RuneSelf {
			a, s = rune(c), s[1:]
		} else {
			var n int
			a, n = utf8.DecodeRuneInString(s)
			s = s[n:]
		}
		if c := t[0]; c < utf8.RuneSelf {
			b, t = rune(c), t[1:]
		} else {
			var n int
			b, n = utf8.DecodeRuneInString(t)
			t = t[n:]
		}

		// Identical runes need no folding.
		if a == b {
			continue
		}

		// Order the pair so that lo < hi.
		lo, hi := a, b
		if hi < lo {
			lo, hi = hi, lo
		}

		// Both ASCII with lo upper case: hi has to be the matching lower-case letter.
		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// Walk lo's fold orbit: SimpleFold yields the next larger equivalent
		// rune, or wraps around to a smaller one.
		f := unicode.SimpleFold(lo)
		for f != lo && f < hi {
			f = unicode.SimpleFold(f)
		}
		if f != hi {
			return false
		}
	}

	// At least one string is exhausted; they are equal only if both are.
	return s == t
}
// Peek at the next focus rune in SafeReader. func (r *SafeReader) peek() int { if r.pos < len(r.str) { rune, _ := utf8.DecodeRuneInString(r.str[r.pos:]) return rune } return -1 }
// insertString inserts the given rune in the buffer ordered by CCC. // It returns true if the buffer was large enough to hold the decomposed rune. func (rb *reorderBuffer) insertString(src string, info runeInfo) bool { if info.size == 3 && isHangulString(src) { rune, _ := utf8.DecodeRuneInString(src) return rb.decomposeHangul(uint32(rune)) } if info.flags.hasDecomposition() { dcomp := rb.f.decomposeString(src) for i := 0; i < len(dcomp); { info = rb.f.info(dcomp[i:]) pos := rb.nbyte if !rb.insertOrdered(info) { return false } end := i + int(info.size) copy(rb.byte[pos:], dcomp[i:end]) i = end } } else { copy(rb.byte[rb.nbyte:], src[:info.size]) if !rb.insertOrdered(info) { return false } } return true }
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { re, err := syntax.Parse(expr, mode) if err != nil { return nil, err } maxCap := re.MaxCap() re = re.Simplify() prog, err := syntax.Compile(re) if err != nil { return nil, err } regexp := &Regexp{ expr: expr, prog: prog, numSubexp: maxCap, cond: prog.StartCond(), longest: longest, } regexp.prefix, regexp.prefixComplete = prog.Prefix() if regexp.prefix != "" { // TODO(rsc): Remove this allocation by adding // IndexString to package bytes. regexp.prefixBytes = []byte(regexp.prefix) regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) } return regexp, nil }
// matchChunk checks whether chunk matches the beginning of s. // If so, it returns the remainder of s (after the match). // Chunk is all single-character operators: literals, char classes, and ?. func matchChunk(chunk, s string) (rest string, ok bool, err os.Error) { for len(chunk) > 0 { if len(s) == 0 { return } switch chunk[0] { case '?': _, n := utf8.DecodeRuneInString(s) s = s[n:] chunk = chunk[1:] case '\\': chunk = chunk[1:] if len(chunk) == 0 { err = ErrBadPattern return } fallthrough default: if chunk[0] != s[0] { return } s = s[1:] chunk = chunk[1:] } } return s, true, nil }
func (e *encodeState) string(s string) { e.WriteByte('"') start := 0 for i := 0; i < len(s); { if b := s[i]; b < utf8.RuneSelf { if 0x20 <= b && b != '\\' && b != '"' { i++ continue } if start < i { e.WriteString(s[start:i]) } if b == '\\' || b == '"' { e.WriteByte('\\') e.WriteByte(b) } else { e.WriteString(`\u00`) e.WriteByte(hex[b>>4]) e.WriteByte(hex[b&0xF]) } i++ start = i continue } c, size := utf8.DecodeRuneInString(s[i:]) if c == utf8.RuneError && size == 1 { e.error(&InvalidUTF8Error{s}) } i += size } if start < len(s) { e.WriteString(s[start:]) } e.WriteByte('"') }
// UTF8ToHtml converts the first UTF-8 encoded character of token into a
// decimal HTML numeric character reference.
// ex: "♣" -> "&#9827;"
// It returns "" when token is empty; anything after the first character
// is ignored.
func UTF8ToHtml(token string) string {
	if token == "" {
		return ""
	}
	codePoint, _ := utf8.DecodeRuneInString(token)
	return fmt.Sprintf("&#%d;", codePoint)
}
func (s inputString) hangul(p int) uint32 { if !isHangulString(string(s[p:])) { return 0 } rune, _ := utf8.DecodeRuneInString(string(s[p:])) return uint32(rune) }
func nextRune(s string) (c rune, t string, err error) { c, size := utf8.DecodeRuneInString(s) if c == utf8.RuneError && size == 1 { return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} } return c, s[size:], nil }
// Replace returns a copy of the string s with the first n // non-overlapping instances of old replaced by new. // If n < 0, there is no limit on the number of replacements. func Replace(s, old, new string, n int) string { if old == new || n == 0 { return s // avoid allocation } // Compute number of replacements. if m := Count(s, old); m == 0 { return s // avoid allocation } else if n < 0 || m < n { n = m } // Apply replacements to buffer. t := make([]byte, len(s)+n*(len(new)-len(old))) w := 0 start := 0 for i := 0; i < n; i++ { j := start if len(old) == 0 { if i > 0 { _, wid := utf8.DecodeRuneInString(s[start:]) j += wid } } else { j += Index(s[start:], old) } w += copy(t[w:], s[start:j]) w += copy(t[w:], new) start = j + len(old) } w += copy(t[w:], s[start:]) return string(t[0:w]) }
// fieldNeedsQuotes returns true if our field must be enclosed in quotes. // Empty fields, files with a Comma, fields with a quote or newline, and // fields which start with a space must be enclosed in quotes. func (w *Writer) fieldNeedsQuotes(field string) bool { if len(field) == 0 || strings.IndexRune(field, w.Comma) >= 0 || strings.IndexAny(field, "\"\r\n") >= 0 { return true } r1, _ := utf8.DecodeRuneInString(field) return unicode.IsSpace(r1) }
func (s State) Next() (rune int, state State, ok bool) { if len(s.text) > 0 { rune, size := utf8.DecodeRuneInString(s.text) return rune, State{s.text[size:], s.increment(rune, size)}, true } return 0, s, false }
// equal reports whether s1 and s2 are identical. When they differ it
// compares them rune by rune over their common length and reports every
// position at which the runes differ through t.Errorf, labelled with m,
// then returns false.
//
// The strings are decoded in place, one UTF-8 sequence at a time, and the
// walk stops as soon as either string is exhausted, so a longer s1 can no
// longer index past the end of s2.
func equal(m string, s1, s2 string, t *testing.T) bool {
	if s1 == s2 {
		return true
	}
	for i := 0; s1 != "" && s2 != ""; i++ {
		r1, n1 := utf8.DecodeRuneInString(s1)
		r2, n2 := utf8.DecodeRuneInString(s2)
		if r1 != r2 {
			t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
		}
		s1, s2 = s1[n1:], s2[n2:]
	}
	return false
}
func (v *verifier) verifyChar(x *Token) int { s := x.String if utf8.RuneCountInString(s) != 1 { v.error(x.Pos(), "single char expected, found "+s) return 0 } ch, _ := utf8.DecodeRuneInString(s) return ch }
func (l *lexer) next() (rune int) { if l.pos >= len(l.input) { l.width = 0 return eof } rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) l.pos += l.width return rune }
func quoteWith(s string, quote byte) string { var buf bytes.Buffer buf.WriteByte(quote) for ; len(s) > 0; s = s[1:] { switch c := s[0]; { case c == quote: buf.WriteByte('\\') buf.WriteByte(quote) case c == '\\': buf.WriteString(`\\`) case ' ' <= c && c <= '~': buf.WriteString(string(c)) case c == '\a': buf.WriteString(`\a`) case c == '\b': buf.WriteString(`\b`) case c == '\f': buf.WriteString(`\f`) case c == '\n': buf.WriteString(`\n`) case c == '\r': buf.WriteString(`\r`) case c == '\t': buf.WriteString(`\t`) case c == '\v': buf.WriteString(`\v`) case c >= utf8.RuneSelf && utf8.FullRuneInString(s): r, size := utf8.DecodeRuneInString(s) if r == utf8.RuneError && size == 1 { goto EscX } s = s[size-1:] // next iteration will slice off 1 more if r < 0x10000 { buf.WriteString(`\u`) for j := uint(0); j < 4; j++ { buf.WriteByte(lowerhex[(r>>(12-4*j))&0xF]) } } else { buf.WriteString(`\U`) for j := uint(0); j < 8; j++ { buf.WriteByte(lowerhex[(r>>(28-4*j))&0xF]) } } default: EscX: buf.WriteString(`\x`) buf.WriteByte(lowerhex[c>>4]) buf.WriteByte(lowerhex[c&0xF]) } } buf.WriteByte(quote) return buf.String() }
// Unquote interprets s as a single-quoted, double-quoted, // or backquoted Go string literal, returning the string value // that s quotes. (If s is single-quoted, it would be a Go // character literal; Unquote returns the corresponding // one-character string.) func Unquote(s string) (t string, err os.Error) { n := len(s) if n < 2 { return "", os.EINVAL } quote := s[0] if quote != s[n-1] { return "", os.EINVAL } s = s[1 : n-1] if quote == '`' { if strings.Contains(s, "`") { return "", os.EINVAL } return s, nil } if quote != '"' && quote != '\'' { return "", os.EINVAL } if strings.Index(s, "\n") >= 0 { return "", os.EINVAL } // Is it trivial? Avoid allocation. if strings.Index(s, `\`) < 0 && strings.IndexRune(s, int(quote)) < 0 { switch quote { case '"': return s, nil case '\'': r, size := utf8.DecodeRuneInString(s) if size == len(s) && (r != utf8.RuneError || size != 1) { return s, nil } } } var buf bytes.Buffer for len(s) > 0 { c, multibyte, ss, err := UnquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf.WriteByte(byte(c)) } else { buf.WriteString(string(c)) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", os.EINVAL } } return buf.String(), nil }
func checkUTF8(s string) error { for s != "" { rune, size := utf8.DecodeRuneInString(s) if rune == utf8.RuneError && size == 1 { return &Error{Code: ErrInvalidUTF8, Expr: s} } s = s[size:] } return nil }
// TrimRightFunc returns a slice of the string s with all trailing // Unicode code points c satisfying f(c) removed. func TrimRightFunc(s string, f func(rune) bool) string { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { _, wid := utf8.DecodeRuneInString(s[i:]) i += wid } else { i++ } return s[0:i] }
func (i *inputString) context(pos int) syntax.EmptyOp { r1, r2 := endOfText, endOfText if pos > 0 && pos <= len(i.str) { r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) } if pos < len(i.str) { r2, _ = utf8.DecodeRuneInString(i.str[pos:]) } return syntax.EmptyOpContext(r1, r2) }
func (i *inputString) step(pos int) (rune, int) { if pos < len(i.str) { c := i.str[pos] if c < utf8.RuneSelf { return rune(c), 1 } return utf8.DecodeRuneInString(i.str[pos:]) } return endOfText, 0 }
func (p *parser) nextc() int { if p.pos >= len(p.re.expr) { p.ch = endOfFile } else { c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:]) p.ch = c p.pos += w } return p.ch }
func TestIndexRune(t *testing.T) { for _, tt := range indexRuneTests { a := []byte(tt.a) r, _ := utf8.DecodeRuneInString(tt.b) pos := IndexRune(a, r) if pos != tt.i { t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos) } } }
// isTest tells whether name looks like a test (or benchmark, according to
// prefix). A name qualifies when it is exactly prefix, or when the
// character right after prefix is not a lower-case letter: "TestFoo" and
// "Test_x" qualify, "Testify" does not.
func isTest(name, prefix string) bool {
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	rest := name[len(prefix):]
	if rest == "" {
		// The bare prefix, e.g. "Test", is ok.
		return true
	}
	first, _ := utf8.DecodeRuneInString(rest)
	return !unicode.IsLower(first)
}
func main() { maths, err := io.ReadFile("math.peg") if err != nil { log.Exit("Error: ", err) } fmt.Println(maths) rune, size := utf8.DecodeRuneInString(string(maths)) fmt.Println(rune, size) return }
// main checks that six known characters (three ASCII, three CJK) survive a
// round trip through UTF-8: once decoded from a string built from the
// runes, and once decoded from a hand-written byte slice holding the same
// encoding. It panics on any zero-width decode, wrong value or unexpected
// length.
func main() {
	chars := [6]rune{'a', 'b', 'c', '\u65e5', '\u672c', '\u8a9e'}

	// Converting the rune slice encodes every rune as UTF-8 in one step.
	s := string(chars[:])
	l := len(s)
	for w, i, j := 0, 0, 0; i < l; i += w {
		var r rune
		r, w = utf8.DecodeRuneInString(s[i:])
		if w == 0 {
			panic("zero width in string")
		}
		if r != chars[j] {
			panic("wrong value from string")
		}
		j++
	}

	// encoded as bytes: 'a' 'b' 'c' e6 97 a5 e6 9c ac e8 aa 9e
	const L = 12
	if L != l {
		panic("wrong length constructing array")
	}
	a := []byte{
		'a', 'b', 'c',
		0xe6, 0x97, 0xa5,
		0xe6, 0x9c, 0xac,
		0xe8, 0xaa, 0x9e,
	}
	for w, i, j := 0, 0, 0; i < L; i += w {
		var r rune
		r, w = utf8.DecodeRune(a[i:L])
		if w == 0 {
			panic("zero width in bytes")
		}
		if r != chars[j] {
			panic("wrong value from bytes")
		}
		j++
	}
}
func getrune() (result int) { var n int if linep >= len(line) { return 0 } result, n = utf8.DecodeRuneInString(line[linep:len(line)]) linep += n if result == '\n' { result = 0 } return }
// advance determines whether the next characters in the input match // those of the format. It returns the number of bytes (sic) consumed // in the format. Newlines included, all runs of space characters in // either input or format behave as a single space. This routine also // handles the %% case. If the return value is zero, either format // starts with a % (with no following %) or the input is empty. // If it is negative, the input did not match the string. func (s *ss) advance(format string) (i int) { for i < len(format) { fmtc, w := utf8.DecodeRuneInString(format[i:]) if fmtc == '%' { // %% acts like a real percent nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty if nextc != '%' { return } i += w // skip the first % } sawSpace := false for unicode.IsSpace(fmtc) && i < len(format) { sawSpace = true i += w fmtc, w = utf8.DecodeRuneInString(format[i:]) } if sawSpace { // There was space in the format, so there should be space (EOF) // in the input. inputc := s.getRune() if inputc == eof { return } if !unicode.IsSpace(inputc) { // Space in format but not in input: error s.errorString("expected space in input to match format") } s.skipSpace(true) continue } inputc := s.mustReadRune() if fmtc != inputc { s.UnreadRune() return -1 } i += w } return }