func (l *Lexer) consumeIdentifierToken() bool { c := l.r.Peek(0) if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '$' || c == '_' { l.r.Move(1) } else if c >= 0xC0 { if r, n := l.r.PeekRune(0); unicode.IsOneOf(identifierStart, r) { l.r.Move(n) } else { return false } } else if !l.consumeUnicodeEscape() { return false } for { c := l.r.Peek(0) if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { l.r.Move(1) } else if c >= 0xC0 { if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { l.r.Move(n) } else { break } } else { break } } return true }
// SlugBytes replaces each run of characters which are not unicode letters or // numbers with a single hyphen, except for leading or trailing runs. Letters // will be stripped of diacritical marks and lowercased. Letter or number // codepoints that do not have combining marks or a lower-cased variant will // be passed through unaltered. func SlugBytes(s []byte) []byte { s = norm.NFKD.Bytes(s) buf := make([]byte, 0, len(s)) dash := false for len(s) > 0 { r, i := utf8.DecodeRune(s) switch { // unicode 'letters' like mandarin characters pass through case unicode.IsOneOf(lat, r): buf = append(buf, s[:i]...) dash = true case unicode.IsOneOf(nop, r): // skip case dash: buf = append(buf, '-') dash = false } s = s[i:] } i := len(buf) - 1 if i >= 0 && buf[i] == '-' { buf = buf[:i] } return buf }
func (Runes) Width(r rune) int { if unicode.IsOneOf(zeroWidth, r) { return 0 } if unicode.IsOneOf(doubleWidth, r) { return 2 } return 1 }
func getPrefixGlyphs(s []rune, num int) []rune { p := 0 for n := 0; n < num && p < len(s); p++ { if !unicode.IsOneOf(zeroWidth, s[p]) { n++ } } for p < len(s) && unicode.IsOneOf(zeroWidth, s[p]) { p++ } return s[:p] }
func Char(r rune) int { if r == 0 { return 0 // null character } else if unicode.Is(unicode.C, r) { // is r control or special character return -1 /* binary search in table of non-spacing characters */ } else if unicode.IsOneOf(nonSpacing, r) { return 0 } else if unicode.IsOneOf(fullAndWideSpacing, r) { return 2 } return 1 }
// countGlyphs considers zero-width characters to be zero glyphs wide, // and members of Chinese, Japanese, and Korean scripts to be 2 glyphs wide. func countGlyphs(s []rune) int { n := 0 for _, r := range s { switch { case unicode.IsOneOf(zeroWidth, r): case unicode.IsOneOf(doubleWidth, r): n += 2 default: n++ } } return n }
// starting from the end of the 'file', move backwards and return a slice of a // valid Go expression func (this *bytes_iterator) extract_go_expr() []byte { const ( last_none = iota last_dot last_paren last_ident ) last := last_none orig := this.cursor this.move_backwards() loop: for { r := this.rune() switch r { case '.': this.move_backwards() last = last_dot case ')', ']': if last == last_ident { break loop } this.skip_to_bracket_pair() this.move_backwards() last = last_paren default: if unicode.IsOneOf(g_unicode_ident_set, r) { this.skip_ident() last = last_ident } else { break loop } } } return this.data[this.cursor+1 : orig] }
func (scanner *Scanner) readToken() (Token, error) { r, _, err := scanner.stream.ReadRune() for err == nil && unicode.IsSpace(r) { r, _, err = scanner.stream.ReadRune() } if err == io.EOF { return EndToken{}, nil } if err != nil { return nil, err } switch { case r == rune('('): return OpenParenToken{}, nil case r == rune(')'): return CloseParenToken{}, nil case r == rune('!'), r == rune('='), r == rune('<'), r == rune('>'): return scanner.readComparisonOperatorToken(r) case unicode.IsOneOf(symbolChars, r), r == rune('\\'): scanner.stream.UnreadRune() return scanner.readTextToken() default: return nil, fmt.Errorf("Unepxected character '%v'.", r) } panic("unreachable") }
func (scanner *Scanner) readString(initialRune rune) (string, error) { text := string(initialRune) stop := false for !stop { r, _, err := scanner.stream.ReadRune() if err == io.EOF { return text, nil } if err != nil { return "", err } switch { case unicode.IsSpace(r), r == rune(')'), r == rune('('), r == rune('='), r == rune('<'), r == rune('>'): scanner.stream.UnreadRune() return text, nil case unicode.IsOneOf(symbolChars, r): text += string(r) default: return "", fmt.Errorf("Unexpected character '%v'.", r) } } panic("unreachable") }
func validateValueName(valueName string) error { switch valueName { case "": return errors.New("tag value cannot be empty.") case ".", "..": return errors.New("tag value cannot be '.' or '..'.") // cannot be used in the VFS case "and", "or", "not": return errors.New("tag value cannot be a logical operator: 'and', 'or' or 'not'.") // used in query language } for _, ch := range valueName { switch ch { case '(', ')': return errors.New("tag value cannot contain parentheses: '(' or ')'.") // used in query language case ',': return errors.New("tag value cannot contain comma: ','.") // reserved for tag delimiter case '=', '<', '>': return errors.New("tag value cannot contain a comparison operator: '=', '<' or '>'.") // reserved for tag values case ' ', '\t': return errors.New("tag value cannot contain space or tab.") // used as tag delimiter case '/': return errors.New("tag value cannot contain slash: '/'.") // cannot be used in the VFS } if !unicode.IsOneOf(validValueChars, ch) { return fmt.Errorf("tag value cannot contain '%c'.", ch) } } return nil }
// deduce cursor context, it includes the declaration under the cursor and partial identifier // (usually a part of the name of the child declaration) func (c *auto_complete_context) deduce_cursor_context(file []byte, cursor int) (cursor_context, bool) { if cursor <= 0 { return cursor_context{nil, ""}, true } orig := cursor iter := bytes_iterator{file, cursor} // figure out what is just before the cursor iter.move_backwards() if iter.char() == '.' { // we're '<whatever>.' // figure out decl, Parital is "" decl := c.deduce_cursor_decl(&iter) return cursor_context{decl, ""}, decl != nil } r := iter.rune() if unicode.IsOneOf(g_unicode_ident_set, r) { // we're '<whatever>.<ident>' // parse <ident> as Partial and figure out decl iter.skip_ident() partial := string(iter.data[iter.cursor+1 : orig]) if iter.char() == '.' { decl := c.deduce_cursor_decl(&iter) return cursor_context{decl, partial}, decl != nil } else { return cursor_context{nil, partial}, true } } return cursor_context{nil, ""}, true }
// SlugAsciiBytes is identical to SlugBytes, except that runs of one or more // unicode letters or numbers that still fall outside the ASCII range will have // their UTF-8 representation hex encoded and delimited by hyphens. As with // SlugBytes, in no case will hyphens appear at either end of the returned // string. func SlugAsciiBytes(s []byte) []byte { s = norm.NFKD.Bytes(s) const m = utf8.UTFMax var ( ib [m * 3]byte ob []byte buf = make([]byte, 0, len(s)) dash = false latin = true ) for len(s) > 0 { r, i := utf8.DecodeRune(s) switch { case unicode.IsOneOf(lat, r): r = unicode.ToLower(r) n := utf8.EncodeRune(ib[:m], r) if r >= 128 { if latin && dash { buf = append(buf, '-') } n = hex.Encode(ib[m:], ib[:n]) ob = ib[m : m+n] latin = false } else { if !latin { buf = append(buf, '-') } ob = ib[:n] latin = true } dash = true buf = append(buf, ob...) case unicode.IsOneOf(nop, r): // skip case dash: buf = append(buf, '-') dash = false latin = true } s = s[i:] } i := len(buf) - 1 if i >= 0 && buf[i] == '-' { buf = buf[:i] } return buf }
func getPrefixGlyphs(s []rune, num int) []rune { p := 0 for n := 0; n < num && p < len(s); p++ { // speed up the common case if s[p] < 127 { n++ continue } if !unicode.IsOneOf(zeroWidth, s[p]) { n++ } } for p < len(s) && unicode.IsOneOf(zeroWidth, s[p]) { p++ } return s[:p] }
// isChar reports whether every rune of s belongs to at least one of the
// given range tables. It returns true for the empty string.
func isChar(s string, rangeTable []*unicode.RangeTable) bool {
	// Ranging over the string decodes runes in place, so no []rune copy is needed.
	for _, r := range s {
		if !unicode.In(r, rangeTable...) {
			return false
		}
	}
	return true
}
func countMultiLineGlyphs(s []rune, columns int, start int) int { n := start for _, r := range s { switch { case unicode.IsOneOf(zeroWidth, r): case unicode.IsOneOf(doubleWidth, r): n += 2 // no room for a 2-glyphs-wide char in the ending // so skip a column and display it at the beginning if n%columns == 1 { n++ } default: n++ } } return n }
func (l *lexer) forward(tables []*unicode.RangeTable) { for { r, rlen := utf8.DecodeRuneInString(l.input[l.pos:]) if !unicode.IsOneOf(tables, r) { break } l.pos += rlen } }
func getSuffixGlyphs(s []rune, num int) []rune { p := len(s) for n := 0; n < num && p > 0; p-- { if !unicode.IsOneOf(zeroWidth, s[p-1]) { n++ } } return s[p:] }
// countGlyphs considers zero-width characters to be zero glyphs wide, // and members of Chinese, Japanese, and Korean scripts to be 2 glyphs wide. func countGlyphs(s []rune) int { n := 0 for _, r := range s { // speed up the common case if r < 127 { n++ continue } switch { case unicode.IsOneOf(zeroWidth, r): case unicode.IsOneOf(doubleWidth, r): n += 2 default: n++ } } return n }
// SlugAscii is identical to Slug, except that runs of one or more unicode // letters or numbers that still fall outside the ASCII range will have their // UTF-8 representation hex encoded and delimited by hyphens. As with Slug, in // no case will hyphens appear at either end of the returned string. func SlugAscii(s string) string { const m = utf8.UTFMax var ( ib [m * 3]byte ob []byte buf = make([]byte, 0, len(s)) dash = false latin = true ) for _, r := range norm.NFKD.String(s) { switch { case unicode.IsOneOf(lat, r): r = unicode.ToLower(r) n := utf8.EncodeRune(ib[:m], r) if r >= 128 { if latin && dash { buf = append(buf, '-') } n = hex.Encode(ib[m:], ib[:n]) ob = ib[m : m+n] latin = false } else { if !latin { buf = append(buf, '-') } ob = ib[:n] latin = true } dash = true buf = append(buf, ob...) case unicode.IsOneOf(nop, r): // skip case dash: buf = append(buf, '-') dash = false latin = true } } if i := len(buf) - 1; i >= 0 && buf[i] == '-' { buf = buf[:i] } return string(buf) }
// IsLatin reports whether every rune of s is in one of the Latin,
// ASCII_Hex_Digit, White_Space or Hyphen tables. It returns true for the
// empty string.
func IsLatin(s string) bool {
	// Range over the string directly; no []rune copy or per-rune table slice.
	for _, r := range s {
		if !unicode.In(r, unicode.Latin, unicode.ASCII_Hex_Digit, unicode.White_Space, unicode.Hyphen) {
			return false
		}
	}
	return true
}
// IsKana reports whether every rune of s is in one of the Hiragana,
// Katakana, Hyphen or Diacritic tables. It returns true for the empty
// string.
func IsKana(s string) bool {
	// Range over the string directly; no []rune copy or per-rune table slice.
	for _, r := range s {
		if !unicode.In(r, unicode.Hiragana, unicode.Katakana, unicode.Hyphen, unicode.Diacritic) {
			return false
		}
	}
	return true
}
// IsKanji reports whether every rune of s is in the Ideographic table. It
// returns true for the empty string.
func IsKanji(s string) bool {
	// Range over the string directly; no []rune copy or per-rune table slice.
	for _, r := range s {
		if !unicode.Is(unicode.Ideographic, r) {
			return false
		}
	}
	return true
}
// move cursor backwards, stop at the first rune that is not from // 'g_unicode_ident_set', or 0 func (this *bytes_iterator) skip_ident() { for this.cursor != 0 { r := this.rune() // stop if 'r' is not [a-zA-Z0-9_] (unicode correct though) if !unicode.IsOneOf(g_unicode_ident_set, r) { return } this.move_backwards() } }
// Slug replaces each run of characters which are not unicode letters or // numbers with a single hyphen, except for leading or trailing runs. Letters // will be stripped of diacritical marks and lowercased. Letter or number // codepoints that do not have combining marks or a lower-cased variant will // be passed through unaltered. func Slug(s string) string { buf := make([]rune, 0, len(s)) dash := false for _, r := range norm.NFKD.String(s) { switch { // unicode 'letters' like mandarin characters pass through case unicode.IsOneOf(lat, r): buf = append(buf, unicode.ToLower(r)) dash = true case unicode.IsOneOf(nop, r): // skip case dash: buf = append(buf, '-') dash = false } } if i := len(buf) - 1; i >= 0 && buf[i] == '-' { buf = buf[:i] } return string(buf) }
// cleanupPatchTitle returns t with every letter and number kept as is,
// every whitespace rune replaced by a hyphen, and all other runes removed.
func cleanupPatchTitle(t string) string {
	// Collect runes in a slice and convert once; repeated string
	// concatenation would copy the result on every iteration.
	out := make([]rune, 0, len(t))
	for _, r := range t {
		switch {
		case unicode.In(r, unicode.Letter, unicode.Number):
			out = append(out, r)
		case unicode.IsSpace(r):
			out = append(out, '-')
		}
	}
	return string(out)
}
// IsHanzi reports whether every rune of s is in the Unified_Ideograph
// table. It returns true for the empty string.
func IsHanzi(s string) bool {
	// Range over the string directly; no []rune copy or per-rune table slice.
	for _, r := range s {
		if !unicode.Is(unicode.Unified_Ideograph, r) {
			return false
		}
	}
	return true
}
func getSuffixGlyphs(s []rune, num int) []rune { p := len(s) for n := 0; n < num && p > 0; p-- { // speed up the common case if s[p-1] < 127 { n++ continue } if !unicode.IsOneOf(zeroWidth, s[p-1]) { n++ } } return s[p:] }
// IsChinese reports whether every rune of s is in one of the
// Unified_Ideograph, Terminal_Punctuation, Hyphen, Diacritic or White_Space
// tables. It returns true for the empty string.
func IsChinese(s string) bool {
	// Range over the string directly; no []rune copy or per-rune table slice.
	for _, r := range s {
		if !unicode.In(r,
			unicode.Unified_Ideograph,
			unicode.Terminal_Punctuation,
			unicode.Hyphen,
			unicode.Diacritic,
			unicode.White_Space,
		) {
			return false
		}
	}
	return true
}
func (l *Lexer) consumeRegexpToken() bool { // assume to be on / and not /* mark := l.r.Pos() l.r.Move(1) inClass := false for { c := l.r.Peek(0) if !inClass && c == '/' { l.r.Move(1) break } else if c == '[' { inClass = true } else if c == ']' { inClass = false } else if c == '\\' { l.r.Move(1) if l.consumeLineTerminator() { l.r.Rewind(mark) return false } } else if l.consumeLineTerminator() { l.r.Rewind(mark) return false } else if c == 0 { return true } l.r.Move(1) } // flags for { c := l.r.Peek(0) if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' { l.r.Move(1) } else if c >= 0xC0 { if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) { l.r.Move(n) } else { break } } else { break } } return true }
func verifyPassword(password string) error { if len(password) < 4 || len(password) > 30 { return fmt.Errorf(invalidPasswordLengthError) } next: for _, classes := range map[string][]*unicode.RangeTable{ "upper case": {unicode.Upper, unicode.Title}, "lower case": {unicode.Lower}, "numeric": {unicode.Number, unicode.Digit}, } { for _, r := range password { if unicode.IsOneOf(classes, r) { continue next } } return fmt.Errorf(invalidPasswordError) } return nil }