Example #1
0
// consumeIdentifierToken advances the reader past an identifier and reports
// whether one was found. The first character must be an ASCII letter, '$',
// '_', a rune in identifierStart, or whatever consumeUnicodeEscape accepts.
// Following characters may be ASCII letters or digits, '$', '_', U+200C,
// U+200D, or runes in identifierContinue.
func (l *Lexer) consumeIdentifierToken() bool {
	// Leading character.
	switch first := l.r.Peek(0); {
	case first >= 'a' && first <= 'z', first >= 'A' && first <= 'Z', first == '$', first == '_':
		l.r.Move(1)
	case first >= 0xC0:
		// Values from 0xC0 up are decoded as a full rune before classifying.
		r, n := l.r.PeekRune(0)
		if !unicode.IsOneOf(identifierStart, r) {
			return false
		}
		l.r.Move(n)
	default:
		if !l.consumeUnicodeEscape() {
			return false
		}
	}

	// Remaining characters: stop at the first one that cannot continue an
	// identifier.
	for {
		next := l.r.Peek(0)
		switch {
		case next >= 'a' && next <= 'z', next >= 'A' && next <= 'Z', next >= '0' && next <= '9', next == '$', next == '_':
			l.r.Move(1)
		case next >= 0xC0:
			r, n := l.r.PeekRune(0)
			if r != '\u200C' && r != '\u200D' && !unicode.IsOneOf(identifierContinue, r) {
				return true
			}
			l.r.Move(n)
		default:
			return true
		}
	}
}
Example #2
0
File: bytes.go Project: xtgo/slug
// SlugBytes replaces each run of characters which are not unicode letters or
// numbers with a single hyphen, except for leading or trailing runs. Letters
// will be stripped of diacritical marks and lowercased. Letter or number
// codepoints that do not have combining marks or a lower-cased variant will
// be passed through unaltered.
//
// The input is NFKD-normalized first. Runes matching lat are kept (in lower
// case), runes matching nop are dropped without ending the current run, and
// any other rune ends the run.
func SlugBytes(s []byte) []byte {
	s = norm.NFKD.Bytes(s)
	buf := make([]byte, 0, len(s))
	// enc is scratch space for re-encoding a lowercased rune.
	var enc [utf8.UTFMax]byte
	// dash is true while a kept rune was the last thing written, i.e. while a
	// separator would need a hyphen.
	dash := false
	for len(s) > 0 {
		r, i := utf8.DecodeRune(s)
		switch {
		// unicode 'letters' like mandarin characters pass through
		case unicode.IsOneOf(lat, r):
			if lower := unicode.ToLower(r); lower != r {
				// Lowercase as documented (and as Slug does).
				n := utf8.EncodeRune(enc[:], lower)
				buf = append(buf, enc[:n]...)
			} else {
				// No lower-case variant: copy the original bytes unchanged.
				buf = append(buf, s[:i]...)
			}
			dash = true
		case unicode.IsOneOf(nop, r):
			// skip
		case dash:
			buf = append(buf, '-')
			dash = false
		}
		s = s[i:]
	}
	// Drop the hyphen left behind by a trailing run of separators.
	if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
		buf = buf[:i]
	}
	return buf
}
Example #3
0
// Width reports the width of r: 0 if r is in zeroWidth, 2 if it is in
// doubleWidth, and 1 otherwise. zeroWidth is consulted first.
func (Runes) Width(r rune) int {
	switch {
	case unicode.In(r, zeroWidth...):
		return 0
	case unicode.In(r, doubleWidth...):
		return 2
	default:
		return 1
	}
}
Example #4
0
// getPrefixGlyphs returns the shortest prefix of s that holds num runes not
// in zeroWidth, extended over any zeroWidth runes that immediately follow it.
// If s holds fewer than num such runes, all of s is returned.
func getPrefixGlyphs(s []rune, num int) []rune {
	end, counted := 0, 0
	for counted < num && end < len(s) {
		if !unicode.IsOneOf(zeroWidth, s[end]) {
			counted++
		}
		end++
	}
	// Keep trailing zero-width runes together with the last counted rune.
	for ; end < len(s); end++ {
		if !unicode.IsOneOf(zeroWidth, s[end]) {
			break
		}
	}
	return s[:end]
}
Example #5
0
// Char reports the width of r. The checks run in this order: the null
// character is 0, a rune in Unicode category C is -1, a rune in nonSpacing
// is 0, a rune in fullAndWideSpacing is 2, and anything else is 1.
func Char(r rune) int {
	switch {
	case r == 0:
		// null character
		return 0
	case unicode.Is(unicode.C, r):
		// control or other special character
		return -1
	case unicode.IsOneOf(nonSpacing, r):
		return 0
	case unicode.IsOneOf(fullAndWideSpacing, r):
		return 2
	}
	return 1
}
Example #6
0
// countGlyphs returns the total width of s in glyphs. Runes in zeroWidth add
// nothing, runes in doubleWidth add 2, and every other rune adds 1.
func countGlyphs(s []rune) int {
	total := 0
	for i := range s {
		if unicode.IsOneOf(zeroWidth, s[i]) {
			continue
		}
		if unicode.IsOneOf(doubleWidth, s[i]) {
			total += 2
		} else {
			total++
		}
	}
	return total
}
Example #7
0
// extract_go_expr walks backwards from the current cursor over dots,
// bracketed groups and identifiers, and returns the bytes between the place
// where the walk stopped and the original cursor position.
func (this *bytes_iterator) extract_go_expr() []byte {
	// What the previous step consumed. Only prevIdent is ever tested.
	const (
		prevNone = iota
		prevDot
		prevParen
		prevIdent
	)
	prev := prevNone
	end := this.cursor
	this.move_backwards()
	for {
		r := this.rune()
		if r == '.' {
			this.move_backwards()
			prev = prevDot
			continue
		}
		if r == ')' || r == ']' {
			// A closing bracket directly before an identifier is not part of
			// the same expression.
			if prev == prevIdent {
				break
			}
			this.skip_to_bracket_pair()
			this.move_backwards()
			prev = prevParen
			continue
		}
		if !unicode.IsOneOf(g_unicode_ident_set, r) {
			break
		}
		this.skip_ident()
		prev = prevIdent
	}
	return this.data[this.cursor+1 : end]
}
Example #8
0
// readToken skips leading whitespace and returns the next token from the
// stream. It returns an EndToken at end of input, and an error if reading
// fails or the next character cannot start a token.
func (scanner *Scanner) readToken() (Token, error) {
	r, _, err := scanner.stream.ReadRune()
	for err == nil && unicode.IsSpace(r) {
		r, _, err = scanner.stream.ReadRune()
	}

	if err == io.EOF {
		return EndToken{}, nil
	}
	if err != nil {
		return nil, err
	}

	switch {
	case r == '(':
		return OpenParenToken{}, nil
	case r == ')':
		return CloseParenToken{}, nil
	case r == '!', r == '=', r == '<', r == '>':
		return scanner.readComparisonOperatorToken(r)
	case unicode.IsOneOf(symbolChars, r), r == '\\':
		// Put the rune back so readTextToken starts at the first character.
		scanner.stream.UnreadRune()
		return scanner.readTextToken()
	default:
		// %c prints the character itself; %v would print its code point
		// number.
		return nil, fmt.Errorf("Unexpected character '%c'.", r)
	}
}
Example #9
0
// readString reads runes from the stream and appends them to initialRune for
// as long as they are in symbolChars. It stops at end of input, or at
// whitespace, a parenthesis or one of '=', '<', '>', which is pushed back
// onto the stream. Any other character, or a read failure, is an error.
func (scanner *Scanner) readString(initialRune rune) (string, error) {
	// Collect runes in a slice and convert once, rather than growing a
	// string on every iteration.
	text := []rune{initialRune}

	for {
		r, _, err := scanner.stream.ReadRune()
		if err == io.EOF {
			return string(text), nil
		}
		if err != nil {
			return "", err
		}

		switch {
		case unicode.IsSpace(r), r == ')', r == '(', r == '=', r == '<', r == '>':
			// The delimiter belongs to the next token.
			scanner.stream.UnreadRune()
			return string(text), nil
		case unicode.IsOneOf(symbolChars, r):
			text = append(text, r)
		default:
			// %c prints the character itself; %v would print its code point
			// number.
			return "", fmt.Errorf("Unexpected character '%c'.", r)
		}
	}
}
Example #10
0
// validateValueName returns an error describing why valueName cannot be used
// as a tag value, or nil if it can. Rejected are the empty string, "." and
// "..", the words "and", "or" and "not", and any name containing a
// parenthesis, comma, comparison operator, space, tab, slash, or a character
// outside validValueChars.
func validateValueName(valueName string) error {
	if valueName == "" {
		return errors.New("tag value cannot be empty.")
	}
	if valueName == "." || valueName == ".." {
		// cannot be used in the VFS
		return errors.New("tag value cannot be '.' or '..'.")
	}
	if valueName == "and" || valueName == "or" || valueName == "not" {
		// used in query language
		return errors.New("tag value cannot be a logical operator: 'and', 'or' or 'not'.")
	}

	for _, ch := range valueName {
		switch {
		case ch == '(' || ch == ')':
			// used in query language
			return errors.New("tag value cannot contain parentheses: '(' or ')'.")
		case ch == ',':
			// reserved for tag delimiter
			return errors.New("tag value cannot contain comma: ','.")
		case ch == '=' || ch == '<' || ch == '>':
			// reserved for tag values
			return errors.New("tag value cannot contain a comparison operator: '=', '<' or '>'.")
		case ch == ' ' || ch == '\t':
			// used as tag delimiter
			return errors.New("tag value cannot contain space or tab.")
		case ch == '/':
			// cannot be used in the VFS
			return errors.New("tag value cannot contain slash: '/'.")
		case !unicode.IsOneOf(validValueChars, ch):
			return fmt.Errorf("tag value cannot contain '%c'.", ch)
		}
	}

	return nil
}
Example #11
0
// deduce_cursor_context works out what is being completed at cursor. The
// returned context holds the declaration found by deduce_cursor_decl (nil if
// none was looked up) and the partial identifier typed so far. The boolean
// is false only when a declaration was looked up and came back nil.
func (c *auto_complete_context) deduce_cursor_context(file []byte, cursor int) (cursor_context, bool) {
	empty := cursor_context{nil, ""}
	if cursor <= 0 {
		return empty, true
	}

	start := cursor
	it := bytes_iterator{file, cursor}

	// look at what sits just before the cursor
	it.move_backwards()
	if it.char() == '.' {
		// '<whatever>.' : resolve the declaration, the partial is ""
		decl := c.deduce_cursor_decl(&it)
		return cursor_context{decl, ""}, decl != nil
	}

	if !unicode.IsOneOf(g_unicode_ident_set, it.rune()) {
		return empty, true
	}

	// '<whatever>.<ident>' or a bare '<ident>': the identifier is the partial
	it.skip_ident()
	partial := string(it.data[it.cursor+1 : start])
	if it.char() != '.' {
		return cursor_context{nil, partial}, true
	}
	decl := c.deduce_cursor_decl(&it)
	return cursor_context{decl, partial}, decl != nil
}
Example #12
0
File: bytes.go Project: xtgo/slug
// SlugAsciiBytes is identical to SlugBytes, except that runs of one or more
// unicode letters or numbers that still fall outside the ASCII range will have
// their UTF-8 representation hex encoded and delimited by hyphens. As with
// SlugBytes, in no case will hyphens appear at either end of the returned
// string.
func SlugAsciiBytes(s []byte) []byte {
	// Apply NFKD normalization before classifying individual runes.
	s = norm.NFKD.Bytes(s)
	const m = utf8.UTFMax
	var (
		// ib is scratch space: the first m bytes receive the UTF-8 encoding
		// of the current rune, the remaining 2*m bytes receive its hex form.
		ib    [m * 3]byte
		// ob is the part of ib appended to buf for the current rune.
		ob    []byte
		buf   = make([]byte, 0, len(s))
		// dash is true once a kept rune has been written and no separator
		// hyphen has been written since.
		dash  = false
		// latin is false only while the most recently written rune was
		// hex encoded.
		latin = true
	)
	for len(s) > 0 {
		r, i := utf8.DecodeRune(s)
		switch {
		case unicode.IsOneOf(lat, r):
			r = unicode.ToLower(r)
			n := utf8.EncodeRune(ib[:m], r)
			if r >= 128 {
				// Non-ASCII: write a hyphen if this starts a hex run right
				// after ASCII output, then the hex of the UTF-8 bytes.
				if latin && dash {
					buf = append(buf, '-')
				}
				n = hex.Encode(ib[m:], ib[:n])
				ob = ib[m : m+n]
				latin = false
			} else {
				// ASCII: write a hyphen if this directly follows a hex run.
				if !latin {
					buf = append(buf, '-')
				}
				ob = ib[:n]
				latin = true
			}
			dash = true
			buf = append(buf, ob...)
		case unicode.IsOneOf(nop, r):
			// skip
		case dash:
			// First separator after kept output: collapse the run of
			// separators into a single hyphen.
			buf = append(buf, '-')
			dash = false
			latin = true
		}
		s = s[i:]
	}
	// Drop the hyphen left behind by a trailing run of separators.
	i := len(buf) - 1
	if i >= 0 && buf[i] == '-' {
		buf = buf[:i]
	}
	return buf
}
Example #13
0
// getPrefixGlyphs returns the shortest prefix of s that holds num counted
// runes, extended over any zeroWidth runes that immediately follow it. A rune
// is counted if it is below 127 or not in zeroWidth. If s holds fewer than
// num counted runes, all of s is returned.
func getPrefixGlyphs(s []rune, num int) []rune {
	end := 0
	for counted := 0; counted < num && end < len(s); end++ {
		// Runes below 127 are counted without a table lookup to speed up the
		// common case.
		if r := s[end]; r < 127 || !unicode.IsOneOf(zeroWidth, r) {
			counted++
		}
	}
	// Keep trailing zero-width runes together with the last counted rune.
	for end < len(s) {
		if !unicode.IsOneOf(zeroWidth, s[end]) {
			break
		}
		end++
	}
	return s[:end]
}
Example #14
0
// isChar reports whether every rune in s belongs to at least one of the
// tables in rangeTable. It returns true for the empty string.
func isChar(s string, rangeTable []*unicode.RangeTable) bool {
	// Ranging over the string decodes runes in place, so no []rune copy of
	// the input is needed.
	for _, r := range s {
		if !unicode.In(r, rangeTable...) {
			return false
		}
	}
	return true
}
Example #15
0
// countMultiLineGlyphs returns the glyph position reached after laying out s
// from position start on lines that are columns wide. Runes in zeroWidth add
// nothing, runes in doubleWidth add 2, and every other rune adds 1.
func countMultiLineGlyphs(s []rune, columns int, start int) int {
	pos := start
	for _, r := range s {
		if unicode.IsOneOf(zeroWidth, r) {
			continue
		}
		if !unicode.IsOneOf(doubleWidth, r) {
			pos++
			continue
		}
		pos += 2
		// no room for a 2-glyphs-wide char at the end of the line, so skip a
		// column and display it at the beginning of the next one
		if pos%columns == 1 {
			pos++
		}
	}
	return pos
}
Example #16
0
File: lexer.go Project: goj/golog
// forward advances l.pos past every consecutive rune that belongs to one of
// tables, stopping at the first rune that does not or at the end of the
// input.
func (l *lexer) forward(tables []*unicode.RangeTable) {
	// The bounds check matters: at end of input DecodeRuneInString returns
	// (utf8.RuneError, 0), so a table containing U+FFFD (unicode.So does)
	// would otherwise match forever without advancing.
	for l.pos < len(l.input) {
		r, rlen := utf8.DecodeRuneInString(l.input[l.pos:])
		if !unicode.IsOneOf(tables, r) {
			break
		}
		l.pos += rlen
	}
}
Example #17
0
// getSuffixGlyphs returns the shortest suffix of s that holds num runes not
// in zeroWidth. If s holds fewer than num such runes, all of s is returned.
func getSuffixGlyphs(s []rune, num int) []rune {
	start, counted := len(s), 0
	for counted < num && start > 0 {
		start--
		if !unicode.IsOneOf(zeroWidth, s[start]) {
			counted++
		}
	}
	return s[start:]
}
Example #18
0
// countGlyphs returns the total width of s in glyphs. Runes below 127 add 1
// without a table lookup; otherwise runes in zeroWidth add nothing, runes in
// doubleWidth add 2, and every other rune adds 1.
func countGlyphs(s []rune) int {
	width := 0
	for _, r := range s {
		switch {
		case r < 127:
			// speed up the common case
			width++
		case unicode.IsOneOf(zeroWidth, r):
			// contributes nothing
		case unicode.IsOneOf(doubleWidth, r):
			width += 2
		default:
			width++
		}
	}
	return width
}
Example #19
0
// SlugAscii is identical to Slug, except that runs of one or more unicode
// letters or numbers that still fall outside the ASCII range will have their
// UTF-8 representation hex encoded and delimited by hyphens. As with Slug, in
// no case will hyphens appear at either end of the returned string.
func SlugAscii(s string) string {
	const m = utf8.UTFMax
	var (
		// ib is scratch space: the first m bytes receive the UTF-8 encoding
		// of the current rune, the remaining 2*m bytes receive its hex form.
		ib    [m * 3]byte
		// ob is the part of ib appended to buf for the current rune.
		ob    []byte
		buf   = make([]byte, 0, len(s))
		// dash is true once a kept rune has been written and no separator
		// hyphen has been written since.
		dash  = false
		// latin is false only while the most recently written rune was
		// hex encoded.
		latin = true
	)
	// Iterate over the NFKD-normalized form of the input.
	for _, r := range norm.NFKD.String(s) {
		switch {
		case unicode.IsOneOf(lat, r):
			r = unicode.ToLower(r)
			n := utf8.EncodeRune(ib[:m], r)
			if r >= 128 {
				// Non-ASCII: write a hyphen if this starts a hex run right
				// after ASCII output, then the hex of the UTF-8 bytes.
				if latin && dash {
					buf = append(buf, '-')
				}
				n = hex.Encode(ib[m:], ib[:n])
				ob = ib[m : m+n]
				latin = false
			} else {
				// ASCII: write a hyphen if this directly follows a hex run.
				if !latin {
					buf = append(buf, '-')
				}
				ob = ib[:n]
				latin = true
			}
			dash = true
			buf = append(buf, ob...)
		case unicode.IsOneOf(nop, r):
			// skip
		case dash:
			// First separator after kept output: collapse the run of
			// separators into a single hyphen.
			buf = append(buf, '-')
			dash = false
			latin = true
		}
	}
	// Drop the hyphen left behind by a trailing run of separators.
	if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
		buf = buf[:i]
	}
	return string(buf)
}
Example #20
0
// IsLatin reports whether every rune in s is in the Latin script or has the
// ASCII_Hex_Digit, White_Space or Hyphen property. It returns true for the
// empty string.
func IsLatin(s string) bool {
	// Range over the string directly; no []rune copy or accumulator flag is
	// needed because the first mismatch decides the result.
	for _, r := range s {
		if !unicode.In(r, unicode.Latin, unicode.ASCII_Hex_Digit, unicode.White_Space, unicode.Hyphen) {
			return false
		}
	}
	return true
}
Example #21
0
// IsKana reports whether every rune in s is in the Hiragana or Katakana
// script or has the Hyphen or Diacritic property. It returns true for the
// empty string.
func IsKana(s string) bool {
	// Range over the string directly; no []rune copy or accumulator flag is
	// needed because the first mismatch decides the result.
	for _, r := range s {
		if !unicode.In(r, unicode.Hiragana, unicode.Katakana, unicode.Hyphen, unicode.Diacritic) {
			return false
		}
	}
	return true
}
Example #22
0
// IsKanji reports whether every rune in s has the Ideographic property. It
// returns true for the empty string.
func IsKanji(s string) bool {
	// Range over the string directly; no []rune copy or accumulator flag is
	// needed because the first mismatch decides the result.
	for _, r := range s {
		if !unicode.In(r, unicode.Ideographic) {
			return false
		}
	}
	return true
}
Example #23
0
// skip_ident moves the cursor backwards while the rune under it belongs to
// g_unicode_ident_set, stopping on the first rune that does not or when the
// cursor reaches 0.
func (this *bytes_iterator) skip_ident() {
	// i.e. keep going while the rune is [a-zA-Z0-9_] (unicode correct though)
	for this.cursor != 0 && unicode.IsOneOf(g_unicode_ident_set, this.rune()) {
		this.move_backwards()
	}
}
Example #24
0
// Slug turns s into a slug. The input is NFKD-normalized; runes matching lat
// are kept in lower case, runes matching nop are dropped, and every run of
// other runes becomes a single hyphen. Runs at the start or end of the input
// produce no hyphen.
func Slug(s string) string {
	out := make([]rune, 0, len(s))
	// pending is true while the last thing written was a kept rune, i.e. while
	// a separator would need a hyphen.
	pending := false
	for _, r := range norm.NFKD.String(s) {
		// unicode 'letters' like mandarin characters pass through
		if unicode.IsOneOf(lat, r) {
			out = append(out, unicode.ToLower(r))
			pending = true
			continue
		}
		if unicode.IsOneOf(nop, r) {
			continue
		}
		if pending {
			out = append(out, '-')
			pending = false
		}
	}
	// Drop the hyphen left behind by a trailing run of separators.
	if n := len(out); n > 0 && out[n-1] == '-' {
		out = out[:n-1]
	}
	return string(out)
}
Example #25
0
// cleanupPatchTitle returns t with every letter and number kept, every
// whitespace character replaced by a hyphen, and everything else removed.
func cleanupPatchTitle(t string) string {
	// Collect into a pre-sized slice and convert once; appending to a string
	// in the loop would copy the whole result on every rune.
	out := make([]rune, 0, len(t))
	for _, r := range t {
		switch {
		case unicode.IsLetter(r) || unicode.IsNumber(r):
			out = append(out, r)
		case unicode.IsSpace(r):
			out = append(out, '-')
		}
	}
	return string(out)
}
Example #26
0
// IsHanzi reports whether every rune in s has the Unified_Ideograph
// property. It returns true for the empty string.
func IsHanzi(s string) bool {
	// Range over the string directly; no []rune copy or accumulator flag is
	// needed because the first mismatch decides the result.
	for _, r := range s {
		if !unicode.In(r, unicode.Unified_Ideograph) {
			return false
		}
	}
	return true
}
Example #27
0
// getSuffixGlyphs returns the shortest suffix of s that holds num counted
// runes. A rune is counted if it is below 127 or not in zeroWidth. If s holds
// fewer than num counted runes, all of s is returned.
func getSuffixGlyphs(s []rune, num int) []rune {
	start := len(s)
	for counted := 0; counted < num && start > 0; start-- {
		// Runes below 127 are counted without a table lookup to speed up the
		// common case.
		if r := s[start-1]; r < 127 || !unicode.IsOneOf(zeroWidth, r) {
			counted++
		}
	}
	return s[start:]
}
Example #28
0
// IsChinese reports whether every rune in s has at least one of the
// Unified_Ideograph, Terminal_Punctuation, Hyphen, Diacritic or White_Space
// properties. It returns true for the empty string.
func IsChinese(s string) bool {
	// Range over the string directly; no []rune copy or accumulator flag is
	// needed because the first mismatch decides the result.
	for _, r := range s {
		if !unicode.In(r,
			unicode.Unified_Ideograph,
			unicode.Terminal_Punctuation,
			unicode.Hyphen,
			unicode.Diacritic,
			unicode.White_Space,
		) {
			return false
		}
	}
	return true
}
Example #29
0
// consumeRegexpToken tries to consume a regular expression literal that
// starts at the current '/'. If consumeLineTerminator succeeds inside the
// literal, the reader is rewound to the position recorded on entry and false
// is returned. Otherwise it returns true, after also consuming any
// identifier characters that follow the closing '/'.
func (l *Lexer) consumeRegexpToken() bool {
	// assume to be on / and not /*
	mark := l.r.Pos()
	l.r.Move(1)
	// inClass is true between '[' and ']'; a '/' seen there does not end the
	// literal.
	inClass := false
	for {
		c := l.r.Peek(0)
		if !inClass && c == '/' {
			// closing delimiter
			l.r.Move(1)
			break
		} else if c == '[' {
			inClass = true
		} else if c == ']' {
			inClass = false
		} else if c == '\\' {
			// step over the backslash; the Move(1) at the bottom of the loop
			// then steps over the character it escapes
			l.r.Move(1)
			if l.consumeLineTerminator() {
				l.r.Rewind(mark)
				return false
			}
		} else if l.consumeLineTerminator() {
			l.r.Rewind(mark)
			return false
		} else if c == 0 {
			// NOTE(review): presumably Peek returns 0 at end of input, in
			// which case an unterminated literal is still reported as
			// consumed — confirm.
			return true
		}
		l.r.Move(1)
	}
	// flags: ASCII letters and digits, '$', '_', U+200C, U+200D and runes in
	// identifierContinue are consumed after the closing '/'
	for {
		c := l.r.Peek(0)
		if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '$' || c == '_' {
			l.r.Move(1)
		} else if c >= 0xC0 {
			// values from 0xC0 up are decoded as a full rune before
			// classifying
			if r, n := l.r.PeekRune(0); r == '\u200C' || r == '\u200D' || unicode.IsOneOf(identifierContinue, r) {
				l.r.Move(n)
			} else {
				break
			}
		} else {
			break
		}
	}
	return true
}
Example #30
0
// verifyPassword returns an error built from invalidPasswordLengthError if
// password is shorter than 4 or longer than 30 bytes, and one built from
// invalidPasswordError unless it contains at least one upper or title case
// rune, one lower case rune, and one number or digit. It returns nil
// otherwise.
func verifyPassword(password string) error {
	if n := len(password); n < 4 || n > 30 {
		return fmt.Errorf(invalidPasswordLengthError)
	}

	// Every class must be matched by at least one rune. The order of the
	// checks does not matter because each failure yields the same error.
	required := []struct {
		label  string
		tables []*unicode.RangeTable
	}{
		{"upper case", []*unicode.RangeTable{unicode.Upper, unicode.Title}},
		{"lower case", []*unicode.RangeTable{unicode.Lower}},
		{"numeric", []*unicode.RangeTable{unicode.Number, unicode.Digit}},
	}
	for _, class := range required {
		matched := false
		for _, r := range password {
			if unicode.IsOneOf(class.tables, r) {
				matched = true
				break
			}
		}
		if !matched {
			return fmt.Errorf(invalidPasswordError)
		}
	}
	return nil
}