// The readSpread method lexs a spread ("...") into the token t. // It is the caller's responsibility to set t.Start and to assert that l.last == '.'. func (l *lexer) readSpread(t *token.Token) (err error) { expectDot := func() error { if !l.advance() { return l.err } if l.eof { return &SyntaxError{t.Start, fmt.Errorf("unexpected EOF")} } if l.scanner.Rune() != '.' { return &SyntaxError{t.Start, fmt.Errorf("unexpected character: %U", l.scanner.Rune())} } return nil } // Expect 2 more dots. if err = expectDot(); err != nil { return } if err = expectDot(); err != nil { return } t.Kind = token.Spread t.End = t.Start + 3 t.Value = "..." if !l.advance() { return l.err } return }
// The Lex method lexs the next token into t, or returns an error. // Implements the Lexer interface. func (l *lexer) Lex(t *token.Token) error { // Skip past whitespace, comments, etc. if !l.advanceToNextToken() { return l.err } t.Start = l.lastIndex if l.eof { t.Kind = token.EOF t.End = t.Start return nil } r := l.scanner.Rune() if k, exists := token.RunePunctuators[r]; exists { t.Kind = k t.End = t.Start + 1 t.Value = string(r) if !l.advance() { return l.err } return nil } switch { case r == '_', l.isUpperLetter(), l.isLowerLetter(): return l.readName(t) case r == '-', l.isDigit(): return l.readNumber(t) case r < token.SPACE && r != token.TAB && r != token.LF && r != token.CR: return &SyntaxError{t.Start, fmt.Errorf("invalid character: %U", r)} } switch r { case '"': return l.readString(t) case '.': return l.readSpread(t) default: return &SyntaxError{t.Start, fmt.Errorf("unexpected character: %U", r)} } }
// The readName method lexs a name into the token t. // It is the caller's responsibility to set t.Start and assert that l.last is a valid first character. func (l *lexer) readName(t *token.Token) error { t.Kind = token.Name l.scanner.StartTail() for l.advance() { if l.scanner.Rune() == '_' || l.isDigit() || l.isUpperLetter() || l.isLowerLetter() { continue } else { t.End = l.lastIndex - 1 t.Value = l.scanner.EndTail() return nil } } return l.err }
// The readString methods lexs a string surrounding by double-quotes (") into the token t. // Any escaped or unicode characters will be replaced in t.Value. // It is the caller's responsibility to set t.Start and to assert that l.last == '"'. func (l *lexer) readString(t *token.Token) error { t.Kind = token.String var value bytes.Buffer for l.advance() { r := l.scanner.Rune() switch { case l.eof, r == token.LF, r == token.CR: return &SyntaxError{l.lastIndex, fmt.Errorf("unterminated string %q, encountered %U", value.String(), r)} case r == '"': t.End = l.lastIndex t.Value = value.String() if !l.advance() { return l.err } return nil case r < token.SPACE && r != token.TAB: return &SyntaxError{l.lastIndex, fmt.Errorf("Invalid character within String: %U", r)} case r != '\\': value.WriteRune(r) default: if !l.advance() { return l.err } switch l.scanner.Rune() { case '"': value.WriteRune('"') case '/': value.WriteRune('/') case '\\': value.WriteRune('\\') case 'b': value.WriteRune('\b') case 'f': value.WriteRune('\f') case 'n': value.WriteRune('\n') case 'r': value.WriteRune('\r') case 't': value.WriteRune('\t') case 'u': var uRunes [4]rune for i, _ := range uRunes { if !l.advance() { return l.err } if l.eof { return &SyntaxError{l.lastIndex, fmt.Errorf("invalid unicode; unexpected EOF")} } uRunes[i] = l.scanner.Rune() } b, err := hex.DecodeString(string(uRunes[:])) if err != nil { return &SyntaxError{l.lastIndex, err} } charCode := rune(binary.BigEndian.Uint16(b)) if charCode < 0 { return &SyntaxError{l.lastIndex, fmt.Errorf("Invalid character escape sequence: \\u%s", string(uRunes[:]))} } value.WriteRune(charCode) default: return &SyntaxError{l.lastIndex, fmt.Errorf("Invalid character escape sequence: \\%s", string(l.scanner.Rune()))} } } } return l.err }
// The readNumber method lexs a number into the token t. // It is the caller's responsibility to set t.Start and assert that l.last is a valid first character. // // Int: -?(0|[1-9][0-9]*) // Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)? func (l *lexer) readNumber(t *token.Token) error { l.scanner.StartTail() t.Kind = token.Int if l.scanner.Rune() == '-' { if !l.advance() { return l.err } if l.eof { return &SyntaxError{l.lastIndex, fmt.Errorf("invalid number; unexpected EOF following sign")} } } if l.scanner.Rune() == '0' { if !l.advance() { return l.err } if l.eof { return &SyntaxError{l.lastIndex, fmt.Errorf("invalid number; unexpected EOF following '0'")} } if l.isDigit() { return &SyntaxError{l.lastIndex, fmt.Errorf("invalid number, unexpected digit after 0: %U", l.scanner.Rune())} } } else { if !l.advanceDigits() { return l.err } if l.eof { t.End = l.lastIndex - 1 t.Value = l.scanner.EndTail() return nil } } // Decimal if l.scanner.Rune() == '.' { t.Kind = token.Float if !l.advanceDigits() { return l.err } if l.eof { return nil } } // Exponent if l.scanner.Rune() == 'E' || l.scanner.Rune() == 'e' { t.Kind = token.Float if !l.advance() { return l.err } if l.eof { return nil } switch { case l.scanner.Rune() == '+', l.scanner.Rune() == '-', l.isDigit(): if !l.advanceDigits() { return l.err } default: return &SyntaxError{l.lastIndex, fmt.Errorf("unterminated number; expected sign or digit but found %U", l.scanner.Rune())} } } t.End = l.lastIndex - 1 t.Value = l.scanner.EndTail() return nil }