func (s *Scanner) scanNumber() *token.Token { start := s.chOffset for s.isDigit(s.ch) { s.next() } return token.NewToken(token.NUMBER, string(s.src[start:s.chOffset])) }
func (s *Scanner) Next() (tok *token.Token) { next: if s.savedTok != nil { tok = s.savedTok s.savedTok = nil } else { s.skipWhitespace() switch ch := s.ch; { case s.isLetter(ch): tok = s.scanName() case s.isDigit(ch): tok = s.scanNumber() case ch == '"': tok = s.scanStringConst() case ch == '\n': s.state = maybesemi goto next default: tok = s.scanOperator(ch) } switch s.state { case maybeinsert: if isDoStart(tok) { s.savedTok = tok tok = token.NewToken(token.DO, "do") s.state = normal } else if !isCommandEnd(tok) { s.state = normal } case maybesemi: if isSemiStart(tok) { s.savedTok = tok tok = token.NewToken(token.SEMICOLON, ";") } s.state = normal case normal: if isCommandEnd(tok) { s.state = maybeinsert } } } return }
func (s *Scanner) scanStringConst() *token.Token { start := s.offset - 1 s.next() for s.ch != '"' { s.next() } s.next() return token.NewToken(token.STRINGCONST, string(s.src[start:s.chOffset])) }
func (s *Scanner) scanName() *token.Token { // 2.1.2 Hardware Conventions and Preprocessor Rules // (a) // (1) A name is either a single small letter or a sequence of letters // and digits starting with a capital letter. The character immediately // following a name may not be a letter or a digit. start := s.chOffset for s.isLetter(s.ch) || s.isDigit(s.ch) { s.next() } str := s.src[start:s.chOffset] // (2) A sequence of two or more small letters which is not part of a NAME, // SECTBRA, SECTKET or STRINGCONST is a reserved system word and may be used // to represent a canonical symbol. kind := token.NAME literal := string(str) if len(str) > 1 { kind = token.LookupName(literal) } return token.NewToken(kind, literal) }
func (s *Scanner) scanOperator(ch rune) *token.Token { kind := token.ILLEGAL lit := string(ch) s.next() switch ch { case '/': if s.ch == '/' { s.next() kind, lit = token.COMMENT, s.scanComment() } else { kind = token.DIV } case '+': kind, lit = token.PLUS, "+" case '-': if s.ch == '>' { s.next() kind, lit = token.COND, "->" } else { kind = token.MINUS } case '=': kind = token.EQ case '!': if s.ch == '=' { s.next() kind, lit = token.NE, "!=" } else { kind = token.NOT } case '<': switch s.ch { case '=': s.next() kind, lit = token.LE, "<=" case '<': s.next() kind, lit = token.LSHIFT, "<<" default: kind = token.LS } case '>': switch s.ch { case '=': s.next() kind, lit = token.GE, ">=" case '>': s.next() kind, lit = token.RSHIFT, ">>" default: kind = token.GR } case '&': kind = token.LOGAND case '|': kind = token.LOGOR case ',': kind = token.COMMA case ':': if s.ch == '=' { kind, lit = token.ASS, ":=" } else { kind = token.COLON } case '$': switch s.ch { case '(': s.next() kind, lit = token.SECTBRA, "$(" case ')': s.next() kind, lit = token.SECTKET, "$)" } case '(': kind = token.RBRA case ')': kind = token.RKET case '[': kind = token.SBRA case ']': kind = token.SKET case ';': kind = token.SEMICOLON case '*': kind = token.STAR case -1: kind, lit = token.EOF, "" } return token.NewToken(kind, lit) }
t.Errorf("bad token: got '%s', expected '%s'", tok, etok) } } func assertTokensEqualSource(t *testing.T, toks []*token.Token, str string) { var s Scanner s.Init([]byte(str)) for _, etok := range toks { tok := s.Next() assertTokensEqual(t, tok, etok) } } var test_single_token = [...]*token.Token{ token.NewToken(token.EOF, ""), token.NewToken(token.COMMENT, "// this is a BCPL a comment!\n"), // Names. token.NewToken(token.NAME, "Global"), token.NewToken(token.NAME, "Let"), token.NewToken(token.NAME, "a"), token.NewToken(token.NAME, "Z"), // Numbers. token.NewToken(token.NUMBER, "1"), token.NewToken(token.NUMBER, "98765"), // String constants. token.NewToken(token.STRINGCONST, "\"foo bar baz\n\""),