// RenameTokens arbitrarily renames, in place, every token that is not a
// keyword, symbol, whitespace, reserved word, number, or quoted string.
func (tok *JSTokenizer) RenameTokens(tokens []string, length int) {
	// a token is off-limits for renaming if it is a keyword, a symbol,
	// whitespace, a reserved word, a number, or a quoted string
	offLimits := func(t string) bool {
		return tok.isKw(t) || tok.isSymbol(t) || tok.isSpace(t) ||
			tok.isReserved(t) || tokenize.IsNum(t) || tok.isQuoted(t)
	}
	// first pass: create the definition mapping from old names to new ones
	defs := make(map[string]string)
	for i, t := range tokens {
		if offLimits(t) {
			continue
		}
		// if the token comes after a dot, also check the part before the dot
		// (guarded so the first two tokens cannot index out of range)
		if i >= 2 && strings.TrimSpace(tokens[i-1]) == "." && offLimits(tokens[i-2]) {
			continue
		}
		// some tokens arrive fused with "return", "++", or "--"; rename the
		// identifier part and keep the fused form consistent with it
		switch {
		case strings.HasPrefix(t, "return"):
			ident := strings.TrimSpace(strings.TrimPrefix(t, "return"))
			defs[ident] = tokenize.MakeIdentString(length)
			defs[t] = "return " + defs[ident]
		case strings.HasSuffix(t, "++"):
			ident := strings.TrimSuffix(t, "++")
			defs[ident] = tokenize.MakeIdentString(length)
			defs[t] = defs[ident] + "++"
		case strings.HasSuffix(t, "--"):
			ident := strings.TrimSuffix(t, "--")
			defs[ident] = tokenize.MakeIdentString(length)
			defs[t] = defs[ident] + "--"
		default:
			defs[t] = tokenize.MakeIdentString(length)
		}
	}
	// second pass: replace tokens with their renames
	for i := range tokens {
		if ren, ok := defs[tokens[i]]; ok && len(tokens[i]) > 0 {
			tokens[i] = ren
		}
	}
}
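
// A minimal usage sketch (illustrative, not code from this package): it
// assumes a constructor such as NewJSTokenizer exists to seed the keyword
// and symbol tables; substitute however the tokenizer is actually built here.
//
//	tok := NewJSTokenizer() // hypothetical constructor
//	tokens := tok.Tokenize("var count = 1; count++;", nil)
//	tok.RenameTokens(tokens, 8) // "count" becomes a random 8-char identifier
//	fmt.Println(strings.Join(tokens, ""))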
// Tokenize splits JavaScript source into tokens, folding string literals,
// comments, regexp literals, and multi-character operators as it goes.
func (tok *JSTokenizer) Tokenize(code string, reserved []string) []string {
	tok.reserved = append(tok.reserved, reserved...)
	sort.Strings(tok.reserved)
	sort.Strings(tok.keywords)
	tokens := make([]string, 0)
	for {
		nsIndex, nsRune := tok.firstSymbol(code)
		if nsRune == rune(0) {
			break
		}
		tokens = append(tokens, code[:nsIndex])
		if !unicode.IsSpace(nsRune) {
			tokens = append(tokens, string(nsRune))
		}
		code = code[nsIndex+1:]
		// handle the special multi-character cases
		switch nsRune {
		case '"', '\'':
			// add the rest of the string literal as a single token
			// TODO: support escaped quotes
			nsIndex = strings.IndexRune(code, nsRune)
			if nsIndex >= 0 {
				tokens[len(tokens)-1] = fmt.Sprintf("%c%s%c", nsRune, code[:nsIndex], nsRune)
				// advance the code past the closing quote character
				code = code[nsIndex+1:]
			}
		case '+', '-':
			if len(code) > 0 && rune(code[0]) == nsRune {
				// postfix -- or ++: move code forward one rune, delete the
				// - or + token we just added, and append -- or ++ to the
				// end of the previous token
				// TODO: handle the prefix form too; maybe preprocess prefix
				// operators and turn them into postfix?
				code = code[1:]
				tokens = tokens[:len(tokens)-1]
				tokens[len(tokens)-1] += string([]rune{nsRune, nsRune})
			} else if len(code) > 0 && code[0] == '=' {
				// += or -=
				tokens[len(tokens)-1] = string(nsRune) + "="
				code = code[1:]
			}
		case '*':
			// *=
			if len(code) > 0 && code[0] == '=' {
				tokens[len(tokens)-1] = string(nsRune) + "="
				code = code[1:]
			}
		case '/':
			switch {
			case len(code) > 0 && code[0] == '/':
				// single-line comment: remove the / token we added and
				// ignore the rest of the line (or of the file, if there
				// is no trailing newline)
				tokens = tokens[:len(tokens)-1]
				if nl := strings.IndexRune(code, '\n'); nl >= 0 {
					code = code[nl+1:]
				} else {
					code = ""
				}
			case len(code) > 0 && code[0] == '*':
				// block comment: remove the / token and skip forward past
				// the closing "*/" (an unterminated comment eats the rest)
				tokens = tokens[:len(tokens)-1]
				if end := strings.Index(code, "*/"); end >= 0 {
					code = code[end+2:]
				} else {
					code = ""
				}
			case len(code) > 0 && code[0] == '=':
				// /=
				tokens[len(tokens)-1] = string(nsRune) + "="
				code = code[1:]
			default:
				// if the previous non-empty token is a symbol, this / starts
				// a regexp literal rather than a division (I think)
				prev := ""
				for j := len(tokens) - 2; j >= 0 && prev == ""; j-- {
					prev = strings.TrimSpace(tokens[j])
				}
				if tok.isSymbol(prev) {
					// consume up to the first symbol after the closing /
					// (keeping any flags), provided the closing / sits on
					// the current line
					nextSlash := strings.Index(code, "/")
					nextLine := strings.Index(code, "\n")
					if nextLine < 0 {
						nextLine = len(code)
					}
					if nextSlash >= 0 && nextSlash < nextLine {
						next, r := tok.firstSymbol(code[nextSlash+1:])
						next += nextSlash + 1
						if r != rune(0) {
							tokens[len(tokens)-1] = "/" + code[:next]
							code = code[next:]
						}
					}
				}
			}
		case '.':
			// floating point number: the previous token is a number and the
			// next one is too, so fuse them around the dot
			beforeDot := tokens[len(tokens)-2]
			if tokenize.IsNum(beforeDot) {
				pos, _ := tok.firstSymbol(code)
				nextTok := code[:pos]
				if tokenize.IsNum(nextTok) {
					tokens[len(tokens)-2] = fmt.Sprintf("%s.%s", beforeDot, nextTok)
					tokens[len(tokens)-1] = ""
					code = code[pos:]
				}
			}
		case '=':
			if len(code) > 1 && code[0] == '=' && code[1] == '=' {
				tokens[len(tokens)-1] = "==="
				code = code[2:]
			} else if len(code) > 0 && code[0] == '=' {
				tokens[len(tokens)-1] = "=="
				code = code[1:]
			}
		case '!':
			if len(code) > 1 && code[0] == '=' && code[1] == '=' {
				tokens[len(tokens)-1] = "!=="
				code = code[2:]
			} else if len(code) > 0 && code[0] == '=' {
				tokens[len(tokens)-1] = "!="
				code = code[1:]
			}
		case '<', '>':
			// <= or >=
			if len(code) > 0 && code[0] == '=' {
				tokens[len(tokens)-1] = string(nsRune) + "="
				code = code[1:]
			}
		}
		code = strings.TrimSpace(code)
	}
	if len(tokens) == 0 {
		return removeEmpty(tokens)
	}
	// go through the tokens and hardcode a space before and after any keyword
	// or number that borders a non-symbol token
	for i := range tokens[:len(tokens)-1] {
		if tok.isKw(tokens[i]) || tokenize.IsNum(tokens[i]) {
			if !tok.isSymbol(tokens[i+1]) {
				tokens[i] = tokens[i] + " "
				if tokens[i] == "return " {
					// fuse "return" with the identifier it returns so the
					// pair survives renaming as a single token
					tokens[i] += tokens[i+1]
					if i+2 < len(tokens) {
						if tok.isSymbol(tokens[i+2]) {
							tokens[i+1] = ""
						} else {
							tokens[i+1] = " "
						}
					}
				}
			}
			if i > 0 && !tok.isSymbol(tokens[i-1]) {
				tokens[i] = " " + tokens[i]
			}
		}
	}
	return removeEmpty(tokens)
}
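
// removeEmpty is called by Tokenize above but defined elsewhere in this
// package; this is a minimal sketch of the behavior the code relies on
// (dropping the "" placeholders left by the '.' merge and the final spacing
// pass), under the assumption that it does no other filtering:
func removeEmpty(tokens []string) []string {
	// filter in place, reusing the backing array of the input slice
	out := tokens[:0]
	for _, t := range tokens {
		if t != "" {
			out = append(out, t)
		}
	}
	return out
}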