// MakeConst makes an ideal constant from a literal // token and the corresponding literal string. func MakeConst(tok token.Token, lit string) Const { switch tok { case token.INT: var x big.Int _, ok := x.SetString(lit, 0) assert(ok) return Const{&x} case token.FLOAT: var y big.Rat _, ok := y.SetString(lit) assert(ok) return Const{&y} case token.IMAG: assert(lit[len(lit)-1] == 'i') var im big.Rat _, ok := im.SetString(lit[0 : len(lit)-1]) assert(ok) return Const{cmplx{big.NewRat(0, 1), &im}} case token.CHAR: assert(lit[0] == '\'' && lit[len(lit)-1] == '\'') code, _, _, err := strconv.UnquoteChar(lit[1:len(lit)-1], '\'') assert(err == nil) return Const{big.NewInt(int64(code))} case token.STRING: s, err := strconv.Unquote(lit) assert(err == nil) return Const{s} } panic("unreachable") }
// unquoteChar decodes a single-quoted, single-byte character literal such as
// 'a' or '\n' and returns its byte value.
//
// An error is returned when s is shorter than three bytes, is not wrapped in
// single quotes, holds a multibyte character (including \u and \U escapes),
// contains more than one character, or is not a valid escape sequence.
func unquoteChar(s string) (uint8, error) {
	if len(s) < 3 {
		return 0, fmt.Errorf("invalid char literal")
	}
	if s[0] != '\'' || s[len(s)-1] != '\'' {
		return 0, fmt.Errorf("invalid quoting char literal")
	}

	body := s[1 : len(s)-1]
	r, multibyte, rest, err := strconv.UnquoteChar(body, '\'')
	switch {
	case multibyte:
		return 0, fmt.Errorf("multibyte char not allowed")
	case rest != "":
		return 0, fmt.Errorf("char lit has a tail")
	case err != nil:
		return 0, fmt.Errorf("invalid char literal: %s, %v", body, err)
	case r > math.MaxUint8 || r < 0:
		return 0, fmt.Errorf("invalid char value")
	}
	return uint8(r), nil
}
func (d *Decoder) loadUnicode() error { line, _, err := d.r.ReadLine() if err != nil { return err } sline := string(line) buf := bytes.Buffer{} for len(sline) >= 6 { var r rune var err error r, _, sline, err = strconv.UnquoteChar(sline, '\'') if err != nil { return err } _, err = buf.WriteRune(r) if err != nil { return err } } if len(sline) > 0 { return fmt.Errorf("characters remaining after loadUnicode operation: %s", sline) } d.push(buf.String()) return nil }
/* Returns Query Words, out of order and without repetitions. */ func getWordsFromQueryFile(file string) []string { tokenMap := make(map[string]bool) _, data := ReadFile(file) lowcaseData := strings.ToLower(string(data)) lines := strings.Split(string(lowcaseData), "\n") for _, line := range lines { tokens := strings.Split(line, " ") if tokens != nil { for i := 0; i < len(tokens); i++ { if tokens[i] != "" && tokens[i] != "." { //To handle the null-charachter case v, _, _, _ := strconv.UnquoteChar(tokens[i], 0) if v != 0 { tokenMap[tokens[i]] = true } } } } } returnArray := make([]string, len(tokenMap)) index := 0 for word := range tokenMap { returnArray[index] = word index++ } return returnArray }
func readTestSampleToMap(file string) map[string]SearchResults { testQueries := make(map[string]SearchResults) data, _ := ioutil.ReadFile(file) lines := strings.Split(string(data), "\r\n") for _, line := range lines { tokens := strings.Split(line, " ") if tokens != nil && tokens[0] != "" { //To handle the null-charachter case v, _, _, _ := strconv.UnquoteChar(tokens[0], 0) if v != 0 { maplist := make(map[string]bool, len(tokens)-1) for i := 1; i < len(tokens); i++ { if tokens[i] != "" { maplist[tokens[i]] = true } } testQueries[tokens[0]] = maplist } } } return testQueries }
func evalBasicLit(ctx *Ctx, lit *BasicLit) (reflect.Value, bool, error) { switch lit.Kind { case token.CHAR: if r, _, tail, err := strconv.UnquoteChar(lit.Value[1:len(lit.Value)-1], '\''); err != nil { return reflect.Value{}, false, ErrBadBasicLit{at(ctx, lit)} } else if tail != "" { // parser.ParseExpr() should raise a syntax error before we get here. panic("go-interactive: bad char lit " + lit.Value) } else { return reflect.ValueOf(r), false, nil } case token.STRING: str, err := strconv.Unquote(string(lit.Value)) return reflect.ValueOf(str), true, err case token.INT: i, err := strconv.ParseInt(lit.Value, 0, 0) return reflect.ValueOf(i), false, err case token.FLOAT: f, err := strconv.ParseFloat(lit.Value, 64) return reflect.ValueOf(f), false, err case token.IMAG: f, err := strconv.ParseFloat(lit.Value[:len(lit.Value)-1], 64) return reflect.ValueOf(complex(0, f)), false, err default: return reflect.Value{}, false, errors.New(fmt.Sprintf("BasicLit: Bad token type (%+v)", lit)) } }
// makeRuneConst returns the int64 code point for the rune literal lit.
//
// The first and last bytes of lit are dropped (they are taken to be the
// quotes) and the first character or escape sequence of what remains is
// decoded. The result is nil if lit is shorter than two bytes or if that
// first character cannot be decoded.
func makeRuneConst(lit string) interface{} {
	if len(lit) < 2 {
		return nil
	}
	inner := lit[1 : len(lit)-1]
	code, _, _, err := strconv.UnquoteChar(inner, '\'')
	if err != nil {
		return nil
	}
	return int64(code)
}
// replaceUnicode converts hexadecimal Unicode codepoint notations to a
// one-rune string.
//
// When the second byte of s is '#', the hexadecimal digits are read from
// s[3:len(s)-1] (the shape of a numeric reference such as "&#x41;").
// Otherwise s is decoded as a Go-style escape such as `\u0041`.
//
// It assumes the input string is correctly formatted: parse errors are
// ignored and s must be at least two bytes long.
func replaceUnicode(s string) string {
	if s[1] == '#' {
		r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32)
		// Convert through rune: string(int64) is rejected by vet's
		// stringintconv check, which `go test` runs by default.
		return string(rune(r))
	}
	r, _, _, _ := strconv.UnquoteChar(s, 0)
	return string(r)
}
func parseCharacters(chars string) []string { parseSingle := func(s string) (r rune, tail string, escaped bool) { if s[0] == '\\' { if s[1] == 'u' || s[1] == 'U' { r, _, tail, err := strconv.UnquoteChar(s, 0) failOnError(err) return r, tail, false } else if strings.HasPrefix(s[1:], "&") { return '&', s[6:], false } return rune(s[1]), s[2:], true } else if strings.HasPrefix(s, """) { return '"', s[6:], false } r, sz := utf8.DecodeRuneInString(s) return r, s[sz:], false } chars = strings.Trim(chars, "[ ]") list := []string{} var r, last, end rune for len(chars) > 0 { if chars[0] == '{' { // character sequence buf := []rune{} for chars = chars[1:]; len(chars) > 0; { r, chars, _ = parseSingle(chars) if r == '}' { break } if r == ' ' { log.Fatalf("space not supported in sequence %q", chars) } buf = append(buf, r) } list = append(list, string(buf)) last = 0 } else { // single character escaped := false r, chars, escaped = parseSingle(chars) if r != ' ' { if r == '-' && !escaped { if last == 0 { log.Fatal("'-' should be preceded by a character") } end, chars, _ = parseSingle(chars) for ; last <= end; last++ { list = append(list, string(last)) } last = 0 } else { list = append(list, string(r)) last = r } } } } return list }
// main demonstrates strconv.UnquoteChar by decoding a quoted sample string
// one character at a time and printing the decoder state at each step.
//
// The state is printed before each decoding step, so the first line shows
// the zero values alongside the untouched input, and the result of the final
// decoding step (after which the input is empty) is never printed.
func main() {
	sr := `\"大\\家\\好!\"`
	var (
		c   rune
		mb  bool
		err error
	)
	for len(sr) > 0 {
		fmt.Println(c, mb, sr, err)
		c, mb, sr, err = strconv.UnquoteChar(sr, '"')
	}
}
// copied from go source docs: strconv.Unquote // removed restriction of single quote 1 character length func unquote(s string) (t string, err error) { n := len(s) if n < 2 { return "", strconv.ErrSyntax } quote := s[0] if quote != s[n-1] { return "", strconv.ErrSyntax } s = s[1 : n-1] if quote == '`' { if contains(s, '`') { return "", strconv.ErrSyntax } return s, nil } if quote != '"' && quote != '\'' { return "", strconv.ErrSyntax } if contains(s, '\n') { return "", strconv.ErrSyntax } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"': return s, nil case '\'': r, size := utf8.DecodeRuneInString(s) if size == len(s) && (r != utf8.RuneError || size != 1) { return s, nil } } } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := strconv.UnquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } } return string(buf), nil }
func (p *Compiler) pushByte(lit string) { v, multibyte, tail, err := strconv.UnquoteChar(lit[1:len(lit)-1], '\'') if err != nil { panic("invalid char `" + lit + "`: " + err.Error()) } if tail != "" || multibyte { panic("invalid char: " + lit) } p.code.Block(exec.Push(byte(v))) }
func checkBasicLit(ctx *Ctx, lit *ast.BasicLit, env *Env) (*BasicLit, []error) { aexpr := &BasicLit{BasicLit: lit} switch lit.Kind { case token.CHAR: if r, _, tail, err := strconv.UnquoteChar(lit.Value[1:len(lit.Value)-1], '\''); err != nil { return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } else if tail != "" { // parser.ParseExpr() should raise a syntax error before we get here. panic("go-interactive: bad char lit " + lit.Value) } else { aexpr.constValue = constValueOf(NewConstRune(r)) aexpr.knownType = knownType{ConstRune} return aexpr, nil } case token.STRING: if str, err := strconv.Unquote(string(lit.Value)); err != nil { return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } else { aexpr.constValue = constValueOf(str) aexpr.knownType = knownType{ConstString} return aexpr, nil } case token.INT: if i, ok := NewConstInteger(lit.Value); !ok { return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } else { aexpr.constValue = constValueOf(i) aexpr.knownType = knownType{ConstInt} return aexpr, nil } case token.FLOAT: if f, ok := NewConstFloat(lit.Value); !ok { return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } else { aexpr.constValue = constValueOf(f) aexpr.knownType = knownType{ConstFloat} return aexpr, nil } case token.IMAG: if i, ok := NewConstImag(lit.Value); !ok { return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } else { aexpr.constValue = constValueOf(i) aexpr.knownType = knownType{ConstComplex} return aexpr, nil } default: return aexpr, []error{ErrBadBasicLit{at(ctx, lit)}} } }
// A better string unquoter that handles unicode sequences. Can't use Go's // standard unquoter because we need to handle single-quoted strings too. func unquote(chars []byte) (string, bool) { if !(chars[0] == '"' || chars[0] == '\'') { // it's not quoted return string(chars), false } if len(chars) == 2 { // it's just the quotes return "", false } remainder := string(chars[1 : len(chars)-1]) quotemark := chars[0] result := make([]rune, 0) var unquotedRune rune var err error if remainder[0] == '\\' && remainder[1] != 'u' { result = append(result, unEscape(remainder[0:2])) remainder = remainder[2:len(remainder)] } else { unquotedRune, _, remainder, err = strconv.UnquoteChar(remainder, quotemark) if err != nil { return "", true } result = append(result, unquotedRune) } for len(remainder) > 0 { if remainder[0] == '\\' && remainder[1] != 'u' { result = append(result, unEscape(remainder[0:2])) remainder = remainder[2:len(remainder)] } else { unquotedRune, _, remainder, err = strconv.UnquoteChar(remainder, quotemark) if err != nil { return "", true } result = append(result, unquotedRune) } } return string(result), false }
func (a *exprInfo) compileCharLit(lit string) *expr { if lit[0] != '\'' { // Caught by parser a.silentErrors++ return nil } v, _, tail, err := strconv.UnquoteChar(lit[1:], '\'') if err != nil || tail != "'" { // Caught by parser a.silentErrors++ return nil } return a.compileIdealInt(big.NewInt(int64(v)), "character literal") }
/** Opcode: UNICODE Push a Python Unicode string object. The argument is a raw-unicode-escape encoding of a Unicode string, and so may contain embedded escape sequences. The argument extends until the next newline character. ** Stack before: [] Stack after: [unicode] **/ func (pm *PickleMachine) opcode_UNICODE() error { str, err := pm.readBytes() if err != nil { return err } f := make([]rune, 0, len(str)) var total int var consumed int total = len(str) for total != consumed { h := str[consumed] //Python 'raw-unicode-escape' doesnt //escape extended ascii if h > 127 { ea := utf16.Decode([]uint16{uint16(h)}) f = append(f, ea...) consumed += 1 continue } //Multibyte unicode points are escaped //so use "UnquoteChar" to handle those var vr rune for _, i := range unquoteInputs { pre := string(str[consumed:]) var post string vr, _, post, err = strconv.UnquoteChar(pre, i) if err == nil { consumed += len(pre) - len(post) break } } if err != nil { c := str[0] return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), c, err) } f = append(f, vr) } pm.push(string(f)) return nil }
// ExampleUnquoteChar decodes the first character of a string that starts
// with an escaped double quote and prints the decoded value, whether it is
// multibyte, and the remaining tail.
func ExampleUnquoteChar() {
	const quoted = `\"Fran & Freddie's Diner\"`

	value, multibyte, tail, err := strconv.UnquoteChar(quoted, '"')
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("value:", string(value))
	fmt.Println("multibyte:", multibyte)
	fmt.Println("tail:", tail)

	// Output:
	// value: "
	// multibyte: false
	// tail: Fran & Freddie's Diner\"
}
func (t Trie) IsPrefix(prefix string) (bool, *TrieNode) { prefix = strings.ToLower(prefix) splitPrefix := strings.Split(prefix, "") var child *TrieNode var exists bool child = t.root for _, char := range splitPrefix { currentRune, _, _, _ := strconv.UnquoteChar(char, 0) child, exists = child.FindChild(currentRune) if !exists { break } } return exists, child }
// unquote is a simplified strconv.Unquote that treats ' and " equally. // Raw quotes are Go-like and bounded by ``. // The return value is the string and a boolean rather than error, which // was almost always the same anyway. func unquote(s string) (t string, ok bool) { n := len(s) if n < 2 { return } quote := s[0] if quote != s[n-1] { return } s = s[1 : n-1] if quote == '`' { if contains(s, '`') { return } return s, true } if quote != '"' && quote != '\'' { return } if contains(s, '\n') { return } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { return s, true } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := strconv.UnquoteChar(s, quote) if err != nil { return } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } } return string(buf), true }
// unquote decodes the escape sequences in a string that is NOT wrapped in
// quotes, e.g. the four characters `\r\n` become a carriage return and a
// line feed. `\"` is accepted; an unescaped double quote is a syntax error.
//
// On error the characters decoded so far are returned along with the error.
//
// TODO: this exists because strconv.Unquote does not do what is needed for
// bare input such as `\r\n`; there has to be a better way.
func unquote(in string) (string, error) {
	var decoded []rune
	for rest := in; rest != ""; {
		r, _, tail, err := strconv.UnquoteChar(rest, '"')
		if err != nil {
			return string(decoded), err
		}
		decoded = append(decoded, r)
		rest = tail
	}
	return string(decoded), nil
}
func parseInput(args []string) []Input { input := []Input{} for _, s := range args { rs := []rune{} for len(s) > 0 { var r rune r, _, s, _ = strconv.UnquoteChar(s, '\'') rs = append(rs, r) } s = string(rs) if *doNorm { s = norm.NFC.String(s) } input = append(input, makeInputString(s)) } return input }
// MakeFromLiteral returns the corresponding integer, floating-point, // imaginary, character, or string value for a Go literal string. The // tok value must be one of token.INT, token.FLOAT, token.IMAG, // token.CHAR, or token.STRING. The final argument must be zero. // If the literal string syntax is invalid, the result is an Unknown. func MakeFromLiteral(lit string, tok token.Token, zero uint) Value { if zero != 0 { panic("MakeFromLiteral called with non-zero last argument") } switch tok { case token.INT: if x, err := strconv.ParseInt(lit, 0, 64); err == nil { return int64Val(x) } if x, ok := newInt().SetString(lit, 0); ok { return intVal{x} } case token.FLOAT: if x := makeFloatFromLiteral(lit); x != nil { return x } case token.IMAG: if n := len(lit); n > 0 && lit[n-1] == 'i' { if im := makeFloatFromLiteral(lit[:n-1]); im != nil { return makeComplex(int64Val(0), im) } } case token.CHAR: if n := len(lit); n >= 2 { if code, _, _, err := strconv.UnquoteChar(lit[1:n-1], '\''); err == nil { return MakeInt64(int64(code)) } } case token.STRING: if s, err := strconv.Unquote(lit); err == nil { return MakeString(s) } default: panic(fmt.Sprintf("%v is not a valid token", tok)) } return unknownVal{} }
func (pm *PickleMachine) opcode_STRING() error { str, err := pm.readString() if err != nil { return err } //For whatever reason, the string is quoted. So the first and last character //should always be the single quote, unless the string contains a single quote, then it is double quoted if len(str) < 2 { return fmt.Errorf("For STRING opcode, argument has invalid length %d", len(str)) } if (str[0] != '\'' || str[len(str)-1] != '\'') && (str[0] != '"' || str[len(str)-1] != '"') { return fmt.Errorf("For STRING opcode, argument has poorly formed value %q", str) } v := str[1 : len(str)-1] f := make([]rune, 0, len(v)) for len(v) != 0 { var vr rune var replacement string for _, i := range unquoteInputs { vr, _, replacement, err = strconv.UnquoteChar(v, i) if err == nil { break } } if err != nil { c := v[0] return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), c, err) } v = replacement f = append(f, vr) } pm.push(string(f)) return nil }
func parseRuneLiteral(p *Parser) Expression { text := p.lex.Text() if len(text) < 3 { panic("bad rune literal: " + text) } r, _, _, err := strconv.UnquoteChar(text[1:], '\'') if err != nil { // The lexer may allow bad rune literals (>0x0010FFFF and // surrogate halves—whatever they are). panic(&MalformedLiteral{ Type: "rune literal", Text: p.text(), Start: p.start(), End: p.end(), }) } l := &IntegerLiteral{Value: big.NewInt(int64(r)), Rune: true, span: p.span()} p.next() return l }
func (t Trie) BuildTree(chars []string, id int, parent *TrieNode) { if len(chars) == 0 { parent.Delim = true parent.WordId = append(parent.WordId, id) return } // Get char as rune currentRune, _, _, _ := strconv.UnquoteChar(chars[0], 0) // Delete first entry chars = append(chars[:0], chars[0+1:]...) trieNode, exists := parent.FindChild(currentRune) if !exists { trieNode = parent.AddChild(currentRune) } t.BuildTree(chars, id, trieNode) }
func exprValue(x ast.Expr) value { switch t := x.(type) { case *ast.BasicLit: switch t.Kind { case token.INT: i, _ := strconv.ParseInt(t.Value, 10, 0) return i case token.FLOAT: f, _ := strconv.ParseFloat(t.Value, 64) return f case token.CHAR: r, _, _, _ := strconv.UnquoteChar(t.Value, '\'') return r case token.STRING: s, _ := strconv.Unquote(t.Value) return s } return t.Value } return nil }
// MakeFromLiteral returns the corresponding integer, floating-point, // imaginary, character, or string value for a Go literal string. // If prec > 0, prec specifies an upper limit for the precision of // a numeric value. If the literal string is invalid, the result is // nil. // BUG(gri) Only prec == 0 is supported at the moment. func MakeFromLiteral(lit string, tok token.Token, prec uint) Value { if prec != 0 { panic("limited precision not supported") } switch tok { case token.INT: if x, err := strconv.ParseInt(lit, 0, 64); err == nil { return int64Val(x) } if x, ok := new(big.Int).SetString(lit, 0); ok { return intVal{x} } case token.FLOAT: if x, ok := new(big.Rat).SetString(lit); ok { return normFloat(x) } case token.IMAG: if n := len(lit); n > 0 && lit[n-1] == 'i' { if im, ok := new(big.Rat).SetString(lit[0 : n-1]); ok { return normComplex(big.NewRat(0, 1), im) } } case token.CHAR: if n := len(lit); n >= 2 { if code, _, _, err := strconv.UnquoteChar(lit[1:n-1], '\''); err == nil { return int64Val(code) } } case token.STRING: if s, err := strconv.Unquote(lit); err == nil { return stringVal(s) } } return nil }
// From go-src:strconv.Unquote but modified so that a quote character can // be provided instead of requiring the string to be pre-quoted func unquote(s string, quote byte) (t string, err error) { n := len(s) if n == 0 { return "", nil } if quote == '`' { if contains(s, '`') { return "", strconv.ErrSyntax } return s, nil } if quote != '"' && quote != '\'' { return "", strconv.ErrSyntax } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { return s, nil } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := strconv.UnquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } } return string(buf), nil }
// MakeFromLiteral returns the corresponding literal value. // If the literal has illegal format, the result is nil. func MakeFromLiteral(lit string, tok token.Token) Value { switch tok { case token.INT: if x, err := strconv.ParseInt(lit, 0, 64); err == nil { return int64Val(x) } if x, ok := new(big.Int).SetString(lit, 0); ok { return intVal{x} } case token.FLOAT: if x, ok := new(big.Rat).SetString(lit); ok { return normFloat(x) } case token.IMAG: if n := len(lit); n > 0 && lit[n-1] == 'i' { if im, ok := new(big.Rat).SetString(lit[0 : n-1]); ok { return normComplex(big.NewRat(0, 1), im) } } case token.CHAR: if n := len(lit); n >= 2 { if code, _, _, err := strconv.UnquoteChar(lit[1:n-1], '\''); err == nil { return int64Val(code) } } case token.STRING: if s, err := strconv.Unquote(lit); err == nil { return stringVal(s) } } // TODO(gri) should we instead a) return unknown, or b) an error? return nil }
// MakeFromLiteral creates a constant value from a string. func MakeFromLiteral(lit string, tok scan.Type, prec uint) Value { if prec != 0 { panic("limited precision not supported") } switch tok { case scan.Number: if x, err := strconv.ParseInt(lit, 0, 64); err == nil { return int64Val(x) } if x, ok := new(big.Int).SetString(lit, 0); ok { return intVal{x} } case scan.Rune: // special case because UnquoteChar fails on these if lit == "'\\'" { return int64Val('\\') } else if lit == "'''" { return int64Val('\'') } if n := len(lit); n >= 2 { if code, _, _, err := strconv.UnquoteChar(lit[1:n-1], '\''); err == nil { return int64Val(code) } } case scan.String: if s, err := strconv.Unquote(lit); err == nil { return stringVal(s) } } return nil }