Ejemplo n.º 1
0
func unicodeLen(str string) int {
	index := strings.Index(str, `\u`)
	for {
		if index == -1 {
			break
		}

		r1str := str[index+2:index+6] + " " + str[index+8:index+12]

		var r1, r2 rune

		fmt.Sscanf(r1str, "%x %x", &r1, &r2)
		Trace.Println("runes", r1, r2)
		if utf16.IsSurrogate(r1) && utf16.IsSurrogate(r2) {
			r := utf16.DecodeRune(r1, r2)
			if r == replacementChar {
				Warning.Println("Unable to deccode runes")
				str = strings.Replace(str, `\u`, "0x", 2)
			} else {
				str = str[:index] + " " + str[index+12:len(str)]
			}
		}
		index = strings.Index(str, `\u`)
	}
	return len(str)
}
Ejemplo n.º 2
0
func decodeUTF16beRune(p []byte) (r rune, size int, status Status) {
	if len(p) < 2 {
		status = NO_ROOM
		return
	}

	c := rune(p[0])<<8 + rune(p[1])

	if utf16.IsSurrogate(c) {
		if len(p) < 4 {
			status = NO_ROOM
			return
		}

		c2 := rune(p[2])<<8 + rune(p[3])
		c = utf16.DecodeRune(c, c2)

		if c == 0xfffd {
			return c, 2, INVALID_CHAR
		} else {
			return c, 4, SUCCESS
		}
	}

	return c, 2, SUCCESS
}
Ejemplo n.º 3
0
// readConsole reads utf16 characters from console File,
// encodes them into utf8 and stores them in buffer buf.
// It returns the number of utf8 bytes read and an error, if any.
func (f *File) readConsole(buf []byte) (n int, err error) {
	if len(buf) == 0 {
		return 0, nil
	}
	if len(f.readbuf) > 0 {
		return f.copyReadConsoleBuffer(buf)
	}
	wchar, err := f.readOneUTF16FromConsole()
	if err != nil {
		return 0, err
	}
	r := rune(wchar)
	if utf16.IsSurrogate(r) {
		wchar, err := f.readOneUTF16FromConsole()
		if err != nil {
			return 0, err
		}
		r = utf16.DecodeRune(r, rune(wchar))
	}
	if nr := utf8.RuneLen(r); nr > len(buf) {
		start := len(f.readbuf)
		for ; nr > 0; nr-- {
			f.readbuf = append(f.readbuf, 0)
		}
		utf8.EncodeRune(f.readbuf[start:cap(f.readbuf)], r)
	} else {
		utf8.EncodeRune(buf, r)
		buf = buf[nr:]
		n += nr
	}
	if n > 0 {
		return n, nil
	}
	return f.copyReadConsoleBuffer(buf)
}
Ejemplo n.º 4
0
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if u.currentBOMPolicy == ExpectBOM {
		if len(src) < 2 {
			return 0, 0, transform.ErrShortSrc
		}
		switch {
		case src[0] == 0xfe && src[1] == 0xff:
			u.endianness = BigEndian
		case src[0] == 0xff && src[1] == 0xfe:
			u.endianness = LittleEndian
		default:
			return 0, 0, ErrMissingBOM
		}
		u.currentBOMPolicy = IgnoreBOM
		nSrc = 2
	}

	var r rune
	var dSize, sSize int
	for nSrc < len(src) {
		if nSrc+1 < len(src) {
			x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
			if u.endianness == LittleEndian {
				x = x>>8 | x<<8
			}
			r, sSize = rune(x), 2
			if utf16.IsSurrogate(r) {
				if nSrc+3 < len(src) {
					x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
					if u.endianness == LittleEndian {
						x = x>>8 | x<<8
					}
					// Safe for next iteration if it is not a high surrogate.
					if isHighSurrogate(rune(x)) {
						r, sSize = utf16.DecodeRune(r, rune(x)), 4
					}
				} else if !atEOF {
					err = transform.ErrShortSrc
					break
				}
			}
			if dSize = utf8.RuneLen(r); dSize < 0 {
				r, dSize = utf8.RuneError, 3
			}
		} else if atEOF {
			// Single trailing byte.
			r, dSize, sSize = utf8.RuneError, 3, 1
		} else {
			err = transform.ErrShortSrc
			break
		}
		if nDst+dSize > len(dst) {
			err = transform.ErrShortDst
			break
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
		nSrc += sSize
	}
	return nDst, nSrc, err
}
Ejemplo n.º 5
0
func (d *jsonDecDriver) appendStringAsBytes() {
	if d.tok == 0 {
		var b byte
		r := d.r
		for b = r.readn1(); jsonIsWS(b); b = r.readn1() {
		}
		d.tok = b
	}

	// handle null as a string
	if d.tok == 'n' {
		d.readStrIdx(10, 13) // ull
		d.bs = d.bs[:0]
		return
	}

	if d.tok != '"' {
		d.d.errorf("json: expect char '%c' but got char '%c'", '"', d.tok)
	}
	d.tok = 0

	v := d.bs[:0]
	var c uint8
	r := d.r
	for {
		c = r.readn1()
		if c == '"' {
			break
		} else if c == '\\' {
			c = r.readn1()
			switch c {
			case '"', '\\', '/', '\'':
				v = append(v, c)
			case 'b':
				v = append(v, '\b')
			case 'f':
				v = append(v, '\f')
			case 'n':
				v = append(v, '\n')
			case 'r':
				v = append(v, '\r')
			case 't':
				v = append(v, '\t')
			case 'u':
				rr := d.jsonU4(false)
				// fmt.Printf("$$$$$$$$$: is surrogate: %v\n", utf16.IsSurrogate(rr))
				if utf16.IsSurrogate(rr) {
					rr = utf16.DecodeRune(rr, d.jsonU4(true))
				}
				w2 := utf8.EncodeRune(d.bstr[:], rr)
				v = append(v, d.bstr[:w2]...)
			default:
				d.d.errorf("json: unsupported escaped value: %c", c)
			}
		} else {
			v = append(v, c)
		}
	}
	d.bs = v
}
Ejemplo n.º 6
0
func bytesToUTF16String(in []byte) string {
	var out bytes.Buffer
	out.WriteString(`u"`)
	for i := 0; i < len(in)/2; i++ {
		u := rune(in[2*i])<<8 | rune(in[2*i+1])
		if utf16.IsSurrogate(u) && i+1 < len(in)/2 {
			u2 := rune(in[2*i+2])<<8 | rune(in[2*i+3])
			r := utf16.DecodeRune(u, u2)
			if r != unicode.ReplacementChar {
				if unicode.IsPrint(r) {
					out.WriteRune(r)
				} else {
					fmt.Fprintf(&out, `\U%08x`, r)
				}
				i++
				continue
			}
		}

		if u == '\n' {
			out.WriteString(`\n`)
		} else if u == '"' {
			out.WriteString(`\"`)
		} else if u == '\\' {
			out.WriteString(`\\`)
		} else if !utf16.IsSurrogate(u) && unicode.IsPrint(u) {
			out.WriteRune(u)
		} else if u <= 0xff {
			fmt.Fprintf(&out, `\x%02x`, u)
		} else {
			fmt.Fprintf(&out, `\u%04x`, u)
		}
	}
	out.WriteString(`"`)

	// Print the trailing byte if needed.
	if len(in)&1 == 1 {
		fmt.Fprintf(&out, " `\\x%02x`", in[len(in)-1])
	}

	return out.String()
}
Ejemplo n.º 7
0
func appendUTF16LE(v []byte, val string) []byte {
	for _, r := range val {
		if utf16.IsSurrogate(r) {
			r1, r2 := utf16.EncodeRune(r)
			v = append16(v, uint16(r1))
			v = append16(v, uint16(r2))
		} else {
			v = append16(v, uint16(r))
		}
	}
	return v
}
Ejemplo n.º 8
0
func utf16le(val string) []byte {
	var v []byte
	for _, r := range val {
		if utf16.IsSurrogate(r) {
			r1, r2 := utf16.EncodeRune(r)
			v = append(v, byte(r1), byte(r1>>8))
			v = append(v, byte(r2), byte(r2>>8))
		} else {
			v = append(v, byte(r), byte(r>>8))
		}
	}
	return v
}
Ejemplo n.º 9
0
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if u.bomPolicy == ExpectBOM {
		if len(src) < 2 {
			return 0, 0, transform.ErrShortSrc
		}
		switch {
		case src[0] == 0xfe && src[1] == 0xff:
			u.endianness = BigEndian
		case src[0] == 0xff && src[1] == 0xfe:
			u.endianness = LittleEndian
		default:
			return 0, 0, ErrMissingBOM
		}
		u.bomPolicy = IgnoreBOM
		nSrc = 2
	}

	for nSrc+1 < len(src) {
		x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
		if u.endianness == LittleEndian {
			x = x>>8 | x<<8
		}
		r, sSize := rune(x), 2
		if utf16.IsSurrogate(r) {
			if nSrc+3 >= len(src) {
				break
			}
			x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
			if u.endianness == LittleEndian {
				x = x>>8 | x<<8
			}
			r, sSize = utf16.DecodeRune(r, rune(x)), 4
		}
		dSize := utf8.RuneLen(r)
		if dSize < 0 {
			r, dSize = utf8.RuneError, 3
		}
		if nDst+dSize > len(dst) {
			err = transform.ErrShortDst
			break
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
		nSrc += sSize
	}

	if err == nil && nSrc != len(src) {
		err = transform.ErrShortSrc
	}
	return nDst, nSrc, err
}
Ejemplo n.º 10
0
func decodeUTF16(input chan byte, output chan rune, BE bool) {
	defer close(output)

	var surrogatePair rune
	for {
		r, ok, e := readNBytes(input, 4, BE)

		if !ok {
			if nil != e {
				panic(e)
			}
			break
		}

		if 0 != surrogatePair {
			if !utf16.IsSurrogate(r) {
				break
				//ошибка поймается в конце
			}
			r = utf16.DecodeRune(surrogatePair, r)

			surrogatePair = 0
		} else {
			if utf16.IsSurrogate(r) {
				surrogatePair = r
				continue
			}
		}

		output <- r
	}

	if 0 != surrogatePair {
		panic("UTF16 ERROR: Не хватает второй половины суррогатной пары\n")
	}
}
Ejemplo n.º 11
0
// utf7dec extracts UTF-16-BE bytes from Base64 data and converts them to UTF-8.
// A nil slice is returned if the encoding is invalid.
func utf7dec(b64 []byte) []byte {
	var b []byte

	// Allocate a single block of memory large enough to store the Base64 data
	// (if padding is required), UTF-16-BE bytes, and decoded UTF-8 bytes.
	// Since a 2-byte UTF-16 sequence may expand into a 3-byte UTF-8 sequence,
	// double the space allocation for UTF-8.
	if n := len(b64); b64[n-1] == '=' {
		return nil
	} else if n&3 == 0 {
		b = make([]byte, u7enc.DecodedLen(n)*3)
	} else {
		n += 4 - n&3
		b = make([]byte, n+u7enc.DecodedLen(n)*3)
		copy(b[copy(b, b64):n], []byte("=="))
		b64, b = b[:n], b[n:]
	}

	// Decode Base64 into the first 1/3rd of b
	n, err := u7enc.Decode(b, b64)
	if err != nil || n&1 == 1 {
		return nil
	}

	// Decode UTF-16-BE into the remaining 2/3rds of b
	b, s := b[:n], b[n:]
	j := 0
	for i := 0; i < n; i += 2 {
		r := rune(b[i])<<8 | rune(b[i+1])
		if utf16.IsSurrogate(r) {
			if i += 2; i == n {
				return nil
			}
			r2 := rune(b[i])<<8 | rune(b[i+1])
			if r = utf16.DecodeRune(r, r2); r == uRepl {
				return nil
			}
		} else if u7min <= r && r <= u7max {
			return nil
		}
		j += utf8.EncodeRune(s[j:], r)
	}
	return s[:j]
}
Ejemplo n.º 12
0
func (d *jsonDecDriver) appendStringAsBytes() {
	d.expectChar('"')
	v := d.bs[:0]
	var c uint8
	for {
		c = d.r.readn1()
		if c == '"' {
			break
		} else if c == '\\' {
			c = d.r.readn1()
			switch c {
			case '"', '\\', '/', '\'':
				v = append(v, c)
			case 'b':
				v = append(v, '\b')
			case 'f':
				v = append(v, '\f')
			case 'n':
				v = append(v, '\n')
			case 'r':
				v = append(v, '\r')
			case 't':
				v = append(v, '\t')
			case 'u':
				rr := d.jsonU4(false)
				// fmt.Printf("$$$$$$$$$: is surrogate: %v\n", utf16.IsSurrogate(rr))
				if utf16.IsSurrogate(rr) {
					rr = utf16.DecodeRune(rr, d.jsonU4(true))
				}
				w2 := utf8.EncodeRune(d.bstr[:], rr)
				v = append(v, d.bstr[:w2]...)
			default:
				d.d.errorf("json: unsupported escaped value: %c", c)
			}
		} else {
			v = append(v, c)
		}
	}
	if jsonTrackSkipWhitespace {
		d.wsSkipped = false
	}
	d.bs = v
}
Ejemplo n.º 13
0
func getKeys() []keyInfo {
	var numberOfEventsRead uint32
	var events [10]inputRecordT
	var orgConMode uint32

	result := make([]keyInfo, 0, 0)

	getConsoleMode.Call(uintptr(hConin),
		uintptr(unsafe.Pointer(&orgConMode)))
	setConsoleMode.Call(uintptr(hConin), 0)
	var precode rune = 0
	for len(result) <= 0 {
		readConsoleInput.Call(
			uintptr(hConin),
			uintptr(unsafe.Pointer(&events[0])),
			uintptr(len(events)),
			uintptr(unsafe.Pointer(&numberOfEventsRead)))
		for i := uint32(0); i < numberOfEventsRead; i++ {
			if events[i].eventType == KEY_EVENT && events[i].bKeyDown != 0 {
				var keycode = rune(events[i].unicodeChar)
				if keycode != 0 {
					if precode != 0 {
						keycode = utf16.DecodeRune(precode, keycode)
						precode = 0
					} else if utf16.IsSurrogate(keycode) {
						precode = keycode
						continue
					}
				}
				result = append(result, keyInfo{
					keycode,
					events[i].wVirtualKeyCode,
					events[i].dwControlKeyState,
				})
			}
		}
	}
	setConsoleMode.Call(uintptr(hConin), uintptr(orgConMode))
	return result
}
Ejemplo n.º 14
0
Archivo: json.go Proyecto: kurrik/json
// Decodes UTF-16 surrogate pairs (such as \uD834\uDD1E).
func decodeSurrogates(s string) string {
	var (
		r1  rune = 0
		buf      = new(bytes.Buffer)
	)
	for _, r := range s {
		if utf16.IsSurrogate(r) {
			if r1 == 0 {
				r1 = r
			} else {
				buf.WriteRune(utf16.DecodeRune(r1, r))
				r1 = 0
			}
		} else {
			if r1 != 0 {
				buf.WriteRune(r1)
				r1 = 0
			}
			buf.WriteRune(r)
		}
	}
	return buf.String()
}
Ejemplo n.º 15
0
func unquoteBytes(s []byte) (t []byte, ok bool) {
	// Check for unusual characters. If there are none,
	// then no unquoting is needed, so return a slice of the
	// original bytes.
	r := 0
	for r < len(s) {
		c := s[r]
		if c == '\\' || c == '"' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			r++
			continue
		}
		rr, size := utf8.DecodeRune(s[r:])
		if rr == utf8.RuneError && size == 1 {
			break
		}
		r += size
	}
	if r == len(s) {
		return s, true
	}

	b := make([]byte, len(s)+2*utf8.UTFMax)
	w := copy(b, s[0:r])
	for r < len(s) {
		// Out of room?  Can only happen if s is full of
		// malformed UTF-8 and we're replacing each
		// byte with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(nb, b[0:w])
			b = nb
		}
		switch c := s[r]; {
		case c == '\\':
			r++
			if r >= len(s) {
				return
			}
			switch s[r] {
			default:
				return
			case '"', '\\', '/', '\'':
				b[w] = s[r]
				r++
				w++
			case 'b':
				b[w] = '\b'
				r++
				w++
			case 'f':
				b[w] = '\f'
				r++
				w++
			case 'n':
				b[w] = '\n'
				r++
				w++
			case 'r':
				b[w] = '\r'
				r++
				w++
			case 't':
				b[w] = '\t'
				r++
				w++
			case 'u':
				r--
				rr := getu4(s[r:])
				if rr < 0 {
					return
				}
				r += 6
				if utf16.IsSurrogate(rr) {
					rr1 := getu4(s[r:])
					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
						// A valid pair; consume.
						r += 6
						w += utf8.EncodeRune(b[w:], dec)
						break
					}
					// Invalid surrogate; fall back to replacement rune.
					rr = unicode.ReplacementChar
				}
				w += utf8.EncodeRune(b[w:], rr)
			}

		// Quote, control characters are invalid.
		case c == '"', c < ' ':
			return

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++

		// Coerce to well-formed UTF-8.
		default:
			rr, size := utf8.DecodeRune(s[r:])
			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}
	return b[0:w], true
}
Ejemplo n.º 16
0
func scanString(txt []byte) (tok []byte, readn int) {
	if len(txt) < 2 {
		return nil, 0
	}

	e := 1
	for txt[e] != '"' {
		c := txt[e]
		if c == '\\' || c == '"' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			e++
			continue
		}
		r, size := utf8.DecodeRune(txt[e:])
		if r == utf8.RuneError && size == 1 {
			return nil, 0
		}
		e += size
	}

	if txt[e] == '"' { // done we have nothing to unquote
		return txt[:e+1], e + 1
	}

	out := make([]byte, len(txt)+2*utf8.UTFMax)
	oute := copy(out, txt[:e]) // copy so far

loop:
	for e < len(txt) {
		switch c := txt[e]; {
		case c == '"':
			out[oute] = c
			e++
			break loop

		case c == '\\':
			if txt[e+1] == 'u' {
				r := getu4(txt[e:])
				if r < 0 { // invalid
					return nil, 0
				}
				e += 6
				if utf16.IsSurrogate(r) {
					nextr := getu4(txt[e:])
					dec := utf16.DecodeRune(r, nextr)
					if dec != unicode.ReplacementChar { // A valid pair consume
						oute += utf8.EncodeRune(out[oute:], dec)
						e += 6
						break loop
					}
					// Invalid surrogate; fall back to replacement rune.
					r = unicode.ReplacementChar
				}
				oute += utf8.EncodeRune(out[oute:], r)

			} else { // escaped with " \ / ' b f n r t
				out[oute] = escapeCode[txt[e+1]]
				e += 2
				oute++
			}

		case c < ' ': // control character is invalid
			return nil, 0

		case c < utf8.RuneSelf: // ASCII
			out[oute] = c
			oute++
			e++

		default: // coerce to well-formed UTF-8
			r, size := utf8.DecodeRune(txt[e:])
			e += size
			oute += utf8.EncodeRune(out[oute:], r)
		}
	}

	if out[oute] == '"' {
		return out[:oute+1], e
	}
	return nil, 0
}
Ejemplo n.º 17
0
func (r *ffReader) handleEscaped(c byte, j int, out DecodingBuffer) (int, error) {
	if j >= r.l {
		return 0, io.EOF
	}

	c = r.s[j]
	j++

	if c == 'u' {
		ru, err := r.readU4(j)
		if err != nil {
			return 0, err
		}

		if utf16.IsSurrogate(ru) {
			ru2, err := r.readU4(j + 6)
			if err != nil {
				return 0, err
			}
			out.Write(r.s[r.i : j-2])
			r.i = j + 10
			j = r.i
			rval := utf16.DecodeRune(ru, ru2)
			if rval != unicode.ReplacementChar {
				out.WriteRune(rval)
			} else {
				return 0, fmt.Errorf("lex_string_invalid_unicode_surrogate: %v %v", ru, ru2)
			}
		} else {
			out.Write(r.s[r.i : j-2])
			r.i = j + 4
			j = r.i
			out.WriteRune(ru)
		}
		return j, nil
	} else if byteLookupTable[c]&cVEC == 0 {
		return 0, fmt.Errorf("lex_string_invalid_escaped_char: %v", c)
	} else {
		out.Write(r.s[r.i : j-2])
		r.i = j
		j = r.i

		switch c {
		case '"':
			out.WriteByte('"')
		case '\\':
			out.WriteByte('\\')
		case '/':
			out.WriteByte('/')
		case 'b':
			out.WriteByte('\b')
		case 'f':
			out.WriteByte('\f')
		case 'n':
			out.WriteByte('\n')
		case 'r':
			out.WriteByte('\r')
		case 't':
			out.WriteByte('\t')
		}
	}

	return j, nil
}
Ejemplo n.º 18
0
// readConsole reads utf16 characters from console File,
// encodes them into utf8 and stores them in buffer b.
// It returns the number of utf8 bytes read and an error, if any.
func (f *File) readConsole(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}

	if f.readuint16 == nil {
		// Note: syscall.ReadConsole fails for very large buffers.
		// The limit is somewhere around (but not exactly) 16384.
		// Stay well below.
		f.readuint16 = make([]uint16, 0, 10000)
		f.readbyte = make([]byte, 0, 4*cap(f.readuint16))
	}

	for f.readbyteOffset >= len(f.readbyte) {
		n := cap(f.readuint16) - len(f.readuint16)
		if n > len(b) {
			n = len(b)
		}
		var nw uint32
		err := readConsole(f.fd, &f.readuint16[:len(f.readuint16)+1][len(f.readuint16)], uint32(n), &nw, nil)
		if err != nil {
			return 0, err
		}
		uint16s := f.readuint16[:len(f.readuint16)+int(nw)]
		f.readuint16 = f.readuint16[:0]
		buf := f.readbyte[:0]
		for i := 0; i < len(uint16s); i++ {
			r := rune(uint16s[i])
			if utf16.IsSurrogate(r) {
				if i+1 == len(uint16s) {
					if nw > 0 {
						// Save half surrogate pair for next time.
						f.readuint16 = f.readuint16[:1]
						f.readuint16[0] = uint16(r)
						break
					}
					r = utf8.RuneError
				} else {
					r = utf16.DecodeRune(r, rune(uint16s[i+1]))
					if r != utf8.RuneError {
						i++
					}
				}
			}
			n := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
			buf = buf[:len(buf)+n]
		}
		f.readbyte = buf
		f.readbyteOffset = 0
		if nw == 0 {
			break
		}
	}

	src := f.readbyte[f.readbyteOffset:]
	var i int
	for i = 0; i < len(src) && i < len(b); i++ {
		x := src[i]
		if x == 0x1A { // Ctrl-Z
			if i == 0 {
				f.readbyteOffset++
			}
			break
		}
		b[i] = x
	}
	f.readbyteOffset += i
	return i, nil
}
Ejemplo n.º 19
0
// regexp consumes a regular expression from d.data[d.off-1:].
// the two bytes of the regexp ("/a") have been read already.
func (d *decodeState) regexp() (string, string, error) {
	start := d.off - 1

	// Look ahead for /.
	op := d.scanWhile(scanRegexpPattern)
	if op != scanRegexpOptions {
		return "", "", fmt.Errorf("expected beginning of regular expression options")
	}
	pattern := d.data[start : d.off-1]

	start = d.off
	op = d.scanWhile(scanRegexpOptions)

	// Back up so caller can have the byte we just read.
	d.off--
	d.scan.undo(op)

	options := d.data[start:d.off]

	// Check for unusual characters. If there are none,
	// then no copying is needed, so return string of the
	// original bytes.
	r := 0
	for r < len(pattern) {
		c := pattern[r]
		if c == '\\' || c == '/' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			r++
			continue
		}
		rr, size := utf8.DecodeRune(pattern[r:])
		if rr == utf8.RuneError && size == 1 {
			break
		}
		r += size
	}
	if r == len(pattern) {
		return string(pattern), string(options), nil
	}

	b := make([]byte, len(pattern)+2*utf8.UTFMax)
	w := copy(b, pattern[0:r])
	for r < len(pattern) {
		// Out of room?  Can only happen if pattern is full of
		// malformed UTF-8 and we're replacing each
		// byte with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(nb, b[0:w])
			b = nb
		}
		switch c := pattern[r]; {
		case c == '\\':
			r++
			if r >= len(pattern) {
				return "", "", errPhase
			}
			switch pattern[r] {
			default:
				return "", "", fmt.Errorf("invalid escape character")
			case '"', '\\', '/', '\'':
				b[w] = pattern[r]
				r++
				w++
			case 'b':
				b[w] = '\b'
				r++
				w++
			case 'f':
				b[w] = '\f'
				r++
				w++
			case 'n':
				b[w] = '\n'
				r++
				w++
			case 'r':
				b[w] = '\r'
				r++
				w++
			case 't':
				b[w] = '\t'
				r++
				w++
			case 'u':
				r--
				rr := getu4(pattern[r:])
				if rr < 0 {
					return "", "", fmt.Errorf("non-hexadecimal character found")
				}
				r += 6
				if utf16.IsSurrogate(rr) {
					rr1 := getu4(pattern[r:])
					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
						// A valid pair; consume.
						r += 6
						w += utf8.EncodeRune(b[w:], dec)
						break
					}
					// Invalid surrogate; fall back to replacement rune.
					rr = unicode.ReplacementChar
				}
				w += utf8.EncodeRune(b[w:], rr)
			}

		// Forward slash, control characters are invalid.
		case c == '/', c < ' ':
			d.error(fmt.Errorf("regular expression pattern cannot contain unescaped '/'"))

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++

		// Coerce to well-formed UTF-8.
		default:
			rr, size := utf8.DecodeRune(pattern[r:])
			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}
	return string(b[0:w]), string(options), nil
}
Ejemplo n.º 20
0
Archivo: json.go Proyecto: kurrik/json
func (s *State) readString() (err error) {
	var (
		c       byte
		start   int
		buf     *bytes.Buffer
		atstart bool = false
		more    bool = true
		escaped bool = false
		escape  bool = false
	)
	for atstart == false {
		c = s.data[s.i]
		switch {
		case c == ' ':
			fallthrough
		case c == '\t':
			s.i++
		case c == '"':
			atstart = true
			break
		case c == '}':
			s.i++
			return EndMap{}
		case c == ']':
			s.i++
			return EndArray{}
		default:
			return errors.New("Invalid string char: " + string(c))
		}
	}
	s.i++
	start = s.i
	buf = new(bytes.Buffer)
	for more {
		c = s.data[s.i]
		switch {
		case escape == false && c == '\\':
			escape = true
			escaped = true
			break
		case escape == true && c == '\\':
			escape = false
			break
		case escape == true && c == '/':
			// Skip the backslash
			buf.Write(s.data[start : s.i-1])
			start = s.i
			escape = false
			break
		case escape == true && c == 'u':
			// TODO: Handle more encoded cases so that we don't need
			// the second pass for surrogate pairs down below.
			// However, Go specifically calls out surrogates
			// split with \u1234\u5678 encoding as invalid in
			// http://golang.org/ref/spec#Rune_literals so it's
			// unlikely to get direct support.
			buf.Write(s.data[start : s.i-1])
			var (
				r1, r2 rune
				perr   error
			)
			if r1, perr = s.getSmallURune(s.i - 1); perr != nil {
				return perr
			}
			s.i += 4 // On the last char of the first \u1234 value.
			if utf16.IsSurrogate(r1) {
				if r2, perr = s.getSmallURune(s.i + 1); perr != nil {
					return perr
				}
				buf.WriteRune(utf16.DecodeRune(r1, r2))
				s.i += 6 // On the last char of the second value.
			} else {
				buf.WriteRune(r1)
			}
			start = s.i + 1
			escape = false
			break
		case c == '"':
			if escape == false {
				more = false
			} else {
				escape = false
			}
			break
		case s.i >= len(s.data)-1:
			return errors.New("No string terminator")
		default:
			escape = false
			break
		}
		s.i++
	}
	buf.Write(s.data[start : s.i-1])
	s.v = buf.String()
	if escaped == true {
		var utfstr = s.v.(string)
		utfstr = "\"" + utfstr + "\""
		if s.v, err = strconv.Unquote(utfstr); err == nil {
			s.v = decodeSurrogates(s.v.(string))
		}
	}
	return
}