Example #1
0
// wideString encodes s as a slice of UTF-16 code units. When terminate is
// true the result carries one extra trailing zero unit (a NUL terminator).
func wideString(s string, terminate bool) []uint16 {
	// First pass: work out how many code units are needed so the slice is
	// allocated exactly once.
	units := 0
	if terminate {
		units = 1
	}
	for _, r := range s {
		if r >= 0x10000 {
			units += 2 // needs a surrogate pair
		} else {
			units++
		}
	}

	// Second pass: fill in the units. A reserved terminator slot simply keeps
	// the zero value it got from make.
	out := make([]uint16, units)
	pos := 0
	for _, r := range s {
		if r >= 0x10000 {
			hi, lo := utf16.EncodeRune(r)
			out[pos] = uint16(hi)
			out[pos+1] = uint16(lo)
			pos += 2
			continue
		}
		out[pos] = uint16(r)
		pos++
	}
	return out
}
Example #2
0
// utf7enc converts string s from UTF-8 to UTF-16-BE, encodes the result as
// Base64, removes the padding, and adds UTF-7 shifts.
//
// The returned slice starts with '&' and ends with '-'; the bytes in between
// are whatever u7enc.Encode produced, minus the trimmed tail.
func utf7enc(s []byte) []byte {
	// len(s) is sufficient for UTF-8 to UTF-16 conversion if there are no
	// control code points (see table below).
	b := make([]byte, 0, len(s)+4)
	for len(s) > 0 {
		r, size := utf8.DecodeRune(s)
		if r > utf8.MaxRune {
			r, size = utf8.RuneError, 1 // Bug fix (issue 3785)
		}
		s = s[size:]
		// When EncodeRune yields a surrogate pair, emit the first half here
		// and let the common append below emit the second half.
		// NOTE(review): uRepl is presumably U+FFFD, the value EncodeRune
		// returns when no pair is needed — confirm against its definition.
		if r1, r2 := utf16.EncodeRune(r); r1 != uRepl {
			b = append(b, byte(r1>>8), byte(r1))
			r = r2
		}
		// High byte first: big-endian code unit.
		b = append(b, byte(r>>8), byte(r))
	}

	// Encode as Base64
	// The extra 2 bytes are the slots for the leading '&' and trailing '-'.
	n := u7enc.EncodedLen(len(b)) + 2
	b64 := make([]byte, n)
	u7enc.Encode(b64[1:], b)

	// Strip padding
	// The expression subtracts 0, 2 or 1 when len(b)%3 is 0, 1 or 2.
	// NOTE(review): this assumes u7enc emits the usual Base64 padding of that
	// many trailing bytes — confirm against u7enc's definition.
	n -= 2 - (len(b)+2)%3
	b64 = b64[:n]

	// Add UTF-7 shifts
	// b64[n-1] is the last byte kept after trimming; it is overwritten by '-'.
	b64[0] = '&'
	b64[n-1] = '-'
	return b64
}
Example #3
0
// BStrFromString encodes s as UTF-16 into a freshly allocated buffer and
// returns a BStr holding a pointer to the first text unit.
//
// Buffer layout, in uint16 units: two units holding the text length in bytes
// (low 16 bits first, then the next 16 bits), the encoded text, and one zero
// unit as terminator. The byte length covers the text only.
func BStrFromString(s string) BStr {
	// Count the UTF-16 units the text will occupy.
	units := 0
	for _, r := range s {
		if r >= 0x10000 {
			units += 2 // surrogate pair
		} else {
			units++
		}
	}

	const header = 2 // units reserved for the byte length
	buf := make([]uint16, header+units+1)

	size := units * 2
	buf[0] = uint16(size)
	buf[1] = uint16(size >> 16)

	pos := header
	for _, r := range s {
		if r >= 0x10000 {
			hi, lo := utf16.EncodeRune(r)
			buf[pos] = uint16(hi)
			buf[pos+1] = uint16(lo)
			pos += 2
			continue
		}
		buf[pos] = uint16(r)
		pos++
	}

	return BStr{&buf[header]}
}
Example #4
0
// WriteUTF16 writes s into the buffer as a 32-bit length followed by the
// UTF-16 code units of s.
//
// The code units are written first, starting four bytes past b.Offset, and
// the length (a count of uint16 values, not bytes) is written afterwards with
// WriteInt32. Runes matched by the first switch case are written as
// replacementChar.
func (b *Buffer) WriteUTF16(s string) {
	// first 4 bytes is the length, as int32. written last.
	// next n bytes is utf-16 string.
	if len(b.Data)-b.Offset < 4+4*len(s) {
		b.grow(4 + 4*len(s)) // worst case estimate, everything is surrogate pair
	}
	// Skip the 4-byte length slot; data is where the code units go.
	data := b.Data[b.Offset+4:]
	n := 0 // number of bytes written into data so far
	for _, v := range s {
		switch {
		case v < 0, surr1 <= v && v < surr3, v > maxRune:
			// Substitute the replacement value, then share the single-unit
			// write below via fallthrough.
			v = replacementChar
			fallthrough
		case v < surrSelf:
			writeUint16(data[n:], v)
			n += 2
		default:
			// surrogate pair, two uint16 values
			r1, r2 := utf16.EncodeRune(v)
			writeUint16(data[n:], r1)
			writeUint16(data[n+2:], r2)
			n += 4
		}
	}

	// write length at b.Data[b.Offset:], before contents.
	// length is number of uint16 values, not number of bytes.
	b.WriteInt32(int32(n / 2))

	// NOTE(review): this lands just past the code units only if WriteInt32
	// advanced b.Offset by 4 — confirm against WriteInt32.
	b.Offset += n
}
Example #5
0
// writeInternal writes b to p.w in the format selected by p.t, calling
// p.start first when p.setup is false.
//
//   - script: b is wrapped in messageData and rendered through scriptMessage.
//   - length: the JavaScript (UTF-16) length of b is written on its own line,
//     followed by b.
//   - anything else: b is written unchanged.
//
// The first error encountered is returned.
func (p *padder) writeInternal(b string) error {
	if !p.setup {
		if err := p.start(); err != nil {
			return err
		}
	}

	switch p.t {
	case script:
		if err := scriptMessage.Execute(p.w, messageData{b}); err != nil {
			return err
		}
		return nil

	case length:
		// Internally js uses utf-16 for strings (after parsing them out of a
		// utf-8 context). Code points >= U+10000 become surrogate pairs there
		// and count as two, so start from the rune count and add one more for
		// every rune that encodes to a pair.
		// http://mathiasbynens.be/notes/javascript-encoding
		jsLength := utf8.RuneCountInString(b)
		for _, r := range b {
			hi, lo := utf16.EncodeRune(r)
			if hi == '\uFFFD' || lo == '\uFFFD' {
				continue // not a surrogate pair
			}
			jsLength++
		}
		_, err := fmt.Fprintf(p.w, "%d\n%s", jsLength, b)
		return err

	default:
		_, err := p.w.Write([]byte(b))
		return err
	}
}
Example #6
0
// hashCode imitates the behavior of the JDK's String#hashCode method.
// https://docs.oracle.com/javase/7/docs/api/java/lang/String.html#hashCode()
//
// As strings are encoded in utf16 on the JVM, this implementation checks
// whether each rune of s needs a utf16 surrogate pair and, if so, folds both
// halves of the pair into the hash instead of the rune itself.
func hashCode(s string) (hc int32) {
	const repl = 0xfffd // what utf16.EncodeRune returns when no pair is needed
	for _, r := range s {
		hi, lo := utf16.EncodeRune(r)
		if hi != repl || lo != repl {
			// Two code units: hash them one after the other.
			hc = (hc*31+hi)*31 + lo
			continue
		}
		hc = hc*31 + r
	}
	return hc
}
Example #7
0
// appendUTF16 appends r to b as big-endian UTF-16 and returns the extended
// slice: two bytes for values up to 0xffff, four bytes (a surrogate pair)
// above that.
func appendUTF16(b []byte, r rune) []byte {
	if r > 0xffff {
		hi, lo := utf16.EncodeRune(r)
		b = append(b, byte(hi>>8), byte(hi))
		b = append(b, byte(lo>>8), byte(lo))
		return b
	}

	// Deliberately no surrogate check here: an unpaired surrogate is written
	// through as-is.
	return append(b, byte(r>>8), byte(r))
}
Example #8
0
// appendUTF16LE appends the UTF-16 encoding of val to v, one code unit at a
// time through append16, and returns the extended slice.
//
// Runes at or above U+10000 are written as a surrogate pair. The previous
// test, utf16.IsSurrogate(r), asked whether r itself lies in the surrogate
// range; ranging over a string never yields such a rune, so characters outside
// the BMP were silently truncated to a single 16-bit value.
//
// NOTE(review): the byte order of each unit is decided by append16 —
// presumably little-endian, going by this function's name; confirm there.
func appendUTF16LE(v []byte, val string) []byte {
	for _, r := range val {
		if r >= 0x10000 {
			r1, r2 := utf16.EncodeRune(r)
			v = append16(v, uint16(r1))
			v = append16(v, uint16(r2))
			continue
		}
		v = append16(v, uint16(r))
	}
	return v
}
Example #9
0
// appendUTF16 decodes the UTF-8 bytes in s and appends their UTF-16 code
// units to buf, returning the extended slice. Runes for which
// utf16.EncodeRune reports no surrogate pair are appended as a single unit.
func appendUTF16(buf []uint16, s []byte) []uint16 {
	for rest := s; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]

		hi, lo := utf16.EncodeRune(r)
		if hi == 0xFFFD {
			// No pair needed: one unit carries the rune.
			buf = append(buf, uint16(r))
			continue
		}
		buf = append(buf, uint16(hi), uint16(lo))
	}
	return buf
}
Example #10
0
// utf16le returns val encoded as UTF-16 in little-endian byte order. An empty
// val yields a nil slice.
//
// Runes at or above U+10000 are emitted as a surrogate pair. The previous
// test, utf16.IsSurrogate(r), asked whether r itself lies in the surrogate
// range; ranging over a string never yields such a rune, so characters outside
// the BMP were truncated to their low 16 bits.
func utf16le(val string) []byte {
	var v []byte
	for _, r := range val {
		if r >= 0x10000 {
			r1, r2 := utf16.EncodeRune(r)
			v = append(v, byte(r1), byte(r1>>8))
			v = append(v, byte(r2), byte(r2>>8))
			continue
		}
		// Low byte first: little-endian.
		v = append(v, byte(r), byte(r>>8))
	}
	return v
}
Example #11
0
// encodeUTF16 decodes the UTF-8 bytes in b and returns their UTF-16 code
// units. The result is never nil; empty input yields an empty slice.
func encodeUTF16(b []byte) []uint16 {
	units := make([]uint16, 0)
	for rest := b; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]

		hi, lo := utf16.EncodeRune(r)
		if hi == 0xFFFD {
			// EncodeRune reported no surrogate pair: store the rune directly.
			units = append(units, uint16(r))
			continue
		}
		units = append(units, uint16(hi), uint16(lo))
	}
	return units
}
Example #12
0
// rc4HmacKey converts a UTF8 password into a key suitable for use with the
// rc4hmac: the MD4 digest of the password encoded as UTF16-LE.
func rc4HmacKey(password string) []byte {
	// Convert password from UTF8 to UTF16-LE. Two output bytes per input byte
	// is always enough, so the buffer never has to grow.
	s := make([]byte, 0, 2*len(password))
	for _, r := range password {
		// Surrogate pairs start at U+10000 inclusive. The earlier check,
		// r > 0x10000, let U+10000 itself fall into the single-unit branch,
		// where it was truncated to 0x0000.
		if r >= 0x10000 {
			a, b := utf16.EncodeRune(r)
			s = append(s, byte(a), byte(a>>8), byte(b), byte(b>>8))
		} else {
			s = append(s, byte(r), byte(r>>8))
		}
	}

	h := md4.New()
	h.Write(s)
	return h.Sum(nil)
}
Example #13
0
// escapeUnicode returns input with every non-ASCII rune replaced by a
// lowercase \uXXXX escape. Runes above U+FFFF are written as two escapes, one
// per UTF-16 surrogate; runes up to U+007F are copied through unchanged.
func escapeUnicode(input string) string {
	var out bytes.Buffer
	for _, r := range input {
		switch {
		case r > 0xFFFF:
			// surrogate pair
			hi, lo := utf16.EncodeRune(r)
			fmt.Fprintf(&out, "\\u%x\\u%x", hi, lo)
		case r > 0x7F:
			fmt.Fprintf(&out, "\\u%04x", r)
		default:
			fmt.Fprintf(&out, "%c", r)
		}
	}
	return out.String()
}
Example #14
0
// bmpString encodes s as big-endian 16-bit units followed by a two-byte zero
// terminator. It returns an error, and a nil slice, as soon as it meets a rune
// for which utf16.EncodeRune produces a surrogate pair.
func bmpString(s string) ([]byte, error) {
	// References:
	// https://tools.ietf.org/html/rfc7292#appendix-B.1
	// http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
	//  - non-BMP characters need a surrogate pair of 16-bit codes in UTF-16;
	//    EncodeRune returns 0xfffd when the rune needs no such pair.
	//  - per the RFC above, BMPStrings are NULL terminated.

	out := make([]byte, 0, 2*len(s)+2)
	for _, r := range s {
		hi, _ := utf16.EncodeRune(r)
		if hi != 0xfffd {
			return nil, errors.New("string contains characters that cannot be encoded in UCS-2")
		}
		// High byte, then low byte.
		out = append(out, byte(r>>8), byte(r))
	}
	return append(out, 0, 0), nil
}
Example #15
0
// UTF16Encode writes the UTF-16 encoding of s into chars and returns how many
// uint16 values were written. The caller must supply a chars slice with
// enough room; no bounds are checked beyond normal slice indexing.
func UTF16Encode(s string, chars []uint16) int {
	n := 0
	for _, v := range s {
		// Out-of-range values and lone surrogates are swapped for the
		// replacement character and always written as a single unit.
		invalid := v < 0 || (surr1 <= v && v < surr3) || v > maxRune
		if invalid {
			v = replacementChar
		}

		if invalid || v < surrSelf {
			chars[n] = uint16(v)
			n++
			continue
		}

		// Everything else needs a surrogate pair: two uint16 values.
		hi, lo := utf16.EncodeRune(v)
		chars[n] = uint16(hi)
		chars[n+1] = uint16(lo)
		n += 2
	}
	return n
}
Example #16
0
// StringUtf16 encodes v as a 32-bit count followed by its UTF-16 code units
// and a trailing zero unit. The count is the number of 16-bit units written,
// terminator included.
func (enc *FrameEncoder) StringUtf16(v string) {
	// Remember where the count goes and emit a placeholder via enc.Bytes(4);
	// the real value is patched in once the units have been counted.
	countAt := len(enc.Content)
	enc.Bytes(4)

	units := 0
	for _, r := range v {
		if r < 0x10000 {
			enc.Uint16(uint16(r))
			units++
			continue
		}
		// Outside the BMP: two units, high surrogate first.
		hi, lo := utf16.EncodeRune(r)
		enc.Uint16(uint16(hi))
		enc.Uint16(uint16(lo))
		units += 2
	}

	// Zero terminator, which is part of the count.
	enc.Uint16(0)
	units++

	endianness.PutUint32(enc.Content[countAt:], uint32(units))
}
Example #17
0
// BMPString converts the UTF-8 bytes in utf8String to big-endian 16-bit units
// followed by a two-byte zero terminator. It returns an error, and a nil
// slice, as soon as it meets a rune for which utf16.EncodeRune produces a
// surrogate pair.
func BMPString(utf8String []byte) ([]byte, error) {
	// References:
	// https://tools.ietf.org/html/rfc7292#appendix-B.1
	// http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
	//  - non-BMP characters need a surrogate pair of 16-bit codes in UTF-16;
	//    EncodeRune returns 0xfffd when the rune needs no such pair.
	//  - per the RFC above, BMPStrings are NULL terminated.

	out := make([]byte, 0, 2*len(utf8String)+2)
	for rest := utf8String; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]

		if hi, _ := utf16.EncodeRune(r); hi != 0xfffd {
			return nil, errors.New("password contains characters that cannot be encoded in UCS-2")
		}
		// High byte, then low byte.
		out = append(out, byte(r>>8), byte(r))
	}
	return append(out, 0, 0), nil
}
Example #18
0
// cell_to_char_info derives the attribute word and the two wide characters
// for cell c.
//
// attr combines the colour-table entries selected by the low four bits of
// c.Fg and c.Bg; AttrReverse on either swaps the two nibbles of the low byte,
// and AttrBold on Fg/Bg adds the matching intensity flag. wc holds the
// surrogate pair for c.Ch, or c.Ch followed by a space when no pair is needed.
func cell_to_char_info(c Cell) (attr word, wc [2]wchar) {
	attr = color_table_fg[c.Fg&0x0F] | color_table_bg[c.Bg&0x0F]

	if (c.Fg|c.Bg)&AttrReverse != 0 {
		// Exchange the low and high nibble of the low byte.
		attr = (attr&0x0F)<<4 | (attr&0xF0)>>4
	}
	if c.Fg&AttrBold != 0 {
		attr |= foreground_intensity
	}
	if c.Bg&AttrBold != 0 {
		attr |= background_intensity
	}

	hi, lo := utf16.EncodeRune(c.Ch)
	if hi != 0xFFFD {
		wc = [2]wchar{wchar(hi), wchar(lo)}
		return attr, wc
	}
	// No surrogate pair: one unit for the rune, padded with a space.
	wc = [2]wchar{wchar(c.Ch), ' '}
	return attr, wc
}
Example #19
0
// encodeUTF16beRune writes c into p as big-endian UTF-16 and reports how many
// bytes it used: 2 for values below 0x10000, 4 (a surrogate pair) otherwise.
// When p is too short for the encoding nothing is written and the result is
// (0, NO_ROOM).
func encodeUTF16beRune(p []byte, c rune) (size int, status Status) {
	need := 2
	if c >= 0x10000 {
		need = 4
	}
	if len(p) < need {
		return 0, NO_ROOM
	}

	if need == 2 {
		p[0] = byte(c >> 8)
		p[1] = byte(c)
		return 2, SUCCESS
	}

	hi, lo := utf16.EncodeRune(c)
	p[0] = byte(hi >> 8)
	p[1] = byte(hi)
	p[2] = byte(lo >> 8)
	p[3] = byte(lo)
	return 4, SUCCESS
}
Example #20
0
// WriteUTF16 writes s into the buffer as a 32-bit length followed by the
// UTF-16 code units of s.
//
// The code units are written first, at offset1, and the length (a count of
// uint16 values, not bytes) is written afterwards with WriteInt32. Runes
// matched by the first switch case are written as replacementChar. On return
// b.Offset is offset1 plus the number of bytes of code units written.
func (b *Buffer) WriteUTF16(s string) {
	// The first 4 bytes is the length, as int32 (4-byte aligned).
	// written last.
	// The next n bytes is utf-16 string (1-byte aligned).
	offset0 := align(b.Offset, 4)  // length.
	offset1 := align(offset0+4, 1) // contents.

	if len(b.Data)-offset1 < 4*len(s) {
		// worst case estimate, everything is surrogate pair
		b.grow(offset1 + 4*len(s) - len(b.Data))
	}
	// Taken after the grow call, so it refers to the current b.Data.
	data := b.Data[offset1:]
	n := 0 // number of bytes written into data so far
	for _, v := range s {
		switch {
		case v < 0, surr1 <= v && v < surr3, v > maxRune:
			// Substitute the replacement value, then share the single-unit
			// write below via fallthrough.
			v = replacementChar
			fallthrough
		case v < surrSelf:
			writeUint16(data[n:], v)
			n += 2
		default:
			// surrogate pair, two uint16 values
			r1, r2 := utf16.EncodeRune(v)
			writeUint16(data[n:], r1)
			writeUint16(data[n+2:], r2)
			n += 4
		}
	}

	// write length at b.Data[b.Offset:], before contents.
	// length is number of uint16 values, not number of bytes.
	// NOTE(review): this relies on WriteInt32 placing the value at offset0,
	// i.e. applying the same 4-byte alignment to b.Offset — confirm.
	b.WriteInt32(int32(n / 2))

	// Set absolutely, so whatever WriteInt32 did to b.Offset is overridden.
	b.Offset = offset1 + n
}
Example #21
0
// Transform reads UTF-8 from src and writes the corresponding UTF-16 code
// units to dst.
//
// It returns the number of bytes written to dst, the number of bytes consumed
// from src, and transform.ErrShortDst or transform.ErrShortSrc when it had to
// stop before consuming all of src. If the writeBOM bit is set in
// u.currentBOMPolicy, a byte order mark is written first and the policy is
// reset to IgnoreBOM so that this happens only once.
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if u.currentBOMPolicy&writeBOM != 0 {
		if len(dst) < 2 {
			return 0, 0, transform.ErrShortDst
		}
		// Written big-endian like everything else; the swap loop at the end
		// starts at index 0 and so covers these two bytes as well.
		dst[0], dst[1] = 0xfe, 0xff
		u.currentBOMPolicy = IgnoreBOM
		nDst = 2
	}

	r, size := rune(0), 0
	for nSrc < len(src) {
		r = rune(src[nSrc])

		// Decode a 1-byte rune.
		if r < utf8.RuneSelf {
			size = 1

		} else {
			// Decode a multi-byte rune.
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					err = transform.ErrShortSrc
					break
				}
				// Otherwise fall through with the rune DecodeRune returned
				// and consume the single offending byte.
			}
		}

		// Every exit below happens before nSrc is advanced, so a rune that
		// does not fit in dst is left unconsumed.
		if r <= 0xffff {
			if nDst+2 > len(dst) {
				err = transform.ErrShortDst
				break
			}
			dst[nDst+0] = uint8(r >> 8)
			dst[nDst+1] = uint8(r)
			nDst += 2
		} else {
			if nDst+4 > len(dst) {
				err = transform.ErrShortDst
				break
			}
			// Above 0xffff: two code units from utf16.EncodeRune.
			r1, r2 := utf16.EncodeRune(r)
			dst[nDst+0] = uint8(r1 >> 8)
			dst[nDst+1] = uint8(r1)
			dst[nDst+2] = uint8(r2 >> 8)
			dst[nDst+3] = uint8(r2)
			nDst += 4
		}
		nSrc += size
	}

	// Output was produced high byte first; for little-endian, swap the two
	// bytes of every unit written during this call.
	if u.endianness == LittleEndian {
		for i := 0; i < nDst; i += 2 {
			dst[i], dst[i+1] = dst[i+1], dst[i]
		}
	}
	return nDst, nSrc, err
}