// wideString returns the UTF-16 encoding of s. When terminate is true the
// result carries one extra trailing zero code unit (a NUL terminator).
func wideString(s string, terminate bool) []uint16 {
	// First pass: count code units so the result is allocated exactly once.
	units := 0
	for _, r := range s {
		if r >= 0x10000 {
			units += 2 // needs a surrogate pair
		} else {
			units++
		}
	}
	if terminate {
		units++
	}

	// Second pass: encode. A requested terminator needs no explicit write
	// because make zero-fills the slice.
	out := make([]uint16, units)
	pos := 0
	for _, r := range s {
		if r >= 0x10000 {
			hi, lo := utf16.EncodeRune(r)
			out[pos], out[pos+1] = uint16(hi), uint16(lo)
			pos += 2
			continue
		}
		out[pos] = uint16(r)
		pos++
	}
	return out
}
// utf7enc converts string s from UTF-8 to UTF-16-BE, encodes the result as // Base64, removes the padding, and adds UTF-7 shifts. func utf7enc(s []byte) []byte { // len(s) is sufficient for UTF-8 to UTF-16 conversion if there are no // control code points (see table below). b := make([]byte, 0, len(s)+4) for len(s) > 0 { r, size := utf8.DecodeRune(s) if r > utf8.MaxRune { r, size = utf8.RuneError, 1 // Bug fix (issue 3785) } s = s[size:] if r1, r2 := utf16.EncodeRune(r); r1 != uRepl { b = append(b, byte(r1>>8), byte(r1)) r = r2 } b = append(b, byte(r>>8), byte(r)) } // Encode as Base64 n := u7enc.EncodedLen(len(b)) + 2 b64 := make([]byte, n) u7enc.Encode(b64[1:], b) // Strip padding n -= 2 - (len(b)+2)%3 b64 = b64[:n] // Add UTF-7 shifts b64[0] = '&' b64[n-1] = '-' return b64 }
func BStrFromString(s string) BStr { n := 0 for _, c := range s { n++ if c >= 0x10000 { n++ } } a := make([]uint16, n+3) // 2 words for the length + one for the terminator i := 2 for _, c := range s { if c < 0x10000 { a[i] = uint16(c) i++ } else { r1, r2 := utf16.EncodeRune(c) a[i] = uint16(r1) a[i+1] = uint16(r2) i += 2 } } byteLen := n * 2 a[0] = uint16(byteLen) a[1] = uint16(byteLen >> 16) return BStr{&a[2]} }
func (b *Buffer) WriteUTF16(s string) { // first 4 bytes is the length, as int32. written last. // next n bytes is utf-16 string. if len(b.Data)-b.Offset < 4+4*len(s) { b.grow(4 + 4*len(s)) // worst case estimate, everything is surrogate pair } data := b.Data[b.Offset+4:] n := 0 for _, v := range s { switch { case v < 0, surr1 <= v && v < surr3, v > maxRune: v = replacementChar fallthrough case v < surrSelf: writeUint16(data[n:], v) n += 2 default: // surrogate pair, two uint16 values r1, r2 := utf16.EncodeRune(v) writeUint16(data[n:], r1) writeUint16(data[n+2:], r2) n += 4 } } // write length at b.Data[b.Offset:], before contents. // length is number of uint16 values, not number of bytes. b.WriteInt32(int32(n / 2)) b.Offset += n }
func (p *padder) writeInternal(b string) error { if !p.setup { if err := p.start(); err != nil { return err } } switch p.t { case script: d := messageData{b} if err := scriptMessage.Execute(p.w, d); err != nil { return err } case length: runeCount := utf8.RuneCountInString(b) jsLength := runeCount for _, r := range []rune(b) { // Internally js uses utf-16 for strings (after parsing them out of a // utf-8 context). In utf-16, non-bmp characters (code points >= U+10000) // are represented as surrogate pairs (length 2, not 1). Double count // code points represented as surrogate pairs in JS // http://mathiasbynens.be/notes/javascript-encoding if r1, r2 := utf16.EncodeRune(r); r1 != '\uFFFD' && r2 != '\uFFFD' { jsLength++ } } if _, err := fmt.Fprintf(p.w, "%d\n%s", jsLength, b); err != nil { return err } default: if _, err := p.w.Write([]byte(b)); err != nil { return err } } return nil }
// hashCode reproduces the result of the JDK's String#hashCode for s.
// https://docs.oracle.com/javase/7/docs/api/java/lang/String.html#hashCode()
//
// JVM strings are sequences of UTF-16 code units, so a rune outside the BMP
// contributes two hash steps, one per surrogate half, while every other rune
// contributes one. Arithmetic wraps in int32, as it does in Java.
func hashCode(s string) (hc int32) {
	for _, r := range s {
		hi, lo := utf16.EncodeRune(r)
		// EncodeRune returns U+FFFD for both halves when r needs no pair.
		if hi != 0xfffd || lo != 0xfffd {
			hc = (hc*31+hi)*31 + lo
			continue
		}
		hc = hc*31 + r
	}
	return hc
}
// appendUTF16 appends the big-endian UTF-16 encoding of r to b and returns
// the extended slice. Runes above U+FFFF become a surrogate pair (four
// bytes); everything else is written as one 16-bit unit (two bytes).
func appendUTF16(b []byte, r rune) []byte {
	if r > 0xffff {
		hi, lo := utf16.EncodeRune(r)
		return append(b, byte(hi>>8), byte(hi), byte(lo>>8), byte(lo))
	}
	// Note this logic intentionally tolerates unpaired surrogates: a rune in
	// the surrogate range is emitted as-is rather than replaced.
	return append(b, byte(r>>8), byte(r))
}
func appendUTF16LE(v []byte, val string) []byte { for _, r := range val { if utf16.IsSurrogate(r) { r1, r2 := utf16.EncodeRune(r) v = append16(v, uint16(r1)) v = append16(v, uint16(r2)) } else { v = append16(v, uint16(r)) } } return v }
// appendUTF16 decodes the UTF-8 bytes in s and appends their UTF-16 code
// units to buf, returning the extended slice. Runes that need a surrogate
// pair add two units; all others add one. Invalid UTF-8 is decoded one byte
// at a time as U+FFFD.
func appendUTF16(buf []uint16, s []byte) []uint16 {
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRune(rest)
		rest = rest[size:]

		hi, lo := utf16.EncodeRune(r)
		if hi == 0xFFFD {
			// No surrogate pair required: r fits in a single unit.
			buf = append(buf, uint16(r))
			continue
		}
		buf = append(buf, uint16(hi), uint16(lo))
	}
	return buf
}
// utf16le returns val encoded as little-endian UTF-16 bytes.
//
// Runes outside the Basic Multilingual Plane are written as a surrogate pair
// (four bytes); every other rune takes two bytes. Invalid UTF-8 in val is
// encoded as U+FFFD, as with any range over a string. An empty val yields a
// nil slice.
func utf16le(val string) []byte {
	if val == "" {
		return nil
	}
	// Each UTF-8 byte expands to at most two UTF-16 bytes, so this capacity is
	// always sufficient and the loop never reallocates.
	v := make([]byte, 0, 2*len(val))
	for _, r := range val {
		// The test must be "does r need a surrogate pair", not
		// utf16.IsSurrogate(r): IsSurrogate asks whether r is itself a
		// surrogate code point, which ranging over a string never yields, so
		// that check truncated every non-BMP rune to its low 16 bits.
		if r >= 0x10000 {
			r1, r2 := utf16.EncodeRune(r)
			v = append(v, byte(r1), byte(r1>>8), byte(r2), byte(r2>>8))
			continue
		}
		v = append(v, byte(r), byte(r>>8))
	}
	return v
}
// encodeUTF16 decodes the UTF-8 bytes in b and returns them as UTF-16 code
// units. Runes that need a surrogate pair yield two units, all others one;
// invalid UTF-8 is decoded one byte at a time as U+FFFD. The result is never
// nil, even for empty input.
func encodeUTF16(b []byte) []uint16 {
	units := make([]uint16, 0)
	for rest := b; len(rest) > 0; {
		r, size := utf8.DecodeRune(rest)
		rest = rest[size:]

		// A first half of U+FFFD signals that r needs no surrogate pair.
		if hi, lo := utf16.EncodeRune(r); hi == 0xFFFD {
			units = append(units, uint16(r))
		} else {
			units = append(units, uint16(hi), uint16(lo))
		}
	}
	return units
}
// rc4HmacKey converts a UTF8 password into a key suitable for use with the // rc4hmac. func rc4HmacKey(password string) []byte { // Convert password from UTF8 to UTF16-LE s := make([]byte, 0) for _, r := range password { if r > 0x10000 { a, b := utf16.EncodeRune(r) s = append(s, byte(a), byte(a>>8), byte(b), byte(b>>8)) } else { s = append(s, byte(r), byte(r>>8)) } } h := md4.New() h.Write(s) return h.Sum(nil) }
// escapeUnicode returns input with every non-ASCII rune replaced by a
// \uXXXX escape. Runes above U+FFFF are written as two escapes, one per
// UTF-16 surrogate half. ASCII runes, including control characters, are
// copied through unchanged.
func escapeUnicode(input string) string {
	var out bytes.Buffer
	for _, r := range input {
		switch {
		case r > 0xFFFF:
			// Surrogate pair.
			hi, lo := utf16.EncodeRune(r)
			fmt.Fprintf(&out, "\\u%x\\u%x", hi, lo)
		case r > 0x7F:
			fmt.Fprintf(&out, "\\u%04x", r)
		default:
			fmt.Fprintf(&out, "%c", r)
		}
	}
	return out.String()
}
// bmpString encodes s as a BMPString: each rune as two big-endian bytes,
// followed by a two-byte NULL terminator. It returns an error if s contains
// a rune outside the Basic Multilingual Plane, which UCS-2 cannot represent.
//
// References:
//   - https://tools.ietf.org/html/rfc7292#appendix-B.1 (BMPStrings are NULL
//     terminated)
//   - http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func bmpString(s string) ([]byte, error) {
	out := make([]byte, 0, 2*len(s)+2)
	for _, r := range s {
		// EncodeRune yields U+FFFD as the first half unless r needs a
		// surrogate pair, i.e. unless r lies outside the BMP.
		hi, _ := utf16.EncodeRune(r)
		if hi != 0xfffd {
			return nil, errors.New("string contains characters that cannot be encoded in UCS-2")
		}
		out = append(out, byte(r>>8), byte(r))
	}
	return append(out, 0, 0), nil
}
// UTF16Encode utf16 encodes s into chars. It returns the resulting // length in units of uint16. It is assumed that the chars slice // has enough room for the encoded string. func UTF16Encode(s string, chars []uint16) int { n := 0 for _, v := range s { switch { case v < 0, surr1 <= v && v < surr3, v > maxRune: v = replacementChar fallthrough case v < surrSelf: chars[n] = uint16(v) n += 1 default: // surrogate pair, two uint16 values r1, r2 := utf16.EncodeRune(v) chars[n] = uint16(r1) chars[n+1] = uint16(r2) n += 2 } } return n }
func (enc *FrameEncoder) StringUtf16(v string) { // Come back to encode the size later. sizeOffset := len(enc.Content) enc.Bytes(4) wordsEncoded := 0 for _, r := range v { if r >= 0x10000 { r1, r2 := utf16.EncodeRune(r) enc.Uint16(uint16(r1)) enc.Uint16(uint16(r2)) wordsEncoded += 2 } else { enc.Uint16(uint16(r)) wordsEncoded++ } } enc.Uint16(0) wordsEncoded++ endianness.PutUint32(enc.Content[sizeOffset:], uint32(wordsEncoded)) }
// BMPString converts UTF-8 bytes to a BMPString: each rune as two big-endian
// bytes, followed by a two-byte NULL terminator. It returns an error if the
// input contains a rune outside the Basic Multilingual Plane, which UCS-2
// cannot represent.
//
// References:
//   - https://tools.ietf.org/html/rfc7292#appendix-B.1 (BMPStrings are NULL
//     terminated)
//   - http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
func BMPString(utf8String []byte) ([]byte, error) {
	out := make([]byte, 0, 2*len(utf8String)+2)
	for rest := utf8String; len(rest) > 0; {
		c, size := utf8.DecodeRune(rest)
		rest = rest[size:]

		// EncodeRune yields U+FFFD as the first half unless c needs a
		// surrogate pair, i.e. unless c lies outside the BMP.
		hi, _ := utf16.EncodeRune(c)
		if hi != 0xfffd {
			return nil, errors.New("password contains characters that cannot be encoded in UCS-2")
		}
		out = append(out, byte(c>>8), byte(c))
	}
	return append(out, 0, 0), nil
}
func cell_to_char_info(c Cell) (attr word, wc [2]wchar) { attr = color_table_fg[c.Fg&0x0F] | color_table_bg[c.Bg&0x0F] if c.Fg&AttrReverse|c.Bg&AttrReverse != 0 { attr = (attr&0xF0)>>4 | (attr&0x0F)<<4 } if c.Fg&AttrBold != 0 { attr |= foreground_intensity } if c.Bg&AttrBold != 0 { attr |= background_intensity } r0, r1 := utf16.EncodeRune(c.Ch) if r0 == 0xFFFD { wc[0] = wchar(c.Ch) wc[1] = ' ' } else { wc[0] = wchar(r0) wc[1] = wchar(r1) } return }
func encodeUTF16beRune(p []byte, c rune) (size int, status Status) { if c < 0x10000 { if len(p) < 2 { status = NO_ROOM return } p[0] = byte(c >> 8) p[1] = byte(c) return 2, SUCCESS } if len(p) < 4 { status = NO_ROOM return } s1, s2 := utf16.EncodeRune(c) p[0] = byte(s1 >> 8) p[1] = byte(s1) p[2] = byte(s2 >> 8) p[3] = byte(s2) return 4, SUCCESS }
func (b *Buffer) WriteUTF16(s string) { // The first 4 bytes is the length, as int32 (4-byte aligned). // written last. // The next n bytes is utf-16 string (1-byte aligned). offset0 := align(b.Offset, 4) // length. offset1 := align(offset0+4, 1) // contents. if len(b.Data)-offset1 < 4*len(s) { // worst case estimate, everything is surrogate pair b.grow(offset1 + 4*len(s) - len(b.Data)) } data := b.Data[offset1:] n := 0 for _, v := range s { switch { case v < 0, surr1 <= v && v < surr3, v > maxRune: v = replacementChar fallthrough case v < surrSelf: writeUint16(data[n:], v) n += 2 default: // surrogate pair, two uint16 values r1, r2 := utf16.EncodeRune(v) writeUint16(data[n:], r1) writeUint16(data[n+2:], r2) n += 4 } } // write length at b.Data[b.Offset:], before contents. // length is number of uint16 values, not number of bytes. b.WriteInt32(int32(n / 2)) b.Offset = offset1 + n }
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { if u.currentBOMPolicy&writeBOM != 0 { if len(dst) < 2 { return 0, 0, transform.ErrShortDst } dst[0], dst[1] = 0xfe, 0xff u.currentBOMPolicy = IgnoreBOM nDst = 2 } r, size := rune(0), 0 for nSrc < len(src) { r = rune(src[nSrc]) // Decode a 1-byte rune. if r < utf8.RuneSelf { size = 1 } else { // Decode a multi-byte rune. r, size = utf8.DecodeRune(src[nSrc:]) if size == 1 { // All valid runes of size 1 (those below utf8.RuneSelf) were // handled above. We have invalid UTF-8 or we haven't seen the // full character yet. if !atEOF && !utf8.FullRune(src[nSrc:]) { err = transform.ErrShortSrc break } } } if r <= 0xffff { if nDst+2 > len(dst) { err = transform.ErrShortDst break } dst[nDst+0] = uint8(r >> 8) dst[nDst+1] = uint8(r) nDst += 2 } else { if nDst+4 > len(dst) { err = transform.ErrShortDst break } r1, r2 := utf16.EncodeRune(r) dst[nDst+0] = uint8(r1 >> 8) dst[nDst+1] = uint8(r1) dst[nDst+2] = uint8(r2 >> 8) dst[nDst+3] = uint8(r2) nDst += 4 } nSrc += size } if u.endianness == LittleEndian { for i := 0; i < nDst; i += 2 { dst[i], dst[i+1] = dst[i+1], dst[i] } } return nDst, nSrc, err }