func getEncoding() encoding.Encoding { switch config.encoding { case "utf-16": return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) case "utf-16be-with-signature": return unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM) case "utf-16le-with-signature": return unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) case "euc-jp": return japanese.EUCJP case "sjis": return japanese.ShiftJIS default: return nil } }
func convertUtf8ToUtf16LE(message string) (string, error) { utf16le := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) utfEncoder := utf16le.NewEncoder() ut16LeEncodedMessage, err := utfEncoder.String(message) return ut16LeEncodedMessage, err }
func saveConns(conf *types.Configuration) { filename := replaceHome(connectionsPath) tmp := filename + ".tmp" wr, err := os.Create(tmp) p(err, "opening "+filename) defer func() { if err := os.Rename(tmp, filename); err != nil { p(os.Remove(filename), "deleting old connections.xml") p(os.Rename(tmp, filename), "overwriting connections.xml") } }() defer wr.Close() encoding := unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) textEncoder := encoding.NewEncoder() writer := textEncoder.Writer(wr) fmt.Fprintln(writer, `<?xml version="1.0" encoding="utf-16"?> <!-- ****************************************************************--> <!-- * *--> <!-- * PuTTY Configuration Manager save file - All right reserved. *--> <!-- * *--> <!-- ****************************************************************--> <!-- The following lines can be modified at your own risks. -->`) encoder := xml.NewEncoder(writer) encoder.Indent("", " ") p(encoder.Encode(&conf), "encoding xml") }
// String is a best-effort attempt to get a UTF-8 encoded version of // Value. Only MicrosoftUnicode (3,1 ,X), MacRomain (1,0,X) and Unicode platform // strings are supported. func (nameEntry *NameEntry) String() string { if nameEntry.PlatformID == PlatformUnicode || (nameEntry.PlatformID == PlatformMicrosoft && nameEntry.EncodingID == PlatformEncodingMicrosoftUnicode) { decoder := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder() outstr, _, err := transform.String(decoder, string(nameEntry.Value)) if err == nil { return outstr } } if nameEntry.PlatformID == PlatformMac && nameEntry.EncodingID == PlatformEncodingMacRoman { decoder := charmap.Macintosh.NewDecoder() outstr, _, err := transform.String(decoder, string(nameEntry.Value)) if err == nil { return outstr } } return string(nameEntry.Value) }
func Encode(code uint8, text string) []byte { switch code { // в зависимости от подходящей кодировки выбираем соответствующий метод кодирования case 8: // ucs8 enc := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder() es, _, _ := transform.Bytes(enc, []byte(text)) return es case 3: // latin1 es, _, _ := transform.Bytes(charmap.Windows1252.NewEncoder(), []byte(text)) return es case 0: // декодируем в GSM 03.38 var result bytes.Buffer for _, r := range text { if nr, ok := utf8GsmChars[r]; ok { // делаем замены известным символам result.WriteString(nr) continue } if r > '\u007F' { // удаляем все, что не входит в формат result.WriteRune('?') continue } result.WriteRune(r) // добавляем как есть } return result.Bytes() default: return []byte(text) } }
// Decode from UCS2. func (s UCS2) Decode() []byte { e := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) es, _, err := transform.Bytes(e.NewDecoder(), s) if err != nil { return s } return es }
// read umeng file, need convert utf16-le(with bom) to utf8 func readFile(path string) (reader io.Reader, err error) { e := unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) file, err := os.Open(path) if err != nil { return } reader = transform.NewReader(file, e.NewDecoder()) return }
// NewReader wraps a Reader to decode Unicode to UTF-8 as it reads. func NewReader(r io.Reader, d EncodingHint) io.Reader { var decoder *encoding.Decoder switch d { case UTF8: // Make a transformer that assumes UTF-8 but abides by the BOM. decoder = unicode.UTF8.NewDecoder() case UTF16LE: // Make an tranformer that decodes MS-Windows (16LE) UTF files: winutf := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) // Make a transformer that is like winutf, but abides by BOM if found: decoder = winutf.NewDecoder() case UTF16BE: // Make an tranformer that decodes UTF-16BE files: utf16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) // Make a transformer that is like utf16be, but abides by BOM if found: decoder = utf16be.NewDecoder() } // Make a Reader that uses utf16bom: return transform.NewReader(r, unicode.BOMOverride(decoder)) }
func TestName(t *testing.T) { for i, tc := range []struct { desc string enc encoding.Encoding name string err error }{{ "defined encoding", charmap.ISO8859_2, "iso-8859-2", nil, }, { "defined Unicode encoding", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be", nil, }, { "undefined Unicode encoding in HTML standard", unicode.UTF16(unicode.BigEndian, unicode.UseBOM), "", errUnsupported, }, { "undefined other encoding in HTML standard", charmap.CodePage437, "", errUnsupported, }, { "unknown encoding", encoding.Nop, "", errUnknown, }} { name, err := Name(tc.enc) if name != tc.name || err != tc.err { t.Errorf("%d:%s: got %q, %v; want %q, %v", i, tc.desc, name, err, tc.name, tc.err) } } }
func ExampleUTF8Validator() { for i := 0; i < 2; i++ { var transformer transform.Transformer transformer = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder() if i == 1 { transformer = transform.Chain(encoding.UTF8Validator, transformer) } dst := make([]byte, 256) src := []byte("abc\xffxyz") // src is invalid UTF-8. nDst, nSrc, err := transformer.Transform(dst, src, true) fmt.Printf("i=%d: produced %q, consumed %q, error %v\n", i, dst[:nDst], src[:nSrc], err) } // Output: // i=0: produced "\x00a\x00b\x00c\xff\xfd\x00x\x00y\x00z", consumed "abc\xffxyz", error <nil> // i=1: produced "\x00a\x00b\x00c", consumed "abc", error encoding: invalid UTF-8 }
// AddUnicodeEntry adds an entry to the name table for the 'Unicode' platform, // with Default Encoding (UTF-16). It returns an error if the value cannot be // represented in UTF-16. func (table *TableName) AddUnicodeEntry(nameId NameID, value string) error { encoder := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder() outstr, _, err := transform.String(encoder, value) if err != nil { return err } table.Add(&NameEntry{ PlatformID: PlatformUnicode, EncodingID: PlatformEncodingUnicodeDefault, LanguageID: PlatformLanguageUnicodeDefault, NameID: nameId, Value: []byte(outstr), }) return nil }
func Decode(code uint8, text []byte) string { switch code { case 8: // UCS2 es, _, _ := transform.Bytes( unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), text) return string(es) case 3: // latin1 (windows1252) es, _, _ := transform.Bytes(charmap.Windows1252.NewDecoder(), text) return string(es) case 0: // декодируем из формата GSM 03.38 var result bytes.Buffer for _, r := range text { if nr, ok := gsmUtf8Chars[rune(r)]; ok { // делаем замены известным символам result.WriteString(nr) continue } result.WriteByte(r) // добавляем как есть } return result.String() default: return string(text) } }
// Creates a scanner similar to os.Open() but decodes the file as UTF-16 // if the special byte order mark is present. func newScannerUTF16or8(filename string) (utfScanner, error) { // Read the file into a []byte: file, err := os.Open(filename) if err != nil { return nil, err } // Check for BOM marker := make([]byte, 2) numread, err := io.ReadAtLeast(file, marker, 2) file.Seek(0, 0) if numread == 2 && err == nil && ((marker[0] == 0xFE && marker[1] == 0xFF) || (marker[0] == 0xFF && marker[1] == 0xFE)) { // Make an tranformer that converts MS-Win default to UTF8: win16be := unicode.UTF16(unicode.BigEndian, unicode.UseBOM) // Make a transformer that is like win16be, but abides by BOM: utf16bom := unicode.BOMOverride(win16be.NewDecoder()) // Make a Reader that uses utf16bom: unicodeReader := transform.NewReader(file, utf16bom) return unicodeReader, nil } return file, nil }
func GetCharset(charset string) encoding.Encoding { switch strings.ToUpper(charset) { case "GB2312", "GB18030": return simplifiedchinese.GB18030 case "HZ-GB2312": return simplifiedchinese.HZGB2312 case "GBK": return simplifiedchinese.GBK case "BIG5": return traditionalchinese.Big5 case "EUC-JP": return japanese.EUCJP case "ISO2022JP": return japanese.ISO2022JP case "SHIFTJIS": return japanese.ShiftJIS case "EUC-KR": return korean.EUCKR case "UTF8", "UTF-8": return encoding.Nop case "UTF16-BOM", "UTF-16-BOM": return unicode.UTF16(unicode.BigEndian, unicode.UseBOM) case "UTF16-BE-BOM", "UTF-16-BE-BOM": return unicode.UTF16(unicode.BigEndian, unicode.UseBOM) case "UTF16-LE-BOM", "UTF-16-LE-BOM": return unicode.UTF16(unicode.LittleEndian, unicode.UseBOM) case "UTF16", "UTF-16": return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) case "UTF16-BE", "UTF-16-BE": return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) case "UTF16-LE", "UTF-16-LE": return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) //case "UTF32", "UTF-32": // return simplifiedchinese.GBK default: return nil } }
func deployStartup(user *runtime.OSUser, configFile string, exePath string) error { scheduledTaskUTF8 := []byte(strings.Replace(`<?xml version="1.0" encoding="UTF-16"?> <Task version="1.2" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task"> <RegistrationInfo> <Date>2016-04-28T17:25:08.4654422</Date> <Author>GenericWorker</Author> <Description>Runs the generic worker.</Description> </RegistrationInfo> <Triggers> <LogonTrigger> <Enabled>true</Enabled> <UserId>GenericWorker</UserId> </LogonTrigger> </Triggers> <Principals> <Principal id="Author"> <UserId>GenericWorker</UserId> <LogonType>InteractiveToken</LogonType> <RunLevel>HighestAvailable</RunLevel> </Principal> </Principals> <Settings> <MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy> <DisallowStartIfOnBatteries>true</DisallowStartIfOnBatteries> <StopIfGoingOnBatteries>true</StopIfGoingOnBatteries> <AllowHardTerminate>true</AllowHardTerminate> <StartWhenAvailable>false</StartWhenAvailable> <RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable> <IdleSettings> <StopOnIdleEnd>true</StopOnIdleEnd> <RestartOnIdle>false</RestartOnIdle> </IdleSettings> <AllowStartOnDemand>true</AllowStartOnDemand> <Enabled>true</Enabled> <Hidden>false</Hidden> <RunOnlyIfIdle>false</RunOnlyIfIdle> <WakeToRun>false</WakeToRun> <ExecutionTimeLimit>PT0S</ExecutionTimeLimit> <Priority>3</Priority> </Settings> <Actions Context="Author"> <Exec> <Command>C:\generic-worker\run-generic-worker.bat</Command> </Exec> </Actions> </Task>`, "\n", "\r\n", -1)) utf16Encoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder() scheduledTaskUTF16, err := utf16Encoder.Bytes(scheduledTaskUTF8) if err != nil { return fmt.Errorf("INTERNAL ERROR: Could not UTF16-encode (static) scheduled task: %s\n\nError received: %s", scheduledTaskUTF8, err) } xmlFilePath := filepath.Join(filepath.Dir(exePath), "Run Generic Worker.xml") err = ioutil.WriteFile(xmlFilePath, scheduledTaskUTF16, 0644) if err != nil { return fmt.Errorf("I 
was not able to write the file \"Run Generic Worker.xml\" to file location %q with 0644 permissions, due to: %s", xmlFilePath, err) } err = runtime.RunCommands(false, []string{"schtasks", "/create", "/tn", "Run Generic Worker on login", "/xml", xmlFilePath}) if err != nil { return fmt.Errorf("Not able to schedule task \"Run Generic Worker on login\" using schtasks command, due to error: %s\n\nAlso see stderr/stdout logs for output of the command that failed.", err) } k, _, err := registry.CreateKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon`, registry.WRITE) if err != nil { return fmt.Errorf(`Was not able to create registry key 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon' due to %s`, err) } defer k.Close() err = k.SetDWordValue("AutoAdminLogon", 1) if err != nil { return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\AutoAdminLogon' to 1 due to %s`, err) } err = k.SetStringValue("DefaultUserName", user.Name) if err != nil { return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\DefaultUserName' to %q due to %s`, user.Name, err) } err = k.SetStringValue("DefaultPassword", user.Password) if err != nil { return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\DefaultPassword' to %q due to %s`, user.Password, err) } batScriptFilePath := filepath.Join(filepath.Dir(exePath), "run-generic-worker.bat") batScriptContents := []byte(strings.Join([]string{ `:: run the generic worker`, ``, `:: cd to folder containing this script`, `pushd %~dp0`, ``, `.\generic-worker.exe run --configure-for-aws > .\generic-worker.log 2>&1`, }, "\r\n")) err = ioutil.WriteFile(batScriptFilePath, batScriptContents, 0755) if err != nil { return fmt.Errorf("Was not able to create file %q with access permissions 0755 due to %s", batScriptFilePath, err) } return nil }
nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF) if nDst < 0 || len(dst) < nDst { t.Errorf("%s: nDst=%d out of range", tc.desc, nDst) continue } got := string(dst[:nDst]) if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr { t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v", tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr) continue } } } var ( utf16LEIB = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) // UTF-16LE (atypical interpretation) utf16LEUB = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM) // UTF-16, LE utf16LEEB = unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) // UTF-16, LE, Expect utf16BEIB = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) // UTF-16BE (atypical interpretation) utf16BEUB = unicode.UTF16(unicode.BigEndian, unicode.UseBOM) // UTF-16 default utf16BEEB = unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM) // UTF-16 Expect ) func TestUTF16(t *testing.T) { testCases := []struct { desc string src string notEOF bool // the inverse of atEOF sizeDst int want string nSrc int
utf8: "Heļlo", }, { e: charmap.Windows1258, encoded: "Hell\xf5", utf8: "Hellơ", }, { e: charmap.XUserDefined, encoded: "\x00\x40\x7f\x80\xab\xff", utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff", }, // UTF-16 tests. { e: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65", utf8: "\x57\u00e4\U0001d565", }, { e: unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM), encPrefix: "\xfe\xff", encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65", utf8: "\x57\u00e4\U0001d565", }, { e: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd", utf8: "\x57\u00e4\U0001d565", }, {
for i, t := range tickets { ns[i] = getNode(t) } return ns } func getNode(t string) *srvpb.Node { for _, n := range nodes { if n.Ticket == t { return n } } return &srvpb.Node{Ticket: t} } var utf16LE = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) func encodeText(e encoding.Encoding, text string) []byte { res, _, err := transform.Bytes(e.NewEncoder(), []byte(text)) if err != nil { panic(err) } return res } func TestNodes(t *testing.T) { st := tbl.Construct(t) for _, node := range tbl.Nodes { reply, err := st.Nodes(ctx, &xpb.NodesRequest{ Ticket: []string{node.Ticket},
var ( charsetDetector = chardet.NewTextDetector() charsetDetectors = map[string]encoding.Encoding{ "Big5": traditionalchinese.Big5, "EUC-JP": japanese.EUCJP, "EUC-KR": korean.EUCKR, "GB-18030": simplifiedchinese.GB18030, "ISO-2022-JP": japanese.ISO2022JP, "ISO-8859-5": charmap.ISO8859_5, "ISO-8859-6": charmap.ISO8859_6, "ISO-8859-7": charmap.ISO8859_7, "ISO-8859-8": charmap.ISO8859_8, "ISO-8859-8-I": charmap.ISO8859_8I, "KOI8-R": charmap.KOI8R, "Shift_JIS": japanese.ShiftJIS, "UTF-16BE": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), "UTF-16LE": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), "windows-1251": charmap.Windows1251, "windows-1252": charmap.Windows1252, "windows-1253": charmap.Windows1253, "windows-1254": charmap.Windows1254, "windows-1255": charmap.Windows1255, "windows-1256": charmap.Windows1256, // Decoders don't exist for these // "IBM420_ltr": nil, // "IBM420_rtl": nil, // "IBM424_ltr": nil, // "IBM424_rtl": nil, // "ISO-2022-CN": nil, // "ISO-2022-KR": nil,
"x-sjis": {japanese.ShiftJIS, "shift_jis"}, "cseuckr": {korean.EUCKR, "euc-kr"}, "csksc56011987": {korean.EUCKR, "euc-kr"}, "euc-kr": {korean.EUCKR, "euc-kr"}, "iso-ir-149": {korean.EUCKR, "euc-kr"}, "korean": {korean.EUCKR, "euc-kr"}, "ks_c_5601-1987": {korean.EUCKR, "euc-kr"}, "ks_c_5601-1989": {korean.EUCKR, "euc-kr"}, "ksc5601": {korean.EUCKR, "euc-kr"}, "ksc_5601": {korean.EUCKR, "euc-kr"}, "windows-949": {korean.EUCKR, "euc-kr"}, "csiso2022kr": {encoding.Replacement, "replacement"}, "iso-2022-kr": {encoding.Replacement, "replacement"}, "iso-2022-cn": {encoding.Replacement, "replacement"}, "iso-2022-cn-ext": {encoding.Replacement, "replacement"}, "utf-16be": {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"}, "utf-16": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"}, "utf-16le": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"}, "x-user-defined": {charmap.XUserDefined, "x-user-defined"}, } var charsetRegexp *regexp.Regexp var errParsingCharset = errors.New("Could not find a valid charset in the HTML body") func ConvertToUTF8String(charset string, textBytes []byte) (string, error) { if strings.ToLower(charset) == "utf-8" { return string(textBytes), nil } item, ok := encodings[strings.ToLower(charset)] if !ok { return "", fmt.Errorf("Unsupport charset %s", charset)
iso8859_13: charmap.ISO8859_13, iso8859_14: charmap.ISO8859_14, iso8859_15: charmap.ISO8859_15, iso8859_16: charmap.ISO8859_16, koi8r: charmap.KOI8R, koi8u: charmap.KOI8U, macintosh: charmap.Macintosh, windows874: charmap.Windows874, windows1250: charmap.Windows1250, windows1251: charmap.Windows1251, windows1252: charmap.Windows1252, windows1253: charmap.Windows1253, windows1254: charmap.Windows1254, windows1255: charmap.Windows1255, windows1256: charmap.Windows1256, windows1257: charmap.Windows1257, windows1258: charmap.Windows1258, macintoshCyrillic: charmap.MacintoshCyrillic, gbk: simplifiedchinese.GBK, gb18030: simplifiedchinese.GB18030, big5: traditionalchinese.Big5, eucjp: japanese.EUCJP, iso2022jp: japanese.ISO2022JP, shiftJIS: japanese.ShiftJIS, euckr: korean.EUCKR, replacement: encoding.Replacement, utf16be: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), utf16le: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), xUserDefined: charmap.XUserDefined, }
// utf16 BOM based encodings. Only seekable data sources are supported for // the need to check the optional Byte Order Marker being available in data source // before configuring the actual decoder and encoder. var ( // BOM is required, as no fallback is specified utf16BOMRequired = utf16BOM(unknownEndianess) // BOM is optional. Falls back to BigEndian if missing utf16BOMBigEndian = utf16BOM(bigEndian) // BOM is optional. Falls back to LittleEndian if missing utf16BOMLittleEndian = utf16BOM(littleEndian) ) var utf16Map = map[endianness]Encoding{ bigEndian: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), littleEndian: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), } func utf16BOM(e endianness) EncodingFactory { return func(in_ io.Reader) (Encoding, error) { in, ok := in_.(io.ReadSeeker) if !ok { return nil, ErrUnsupportedSourceTypeBOM } return utf16Seekable(in, e) } } func utf16Seekable(in io.ReadSeeker, endianness endianness) (Encoding, error) {
func TestUtf16BOMEncodings(t *testing.T) { expectedLE := utf16Map[littleEndian] expectedBE := utf16Map[bigEndian] var tests = []struct { name string testEndianess unicode.Endianness testBOMPolicy unicode.BOMPolicy expectedEncoding Encoding expectedError error expectedOffset int }{ {"utf-16-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2}, {"utf-16-bom", unicode.BigEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0}, {"utf-16-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2}, {"utf-16-bom", unicode.LittleEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0}, // big endian based encoding {"utf-16be-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2}, {"utf-16be-bom", unicode.BigEndian, unicode.IgnoreBOM, expectedBE, nil, 0}, {"utf-16be-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2}, // little endian baed encoding {"utf-16le-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2}, {"utf-16le-bom", unicode.LittleEndian, unicode.IgnoreBOM, expectedLE, nil, 0}, {"utf-16le-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2}, } text := []byte("hello world") for _, test := range tests { t.Logf("testing: codec=%v, bigendian=%v, bomPolicy=%v", test.name, test.testEndianess, test.testBOMPolicy) buf := bytes.NewBuffer(nil) writeEncoding := unicode.UTF16(test.testEndianess, test.testBOMPolicy) writer := transform.NewWriter(buf, writeEncoding.NewEncoder()) writer.Write(text) writer.Close() rawReader := bytes.NewReader(buf.Bytes()) contentLen := rawReader.Len() encodingFactory, ok := FindEncoding(test.name) if !ok { t.Errorf("Failed to load encoding: %v", test.name) continue } encoding, err := encodingFactory(rawReader) contentOffset := contentLen - rawReader.Len() assert.Equal(t, test.expectedEncoding, encoding) assert.Equal(t, test.expectedError, err) assert.Equal(t, test.expectedOffset, contentOffset) if err == nil { reader := transform.NewReader(rawReader, encoding.NewDecoder()) 
content, _ := ioutil.ReadAll(reader) assert.Equal(t, text, content) } } }
// Decoder wraps a reader for decoding input to utf-8 on read.
type Decoder func(io.Reader) io.Reader

// encodings maps an encoding name to the Decoder registered for it. The
// entries are built with the helpers trans and enc, which are defined
// elsewhere in the package.
// NOTE(review): the "shadow htmlindex" remarks below suggest these entries
// take precedence over a lookup in htmlindex — confirm against the lookup code.
var encodings = map[string]Decoder{
	// default
	"nop":   Plain,
	"plain": Plain,

	// utf8 (validate input) - shadow htmlindex utf8 codecs not validating input
	"unicode-1-1-utf-8": trans(encoding.UTF8Validator),
	"utf-8":             trans(encoding.UTF8Validator),
	"utf8":              trans(encoding.UTF8Validator),

	// utf16
	"utf-16be-bom": enc(unicode.UTF16(unicode.BigEndian, unicode.UseBOM)),

	// simplified chinese
	"gbk": enc(simplifiedchinese.GBK), // shadow htmlindex using 'GB18030' for GBK

	// 8bit charmap encodings
	"iso8859-6e": enc(charmap.ISO8859_6E),
	"iso8859-6i": enc(charmap.ISO8859_6I),
	"iso8859-8e": enc(charmap.ISO8859_8E),
	"iso8859-8i": enc(charmap.ISO8859_8I),
}

// Plain file encoding not transforming any read bytes.
var Plain = nopEnc

// Find returns
"golang.org/x/text/transform" ) // Decoder wraps a reader for decoding input to utf-8 on read. type Decoder func(io.Reader) io.Reader var encodings = map[string]Decoder{ // default "nop": Plain, "plain": Plain, // utf8 (validate input) "utf-8": trans(encoding.UTF8Validator), // utf16 "utf-16be-bom": enc(unicode.UTF16(unicode.BigEndian, unicode.UseBOM)), "utf-16be": enc(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)), "utf-16le": enc(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)), // traditional chinese "big5": enc(traditionalchinese.Big5), // simplified chinese "gb18030": enc(simplifiedchinese.GB18030), "gbk": enc(simplifiedchinese.GBK), "hzgb2312": enc(simplifiedchinese.HZGB2312), // korean "euckr": enc(korean.EUCKR), // japanese