Пример #1
1
// getEncoding returns the text encoding selected by config.encoding.
//
// Recognized names are "utf-16" (little-endian, byte order mark ignored),
// "utf-16be-with-signature" and "utf-16le-with-signature" (byte order mark
// expected), "euc-jp" and "sjis". Any other name yields nil.
func getEncoding() encoding.Encoding {
	switch name := config.encoding; {
	case name == "euc-jp":
		return japanese.EUCJP
	case name == "sjis":
		return japanese.ShiftJIS
	case name == "utf-16":
		return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
	case name == "utf-16le-with-signature":
		return unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM)
	case name == "utf-16be-with-signature":
		return unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM)
	}
	// Unknown or unset encoding name.
	return nil
}
Пример #2
0
// convertUtf8ToUtf16LE re-encodes the UTF-8 string message as little-endian
// UTF-16 using the IgnoreBOM policy. The encoded bytes are returned as a
// string, together with any error reported by the encoder.
func convertUtf8ToUtf16LE(message string) (string, error) {
	return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder().String(message)
}
Пример #3
0
Файл: pcm.go Проект: cfstras/pcm
// saveConns serializes conf as an indented XML document, encoded as
// little-endian UTF-16 with a byte order mark, and stores it at the
// connections path.
//
// The document is written to "<path>.tmp" first and then renamed over the
// real file. If the rename fails, the old file is removed and the rename is
// retried. Every error is handed to p together with a short description of
// the step that failed; p is called with nil errors as well, so it is
// assumed to ignore those.
func saveConns(conf *types.Configuration) {
	filename := replaceHome(connectionsPath)
	tmp := filename + ".tmp"
	wr, err := os.Create(tmp)
	// The file being created is the temporary one, so name it in the message.
	p(err, "creating "+tmp)

	// Deferred calls run last-in first-out: the temporary file is closed
	// first and only then moved into place.
	defer func() {
		if err := os.Rename(tmp, filename); err != nil {
			p(os.Remove(filename), "deleting old connections.xml")
			p(os.Rename(tmp, filename), "overwriting connections.xml")
		}
	}()
	defer wr.Close()

	utf16le := unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM)
	writer := utf16le.NewEncoder().Writer(wr)

	// The header is emitted by hand; report a failed write instead of
	// silently producing a truncated file.
	_, err = fmt.Fprintln(writer, `<?xml version="1.0" encoding="utf-16"?>
<!-- ****************************************************************-->
<!-- *                                                              *-->
<!-- * PuTTY Configuration Manager save file - All right reserved.  *-->
<!-- *                                                              *-->
<!-- ****************************************************************-->
<!-- The following lines can be modified at your own risks.  -->`)
	p(err, "writing xml header")

	encoder := xml.NewEncoder(writer)
	encoder.Indent("", "  ")
	p(encoder.Encode(&conf), "encoding xml")
}
Пример #4
0
// String returns a best-effort UTF-8 rendering of Value.
//
// Entries on the Unicode platform, and Microsoft-platform entries with the
// Microsoft Unicode encoding, are decoded as big-endian UTF-16. Mac-platform
// entries with the Mac Roman encoding are decoded with the Macintosh charmap.
// For every other platform/encoding pair, or when decoding fails, the raw
// bytes of Value are returned as a string.
func (nameEntry *NameEntry) String() string {
	raw := string(nameEntry.Value)

	isUTF16 := nameEntry.PlatformID == PlatformUnicode ||
		(nameEntry.PlatformID == PlatformMicrosoft && nameEntry.EncodingID == PlatformEncodingMicrosoftUnicode)
	if isUTF16 {
		utf16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder()
		if decoded, _, err := transform.String(utf16be, raw); err == nil {
			return decoded
		}
	}

	isMacRoman := nameEntry.PlatformID == PlatformMac && nameEntry.EncodingID == PlatformEncodingMacRoman
	if isMacRoman {
		macRoman := charmap.Macintosh.NewDecoder()
		if decoded, _, err := transform.String(macRoman, raw); err == nil {
			return decoded
		}
	}

	// Unsupported encoding or failed decode: hand back the bytes untouched.
	return raw
}
Пример #5
0
// Encode converts the UTF-8 string text into the byte representation
// selected by code:
//
//	8 - big-endian UTF-16 without a byte order mark (UCS2)
//	3 - Windows-1252 (latin1)
//	0 - GSM 03.38
//
// Any other code returns the bytes of text unchanged. Transformation errors
// for codes 8 and 3 are ignored; whatever bytes were produced are returned.
func Encode(code uint8, text string) []byte {
	if code == 8 { // UCS2
		ucs2 := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
		out, _, _ := transform.Bytes(ucs2, []byte(text))
		return out
	}
	if code == 3 { // latin1
		out, _, _ := transform.Bytes(charmap.Windows1252.NewEncoder(), []byte(text))
		return out
	}
	if code != 0 {
		return []byte(text)
	}

	// Encode to GSM 03.38.
	var gsm bytes.Buffer
	for _, ch := range text {
		replacement, known := utf8GsmChars[ch]
		switch {
		case known:
			// Substitute characters that have a known replacement.
			gsm.WriteString(replacement)
		case ch > '\u007F':
			// Anything else outside the 7-bit range is not representable.
			gsm.WriteRune('?')
		default:
			// Copy the character as is.
			gsm.WriteRune(ch)
		}
	}
	return gsm.Bytes()
}
Пример #6
0
// Decode converts s from big-endian UTF-16 (IgnoreBOM policy) to UTF-8.
// If the transformation reports an error, s is returned unchanged.
func (s UCS2) Decode() []byte {
	decoder := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder()
	if decoded, _, err := transform.Bytes(decoder, s); err == nil {
		return decoded
	}
	return s
}
Пример #7
0
// readFile opens the umeng file at path and returns a reader that converts
// its content from little-endian UTF-16 (byte order mark expected) to UTF-8.
//
// If the file cannot be opened, the returned reader is nil and err is the
// error from os.Open. The opened file is not closed by this function, and the
// returned io.Reader offers no way to close it.
func readFile(path string) (reader io.Reader, err error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	utf16le := unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM)
	return transform.NewReader(file, utf16le.NewDecoder()), nil
}
Пример #8
0
// NewReader wraps r so that Unicode input is decoded to UTF-8 as it is read.
//
// d selects the encoding assumed when the stream does not start with a byte
// order mark: UTF8, UTF16LE (MS-Windows style files) or UTF16BE. A byte order
// mark found at the start of the stream overrides the hint.
//
// An unrecognized hint is treated like UTF8. Previously it left the fallback
// decoder nil, which made unicode.BOMOverride operate on a nil decoder.
func NewReader(r io.Reader, d EncodingHint) io.Reader {
	// Default fallback: assume UTF-8. This covers the UTF8 hint as well as
	// any hint value this function does not know about.
	decoder := unicode.UTF8.NewDecoder()

	switch d {
	case UTF16LE:
		decoder = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
	case UTF16BE:
		decoder = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder()
	}

	// BOMOverride switches to the encoding announced by a leading BOM and
	// uses the hinted decoder only when no BOM is present.
	return transform.NewReader(r, unicode.BOMOverride(decoder))
}
Пример #9
0
// TestName checks that Name returns the expected name, or the expected
// error, for a selection of encodings.
func TestName(t *testing.T) {
	testCases := []struct {
		desc string
		enc  encoding.Encoding
		name string
		err  error
	}{
		{desc: "defined encoding", enc: charmap.ISO8859_2, name: "iso-8859-2"},
		{desc: "defined Unicode encoding", enc: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), name: "utf-16be"},
		{desc: "undefined Unicode encoding in HTML standard", enc: unicode.UTF16(unicode.BigEndian, unicode.UseBOM), err: errUnsupported},
		{desc: "undefined other encoding in HTML standard", enc: charmap.CodePage437, err: errUnsupported},
		{desc: "unknown encoding", enc: encoding.Nop, err: errUnknown},
	}

	for i, tc := range testCases {
		got, err := Name(tc.enc)
		if got == tc.name && err == tc.err {
			continue
		}
		t.Errorf("%d:%s: got %q, %v; want %q, %v", i, tc.desc, got, err, tc.name, tc.err)
	}
}
Пример #10
0
func ExampleUTF8Validator() {
	for i := 0; i < 2; i++ {
		var transformer transform.Transformer
		transformer = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
		if i == 1 {
			transformer = transform.Chain(encoding.UTF8Validator, transformer)
		}
		dst := make([]byte, 256)
		src := []byte("abc\xffxyz") // src is invalid UTF-8.
		nDst, nSrc, err := transformer.Transform(dst, src, true)
		fmt.Printf("i=%d: produced %q, consumed %q, error %v\n",
			i, dst[:nDst], src[:nSrc], err)
	}
	// Output:
	// i=0: produced "\x00a\x00b\x00c\xff\xfd\x00x\x00y\x00z", consumed "abc\xffxyz", error <nil>
	// i=1: produced "\x00a\x00b\x00c", consumed "abc", error encoding: invalid UTF-8
}
Пример #11
0
// AddUnicodeEntry encodes value as big-endian UTF-16 and adds it to the name
// table as an entry for the Unicode platform, using the default Unicode
// encoding and language IDs. If value cannot be encoded, the encoder's error
// is returned and the table is left unchanged.
func (table *TableName) AddUnicodeEntry(nameId NameID, value string) error {
	utf16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()

	encoded, _, err := transform.String(utf16be, value)
	if err != nil {
		return err
	}

	entry := &NameEntry{
		PlatformID: PlatformUnicode,
		EncodingID: PlatformEncodingUnicodeDefault,
		LanguageID: PlatformLanguageUnicodeDefault,
		NameID:     nameId,
		Value:      []byte(encoded),
	}
	table.Add(entry)

	return nil
}
Пример #12
0
func Decode(code uint8, text []byte) string {
	switch code {
	case 8: // UCS2
		es, _, _ := transform.Bytes(
			unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), text)
		return string(es)
	case 3: // latin1 (windows1252)
		es, _, _ := transform.Bytes(charmap.Windows1252.NewDecoder(), text)
		return string(es)
	case 0: // декодируем из формата GSM 03.38
		var result bytes.Buffer
		for _, r := range text {
			if nr, ok := gsmUtf8Chars[rune(r)]; ok { // делаем замены известным символам
				result.WriteString(nr)
				continue
			}
			result.WriteByte(r) // добавляем как есть
		}
		return result.String()
	default:
		return string(text)
	}
}
Пример #13
0
// newScannerUTF16or8 opens filename, similar to os.Open, but decodes the
// content as UTF-16 when the file starts with a UTF-16 byte order mark
// (FE FF or FF FE). Without a byte order mark the file itself is returned
// and its bytes are read as they are.
//
// An error is returned if the file cannot be opened or cannot be rewound
// after the byte order mark check; in the latter case the file is closed
// before returning.
func newScannerUTF16or8(filename string) (utfScanner, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}

	// Peek at the first two bytes to look for a byte order mark. A failed
	// read (for example a file shorter than two bytes) means "no BOM".
	marker := make([]byte, 2)
	numread, readErr := io.ReadAtLeast(file, marker, 2)

	// Rewind so the caller sees the file from the beginning, including the
	// BOM, which the decoder below consumes itself.
	if _, err := file.Seek(0, io.SeekStart); err != nil {
		file.Close()
		return nil, err
	}

	hasBOM := numread == 2 && readErr == nil &&
		((marker[0] == 0xFE && marker[1] == 0xFF) || (marker[0] == 0xFF && marker[1] == 0xFE))
	if !hasBOM {
		return file, nil
	}

	// Default to big-endian UTF-16, but let the BOM pick the byte order.
	win16be := unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
	utf16bom := unicode.BOMOverride(win16be.NewDecoder())

	return transform.NewReader(file, utf16bom), nil
}
Пример #14
0
// GetCharset returns the encoding registered for the charset name, compared
// case-insensitively. It returns nil for names it does not know.
//
// "UTF8"/"UTF-8" map to encoding.Nop. The UTF-16 names ending in "-BOM" use
// the UseBOM policy and the others IgnoreBOM; names that do not state a byte
// order use big-endian.
func GetCharset(charset string) encoding.Encoding {
	switch strings.ToUpper(charset) {
	// Simplified and traditional Chinese.
	case "GB2312", "GB18030":
		return simplifiedchinese.GB18030
	case "HZ-GB2312":
		return simplifiedchinese.HZGB2312
	case "GBK":
		return simplifiedchinese.GBK
	case "BIG5":
		return traditionalchinese.Big5

	// Japanese.
	case "EUC-JP":
		return japanese.EUCJP
	case "ISO2022JP":
		return japanese.ISO2022JP
	case "SHIFTJIS":
		return japanese.ShiftJIS

	// Korean.
	case "EUC-KR":
		return korean.EUCKR

	// Unicode.
	case "UTF8", "UTF-8":
		return encoding.Nop
	case "UTF16-BOM", "UTF-16-BOM", "UTF16-BE-BOM", "UTF-16-BE-BOM":
		return unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
	case "UTF16-LE-BOM", "UTF-16-LE-BOM":
		return unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)
	case "UTF16", "UTF-16", "UTF16-BE", "UTF-16-BE":
		return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
	case "UTF16-LE", "UTF-16-LE":
		return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
	}

	// Everything else, including UTF-32, has no mapping here.
	return nil
}
Пример #15
0
// deployStartup arranges for the generic worker to be started when the given
// user logs on, and for that user to be logged on automatically.
//
// It performs the following steps, returning an error describing the first
// one that fails:
//
//  1. Writes a static scheduled-task definition, with CRLF line endings and
//     encoded as little-endian UTF-16 with a byte order mark, to
//     "Run Generic Worker.xml" next to exePath.
//  2. Registers that definition through "schtasks /create".
//  3. Sets AutoAdminLogon, DefaultUserName and DefaultPassword under the
//     Winlogon registry key from user.
//  4. Writes "run-generic-worker.bat" next to exePath.
//
// configFile is not used by this function. Error messages deliberately never
// contain user.Password.
func deployStartup(user *runtime.OSUser, configFile string, exePath string) error {
	scheduledTaskUTF8 := []byte(strings.Replace(`<?xml version="1.0" encoding="UTF-16"?>
<Task version="1.2" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task">
  <RegistrationInfo>
    <Date>2016-04-28T17:25:08.4654422</Date>
    <Author>GenericWorker</Author>
    <Description>Runs the generic worker.</Description>
  </RegistrationInfo>
  <Triggers>
    <LogonTrigger>
      <Enabled>true</Enabled>
      <UserId>GenericWorker</UserId>
    </LogonTrigger>
  </Triggers>
  <Principals>
    <Principal id="Author">
      <UserId>GenericWorker</UserId>
      <LogonType>InteractiveToken</LogonType>
      <RunLevel>HighestAvailable</RunLevel>
    </Principal>
  </Principals>
  <Settings>
    <MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>
    <DisallowStartIfOnBatteries>true</DisallowStartIfOnBatteries>
    <StopIfGoingOnBatteries>true</StopIfGoingOnBatteries>
    <AllowHardTerminate>true</AllowHardTerminate>
    <StartWhenAvailable>false</StartWhenAvailable>
    <RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable>
    <IdleSettings>
      <StopOnIdleEnd>true</StopOnIdleEnd>
      <RestartOnIdle>false</RestartOnIdle>
    </IdleSettings>
    <AllowStartOnDemand>true</AllowStartOnDemand>
    <Enabled>true</Enabled>
    <Hidden>false</Hidden>
    <RunOnlyIfIdle>false</RunOnlyIfIdle>
    <WakeToRun>false</WakeToRun>
    <ExecutionTimeLimit>PT0S</ExecutionTimeLimit>
    <Priority>3</Priority>
  </Settings>
  <Actions Context="Author">
    <Exec>
      <Command>C:\generic-worker\run-generic-worker.bat</Command>
    </Exec>
  </Actions>
</Task>`, "\n", "\r\n", -1))

	// The XML declares itself as UTF-16, so re-encode the UTF-8 literal.
	utf16Encoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder()
	scheduledTaskUTF16, err := utf16Encoder.Bytes(scheduledTaskUTF8)
	if err != nil {
		return fmt.Errorf("INTERNAL ERROR: Could not UTF16-encode (static) scheduled task: %s\n\nError received: %s", scheduledTaskUTF8, err)
	}
	xmlFilePath := filepath.Join(filepath.Dir(exePath), "Run Generic Worker.xml")
	err = ioutil.WriteFile(xmlFilePath, scheduledTaskUTF16, 0644)
	if err != nil {
		return fmt.Errorf("I was not able to write the file \"Run Generic Worker.xml\" to file location %q with 0644 permissions, due to: %s", xmlFilePath, err)
	}
	err = runtime.RunCommands(false, []string{"schtasks", "/create", "/tn", "Run Generic Worker on login", "/xml", xmlFilePath})
	if err != nil {
		return fmt.Errorf("Not able to schedule task \"Run Generic Worker on login\" using schtasks command, due to error: %s\n\nAlso see stderr/stdout logs for output of the command that failed.", err)
	}

	// Configure automatic logon for the worker user.
	k, _, err := registry.CreateKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon`, registry.WRITE)
	if err != nil {
		return fmt.Errorf(`Was not able to create registry key 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon' due to %s`, err)
	}
	defer k.Close()
	err = k.SetDWordValue("AutoAdminLogon", 1)
	if err != nil {
		return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\AutoAdminLogon' to 1 due to %s`, err)
	}
	err = k.SetStringValue("DefaultUserName", user.Name)
	if err != nil {
		return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\DefaultUserName' to %q due to %s`, user.Name, err)
	}
	err = k.SetStringValue("DefaultPassword", user.Password)
	if err != nil {
		// Do not echo the password: errors end up in logs and on consoles.
		return fmt.Errorf(`Was not able to set registry entry 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\DefaultPassword' due to %s`, err)
	}

	batScriptFilePath := filepath.Join(filepath.Dir(exePath), "run-generic-worker.bat")
	batScriptContents := []byte(strings.Join([]string{
		`:: run the generic worker`,
		``,
		`:: cd to folder containing this script`,
		`pushd %~dp0`,
		``,
		`.\generic-worker.exe run --configure-for-aws > .\generic-worker.log 2>&1`,
	}, "\r\n"))
	err = ioutil.WriteFile(batScriptFilePath, batScriptContents, 0755)
	if err != nil {
		return fmt.Errorf("Was not able to create file %q with access permissions 0755 due to %s", batScriptFilePath, err)
	}
	return nil
}
Пример #16
0
		nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
		if nDst < 0 || len(dst) < nDst {
			t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
			continue
		}
		got := string(dst[:nDst])
		if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
			t.Errorf("%s:\ngot  %+q, %d, %v\nwant %+q, %d, %v",
				tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
			continue
		}
	}
}

// UTF-16 encodings, one per combination of default byte order and byte order
// mark policy. The name suffix spells out the combination: LE or BE for
// little or big endian, followed by IB, UB or EB for the IgnoreBOM, UseBOM or
// ExpectBOM policy.
var (
	utf16LEIB = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) // UTF-16LE (atypical interpretation)
	utf16LEUB = unicode.UTF16(unicode.LittleEndian, unicode.UseBOM)    // UTF-16, LE
	utf16LEEB = unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) // UTF-16, LE, Expect
	utf16BEIB = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)    // UTF-16BE (atypical interpretation)
	utf16BEUB = unicode.UTF16(unicode.BigEndian, unicode.UseBOM)       // UTF-16 default
	utf16BEEB = unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM)    // UTF-16 Expect
)

func TestUTF16(t *testing.T) {
	testCases := []struct {
		desc    string
		src     string
		notEOF  bool // the inverse of atEOF
		sizeDst int
		want    string
		nSrc    int
Пример #17
0
		utf8:    "Heļlo",
	},
	{
		e:       charmap.Windows1258,
		encoded: "Hell\xf5",
		utf8:    "Hellơ",
	},
	{
		e:       charmap.XUserDefined,
		encoded: "\x00\x40\x7f\x80\xab\xff",
		utf8:    "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
	},

	// UTF-16 tests.
	{
		e:       unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
		encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
		utf8:    "\x57\u00e4\U0001d565",
	},
	{
		e:         unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM),
		encPrefix: "\xfe\xff",
		encoded:   "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
		utf8:      "\x57\u00e4\U0001d565",
	},
	{
		e:       unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
		encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
		utf8:    "\x57\u00e4\U0001d565",
	},
	{
Пример #18
0
	for i, t := range tickets {
		ns[i] = getNode(t)
	}
	return ns
}

// getNode returns the entry of nodes whose Ticket equals t. When no entry
// matches, it returns a new node that carries only that ticket.
func getNode(t string) *srvpb.Node {
	for _, candidate := range nodes {
		if candidate.Ticket != t {
			continue
		}
		return candidate
	}
	return &srvpb.Node{Ticket: t}
}

// utf16LE is the little-endian UTF-16 encoding with the IgnoreBOM policy.
var utf16LE = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)

// encodeText converts the UTF-8 string text to the encoding e and returns
// the encoded bytes. It panics if the transformation reports an error.
func encodeText(e encoding.Encoding, text string) []byte {
	encoded, _, err := transform.Bytes(e.NewEncoder(), []byte(text))
	if err == nil {
		return encoded
	}
	panic(err)
}

func TestNodes(t *testing.T) {
	st := tbl.Construct(t)

	for _, node := range tbl.Nodes {
		reply, err := st.Nodes(ctx, &xpb.NodesRequest{
			Ticket: []string{node.Ticket},
Пример #19
0
var (
	charsetDetector  = chardet.NewTextDetector()
	charsetDetectors = map[string]encoding.Encoding{
		"Big5":         traditionalchinese.Big5,
		"EUC-JP":       japanese.EUCJP,
		"EUC-KR":       korean.EUCKR,
		"GB-18030":     simplifiedchinese.GB18030,
		"ISO-2022-JP":  japanese.ISO2022JP,
		"ISO-8859-5":   charmap.ISO8859_5,
		"ISO-8859-6":   charmap.ISO8859_6,
		"ISO-8859-7":   charmap.ISO8859_7,
		"ISO-8859-8":   charmap.ISO8859_8,
		"ISO-8859-8-I": charmap.ISO8859_8I,
		"KOI8-R":       charmap.KOI8R,
		"Shift_JIS":    japanese.ShiftJIS,
		"UTF-16BE":     unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
		"UTF-16LE":     unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
		"windows-1251": charmap.Windows1251,
		"windows-1252": charmap.Windows1252,
		"windows-1253": charmap.Windows1253,
		"windows-1254": charmap.Windows1254,
		"windows-1255": charmap.Windows1255,
		"windows-1256": charmap.Windows1256,

		// Decoders don't exist for these
		// "IBM420_ltr":   nil,
		// "IBM420_rtl":   nil,
		// "IBM424_ltr":   nil,
		// "IBM424_rtl":   nil,
		// "ISO-2022-CN":  nil,
		// "ISO-2022-KR":  nil,
Пример #20
0
	"x-sjis":              {japanese.ShiftJIS, "shift_jis"},
	"cseuckr":             {korean.EUCKR, "euc-kr"},
	"csksc56011987":       {korean.EUCKR, "euc-kr"},
	"euc-kr":              {korean.EUCKR, "euc-kr"},
	"iso-ir-149":          {korean.EUCKR, "euc-kr"},
	"korean":              {korean.EUCKR, "euc-kr"},
	"ks_c_5601-1987":      {korean.EUCKR, "euc-kr"},
	"ks_c_5601-1989":      {korean.EUCKR, "euc-kr"},
	"ksc5601":             {korean.EUCKR, "euc-kr"},
	"ksc_5601":            {korean.EUCKR, "euc-kr"},
	"windows-949":         {korean.EUCKR, "euc-kr"},
	"csiso2022kr":         {encoding.Replacement, "replacement"},
	"iso-2022-kr":         {encoding.Replacement, "replacement"},
	"iso-2022-cn":         {encoding.Replacement, "replacement"},
	"iso-2022-cn-ext":     {encoding.Replacement, "replacement"},
	"utf-16be":            {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
	"utf-16":              {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
	"utf-16le":            {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
	"x-user-defined":      {charmap.XUserDefined, "x-user-defined"},
}

// charsetRegexp is declared without a value here.
// NOTE(review): presumably compiled elsewhere (e.g. in an init function) and
// used to locate the charset in an HTML body — confirm before relying on it.
var charsetRegexp *regexp.Regexp

// errParsingCharset reports that no valid charset could be found in the HTML
// body.
var errParsingCharset = errors.New("Could not find a valid charset in the HTML body")

func ConvertToUTF8String(charset string, textBytes []byte) (string, error) {
	if strings.ToLower(charset) == "utf-8" {
		return string(textBytes), nil
	}
	item, ok := encodings[strings.ToLower(charset)]
	if !ok {
		return "", fmt.Errorf("Unsupport charset %s", charset)
Пример #21
0
	iso8859_13:        charmap.ISO8859_13,
	iso8859_14:        charmap.ISO8859_14,
	iso8859_15:        charmap.ISO8859_15,
	iso8859_16:        charmap.ISO8859_16,
	koi8r:             charmap.KOI8R,
	koi8u:             charmap.KOI8U,
	macintosh:         charmap.Macintosh,
	windows874:        charmap.Windows874,
	windows1250:       charmap.Windows1250,
	windows1251:       charmap.Windows1251,
	windows1252:       charmap.Windows1252,
	windows1253:       charmap.Windows1253,
	windows1254:       charmap.Windows1254,
	windows1255:       charmap.Windows1255,
	windows1256:       charmap.Windows1256,
	windows1257:       charmap.Windows1257,
	windows1258:       charmap.Windows1258,
	macintoshCyrillic: charmap.MacintoshCyrillic,
	gbk:               simplifiedchinese.GBK,
	gb18030:           simplifiedchinese.GB18030,
	big5:              traditionalchinese.Big5,
	eucjp:             japanese.EUCJP,
	iso2022jp:         japanese.ISO2022JP,
	shiftJIS:          japanese.ShiftJIS,
	euckr:             korean.EUCKR,
	replacement:       encoding.Replacement,
	utf16be:           unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	utf16le:           unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
	xUserDefined:      charmap.XUserDefined,
}
Пример #22
0
// UTF-16 encodings driven by a byte order mark (BOM). Only seekable data
// sources are supported, because the data source has to be checked for the
// optional BOM before the actual decoder and encoder can be configured.
var (
	// utf16BOMRequired requires a BOM, as no fallback byte order is specified.
	utf16BOMRequired = utf16BOM(unknownEndianess)

	// utf16BOMBigEndian treats the BOM as optional and falls back to big
	// endian if it is missing.
	utf16BOMBigEndian = utf16BOM(bigEndian)

	// utf16BOMLittleEndian treats the BOM as optional and falls back to
	// little endian if it is missing.
	utf16BOMLittleEndian = utf16BOM(littleEndian)
)

// utf16Map maps a byte order to the UTF-16 encoding of that byte order with
// the IgnoreBOM policy. It has entries for bigEndian and littleEndian only.
var utf16Map = map[endianness]Encoding{
	bigEndian:    unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	littleEndian: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
}

// utf16BOM returns an EncodingFactory bound to the endianness e.
//
// The factory only accepts readers that implement io.ReadSeeker and hands
// them to utf16Seekable together with e. For any other reader it returns
// ErrUnsupportedSourceTypeBOM.
func utf16BOM(e endianness) EncodingFactory {
	return func(src io.Reader) (Encoding, error) {
		if seekable, ok := src.(io.ReadSeeker); ok {
			return utf16Seekable(seekable, e)
		}
		return nil, ErrUnsupportedSourceTypeBOM
	}
}

func utf16Seekable(in io.ReadSeeker, endianness endianness) (Encoding, error) {
Пример #23
0
// TestUtf16BOMEncodings encodes a fixed text as UTF-16 with each listed byte
// order and BOM policy, then checks what the named BOM-aware encoding factory
// makes of the result: the encoding it selects, the error it returns and how
// many bytes it consumed from the reader. When the factory succeeds, the rest
// of the reader must decode back to the original text.
func TestUtf16BOMEncodings(t *testing.T) {
	wantLE := utf16Map[littleEndian]
	wantBE := utf16Map[bigEndian]

	tests := []struct {
		name             string
		testEndianess    unicode.Endianness
		testBOMPolicy    unicode.BOMPolicy
		expectedEncoding Encoding
		expectedError    error
		expectedOffset   int
	}{
		// no fallback byte order: the BOM is mandatory
		{"utf-16-bom", unicode.BigEndian, unicode.ExpectBOM, wantBE, nil, 2},
		{"utf-16-bom", unicode.BigEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},
		{"utf-16-bom", unicode.LittleEndian, unicode.ExpectBOM, wantLE, nil, 2},
		{"utf-16-bom", unicode.LittleEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},

		// big endian based encoding
		{"utf-16be-bom", unicode.BigEndian, unicode.ExpectBOM, wantBE, nil, 2},
		{"utf-16be-bom", unicode.BigEndian, unicode.IgnoreBOM, wantBE, nil, 0},
		{"utf-16be-bom", unicode.LittleEndian, unicode.ExpectBOM, wantLE, nil, 2},

		// little endian based encoding
		{"utf-16le-bom", unicode.LittleEndian, unicode.ExpectBOM, wantLE, nil, 2},
		{"utf-16le-bom", unicode.LittleEndian, unicode.IgnoreBOM, wantLE, nil, 0},
		{"utf-16le-bom", unicode.BigEndian, unicode.ExpectBOM, wantBE, nil, 2},
	}

	payload := []byte("hello world")

	for _, tc := range tests {
		t.Logf("testing: codec=%v, bigendian=%v, bomPolicy=%v",
			tc.name, tc.testEndianess, tc.testBOMPolicy)

		// Produce the encoded input with the byte order and BOM policy under test.
		var encoded bytes.Buffer
		w := transform.NewWriter(&encoded, unicode.UTF16(tc.testEndianess, tc.testBOMPolicy).NewEncoder())
		w.Write(payload)
		w.Close()

		raw := bytes.NewReader(encoded.Bytes())
		total := raw.Len()

		factory, found := FindEncoding(tc.name)
		if !found {
			t.Errorf("Failed to load encoding: %v", tc.name)
			continue
		}

		enc, err := factory(raw)
		// Bytes the factory consumed from the reader.
		consumed := total - raw.Len()

		assert.Equal(t, tc.expectedEncoding, enc)
		assert.Equal(t, tc.expectedError, err)
		assert.Equal(t, tc.expectedOffset, consumed)
		if err != nil {
			continue
		}

		// The remaining bytes must decode back to the original text.
		decoded, _ := ioutil.ReadAll(transform.NewReader(raw, enc.NewDecoder()))
		assert.Equal(t, payload, decoded)
	}
}
Пример #24
0
// Decoder wraps a reader so that its input is decoded to UTF-8 as it is
// read. It takes the raw reader and returns the decoding reader.
type Decoder func(io.Reader) io.Reader

// encodings maps an encoding name to the Decoder used for it.
var encodings = map[string]Decoder{
	// default
	"nop":   Plain,
	"plain": Plain,

	// utf8 (validate input) - shadows the htmlindex utf8 codecs, which do not
	// validate input
	"unicode-1-1-utf-8": trans(encoding.UTF8Validator),
	"utf-8":             trans(encoding.UTF8Validator),
	"utf8":              trans(encoding.UTF8Validator),

	// utf16
	"utf-16be-bom": enc(unicode.UTF16(unicode.BigEndian, unicode.UseBOM)),

	// simplified chinese
	"gbk": enc(simplifiedchinese.GBK), // shadows htmlindex, which uses 'GB18030' for GBK

	// 8bit charmap encodings
	"iso8859-6e": enc(charmap.ISO8859_6E),
	"iso8859-6i": enc(charmap.ISO8859_6I),
	"iso8859-8e": enc(charmap.ISO8859_8E),
	"iso8859-8i": enc(charmap.ISO8859_8I),
}

// Plain is the file encoding that does not transform any read bytes. It is
// set to nopEnc.
var Plain = nopEnc

// Find returns
Пример #25
0
	"golang.org/x/text/transform"
)

// Decoder wraps a reader so that its input is decoded to UTF-8 as it is
// read. It takes the raw reader and returns the decoding reader.
type Decoder func(io.Reader) io.Reader

var encodings = map[string]Decoder{
	// default
	"nop":   Plain,
	"plain": Plain,

	// utf8 (validate input)
	"utf-8": trans(encoding.UTF8Validator),

	// utf16
	"utf-16be-bom": enc(unicode.UTF16(unicode.BigEndian, unicode.UseBOM)),
	"utf-16be":     enc(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)),
	"utf-16le":     enc(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)),

	// traditional chinese
	"big5": enc(traditionalchinese.Big5),

	// simplified chinese
	"gb18030":  enc(simplifiedchinese.GB18030),
	"gbk":      enc(simplifiedchinese.GBK),
	"hzgb2312": enc(simplifiedchinese.HZGB2312),

	// korean
	"euckr": enc(korean.EUCKR),

	// japanese