// TestNonRepertoire tests that codes outside of an Encoding's repertoire are // converted: // - to the Unicode replacement character '\ufffd' when decoding to UTF-8, // - to the ASCII substitute character '\x1a' when encoding from UTF-8. func TestNonRepertoire(t *testing.T) { testCases := []struct { e encoding.Encoding dSrc, eSrc string }{ {charmap.Windows1252, "\x81", "갂"}, {japanese.EUCJP, "\xfe\xfc", "갂"}, {japanese.ISO2022JP, "\x1b$B\x7e\x7e", "갂"}, {japanese.ShiftJIS, "\xef\xfc", "갂"}, {korean.EUCKR, "\xfe\xfe", "א"}, {simplifiedchinese.GBK, "\xfe\xfe", "갂"}, {simplifiedchinese.HZGB2312, "~{z~", "갂"}, {traditionalchinese.Big5, "\x81\x40", "갂"}, } for _, tc := range testCases { for _, direction := range []string{"Decode", "Encode"} { enc, want, src := (transform.Transformer)(nil), "", "" if direction == "Decode" { enc, want, src = tc.e.NewDecoder(), "\ufffd", tc.dSrc } else { enc, want, src = tc.e.NewEncoder(), "\x1a", tc.eSrc } dst, err := ioutil.ReadAll(transform.NewReader(strings.NewReader(src), enc)) if err != nil { t.Errorf("%s %v: %v", direction, tc.e, err) continue } if got := string(dst); got != want { t.Errorf("%s %v:\ngot %q\nwant %q", direction, tc.e, got, want) continue } } } }
func TestBasics(t *testing.T) { for _, tc := range basicTestCases { for _, direction := range []string{"Decode", "Encode"} { newTransformer, want, src := (func() transform.Transformer)(nil), "", "" wPrefix, sPrefix := "", "" if direction == "Decode" { newTransformer, want, src = tc.e.NewDecoder, tc.utf8, tc.encoded wPrefix, sPrefix = "", tc.encPrefix } else { newTransformer, want, src = tc.e.NewEncoder, tc.encoded, tc.utf8 wPrefix, sPrefix = tc.encPrefix, "" } dst := make([]byte, len(wPrefix)+len(want)) nDst, nSrc, err := newTransformer().Transform(dst, []byte(sPrefix+src), true) if err != nil { t.Errorf("%v: %s: %v", tc.e, direction, err) continue } if nDst != len(wPrefix)+len(want) { t.Errorf("%v: %s: nDst got %d, want %d", tc.e, direction, nDst, len(wPrefix)+len(want)) continue } if nSrc != len(sPrefix)+len(src) { t.Errorf("%v: %s: nSrc got %d, want %d", tc.e, direction, nSrc, len(sPrefix)+len(src)) continue } if got := string(dst); got != wPrefix+want { t.Errorf("%v: %s:\ngot %q\nwant %q", tc.e, direction, got, wPrefix+want) continue } for _, n := range []int{0, 1, 2, 10, 123, 4567} { input := sPrefix + strings.Repeat(src, n) sr := strings.NewReader(input) g, err := ioutil.ReadAll(transform.NewReader(sr, newTransformer())) if err != nil { t.Errorf("%v: %s: ReadAll: n=%d: %v", tc.e, direction, n, err) continue } if len(g) == 0 && len(input) == 0 { // If the input is empty then the output can be empty, // regardless of whatever wPrefix is. continue } got1, want1 := string(g), wPrefix+strings.Repeat(want, n) if got1 != want1 { t.Errorf("%v: %s: ReadAll: n=%d\ngot %q\nwant %q", tc.e, direction, n, trim(got1), trim(want1)) continue } } } } }
func benchmark(b *testing.B, direction string, enc encoding.Encoding) { _, src, newTransformer, err := load(direction, enc) if err != nil { b.Fatal(err) } b.SetBytes(int64(len(src))) b.ResetTimer() for i := 0; i < b.N; i++ { r := transform.NewReader(bytes.NewReader(src), newTransformer()) io.Copy(ioutil.Discard, r) } }
// TestBig5CircumflexAndMacron tests the special cases listed in // http://encoding.spec.whatwg.org/#big5 // Note that these special cases aren't preserved by round-tripping through // decoding and encoding (since // http://encoding.spec.whatwg.org/index-big5.txt does not have an entry for // U+0304 or U+030C), so we can't test this in TestBasics. func TestBig5CircumflexAndMacron(t *testing.T) { src := "\x88\x5f\x88\x60\x88\x61\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66 " + "\x88\xa2\x88\xa3\x88\xa4\x88\xa5\x88\xa6" want := "ÓǑÒ\u00ca\u0304Ế\u00ca\u030cỀÊ " + "ü\u00ea\u0304ế\u00ea\u030cề" dst, err := ioutil.ReadAll(transform.NewReader( strings.NewReader(src), traditionalchinese.Big5.NewDecoder())) if err != nil { t.Fatal(err) } if got := string(dst); got != want { t.Fatalf("\ngot %q\nwant %q", got, want) } }
// Parse the html while handling the charset func ParseHTML(r io.Reader, cs string) (*html.Node, error) { var err error if cs == "" { // attempt to guess the charset of the HTML document r, err = charset.NewReader(r, "") if err != nil { return nil, err } } else { // let the user specify the charset e, name := charset.Lookup(cs) if name == "" { return nil, fmt.Errorf("'%s' is not a valid charset", cs) } r = transform.NewReader(r, e.NewDecoder()) } return html.Parse(r) }
// NewReader returns an io.Reader that converts the content of r to UTF-8. // It calls DetermineEncoding to find out what r's encoding is. func NewReader(r io.Reader, contentType string) (io.Reader, error) { preview := make([]byte, 1024) n, err := io.ReadFull(r, preview) switch { case err == io.ErrUnexpectedEOF: preview = preview[:n] r = bytes.NewReader(preview) case err != nil: return nil, err default: r = io.MultiReader(bytes.NewReader(preview), r) } if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop { r = transform.NewReader(r, e.NewDecoder()) } return r, nil }
func TestFiles(t *testing.T) { for _, dir := range []string{"Decode", "Encode"} { for _, tf := range testdataFiles { dst, src, newTransformer, err := load(dir, tf.enc) if err != nil { t.Errorf("%s, %s: load: %v", dir, tf.enc, err) continue } buf := bytes.NewBuffer(nil) r := transform.NewReader(bytes.NewReader(src), newTransformer()) if _, err := io.Copy(buf, r); err != nil { t.Errorf("%s, %s: copy: %v", dir, tf.enc, err) continue } if !bytes.Equal(buf.Bytes(), dst) { t.Errorf("%s, %s: transformed bytes did not match golden file", dir, tf.enc) continue } } } }
func TestReplacement(t *testing.T) { for _, direction := range []string{"Decode", "Encode"} { enc, want := (transform.Transformer)(nil), "" if direction == "Decode" { enc = encoding.Replacement.NewDecoder() want = "\ufffd" } else { enc = encoding.Replacement.NewEncoder() want = "AB\x00CD\ufffdYZ" } sr := strings.NewReader("AB\x00CD\x80YZ") g, err := ioutil.ReadAll(transform.NewReader(sr, enc)) if err != nil { t.Errorf("%s: ReadAll: %v", direction, err) continue } if got := string(g); got != want { t.Errorf("%s:\ngot %q\nwant %q", direction, got, want) continue } } }
func TestReader(t *testing.T) { switch runtime.GOOS { case "nacl": // platforms that don't permit direct file system access t.Skipf("not supported on %q", runtime.GOOS) } for _, tc := range sniffTestCases { content, err := ioutil.ReadFile("testdata/" + tc.filename) if err != nil { t.Errorf("%s: error reading file: %v", tc.filename, err) continue } r, err := NewReader(bytes.NewReader(content), tc.declared) if err != nil { t.Errorf("%s: error creating reader: %v", tc.filename, err) continue } got, err := ioutil.ReadAll(r) if err != nil { t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err) continue } e, _ := Lookup(tc.want) want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder())) if err != nil { t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err) continue } if !bytes.Equal(got, want) { t.Errorf("%s: got %q, want %q", tc.filename, got, want) continue } } }
func ExampleDecodeWindows1252() { sr := strings.NewReader("Gar\xe7on !") tr := transform.NewReader(sr, charmap.Windows1252.NewDecoder()) io.Copy(os.Stdout, tr) // Output: Garçon ! }
func transformString(t transform.Transformer, s string) (string, error) { r := transform.NewReader(strings.NewReader(s), t) b, err := ioutil.ReadAll(r) return string(b), err }