func MakeEncIconv(charset string, excludeAscii bool) func(char int) (bool, string) { return func(char int) (bool, string) { s := string(char) if excludeAscii && (char < 128) { return false, "" } // workaround for bug in glibc iconv implementation of shift_jis encoder if charset == "shift_jis" { if char == 0x5c { return false, "" } if char == 0x7e { return false, "" } } out, err := iconv.Conv(charset, "UTF-8", s) if err != nil { return false, "" } if len(out) == 0 { return false, "" } return true, encodeBytes(out) } }
func testUnidecode() { for i := 0; i <= 0x10FFFF; i++ { if (i & 0xFFFF) == 0 { fmt.Printf("Examining %x\n", i) } if okUtf8, redec, explanation := DecUtf8([]byte(string(i))); !okUtf8 { panic(fmt.Sprintf("Decoding of utf-8 encoded %d failed with reason: %s", i, explanation)) } else { if redec != i { panic(fmt.Sprintf("Decoding of utf-8 encoded %d erroneous, returned %d", i, redec)) } } if (i >= 0xd800) && (i <= 0xdfff) { continue } if utf16LEStr, err := iconv.Conv("UTF-16LE", "UTF-8", string(i)); err != nil { panic(fmt.Sprintf("Iconv error at %x: %s", i, err)) } else { if okUtf16, redec, explanation := DecUtf16LE([]byte(utf16LEStr)); !okUtf16 { panic(fmt.Sprintf("Decoding of utf-16le encoded %d failed with reason: %s", i, explanation)) } else { if redec != i { panic(fmt.Sprintf("Decoding of utf-16le encoded %d erroneous, returned %d", i, redec)) } } } if utf16BEStr, err := iconv.Conv("UTF-16BE", "UTF-8", string(i)); err != nil { panic(fmt.Sprintf("Iconv error at %x: %s", i, err)) } else { if okUtf16, redec, explanation := DecUtf16BE([]byte(utf16BEStr)); !okUtf16 { panic(fmt.Sprintf("Decoding of utf-16be encoded %d failed with reason: %s", i, explanation)) } else { if redec != i { panic(fmt.Sprintf("Decoding of utf-16be encoded %d erroneous, returned %d", i, redec)) } } } } }
func IconvDecoder(in []byte, charset string) (bool, int, string) { out, err := iconv.Conv("UTF-8", charset, string(in)) if err != nil { return false, -1, "Rejected by iconv" } if len(out) == 0 { return false, -1, "Rejected by iconv" } if len(out) > 1 { return false, -1, "More than one character encoded" } //fmt.Printf("Output %s\n", out) return true, []int(out)[0], "" }
func testJIS() { count := 0 for i := 0; i <= 0x10FFFF; i++ { if (i & 0xFFFF) == 0 { fmt.Printf("Examining %x (%d)\n", i, count) } // workaround for bug in eglibc iconv implementation of shift_jis encoder if i == 0x5c { continue } if i == 0x7e { continue } // other workaround for adaptivity in eglibc iconv implementation if i == 0xffe0 { continue } if i == 0xffe1 { continue } if i == 0xffe2 { continue } if shiftJISStr, err := iconv.Conv("shift_jis", "UTF-8", string(i)); (err == nil) && (len(shiftJISStr) > 0) { //fmt.Printf("Input: %s\n", string(i)) //if (len(shiftJISStr) > 1) { // fmt.Printf("Stringa shift-jis: %x %x\n", []byte(shiftJISStr)[0], []byte(shiftJISStr)[1]) //} count++ ok, out, reason := ShiftJISDecoder([]byte(shiftJISStr)) if !ok { panic(fmt.Sprintf("Error decoding encoded shift jis character at codepoint %d: %s", i, reason)) } if out != i { panic(fmt.Sprintf("Decoding mismatch for character at codepoint %x, returned %x", i, out)) } } } fmt.Printf("Examined %d characters\n", count) }