func (test translateTest) run(t *testing.T) { cs := charset.Info(test.charset) if cs == nil { t.Fatalf("no info found for %q", test.charset) } fromtr, err := charset.TranslatorFrom(test.charset) if err != nil { t.Fatalf("error making translator from %q: %v", test.charset, err) } out, err := translate(fromtr, test.in) if err != nil { t.Fatalf("error translating from %q: %v", test.charset, err) } if out != test.out { t.Fatalf("error translating from %q: expected %x got %x", test.charset, test.out, out) } if cs.NoTo || !test.canRoundTrip { return } totr, err := charset.TranslatorTo(test.charset) if err != nil { t.Fatalf("error making translator to %q: %v", test.charset, err) } in, err := translate(totr, out) if err != nil { t.Fatalf("error translating to %q: %v", test.charset, err) } if in != test.in { t.Fatalf("%q round trip conversion failed; expected %x got %x", test.charset, test.in, in) } }
// Will receive an input stream which would convert the response to utf-8 // The given function must close the reader r, in order to close the response body. func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) goproxy.RespHandler { return goproxy.FuncRespHandler(func(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response { if ctx.Error != nil { return nil } charsetName := ctx.Charset() if charsetName == "" { charsetName = "utf-8" } if strings.ToLower(charsetName) != "utf-8" { r, err := charset.NewReader(charsetName, resp.Body) if err != nil { ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err) return resp } tr, err := charset.TranslatorTo(charsetName) if err != nil { ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err) return resp } if err != nil { ctx.Warnf("Cannot translate to %v: %v", charsetName, err) return resp } newr := charset.NewTranslatingReader(f(r, ctx), tr) resp.Body = &readFirstCloseBoth{ioutil.NopCloser(newr), resp.Body} } else { //no translation is needed, already at utf-8 resp.Body = &readFirstCloseBoth{ioutil.NopCloser(f(resp.Body, ctx)), resp.Body} } return resp }) }
func main() { s := "Hello, \x90\xA2\x8A\x45" // CP932 encoded version of "Hello, 世界" , 这里的 s 是string类型,说明string没有字符集的概念 r, _ := charset.NewReader("CP932", strings.NewReader(s)) // convert from CP932 to UTF-8 s2_, _ := ioutil.ReadAll(r) s2 := string(s2_) fmt.Println(s2) // => Hello, 世界 fmt.Println(len(s2)) // => 13 fmt.Println(utf8.RuneCountInString(s2)) // => 9 fmt.Println(utf8.ValidString(s2)) // => true fmt.Println(utf8.ValidString(s)) // => false fmt.Printf("%T|%#v\n", s, s) // 注意 %v 与 %#v 的区别 ss := "This is not utf-8 string \xa1" fmt.Println(utf8.ValidString(ss)) // => false pice := []int32{20, 30, 40, 90} sss := string(pice) // string 似乎执行了内存拷贝,但是不会涉及到字符集的处理(转换或校验) fmt.Printf("%T:%p %T:%p:%d\n", pice, pice, sss, &sss, len(sss)) // 为什么打印字符串变量的地址还需要取地址符 tr, err := charset.TranslatorTo("windows-1252") //需要检查字符集列表 if err != nil { fmt.Println(err) os.Exit(1) } _, gbk, err2 := tr.Translate([]byte("utf-8汉字"), true) if err2 != nil { fmt.Println(err2) os.Exit(1) } fmt.Println(gbk) }