예제 #1
0
// run checks a single translation case: it converts tt.in from tt.charset
// and compares the result with tt.out. When the charset info does not set
// NoTo and the case is marked canRoundTrip, it also converts the result back
// to tt.charset and compares it with the original input.
func (tt translateTest) run(t *testing.T) {
	info := charset.Info(tt.charset)
	if info == nil {
		t.Fatalf("no info found for %q", tt.charset)
	}

	// Forward direction: from the charset under test.
	decoder, err := charset.TranslatorFrom(tt.charset)
	if err != nil {
		t.Fatalf("error making translator from %q: %v", tt.charset, err)
	}
	decoded, err := translate(decoder, tt.in)
	if err != nil {
		t.Fatalf("error translating from %q: %v", tt.charset, err)
	}
	if decoded != tt.out {
		t.Fatalf("error translating from %q: expected %x got %x", tt.charset, tt.out, decoded)
	}

	// The reverse direction is only checked when the charset can be
	// translated to and the test case opted in to a round trip.
	roundTrips := !info.NoTo && tt.canRoundTrip
	if !roundTrips {
		return
	}

	encoder, err := charset.TranslatorTo(tt.charset)
	if err != nil {
		t.Fatalf("error making translator to %q: %v", tt.charset, err)
	}
	encoded, err := translate(encoder, decoded)
	if err != nil {
		t.Fatalf("error translating to %q: %v", tt.charset, err)
	}
	if encoded != tt.in {
		t.Fatalf("%q round trip conversion failed; expected %x got %x", tt.charset, tt.in, encoded)
	}
}
예제 #2
0
파일: html.go 프로젝트: Clarifai/kubernetes
// HandleStringReader returns a response handler that hands the response body
// to f as a UTF-8 stream and installs the reader f returns as the new body.
//
// The charset name is taken from ctx.Charset() and defaults to "utf-8" when
// empty. For any other charset, the body is wrapped in a charset reader that
// converts it to UTF-8 before f sees it, and the reader returned by f is
// translated back into the original charset. If either conversion cannot be
// set up, a warning is emitted through ctx.Warnf and resp is returned with its
// body untouched. The handler returns nil when ctx.Error is non-nil.
//
// The original note on this function says that f must close the reader r in
// order to close the response body.
// NOTE(review): r is passed as an io.Reader and the replacement body is a
// readFirstCloseBoth that also holds resp.Body; confirm who is responsible
// for closing.
func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) goproxy.RespHandler {
	return goproxy.FuncRespHandler(func(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
		if ctx.Error != nil {
			return nil
		}
		charsetName := ctx.Charset()
		if charsetName == "" {
			charsetName = "utf-8"
		}

		if strings.EqualFold(charsetName, "utf-8") {
			// No translation is needed, the body is already UTF-8.
			resp.Body = &readFirstCloseBoth{ioutil.NopCloser(f(resp.Body, ctx)), resp.Body}
			return resp
		}

		// Decode the body from its declared charset into UTF-8 for f.
		r, err := charset.NewReader(charsetName, resp.Body)
		if err != nil {
			ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err)
			return resp
		}
		// Re-encode whatever f produces back into the declared charset.
		tr, err := charset.TranslatorTo(charsetName)
		if err != nil {
			ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err)
			return resp
		}
		newr := charset.NewTranslatingReader(f(r, ctx), tr)
		resp.Body = &readFirstCloseBoth{ioutil.NopCloser(newr), resp.Body}
		return resp
	})
}
예제 #3
0
// main is a small demonstration of charset handling with Go strings: it
// decodes a CP932 byte sequence to UTF-8, prints length and validity
// information about the strings involved, converts an []int32 to a string,
// and finally translates a UTF-8 string to windows-1252.
// Any conversion error is printed and the program exits with status 1.
func main() {
	// CP932-encoded version of "Hello, 世界". s is an ordinary string here,
	// which shows that a Go string carries no notion of a character set.
	s := "Hello, \x90\xA2\x8A\x45"

	// Convert from CP932 to UTF-8.
	r, err := charset.NewReader("CP932", strings.NewReader(s))
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	decoded, err := ioutil.ReadAll(r)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	s2 := string(decoded)
	fmt.Println(s2)                         // => Hello, 世界
	fmt.Println(len(s2))                    // => 13
	fmt.Println(utf8.RuneCountInString(s2)) // => 9
	fmt.Println(utf8.ValidString(s2))       // => true
	fmt.Println(utf8.ValidString(s))        // => false
	fmt.Printf("%T|%#v\n", s, s)            // note the difference between %v and %#v

	ss := "This is not utf-8 string \xa1"
	fmt.Println(utf8.ValidString(ss)) // => false

	pice := []int32{20, 30, 40, 90}
	// Author's note: string(...) appears to copy the memory.
	// NOTE(review): the original note also claimed no charset processing is
	// involved, but []int32 is []rune, so this conversion UTF-8-encodes each
	// code point.
	sss := string(pice)
	// %p prints the address of element 0 for a slice; a string is not a
	// pointer type, so the address of the variable is passed with &.
	fmt.Printf("%T:%p %T:%p:%d\n", pice, pice, sss, &sss, len(sss))

	// The charset name needs to be checked against the supported charset list.
	tr, err := charset.TranslatorTo("windows-1252")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	_, encoded, err := tr.Translate([]byte("utf-8汉字"), true)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	fmt.Println(encoded)
}