// Convert unicode text to ASCII text // using specific codepage mapping. func convertUnicodeToAscii(text string, codepage encoding.Encoding) []byte { b := []byte(text) // fmt.Printf("Text length: %d\n", len(b)) var buf bytes.Buffer if codepage == nil { codepage = charmap.Windows1252 } w := transform.NewWriter(&buf, codepage.NewEncoder()) defer w.Close() w.Write(b) // fmt.Printf("Buffer length: %d\n", len(buf.Bytes())) return buf.Bytes() }
func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) { basename, ext, count := "", "", 0 for _, tf := range testdataFiles { if tf.enc == enc { basename, ext = tf.basename, tf.ext count++ } } if count != 1 { if count == 0 { return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc) } return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc) } dstFile := fmt.Sprintf("testdata/%s-%s.txt", basename, ext) srcFile := fmt.Sprintf("testdata/%s-utf-8.txt", basename) var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder()) if direction == "Decode" { dstFile, srcFile = srcFile, dstFile coder = enc.NewDecoder() } dst, err := ioutil.ReadFile(dstFile) if err != nil { return nil, nil, nil, err } src, err := ioutil.ReadFile(srcFile) if err != nil { return nil, nil, nil, err } return dst, src, coder, nil }
// NewReader returns a reader which decode from the given encoding, to utf8. // // If enc is nil, then only an utf8-enforcing replacement reader // (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables) // is used. func NewReader(r io.Reader, enc encoding.Encoding) io.Reader { if enc == nil || enc == encoding.Replacement { return transform.NewReader(r, encoding.Replacement.NewEncoder()) } return transform.NewReader(r, transform.Chain(enc.NewDecoder(), encoding.Replacement.NewEncoder())) }
func encodeText(e encoding.Encoding, text string) []byte { res, _, err := transform.Bytes(e.NewEncoder(), []byte(text)) if err != nil { panic(err) } return res }
func ConvTo(b []byte, e encoding.Encoding) (result []byte, err error) { w := new(bytes.Buffer) writer := transform.NewWriter(w, e.NewEncoder()) defer writer.Close() if _, err = writer.Write(b); err != nil { return } return w.Bytes(), nil }
func verifyFromUTF(enc encoding.Encoding, b byte, r rune) { encoder := enc.NewEncoder() out := make([]byte, 6) utf := make([]byte, utf8.RuneLen(r)) utf8.EncodeRune(utf, r) ndst, nsrc, err := encoder.Transform(out, utf, true) So(err, ShouldBeNil) So(nsrc, ShouldEqual, len(utf)) So(ndst, ShouldEqual, 1) So(b, ShouldEqual, out[0]) }
func GetUTF8HtmlTitle(str string) string { var e encoding.Encoding var name string e, name, _ = charset.DetermineEncoding([]byte(str), "text/html") if name == "windows-1252" { e, name, _ = charset.DetermineEncoding([]byte(str), "text/html;charset=gbk") } r := transform.NewReader(strings.NewReader(str), e.NewDecoder()) if b, err := ioutil.ReadAll(r); err != nil { return "" } else { return getHtmlTitle(string(b)) } return "" }
func verifyToUTF(enc encoding.Encoding, b byte, r rune) { decoder := enc.NewDecoder() out := make([]byte, 6) nat := []byte{b} utf := make([]byte, utf8.RuneLen(r)) utf8.EncodeRune(utf, r) ndst, nsrc, err := decoder.Transform(out, nat, true) So(err, ShouldBeNil) So(nsrc, ShouldEqual, 1) if !bytes.Equal(utf, out[:ndst]) { Printf("UTF expected %v, but got %v for %x\n", utf, out, b) } So(bytes.Equal(utf, out[:ndst]), ShouldBeTrue) }
// There might be a shortcut when stream is not required, comparing to StreamFromResponse(). func TextFromResponse(response *http.Response) (text, textType string, err error) { charset, textType, err := CharsetFromContentType(response.Header.Get("Content-Type")) if err != nil { return } body := getBodyStream(response) var data []byte var stream io.Reader var encoding encoding.Encoding if (len(charset) == 0) && (textType == "html") { data, err = ioutil.ReadAll(body) if err != nil { return } charset = DetectCharset(data) if encoding, err = GetEncoding(charset); err != nil { return } // No encoding, it assumed as UTF-8. if encoding == nil { text = string(data) return } stream = transform.NewReader(bytes.NewReader(data), encoding.NewDecoder()) } else { if encoding, err = GetEncoding(charset); err != nil { return } if encoding != nil { stream = transform.NewReader(body, encoding.NewDecoder()) } else { stream = body } } if data, err = ioutil.ReadAll(stream); err == nil { text = string(data) } return }
// NewLine creates a new Line reader object func NewLine(input io.Reader, codec encoding.Encoding, bufferSize int) (*Line, error) { encoder := codec.NewEncoder() // Create newline char based on encoding nl, _, err := transform.Bytes(encoder, []byte{'\n'}) if err != nil { return nil, err } return &Line{ reader: input, codec: codec, bufferSize: bufferSize, nl: nl, decoder: codec.NewDecoder(), inBuffer: streambuf.New(nil), outBuffer: streambuf.New(nil), }, nil }
// read reader content to string, using charset specified func readToStringWithCharset(reader io.Reader, charset string) (string, error) { charset = strings.ToUpper(charset) var data []byte var err error if charset == "UTF-8" || charset == "UTF8" { data, err = ioutil.ReadAll(reader) } else { if charset == "GBK" || charset == "GB2312" { charset = "GB18030" } var encoder encoding.Encoding encoder, err = htmlindex.Get(charset) if err != nil { return "", err } data, err = ioutil.ReadAll(transform.NewReader(reader, encoder.NewDecoder())) } if err != nil { return "", err } return string(data), err }
func (client *Client) FileInfo(url string, encoding encoding.Encoding) (filename string, size int, status int, errMsg error) { status = http.StatusOK resp, err := client.Head(url) if err != nil { status = http.StatusBadGateway errMsg = fmt.Errorf("Failed to HEAD from %s: %s", url, err) return } // Filename. disposition := resp.Header.Get("Content-Disposition") disposition, err = encoding.NewDecoder().String(disposition) if err != nil { status = http.StatusInternalServerError errMsg = fmt.Errorf("Failed to decode Content-Disposition: %s", err) return } // Parse disposition header. _, params, err := mime.ParseMediaType(disposition) if err != nil { status = http.StatusInternalServerError errMsg = fmt.Errorf("Failed to parse header Content-Disposition of file at %s: %s", url, err) return } filename = params["filename"] // File size. sizeStr := resp.Header.Get("Content-Length") size, err = strconv.Atoi(sizeStr) if err != nil { status = http.StatusInternalServerError errMsg = fmt.Errorf("Failed to convert Content-Length (%s) to int: %s", sizeStr, err) return } return }
func ConvToUTF8(b []byte, e encoding.Encoding) (result []byte, err error) { reader := transform.NewReader(bytes.NewReader(b), unicode.BOMOverride(e.NewDecoder())) return ioutil.ReadAll(reader) }
// enc returns the Decoder produced by passing the decoder of encoding to
// trans. Note that, despite its name, it uses the decoding side of encoding.
func enc(encoding encoding.Encoding) Decoder {
	decoder := encoding.NewDecoder()
	return trans(decoder)
}
// dec returns the direction label "Decode" together with a new decoder for e.
// The returned error is always nil.
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Decode"
	t = e.NewDecoder()
	return dir, t, nil
}
// enc returns the direction label "Encode" together with a new encoder for e.
// The returned error is always internal.ErrASCIIReplacement.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Encode"
	t = e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// NewWriter returns a writer which encodes to the given encoding, utf8. // // If enc is nil, then only an utf8-enforcing replacement writer // (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables) // is used. func NewWriter(w io.Writer, enc encoding.Encoding) io.WriteCloser { if enc == nil || enc == encoding.Replacement { return transform.NewWriter(w, encoding.Replacement.NewEncoder()) } return transform.NewWriter(w, transform.Chain(enc.NewEncoder())) }