Exemple #1
1
// convertUnicodeToAscii encodes the UTF-8 string text with the encoder of
// codepage and returns the encoded bytes.
//
// When codepage is nil, charmap.Windows1252 is used. The function has no
// error return, so the conversion is best effort: if the encoder reports an
// error, whatever was written to the buffer up to that point is returned.
func convertUnicodeToAscii(text string,
	codepage encoding.Encoding) []byte {
	if codepage == nil {
		codepage = charmap.Windows1252
	}
	var buf bytes.Buffer
	w := transform.NewWriter(&buf, codepage.NewEncoder())
	// Errors are ignored on purpose: the signature cannot report them and
	// callers receive the bytes encoded so far.
	_, _ = w.Write([]byte(text))
	// Close must run before buf.Bytes() is read. A deferred Close would only
	// run after the return value had been evaluated, so any bytes it flushes
	// would be missing from the result.
	_ = w.Close()
	return buf.Bytes()
}
Exemple #2
0
// load locates the single testdataFiles entry registered for enc and reads
// the matching pair of files from the testdata directory.
//
// By default the first result is the content of
// "testdata/<basename>-<ext>.txt", the second is the content of
// "testdata/<basename>-utf-8.txt", and the Transcoder is enc's encoder
// wrapped with encoding.ReplaceUnsupported. When direction is "Decode" the two
// files swap places and the Transcoder is enc's decoder.
//
// An error is returned when testdataFiles holds no entry or more than one
// entry for enc, or when either file cannot be read.
func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
	var name, suffix string
	matches := 0
	for _, tf := range testdataFiles {
		if tf.enc != enc {
			continue
		}
		name, suffix = tf.basename, tf.ext
		matches++
	}
	switch {
	case matches == 0:
		return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
	case matches > 1:
		return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
	}

	encodedPath := fmt.Sprintf("testdata/%s-%s.txt", name, suffix)
	utf8Path := fmt.Sprintf("testdata/%s-utf-8.txt", name)

	firstPath, secondPath := encodedPath, utf8Path
	var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
	if direction == "Decode" {
		firstPath, secondPath = utf8Path, encodedPath
		coder = enc.NewDecoder()
	}

	first, err := ioutil.ReadFile(firstPath)
	if err != nil {
		return nil, nil, nil, err
	}
	second, err := ioutil.ReadFile(secondPath)
	if err != nil {
		return nil, nil, nil, err
	}
	return first, second, coder, nil
}
Exemple #3
0
// NewReader returns a reader which decodes from the given encoding to utf8.
//
// The decoded stream is additionally passed through the encoder of
// encoding.Replacement
// (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables).
// If enc is nil or is encoding.Replacement itself, that replacement encoder
// is the only transformation applied to r.
func NewReader(r io.Reader, enc encoding.Encoding) io.Reader {
	if enc != nil && enc != encoding.Replacement {
		decodeThenEnforce := transform.Chain(enc.NewDecoder(), encoding.Replacement.NewEncoder())
		return transform.NewReader(r, decodeThenEnforce)
	}
	return transform.NewReader(r, encoding.Replacement.NewEncoder())
}
Exemple #4
0
// encodeText returns text transformed by the encoder of e.
// It panics with the encoder's error if the transformation fails.
func encodeText(e encoding.Encoding, text string) []byte {
	encoded, _, err := transform.Bytes(e.NewEncoder(), []byte(text))
	if err == nil {
		return encoded
	}
	panic(err)
}
Exemple #5
0
// ConvTo transforms b with the encoder of e and returns the encoded bytes.
//
// A nil result and a non-nil error are returned when writing to, or closing,
// the transforming writer fails.
func ConvTo(b []byte, e encoding.Encoding) (result []byte, err error) {
	var buf bytes.Buffer
	writer := transform.NewWriter(&buf, e.NewEncoder())

	if _, err = writer.Write(b); err != nil {
		// The write error is the one reported; Close is still called so the
		// writer is finished, but its error is intentionally discarded.
		_ = writer.Close()
		return nil, err
	}
	// Close must run before buf.Bytes() is read. A deferred Close would only
	// run after the return value had been evaluated, so bytes flushed by Close
	// would be lost and its error silently dropped.
	if err = writer.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
Exemple #6
0
// verifyFromUTF asserts that the encoder of enc transforms the UTF-8 bytes of
// r without error, consumes all of them, and produces exactly one output byte
// equal to b.
func verifyFromUTF(enc encoding.Encoding, b byte, r rune) {
	encoder := enc.NewEncoder()

	// UTF-8 representation of r, used as the encoder input.
	src := make([]byte, utf8.RuneLen(r))
	utf8.EncodeRune(src, r)

	dst := make([]byte, 6)
	written, consumed, err := encoder.Transform(dst, src, true)

	So(err, ShouldBeNil)
	So(consumed, ShouldEqual, len(src))
	So(written, ShouldEqual, 1)
	So(b, ShouldEqual, dst[0])
}
Exemple #7
0
func GetUTF8HtmlTitle(str string) string {
	var e encoding.Encoding
	var name string

	e, name, _ = charset.DetermineEncoding([]byte(str), "text/html")
	if name == "windows-1252" {
		e, name, _ = charset.DetermineEncoding([]byte(str), "text/html;charset=gbk")
	}
	r := transform.NewReader(strings.NewReader(str), e.NewDecoder())
	if b, err := ioutil.ReadAll(r); err != nil {
		return ""
	} else {
		return getHtmlTitle(string(b))
	}
	return ""
}
Exemple #8
0
// verifyToUTF asserts that the decoder of enc transforms the single byte b
// without error, consumes exactly that one byte, and produces the UTF-8
// encoding of r. On a mismatch the expected and actual bytes are printed
// before the assertion is evaluated.
func verifyToUTF(enc encoding.Encoding, b byte, r rune) {
	decoder := enc.NewDecoder()

	// Expected output: the UTF-8 representation of r.
	want := make([]byte, utf8.RuneLen(r))
	utf8.EncodeRune(want, r)

	got := make([]byte, 6)
	written, consumed, err := decoder.Transform(got, []byte{b}, true)
	So(err, ShouldBeNil)
	So(consumed, ShouldEqual, 1)

	matches := bytes.Equal(want, got[:written])
	if !matches {
		Printf("UTF expected %v, but got %v for %x\n", want, got, b)
	}
	So(matches, ShouldBeTrue)
}
Exemple #9
0
// TextFromResponse reads the body of response and returns it as text,
// together with the text type reported by CharsetFromContentType for the
// Content-Type header.
//
// When the header yields no charset and the text type is "html", the whole
// body is read first and the charset is obtained from DetectCharset. If
// GetEncoding returns a nil encoding for the charset, the bytes are returned
// unchanged (assumed to be UTF-8); otherwise they are passed through the
// encoding's decoder. On any error text is left empty.
//
// There might be a shortcut when a stream is not required, compared to
// StreamFromResponse().
func TextFromResponse(response *http.Response) (text, textType string, err error) {
	charset, textType, err := CharsetFromContentType(response.Header.Get("Content-Type"))
	if err != nil {
		return text, textType, err
	}

	body := getBodyStream(response)
	var stream io.Reader = body

	// HTML without a declared charset: buffer the body so the charset can be
	// detected from its content, then continue from the buffered bytes.
	sniffed := len(charset) == 0 && textType == "html"
	var raw []byte
	if sniffed {
		if raw, err = ioutil.ReadAll(body); err != nil {
			return text, textType, err
		}
		charset = DetectCharset(raw)
		stream = bytes.NewReader(raw)
	}

	var enc encoding.Encoding
	if enc, err = GetEncoding(charset); err != nil {
		return text, textType, err
	}

	switch {
	case enc != nil:
		stream = transform.NewReader(stream, enc.NewDecoder())
	case sniffed:
		// No encoding, it is assumed to be UTF-8: the buffered bytes are the text.
		return string(raw), textType, nil
	}

	if raw, err = ioutil.ReadAll(stream); err == nil {
		text = string(raw)
	}
	return text, textType, err
}
Exemple #10
0
// NewLine creates a new Line reader object reading from input.
//
// The newline byte '\n' is transformed with codec's encoder and stored in the
// Line together with codec, a decoder created from codec, bufferSize, and two
// stream buffers created with streambuf.New(nil). An error is returned if the
// newline cannot be encoded.
func NewLine(input io.Reader, codec encoding.Encoding, bufferSize int) (*Line, error) {
	// Newline sequence as it appears in the codec's encoding.
	newline, _, err := transform.Bytes(codec.NewEncoder(), []byte{'\n'})
	if err != nil {
		return nil, err
	}

	line := &Line{
		reader:     input,
		codec:      codec,
		bufferSize: bufferSize,
		nl:         newline,
		decoder:    codec.NewDecoder(),
		inBuffer:   streambuf.New(nil),
		outBuffer:  streambuf.New(nil),
	}
	return line, nil
}
Exemple #11
0
// read reader content to string, using charset specified
func readToStringWithCharset(reader io.Reader, charset string) (string, error) {
	charset = strings.ToUpper(charset)
	var data []byte
	var err error
	if charset == "UTF-8" || charset == "UTF8" {
		data, err = ioutil.ReadAll(reader)
	} else {
		if charset == "GBK" || charset == "GB2312" {
			charset = "GB18030"
		}
		var encoder encoding.Encoding
		encoder, err = htmlindex.Get(charset)
		if err != nil {
			return "", err
		}
		data, err = ioutil.ReadAll(transform.NewReader(reader, encoder.NewDecoder()))
	}
	if err != nil {
		return "", err
	}
	return string(data), err
}
Exemple #12
0
// FileInfo issues a HEAD request for url and extracts the file name and size
// from the response headers.
//
// The Content-Disposition header is decoded with the decoder of encoding and
// parsed as a media type; filename is its "filename" parameter. size is the
// Content-Length header converted to an int.
//
// status is http.StatusOK on success, http.StatusBadGateway when the HEAD
// request fails, and http.StatusInternalServerError when the header cannot be
// decoded or parsed or when Content-Length is not an integer. errMsg describes
// the failure and is nil on success. filename may already be set when the
// Content-Length conversion fails.
func (client *Client) FileInfo(url string, encoding encoding.Encoding) (filename string, size int, status int, errMsg error) {
	status = http.StatusOK

	resp, err := client.Head(url)
	if err != nil {
		status = http.StatusBadGateway
		errMsg = fmt.Errorf("Failed to HEAD from %s: %s", url, err)
		return
	}
	// The body must be closed even though only the headers are used;
	// otherwise the underlying connection is leaked.
	defer resp.Body.Close()

	// Filename.
	disposition := resp.Header.Get("Content-Disposition")
	disposition, err = encoding.NewDecoder().String(disposition)
	if err != nil {
		status = http.StatusInternalServerError
		errMsg = fmt.Errorf("Failed to decode Content-Disposition: %s", err)
		return
	}

	// Parse disposition header.
	_, params, err := mime.ParseMediaType(disposition)
	if err != nil {
		status = http.StatusInternalServerError
		errMsg = fmt.Errorf("Failed to parse header Content-Disposition of file at %s: %s", url, err)
		return
	}
	filename = params["filename"]

	// File size.
	sizeStr := resp.Header.Get("Content-Length")
	size, err = strconv.Atoi(sizeStr)
	if err != nil {
		status = http.StatusInternalServerError
		errMsg = fmt.Errorf("Failed to convert Content-Length (%s) to int: %s", sizeStr, err)
		return
	}

	return
}
Exemple #13
0
// ConvToUTF8 reads all of b through the decoder of e, wrapped with
// unicode.BOMOverride, and returns the decoded bytes together with any read
// error.
func ConvToUTF8(b []byte, e encoding.Encoding) (result []byte, err error) {
	decoder := unicode.BOMOverride(e.NewDecoder())
	decoded := transform.NewReader(bytes.NewReader(b), decoder)
	return ioutil.ReadAll(decoded)
}
Exemple #14
0
// enc returns the Decoder produced by trans for the decoder of the given
// encoding.
func enc(encoding encoding.Encoding) Decoder {
	decoder := encoding.NewDecoder()
	return trans(decoder)
}
Exemple #15
0
// dec returns the direction label "Decode" together with the decoder of e.
// The returned error is always nil.
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Decode"
	t = e.NewDecoder()
	return dir, t, nil
}
Exemple #16
0
// enc returns the direction label "Encode" together with the encoder of e.
// The returned error is always internal.ErrASCIIReplacement.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Encode"
	t = e.NewEncoder()
	err = internal.ErrASCIIReplacement
	return dir, t, err
}
Exemple #17
0
// NewWriter returns a writer which encodes what is written to it with the
// encoder of the given encoding and forwards the result to w.
//
// If enc is nil or is encoding.Replacement, the encoder of
// encoding.Replacement
// (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables)
// is used instead.
func NewWriter(w io.Writer, enc encoding.Encoding) io.WriteCloser {
	var t transform.Transformer
	if enc != nil && enc != encoding.Replacement {
		// NOTE(review): the chain holds a single transformer; it is kept as is
		// so the constructed writer matches the existing behaviour.
		t = transform.Chain(enc.NewEncoder())
	} else {
		t = encoding.Replacement.NewEncoder()
	}
	return transform.NewWriter(w, t)
}