func DetermineEncoding(givenType *ParsedMimeType, head []byte) encoding.Encoding { // Try to determine encoding ala whatwg "encoding sniffing algorithm" enc, _, certain := charset.DetermineEncoding(head, givenType.String()) if certain { return enc } // Before doing a byte-level encoding detection, check for xml // declaration s := strings.TrimSpace(string(head)) if strings.HasPrefix(s, "<?xml ") { s = strings.TrimPrefix(s, "<?xml ") end := strings.Index(s, "?>") if end > 0 { s = s[:end] for _, match := range attrRe.FindAllStringSubmatch(s, -1) { if match[1] == "encoding" { if detectedEnc, _ := charset.Lookup(match[2]); detectedEnc != nil { return detectedEnc } } } } } var detector *chardet.Detector if givenType.IsHtml() || givenType.IsXml() { detector = chardet.NewHtmlDetector() } else { detector = chardet.NewTextDetector() } detected, err := detector.DetectBest(head) if err != nil { fmt.Fprintln(os.Stderr, "Error detecting character set, using default.") return enc } if detected.Charset == "GB-18030" { detected.Charset = "GB18030" } if detectedEnc, _ := charset.Lookup(detected.Charset); detectedEnc != nil { return detectedEnc } return enc }
func encodingReader(body []byte, contentType string) (io.Reader, error) { preview := make([]byte, 1024) var r io.Reader = bytes.NewReader(body) n, err := io.ReadFull(r, preview) switch { case err == io.ErrUnexpectedEOF: preview = preview[:n] r = bytes.NewReader(preview) case err != nil: return nil, err default: r = io.MultiReader(bytes.NewReader(preview), r) } e, _, certain := charset.DetermineEncoding(preview, contentType) if !certain && e == charmap.Windows1252 && utf8.Valid(body) { e = encoding.Nop } if e != encoding.Nop { r = transform.NewReader(r, e.NewDecoder()) } return r, nil }