Exemplo n.º 1
0
// open
func openUrl(url string) (data string, err error) {
	var resp *http.Response
	var raw []byte
	var dec mahonia.Decoder = nil
	debug.Println("Get:", url)
	resp, err = http.Get(url)
	if err != nil {
		return
	}
	if resp.StatusCode != http.StatusOK {
		err = errors.New("Bad Status:" + resp.Status)
		return
	}
	// only handle html files
	ctype := resp.Header.Get("Content-Type")
	if -1 == strings.Index(ctype, "text/html") {
		err = errors.New("Not a html file")
		return
	}
	// try enconding: gbk\big5\utf8
	charset := ""
	if seps := strings.Split(ctype, "="); len(seps) >= 2 {
		charset = seps[1]
		charset = strings.ToLower(charset)
		if strings.HasPrefix(charset, "gb") {
			charset = "gb18030"
			dec = gbk
		} else if strings.HasPrefix(charset, "big") {
			charset = "big5"
			dec = big5
		} else if strings.HasPrefix(charset, "utf") || charset == "" {
			charset = "utf8"
			dec = nil
		} else {
			err = errors.New("Unsupported charset:" + charset)
			return
		}
	} else {
		dec = nil
	}
	debug.Println("Using charset:", charset)
	// TODO gzip handle
	contentEncoding := resp.Header.Get("Content-Encoding")
	if contentEncoding == "gzip" {
		err = errors.New("Content-Encoding:" + contentEncoding + "temporally not supported")
		return
	}
	// read the response
	if dec != nil {
		raw, err = ioutil.ReadAll(dec.NewReader(resp.Body))
	} else {
		raw, err = ioutil.ReadAll(resp.Body)
	}
	if err != nil {
		return
	}
	defer resp.Body.Close()
	data = string(raw)
	debug.Println("Data:", data)
	return
}