// FetchStockData 抓取数据 func FetchStockData(code string) (*StockIndustry, *StockICB, error) { // http://stockData.stock.hexun.com/600028.shtml formt := "http://stockData.stock.hexun.com/%s.shtml" resp, err := wget.Get(fmt.Sprintf(formt, code)) if err != nil { return nil, nil, gos.DoError(err) } doc, err := goquery.NewDocumentFromResponse(resp) if err != nil { return nil, nil, gos.DoError(err) } tr := doc.Find("#list3 table.box6 tr") stockIndustry := &StockIndustry{} stockICB := &StockICB{} stockIndustry.Name, err = iconv.ConvertString(tr.Eq(7).Find("td").Eq(1).Text(), "gb2312", "utf-8") if err != nil { return nil, nil, gos.DoError(err) } stockICB.Name, err = iconv.ConvertString(tr.Eq(8).Find("td").Eq(1).Text(), "gb2312", "utf-8") if err != nil { return nil, nil, gos.DoError(err) } return stockIndustry, stockICB, nil }
func (this *TextMessageController) Post() { r := this.Ctx.Request //this.ParseForm() r.ParseForm() //beego.Trace("ToUserName", this.GetString("ToUserName")) //beego.Trace("ToUserName", r.PostFormValue("ToUserName")) //beego.Trace("MsgType", this.GetString("MsgType")) //beego.Trace("MsgType", r.PostFormValue("MsgType")) for k, v := range r.Form { beego.Trace("key:", k) beego.Trace("val:", strings.Join(v, "")) } //for k, v := range r.PostForm { // beego.Trace("key:", k) // beego.Trace("val:", strings.Join(v, "")) //} //for i, s := range string(this.Ctx.Input.RequestBody) { // beego.Trace(i, ":", s, ":", string(s)) //} //beego.Trace("body:", string(this.Ctx.Input.RequestBody)) output, _ := iconv.ConvertString(string(this.Ctx.Input.RequestBody), "utf8", "gbk") //beego.Trace(charmap.Windows1252.NewEncoder()) //encoding.UTF8Validator.Transform() beego.Trace(output) this.Data["Out"] = "sdfasdfasdfasdfasdfa" this.TplNames = "out.tpl" }
func GetProxyRegion(url string) string { //得到IP的地区,从另一个网站爬的 c := &http.Client{ Transport: &http.Transport{ Dial: func(netw, addr string) (net.Conn, error) { deadline := time.Now().Add(time.Second * 10) c, err := net.DialTimeout(netw, addr, time.Second*10) if err != nil { return nil, err } c.SetDeadline(deadline) return c, nil }, ResponseHeaderTimeout: time.Second * 10, }, } resp, getError := c.Get(url) body, goqueryError := goquery.NewDocumentFromResponse(resp) if getError != nil || goqueryError != nil { return "" } preRegion := body.Find(".ul1").Find("li").Eq(0).Text() //爬取的网站正好是gb2312格式的 address, err11 := iconv.ConvertString(preRegion, "gb2312", "utf-8") if err11 != nil { return "" } region := strings.Split(address, ":") if strings.Contains(region[1], "省") || strings.Contains(region[1], "市") { region[1] = "中国" + region[1] } return region[1] }
// Eval implements the Expression Eval interface. func (f *FunctionConvert) Eval(ctx context.Context, args map[interface{}]interface{}) (interface{}, error) { value, err := f.Expr.Eval(ctx, args) if err != nil { return nil, err } // Casting nil to any type returns nil if value == nil { return nil, nil } str, ok := value.(string) if !ok { return nil, nil } if strings.ToLower(f.Charset) == "ascii" { return value, nil } else if strings.ToLower(f.Charset) == "utf8mb4" { return value, nil } target, err := iconv.ConvertString(str, "utf-8", f.Charset) if err != nil { log.Errorf("Convert %s to %s with error: %v", str, f.Charset, err) return nil, errors.Trace(err) } return target, nil }
func ConvertToUtf8(src, encoding string) string { if len(src) < 1 { return src } if encoding == "utf-8" { return src } lines := strings.Split(src, "\n") var output string for _, line := range lines { o, err := iconv.ConvertString(line, encoding, "utf-8") // if error if err != nil { log.Println(err) output += line continue } output += o } return output }
//转换字符编码格式,例如文本是gb2312的,现在转换为utf-8:ConvretCharacterEncoding(str, "gb2312", "utf-8") func ConvertCharacterEncoding(msg, oldEncoding, newEncoding string) string { newMsg, err := iconv.ConvertString(msg, oldEncoding, newEncoding) if err != nil { log.Println("convert string Encoding error: ", err) return "" } return newMsg }
// parse a Part func (p *aParse) parsePart(part Part, contentType string) (string, bool) { if part.ContentType().ToString() == contentType && part.Filename() == "" { payload := string(p.rawPart(part)) // convert charset targetCharset := "utf-8" sourceCharset := strings.ToLower(part.ContentType().Parameter("charset")) if sourceCharset != targetCharset { payload, _ = iconv.ConvertString(payload, sourceCharset, targetCharset) } return payload, true } return "", false }
func (h *HDUJudger) GetProblems() error { vidsModel := &model.VIdsModel{} StartId, err := vidsModel.GetLastID("HDU") if err == model.DBErr { return err } else if StartId < 1000 { StartId = 999 } errCnt := 0 lastId := StartId for i := 1; ; i++ { pid := strconv.Itoa(StartId + i) page, err := h.GetProblemPage(pid) if err != nil { //offline hdulogger.Println("pid["+pid+"]: ", err, ".") return err } cpage, err := iconv.ConvertString(page, "gb2312", "utf-8") if err != nil { //Although getting error, continue proccess it. hdulogger.Println("pid["+pid+"]: ", "encode convert error.") cpage = page } if h.IsExist(cpage) { err := h.SetDetail(pid, cpage) if err != nil { hdulogger.Println("pid["+pid+"]: ", "import error.") } else { lastId = StartId + i vidsModel.SetLastID("HDU", lastId) } errCnt = 0 } else { hdulogger.Println("pid["+pid+"]: ", "not exist.") errCnt++ } if errCnt >= 100 { //If "not exist" continuously repeat 100 times, terminate it. break } } hdulogger.Println("import terminated. Last pid is ", lastId, ".") return nil }
// ReadContentUFT8 reads the contents from the file and pass these contents to UTF-8 encoding. func ReadContentUTF8(path string, encoding string) (contents string, err error) { // Read the contents of the file, whatever encoding it has dat, err := ioutil.ReadFile(path) if err != nil { log.Fatalf("[PropRef:toolset] error reading file contents: %v", err) return "", errors.New("[PropRef:tools] Error reading file " + err.Error()) } // We need to convert that to UTF-8 if the encoding is different to UTF-8 data, err := iconv.ConvertString(string(dat), encoding, "utf-8") if err != nil { log.Fatalf("[PropRef:tools] error converting file %v with encoding %v to UTF-8: %v.", path, encoding, err) return "", errors.New("[PropRef:tools] Error converting file encoding " + err.Error()) } return data, nil }
func scanDiapazon(ch chan storage.IpItem, quit chan uint32, ipstart, ipend uint32) { var ip uint32 var ipItem storage.IpItem proxy, _ := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct) for ip = ipstart; ip < ipend; ip++ { ipItem.IP = ip conn, err := proxy.Dial("tcp", ipItem.String()+":23") if err != nil { } else { hello, err := bufio.NewReader(conn).ReadString(':') if err == nil { output, _ := iconv.ConvertString(hello, "latin1", "utf-8") ipItem.Hello = output conn.Close() ch <- ipItem } } } quit <- ipstart / BLOCK_SIZE }
// 打印响应 func (to *TcpObj) printResponse(r *http.Response, err error) { if nil != err { fmt.Println("Error Happend!\n", err) return } // 打印头部 if to.OutputResponse { fmt.Println(r.Proto, r.Status) fmt.Println(sep("-", 80), ":resp.HEADER") for key, _ := range r.Header { fmt.Printf("%20s : %s\n", key, r.Header.Get(key)) } // 打印分隔行 fmt.Println(sep("-", 80), ":resp.BODY") } // 分析编码和内容 rContentType := strings.TrimSpace(r.Header.Get("Content-Type")) reg := regexp.MustCompile(`^([a-z/]+)(;[ ]*)(charset=)(.*)$`) grps := reg.FindAllStringSubmatch(rContentType, -1) var charset string //var contentType string if nil != grps { //contentType = grps[0][1] charset = grps[0][4] } else { charset = "utf8" //contentType = "text/html" } // 打印 body bs, _ := ioutil.ReadAll(r.Body) r.Body.Close() str, _ := iconv.ConvertString(string(bs), charset, "utf8") fmt.Println(str) }
func searchbyYear(w http.ResponseWriter, r *http.Request) { params := mux.Vars(r) Year := params["year"] ID := params["id"] FN := params["fn"] LN := params["ln"] //fmt.Printf("%s %s %s %s",Year,ID,FN,LN) Surl := "http://refundedcheque.rd.go.th/itp_x_tw/SearchTaxpayerServlet" query := fmt.Sprintf("nid=%s&fName=%s&lName=%s&taxYear=%s&searchType=null&effDate=null", ID, FN, LN, Year) log.Println(query) req, err := http.NewRequest("POST", Surl, strings.NewReader(query)) req.Header.Set("Referer", "http://refundedcheque.rd.go.th/itp_x_tw/pages/ITPtaxresult.jsp") req.Header.Set("Content-Type", "application/x-www-form-urlencoded") req.Header.Set("X-Requested-With", "XMLHttpRequest") client := &http.Client{} resp, err := client.Do(req) if err != nil { panic(err) } defer resp.Body.Close() fmt.Println("response Status:", resp.Status) fmt.Println("response Headers:", resp.Header) body, _ := ioutil.ReadAll(resp.Body) output, _ := iconv.ConvertString(string(body), "tis-620", "utf-8") fmt.Println("response Body:", string(output)) if len(output) == 0 { log.Println("Data not found") w.Header().Set("Content-Type", "application/json") w.Write([]byte("[]")) } w.Write([]byte(output)) }
func fetchAndFillFundRank(page int, quarter string) (int, error) { // http://stockdata.stock.hexun.com/jgcc/data/outdata/orgrank.ashx?count=50&date=2015-09-30&orgType=&stateType=null&titType=null&page=2&callback=hxbase_json7 formt := "http://stockdata.stock.hexun.com/jgcc/data/outdata/orgrank.ashx?count=%s&date=%s&orgType=&stateType=null&titType=null&page=%d&callback=hxbase_json7" pageLimit := 100 body, err := wget.GetBody(fmt.Sprintf(formt, pageLimit, quarter, page)) if err != nil { return -1, err } exists := db.NewExistsBuilder("funds") ist := db.NewInsertBuilder("funds") query := db.NewQueryBuilder("funds") var fund *Fund var fundRank *FundRank var row db.DataRow // hxbase_json7( str, err := iconv.ConvertString(string(body), "gb2312", "utf-8") if err != nil { return -1, err } src := []byte(str) src = src[13 : len(src)-1] src = bytes.Replace(src, []byte(":'"), []byte(`":"`), -1) src = bytes.Replace(src, []byte("',"), []byte(`","`), -1) src = bytes.Replace(src, []byte("'}"), []byte(`"}`), -1) src = bytes.Replace(src, []byte("{"), []byte(`{"`), -1) src = bytes.Replace(src, []byte("sum:"), []byte(`sum":`), 1) src = bytes.Replace(src, []byte("list:"), []byte(`"list":`), 1) v := &JsonFund{} err = json.Unmarshal(src, v) if err != nil { fmt.Println(string(src)) return -1, err } // {RankTd:'51',OrgName:'法国巴黎银行',OrgNameLink:'o-QF000031.shtml',OrgType:'QFII',ShareHoldingNum:'3',ShareHoldingNumLink:'otherDetail.aspx?OrgNo=QF000031',TotalHoldings:'48,388.00',TotalMarketValue:'1,100,735.00',OrgAlt:'法国巴黎银行'} for _, item := range v.List { fund = &Fund{Code: item["OrgNameLink"], Name: string(item["OrgName"]), TypeID: FundType(item["OrgType"])} fund.Code = fund.Code[2 : len(fund.Code)-6] fmt.Println("----") if !exists.Table("funds").Where("code=? and type_id=?", fund.Code, fund.TypeID).Exists() { fmt.Println("insert", fund) ist.Table("funds").Insert(fund) } row, _ = query.Table("funds").Where("code=? 
and type_id=?", fund.Code, fund.TypeID).QueryOne() if row.Empty() { return -1, fmt.Errorf("code %s not found", fund.Code) } fundRank = &FundRank{ FundID: row.GetInt64("id"), Date: quarter, Rank: int(util.ParseMoney(item["RankTd"])), Count: int(util.ParseMoney(item["ShareHoldingNum"])), MH: int64(util.ParseMoney(item["TotalHoldings"])), MV: int64(util.ParseMoney(item["TotalMarketValue"])), } if !exists.Table("fund_rank").Where("fund_id=? and date=?", fundRank.FundID, fundRank.Date).Exists() { fmt.Println("insert", fundRank) ist.Table("fund_rank").Insert(fundRank) } } return int(math.Ceil(float64(v.Sum / pageLimit))), nil }
func (t *Connection) Submit(src, dst, msg string, opt *SubmitOptions) ( err error) { glog.Infof("Submit %s -> %s: %s", src, dst, msg) body := new(bytes.Buffer) // MsgType err = body.WriteByte(opt.MsgType) if err != nil { return } // NeedReport = 0 for now err = body.WriteByte(0) if err != nil { return } // Priority if !(opt.Priority >= 0 && opt.Priority <= 3) { glog.Warningf("Incorrect priority %d, set to 1, normal.", opt.Priority) opt.Priority = 1 } err = body.WriteByte(opt.Priority) if err != nil { return } // ServiceID 10 octetString b, err := octetString(opt.ServiceID, 10) if err != nil { return } _, err = body.Write(b) if err != nil { return } // FeeType 2 octetString if len(opt.FeeType) != 2 { err = errors.New("len(FeeType) should be 2") return } _, err = body.WriteString(opt.FeeType) if err != nil { return } // FeeCode 6 octetString b, err = octetString(opt.FeeCode, 6) if err != nil { return } _, err = body.Write(b) if err != nil { return } // FixedFee 6 octetString b, err = octetString(opt.FixedFee, 6) if err != nil { return } _, err = body.Write(b) if err != nil { return } // MsgFormat byte, text message required 15 for GB18030 err = body.WriteByte(15) if err != nil { return } // ValidTime 17 octetString “YYMMDDhhmmsstnnp” _, err = body.Write([]byte{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) if err != nil { return } // AtTime 17 octetString _, err = body.Write([]byte{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) if err != nil { return } // SrcTermID 21 octetString b, err = octetString(src, 21) if err != nil { return } _, err = body.Write(b) if err != nil { return } // ChargeTermID 21 octetString _, err = body.Write([]byte{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) if err != nil { return } // DestTermIDCount byte err = body.WriteByte(0x01) if err != nil { return } // DestTermID 21 octetString * count b, err = octetString(dst, 21) if err != nil { return } _, err = body.Write(b) if err != nil { return } // convert msg to 
GB18030 msg, err = iconv.ConvertString(msg, "UTF-8", "GB18030") if err != nil { return } if len(msg) > 0xff { err = fmt.Errorf("Message too long: %d bytes", len(msg)) } // MsgLength byte err = body.WriteByte(byte(len(msg))) if err != nil { return } // MsgContent octetString _, err = body.WriteString(msg) if err != nil { return } // Reserve 8 octetString _, err = body.Write([]byte{0, 0, 0, 0, 0, 0, 0, 0}) if err != nil { return } seq, err := t.writeRequest(REQID_SUBMIT, body.Bytes()) if err != nil { return } t.pool[seq] = nil glog.Infof("Submit seq %d %s -> %s: %s", seq, src, dst, msg) return }
func main() { // read bytes from sample.utf8 utf8Bytes, err := ioutil.ReadFile("sample.utf8") if err != nil { fmt.Println("Could not open 'sample.utf8': ", err) } // read bytes from sample.ebcdic-us ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us") if err != nil { fmt.Println("Could not open 'sample.ebcdic-us': ", err) } // use iconv to check conversions both ways utf8String := string(utf8Bytes) ebcdicString := string(ebcdicBytes) // convert from utf-8 to ebcdic utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us") if err != nil || ebcdicString != utf8ConvertedString { // generate hex string ebcdicHexString := hex.EncodeToString(ebcdicBytes) utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString)) fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err) fmt.Println(ebcdicHexString, " - ", len(ebcdicString)) fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString)) } else { fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString") } // convert from ebcdic to utf-8 ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8") if err != nil || utf8String != ebcdicConvertedString { // generate hex string utf8HexString := hex.EncodeToString(utf8Bytes) ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString)) fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err) fmt.Println(utf8HexString, " - ", len(utf8String)) fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString)) } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString") } testBuffer := make([]byte, len(ebcdicBytes)*2) // convert from ebdic bytes to utf-8 bytes bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8") if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) { fmt.Println("ebcdic-us was 
not properly converted to utf-8 by iconv.Convert, error: ", err) } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert") } // convert from utf-8 bytes to ebcdic bytes bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us") if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) { fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err) } else { fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert") } // test iconv.Reader utf8File, _ := os.Open("sample.utf8") utf8Reader, _ := iconv.NewReader(utf8File, "utf-8", "ebcdic-us") bytesRead, err = utf8Reader.Read(testBuffer) if err != nil || bytesRead != len(ebcdicBytes) { fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err) } else { fmt.Println("utf8 was property converted to ebcdic-us by iconv.Reader") } ebcdicFile, _ := os.Open("sample.ebcdic-us") ebcdicReader, _ := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8") bytesRead, err = ebcdicReader.Read(testBuffer) if err != nil || bytesRead != len(utf8Bytes) { fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err) if bytesRead > 0 { fmt.Println(string(testBuffer[:bytesRead])) fmt.Println(hex.EncodeToString(testBuffer[:bytesRead])) fmt.Println(hex.EncodeToString(utf8Bytes)) } } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader") } }
func ConvertUtf(in string, code string) (output string, err error) { output, err = iconv.ConvertString(in, "utf-8", code) return }
func ToCp874(str string) string { str, _ = iconv.ConvertString(str, "utf-8", "cp874") return str }
// main converts a fixed utf-8 sample string to gbk and discards both the
// converted text and the error.
func main() {
	_, _ = iconv.ConvertString("我是UTF-8", "utf-8", "gbk")
}
// FetchDzjy 抓取数据 func FetchDzjy(date time.Time) ([]*dzjyVO, error) { formt := "http://data.eastmoney.com/dzjy/%s.html" resp, err := wget.Get(fmt.Sprintf(formt, date.Format("200601"))) if err != nil { return nil, gos.DoError(err) } doc, err := goquery.NewDocumentFromResponse(resp) if err != nil { return nil, gos.DoError(err) } var td *goquery.Selection var dzjy *dzjyVO var dateStr string var stockCode string var row db.DataRow query := db.NewQueryBuilder("stock") datalist := make([]*dzjyVO, 0) buy := "" sell := "" var priceNow float64 var price float64 var amount float64 var total float64 var length int doc.Find("#content div.list").Eq(2).Find("table tr.list_eve").Each(func(i int, tr *goquery.Selection) { td = tr.Find("td") length = td.Length() if length == 10 { dateStr = td.Eq(0).Text() stockCode = td.Eq(1).Text() } else if length == 9 { stockCode = td.Eq(0).Text() } row, _ = query.Where("code=?", stockCode).QueryOne() if row.Empty() { return } switch length { case 10: priceNow = util.ParseMoney(td.Eq(4).Text()) price = util.ParseMoney(td.Eq(5).Text()) amount = util.ParseMoney(td.Eq(6).Text()) total = util.ParseMoney(td.Eq(7).Text()) buy, err = iconv.ConvertString(td.Eq(8).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(9).Text(), "gb2312", "utf-8") if err != nil { return } case 9: priceNow = util.ParseMoney(td.Eq(3).Text()) price = util.ParseMoney(td.Eq(4).Text()) amount = util.ParseMoney(td.Eq(5).Text()) total = util.ParseMoney(td.Eq(6).Text()) buy, err = iconv.ConvertString(td.Eq(7).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(8).Text(), "gb2312", "utf-8") if err != nil { return } case 5: price = util.ParseMoney(td.Eq(0).Text()) amount = util.ParseMoney(td.Eq(1).Text()) total = util.ParseMoney(td.Eq(2).Text()) buy, err = iconv.ConvertString(td.Eq(3).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(4).Text(), "gb2312", "utf-8") if err 
!= nil { return } default: return } dzjy = &dzjyVO{ StockID: row.GetInt64("id"), Date: dateStr, PriceNow: priceNow, Price: price, Amount: amount, Total: total, Buy: buy, Sell: sell, } datalist = append(datalist, dzjy) }) return datalist, nil }
func (qw *QQWry) QueryIP(ip string) (areaInfo, extraInfo string) { var first, last, current int64 addr := ip2Int64(ip) if addr == -1 { return "Unknown", "Unknown" } first = qw.first last = qw.last current = ((last-first)/7/2)*7 + first for current > first { if read4byte(qw, current) > addr { last = current current = ((last-first)/7/2)*7 + first } else { first = current current = ((last-first)/7/2)*7 + first } } offset := read3byte(qw, current+4) end := read4byte(qw, offset) if addr > end { return "Unknown", "Unknown" } offset += 4 mode := read1byte(qw, offset) country := make([]byte, 256) var area []byte n := 0 if mode == _REDIRECT_MODE_1 { offset = read3byte(qw, offset+1) if read1byte(qw, offset) == _REDIRECT_MODE_2 { off := read3byte(qw, offset+1) qw.ReadAt(country, off) n = bytes.IndexByte(country, 0x00) country = country[:n] offset += 4 } else { qw.ReadAt(country, offset) n = bytes.IndexByte(country, 0x00) country = country[:n] offset += int64(n + 1) } } else if mode == _REDIRECT_MODE_2 { off := read3byte(qw, offset+1) qw.ReadAt(country, off) n = bytes.IndexByte(country, 0x00) country = country[:n] offset += 4 } else { qw.ReadAt(country, offset) n = bytes.IndexByte(country, 0x00) country = country[:n] offset += int64(n + 1) } mode = read1byte(qw, offset) if mode == _REDIRECT_MODE_1 || mode == _REDIRECT_MODE_2 { offset = read3byte(qw, offset+1) } if offset != 0 { area = make([]byte, 256) qw.ReadAt(area, offset) n = bytes.IndexByte(area, 0x00) area = area[:n] } cz88 := []byte("CZ88.NET") if len(country) > 1 && bytes.Compare(country[1:], cz88) != 0 { areaInfo, _ = iconv.ConvertString(string(country), "GB18030", "utf-8") } if len(area) > 1 && bytes.Compare(area[1:], cz88) != 0 { extraInfo, _ = iconv.ConvertString(string(area), "GB18030", "utf-8") } return areaInfo, extraInfo }
func run() { var ( doc *goquery.Document doc_err, err error fd *os.File file_encoding string = "-" ) if *url == "-" { doc, doc_err = goquery.NewDocumentFromReader(os.Stdin) } else if fd, err = os.Open(*url); err == nil { doc, doc_err = goquery.NewDocumentFromReader(fd) defer fd.Close() } else { tmp_url := *url if !strings.HasPrefix(tmp_url, "http") { tmp_url = "http://" + tmp_url } doc, doc_err = goquery.NewDocument(tmp_url) } if doc_err != nil { log.Fatal("goquery NewDocument err:", doc_err) } if *debug { log.Printf("tag=[%s]\n", selector) } if !*noenc { file_encoding = get_html_enc(doc) } doc.Find(selector).Map(func(i int, sel *goquery.Selection) string { output := "" switch fun { case "html": if output, err = sel.Html(); err != nil { log.Fatal("select err:", err) } output = strings.TrimSpace(output) case "ohtml": if output, err = goquery.OuterHtml(sel); err != nil { log.Fatal("select err:", err) } output = strings.TrimSpace(output) case "text": output = strings.TrimSpace(sel.Text()) case "attr": attr_list := strings.Split(*attr, ",") for _, attr_i := range attr_list { var output_i string if attr_i == "text" { // a hardcode case for convenience scrapy output_i = strings.TrimSpace(sel.Text()) } else { output_i = sel.AttrOr(strings.TrimSpace(attr_i), "-") } if output_i == "" { output_i = "-" } output += output_i + "\t" } } if !*noenc && file_encoding != "" && file_encoding != "utf8" { if output, err = iconv.ConvertString(output, file_encoding, "utf8"); err != nil { log.Fatal("encoding invalid", err) } } fmt.Println(output) return "" }) }