func (e extractComic) setImage(a *Article, img *goquery.Selection) bool { if img.Length() == 0 { return false } img = img.First() src, ok := img.Attr("src") if !ok { return false } i := hitImage(src) if i == nil { return false } title, _ := img.Attr("title") if title == "" { title, _ = img.Attr("alt") } a.Img = i a.CleanedText = title a.addInlineArticleImageHTML(title) return true }
func parseRegions(regionTags *goquery.Selection) []string { result := make([]string, 0, regionTags.Length()) regionTags.Each(func(n int, s *goquery.Selection) { result = append(result, s.Text()) }) return result }
func findZipcode(index int, details *goquery.Selection) string { if details.Length() < index { return "" } zipcode_p := strings.Split(details.Eq(index).Text(), ":") if zipcode_p[0] == "CEP" { return zipcode_p[1] } return findZipcode(index+1, details) }
func getText(s *goquery.Selection, includeDecendents bool) string { if s.Length() == 0 { return "" } if includeDecendents { return strings.TrimSpace(s.Text()) } var buff []string for node := s.First().Nodes[0].FirstChild; node != nil; node = node.NextSibling { if node.Type == html.TextNode { buff = append(buff, node.Data) } } return strings.TrimSpace(strings.Join(buff, "")) }
func TestDownloadHtml(t *testing.T) { //return //request := request.NewRequest("http://live.sina.com.cn/zt/api/l/get/finance/globalnews1/index.htm?format=json&callback=t13975294&id=23521&pagesize=45&dire=f&dpc=1") var req *request.Request req = request.NewRequest("http://live.sina.com.cn/zt/l/v/finance/globalnews1/", "html", "", "GET", "", nil, nil, nil, nil) var dl downloader.Downloader dl = downloader.NewHttpDownloader() var p *page.Page p = dl.Download(req) var doc *goquery.Document doc = p.GetHtmlParser() //fmt.Println(doc) //body := p.GetBodyStr() //fmt.Println(body) var s *goquery.Selection s = doc.Find("body") if s.Length() < 1 { t.Error("html parse failed!") } /* doc, err := goquery.NewDocument("http://live.sina.com.cn/zt/l/v/finance/globalnews1/") if err != nil { fmt.Printf("%v",err) } s := doc.Find("meta"); fmt.Println(s.Length()) resp, err := http.Get("http://live.sina.com.cn/zt/l/v/finance/globalnews1/") if err != nil { fmt.Printf("%v",err) } defer resp.Body.Close() doc, err = goquery.NewDocumentFromReader(resp.Body) s = doc.Find("meta"); fmt.Println(s.Length()) */ }
func TestDownloadHtml(t *testing.T) { var req *context.Request req = context.NewRequest("http://live.sina.com.cn/zt/l/v/finance/globalnews1/", "html", "", "GET", "", nil, nil, nil, nil) var dl Downloader dl = NewHttpDownloader() var p *page.Page p = dl.Download(req) var doc *goquery.Document doc = p.GetHtmlParser() //fmt.Println(doc) //body := p.GetBodyStr() //fmt.Println(body) var s *goquery.Selection s = doc.Find("body") if s.Length() < 1 { t.Error("html parse failed!") } /*doc, err := goquery.NewDocument("http://live.sina.com.cn/zt/l/v/finance/globalnews1/") if err != nil { fmt.Printf("%v",err) } s := doc.Find("meta"); fmt.Println(s.Length()) resp, err := http.Get("http://live.sina.com.cn/zt/l/v/finance/globalnews1/") if err != nil { fmt.Printf("%v",err) } defer resp.Body.Close() doc, err = goquery.NewDocumentFromReader(resp.Body) s = doc.Find("meta"); fmt.Println(s.Length())*/ }
func encuentraGrupo(tabla *goquery.Selection) (grupo int) { var anterior *goquery.Selection for anterior = tabla.Prev(); anterior.Length() > 0 && grupo == 0; anterior = anterior.Prev() { if !anterior.Is("div") { log.Fatal(errors.New("No se encontró curso para la tabla")) } strongs := anterior.Find("strong") if strongs.Length() != 1 { continue } hayMatch, err := regexp.MatchString("Grupo [0-9]+", strongs.Text()) if err != nil { mataPrograma("Morí en encuentraGrupo regex", err) } if hayMatch { tokens := strings.Split(strongs.Text(), " ") grupo, err = strconv.Atoi(tokens[1]) if err != nil { mataPrograma("Morí en encuentraGrupo hayMatch", err) } } } return }
func parseName(players *goquery.Selection, position int) string { if players.Length()-1 >= position { return players.Eq(position).Text() } return "" }
// FetchDzjy 抓取数据 func FetchDzjy(date time.Time) ([]*dzjyVO, error) { formt := "http://data.eastmoney.com/dzjy/%s.html" resp, err := wget.Get(fmt.Sprintf(formt, date.Format("200601"))) if err != nil { return nil, gos.DoError(err) } doc, err := goquery.NewDocumentFromResponse(resp) if err != nil { return nil, gos.DoError(err) } var td *goquery.Selection var dzjy *dzjyVO var dateStr string var stockCode string var row db.DataRow query := db.NewQueryBuilder("stock") datalist := make([]*dzjyVO, 0) buy := "" sell := "" var priceNow float64 var price float64 var amount float64 var total float64 var length int doc.Find("#content div.list").Eq(2).Find("table tr.list_eve").Each(func(i int, tr *goquery.Selection) { td = tr.Find("td") length = td.Length() if length == 10 { dateStr = td.Eq(0).Text() stockCode = td.Eq(1).Text() } else if length == 9 { stockCode = td.Eq(0).Text() } row, _ = query.Where("code=?", stockCode).QueryOne() if row.Empty() { return } switch length { case 10: priceNow = util.ParseMoney(td.Eq(4).Text()) price = util.ParseMoney(td.Eq(5).Text()) amount = util.ParseMoney(td.Eq(6).Text()) total = util.ParseMoney(td.Eq(7).Text()) buy, err = iconv.ConvertString(td.Eq(8).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(9).Text(), "gb2312", "utf-8") if err != nil { return } case 9: priceNow = util.ParseMoney(td.Eq(3).Text()) price = util.ParseMoney(td.Eq(4).Text()) amount = util.ParseMoney(td.Eq(5).Text()) total = util.ParseMoney(td.Eq(6).Text()) buy, err = iconv.ConvertString(td.Eq(7).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(8).Text(), "gb2312", "utf-8") if err != nil { return } case 5: price = util.ParseMoney(td.Eq(0).Text()) amount = util.ParseMoney(td.Eq(1).Text()) total = util.ParseMoney(td.Eq(2).Text()) buy, err = iconv.ConvertString(td.Eq(3).Text(), "gb2312", "utf-8") if err != nil { return } sell, err = iconv.ConvertString(td.Eq(4).Text(), "gb2312", "utf-8") if err != nil { return } default: return } dzjy = &dzjyVO{ StockID: row.GetInt64("id"), Date: dateStr, PriceNow: priceNow, Price: price, Amount: amount, Total: total, Buy: buy, Sell: sell, } datalist = append(datalist, dzjy) }) return datalist, nil }