// CollectPage fetches one V2EX listing page and extracts an entry for every
// post title link found in it.
func (v *V2exCollector) CollectPage(uri string) (ret []Entry, err error) {
	resp, err := Get(uri)
	if err != nil {
		return nil, v.Err("get %s error: %v", uri, err)
	}
	defer resp.Body.Close()
	root, err := nw.Parse(resp.Body)
	if err != nil {
		return nil, v.Err("parse html %s: %v", uri, err)
	}
	var walkError error
	root.Walk(nw.Css("div.cell span.item_title a", func(n *nw.Node) {
		// pull the numeric post id out of the href; skip the node when the
		// pattern does not match instead of panicking on a nil submatch
		match := v2exPidPattern.FindStringSubmatch(n.Attr["href"])
		if match == nil {
			walkError = v.Err("no post id: %s", uri)
			return
		}
		id, err := strconv.Atoi(match[1])
		if err != nil {
			walkError = v.Err("no post id: %s", uri)
			return
		}
		ret = append(ret, &V2exEntry{
			Id:    id,
			Title: n.Text,
		})
	}))
	if walkError != nil {
		err = walkError
	}
	return
}
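// Both CollectPage above and CollectNewest below repeat the same
// FindStringSubmatch + Atoi dance to pull a numeric id out of a link. A
// minimal sketch of a shared helper, assuming it lives in this package; the
// name extractId is hypothetical and not part of the existing code.

// extractId returns the first captured group of pattern in s as an int, and
// false when the pattern does not match or the capture is not a valid number.
func extractId(pattern *regexp.Regexp, s string) (int, bool) {
	match := pattern.FindStringSubmatch(s)
	if match == nil {
		return 0, false
	}
	id, err := strconv.Atoi(match[1])
	if err != nil {
		return 0, false
	}
	return id, true
}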
// CollectNewest fetches one page of the newest-video listing and extracts an
// entry for every video in it.
func (b *BilibiliCollector) CollectNewest(urlPattern string, page int) (ret []Entry, err error) {
	// get content
	url := s(urlPattern, page)
	resp, err := GetWithCookie(url, b.cookie)
	if err != nil {
		return nil, b.Err("get newest page %s %v", url, err)
	}
	defer resp.Body.Close()
	root, err := nw.Parse(resp.Body)
	if err != nil {
		return nil, b.Err("parse html %v", err)
	}
	// compile the av-id pattern once instead of on every list item
	avPattern := regexp.MustCompile(`av([0-9]+)`)
	var link, title, image string
	var walkErr error
	root.Walk(nw.Css("ul.vd_list li", nw.Multi(
		nw.Css("a.title", func(n *nw.Node) {
			link = "http://www.bilibili.com" + n.Attr["href"]
			title = n.Text
		}),
		nw.Css("a.preview img", func(n *nw.Node) {
			image = n.Attr["src"]
		}),
		func(node *nw.Node) {
			// the av id is the numeric part of the video link; skip the item
			// when the pattern does not match instead of panicking
			match := avPattern.FindStringSubmatch(link)
			if match == nil {
				walkErr = b.Err("link without av id %s at %s", link, url)
				return
			}
			id, err := strconv.Atoi(match[1])
			if err != nil {
				walkErr = b.Err("link without av id %s at %s", link, url)
				return
			}
			ret = append(ret, &BilibiliEntry{
				Id:    id,
				Link:  link,
				Title: title,
				Image: image,
			})
		},
	)))
	if walkErr != nil {
		return nil, walkErr
	}
	return
}
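// A minimal usage sketch, assuming a caller that gathers the first few newest
// pages in order; collectNewestPages and its parameters are hypothetical and
// not part of the existing code.
func collectNewestPages(b *BilibiliCollector, urlPattern string, pages int) ([]Entry, error) {
	var all []Entry
	for page := 1; page <= pages; page++ {
		// each page is fetched and parsed independently; the first failure
		// aborts the whole collection
		entries, err := b.CollectNewest(urlPattern, page)
		if err != nil {
			return nil, err
		}
		all = append(all, entries...)
	}
	return all, nil
}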