func (b *BilibiliCollector) CollectTimeline(page int) (ret []Entry, err error) { // get content url := fmt.Sprintf("http://www.bilibili.com/account/dynamic/dyn-%d", page) data, err := GetBytesWithCookie(url, b.cookie) if err != nil { return nil, b.Err("get timeline %s %v", url, err) } if bytes.Contains(data, []byte(`document.write("请先登录!");`)) { return nil, bilibiliLoginError } root, err := nw.ParseBytes(data) if err != nil { return nil, err } var image, msgType, link, title, desc string var id int var walkErr error root.Walk(nw.Css("li", nw.Multi( nw.Css("img.preview", func(n *nw.Node) { image = n.Attr["src"] }), nw.Css("div.t", func(n *nw.Node) { msgType = n.Text }), nw.Css("a.vt", func(n *nw.Node) { title = n.Text link = n.Attr["href"] if !strings.HasPrefix(link, "http") { link = "http://www.bilibili.com" + link } }), nw.Css("div.content", func(n *nw.Node) { desc = strings.TrimSpace(n.Text) }), func(node *nw.Node) { id, err = strconv.Atoi(regexp.MustCompile(`av([0-9]+)`).FindStringSubmatch(link)[1]) if err != nil { walkErr = b.Err("link without av id %s at %s", link, url) return } ret = append(ret, &BilibiliEntry{ Id: id, Link: link, Title: title, Image: image, Description: desc, }) }, ))) if walkErr != nil { return nil, walkErr } return }
func (v *V2exCollector) CollectPage(uri string) (ret []Entry, err error) { resp, err := Get(uri) if err != nil { return nil, v.Err("get %s error: %v", uri, err) } defer resp.Body.Close() root, err := nw.Parse(resp.Body) if err != nil { return nil, v.Err("parse html %s: %v", uri, err) } var walkError error root.Walk(nw.Css("div.cell span.item_title a", func(n *nw.Node) { id, err := strconv.Atoi(v2exPidPattern.FindStringSubmatch(n.Attr["href"])[1]) if err != nil { walkError = v.Err("no post id: %s", uri) return } ret = append(ret, &V2exEntry{ Id: id, Title: n.Text, }) })) if walkError != nil { err = walkError } return }
func (b *BilibiliCollector) CollectNewest(urlPattern string, page int) (ret []Entry, err error) { // get content url := s(urlPattern, page) resp, err := GetWithCookie(url, b.cookie) if err != nil { return nil, b.Err("get newest page %s %v", url, err) } defer resp.Body.Close() root, err := nw.Parse(resp.Body) if err != nil { return nil, b.Err("parse html %v", err) } var link, title, image string var id int var walkErr error root.Walk(nw.Css("ul.vd_list li", nw.Multi( nw.Css("a.title", func(n *nw.Node) { link = "http://www.bilibili.com" + n.Attr["href"] title = n.Text }), nw.Css("a.preview img", func(n *nw.Node) { image = n.Attr["src"] }), func(node *nw.Node) { id, err = strconv.Atoi(regexp.MustCompile(`av([0-9]+)`).FindStringSubmatch(link)[1]) if err != nil { walkErr = b.Err("link without av id %s at %s", link, url) return } ret = append(ret, &BilibiliEntry{ Id: id, Link: link, Title: title, Image: image, }) }, ))) if walkErr != nil { return nil, walkErr } return }