Пример #1
0
// parseList handles the index page: it walks every anchor matched by
// "div#unterMenu a" and executes a "user-agents" request for each entry.
//
// Anchors with class "unterMenuTitel" act as section headings; the lower-cased
// text of the most recent heading becomes the "type" metadata of the entries
// that follow it. Anchors with class "unterMenuName" are the entries: each is
// requested with its own text as the "title" metadata and the URL of resp as
// the referer.
//
// After the walk the results channel is closed, and the crawler is closed when
// the function returns. The returned error is always nil.
func (spider *spider) parseList(ctx context.Context, resp *crawl.Response) error {
	defer spider.c.Close()

	var currentTitle string

	resp.Find("div#unterMenu a").Each(func(_ int, s *goquery.Selection) {
		// An anchor without a class attribute yields "" and matches no case.
		class, _ := s.Attr("class")
		switch class {
		case "unterMenuTitel":
			currentTitle = strings.ToLower(s.Text())
		case "unterMenuName":
			// Derive a per-request context rather than assigning to ctx: the
			// closure shares ctx across all iterations, so overwriting it
			// would nest each request's metadata inside the previous one's
			// and make the context chain grow with every menu entry.
			reqCtx := metadata.NewContext(ctx, metadata.Pairs(
				"type", currentTitle,
				"title", s.Text(),
			))
			// A missing href yields an empty URL, which is passed through
			// unchanged.
			href, _ := s.Attr("href")
			// Whatever Execute returns is not inspected here.
			spider.c.Execute(reqCtx, &crawl.Request{
				URL:       strings.TrimSpace(href),
				Referer:   resp.URL().String(),
				Callbacks: crawl.Callbacks("user-agents"),
			})
		}
	})
	close(spider.results)
	return nil
}
Пример #2
0
// missingMetadataError reports a metadata key that the request context did not
// carry. Its value is the name of the missing key.
type missingMetadataError string

// Error implements the error interface.
func (e missingMetadataError) Error() string {
	return "missing request metadata key: " + string(e)
}

// parseUserAgents handles a single user-agent listing page.
//
// It reads the "type" and "title" values from the metadata attached to ctx,
// maps every node matched by "#liste li a" through crawl.NodeText, logs what
// it found and sends the combined userAgent record on spider.results. The send
// blocks until the record is received (or buffered, if the channel has
// capacity).
//
// It returns a missingMetadataError when ctx carries no value for "type" or
// "title", instead of panicking on an out-of-range index; otherwise it
// returns nil.
func (spider *spider) parseUserAgents(ctx context.Context, resp *crawl.Response) error {
	// When ctx has no metadata md is nil; indexing a nil map is safe and
	// yields empty slices, which the length checks below reject.
	md, _ := metadata.FromContext(ctx)

	types, titles := md["type"], md["title"]
	if len(types) == 0 {
		return missingMetadataError("type")
	}
	if len(titles) == 0 {
		return missingMetadataError("title")
	}
	userAgentType := types[0]
	userAgentTitle := titles[0]

	userAgents := resp.Find(`#liste li a`).Map(crawl.NodeText)

	glog.Info(resp.URL().String())
	glog.Infof("type=%q title=%q user-agents=%v", userAgentType, userAgentTitle, userAgents)

	spider.results <- &userAgent{
		Type:       userAgentType,
		Title:      userAgentTitle,
		UserAgents: userAgents,
	}

	return nil
}