func (spider *spider) parseList(ctx context.Context, resp *crawl.Response) error { defer spider.c.Close() var currentTitle string resp.Find("div#unterMenu a").Each(func(_ int, s *goquery.Selection) { c, _ := s.Attr("class") switch c { case "unterMenuTitel": currentTitle = strings.ToLower(s.Text()) case "unterMenuName": ctx = metadata.NewContext(ctx, metadata.Pairs( "type", currentTitle, "title", s.Text(), )) href, _ := s.Attr("href") spider.c.Execute(ctx, &crawl.Request{ URL: strings.TrimSpace(href), Referer: resp.URL().String(), Callbacks: crawl.Callbacks("user-agents"), }) } }) close(spider.results) return nil }
func (spider *spider) parseUserAgents(ctx context.Context, resp *crawl.Response) error { md, _ := metadata.FromContext(ctx) userAgentType := md["type"][0] userAgentTitle := md["title"][0] userAgents := resp.Find(`#liste li a`).Map(crawl.NodeText) glog.Info(resp.URL().String()) glog.Infof("type=%q title=%q user-agents=%v", userAgentType, userAgentTitle, userAgents) spider.results <- &userAgent{ Type: userAgentType, Title: userAgentTitle, UserAgents: userAgents, } return nil }