func (this *LazyProcesser) Process(resp *common.Response, y *common.Yield) { y.AddItem(func() *common.Item { item := common.NewItem() item.Set("html", resp.Body) return item }()) }
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<div id="ua_string">.*?</span>(.*?)</div>`).FindAllStringSubmatch(resp.Body, -1) for _, v := range m { item := common.NewItem() item.Set("user-agent", v[1]) y.AddItem(item) } }
func (this *MyProcesser) processTitle(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<div class="channel-item">.*?<h3><a href="(.*?)">(.*?)</a>`).FindAllStringSubmatch(resp.Body, -1) for _, v := range m { item := common.NewItem() item.Set("url", v[1]) item.Set("title", v[2]) y.AddItem(item) } }
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { y.AddItem(func() *common.Item { item := common.NewItem() item.Set("url", resp.Url) item.Set("title", func() string { m := regexp.MustCompile(`<title>(.*?)</title>`).FindStringSubmatch(resp.Body) if len(m) > 0 { return m[1] } return "" }()) return item }()) }
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { items := extractor.NewExtractor(). SetScopeRule(`(?s)<dt class="basicInfo-item name">.*?</dd>`). SetRules(map[string]string{ "key": `(?s)name">(.*?)</dt>`, "value": `(?s)value">(.*?)</dd>`, }). SetTrimFunc(extractor.TrimHtmlTags). Extract(resp) for _, item := range items { y.AddItem(item) } }
func (this *QuickEngineProcesser) processItems(resp *common.Response, y *common.Yield, rule _Rule) { var TrimFunc extractor.TrimFunc switch rule.ItemRule.TrimFunc { case "trim_html_tags": TrimFunc = extractor.TrimHtmlTags case "trim_blank": TrimFunc = extractor.TrimBlank } items := extractor.NewExtractor(). SetScopeRule(rule.ItemRule.ScopeRule). SetRules(rule.ItemRule.KVRule). SetTrimFunc(TrimFunc). Extract(resp) for _, item := range items { y.AddItem(item) } }
func (this *QuickEngineProcesser) Process(resp *common.Response, y *common.Yield) { common.Try(func() { for _, rule := range this.config.Rules { if regexp.MustCompile(rule.UrlMatch).MatchString(resp.Url) { if rule.Succ != "" && !strings.Contains(resp.Body, rule.Succ) { log.Printf("cannot find succ string:%s", rule.Succ) break } if rule.ItemRule.ScopeRule != "" { this.processItems(resp, y, rule) } if rule.RequestRule.ScopeRule != "" { this.processRequests(resp, y, rule) } y.SetMerge(rule.Merge) break //Only use the first match } } }, func(e interface{}) { log.Printf("pannic %s\n", e) }) }
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) { var TrimFunc extractor.TrimFunc switch rule.RequestRule.TrimFunc { case "trim_html_tags": TrimFunc = extractor.TrimHtmlTags case "trim_blank": TrimFunc = extractor.TrimBlank } items := extractor.NewExtractor(). SetScopeRule(rule.RequestRule.ScopeRule). SetRules(rule.RequestRule.KVRule). SetTrimFunc(TrimFunc). Extract(resp) for _, item := range items { for _, url := range item.GetAll() { if strings.HasPrefix(url, "http://") { y.AddRequest(common.NewRequest(url)) } else { y.AddRequest(common.NewRequest(rule.BaseUrl + url)) } } } }
func (this *MyProcesser) processNext(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<span class="next">.*?<a href="(.*?)"`).FindStringSubmatch(resp.Body) if len(m) > 0 { y.AddRequest(common.NewRequest(this.baseUrl + m[1])) } }