func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { items := extractor.NewExtractor(). SetScopeRule(`(?s)<dt class="basicInfo-item name">.*?</dd>`). SetRules(map[string]string{ "key": `(?s)name">(.*?)</dt>`, "value": `(?s)value">(.*?)</dd>`, }). SetTrimFunc(extractor.TrimHtmlTags). Extract(resp) for _, item := range items { y.AddItem(item) } }
func (this *QuickEngineProcesser) processItems(resp *common.Response, y *common.Yield, rule _Rule) { var TrimFunc extractor.TrimFunc switch rule.ItemRule.TrimFunc { case "trim_html_tags": TrimFunc = extractor.TrimHtmlTags case "trim_blank": TrimFunc = extractor.TrimBlank } items := extractor.NewExtractor(). SetScopeRule(rule.ItemRule.ScopeRule). SetRules(rule.ItemRule.KVRule). SetTrimFunc(TrimFunc). Extract(resp) for _, item := range items { y.AddItem(item) } }
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) { var TrimFunc extractor.TrimFunc switch rule.RequestRule.TrimFunc { case "trim_html_tags": TrimFunc = extractor.TrimHtmlTags case "trim_blank": TrimFunc = extractor.TrimBlank } items := extractor.NewExtractor(). SetScopeRule(rule.RequestRule.ScopeRule). SetRules(rule.RequestRule.KVRule). SetTrimFunc(TrimFunc). Extract(resp) for _, item := range items { for _, url := range item.GetAll() { if strings.HasPrefix(url, "http://") { y.AddRequest(common.NewRequest(url)) } else { y.AddRequest(common.NewRequest(rule.BaseUrl + url)) } } } }