func (this *LazyProcesser) Process(resp *common.Response, y *common.Yield) { y.AddItem(func() *common.Item { item := common.NewItem() item.Set("html", resp.Body) return item }()) }
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<div id="ua_string">.*?</span>(.*?)</div>`).FindAllStringSubmatch(resp.Body, -1) for _, v := range m { item := common.NewItem() item.Set("user-agent", v[1]) y.AddItem(item) } }
func (this *MyProcesser) processTitle(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<div class="channel-item">.*?<h3><a href="(.*?)">(.*?)</a>`).FindAllStringSubmatch(resp.Body, -1) for _, v := range m { item := common.NewItem() item.Set("url", v[1]) item.Set("title", v[2]) y.AddItem(item) } }
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) { y.AddItem(func() *common.Item { item := common.NewItem() item.Set("url", resp.Url) item.Set("title", func() string { m := regexp.MustCompile(`<title>(.*?)</title>`).FindStringSubmatch(resp.Body) if len(m) > 0 { return m[1] } return "" }()) return item }()) }
func (this *Extractor) Extract(resp *common.Response) []*common.Item { items := []*common.Item{} scopes := regexp.MustCompile(this.scopeRule).FindAllString(resp.Body, -1) for _, scope := range scopes { item := common.NewItem() for key, rule := range this.kvRule { if rule == "_URL_" { item.Set(key, resp.Url) continue } value := regexp.MustCompile(rule).FindStringSubmatch(scope)[1] if this.trimFunc != nil { item.Set(key, this.trimFunc(value)) } else { item.Set(key, value) } } items = append(items, item) } return items }