// Process stores the raw response body in a new item under the "html" key
// and adds that single item to y.
func (this *LazyProcesser) Process(resp *common.Response, y *common.Yield) {
	page := common.NewItem()
	page.Set("html", resp.Body)
	y.AddItem(page)
}
// uaStringPattern matches a <div id="ua_string"> element and captures the
// text between the first </span> that follows the opening tag and the next
// </div>. The (?s) flag lets "." span newlines. It is compiled once at
// package initialisation rather than on every Process call.
var uaStringPattern = regexp.MustCompile(`(?s)<div id="ua_string">.*?</span>(.*?)</div>`)

// Process scans resp.Body for every uaStringPattern match and adds one item
// per match to y, with the captured text stored under the "user-agent" key.
// Nothing is added when the body contains no match.
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	for _, match := range uaStringPattern.FindAllStringSubmatch(resp.Body, -1) {
		item := common.NewItem()
		// match[0] is the whole match; match[1] is the single capture group.
		item.Set("user-agent", match[1])
		y.AddItem(item)
	}
}
// channelItemPattern matches a <div class="channel-item"> element up to the
// first <h3><a href="...">...</a> that follows it, capturing the href value
// (group 1) and the anchor text (group 2). The (?s) flag lets "." span
// newlines. It is compiled once at package initialisation rather than on
// every processTitle call.
var channelItemPattern = regexp.MustCompile(`(?s)<div class="channel-item">.*?<h3><a href="(.*?)">(.*?)</a>`)

// processTitle scans resp.Body for every channelItemPattern match and adds
// one item per match to y, storing the captured href under "url" and the
// captured anchor text under "title". Nothing is added when there is no match.
func (this *MyProcesser) processTitle(resp *common.Response, y *common.Yield) {
	for _, match := range channelItemPattern.FindAllStringSubmatch(resp.Body, -1) {
		item := common.NewItem()
		item.Set("url", match[1])
		item.Set("title", match[2])
		y.AddItem(item)
	}
}
// pageTitlePattern captures the text between <title> and the first following
// </title>. There is no (?s) flag, so a title that spans several lines does
// not match. It is compiled once at package initialisation rather than on
// every Process call.
var pageTitlePattern = regexp.MustCompile(`<title>(.*?)</title>`)

// Process adds exactly one item to y. The item holds resp.Url under "url"
// and, under "title", the text captured by pageTitlePattern from resp.Body,
// or the empty string when the body has no matching <title> element.
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	title := ""
	// Index 0 is the whole match; the capture group is at index 1, so the
	// slice must have at least two elements before it is read.
	if m := pageTitlePattern.FindStringSubmatch(resp.Body); len(m) > 1 {
		title = m[1]
	}

	item := common.NewItem()
	item.Set("url", resp.Url)
	item.Set("title", title)
	y.AddItem(item)
}
Example #5
0
// Extract splits resp.Body into scopes using this.scopeRule and builds one
// item per scope. For every key/rule pair in this.kvRule:
//
//   - if the rule is the literal "_URL_", the item receives resp.Url;
//   - otherwise the rule is used as a regular expression against the scope
//     and the first capture group becomes the value. When the rule does not
//     match the scope, or has no capture group, the value is the empty
//     string instead of causing an index-out-of-range panic.
//
// When this.trimFunc is non-nil the extracted value is passed through it
// before being stored. The returned slice is never nil; it is empty when no
// scope matches.
//
// The scope rule and field rules are compiled with regexp.MustCompile, so an
// invalid pattern still panics. Field rules are only compiled when at least
// one scope was found.
func (this *Extractor) Extract(resp *common.Response) []*common.Item {
	scopes := regexp.MustCompile(this.scopeRule).FindAllString(resp.Body, -1)
	items := make([]*common.Item, 0, len(scopes))
	if len(scopes) == 0 {
		return items
	}

	// Compile every field rule once up front instead of once per scope.
	compiled := make(map[string]*regexp.Regexp, len(this.kvRule))
	for key, rule := range this.kvRule {
		if rule != "_URL_" {
			compiled[key] = regexp.MustCompile(rule)
		}
	}

	for _, scope := range scopes {
		item := common.NewItem()
		for key, rule := range this.kvRule {
			if rule == "_URL_" {
				item.Set(key, resp.Url)
				continue
			}
			// FindStringSubmatch returns nil on no match and a one-element
			// slice when the rule has no capture group; fall back to "".
			value := ""
			if m := compiled[key].FindStringSubmatch(scope); len(m) > 1 {
				value = m[1]
			}
			if this.trimFunc != nil {
				item.Set(key, this.trimFunc(value))
				continue
			}
			item.Set(key, value)
		}
		items = append(items, item)
	}
	return items
}