Exemplo n.º 1
0
// Process wraps the raw response body in a single item, stored under the
// "html" key, and hands that item to the yield.
func (this *LazyProcesser) Process(resp *common.Response, y *common.Yield) {
	page := common.NewItem()
	page.Set("html", resp.Body)
	y.AddItem(page)
}
// uaStringRe captures the text that follows the first closing </span> inside
// a <div id="ua_string"> element. (?s) lets "." span newlines. The pattern is
// constant, so it is compiled once at package scope instead of on every call.
var uaStringRe = regexp.MustCompile(`(?s)<div id="ua_string">.*?</span>(.*?)</div>`)

// Process emits one item per ua_string match found in the response body,
// storing the captured text under the "user-agent" key.
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	for _, match := range uaStringRe.FindAllStringSubmatch(resp.Body, -1) {
		// match[0] is the whole match; match[1] is the single capture group.
		item := common.NewItem()
		item.Set("user-agent", match[1])
		y.AddItem(item)
	}
}
// channelItemRe captures, for each <div class="channel-item"> block, the
// href (group 1) and link text (group 2) of the first <h3><a> that follows.
// The pattern is constant, so it is compiled once at package scope instead of
// on every call.
var channelItemRe = regexp.MustCompile(`(?s)<div class="channel-item">.*?<h3><a href="(.*?)">(.*?)</a>`)

// processTitle emits one item per channel-item match in the response body,
// with the captured href under "url" and the captured link text under "title".
func (this *MyProcesser) processTitle(resp *common.Response, y *common.Yield) {
	for _, match := range channelItemRe.FindAllStringSubmatch(resp.Body, -1) {
		item := common.NewItem()
		item.Set("url", match[1])
		item.Set("title", match[2])
		y.AddItem(item)
	}
}
// pageTitleRe captures the contents of the first <title> element. There is no
// (?s) flag, so "." does not match newlines and a title that spans several
// lines will not match. Compiled once at package scope instead of per call.
var pageTitleRe = regexp.MustCompile(`<title>(.*?)</title>`)

// Process emits a single item holding the response URL under "url" and the
// page title under "title". The title is the empty string when the body has
// no matching <title> element.
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	item := common.NewItem()
	item.Set("url", resp.Url)

	title := ""
	if m := pageTitleRe.FindStringSubmatch(resp.Body); len(m) > 0 {
		title = m[1]
	}
	item.Set("title", title)

	y.AddItem(item)
}
// Process runs an extractor over the response and forwards every extracted
// item to the yield. The scope rule selects each span from a
// <dt class="basicInfo-item name"> through the next </dd>; inside a span the
// "key" and "value" rules pick out the two fields, and TrimHtmlTags is set as
// the trim function.
func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	fieldRules := map[string]string{
		"key":   `(?s)name">(.*?)</dt>`,
		"value": `(?s)value">(.*?)</dd>`,
	}

	scoped := extractor.NewExtractor().SetScopeRule(`(?s)<dt class="basicInfo-item name">.*?</dd>`)
	extracted := scoped.SetRules(fieldRules).SetTrimFunc(extractor.TrimHtmlTags).Extract(resp)

	for _, entry := range extracted {
		y.AddItem(entry)
	}
}
Exemplo n.º 6
0
// processItems extracts items from the response according to rule.ItemRule
// (scope rule, key/value rules and trim function) and adds each one to the
// yield.
func (this *QuickEngineProcesser) processItems(resp *common.Response, y *common.Yield, rule _Rule) {
	// Map the configured trim name to a function; an unrecognised name leaves
	// trim at its zero value, which is still passed to SetTrimFunc below.
	var trim extractor.TrimFunc
	if name := rule.ItemRule.TrimFunc; name == "trim_html_tags" {
		trim = extractor.TrimHtmlTags
	} else if name == "trim_blank" {
		trim = extractor.TrimBlank
	}

	ex := extractor.NewExtractor().
		SetScopeRule(rule.ItemRule.ScopeRule).
		SetRules(rule.ItemRule.KVRule).
		SetTrimFunc(trim)

	for _, extracted := range ex.Extract(resp) {
		y.AddItem(extracted)
	}
}
Exemplo n.º 7
0
// Process applies the first configured rule whose UrlMatch pattern matches
// the response URL. For that rule it:
//   - stops without yielding anything if rule.Succ is non-empty and does not
//     occur in the response body;
//   - runs processItems when an item scope rule is configured;
//   - runs processRequests when a request scope rule is configured;
//   - passes rule.Merge to y.SetMerge.
//
// The work runs inside common.Try with a handler that logs the value it is
// given. NOTE(review): presumably Try recovers panics (for example from
// MustCompile on an invalid UrlMatch) and passes them to the handler; confirm
// against common.Try.
func (this *QuickEngineProcesser) Process(resp *common.Response, y *common.Yield) {
	common.Try(func() {
		for _, rule := range this.config.Rules {
			if !regexp.MustCompile(rule.UrlMatch).MatchString(resp.Url) {
				continue
			}

			// Only the first matching rule is used, so every path below
			// leaves the loop.
			if rule.Succ != "" && !strings.Contains(resp.Body, rule.Succ) {
				log.Printf("cannot find succ string:%s", rule.Succ)
				return
			}
			if rule.ItemRule.ScopeRule != "" {
				this.processItems(resp, y, rule)
			}
			if rule.RequestRule.ScopeRule != "" {
				this.processRequests(resp, y, rule)
			}
			y.SetMerge(rule.Merge)
			return
		}
	}, func(e interface{}) {
		// %v rather than %s: e can be any value, and %s would print
		// "%!s(T=...)" for values that are not strings, errors or Stringers.
		log.Printf("pannic %v\n", e)
	})
}
Exemplo n.º 8
0
// processRequests extracts link values from the response according to
// rule.RequestRule and queues a new request for each of them. Values that
// already start with "http://" or "https://" are used as-is; every other
// value is appended to rule.BaseUrl.
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) {
	// Map the configured trim name to a function; an unrecognised name leaves
	// TrimFunc at its zero value, which is still passed to SetTrimFunc below.
	var TrimFunc extractor.TrimFunc
	switch rule.RequestRule.TrimFunc {
	case "trim_html_tags":
		TrimFunc = extractor.TrimHtmlTags
	case "trim_blank":
		TrimFunc = extractor.TrimBlank
	}

	items := extractor.NewExtractor().
		SetScopeRule(rule.RequestRule.ScopeRule).
		SetRules(rule.RequestRule.KVRule).
		SetTrimFunc(TrimFunc).
		Extract(resp)
	for _, item := range items {
		for _, link := range item.GetAll() {
			// https links are absolute too; prefixing them with BaseUrl
			// would produce an invalid URL.
			if strings.HasPrefix(link, "http://") || strings.HasPrefix(link, "https://") {
				y.AddRequest(common.NewRequest(link))
				continue
			}
			y.AddRequest(common.NewRequest(rule.BaseUrl + link))
		}
	}
}
// nextLinkRe captures the href of the first <a> that follows a
// <span class="next"> element. The pattern is constant, so it is compiled
// once at package scope instead of on every call.
var nextLinkRe = regexp.MustCompile(`(?s)<span class="next">.*?<a href="(.*?)"`)

// processNext queues a request for the "next" link of the page, built by
// appending the captured href to this.baseUrl. It does nothing when the
// response body contains no such link.
func (this *MyProcesser) processNext(resp *common.Response, y *common.Yield) {
	m := nextLinkRe.FindStringSubmatch(resp.Body)
	if len(m) == 0 {
		return
	}
	y.AddRequest(common.NewRequest(this.baseUrl + m[1]))
}