func (this *MyProcesser) Process(resp *common.Response, y *common.Yield) {
	items := extractor.NewExtractor().
		SetScopeRule(`(?s)<dt class="basicInfo-item name">.*?</dd>`).
		SetRules(map[string]string{
			"key":   `(?s)name">(.*?)</dt>`,
			"value": `(?s)value">(.*?)</dd>`,
		}).
		SetTrimFunc(extractor.TrimHtmlTags).
		Extract(resp)

	for _, item := range items {
		y.AddItem(item)
	}
}
Ejemplo n.º 2
0
func (this *QuickEngineProcesser) processItems(resp *common.Response, y *common.Yield, rule _Rule) {
	var TrimFunc extractor.TrimFunc
	switch rule.ItemRule.TrimFunc {
	case "trim_html_tags":
		TrimFunc = extractor.TrimHtmlTags
	case "trim_blank":
		TrimFunc = extractor.TrimBlank
	}

	items := extractor.NewExtractor().
		SetScopeRule(rule.ItemRule.ScopeRule).
		SetRules(rule.ItemRule.KVRule).
		SetTrimFunc(TrimFunc).
		Extract(resp)
	for _, item := range items {
		y.AddItem(item)
	}
}
Ejemplo n.º 3
0
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) {
	var TrimFunc extractor.TrimFunc
	switch rule.RequestRule.TrimFunc {
	case "trim_html_tags":
		TrimFunc = extractor.TrimHtmlTags
	case "trim_blank":
		TrimFunc = extractor.TrimBlank
	}

	items := extractor.NewExtractor().
		SetScopeRule(rule.RequestRule.ScopeRule).
		SetRules(rule.RequestRule.KVRule).
		SetTrimFunc(TrimFunc).
		Extract(resp)
	for _, item := range items {
		for _, url := range item.GetAll() {
			if strings.HasPrefix(url, "http://") {
				y.AddRequest(common.NewRequest(url))
			} else {
				y.AddRequest(common.NewRequest(rule.BaseUrl + url))
			}
		}
	}
}