Пример #1
0
func (this MyPageProcesser) Process(p *robot.Page) {
	query := p.GetHtmlParser()

	if p.GetUrlTag() == "index" {
		query.Find(`div[class="main area"] div[class="lc"] ul li a`).Each(func(i int, s *goquery.Selection) {
			url, isExsit := s.Attr("href")
			if isExsit {
				reg := regexp.MustCompile(`^do not know what is this`)
				var fmtStr string
				if rxYule.MatchString(url) {
					reg = rxYule
					fmtStr = wkSohuYule
				}

				if rxPic.MatchString(url) {
					reg = rxPic
					fmtStr = wkSohuPic
				}

				regxpArrag := reg.FindStringSubmatch(url)
				if len(regxpArrag) == 2 {
					addRequest(p, "changyan", fmt.Sprintf(fmtStr, regxpArrag[1]), "", s.Text())
				}
			}
		})
	}

	if p.GetUrlTag() == "changyan" {
		jsonMap := ChangyanJson{}
		err := json.NewDecoder(strings.NewReader(p.GetBodyStr())).Decode(&jsonMap)
		if err == nil {
			content, ok := p.GetRequest().GetMeta().(string)
			if ok {
				fmt.Println("Title:", content, " CommentCount:", jsonMap.ListData.OuterCmtSum, " ParticipationCount:", jsonMap.ListData.ParticipationSum)
			}
		}
	}
}