Example #1
0
// pageProcess handles a single request end to end: it downloads the page for
// req, runs the page processer over it, queues any new URLs the page reports,
// and passes the page's item list to every configured output.
//
// The download is attempted up to maxDownloadAttempts times, pausing
// retryDelay between attempts. If every attempt fails, finishForReqProcesser
// is called with the request URL and the page is neither processed nor output.
func (this *Spider) pageProcess(req *page.Request) {
	const (
		maxDownloadAttempts = 3
		retryDelay          = time.Millisecond
	)

	// Download the page, retrying on failure.
	var p *page.Page
	for attempt := 1; attempt <= maxDownloadAttempts; attempt++ {
		p = this.m_downLoader.DownLoad(req)
		if p.IsSucc() {
			break
		}
		// Pause only when another attempt follows; sleeping after the final
		// failure would just delay the failure handling below.
		if attempt < maxDownloadAttempts {
			time.Sleep(retryDelay)
		}
	}

	if !p.IsSucc() {
		this.finishForReqProcesser(req.GetUrl())
		return
	}

	// Analyze the page content.
	this.m_pageProcesser.Process(p)

	// Queue the new links reported by the page. The map is ranged as
	// URL -> tag; "html" is passed as AddUrl's second argument
	// (presumably the expected response type; confirm against AddUrl).
	if p.CountNewUrls() > 0 {
		for newURL, urlTag := range p.GetNewUrls() {
			this.AddUrl(newURL, "html", urlTag)
		}
	}

	// NOTE(review): the request is reported as finished before the outputs
	// run; this ordering is preserved from the original implementation.
	this.finishForReqProcesser(req.GetUrl())

	// Hand the extracted items to every output.
	for _, out := range this.m_outputs {
		out.Process(p.GetPageItemsList(), p.GetRequest().GetUrl())
	}
}