コード例 #1
0
ファイル: fetcher.go プロジェクト: zhaozhi406/crawler
// fetchPage is a worker loop: it receives task packs from this.taskQueue
// until this.quitChan becomes readable, and for every task it
//
//  1. downloads taskPack.Domain + taskPack.Urlpath,
//  2. converts the body via IconvHtml(..., "utf-8"),
//  3. stores the converted page through pageStore.Save, and
//  4. reports the outcome to the scheduler's "report" API as done=1 or done=0.
//
// A download failure or a conversion failure is logged, nothing is stored,
// and the task is reported with done=0. A storage failure is only logged;
// the task is still reported with done=1.
//
// this.wg.Done() is called when the method returns.
func (this *Fetcher) fetchPage(pageStore PageStore) {
	defer this.wg.Done()

	httpClient := lib.HttpClient{}
	for {
		select {
		case taskPack := <-this.taskQueue:
			destUrl := taskPack.Domain + taskPack.Urlpath
			// Log before issuing the request so the message reflects what is
			// about to happen rather than what already happened.
			log.Debugln("goto fetch ", destUrl)

			// done stays 0 unless both the download and the conversion succeed.
			done := 0
			html, err := httpClient.Get(destUrl)
			if err != nil {
				log.Errorln("fetch '"+destUrl+"' failed!", err)
			} else if html, err = httpClient.IconvHtml(html, "utf-8"); err != nil {
				// The conversion error must be checked here: it would otherwise
				// be overwritten by the Save result below and the unconverted
				// or empty output would be stored as a successful page.
				log.Errorln("convert '"+destUrl+"' to utf-8 failed!", err)
			} else {
				done = 1
				log.Infoln("fetch '" + destUrl + "' done.")
				if err = pageStore.Save(taskPack.Domain, taskPack.Urlpath, string(html)); err != nil {
					log.Errorln("fetcher save ", taskPack.Domain, taskPack.Urlpath, " error:", err)
				}
			}

			// Report the task's completion status to the scheduler.
			reportUrl := fmt.Sprintf("http://%s%s?task_id=%d&done=%d", this.scheduler_addr, this.scheduler_api["report"], taskPack.TaskId, done)
			res, err := httpClient.Get(reportUrl)
			if err != nil {
				log.Errorln("report ", reportUrl, " failed!", err)
				continue
			}
			// The scheduler answers with a JSON result; a payload that cannot
			// be decoded or that carries a non-zero Err is logged as an error.
			result := types.JsonResult{}
			if err = json.Unmarshal(res, &result); err != nil || result.Err != 0 {
				log.Errorln("report ", reportUrl, ", get error response: ", string(res))
			}
		case <-this.quitChan:
			// NOTE(review): this.quitChan is expected to be closed elsewhere
			// to stop the worker — confirm against the owner of the channel.
			log.Infoln("quit fetch page...")
			// Nothing follows the loop, so returning is equivalent to leaving
			// it; the deferred wg.Done() still runs.
			return
		}
	}
}