コード例 #1
0
ファイル: scheduler.go プロジェクト: zhaozhi406/crawler
// DispatchTasks distributes the waiting crawl tasks among the fetchers.
//
// It loads the waiting tasks through FetchTasks, sorts them with
// lib.CrawlTaskSorter, and then walks this.fetchers in order. Each fetcher is
// offered every task that has not already been assigned during this call and
// for which politeVisitor.IsPolite(task.Domain, fetcher) reports true. The
// selected tasks are JSON-encoded and POSTed as the "tasks" form field to the
// fetcher's "push_tasks" endpoint.
//
// Errors are logged and never returned; a failure for one fetcher does not
// stop dispatching to the remaining ones. Fetchers for which no task was
// selected are skipped instead of being sent an empty list.
//
// Note that a task is marked as assigned before the POST is attempted, so a
// failed POST does not make the task available to later fetchers within the
// same call.
func (this *Scheduler) DispatchTasks() {
	// Load the tasks that are waiting to be dispatched.
	tasks, err := this.FetchTasks()
	if err != nil {
		log.Errorln("[DispatchTasks] fetch tasks error: ", err)
		return
	}
	if len(tasks) == 0 {
		log.Warnln("[DispatchTasks] no wait tasks yet.")
		return
	}

	// Sort the tasks; the sorter is seeded with the current Unix time.
	sorter := lib.CrawlTaskSorter{Now: time.Now().Unix()}
	sorter.Sort(tasks, nil)

	// picked records the IDs of tasks already assigned to some fetcher.
	picked := map[int32]bool{}
	httpClient := lib.HttpClient{}
	for _, fetcher := range this.fetchers {
		// Select the tasks that are still unassigned and polite to visit
		// from this fetcher.
		taskPacks := []types.TaskPack{}
		for _, task := range tasks {
			if picked[task.Id] || !this.politeVisitor.IsPolite(task.Domain, fetcher) {
				continue
			}
			taskPacks = append(taskPacks, types.TaskPack{TaskId: task.Id, Domain: task.Domain, Urlpath: task.Urlpath})
			picked[task.Id] = true
			// Cache the last visit time at selection time. This is only an
			// approximation of the real visit time, but keeps things simple.
			this.politeVisitor.SetLastVisitTime(task.Domain, fetcher, time.Now().Unix())
		}

		// Nothing was selected for this fetcher: avoid a pointless request.
		if len(taskPacks) == 0 {
			continue
		}

		jsonBytes, err := json.Marshal(taskPacks)
		if err != nil {
			log.Errorln("make task packs error: ", err)
			continue
		}

		// Push the selected tasks to the fetcher.
		param := url.Values{}
		param.Add("tasks", string(jsonBytes))
		result, err := httpClient.Post("http://"+fetcher+this.fetcherApi["push_tasks"], param)
		if err != nil {
			log.Errorln("post task packs to fetcher:", fetcher, ", error:", err, " data:", string(jsonBytes))
			continue
		}

		jsonResult := types.JsonResult{}
		if err = json.Unmarshal(result, &jsonResult); err != nil {
			log.Errorln("json unmarshal error:", err, " data:", string(result))
			continue
		}
		log.Infoln("get push tasks response: ", jsonResult)
	}
}