Exemple #1
0
// test runs a manual smoke pass over the task DAO using the MySQL database
// named by config["scheduler"]["dsn"]. In order it:
//
//  1. loads the waiting rules and converts each one into a crawl task,
//  2. inserts those tasks,
//  3. updates the rules with the insert results,
//  4. loads the waiting tasks and marks them dao.TASK_FINISH.
//
// Progress is logged at info level. The function has no return value, so any
// failing step is logged at error level and aborts the remaining steps rather
// than continuing with data that may be invalid.
func test(config map[string]map[string]string) {
	db, err := sqlx.Connect("mysql", config["scheduler"]["dsn"])
	if err != nil {
		// db is unusable here; calling db.Stats() on it would panic.
		log.Errorln("connect db error: ", err)
		return
	}
	// The handle is only used inside this function, so release it on exit.
	defer db.Close()
	log.Infoln("db stats: ", db.Stats())

	taskDao := dao.InitTaskDao(db)
	crawlRules, err := taskDao.GetWaitRules()
	if err != nil {
		log.Errorln("get wait rules error: ", err)
		return
	}

	// One task is produced per rule, so the final length is known up front.
	crawlTasks := make([]types.CrawlTask, 0, len(crawlRules))
	for _, rule := range crawlRules {
		log.Infoln(rule.Domain, rule.Urlpath)
		crawlTasks = append(crawlTasks, taskDao.ConvertRuleToTask(rule))
	}

	log.Infoln("get task: ", len(crawlTasks))

	num, results, err := taskDao.AddNewTasks(crawlTasks)
	log.Infoln("add task: ", num, results, err)
	if err != nil {
		// Do not feed results of a failed insert into UpdateRules.
		log.Errorln("add new tasks error: ", err)
		return
	}

	affectedRows, err := taskDao.UpdateRules(crawlRules, results)
	if err != nil {
		log.Errorln("update rules error: ", err)
		return
	}
	log.Infoln("update rules: ", affectedRows)

	waitingTasks, err := taskDao.GetWaitingTasks()
	if err != nil {
		log.Errorln("get waiting tasks error: ", err)
		return
	}
	for _, task := range waitingTasks {
		log.Infoln(task.Domain, task.Urlpath)
	}

	affectedRows, err = taskDao.SetTasksStatus(waitingTasks, dao.TASK_FINISH)
	if err != nil {
		log.Errorln("set task status error: ", err)
		return
	}
	log.Infoln("set task status: ", affectedRows)
}
Exemple #2
0
// InitScheduler builds a Scheduler from db and the string-valued config map.
//
// Config keys read:
//   - "fetch_rules_period", "fetch_tasks_period": integers, interpreted as seconds
//   - "listen_addr": used verbatim
//   - "fetchers": comma-separated list; all spaces are removed before splitting
//   - "fetcher_api": JSON object of string to string
//   - "min_host_visit_interval": integer passed to InitPoliteVisitor
//   - "redis_addr", "redis_pool_size", "redis_heartbeat": redis pool settings
//
// A value that cannot be parsed is reported through log.Errorln and the
// fallback the parser yields is kept (0 for a malformed integer, whatever was
// decoded so far for fetcher_api), so initialization continues as before but
// the misconfiguration is now visible. It returns nil only when the redis
// pool cannot be created.
func InitScheduler(db *sqlx.DB, config map[string]string) *Scheduler {
	// intSetting parses config[key] as an int, logging instead of silently
	// discarding a parse failure.
	intSetting := func(key string) int {
		value, err := strconv.Atoi(config[key])
		if err != nil {
			log.Errorln("invalid integer config", key, ":", err)
		}
		return value
	}

	taskDao := dao.InitTaskDao(db)
	fetchRulesPeriod := time.Duration(intSetting("fetch_rules_period")) * time.Second
	fetchTasksPeriod := time.Duration(intSetting("fetch_tasks_period")) * time.Second
	listenAddr := config["listen_addr"]

	// Strip spaces first so "a, b" and "a,b" yield the same entries.
	fetchers := strings.Split(strings.Replace(config["fetchers"], " ", "", -1), ",")

	fetcherApi := map[string]string{}
	if err := json.Unmarshal([]byte(config["fetcher_api"]), &fetcherApi); err != nil {
		log.Errorln("invalid fetcher_api config: ", err)
	}

	minHostVisitInterval := intSetting("min_host_visit_interval")
	redisAddr := config["redis_addr"]
	redisPoolSize := intSetting("redis_pool_size")
	redisHeartbeat := intSetting("redis_heartbeat")

	// Named redisPool so the local does not shadow the pool package.
	redisPool, err := pool.New("tcp", redisAddr, redisPoolSize)
	if err != nil {
		log.Errorln("init redis pool error: ", err)
		return nil
	}
	politeVisitor := InitPoliteVisitor(redisPool, int64(minHostVisitInterval))

	quitChan := make(chan bool, 1)

	return &Scheduler{
		fetchRulesPeriod: fetchRulesPeriod,
		fetchTasksPeriod: fetchTasksPeriod,
		listenAddr:       listenAddr,
		db:               db,
		taskDao:          taskDao,
		fetchers:         fetchers,
		fetcherApi:       fetcherApi,
		politeVisitor:    politeVisitor,
		redisPool:        redisPool,
		redisPoolSize:    redisPoolSize,
		redisHeartbeat:   redisHeartbeat,
		quitChan:         quitChan,
	}
}