// Run submits the task's article through task.j and tracks its outcome via
// the task.ran channel.
//
// The flow is:
//  1. Without blocking, drain a status that may already be sitting on
//     task.ran. ARTICLE_OK ends the task; ARTICLE_BAD is only logged and the
//     article is submitted again below.
//  2. Submit the article with AddArticle and block for the next status.
//     ARTICLE_OK ends the task; ARTICLE_BAD puts the task back on the
//     scheduler.
//  3. For any other status, wait up to 15 seconds for one more status.
//     ARTICLE_OK ends the task; any other status, or the timeout, puts the
//     task back on the scheduler.
func (task *SchedulableArticle) Run(scheduler *scheduler.Scheduler) {
	// Delay assigned to the task each time it is requeued. The task's previous
	// delay was relative to its old start time, so both are reset together.
	// NOTE(review): the unit of task.delay is not visible here (presumably
	// seconds) -- confirm against the scheduler.
	const requeueDelay = 15

	// How long to wait for a follow-up status once the first one has arrived.
	const resultTimeout = 15 * time.Second

	// requeue restarts the task's clock and hands it back to the scheduler.
	requeue := func() {
		task.start = time.Now()
		task.delay = requeueDelay
		scheduler.Add(task)
	}

	// A status may have been delivered while this task was waiting to run.
	select {
	case early := <-task.ran:
		switch early {
		case ARTICLE_OK:
			log.Info("scraped article:", task.article.GetLink())
			return
		case ARTICLE_BAD:
			// Only logged here; the article is resubmitted right below.
			log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		}
	default:
		// Nothing pending on the channel.
	}

	task.j.AddArticle(task.article, task.ran)

	// Block until the first status arrives (the article going off to a client).
	switch first := <-task.ran; first {
	case ARTICLE_OK:
		log.Info("scraped article", task.article.GetLink())
		return
	case ARTICLE_BAD:
		log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		requeue()
		return
	}

	// The article is out at a client: give it a bounded amount of time to come
	// back before giving up and requeueing.
	select {
	case final := <-task.ran:
		if final == ARTICLE_OK {
			log.Info("scraped article", task.article.GetLink())
			return
		}
		log.Warn("got result", toString(final), "for article", task.article.GetLink(), "requeueing")

	case <-time.After(resultTimeout):
		log.Info("timing out for article", task.article.GetLink())
	}

	// Reached on a non-OK follow-up status or on timeout.
	requeue()
}
Exemplo n.º 2
0
// Run refreshes the task's RSS feed and schedules an article task for every
// article whose link was not recorded in task.oldArticles by the previous run.
//
// task.oldArticles is maintained with a mark-and-sweep pass: every known link
// is first marked false, each link found in the refreshed feed is marked true,
// and links still false afterwards are removed.
//
// If the feed update fails, the task is requeued immediately. Otherwise it is
// requeued only when it is loopable and the scheduler is still running.
func (task *SchedulableRSS) Run(scheduler *scheduler.Scheduler) {
	// requeueSelf restarts the task's clock, clears the channel's articles and
	// re-adds the task to the scheduler from a separate goroutine.
	requeueSelf := func() {
		task.start = time.Now()
		task.rss.GetChannel().ClearArticles()
		go scheduler.Add(task)
	}

	if err := scraper.UpdateRSS(task.rss); err != nil {
		log.Error("error updating rss stories:", err)
		requeueSelf()
		return
	}

	// Mark phase: assume every previously seen link has dropped off the feed.
	for link := range task.oldArticles {
		task.oldArticles[link] = false
	}

	// Schedule articles that were not in the last RSS ping. The first new
	// article gets a delay of 60 and each further one 600 more.
	// TODO: create legitimate task delays
	nextDelay := 60
	for idx := 0; idx < task.rss.GetChannel().GetNumArticles(); idx++ {
		item := task.rss.GetChannel().GetArticle(idx)
		link := item.GetLink()

		if _, known := task.oldArticles[link]; !known {
			go scheduler.Add(CreateSchedulableArticle(item, nextDelay, task.j))
			nextDelay += 600
		}

		// Record the link as present in the current feed (new or not).
		task.oldArticles[link] = true
	}

	// Sweep phase: forget links that no longer appear in the feed.
	for link, present := range task.oldArticles {
		if present {
			continue
		}
		delete(task.oldArticles, link)
	}

	// Loop the task only while it is loopable and the scheduler is running.
	if task.IsLoopable() && scheduler.IsRunning() {
		requeueSelf()
	}
}