func (task *SchedulableArticle) Run(scheduler *scheduler.Scheduler) { // check if the task ran while we were waiting select { case result := <-task.ran: if result == ARTICLE_OK { log.Info("scraped article:", task.article.GetLink()) return } if result == ARTICLE_BAD { log.Warn("bad result for article:", task.article.GetLink(), "requeueing") } default: // nothing read } task.j.AddArticle(task.article, task.ran) // wait for the article to go off to a client res := <-task.ran if res == ARTICLE_OK { log.Info("scraped article", task.article.GetLink()) return } if res == ARTICLE_BAD { log.Warn("bad result for article:", task.article.GetLink(), "requeueing") // re-queue task.start = time.Now() task.delay = 15 // set delay to 2 here b/c prev delay was relative scheduler.Add(task) return } // once the article is at the client, wait a reasonable amount of time // if the article did not come back in the expected time, requeue it var waitTime time.Duration = 15 select { case result := <-task.ran: if result == ARTICLE_OK { log.Info("scraped article", task.article.GetLink()) return // finish this } log.Warn("got result", toString(result), "for article", task.article.GetLink(), "requeueing") // else fall through to requeue case <-time.After(waitTime * time.Second): // fall through to requeue log.Info("timing out for article", task.article.GetLink()) } task.start = time.Now() task.delay = 15 // set delay to 2 here b/c prev delay was relative scheduler.Add(task) }
func (task *SchedulableRSS) Run(scheduler *scheduler.Scheduler) { err := scraper.UpdateRSS(task.rss) if err != nil { log.Error("error updating rss stories:", err) // requeue task.start = time.Now() task.rss.GetChannel().ClearArticles() go scheduler.Add(task) return } // mark all articles as not in list for key := range task.oldArticles { task.oldArticles[key] = false } // schedule any new articles // an article is new if it wasn't in the last RSS ping delay := 60 // TODO: create legitimate task delays for i := 0; i < task.rss.GetChannel().GetNumArticles(); i++ { article := task.rss.GetChannel().GetArticle(i) if _, inOld := task.oldArticles[article.GetLink()]; !inOld { toSchedule := CreateSchedulableArticle(article, delay, task.j) delay += 600 go scheduler.Add(toSchedule) } // add or update what we found task.oldArticles[article.GetLink()] = true } // remove any articles not in the set for key, inList := range task.oldArticles { if !inList { delete(task.oldArticles, key) } } // reschedule this task if task.IsLoopable() && scheduler.IsRunning() { task.start = time.Now() task.rss.GetChannel().ClearArticles() go scheduler.Add(task) } }