func TestWarning(t *testing.T) {
	log.Init(os.Stdout, os.Stdout, os.Stdout)
	log.Warn("hello world")
	log.Warn("hello", "world")
	log.Warnf("problem: %s is: %d\n", "one", 1)
}
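// For context, a minimal sketch of the log facade this test assumes: Init
// takes one writer per level (info, warning, error), Warn is variadic, and
// Warnf is printf-style. This is hypothetical scaffolding that is just enough
// to satisfy the calls above; the real package's internals may differ.
package log

import (
	"io"
	stdlog "log"
)

var info, warn, errLog *stdlog.Logger

// Init sets the destinations for the info, warning, and error levels.
func Init(infoW, warnW, errW io.Writer) {
	info = stdlog.New(infoW, "INFO: ", stdlog.LstdFlags)
	warn = stdlog.New(warnW, "WARN: ", stdlog.LstdFlags)
	errLog = stdlog.New(errW, "ERROR: ", stdlog.LstdFlags)
}

func Info(v ...interface{})                 { info.Println(v...) }
func Warn(v ...interface{})                 { warn.Println(v...) }
func Warnf(format string, v ...interface{}) { warn.Printf(format, v...) }
func Error(v ...interface{})                { errLog.Println(v...) }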
func (task *SchedulableArticle) Run(scheduler *scheduler.Scheduler) {
	// check if the task ran while we were waiting
	select {
	case result := <-task.ran:
		if result == ARTICLE_OK {
			log.Info("scraped article:", task.article.GetLink())
			return
		}
		if result == ARTICLE_BAD {
			log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		}
	default:
		// nothing read
	}

	task.j.AddArticle(task.article, task.ran)

	// wait for the article to go off to a client
	res := <-task.ran
	if res == ARTICLE_OK {
		log.Info("scraped article", task.article.GetLink())
		return
	}
	if res == ARTICLE_BAD {
		log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		// re-queue; reset start and use an absolute delay, since the
		// previous delay was relative
		task.start = time.Now()
		task.delay = 15
		scheduler.Add(task)
		return
	}

	// once the article is at the client, wait a reasonable amount of time;
	// if the article did not come back in the expected time, requeue it
	const waitTime = 15 * time.Second
	select {
	case result := <-task.ran:
		if result == ARTICLE_OK {
			log.Info("scraped article", task.article.GetLink())
			return
		}
		log.Warn("got result", toString(result), "for article", task.article.GetLink(), "requeueing")
		// else fall through to requeue
	case <-time.After(waitTime):
		// fall through to requeue
		log.Info("timing out for article", task.article.GetLink())
	}

	task.start = time.Now()
	task.delay = 15
	scheduler.Add(task)
}
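// The result constants and toString helper used above are not shown in this
// section. A plausible sketch follows, assuming task.ran is a channel of an
// int-based enum; only ARTICLE_OK, ARTICLE_BAD, and toString are confirmed by
// the code above, the rest is illustrative.
type articleResult int

const (
	ARTICLE_OK articleResult = iota
	ARTICLE_BAD
)

// toString renders a result value for log messages.
func toString(r articleResult) string {
	switch r {
	case ARTICLE_OK:
		return "ARTICLE_OK"
	case ARTICLE_BAD:
		return "ARTICLE_BAD"
	default:
		return "UNKNOWN"
	}
}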
// Run runs the client in an infinite loop.
func (c *Client) Run() {
	ticker := time.NewTicker(10 * time.Second)
	c.ignoreBad = make(map[string]struct{})
	defer c.printBad()

	for {
		// wait for the next iteration
		<-ticker.C

		// go get the article
		req, err := Get(c.IP)
		if err != nil {
			log.Error(err)
			continue // go around to the next iteration
		}

		// don't reply to empty requests
		if netScraper.IsEmptyRequest(req) {
			log.Info("got empty request")
			continue
		}

		if _, contains := c.ignoreBad[req.URL]; contains {
			log.Warn("got bad article:", req.URL)
			continue
		}
		log.Info("got article", req.URL)

		// for now only use the NYT
		article := scraper.NYTArticle{}
		article.Link = req.URL

		err = scraper.ScrapeArticle(&article)
		if err != nil {
			c.ignoreBad[req.URL] = struct{}{} // add to ignore
			log.Error("could not scrape article", req.URL, ":", err)
			continue
		}
		if len(article.GetData()) == 0 {
			c.ignoreBad[req.URL] = struct{}{} // add to ignore
			log.Error("bad article body for url:", req.URL)
			continue
		}

		// send the article back up
		result := netScraper.Response{URL: req.URL, Data: article.Data, Error: netScraper.ResponseOk}
		err = Post(c.IP, result)
		if err != nil {
			time.Sleep(10 * time.Second) // sleep in case the Wi-Fi is just down
			err = Post(c.IP, result)
			if err != nil {
				log.Error(err)
			}
		}
	}
}
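// A sketch of how this client might be started. The entry point and the
// address are assumptions; only the IP field is confirmed by Run above.
func main() {
	c := &Client{IP: "http://192.0.2.1:8080"} // illustrative address
	c.Run()                                   // blocks forever, polling every 10 seconds
}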
// handleScrapedArticle validates a scraped article and stores it.
// TODO: think about where this should be
func handleScrapedArticle(article scraper.Article) {
	if err := scraper.CheckFile(article.GetData()); err != nil {
		log.Warn("when checking article", article.GetTitle(), "got err:", err)
	}
	if err := storeArticle(article); err != nil {
		log.Error("failed to write article", article.GetTitle(), ":", err)
	}
}
// Handle is passed to Go's built-in HTTP server. It only allows GET and POST
// requests: GET requests ask for an article to scrape, and POST requests
// deliver the client's result. Actual processing is handed off to internal functions.
func (s *ScrapeServer) Handle() func(rw http.ResponseWriter, request *http.Request) {
	return func(rw http.ResponseWriter, request *http.Request) {
		switch request.Method {
		case http.MethodGet:
			s.getHandle(rw, request)
		case http.MethodPost:
			s.postHandle(rw, request)
		default:
			log.Warn("unexpected HTTP method:", request.Method)
			rw.WriteHeader(http.StatusMethodNotAllowed)
		}
	}
}
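// Handle returns a closure rather than having the handler signature itself,
// so it can be registered directly with net/http. A sketch of the wiring;
// the "/" route and listen address are assumptions, not taken from this section.
func serve(s *ScrapeServer) error {
	http.HandleFunc("/", s.Handle())
	return http.ListenAndServe(":8080", nil)
}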
func (c Client) printBad() {
	for key := range c.ignoreBad {
		log.Warn("ignored article:", key)
	}
}
// Run the main scheduler loop.
func (scheduler *Scheduler) Run() {
	scheduler.isRunning = true

	// signals the loop to run every cycleTime seconds
	ticker := time.NewTicker(time.Duration(scheduler.cycleTime) * time.Second)

	for {
		didAdd := false // keep track of adds so we only sort when we need to

	AddNewTasksLoop:
		for {
			// add tasks from the buffered channel to the queue until all waiting tasks are added
			select {
			case s := <-scheduler.addTask:
				scheduler.queue = append(scheduler.queue, s)
				didAdd = true
			default:
				break AddNewTasksLoop
			}
		}

		// only sort if we added a new task
		// TODO: reschedule tasks properly
		if didAdd {
			By(SortLowToHigh).Sort(scheduler.queue)
		}

		i := 0 // declared outside the loop so the removal code below can use it
		didRemove := false
		for ; i < len(scheduler.queue); i++ {
			if scheduler.queue[i].TimeRemaining() < scheduler.cycleTime {
				// run any tasks that are ready
				go scheduler.queue[i].Run(scheduler)
				didRemove = true
			} else {
				// the queue is sorted, so no later task can be ready this cycle
				break
			}
		}

		if didRemove {
			// remove the dispatched elements, then nil out the vacated tail so
			// nothing sitting in the unused part of the queue leaks memory
			copy(scheduler.queue[0:], scheduler.queue[i:])
			for j := 1; j <= i; j++ {
				scheduler.queue[len(scheduler.queue)-j] = nil
			}
			scheduler.queue = scheduler.queue[:len(scheduler.queue)-i]
		}

		select {
		case <-ticker.C: // wait until the next time step
		case <-scheduler.quit:
			scheduler.isRunning = false
			log.Warn("Done with scheduler")
			ticker.Stop()
			return
		}
	}
}
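// Run only ever calls TimeRemaining and Run on queue elements, so the queue
// presumably holds values of a small task interface. A sketch consistent with
// the calls above; the interface name is an assumption. SchedulableArticle
// (earlier in this section) would satisfy it via its Run method plus a
// TimeRemaining derived from its start and delay fields.
type Task interface {
	// TimeRemaining reports how many seconds remain until the task should run;
	// it is compared against cycleTime above, so both are in seconds.
	TimeRemaining() int
	// Run executes the task; it may re-Add itself to the scheduler.
	Run(scheduler *Scheduler)
}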