Example #1
func TestWarning(t *testing.T) {
	log.Init(os.Stdout, os.Stdout, os.Stdout)

	log.Warn("hello world")
	log.Warn("hello", "world")
	log.Warnf("problem: %s is: %d\n", "one", 1)
}
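These examples call a project-specific log package (Init, Info, Warn, Warnf, Error) whose source is not shown here. A minimal sketch of such a wrapper over the standard library, assuming Init takes one writer per severity level, could look like this:

package log

import (
	"io"
	stdlog "log"
)

var (
	infoLogger *stdlog.Logger
	warnLogger *stdlog.Logger
	errLogger  *stdlog.Logger
)

// Init wires each severity level to its own writer (signature inferred
// from the test above; the real package may differ).
func Init(infoW, warnW, errW io.Writer) {
	infoLogger = stdlog.New(infoW, "INFO: ", stdlog.LstdFlags)
	warnLogger = stdlog.New(warnW, "WARN: ", stdlog.LstdFlags)
	errLogger = stdlog.New(errW, "ERROR: ", stdlog.LstdFlags)
}

// Info, Warn, and Error space-separate their arguments, like fmt.Println.
func Info(v ...interface{})  { infoLogger.Println(v...) }
func Warn(v ...interface{})  { warnLogger.Println(v...) }
func Error(v ...interface{}) { errLogger.Println(v...) }

// Warnf formats its arguments, like fmt.Printf.
func Warnf(format string, v ...interface{}) { warnLogger.Printf(format, v...) }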
Example #2
func (task *SchedulableArticle) Run(scheduler *scheduler.Scheduler) {
	// check if the task ran while we were waiting
	select {
	case result := <-task.ran:
		if result == ARTICLE_OK {
			log.Info("scraped article:", task.article.GetLink())
			return
		}
		if result == ARTICLE_BAD {
			log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		}
	default:
		// nothing read
	}

	task.j.AddArticle(task.article, task.ran)

	// wait for the article to go off to a client
	res := <-task.ran
	if res == ARTICLE_OK {
		log.Info("scraped article", task.article.GetLink())
		return
	}
	if res == ARTICLE_BAD {
		log.Warn("bad result for article:", task.article.GetLink(), "requeueing")
		// re-queue
		task.start = time.Now()
		task.delay = 15 // reset the delay, since the previous delay was relative to the old start time
		scheduler.Add(task)
		return
	}

	// once the article is at the client, wait a reasonable amount of time
	// if the article did not come back in the expected time, requeue it
	const waitTime = 15 * time.Second

	select {
	case result := <-task.ran:
		if result == ARTICLE_OK {
			log.Info("scraped article", task.article.GetLink())
			return // finish this
		}

		log.Warn("got result", toString(result), "for article", task.article.GetLink(), "requeueing")
		// else fall through to requeue

	case <-time.After(waitTime):
		// fall through to requeue
		log.Info("timing out for article", task.article.GetLink())
	}

	task.start = time.Now()
	task.delay = 15 // reset the delay, since the previous delay was relative to the old start time
	scheduler.Add(task)
}
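The final select above bounds how long the task waits by racing the result channel against time.After. Stripped of the scheduling details, that timeout pattern (a hypothetical helper, assuming the time package and an int result type) is:

// waitWithTimeout returns the result from done and true if it arrived
// before the timeout elapsed; on false the caller would requeue the task.
// (Illustrative helper, not part of the project code above.)
func waitWithTimeout(done <-chan int, timeout time.Duration) (int, bool) {
	select {
	case result := <-done:
		return result, true // got a result in time
	case <-time.After(timeout):
		return 0, false // timed out; caller should requeue
	}
}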
Example #3
// Run runs the client in an infinite loop
func (c *Client) Run() {
	ticker := time.NewTicker(10 * time.Second)
	c.ignoreBad = make(map[string]struct{})
	defer c.printBad()

	for {

		// wait for the next iteration
		<-ticker.C

		// go get the article
		req, err := Get(c.IP)
		if err != nil {
			log.Error(err)
			continue // go around to the next iteration
		}

		// don't reply to empty requests
		if netScraper.IsEmptyRequest(req) {
			log.Info("got empty request")
			continue
		}
		if _, contains := c.ignoreBad[req.URL]; contains {
			log.Warn("got bad article:", req.URL)
			continue
		}

		log.Info("got article", req.URL)

		// for now only use the NYT
		article := scraper.NYTArticle{}
		article.Link = req.URL

		err = scraper.ScrapeArticle(&article)
		if err != nil {
			c.ignoreBad[req.URL] = struct{}{} // add to ignore
			log.Error("could not scrape article", req.URL, ":", err)
			continue
		}

		if len(article.GetData()) == 0 {
			c.ignoreBad[req.URL] = struct{}{} // add to ignore
			log.Error("bad article body for url:", req.URL)
			continue
		}

		// send article back up
		result := netScraper.Response{URL: req.URL, Data: article.Data, Error: netScraper.ResponseOk}
		err = Post(c.IP, result)
		if err != nil {
			time.Sleep(10 * time.Second) // sleep in case the Wi-Fi is just down, then retry once

			err = Post(c.IP, result)
			if err != nil {
				log.Error(err)
			}
		}
	}
}
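How a Client is constructed is not shown in these examples. Assuming IP holds the server's base URL and the remaining fields can start at their zero values, kicking off the polling loop would look something like:

c := &Client{IP: "http://203.0.113.10:8080"} // hypothetical server address
c.Run() // blocks forever, polling every 10 seconds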
Example #4
// handleScrapedArticle validates a scraped article and stores it.
// TODO: think about where this should be
func handleScrapedArticle(article scraper.Article) {
	if err := scraper.CheckFile(article.GetData()); err != nil {
		log.Warn("when checking article", article.GetTitle(), "got err:", err)
	}
	if err := storeArticle(article); err != nil {
		log.Error("failed to write article", article.GetTitle(), ":", err)
		return
	}
}
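storeArticle is not among the examples shown. A plausible sketch, assuming the os and path/filepath packages and that GetData returns the article body as a string, might be:

// storeArticle writes the article body to a file named after its title.
// (Hypothetical implementation; the real storeArticle is not shown here.)
func storeArticle(article scraper.Article) error {
	name := filepath.Join("articles", article.GetTitle()+".txt")
	return os.WriteFile(name, []byte(article.GetData()), 0644)
}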
Example #5
// Handle is passed to go's built in http server. It only allows GET and POST
// requests. GET requests ask for an article to scrape and POST requests
// provide the clients result. Actual processing is handed off to internal functions.
func (s *ScrapeServer) Handle() func(rw http.ResponseWriter, request *http.Request) {
	return func(rw http.ResponseWriter, request *http.Request) {

		method := request.Method
		if method == "GET" {
			s.getHandle(rw, request)

		} else if method == "POST" {
			s.postHandle(rw, request)
		} else {
			log.Warn("oh nose, unexpected HTTP method:", method)
			rw.WriteHeader(405)
		}
	}
}
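As the doc comment says, the returned closure is meant for Go's built-in HTTP server. Wiring it up (with a hypothetical listen address, and however ScrapeServer is actually constructed) looks roughly like:

server := &ScrapeServer{} // assume any required fields are set elsewhere
http.HandleFunc("/", server.Handle())
log.Error(http.ListenAndServe(":8080", nil)) // ListenAndServe only returns on error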
Example #6
func (c Client) printBad() {
	for key := range c.ignoreBad {
		log.Warn("ignored article:", key)
	}
}
Example #7
// Run the main scheduler loop.
func (scheduler *Scheduler) Run() {
	scheduler.isRunning = true

	// signals the loop to run every (cycleTime) seconds

	ticker := time.NewTicker(time.Duration(scheduler.cycleTime) * time.Second)

	for {

		didAdd := false // keep track of adds so we only sort when we need to
	AddNewTasksLoop:
		for {
			// add tasks from buffered channel to queue until all waiting tasks are added
			select {
			case s := <-scheduler.addTask:
				scheduler.queue = append(scheduler.queue, s)
				didAdd = true
			default:
				break AddNewTasksLoop
			}
		}

		// only sort if we added a new task
		// TODO: reschedule tasks properly
		if didAdd {
			By(SortLowToHigh).Sort(scheduler.queue)
		}

		i := 0 // declared outside the loop so the removal step below can use it
		didRemove := false
		for ; i < len(scheduler.queue); i++ {
			if scheduler.queue[i].TimeRemaining() < scheduler.cycleTime {
				// run any tasks that are ready
				go scheduler.queue[i].Run(scheduler)
				didRemove = true

			} else { // no tasks to run this cycle
				break
			}

		}

		if didRemove {
			// remove the tasks that were started: shift the remaining tasks to
			// the front, then nil out the trailing slots so the removed tasks
			// can be garbage collected (nothing stays referenced in the unused
			// part of the backing array)
			copy(scheduler.queue[0:], scheduler.queue[i:])
			for j := 1; j <= i; j++ {
				scheduler.queue[len(scheduler.queue)-j] = nil
			}
			scheduler.queue = scheduler.queue[:len(scheduler.queue)-i]
		}

		select {
		case <-ticker.C:
			// wait until next time step

		case <-scheduler.quit:
			scheduler.isRunning = false
			log.Warn("Done with scheduler")
			ticker.Stop()
			return
		}
	}
}
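The element type of scheduler.queue does not appear in these examples. From the calls above (TimeRemaining, Run, and the nil assignments) it is presumably an interface along these lines, with the name and return type being assumptions:

// Task is a sketch of the interface the scheduler's queue appears to hold.
// Example #2's *SchedulableArticle would satisfy it.
type Task interface {
	// TimeRemaining returns how long until the task should run, in the
	// same (integer) units as the scheduler's cycleTime.
	TimeRemaining() int
	// Run executes the task; it may call scheduler.Add to requeue itself.
	Run(scheduler *Scheduler)
}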