Esempio n. 1
0
// AddArticle places article on the Jefe's ready queue, where it waits to be
// scraped by a client and sent back up to the Jefe. The channel c is recorded
// against the article's link so the schedulable article can be signalled
// later. All bookkeeping happens while holding the Jefe's mutex.
func (j *Jefe) AddArticle(article scraper.Article, c chan int) {
	link := article.GetLink()

	j.mutex.Lock()
	defer j.mutex.Unlock()

	log.Info("adding article", link, "to ready queue")
	j.queue = append(j.queue, article)

	// Both lookup tables are keyed by the article's link.
	j.openArticles[link] = article
	j.openRequests[link] = c
}
Esempio n. 2
0
// storeArticle serializes article to JSON and writes it to
// opinionatedData/<name>.json with mode 0644, where <name> is the article
// title with spaces and path separators removed.
//
// It returns the error from json.Marshal or from the file write, or nil on
// success.
func storeArticle(article scraper.Article) error {
	jsonStr, err := json.Marshal(article)
	if err != nil {
		return err
	}

	// Build the file name from the title. Spaces are dropped, and so are path
	// separators: a title such as "AC/DC" would otherwise make the write
	// target a non-existent subdirectory (or escape the data directory).
	// TODO: think about cleaning this up a little more
	sanitizer := strings.NewReplacer(" ", "", "/", "", "\\", "")
	fileName := sanitizer.Replace(article.GetTitle())
	path := "opinionatedData/" + fileName + ".json"

	err = ioutil.WriteFile(path, jsonStr, 0644)
	if err != nil {
		return err
	}

	log.Info("wrote article:", article.GetTitle(), "to location:", path)
	return nil
}
Esempio n. 3
0
// handleScrapedArticle validates a scraped article's data and then persists
// the article. A failed check is only logged as a warning; the article is
// still stored. A failed store is logged as an error.
// TODO: think about where this should be
func handleScrapedArticle(article scraper.Article) {
	checkErr := scraper.CheckFile(article.GetData())
	if checkErr != nil {
		log.Warn("when checking article", article.GetTitle(), "got err:", checkErr)
	}

	storeErr := storeArticle(article)
	if storeErr == nil {
		return
	}
	log.Error("failed to write article", article.GetTitle(), ":", storeErr)
}
Esempio n. 4
0
// CompareBodies parses the HTML file at HTMLarticle with article.DoParse and
// compares the resulting article data against the contents of the pre-parsed
// file at ExpectedArticle.
//
// The expected file is read line by line and the lines are concatenated
// without their line terminators before the comparison.
//
// It returns nil when the two texts are identical. Otherwise it returns an
// error describing the failure: a file could not be opened or read, parsing
// failed, or the texts differ (in which case the error carries the diff, the
// expected text and the received text).
func CompareBodies(HTMLarticle string, ExpectedArticle string, article scraper.Article) error {
	file, err := os.Open(HTMLarticle) // TODO: Add test file
	if err != nil {
		return fmt.Errorf("error opening HTML file %s", err)
	}
	// Register the close only once the open is known to have succeeded.
	defer file.Close()

	parser := html.NewTokenizer(bufio.NewReader(file))
	if err = article.DoParse(parser); err != nil {
		return fmt.Errorf("error parsing: %s", err)
	}

	fileCompare, err := os.Open(ExpectedArticle)
	if err != nil {
		return fmt.Errorf("error opening body file:%s", err)
	}
	defer fileCompare.Close()

	// Accumulate the lines in a byte slice rather than with string +=, which
	// would re-copy the whole text on every line.
	scanner := bufio.NewScanner(fileCompare)
	var expected []byte
	for scanner.Scan() {
		expected = append(expected, scanner.Bytes()...)
	}
	// Scan stops silently on a read error or an over-long line; without this
	// check the comparison would run against truncated expected text.
	if err = scanner.Err(); err != nil {
		return fmt.Errorf("error reading body file: %s", err)
	}
	fullText := string(expected)

	// WriteDiff is still called unconditionally, as before, in case it does
	// more than return the diff text.
	diffd := WriteDiff(article.GetData(), fullText)
	if fullText != article.GetData() {
		return fmt.Errorf("diff: \n%s\nExpected: \n%s\n Received: \n%s\n\n",
			diffd, fullText, article.GetData())
	}

	return nil
}