예제 #1
0
// handleScrapedArticle validates and then stores a scraped article.
//
// The article's data is first passed to scraper.CheckFile; a failure there is
// only logged as a warning and does not stop processing. The article is then
// handed to storeArticle, and a failure to store it is logged as an error.
// TODO: think about where this should be
func handleScrapedArticle(article scraper.Article) {
	checkErr := scraper.CheckFile(article.GetData())
	if checkErr != nil {
		// Validation problems are non-fatal: report them and still store.
		log.Warn("when checking article", article.GetTitle(), "got err:", checkErr)
	}

	storeErr := storeArticle(article)
	if storeErr == nil {
		return
	}
	log.Error("failed to write article", article.GetTitle(), ":", storeErr)
}
예제 #2
0
// CompareBodies parses the HTML file at HTMLarticle with article.DoParse and
// compares the resulting article.GetData() against the contents of the
// pre-parsed file at ExpectedArticle.
//
// The expected file is read line by line and the lines are concatenated
// without their line terminators, so line breaks in ExpectedArticle do not
// take part in the comparison.
//
// It returns nil when the two texts are equal. Otherwise it returns an error:
// either a wrapped error from opening, parsing or reading one of the files,
// or an error containing the diff, the expected text and the received text.
func CompareBodies(HTMLarticle string, ExpectedArticle string, article scraper.Article) error {
	file, err := os.Open(HTMLarticle) // TODO: Add test file
	if err != nil {
		return fmt.Errorf("error opening HTML file %w", err)
	}
	// Defer only after the open succeeded, so Close is never called on a nil file.
	defer file.Close()

	parser := html.NewTokenizer(bufio.NewReader(file))
	if err := article.DoParse(parser); err != nil {
		return fmt.Errorf("error parsing: %w", err)
	}

	fileCompare, err := os.Open(ExpectedArticle)
	if err != nil {
		return fmt.Errorf("error opening body file:%w", err)
	}
	defer fileCompare.Close()

	// Accumulate into a byte slice to avoid quadratic string concatenation.
	var expected []byte
	scanner := bufio.NewScanner(fileCompare)
	for scanner.Scan() {
		expected = append(expected, scanner.Bytes()...)
	}
	// Scan stops silently on read errors and on over-long lines; surface that
	// instead of comparing against a truncated expected text.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("error reading body file: %w", err)
	}
	fullText := string(expected)

	received := article.GetData()
	// NOTE(review): WriteDiff is still called unconditionally, as before, in
	// case it has side effects beyond returning the diff — confirm, then
	// consider moving it into the mismatch branch.
	diffd := WriteDiff(received, fullText)
	if fullText != received {
		return fmt.Errorf("diff: \n%s\nExpected: \n%s\n Received: \n%s\n\n",
			diffd, fullText, received)
	}

	return nil
}