// handles scraped articles // TODO: think about where this should be func handleScrapedArticle(article scraper.Article) { if err := scraper.CheckFile(article.GetData()); err != nil { log.Warn("when checking article", article.GetTitle(), "got err:", err) } if err := storeArticle(article); err != nil { log.Error("failed to write article", article.GetTitle(), ":", err) return } }
// CompareBodies will take two strings, one HTML path and one path to a // pre-parsed file and compare the two using the parse method from article. // If they are the same it returns nil, otherwise will return an error. func CompareBodies(HTMLarticle string, ExpectedArticle string, article scraper.Article) error { file, err := os.Open(HTMLarticle) // TODO: Add test file defer file.Close() if err != nil { return fmt.Errorf("error opening HTML file %s", err) } fileScanner := bufio.NewReader(file) parser := html.NewTokenizer(fileScanner) err = article.DoParse(parser) if err != nil { return fmt.Errorf("error parsing: %s", err) } fileCompare, err := os.Open(ExpectedArticle) defer fileCompare.Close() if err != nil { return fmt.Errorf("error opening body file:%s", err) } CompareFile := bufio.NewScanner(fileCompare) fullText := "" for CompareFile.Scan() { fullText += CompareFile.Text() } diffd := WriteDiff(article.GetData(), fullText) if fullText != article.GetData() { return fmt.Errorf("diff: \n%s\nExpected: \n%s\n Received: \n%s\n\n", diffd, fullText, article.GetData()) } return nil }