// AddArticle adds an article to the ready queue. The article will be scraped by // a client then sent back up to the Jefe. The chan signals back to the // schedulable article func (j *Jefe) AddArticle(article scraper.Article, c chan int) { j.mutex.Lock() defer j.mutex.Unlock() log.Info("adding article", article.GetLink(), "to ready queue") j.queue = append(j.queue, article) j.openRequests[article.GetLink()] = c j.openArticles[article.GetLink()] = article }
// write the provided article to the storage func storeArticle(article scraper.Article) error { jsonStr, err := json.Marshal(article) if err != nil { return err } // take all spaces out of title // TODO: think about cleaning this up a little more fileName := strings.Replace(article.GetTitle(), " ", "", -1) path := "opinionatedData/" + fileName + ".json" err = ioutil.WriteFile(path, jsonStr, 0644) if err != nil { return err } log.Info("wrote article:", article.GetTitle(), "to location:", path) return nil }
// handles scraped articles // TODO: think about where this should be func handleScrapedArticle(article scraper.Article) { if err := scraper.CheckFile(article.GetData()); err != nil { log.Warn("when checking article", article.GetTitle(), "got err:", err) } if err := storeArticle(article); err != nil { log.Error("failed to write article", article.GetTitle(), ":", err) return } }
// CompareBodies will take two strings, one HTML path and one path to a // pre-parsed file and compare the two using the parse method from article. // If they are the same it returns nil, otherwise will return an error. func CompareBodies(HTMLarticle string, ExpectedArticle string, article scraper.Article) error { file, err := os.Open(HTMLarticle) // TODO: Add test file defer file.Close() if err != nil { return fmt.Errorf("error opening HTML file %s", err) } fileScanner := bufio.NewReader(file) parser := html.NewTokenizer(fileScanner) err = article.DoParse(parser) if err != nil { return fmt.Errorf("error parsing: %s", err) } fileCompare, err := os.Open(ExpectedArticle) defer fileCompare.Close() if err != nil { return fmt.Errorf("error opening body file:%s", err) } CompareFile := bufio.NewScanner(fileCompare) fullText := "" for CompareFile.Scan() { fullText += CompareFile.Text() } diffd := WriteDiff(article.GetData(), fullText) if fullText != article.GetData() { return fmt.Errorf("diff: \n%s\nExpected: \n%s\n Received: \n%s\n\n", diffd, fullText, article.GetData()) } return nil }