func (fc *feedCommand) test(c *kingpin.ParseContext) error { fc.init() url := fc.FeedURL resp, err := crawler.GetFeed(url, nil) if err != nil { return err } body, err := ioutil.ReadAll(resp.Body) resp.Body.Close() if err != nil { return err } feed, stories, err := feed.ParseFeed(url, body) if err != nil { return err } fmt.Printf("Found %d items in feed:\n", len(stories)) fmt.Printf(" Url: %s\n", feed.URL) fmt.Printf(" Title: %s\n", feed.Title) fmt.Printf(" Updated: %s\n", feed.Updated) fmt.Printf(" NextUpdate: %s\n", feed.NextUpdate) fmt.Printf(" Url: %s\n", feed.Link) for i, s := range stories { fmt.Printf("%d) %s\n", i, s.Title) fmt.Printf(" Published %s\n", s.Published) fmt.Printf(" Updated %s\n", s.Updated) fmt.Println() fmt.Printf("%s\n", s.Content) fmt.Println() fmt.Printf("Mail Message for %s:\n", s.Title) fmt.Println() m := mail.CreateMailFromItem("[email protected]", netmail.Address{Address: "[email protected]dress"}, s) fmt.Println("****** Mail Message *******") b := bytes.NewBuffer([]byte{}) m.WriteTo(b) spew.Dump(b) fmt.Println("****** ++++++++++++ *******") } return nil }
func TestFeedWatcherWithGuidsSet(t *testing.T) { t.Parallel() n, feedResp, _ := SetupTest(t, "../testdata/ars.rss") OverrideAfter(n) _, stories, _ := feed.ParseFeed(n.FeedInfo.URL, feedResp) guids := make(map[string]bool, 25) for _, i := range stories { guids[i.ID] = true } n.GUIDCache = guids go n.PollFeed() req := <-n.crawlChan req.ResponseChan <- &FeedCrawlResponse{ URI: n.FeedInfo.URL, Body: feedResp, Error: nil, HTTPResponseStatus: "200 OK", HTTPResponseStatusCode: 200, } resp := <-n.responseChan if len(resp.Items) != 0 { t.Fatalf("Expected 0 items from the feed but got %d.", len(resp.Items)) } // Second poll with an new items. n.GUIDCache = map[string]bool{} req = <-n.crawlChan req.ResponseChan <- &FeedCrawlResponse{ URI: n.FeedInfo.URL, Body: feedResp, Error: nil, HTTPResponseStatus: "200 OK", HTTPResponseStatusCode: 200, } resp = <-n.responseChan if len(resp.Items) != 25 { t.Fatalf("Expected 25 items from the feed but got %d.", len(resp.Items)) } }
// Core logic to poll a feed, find new items, add those to the database, and // send them for mail. // // Populates fields: // - Feed with information extracted from the feed // - Items with the items in the feed // - Error any errors encountered in parsing or handling the feed func (fw *FeedWatcher) updateFeed(resp *FeedCrawlResponse) error { feed, stories, err := feed.ParseFeed(resp.URI, resp.Body) if feed == nil || stories == nil || len(stories) == 0 { if err != nil { resp.Error = fmt.Errorf("Error parsing response from %s: %#v", resp.URI, err) fw.Logger.Error(resp.Error) } else { resp.Error = fmt.Errorf("no items found in %s", resp.URI) fw.Logger.Info(resp.Error) } return resp.Error } resp.Feed = feed /* * Load most recent X * guidSaveRatio Guids from Db * Filter New Stories * Send New Stories for mailing * Add sent guid to list * prune Guids back to X * guidSaveRatio */ // If we don't know about any GUIDs for this feed we haven't been // initalized yet. Check the GUIDs in the feed we just read and see if // they exist in the DB. That should be the maximum we should need to // load into memory. guidsToLoad := int(math.Ceil(float64(len(stories)) * guidCacheSize)) fw.Logger.Infof("Got %d stories from feed %s.", len(stories), fw.FeedInfo.URL) // On first pass or no stories ever seen if len(fw.GUIDCache) == 0 { var err error fw.GUIDCache, err = fw.LoadGuidsFromDb(guidsToLoad) if err != nil { resp.Error = fmt.Errorf("error getting Guids from DB: %s", err) return resp.Error } fw.Logger.Infof("Loaded %d known guids for Feed %s.", len(fw.GUIDCache), fw.FeedInfo.URL) } resp.Items = fw.filterNewItems(stories) fw.Logger.Infof("Feed %s has %d new items", feed.Title, len(resp.Items)) handledItems := 0 for _, item := range resp.Items { item.Title = fmt.Sprintf("%s: %s", fw.FeedInfo.Name, item.Title) fw.Logger.Infof("New Story: %s, sending for mail.", item.Title) err := fw.sendMail(item) if err != nil { fw.Logger.Infof("Error sending mail: '%s'. Skipping %d remaining items", err.Error(), len(resp.Items)-handledItems) resp.Error = &ErrMailDeliveryFailed{err.Error()} // An error with the mailer usually means we should just stop trying for // a bit. So skip the rest of the items. break } else { err := fw.recordGUID(item.ID) if err != nil { e := fmt.Errorf("error writing guid to db: %s", err) resp.Error = e fw.Logger.Info(e) } else { fw.Logger.Infof("Added guid %s for feed %s", item.ID, fw.FeedInfo.URL) } } handledItems++ } // Reload GUIDs to X * guidSaveRatio but only if there were new // items otherwise it would be noop. if len(resp.Items) > 0 { fw.GUIDCache, err = fw.LoadGuidsFromDb(guidsToLoad) if err != nil { e := fmt.Errorf("error getting Guids from DB: %s", err) fw.Logger.Info(e) resp.Error = e } } return resp.Error }