Esempio n. 1
0
// test fetches the feed at fc.FeedURL, parses it, and prints a summary of the
// feed and of every story to stdout. For each story it also builds the mail
// message via mail.CreateMailFromItem (with placeholder addresses) and dumps
// the rendered bytes.
//
// It returns the first error encountered while fetching, reading, parsing, or
// rendering a mail message, and nil otherwise. The ParseContext argument is
// not used.
func (fc *feedCommand) test(c *kingpin.ParseContext) error {
	fc.init()
	url := fc.FeedURL

	resp, err := crawler.GetFeed(url, nil)
	if err != nil {
		return err
	}
	body, err := ioutil.ReadAll(resp.Body)
	// The body is closed whether or not the read succeeded; the read error
	// is checked afterwards.
	resp.Body.Close()
	if err != nil {
		return err
	}

	info, stories, err := feed.ParseFeed(url, body)
	if err != nil {
		return err
	}

	fmt.Printf("Found %d items in feed:\n", len(stories))
	fmt.Printf("  Url: %s\n", info.URL)
	fmt.Printf("  Title: %s\n", info.Title)
	fmt.Printf("  Updated: %s\n", info.Updated)
	fmt.Printf("  NextUpdate: %s\n", info.NextUpdate)
	// Labelled "Link" so it can be told apart from the feed URL line above.
	fmt.Printf("  Link: %s\n", info.Link)
	for i, s := range stories {
		fmt.Printf("%d)  %s\n", i, s.Title)
		fmt.Printf("  Published  %s\n", s.Published)
		fmt.Printf("  Updated  %s\n", s.Updated)
		fmt.Println()
		fmt.Printf("%s\n", s.Content)
		fmt.Println()

		fmt.Printf("Mail Message for %s:\n", s.Title)
		fmt.Println()
		m := mail.CreateMailFromItem("From@Address", netmail.Address{Address: "To@Address"}, s)
		fmt.Println("****** Mail Message *******")
		b := bytes.NewBuffer([]byte{})
		// A failed render must not be reported as a valid message dump.
		if _, err := m.WriteTo(b); err != nil {
			return fmt.Errorf("error writing mail message for %q: %v", s.Title, err)
		}
		spew.Dump(b)
		fmt.Println("****** ++++++++++++ *******")
	}
	return nil
}
Esempio n. 2
0
// TestFeedWatcherWithGuidsSet checks that a poll reports zero items when every
// story ID from the fixture feed is already present in the watcher's
// GUIDCache, and that it reports 25 items on the next poll once the cache has
// been replaced with an empty map.
func TestFeedWatcherWithGuidsSet(t *testing.T) {
	t.Parallel()
	n, feedResp, _ := SetupTest(t, "../testdata/ars.rss")
	OverrideAfter(n)

	// Fail fast if the fixture itself cannot be parsed; otherwise the
	// item-count assertions below would fail with a misleading message.
	_, stories, err := feed.ParseFeed(n.FeedInfo.URL, feedResp)
	if err != nil {
		t.Fatalf("Error parsing test feed: %s", err)
	}
	guids := make(map[string]bool, len(stories))
	for _, s := range stories {
		guids[s.ID] = true
	}
	n.GUIDCache = guids

	// okResponse builds the successful crawl response handed back to the
	// watcher on each poll.
	okResponse := func() *FeedCrawlResponse {
		return &FeedCrawlResponse{
			URI:                    n.FeedInfo.URL,
			Body:                   feedResp,
			Error:                  nil,
			HTTPResponseStatus:     "200 OK",
			HTTPResponseStatusCode: 200,
		}
	}

	// NOTE(review): this goroutine is never stopped by the test — confirm
	// whether PollFeed exits on its own or needs an explicit shutdown.
	go n.PollFeed()

	// First poll: every GUID is already known, so nothing should be new.
	req := <-n.crawlChan
	req.ResponseChan <- okResponse()
	resp := <-n.responseChan
	if len(resp.Items) != 0 {
		t.Fatalf("Expected 0 items from the feed but got %d.", len(resp.Items))
	}

	// Second poll with an empty cache: every item should be reported.
	n.GUIDCache = map[string]bool{}
	req = <-n.crawlChan
	req.ResponseChan <- okResponse()
	resp = <-n.responseChan
	if len(resp.Items) != 25 {
		t.Fatalf("Expected 25 items from the feed but got %d.", len(resp.Items))
	}
}
Esempio n. 3
0
// updateFeed is the core logic to poll a feed: it parses the crawled body,
// filters the stories through filterNewItems, sends each resulting item for
// mail, and records its GUID.
//
// Populates fields on resp:
// - Feed with information extracted from the feed
// - Items with the stories returned by filterNewItems
// - Error with any error encountered in parsing or handling the feed
//
// The returned error is always the value left in resp.Error.
func (fw *FeedWatcher) updateFeed(resp *FeedCrawlResponse) error {
	parsed, stories, err := feed.ParseFeed(resp.URI, resp.Body)

	// len of a nil collection is 0, so this also covers stories == nil.
	if parsed == nil || len(stories) == 0 {
		if err != nil {
			// %s prints the error's message; %#v would print its Go-syntax
			// representation instead.
			resp.Error = fmt.Errorf("Error parsing response from %s: %s", resp.URI, err)
			fw.Logger.Error(resp.Error)
		} else {
			resp.Error = fmt.Errorf("no items found in %s", resp.URI)
			fw.Logger.Info(resp.Error)
		}
		return resp.Error
	}

	resp.Feed = parsed
	/*
	 * Load most recent X * guidSaveRatio Guids from Db
	 * Filter New Stories
	 * Send New Stories for mailing
	 *  Add sent guid to list
	 * prune Guids back to X * guidSaveRatio
	 */

	// If we don't know about any GUIDs for this feed we haven't been
	// initialized yet.  Check the GUIDs in the feed we just read and see if
	// they exist in the DB.  That should be the maximum we should need to
	// load into memory.

	guidsToLoad := int(math.Ceil(float64(len(stories)) * guidCacheSize))

	fw.Logger.Infof("Got %d stories from feed %s.", len(stories), fw.FeedInfo.URL)

	// On first pass or no stories ever seen
	if len(fw.GUIDCache) == 0 {
		fw.GUIDCache, err = fw.LoadGuidsFromDb(guidsToLoad)
		if err != nil {
			resp.Error = fmt.Errorf("error getting Guids from DB: %s", err)
			return resp.Error
		}
		fw.Logger.Infof("Loaded %d known guids for Feed %s.", len(fw.GUIDCache), fw.FeedInfo.URL)
	}

	resp.Items = fw.filterNewItems(stories)
	fw.Logger.Infof("Feed %s has %d new items", parsed.Title, len(resp.Items))

	handledItems := 0
	for _, item := range resp.Items {
		item.Title = fmt.Sprintf("%s: %s", fw.FeedInfo.Name, item.Title)
		fw.Logger.Infof("New Story: %s, sending for mail.", item.Title)
		if err := fw.sendMail(item); err != nil {
			fw.Logger.Infof("Error sending mail: '%s'.  Skipping %d remaining items", err.Error(), len(resp.Items)-handledItems)
			resp.Error = &ErrMailDeliveryFailed{err.Error()}
			// An error with the mailer usually means we should just stop trying for
			// a bit. So skip the rest of the items.
			break
		}

		// A failure to record the GUID is reported through resp.Error but
		// does not stop the remaining items from being mailed.
		if err := fw.recordGUID(item.ID); err != nil {
			resp.Error = fmt.Errorf("error writing guid to db: %s", err)
			fw.Logger.Info(resp.Error)
		} else {
			fw.Logger.Infof("Added guid %s for feed %s", item.ID, fw.FeedInfo.URL)
		}
		handledItems++
	}

	// Reload GUIDs to X * guidSaveRatio but only if there were new
	// items otherwise it would be noop.
	if len(resp.Items) > 0 {
		fw.GUIDCache, err = fw.LoadGuidsFromDb(guidsToLoad)
		if err != nil {
			resp.Error = fmt.Errorf("error getting Guids from DB: %s", err)
			fw.Logger.Info(resp.Error)
		}
	}
	return resp.Error
}