func RssMasterPollFeeds(con *riak.Client, InputCh chan<- url.URL, OutputCh <-chan FeedError) {
	bucket, err := con.NewBucket("feeds")
	if err != nil {
		log.Println("Failed to get feed bucket:", err)
		return
	}

	// -62135596800 is Go's zero time according to Unix's time format. This is what empty feeds
	// have for their check time. Nothing should appear before that.
	keys_to_poll, err := bucket.IndexQueryRange(NextCheckIndexName, "-62135596800",
		strconv.FormatInt(time.Now().Unix(), 10))
	if err != nil {
		log.Println("Failed to query the next-check index:", err)
		return
	}

	var errors []error
	valid_keys := 0
	for _, key := range keys_to_poll {
		var loadFeed Feed
		if err := con.LoadModel(key, &loadFeed); err != nil {
			errors = append(errors, err)
		} else {
			log.Println(loadFeed.Url)
			valid_keys++
			go func(Url url.URL, inputCh chan<- url.URL) {
				inputCh <- Url
			}(loadFeed.Url, InputCh)
		}
	}

	// Expect exactly one result on OutputCh for every URL handed out above.
	for i := 0; i < valid_keys; i++ {
		if err := <-OutputCh; err.Err != nil {
			errors = append(errors, err)
		}
	}

	if len(errors) != 0 {
		log.Println(MultiError(errors))
	}
}
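// A minimal sketch, not part of the original code: a worker loop that RssMasterPollFeeds
// could be wired to. The contract implied above is only that exactly one FeedError arrives
// on OutputCh for every URL sent on InputCh. The poll callback is hypothetical and stands
// in for the real per-feed fetch/update logic.
func rssWorkerSketch(inputCh <-chan url.URL, outputCh chan<- FeedError, poll func(url.URL) error) {
	for feedUrl := range inputCh {
		// Report one result per URL, nil or not, so the master's drain loop terminates.
		outputCh <- FeedError{Err: poll(feedUrl)}
	}
}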
func mustCreateEmptyItemAt(t *testing.T, con *riak.Client, itemKey ItemKey) {
	itemModel := FeedItem{}
	// The preload must report NotFound; any other result (including success) means
	// something already exists at this key.
	if err := con.LoadModel(itemKey.GetRiakKey(), &itemModel); err != riak.NotFound {
		t.Fatalf("Failed to preload item to create an empty item at %s (%s)", itemKey.GetRiakKey(), err)
	} else if err = itemModel.Save(); err != nil {
		t.Fatalf("Failed to save an empty item at %s (%s)", itemKey.GetRiakKey(), err)
	}
}
func RssMasterHandleAddRequest(con *riak.Client, Url url.URL) error {
	feedModel := &Feed{Url: Url}
	if err := con.LoadModel(feedModel.UrlKey(), feedModel); err != nil && err != riak.NotFound {
		return err
	} else if err == nil {
		// The feed already exists; nothing to do.
		return nil
	} else { // Implicitly err == riak.NotFound
		feedModel.Indexes()[NextCheckIndexName] = strconv.FormatInt(time.Time{}.Unix(), 10)
		if err = feedModel.Save(); err != nil {
			return err
		}
	}
	return nil
}
func CreateFeed(t *testing.T, con *riak.Client, Url *url.URL) *Feed {
	feedModel := &Feed{Url: *Url}
	if err := con.LoadModel(feedModel.UrlKey(), feedModel); err != nil && err != riak.NotFound {
		t.Fatalf("Failed to initialize feed model (%s)!", err)
	} else {
		// Reset every field except the embedded Model, so the feed is stored fresh
		// under the same key.
		modelElement := feedModel.Model
		*feedModel = Feed{Url: *Url}
		feedModel.Model = modelElement
		if err = feedModel.Save(); err != nil {
			t.Fatalf("Failed to store feed model (%s)!", err)
		}
	}
	return feedModel
}
func InsertItem(con *riak.Client, itemKey ItemKey, item ParsedFeedItem) error {
	itemModel := FeedItem{
		Title:   item.Title,
		Author:  item.Author,
		Content: item.Content,
		Url:     item.Url,
		PubDate: item.PubDate,
	}
	// Only save when the key is free: a nil error from the load means the item
	// already exists and is left untouched (and nil is returned).
	if err := con.LoadModel(itemKey.GetRiakKey(), &itemModel); err != riak.NotFound {
		return err
	} else if err = itemModel.Save(); err != nil {
		return err
	}
	return nil
}
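// UpdateItem is called by updateFeed below but not defined in this section. A plausible
// sketch only, mirroring InsertItem: copy the parsed fields onto the model that updateFeed
// already loaded, then save it back under the same key. The real implementation may differ.
func UpdateItem(con *riak.Client, itemKey ItemKey, item ParsedFeedItem, itemModel *FeedItem) error {
	itemModel.Title = item.Title
	itemModel.Author = item.Author
	itemModel.Content = item.Content
	itemModel.Url = item.Url
	itemModel.PubDate = item.PubDate
	return itemModel.Save()
}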
func checkAllItemsDeleted(t *testing.T, itemKeyList ItemKeyList, con *riak.Client) bool {
	ch := make(chan bool)
	for _, itemKey := range itemKeyList {
		go func(itemKey ItemKey, ch chan<- bool) {
			modelItem := FeedItem{}
			if err := con.LoadModel(itemKey.GetRiakKey(), &modelItem); err == riak.NotFound {
				ch <- false
			} else {
				ch <- true
			}
		}(itemKey, ch)
	}

	problems := 0
	for range itemKeyList {
		if found := <-ch; found {
			problems++
		}
	}
	return problems == 0
}
func compareParsedToFinalFeed(t *testing.T, data *ParsedFeedData, model *Feed, con *riak.Client) bool {
	// Compare basics:
	if data.Title != model.Title {
		t.Errorf("Feed title didn't match: '%s' vs '%s'!", data.Title, model.Title)
		return false
	}
	if !data.NextCheckTime.Equal(model.NextCheck) {
		t.Errorf("Next time to check feed doesn't match: %#v vs %#v!", data.NextCheckTime, model.NextCheck)
	}
	if strconv.FormatInt(data.NextCheckTime.Unix(), 10) != model.Indexes()[NextCheckIndexName] {
		t.Errorf("Next time (in 2i) to check feed doesn't match: %v vs %v!", data.NextCheckTime.Unix(), model.Indexes()[NextCheckIndexName])
	}
	if !data.FetchedAt.Equal(model.LastCheck) {
		t.Errorf("Fetch time from feed doesn't match: %#v vs %#v!", data.FetchedAt, model.LastCheck)
	}

	if len(data.Items) != len(model.ItemKeys) {
		if len(data.Items) > MaximumFeedItems {
			t.Errorf("Item count differs because the item count is greater than the maximum number of feed items (%v of %v)", len(data.Items), MaximumFeedItems)
		} else {
			t.Errorf("Item count is different: %v vs %v!", len(data.Items), len(model.ItemKeys))
		}
		return false
	}
	if len(model.InsertedItemKeys) != 0 || len(model.DeletedItemKeys) != 0 {
		t.Error("There are left over inserted or deleted item keys!")
		return false
	}

	// Load the stored items concurrently, but deliver them in ItemKeys order: each loader
	// goroutine sends its item plus the channel the *next* item will arrive on, and a
	// feeder goroutine flattens the chain onto itemCh.
	type FeedItemCh struct {
		item FeedItem
		ch   chan FeedItemCh
	}
	itemChOut := make(chan FeedItemCh)
	itemCh := make(chan FeedItem)
	go func(itemCh chan FeedItem, itemChOut chan FeedItemCh) {
		defer close(itemCh)
		for item, ok := <-itemChOut; ok; item, ok = <-itemChOut {
			itemCh <- item.item
			itemChOut = item.ch
		}
	}(itemCh, itemChOut)

	for _, itemKey := range model.ItemKeys {
		itemChIn := make(chan FeedItemCh)
		go func(itemKey ItemKey, itemChOut, itemChIn chan FeedItemCh) {
			defer close(itemChOut)
			modelItem := FeedItem{}
			if err := con.LoadModel(itemKey.GetRiakKey(), &modelItem, riak.R1); err != nil {
				t.Errorf("Failed to load item! Error: %s item %s", err, itemKey.GetRiakKey())
			}
			itemChOut <- FeedItemCh{modelItem, itemChIn}
		}(itemKey, itemChOut, itemChIn)
		itemChOut = itemChIn
	}
	// This ensures the feeder goroutine will eventually quit. It either closes its initial
	// input, or the final channel it will get (since itemChOut is equal to the last channel
	// the goroutine will read from, since it stores the last itemChIn from above).
	close(itemChOut)

	// Compare saved feed items. This means a trip through riak! The order should match though ...
	for i := range model.ItemKeys {
		var modelItem FeedItem
		select {
		case modelItem = <-itemCh:
		case <-time.After(time.Minute * 5):
			t.Fatalf("Failed to get an item before timeout, item %v", i)
		}

		dataItem := data.Items[i]
		if dataItem.Title != modelItem.Title ||
			dataItem.Author != modelItem.Author ||
			dataItem.Content != modelItem.Content ||
			dataItem.Url != modelItem.Url ||
			!dataItem.PubDate.Equal(modelItem.PubDate) {
			t.Errorf("Item data didn't match! Original:\n%#v\nLoaded:\n%#v", dataItem, modelItem)
			return false
		}
	}
	return true
}
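// itemDiffersFromModel is used by updateFeed below but not defined in this section. A
// plausible sketch, assuming it compares the same fields compareParsedToFinalFeed checks
// above: report a difference when any content-bearing field has changed.
func itemDiffersFromModel(dataItem ParsedFeedItem, modelItem *FeedItem) bool {
	return dataItem.Title != modelItem.Title ||
		dataItem.Author != modelItem.Author ||
		dataItem.Content != modelItem.Content ||
		dataItem.Url != modelItem.Url ||
		!dataItem.PubDate.Equal(modelItem.PubDate)
}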
func updateFeed(con *riak.Client, feedUrl url.URL, feedData ParsedFeedData, ids <-chan uint64) (*Feed, error) {
	feed := &Feed{Url: feedUrl}
	if err := con.LoadModel(feed.UrlKey(), feed); err == riak.NotFound {
		return nil, FeedNotFound
	} else if err != nil {
		return nil, err
	}

	// First clean out inserted item keys. This handles unfinished previous operations.
	itemsBucket, err := con.Bucket("items")
	if err != nil {
		return nil, err
	}
	// Note, this inserts items without caring about the MaximumFeedItems limit. Of course,
	// any regular inserted item will force the limit back down.
	for _, itemKey := range feed.InsertedItemKeys {
		// Does this item exist?
		if ok, err := itemsBucket.Exists(itemKey.GetRiakKey()); err != nil {
			return nil, err
		} else if ok {
			// Yep, so add it to the list.
			feed.ItemKeys = append(feed.ItemKeys, itemKey)
		}
		// Otherwise non-existent items are dropped. This is to avoid keeping keys that
		// point at items that were never actually stored.
	}
	feed.InsertedItemKeys = nil

	// Next update the basic attributes.
	feed.Title = feedData.Title
	feed.NextCheck = feedData.NextCheckTime
	feed.LastCheck = feedData.FetchedAt
	// Also set 2i to appropriate values!
	feed.Indexes()[NextCheckIndexName] = strconv.FormatInt(feed.NextCheck.Unix(), 10)

	/* Next find all the feed items to insert/update. If the item doesn't exist, create its
	 * id and mark it for insert. Otherwise mark it for a read/update/store pass. Make sure
	 * to mark items for deletion as necessary. */

	// This struct holds an ItemKey and a ParsedFeedItem for later processing.
	type ToProcess struct {
		ItemKey ItemKey
		Data    ParsedFeedItem
		Model   *FeedItem
	}

	NewItems := make([]ToProcess, 0)
	UpdatedItems := make([]ToProcess, 0)
	SeenNewItemKeys := make(map[string]bool)

	for _, rawItem := range feedData.Items {
		// Try to find the raw item in the item keys list.
		index := feed.ItemKeys.FindRawItemId(rawItem.GenericKey)
		if index != -1 {
			// Found it! Load the details. Also load the model, which will be re-used later.
			p := ToProcess{
				ItemKey: feed.ItemKeys[index],
				Data:    rawItem,
				Model:   &FeedItem{},
			}
			if err := con.LoadModel(p.ItemKey.GetRiakKey(), p.Model); err != nil {
				return nil, err
			}

			// Ok, does this have a new pub date? If so, pull it out of its current position
			// and move it up the chain. Otherwise, just update the content. If an item has
			// no pub date, assume it has changed if any part of the item changed.
			if p.Model.PubDate.Equal(p.Data.PubDate) &&
				!(p.Data.PubDate.IsZero() && itemDiffersFromModel(p.Data, p.Model)) {
				// Pub dates are the same. Just modify the item to match what is in the feed.
				UpdatedItems = append(UpdatedItems, p)
			} else {
				// Pub dates differ. Delete the item, and re-insert it.
				feed.DeletedItemKeys = append(feed.DeletedItemKeys, p.ItemKey)
				feed.ItemKeys.RemoveAt(index)

				// Delete the model from the to-process struct.
				p.Model = &FeedItem{}
				NewItems = append(NewItems, p) // This gives us the new id.
			}
		} else {
			// Nope, let's insert it! First, should we knock off an item? We need to stay
			// below MaximumFeedItems.
			for (len(feed.ItemKeys)+len(NewItems)) >= MaximumFeedItems && len(feed.ItemKeys) > 0 {
				// Need to kill an item. So get the last key
				lastKey := feed.ItemKeys[len(feed.ItemKeys)-1]

				// and insert it onto the end of the deleted item list.
				feed.DeletedItemKeys = append(feed.DeletedItemKeys, lastKey)

				// If we are updating this key, then remove it from the update list. No need
				// to waste time.
				for i, item := range UpdatedItems {
					if item.ItemKey.Equal(lastKey) {
						UpdatedItems = append(UpdatedItems[:i], UpdatedItems[i+1:]...)
						break // Keys are unique; removing while ranging is only safe if we stop here.
					}
				}

				// And finally, pop the item.
				feed.ItemKeys = feed.ItemKeys[:len(feed.ItemKeys)-1]
			}

			// Only insert if there are fewer than MaximumFeedItems already to be inserted.
			// This works since any later item will have been updated after.
			if len(NewItems) < MaximumFeedItems {
				// Also, make sure we aren't inserting the same item twice. If it is
				// duplicated, the second item is guaranteed to be later. So just drop it.
				if keyString := string(rawItem.GenericKey); !SeenNewItemKeys[keyString] {
					NewItems = append(NewItems, ToProcess{
						Data: rawItem,
					})
					SeenNewItemKeys[keyString] = true
				}
			}
		}
	}

	/* Alright, any new items are mentioned in the Feed before being inserted. In case
	 * something happens, I'd prefer not to lose an item. Note the order is reversed so that
	 * the oldest story will get the smallest id, preserving sort order. InsertedItemKeys
	 * needs to be sorted (well, reversed) after this so it is in correct order as well.
	 * This loop violates ItemKeys sort order, so the sort is necessary for now. */
	for i := len(NewItems) - 1; i >= 0; i-- {
		newItem := &NewItems[i]
		newItem.ItemKey = NewItemKey(<-ids, newItem.Data.GenericKey)
		feed.InsertedItemKeys = append(feed.InsertedItemKeys, newItem.ItemKey)
	}
	sort.Sort(feed.InsertedItemKeys)

	// Ok, we must save here. Otherwise planned changes may occur that will not be cleaned up!
	if err := feed.Save(); err != nil {
		return nil, err
	}

	errCh := make(chan error) // All of the errors go into here, to be pulled out.

	// Good, now implement the change and update the Feed.
	// First add new items.
	for _, newItem := range NewItems {
		feed.ItemKeys = append(feed.ItemKeys, newItem.ItemKey)
		go func(newItem ToProcess) {
			errCh <- InsertItem(con, newItem.ItemKey, newItem.Data)
		}(newItem)
	}
	feed.InsertedItemKeys = nil

	// Now update them.
	for _, newItem := range UpdatedItems {
		go func(newItem ToProcess) {
			errCh <- UpdateItem(con, newItem.ItemKey, newItem.Data, newItem.Model)
		}(newItem)
	}

	// Finally delete items.
	for _, deleteItemKey := range feed.DeletedItemKeys {
		go func(toDelete ItemKey) {
			if obj, err := itemsBucket.Get(toDelete.GetRiakKey()); obj == nil {
				errCh <- err
			} else {
				errCh <- obj.Destroy()
			}
		}(deleteItemKey)
	}
	deletedItemCount := len(feed.DeletedItemKeys) // Needed to drain the error channel later.

	// Ok, deleted. So clear the list.
	feed.DeletedItemKeys = nil

	sort.Sort(sort.Reverse(feed.ItemKeys)) // Just sort this. TBD: Actually maintain this sort order to avoid this!

	// Now, collect the errors.
	var errs []error
	drainErrorChannelIntoSlice(errCh, &errs, len(NewItems))
	drainErrorChannelIntoSlice(errCh, &errs, len(UpdatedItems))
	drainErrorChannelIntoSlice(errCh, &errs, deletedItemCount)
	if len(errs) != 0 {
		return nil, MultiError(errs)
	}

	if err := feed.Save(); err != nil {
		return nil, err
	}

	return feed, nil
}
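// MultiError and drainErrorChannelIntoSlice are used above but not defined in this
// section. Plausible sketches consistent with their call sites, assuming MultiError is a
// simple []error wrapper: its Error method joins the messages, and
// drainErrorChannelIntoSlice reads exactly count results from the channel, keeping the
// non-nil ones.
type MultiError []error

func (m MultiError) Error() string {
	msg := ""
	for i, err := range m {
		if i > 0 {
			msg += "; "
		}
		msg += err.Error()
	}
	return msg
}

func drainErrorChannelIntoSlice(errCh <-chan error, errs *[]error, count int) {
	for i := 0; i < count; i++ {
		if err := <-errCh; err != nil {
			*errs = append(*errs, err)
		}
	}
}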