예제 #1
0
// RssMasterPollFeeds dispatches every feed that is due for a check and waits for the results.
//
// It queries the "feeds" bucket for keys whose NextCheckIndexName index value lies between Go's
// zero time and now, loads each feed, and sends its URL on InputCh from a dedicated goroutine.
// It then receives exactly one FeedError from OutputCh per successfully loaded feed. Every
// failure encountered (query, load, or a reported FeedError with a non-nil Err) is collected and
// logged once at the end as a MultiError.
//
// If the bucket cannot be obtained the problem is logged and the function returns without
// polling anything.
func RssMasterPollFeeds(con *riak.Client, InputCh chan<- url.URL, OutputCh <-chan FeedError) {
	bucket, err := con.NewBucket("feeds")
	if err != nil {
		log.Println("Failed to get feed bucket:", err)
		// Without a bucket there is nothing to query; continuing would use an invalid bucket.
		return
	}

	var errs []error

	// -62135596800 is Go's zero time according to Unix's time format.  This is what empty feeds have for their check time.
	// Nothing should appear before that.
	keysToPoll, err := bucket.IndexQueryRange(NextCheckIndexName, "-62135596800", strconv.FormatInt(time.Now().Unix(), 10))
	if err != nil {
		// Record the query failure so it is reported with the other errors instead of being
		// silently dropped.
		errs = append(errs, err)
	}

	dispatched := 0
	for _, key := range keysToPoll {
		var loadFeed Feed
		if err := con.LoadModel(key, &loadFeed); err != nil {
			errs = append(errs, err)
			continue
		}

		log.Println(loadFeed.Url)
		dispatched++
		// Send from a goroutine so that a slow consumer of InputCh does not stall the loop.
		go func(Url url.URL, inputCh chan<- url.URL) {
			inputCh <- Url
		}(loadFeed.Url, InputCh)
	}

	// Exactly one result is expected for every URL that was dispatched above.
	for i := 0; i < dispatched; i++ {
		if feedErr := <-OutputCh; feedErr.Err != nil {
			errs = append(errs, feedErr)
		}
	}

	if len(errs) != 0 {
		log.Println(MultiError(errs))
	}
}
예제 #2
0
// killBucket removes every object stored in the bucket named bucketName.
//
// The bucket's keys are listed once, then each object is fetched and destroyed in its own
// goroutine. The function waits for all of them to finish and returns the first failure that
// was reported, or nil when every object was destroyed. Failures are returned to the caller
// rather than raised as panics inside the worker goroutines.
func killBucket(con *riak.Client, bucketName string) error {
	bucket, err := con.NewBucket(bucketName)
	if err != nil {
		return err
	}

	keys, err := bucket.ListKeys()
	if err != nil {
		return err
	}

	// Buffered to len(keys) so no worker ever blocks while reporting its failure.
	errCh := make(chan error, len(keys))

	var wg sync.WaitGroup
	wg.Add(len(keys))
	for _, key := range keys {
		go func(key string) {
			defer wg.Done()

			obj, err := bucket.Get(key)
			if obj == nil {
				// NOTE(review): a nil object with a nil error is not expected from Get;
				// it is reported as riak.NotFound so the failure is not lost — confirm.
				if err == nil {
					err = riak.NotFound
				}
				errCh <- err
				return
			}
			if err := obj.Destroy(); err != nil {
				errCh <- err
			}
		}(string(key))
	}
	wg.Wait()
	close(errCh)

	// After close, a receive yields ok == false once the buffered failures are exhausted.
	if firstErr, ok := <-errCh; ok {
		return firstErr
	}
	return nil
}
예제 #3
0
// mustCreateEmptyItemAt stores a zero-valued FeedItem under itemKey.
//
// The load that precedes the save must report riak.NotFound; any other result (including a
// successful load) fails the test immediately, as does a failed save.
func mustCreateEmptyItemAt(t *testing.T, con *riak.Client, itemKey ItemKey) {
	var emptyItem FeedItem

	err := con.LoadModel(itemKey.GetRiakKey(), &emptyItem)
	if err != riak.NotFound {
		t.Fatalf("Failed to preload item to create an empty item at %s (%s)", itemKey.GetRiakKey(), err)
	}

	err = emptyItem.Save()
	if err != nil {
		t.Fatalf("Failed to save an empty item at %s (%s)", itemKey.GetRiakKey(), err)
	}
}
예제 #4
0
// RssMasterHandleAddRequest registers the feed at Url if it is not stored yet.
//
// When the feed already loads successfully nothing is changed and nil is returned. When the
// load reports riak.NotFound, the feed's next-check index is set to Go's zero time (as a Unix
// timestamp string) and the feed is saved. Any other load error, or a save error, is returned.
func RssMasterHandleAddRequest(con *riak.Client, Url url.URL) error {
	feedModel := &Feed{Url: Url}

	switch err := con.LoadModel(feedModel.UrlKey(), feedModel); err {
	case nil:
		// Already present; nothing to add.
		return nil
	case riak.NotFound:
		zeroCheck := strconv.FormatInt(time.Time{}.Unix(), 10)
		feedModel.Indexes()[NextCheckIndexName] = zeroCheck
		return feedModel.Save()
	default:
		return err
	}
}
예제 #5
0
// CreateFeed stores a blank feed for Url and returns it, failing the test on any error.
//
// The feed is first loaded (riak.NotFound is tolerated), then every field except Model is reset
// to its zero value with only Url set, and the result is saved.
func CreateFeed(t *testing.T, con *riak.Client, Url *url.URL) *Feed {
	feed := &Feed{Url: *Url}

	err := con.LoadModel(feed.UrlKey(), feed)
	if err != nil && err != riak.NotFound {
		t.Fatalf("Failed to initialize feed model (%s)!", err)
	}

	// Build the blank replacement, carrying over only the Model field set up by the load.
	blank := Feed{Url: *Url}
	blank.Model = feed.Model
	*feed = blank

	err = feed.Save()
	if err != nil {
		t.Fatalf("Failed to store feed model (%s)!", err)
	}
	return feed
}
예제 #6
0
// InsertItem stores item as a new FeedItem under itemKey.
//
// The save only happens when loading the key reports riak.NotFound. Any other load result is
// returned as is: a load error is passed to the caller, and a successful load (the key already
// holds an item) returns nil without writing anything.
func InsertItem(con *riak.Client, itemKey ItemKey, item ParsedFeedItem) error {
	var itemModel FeedItem
	itemModel.Title = item.Title
	itemModel.Author = item.Author
	itemModel.Content = item.Content
	itemModel.Url = item.Url
	itemModel.PubDate = item.PubDate

	loadErr := con.LoadModel(itemKey.GetRiakKey(), &itemModel)
	if loadErr != riak.NotFound {
		return loadErr
	}
	return itemModel.Save()
}
예제 #7
0
// checkAllItemsDeleted reports whether every key in itemKeyList is absent from the store.
//
// Each key is looked up in its own goroutine. A lookup counts as "still present" unless it
// reports riak.NotFound, so any other load error also makes the function return false.
func checkAllItemsDeleted(t *testing.T, itemKeyList ItemKeyList, con *riak.Client) bool {
	stillPresent := make(chan bool)

	lookup := func(key ItemKey) {
		var probe FeedItem
		err := con.LoadModel(key.GetRiakKey(), &probe)
		stillPresent <- err != riak.NotFound
	}
	for _, itemKey := range itemKeyList {
		go lookup(itemKey)
	}

	// One answer arrives per key, in no particular order.
	remaining := 0
	for i := 0; i < len(itemKeyList); i++ {
		if <-stillPresent {
			remaining++
		}
	}
	return remaining == 0
}
예제 #8
0
// compareParsedToFinalFeed checks that the stored feed model matches the parsed feed data.
//
// It compares the title, the next-check time (both the field and its secondary index value),
// the fetch time, the number of items, and finally every stored item against the parsed item at
// the same position. Mismatches are reported through t. The function returns false for a title,
// item-count, leftover-key or item-content mismatch, and true otherwise; time mismatches are
// reported via t.Errorf but do not change the return value.
//
// Items are loaded concurrently, one goroutine per key, and delivered in key order through a
// chain of channels (see the comments below).
func compareParsedToFinalFeed(t *testing.T, data *ParsedFeedData, model *Feed, con *riak.Client) bool {
	// Compare basics:
	if data.Title != model.Title {
		t.Errorf("Feed title didn't match '%s' vs '%s'!", data.Title, model.Title)
		return false
	}
	if !data.NextCheckTime.Equal(model.NextCheck) {
		t.Errorf("Next time to check feed doesn't match %#v vs %#v!", data.NextCheckTime, model.NextCheck)
	}
	if !(strconv.FormatInt(data.NextCheckTime.Unix(), 10) == model.Indexes()[NextCheckIndexName]) {
		t.Errorf("Next time(in 2i) to check feed doesn't match %v vs %v!", data.NextCheckTime.Unix(), model.Indexes()[NextCheckIndexName])
	}
	if !data.FetchedAt.Equal(model.LastCheck) {
		// Report the two values that were actually compared.
		t.Errorf("Fetch time from feed doesn't match %#v vs %#v!", data.FetchedAt, model.LastCheck)
	}

	if len(data.Items) != len(model.ItemKeys) {
		if len(data.Items) > MaximumFeedItems {
			t.Errorf("Item count differs due to items count greater then Maximum number of feed items (%v of %v)", len(data.Items), MaximumFeedItems)
		} else {
			t.Errorf("Item count is different %v vs %v!", len(data.Items), len(model.ItemKeys))
		}
		return false
	}
	if len(model.InsertedItemKeys) != 0 || len(model.DeletedItemKeys) != 0 {
		t.Error("There are left over inserted or deleted item keys!")
		return false
	}

	// FeedItemCh pairs a loaded item with the channel on which the next item will arrive, so the
	// loaders form a linked chain that preserves key order.
	type FeedItemCh struct {
		item FeedItem
		ch   chan FeedItemCh
	}

	itemChOut := make(chan FeedItemCh)
	itemCh := make(chan FeedItem)

	// Feeder: walks the chain, forwarding each item to itemCh and then switching to the channel
	// supplied with it.  It stops, closing itemCh, when the channel it reads from is closed.
	go func(itemCh chan FeedItem, itemChOut chan FeedItemCh) {
		defer close(itemCh)
		for item, ok := <-itemChOut; ok; item, ok = <-itemChOut {
			itemCh <- item.item
			itemChOut = item.ch
		}
	}(itemCh, itemChOut)

	for _, itemKey := range model.ItemKeys {
		itemChIn := make(chan FeedItemCh)

		// Loader: loads one item and publishes it on the current link of the chain together
		// with the next link (itemChIn).
		go func(itemKey ItemKey, itemChOut, itemChIn chan FeedItemCh) {
			defer close(itemChOut)

			modelItem := FeedItem{}
			if err := con.LoadModel(itemKey.GetRiakKey(), &modelItem, riak.R1); err != nil {
				t.Errorf("Failed to load item! Error: %s item %s", err, itemKey.GetRiakKey())
			}
			itemChOut <- FeedItemCh{modelItem, itemChIn}
		}(itemKey, itemChOut, itemChIn)

		itemChOut = itemChIn
	}
	// This ensures the feeder go routine will eventually quit.  It either closes its initial input,
	// or the final channel it will get (since itemChOut is equal to the last channel the go routine
	// will read from, since it stores the last itemChIn from above).
	close(itemChOut)

	// Compare saved feed items.  This means a trip through riak!  The order should match though ...
	for i := range model.ItemKeys {
		var modelItem FeedItem
		select {
		case modelItem = <-itemCh:
		case <-time.After(time.Minute * 5):
			t.Fatalf("Failed to get an item before timeout, item %v", i)
		}

		dataItem := data.Items[i]

		if dataItem.Title != modelItem.Title ||
			dataItem.Author != modelItem.Author ||
			dataItem.Content != modelItem.Content ||
			dataItem.Url != modelItem.Url ||
			!dataItem.PubDate.Equal(modelItem.PubDate) {
			t.Errorf("Item data didn't match! Original:\n%#v\nLoaded:\n%#v", dataItem, modelItem)
			// NOTE(review): returning here stops draining itemCh, so the feeder and any
			// loaders still waiting to send stay blocked for the rest of the test run.
			return false
		}
	}

	return true
}
예제 #9
0
// updateFeed applies freshly parsed feed data to the stored feed at feedUrl.
//
// Steps, in order:
//  1. Load the feed; riak.NotFound is reported as FeedNotFound.
//  2. Reconcile InsertedItemKeys left by an unfinished earlier run: keys whose item exists in
//     the "items" bucket are appended to ItemKeys, the rest are dropped.
//  3. Copy title, next-check and last-check times from feedData and update the next-check index.
//  4. Classify every parsed item as an in-place update, a delete-and-reinsert (publication date
//     changed), or a brand new item, trimming the oldest keys to stay below MaximumFeedItems.
//  5. Assign ids (read from ids) to the new items, record them in InsertedItemKeys, and save the
//     feed so the planned changes are persisted before they are carried out.
//  6. Insert, update and delete items concurrently, collecting one result per operation.
//  7. If any operation failed return a MultiError; otherwise save the feed again and return it.
//
// On any error the returned *Feed is nil.
func updateFeed(con *riak.Client, feedUrl url.URL, feedData ParsedFeedData, ids <-chan uint64) (*Feed, error) {
	feed := &Feed{Url: feedUrl}
	if err := con.LoadModel(feed.UrlKey(), feed); err == riak.NotFound {
		return nil, FeedNotFound
	} else if err != nil {
		return nil, err
	}
	// First clean out inserted item keys.  This handles unfinished previous operations.
	itemsBucket, err := con.Bucket("items")
	if err != nil {
		return nil, err
	}

	// Note, this insert items without caring about the 10,000 limit.  Of course, any regular inserted
	// item will force the limit back down.
	for _, itemKey := range feed.InsertedItemKeys {
		// Does this item exist?
		if ok, err := itemsBucket.Exists(itemKey.GetRiakKey()); err != nil {
			return nil, err
		} else if ok {
			// Yep, so add it to the list.
			feed.ItemKeys = append(feed.ItemKeys, itemKey)
		}
		// Otherwise non-existent items are dropped.
		// NOTE(review): the original comment ended mid-sentence ("This is to avoid"); the
		// intended rationale is not recoverable from this function.
	}
	feed.InsertedItemKeys = nil

	// Next update the basic attributes
	feed.Title = feedData.Title
	feed.NextCheck = feedData.NextCheckTime
	feed.LastCheck = feedData.FetchedAt
	// Also set 2i to appropriate values!
	feed.Indexes()[NextCheckIndexName] = strconv.FormatInt(feed.NextCheck.Unix(), 10)

	/* Next find all the feed items to insert/update.  If the item doesn't exist, create its id and
	 * mark for insert.  Otherwise mark it for a read/update/store pass.  Make sure to mark for
	 * deletion items as necessary.
	 */
	// This struct holds an ItemKey and a ParsedFeedItem for later parsing.
	type ToProcess struct {
		ItemKey ItemKey
		Data    ParsedFeedItem
		Model   *FeedItem
	}
	NewItems := make([]ToProcess, 0)
	UpdatedItems := make([]ToProcess, 0)
	SeenNewItemKeys := make(map[string]bool)

	for _, rawItem := range feedData.Items {
		// Try to find the raw Item in the Item Keys list.
		index := feed.ItemKeys.FindRawItemId(rawItem.GenericKey)
		if index != -1 {
			// Found it!  Load the details.  Also load the model, which will be re-used later.
			p := ToProcess{
				ItemKey: feed.ItemKeys[index],
				Data:    rawItem,
				Model:   &FeedItem{},
			}

			if err := con.LoadModel(p.ItemKey.GetRiakKey(), p.Model); err != nil {
				return nil, err
			}

			// Ok, does this have a new pub date?  If so, pull it out of its current position, and
			// move it up the chain.  Otherwise, just update the content.  If an item has no pub date,
			// assume that it has changed if any part of the item changed.
			if p.Model.PubDate.Equal(p.Data.PubDate) && !(p.Data.PubDate.IsZero() && itemDiffersFromModel(p.Data, p.Model)) {
				// Pub dates are the same.  Just modify the item to match what is in the feed.
				UpdatedItems = append(UpdatedItems, p)
			} else {
				// Pub dates differ.  Delete the item, and re-insert it.
				feed.DeletedItemKeys = append(feed.DeletedItemKeys, p.ItemKey)
				feed.ItemKeys.RemoveAt(index)

				// Delete the model from the to process struct.
				p.Model = &FeedItem{}

				NewItems = append(NewItems, p) // This gives us the new id.
			}
		} else {
			// Nope, lets insert it!  First, should we knock off an item?  We need to stay below MaximumFeedItems.
			for (len(feed.ItemKeys)+len(NewItems)) >= MaximumFeedItems && len(feed.ItemKeys) > 0 {
				// Need to kill an item.  So get the last key
				lastKey := feed.ItemKeys[len(feed.ItemKeys)-1]
				// insert it onto the end of the deleted item list.
				feed.DeletedItemKeys = append(feed.DeletedItemKeys, lastKey)
				// If we are updating this key, then remove it from this list.  No need to waste
				// time.  Filter in place rather than deleting while ranging: deleting inside the
				// range skips the element after each removal and can slice out of bounds when
				// more than one entry matches.
				kept := UpdatedItems[:0]
				for _, item := range UpdatedItems {
					if !item.ItemKey.Equal(lastKey) {
						kept = append(kept, item)
					}
				}
				UpdatedItems = kept
				// And finally, pop the item
				feed.ItemKeys = feed.ItemKeys[:len(feed.ItemKeys)-1]
			}
			// Only insert if there are less then MaximumFeedItems already to be inserted.
			// This works since any later item will have been updated after.
			if len(NewItems) < MaximumFeedItems {
				// Also, make sure we aren't inserting the same item twice.  If it is duplicated, the
				// second item is guaranteed to be later.  So just drop it.
				if keyString := string(rawItem.GenericKey); !SeenNewItemKeys[keyString] {
					NewItems = append(NewItems, ToProcess{
						Data: rawItem,
					})
					SeenNewItemKeys[keyString] = true
				}
			}
		}
	}

	/* Alright, any new items are mentioned in the Feed before being inserted.  In case something
	 * happens, I'd prefer not to lose an item.  Note the order is reversed so that the oldest story
	 * will get the smallest id, preserving sort order.  Inserted Item Keys needs to be sorted (well,
	 * reversed) after this so it is in correct order as well.  This loop violates ItemKeys sort
	 * order, so the sort is necessary for now. */
	for i := len(NewItems) - 1; i >= 0; i-- {
		newItem := &NewItems[i]
		newItem.ItemKey = NewItemKey(<-ids, newItem.Data.GenericKey)
		feed.InsertedItemKeys = append(feed.InsertedItemKeys, newItem.ItemKey)
	}
	sort.Sort(feed.InsertedItemKeys)

	// Ok, we must save here.  Otherwise planned changes may occur that will not be cleaned up!
	if err := feed.Save(); err != nil {
		return nil, err
	}

	errCh := make(chan error) // All of the errors go into here, to be pulled out.

	// Good, now implement the change and update the Feed.

	// First add new items
	for _, newItem := range NewItems {
		feed.ItemKeys = append(feed.ItemKeys, newItem.ItemKey)
		go func(newItem ToProcess) {
			errCh <- InsertItem(con, newItem.ItemKey, newItem.Data)
		}(newItem)
	}
	feed.InsertedItemKeys = nil

	// Now update them.
	for _, newItem := range UpdatedItems {
		go func(newItem ToProcess) {
			errCh <- UpdateItem(con, newItem.ItemKey, newItem.Data, newItem.Model)
		}(newItem)
	}

	// Finally delete items.
	for _, deleteItemKey := range feed.DeletedItemKeys {
		go func(toDelete ItemKey) {
			if obj, err := itemsBucket.Get(toDelete.GetRiakKey()); obj == nil {
				errCh <- err
			} else {
				errCh <- obj.Destroy()
			}
		}(deleteItemKey)
	}
	deletedItemCount := len(feed.DeletedItemKeys) // Need this to drain the error channel later.
	// Ok, deleted.  So clear the list
	feed.DeletedItemKeys = nil

	sort.Sort(sort.Reverse(feed.ItemKeys)) // Just sort this.  TBD: Actually maintain this sort order to avoid this!

	// Now, collect the errors.  errCh is unbuffered, so exactly one receive per goroutine started
	// above is required to let all of them finish.
	var errs []error
	drainErrorChannelIntoSlice(errCh, &errs, len(NewItems))
	drainErrorChannelIntoSlice(errCh, &errs, len(UpdatedItems))
	drainErrorChannelIntoSlice(errCh, &errs, deletedItemCount)
	if len(errs) != 0 {
		return nil, MultiError(errs)
	}

	if err := feed.Save(); err != nil {
		return nil, err
	}

	return feed, nil
}