Example #1
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story) error {
	gn := goon.FromContext(c)
	f := Feed{Url: url}
	if err := gn.Get(&f); err != nil {
		return fmt.Errorf("feed not found: %s", url)
	}

	// Compare the feed's listed update to the story's update.
	// Note: these may not be accurate, hence, only compare them to each other,
	// since they should have the same relative error.
	storyDate := f.Updated

	hasUpdated := !feed.Updated.IsZero()
	isFeedUpdated := f.Updated == feed.Updated
	if !hasUpdated {
		feed.Updated = f.Updated
	}
	feed.Date = f.Date
	f = *feed

	if hasUpdated && isFeedUpdated {
		c.Infof("feed %s already updated to %v, putting", url, feed.Updated)
		f.Updated = time.Now()
		gn.Put(&f)
		return nil
	}

	c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v", hasUpdated, isFeedUpdated, storyDate)

	var newStories []*Story
	for _, s := range stories {
		if s.Updated.IsZero() || !s.Updated.Before(storyDate) {
			newStories = append(newStories, s)
		}
	}
	c.Debugf("%v possible stories to update", len(newStories))

	puts := []interface{}{&f}

	// find non-existent stories
	fk := gn.Key(&f)
	getStories := make([]*Story, len(newStories))
	for i, s := range newStories {
		getStories[i] = &Story{Id: s.Id, Parent: fk}
	}
	err := gn.GetMulti(getStories)
	if _, ok := err.(appengine.MultiError); err != nil && !ok {
		c.Errorf("get multi error: %v", err.Error())
		return err
	}
	var updateStories []*Story
	for i, s := range getStories {
		if goon.NotFound(err, i) {
			updateStories = append(updateStories, newStories[i])
		} else if !newStories[i].Updated.IsZero() && !newStories[i].Updated.Equal(s.Updated) {
			newStories[i].Created = s.Created
			newStories[i].Published = s.Published
			updateStories = append(updateStories, newStories[i])
		}
	}
	c.Debugf("%v update stories", len(updateStories))

	for _, s := range updateStories {
		puts = append(puts, s)
		sc := StoryContent{
			Id:     1,
			Parent: gn.Key(s),
		}
		buf := &bytes.Buffer{}
		if gz, err := gzip.NewWriterLevel(buf, gzip.BestCompression); err == nil {
			gz.Write([]byte(s.content))
			gz.Close()
			sc.Compressed = buf.Bytes()
		}
		if len(sc.Compressed) == 0 {
			sc.Content = s.content
		}
		gn.Put(&sc)
	}

	c.Debugf("putting %v entities", len(puts))
	if len(puts) > 1 {
		f.Date = time.Now()
		if !hasUpdated {
			f.Updated = f.Date
		}
	}
	gn.PutMulti(puts)

	return nil
}
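
The loop over updateStories stores each story body gzip-compressed in a StoryContent child entity, and falls back to the raw string only when compression produced nothing. A minimal standalone sketch of that compress-with-fallback pattern using just the standard library (the compressOrRaw name, the sample body, and the round-trip in main are made up for illustration):

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io/ioutil"
)

// compressOrRaw mirrors the StoryContent fallback above: gzip the body at
// best compression, and keep the raw string only if compression yielded nothing.
func compressOrRaw(content string) (compressed []byte, raw string) {
	buf := &bytes.Buffer{}
	if gz, err := gzip.NewWriterLevel(buf, gzip.BestCompression); err == nil {
		gz.Write([]byte(content))
		gz.Close()
		compressed = buf.Bytes()
	}
	if len(compressed) == 0 {
		raw = content
	}
	return
}

func main() {
	comp, raw := compressOrRaw("<p>story body</p>")
	fmt.Printf("compressed to %d bytes, raw fallback %q\n", len(comp), raw)

	// Round-trip: the stored bytes decompress back to the original body.
	gr, err := gzip.NewReader(bytes.NewReader(comp))
	if err != nil {
		panic(err)
	}
	out, _ := ioutil.ReadAll(gr)
	fmt.Println(string(out))
}
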
Example #2
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story) error {
	gn := goon.FromContext(c)
	f := Feed{Url: url}
	if err := gn.Get(&f); err != nil {
		return errors.New(fmt.Sprintf("feed not found: %s", url))
	}

	// Compare the feed's listed update to the story's update.
	// Note: these may not be accurate, hence, only compare them to each other,
	// since they should have the same relative error.
	storyDate := f.Updated

	hasUpdated := !feed.Updated.IsZero()
	isFeedUpdated := f.Updated == feed.Updated
	if !hasUpdated {
		feed.Updated = f.Updated
	}
	feed.Date = f.Date
	f = *feed

	if hasUpdated && isFeedUpdated {
		c.Infof("feed %s already updated to %v, putting", url, feed.Updated)
		f.Updated = time.Now()
		if strings.TrimSpace(f.Url) == "" {
			c.Criticalf("badurl5: %v, %v", url, f)
			return errors.New("badurl5")
		}
		gn.PutComplete(&f)
		return nil
	}

	c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v", hasUpdated, isFeedUpdated, storyDate)

	var newStories []*Story
	for _, s := range stories {
		if s.Updated.IsZero() || !s.Updated.Before(storyDate) {
			newStories = append(newStories, s)
		}
	}
	c.Debugf("%v possible stories to update", len(newStories))

	puts := []interface{}{&f}

	// find non-existent stories
	fk := gn.Key(&f)
	getStories := make([]*Story, len(newStories))
	for i, s := range newStories {
		getStories[i] = &Story{Id: s.Id, Parent: fk}
	}
	err := gn.GetMulti(getStories)
	if _, ok := err.(appengine.MultiError); err != nil && !ok {
		c.Errorf("get multi error: %v", err.Error())
		return err
	}
	var updateStories []*Story
	for i, s := range getStories {
		if goon.NotFound(err, i) {
			updateStories = append(updateStories, newStories[i])
		} else if !newStories[i].Updated.IsZero() && !newStories[i].Updated.Equal(s.Updated) {
			newStories[i].Created = s.Created
			newStories[i].Published = s.Published
			updateStories = append(updateStories, newStories[i])
		}
	}
	c.Debugf("%v update stories", len(updateStories))

	for _, s := range updateStories {
		puts = append(puts, s)
		gn.Put(&StoryContent{
			Id:      1,
			Parent:  gn.Key(s),
			Content: s.content,
		})
	}

	c.Debugf("putting %v entities", len(puts))
	if len(puts) > 1 {
		f.Date = time.Now()
		if !hasUpdated {
			f.Updated = f.Date
		}
	}
	if f.Url == "" {
		c.Criticalf("badurl6: %v", f)
		return errors.New("badurl6")
	}
	gn.PutMultiComplete(puts)

	return nil
}
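
This variant constructs its errors with errors.New(fmt.Sprintf(...)), which is equivalent to the fmt.Errorf form used in Example #1; static analyzers usually suggest the latter. A small comparison (the sample URL is illustrative):

package main

import (
	"errors"
	"fmt"
)

func main() {
	url := "http://example.com/feed"

	// Both forms build an error with the same message; fmt.Errorf is the
	// conventional spelling of the first.
	e1 := errors.New(fmt.Sprintf("feed not found: %s", url))
	e2 := fmt.Errorf("feed not found: %s", url)

	fmt.Println(e1.Error() == e2.Error()) // true
}
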
Example #3
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story, updateAll, fromSub, updateLast bool) error {
	gn := goon.FromContext(c)
	f := Feed{Url: url}
	if err := gn.Get(&f); err != nil {
		return fmt.Errorf("feed not found: %s", url)
	}
	gn.Put(&Log{
		Parent: gn.Key(&f),
		Id:     time.Now().UnixNano(),
		Text:   "feed update",
	})

	// Compare the feed's listed update to the story's update.
	// Note: these may not be accurate, hence, only compare them to each other,
	// since they should have the same relative error.
	storyDate := f.Updated

	hasUpdated := !feed.Updated.IsZero()
	isFeedUpdated := f.Updated.Equal(feed.Updated)
	if !hasUpdated {
		feed.Updated = f.Updated
	}
	feed.Date = f.Date
	feed.Average = f.Average
	feed.LastViewed = f.LastViewed
	f = *feed
	if updateLast {
		f.LastViewed = time.Now()
	}

	if hasUpdated && isFeedUpdated && !updateAll && !fromSub {
		c.Infof("feed %s already updated to %v, putting", url, feed.Updated)
		f.Updated = time.Now()
		scheduleNextUpdate(&f)
		gn.Put(&f)
		return nil
	}

	c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v, stories: %v", hasUpdated, isFeedUpdated, storyDate, len(stories))
	puts := []interface{}{&f}

	// find non-existent stories
	fk := gn.Key(&f)
	getStories := make([]*Story, len(stories))
	for i, s := range stories {
		getStories[i] = &Story{Id: s.Id, Parent: fk}
	}
	err := gn.GetMulti(getStories)
	if _, ok := err.(appengine.MultiError); err != nil && !ok {
		c.Errorf("get multi error: %v", err.Error())
		return err
	}
	var updateStories []*Story
	for i, s := range getStories {
		if goon.NotFound(err, i) {
			updateStories = append(updateStories, stories[i])
		} else if (!stories[i].Updated.IsZero() && !stories[i].Updated.Equal(s.Updated)) || updateAll {
			stories[i].Created = s.Created
			stories[i].Published = s.Published
			updateStories = append(updateStories, stories[i])
		}
	}
	c.Debugf("%v update stories", len(updateStories))

	for _, s := range updateStories {
		puts = append(puts, s)
		sc := StoryContent{
			Id:     1,
			Parent: gn.Key(s),
		}
		buf := &bytes.Buffer{}
		if gz, err := gzip.NewWriterLevel(buf, gzip.BestCompression); err == nil {
			gz.Write([]byte(s.content))
			gz.Close()
			sc.Compressed = buf.Bytes()
		}
		if len(sc.Compressed) == 0 {
			sc.Content = s.content
		}
		gn.Put(&sc)
	}

	c.Debugf("putting %v entities", len(puts))
	if len(puts) > 1 {
		updateAverage(&f, f.Date, len(puts)-1)
		f.Date = time.Now()
		if !hasUpdated {
			f.Updated = f.Date
		}
	}
	scheduleNextUpdate(&f)
	if fromSub {
		wait := time.Now().Add(time.Hour * 6)
		if f.NextUpdate.Before(wait) {
			f.NextUpdate = time.Now().Add(time.Hour * 6)
		}
	}
	delay := f.NextUpdate.Sub(time.Now())
	c.Infof("next update scheduled for %v from now", delay-delay%time.Second)
	gn.PutMulti(puts)
	return nil
}
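
The closing log line rounds the scheduling delay down to whole seconds with delay - delay%time.Second. A small illustration of that truncation (the duration value is arbitrary):

package main

import (
	"fmt"
	"time"
)

func main() {
	delay := time.Hour + 2*time.Minute + 3*time.Second + 456789*time.Microsecond

	fmt.Println(delay)                       // 1h2m3.456789s
	fmt.Println(delay - delay%time.Second)   // 1h2m3s, as logged above
	fmt.Println(delay.Truncate(time.Second)) // same result via Duration.Truncate (Go 1.9+)
}
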
Example #4
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})
	put := false
	fixRead := false
	if time.Since(u.Read) > oldDuration {
		c.Warningf("u.Read too old, fixing: %v", u.Read)
		u.Read = time.Now().Add(-oldDuration)
		put = true
		fixRead = true
		c.Warningf("new: %v", u.Read)
	}

	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()
	numStories := 0

	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				defer wg.Done()
				var stories []*Story

				if u.Read.Before(f.Date) {
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories = make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
				}
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				if f.Errors == 0 && f.NextUpdate.Before(now) {
					t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
						"feed": {f.Url},
					})
					if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
						c.Errorf("taskqueue error: %v", err.Error())
					} else {
						c.Warningf("manual feed update: %v", f.Url)
					}
				}
				f.Subscribe(c)
				lock.Lock()
				fl[f.Url] = stories
				numStories += len(stories)
				if len(stories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				lock.Unlock()
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	if numStories > numStoriesLimit {
		c.Step("numStories", func() {
			c.Errorf("too many stories: %v", numStories)
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			last := stories[numStoriesLimit].Created
			stories = stories[:numStoriesLimit]
			u.Read = last
			put = true
			fixRead = true
			fl = make(map[string][]*Story)
			for _, s := range stories {
				fk := s.Parent.StringID()
				p := fl[fk]
				fl[fk] = append(p, s)
			}
			c.Errorf("filtered: %v, %v", len(stories), last)
		})
	}
	if fixRead {
		c.Step("fix read", func() {
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			c.Errorf("fix read: %v -> %v", len(read), len(nread))
			read = nread
			var b bytes.Buffer
			gob.NewEncoder(&b).Encode(&read)
			ud.Read = b.Bytes()
			put = true
		})
	}
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		fl[k] = newStories
	}
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			put = true
			u.Read = last
			ud.Read = nil
		}
	}
	if updatedLinks {
		ud.Opml, _ = json.Marshal(&uf)
		put = true
	}
	if put {
		gn.PutMany(u, ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := cleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
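
The "feed fetch + wait" step fans the per-feed work out to 20 goroutines reading from an unbuffered channel, with wg.Add before each send and wg.Wait after close(queue). Note that this variant defers wg.Done() inside the range loop, so a worker's Done calls only run once the channel is closed and the worker returns; that still terminates, but calling Done per item is the more usual shape. A self-contained sketch of the pool with the datastore work replaced by a stand-in (the worker body, URLs, and results map are made up):

package main

import (
	"fmt"
	"sync"
)

func main() {
	queue := make(chan string)
	var wg sync.WaitGroup
	var mu sync.Mutex
	results := make(map[string]int)

	// Fixed pool of workers, as in the "feed fetch + wait" step above.
	worker := func() {
		for url := range queue {
			n := len(url) // stand-in for the per-feed query work
			mu.Lock()
			results[url] = n
			mu.Unlock()
			wg.Done() // mark this item finished, not deferred for the whole worker
		}
	}
	for i := 0; i < 20; i++ {
		go worker()
	}

	for _, url := range []string{"http://a.example/feed", "http://b.example/feed"} {
		wg.Add(1)
		queue <- url
	}
	close(queue)
	wg.Wait()

	fmt.Println(results)
}
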
Example #5
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	if err := gn.GetMulti([]interface{}{u, ud}); err != nil && !goon.NotFound(err, 1) {
		serveError(w, err)
		return
	}
	l := &Log{
		Parent: ud.Parent,
		Id:     time.Now().UnixNano(),
		Text:   "list feeds",
	}
	l.Text += fmt.Sprintf(", len opml %v", len(ud.Opml))
	putU := false
	putUD := false
	fixRead := false
	if time.Since(u.Read) > oldDuration {
		u.Read = time.Now().Add(-oldDuration)
		putU = true
		fixRead = true
		l.Text += ", u.Read"
	}
	trialRemaining := 0
	if STRIPE_KEY != "" && ud.Opml != nil {
		if u.Created.IsZero() {
			u.Created = time.Now()
			putU = true
		} else if time.Since(u.Created) > accountFreeDuration {
			b, _ := json.Marshal(struct {
				ErrorSubscription bool
			}{
				true,
			})
			w.Write(b)
			return
		}
		trialRemaining = int((accountFreeDuration-time.Since(u.Created))/time.Hour/24) + 1
	}
	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func(c mpg.Context) {
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func(c mpg.Context) {
		gn := goon.FromContext(appengine.Timeout(c, time.Minute))
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind()).
		Filter(IDX_COL+" >=", u.Read).
		KeysOnly().
		Order("-" + IDX_COL).
		Limit(250)
	updatedLinks := false
	now := time.Now()
	numStories := 0

	c.Step(fmt.Sprintf("feed unreads: %v", u.Read), func(c mpg.Context) {
		queue := make(chan *Feed)
		tc := make(chan *taskqueue.Task)
		done := make(chan bool)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				c.Step(f.Title, func(c mpg.Context) {
					defer wg.Done()
					var stories []*Story
					gn := goon.FromContext(appengine.Timeout(c, time.Minute))

					if !f.Date.Before(u.Read) {
						fk := gn.Key(f)
						sq := q.Ancestor(fk)
						keys, _ := gn.GetAll(sq, nil)
						stories = make([]*Story, len(keys))
						for j, key := range keys {
							stories[j] = &Story{
								Id:     key.StringID(),
								Parent: fk,
							}
						}
						gn.GetMulti(stories)
					}
					if f.Link != opmlMap[f.Url].HtmlUrl {
						l.Text += fmt.Sprintf(", link: %v -> %v", opmlMap[f.Url].HtmlUrl, f.Link)
						updatedLinks = true
						opmlMap[f.Url].HtmlUrl = f.Link
					}
					manualDone := false
					if time.Since(f.LastViewed) > time.Hour*24*2 {
						if f.NextUpdate.Equal(timeMax) {
							tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
								"feed": {f.Url},
								"last": {"1"},
							})
							manualDone = true
						} else {
							tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-last"), url.Values{
								"feed": {f.Url},
							})
						}
					}
					if !manualDone && now.Sub(f.NextUpdate) >= 0 {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
							"feed": {f.Url},
						})
					}
					lock.Lock()
					fl[f.Url] = stories
					numStories += len(stories)
					lock.Unlock()
				})
			}
		}
		go taskSender(c, "update-manual", tc, done)
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		// wait for feeds to complete so there are no more tasks to queue
		wg.Wait()
		// then finish enqueuing tasks
		close(tc)
		<-done
	})
	if numStories > 0 {
		c.Step("numStories", func(c mpg.Context) {
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			if len(stories) > numStoriesLimit {
				stories = stories[:numStoriesLimit]
				fl = make(map[string][]*Story)
				for _, s := range stories {
					fk := s.Parent.StringID()
					p := fl[fk]
					fl[fk] = append(p, s)
				}
			}
			last := stories[len(stories)-1].Created
			if u.Read.Before(last) {
				u.Read = last
				putU = true
				fixRead = true
			}
		})
	}
	if fixRead {
		c.Step("fix read", func(c mpg.Context) {
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			if len(nread) != len(read) {
				read = nread
				var b bytes.Buffer
				gob.NewEncoder(&b).Encode(&read)
				ud.Read = b.Bytes()
				putUD = true
				l.Text += ", fix read"
			}
		})
	}
	numStories = 0
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		numStories += len(newStories)
		fl[k] = newStories
	}
	if numStories == 0 {
		l.Text += ", clear read"
		fixRead = false
		if ud.Read != nil {
			putUD = true
			ud.Read = nil
		}
		last := u.Read
		for _, v := range feeds {
			if last.Before(v.Date) {
				last = v.Date
			}
		}
		c.Infof("nothing here, move up: %v -> %v", u.Read, last)
		if u.Read.Before(last) {
			putU = true
			u.Read = last
		}
	}
	if updatedLinks {
		backupOPML(c)
		if o, err := json.Marshal(&uf); err == nil {
			ud.Opml = o
			putUD = true
			l.Text += ", update links"
		} else {
			c.Errorf("json UL err: %v, %v", err, uf)
		}
	}
	if putU {
		gn.Put(u)
		l.Text += ", putU"
	}
	if putUD {
		gn.Put(ud)
		l.Text += ", putUD"
	}
	l.Text += fmt.Sprintf(", len opml %v", len(ud.Opml))
	gn.Put(l)
	c.Step("json marshal", func(c mpg.Context) {
		gn := goon.FromContext(c)
		o := struct {
			Opml           []*OpmlOutline
			Stories        map[string][]*Story
			Options        string
			TrialRemaining int
			Feeds          []*Feed
		}{
			Opml:           uf.Outline,
			Stories:        fl,
			Options:        u.Options,
			TrialRemaining: trialRemaining,
			Feeds:          feeds,
		}
		b, err := json.Marshal(o)
		if err != nil {
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := sanitizer.CleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
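
On load, ud.Read is decoded with encoding/gob into a Read set, and the "fix read" step re-encodes it before storing. A round-trip sketch, assuming Read is a map keyed by a feed/story pair as the lookups above suggest (the readStory and Read definitions here are simplified stand-ins, not the package's real types):

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

// Simplified stand-ins for the package's readStory and Read types.
type readStory struct {
	Feed, Story string
}
type Read map[readStory]bool

func main() {
	read := Read{
		readStory{Feed: "http://a.example/feed", Story: "story-1"}: true,
	}

	// Encode, as the "fix read" step does before storing ud.Read.
	var b bytes.Buffer
	if err := gob.NewEncoder(&b).Encode(&read); err != nil {
		panic(err)
	}

	// Decode, as the "unmarshal user data" step does on load.
	decoded := make(Read)
	if err := gob.NewDecoder(bytes.NewReader(b.Bytes())).Decode(&decoded); err != nil {
		panic(err)
	}
	fmt.Println(decoded[readStory{Feed: "http://a.example/feed", Story: "story-1"}]) // true
}
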
Example #6
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})

	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		json.Unmarshal(ud.Read, &read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()

	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				defer wg.Done()
				var newStories []*Story

				if u.Read.Before(f.Date) {
					c.Debugf("query for %v", f.Url)
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories := make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
					for _, st := range stories {
						found := false
						for _, s := range read[f.Url] {
							if s == st.Id {
								found = true
								break
							}
						}
						if !found {
							newStories = append(newStories, st)
						}
					}
				}
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				if f.Errors == 0 && f.NextUpdate.Before(now) {
					t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
						"feed": {f.Url},
					})
					if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
						c.Errorf("taskqueue error: %v", err.Error())
					} else {
						c.Warningf("manual feed update: %v", f.Url)
					}
				}
				f.Subscribe(c)
				lock.Lock()
				fl[f.Url] = newStories
				if len(newStories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				lock.Unlock()
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			u.Read = last
			ud.Read = nil
			gn.PutMany(u, ud)
		}
	}
	if updatedLinks {
		ud.Opml, _ = json.Marshal(&uf)
		gn.Put(ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := cleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
	_ = utf8.RuneError
}
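
Like the other variants, this handler marshals an anonymous struct so the JSON response shape is declared in one place; the error path that cleans story summaries and retries exists because, on the older runtimes this code targets, strings containing invalid UTF-8 could make json.Marshal fail. A reduced sketch of the response marshalling (the Story and OpmlOutline types here are trimmed stand-ins, and the sample data is invented):

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed stand-ins for the package's Story and OpmlOutline types.
type Story struct {
	Id      string
	Summary string
}
type OpmlOutline struct {
	XmlUrl  string
	HtmlUrl string
}

func main() {
	fl := map[string][]*Story{
		"http://a.example/feed": {{Id: "story-1", Summary: "hello"}},
	}
	// Field names of the anonymous struct become the JSON keys.
	o := struct {
		Opml    []*OpmlOutline
		Stories map[string][]*Story
		Icons   map[string]string
		Options string
	}{
		Opml:    []*OpmlOutline{{XmlUrl: "http://a.example/feed"}},
		Stories: fl,
		Icons:   map[string]string{"http://a.example/feed": "http://a.example/favicon.ico"},
		Options: "{}",
	}
	b, err := json.Marshal(o)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
}
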
Example #7
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})
	putU := false
	putUD := false
	fixRead := false
	if time.Since(u.Read) > oldDuration {
		u.Read = time.Now().Add(-oldDuration)
		putU = true
		fixRead = true
	}

	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	noads := make(map[string]bool)
	now := time.Now()
	numStories := 0

	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		tc := make(chan *taskqueue.Task)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				defer wg.Done()
				var stories []*Story

				if u.Read.Before(f.Date) {
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories = make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
				}
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				manualDone := false
				if time.Since(f.LastViewed) > time.Hour*24*2 {
					if f.NextUpdate.Equal(timeMax) {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
							"feed": {f.Url},
							"last": {"1"},
						})
						manualDone = true
					} else {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-last"), url.Values{
							"feed": {f.Url},
						})
					}
				}
				if !manualDone && now.Sub(f.NextUpdate) >= 0 {
					tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
						"feed": {f.Url},
					})
				}
				lock.Lock()
				fl[f.Url] = stories
				numStories += len(stories)
				if len(stories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				if f.NoAds {
					noads[f.Url] = true
				}
				lock.Unlock()
			}
		}
		go func() {
			var tasks []*taskqueue.Task
			for t := range tc {
				tasks = append(tasks, t)
				if len(tasks) == 100 {
					taskqueue.AddMulti(c, tasks, "update-manual")
					c.Infof("added %v tasks", len(tasks))
					tasks = tasks[0:0]
				}
			}
			if len(tasks) > 0 {
				taskqueue.AddMulti(c, tasks, "update-manual")
				c.Infof("added %v tasks", len(tasks))
			}
			wg.Done()
		}()
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		// wait for feeds to complete so there are no more tasks to queue
		wg.Wait()
		wg.Add(1)
		// then finish enqueuing tasks
		close(tc)
		wg.Wait()
	})
	if numStories > numStoriesLimit {
		c.Step("numStories", func() {
			c.Infof("too many stories: %v", numStories)
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			last := stories[numStoriesLimit].Created
			stories = stories[:numStoriesLimit]
			u.Read = last
			putU = true
			fixRead = true
			fl = make(map[string][]*Story)
			for _, s := range stories {
				fk := s.Parent.StringID()
				p := fl[fk]
				fl[fk] = append(p, s)
			}
			c.Infof("filtered: %v, %v", len(stories), last)
		})
	}
	if fixRead {
		c.Step("fix read", func() {
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			read = nread
			var b bytes.Buffer
			gob.NewEncoder(&b).Encode(&read)
			ud.Read = b.Bytes()
			putUD = true
		})
	}
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		fl[k] = newStories
	}
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			putU = true
			putUD = true
			u.Read = last
			ud.Read = nil
		}
	}
	if updatedLinks {
		backupOPML(c)
		if o, err := json.Marshal(&uf); err == nil {
			ud.Opml = o
			putUD = true
		} else {
			saveError(c, fmt.Sprintf("%v", uf), err)
			c.Errorf("json UL err: %v, %v", err, uf)
		}
	}
	if putU {
		gn.Put(u)
	}
	if putUD {
		gn.Put(ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			NoAds   map[string]bool
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			NoAds:   noads,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := sanitizer.CleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
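
The task-sender goroutine in this variant drains the tc channel and submits tasks 100 at a time with taskqueue.AddMulti, then flushes the final partial batch once the channel closes. The same batching shape with the App Engine call replaced by a print (item names and counts are made up):

package main

import "fmt"

func main() {
	tc := make(chan string)
	done := make(chan bool)

	go func() {
		var batch []string
		flush := func() {
			if len(batch) == 0 {
				return
			}
			fmt.Printf("flushing %d items\n", len(batch)) // stand-in for taskqueue.AddMulti
			batch = batch[:0]
		}
		for item := range tc {
			batch = append(batch, item)
			if len(batch) == 100 {
				flush()
			}
		}
		flush() // final partial batch after the channel closes
		done <- true
	}()

	for i := 0; i < 250; i++ {
		tc <- fmt.Sprintf("task-%d", i)
	}
	close(tc)
	<-done
}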