func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story) error { gn := goon.FromContext(c) f := Feed{Url: url} if err := gn.Get(&f); err != nil { return fmt.Errorf("feed not found: %s", url) } // Compare the feed's listed update to the story's update. // Note: these may not be accurate, hence, only compare them to each other, // since they should have the same relative error. storyDate := f.Updated hasUpdated := !feed.Updated.IsZero() isFeedUpdated := f.Updated == feed.Updated if !hasUpdated { feed.Updated = f.Updated } feed.Date = f.Date f = *feed if hasUpdated && isFeedUpdated { c.Infof("feed %s already updated to %v, putting", url, feed.Updated) f.Updated = time.Now() gn.Put(&f) return nil } c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v", hasUpdated, isFeedUpdated, storyDate) var newStories []*Story for _, s := range stories { if s.Updated.IsZero() || !s.Updated.Before(storyDate) { newStories = append(newStories, s) } } c.Debugf("%v possible stories to update", len(newStories)) puts := []interface{}{&f} // find non existant stories fk := gn.Key(&f) getStories := make([]*Story, len(newStories)) for i, s := range newStories { getStories[i] = &Story{Id: s.Id, Parent: fk} } err := gn.GetMulti(getStories) if _, ok := err.(appengine.MultiError); err != nil && !ok { c.Errorf("get multi error: %v", err.Error()) return err } var updateStories []*Story for i, s := range getStories { if goon.NotFound(err, i) { updateStories = append(updateStories, newStories[i]) } else if !newStories[i].Updated.IsZero() && !newStories[i].Updated.Equal(s.Updated) { newStories[i].Created = s.Created newStories[i].Published = s.Published updateStories = append(updateStories, newStories[i]) } } c.Debugf("%v update stories", len(updateStories)) for _, s := range updateStories { puts = append(puts, s) sc := StoryContent{ Id: 1, Parent: gn.Key(s), } buf := &bytes.Buffer{} if gz, err := gzip.NewWriterLevel(buf, gzip.BestCompression); err == nil { 
gz.Write([]byte(s.content)) gz.Close() sc.Compressed = buf.Bytes() } if len(sc.Compressed) == 0 { sc.Content = s.content } gn.Put(&sc) } c.Debugf("putting %v entities", len(puts)) if len(puts) > 1 { f.Date = time.Now() if !hasUpdated { f.Updated = f.Date } } gn.PutMulti(puts) return nil }
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story) error { gn := goon.FromContext(c) f := Feed{Url: url} if err := gn.Get(&f); err != nil { return errors.New(fmt.Sprintf("feed not found: %s", url)) } // Compare the feed's listed update to the story's update. // Note: these may not be accurate, hence, only compare them to each other, // since they should have the same relative error. storyDate := f.Updated hasUpdated := !feed.Updated.IsZero() isFeedUpdated := f.Updated == feed.Updated if !hasUpdated { feed.Updated = f.Updated } feed.Date = f.Date f = *feed if hasUpdated && isFeedUpdated { c.Infof("feed %s already updated to %v, putting", url, feed.Updated) f.Updated = time.Now() if strings.TrimSpace(f.Url) == "" { c.Criticalf("badurl5: %v, %v", url, f) return errors.New("badurl5") } gn.PutComplete(&f) return nil } c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v", hasUpdated, isFeedUpdated, storyDate) var newStories []*Story for _, s := range stories { if s.Updated.IsZero() || !s.Updated.Before(storyDate) { newStories = append(newStories, s) } } c.Debugf("%v possible stories to update", len(newStories)) puts := []interface{}{&f} // find non existant stories fk := gn.Key(&f) getStories := make([]*Story, len(newStories)) for i, s := range newStories { getStories[i] = &Story{Id: s.Id, Parent: fk} } err := gn.GetMulti(getStories) if _, ok := err.(appengine.MultiError); err != nil && !ok { c.Errorf("get multi error: %v", err.Error()) return err } var updateStories []*Story for i, s := range getStories { if goon.NotFound(err, i) { updateStories = append(updateStories, newStories[i]) } else if !newStories[i].Updated.IsZero() && !newStories[i].Updated.Equal(s.Updated) { newStories[i].Created = s.Created newStories[i].Published = s.Published updateStories = append(updateStories, newStories[i]) } } c.Debugf("%v update stories", len(updateStories)) for _, s := range updateStories { puts = append(puts, s) gn.Put(&StoryContent{ Id: 1, Parent: 
gn.Key(s), Content: s.content, }) } c.Debugf("putting %v entities", len(puts)) if len(puts) > 1 { f.Date = time.Now() if !hasUpdated { f.Updated = f.Date } } if f.Url == "" { c.Criticalf("badurl6: %v", f) return errors.New("badurl6") } gn.PutMultiComplete(puts) return nil }
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story, updateAll, fromSub, updateLast bool) error { gn := goon.FromContext(c) f := Feed{Url: url} if err := gn.Get(&f); err != nil { return fmt.Errorf("feed not found: %s", url) } gn.Put(&Log{ Parent: gn.Key(&f), Id: time.Now().UnixNano(), Text: "feed update", }) // Compare the feed's listed update to the story's update. // Note: these may not be accurate, hence, only compare them to each other, // since they should have the same relative error. storyDate := f.Updated hasUpdated := !feed.Updated.IsZero() isFeedUpdated := f.Updated.Equal(feed.Updated) if !hasUpdated { feed.Updated = f.Updated } feed.Date = f.Date feed.Average = f.Average feed.LastViewed = f.LastViewed f = *feed if updateLast { f.LastViewed = time.Now() } if hasUpdated && isFeedUpdated && !updateAll && !fromSub { c.Infof("feed %s already updated to %v, putting", url, feed.Updated) f.Updated = time.Now() scheduleNextUpdate(&f) gn.Put(&f) return nil } c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v, stories: %v", hasUpdated, isFeedUpdated, storyDate, len(stories)) puts := []interface{}{&f} // find non existant stories fk := gn.Key(&f) getStories := make([]*Story, len(stories)) for i, s := range stories { getStories[i] = &Story{Id: s.Id, Parent: fk} } err := gn.GetMulti(getStories) if _, ok := err.(appengine.MultiError); err != nil && !ok { c.Errorf("get multi error: %v", err.Error()) return err } var updateStories []*Story for i, s := range getStories { if goon.NotFound(err, i) { updateStories = append(updateStories, stories[i]) } else if (!stories[i].Updated.IsZero() && !stories[i].Updated.Equal(s.Updated)) || updateAll { stories[i].Created = s.Created stories[i].Published = s.Published updateStories = append(updateStories, stories[i]) } } c.Debugf("%v update stories", len(updateStories)) for _, s := range updateStories { puts = append(puts, s) sc := StoryContent{ Id: 1, Parent: gn.Key(s), } buf := &bytes.Buffer{} if gz, err 
:= gzip.NewWriterLevel(buf, gzip.BestCompression); err == nil { gz.Write([]byte(s.content)) gz.Close() sc.Compressed = buf.Bytes() } if len(sc.Compressed) == 0 { sc.Content = s.content } gn.Put(&sc) } c.Debugf("putting %v entities", len(puts)) if len(puts) > 1 { updateAverage(&f, f.Date, len(puts)-1) f.Date = time.Now() if !hasUpdated { f.Updated = f.Date } } scheduleNextUpdate(&f) if fromSub { wait := time.Now().Add(time.Hour * 6) if f.NextUpdate.Before(wait) { f.NextUpdate = time.Now().Add(time.Hour * 6) } } delay := f.NextUpdate.Sub(time.Now()) c.Infof("next update scheduled for %v from now", delay-delay%time.Second) gn.PutMulti(puts) return nil }
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) { cu := user.Current(c) gn := goon.FromContext(c) u := &User{Id: cu.ID} ud := &UserData{Id: "data", Parent: gn.Key(u)} gn.GetMulti([]interface{}{u, ud}) put := false fixRead := false if time.Since(u.Read) > oldDuration { c.Warningf("u.Read too old, fixing: %v", u.Read) u.Read = time.Now().Add(-oldDuration) put = true fixRead = true c.Warningf("new: %v", u.Read) } read := make(Read) var uf Opml c.Step("unmarshal user data", func() { gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read) json.Unmarshal(ud.Opml, &uf) }) var feeds []*Feed opmlMap := make(map[string]*OpmlOutline) var merr error c.Step("fetch feeds", func() { for _, outline := range uf.Outline { if outline.XmlUrl == "" { for _, so := range outline.Outline { feeds = append(feeds, &Feed{Url: so.XmlUrl}) opmlMap[so.XmlUrl] = so } } else { feeds = append(feeds, &Feed{Url: outline.XmlUrl}) opmlMap[outline.XmlUrl] = outline } } merr = gn.GetMulti(feeds) }) lock := sync.Mutex{} fl := make(map[string][]*Story) q := datastore.NewQuery(gn.Key(&Story{}).Kind()) hasStories := false updatedLinks := false icons := make(map[string]string) now := time.Now() numStories := 0 c.Step("feed fetch + wait", func() { queue := make(chan *Feed) wg := sync.WaitGroup{} feedProc := func() { for f := range queue { defer wg.Done() var stories []*Story if u.Read.Before(f.Date) { fk := gn.Key(f) sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL) keys, _ := gn.GetAll(sq, nil) stories = make([]*Story, len(keys)) for j, key := range keys { stories[j] = &Story{ Id: key.StringID(), Parent: fk, } } gn.GetMulti(stories) } if f.Link != opmlMap[f.Url].HtmlUrl { updatedLinks = true opmlMap[f.Url].HtmlUrl = f.Link } if f.Errors == 0 && f.NextUpdate.Before(now) { t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{ "feed": {f.Url}, }) if _, err := taskqueue.Add(c, t, "update-manual"); err != nil { c.Errorf("taskqueue error: %v", 
err.Error()) } else { c.Warningf("manual feed update: %v", f.Url) } } f.Subscribe(c) lock.Lock() fl[f.Url] = stories numStories += len(stories) if len(stories) > 0 { hasStories = true } if f.Image != "" { icons[f.Url] = f.Image } lock.Unlock() } } for i := 0; i < 20; i++ { go feedProc() } for i, f := range feeds { if goon.NotFound(merr, i) { continue } wg.Add(1) queue <- f } close(queue) wg.Wait() }) if numStories > numStoriesLimit { c.Step("numStories", func() { c.Errorf("too many stories: %v", numStories) stories := make([]*Story, 0, numStories) for _, v := range fl { stories = append(stories, v...) } sort.Sort(sort.Reverse(Stories(stories))) last := stories[numStoriesLimit].Created stories = stories[:numStoriesLimit] u.Read = last put = true fixRead = true fl = make(map[string][]*Story) for _, s := range stories { fk := s.Parent.StringID() p := fl[fk] fl[fk] = append(p, s) } c.Errorf("filtered: %v, %v", len(stories), last) }) } if fixRead { c.Step("fix read", func() { nread := make(Read) for k, v := range fl { for _, s := range v { rs := readStory{Feed: k, Story: s.Id} if read[rs] { nread[rs] = true } } } c.Errorf("fix read: %v -> %v", len(read), len(nread)) read = nread var b bytes.Buffer gob.NewEncoder(&b).Encode(&read) ud.Read = b.Bytes() put = true }) } for k, v := range fl { newStories := make([]*Story, 0, len(v)) for _, s := range v { if !read[readStory{Feed: k, Story: s.Id}] { newStories = append(newStories, s) } } fl[k] = newStories } if !hasStories { var last time.Time for _, f := range feeds { if last.Before(f.Date) { last = f.Date } } if u.Read.Before(last) { c.Debugf("setting %v read to %v", cu.ID, last) put = true u.Read = last ud.Read = nil } } if updatedLinks { ud.Opml, _ = json.Marshal(&uf) put = true } if put { gn.PutMany(u, ud) } c.Step("json marshal", func() { o := struct { Opml []*OpmlOutline Stories map[string][]*Story Icons map[string]string Options string }{ Opml: uf.Outline, Stories: fl, Icons: icons, Options: u.Options, } b, err := 
json.Marshal(o) if err != nil { c.Errorf("cleaning") for _, v := range fl { for _, s := range v { n := cleanNonUTF8(s.Summary) if n != s.Summary { s.Summary = n c.Errorf("cleaned %v", s.Id) gn.Put(s) } } } b, _ = json.Marshal(o) } w.Write(b) }) }
// ListFeeds writes the signed-in user's OPML outline, per-feed unread
// stories, display options, trial status, and feed records as JSON.
// It enforces the subscription trial (when Stripe is configured), enqueues
// feed-update tasks for stale/overdue feeds, and appends a Log entity
// describing what it did.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	// A NotFound on index 1 (the UserData) is tolerated: a new user simply
	// has no stored data yet.
	if err := gn.GetMulti([]interface{}{u, ud}); err != nil && !goon.NotFound(err, 1) {
		serveError(w, err)
		return
	}
	// Audit log entry for this request; Text is appended to as work happens.
	l := &Log{
		Parent: ud.Parent,
		Id:     time.Now().UnixNano(),
		Text:   "list feeds",
	}
	l.Text += fmt.Sprintf(", len opml %v", len(ud.Opml))
	putU := false
	putUD := false
	fixRead := false
	// Clamp an ancient read marker; the read-state blob is rebuilt below.
	if time.Since(u.Read) > oldDuration {
		u.Read = time.Now().Add(-oldDuration)
		putU = true
		fixRead = true
		l.Text += ", u.Read"
	}
	trialRemaining := 0
	// Subscription gate: only enforced when Stripe is configured and the
	// user actually has stored feeds.
	if STRIPE_KEY != "" && ud.Opml != nil {
		if u.Created.IsZero() {
			u.Created = time.Now()
			putU = true
		} else if time.Since(u.Created) > accountFreeDuration {
			// Trial expired: report and stop.
			b, _ := json.Marshal(struct {
				ErrorSubscription bool
			}{
				true,
			})
			w.Write(b)
			return
		}
		trialRemaining = int((accountFreeDuration-time.Since(u.Created))/time.Hour/24) + 1
	}
	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func(c mpg.Context) {
		// Decode errors ignored: zero values mean "nothing read yet".
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func(c mpg.Context) {
		// Extended 1-minute deadline for the batched feed fetch.
		gn := goon.FromContext(appengine.Timeout(c, time.Minute))
		// Flatten the OPML tree (one level of folders) into a feed list.
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		// merr may be a MultiError; per-feed misses are checked below.
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	// Shared base query: stories at/after the read marker, newest first,
	// keys only, capped at 250 per feed.
	q := datastore.NewQuery(gn.Key(&Story{}).Kind()).
		Filter(IDX_COL+" >=", u.Read).
		KeysOnly().
		Order("-" + IDX_COL).
		Limit(250)
	updatedLinks := false
	now := time.Now()
	numStories := 0
	c.Step(fmt.Sprintf("feed unreads: %v", u.Read), func(c mpg.Context) {
		queue := make(chan *Feed)
		tc := make(chan *taskqueue.Task)
		done := make(chan bool)
		wg := sync.WaitGroup{}
		// Worker: fetch unread story keys+entities for each queued feed.
		feedProc := func() {
			for f := range queue {
				c.Step(f.Title, func(c mpg.Context) {
					defer wg.Done()
					var stories []*Story
					gn := goon.FromContext(appengine.Timeout(c, time.Minute))
					if !f.Date.Before(u.Read) {
						fk := gn.Key(f)
						sq := q.Ancestor(fk)
						keys, _ := gn.GetAll(sq, nil)
						stories = make([]*Story, len(keys))
						for j, key := range keys {
							stories[j] = &Story{
								Id:     key.StringID(),
								Parent: fk,
							}
						}
						gn.GetMulti(stories)
					}
					// NOTE(review): l.Text, updatedLinks, and opmlMap values
					// are mutated here from up to 20 goroutines without
					// holding lock — looks like a data race; confirm.
					if f.Link != opmlMap[f.Url].HtmlUrl {
						l.Text += fmt.Sprintf(", link: %v -> %v", opmlMap[f.Url].HtmlUrl, f.Link)
						updatedLinks = true
						opmlMap[f.Url].HtmlUrl = f.Link
					}
					manualDone := false
					// Feeds not viewed for 2+ days get demoted update tasks.
					if time.Since(f.LastViewed) > time.Hour*24*2 {
						if f.NextUpdate.Equal(timeMax) {
							tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
								"feed": {f.Url},
								"last": {"1"},
							})
							manualDone = true
						} else {
							tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-last"), url.Values{
								"feed": {f.Url},
							})
						}
					}
					// Overdue feeds get a manual refresh task.
					if !manualDone && now.Sub(f.NextUpdate) >= 0 {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
							"feed": {f.Url},
						})
					}
					lock.Lock()
					fl[f.Url] = stories
					numStories += len(stories)
					lock.Unlock()
				})
			}
		}
		// Background sender drains tc and signals done when tc is closed.
		go taskSender(c, "update-manual", tc, done)
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		// wait for feeds to complete so there are no more tasks to queue
		wg.Wait()
		// then finish enqueuing tasks
		close(tc)
		<-done
	})
	if numStories > 0 {
		c.Step("numStories", func(c mpg.Context) {
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			// Keep only the newest numStoriesLimit stories overall,
			// regrouping the survivors by feed.
			if len(stories) > numStoriesLimit {
				stories = stories[:numStoriesLimit]
				fl = make(map[string][]*Story)
				for _, s := range stories {
					fk := s.Parent.StringID()
					p := fl[fk]
					fl[fk] = append(p, s)
				}
			}
			// Advance the read marker to the oldest story kept.
			last := stories[len(stories)-1].Created
			if u.Read.Before(last) {
				u.Read = last
				putU = true
				fixRead = true
			}
		})
	}
	if fixRead {
		c.Step("fix read", func(c mpg.Context) {
			// Drop read-marks for stories no longer displayed.
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			if len(nread) != len(read) {
				read = nread
				var b bytes.Buffer
				gob.NewEncoder(&b).Encode(&read)
				ud.Read = b.Bytes()
				putUD = true
				l.Text += ", fix read"
			}
		})
	}
	// Filter out stories the user has already read.
	numStories = 0
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		numStories += len(newStories)
		fl[k] = newStories
	}
	// Nothing unread: clear stored read state and move the marker up to
	// the newest feed date.
	if numStories == 0 {
		l.Text += ", clear read"
		fixRead = false
		if ud.Read != nil {
			putUD = true
			ud.Read = nil
		}
		last := u.Read
		for _, v := range feeds {
			if last.Before(v.Date) {
				last = v.Date
			}
		}
		c.Infof("nothing here, move up: %v -> %v", u.Read, last)
		if u.Read.Before(last) {
			putU = true
			u.Read = last
		}
	}
	if updatedLinks {
		// Snapshot OPML before rewriting it with the refreshed HtmlUrls.
		backupOPML(c)
		if o, err := json.Marshal(&uf); err == nil {
			ud.Opml = o
			putUD = true
			l.Text += ", update links"
		} else {
			c.Errorf("json UL err: %v, %v", err, uf)
		}
	}
	if putU {
		gn.Put(u)
		l.Text += ", putU"
	}
	if putUD {
		gn.Put(ud)
		l.Text += ", putUD"
	}
	l.Text += fmt.Sprintf(", len opml %v", len(ud.Opml))
	gn.Put(l)
	c.Step("json marshal", func(c mpg.Context) {
		gn := goon.FromContext(c)
		o := struct {
			Opml           []*OpmlOutline
			Stories        map[string][]*Story
			Options        string
			TrialRemaining int
			Feeds          []*Feed
		}{
			Opml:           uf.Outline,
			Stories:        fl,
			Options:        u.Options,
			TrialRemaining: trialRemaining,
			Feeds:          feeds,
		}
		b, err := json.Marshal(o)
		if err != nil {
			// Marshal failed, likely invalid UTF-8 in a summary: scrub,
			// persist the cleaned stories, and retry once.
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := sanitizer.CleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
// ListFeeds writes the signed-in user's OPML outline, per-feed unread
// stories, feed icons, and display options as JSON. In this revision the
// read state is JSON-encoded as per-feed story-id lists and filtering
// happens inside the feed workers.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	// Error intentionally ignored: a missing UserData leaves the zero value.
	gn.GetMulti([]interface{}{u, ud})
	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		// Decode errors ignored: zero values mean "nothing read yet".
		json.Unmarshal(ud.Read, &read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		// Flatten the OPML tree (one level of folders) into a feed list.
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		// merr may be a MultiError; per-feed misses are checked below.
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()
	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				// NOTE(review): defer inside a loop — these Done calls fire
				// only when the goroutine exits after queue is closed, not
				// per iteration. Works because wg.Wait follows close(queue),
				// but fragile; confirm intent.
				defer wg.Done()
				var newStories []*Story
				if u.Read.Before(f.Date) {
					c.Debugf("query for %v", f.Url)
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories := make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
					// Keep only stories not in this feed's read-id list.
					for _, st := range stories {
						found := false
						for _, s := range read[f.Url] {
							if s == st.Id {
								found = true
								break
							}
						}
						if !found {
							newStories = append(newStories, st)
						}
					}
				}
				// NOTE(review): updatedLinks and opmlMap values are written
				// here from many goroutines without holding lock — looks
				// like a data race; confirm.
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				// Overdue, error-free feeds get a manual refresh task.
				if f.Errors == 0 && f.NextUpdate.Before(now) {
					t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
						"feed": {f.Url},
					})
					if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
						c.Errorf("taskqueue error: %v", err.Error())
					} else {
						c.Warningf("manual feed update: %v", f.Url)
					}
				}
				f.Subscribe(c)
				lock.Lock()
				fl[f.Url] = newStories
				if len(newStories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				lock.Unlock()
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	// Nothing unread anywhere: move the marker up to the newest feed date
	// and clear stored read state.
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			u.Read = last
			ud.Read = nil
			gn.PutMany(u, ud)
		}
	}
	if updatedLinks {
		ud.Opml, _ = json.Marshal(&uf)
		gn.Put(ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			// Marshal failed, likely invalid UTF-8 in a summary: scrub,
			// persist the cleaned stories, and retry once.
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := cleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
	// Keeps the unicode/utf8 import referenced; presumably left over from
	// the UTF-8 cleaning work — TODO confirm it can be removed.
	_ = utf8.RuneError
}
// ListFeeds writes the user's OPML outline, unread stories, feed icons,
// per-feed ad-free flags, and display options as JSON. Feed-update tasks
// are batched through a channel and enqueued 100 at a time.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	// Error intentionally ignored: a missing UserData leaves the zero value.
	gn.GetMulti([]interface{}{u, ud})
	putU := false
	putUD := false
	fixRead := false
	// Clamp an ancient read marker; the read-state blob is rebuilt below.
	if time.Since(u.Read) > oldDuration {
		u.Read = time.Now().Add(-oldDuration)
		putU = true
		fixRead = true
	}
	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		// Decode errors ignored: zero values mean "nothing read yet".
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		// Flatten the OPML tree (one level of folders) into a feed list.
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		// merr may be a MultiError; per-feed misses are checked below.
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	noads := make(map[string]bool)
	now := time.Now()
	numStories := 0
	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		tc := make(chan *taskqueue.Task)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				// NOTE(review): defer inside a loop — these Done calls fire
				// only when the goroutine exits after queue is closed, not
				// per iteration. Works because wg.Wait follows close(queue),
				// but fragile; confirm intent.
				defer wg.Done()
				var stories []*Story
				if u.Read.Before(f.Date) {
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories = make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
				}
				// NOTE(review): updatedLinks and opmlMap values are written
				// here from many goroutines without holding lock — looks
				// like a data race; confirm.
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				manualDone := false
				// Feeds not viewed for 2+ days get demoted update tasks.
				if time.Since(f.LastViewed) > time.Hour*24*2 {
					if f.NextUpdate.Equal(timeMax) {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
							"feed": {f.Url},
							"last": {"1"},
						})
						manualDone = true
					} else {
						tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-last"), url.Values{
							"feed": {f.Url},
						})
					}
				}
				// Overdue feeds get a manual refresh task.
				if !manualDone && now.Sub(f.NextUpdate) >= 0 {
					tc <- taskqueue.NewPOSTTask(routeUrl("update-feed-manual"), url.Values{
						"feed": {f.Url},
					})
				}
				lock.Lock()
				fl[f.Url] = stories
				numStories += len(stories)
				if len(stories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				if f.NoAds {
					noads[f.Url] = true
				}
				lock.Unlock()
			}
		}
		// Task sender: drains tc, enqueuing update tasks in batches of 100.
		go func() {
			var tasks []*taskqueue.Task
			for t := range tc {
				tasks = append(tasks, t)
				if len(tasks) == 100 {
					taskqueue.AddMulti(c, tasks, "update-manual")
					c.Infof("added %v tasks", len(tasks))
					tasks = tasks[0:0]
				}
			}
			if len(tasks) > 0 {
				taskqueue.AddMulti(c, tasks, "update-manual")
				c.Infof("added %v tasks", len(tasks))
			}
			// Signals sender completion; the matching wg.Add(1) happens
			// after the first wg.Wait below, before tc is closed.
			wg.Done()
		}()
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		// wait for feeds to complete so there are no more tasks to queue
		wg.Wait()
		wg.Add(1)
		// then finish enqueuing tasks
		close(tc)
		wg.Wait()
	})
	// Too many unread stories: keep only the newest numStoriesLimit overall
	// and advance the read marker past the rest.
	if numStories > numStoriesLimit {
		c.Step("numStories", func() {
			c.Infof("too many stories: %v", numStories)
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			last := stories[numStoriesLimit].Created
			stories = stories[:numStoriesLimit]
			u.Read = last
			putU = true
			fixRead = true
			fl = make(map[string][]*Story)
			for _, s := range stories {
				fk := s.Parent.StringID()
				p := fl[fk]
				fl[fk] = append(p, s)
			}
			c.Infof("filtered: %v, %v", len(stories), last)
		})
	}
	// Drop read-marks for stories no longer displayed.
	if fixRead {
		c.Step("fix read", func() {
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			read = nread
			var b bytes.Buffer
			gob.NewEncoder(&b).Encode(&read)
			ud.Read = b.Bytes()
			putUD = true
		})
	}
	// Filter out stories the user has already read.
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		fl[k] = newStories
	}
	// Nothing unread anywhere: move the marker up to the newest feed date
	// and clear stored read state.
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			putU = true
			putUD = true
			u.Read = last
			ud.Read = nil
		}
	}
	if updatedLinks {
		// Snapshot OPML before rewriting it with the refreshed HtmlUrls.
		backupOPML(c)
		if o, err := json.Marshal(&uf); err == nil {
			ud.Opml = o
			putUD = true
		} else {
			saveError(c, fmt.Sprintf("%v", uf), err)
			c.Errorf("json UL err: %v, %v", err, uf)
		}
	}
	if putU {
		gn.Put(u)
	}
	if putUD {
		gn.Put(ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			NoAds   map[string]bool
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			NoAds:   noads,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			// Marshal failed, likely invalid UTF-8 in a summary: scrub,
			// persist the cleaned stories, and retry once.
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := sanitizer.CleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}