func CFix(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(c) url := r.FormValue("feed") c.Infof("fix feed %s", url) f := Feed{Url: url} if err := gn.Get(&f); err != nil { c.Criticalf("cfix err: %v", err) serveError(w, err) return } q := datastore.NewQuery("S").Ancestor(gn.Key(&f)) var ss []*Story keys, err := q.GetAll(c, &ss) if err != nil { c.Errorf("getall err: %v", err) serveError(w, err) return } c.Infof("trying to fix %v stories", len(ss)) const putLimit = 500 for i := 0; i <= len(keys)/putLimit; i++ { lo := i * putLimit hi := (i + 1) * putLimit if hi > len(keys) { hi = len(keys) } c.Infof("%v - %v", lo, hi) if _, err := datastore.PutMulti(c, keys[lo:hi], ss[lo:hi]); err != nil { c.Errorf("err: %v, %v, %v", lo, hi, err) } } }
func DeleteOldFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) { g := goon.FromContext(c) oldDate := time.Now().Add(-time.Hour * 24 * 90) feed := Feed{Url: r.FormValue("f")} if err := g.Get(&feed); err != nil { c.Criticalf("err: %v", err) return } if feed.LastViewed.After(oldDate) { return } q := datastore.NewQuery(g.Kind(&Story{})).Ancestor(g.Key(&feed)).KeysOnly() keys, err := q.GetAll(c, nil) if err != nil { c.Criticalf("err: %v", err) return } q = datastore.NewQuery(g.Kind(&StoryContent{})).Ancestor(g.Key(&feed)).KeysOnly() sckeys, err := q.GetAll(c, nil) if err != nil { c.Criticalf("err: %v", err) return } keys = append(keys, sckeys...) c.Infof("delete: %v - %v", feed.Url, len(keys)) err = g.DeleteMulti(keys) if err != nil { c.Criticalf("err: %v", err) } }
// BackendStart is currently a no-op: the first statement is an unconditional
// return, so everything below it is unreachable and kept only for reference.
//
// The disabled body defines a self-rescheduling background function that
// walks "F" keys sz at a time, loads each feed and calls Subscribe on it,
// remembering its position between rounds in a datastore cursor string.
func BackendStart(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	// Handler disabled: nothing past this point executes.
	return
	// Number of feed keys processed per round.
	const sz = 100
	// Round counter, shared across rounds through the closure.
	ic := 0
	var f func(appengine.Context)
	// Encoded cursor of where the previous round stopped ("" on the first round).
	var cs string
	f = func(c appengine.Context) {
		gn := goon.FromContext(c)
		c.Errorf("ic: %d", ic)
		wg := sync.WaitGroup{}
		// One Done is expected per key; see the early return below.
		wg.Add(sz)
		var j int64
		q := datastore.NewQuery("F").KeysOnly()
		if cs != "" {
			// Resume from the previous round; an undecodable cursor is
			// ignored and the query starts from the beginning.
			if cur, err := datastore.DecodeCursor(cs); err == nil {
				q = q.Start(cur)
				c.Errorf("cur start: %v", cur)
			}
		}
		it := q.Run(c)
		for j = 0; j < sz; j++ {
			k, err := it.Next(nil)
			c.Errorf("%v: %v, %v", j, k, err)
			if err != nil {
				// Any iterator error (including running out of keys) ends
				// the whole chain: this returns before wg.Wait and before
				// the next round is scheduled.
				c.Criticalf("err: %v", err)
				return
			}
			go func(k *datastore.Key) {
				// Note: this f is a Feed and shadows the outer closure f.
				f := Feed{Url: k.StringID()}
				if err := gn.Get(&f); err == nil {
					f.Subscribe(c)
				}
				wg.Done()
			}(k)
		}
		cur, err := it.Cursor()
		if err == nil {
			cs = cur.String()
		}
		// Wait for all sz subscriptions of this round before scheduling the next.
		wg.Wait()
		ic++
		// NOTE(review): runtime here is presumably the App Engine runtime
		// package rather than the standard library one; confirm in imports.
		runtime.RunInBackground(c, f)
	}
	runtime.RunInBackground(c, f)
}
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story) { u, err := url.Parse(fetchUrl) _orig := origUrl if err == nil && u.Scheme == "" { u.Scheme = "http" origUrl = u.String() fetchUrl = origUrl if origUrl == "" { c.Criticalf("badurl1: %v, %v, %v, %v", _orig, u, origUrl, fetchUrl) return nil, nil } } if strings.TrimSpace(origUrl) == "" { c.Criticalf("badurl2: %v, %v", _orig, origUrl) return nil, nil } cl := &http.Client{ Transport: &urlfetch.Transport{ Context: c, Deadline: time.Minute, }, } if resp, err := cl.Get(fetchUrl); err == nil && resp.StatusCode == http.StatusOK { defer resp.Body.Close() b, _ := ioutil.ReadAll(resp.Body) if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl { if autoU, err := url.Parse(autoUrl); err == nil { if autoU.Scheme == "" { autoU.Scheme = u.Scheme } if autoU.Host == "" { autoU.Host = u.Host } autoUrl = autoU.String() } return fetchFeed(c, origUrl, autoUrl) } return ParseFeed(c, origUrl, b) } else if err != nil { c.Warningf("fetch feed error: %s", err.Error()) } else { c.Warningf("fetch feed error: status code: %s", resp.Status) } return nil, nil }
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(c) url := r.FormValue("feed") c.Debugf("update feed %s", url) f := Feed{Url: url} if err := gn.Get(&f); err == datastore.ErrNoSuchEntity { c.Errorf("no such entity") return } else if err != nil { c.Errorf("badurl7 error: %v", err.Error()) return } else if time.Now().Before(f.NextUpdate) { c.Infof("feed %v already updated", url) return } if f.Url == "" { c.Criticalf("badurl7: %v", url) return } feedError := func() { f.Errors++ v := f.Errors + 1 const max = 24 * 7 if v > max { v = max } else if f.Errors == 1 { v = 0 } f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v)) gn.Put(&f) c.Warningf("error with %v (%v), bump next update to %v", url, f.Errors, f.NextUpdate) } c.Infof("fetching") if feed, stories := fetchFeed(c, f.Url, f.Url); feed != nil { if err := updateFeed(c, f.Url, feed, stories); err != nil { feedError() } } else { feedError() } c.Infof("done") }
func addFeed(c mpg.Context, userid string, outline *OpmlOutline) error { gn := goon.FromContext(c) o := outline.Outline[0] c.Infof("adding feed %v to user %s", o.XmlUrl, userid) f := Feed{Url: o.XmlUrl} if err := gn.Get(&f); err == datastore.ErrNoSuchEntity { if feed, stories := fetchFeed(c, o.XmlUrl, o.XmlUrl); feed == nil { return errors.New(fmt.Sprintf("could not add feed %s", o.XmlUrl)) } else { f = *feed f.Updated = time.Time{} f.Checked = f.Updated f.NextUpdate = f.Updated if strings.TrimSpace(f.Url) == "" { c.Criticalf("badurl4: %v, %v", o.XmlUrl, o) return errors.New("badurl4") } gn.PutComplete(&f) if err := updateFeed(c, f.Url, feed, stories); err != nil { return err } o.XmlUrl = feed.Url o.HtmlUrl = feed.Link if o.Title == "" { o.Title = feed.Title } } } else if err != nil { return err } else { o.HtmlUrl = f.Link if o.Title == "" { o.Title = f.Title } } o.Text = "" return nil }
func DeleteOldFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) { ctx := appengine.Timeout(c, time.Minute) gn := goon.FromContext(c) q := datastore.NewQuery(gn.Kind(&Feed{})).Filter("n=", timeMax).KeysOnly() if cur, err := datastore.DecodeCursor(r.FormValue("c")); err == nil { q = q.Start(cur) } it := q.Run(ctx) done := false var tasks []*taskqueue.Task for i := 0; i < 10000 && len(tasks) < 100; i++ { k, err := it.Next(nil) if err == datastore.Done { c.Criticalf("done") done = true break } else if err != nil { c.Errorf("err: %v", err) continue } values := make(url.Values) values.Add("f", k.StringID()) tasks = append(tasks, taskqueue.NewPOSTTask("/tasks/delete-old-feed", values)) } if len(tasks) > 0 { c.Errorf("deleting %v feeds", len(tasks)) if _, err := taskqueue.AddMulti(c, tasks, ""); err != nil { c.Errorf("err: %v", err) } } if !done { if cur, err := it.Cursor(); err == nil { values := make(url.Values) values.Add("c", cur.String()) taskqueue.Add(c, taskqueue.NewPOSTTask("/tasks/delete-old-feeds", values), "") } else { c.Errorf("err: %v", err) } } }
func updateFeed(c mpg.Context, url string, feed *Feed, stories []*Story) error { gn := goon.FromContext(c) f := Feed{Url: url} if err := gn.Get(&f); err != nil { return errors.New(fmt.Sprintf("feed not found: %s", url)) } // Compare the feed's listed update to the story's update. // Note: these may not be accurate, hence, only compare them to each other, // since they should have the same relative error. storyDate := f.Updated hasUpdated := !feed.Updated.IsZero() isFeedUpdated := f.Updated == feed.Updated if !hasUpdated { feed.Updated = f.Updated } feed.Date = f.Date f = *feed if hasUpdated && isFeedUpdated { c.Infof("feed %s already updated to %v, putting", url, feed.Updated) f.Updated = time.Now() if strings.TrimSpace(f.Url) == "" { c.Criticalf("badurl5: %v, %v", url, f) return errors.New("badurl5") } gn.PutComplete(&f) return nil } c.Debugf("hasUpdate: %v, isFeedUpdated: %v, storyDate: %v", hasUpdated, isFeedUpdated, storyDate) var newStories []*Story for _, s := range stories { if s.Updated.IsZero() || !s.Updated.Before(storyDate) { newStories = append(newStories, s) } } c.Debugf("%v possible stories to update", len(newStories)) puts := []interface{}{&f} // find non existant stories fk := gn.Key(&f) getStories := make([]*Story, len(newStories)) for i, s := range newStories { getStories[i] = &Story{Id: s.Id, Parent: fk} } err := gn.GetMulti(getStories) if _, ok := err.(appengine.MultiError); err != nil && !ok { c.Errorf("get multi error: %v", err.Error()) return err } var updateStories []*Story for i, s := range getStories { if goon.NotFound(err, i) { updateStories = append(updateStories, newStories[i]) } else if !newStories[i].Updated.IsZero() && !newStories[i].Updated.Equal(s.Updated) { newStories[i].Created = s.Created newStories[i].Published = s.Published updateStories = append(updateStories, newStories[i]) } } c.Debugf("%v update stories", len(updateStories)) for _, s := range updateStories { puts = append(puts, s) gn.Put(&StoryContent{ Id: 1, Parent: 
gn.Key(s), Content: s.content, }) } c.Debugf("putting %v entities", len(puts)) if len(puts) > 1 { f.Date = time.Now() if !hasUpdated { f.Updated = f.Date } } if f.Url == "" { c.Criticalf("badurl6: %v", f) return errors.New("badurl6") } gn.PutMultiComplete(puts) return nil }