func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(c) url := r.FormValue("feed") c.Debugf("update feed %s", url) f := Feed{Url: url} if err := gn.Get(&f); err == datastore.ErrNoSuchEntity { return } else if time.Now().Before(f.NextUpdate) { c.Infof("feed %v already updated", url) return } if feed, stories := fetchFeed(c, url, url); feed != nil { updateFeed(c, url, feed, stories) } else { f.Errors++ v := f.Errors + 1 const max = 24 * 7 if v > max { v = max } else if f.Errors == 1 { v = 0 } f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v)) gn.Put(&f) c.Warningf("error with %v (%v), bump next update to %v", url, f.Errors, f.NextUpdate) } }
// UpdateFeed refreshes the feed named by the "feed" form value and records a
// Log entity describing the outcome. On fetch/update failure the feed's
// NextUpdate is pushed back (backoff in hours, capped at one week).
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	// Extend the request deadline: feed fetches can be slow.
	gn := goon.FromContext(appengine.Timeout(c, time.Minute))
	url := r.FormValue("feed")
	if url == "" {
		c.Errorf("empty update feed")
		return
	}
	c.Debugf("update feed %s", url)
	// "last" flags a forced/manual update request.
	last := len(r.FormValue("last")) > 0
	f := Feed{Url: url}
	s := ""
	// Always write an audit Log entity for this update attempt; s accumulates
	// a short outcome summary on every exit path below.
	defer func() {
		gn.Put(&Log{
			Parent: gn.Key(&f),
			Id:     time.Now().UnixNano(),
			Text:   "UpdateFeed - " + s,
		})
	}()
	if err := gn.Get(&f); err == datastore.ErrNoSuchEntity {
		c.Errorf("no such entity - " + url)
		s += "NSE"
		return
	} else if err != nil {
		s += "err - " + err.Error()
		return
	} else if last {
		// noop
		// NOTE(review): unlike the variant that chains this into the
		// NextUpdate check, here "last" has no effect — the staleness check
		// below still runs. Confirm this is intentional.
	}
	if time.Now().Before(f.NextUpdate) {
		c.Errorf("feed %v already updated: %v", url, f.NextUpdate)
		s += "already updated"
		return
	}
	// feedError applies the failure backoff and persists it.
	feedError := func(err error) {
		s += "feed err - " + err.Error()
		f.Errors++
		v := f.Errors + 1
		const max = 24 * 7 // one-week cap on the backoff, in hours
		if v > max {
			v = max
		} else if f.Errors == 1 {
			// First failure retries immediately.
			v = 0
		}
		f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v))
		gn.Put(&f)
		c.Warningf("error with %v (%v), bump next update to %v, %v", url, f.Errors, f.NextUpdate, err)
	}
	if feed, stories, err := fetchFeed(c, f.Url, f.Url); err == nil {
		if err := updateFeed(c, f.Url, feed, stories, false, false, last); err != nil {
			feedError(err)
		} else {
			s += "success"
		}
	} else {
		feedError(err)
	}
	// Refresh the PubSubHubbub subscription regardless of fetch outcome.
	f.Subscribe(c)
}
// fetchFeed downloads and parses the feed at fetchUrl. origUrl is the URL the
// user originally supplied; when the fetched document is an HTML page, one
// round of feed autodiscovery is attempted and fetchFeed recurses on the
// discovered URL. Returns the parsed feed, its stories, or an error.
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story, error) {
	u, err := url.Parse(fetchUrl)
	if err != nil {
		return nil, nil, err
	}
	// "example.com/feed" without a scheme parses as all-path; treat the
	// path as the host.
	if u.Host == "" {
		u.Host = u.Path
		u.Path = ""
	}
	const clURL = "craigslist.org"
	if strings.HasSuffix(u.Host, clURL) || u.Host == clURL {
		return nil, nil, fmt.Errorf("Craigslist blocks our server host: not possible to subscribe")
	}
	if u.Scheme == "" {
		// Default to http and rewrite both URLs to the normalized form.
		u.Scheme = "http"
		origUrl = u.String()
		fetchUrl = origUrl
		if origUrl == "" {
			return nil, nil, fmt.Errorf("bad URL")
		}
	}
	cl := &http.Client{
		Transport: &urlfetch.Transport{
			Context:  c,
			Deadline: time.Minute,
		},
	}
	if resp, err := cl.Get(fetchUrl); err == nil && resp.StatusCode == http.StatusOK {
		// Cap reads at 2 MiB to bound memory on hostile/huge responses.
		reader := io.LimitReader(resp.Body, 1<<21)
		defer resp.Body.Close()
		b, _ := ioutil.ReadAll(reader)
		// Autodiscovery only on the first hop (origUrl == fetchUrl) so a
		// discovered URL is never itself re-discovered.
		if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl {
			if autoU, err := url.Parse(autoUrl); err == nil {
				// Resolve scheme/host-relative discovered URLs against the
				// page we fetched them from.
				if autoU.Scheme == "" {
					autoU.Scheme = u.Scheme
				}
				if autoU.Host == "" {
					autoU.Host = u.Host
				}
				autoUrl = autoU.String()
			}
			// Guard against infinite recursion when discovery returns the
			// URL we just fetched.
			if autoUrl != fetchUrl {
				return fetchFeed(c, origUrl, autoUrl)
			}
		}
		return ParseFeed(c, origUrl, fetchUrl, b)
	} else if err != nil {
		c.Warningf("fetch feed error: %v", err)
		return nil, nil, fmt.Errorf("Could not fetch feed")
	} else {
		c.Warningf("fetch feed error: status code: %s", resp.Status)
		return nil, nil, fmt.Errorf("Bad response code from server")
	}
}
// SubscribeCallback is the PubSubHubbub endpoint for a feed. The feed URL is
// base64-encoded in the "feed" form value (or, for legacy URLs, a mux path
// var). GET handles the hub's subscription verification challenge; POST
// receives pushed feed content and applies it.
func SubscribeCallback(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	furl := r.FormValue("feed")
	oldURL := false
	if len(furl) == 0 {
		// Legacy route: feed was encoded in the path instead of the form.
		vars := mux.Vars(r)
		furl = vars["feed"]
		oldURL = true
	}
	// Decode error deliberately ignored; a bad value yields an empty URL and
	// the Get below 404s.
	b, _ := base64.URLEncoding.DecodeString(furl)
	f := Feed{Url: string(b)}
	c.Infof("url: %v", f.Url)
	if err := gn.Get(&f); err != nil {
		http.Error(w, "", http.StatusNotFound)
		return
	}
	if r.Method == "GET" {
		// Hub verification. Refuse to (re)verify on the legacy URL so those
		// subscriptions lapse.
		if oldURL {
			c.Warningf("old url")
			http.Error(w, "", http.StatusNotFound)
			return
		}
		// Only verify active feeds for a matching subscribe request.
		if f.NotViewed() || r.FormValue("hub.mode") != "subscribe" || r.FormValue("hub.topic") != f.Url {
			http.Error(w, "", http.StatusNotFound)
			return
		}
		// Echo the challenge to confirm, then record the lease expiry.
		w.Write([]byte(r.FormValue("hub.challenge")))
		i, _ := strconv.Atoi(r.FormValue("hub.lease_seconds"))
		f.Subscribed = time.Now().Add(time.Second * time.Duration(i))
		gn.Put(&f)
		c.Debugf("subscribed: %v - %v", f.Url, f.Subscribed)
		return
	} else if !f.NotViewed() {
		// Content push: parse the posted body and merge its stories.
		c.Infof("push: %v", f.Url)
		defer r.Body.Close()
		b, _ := ioutil.ReadAll(r.Body)
		nf, ss, err := ParseFeed(c, f.Url, b)
		if err != nil {
			c.Errorf("parse error: %v", err)
			return
		}
		if err := updateFeed(c, f.Url, nf, ss, false, true, false); err != nil {
			c.Errorf("push error: %v", err)
		}
	} else {
		// Nobody is reading this feed; drop the push.
		c.Infof("not viewed")
	}
}
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story) { u, err := url.Parse(fetchUrl) _orig := origUrl if err == nil && u.Scheme == "" { u.Scheme = "http" origUrl = u.String() fetchUrl = origUrl if origUrl == "" { c.Criticalf("badurl1: %v, %v, %v, %v", _orig, u, origUrl, fetchUrl) return nil, nil } } if strings.TrimSpace(origUrl) == "" { c.Criticalf("badurl2: %v, %v", _orig, origUrl) return nil, nil } cl := &http.Client{ Transport: &urlfetch.Transport{ Context: c, Deadline: time.Minute, }, } if resp, err := cl.Get(fetchUrl); err == nil && resp.StatusCode == http.StatusOK { defer resp.Body.Close() b, _ := ioutil.ReadAll(resp.Body) if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl { if autoU, err := url.Parse(autoUrl); err == nil { if autoU.Scheme == "" { autoU.Scheme = u.Scheme } if autoU.Host == "" { autoU.Host = u.Host } autoUrl = autoU.String() } return fetchFeed(c, origUrl, autoUrl) } return ParseFeed(c, origUrl, b) } else if err != nil { c.Warningf("fetch feed error: %s", err.Error()) } else { c.Warningf("fetch feed error: status code: %s", resp.Status) } return nil, nil }
// UpdateFeed refreshes the feed named by the "feed" form value. The "last"
// form value forces an update even if the feed is not yet due. Fetch/update
// failures push NextUpdate back by an hour-based backoff capped at one week.
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	url := r.FormValue("feed")
	c.Debugf("update feed %s", url)
	// "last" flags a forced/manual update request.
	last := len(r.FormValue("last")) > 0
	f := Feed{Url: url}
	if err := gn.Get(&f); err == datastore.ErrNoSuchEntity {
		c.Errorf("no such entity")
		return
	} else if err != nil {
		// Other datastore error: give up silently (no log in this revision).
		return
	} else if last {
		// noop — forced update bypasses the staleness check below.
	} else if time.Now().Before(f.NextUpdate) {
		c.Infof("feed %v already updated: %v", url, f.NextUpdate)
		return
	}
	// Refresh the PubSubHubbub subscription before fetching.
	f.Subscribe(c)
	// feedError applies the failure backoff and persists it.
	feedError := func(err error) {
		f.Errors++
		v := f.Errors + 1
		const max = 24 * 7 // one-week cap on the backoff, in hours
		if v > max {
			v = max
		} else if f.Errors == 1 {
			// First failure retries immediately.
			v = 0
		}
		f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v))
		gn.Put(&f)
		c.Warningf("error with %v (%v), bump next update to %v, %v", url, f.Errors, f.NextUpdate, err)
	}
	if feed, stories, err := fetchFeed(c, f.Url, f.Url); err == nil {
		if err := updateFeed(c, f.Url, feed, stories, false, false, last); err != nil {
			feedError(err)
		}
	} else {
		feedError(err)
	}
}
// ImportOpmlTask imports an OPML file (stored in blobstore under "key") for
// the given user, IMPORT_LIMIT feeds per invocation, re-queueing itself with
// an advanced "skip" offset until the whole file is processed.
func ImportOpmlTask(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	userid := r.FormValue("user")
	bk := r.FormValue("key")
	fr := blobstore.NewReader(c, appengine.BlobKey(bk))
	data, err := ioutil.ReadAll(fr)
	if err != nil {
		return
	}
	// "skip" = number of feeds already imported by earlier invocations.
	var skip int
	if s, err := strconv.Atoi(r.FormValue("skip")); err == nil {
		skip = s
	}
	c.Debugf("reader import for %v, skip %v", userid, skip)
	var userOpml []*OpmlOutline
	remaining := skip
	// proc walks the outline tree depth-first, skipping the first `skip`
	// feeds and collecting up to IMPORT_LIMIT feeds, each wrapped in a
	// single-folder outline labeled with its parent folder title.
	var proc func(label string, outlines []*OpmlOutline)
	proc = func(label string, outlines []*OpmlOutline) {
		for _, o := range outlines {
			if o.XmlUrl != "" {
				if remaining > 0 {
					remaining--
				} else if len(userOpml) < IMPORT_LIMIT {
					userOpml = append(userOpml, &OpmlOutline{
						Title:   label,
						Outline: []*OpmlOutline{o},
					})
				}
			}
			// Recurse into titled folders.
			if o.Title != "" && len(o.Outline) > 0 {
				proc(o.Title, o.Outline)
			}
		}
	}
	opml := Opml{}
	if err := xml.Unmarshal(data, &opml); err != nil {
		c.Errorf("opml error: %v", err.Error())
		return
	}
	proc("", opml.Outline)
	// todo: refactor below with similar from ImportReaderTask
	// Fetch/register every collected feed concurrently (bounded by
	// IMPORT_LIMIT, the max size of userOpml).
	wg := sync.WaitGroup{}
	wg.Add(len(userOpml))
	for i := range userOpml {
		go func(i int) {
			o := userOpml[i].Outline[0]
			if err := addFeed(c, userid, userOpml[i]); err != nil {
				c.Warningf("opml import error: %v", err.Error())
				// todo: do something here?
			}
			c.Debugf("opml import: %s, %s", o.Title, o.XmlUrl)
			wg.Done()
		}(i)
	}
	wg.Wait()
	// Merge the ENTIRE parsed OPML (not just this chunk) into the user's
	// stored subscription tree, transactionally.
	ud := UserData{Id: "data", Parent: gn.Key(&User{Id: userid})}
	if err := gn.RunInTransaction(func(gn *goon.Goon) error {
		gn.Get(&ud)
		mergeUserOpml(&ud, opml.Outline...)
		_, err := gn.Put(&ud)
		return err
	}, nil); err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		c.Errorf("ude update error: %v", err.Error())
		return
	}
	// A full chunk means there may be more feeds: queue the next slice.
	if len(userOpml) == IMPORT_LIMIT {
		task := taskqueue.NewPOSTTask(routeUrl("import-opml-task"), url.Values{
			"key":  {bk},
			"user": {userid},
			"skip": {strconv.Itoa(skip + IMPORT_LIMIT)},
		})
		taskqueue.Add(c, task, "import-reader")
	}
}
func ImportReaderTask(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(c) userid := r.FormValue("user") bk := r.FormValue("key") fr := blobstore.NewReader(c, appengine.BlobKey(bk)) data, err := ioutil.ReadAll(fr) if err != nil { return } var skip int if s, err := strconv.Atoi(r.FormValue("skip")); err == nil { skip = s } v := struct { Subscriptions []struct { Id string `json:"id"` Title string `json:"title"` HtmlUrl string `json:"htmlUrl"` Categories []struct { Id string `json:"id"` Label string `json:"label"` } `json:"categories"` } `json:"subscriptions"` }{} json.Unmarshal(data, &v) c.Debugf("reader import for %v, skip %v, len %v", userid, skip, len(v.Subscriptions)) end := skip + IMPORT_LIMIT if end > len(v.Subscriptions) { end = len(v.Subscriptions) } wg := sync.WaitGroup{} wg.Add(end - skip) userOpml := make([]*OpmlOutline, end-skip) for i := range v.Subscriptions[skip:end] { go func(i int) { sub := v.Subscriptions[skip+i] var label string if len(sub.Categories) > 0 { label = sub.Categories[0].Label } outline := &OpmlOutline{ Title: label, Outline: []*OpmlOutline{ &OpmlOutline{ XmlUrl: sub.Id[5:], Title: sub.Title, }, }, } userOpml[i] = outline if err := addFeed(c, userid, outline); err != nil { c.Warningf("reader import error: %v", err.Error()) // todo: do something here? } c.Debugf("reader import: %s, %s", sub.Title, sub.Id) wg.Done() }(i) } wg.Wait() ud := UserData{Id: "data", Parent: gn.Key(&User{Id: userid})} if err := gn.RunInTransaction(func(gn *goon.Goon) error { gn.Get(&ud) mergeUserOpml(&ud, userOpml...) 
_, err := gn.Put(&ud) return err }, nil); err != nil { w.WriteHeader(http.StatusInternalServerError) c.Errorf("ude update error: %v", err.Error()) return } if end < len(v.Subscriptions) { task := taskqueue.NewPOSTTask(routeUrl("import-reader-task"), url.Values{ "key": {bk}, "user": {userid}, "skip": {strconv.Itoa(skip + IMPORT_LIMIT)}, }) taskqueue.Add(c, task, "import-reader") } else { blobstore.Delete(c, appengine.BlobKey(bk)) } }
// ListFeeds renders the current user's subscription list as JSON: their OPML
// tree, unread stories per feed, feed icons, and UI options. Along the way it
// repairs stale read-state, queues manual updates for overdue feeds, and
// trims the story set if it exceeds numStoriesLimit.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})
	put := false     // whether u/ud need re-saving at the end
	fixRead := false // whether the read-story set must be re-filtered
	// Clamp an ancient read marker so the story query below stays bounded.
	if time.Since(u.Read) > oldDuration {
		c.Warningf("u.Read too old, fixing: %v", u.Read)
		u.Read = time.Now().Add(-oldDuration)
		put = true
		fixRead = true
		c.Warningf("new: %v", u.Read)
	}
	read := make(Read)
	var uf Opml
	// Read-state is gob-encoded, the OPML tree is JSON; decode errors are
	// deliberately ignored (empty values are usable).
	c.Step("unmarshal user data", func() {
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	// Flatten the (one-level) OPML tree into a feed list + URL->outline map.
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				// Folder: collect its children.
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		// merr is a multi-error; per-index misses are checked below.
		merr = gn.GetMulti(feeds)
	})
	lock := sync.Mutex{} // guards fl, numStories, hasStories, icons
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()
	numStories := 0
	// Load stories for all feeds with a fixed-size worker pool of 20.
	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				// NOTE(review): defer inside a loop — these Done calls all
				// fire only when feedProc returns (after queue is closed).
				// Correct here because close(queue) precedes wg.Wait(), but
				// fragile; a per-iteration Done would be safer.
				defer wg.Done()
				var stories []*Story
				// Only query stories newer than the user's read marker.
				if u.Read.Before(f.Date) {
					fk := gn.Key(f)
					sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
					keys, _ := gn.GetAll(sq, nil)
					stories = make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
				}
				// Sync the feed's homepage link back into the user's OPML.
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				// Healthy but overdue feed: queue a manual update task.
				if f.Errors == 0 && f.NextUpdate.Before(now) {
					t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
						"feed": {f.Url},
					})
					if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
						c.Errorf("taskqueue error: %v", err.Error())
					} else {
						c.Warningf("manual feed update: %v", f.Url)
					}
				}
				f.Subscribe(c)
				lock.Lock()
				fl[f.Url] = stories
				numStories += len(stories)
				if len(stories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				lock.Unlock()
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			// Skip feeds the earlier GetMulti failed to load.
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	// Too many unread stories: keep only the newest numStoriesLimit and
	// advance the read marker to the cutoff.
	if numStories > numStoriesLimit {
		c.Step("numStories", func() {
			c.Errorf("too many stories: %v", numStories)
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			last := stories[numStoriesLimit].Created
			stories = stories[:numStoriesLimit]
			u.Read = last
			put = true
			fixRead = true
			// Rebuild the per-feed map from the surviving stories.
			fl = make(map[string][]*Story)
			for _, s := range stories {
				fk := s.Parent.StringID()
				p := fl[fk]
				fl[fk] = append(p, s)
			}
			c.Errorf("filtered: %v, %v", len(stories), last)
		})
	}
	// Drop read-state entries for stories no longer in the visible set, then
	// persist the compacted gob.
	if fixRead {
		c.Step("fix read", func() {
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			c.Errorf("fix read: %v -> %v", len(read), len(nread))
			read = nread
			var b bytes.Buffer
			gob.NewEncoder(&b).Encode(&read)
			ud.Read = b.Bytes()
			put = true
		})
	}
	// Filter already-read stories out of the response.
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		fl[k] = newStories
	}
	// Nothing unread anywhere: fast-forward the read marker to the latest
	// feed date and clear the (now redundant) read set.
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			put = true
			u.Read = last
			ud.Read = nil
		}
	}
	if updatedLinks {
		ud.Opml, _ = json.Marshal(&uf)
		put = true
	}
	if put {
		gn.PutMany(u, ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			// Marshal failure is presumably invalid UTF-8 in summaries:
			// scrub them, persist the cleaned stories, and retry once.
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := cleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
// ListFeeds renders the current user's subscriptions as JSON: their OPML
// tree, unread stories per feed, feed icons, and UI options. Overdue healthy
// feeds get a manual update task queued as a side effect.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})
	read := make(Read)
	var uf Opml
	// Both blobs are JSON in this revision; decode errors are deliberately
	// ignored (empty values are usable).
	c.Step("unmarshal user data", func() {
		json.Unmarshal(ud.Read, &read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	// Flatten the (one-level) OPML tree into a feed list + URL->outline map.
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				// Folder: collect its children.
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		gn.GetMulti(feeds)
	})
	lock := sync.Mutex{} // guards fl, hasStories, icons
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()
	// Load unread stories for all feeds with a fixed-size worker pool of 20.
	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		feedProc := func() {
			for f := range queue {
				// NOTE(review): defer inside a loop — these Done calls all
				// fire only when feedProc returns (after queue is closed).
				// Correct because close(queue) precedes wg.Wait(), but fragile.
				defer wg.Done()
				var newStories []*Story
				// Only query stories newer than the user's read marker.
				if u.Read.Before(f.Date) {
					c.Debugf("query for %v", f.Url)
					fk := gn.Key(f)
					// "p" is presumably the story date/index property — verify
					// against the Story entity definition.
					sq := q.Ancestor(fk).Filter("p >", u.Read).KeysOnly().Order("-p")
					keys, _ := gn.GetAll(sq, nil)
					stories := make([]*Story, len(keys))
					for j, key := range keys {
						stories[j] = &Story{
							Id:     key.StringID(),
							Parent: fk,
						}
					}
					gn.GetMulti(stories)
					// Drop stories the user already read (linear scan per story).
					for _, st := range stories {
						found := false
						for _, s := range read[f.Url] {
							if s == st.Id {
								found = true
								break
							}
						}
						if !found {
							newStories = append(newStories, st)
						}
					}
				}
				// Sync the feed's homepage link back into the user's OPML.
				if f.Link != opmlMap[f.Url].HtmlUrl {
					updatedLinks = true
					opmlMap[f.Url].HtmlUrl = f.Link
				}
				// Healthy but overdue feed: queue a manual update task.
				if f.Errors == 0 && f.NextUpdate.Before(now) {
					t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
						"feed": {f.Url},
					})
					if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
						c.Errorf("taskqueue error: %v", err.Error())
					} else {
						c.Warningf("manual feed update: %v", f.Url)
					}
				}
				lock.Lock()
				fl[f.Url] = newStories
				if len(newStories) > 0 {
					hasStories = true
				}
				if f.Image != "" {
					icons[f.Url] = f.Image
				}
				lock.Unlock()
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		wg.Add(len(feeds))
		for _, f := range feeds {
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	// Nothing unread anywhere: fast-forward the read marker to the latest
	// feed date and clear the (now redundant) read set.
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			u.Read = last
			ud.Read = nil
			gn.PutMany(u, ud)
		}
	}
	if updatedLinks {
		ud.Opml, _ = json.Marshal(&uf)
		gn.Put(ud)
	}
	c.Step("json marshal", func() {
		// Marshal error deliberately ignored; b is nil and the response is
		// empty on failure.
		b, _ := json.Marshal(struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		})
		w.Write(b)
	})
}