func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(appengine.Timeout(c, time.Minute)) url := r.FormValue("feed") if url == "" { c.Errorf("empty update feed") return } c.Debugf("update feed %s", url) last := len(r.FormValue("last")) > 0 f := Feed{Url: url} s := "" defer func() { gn.Put(&Log{ Parent: gn.Key(&f), Id: time.Now().UnixNano(), Text: "UpdateFeed - " + s, }) }() if err := gn.Get(&f); err == datastore.ErrNoSuchEntity { c.Errorf("no such entity - " + url) s += "NSE" return } else if err != nil { s += "err - " + err.Error() return } else if last { // noop } else if time.Now().Before(f.NextUpdate) { c.Errorf("feed %v already updated: %v", url, f.NextUpdate) s += "already updated" return } feedError := func(err error) { s += "feed err - " + err.Error() f.Errors++ v := f.Errors + 1 const max = 24 * 7 if v > max { v = max } else if f.Errors == 1 { v = 0 } f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v)) gn.Put(&f) c.Warningf("error with %v (%v), bump next update to %v, %v", url, f.Errors, f.NextUpdate, err) } if feed, stories, err := fetchFeed(c, f.Url, f.Url); err == nil { if err := updateFeed(c, f.Url, feed, stories, false, false, last); err != nil { feedError(err) } else { s += "success" } } else { feedError(err) } f.Subscribe(c) }
func ImportOpmlTask(c mpg.Context, w http.ResponseWriter, r *http.Request) { gn := goon.FromContext(c) userid := r.FormValue("user") bk := r.FormValue("key") del := func() { blobstore.Delete(c, appengine.BlobKey(bk)) } var skip int if s, err := strconv.Atoi(r.FormValue("skip")); err == nil { skip = s } c.Debugf("reader import for %v, skip %v", userid, skip) d := xml.NewDecoder(blobstore.NewReader(c, appengine.BlobKey(bk))) d.CharsetReader = charset.NewReader d.Strict = false opml := Opml{} err := d.Decode(&opml) if err != nil { del() c.Warningf("gob decode failed: %v", err.Error()) return } remaining := skip var userOpml []*OpmlOutline var proc func(label string, outlines []*OpmlOutline) proc = func(label string, outlines []*OpmlOutline) { for _, o := range outlines { if o.Title == "" { o.Title = o.Text } if o.XmlUrl != "" { if remaining > 0 { remaining-- } else if len(userOpml) < IMPORT_LIMIT { userOpml = append(userOpml, &OpmlOutline{ Title: label, Outline: []*OpmlOutline{o}, }) } } if o.Title != "" && len(o.Outline) > 0 { proc(o.Title, o.Outline) } } } proc("", opml.Outline) // todo: refactor below with similar from ImportReaderTask wg := sync.WaitGroup{} wg.Add(len(userOpml)) for i := range userOpml { go func(i int) { o := userOpml[i].Outline[0] if err := addFeed(c, userid, userOpml[i]); err != nil { c.Warningf("opml import error: %v", err.Error()) // todo: do something here? } c.Debugf("opml import: %s, %s", o.Title, o.XmlUrl) wg.Done() }(i) } wg.Wait() ud := UserData{Id: "data", Parent: gn.Key(&User{Id: userid})} if err := gn.RunInTransaction(func(gn *goon.Goon) error { gn.Get(&ud) if err := mergeUserOpml(c, &ud, userOpml...); err != nil { return err } _, err := gn.Put(&ud) return err }, nil); err != nil { w.WriteHeader(http.StatusInternalServerError) c.Errorf("ude update error: %v", err.Error()) return } if len(userOpml) == IMPORT_LIMIT { task := taskqueue.NewPOSTTask(routeUrl("import-opml-task"), url.Values{ "key": {bk}, "user": {userid}, "skip": {strconv.Itoa(skip + IMPORT_LIMIT)}, }) taskqueue.Add(c, task, "import-reader") } else { del() c.Infof("opml import done: %v", userid) } }
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story, error) { u, err := url.Parse(fetchUrl) if err != nil { return nil, nil, err } if u.Host == "" { u.Host = u.Path u.Path = "" } const clURL = "craigslist.org" if strings.HasSuffix(u.Host, clURL) || u.Host == clURL { return nil, nil, fmt.Errorf("Craigslist blocks our server host: not possible to subscribe") } if u.Scheme == "" { u.Scheme = "http" origUrl = u.String() fetchUrl = origUrl if origUrl == "" { return nil, nil, fmt.Errorf("bad URL") } } cl := &http.Client{ Transport: &urlfetch.Transport{ Context: c, Deadline: time.Minute, }, } if resp, err := cl.Get(fetchUrl); err == nil && resp.StatusCode == http.StatusOK { const sz = 1 << 21 reader := &io.LimitedReader{R: resp.Body, N: sz} defer resp.Body.Close() b, err := ioutil.ReadAll(reader) if err != nil { return nil, nil, err } if reader.N == 0 { return nil, nil, fmt.Errorf("feed larger than %d bytes", sz) } if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl { if autoU, err := url.Parse(autoUrl); err == nil { if autoU.Scheme == "" { autoU.Scheme = u.Scheme } if autoU.Host == "" { autoU.Host = u.Host } autoUrl = autoU.String() } if autoUrl != fetchUrl { return fetchFeed(c, origUrl, autoUrl) } } return ParseFeed(c, resp.Header.Get("Content-Type"), origUrl, fetchUrl, b) } else if err != nil { c.Warningf("fetch feed error: %v", err) return nil, nil, fmt.Errorf("Could not fetch feed") } else { c.Warningf("fetch feed error: status code: %s", resp.Status) return nil, nil, fmt.Errorf("Bad response code from server") } }