Exemple #1
0
// UpdateFeed is the HTTP handler that refreshes the feed named by the "feed"
// form value.
//
// It returns without doing anything when the feed cannot be loaded from the
// datastore or when its NextUpdate time is still in the future. When the
// fetch yields no feed, the feed's error counter is incremented and
// NextUpdate is pushed back by a number of hours derived from that counter
// (zero hours on the first error, capped at 24*7 hours).
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	url := r.FormValue("feed")
	c.Debugf("update feed %s", url)
	f := Feed{Url: url}
	if err := gn.Get(&f); err == datastore.ErrNoSuchEntity {
		return
	} else if err != nil {
		// Any other datastore failure leaves f unpopulated; continuing would
		// fetch and possibly overwrite the stored feed with zero values.
		c.Warningf("update feed %v: get error: %v", url, err)
		return
	}
	if time.Now().Before(f.NextUpdate) {
		c.Infof("feed %v already updated", url)
		return
	}

	feed, stories := fetchFeed(c, url, url)
	if feed != nil {
		updateFeed(c, url, feed, stories)
		return
	}

	// Fetch failed: record the error and back off.
	f.Errors++
	v := f.Errors + 1
	const max = 24 * 7
	if v > max {
		v = max
	} else if f.Errors == 1 {
		// First failure: retry without delay.
		v = 0
	}
	f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v))
	gn.Put(&f)
	c.Warningf("error with %v (%v), bump next update to %v", url, f.Errors, f.NextUpdate)
}
Exemple #2
0
// UpdateFeed is the HTTP handler that refreshes the feed named by the "feed"
// form value. A non-empty "last" form value is forwarded to updateFeed.
//
// Every invocation that gets past the empty-URL check stores a Log entity
// under the feed's key describing the outcome ("NSE", "already updated",
// "success", or an error text). On fetch or update failure the feed's error
// counter is incremented and NextUpdate is pushed back by a number of hours
// derived from that counter (zero on the first error, capped at 24*7).
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(appengine.Timeout(c, time.Minute))
	url := r.FormValue("feed")
	if url == "" {
		c.Errorf("empty update feed")
		return
	}
	c.Debugf("update feed %s", url)
	last := len(r.FormValue("last")) > 0
	f := Feed{Url: url}
	// s accumulates the outcome text written by the deferred Log put.
	s := ""
	defer func() {
		gn.Put(&Log{
			Parent: gn.Key(&f),
			Id:     time.Now().UnixNano(),
			Text:   "UpdateFeed - " + s,
		})
	}()
	if err := gn.Get(&f); err == datastore.ErrNoSuchEntity {
		// url comes from the request; pass it as an argument so a '%' in it
		// is not interpreted as a formatting verb.
		c.Errorf("no such entity - %s", url)
		s += "NSE"
		return
	} else if err != nil {
		s += "err - " + err.Error()
		return
	}
	if time.Now().Before(f.NextUpdate) {
		c.Errorf("feed %v already updated: %v", url, f.NextUpdate)
		s += "already updated"
		return
	}

	// feedError records a failure on f and schedules the next attempt.
	feedError := func(err error) {
		s += "feed err - " + err.Error()
		f.Errors++
		v := f.Errors + 1
		const max = 24 * 7
		if v > max {
			v = max
		} else if f.Errors == 1 {
			// First failure: retry without delay.
			v = 0
		}
		f.NextUpdate = time.Now().Add(time.Hour * time.Duration(v))
		gn.Put(&f)
		c.Warningf("error with %v (%v), bump next update to %v, %v", url, f.Errors, f.NextUpdate, err)
	}

	if feed, stories, err := fetchFeed(c, f.Url, f.Url); err == nil {
		if err := updateFeed(c, f.Url, feed, stories, false, false, last); err != nil {
			feedError(err)
		} else {
			s += "success"
		}
	} else {
		feedError(err)
	}
	f.Subscribe(c)
}
Exemple #3
0
// fetchFeed downloads fetchUrl and parses the response as a feed.
//
// origUrl is the URL the caller asked for and fetchUrl the URL actually
// requested. When the two are equal and Autodiscover finds a different URL in
// the response body, that URL is fetched instead. A URL without a scheme is
// given "http", and a URL without a host has its path treated as the host.
// At most 1<<21 bytes of the response body are read.
//
// It returns an error when the URL cannot be parsed, the host is
// craigslist.org or one of its subdomains, the request fails, the response
// status is not 200, or the body cannot be read; otherwise it returns
// whatever ParseFeed returns.
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story, error) {
	u, err := url.Parse(fetchUrl)
	if err != nil {
		return nil, nil, err
	}
	if u.Host == "" {
		u.Host = u.Path
		u.Path = ""
	}
	const clURL = "craigslist.org"
	// Match the domain itself and real subdomains only; a bare suffix test
	// would also reject unrelated hosts such as "notcraigslist.org".
	if u.Host == clURL || strings.HasSuffix(u.Host, "."+clURL) {
		return nil, nil, fmt.Errorf("Craigslist blocks our server host: not possible to subscribe")
	}
	if u.Scheme == "" {
		u.Scheme = "http"
		origUrl = u.String()
		fetchUrl = origUrl
		if origUrl == "" {
			return nil, nil, fmt.Errorf("bad URL")
		}
	}

	cl := &http.Client{
		Transport: &urlfetch.Transport{
			Context:  c,
			Deadline: time.Minute,
		},
	}
	resp, err := cl.Get(fetchUrl)
	if err != nil {
		c.Warningf("fetch feed error: %v", err)
		return nil, nil, fmt.Errorf("Could not fetch feed")
	}
	// Close the body on every path, including non-200 responses.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		c.Warningf("fetch feed error: status code: %s", resp.Status)
		return nil, nil, fmt.Errorf("Bad response code from server")
	}

	b, err := ioutil.ReadAll(io.LimitReader(resp.Body, 1<<21))
	if err != nil {
		c.Warningf("fetch feed error: %v", err)
		return nil, nil, fmt.Errorf("Could not fetch feed")
	}
	if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl {
		// Resolve a relative discovered URL against the fetched one.
		if autoU, err := url.Parse(autoUrl); err == nil {
			if autoU.Scheme == "" {
				autoU.Scheme = u.Scheme
			}
			if autoU.Host == "" {
				autoU.Host = u.Host
			}
			autoUrl = autoU.String()
		}
		if autoUrl != fetchUrl {
			return fetchFeed(c, origUrl, autoUrl)
		}
	}
	return ParseFeed(c, origUrl, fetchUrl, b)
}
Exemple #4
0
// SubscribeCallback is the HTTP handler for hub callbacks on a feed.
//
// The feed URL arrives base64 (URL alphabet) encoded, either in the "feed"
// form value or, for the older route shape, in the "feed" mux variable.
//
// GET requests are subscription verifications: when the feed has been viewed
// and hub.mode/hub.topic match, hub.challenge is echoed back and the feed's
// Subscribed time is set from hub.lease_seconds. Requests using the older
// route shape are rejected. Any other method is treated as a content push:
// the body is parsed as a feed and handed to updateFeed, unless the feed is
// not viewed.
//
// Unknown feeds, undecodable feed parameters and failed verifications are
// answered with 404.
func SubscribeCallback(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	furl := r.FormValue("feed")
	oldURL := false
	if len(furl) == 0 {
		vars := mux.Vars(r)
		furl = vars["feed"]
		oldURL = true
	}
	b, err := base64.URLEncoding.DecodeString(furl)
	if err != nil {
		// A partially decoded value must not be used as a datastore key.
		c.Warningf("bad feed parameter: %v", err)
		http.Error(w, "", http.StatusNotFound)
		return
	}
	f := Feed{Url: string(b)}
	c.Infof("url: %v", f.Url)
	if err := gn.Get(&f); err != nil {
		http.Error(w, "", http.StatusNotFound)
		return
	}
	if r.Method == "GET" {
		if oldURL {
			c.Warningf("old url")
			http.Error(w, "", http.StatusNotFound)
			return
		}
		if f.NotViewed() || r.FormValue("hub.mode") != "subscribe" || r.FormValue("hub.topic") != f.Url {
			http.Error(w, "", http.StatusNotFound)
			return
		}
		w.Write([]byte(r.FormValue("hub.challenge")))
		// Best effort: the Atoi error is ignored and its returned value is
		// used as the lease length.
		i, _ := strconv.Atoi(r.FormValue("hub.lease_seconds"))
		f.Subscribed = time.Now().Add(time.Second * time.Duration(i))
		gn.Put(&f)
		c.Debugf("subscribed: %v - %v", f.Url, f.Subscribed)
		return
	}
	if f.NotViewed() {
		c.Infof("not viewed")
		return
	}

	c.Infof("push: %v", f.Url)
	defer r.Body.Close()
	body, err := ioutil.ReadAll(r.Body)
	if err != nil {
		c.Errorf("push read error: %v", err)
		return
	}
	nf, ss, err := ParseFeed(c, f.Url, body)
	if err != nil {
		c.Errorf("parse error: %v", err)
		return
	}
	if err := updateFeed(c, f.Url, nf, ss, false, true, false); err != nil {
		c.Errorf("push error: %v", err)
	}
}
Exemple #5
0
// fetchFeed downloads fetchUrl and parses the response as a feed.
//
// origUrl is the URL the caller asked for and fetchUrl the URL actually
// requested. A URL without a scheme is given "http". When origUrl equals
// fetchUrl and Autodiscover finds a different URL in the response body, that
// URL is fetched instead.
//
// It returns (nil, nil) when the URL is blank or unparsable, the request
// fails, or the response status is not 200; otherwise it returns whatever
// ParseFeed returns.
func fetchFeed(c mpg.Context, origUrl, fetchUrl string) (*Feed, []*Story) {
	u, err := url.Parse(fetchUrl)
	_orig := origUrl
	if err == nil && u.Scheme == "" {
		u.Scheme = "http"
		origUrl = u.String()
		fetchUrl = origUrl
		if origUrl == "" {
			c.Criticalf("badurl1: %v, %v, %v, %v", _orig, u, origUrl, fetchUrl)
			return nil, nil
		}
	}
	if strings.TrimSpace(origUrl) == "" {
		c.Criticalf("badurl2: %v, %v", _orig, origUrl)
		return nil, nil
	}
	if err != nil {
		// u is nil here; stop before it can be dereferenced below.
		c.Warningf("fetch feed error: %s", err.Error())
		return nil, nil
	}

	cl := &http.Client{
		Transport: &urlfetch.Transport{
			Context:  c,
			Deadline: time.Minute,
		},
	}
	resp, err := cl.Get(fetchUrl)
	if err != nil {
		c.Warningf("fetch feed error: %s", err.Error())
		return nil, nil
	}
	// Close the body on every path, including non-200 responses.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		c.Warningf("fetch feed error: status code: %s", resp.Status)
		return nil, nil
	}

	// Read errors are ignored: whatever was received is handed to the parser.
	b, _ := ioutil.ReadAll(resp.Body)
	if autoUrl, err := Autodiscover(b); err == nil && origUrl == fetchUrl {
		// Resolve a relative discovered URL against the fetched one.
		if autoU, err := url.Parse(autoUrl); err == nil {
			if autoU.Scheme == "" {
				autoU.Scheme = u.Scheme
			}
			if autoU.Host == "" {
				autoU.Host = u.Host
			}
			autoUrl = autoU.String()
		}
		// A page that advertises its own URL would otherwise recurse forever.
		if autoUrl != fetchUrl {
			return fetchFeed(c, origUrl, autoUrl)
		}
	}
	return ParseFeed(c, origUrl, b)
}
Exemple #6
0
// UpdateFeed is the HTTP handler that refreshes the feed named by the "feed"
// form value.
//
// The handler stops early when the feed cannot be loaded, or when its
// NextUpdate time is still in the future and no "last" form value was sent.
// Otherwise it calls Subscribe on the feed, fetches it and passes the result
// to updateFeed. A failure in either step increments the feed's error counter
// and pushes NextUpdate back by a number of hours derived from that counter
// (zero on the first error, capped at 24*7).
func UpdateFeed(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	url := r.FormValue("feed")
	c.Debugf("update feed %s", url)
	last := r.FormValue("last") != ""
	f := Feed{Url: url}

	getErr := gn.Get(&f)
	switch {
	case getErr == datastore.ErrNoSuchEntity:
		c.Errorf("no such entity")
		return
	case getErr != nil:
		return
	case last:
		// A "last" request skips the NextUpdate check below.
	case time.Now().Before(f.NextUpdate):
		c.Infof("feed %v already updated: %v", url, f.NextUpdate)
		return
	}
	f.Subscribe(c)

	// backOff records a failure on f and schedules the next attempt.
	backOff := func(cause error) {
		const max = 24 * 7
		f.Errors++
		hours := f.Errors + 1
		switch {
		case hours > max:
			hours = max
		case f.Errors == 1:
			// First failure: retry without delay.
			hours = 0
		}
		f.NextUpdate = time.Now().Add(time.Hour * time.Duration(hours))
		gn.Put(&f)
		c.Warningf("error with %v (%v), bump next update to %v, %v", url, f.Errors, f.NextUpdate, cause)
	}

	feed, stories, fetchErr := fetchFeed(c, f.Url, f.Url)
	if fetchErr != nil {
		backOff(fetchErr)
		return
	}
	if updErr := updateFeed(c, f.Url, feed, stories, false, false, last); updErr != nil {
		backOff(updErr)
	}
}
Exemple #7
0
// ImportOpmlTask is the task handler that imports one batch of feeds from an
// OPML document stored in the blobstore.
//
// Form values: "user" is the user id, "key" the blob key and "skip" the
// number of feed outlines to pass over before importing (0 when absent or
// unparsable). Up to IMPORT_LIMIT feed outlines after the skipped ones are
// added concurrently through addFeed, the whole parsed OPML is merged into
// the user's UserData inside a transaction, and when a full batch was
// collected a follow-up task is queued with skip advanced by IMPORT_LIMIT.
func ImportOpmlTask(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	gn := goon.FromContext(c)
	userid := r.FormValue("user")
	bk := r.FormValue("key")
	data, err := ioutil.ReadAll(blobstore.NewReader(c, appengine.BlobKey(bk)))
	if err != nil {
		return
	}

	skip, convErr := strconv.Atoi(r.FormValue("skip"))
	if convErr != nil {
		skip = 0
	}
	c.Debugf("reader import for %v, skip %v", userid, skip)

	var (
		batch   []*OpmlOutline
		pending = skip // feed outlines still to be passed over
		walk    func(folder string, nodes []*OpmlOutline)
	)
	// walk visits the outline tree depth-first, wrapping each feed outline in
	// a parent outline titled with the enclosing folder's title.
	walk = func(folder string, nodes []*OpmlOutline) {
		for _, node := range nodes {
			if node.XmlUrl != "" {
				switch {
				case pending > 0:
					pending--
				case len(batch) < IMPORT_LIMIT:
					batch = append(batch, &OpmlOutline{
						Title:   folder,
						Outline: []*OpmlOutline{node},
					})
				}
			}

			if node.Title == "" || len(node.Outline) == 0 {
				continue
			}
			walk(node.Title, node.Outline)
		}
	}

	opml := Opml{}
	if err := xml.Unmarshal(data, &opml); err != nil {
		c.Errorf("opml error: %v", err.Error())
		return
	}
	walk("", opml.Outline)

	// todo: refactor below with similar from ImportReaderTask
	var wg sync.WaitGroup
	wg.Add(len(batch))
	for _, entry := range batch {
		go func(entry *OpmlOutline) {
			defer wg.Done()
			feed := entry.Outline[0]
			if err := addFeed(c, userid, entry); err != nil {
				c.Warningf("opml import error: %v", err.Error())
				// todo: do something here?
			}
			c.Debugf("opml import: %s, %s", feed.Title, feed.XmlUrl)
		}(entry)
	}
	wg.Wait()

	ud := UserData{Id: "data", Parent: gn.Key(&User{Id: userid})}
	txErr := gn.RunInTransaction(func(gn *goon.Goon) error {
		gn.Get(&ud)
		mergeUserOpml(&ud, opml.Outline...)
		_, err := gn.Put(&ud)
		return err
	}, nil)
	if txErr != nil {
		w.WriteHeader(http.StatusInternalServerError)
		c.Errorf("ude update error: %v", txErr.Error())
		return
	}

	// A partial batch means the document is exhausted.
	if len(batch) != IMPORT_LIMIT {
		return
	}
	next := taskqueue.NewPOSTTask(routeUrl("import-opml-task"), url.Values{
		"key":  {bk},
		"user": {userid},
		"skip": {strconv.Itoa(skip + IMPORT_LIMIT)},
	})
	taskqueue.Add(c, next, "import-reader")
}
Exemple #8
0
// ImportReaderTask is the task handler that imports one batch of
// subscriptions from a JSON subscription list stored in the blobstore.
//
// Form values: "user" is the user id, "key" the blob key and "skip" the
// index of the first subscription to import (0 when absent, unparsable or
// negative; clamped to the number of subscriptions). Up to IMPORT_LIMIT
// subscriptions are added concurrently through addFeed and merged into the
// user's UserData inside a transaction. When subscriptions remain, a
// follow-up task is queued with skip advanced by IMPORT_LIMIT; otherwise the
// blob is deleted.
func ImportReaderTask(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	// idPrefixLen is the number of leading bytes dropped from a subscription
	// id to obtain the feed URL.
	// NOTE(review): presumably the length of a "feed/" prefix — confirm.
	const idPrefixLen = 5

	type subscription struct {
		Id         string `json:"id"`
		Title      string `json:"title"`
		HtmlUrl    string `json:"htmlUrl"`
		Categories []struct {
			Id    string `json:"id"`
			Label string `json:"label"`
		} `json:"categories"`
	}

	gn := goon.FromContext(c)
	userid := r.FormValue("user")
	bk := r.FormValue("key")
	fr := blobstore.NewReader(c, appengine.BlobKey(bk))
	data, err := ioutil.ReadAll(fr)
	if err != nil {
		c.Errorf("reader import: blob read error: %v", err.Error())
		return
	}

	var skip int
	if s, err := strconv.Atoi(r.FormValue("skip")); err == nil && s > 0 {
		skip = s
	}

	v := struct {
		Subscriptions []subscription `json:"subscriptions"`
	}{}
	if err := json.Unmarshal(data, &v); err != nil {
		// Keep going with whatever was decoded so the blob is still cleaned
		// up below, but make the failure visible.
		c.Errorf("reader import: json error: %v", err.Error())
	}
	c.Debugf("reader import for %v, skip %v, len %v", userid, skip, len(v.Subscriptions))

	// Clamp the window so a stale or bogus skip cannot produce a negative
	// length or an out-of-range slice.
	if skip > len(v.Subscriptions) {
		skip = len(v.Subscriptions)
	}
	end := skip + IMPORT_LIMIT
	if end > len(v.Subscriptions) {
		end = len(v.Subscriptions)
	}

	// Build the outlines up front, dropping ids too short to slice.
	subs := make([]subscription, 0, end-skip)
	userOpml := make([]*OpmlOutline, 0, end-skip)
	for _, sub := range v.Subscriptions[skip:end] {
		if len(sub.Id) < idPrefixLen {
			c.Warningf("reader import: skipping malformed id %q", sub.Id)
			continue
		}
		var label string
		if len(sub.Categories) > 0 {
			label = sub.Categories[0].Label
		}
		subs = append(subs, sub)
		userOpml = append(userOpml, &OpmlOutline{
			Title: label,
			Outline: []*OpmlOutline{
				&OpmlOutline{
					XmlUrl: sub.Id[idPrefixLen:],
					Title:  sub.Title,
				},
			},
		})
	}

	wg := sync.WaitGroup{}
	wg.Add(len(userOpml))
	for i := range userOpml {
		go func(i int) {
			defer wg.Done()
			if err := addFeed(c, userid, userOpml[i]); err != nil {
				c.Warningf("reader import error: %v", err.Error())
				// todo: do something here?
			}
			c.Debugf("reader import: %s, %s", subs[i].Title, subs[i].Id)
		}(i)
	}
	wg.Wait()

	ud := UserData{Id: "data", Parent: gn.Key(&User{Id: userid})}
	if err := gn.RunInTransaction(func(gn *goon.Goon) error {
		gn.Get(&ud)
		mergeUserOpml(&ud, userOpml...)
		_, err := gn.Put(&ud)
		return err
	}, nil); err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		c.Errorf("ude update error: %v", err.Error())
		return
	}

	if end < len(v.Subscriptions) {
		task := taskqueue.NewPOSTTask(routeUrl("import-reader-task"), url.Values{
			"key":  {bk},
			"user": {userid},
			"skip": {strconv.Itoa(skip + IMPORT_LIMIT)},
		})
		taskqueue.Add(c, task, "import-reader")
	} else {
		blobstore.Delete(c, appengine.BlobKey(bk))
	}
}
Exemple #9
0
// ListFeeds is the HTTP handler that writes, as JSON, the current user's
// OPML outline, the unread stories per feed URL, the feed icons and the
// user's options.
//
// Along the way it:
//   - moves u.Read forward when it is older than oldDuration;
//   - loads every feed in the user's OPML and, using 20 workers, queries the
//     stories newer than u.Read for each feed that was found;
//   - queues an "update-feed" task for feeds with no errors whose NextUpdate
//     has passed, and calls Subscribe on every feed found;
//   - trims the result to the newest numStoriesLimit stories when there are
//     more, moving u.Read to the creation time of the first dropped story;
//   - rebuilds the stored read set when u.Read was moved by either of the
//     two adjustments above;
//   - when no feed returned stories, advances u.Read to the newest feed date
//     and clears the stored read set;
//   - stores changed feed links back into the user's OPML.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})
	// put: u and ud must be written back. fixRead: the read set must be
	// rebuilt because u.Read moved.
	put := false
	fixRead := false
	if time.Since(u.Read) > oldDuration {
		c.Warningf("u.Read too old, fixing: %v", u.Read)
		u.Read = time.Now().Add(-oldDuration)
		put = true
		fixRead = true
		c.Warningf("new: %v", u.Read)
	}

	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		gob.NewDecoder(bytes.NewReader(ud.Read)).Decode(&read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	var merr error
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				// A folder: its children are the feeds.
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		merr = gn.GetMulti(feeds)
	})
	// lock guards fl, icons, numStories, hasStories, updatedLinks and the
	// outline HtmlUrl writes made by the workers below.
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()
	numStories := 0

	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		// process handles a single feed. It is a separate function so that
		// wg.Done runs when each feed finishes rather than piling up as
		// deferred calls until the worker exits.
		process := func(f *Feed) {
			defer wg.Done()
			var stories []*Story

			if u.Read.Before(f.Date) {
				fk := gn.Key(f)
				sq := q.Ancestor(fk).Filter(IDX_COL+" >", u.Read).KeysOnly().Order("-" + IDX_COL)
				keys, _ := gn.GetAll(sq, nil)
				stories = make([]*Story, len(keys))
				for j, key := range keys {
					stories[j] = &Story{
						Id:     key.StringID(),
						Parent: fk,
					}
				}
				gn.GetMulti(stories)
			}
			if f.Errors == 0 && f.NextUpdate.Before(now) {
				t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
					"feed": {f.Url},
				})
				if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
					c.Errorf("taskqueue error: %v", err.Error())
				} else {
					c.Warningf("manual feed update: %v", f.Url)
				}
			}
			f.Subscribe(c)

			lock.Lock()
			// Shared state is only touched while holding the lock; the link
			// bookkeeping used to be written from all workers unguarded.
			if o := opmlMap[f.Url]; f.Link != o.HtmlUrl {
				updatedLinks = true
				o.HtmlUrl = f.Link
			}
			// Subtract any entry being replaced so numStories always equals
			// the number of stories held in fl, even if a URL occurs twice.
			numStories += len(stories) - len(fl[f.Url])
			fl[f.Url] = stories
			if len(stories) > 0 {
				hasStories = true
			}
			if f.Image != "" {
				icons[f.Url] = f.Image
			}
			lock.Unlock()
		}
		feedProc := func() {
			for f := range queue {
				process(f)
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		for i, f := range feeds {
			if goon.NotFound(merr, i) {
				continue
			}
			wg.Add(1)
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	if numStories > numStoriesLimit {
		c.Step("numStories", func() {
			c.Errorf("too many stories: %v", numStories)
			stories := make([]*Story, 0, numStories)
			for _, v := range fl {
				stories = append(stories, v...)
			}
			sort.Sort(sort.Reverse(Stories(stories)))
			// The first story past the limit becomes the new read marker.
			last := stories[numStoriesLimit].Created
			stories = stories[:numStoriesLimit]
			u.Read = last
			put = true
			fixRead = true
			fl = make(map[string][]*Story)
			for _, s := range stories {
				fk := s.Parent.StringID()
				p := fl[fk]
				fl[fk] = append(p, s)
			}
			c.Errorf("filtered: %v, %v", len(stories), last)
		})
	}
	if fixRead {
		c.Step("fix read", func() {
			// Keep only the read marks that refer to stories still listed.
			nread := make(Read)
			for k, v := range fl {
				for _, s := range v {
					rs := readStory{Feed: k, Story: s.Id}
					if read[rs] {
						nread[rs] = true
					}
				}
			}
			c.Errorf("fix read: %v -> %v", len(read), len(nread))
			read = nread
			var b bytes.Buffer
			gob.NewEncoder(&b).Encode(&read)
			ud.Read = b.Bytes()
			put = true
		})
	}
	// Drop stories already marked read.
	for k, v := range fl {
		newStories := make([]*Story, 0, len(v))
		for _, s := range v {
			if !read[readStory{Feed: k, Story: s.Id}] {
				newStories = append(newStories, s)
			}
		}
		fl[k] = newStories
	}
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			put = true
			u.Read = last
			ud.Read = nil
		}
	}
	if updatedLinks {
		// Only replace the stored OPML when it marshals; assigning the result
		// unconditionally would store nil on failure.
		if b, err := json.Marshal(&uf); err != nil {
			c.Errorf("opml marshal error: %v", err.Error())
		} else {
			ud.Opml = b
			put = true
		}
	}
	if put {
		gn.PutMany(u, ud)
	}
	c.Step("json marshal", func() {
		o := struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		}
		b, err := json.Marshal(o)
		if err != nil {
			// Marshalling failed: clean the story summaries, store the
			// repaired stories and try once more.
			c.Errorf("cleaning")
			for _, v := range fl {
				for _, s := range v {
					n := cleanNonUTF8(s.Summary)
					if n != s.Summary {
						s.Summary = n
						c.Errorf("cleaned %v", s.Id)
						gn.Put(s)
					}
				}
			}
			b, _ = json.Marshal(o)
		}
		w.Write(b)
	})
}
Exemple #10
0
// ListFeeds is the HTTP handler that writes, as JSON, the current user's
// OPML outline, the unread stories per feed URL, the feed icons and the
// user's options.
//
// Along the way it:
//   - loads every feed in the user's OPML and, using 20 workers, queries the
//     stories newer than u.Read for each feed, dropping those listed in the
//     user's read set;
//   - queues an "update-feed" task for feeds with no errors whose NextUpdate
//     has passed;
//   - when no feed returned unread stories, advances u.Read to the newest
//     feed date and clears the stored read set;
//   - stores changed feed links back into the user's OPML.
func ListFeeds(c mpg.Context, w http.ResponseWriter, r *http.Request) {
	cu := user.Current(c)
	gn := goon.FromContext(c)
	u := &User{Id: cu.ID}
	ud := &UserData{Id: "data", Parent: gn.Key(u)}
	gn.GetMulti([]interface{}{u, ud})

	read := make(Read)
	var uf Opml
	c.Step("unmarshal user data", func() {
		json.Unmarshal(ud.Read, &read)
		json.Unmarshal(ud.Opml, &uf)
	})
	var feeds []*Feed
	opmlMap := make(map[string]*OpmlOutline)
	c.Step("fetch feeds", func() {
		for _, outline := range uf.Outline {
			if outline.XmlUrl == "" {
				// A folder: its children are the feeds.
				for _, so := range outline.Outline {
					feeds = append(feeds, &Feed{Url: so.XmlUrl})
					opmlMap[so.XmlUrl] = so
				}
			} else {
				feeds = append(feeds, &Feed{Url: outline.XmlUrl})
				opmlMap[outline.XmlUrl] = outline
			}
		}
		// NOTE(review): the GetMulti result is discarded, so feeds missing
		// from the datastore are processed below with zero-valued fields —
		// confirm that is intended.
		gn.GetMulti(feeds)
	})
	// lock guards fl, icons, hasStories, updatedLinks and the outline HtmlUrl
	// writes made by the workers below.
	lock := sync.Mutex{}
	fl := make(map[string][]*Story)
	q := datastore.NewQuery(gn.Key(&Story{}).Kind())
	hasStories := false
	updatedLinks := false
	icons := make(map[string]string)
	now := time.Now()

	c.Step("feed fetch + wait", func() {
		queue := make(chan *Feed)
		wg := sync.WaitGroup{}
		// process handles a single feed. It is a separate function so that
		// wg.Done runs when each feed finishes rather than piling up as
		// deferred calls until the worker exits.
		process := func(f *Feed) {
			defer wg.Done()
			var newStories []*Story

			if u.Read.Before(f.Date) {
				c.Debugf("query for %v", f.Url)
				fk := gn.Key(f)
				sq := q.Ancestor(fk).Filter("p >", u.Read).KeysOnly().Order("-p")
				keys, _ := gn.GetAll(sq, nil)
				stories := make([]*Story, len(keys))
				for j, key := range keys {
					stories[j] = &Story{
						Id:     key.StringID(),
						Parent: fk,
					}
				}
				gn.GetMulti(stories)
				// Keep only the stories not listed as read for this feed.
				for _, st := range stories {
					found := false
					for _, s := range read[f.Url] {
						if s == st.Id {
							found = true
							break
						}
					}
					if !found {
						newStories = append(newStories, st)
					}
				}
			}
			if f.Errors == 0 && f.NextUpdate.Before(now) {
				t := taskqueue.NewPOSTTask(routeUrl("update-feed"), url.Values{
					"feed": {f.Url},
				})
				if _, err := taskqueue.Add(c, t, "update-manual"); err != nil {
					c.Errorf("taskqueue error: %v", err.Error())
				} else {
					c.Warningf("manual feed update: %v", f.Url)
				}
			}

			lock.Lock()
			// Shared state is only touched while holding the lock; the link
			// bookkeeping used to be written from all workers unguarded.
			if o := opmlMap[f.Url]; f.Link != o.HtmlUrl {
				updatedLinks = true
				o.HtmlUrl = f.Link
			}
			fl[f.Url] = newStories
			if len(newStories) > 0 {
				hasStories = true
			}
			if f.Image != "" {
				icons[f.Url] = f.Image
			}
			lock.Unlock()
		}
		feedProc := func() {
			for f := range queue {
				process(f)
			}
		}
		for i := 0; i < 20; i++ {
			go feedProc()
		}
		wg.Add(len(feeds))
		for _, f := range feeds {
			queue <- f
		}
		close(queue)
		wg.Wait()
	})
	if !hasStories {
		var last time.Time
		for _, f := range feeds {
			if last.Before(f.Date) {
				last = f.Date
			}
		}
		if u.Read.Before(last) {
			c.Debugf("setting %v read to %v", cu.ID, last)
			u.Read = last
			ud.Read = nil
			gn.PutMany(u, ud)
		}
	}
	if updatedLinks {
		// Only replace the stored OPML when it marshals; assigning the result
		// unconditionally would store nil on failure.
		if b, err := json.Marshal(&uf); err != nil {
			c.Errorf("opml marshal error: %v", err.Error())
		} else {
			ud.Opml = b
			gn.Put(ud)
		}
	}
	c.Step("json marshal", func() {
		b, err := json.Marshal(struct {
			Opml    []*OpmlOutline
			Stories map[string][]*Story
			Icons   map[string]string
			Options string
		}{
			Opml:    uf.Outline,
			Stories: fl,
			Icons:   icons,
			Options: u.Options,
		})
		if err != nil {
			c.Errorf("json marshal error: %v", err.Error())
		}
		w.Write(b)
	})
}