func indexAllMembers(res *wcg.Response, req *wcg.Request, app *App) { var appCtx = lib.NewAppContextFromRequest(req) result := make(map[string][]string) crawler := ameblo.NewCrawler(appCtx.NewHttpClient()) for _, m := range app.Members { req.Logger.Debug("Crawling %s (%s)", m.BlogUrl, m.Name) entries, err := crawler.CrawlEntryList(m.BlogUrl) if err != nil { req.Logger.Error("An error occurred while crawling %s: %v", m.BlogUrl, err) continue } req.Logger.Debug("Found %d entries.", len(entries)) list := make([]string, 0) for _, e := range entries { e.Owner = m.Name list = append(list, e.Url) } if err := updateIndexes(appCtx, entries); err != nil { req.Logger.Error("Failed to update the entry: %v", err) } else { result[m.Name] = list } } // invalidate the cache time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore. mc := appCtx.NewMemcacheDriver() for _, m := range app.Members { mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, m.Name) mc.Delete(mckey) } res.WriteJson(result) }
func getVersion(res *wcg.Response, req *wcg.Request) { stats, _ := runtime.Stats(gae.NewContext(req)) res.WriteJson(map[string]interface{}{ "version": lib.APP_COMMIT, "timestamp": lib.APP_TIMESTAMP, "stats": stats, }) }
func indexSpecifiedMember(res *wcg.Response, req *wcg.Request, app *App) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } num := wcg.ParseInt(req.Param("n"), 0, 0, wcg.ParseIntMax) if num == 0 { num = wcg.ParseIntMax } // Crawling crawler := ameblo.NewCrawler(appCtx.NewHttpClient()) prefix := strings.TrimSuffix(member.BlogUrl, ".html") // xxxx.html => xxxx-{num}.html entries := make([]*ameblo.AmebloEntry, 0) for i := 1; i < num; i += 1 { url := fmt.Sprintf("%s-%d.html", prefix, i) req.Logger.Info("Indexing from %s ... ", url) newentries, err := crawler.CrawlEntryList(url) if err != nil { lib.InternalError(res, req, err) return } if len(newentries) > 20 { panic(fmt.Errorf("Unexpected number of entries (%d) are returned during indexing.", len(newentries))) } if len(newentries) == 0 { break } if len(newentries) < 20 { entries = append(entries, newentries...) break } if len(entries) > 0 && entries[len(entries)-1].Url == newentries[len(newentries)-1].Url { break } entries = append(entries, newentries...) } // Save and return resutls results := make([]string, 0) for _, ent := range entries { ent.Owner = member.Name results = append(results, ent.Url) } if err := updateIndexes(appCtx, entries); err != nil { req.Logger.Error("Failed to update the entry: %v", err) lib.InternalError(res, req, err) // stopped. } else { time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore. mc := appCtx.NewMemcacheDriver() mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name) mc.Delete(mckey) res.WriteJson(results) } }
func crawl(res *wcg.Response, req *wcg.Request, member *ameblo.Member, app *App) { var appCtx = lib.NewAppContextFromRequest(req) var logger = appCtx.Logger var targets []*ameblo.AmebloEntry result := make([]string, 0) d := NewAmebloEntryDriver(appCtx) crawler := ameblo.NewCrawler(appCtx.NewHttpClient()) // prioritize the entries which are not crawled and are posted recently. q := d.NewQuery().Filter("CrawledAt =", time.Time{}).Order("PostAt").Limit(NUM_ENTRIES_TO_CRAWL_PER_CALL) if member != nil { q = q.Filter("Owner =", member.Name) } if _, err := q.GetAll(&targets); err != nil { lib.InternalError(res, req, err) return } // Crawl Contents for _, e := range targets { logger.Info("Crawling %s ... ", e.Url) if e1, err := crawler.CrawlEntry(e.Url); err != nil { logger.Warn("Failed to crawl %s, skipped: %v", e.Url, err) continue } else { if e1 == nil { logger.Warn("CrawlEntry returns nil entry for %s", e.Url) e.Content = "<No Content>" e.CrawledAt = time.Now() } else { logger.Debug("CrawlEntry scraped %d bytes.", len(e1.Content)) e.Content = e1.Content } result = append(result, e.Url) } } if err := updateContents(appCtx, targets, app.MemberList); err != nil { lib.InternalError(res, req, err) return } res.WriteJson(result) }
func historyInsights(res *wcg.Response, req *wcg.Request, app *App) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } var insights amebloHistoryInsights mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name) dent := NewAmebloEntryDriver(appCtx) dref := NewAmebloRefDriver(appCtx) mc := appCtx.NewMemcacheDriver() err := mc.CachedObject(mckey, &insights, func() (interface{}, error) { return getAmebloHistoryInsights(member, dent, dref) }, req.Query("force") == "1") if err != nil { lib.Error(res, req, err) return } res.WriteJson(insights) }
func listTvChannels(res *wcg.Response, req *wcg.Request) ([]*tv.TvChannel, error) { var list []*tv.TvChannel app := lib.GetCurrentApp(req) ctx := gae.NewContext(req) d := NewTvChannelDriver(app.Key, ctx, req.Logger) mc := memcache.NewDriver(ctx, req.Logger) err := mc.CachedObject(MC_KEY_CHANNELS, &list, func() (interface{}, error) { return d.AllAsList() }, req.Query("force") == "1") if err != nil { return nil, err } else { if len(list) == 0 { req.Logger.Warn("No channel is defined. Reset the configuraiton.") d.AddChannelList(defaultChannels) mc.Delete(MC_KEY_CHANNELS) mc.Set(MC_KEY_CHANNELS, defaultChannels) res.WriteJson(defaultChannels) return defaultChannels, nil } else { return list, nil } } }
// HTTP 200 OK func (cron *CronHelper) Ok(res *wcg.Response, req *wcg.Request) { res.WriteJson(map[string]interface{}{ "ok": true, }) }