func crawlSpecifiedMembers(res *wcg.Response, req *wcg.Request, app *App) { member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } crawl(res, req, member, app) }
func indexSpecifiedMember(res *wcg.Response, req *wcg.Request, app *App) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } num := wcg.ParseInt(req.Param("n"), 0, 0, wcg.ParseIntMax) if num == 0 { num = wcg.ParseIntMax } // Crawling crawler := ameblo.NewCrawler(appCtx.NewHttpClient()) prefix := strings.TrimSuffix(member.BlogUrl, ".html") // xxxx.html => xxxx-{num}.html entries := make([]*ameblo.AmebloEntry, 0) for i := 1; i < num; i += 1 { url := fmt.Sprintf("%s-%d.html", prefix, i) req.Logger.Info("Indexing from %s ... ", url) newentries, err := crawler.CrawlEntryList(url) if err != nil { lib.InternalError(res, req, err) return } if len(newentries) > 20 { panic(fmt.Errorf("Unexpected number of entries (%d) are returned during indexing.", len(newentries))) } if len(newentries) == 0 { break } if len(newentries) < 20 { entries = append(entries, newentries...) break } if len(entries) > 0 && entries[len(entries)-1].Url == newentries[len(newentries)-1].Url { break } entries = append(entries, newentries...) } // Save and return resutls results := make([]string, 0) for _, ent := range entries { ent.Owner = member.Name results = append(results, ent.Url) } if err := updateIndexes(appCtx, entries); err != nil { req.Logger.Error("Failed to update the entry: %v", err) lib.InternalError(res, req, err) // stopped. } else { time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore. mc := appCtx.NewMemcacheDriver() mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name) mc.Delete(mckey) res.WriteJson(results) } }
func setupApi(app *App) { app.Api.Get("/ameblo/insights/:member/history.json", func(res *wcg.Response, req *wcg.Request) { historyInsights(res, req, app) }) app.Api.Get("/ameblo/indexes/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { indexAllMembers(res, req, app) })) app.Api.Get("/ameblo/indexes/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { indexSpecifiedMember(res, req, app) })) app.Api.Get("/ameblo/contents/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { crawlAllMembers(res, req, app) })) app.Api.Get("/ameblo/contents/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { crawlSpecifiedMembers(res, req, app) })) app.Api.Delete("/ameblo/contents/:member.json", lib.Admin.Required( func(res *wcg.Response, req *wcg.Request) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } PER_ENT := 100 offset := 0 d := NewAmebloEntryDriver(appCtx) for { var list []*ameblo.AmebloEntry var q = d.NewQuery().Filter("Owner =", member.Name).Offset(offset).Limit(PER_ENT) if keys, err := q.GetAll(&list); err != nil { lib.InternalError(res, req, err) return } else { for _, ent := range list { ent.CrawledAt = time.Time{} } if _, err = d.PutMulti(keys, list); datastore.IsDatastoreError(err) { lib.InternalError(res, req, err) } offset = offset + len(list) if len(list) < PER_ENT { break } } } app.Api.Ok(res, req) }, )) }
func historyInsights(res *wcg.Response, req *wcg.Request, app *App) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } var insights amebloHistoryInsights mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name) dent := NewAmebloEntryDriver(appCtx) dref := NewAmebloRefDriver(appCtx) mc := appCtx.NewMemcacheDriver() err := mc.CachedObject(mckey, &insights, func() (interface{}, error) { return getAmebloHistoryInsights(member, dent, dref) }, req.Query("force") == "1") if err != nil { lib.Error(res, req, err) return } res.WriteJson(insights) }