Ejemplo n.º 1
0
// crawlSpecifiedMembers runs crawl for the single member named by the
// "member" request parameter. When app.Members has no entry under that
// name, it answers via lib.NotFound and does not crawl anything.
func crawlSpecifiedMembers(res *wcg.Response, req *wcg.Request, app *App) {
	if target, found := app.Members[req.Param("member")]; found {
		crawl(res, req, target, app)
		return
	}
	lib.NotFound(res, req)
}
Ejemplo n.º 2
0
// indexSpecifiedMember crawls the numbered entry-list pages of one member's
// blog, stores the collected entries through updateIndexes, and writes the
// URLs of those entries as a JSON array.
//
// The member is looked up in app.Members by the "member" request parameter;
// an unknown name is answered via lib.NotFound. The "n" request parameter
// bounds the page loop; when it parses to 0 it is replaced by
// wcg.ParseIntMax. Crawl failures, an oversized page, and updateIndexes
// failures are all answered via lib.InternalError.
func indexSpecifiedMember(res *wcg.Response, req *wcg.Request, app *App) {
	// A page holding fewer entries than this is treated as the last page;
	// a page holding more is treated as an error.
	const entriesPerPage = 20

	var appCtx = lib.NewAppContextFromRequest(req)
	member, ok := app.Members[req.Param("member")]
	if !ok {
		lib.NotFound(res, req)
		return
	}

	num := wcg.ParseInt(req.Param("n"), 0, 0, wcg.ParseIntMax)
	if num == 0 {
		num = wcg.ParseIntMax
	}

	// Crawling
	crawler := ameblo.NewCrawler(appCtx.NewHttpClient())
	prefix := strings.TrimSuffix(member.BlogUrl, ".html") // xxxx.html => xxxx-{num}.html
	entries := make([]*ameblo.AmebloEntry, 0)
	// NOTE(review): the loop visits pages 1..num-1, so n=1 crawls nothing.
	// Confirm whether "n" is meant to be an exclusive bound.
	for i := 1; i < num; i++ {
		url := fmt.Sprintf("%s-%d.html", prefix, i)
		req.Logger.Info("Indexing from %s ... ", url)
		newentries, err := crawler.CrawlEntryList(url)
		if err != nil {
			lib.InternalError(res, req, err)
			return
		}
		if len(newentries) > entriesPerPage {
			// Report the anomaly like every other failure in this handler
			// instead of panicking in the middle of a request.
			lib.InternalError(res, req, fmt.Errorf("Unexpected number of entries (%d) are returned during indexing.", len(newentries)))
			return
		}
		if len(newentries) == 0 {
			break
		}
		if len(newentries) < entriesPerPage {
			// Short page: keep its entries and stop.
			entries = append(entries, newentries...)
			break
		}
		// A full page whose last entry equals the last entry already
		// collected is taken as a repeat of the previous page: stop without
		// appending it again.
		if len(entries) > 0 && entries[len(entries)-1].Url == newentries[len(newentries)-1].Url {
			break
		}
		entries = append(entries, newentries...)
	}

	// Save and return results. The slice is created non-nil so an empty
	// result still encodes as a JSON array.
	results := make([]string, 0, len(entries))
	for _, ent := range entries {
		ent.Owner = member.Name
		results = append(results, ent.Url)
	}
	if err := updateIndexes(appCtx, entries); err != nil {
		req.Logger.Error("Failed to update the entry: %v", err)
		lib.InternalError(res, req, err) // stopped.
		return
	}

	time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore.
	// Drop the cached history entry for this member (same key format that
	// is built from MC_KEY_HISTORY, app.Key and member.Name).
	mc := appCtx.NewMemcacheDriver()
	mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name)
	mc.Delete(mckey)
	res.WriteJson(results)
}
Ejemplo n.º 3
0
// setupApi registers the ameblo API routes on app.Api:
//
//	GET    /ameblo/insights/:member/history.json  -> historyInsights
//	GET    /ameblo/indexes/                       -> indexAllMembers       (admin)
//	GET    /ameblo/indexes/:member.json           -> indexSpecifiedMember  (admin)
//	GET    /ameblo/contents/                      -> crawlAllMembers       (admin)
//	GET    /ameblo/contents/:member.json          -> crawlSpecifiedMembers (admin)
//	DELETE /ameblo/contents/:member.json          -> reset CrawledAt       (admin)
//
// Routes marked (admin) are wrapped with lib.Admin.Required.
func setupApi(app *App) {
	app.Api.Get("/ameblo/insights/:member/history.json", func(res *wcg.Response, req *wcg.Request) {
		historyInsights(res, req, app)
	})
	app.Api.Get("/ameblo/indexes/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		indexAllMembers(res, req, app)
	}))
	app.Api.Get("/ameblo/indexes/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		indexSpecifiedMember(res, req, app)
	}))
	app.Api.Get("/ameblo/contents/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		crawlAllMembers(res, req, app)
	}))
	app.Api.Get("/ameblo/contents/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		crawlSpecifiedMembers(res, req, app)
	}))
	// DELETE sets CrawledAt back to the zero time on every stored entry
	// whose Owner equals the member's name, working in batches.
	app.Api.Delete("/ameblo/contents/:member.json", lib.Admin.Required(
		func(res *wcg.Response, req *wcg.Request) {
			// Number of entries fetched and rewritten per batch.
			const perEnt = 100

			var appCtx = lib.NewAppContextFromRequest(req)

			member, ok := app.Members[req.Param("member")]
			if !ok {
				lib.NotFound(res, req)
				return
			}

			offset := 0
			d := NewAmebloEntryDriver(appCtx)

			for {
				var list []*ameblo.AmebloEntry
				var q = d.NewQuery().Filter("Owner =", member.Name).Offset(offset).Limit(perEnt)
				keys, err := q.GetAll(&list)
				if err != nil {
					lib.InternalError(res, req, err)
					return
				}
				for _, ent := range list {
					ent.CrawledAt = time.Time{}
				}
				if _, err = d.PutMulti(keys, list); datastore.IsDatastoreError(err) {
					lib.InternalError(res, req, err)
					// Stop here: the error response has been sent, so the
					// handler must not continue and also report Ok.
					return
				}
				offset += len(list)
				// A short batch means there is nothing left to fetch.
				if len(list) < perEnt {
					break
				}
			}
			app.Api.Ok(res, req)
		},
	))
}
Ejemplo n.º 4
0
// historyInsights writes the history insights of one member as JSON.
//
// The member is looked up in app.Members by the "member" request parameter;
// an unknown name is answered via lib.NotFound. The insights are obtained
// through the memcache driver's CachedObject, keyed by MC_KEY_HISTORY
// formatted with app.Key and the member's name, with
// getAmebloHistoryInsights as the loader callback. Any error from
// CachedObject is answered via lib.Error.
func historyInsights(res *wcg.Response, req *wcg.Request, app *App) {
	var appCtx = lib.NewAppContextFromRequest(req)
	member, found := app.Members[req.Param("member")]
	if !found {
		lib.NotFound(res, req)
		return
	}

	var insights amebloHistoryInsights
	cacheKey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name)
	entryDriver := NewAmebloEntryDriver(appCtx)
	refDriver := NewAmebloRefDriver(appCtx)
	cache := appCtx.NewMemcacheDriver()

	// Loader handed to CachedObject to produce the insights.
	loadInsights := func() (interface{}, error) {
		return getAmebloHistoryInsights(member, entryDriver, refDriver)
	}
	// True when the request carries force=1; presumably makes CachedObject
	// bypass the cached value — confirm against the memcache driver.
	force := req.Query("force") == "1"

	if err := cache.CachedObject(cacheKey, &insights, loadInsights, force); err != nil {
		lib.Error(res, req, err)
		return
	}
	res.WriteJson(insights)
}