Example #1
0
// indexAllMembers crawls every member's blog entry list, updates the
// datastore indexes, invalidates each member's cached history, and responds
// with a JSON map of member name => crawled entry URLs.
func indexAllMembers(res *wcg.Response, req *wcg.Request, app *App) {
	appCtx := lib.NewAppContextFromRequest(req)
	crawler := ameblo.NewCrawler(appCtx.NewHttpClient())
	result := make(map[string][]string)

	for _, member := range app.Members {
		req.Logger.Debug("Crawling %s (%s)", member.BlogUrl, member.Name)
		entries, err := crawler.CrawlEntryList(member.BlogUrl)
		if err != nil {
			req.Logger.Error("An error occurred while crawling %s: %v", member.BlogUrl, err)
			continue
		}
		req.Logger.Debug("Found %d entries.", len(entries))
		urls := make([]string, 0, len(entries))
		for _, entry := range entries {
			entry.Owner = member.Name
			urls = append(urls, entry.Url)
		}
		if err := updateIndexes(appCtx, entries); err != nil {
			req.Logger.Error("Failed to update the entry: %v", err)
			continue
		}
		result[member.Name] = urls
	}

	// invalidate the cache
	time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore.
	mc := appCtx.NewMemcacheDriver()
	for _, member := range app.Members {
		mc.Delete(fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name))
	}
	res.WriteJson(result)
}
Example #2
0
// getVersion responds with the deployed commit, build timestamp, and
// current runtime stats as JSON.
func getVersion(res *wcg.Response, req *wcg.Request) {
	stats, err := runtime.Stats(gae.NewContext(req))
	if err != nil {
		// Stats are informational only; report the failure but still
		// respond with the version information (stats will be zero-valued).
		req.Logger.Warn("Failed to collect runtime stats: %v", err)
	}
	res.WriteJson(map[string]interface{}{
		"version":   lib.APP_COMMIT,
		"timestamp": lib.APP_TIMESTAMP,
		"stats":     stats,
	})
}
Example #3
0
// indexSpecifiedMember crawls the paginated entry-list pages of a single
// member's blog (selected by the "member" request parameter), updates the
// datastore indexes, invalidates the member's cached history, and responds
// with the list of crawled entry URLs as JSON.
func indexSpecifiedMember(res *wcg.Response, req *wcg.Request, app *App) {
	var appCtx = lib.NewAppContextFromRequest(req)
	member, ok := app.Members[req.Param("member")]
	if !ok {
		lib.NotFound(res, req)
		return
	}

	// "n" caps how many list pages to crawl; 0 (or absent) means no limit.
	num := wcg.ParseInt(req.Param("n"), 0, 0, wcg.ParseIntMax)
	if num == 0 {
		num = wcg.ParseIntMax
	}
	// Crawling
	crawler := ameblo.NewCrawler(appCtx.NewHttpClient())
	prefix := strings.TrimSuffix(member.BlogUrl, ".html") // xxxx.html => xxxx-{num}.html
	entries := make([]*ameblo.AmebloEntry, 0)
	for i := 1; i < num; i += 1 {
		url := fmt.Sprintf("%s-%d.html", prefix, i)
		req.Logger.Info("Indexing from %s ... ", url)
		newentries, err := crawler.CrawlEntryList(url)
		if err != nil {
			lib.InternalError(res, req, err)
			return
		}
		// A list page is expected to hold at most 20 entries; more than
		// that indicates a scraper/markup problem, so fail loudly.
		if len(newentries) > 20 {
			panic(fmt.Errorf("Unexpected number of entries (%d) are returned during indexing.", len(newentries)))
		}
		// Empty page: ran past the last page.
		if len(newentries) == 0 {
			break
		}
		// A short (non-full) page is the last page; take it and stop.
		if len(newentries) < 20 {
			entries = append(entries, newentries...)
			break
		}
		// Full page: if its tail matches what we already collected, the
		// site is repeating the final page — stop without re-appending.
		if len(entries) > 0 && entries[len(entries)-1].Url == newentries[len(newentries)-1].Url {
			break
		}
		entries = append(entries, newentries...)
	}

	// Save and return results
	results := make([]string, 0)
	for _, ent := range entries {
		ent.Owner = member.Name
		results = append(results, ent.Url)
	}
	if err := updateIndexes(appCtx, entries); err != nil {
		req.Logger.Error("Failed to update the entry: %v", err)
		lib.InternalError(res, req, err) // stopped.
	} else {
		time.Sleep(10 * time.Second) // TODO: wait for all indexes are updated on datastore.
		mc := appCtx.NewMemcacheDriver()
		mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name)
		mc.Delete(mckey)
		res.WriteJson(results)
	}
}
Example #4
0
// crawl fetches the full content of entries that have not been crawled yet
// (CrawledAt is the zero time), optionally restricted to a single member,
// persists them via updateContents, and responds with the list of URLs that
// were crawled in this call.
func crawl(res *wcg.Response, req *wcg.Request, member *ameblo.Member, app *App) {
	var appCtx = lib.NewAppContextFromRequest(req)
	var logger = appCtx.Logger
	var targets []*ameblo.AmebloEntry
	result := make([]string, 0)

	d := NewAmebloEntryDriver(appCtx)
	crawler := ameblo.NewCrawler(appCtx.NewHttpClient())

	// prioritize the entries which are not crawled and are posted recently.
	q := d.NewQuery().Filter("CrawledAt =", time.Time{}).Order("PostAt").Limit(NUM_ENTRIES_TO_CRAWL_PER_CALL)
	if member != nil {
		q = q.Filter("Owner =", member.Name)
	}

	if _, err := q.GetAll(&targets); err != nil {
		lib.InternalError(res, req, err)
		return
	}

	// Crawl Contents
	for _, e := range targets {
		logger.Info("Crawling %s ... ", e.Url)
		if e1, err := crawler.CrawlEntry(e.Url); err != nil {
			// Best-effort: a single failed entry is skipped, not fatal.
			logger.Warn("Failed to crawl %s, skipped: %v", e.Url, err)
			continue
		} else {
			if e1 == nil {
				// No scrapable content; mark a placeholder so this entry
				// is not picked up by the zero-CrawledAt query again.
				logger.Warn("CrawlEntry returns nil entry for %s", e.Url)
				e.Content = "<No Content>"
				e.CrawledAt = time.Now()
			} else {
				logger.Debug("CrawlEntry scraped %d bytes.", len(e1.Content))
				// NOTE(review): CrawledAt is NOT set on this success path,
				// unlike the nil-entry branch above — presumably
				// updateContents stamps it; verify, otherwise these
				// entries will be re-crawled on every call.
				e.Content = e1.Content
			}
			result = append(result, e.Url)
		}
	}
	if err := updateContents(appCtx, targets, app.MemberList); err != nil {
		lib.InternalError(res, req, err)
		return
	}
	res.WriteJson(result)
}
Example #5
0
// historyInsights responds with the history insights for the member named by
// the "member" request parameter, serving from memcache unless ?force=1.
func historyInsights(res *wcg.Response, req *wcg.Request, app *App) {
	appCtx := lib.NewAppContextFromRequest(req)
	member, found := app.Members[req.Param("member")]
	if !found {
		lib.NotFound(res, req)
		return
	}
	dent := NewAmebloEntryDriver(appCtx)
	dref := NewAmebloRefDriver(appCtx)
	mc := appCtx.NewMemcacheDriver()
	mckey := fmt.Sprintf(MC_KEY_HISTORY, app.Key, member.Name)
	force := req.Query("force") == "1"

	var insights amebloHistoryInsights
	if err := mc.CachedObject(mckey, &insights, func() (interface{}, error) {
		return getAmebloHistoryInsights(member, dent, dref)
	}, force); err != nil {
		lib.Error(res, req, err)
		return
	}
	res.WriteJson(insights)
}
Example #6
0
// listTvChannels returns the configured TV channel list, served from
// memcache unless ?force=1. When no channels are configured yet it seeds
// the datastore and cache with defaultChannels, writes them to the
// response, and returns them.
func listTvChannels(res *wcg.Response, req *wcg.Request) ([]*tv.TvChannel, error) {
	var list []*tv.TvChannel
	app := lib.GetCurrentApp(req)
	ctx := gae.NewContext(req)
	d := NewTvChannelDriver(app.Key, ctx, req.Logger)
	mc := memcache.NewDriver(ctx, req.Logger)
	err := mc.CachedObject(MC_KEY_CHANNELS, &list, func() (interface{}, error) {
		return d.AllAsList()
	}, req.Query("force") == "1")
	if err != nil {
		return nil, err
	}
	if len(list) > 0 {
		return list, nil
	}
	// No channels yet: seed the datastore and refresh the cache with the
	// defaults. (Typo "configuraiton" in the original log message fixed.)
	req.Logger.Warn("No channel is defined. Reset the configuration.")
	d.AddChannelList(defaultChannels)
	mc.Delete(MC_KEY_CHANNELS)
	mc.Set(MC_KEY_CHANNELS, defaultChannels)
	res.WriteJson(defaultChannels)
	return defaultChannels, nil
}
Example #7
0
// Ok responds with HTTP 200 and a JSON body of {"ok": true}.
func (cron *CronHelper) Ok(res *wcg.Response, req *wcg.Request) {
	body := map[string]interface{}{"ok": true}
	res.WriteJson(body)
}