Пример #1
0
func execStatsQuery(req *wcg.Request, query *entities.Query) response.Response {
	now := lib.Now()
	until := wcg.ParseDateTimeOr(req.Query("until"), now)
	if until.After(now) {
		until = now
	}
	sinceDefault := until.Add(StatsQueryDefaultTimeWindow)
	sinceMin := until.Add(StatsQueryMaxTimeWindow)
	since := wcg.ParseDateTimeOr(req.Query("since"), until.Add(-24*time.Hour))
	if since.After(until) {
		since = sinceDefault
	} else if sinceMin.After(since) {
		since = sinceMin
	}
	query = query.Filter("Timestamp >=", since).Filter("Timestamp <", until)
	if req.Query("device") != "" {
		query = query.Filter("Device =", req.Query("device"))
	}
	return response.NewJSONResponse(query.MustExecute(req).Data)
}
func runTasksCrawlersAmebloEntryLists(req *wcg.Request, task *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
	const FollowLinkKey = "fl"
	const SettingsKey = "s"
	const URLKey = "u"

	var query = req.HTTPRequest().URL.Query()
	var settingsList []*hplink.CrawlerSettings
	var urlList []string
	if settingsKeys, ok := query[SettingsKey]; ok {
		_, _list := entities.CrawlerSettings.GetMulti().Keys(settingsKeys...).MustList(req)
		settingsList = _list.([]*hplink.CrawlerSettings)
	} else {
		query := entities.CrawlerSettings.Query().Filter("Type=", hplink.CrawlerSettingsTypeAmeblo)
		if pagination := query.MustExecute(req); pagination.Length() > 0 {
			list := pagination.Data.([]hplink.CrawlerSettings)
			settingsList = make([]*hplink.CrawlerSettings, len(list))
			for i := range list {
				settingsList[i] = &list[i]
			}
		}
	}

	var numList = len(settingsList)
	urlList = make([]string, numList)
	if urls, ok := query[URLKey]; ok {
		if numList != len(urls) {
			return nil, fmt.Errorf("List mismatch - found %d settings but %d urls are specified", numList, len(urls))
		}
		urlList = query[URLKey]
	} else {
		for i := range settingsList {
			urlList[i] = (*hplink.AmebloCrawlerSettings)(settingsList[i]).GetEntryListURL()
		}
	}

	startTime := lib.Now()
	nextParamSettingsKeys := make([]string, numList)
	nextParamURLs := make([]string, numList)
	err := iterator.ParallelSlice(settingsList, func(i int, v *hplink.CrawlerSettings) error {
		next, err := _crawlAmebloEntryList(req, v, urlList[i])
		if err != nil {
			settingsList[i].Error = []byte(fmt.Sprintf("%v", err))
			settingsList[i].Status = hplink.CrawlerStatusFailure
			settingsList[i].LastRun = lib.Now()
			return err
		}
		settingsList[i].Error = nil
		settingsList[i].Status = hplink.CrawlerStatusSuccess
		settingsList[i].LastRun = lib.Now()
		if next != "" {
			nextParamSettingsKeys[i] = v.URL
			nextParamURLs[i] = next
		}
		return nil
	})
	entities.CrawlerSettings.PutMulti().MustUpdate(req, settingsList)
	if err != nil {
		return nil, err
	}
	if req.Query(FollowLinkKey) != "true" {
		return nil, err
	}
	// fl=true make a recursive call to follow next links
	// reduce empty urls from nextParam* and return it for recursive call
	var fixedNextParamSettingsKeys []string
	var fixedNextParamURLs []string
	var hasNext = false
	for i := range nextParamURLs {
		if nextParamURLs[i] != "" {
			hasNext = true
			fixedNextParamSettingsKeys = append(fixedNextParamSettingsKeys, nextParamSettingsKeys[i])
			fixedNextParamURLs = append(fixedNextParamURLs, nextParamURLs[i])
		}
	}
	var progress models.AsyncAPITaskProgress
	var lastProgress = task.LastProgress()
	if lastProgress == nil {
		progress.Current = len(urlList)
		progress.Total = 0
	} else {
		progress.Current = lastProgress.Current + len(urlList)
	}
	if hasNext {
		progress.Next = url.Values{
			FollowLinkKey: []string{"true"},
			SettingsKey:   fixedNextParamSettingsKeys,
			URLKey:        fixedNextParamURLs,
		}
		wait := configs.GetIntValue(req, "hplink.ameblo_crawler_url_wait", 2, 0, 10)
		lib.WaitAndEnsureAfter(startTime, time.Duration(wait)*time.Second)
	}
	req.Logger.Infof("No more URL needs to be crawled.")
	return &progress, nil
}