Beispiel #1
0
func Test_AsyncAPI_Define_RecursiveCall(t *testing.T) {
	assert := gaetest.NewAssert(t)
	app := NewApp("testapp")
	async := app.AsyncAPI()

	var processed = 0
	var q1 string
	config := async.Define("/async/")
	config.OnProcess(AsyncTaskHandler(func(req *wcg.Request, t *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
		var progress models.AsyncAPITaskProgress
		lastp := t.LastProgress()
		if lastp != nil {
			progress.Current = lastp.Current + 1
		} else {
			progress.Current++
		}
		progress.Total = 4
		// confirm progress text is updated.
		processed++
		if processed == 4 {
			q1 = req.Query("q1")
			// no progress returns to stop the task
			return &progress, nil
		}
		if processed == 3 {
			progress.Next = url.Values{
				"q1": []string{"b"},
			}
		} else {
			progress.Next = url.Values{}
		}
		return &progress, nil
	}))
	assert.EqStr("api-testapp-async", config.Queue.Name)

	var path = "/api/testapp/async/"
	var params = url.Values{}

	runner := testhelper.NewAsyncTaskTestRunner(t, app.Routes(), ts)
	task := runner.Run(fmt.Sprintf("%s", path), params)

	assert.OK(models.AsyncAPIStatusSuccess == task.Status)
	assert.EqInt(4, len(task.Progress))
	assert.EqInt(4, task.LastProgress().Current)
	assert.EqInt(4, processed)
	assert.EqStr("b", q1)
}
func runTasksCrawlersAmebloEntryLists(req *wcg.Request, task *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
	const FollowLinkKey = "fl"
	const SettingsKey = "s"
	const URLKey = "u"

	var query = req.HTTPRequest().URL.Query()
	var settingsList []*hplink.CrawlerSettings
	var urlList []string
	if settingsKeys, ok := query[SettingsKey]; ok {
		_, _list := entities.CrawlerSettings.GetMulti().Keys(settingsKeys...).MustList(req)
		settingsList = _list.([]*hplink.CrawlerSettings)
	} else {
		query := entities.CrawlerSettings.Query().Filter("Type=", hplink.CrawlerSettingsTypeAmeblo)
		if pagination := query.MustExecute(req); pagination.Length() > 0 {
			list := pagination.Data.([]hplink.CrawlerSettings)
			settingsList = make([]*hplink.CrawlerSettings, len(list))
			for i := range list {
				settingsList[i] = &list[i]
			}
		}
	}

	var numList = len(settingsList)
	urlList = make([]string, numList)
	if urls, ok := query[URLKey]; ok {
		if numList != len(urls) {
			return nil, fmt.Errorf("List mismatch - found %d settings but %d urls are specified", numList, len(urls))
		}
		urlList = query[URLKey]
	} else {
		for i := range settingsList {
			urlList[i] = (*hplink.AmebloCrawlerSettings)(settingsList[i]).GetEntryListURL()
		}
	}

	startTime := lib.Now()
	nextParamSettingsKeys := make([]string, numList)
	nextParamURLs := make([]string, numList)
	err := iterator.ParallelSlice(settingsList, func(i int, v *hplink.CrawlerSettings) error {
		next, err := _crawlAmebloEntryList(req, v, urlList[i])
		if err != nil {
			settingsList[i].Error = []byte(fmt.Sprintf("%v", err))
			settingsList[i].Status = hplink.CrawlerStatusFailure
			settingsList[i].LastRun = lib.Now()
			return err
		}
		settingsList[i].Error = nil
		settingsList[i].Status = hplink.CrawlerStatusSuccess
		settingsList[i].LastRun = lib.Now()
		if next != "" {
			nextParamSettingsKeys[i] = v.URL
			nextParamURLs[i] = next
		}
		return nil
	})
	entities.CrawlerSettings.PutMulti().MustUpdate(req, settingsList)
	if err != nil {
		return nil, err
	}
	if req.Query(FollowLinkKey) != "true" {
		return nil, err
	}
	// fl=true make a recursive call to follow next links
	// reduce empty urls from nextParam* and return it for recursive call
	var fixedNextParamSettingsKeys []string
	var fixedNextParamURLs []string
	var hasNext = false
	for i := range nextParamURLs {
		if nextParamURLs[i] != "" {
			hasNext = true
			fixedNextParamSettingsKeys = append(fixedNextParamSettingsKeys, nextParamSettingsKeys[i])
			fixedNextParamURLs = append(fixedNextParamURLs, nextParamURLs[i])
		}
	}
	var progress models.AsyncAPITaskProgress
	var lastProgress = task.LastProgress()
	if lastProgress == nil {
		progress.Current = len(urlList)
		progress.Total = 0
	} else {
		progress.Current = lastProgress.Current + len(urlList)
	}
	if hasNext {
		progress.Next = url.Values{
			FollowLinkKey: []string{"true"},
			SettingsKey:   fixedNextParamSettingsKeys,
			URLKey:        fixedNextParamURLs,
		}
		wait := configs.GetIntValue(req, "hplink.ameblo_crawler_url_wait", 2, 0, 10)
		lib.WaitAndEnsureAfter(startTime, time.Duration(wait)*time.Second)
	}
	req.Logger.Infof("No more URL needs to be crawled.")
	return &progress, nil
}