func Test_AsyncAPI_Define_RecursiveCall(t *testing.T) {
	assert := gaetest.NewAssert(t)
	app := NewApp("testapp")
	async := app.AsyncAPI()
	var processed = 0
	var q1 string
	config := async.Define("/async/")
	// The handler reschedules itself three times by returning progress.Next
	// values; the fourth run leaves Next nil, which stops the task.
	config.OnProcess(AsyncTaskHandler(func(req *wcg.Request, task *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
		var progress models.AsyncAPITaskProgress
		lastp := task.LastProgress()
		if lastp != nil {
			progress.Current = lastp.Current + 1
		} else {
			progress.Current = 1 // first run
		}
		progress.Total = 4
		// count handler invocations; each run appends a progress record.
		processed++
		if processed == 4 {
			// capture the query parameter propagated via progress.Next,
			// then return with a nil Next to stop the task.
			q1 = req.Query("q1")
			return &progress, nil
		}
		if processed == 3 {
			// pass q1=b to the next (final) run.
			progress.Next = url.Values{
				"q1": []string{"b"},
			}
		} else {
			// a non-nil (even empty) Next keeps the task running.
			progress.Next = url.Values{}
		}
		return &progress, nil
	}))
	assert.EqStr("api-testapp-async", config.Queue.Name)
	var path = "/api/testapp/async/"
	var params = url.Values{}
	runner := testhelper.NewAsyncTaskTestRunner(t, app.Routes(), ts)
	task := runner.Run(path, params)
	assert.OK(models.AsyncAPIStatusSuccess == task.Status)
	assert.EqInt(4, len(task.Progress))
	assert.EqInt(4, task.LastProgress().Current)
	assert.EqInt(4, processed)
	assert.EqStr("b", q1)
}
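// runTasksCrawlersAmebloEntryLists crawls Ameblo entry-list pages for a set
// of hplink.CrawlerSettings. The target settings and URLs come from the "s"
// and "u" query parameters; without them, all Ameblo crawler settings are
// loaded and crawled from their first entry-list page. When "fl" (follow
// link) is "true", the task reschedules itself via progress.Next to follow
// each paginated list until no next link remains.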
func runTasksCrawlersAmebloEntryLists(req *wcg.Request, task *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
	const FollowLinkKey = "fl"
	const SettingsKey = "s"
	const URLKey = "u"
	var query = req.HTTPRequest().URL.Query()
	var settingsList []*hplink.CrawlerSettings
	var urlList []string
	if settingsKeys, ok := query[SettingsKey]; ok {
		// a recursive call passes the settings keys to crawl explicitly.
		_, _list := entities.CrawlerSettings.GetMulti().Keys(settingsKeys...).MustList(req)
		settingsList = _list.([]*hplink.CrawlerSettings)
	} else {
		// first call: pick up all Ameblo crawler settings.
		query := entities.CrawlerSettings.Query().Filter("Type=", hplink.CrawlerSettingsTypeAmeblo)
		if pagination := query.MustExecute(req); pagination.Length() > 0 {
			list := pagination.Data.([]hplink.CrawlerSettings)
			settingsList = make([]*hplink.CrawlerSettings, len(list))
			for i := range list {
				settingsList[i] = &list[i]
			}
		}
	}
	var numList = len(settingsList)
	if urls, ok := query[URLKey]; ok {
		if numList != len(urls) {
			return nil, fmt.Errorf("list mismatch: found %d settings but %d urls are specified", numList, len(urls))
		}
		urlList = urls
	} else {
		urlList = make([]string, numList)
		for i := range settingsList {
			urlList[i] = (*hplink.AmebloCrawlerSettings)(settingsList[i]).GetEntryListURL()
		}
	}
	startTime := lib.Now()
	nextParamSettingsKeys := make([]string, numList)
	nextParamURLs := make([]string, numList)
	err := iterator.ParallelSlice(settingsList, func(i int, v *hplink.CrawlerSettings) error {
		next, err := _crawlAmebloEntryList(req, v, urlList[i])
		if err != nil {
			settingsList[i].Error = []byte(err.Error())
			settingsList[i].Status = hplink.CrawlerStatusFailure
			settingsList[i].LastRun = lib.Now()
			return err
		}
		settingsList[i].Error = nil
		settingsList[i].Status = hplink.CrawlerStatusSuccess
		settingsList[i].LastRun = lib.Now()
		if next != "" {
			// remember the settings key (its URL) and the next page to crawl.
			nextParamSettingsKeys[i] = v.URL
			nextParamURLs[i] = next
		}
		return nil
	})
	// persist the per-settings status even if some of the crawls failed.
	entities.CrawlerSettings.PutMulti().MustUpdate(req, settingsList)
	if err != nil {
		return nil, err
	}
	if req.Query(FollowLinkKey) != "true" {
		return nil, nil
	}
	// fl=true makes a recursive call to follow the next links:
	// drop empty entries from nextParam* and pass the rest to the next run.
	var fixedNextParamSettingsKeys []string
	var fixedNextParamURLs []string
	var hasNext = false
	for i := range nextParamURLs {
		if nextParamURLs[i] != "" {
			hasNext = true
			fixedNextParamSettingsKeys = append(fixedNextParamSettingsKeys, nextParamSettingsKeys[i])
			fixedNextParamURLs = append(fixedNextParamURLs, nextParamURLs[i])
		}
	}
	var progress models.AsyncAPITaskProgress
	var lastProgress = task.LastProgress()
	if lastProgress == nil {
		progress.Current = len(urlList)
		progress.Total = 0 // total is not known in advance
	} else {
		progress.Current = lastProgress.Current + len(urlList)
	}
	if hasNext {
		progress.Next = url.Values{
			FollowLinkKey: []string{"true"},
			SettingsKey:   fixedNextParamSettingsKeys,
			URLKey:        fixedNextParamURLs,
		}
		// throttle recursive calls: ensure at least `wait` seconds have
		// passed since this run started before enqueueing the next one.
		wait := configs.GetIntValue(req, "hplink.ameblo_crawler_url_wait", 2, 0, 10)
		lib.WaitAndEnsureAfter(startTime, time.Duration(wait)*time.Second)
		return &progress, nil
	}
	req.Logger.Infof("No more URL needs to be crawled.")
	return &progress, nil
}
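// A minimal usage sketch (an assumption, not taken from this repository):
// this handler would be registered on an async API definition the same way
// the test above registers its inline handler. The route path below is
// illustrative only.
//
//	config := app.AsyncAPI().Define("/tasks/crawlers/ameblo/entry_lists/")
//	config.OnProcess(AsyncTaskHandler(runTasksCrawlersAmebloEntryLists))
//
// As the test above relies on, returning a progress with a non-nil Next
// re-enqueues the task with Next as the next run's query parameters, while
// a nil Next ends it.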