Пример #1
0
// NewContext a wrapper for appengine.NewContext
func NewContext(req *wcg.Request) context.Context {
	ctx := req.Local("__gaetest__context")
	if ctx != nil {
		return ctx.(context.Context)
	}
	ctx = req.Local("__gaebg_context")
	if ctx != nil {
		return ctx.(context.Context)
	}
	return appengine.NewContext(req.HTTPRequest())
}
Пример #2
0
func processEntityPut(req *wcg.Request, put *entities.Put, keyParams string, create bool) response.Response {
	key := req.Param(keyParams)
	_, ent := put.Kind().Get().Key(key).UseDefaultIfNil(create).MustOne(req)
	if ent == nil {
		return response.APINotFound
	}
	if req.HTTPRequest().Form == nil {
		req.Logger.Warnf("You should use ParseForm middleware to avoid the error in EntityPut|EntityPutOrCreate")
		return response.InternalServerError(req, ErrInvalidFormParameters).(response.Response)
	}
	err := put.Kind().UpdateEntityFromForm(ent, req.HTTPRequest().Form)
	if err != nil {
		return response.BadRequest(req, ErrInvalidFormParameters)
	}
	_, ent_ := put.Key(key).MustUpdate(req, ent)
	return response.NewJSONResponseWithStatus(ent_, 200)
}
Пример #3
0
// NewService returns a new bigquery service accessor instance
func NewService(req *wcg.Request) (*bigquery.Service, error) {
	var ctx context.Context
	tmp := req.Local("__gaetest__context")
	if tmp != nil {
		ctx = tmp.(context.Context)
	} else {
		ctx = appengine.NewContext(req.HTTPRequest())
	}

	client, err := NewHTTPClient(ctx)
	if err != nil {
		return nil, err
	}
	svc, err := bigquery.New(client)
	if err != nil {
		return nil, err
	}
	return svc, nil
}
func runTasksCrawlersAmebloEntryLists(req *wcg.Request, task *models.AsyncAPITask) (*models.AsyncAPITaskProgress, error) {
	const FollowLinkKey = "fl"
	const SettingsKey = "s"
	const URLKey = "u"

	var query = req.HTTPRequest().URL.Query()
	var settingsList []*hplink.CrawlerSettings
	var urlList []string
	if settingsKeys, ok := query[SettingsKey]; ok {
		_, _list := entities.CrawlerSettings.GetMulti().Keys(settingsKeys...).MustList(req)
		settingsList = _list.([]*hplink.CrawlerSettings)
	} else {
		query := entities.CrawlerSettings.Query().Filter("Type=", hplink.CrawlerSettingsTypeAmeblo)
		if pagination := query.MustExecute(req); pagination.Length() > 0 {
			list := pagination.Data.([]hplink.CrawlerSettings)
			settingsList = make([]*hplink.CrawlerSettings, len(list))
			for i := range list {
				settingsList[i] = &list[i]
			}
		}
	}

	var numList = len(settingsList)
	urlList = make([]string, numList)
	if urls, ok := query[URLKey]; ok {
		if numList != len(urls) {
			return nil, fmt.Errorf("List mismatch - found %d settings but %d urls are specified", numList, len(urls))
		}
		urlList = query[URLKey]
	} else {
		for i := range settingsList {
			urlList[i] = (*hplink.AmebloCrawlerSettings)(settingsList[i]).GetEntryListURL()
		}
	}

	startTime := lib.Now()
	nextParamSettingsKeys := make([]string, numList)
	nextParamURLs := make([]string, numList)
	err := iterator.ParallelSlice(settingsList, func(i int, v *hplink.CrawlerSettings) error {
		next, err := _crawlAmebloEntryList(req, v, urlList[i])
		if err != nil {
			settingsList[i].Error = []byte(fmt.Sprintf("%v", err))
			settingsList[i].Status = hplink.CrawlerStatusFailure
			settingsList[i].LastRun = lib.Now()
			return err
		}
		settingsList[i].Error = nil
		settingsList[i].Status = hplink.CrawlerStatusSuccess
		settingsList[i].LastRun = lib.Now()
		if next != "" {
			nextParamSettingsKeys[i] = v.URL
			nextParamURLs[i] = next
		}
		return nil
	})
	entities.CrawlerSettings.PutMulti().MustUpdate(req, settingsList)
	if err != nil {
		return nil, err
	}
	if req.Query(FollowLinkKey) != "true" {
		return nil, err
	}
	// fl=true make a recursive call to follow next links
	// reduce empty urls from nextParam* and return it for recursive call
	var fixedNextParamSettingsKeys []string
	var fixedNextParamURLs []string
	var hasNext = false
	for i := range nextParamURLs {
		if nextParamURLs[i] != "" {
			hasNext = true
			fixedNextParamSettingsKeys = append(fixedNextParamSettingsKeys, nextParamSettingsKeys[i])
			fixedNextParamURLs = append(fixedNextParamURLs, nextParamURLs[i])
		}
	}
	var progress models.AsyncAPITaskProgress
	var lastProgress = task.LastProgress()
	if lastProgress == nil {
		progress.Current = len(urlList)
		progress.Total = 0
	} else {
		progress.Current = lastProgress.Current + len(urlList)
	}
	if hasNext {
		progress.Next = url.Values{
			FollowLinkKey: []string{"true"},
			SettingsKey:   fixedNextParamSettingsKeys,
			URLKey:        fixedNextParamURLs,
		}
		wait := configs.GetIntValue(req, "hplink.ameblo_crawler_url_wait", 2, 0, 10)
		lib.WaitAndEnsureAfter(startTime, time.Duration(wait)*time.Second)
	}
	req.Logger.Infof("No more URL needs to be crawled.")
	return &progress, nil
}