Beispiel #1
0
func retryTask(c context.Context, ds appwrap.Datastore, taskIntf TaskInterface, jobKey *datastore.Key, taskKey *datastore.Key) error {
	var job JobInfo

	if j, err := getJob(ds, jobKey); err != nil {
		return fmt.Errorf("getting job: %s", err)
	} else {
		job = j
	}

	time.Sleep(time.Duration(job.RetryCount) * 5 * time.Second)

	if err := backoff.Retry(func() error {
		var task JobTask
		if err := ds.Get(taskKey, &task); err != nil {
			return fmt.Errorf("getting task: %s", err)
		}

		task.Status = TaskStatusPending
		if _, err := ds.Put(taskKey, &task); err != nil {
			return fmt.Errorf("putting task: %s", err)
		} else if err := taskIntf.PostTask(c, task.Url, job.JsonParameters); err != nil {
			return fmt.Errorf("enqueuing task: %s", err)
		}

		logInfo(c, "retrying task %d/%d", task.Retries, job.RetryCount)
		return nil
	}, mrBackOff()); err != nil {
		logInfo(c, "retryTask() failed after backoff attempts")
		return err
	} else {
		return nil
	}
}
Beispiel #2
0
func makeTaskKeys(ds appwrap.Datastore, firstId int64, count int) []*datastore.Key {
	taskKeys := make([]*datastore.Key, count)
	for i := 0; i < count; i++ {
		taskKeys[i] = ds.NewKey(TaskEntity, "", firstId+int64(i), nil)
	}

	return taskKeys
}
Beispiel #3
0
func getTask(ds appwrap.Datastore, taskKey *datastore.Key) (JobTask, error) {
	var task JobTask

	err := backoff.Retry(func() error {
		return ds.Get(taskKey, &task)
	}, mrBackOff())

	if err != nil {
		return JobTask{}, err
	}

	return task, nil
}
Beispiel #4
0
func getJob(ds appwrap.Datastore, jobKey *datastore.Key) (JobInfo, error) {
	var job JobInfo

	err := backoff.Retry(func() error {
		if err := ds.Get(jobKey, &job); err != nil {
			return err
		}

		return nil
	}, mrBackOff())

	job.Id = jobKey.IntID()

	return job, err
}
Beispiel #5
0
func updateTask(ds appwrap.Datastore, taskKey *datastore.Key, status TaskStatus, tryIncrement int, info string, result interface{}) (JobTask, error) {
	var task JobTask

	newCount := -1

	err := backoff.Retry(func() error {
		if err := ds.Get(taskKey, &task); err != nil {
			return err
		}

		task.UpdatedAt = time.Now()
		task.Info = info

		// this prevents double incrementing if the Put times out but has actually
		// written the value
		if newCount == -1 {
			newCount = task.Retries + tryIncrement
		}
		task.Retries = newCount

		if status != "" {
			task.Status = status
			if status == TaskStatusDone || task.Status == TaskStatusFailed {
				task.Done = task.Job
			}
		}

		if result != nil {
			resultBytes, err := json.Marshal(result)
			if err != nil {
				return err
			}

			task.Result = string(resultBytes)
		}

		_, err := ds.Put(taskKey, &task)
		return err
	}, mrBackOff())

	return task, err
}
Beispiel #6
0
func gatherTasks(ds appwrap.Datastore, job JobInfo) ([]JobTask, error) {
	taskKeys := makeTaskKeys(ds, job.FirstTaskId, job.TaskCount)
	tasks := make([]JobTask, len(taskKeys))

	i := 0
	for i < len(taskKeys) {
		last := i + 100
		if last > len(taskKeys) {
			last = len(taskKeys)
		}

		if err := ds.GetMulti(taskKeys[i:last], tasks[i:last]); err != nil {
			return nil, err
		}

		i = last
	}

	return tasks, nil
}
Beispiel #7
0
func markJobFailed(c context.Context, ds appwrap.Datastore, jobKey *datastore.Key) (prev JobInfo, finalErr error) {
	finalErr = runInTransaction(ds, func(ds appwrap.Datastore) error {
		prev = JobInfo{}
		if err := ds.Get(jobKey, &prev); err != nil {
			return err
		}

		job := prev
		job.Stage = StageFailed

		_, err := ds.Put(jobKey, &job)
		return err
	})

	if finalErr != nil {
		logCritical(c, "marking job failed for key %s failed: %s", jobKey, finalErr)
	}

	return
}
Beispiel #8
0
func createJob(ds appwrap.Datastore, urlPrefix string, writerNames []string, onCompleteUrl string, separateReduceItems bool, jsonParameters string, retryCount int) (*datastore.Key, error) {
	if retryCount == 0 {
		// default
		retryCount = 3
	}

	key := ds.NewKey(JobEntity, "", 0, nil)
	job := JobInfo{
		UrlPrefix:           urlPrefix,
		Stage:               StageFormation,
		UpdatedAt:           time.Now(),
		StartTime:           time.Now(),
		OnCompleteUrl:       onCompleteUrl,
		SeparateReduceItems: separateReduceItems,
		WriterNames:         writerNames,
		RetryCount:          retryCount,
		JsonParameters:      jsonParameters,
	}

	return ds.Put(key, &job)
}
Beispiel #9
0
func RemoveJob(ds appwrap.Datastore, jobId int64) error {
	jobKey := ds.NewKey(JobEntity, "", jobId, nil)
	q := ds.NewQuery(TaskEntity).Filter("Job =", jobKey).KeysOnly()
	keys, err := q.GetAll(nil)
	if err != nil {
		return err
	}

	keys = append(keys, jobKey)

	i := 0
	for i < len(keys) {
		last := i + 250
		if last > len(keys) {
			last = len(keys)
		}

		if err := ds.DeleteMulti(keys[i:last]); err != nil {
			return err
		}

		i = last
	}

	return nil
}
Beispiel #10
0
func createTasks(ds appwrap.Datastore, jobKey *datastore.Key, taskKeys []*datastore.Key, tasks []JobTask, newStage JobStage) error {
	now := time.Now()
	firstId := taskKeys[0].IntID()
	for i := range tasks {
		tasks[i].StartTime = now
		tasks[i].Job = jobKey

		if taskKeys[i].IntID() < firstId {
			firstId = taskKeys[i].IntID()
		}
	}

	putSize := 64

	i := 0
	for i < len(tasks) {
		if err := backoff.Retry(func() error {
			last := i + putSize
			if last > len(tasks) {
				last = len(tasks)
			}

			if _, err := ds.PutMulti(taskKeys[i:last], tasks[i:last]); err != nil {
				if putSize > 5 {
					putSize /= 2
				}

				return err
			}

			i = last

			return nil
		}, mrBackOff()); err != nil {
			return err
		}
	}

	return runInTransaction(ds,
		func(ds appwrap.Datastore) error {
			var job JobInfo

			if err := ds.Get(jobKey, &job); err != nil {
				return err
			}

			job.TaskCount = len(tasks)
			job.FirstTaskId = firstId
			job.Stage = newStage

			_, err := ds.Put(jobKey, &job)
			return err
		})
}
Beispiel #11
0
func GetJob(ds appwrap.Datastore, jobId int64) (JobInfo, error) {
	return getJob(ds, ds.NewKey(JobEntity, "", jobId, nil))
}
Beispiel #12
0
// check if the specified job has completed. it should currently be at expectedStage, and if it's been completed
// we advance it to next stage. if it's already at nextStage another process has beaten us to it so we're done
//
// caller needs to check the stage in the final job; if stageChanged is true it will be either nextStage or StageFailed.
// If StageFailed then at least one of the underlying tasks failed and the reason will appear as a taskError{} in err
func jobStageComplete(c context.Context, ds appwrap.Datastore, jobKey *datastore.Key, taskKeys []*datastore.Key, expectedStage, nextStage JobStage) (stageChanged bool, job JobInfo, finalErr error) {
	last := len(taskKeys)
	tasks := make([]JobTask, 100)
	for last > 0 {
		first := last - 100
		if first < 0 {
			first = 0
		}

		taskCount := last - first

		if err := ds.GetMulti(taskKeys[first:last], tasks[0:taskCount]); err != nil {
			finalErr = err
			return
		} else {
			for i := 0; i < taskCount; i++ {
				if tasks[i].Status == TaskStatusFailed {
					logInfo(c, "failed tasks found")
					nextStage = StageFailed
					last = -1
					finalErr = taskError{tasks[i].Info}
					break
				} else if tasks[i].Status != TaskStatusDone {
					return
				}
			}

			if last >= 0 {
				last = first
			}
		}
	}

	// running this in a transaction ensures only one process advances the stage
	if transErr := runInTransaction(ds, func(ds appwrap.Datastore) error {
		job = JobInfo{}
		if err := ds.Get(jobKey, &job); err != nil {
			return err
		}

		if job.Stage != expectedStage {
			// we're not where we expected, so advancing this isn't our responsibility
			stageChanged = false
			return errMonitorJobConflict
		}

		job.Stage = nextStage
		job.UpdatedAt = time.Now()

		_, err := ds.Put(jobKey, &job)
		stageChanged = (err == nil)
		return err
	}); transErr != nil {
		finalErr = transErr
	}

	if finalErr != nil {
		logCritical(c, "taskComplete failed: %s", finalErr)
	} else {
		logInfo(c, "task is complete")
	}

	return
}
Beispiel #13
0
func runInTransaction(ds appwrap.Datastore, f func(ds appwrap.Datastore) error) error {
	return backoff.Retry(func() error {
		return ds.RunInTransaction(f, nil)
	}, mrBackOff())
}