func (db *SQLDB) FailTask(logger lager.Logger, taskGuid, failureReason string) (*models.Task, error) {
	logger = logger.Session("fail-task", lager.Data{"task_guid": taskGuid})
	logger.Info("starting")
	defer logger.Info("complete")

	var task *models.Task

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		task, err = db.fetchTaskForUpdate(logger, taskGuid, tx)
		if err != nil {
			logger.Error("failed-locking-task", err)
			return err
		}

		if err = task.ValidateTransitionTo(models.Task_Completed); err != nil {
			if task.State != models.Task_Pending {
				logger.Error("failed-to-transition-task-to-completed", err)
				return err
			}
		}

		return db.completeTask(logger, task, true, failureReason, "", tx)
	})

	return task, err
}
func (db *SQLDB) CompleteTask(logger lager.Logger, taskGuid, cellID string, failed bool, failureReason, taskResult string) (*models.Task, error) {
	logger = logger.Session("complete-task", lager.Data{"task_guid": taskGuid, "cell_id": cellID})
	logger.Info("starting")
	defer logger.Info("complete")

	var task *models.Task

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		task, err = db.fetchTaskForUpdate(logger, taskGuid, tx)
		if err != nil {
			logger.Error("failed-locking-task", err)
			return err
		}

		if task.CellId != cellID && task.State == models.Task_Running {
			logger.Error("failed-task-already-running-on-different-cell", err)
			return models.NewRunningOnDifferentCellError(cellID, task.CellId)
		}

		if err = task.ValidateTransitionTo(models.Task_Completed); err != nil {
			logger.Error("failed-to-transition-task-to-completed", err)
			return err
		}

		return db.completeTask(logger, task, failed, failureReason, taskResult, tx)
	})

	return task, err
}
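// A minimal usage sketch, assuming the caller already holds a *SQLDB and a
// lager.Logger. CompleteTask records a cell-reported outcome (success or
// failure plus a result payload), while FailTask marks a task failed without
// consulting the cell, e.g. when it never started. The guid, cell ID, and
// result below are illustrative values only.
//
//	task, err := db.CompleteTask(logger, "task-guid", "cell-id", false, "", `{"exit_code":0}`)
//	if err != nil {
//		// may be a transition error or a RunningOnDifferentCell error
//	}
//
//	task, err = db.FailTask(logger, "task-guid", "not started within time limit")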
func (db *SQLDB) completeTask(logger lager.Logger, task *models.Task, failed bool, failureReason, result string, tx *sql.Tx) error {
	now := db.clock.Now().UnixNano()
	_, err := db.update(logger, tx, tasksTable,
		SQLAttributes{
			"failed":             failed,
			"failure_reason":     failureReason,
			"result":             result,
			"state":              models.Task_Completed,
			"first_completed_at": now,
			"updated_at":         now,
			"cell_id":            "",
		},
		"guid = ?", task.TaskGuid,
	)
	if err != nil {
		logger.Error("failed-updating-tasks", err)
		return db.convertSQLError(err)
	}

	task.State = models.Task_Completed
	task.UpdatedAt = now
	task.FirstCompletedAt = now
	task.Failed = failed
	task.FailureReason = failureReason
	task.Result = result
	task.CellId = ""

	return nil
}
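// For orientation, the SQLAttributes map above corresponds roughly to an
// UPDATE of the following shape. The literal statement is assembled by the
// db.update helper (not shown here), so column ordering and placeholder style
// are an assumption, not its exact output:
//
//	UPDATE tasks
//	SET failed = ?, failure_reason = ?, result = ?, state = ?,
//	    first_completed_at = ?, updated_at = ?, cell_id = ?
//	WHERE guid = ?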
func (db *ETCDDB) markTaskCompleted(task *models.Task, failed bool, failureReason, result string) {
	now := db.clock.Now().UnixNano()
	task.CellId = ""
	task.UpdatedAt = now
	task.FirstCompletedAt = now
	task.State = models.Task_Completed
	task.Failed = failed
	task.FailureReason = failureReason
	task.Result = result
}
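// ConvergeTasks below calls db.markTaskFailed, which is not included in this
// excerpt. A plausible sketch, assuming it simply reuses markTaskCompleted
// with the failed flag set and an empty result:
//
//	func (db *ETCDDB) markTaskFailed(task *models.Task, reason string) {
//		db.markTaskCompleted(task, true, reason, "")
//	}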
Context("when there is invalid data", func() { BeforeEach(func() { task1 := model_helpers.NewValidTask("a-guid") insertTask(db, serializer, task1, true) }) It("errors", func() { _, err := sqlDB.TaskByGuid(logger, "a-guid") Expect(err).To(Equal(models.ErrDeserialize)) }) }) }) Describe("StartTask", func() { var ( expectedTask, beforeTask *models.Task ) BeforeEach(func() { expectedTask = model_helpers.NewValidTask("task-guid") err := sqlDB.DesireTask(logger, expectedTask.TaskDefinition, expectedTask.TaskGuid, expectedTask.Domain) Expect(err).NotTo(HaveOccurred()) }) JustBeforeEach(func() { var err error beforeTask, err = sqlDB.TaskByGuid(logger, expectedTask.TaskGuid) Expect(err).NotTo(HaveOccurred()) }) It("starts the task", func() {
Describe("Version", func() { It("returns the timestamp from which it was created", func() { Expect(migration.Version()).To(BeEquivalentTo(1451635200)) }) }) Describe("Down", func() { It("returns a not implemented error", func() { Expect(migration.Down(logger)).To(HaveOccurred()) }) }) Describe("Up", func() { var ( task *models.Task migrationErr error ) Describe("Task Migration", func() { BeforeEach(func() { task = model_helpers.NewValidTask("task-guid-1") task.Action = models.WrapAction(&models.TimeoutAction{Action: model_helpers.NewValidAction(), DeprecatedTimeoutNs: 5 * int64(time.Second), }) }) JustBeforeEach(func() { taskData, err := serializer.Marshal(logger, format.ENCRYPTED_PROTO, task) Expect(err).NotTo(HaveOccurred()) _, err = storeClient.Set(etcddb.TaskSchemaPath(task), taskData, 0) Expect(err).NotTo(HaveOccurred())
func (db *ETCDDB) ConvergeTasks(
	logger lager.Logger,
	cellSet models.CellSet,
	kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration,
) ([]*auctioneer.TaskStartRequest, []*models.Task) {
	logger.Info("starting-convergence")
	defer logger.Info("finished-convergence")

	convergeTaskRunsCounter.Increment()

	convergeStart := db.clock.Now()
	defer func() {
		err := convergeTaskDuration.Send(time.Since(convergeStart))
		if err != nil {
			logger.Error("failed-to-send-converge-task-duration-metric", err)
		}
	}()

	logger.Debug("listing-tasks")
	taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot)
	if modelErr != nil {
		logger.Debug("failed-listing-task")
		sendTaskMetrics(logger, -1, -1, -1, -1)
		return nil, nil
	}
	logger.Debug("succeeded-listing-task")

	logError := func(task *models.Task, message string) {
		logger.Error(message, nil, lager.Data{
			"task_guid": task.TaskGuid,
		})
	}

	tasksToComplete := []*models.Task{}
	scheduleForCompletion := func(task *models.Task) {
		if task.CompletionCallbackUrl == "" {
			return
		}
		tasksToComplete = append(tasksToComplete, task)
	}

	keysToDelete := []string{}

	tasksToCAS := []compareAndSwappableTask{}
	scheduleForCASByIndex := func(index uint64, newTask *models.Task) {
		tasksToCAS = append(tasksToCAS, compareAndSwappableTask{
			OldIndex: index,
			NewTask:  newTask,
		})
	}

	tasksToAuction := []*auctioneer.TaskStartRequest{}

	var tasksKicked uint64 = 0

	pendingCount := 0
	runningCount := 0
	completedCount := 0
	resolvingCount := 0

	logger.Debug("determining-convergence-work", lager.Data{"num_tasks": len(taskState.Nodes)})
	for _, node := range taskState.Nodes {
		task := new(models.Task)
		err := db.deserializeModel(logger, node, task)
		if err != nil || task.Validate() != nil {
			logger.Error("found-invalid-task", err, lager.Data{
				"key":   node.Key,
				"value": node.Value,
			})

			keysToDelete = append(keysToDelete, node.Key)
			continue
		}

		shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration

		switch task.State {
		case models.Task_Pending:
			pendingCount++
			shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration
			if shouldMarkAsFailed {
				logError(task, "failed-to-start-in-time")
				db.markTaskFailed(task, "not started within time limit")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			} else if shouldKickTask {
				logger.Info("requesting-auction-for-pending-task", lager.Data{"task_guid": task.TaskGuid})
				start := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition)
				tasksToAuction = append(tasksToAuction, &start)
				tasksKicked++
			}
		case models.Task_Running:
			runningCount++
			cellIsAlive := cellSet.HasCellID(task.CellId)
			if !cellIsAlive {
				logError(task, "cell-disappeared")
				db.markTaskFailed(task, "cell disappeared before completion")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			}
		case models.Task_Completed:
			completedCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-start-resolving-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("kicking-completed-task", lager.Data{"task_guid": task.TaskGuid})
				scheduleForCompletion(task)
				tasksKicked++
			}
		case models.Task_Resolving:
			resolvingCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-resolve-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("demoting-resolving-to-completed",
					lager.Data{"task_guid": task.TaskGuid})
				demoted := demoteToCompleted(task)
				scheduleForCASByIndex(node.ModifiedIndex, demoted)
				scheduleForCompletion(demoted)
				tasksKicked++
			}
		}
	}
	logger.Debug("done-determining-convergence-work", lager.Data{
		"num_tasks_to_auction":  len(tasksToAuction),
		"num_tasks_to_cas":      len(tasksToCAS),
		"num_tasks_to_complete": len(tasksToComplete),
		"num_keys_to_delete":    len(keysToDelete),
	})

	sendTaskMetrics(logger, pendingCount, runningCount, completedCount, resolvingCount)

	tasksKickedCounter.Add(tasksKicked)
	logger.Debug("compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})
	err := db.batchCompareAndSwapTasks(tasksToCAS, logger)
	if err != nil {
		return nil, nil
	}
	logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})

	tasksPrunedCounter.Add(uint64(len(keysToDelete)))
	logger.Debug("deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})
	db.batchDeleteTasks(keysToDelete, logger)
	logger.Debug("done-deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})

	return tasksToAuction, tasksToComplete
}
func demoteToCompleted(task *models.Task) *models.Task {
	task.State = models.Task_Completed
	return task
}
import ( "encoding/json" "strings" "time" "code.cloudfoundry.org/bbs/format" "code.cloudfoundry.org/bbs/models" "github.com/gogo/protobuf/proto" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) var _ = Describe("Task", func() { var taskPayload string var task models.Task BeforeEach(func() { taskPayload = `{ "task_guid":"some-guid", "domain":"some-domain", "rootfs": "docker:///docker.com/docker", "env":[ { "name":"ENV_VAR_NAME", "value":"an environmment value" } ], "cell_id":"cell", "action": { "download":{
"code.cloudfoundry.org/bbs/models/test/model_helpers" "code.cloudfoundry.org/lager/lagertest" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/onsi/gomega/gbytes" ) var _ = Describe("Task Handlers", func() { var ( logger *lagertest.TestLogger controller *fake_controllers.FakeTaskController responseRecorder *httptest.ResponseRecorder handler *handlers.TaskHandler exitCh chan struct{} task1 models.Task task2 models.Task requestBody interface{} ) BeforeEach(func() { logger = lagertest.NewTestLogger("test") responseRecorder = httptest.NewRecorder() exitCh = make(chan struct{}, 1) controller = &fake_controllers.FakeTaskController{} handler = handlers.NewTaskHandler(controller, exitCh) })
		cfhttp.Initialize(timeout)
		fakeServer = ghttp.NewServer()
		logger = lagertest.NewTestLogger("test")
		logger.RegisterSink(lager.NewWriterSink(GinkgoWriter, lager.INFO))
	})

	AfterEach(func() {
		fakeServer.Close()
	})

	Describe("HandleCompletedTask", func() {
		var (
			callbackURL string
			taskDB      *dbfakes.FakeTaskDB
			statusCodes chan int
			task        *models.Task
			httpClient  *http.Client
		)

		BeforeEach(func() {
			httpClient = cfhttp.NewClient()
			statusCodes = make(chan int)

			fakeServer.RouteToHandler("POST", "/the-callback/url", func(w http.ResponseWriter, req *http.Request) {
				w.WriteHeader(<-statusCodes)
			})
			callbackURL = fakeServer.URL() + "/the-callback/url"

			taskDB = new(dbfakes.FakeTaskDB)
			taskDB.ResolvingTaskReturns(nil)
It("creates a task in sqldb for each task in etcd", func() { Expect(migrationErr).NotTo(HaveOccurred()) rows, err := rawSQLDB.Query(` SELECT guid, domain, updated_at, created_at, first_completed_at, state, cell_id, result, failed, failure_reason, task_definition FROM tasks `) Expect(err).NotTo(HaveOccurred()) defer rows.Close() tasks := []*models.Task{} for rows.Next() { var taskTest models.Task var encryptedDefinition []byte err := rows.Scan(&taskTest.TaskGuid, &taskTest.Domain, &taskTest.UpdatedAt, &taskTest.CreatedAt, &taskTest.FirstCompletedAt, &taskTest.State, &taskTest.CellId, &taskTest.Result, &taskTest.Failed, &taskTest.FailureReason, &encryptedDefinition) Expect(err).NotTo(HaveOccurred()) taskTest.TaskDefinition = &models.TaskDefinition{} err = serializer.Unmarshal(logger, encryptedDefinition, taskTest.TaskDefinition) Expect(err).NotTo(HaveOccurred()) tasks = append(tasks, &taskTest) }
func TaskSchemaPath(task *models.Task) string {
	return TaskSchemaPathByGuid(task.GetTaskGuid())
}
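// TaskSchemaPathByGuid is not part of this excerpt. A minimal sketch, assuming
// it simply joins the task schema root with the guid:
//
//	func TaskSchemaPathByGuid(taskGuid string) string {
//		return path.Join(TaskSchemaRoot, taskGuid)
//	}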