func (db *ETCDDB) ConvergeTasks( logger lager.Logger, cellSet models.CellSet, kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration, ) ([]*auctioneer.TaskStartRequest, []*models.Task) { logger.Info("starting-convergence") defer logger.Info("finished-convergence") convergeTaskRunsCounter.Increment() convergeStart := db.clock.Now() defer func() { err := convergeTaskDuration.Send(time.Since(convergeStart)) if err != nil { logger.Error("failed-to-send-converge-task-duration-metric", err) } }() logger.Debug("listing-tasks") taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot) if modelErr != nil { logger.Debug("failed-listing-task") sendTaskMetrics(logger, -1, -1, -1, -1) return nil, nil } logger.Debug("succeeded-listing-task") logError := func(task *models.Task, message string) { logger.Error(message, nil, lager.Data{ "task_guid": task.TaskGuid, }) } tasksToComplete := []*models.Task{} scheduleForCompletion := func(task *models.Task) { if task.CompletionCallbackUrl == "" { return } tasksToComplete = append(tasksToComplete, task) } keysToDelete := []string{} tasksToCAS := []compareAndSwappableTask{} scheduleForCASByIndex := func(index uint64, newTask *models.Task) { tasksToCAS = append(tasksToCAS, compareAndSwappableTask{ OldIndex: index, NewTask: newTask, }) } tasksToAuction := []*auctioneer.TaskStartRequest{} var tasksKicked uint64 = 0 pendingCount := 0 runningCount := 0 completedCount := 0 resolvingCount := 0 logger.Debug("determining-convergence-work", lager.Data{"num_tasks": len(taskState.Nodes)}) for _, node := range taskState.Nodes { task := new(models.Task) err := db.deserializeModel(logger, node, task) if err != nil || task.Validate() != nil { logger.Error("found-invalid-task", err, lager.Data{ "key": node.Key, "value": node.Value, }) keysToDelete = append(keysToDelete, node.Key) continue } shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration switch task.State { case models.Task_Pending: pendingCount++ shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration if shouldMarkAsFailed { logError(task, "failed-to-start-in-time") db.markTaskFailed(task, "not started within time limit") scheduleForCASByIndex(node.ModifiedIndex, task) tasksKicked++ } else if shouldKickTask { logger.Info("requesting-auction-for-pending-task", lager.Data{"task_guid": task.TaskGuid}) start := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition) tasksToAuction = append(tasksToAuction, &start) tasksKicked++ } case models.Task_Running: runningCount++ cellIsAlive := cellSet.HasCellID(task.CellId) if !cellIsAlive { logError(task, "cell-disappeared") db.markTaskFailed(task, "cell disappeared before completion") scheduleForCASByIndex(node.ModifiedIndex, task) tasksKicked++ } case models.Task_Completed: completedCount++ shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration if shouldDeleteTask { logError(task, "failed-to-start-resolving-in-time") keysToDelete = append(keysToDelete, node.Key) } else if shouldKickTask { logger.Info("kicking-completed-task", lager.Data{"task_guid": task.TaskGuid}) scheduleForCompletion(task) tasksKicked++ } case models.Task_Resolving: resolvingCount++ shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration if shouldDeleteTask { logError(task, "failed-to-resolve-in-time") keysToDelete = append(keysToDelete, node.Key) } else if shouldKickTask { logger.Info("demoting-resolving-to-completed", lager.Data{"task_guid": task.TaskGuid}) demoted := demoteToCompleted(task) scheduleForCASByIndex(node.ModifiedIndex, demoted) scheduleForCompletion(demoted) tasksKicked++ } } } logger.Debug("done-determining-convergence-work", lager.Data{ "num_tasks_to_auction": len(tasksToAuction), "num_tasks_to_cas": len(tasksToCAS), "num_tasks_to_complete": len(tasksToComplete), "num_keys_to_delete": len(keysToDelete), }) sendTaskMetrics(logger, pendingCount, runningCount, completedCount, resolvingCount) tasksKickedCounter.Add(tasksKicked) logger.Debug("compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)}) err := db.batchCompareAndSwapTasks(tasksToCAS, logger) if err != nil { return nil, nil } logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)}) tasksPrunedCounter.Add(uint64(len(keysToDelete))) logger.Debug("deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)}) db.batchDeleteTasks(keysToDelete, logger) logger.Debug("done-deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)}) return tasksToAuction, tasksToComplete }
Describe("Validate", func() { Context("when the task has a domain, valid guid, stack, and valid action", func() { It("is valid", func() { task = models.Task{ Domain: "some-domain", TaskGuid: "some-task-guid", TaskDefinition: &models.TaskDefinition{ RootFs: "some:rootfs", Action: models.WrapAction(&models.RunAction{ Path: "ls", User: "******", }), }, } err := task.Validate() Expect(err).NotTo(HaveOccurred()) }) }) Context("when the task GUID is present but invalid", func() { It("returns an error indicating so", func() { task = models.Task{ Domain: "some-domain", TaskGuid: "invalid/guid", TaskDefinition: &models.TaskDefinition{ RootFs: "some:rootfs", Action: models.WrapAction(&models.RunAction{ Path: "ls", User: "******", }),