func (h *TaskController) DesireTask(logger lager.Logger, taskDefinition *models.TaskDefinition, taskGuid, domain string) error { var err error logger = logger.Session("desire-task") logger = logger.WithData(lager.Data{"task_guid": taskGuid}) err = h.db.DesireTask(logger, taskDefinition, taskGuid, domain) if err != nil { return err } logger.Debug("start-task-auction-request") taskStartRequest := auctioneer.NewTaskStartRequestFromModel(taskGuid, domain, taskDefinition) err = h.auctioneerClient.RequestTaskAuctions(logger, []*auctioneer.TaskStartRequest{&taskStartRequest}) if err != nil { logger.Error("failed-requesting-task-auction", err) // The creation succeeded, the auction request error can be dropped } else { logger.Debug("succeeded-requesting-task-auction") } return nil }
func (db *SQLDB) getTaskStartRequestsForKickablePendingTasks(logger lager.Logger, kickTasksDuration, expirePendingTaskDuration time.Duration) ([]*auctioneer.TaskStartRequest, uint64) { logger = logger.Session("get-task-start-requests-for-kickable-pending-tasks") rows, err := db.all(logger, db.db, tasksTable, taskColumns, NoLockRow, "state = ? AND updated_at < ? AND created_at > ?", models.Task_Pending, db.clock.Now().Add(-kickTasksDuration).UnixNano(), db.clock.Now().Add(-expirePendingTaskDuration).UnixNano(), ) if err != nil { logger.Error("failed-query", err) return []*auctioneer.TaskStartRequest{}, math.MaxUint64 } defer rows.Close() var failedFetches uint64 tasksToAuction := []*auctioneer.TaskStartRequest{} for rows.Next() { task, err := db.fetchTask(logger, rows, db.db) if err != nil { logger.Error("failed-fetch", err) if err == models.ErrDeserialize { failedFetches++ } } else { taskStartRequest := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition) tasksToAuction = append(tasksToAuction, &taskStartRequest) } } if rows.Err() != nil { logger.Error("failed-getting-next-row", rows.Err()) } return tasksToAuction, failedFetches }
} else if task.TaskGuid == taskGuid2 { task2Completions++ } } Expect(task1Completions).To(Equal(1)) Expect(task2Completions).To(Equal(1)) }) }) Context("when there are tasks to auction", func() { const taskGuid1 = "to-auction-1" const taskGuid2 = "to-auction-2" BeforeEach(func() { taskStartRequest1 := auctioneer.NewTaskStartRequestFromModel(taskGuid1, "domain", model_helpers.NewValidTaskDefinition()) taskStartRequest2 := auctioneer.NewTaskStartRequestFromModel(taskGuid2, "domain", model_helpers.NewValidTaskDefinition()) fakeTaskDB.ConvergeTasksReturns([]*auctioneer.TaskStartRequest{&taskStartRequest1, &taskStartRequest2}, nil) }) It("requests an auction", func() { Expect(fakeAuctioneerClient.RequestTaskAuctionsCallCount()).To(Equal(1)) _, requestedTasks := fakeAuctioneerClient.RequestTaskAuctionsArgsForCall(0) Expect(requestedTasks).To(HaveLen(2)) Expect([]string{requestedTasks[0].TaskGuid, requestedTasks[1].TaskGuid}).To(ConsistOf(taskGuid1, taskGuid2)) }) Context("when requesting an auction is unsuccessful", func() { BeforeEach(func() { fakeAuctioneerClient.RequestTaskAuctionsReturns(errors.New("oops"))
func (db *ETCDDB) ConvergeTasks( logger lager.Logger, cellSet models.CellSet, kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration, ) ([]*auctioneer.TaskStartRequest, []*models.Task) { logger.Info("starting-convergence") defer logger.Info("finished-convergence") convergeTaskRunsCounter.Increment() convergeStart := db.clock.Now() defer func() { err := convergeTaskDuration.Send(time.Since(convergeStart)) if err != nil { logger.Error("failed-to-send-converge-task-duration-metric", err) } }() logger.Debug("listing-tasks") taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot) if modelErr != nil { logger.Debug("failed-listing-task") sendTaskMetrics(logger, -1, -1, -1, -1) return nil, nil } logger.Debug("succeeded-listing-task") logError := func(task *models.Task, message string) { logger.Error(message, nil, lager.Data{ "task_guid": task.TaskGuid, }) } tasksToComplete := []*models.Task{} scheduleForCompletion := func(task *models.Task) { if task.CompletionCallbackUrl == "" { return } tasksToComplete = append(tasksToComplete, task) } keysToDelete := []string{} tasksToCAS := []compareAndSwappableTask{} scheduleForCASByIndex := func(index uint64, newTask *models.Task) { tasksToCAS = append(tasksToCAS, compareAndSwappableTask{ OldIndex: index, NewTask: newTask, }) } tasksToAuction := []*auctioneer.TaskStartRequest{} var tasksKicked uint64 = 0 pendingCount := 0 runningCount := 0 completedCount := 0 resolvingCount := 0 logger.Debug("determining-convergence-work", lager.Data{"num_tasks": len(taskState.Nodes)}) for _, node := range taskState.Nodes { task := new(models.Task) err := db.deserializeModel(logger, node, task) if err != nil || task.Validate() != nil { logger.Error("found-invalid-task", err, lager.Data{ "key": node.Key, "value": node.Value, }) keysToDelete = append(keysToDelete, node.Key) continue } shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration switch task.State { case models.Task_Pending: pendingCount++ shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration if shouldMarkAsFailed { logError(task, "failed-to-start-in-time") db.markTaskFailed(task, "not started within time limit") scheduleForCASByIndex(node.ModifiedIndex, task) tasksKicked++ } else if shouldKickTask { logger.Info("requesting-auction-for-pending-task", lager.Data{"task_guid": task.TaskGuid}) start := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition) tasksToAuction = append(tasksToAuction, &start) tasksKicked++ } case models.Task_Running: runningCount++ cellIsAlive := cellSet.HasCellID(task.CellId) if !cellIsAlive { logError(task, "cell-disappeared") db.markTaskFailed(task, "cell disappeared before completion") scheduleForCASByIndex(node.ModifiedIndex, task) tasksKicked++ } case models.Task_Completed: completedCount++ shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration if shouldDeleteTask { logError(task, "failed-to-start-resolving-in-time") keysToDelete = append(keysToDelete, node.Key) } else if shouldKickTask { logger.Info("kicking-completed-task", lager.Data{"task_guid": task.TaskGuid}) scheduleForCompletion(task) tasksKicked++ } case models.Task_Resolving: resolvingCount++ shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration if shouldDeleteTask { logError(task, "failed-to-resolve-in-time") keysToDelete = append(keysToDelete, node.Key) } else if shouldKickTask { logger.Info("demoting-resolving-to-completed", lager.Data{"task_guid": task.TaskGuid}) demoted := demoteToCompleted(task) scheduleForCASByIndex(node.ModifiedIndex, demoted) scheduleForCompletion(demoted) tasksKicked++ } } } logger.Debug("done-determining-convergence-work", lager.Data{ "num_tasks_to_auction": len(tasksToAuction), "num_tasks_to_cas": len(tasksToCAS), "num_tasks_to_complete": len(tasksToComplete), "num_keys_to_delete": len(keysToDelete), }) sendTaskMetrics(logger, pendingCount, runningCount, completedCount, resolvingCount) tasksKickedCounter.Add(tasksKicked) logger.Debug("compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)}) err := db.batchCompareAndSwapTasks(tasksToCAS, logger) if err != nil { return nil, nil } logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)}) tasksPrunedCounter.Add(uint64(len(keysToDelete))) logger.Debug("deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)}) db.batchDeleteTasks(keysToDelete, logger) logger.Debug("done-deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)}) return tasksToAuction, tasksToComplete }
Expect(sender.GetCounter("ConvergenceTasksPruned")).To(Equal(uint64(4))) Expect(sender.GetCounter("ConvergenceTasksKicked")).To(Equal(uint64(5))) }) Context("pending tasks", func() { It("fails expired tasks", func() { task, err := sqlDB.TaskByGuid(logger, "pending-expired-task") Expect(err).NotTo(HaveOccurred()) Expect(task.FailureReason).To(Equal("not started within time limit")) Expect(task.Failed).To(BeTrue()) Expect(task.Result).To(Equal("")) Expect(task.State).To(Equal(models.Task_Completed)) Expect(task.UpdatedAt).To(Equal(fakeClock.Now().UnixNano())) Expect(task.FirstCompletedAt).To(Equal(fakeClock.Now().UnixNano())) taskRequest := auctioneer.NewTaskStartRequestFromModel("pending-expired-task", domain, taskDef) Expect(tasksToAuction).NotTo(ContainElement(&taskRequest)) }) It("returns tasks that should be kicked for auctioning", func() { task, err := sqlDB.TaskByGuid(logger, "pending-kickable-task") Expect(err).NotTo(HaveOccurred()) Expect(task.FailureReason).NotTo(Equal("not started within time limit")) Expect(task.Failed).NotTo(BeTrue()) taskRequest := auctioneer.NewTaskStartRequestFromModel("pending-kickable-task", domain, taskDef) Expect(tasksToAuction).To(ContainElement(&taskRequest)) }) It("delete tasks that should be kicked if they're invalid", func() { _, err := sqlDB.TaskByGuid(logger, "pending-kickable-invalid-task")