func (h *TaskHandler) DesireTask(w http.ResponseWriter, req *http.Request) { var err error logger := h.logger.Session("desire-task") request := &models.DesireTaskRequest{} response := &models.TaskLifecycleResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return } logger = logger.WithData(lager.Data{"task_guid": request.TaskGuid}) err = h.db.DesireTask(logger, request.TaskDefinition, request.TaskGuid, request.Domain) if err != nil { response.Error = models.ConvertError(err) return } logger.Debug("start-task-auction-request") taskStartRequest := auctioneer.NewTaskStartRequestFromModel(request.TaskGuid, request.Domain, request.TaskDefinition) err = h.auctioneerClient.RequestTaskAuctions([]*auctioneer.TaskStartRequest{&taskStartRequest}) if err != nil { logger.Error("failed-requesting-task-auction", err) // The creation succeeded, the auction request error can be dropped } else { logger.Debug("succeeded-requesting-task-auction") } }
func (db *SQLDB) getTaskStartRequestsForKickablePendingTasks(logger lager.Logger, kickTasksDuration, expirePendingTaskDuration time.Duration) ([]*auctioneer.TaskStartRequest, uint64) { logger = logger.Session("get-task-start-requests-for-kickable-pending-tasks") rows, err := db.all(logger, db.db, tasksTable, taskColumns, NoLockRow, "state = ? AND updated_at < ? AND created_at > ?", models.Task_Pending, db.clock.Now().Add(-kickTasksDuration).UnixNano(), db.clock.Now().Add(-expirePendingTaskDuration).UnixNano(), ) if err != nil { logger.Error("failed-query", err) } defer rows.Close() var failedFetches uint64 tasksToAuction := []*auctioneer.TaskStartRequest{} for rows.Next() { task, err := db.fetchTask(logger, rows, db.db) if err != nil { logger.Error("failed-fetch", err) if err == models.ErrDeserialize { failedFetches++ } } else { taskStartRequest := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition) tasksToAuction = append(tasksToAuction, &taskStartRequest) } } if rows.Err() != nil { logger.Error("failed-getting-next-row", rows.Err()) } return tasksToAuction, failedFetches }
// ConvergeTasks scans every task record in etcd and drives stale tasks toward
// a terminal state:
//   - Pending tasks older than expirePendingTaskDuration are marked failed;
//     pending tasks idle for kickTaskDuration are re-submitted for auction.
//   - Running tasks whose cell is no longer registered are marked failed.
//   - Completed/Resolving tasks older than expireCompletedTaskDuration are
//     deleted; idle ones are (re-)submitted to the completion callback.
//
// It also emits per-state task-count metrics and convergence counters.
// Work is accumulated during the scan and applied in batches afterwards.
func (db *ETCDDB) ConvergeTasks(
	logger lager.Logger,
	kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration,
) {
	logger = logger.Session("converge-tasks")
	logger.Info("starting-convergence")
	defer logger.Info("finished-convergence")

	convergeTaskRunsCounter.Increment()

	convergeStart := db.clock.Now()
	defer func() {
		convergeTaskDuration.Send(time.Since(convergeStart))
	}()

	logger.Debug("listing-tasks")
	taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot)
	if modelErr != nil {
		logger.Debug("failed-listing-task")
		// -1 signals "count unknown" to metric consumers when listing fails.
		pendingTasks.Send(-1)
		runningTasks.Send(-1)
		completedTasks.Send(-1)
		resolvingTasks.Send(-1)
		return
	}
	logger.Debug("succeeded-listing-task")

	logger.Debug("listing-cells")
	cellSet, modelErr := db.serviceClient.Cells(logger)
	if modelErr != nil {
		if !models.ErrResourceNotFound.Equal(modelErr) {
			logger.Debug("failed-listing-cells")
			return
		}
		// No cells registered yet is not an error; treat as an empty set.
		cellSet = models.CellSet{}
	}
	logger.Debug("succeeded-listing-cells")

	// Closures below accumulate the work to perform after the scan completes.
	logError := func(task *models.Task, message string) {
		logger.Error(message, nil, lager.Data{
			"task-guid": task.TaskGuid,
		})
	}

	tasksToComplete := []*models.Task{}
	scheduleForCompletion := func(task *models.Task) {
		// A task with no callback URL has nothing to notify.
		if task.CompletionCallbackUrl == "" {
			return
		}
		tasksToComplete = append(tasksToComplete, task)
	}

	keysToDelete := []string{}

	tasksToCAS := []compareAndSwappableTask{}
	scheduleForCASByIndex := func(index uint64, newTask *models.Task) {
		tasksToCAS = append(tasksToCAS, compareAndSwappableTask{
			OldIndex: index,
			NewTask:  newTask,
		})
	}

	tasksToAuction := []*auctioneer.TaskStartRequest{}

	var tasksKicked uint64 = 0

	pendingCount := 0
	runningCount := 0
	completedCount := 0
	resolvingCount := 0

	logger.Debug("determining-convergence-work", lager.Data{"num-tasks": len(taskState.Nodes)})
	for _, node := range taskState.Nodes {
		task := new(models.Task)
		err := db.deserializeModel(logger, node, task)
		if err != nil {
			logger.Error("failed-to-unmarshal-task-json", err, lager.Data{
				"key":   node.Key,
				"value": node.Value,
			})
			// Records that cannot be decoded are pruned outright.
			keysToDelete = append(keysToDelete, node.Key)
			continue
		}

		shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration

		switch task.State {
		case models.Task_Pending:
			pendingCount++
			// Expiry is measured from creation; kicking from last update.
			shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration
			if shouldMarkAsFailed {
				logError(task, "failed-to-start-in-time")
				db.markTaskFailed(task, "not started within time limit")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			} else if shouldKickTask {
				logger.Info("requesting-auction-for-pending-task", lager.Data{"task-guid": task.TaskGuid})
				// NOTE(review): called here with the whole task, but elsewhere in
				// this file NewTaskStartRequestFromModel takes (guid, domain,
				// definition) — confirm which signature this package compiles against.
				start := auctioneer.NewTaskStartRequestFromModel(task)
				tasksToAuction = append(tasksToAuction, &start)
				tasksKicked++
			}
		case models.Task_Running:
			runningCount++
			cellIsAlive := cellSet.HasCellID(task.CellId)
			if !cellIsAlive {
				logError(task, "cell-disappeared")
				db.markTaskFailed(task, "cell disappeared before completion")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			}
		case models.Task_Completed:
			completedCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-start-resolving-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("kicking-completed-task", lager.Data{"task-guid": task.TaskGuid})
				scheduleForCompletion(task)
				tasksKicked++
			}
		case models.Task_Resolving:
			resolvingCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-resolve-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("demoting-resolving-to-completed", lager.Data{"task-guid": task.TaskGuid})
				// A stuck Resolving task is demoted back to Completed so the
				// completion callback can be retried.
				demoted := demoteToCompleted(task)
				scheduleForCASByIndex(node.ModifiedIndex, demoted)
				scheduleForCompletion(demoted)
				tasksKicked++
			}
		}
	}
	logger.Debug("done-determining-convergence-work", lager.Data{
		"num-tasks-to-auction":  len(tasksToAuction),
		"num-tasks-to-cas":      len(tasksToCAS),
		"num-tasks-to-complete": len(tasksToComplete),
		"num-keys-to-delete":    len(keysToDelete),
	})

	pendingTasks.Send(pendingCount)
	runningTasks.Send(runningCount)
	completedTasks.Send(completedCount)
	resolvingTasks.Send(resolvingCount)

	// Auction failures are logged (with all affected guids) but not fatal;
	// the next convergence pass will retry.
	if len(tasksToAuction) > 0 {
		logger.Debug("requesting-task-auctions", lager.Data{"num-tasks-to-auction": len(tasksToAuction)})
		if err := db.auctioneerClient.RequestTaskAuctions(tasksToAuction); err != nil {
			taskGuids := make([]string, len(tasksToAuction))
			for i, task := range tasksToAuction {
				taskGuids[i] = task.TaskGuid
			}
			logger.Error("failed-to-request-auctions-for-pending-tasks", err, lager.Data{"task-guids": taskGuids})
		}
		logger.Debug("done-requesting-task-auctions", lager.Data{"num-tasks-to-auction": len(tasksToAuction)})
	}

	tasksKickedCounter.Add(tasksKicked)
	logger.Debug("compare-and-swapping-tasks", lager.Data{"num-tasks-to-cas": len(tasksToCAS)})
	err := db.batchCompareAndSwapTasks(tasksToCAS, logger)
	if err != nil {
		// A CAS failure aborts the remaining completion/deletion phases.
		return
	}
	logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num-tasks-to-cas": len(tasksToCAS)})

	logger.Debug("submitting-tasks-to-be-completed", lager.Data{"num-tasks-to-complete": len(tasksToComplete)})
	for _, task := range tasksToComplete {
		db.taskCompletionClient.Submit(db, task)
	}
	logger.Debug("done-submitting-tasks-to-be-completed", lager.Data{"num-tasks-to-complete": len(tasksToComplete)})

	tasksPrunedCounter.Add(uint64(len(keysToDelete)))
	logger.Debug("deleting-keys", lager.Data{"num-keys-to-delete": len(keysToDelete)})
	db.batchDeleteTasks(keysToDelete, logger)
	logger.Debug("done-deleting-keys", lager.Data{"num-keys-to-delete": len(keysToDelete)})
}
persistedTask, err := etcdDB.TaskByGuid(logger, taskGuid) Expect(err).NotTo(HaveOccurred()) Expect(persistedTask.CreatedAt).To(Equal(clock.Now().UnixNano())) }) It("sets the UpdatedAt time", func() { persistedTask, err := etcdDB.TaskByGuid(logger, taskGuid) Expect(err).NotTo(HaveOccurred()) Expect(persistedTask.UpdatedAt).To(Equal(clock.Now().UnixNano())) }) Context("when able to fetch the Auctioneer address", func() { It("requests an auction", func() { Expect(fakeAuctioneerClient.RequestTaskAuctionsCallCount()).To(Equal(1)) expectedStartRequest := auctioneer.NewTaskStartRequestFromModel(task) requestedTasks := fakeAuctioneerClient.RequestTaskAuctionsArgsForCall(0) Expect(requestedTasks).To(HaveLen(1)) Expect(*requestedTasks[0]).To(Equal(expectedStartRequest)) }) Context("when requesting a task auction succeeds", func() { BeforeEach(func() { fakeAuctioneerClient.RequestTaskAuctionsReturns(nil) }) It("does not return an error", func() { Expect(errDesire).NotTo(HaveOccurred()) }) })
} else if task.TaskGuid == taskGuid2 { task2Completions++ } } Expect(task1Completions).To(Equal(1)) Expect(task2Completions).To(Equal(1)) }) }) Context("when there are tasks to auction", func() { const taskGuid1 = "to-auction-1" const taskGuid2 = "to-auction-2" BeforeEach(func() { taskStartRequest1 := auctioneer.NewTaskStartRequestFromModel(taskGuid1, "domain", model_helpers.NewValidTaskDefinition()) taskStartRequest2 := auctioneer.NewTaskStartRequestFromModel(taskGuid2, "domain", model_helpers.NewValidTaskDefinition()) fakeTaskDB.ConvergeTasksReturns([]*auctioneer.TaskStartRequest{&taskStartRequest1, &taskStartRequest2}, nil) }) It("requests an auction", func() { Expect(fakeAuctioneerClient.RequestTaskAuctionsCallCount()).To(Equal(1)) requestedTasks := fakeAuctioneerClient.RequestTaskAuctionsArgsForCall(0) Expect(requestedTasks).To(HaveLen(2)) Expect([]string{requestedTasks[0].TaskGuid, requestedTasks[1].TaskGuid}).To(ConsistOf(taskGuid1, taskGuid2)) }) Context("when requesting an auction is unsuccessful", func() { BeforeEach(func() { fakeAuctioneerClient.RequestTaskAuctionsReturns(errors.New("oops"))