Beispiel #1
0
func (h *TaskController) DesireTask(logger lager.Logger, taskDefinition *models.TaskDefinition, taskGuid, domain string) error {
	var err error
	logger = logger.Session("desire-task")

	logger = logger.WithData(lager.Data{"task_guid": taskGuid})

	err = h.db.DesireTask(logger, taskDefinition, taskGuid, domain)
	if err != nil {
		return err
	}

	logger.Debug("start-task-auction-request")
	taskStartRequest := auctioneer.NewTaskStartRequestFromModel(taskGuid, domain, taskDefinition)
	err = h.auctioneerClient.RequestTaskAuctions(logger, []*auctioneer.TaskStartRequest{&taskStartRequest})
	if err != nil {
		logger.Error("failed-requesting-task-auction", err)
		// The creation succeeded, the auction request error can be dropped
	} else {
		logger.Debug("succeeded-requesting-task-auction")
	}

	return nil
}
Beispiel #2
0
func (db *SQLDB) getTaskStartRequestsForKickablePendingTasks(logger lager.Logger, kickTasksDuration, expirePendingTaskDuration time.Duration) ([]*auctioneer.TaskStartRequest, uint64) {
	logger = logger.Session("get-task-start-requests-for-kickable-pending-tasks")

	rows, err := db.all(logger, db.db, tasksTable,
		taskColumns, NoLockRow,
		"state = ? AND updated_at < ? AND created_at > ?",
		models.Task_Pending, db.clock.Now().Add(-kickTasksDuration).UnixNano(), db.clock.Now().Add(-expirePendingTaskDuration).UnixNano(),
	)

	if err != nil {
		logger.Error("failed-query", err)
		return []*auctioneer.TaskStartRequest{}, math.MaxUint64
	}

	defer rows.Close()

	var failedFetches uint64
	tasksToAuction := []*auctioneer.TaskStartRequest{}
	for rows.Next() {
		task, err := db.fetchTask(logger, rows, db.db)
		if err != nil {
			logger.Error("failed-fetch", err)
			if err == models.ErrDeserialize {
				failedFetches++
			}
		} else {
			taskStartRequest := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition)
			tasksToAuction = append(tasksToAuction, &taskStartRequest)
		}
	}

	if rows.Err() != nil {
		logger.Error("failed-getting-next-row", rows.Err())
	}

	return tasksToAuction, failedFetches
}
						} else if task.TaskGuid == taskGuid2 {
							task2Completions++
						}
					}

					Expect(task1Completions).To(Equal(1))
					Expect(task2Completions).To(Equal(1))
				})
			})

			Context("when there are tasks to auction", func() {
				const taskGuid1 = "to-auction-1"
				const taskGuid2 = "to-auction-2"

				BeforeEach(func() {
					taskStartRequest1 := auctioneer.NewTaskStartRequestFromModel(taskGuid1, "domain", model_helpers.NewValidTaskDefinition())
					taskStartRequest2 := auctioneer.NewTaskStartRequestFromModel(taskGuid2, "domain", model_helpers.NewValidTaskDefinition())
					fakeTaskDB.ConvergeTasksReturns([]*auctioneer.TaskStartRequest{&taskStartRequest1, &taskStartRequest2}, nil)
				})

				It("requests an auction", func() {
					Expect(fakeAuctioneerClient.RequestTaskAuctionsCallCount()).To(Equal(1))

					_, requestedTasks := fakeAuctioneerClient.RequestTaskAuctionsArgsForCall(0)
					Expect(requestedTasks).To(HaveLen(2))
					Expect([]string{requestedTasks[0].TaskGuid, requestedTasks[1].TaskGuid}).To(ConsistOf(taskGuid1, taskGuid2))
				})

				Context("when requesting an auction is unsuccessful", func() {
					BeforeEach(func() {
						fakeAuctioneerClient.RequestTaskAuctionsReturns(errors.New("oops"))
Beispiel #4
0
func (db *ETCDDB) ConvergeTasks(
	logger lager.Logger,
	cellSet models.CellSet,
	kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration,
) ([]*auctioneer.TaskStartRequest, []*models.Task) {
	logger.Info("starting-convergence")
	defer logger.Info("finished-convergence")

	convergeTaskRunsCounter.Increment()

	convergeStart := db.clock.Now()

	defer func() {
		err := convergeTaskDuration.Send(time.Since(convergeStart))
		if err != nil {
			logger.Error("failed-to-send-converge-task-duration-metric", err)
		}
	}()

	logger.Debug("listing-tasks")
	taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot)
	if modelErr != nil {
		logger.Debug("failed-listing-task")
		sendTaskMetrics(logger, -1, -1, -1, -1)
		return nil, nil
	}
	logger.Debug("succeeded-listing-task")

	logError := func(task *models.Task, message string) {
		logger.Error(message, nil, lager.Data{
			"task_guid": task.TaskGuid,
		})
	}

	tasksToComplete := []*models.Task{}
	scheduleForCompletion := func(task *models.Task) {
		if task.CompletionCallbackUrl == "" {
			return
		}
		tasksToComplete = append(tasksToComplete, task)
	}

	keysToDelete := []string{}

	tasksToCAS := []compareAndSwappableTask{}
	scheduleForCASByIndex := func(index uint64, newTask *models.Task) {
		tasksToCAS = append(tasksToCAS, compareAndSwappableTask{
			OldIndex: index,
			NewTask:  newTask,
		})
	}

	tasksToAuction := []*auctioneer.TaskStartRequest{}

	var tasksKicked uint64 = 0

	pendingCount := 0
	runningCount := 0
	completedCount := 0
	resolvingCount := 0

	logger.Debug("determining-convergence-work", lager.Data{"num_tasks": len(taskState.Nodes)})
	for _, node := range taskState.Nodes {
		task := new(models.Task)
		err := db.deserializeModel(logger, node, task)
		if err != nil || task.Validate() != nil {
			logger.Error("found-invalid-task", err, lager.Data{
				"key":   node.Key,
				"value": node.Value,
			})

			keysToDelete = append(keysToDelete, node.Key)
			continue
		}

		shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration

		switch task.State {
		case models.Task_Pending:
			pendingCount++
			shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration
			if shouldMarkAsFailed {
				logError(task, "failed-to-start-in-time")
				db.markTaskFailed(task, "not started within time limit")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			} else if shouldKickTask {
				logger.Info("requesting-auction-for-pending-task", lager.Data{"task_guid": task.TaskGuid})
				start := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition)
				tasksToAuction = append(tasksToAuction, &start)
				tasksKicked++
			}
		case models.Task_Running:
			runningCount++
			cellIsAlive := cellSet.HasCellID(task.CellId)
			if !cellIsAlive {
				logError(task, "cell-disappeared")
				db.markTaskFailed(task, "cell disappeared before completion")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			}
		case models.Task_Completed:
			completedCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-start-resolving-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("kicking-completed-task", lager.Data{"task_guid": task.TaskGuid})
				scheduleForCompletion(task)
				tasksKicked++
			}
		case models.Task_Resolving:
			resolvingCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-resolve-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("demoting-resolving-to-completed", lager.Data{"task_guid": task.TaskGuid})
				demoted := demoteToCompleted(task)
				scheduleForCASByIndex(node.ModifiedIndex, demoted)
				scheduleForCompletion(demoted)
				tasksKicked++
			}
		}
	}
	logger.Debug("done-determining-convergence-work", lager.Data{
		"num_tasks_to_auction":  len(tasksToAuction),
		"num_tasks_to_cas":      len(tasksToCAS),
		"num_tasks_to_complete": len(tasksToComplete),
		"num_keys_to_delete":    len(keysToDelete),
	})

	sendTaskMetrics(logger, pendingCount, runningCount, completedCount, resolvingCount)

	tasksKickedCounter.Add(tasksKicked)
	logger.Debug("compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})
	err := db.batchCompareAndSwapTasks(tasksToCAS, logger)
	if err != nil {
		return nil, nil
	}
	logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})

	tasksPrunedCounter.Add(uint64(len(keysToDelete)))
	logger.Debug("deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})
	db.batchDeleteTasks(keysToDelete, logger)
	logger.Debug("done-deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})

	return tasksToAuction, tasksToComplete
}
Beispiel #5
0
			Expect(sender.GetCounter("ConvergenceTasksPruned")).To(Equal(uint64(4)))
			Expect(sender.GetCounter("ConvergenceTasksKicked")).To(Equal(uint64(5)))
		})

		Context("pending tasks", func() {
			It("fails expired tasks", func() {
				task, err := sqlDB.TaskByGuid(logger, "pending-expired-task")
				Expect(err).NotTo(HaveOccurred())
				Expect(task.FailureReason).To(Equal("not started within time limit"))
				Expect(task.Failed).To(BeTrue())
				Expect(task.Result).To(Equal(""))
				Expect(task.State).To(Equal(models.Task_Completed))
				Expect(task.UpdatedAt).To(Equal(fakeClock.Now().UnixNano()))
				Expect(task.FirstCompletedAt).To(Equal(fakeClock.Now().UnixNano()))

				taskRequest := auctioneer.NewTaskStartRequestFromModel("pending-expired-task", domain, taskDef)
				Expect(tasksToAuction).NotTo(ContainElement(&taskRequest))
			})

			It("returns tasks that should be kicked for auctioning", func() {
				task, err := sqlDB.TaskByGuid(logger, "pending-kickable-task")
				Expect(err).NotTo(HaveOccurred())
				Expect(task.FailureReason).NotTo(Equal("not started within time limit"))
				Expect(task.Failed).NotTo(BeTrue())

				taskRequest := auctioneer.NewTaskStartRequestFromModel("pending-kickable-task", domain, taskDef)
				Expect(tasksToAuction).To(ContainElement(&taskRequest))
			})

			It("delete tasks that should be kicked if they're invalid", func() {
				_, err := sqlDB.TaskByGuid(logger, "pending-kickable-invalid-task")