Esempio n. 1
0
func (db *ETCDDB) ConvergeTasks(
	logger lager.Logger,
	cellSet models.CellSet,
	kickTaskDuration, expirePendingTaskDuration, expireCompletedTaskDuration time.Duration,
) ([]*auctioneer.TaskStartRequest, []*models.Task) {
	logger.Info("starting-convergence")
	defer logger.Info("finished-convergence")

	convergeTaskRunsCounter.Increment()

	convergeStart := db.clock.Now()

	defer func() {
		err := convergeTaskDuration.Send(time.Since(convergeStart))
		if err != nil {
			logger.Error("failed-to-send-converge-task-duration-metric", err)
		}
	}()

	logger.Debug("listing-tasks")
	taskState, modelErr := db.fetchRecursiveRaw(logger, TaskSchemaRoot)
	if modelErr != nil {
		logger.Debug("failed-listing-task")
		sendTaskMetrics(logger, -1, -1, -1, -1)
		return nil, nil
	}
	logger.Debug("succeeded-listing-task")

	logError := func(task *models.Task, message string) {
		logger.Error(message, nil, lager.Data{
			"task_guid": task.TaskGuid,
		})
	}

	tasksToComplete := []*models.Task{}
	scheduleForCompletion := func(task *models.Task) {
		if task.CompletionCallbackUrl == "" {
			return
		}
		tasksToComplete = append(tasksToComplete, task)
	}

	keysToDelete := []string{}

	tasksToCAS := []compareAndSwappableTask{}
	scheduleForCASByIndex := func(index uint64, newTask *models.Task) {
		tasksToCAS = append(tasksToCAS, compareAndSwappableTask{
			OldIndex: index,
			NewTask:  newTask,
		})
	}

	tasksToAuction := []*auctioneer.TaskStartRequest{}

	var tasksKicked uint64 = 0

	pendingCount := 0
	runningCount := 0
	completedCount := 0
	resolvingCount := 0

	logger.Debug("determining-convergence-work", lager.Data{"num_tasks": len(taskState.Nodes)})
	for _, node := range taskState.Nodes {
		task := new(models.Task)
		err := db.deserializeModel(logger, node, task)
		if err != nil || task.Validate() != nil {
			logger.Error("found-invalid-task", err, lager.Data{
				"key":   node.Key,
				"value": node.Value,
			})

			keysToDelete = append(keysToDelete, node.Key)
			continue
		}

		shouldKickTask := db.durationSinceTaskUpdated(task) >= kickTaskDuration

		switch task.State {
		case models.Task_Pending:
			pendingCount++
			shouldMarkAsFailed := db.durationSinceTaskCreated(task) >= expirePendingTaskDuration
			if shouldMarkAsFailed {
				logError(task, "failed-to-start-in-time")
				db.markTaskFailed(task, "not started within time limit")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			} else if shouldKickTask {
				logger.Info("requesting-auction-for-pending-task", lager.Data{"task_guid": task.TaskGuid})
				start := auctioneer.NewTaskStartRequestFromModel(task.TaskGuid, task.Domain, task.TaskDefinition)
				tasksToAuction = append(tasksToAuction, &start)
				tasksKicked++
			}
		case models.Task_Running:
			runningCount++
			cellIsAlive := cellSet.HasCellID(task.CellId)
			if !cellIsAlive {
				logError(task, "cell-disappeared")
				db.markTaskFailed(task, "cell disappeared before completion")
				scheduleForCASByIndex(node.ModifiedIndex, task)
				tasksKicked++
			}
		case models.Task_Completed:
			completedCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-start-resolving-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("kicking-completed-task", lager.Data{"task_guid": task.TaskGuid})
				scheduleForCompletion(task)
				tasksKicked++
			}
		case models.Task_Resolving:
			resolvingCount++
			shouldDeleteTask := db.durationSinceTaskFirstCompleted(task) >= expireCompletedTaskDuration
			if shouldDeleteTask {
				logError(task, "failed-to-resolve-in-time")
				keysToDelete = append(keysToDelete, node.Key)
			} else if shouldKickTask {
				logger.Info("demoting-resolving-to-completed", lager.Data{"task_guid": task.TaskGuid})
				demoted := demoteToCompleted(task)
				scheduleForCASByIndex(node.ModifiedIndex, demoted)
				scheduleForCompletion(demoted)
				tasksKicked++
			}
		}
	}
	logger.Debug("done-determining-convergence-work", lager.Data{
		"num_tasks_to_auction":  len(tasksToAuction),
		"num_tasks_to_cas":      len(tasksToCAS),
		"num_tasks_to_complete": len(tasksToComplete),
		"num_keys_to_delete":    len(keysToDelete),
	})

	sendTaskMetrics(logger, pendingCount, runningCount, completedCount, resolvingCount)

	tasksKickedCounter.Add(tasksKicked)
	logger.Debug("compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})
	err := db.batchCompareAndSwapTasks(tasksToCAS, logger)
	if err != nil {
		return nil, nil
	}
	logger.Debug("done-compare-and-swapping-tasks", lager.Data{"num_tasks_to_cas": len(tasksToCAS)})

	tasksPrunedCounter.Add(uint64(len(keysToDelete)))
	logger.Debug("deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})
	db.batchDeleteTasks(keysToDelete, logger)
	logger.Debug("done-deleting-keys", lager.Data{"num_keys_to_delete": len(keysToDelete)})

	return tasksToAuction, tasksToComplete
}
Esempio n. 2
0
	Describe("Validate", func() {
		Context("when the task has a domain, valid guid, stack, and valid action", func() {
			It("is valid", func() {
				task = models.Task{
					Domain:   "some-domain",
					TaskGuid: "some-task-guid",
					TaskDefinition: &models.TaskDefinition{
						RootFs: "some:rootfs",
						Action: models.WrapAction(&models.RunAction{
							Path: "ls",
							User: "******",
						}),
					},
				}

				err := task.Validate()
				Expect(err).NotTo(HaveOccurred())
			})
		})

		Context("when the task GUID is present but invalid", func() {
			It("returns an error indicating so", func() {
				task = models.Task{
					Domain:   "some-domain",
					TaskGuid: "invalid/guid",
					TaskDefinition: &models.TaskDefinition{
						RootFs: "some:rootfs",
						Action: models.WrapAction(&models.RunAction{
							Path: "ls",
							User: "******",
						}),