Example #1
0
// watchTasks polls the cell client once per second and reconciles the
// executor's task bookkeeping (e.taskStateMap) with the containers the cell
// reports:
//
//   - every reported LRP/task that is not yet recorded as running gets a
//     TASK_RUNNING status update;
//   - every tracked task the cell no longer reports gets a TASK_FINISHED
//     status update and is dropped from the map.
//
// When the cell reports no LRPs and no tasks and nothing is tracked for ten
// consecutive successful polls, the driver is aborted and watchTasks returns.
// Polls whose State() call fails are logged and skipped; they neither advance
// nor reset the idle counter.
//
// The function blocks until the driver is aborted, so callers presumably run
// it in its own goroutine — TODO confirm against the call site.
func (e *DiegoExecutor) watchTasks(driver exec.ExecutorDriver) {
	const (
		pollInterval = 1 * time.Second
		maxIdle      = 10 * time.Second
	)

	var timeInIdle time.Duration
	for {
		time.Sleep(pollInterval)

		state, err := e.cellClient.State()
		if err != nil {
			fmt.Println("Get rep state error", err)
			continue
		}

		// Collect the identifiers of everything the cell currently reports.
		repContainerSet := map[string]bool{}
		for _, lrp := range state.LRPs {
			repContainerSet[lrp.Identifier()] = true
		}
		// Original author's note: requires a patch to rep so that diego
		// tasks are returned in the state.
		for _, task := range state.Tasks {
			repContainerSet[task.Identifier()] = true
		}

		tracked := e.reconcileTasks(driver, repContainerSet)

		// Anything reported by the cell or still tracked resets the idle timer.
		if len(state.LRPs) != 0 || len(state.Tasks) != 0 || tracked != 0 {
			timeInIdle = 0
			continue
		}

		// Idle time is counted in poll intervals, not wall-clock time.
		timeInIdle += pollInterval
		if timeInIdle >= maxIdle {
			driver.Abort()
			// Stop polling once aborted; otherwise the loop would keep
			// running and call Abort again on every subsequent tick.
			return
		}
	}
}

// reconcileTasks brings e.taskStateMap in line with repContainerSet while
// holding e.lock, sending the matching status updates through driver, and
// returns the number of tasks still tracked afterwards.
func (e *DiegoExecutor) reconcileTasks(driver exec.ExecutorDriver, repContainerSet map[string]bool) int {
	e.lock.Lock()
	defer e.lock.Unlock()

	// Report containers the cell knows about as running, if not already
	// recorded as such.
	for taskID := range repContainerSet {
		if e.taskStateMap[taskID] == mesos.TaskState_TASK_RUNNING {
			continue
		}
		if _, err := sendTaskStatusUpdate(driver, taskID, mesos.TaskState_TASK_RUNNING); err != nil {
			// The map is left untouched, so the update is retried on the
			// next poll.
			fmt.Println("Send task status update error", taskID, err)
			continue
		}
		e.taskStateMap[taskID] = mesos.TaskState_TASK_RUNNING
	}

	// Tracked tasks missing from the cell state are reported as finished
	// (the original author was unsure whether "failed" would be more
	// accurate). Deleting from a map while ranging over it is safe in Go.
	for taskID := range e.taskStateMap {
		if _, exists := repContainerSet[taskID]; exists {
			continue
		}
		if _, err := sendTaskStatusUpdate(driver, taskID, mesos.TaskState_TASK_FINISHED); err != nil {
			// Keep the entry so the update is retried on the next poll.
			fmt.Println("Send task status update error", taskID, err)
			continue
		}
		delete(e.taskStateMap, taskID)
	}

	fmt.Println("repContainerSet: ", repContainerSet)
	fmt.Println("taskStateMap: ", e.taskStateMap)

	return len(e.taskStateMap)
}