Beispiel #1
0
// QueuePendingTasks watches PENDING Job Store, generates appropriate Tasks and queues them into Pending Tasks queue
//
// QueuePendingTasks runs in a separate goroutine started in Worker.Start call
// It returns error if it can't read Job Store or if the goroutine it's running in has been stopped.
func (bw *BasicWorker) QueuePendingTasks() error {
	state := taurus.PENDING
	queue := Pending
	errChan := make(chan error)
	ticker := time.NewTicker(StoreScanTick)
	go func() {
		var qpErr error
	queuer:
		for {
			select {
			case <-bw.done:
				ticker.Stop()
				qpErr = nil
				log.Printf("Finishing %s Task queuer", state)
				break queuer
			case <-ticker.C:
				jobs, err := bw.store.GetJobs(state)
				if err != nil {
					qpErr = fmt.Errorf("Error reading new Jobs: %s", err)
					break queuer
				}
				for _, job := range jobs {
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					launchedTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, nil)
					log.Printf("Job %s has %d launched tasks", job.Id, len(launchedTasks))
					if err != nil {
						log.Printf("Failed to retrieve Tasks for Job %s: %s", job.Id, err)
						cancel()
						continue
					}
					for _, jobTask := range job.Tasks {
						for i := uint32(0); i < jobTask.Replicas-uint32(len(launchedTasks)); i++ {
							taskInfo := taurus.CreateMesosTaskInfo(job.Id, jobTask)
							task := &taurus.Task{
								Info:  taskInfo,
								JobId: job.Id,
							}
							taskId := taskInfo.TaskId.GetValue()
							log.Printf("Queueing task: %s", taskId)
							if err := bw.queue.Publish(queue, task); err != nil {
								log.Printf("Failed to queue %s: %s", taskId, err)
								continue
							}
						}
					}
				}
			}
		}
		errChan <- qpErr
		log.Printf("%s tasks queuer ticker stopped", state)
	}()

	return <-errChan
}
Beispiel #2
0
// ReconcilePendingJobs monitors launched Tasks of each PENDING Job and marks the Job as RUNNING
// if all of the Job tasks have been attempted to launch
//
// ReconcilePendingJobs runs in a separate goroutine started in Worker.Start call
// It returns error if it can't read the Job Store or the gourine it is running in has been stopped
func (bw *BasicWorker) ReconcilePendingJobs() error {
	oldState := taurus.PENDING
	newState := taurus.RUNNING
	errChan := make(chan error)
	ticker := time.NewTicker(ReconcileScanTick)
	go func() {
		var reconErr error
	reconciler:
		for {
			select {
			case <-bw.done:
				log.Printf("Finished %s Reconciler", oldState)
				ticker.Stop()
				reconErr = nil
				break reconciler
			case <-ticker.C:
				jobs, err := bw.store.GetJobs(oldState)
				if err != nil {
					reconErr = fmt.Errorf("Error reading %s Jobs: %s", oldState, err)
					break reconciler
				}
				for _, job := range jobs {
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					launchedTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, nil)
					log.Printf("Job %s has %d launched tasks", job.Id, len(launchedTasks))
					if err != nil {
						log.Printf("Failed to retrieve Tasks for Job %s: %s", job.Id, err)
						cancel()
						continue
					}
					jobTaskCount := uint32(0)
					for _, jobTask := range job.Tasks {
						jobTaskCount += jobTask.Replicas
					}
					if uint32(len(launchedTasks)) == jobTaskCount {
						job.State = newState
						if err := bw.store.UpdateJob(job); err != nil {
							reconErr = fmt.Errorf("Failed to update job %s: %s", job.Id, err)
							break reconciler
						}
						log.Printf("Job %s marked as %s", job.Id, newState)
					}
				}
			}
		}
		errChan <- reconErr
		log.Printf("%s Task Reconciler tick stopped", oldState)
	}()

	return <-errChan
}
Beispiel #3
0
// KillJobTasks monitors all Jobs marked as STOPPED and kills all of their running Tasks
//
// KillJobTasks runs in a separate goroutine started in Worker.Start call
// It returns error if it can't read Job Store or the goroutine it is running in has been stopped
func (bw *BasicWorker) KillJobTasks(driver scheduler.SchedulerDriver) error {
	state := taurus.STOPPED
	errChan := make(chan error)
	ticker := time.NewTicker(StoreScanTick)
	go func() {
		var killErr error
	killer:
		for {
			select {
			case <-bw.done:
				ticker.Stop()
				killErr = nil
				log.Printf("Finishing %s Task queuer", state)
				break killer
			case <-ticker.C:
				jobs, err := bw.store.GetJobs(state)
				if err != nil {
					killErr = fmt.Errorf("Error reading %s Jobs: %s", state, err)
					break killer
				}
				for _, job := range jobs {
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					mesosTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, mesos.TaskState_TASK_RUNNING.Enum())
					if err != nil {
						log.Printf("Failed to read tasks for Job %s: %s", job.Id, err)
						cancel()
						continue
					}
					for taskId, _ := range mesosTasks {
						mesosTaskId := mesosutil.NewTaskID(taskId)
						killStatus, err := driver.KillTask(mesosTaskId)
						if err != nil {
							log.Printf("Mesos in state %s failed to kill the task %s: %s", killStatus, taskId, err)
							continue
						}
					}
				}
			}
		}
		errChan <- killErr
		log.Printf("%s tasks killer ticker stopped", state)
	}()

	return <-errChan
}