Esempio n. 1
// QueuePendingTasks watches the PENDING Job Store, generates the appropriate Tasks and
// queues them into the Pending Tasks queue.
// QueuePendingTasks runs in a separate goroutine started in the Worker.Start call.
// It starts an inner goroutine that wakes up on every StoreScanTick tick and then
// blocks until that goroutine reports its final result on an unbuffered channel.
// It returns an error if the Jobs can't be read, or nil once a receive on bw.done
// succeeds, i.e. the worker is being stopped.
//
// NOTE(review): this listing appears to have lost text during extraction: the
// closing braces, the `queuer` loop label targeted by `break queuer`, and the
// right-hand side of `jobs, err :=` are not visible. Confirm the details below
// against the original file.
func (bw *BasicWorker) QueuePendingTasks() error {
	// state is used in the log messages below; queue names the destination queue.
	state := taurus.PENDING
	queue := Pending
	// Unbuffered: the goroutine's single send pairs with the receive at the end of
	// this function.
	errChan := make(chan error)
	// NOTE(review): no ticker.Stop() is visible in this listing; confirm the ticker
	// is released when the loop exits.
	ticker := time.NewTicker(StoreScanTick)
	go func() {
		var qpErr error
		for {
			select {
			// Shutdown requested: leave the loop and report a nil error.
			case <-bw.done:
				qpErr = nil
				log.Printf("Finishing %s Task queuer", state)
				break queuer
			// Periodic scan.
			case <-ticker.C:
				// NOTE(review): the expression that loads the jobs is missing here;
				// presumably a Job Store read filtered by state, to be confirmed.
				jobs, err :=
				if err != nil {
					// A failed read ends the queuer and becomes the return value.
					qpErr = fmt.Errorf("Error reading new Jobs: %s", err)
					break queuer
				for _, job := range jobs {
					// NOTE(review): cancel is never called in the visible code; confirm
					// the timeout context is released after the MesosTasks call.
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					launchedTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, nil)
					// Logged before err is checked, so on failure this reports the length
					// of whatever MesosTasks returned alongside the error.
					log.Printf("Job %s has %d launched tasks", job.Id, len(launchedTasks))
					if err != nil {
						// The failure is only logged, not propagated to the caller.
						// NOTE(review): whether the loop skips to the next job here is
						// not visible in this listing.
						log.Printf("Failed to retrieve Tasks for Job %s: %s", job.Id, err)
					for _, jobTask := range job.Tasks {
						// Queue one Task per replica beyond the launched count.
						// NOTE(review): the subtraction is done in uint32, so it wraps
						// around if len(launchedTasks) exceeds jobTask.Replicas. Also,
						// launchedTasks is fetched per Job but subtracted from every
						// jobTask's Replicas. Confirm both are intended.
						for i := uint32(0); i < jobTask.Replicas-uint32(len(launchedTasks)); i++ {
							taskInfo := taurus.CreateMesosTaskInfo(job.Id, jobTask)
							task := &taurus.Task{
								Info:  taskInfo,
								JobId: job.Id,
							taskId := taskInfo.TaskId.GetValue()
							log.Printf("Queueing task: %s", taskId)
							// Publish failures are logged and do not stop the queuer.
							if err := bw.queue.Publish(queue, task); err != nil {
								log.Printf("Failed to queue %s: %s", taskId, err)
		// Hand the final result to the caller blocked on errChan.
		errChan <- qpErr
		log.Printf("%s tasks queuer ticker stopped", state)

	// Block until the goroutine above has finished and reported its result.
	return <-errChan
Esempio n. 2
// ReconcilePendingJobs monitors the launched Tasks of each PENDING Job and marks the Job
// as RUNNING once the number of launched tasks equals the sum of Replicas over all of
// the Job's tasks.
// ReconcilePendingJobs runs in a separate goroutine started in the Worker.Start call.
// It starts an inner goroutine that wakes up on every ReconcileScanTick tick and then
// blocks until that goroutine reports its final result on an unbuffered channel.
// It returns an error if the Jobs can't be read or a Job can't be updated, or nil
// once a receive on bw.done succeeds, i.e. the worker is being stopped.
//
// NOTE(review): this listing appears to have lost text during extraction: the
// closing braces, the `reconciler` loop label targeted by `break reconciler`, and
// the right-hand sides of `jobs, err :=` and `if err :=;` are not visible. Confirm
// the details below against the original file.
func (bw *BasicWorker) ReconcilePendingJobs() error {
	// oldState appears in log/error messages; newState is assigned to reconciled Jobs.
	oldState := taurus.PENDING
	newState := taurus.RUNNING
	// Unbuffered: the goroutine's single send pairs with the receive at the end of
	// this function.
	errChan := make(chan error)
	// NOTE(review): no ticker.Stop() is visible in this listing; confirm the ticker
	// is released when the loop exits.
	ticker := time.NewTicker(ReconcileScanTick)
	go func() {
		var reconErr error
		for {
			select {
			// Shutdown requested: leave the loop and report a nil error.
			case <-bw.done:
				log.Printf("Finished %s Reconciler", oldState)
				reconErr = nil
				break reconciler
			// Periodic reconciliation pass.
			case <-ticker.C:
				// NOTE(review): the expression that loads the jobs is missing here;
				// presumably a Job Store read filtered by oldState, to be confirmed.
				jobs, err :=
				if err != nil {
					// A failed read ends the reconciler and becomes the return value.
					reconErr = fmt.Errorf("Error reading %s Jobs: %s", oldState, err)
					break reconciler
				for _, job := range jobs {
					// NOTE(review): cancel is never called in the visible code; confirm
					// the timeout context is released after the MesosTasks call.
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					launchedTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, nil)
					// Logged before err is checked, so on failure this reports the length
					// of whatever MesosTasks returned alongside the error.
					log.Printf("Job %s has %d launched tasks", job.Id, len(launchedTasks))
					if err != nil {
						// The failure is only logged, not propagated to the caller.
						// NOTE(review): whether the loop skips to the next job here is
						// not visible in this listing.
						log.Printf("Failed to retrieve Tasks for Job %s: %s", job.Id, err)
					// Total number of task replicas the Job declares.
					jobTaskCount := uint32(0)
					for _, jobTask := range job.Tasks {
						jobTaskCount += jobTask.Replicas
					// Only an exact match between launched and declared counts flips the state.
					if uint32(len(launchedTasks)) == jobTaskCount {
						job.State = newState
						// NOTE(review): the expression that persists the updated job is
						// missing here; presumably a Job Store update, to be confirmed.
						if err :=; err != nil {
							// A failed update ends the reconciler and becomes the return value.
							reconErr = fmt.Errorf("Failed to update job %s: %s", job.Id, err)
							break reconciler
						log.Printf("Job %s marked as %s", job.Id, newState)
		// Hand the final result to the caller blocked on errChan.
		errChan <- reconErr
		log.Printf("%s Task Reconciler tick stopped", oldState)

	// Block until the goroutine above has finished and reported its result.
	return <-errChan
Esempio n. 3
// KillJobTasks monitors all Jobs marked as STOPPED and asks the given scheduler driver
// to kill each of their Tasks that Mesos reports in state TASK_RUNNING.
// KillJobTasks runs in a separate goroutine started in the Worker.Start call.
// It starts an inner goroutine that wakes up on every StoreScanTick tick and then
// blocks until that goroutine reports its final result on an unbuffered channel.
// It returns an error if the Jobs can't be read, or nil once a receive on bw.done
// succeeds, i.e. the worker is being stopped.
//
// NOTE(review): this listing appears to have lost text during extraction: the
// closing braces, the `killer` loop label targeted by `break killer`, and the
// right-hand side of `jobs, err :=` are not visible. Confirm the details below
// against the original file.
func (bw *BasicWorker) KillJobTasks(driver scheduler.SchedulerDriver) error {
	// state is used in the log and error messages below.
	state := taurus.STOPPED
	// Unbuffered: the goroutine's single send pairs with the receive at the end of
	// this function.
	errChan := make(chan error)
	// NOTE(review): no ticker.Stop() is visible in this listing; confirm the ticker
	// is released when the loop exits.
	ticker := time.NewTicker(StoreScanTick)
	go func() {
		var killErr error
		for {
			select {
			// Shutdown requested: leave the loop and report a nil error.
			case <-bw.done:
				killErr = nil
				// NOTE(review): the message says "Task queuer" although this is the
				// task killer; looks copied from QueuePendingTasks.
				log.Printf("Finishing %s Task queuer", state)
				break killer
			// Periodic scan.
			case <-ticker.C:
				// NOTE(review): the expression that loads the jobs is missing here;
				// presumably a Job Store read filtered by state, to be confirmed.
				jobs, err :=
				if err != nil {
					// A failed read ends the killer and becomes the return value.
					killErr = fmt.Errorf("Error reading %s Jobs: %s", state, err)
					break killer
				for _, job := range jobs {
					// NOTE(review): cancel is never called in the visible code; confirm
					// the timeout context is released after the MesosTasks call.
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					// Ask only for the Job's tasks in state TASK_RUNNING.
					mesosTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, mesos.TaskState_TASK_RUNNING.Enum())
					if err != nil {
						// The failure is only logged, not propagated to the caller.
						// NOTE(review): whether the loop skips to the next job here is
						// not visible in this listing.
						log.Printf("Failed to read tasks for Job %s: %s", job.Id, err)
					// Only the range key is used; presumably mesosTasks is keyed by task
					// ID, to be confirmed against taurus.MesosTasks.
					for taskId, _ := range mesosTasks {
						mesosTaskId := mesosutil.NewTaskID(taskId)
						killStatus, err := driver.KillTask(mesosTaskId)
						if err != nil {
							// Kill failures are logged and do not stop the killer.
							log.Printf("Mesos in state %s failed to kill the task %s: %s", killStatus, taskId, err)
		// Hand the final result to the caller blocked on errChan.
		errChan <- killErr
		log.Printf("%s tasks killer ticker stopped", state)

	// Block until the goroutine above has finished and reported its result.
	return <-errChan