Example #1
func (sched *NoneScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	log.Infoln("Status update: task", taskId, "is in state", status.State.Enum().String())

	c := sched.queue.GetCommandById(taskId)
	if c == nil {
		log.Errorln("Unable to find command for task", taskId)
		driver.Abort()
		return // c is nil; falling through would panic on c.Status below
	}
	if c.Status.GetState() == status.GetState() {
		// ignore repeated status updates
		return
	}
	c.Status = status

	// send status update to CommandHandler
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		sched.handler.CommandRunning(c)
	} else if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFinished(c)
	} else if status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFailed(c)
	}

	// stop if Commands channel was closed and all tasks are finished
	if sched.queue.Closed() && !sched.handler.HasRunningTasks() {
		log.Infoln("All tasks finished, stopping framework.")
		sched.handler.FinishAllCommands()
		driver.Stop(false)
	}
}
func (sched *testScheduler) StatusUpdate(dr SchedulerDriver, stat *mesos.TaskStatus) {
	log.Infoln("Sched.StatusUpdate() called.")
	sched.s.NotNil(stat)
	sched.s.Equal("test-task-001", stat.GetTaskId().GetValue())
	sched.wg.Done()
	log.Infof("Status update done with waitGroup")
}
Example #3
func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
	taskId := status.GetTaskId().GetValue()

	k.rw.Lock()
	defer k.rw.Unlock()
	task, state := k._get(taskId)

	switch status.GetState() {
	case mesos.TaskState_TASK_STAGING:
		k.handleTaskStaging(task, state, status)
	case mesos.TaskState_TASK_STARTING:
		k.handleTaskStarting(task, state, status)
	case mesos.TaskState_TASK_RUNNING:
		k.handleTaskRunning(task, state, status)
	case mesos.TaskState_TASK_FINISHED:
		k.handleTaskFinished(task, state, status)
	case mesos.TaskState_TASK_FAILED:
		k.handleTaskFailed(task, state, status)
	case mesos.TaskState_TASK_ERROR:
		k.handleTaskError(task, state, status)
	case mesos.TaskState_TASK_KILLED:
		k.handleTaskKilled(task, state, status)
	case mesos.TaskState_TASK_LOST:
		k.handleTaskLost(task, state, status)
	default:
		log.Warningf("unhandled status update for task: %v", taskId)
	}
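	// task may be nil here for an unknown taskId; Clone is assumed to
	// tolerate a nil receiver in this registry, returning nil.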
	return task.Clone(), state
}
Example #4
// mesos.Scheduler interface method.
// Invoked when the status of a task has changed.
func (this *TransformScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	fmt.Printf("Status update: task %s is in state %s\n", status.TaskId.GetValue(), status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_LOST || status.GetState() == mesos.TaskState_TASK_FAILED || status.GetState() == mesos.TaskState_TASK_FINISHED {
		this.removeTask(status.GetTaskId())
		this.decRunningInstances()
	}
}
Example #5
func (r *Reconciler) Update(status *mesos.TaskStatus) {
	r.taskLock.Lock()
	defer r.taskLock.Unlock()

	delete(r.tasks, status.GetTaskId().GetValue())

	if len(r.tasks) == 0 {
		r.reconciles = 0
	}
}
func (ctx *RunOnceApplicationContext) updateTaskState(status *mesos.TaskStatus) {
	for _, task := range ctx.tasks {
		if task.TaskID == status.GetTaskId().GetValue() {
			task.State = status.GetState()
			return
		}
	}

	framework.Logger.Warn("Got unexpected status update for unknown task with ID %s", status.GetTaskId().GetValue())
}
func (s *Scheduler) onTaskFailed(id string, status *mesos.TaskStatus) {
	if s.cluster.Exists(id) {
		task := s.cluster.Get(id)
		if task.Data().State != TaskStateInactive {
			task.Data().State = TaskStateStopped
		}
	} else {
		Logger.Infof("Got %s for unknown/stopped task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}
Example #8
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", statusString(status))

	slave := s.slaveFromTaskId(status.GetTaskId().GetValue())

	if status.GetState() == mesos.TaskState_TASK_FAILED || status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_LOST || status.GetState() == mesos.TaskState_TASK_ERROR ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		s.cluster.Remove(slave)
	}
}
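Examples #1, #4 and #8 all spell out the same disjunction of terminal task states. A small predicate keeps such handlers short; this is a sketch rather than code from any of the frameworks above, and the mesosproto import path is assumed from the mesos-go bindings:

import mesos "github.com/mesos/mesos-go/mesosproto"

// isTerminal reports whether a task state is terminal, i.e. the task is
// done and the master will send no further updates for it.
func isTerminal(state mesos.TaskState) bool {
	switch state {
	case mesos.TaskState_TASK_FINISHED,
		mesos.TaskState_TASK_FAILED,
		mesos.TaskState_TASK_KILLED,
		mesos.TaskState_TASK_LOST,
		mesos.TaskState_TASK_ERROR:
		return true
	default:
		return false
	}
}

With it, the condition in Example #8 collapses to: if isTerminal(status.GetState()) { s.cluster.Remove(slave) }.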
Example #9
func (sched *Scheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	if glog.V(1) {
		glog.Infof("status update from task %s in state %s under executor %s on slave %s: %s",
			status.GetTaskId().GetValue(),
			status.GetState(),
			status.GetExecutorId().GetValue(),
			status.GetSlaveId().GetValue(),
			status.GetMessage(),
		)
	}
}
Example #10
func statusString(status *mesos.TaskStatus) string {
	s := fmt.Sprintf("%s %s slave: %s", status.GetTaskId().GetValue(), status.GetState().String(), idString(status.GetSlaveId().GetValue()))

	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		s += " reason: " + status.GetReason().String()
	}

	if status.GetMessage() != "" {
		s += " message: " + status.GetMessage()
	}

	return s
}
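To exercise a formatter like statusString in a test, a TaskStatus can be assembled directly from the generated protobuf types, since every field of the generated struct is a pointer. A minimal sketch, assuming the mesos-go mesosproto bindings and the protobuf runtime's proto.String helper:

package main

import (
	"fmt"

	"github.com/gogo/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
)

func main() {
	// proto.String and Enum() produce the pointers that the generated
	// struct fields require.
	status := &mesos.TaskStatus{
		TaskId:  &mesos.TaskID{Value: proto.String("task-42")},
		SlaveId: &mesos.SlaveID{Value: proto.String("slave-1")},
		State:   mesos.TaskState_TASK_FAILED.Enum(),
		Message: proto.String("container exited with status 1"),
	}

	fmt.Println(status.GetTaskId().GetValue(), status.GetState().String(), status.GetMessage())
}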
Example #11
func (r *RunOnceRunner) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) bool {
	r.applicationLock.Lock()
	defer r.applicationLock.Unlock()

	applicationID := applicationIDFromTaskID(status.GetTaskId().GetValue())
	ctx, exists := r.applications[applicationID]
	if !exists {
		// this status update was not for a run-once application; let it pass
		return false
	}

	if ctx.StatusUpdate(driver, status) {
		delete(r.applications, applicationID)
	}
	return true
}
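The applicationIDFromTaskID helper used above is not shown on this page. One plausible shape, purely hypothetical: it assumes task IDs are formed as "<applicationID>.<suffix>", which may not match the framework's actual naming scheme:

import "strings"

// applicationIDFromTaskID extracts the application ID from a task ID.
// Hypothetical sketch: assumes IDs of the form "<applicationID>.<suffix>".
func applicationIDFromTaskID(taskID string) string {
	if i := strings.LastIndex(taskID, "."); i >= 0 {
		return taskID[:i]
	}
	return taskID
}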
Example #12
func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		panic(fmt.Sprintf("Pending task %v finished, this shouldn't happen", taskId))
	case StateRunning:
		log.V(2).Infof("received finished status for running task: %v", taskId)
		delete(k.podToTask, task.podKey)
		task.State = StateFinished
		task.UpdatedTime = time.Now()
		k.tasksFinished = k.recordFinishedTask(task.ID)
	case StateFinished:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
	}
}
Example #13
func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		log.Infof("Received running status for pending task: %v", taskId)
		fillRunningPodInfo(task, status)
		task.State = StateRunning
	case StateRunning:
		task.UpdatedTime = time.Now()
		log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
	case StateFinished:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
	}
}
Example #14
func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
	// we expect to receive this when a launched task is finally "bound"
	// via the API server. however, there's nothing specific for us to do here.
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		if !task.Has(Bound) {
			task.Set(Bound)
			task.bindTime = task.UpdatedTime
			timeToBind := task.bindTime.Sub(task.launchTime)
			metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
		}
	default:
		taskId := status.GetTaskId().GetValue()
		log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
	}
}
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", pretty.Status(status))

	id := s.idFromTaskId(status.GetTaskId().GetValue())

	switch status.GetState() {
	case mesos.TaskState_TASK_RUNNING:
		s.onTaskStarted(id, status)
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
		s.onTaskFailed(id, status)
	case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_KILLED:
		s.onTaskFinished(id, status)
	default:
		Logger.Warnf("Got unexpected task state %s for task %s", pretty.Status(status), id)
	}

	s.cluster.Save()
}
Example #16
func (s *MinerScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
	// If the mining server failed, kill all daemons, since they will be trying to talk to the failed mining server
	if strings.Contains(status.GetTaskId().GetValue(), "server") {
		s.minerServerRunning = false

		// kill all tasks: ask the master to re-send status updates for
		// every task. Note that ReconcileTasks is asynchronous and does
		// not fill the slice in place, so as written the loop below
		// iterates an empty slice.
		statuses := make([]*mesos.TaskStatus, 0)
		_, err := driver.ReconcileTasks(statuses)
		if err != nil {
			panic(err)
		}

		for _, status := range statuses {
			driver.KillTask(status.TaskId)
		}
	}
}
Example #17
func Status(status *mesos.TaskStatus) string {
	var buffer bytes.Buffer
	buffer.WriteString(fmt.Sprintf("%s %s", status.GetTaskId().GetValue(), status.GetState().String()))
	if status.GetSlaveId() != nil && status.GetSlaveId().GetValue() != "" {
		buffer.WriteString(" slave: ")
		buffer.WriteString(ID(status.GetSlaveId().GetValue()))
	}

	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		buffer.WriteString(" reason: ")
		buffer.WriteString(status.GetReason().String())
	}

	if status.GetMessage() != "" {
		buffer.WriteString(" message: ")
		buffer.WriteString(status.GetMessage())
	}

	return buffer.String()
}
func (ctx *RunOnceApplicationContext) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) bool {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()

	hostname := hostnameFromTaskID(status.GetTaskId().GetValue())
	ctx.updateTaskState(status)
	switch status.GetState() {
	case mesos.TaskState_TASK_RUNNING:
		log.Infof("Task %s received status update in state %s", status.GetTaskId().GetValue(), status.GetState().String())
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
		//TODO also kill all other running tasks sometime?
		ctx.StatusChan <- framework.NewApplicationRunStatus(ctx.Application, fmt.Errorf("Application %s failed to run on host %s with status %s: %s", ctx.Application.ID, hostname, status.GetState().String(), status.GetMessage()))
		return true
	case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_KILLED:
		if ctx.allTasksFinished() {
			ctx.StatusChan <- framework.NewApplicationRunStatus(ctx.Application, nil)
			return true
		}
	default:
		log.Warningf("Got unexpected task state %s", pretty.Status(status))
	}

	return false
}
func (s *Scheduler) onTaskFinished(id string, status *mesos.TaskStatus) {
	if !s.cluster.Exists(id) {
		Logger.Infof("Got %s for unknown/stopped task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}
Example #20
func (s *EtcdScheduler) StatusUpdate(
	driver scheduler.SchedulerDriver,
	status *mesos.TaskStatus,
) {
	s.mut.Lock()
	defer s.mut.Unlock()
	log.Infoln(
		"Status update: task",
		status.TaskId.GetValue(),
		"is in state",
		status.State.Enum().String(),
	)

	node, err := config.Parse(status.GetTaskId().GetValue())
	if err != nil {
		log.Errorf("scheduler: failed to unmarshal config.Node from TaskId: %s", err)
		return
	}
	node.SlaveID = status.SlaveId.GetValue()

	// record that we've heard about this task
	s.heardFrom[status.GetTaskId().GetValue()] = struct{}{}

	switch status.GetState() {
	case mesos.TaskState_TASK_LOST,
		mesos.TaskState_TASK_FINISHED,
		mesos.TaskState_TASK_KILLED,
		mesos.TaskState_TASK_ERROR,
		mesos.TaskState_TASK_FAILED:

		log.Errorf("Task contraction: %+v", status.GetState())
		log.Errorf("message: %s", status.GetMessage())
		log.Errorf("reason: %+v", status.GetReason())

		atomic.AddUint32(&s.Stats.FailedServers, 1)

		// TODO(tyler) kill this
		// Pump the brakes so that we have time to deconfigure the lost node
		// before adding a new one.  If we don't deconfigure first, we risk
		// split brain.
		s.PumpTheBrakes()

		// now we know this task is dead
		delete(s.pending, node.Name)
		delete(s.running, node.Name)
		delete(s.tasks, node.Name)

		// We don't have to clean up the state in ZK for this
		// as it is fine to eventually just persist when we
		// receive a new TASK_RUNNING below.
		delete(s.reconciliationInfo, status.TaskId.GetValue())

		s.QueueLaunchAttempt()

		// TODO(tyler) do we want to lock if the first task fails?
		// TODO(tyler) can we handle a total loss at reconciliation time,
		//             when s.state == Immutable?
		if len(s.running) == 0 && s.state == Mutable {
			log.Error("TOTAL CLUSTER LOSS!  LOCKING SCHEDULER, " +
				"FOLLOW RESTORATION GUIDE AT " +
				"https://github.com/mesosphere/" +
				"etcd-mesos/blob/master/docs/response.md")
			s.state = Immutable
		}
	case mesos.TaskState_TASK_STARTING:
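		// nothing to do for TASK_STARTING; Go switch cases do not fall
		// through, so the TASK_RUNNING logic below is not executed here.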
	case mesos.TaskState_TASK_RUNNING:
		// We update data to ZK synchronously because it must happen
		// in-order.  If we spun off a goroutine this would possibly retry
		// and succeed in the wrong order, and older data would win.
		// We keep this simple here, as if ZK is healthy this won't take long.
		// If this takes long, we're probably about to die anyway, as ZK is
		// displeased and mesos-go will panic when it loses contact.
		s.reconciliationInfo[status.TaskId.GetValue()] = status.SlaveId.GetValue()
		err = s.updateReconciliationInfoFunc(
			s.reconciliationInfo,
			s.ZkServers,
			s.ZkChroot,
			s.FrameworkName,
		)
		if err != nil {
			log.Errorf("Failed to persist reconciliation info: %+v", err)
		}

		delete(s.pending, node.Name)
		_, present := s.running[node.Name]
		if !present {
			s.running[node.Name] = node
			s.tasks[node.Name] = status.TaskId
		}

		// During reconciliation, we may find nodes with higher IDs due to NTP drift
		etcdIndexParts := strings.Split(node.Name, "-")
		if len(etcdIndexParts) != 2 {
			log.Warning("Task has a Name that does not follow the form etcd-<index>")
		} else {
			etcdIndex, err := strconv.ParseInt(etcdIndexParts[1], 10, 64)
			if err != nil {
				log.Warning("Task has a Name that does not follow the form etcd-<index>")
			} else {
				if etcdIndex > s.highestInstanceID {
					s.highestInstanceID = etcdIndex + 1
				}
			}
		}
	default:
		log.Warningf("Received unhandled task state: %+v", status.GetState())
	}
}
Example #21
func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
	if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
		log.Errorf("received STAGING for task %v with unexpected source: %v",
			status.GetTaskId().GetValue(), status.GetSource())
	}
}
func (s *Scheduler) onTaskStarted(id string, status *mesos.TaskStatus) {
	if s.cluster.Exists(id) {
		task := s.cluster.Get(id)
		task.Data().State = TaskStateRunning
	} else {
		Logger.Infof("Got %s for unknown/stopped task, killing task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}