// StatusUpdate is invoked when the state of one of our tasks changes. It
// forwards the update to the CommandHandler and stops the framework once the
// command queue is closed and no tasks remain running.
func (sched *NoneScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	log.Infoln("Status update: task", taskId, "is in state", status.State.Enum().String())

	c := sched.queue.GetCommandById(taskId)
	if c == nil {
		log.Errorln("Unable to find command for task", taskId)
		driver.Abort()
		return // c is nil; dereferencing it below would panic
	}
	if c.Status.GetState() == status.GetState() {
		// ignore repeated status updates
		return
	}
	c.Status = status

	// send status update to CommandHandler
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		sched.handler.CommandRunning(c)
	} else if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFinished(c)
	} else if status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED {
		sched.handler.CommandEnded(c)
		sched.handler.CommandFailed(c)
	}

	// stop if Commands channel was closed and all tasks are finished
	if sched.queue.Closed() && !sched.handler.HasRunningTasks() {
		log.Infoln("All tasks finished, stopping framework.")
		sched.handler.FinishAllCommands()
		driver.Stop(false)
	}
}
func (sched *testScheduler) StatusUpdate(dr SchedulerDriver, stat *mesos.TaskStatus) {
	log.Infoln("Sched.StatusUpdate() called.")
	sched.s.NotNil(stat)
	sched.s.Equal("test-task-001", stat.GetTaskId().GetValue())
	sched.wg.Done()
	log.Infof("Status update done with waitGroup")
}
// UpdateStatus dispatches a status update to the handler for the reported
// task state and returns a clone of the affected task along with the state
// it was in before the update.
func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
	taskId := status.GetTaskId().GetValue()

	k.rw.Lock()
	defer k.rw.Unlock()
	task, state := k._get(taskId)

	switch status.GetState() {
	case mesos.TaskState_TASK_STAGING:
		k.handleTaskStaging(task, state, status)
	case mesos.TaskState_TASK_STARTING:
		k.handleTaskStarting(task, state, status)
	case mesos.TaskState_TASK_RUNNING:
		k.handleTaskRunning(task, state, status)
	case mesos.TaskState_TASK_FINISHED:
		k.handleTaskFinished(task, state, status)
	case mesos.TaskState_TASK_FAILED:
		k.handleTaskFailed(task, state, status)
	case mesos.TaskState_TASK_ERROR:
		k.handleTaskError(task, state, status)
	case mesos.TaskState_TASK_KILLED:
		k.handleTaskKilled(task, state, status)
	case mesos.TaskState_TASK_LOST:
		k.handleTaskLost(task, state, status)
	default:
		log.Warningf("unhandled status update for task: %v", taskId)
	}
	return task.Clone(), state
}
// mesos.Scheduler interface method.
// Invoked when the status of a task has changed.
func (this *TransformScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	fmt.Printf("Status update: task %s is in state %s\n", status.TaskId.GetValue(), status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_LOST || status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		this.removeTask(status.GetTaskId())
		this.decRunningInstances()
	}
}
// Update removes the task named in the status update from the set of tasks
// awaiting reconciliation; once the set is empty, the retry counter is reset.
func (r *Reconciler) Update(status *mesos.TaskStatus) {
	r.taskLock.Lock()
	defer r.taskLock.Unlock()

	delete(r.tasks, status.GetTaskId().GetValue())
	if len(r.tasks) == 0 {
		r.reconciles = 0
	}
}
// updateTaskState records the reported state on the matching task in this
// application context.
func (ctx *RunOnceApplicationContext) updateTaskState(status *mesos.TaskStatus) {
	for _, task := range ctx.tasks {
		if task.TaskID == status.GetTaskId().GetValue() {
			task.State = status.GetState()
			return
		}
	}

	framework.Logger.Warn("Got unexpected status update for unknown task with ID %s", status.GetTaskId().GetValue())
}
func (s *Scheduler) onTaskFailed(id string, status *mesos.TaskStatus) {
	if s.cluster.Exists(id) {
		task := s.cluster.Get(id)
		if task.Data().State != TaskStateInactive {
			task.Data().State = TaskStateStopped
		}
	} else {
		Logger.Infof("Got %s for unknown/stopped task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}
// StatusUpdate removes the slave that ran the task from the cluster once the
// task reaches a terminal state.
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", statusString(status))

	slave := s.slaveFromTaskId(status.GetTaskId().GetValue())

	if status.GetState() == mesos.TaskState_TASK_FAILED || status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_LOST || status.GetState() == mesos.TaskState_TASK_ERROR ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		s.cluster.Remove(slave)
	}
}
func (sched *Scheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	if glog.V(1) {
		glog.Infof("status update from task %s in state %s under executor %s on slave %s: %s",
			status.GetTaskId().GetValue(),
			status.GetState(),
			status.GetExecutorId().GetValue(),
			status.GetSlaveId().GetValue(),
			status.GetMessage(),
		)
	}
}
// statusString renders a one-line, human-readable summary of a status update.
func statusString(status *mesos.TaskStatus) string {
	s := fmt.Sprintf("%s %s slave: %s", status.GetTaskId().GetValue(), status.GetState().String(), idString(status.GetSlaveId().GetValue()))

	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		s += " reason: " + status.GetReason().String()
	}

	if status.GetMessage() != "" {
		s += " message: " + status.GetMessage()
	}

	return s
}
// StatusUpdate routes a status update to the run-once application it belongs
// to, and reports whether the update was consumed by such an application.
func (r *RunOnceRunner) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) bool {
	r.applicationLock.Lock()
	defer r.applicationLock.Unlock()

	applicationID := applicationIDFromTaskID(status.GetTaskId().GetValue())
	ctx, exists := r.applications[applicationID]
	if !exists {
		// this status update was not for a run-once application; let it pass through
		return false
	}

	if ctx.StatusUpdate(driver, status) {
		delete(r.applications, applicationID)
	}
	return true
}
func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		panic(fmt.Sprintf("Pending task %v finished, this couldn't happen", taskId))
	case StateRunning:
		log.V(2).Infof("received finished status for running task: %v", taskId)
		delete(k.podToTask, task.podKey)
		task.State = StateFinished
		task.UpdatedTime = time.Now()
		k.tasksFinished = k.recordFinishedTask(task.ID)
	case StateFinished:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
	}
}
func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		log.Infof("Received running status for pending task: %v", taskId)
		fillRunningPodInfo(task, status)
		task.State = StateRunning
	case StateRunning:
		task.UpdatedTime = time.Now()
		log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
	case StateFinished:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
	}
}
func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
	// we expect to receive this when a launched task is finally "bound"
	// via the API server. however, there's nothing specific for us to do here.
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		if !task.Has(Bound) {
			task.Set(Bound)
			task.bindTime = task.UpdatedTime
			timeToBind := task.bindTime.Sub(task.launchTime)
			metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
		}
	default:
		taskId := status.GetTaskId().GetValue()
		log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
	}
}
// StatusUpdate dispatches the update to the matching task-lifecycle handler
// and persists the cluster state afterwards.
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", pretty.Status(status))

	id := s.idFromTaskId(status.GetTaskId().GetValue())

	switch status.GetState() {
	case mesos.TaskState_TASK_RUNNING:
		s.onTaskStarted(id, status)
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
		s.onTaskFailed(id, status)
	case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_KILLED:
		s.onTaskFinished(id, status)
	default:
		Logger.Warnf("Got unexpected task state %s for task %s", pretty.Status(status), id)
	}

	s.cluster.Save()
}
func (s *MinerScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	// If the mining server failed, kill all daemons, since they will be
	// trying to talk to the failed mining server.
	if strings.Contains(status.GetTaskId().GetValue(), "server") {
		s.minerServerRunning = false

		// kill all tasks
		// Note: ReconcileTasks does not fill the slice passed to it; the
		// reconciled task statuses arrive asynchronously via StatusUpdate,
		// so this loop only sees statuses already present in the slice.
		statuses := make([]*mesos.TaskStatus, 0)
		_, err := driver.ReconcileTasks(statuses)
		if err != nil {
			panic(err)
		}
		for _, status := range statuses {
			driver.KillTask(status.TaskId)
		}
	}
}
// Status renders a one-line, human-readable summary of a status update.
func Status(status *mesos.TaskStatus) string {
	var buffer bytes.Buffer
	buffer.WriteString(fmt.Sprintf("%s %s", status.GetTaskId().GetValue(), status.GetState().String()))

	if status.GetSlaveId() != nil && status.GetSlaveId().GetValue() != "" {
		buffer.WriteString(" slave: ")
		buffer.WriteString(ID(status.GetSlaveId().GetValue()))
	}

	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		buffer.WriteString(" reason: ")
		buffer.WriteString(status.GetReason().String())
	}

	if status.GetMessage() != "" {
		buffer.WriteString(" message: ")
		buffer.WriteString(status.GetMessage())
	}

	return buffer.String()
}
// StatusUpdate records the new task state and reports whether the run-once
// application has reached a terminal outcome, either success or failure.
func (ctx *RunOnceApplicationContext) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) bool {
	ctx.lock.Lock()
	defer ctx.lock.Unlock()

	hostname := hostnameFromTaskID(status.GetTaskId().GetValue())
	ctx.updateTaskState(status)

	switch status.GetState() {
	case mesos.TaskState_TASK_RUNNING:
		log.Infof("Task %s received status update in state %s", status.GetTaskId().GetValue(), status.GetState().String())
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
		//TODO also kill all other running tasks sometime?
		ctx.StatusChan <- framework.NewApplicationRunStatus(ctx.Application, fmt.Errorf("Application %s failed to run on host %s with status %s: %s",
			ctx.Application.ID, hostname, status.GetState().String(), status.GetMessage()))
		return true
	case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_KILLED:
		if ctx.allTasksFinished() {
			ctx.StatusChan <- framework.NewApplicationRunStatus(ctx.Application, nil)
			return true
		}
	default:
		log.Warningf("Got unexpected task state %s", pretty.Status(status))
	}

	return false
}
func (s *Scheduler) onTaskFinished(id string, status *mesos.TaskStatus) {
	if !s.cluster.Exists(id) {
		Logger.Infof("Got %s for unknown/stopped task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}
func (s *EtcdScheduler) StatusUpdate(
	driver scheduler.SchedulerDriver,
	status *mesos.TaskStatus,
) {
	s.mut.Lock()
	defer s.mut.Unlock()

	log.Infoln(
		"Status update: task",
		status.TaskId.GetValue(),
		" is in state ",
		status.State.Enum().String(),
	)

	node, err := config.Parse(status.GetTaskId().GetValue())
	if err != nil {
		log.Errorf("scheduler: failed to unmarshal config.Node from TaskId: %s", err)
		return
	}
	node.SlaveID = status.SlaveId.GetValue()

	// record that we've heard about this task
	s.heardFrom[status.GetTaskId().GetValue()] = struct{}{}

	switch status.GetState() {
	case mesos.TaskState_TASK_LOST,
		mesos.TaskState_TASK_FINISHED,
		mesos.TaskState_TASK_KILLED,
		mesos.TaskState_TASK_ERROR,
		mesos.TaskState_TASK_FAILED:

		log.Errorf("Task contraction: %+v", status.GetState())
		log.Errorf("message: %s", status.GetMessage())
		log.Errorf("reason: %+v", status.GetReason())

		atomic.AddUint32(&s.Stats.FailedServers, 1)

		// TODO(tyler) kill this
		// Pump the brakes so that we have time to deconfigure the lost node
		// before adding a new one. If we don't deconfigure first, we risk
		// split brain.
		s.PumpTheBrakes()

		// now we know this task is dead
		delete(s.pending, node.Name)
		delete(s.running, node.Name)
		delete(s.tasks, node.Name)

		// We don't have to clean up the state in ZK for this
		// as it is fine to eventually just persist when we
		// receive a new TASK_RUNNING below.
		delete(s.reconciliationInfo, status.TaskId.GetValue())

		s.QueueLaunchAttempt()

		// TODO(tyler) do we want to lock if the first task fails?
		// TODO(tyler) can we handle a total loss at reconciliation time,
		//             when s.state == Immutable?
		if len(s.running) == 0 && s.state == Mutable {
			log.Error("TOTAL CLUSTER LOSS! LOCKING SCHEDULER, " +
				"FOLLOW RESTORATION GUIDE AT " +
				"https://github.com/mesosphere/" +
				"etcd-mesos/blob/master/docs/response.md")
			s.state = Immutable
		}
	case mesos.TaskState_TASK_STARTING:
	case mesos.TaskState_TASK_RUNNING:
		// We update data to ZK synchronously because it must happen
		// in-order. If we spun off a goroutine this would possibly retry
		// and succeed in the wrong order, and older data would win.
		// We keep this simple here, as if ZK is healthy this won't take long.
		// If this takes long, we're probably about to die anyway, as ZK is
		// displeased and mesos-go will panic when it loses contact.
		s.reconciliationInfo[status.TaskId.GetValue()] = status.SlaveId.GetValue()
		err = s.updateReconciliationInfoFunc(
			s.reconciliationInfo,
			s.ZkServers,
			s.ZkChroot,
			s.FrameworkName,
		)
		if err != nil {
			log.Errorf("Failed to persist reconciliation info: %+v", err)
		}

		delete(s.pending, node.Name)
		_, present := s.running[node.Name]
		if !present {
			s.running[node.Name] = node
			s.tasks[node.Name] = status.TaskId
		}

		// During reconciliation, we may find nodes with higher IDs due to ntp drift
		etcdIndexParts := strings.Split(node.Name, "-")
		if len(etcdIndexParts) != 2 {
			log.Warning("Task has a Name that does not follow the form etcd-<index>")
		} else {
			etcdIndex, err := strconv.ParseInt(etcdIndexParts[1], 10, 64)
			if err != nil {
				log.Warning("Task has a Name that does not follow the form etcd-<index>")
			} else {
				if etcdIndex > s.highestInstanceID {
					s.highestInstanceID = etcdIndex + 1
				}
			}
		}
	default:
		log.Warningf("Received unhandled task state: %+v", status.GetState())
	}
}
func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
	if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
		log.Errorf("received STAGING for task %v with unexpected source: %v",
			status.GetTaskId().GetValue(), status.GetSource())
	}
}
func (s *Scheduler) onTaskStarted(id string, status *mesos.TaskStatus) {
	if s.cluster.Exists(id) {
		task := s.cluster.Get(id)
		task.Data().State = TaskStateRunning
	} else {
		Logger.Infof("Got %s for unknown/stopped task, killing task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}