func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
	taskId := status.GetTaskId().GetValue()

	k.rw.Lock()
	defer k.rw.Unlock()
	task, state := k._get(taskId)

	switch status.GetState() {
	case mesos.TaskState_TASK_STAGING:
		k.handleTaskStaging(task, state, status)
	case mesos.TaskState_TASK_STARTING:
		k.handleTaskStarting(task, state, status)
	case mesos.TaskState_TASK_RUNNING:
		k.handleTaskRunning(task, state, status)
	case mesos.TaskState_TASK_FINISHED:
		k.handleTaskFinished(task, state, status)
	case mesos.TaskState_TASK_FAILED:
		k.handleTaskFailed(task, state, status)
	case mesos.TaskState_TASK_ERROR:
		k.handleTaskError(task, state, status)
	case mesos.TaskState_TASK_KILLED:
		k.handleTaskKilled(task, state, status)
	case mesos.TaskState_TASK_LOST:
		k.handleTaskLost(task, state, status)
	default:
		log.Warningf("unhandled status update for task: %v", taskId)
	}

	return task.Clone(), state
}
func (sched *MesosRunonceScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.V(1).Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
	eventCh <- status

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
	}

	if sched.tasksFinished >= sched.totalTasks {
		log.V(1).Infoln("Total tasks completed, stopping framework.")
		driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		exitStatus = 1
		log.Warningf("mesos TaskStatus: %v", status)
		driver.Stop(false)
		log.Errorln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
	}
}
func (sched *testScheduler) StatusUpdate(dr SchedulerDriver, stat *mesos.TaskStatus) {
	log.Infoln("Sched.StatusUpdate() called.")
	sched.s.NotNil(stat)
	sched.s.Equal("test-task-001", stat.GetTaskId().GetValue())
	sched.wg.Done()
	log.Infof("Status update done with waitGroup")
}
func (sched *SdcScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
		// Calling KillTask here results in a TASK_LOST being observed, which stops the framework.
		// driver.KillTask(status.TaskId)
		// log.Infoln("!! Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
		// return
	}

	if sched.tasksFinished >= sched.totalTasks {
		// log.Infoln("Total tasks completed, stopping framework.")
		log.Infoln("Total tasks completed.")
		sched.tasksFinished = 0
		sched.totalTasks = 0
		sched.tasksLaunched = 0
		// driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		log.Infoln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
		driver.Abort()
	}
}
func (sched *Scheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
		log.Infof("%v of %v tasks finished.", sched.tasksFinished, sched.totalTasks)
	}

	//TODO if a job is finished, failed, error, lost, killed
	// figure out how this impacts dependent jobs and update job graph

	/*
		//never shut down framework!
		if sched.tasksFinished >= sched.totalTasks {
			log.Infoln("Total tasks completed, stopping framework.")
			driver.Stop(false)
		}
	*/

	/*
		if status.GetState() == mesos.TaskState_TASK_LOST ||
			status.GetState() == mesos.TaskState_TASK_KILLED ||
			status.GetState() == mesos.TaskState_TASK_FAILED {
			log.Infoln(
				"Aborting because task", status.TaskId.GetValue(),
				"is in unexpected state", status.State.String(),
				"with message", status.GetMessage(),
			)
			driver.Abort()
		}
	*/
}
func (sched *ExampleScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	//if RunContainer finished, add
	if status.State.Enum().String() == "TASK_FINISHED" {
		labels := status.GetLabels()
		taskType, err := shared.GetValueFromLabels(labels, shared.Tags.TASK_TYPE)
		if err != nil {
			log.Infof("ERROR: Malformed task info, discarding task with status: %v", status)
			return
		}
		acceptedHost, err := shared.GetValueFromLabels(labels, shared.Tags.ACCEPTED_HOST)
		if err != nil {
			log.Infof("ERROR: Malformed task info, discarding task with status: %v", status)
			return
		}
		containerName, err := shared.GetValueFromLabels(labels, shared.Tags.CONTAINER_NAME)
		if err != nil {
			log.Infof("ERROR: Malformed task info, discarding task with status: %v", status)
			return
		}

		switch taskType {
		case shared.TaskTypes.RUN_CONTAINER:
			sched.ContainerSlaveMap[containerName] = acceptedHost
		case shared.TaskTypes.CHECKPOINT_CONTAINER:
			delete(sched.ContainerSlaveMap, containerName)
		case shared.TaskTypes.RESTORE_CONTAINER:
			sched.ContainerSlaveMap[containerName] = acceptedHost
		}
	}
}
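// The scheduler above relies on a shared.GetValueFromLabels helper whose definition is
// not shown. The following is only a minimal sketch of what such a lookup likely does,
// based on the mesos-go Labels/Label protobuf accessors (GetLabels, GetKey, GetValue);
// the function name getValueFromLabels and its error text are illustrative, not the
// project's actual code.
func getValueFromLabels(labels *mesos.Labels, key string) (string, error) {
	// Scan the label list for a matching key and return its value.
	for _, label := range labels.GetLabels() {
		if label.GetKey() == key {
			return label.GetValue(), nil
		}
	}
	return "", fmt.Errorf("label %q not found", key)
}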
func (sched *Scheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())
	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
		log.Infof("%v of %v tasks finished.", sched.tasksFinished, sched.totalTasks)
	}
}
// StatusUpdate is called when a status update message is sent to the scheduler.
func (k *KubernetesScheduler) StatusUpdate(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	source, reason := "none", "none"
	if taskStatus.Source != nil {
		source = (*taskStatus.Source).String()
	}
	if taskStatus.Reason != nil {
		reason = (*taskStatus.Reason).String()
	}
	taskState := taskStatus.GetState()
	metrics.StatusUpdates.WithLabelValues(source, reason, taskState.String()).Inc()

	log.Infof(
		"task status update %q from %q for task %q on slave %q executor %q for reason %q",
		taskState.String(),
		source,
		taskStatus.TaskId.GetValue(),
		taskStatus.SlaveId.GetValue(),
		taskStatus.ExecutorId.GetValue(),
		reason)

	switch taskState {
	case mesos.TaskState_TASK_RUNNING, mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_STARTING, mesos.TaskState_TASK_STAGING:
		if _, state := k.taskRegistry.UpdateStatus(taskStatus); state == podtask.StateUnknown {
			if taskState != mesos.TaskState_TASK_FINISHED {
				//TODO(jdef) what if I receive this after a TASK_LOST or TASK_KILLED?
				//I don't want to reincarnate then.. TASK_LOST is a special case because
				//the master is stateless and there are scenarios where I may get TASK_LOST
				//followed by TASK_RUNNING.
				//TODO(jdef) consider running this asynchronously since there are API server
				//calls that may be made
				k.reconcileNonTerminalTask(driver, taskStatus)
			} // else, we don't really care about FINISHED tasks that aren't registered
			return
		}
		if _, exists := k.slaves.getSlave(taskStatus.GetSlaveId().GetValue()); !exists {
			// a registered task has an update reported by a slave that we don't recognize.
			// this should never happen! So we don't reconcile it.
			log.Errorf("Ignore status %+v because the slave does not exist", taskStatus)
			return
		}
	case mesos.TaskState_TASK_FAILED:
		if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil {
			if task.Has(podtask.Launched) && !task.Has(podtask.Bound) {
				go k.plugin.reconcilePod(task.Pod)
				return
			}
		} else {
			// unknown task failed, not much we can do about it
			return
		}
		// last-ditch effort to reconcile our records
		fallthrough
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_KILLED:
		k.reconcileTerminalTask(driver, taskStatus)
	}
}
func (r *Reconciler) Update(status *mesos.TaskStatus) {
	r.taskLock.Lock()
	defer r.taskLock.Unlock()

	delete(r.tasks, status.GetTaskId().GetValue())
	if len(r.tasks) == 0 {
		r.reconciles = 0
	}
}
func (s *Scheduler) onTaskFailed(id string, status *mesos.TaskStatus) {
	if s.cluster.Exists(id) {
		task := s.cluster.Get(id)
		if task.Data().State != TaskStateInactive {
			task.Data().State = TaskStateStopped
		}
	} else {
		Logger.Infof("Got %s for unknown/stopped task %s", pretty.Status(status), status.GetTaskId().GetValue())
	}
}
func (sched *Scheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	if glog.V(1) {
		glog.Infof("status update from task %s in state %s under executor %s on slave %s: %s",
			status.GetTaskId().GetValue(),
			status.GetState(),
			status.GetExecutorId().GetValue(),
			status.GetSlaveId().GetValue(),
			status.GetMessage(),
		)
	}
}
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", statusString(status))

	slave := s.slaveFromTaskId(status.GetTaskId().GetValue())

	if status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_ERROR ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		s.cluster.Remove(slave)
	}
}
func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there is no data..
		return
	}
	//TODO(jdef) determine the usefulness of this information (if any)
	if result, err := ParsePodStatusResult(taskStatus); err != nil {
		log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
	} else {
		task.podStatus = result.Status
		log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
	}
}
// StatusUpdate handles status update messages received from the Mesos master.
//
// Currently this method only logs status updates. This might change in the future.
func (bw *BasicWorker) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	taskId := status.TaskId.GetValue()
	taskStatus := status.GetState()
	log.Println("Task", taskId, "is in state", taskStatus.String())

	switch taskStatus {
	case mesos.TaskState_TASK_RUNNING:
		log.Printf("Marking task %s as %s", taskId, taurus.RUNNING)
	case mesos.TaskState_TASK_KILLED, mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_LOST:
		log.Printf("Marking task %s as %s", taskId, taurus.STOPPED)
	}
}
func testErrorStatus(t *testing.T, ctx *RunOnceApplicationContext, driver *mesostest.MockSchedulerDriver, status *mesos.TaskStatus) {
	appDone := ctx.StatusUpdate(driver, status)
	So(appDone, ShouldBeTrue)

	select {
	case sts := <-ctx.StatusChan:
		So(sts.Error, ShouldNotBeNil)
		So(sts.Error.Error(), ShouldContainSubstring, "failed to run on host")
	default:
		t.Fail()
	}

	So(ctx.tasks[0].State, ShouldEqual, status.GetState())
}
func (s *StackDeployScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Info("[StatusUpdate] %s", pretty.Status(status))

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		driver.ReviveOffers()
	}

	for _, runner := range MesosTaskRunners {
		if runner.StatusUpdate(driver, status) {
			return
		}
	}

	Logger.Warn("Received status update that was not handled by any Mesos Task Runner: %s", pretty.Status(status))
}
func (driver *MesosExecutorDriver) makeStatusUpdate(taskStatus *mesosproto.TaskStatus) *mesosproto.StatusUpdate {
	now := float64(time.Now().Unix())

	// Fill in all the fields.
	taskStatus.Timestamp = proto.Float64(now)
	taskStatus.SlaveId = driver.slaveID
	update := &mesosproto.StatusUpdate{
		FrameworkId: driver.frameworkID,
		ExecutorId:  driver.executorID,
		SlaveId:     driver.slaveID,
		Status:      taskStatus,
		Timestamp:   proto.Float64(now),
		Uuid:        uuid.NewUUID(),
	}
	return update
}
func (r *RunOnceRunner) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) bool {
	r.applicationLock.Lock()
	defer r.applicationLock.Unlock()

	applicationID := applicationIDFromTaskID(status.GetTaskId().GetValue())
	ctx, exists := r.applications[applicationID]
	if !exists {
		// this status update was not for a run once application, just let it go
		return false
	}

	if ctx.StatusUpdate(driver, status) {
		delete(r.applications, applicationID)
	}

	return true
}
func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		panic(fmt.Sprintf("Pending task %v finished, this shouldn't happen", taskId))
	case StateRunning:
		log.V(2).Infof("received finished status for running task: %v", taskId)
		delete(k.podToTask, task.podKey)
		task.State = StateFinished
		task.UpdatedTime = time.Now()
		k.tasksFinished = k.recordFinishedTask(task.ID)
	case StateFinished:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
	}
}
func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
	taskId := status.GetTaskId().GetValue()
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		log.Infof("Received running status for pending task: %v", taskId)
		fillRunningPodInfo(task, status)
		task.State = StateRunning
	case StateRunning:
		task.UpdatedTime = time.Now()
		log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
	case StateFinished:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
	default:
		log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
	}
}
func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
	// we expect to receive this when a launched task is finally "bound"
	// via the API server. however, there's nothing specific for us to do here.
	switch state {
	case StatePending:
		task.UpdatedTime = time.Now()
		if !task.Has(Bound) {
			task.Set(Bound)
			task.bindTime = task.UpdatedTime
			timeToBind := task.bindTime.Sub(task.launchTime)
			metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
		}
	default:
		taskId := status.GetTaskId().GetValue()
		log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
	}
}
func (s *MinerScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	// If the mining server failed, kill all daemons, since they will be trying to talk
	// to the failed mining server.
	if strings.Contains(status.GetTaskId().GetValue(), "server") {
		s.minerServerRunning = false

		// kill all tasks
		// NOTE: ReconcileTasks only asks the master to reconcile; it does not populate
		// this slice. The reconciled statuses arrive asynchronously through StatusUpdate,
		// so the loop below has nothing to iterate over as written (see the sketch after
		// this function).
		statuses := make([]*mesos.TaskStatus, 0)
		_, err := driver.ReconcileTasks(statuses)
		if err != nil {
			panic(err)
		}
		for _, status := range statuses {
			driver.KillTask(status.TaskId)
		}
	}
}
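// A hedged sketch of explicit reconciliation with the mesos-go SchedulerDriver,
// assuming the framework tracks the task IDs it has launched. The function name
// requestReconciliation and the launchedTaskIds parameter are hypothetical; the
// driver's ReconcileTasks signature is the mesos-go one used above. The master
// answers each request with a status update delivered via StatusUpdate, not by
// mutating the slice passed in.
func requestReconciliation(driver sched.SchedulerDriver, launchedTaskIds []*mesos.TaskID) error {
	statuses := make([]*mesos.TaskStatus, 0, len(launchedTaskIds))
	for _, taskId := range launchedTaskIds {
		statuses = append(statuses, &mesos.TaskStatus{
			TaskId: taskId,
			// The framework's last known state for the task; TASK_STAGING is a common
			// placeholder when no update has been seen yet.
			State: mesos.TaskState_TASK_STAGING.Enum(),
		})
	}
	_, err := driver.ReconcileTasks(statuses)
	return err
}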
func (s *Scheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	Logger.Infof("[StatusUpdate] %s", pretty.Status(status))

	id := s.idFromTaskId(status.GetTaskId().GetValue())

	switch status.GetState() {
	case mesos.TaskState_TASK_RUNNING:
		s.onTaskStarted(id, status)
	case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
		s.onTaskFailed(id, status)
	case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_KILLED:
		s.onTaskFinished(id, status)
	default:
		Logger.Warnf("Got unexpected task state %s for task %s", pretty.Status(status), id)
	}

	s.cluster.Save()
}
// SendStatusUpdate sends status updates to the slave.
func (driver *MesosExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) {
	log.V(3).Infoln("Sending task status update: ", taskStatus.String())

	if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
		return stat, fmt.Errorf("Unable to SendStatusUpdate, expecting driver.status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
	}

	if taskStatus.GetState() == mesosproto.TaskState_TASK_STAGING {
		err := fmt.Errorf("Executor is not allowed to send TASK_STAGING status update. Aborting!")
		log.Errorln(err)
		if err0 := driver.stop(mesosproto.Status_DRIVER_ABORTED); err0 != nil {
			log.Errorln("Error while stopping the driver", err0)
		}
		return driver.Status(), err
	}

	// Set up status update.
	update := driver.makeStatusUpdate(taskStatus)
	log.Infof("Executor sending status update %v\n", update.String())

	// Capture the status update.
	driver.lock.Lock()
	driver.updates[uuid.UUID(update.GetUuid()).String()] = update
	driver.lock.Unlock()

	// Put the status update in the message.
	message := &mesosproto.StatusUpdateMessage{
		Update: update,
		Pid:    proto.String(driver.self.String()),
	}

	// Send the message.
	if err := driver.send(driver.slaveUPID, message); err != nil {
		log.Errorf("Failed to send %v: %v\n", message, err)
		return driver.status, err
	}
	return driver.Status(), nil
}
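// A minimal usage sketch of SendStatusUpdate from the executor side, assuming the
// mesos-go executor bindings (imported here as executor). The exampleExecutor type
// and the work it stands in for are hypothetical; the TaskStatus construction follows
// the pattern used by the mesos-go example executor.
func (e *exampleExecutor) LaunchTask(driver executor.ExecutorDriver, taskInfo *mesosproto.TaskInfo) {
	// Report that the task has started.
	runStatus := &mesosproto.TaskStatus{
		TaskId: taskInfo.GetTaskId(),
		State:  mesosproto.TaskState_TASK_RUNNING.Enum(),
	}
	if _, err := driver.SendStatusUpdate(runStatus); err != nil {
		log.Errorf("error sending TASK_RUNNING update: %v", err)
	}

	// ... perform the actual work of the task here ...

	// Report a terminal state once the work is done.
	finStatus := &mesosproto.TaskStatus{
		TaskId: taskInfo.GetTaskId(),
		State:  mesosproto.TaskState_TASK_FINISHED.Enum(),
	}
	if _, err := driver.SendStatusUpdate(finStatus); err != nil {
		log.Errorf("error sending TASK_FINISHED update: %v", err)
	}
}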
// mesos.Scheduler interface method.
// Invoked when the status of a task has changed.
func (this *TransformScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	fmt.Printf("Status update: task %s is in state %s\n", status.TaskId.GetValue(), status.State.Enum().String())
	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		this.removeTask(status.GetTaskId())
		this.decRunningInstances()
	}
}
// mesos.Scheduler interface method.
// Invoked when the status of a task has changed.
func (this *ElodinaTransportScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		this.taskIdToTaskState[*status.TaskId.Value].pending = true
	} else if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_FINISHED {
		this.TakenTopicPartitions.RemoveAll(this.taskIdToTaskState[*status.TaskId.Value].GetAssignment())
		delete(this.taskIdToTaskState, *status.TaskId.Value)
	}
}
func (sched *ExampleScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Infoln("Status update: task", status.TaskId.GetValue(), " is in state ", status.State.Enum().String())

	if status.GetState() == mesos.TaskState_TASK_FINISHED {
		sched.tasksFinished++
	}

	if sched.tasksFinished >= sched.totalTasks {
		log.Infoln("Total tasks completed, stopping framework.")
		driver.Stop(false)
	}

	if status.GetState() == mesos.TaskState_TASK_LOST ||
		status.GetState() == mesos.TaskState_TASK_KILLED ||
		status.GetState() == mesos.TaskState_TASK_FAILED ||
		status.GetState() == mesos.TaskState_TASK_ERROR {
		log.Infoln(
			"Aborting because task", status.TaskId.GetValue(),
			"is in unexpected state", status.State.String(),
			"with message", status.GetMessage(),
		)
		driver.Abort()
	}
}
// mesos.Scheduler interface method.
// Invoked when the status of a task has changed.
func (this *ElodinaTransportScheduler) StatusUpdate(driver scheduler.SchedulerDriver, status *mesos.TaskStatus) {
	log.Logger.Info("Received status %s for task %s", status.GetState().Enum(), status.TaskId.GetValue())
	if status.GetState() == mesos.TaskState_TASK_RUNNING {
		this.taskIdToTaskState[status.TaskId.GetValue()].pending = true
	} else if isTerminated(status.GetState()) {
		this.TakenTopicPartitions.RemoveAll(this.taskIdToTaskState[status.TaskId.GetValue()].GetAssignment())
		delete(this.taskIdToTaskState, status.TaskId.GetValue())
	}
}
// StatusUpdate handles task status updates sent to the scheduler.
func (c *Cluster) StatusUpdate(_ mesosscheduler.SchedulerDriver, taskStatus *mesosproto.TaskStatus) {
	log.WithFields(log.Fields{"name": "mesos", "state": taskStatus.State.String()}).Debug("Status update")
	taskID := taskStatus.TaskId.GetValue()
	slaveID := taskStatus.SlaveId.GetValue()
	s, ok := c.slaves[slaveID]
	if !ok {
		return
	}
	if task, ok := s.tasks[taskID]; ok {
		task.sendStatus(taskStatus)
	} else {
		var reason = ""
		if taskStatus.Reason != nil {
			reason = taskStatus.GetReason().String()
		}

		// the slave is known but the task is not tracked on it
		log.WithFields(log.Fields{
			"name":    "mesos",
			"state":   taskStatus.State.String(),
			"slaveId": taskStatus.SlaveId.GetValue(),
			"reason":  reason,
		}).Warn("Status update received for unknown task")
	}
}
// FetchLogs obtains the requested log file from the mesos-agent, choosing the sandbox
// directory according to the task's status.
func FetchLogs(status *mesos.TaskStatus, offset int, file string, frameworkId string) ([]byte, error) {
	var (
		dir      string
		hostname string
		err      error
	)

	switch status.GetState() {
	case mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_KILLED:
		hostDir, err := hostDirFromState(status, frameworkId)
		if err != nil {
			return nil, err
		}
		hostname, dir = hostDir.Host, hostDir.Dir
	default:
		hostDir, err := hostDirFromTaskStatus(status)
		if err != nil {
			return nil, err
		}
		hostname, dir = hostDir.Host, hostDir.Dir
	}

	url := fmt.Sprintf("http://%s:5051/files/read.json?path=%s/%s&offset=%d", hostname, dir, file, offset)
	bodyData, err := fetchUrl(url)
	if err != nil {
		return nil, err
	}

	var logData LogData
	err = json.Unmarshal(bodyData, &logData)
	if err != nil {
		return nil, err
	}

	return []byte(logData.Data), nil
}
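// FetchLogs above depends on a LogData type and a fetchUrl helper that are not shown.
// The following is only a plausible sketch under the assumption that the standard
// Mesos agent /files/read.json endpoint is being read, which returns a JSON object
// with "data" and "offset" fields; the shapes used by the original project may differ.
type LogData struct {
	Data   string `json:"data"`
	Offset int    `json:"offset"`
}

func fetchUrl(url string) ([]byte, error) {
	// Issue a plain GET against the agent's files API and return the raw body.
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected HTTP status %s from %s", resp.Status, url)
	}
	return ioutil.ReadAll(resp.Body)
}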