func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there is no data to parse in this case
		return
	}
	//TODO(jdef) determine the usefulness of this information (if any)
	if result, err := ParsePodStatusResult(taskStatus); err != nil {
		log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
	} else {
		task.podStatus = result.Status
		log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
	}
}
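// Illustrative sketch only (assumption, not the original helper): ParsePodStatusResult, as
// called above, presumably decodes TaskStatus.Data into an api.PodStatusResult. The JSON
// codec and the name parsePodStatusResultSketch are assumptions; "encoding/json" would be
// needed in addition to the imports already used here.
func parsePodStatusResultSketch(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
	if taskStatus.Data == nil {
		err = fmt.Errorf("missing TaskStatus.Data for task %v", taskStatus.TaskId.GetValue())
		return
	}
	err = json.Unmarshal(taskStatus.Data, &result)
	return
}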
func statusString(status *mesos.TaskStatus) string {
	s := fmt.Sprintf("%s %s slave: %s", status.GetTaskId().GetValue(), status.GetState().String(), idString(status.GetSlaveId().GetValue()))
	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		s += " reason: " + status.GetReason().String()
	}
	if status.GetMessage() != "" {
		s += " message: " + status.GetMessage()
	}
	return s
}
func Status(status *mesos.TaskStatus) string {
	var buffer bytes.Buffer
	buffer.WriteString(fmt.Sprintf("%s %s", status.GetTaskId().GetValue(), status.GetState().String()))
	if status.GetSlaveId() != nil && status.GetSlaveId().GetValue() != "" {
		buffer.WriteString(" slave: ")
		buffer.WriteString(ID(status.GetSlaveId().GetValue()))
	}
	if status.GetState() != mesos.TaskState_TASK_RUNNING {
		buffer.WriteString(" reason: ")
		buffer.WriteString(status.GetReason().String())
	}
	if status.GetMessage() != "" {
		buffer.WriteString(" message: ")
		buffer.WriteString(status.GetMessage())
	}
	return buffer.String()
}
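// Illustrative sketch only: how the statusString/Status helpers above might be exercised,
// assuming the mesos-go generated protobuf bindings (proto.String and the generated Enum()
// helpers). ExampleStatus is an invented name, not part of the original source.
func ExampleStatus() {
	st := &mesos.TaskStatus{
		TaskId:  &mesos.TaskID{Value: proto.String("etcd-1234")},
		SlaveId: &mesos.SlaveID{Value: proto.String("slave-S0")},
		State:   mesos.TaskState_TASK_FAILED.Enum(),
		Reason:  mesos.TaskStatus_REASON_EXECUTOR_TERMINATED.Enum(),
		Message: proto.String("executor exited"),
	}
	fmt.Println(Status(st)) // prints a one-line summary: id, state, slave, reason, message
}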
// StatusUpdate handles task status updates received from the Mesos scheduler driver.
func (c *Cluster) StatusUpdate(_ mesosscheduler.SchedulerDriver, taskStatus *mesosproto.TaskStatus) {
	log.WithFields(log.Fields{"name": "mesos", "state": taskStatus.State.String()}).Debug("Status update")
	taskID := taskStatus.TaskId.GetValue()
	slaveID := taskStatus.SlaveId.GetValue()
	s, ok := c.slaves[slaveID]
	if !ok {
		return
	}
	if task, ok := s.tasks[taskID]; ok {
		task.sendStatus(taskStatus)
	} else {
		var reason = ""
		if taskStatus.Reason != nil {
			reason = taskStatus.GetReason().String()
		}
		log.WithFields(log.Fields{
			"name":    "mesos",
			"state":   taskStatus.State.String(),
			"slaveId": taskStatus.SlaveId.GetValue(),
			"reason":  reason,
		}).Warn("Status update received for unknown task")
	}
}
func (k *framework) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	task, state := k.sched.Tasks().UpdateStatus(taskStatus)

	if (state == podtask.StateRunning || state == podtask.StatePending) &&
		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.KubeletPodLaunchFailed) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.TaskKilled && !task.Has(podtask.Deleted))) {
		//--
		// pod-task has metadata that refers to:
		// (1) a task that Mesos no longer knows about, or else
		// (2) a pod that the Kubelet will never report as "failed"
		// (3) a pod that the kubeletExecutor reported as lost (likely due to docker daemon crash/restart)
		// (4) a pod that the kubeletExecutor reported as lost because the kubelet didn't manage to launch it (in time)
		// (5) a pod that the kubeletExecutor killed, but the scheduler didn't ask for that (maybe killed by the master)
		// For now, destroy the pod and hope that there's a replication controller backing it up.
		// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
		pod := &task.Pod
		log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
		if err := k.client.Core().Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil && !errors.IsNotFound(err) {
			log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
		}
	} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
		// attempt to prevent dangling pods in the pod and task registries
		log.V(1).Infof("requesting explicit reconciliation to clean up for task %v after the executor reported terminated/unregistered", taskStatus.TaskId.GetValue())
		k.tasksReconciler.RequestExplicit()
	} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
		//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
		// If we're reconciling and receive this then the executor may be
		// running a task that we need it to kill. It's possible that the framework
		// is unrecognized by the master at this point, so KillTask is not guaranteed
		// to do anything. The underlying driver transport may be able to send a
		// FrameworkMessage directly to the slave to terminate the task.
		log.V(2).Infof("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
		data := fmt.Sprintf("%s:%s", messages.TaskLost, task.ID) //TODO(jdef) use a real message type
		if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
			log.Error(err.Error())
		}
	}
}
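// Illustrative sketch only (hypothetical helper, not in the original source): the large
// predicate in reconcileTerminalTask above, factored out for readability. The name
// isRoguePodCandidate is invented; the messages.* constants, podtask.Deleted, and the
// *podtask.T type are taken from the usage above.
func isRoguePodCandidate(taskStatus *mesos.TaskStatus, task *podtask.T) bool {
	src, reason, msg := taskStatus.GetSource(), taskStatus.GetReason(), taskStatus.GetMessage()
	switch {
	case src == mesos.TaskStatus_SOURCE_MASTER && reason == mesos.TaskStatus_REASON_RECONCILIATION:
		return true
	case src == mesos.TaskStatus_SOURCE_SLAVE &&
		(reason == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || reason == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED):
		return true
	case src == mesos.TaskStatus_SOURCE_EXECUTOR &&
		(msg == messages.ContainersDisappeared || msg == messages.KubeletPodLaunchFailed):
		return true
	case src == mesos.TaskStatus_SOURCE_EXECUTOR && msg == messages.TaskKilled && !task.Has(podtask.Deleted):
		return true
	}
	return false
}
// With such a helper, the rogue-pod branch above would reduce to:
//   (state == podtask.StateRunning || state == podtask.StatePending) && isRoguePodCandidate(taskStatus, task)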
// reconcile an unknown (from the perspective of our registry) non-terminal task
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover the task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParsePodFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			//TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should have already attempted recovery via recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// a possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// a possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v", podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			_, err := k.taskRegistry.Register(t, nil)
			if err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
				return
			} else {
				k.taskRegistry.UpdateStatus(taskStatus)
			}
			return
		} else if err != nil {
			// should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			// this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
			// metadata is not appropriate for task reconstruction -- which should almost certainly never
			// be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
			// we were failed over.
			// kill this task, allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}
	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}
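// Illustrative sketch only (assumption, not the original helper): container.ParsePodFullName,
// used above, splits a "name_namespace" pod full name back into its parts. The real kubelet
// helper may differ in detail; parsePodFullNameSketch is an invented name.
func parsePodFullNameSketch(podFullName string) (name, namespace string, err error) {
	parts := strings.Split(podFullName, "_")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", fmt.Errorf("failed to parse pod full name %q", podFullName)
	}
	return parts[0], parts[1], nil
}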
func (s *EtcdScheduler) StatusUpdate(
	driver scheduler.SchedulerDriver,
	status *mesos.TaskStatus,
) {
	s.mut.Lock()
	defer s.mut.Unlock()

	log.Infoln(
		"Status update: task", status.TaskId.GetValue(),
		" is in state ", status.State.Enum().String(),
	)

	node, err := config.Parse(status.GetTaskId().GetValue())
	if err != nil {
		log.Errorf("scheduler: failed to unmarshal config.Node from TaskId: %s", err)
		return
	}
	node.SlaveID = status.SlaveId.GetValue()

	// record that we've heard about this task
	s.heardFrom[status.GetTaskId().GetValue()] = struct{}{}

	switch status.GetState() {
	case mesos.TaskState_TASK_LOST,
		mesos.TaskState_TASK_FINISHED,
		mesos.TaskState_TASK_KILLED,
		mesos.TaskState_TASK_ERROR,
		mesos.TaskState_TASK_FAILED:

		log.Errorf("Task contraction: %+v", status.GetState())
		log.Errorf("message: %s", status.GetMessage())
		log.Errorf("reason: %+v", status.GetReason())

		atomic.AddUint32(&s.Stats.FailedServers, 1)

		// TODO(tyler) kill this
		// Pump the brakes so that we have time to deconfigure the lost node
		// before adding a new one. If we don't deconfigure first, we risk
		// split brain.
		s.PumpTheBrakes()

		// now we know this task is dead
		delete(s.pending, node.Name)
		delete(s.running, node.Name)
		delete(s.tasks, node.Name)

		// We don't have to clean up the state in ZK for this
		// as it is fine to eventually just persist when we
		// receive a new TASK_RUNNING below.
		delete(s.reconciliationInfo, status.TaskId.GetValue())

		s.QueueLaunchAttempt()

		// TODO(tyler) do we want to lock if the first task fails?
		// TODO(tyler) can we handle a total loss at reconciliation time,
		// when s.state == Immutable?
		if len(s.running) == 0 && s.state == Mutable {
			log.Error("TOTAL CLUSTER LOSS! LOCKING SCHEDULER, " +
				"FOLLOW RESTORATION GUIDE AT " +
				"https://github.com/mesosphere/" +
				"etcd-mesos/blob/master/docs/response.md")
			s.state = Immutable
		}
	case mesos.TaskState_TASK_STARTING:
	case mesos.TaskState_TASK_RUNNING:
		// We update data to ZK synchronously because it must happen
		// in-order. If we spun off a goroutine this would possibly retry
		// and succeed in the wrong order, and older data would win.
		// We keep this simple here, as if ZK is healthy this won't take long.
		// If this takes long, we're probably about to die anyway, as ZK is
		// displeased and mesos-go will panic when it loses contact.
		s.reconciliationInfo[status.TaskId.GetValue()] = status.SlaveId.GetValue()
		err = s.updateReconciliationInfoFunc(
			s.reconciliationInfo,
			s.ZkServers,
			s.ZkChroot,
			s.FrameworkName,
		)
		if err != nil {
			log.Errorf("Failed to persist reconciliation info: %+v", err)
		}

		delete(s.pending, node.Name)
		_, present := s.running[node.Name]
		if !present {
			s.running[node.Name] = node
			s.tasks[node.Name] = status.TaskId
		}

		// During reconciliation, we may find nodes with higher IDs due to ntp drift
		etcdIndexParts := strings.Split(node.Name, "-")
		if len(etcdIndexParts) != 2 {
			log.Warning("Task has a Name that does not follow the form etcd-<index>")
		} else {
			etcdIndex, err := strconv.ParseInt(etcdIndexParts[1], 10, 64)
			if err != nil {
				log.Warning("Task has a Name that does not follow the form etcd-<index>")
			} else {
				if etcdIndex > s.highestInstanceID {
					s.highestInstanceID = etcdIndex + 1
				}
			}
		}
	default:
		log.Warningf("Received unhandled task state: %+v", status.GetState())
	}
}
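// Illustrative sketch only (hypothetical helper, not in the original source): the inline
// "etcd-<index>" parsing in StatusUpdate above, pulled out for clarity. Only the standard
// library is used; the name parseEtcdInstanceID is invented.
func parseEtcdInstanceID(taskName string) (int64, error) {
	parts := strings.Split(taskName, "-")
	if len(parts) != 2 {
		return 0, fmt.Errorf("task name %q does not follow the form etcd-<index>", taskName)
	}
	return strconv.ParseInt(parts[1], 10, 64)
}
// A scheduler could then keep its high-water mark with:
//   if id, err := parseEtcdInstanceID(node.Name); err == nil && id > s.highestInstanceID {
//       s.highestInstanceID = id + 1
//   }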