func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// master-initiated reconciliation updates carry no TaskStatus.Data
		return
	}
	//TODO(jdef) determine the usefulness of this information (if any)
	if result, err := ParsePodStatusResult(taskStatus); err != nil {
		log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
	} else {
		task.podStatus = result.Status
		log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
	}
}
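// For context: ParsePodStatusResult is expected to decode TaskStatus.Data into an
// api.PodStatusResult. The sketch below is illustrative only -- it assumes the
// executor serializes the result as JSON (needs encoding/json and fmt); the real
// implementation lives in the podtask package and may differ in detail.
func parsePodStatusResultSketch(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
	if taskStatus.Data == nil {
		return result, fmt.Errorf("missing TaskStatus.Data for task %v", taskStatus.TaskId.GetValue())
	}
	err = json.Unmarshal(taskStatus.Data, &result)
	return
}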
func (k *framework) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	task, state := k.sched.Tasks().UpdateStatus(taskStatus)

	if (state == podtask.StateRunning || state == podtask.StatePending) &&
		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_EXECUTOR && taskStatus.GetMessage() == messages.ContainersDisappeared)) {
		//--
		// pod-task has metadata that refers to:
		// (1) a task that Mesos no longer knows about, or else
		// (2) a pod that the Kubelet will never report as "failed", or else
		// (3) a pod that the kubeletExecutor reported as lost (likely due to a docker daemon crash/restart).
		// For now, destroy the pod and hope that there's a replication controller backing it up.
		// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
		pod := &task.Pod
		log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
		if err := k.client.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil && !errors.IsNotFound(err) {
			log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
		}
	} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
		// attempt to prevent dangling pods in the pod and task registries
		log.V(1).Infof("requesting explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
		k.tasksReconciler.RequestExplicit()
	} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
		//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
		// If we're reconciling and receive this, then the executor may be running a task that we
		// need it to kill. It's possible that the framework is unrecognized by the master at this
		// point, so KillTask is not guaranteed to do anything. The underlying driver transport may
		// be able to send a FrameworkMessage directly to the slave to terminate the task.
		log.V(2).Infof("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
		data := fmt.Sprintf("%s:%s", messages.TaskLost, task.ID) //TODO(jdef) use a real message type
		if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
			log.Error(err.Error())
		}
	}
}
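// Illustrative refactoring, not part of the original code: the source/reason
// combinations checked in the first branch above can be read as a single
// "the task is gone but the pod record may linger" predicate (the caller would
// still AND this with the StateRunning/StatePending check). Hypothetical helper name.
func isLostTerminalReport(ts *mesos.TaskStatus) bool {
	switch ts.GetSource() {
	case mesos.TaskStatus_SOURCE_MASTER:
		return ts.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION
	case mesos.TaskStatus_SOURCE_SLAVE:
		return ts.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED ||
			ts.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED
	case mesos.TaskStatus_SOURCE_EXECUTOR:
		return ts.GetMessage() == messages.ContainersDisappeared
	}
	return false
}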
// reconcile an unknown (from the perspective of our registry) non-terminal task
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParsePodFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			//TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should have already attempted recovery via recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from '%v' for task %v: %v", podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			if _, err := k.taskRegistry.Register(t, nil); err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
			} else {
				k.taskRegistry.UpdateStatus(taskStatus)
			}
			return
		} else if err != nil {
			// should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			// this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the
			// apiserver's pod metadata is not appropriate for task reconstruction -- which should
			// almost certainly never be the case unless someone swapped out the pod on us (and
			// kept the same namespace/name) while we were failed over.

			// kill this task, allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}
	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}
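// For context: a kubelet "pod full name" joins the pod name and namespace with an
// underscore (e.g. "mypod_default"). A minimal sketch of the parsing relied on
// above, assuming that underscore-joined format (needs strings and fmt) --
// illustrative only; the canonical implementation is kubelet/container.ParsePodFullName.
func parsePodFullNameSketch(podFullName string) (name, namespace string, err error) {
	parts := strings.Split(podFullName, "_")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", fmt.Errorf("failed to parse the pod full name %q", podFullName)
	}
	return parts[0], parts[1], nil
}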
func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
	if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
		log.Errorf("received STAGING for task %v with unexpected source: %v",
			status.GetTaskId().GetValue(), status.GetSource())
	}
}
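// Illustrative only: a master-sourced reconciliation update for a staging task --
// the one case handleTaskStaging accepts silently -- would look roughly like the
// sketch below, assuming the Enum() pointer helpers generated for the mesos
// protobufs. Hypothetical constructor name.
func newStagingReconciliationStatus(taskId string) *mesos.TaskStatus {
	return &mesos.TaskStatus{
		TaskId: &mesos.TaskID{Value: &taskId},
		State:  mesos.TaskState_TASK_STAGING.Enum(),
		Source: mesos.TaskStatus_SOURCE_MASTER.Enum(),
		Reason: mesos.TaskStatus_REASON_RECONCILIATION.Enum(),
	}
}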