// validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state
// of the container. If previous is true, only the logs of the last terminated container are returned; otherwise the
// currently running container is preferred over a previous termination. If info about the container is not available
// then a specific error is returned to the end user.
func (kl *Kubelet) validateContainerLogStatus(podName string, podStatus *api.PodStatus, containerName string, previous bool) (containerID kubecontainer.ContainerID, err error) {
	var cID string

	cStatus, found := api.GetContainerStatus(podStatus.ContainerStatuses, containerName)
	// if not found, check the init containers
	if !found {
		cStatus, found = api.GetContainerStatus(podStatus.InitContainerStatuses, containerName)
	}
	if !found {
		return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is not available", containerName, podName)
	}
	lastState := cStatus.LastTerminationState
	waiting, running, terminated := cStatus.State.Waiting, cStatus.State.Running, cStatus.State.Terminated

	switch {
	case previous:
		if lastState.Terminated == nil {
			return kubecontainer.ContainerID{}, fmt.Errorf("previous terminated container %q in pod %q not found", containerName, podName)
		}
		cID = lastState.Terminated.ContainerID

	case running != nil:
		cID = cStatus.ContainerID

	case terminated != nil:
		cID = terminated.ContainerID

	case lastState.Terminated != nil:
		cID = lastState.Terminated.ContainerID

	case waiting != nil:
		// output some info for the most common pending failures
		switch reason := waiting.Reason; reason {
		case images.ErrImagePull.Error():
			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: image can't be pulled", containerName, podName)
		case images.ErrImagePullBackOff.Error():
			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: trying and failing to pull image", containerName, podName)
		default:
			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: %v", containerName, podName, reason)
		}
	default:
		// unrecognized state
		return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start - no logs yet", containerName, podName)
	}

	return kubecontainer.ParseContainerID(cID), nil
}
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {
	defer util.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", kubeletutil.FormatPodName(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			kubeletutil.FormatPodName(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Non-existent container probed: %v - %v",
			kubeletutil.FormatPodName(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID.String() != c.ContainerID {
		if !w.containerID.IsEmpty() {
			w.resultsManager.Remove(w.containerID)
		}
		w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			kubeletutil.FormatPodName(w.pod), w.container.Name)
		if !w.containerID.IsEmpty() {
			w.resultsManager.Set(w.containerID, results.Failure, w.pod)
		}
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
		return true
	}

	// TODO: Move error handling out of prober.
	result, _ := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID)
	if result != probe.Unknown {
		w.resultsManager.Set(w.containerID, result != probe.Failure, w.pod)
	}
	return true
}
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func doProbe(m *manager, w *worker) (keepGoing bool) {
	defer util.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := m.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", kubeutil.FormatPodName(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			kubeutil.FormatPodName(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Non-existent container probed: %v - %v",
			kubeutil.FormatPodName(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID != types.UID(c.ContainerID) {
		if w.containerID != "" {
			m.readinessCache.removeReadiness(string(w.containerID))
		}
		w.containerID = types.UID(kubecontainer.TrimRuntimePrefix(c.ContainerID))
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			kubeutil.FormatPodName(w.pod), w.container.Name)
		m.readinessCache.setReadiness(string(w.containerID), false)
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		// Readiness defaults to false during the initial delay.
		m.readinessCache.setReadiness(string(w.containerID), false)
		return true
	}

	// TODO: Move error handling out of prober.
	result, _ := m.prober.ProbeReadiness(w.pod, status, w.container, string(w.containerID))
	if result != probe.Unknown {
		m.readinessCache.setReadiness(string(w.containerID), result != probe.Failure)
	}
	return true
}
// GeneratePodInitializedCondition returns an initialized condition if all init containers in a pod are ready, else it
// returns an uninitialized condition.
func GeneratePodInitializedCondition(spec *api.PodSpec, containerStatuses []api.ContainerStatus, podPhase api.PodPhase) api.PodCondition {
	// Find if all init containers are ready or not.
	if containerStatuses == nil && len(spec.InitContainers) > 0 {
		return api.PodCondition{
			Type:   api.PodInitialized,
			Status: api.ConditionFalse,
			Reason: "UnknownContainerStatuses",
		}
	}
	unknownContainers := []string{}
	unreadyContainers := []string{}
	for _, container := range spec.InitContainers {
		if containerStatus, ok := api.GetContainerStatus(containerStatuses, container.Name); ok {
			if !containerStatus.Ready {
				unreadyContainers = append(unreadyContainers, container.Name)
			}
		} else {
			unknownContainers = append(unknownContainers, container.Name)
		}
	}

	// If all init containers are known and succeeded, just return PodCompleted.
	if podPhase == api.PodSucceeded && len(unknownContainers) == 0 {
		return api.PodCondition{
			Type:   api.PodInitialized,
			Status: api.ConditionTrue,
			Reason: "PodCompleted",
		}
	}

	unreadyMessages := []string{}
	if len(unknownContainers) > 0 {
		unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with unknown status: %s", unknownContainers))
	}
	if len(unreadyContainers) > 0 {
		unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with incomplete status: %s", unreadyContainers))
	}
	unreadyMessage := strings.Join(unreadyMessages, ", ")
	if unreadyMessage != "" {
		return api.PodCondition{
			Type:    api.PodInitialized,
			Status:  api.ConditionFalse,
			Reason:  "ContainersNotInitialized",
			Message: unreadyMessage,
		}
	}

	return api.PodCondition{
		Type:   api.PodInitialized,
		Status: api.ConditionTrue,
	}
}
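// Illustrative sketch (not from the source): calling GeneratePodInitializedCondition
// directly with a hand-built spec and statuses. The container names, the status values,
// and the expected condition noted in the comments are assumptions chosen for the example.
func exampleInitializedCondition() api.PodCondition {
	spec := &api.PodSpec{
		InitContainers: []api.Container{{Name: "init-db"}},
		Containers:     []api.Container{{Name: "app"}},
	}
	statuses := []api.ContainerStatus{{
		Name:  "init-db",
		Ready: true,
	}}
	// With the only init container reported Ready, the returned condition
	// should be PodInitialized with Status=ConditionTrue.
	return GeneratePodInitializedCondition(spec, statuses, api.PodRunning)
}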
// waitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or ran too long.
func waitForPodSuccessInNamespace(c *client.Client, podName string, contName string, namespace string) error {
	return waitForPodCondition(c, namespace, podName, "success or failure", podStartTimeout, func(pod *api.Pod) (bool, error) {
		// Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632
		ci, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, contName)
		if !ok {
			e2e.Logf("No Status.Info for container '%s' in pod '%s' yet", contName, podName)
		} else {
			if ci.State.Terminated != nil {
				if ci.State.Terminated.ExitCode == 0 {
					By("Saw pod success")
					return true, nil
				}
				return true, fmt.Errorf("pod '%s' terminated with failure: %+v", podName, ci.State.Terminated)
			}
			e2e.Logf("Nil State.Terminated for container '%s' in pod '%s' in namespace '%s' so far", contName, podName, namespace)
		}
		return false, nil
	})
}
// getRestartDelay polls the pod's status and returns the delay between the previous
// container termination and the subsequent restart of the named container.
func getRestartDelay(podClient *framework.PodClient, podName string, containerName string) (time.Duration, error) {
	beginTime := time.Now()
	for time.Since(beginTime) < (2 * maxBackOffTolerance) { // may just miss the 1st MaxContainerBackOff delay
		time.Sleep(time.Second)
		pod, err := podClient.Get(podName)
		framework.ExpectNoError(err, fmt.Sprintf("getting pod %s", podName))
		status, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, containerName)
		if !ok {
			framework.Logf("getRestartDelay: status missing")
			continue
		}

		if status.State.Waiting == nil && status.State.Running != nil &&
			status.LastTerminationState.Terminated != nil &&
			status.State.Running.StartedAt.Time.After(beginTime) {
			startedAt := status.State.Running.StartedAt.Time
			finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
			framework.Logf("getRestartDelay: restartCount = %d, finishedAt=%s restartedAt=%s (%s)",
				status.RestartCount, finishedAt, startedAt, startedAt.Sub(finishedAt))
			return startedAt.Sub(finishedAt), nil
		}
	}
	return 0, fmt.Errorf("timeout getting pod restart delay")
}
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {
	defer runtime.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", format.Pod(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			format.Pod(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok || len(c.ContainerID) == 0 {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Probe target container not found: %v - %v",
			format.Pod(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID.String() != c.ContainerID {
		if !w.containerID.IsEmpty() {
			w.resultsManager.Remove(w.containerID)
		}
		w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
		w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			format.Pod(w.pod), w.container.Name)
		if !w.containerID.IsEmpty() {
			w.resultsManager.Set(w.containerID, results.Failure, w.pod)
		}
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		return true
	}

	result, err := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID)
	if err != nil {
		// Prober error, throw away the result.
		return true
	}

	if w.lastResult == result {
		w.resultRun++
	} else {
		w.lastResult = result
		w.resultRun = 1
	}

	if (result == results.Failure && w.resultRun < w.spec.FailureThreshold) ||
		(result == results.Success && w.resultRun < w.spec.SuccessThreshold) {
		// Success or failure is below threshold - leave the probe state unchanged.
		return true
	}

	w.resultsManager.Set(w.containerID, result, w.pod)

	return true
}
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {
	defer func() { recover() }() // Actually eat panics (HandleCrash takes care of logging)
	defer runtime.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", format.Pod(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			format.Pod(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok || len(c.ContainerID) == 0 {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Probe target container not found: %v - %v",
			format.Pod(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID.String() != c.ContainerID {
		if !w.containerID.IsEmpty() {
			w.resultsManager.Remove(w.containerID)
		}
		w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
		w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
		// We've got a new container; resume probing.
		w.onHold = false
	}

	if w.onHold {
		// Worker is on hold until there is a new container.
		return true
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			format.Pod(w.pod), w.container.Name)
		if !w.containerID.IsEmpty() {
			w.resultsManager.Set(w.containerID, results.Failure, w.pod)
		}
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int32(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		return true
	}

	result, err := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID)
	if err != nil {
		// Prober error, throw away the result.
		return true
	}

	if w.lastResult == result {
		w.resultRun++
	} else {
		w.lastResult = result
		w.resultRun = 1
	}

	if (result == results.Failure && w.resultRun < int(w.spec.FailureThreshold)) ||
		(result == results.Success && w.resultRun < int(w.spec.SuccessThreshold)) {
		// Success or failure is below threshold - leave the probe state unchanged.
		return true
	}

	w.resultsManager.Set(w.containerID, result, w.pod)

	if w.probeType == liveness && result == results.Failure {
		// The container failed a liveness check, so it will need to be restarted.
		// Stop probing until we see a new container ID. This is to reduce the
		// chance of hitting #21751, where running `docker exec` when a
		// container is being stopped may lead to corrupted container state.
		w.onHold = true
	}

	return true
}
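// Illustrative sketch (not from the source): how a worker's run loop might drive
// doProbe on the probe's configured period until it reports false. The loop shape,
// the use of w.spec.PeriodSeconds, and the absence of a stop channel are simplifying
// assumptions for the example; only doProbe itself comes from the code above.
func (w *worker) runSketch() {
	probeTicker := time.NewTicker(time.Duration(w.spec.PeriodSeconds) * time.Second)
	defer probeTicker.Stop()
	for w.doProbe() {
		<-probeTicker.C // Wait out one probe period before probing again.
	}
}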
// GetPhase returns the phase of a pod given its container info.
// This func is exported to simplify integration with third-party kubelet
// integrations like kubernetes-mesos.
func GetPhase(spec *api.PodSpec, info []api.ContainerStatus) api.PodPhase {
	initialized := 0
	pendingInitialization := 0
	failedInitialization := 0
	for _, container := range spec.InitContainers {
		containerStatus, ok := api.GetContainerStatus(info, container.Name)
		if !ok {
			pendingInitialization++
			continue
		}

		switch {
		case containerStatus.State.Running != nil:
			pendingInitialization++
		case containerStatus.State.Terminated != nil:
			if containerStatus.State.Terminated.ExitCode == 0 {
				initialized++
			} else {
				failedInitialization++
			}
		case containerStatus.State.Waiting != nil:
			if containerStatus.LastTerminationState.Terminated != nil {
				if containerStatus.LastTerminationState.Terminated.ExitCode == 0 {
					initialized++
				} else {
					failedInitialization++
				}
			} else {
				pendingInitialization++
			}
		default:
			pendingInitialization++
		}
	}

	unknown := 0
	running := 0
	waiting := 0
	stopped := 0
	failed := 0
	succeeded := 0
	for _, container := range spec.Containers {
		containerStatus, ok := api.GetContainerStatus(info, container.Name)
		if !ok {
			unknown++
			continue
		}

		switch {
		case containerStatus.State.Running != nil:
			running++
		case containerStatus.State.Terminated != nil:
			stopped++
			if containerStatus.State.Terminated.ExitCode == 0 {
				succeeded++
			} else {
				failed++
			}
		case containerStatus.State.Waiting != nil:
			if containerStatus.LastTerminationState.Terminated != nil {
				stopped++
			} else {
				waiting++
			}
		default:
			unknown++
		}
	}

	if failedInitialization > 0 && spec.RestartPolicy == api.RestartPolicyNever {
		return api.PodFailed
	}

	switch {
	case pendingInitialization > 0:
		fallthrough
	case waiting > 0:
		glog.V(5).Infof("pod waiting > 0, pending")
		// One or more containers have not been started
		return api.PodPending
	case running > 0 && unknown == 0:
		// All containers have been started, and at least
		// one container is running
		return api.PodRunning
	case running == 0 && stopped > 0 && unknown == 0:
		// All containers are terminated
		if spec.RestartPolicy == api.RestartPolicyAlways {
			// All containers are in the process of restarting
			return api.PodRunning
		}
		if stopped == succeeded {
			// RestartPolicy is not Always, and all
			// containers are terminated in success
			return api.PodSucceeded
		}
		if spec.RestartPolicy == api.RestartPolicyNever {
			// RestartPolicy is Never, and all containers are
			// terminated with at least one in failure
			return api.PodFailed
		}
		// RestartPolicy is OnFailure, and at least one in failure
		// and in the process of restarting
		return api.PodRunning
	default:
		glog.V(5).Infof("pod default case, pending")
		return api.PodPending
	}
}
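// Illustrative sketch (not from the source): exercising GetPhase with a hand-built
// spec and statuses. The container names, states, and the expected phase noted in
// the comment are assumptions chosen for the example.
func examplePodPhase() api.PodPhase {
	spec := &api.PodSpec{
		RestartPolicy: api.RestartPolicyAlways,
		Containers:    []api.Container{{Name: "app"}, {Name: "sidecar"}},
	}
	info := []api.ContainerStatus{
		{Name: "app", State: api.ContainerState{Running: &api.ContainerStateRunning{}}},
		{Name: "sidecar", State: api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "ContainerCreating"}}},
	}
	// One container is still waiting with no previous termination, so the
	// waiting counter is non-zero and GetPhase reports PodPending.
	return GetPhase(spec, info)
}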