// checkAndKeepInitContainers keeps all successfully completed init containers. If there
// are failing containers, only keep the first failing one.
func checkAndKeepInitContainers(pod *api.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) bool {
	initFailed := false

	for i, container := range pod.Spec.InitContainers {
		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		if containerStatus == nil {
			continue
		}

		if containerStatus.State == kubecontainer.ContainerStateRunning {
			initContainersToKeep[containerStatus.ID] = i
			continue
		}

		if containerStatus.State == kubecontainer.ContainerStateExited {
			initContainersToKeep[containerStatus.ID] = i
		}

		if isContainerFailed(containerStatus) {
			initFailed = true
			break
		}
	}

	return initFailed
}
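isContainerFailed is used here and in findNextInitContainerToRun below but is not part of this excerpt. A minimal sketch consistent with how it is used (a container counts as failed once it has exited with a non-zero exit code) might look like this; the real helper may differ in detail:

// isContainerFailed reports whether a container status represents a failed run.
// Sketch only: assumes "failed" means "exited with a non-zero exit code",
// which matches the way the callers above and below treat it.
func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
	return status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0
}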
// isPodRunning returns true if all containers of a manifest are running.
func (kl *Kubelet) isPodRunning(pod *api.Pod, status *kubecontainer.PodStatus) bool {
	for _, c := range pod.Spec.Containers {
		cs := status.FindContainerStatusByName(c.Name)
		if cs == nil || cs.State != kubecontainer.ContainerStateRunning {
			glog.Infof("Container %q for pod %q not running", c.Name, format.Pod(pod))
			return false
		}
	}
	return true
}
// findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers.
// Status is only returned if an init container failed, in which case next will
// point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
	if len(pod.Spec.InitContainers) == 0 {
		return nil, nil, true
	}

	// If there are failed containers, return the status of the last failed one.
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		container := &pod.Spec.InitContainers[i]
		status := podStatus.FindContainerStatusByName(container.Name)
		if status != nil && isContainerFailed(status) {
			return status, container, false
		}
	}

	// There are no failed containers now.
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		container := &pod.Spec.InitContainers[i]
		status := podStatus.FindContainerStatusByName(container.Name)
		if status == nil {
			continue
		}

		// container is still running, return not done.
		if status.State == kubecontainer.ContainerStateRunning {
			return nil, nil, false
		}

		if status.State == kubecontainer.ContainerStateExited {
			// all init containers successful
			if i == (len(pod.Spec.InitContainers) - 1) {
				return nil, nil, true
			}

			// all containers up to i successful, go to i+1
			return nil, &pod.Spec.InitContainers[i+1], false
		}
	}

	return nil, &pod.Spec.InitContainers[0], false
}
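The three return values are easiest to read with a caller in mind. The sketch below is not the kubelet's actual sync logic; it is only a hypothetical consumer showing how the contract above is typically interpreted:

// handleInitContainers is a hypothetical caller, used only to illustrate the
// return contract of findNextInitContainerToRun.
func handleInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
	status, next, done := findNextInitContainerToRun(pod, podStatus)
	switch {
	case done:
		// Every init container has exited successfully (or there are none);
		// the regular containers may be started.
	case status != nil:
		// An init container failed; next points at that same container, and the
		// caller decides, based on the restart policy, whether to retry it.
	case next != nil:
		// All init containers before next have succeeded; next should be started.
	default:
		// An init container is still running; nothing to start yet.
	}
}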
// startContainer starts a container and, on error, returns a message indicating why it failed.
// It starts the container through the following steps:
// * pull the image
// * create the container
// * start the container
// * run the post start lifecycle hooks (if applicable)
func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, container *v1.Container, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string) (string, error) {
	// Step 1: pull the image.
	err, msg := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
	if err != nil {
		return msg, err
	}

	// Step 2: create the container.
	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
	}
	glog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)

	// For a new container, the RestartCount should be 0
	restartCount := 0
	containerStatus := podStatus.FindContainerStatusByName(container.Name)
	if containerStatus != nil {
		restartCount = containerStatus.RestartCount + 1
	}

	containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Generate Container Config Failed", err
	}

	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Create Container Failed", err
	}
	m.recorder.Eventf(ref, v1.EventTypeNormal, events.CreatedContainer, "Created container with id %v", containerID)
	if ref != nil {
		m.containerRefManager.SetRef(kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		}, ref)
	}

	// Step 3: start the container.
	err = m.runtimeService.StartContainer(containerID)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToStartContainer, "Failed to start container with id %v with error: %v", containerID, err)
		return "Start Container Failed", err
	}
	m.recorder.Eventf(ref, v1.EventTypeNormal, events.StartedContainer, "Started container with id %v", containerID)

	// Symlink container logs to the legacy container log location for cluster logging
	// support.
	// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
	containerMeta := containerConfig.GetMetadata()
	sandboxMeta := podSandboxConfig.GetMetadata()
	legacySymlink := legacyLogSymlink(containerID, containerMeta.GetName(), sandboxMeta.GetName(), sandboxMeta.GetNamespace())
	containerLog := filepath.Join(podSandboxConfig.GetLogDirectory(), containerConfig.GetLogPath())
	if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
		glog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v", legacySymlink, containerID, containerLog, err)
	}

	// Step 4: execute the post start hook.
	if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
		kubeContainerID := kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		}
		msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
		if handlerErr != nil {
			err := fmt.Errorf("PostStart handler: %v", handlerErr)
			m.generateContainerEvent(kubeContainerID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
			m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil)
			return "PostStart Hook Failed", err
		}
	}

	return "", nil
}
// TODO(yifan): Delete this function when the logic is moved to kubelet.
func (r *Runtime) ConvertPodStatusToAPIPodStatus(pod *api.Pod, status *kubecontainer.PodStatus) (*api.PodStatus, error) {
	apiPodStatus := &api.PodStatus{
		// TODO(yifan): Add reason and message field.
		PodIP: status.IP,
	}

	// Sort in the reverse order of the restart count because the
	// latest one will have the largest restart count.
	sort.Sort(sort.Reverse(sortByRestartCount(status.ContainerStatuses)))

	containerStatuses := make(map[string]*api.ContainerStatus)
	for _, c := range status.ContainerStatuses {
		var st api.ContainerState
		switch c.State {
		case kubecontainer.ContainerStateRunning:
			st.Running = &api.ContainerStateRunning{
				StartedAt: unversioned.NewTime(c.StartedAt),
			}
		case kubecontainer.ContainerStateExited:
			if pod.Spec.RestartPolicy == api.RestartPolicyAlways ||
				(pod.Spec.RestartPolicy == api.RestartPolicyOnFailure && c.ExitCode != 0) {
				// TODO(yifan): Add reason and message.
				st.Waiting = &api.ContainerStateWaiting{}
				break
			}
			st.Terminated = &api.ContainerStateTerminated{
				ExitCode:  c.ExitCode,
				StartedAt: unversioned.NewTime(c.StartedAt),
				// TODO(yifan): Add reason, message, finishedAt, signal.
				ContainerID: c.ID.String(),
			}
		default:
			// Unknown state.
			// TODO(yifan): Add reason and message.
			st.Waiting = &api.ContainerStateWaiting{}
		}

		status, ok := containerStatuses[c.Name]
		if !ok {
			containerStatuses[c.Name] = &api.ContainerStatus{
				Name:         c.Name,
				Image:        c.Image,
				ImageID:      c.ImageID,
				ContainerID:  c.ID.String(),
				RestartCount: c.RestartCount,
				State:        st,
			}
			continue
		}

		// Found multiple container statuses, fill that as last termination state.
		if status.LastTerminationState.Waiting == nil &&
			status.LastTerminationState.Running == nil &&
			status.LastTerminationState.Terminated == nil {
			status.LastTerminationState = st
		}
	}

	for _, c := range pod.Spec.Containers {
		cs, ok := containerStatuses[c.Name]
		if !ok {
			cs = &api.ContainerStatus{
				Name:  c.Name,
				Image: c.Image,
				// TODO(yifan): Add reason and message.
				State: api.ContainerState{Waiting: &api.ContainerStateWaiting{}},
			}
		}
		apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *cs)
	}

	return apiPodStatus, nil
}
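sortByRestartCount is called above but not included in this excerpt. A minimal sketch, assuming it is a sort.Interface over the runtime container statuses ordered by restart count (reversed above so the latest, highest-restart-count status is visited first), could be:

// sortByRestartCount is a sketch of the sort helper used above; the real type may
// differ in detail, but this version matches how sort.Sort/sort.Reverse consume it.
type sortByRestartCount []*kubecontainer.ContainerStatus

func (s sortByRestartCount) Len() int           { return len(s) }
func (s sortByRestartCount) Less(i, j int) bool { return s[i].RestartCount < s[j].RestartCount }
func (s sortByRestartCount) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }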
// computePodContainerChanges checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *api.Pod, podStatus *kubecontainer.PodStatus) podContainerSpecChanges {
	glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)

	sandboxChanged, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
	changes := podContainerSpecChanges{
		CreateSandbox:        sandboxChanged,
		SandboxID:            sandboxID,
		Attempt:              attempt,
		ContainersToStart:    make(map[int]string),
		ContainersToKeep:     make(map[kubecontainer.ContainerID]int),
		InitContainersToKeep: make(map[kubecontainer.ContainerID]int),
		ContainersToKill:     make(map[kubecontainer.ContainerID]containerToKillInfo),
	}

	// check the status of init containers.
	initFailed := false
	// always reset the init containers if the sandbox is changed.
	if !sandboxChanged {
		// Keep all successfully completed containers. If there are failing containers,
		// only keep the first failing one.
		initFailed = checkAndKeepInitContainers(pod, podStatus, changes.InitContainersToKeep)
	}
	changes.InitFailed = initFailed

	// check the status of containers.
	for index, container := range pod.Spec.Containers {
		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
				message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				glog.Info(message)
				changes.ContainersToStart[index] = message
			}
			continue
		}

		if sandboxChanged {
			if pod.Spec.RestartPolicy != api.RestartPolicyNever {
				message := fmt.Sprintf("Container %+v's pod sandbox is dead, the container will be recreated.", container)
				glog.Info(message)
				changes.ContainersToStart[index] = message
			}
			continue
		}

		if initFailed {
			// Initialization failed and Container exists.
			// If we have an initialization failure everything will be killed anyway.
			// If RestartPolicy is Always or OnFailure we restart containers that were running before.
			if pod.Spec.RestartPolicy != api.RestartPolicyNever {
				message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name)
				glog.V(1).Info(message)
				changes.ContainersToStart[index] = message
			}
			continue
		}

		expectedHash := kubecontainer.HashContainer(&container)
		containerChanged := containerStatus.Hash != expectedHash
		if containerChanged {
			message := fmt.Sprintf("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.",
				pod.Name, container.Name, containerStatus.Hash, expectedHash)
			glog.Info(message)
			changes.ContainersToStart[index] = message
			continue
		}

		liveness, found := m.livenessManager.Get(containerStatus.ID)
		if !found || liveness == proberesults.Success {
			changes.ContainersToKeep[containerStatus.ID] = index
			continue
		}
		if pod.Spec.RestartPolicy != api.RestartPolicyNever {
			message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name)
			glog.Info(message)
			changes.ContainersToStart[index] = message
		}
	}

	// Don't keep init containers if they are the only containers to keep.
	if !sandboxChanged && len(changes.ContainersToStart) == 0 && len(changes.ContainersToKeep) == 0 {
		changes.InitContainersToKeep = make(map[kubecontainer.ContainerID]int)
	}

	// compute containers to be killed
	runningContainerStatuses := podStatus.GetRunningContainerStatuses()
	for _, containerStatus := range runningContainerStatuses {
		_, keep := changes.ContainersToKeep[containerStatus.ID]
		_, keepInit := changes.InitContainersToKeep[containerStatus.ID]
		if !keep && !keepInit {
			var podContainer *api.Container
			var killMessage string
			for i, c := range pod.Spec.Containers {
				if c.Name == containerStatus.Name {
					podContainer = &pod.Spec.Containers[i]
					killMessage = changes.ContainersToStart[i]
					break
				}
			}

			changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
				name:      containerStatus.Name,
				container: podContainer,
				message:   killMessage,
			}
		}
	}

	return changes
}
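podContainerSpecChanges and containerToKillInfo are consumed here and in the last function of this excerpt but are not defined in it. The sketch below reconstructs their fields purely from that usage; field names not referenced above, and the exact field types (e.g. Attempt as uint32), are assumptions:

// containerToKillInfo describes one running container that should be killed.
// Sketch inferred from usage; not a copy of the real definition.
type containerToKillInfo struct {
	name      string         // name of the running container to kill
	container *api.Container // matching container from the pod spec, if any
	message   string         // human-readable reason, reused from ContainersToStart
}

// podContainerSpecChanges summarizes what the sync loop should do with the pod's
// sandbox and containers. Sketch inferred from usage; not a copy of the real definition.
type podContainerSpecChanges struct {
	CreateSandbox bool
	SandboxID     string
	Attempt       uint32
	InitFailed    bool

	ContainersToStart    map[int]string                                    // spec index -> reason to (re)start
	ContainersToKeep     map[kubecontainer.ContainerID]int                 // running container -> spec index
	InitContainersToKeep map[kubecontainer.ContainerID]int                 // init container -> init spec index
	ContainersToKill     map[kubecontainer.ContainerID]containerToKillInfo // running container -> kill info
}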
// startContainer starts a container and, on error, returns a message indicating why it failed.
// It starts the container through the following steps:
// * pull the image
// * create the container
// * start the container
// * run the post start lifecycle hooks (if applicable)
func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeApi.PodSandboxConfig, container *api.Container, pod *api.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []api.Secret, podIP string) (string, error) {
	// Step 1: pull the image.
	err, msg := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
	if err != nil {
		return msg, err
	}

	// Step 2: create the container.
	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
	}
	glog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)

	// For a new container, the RestartCount should be 0
	restartCount := 0
	containerStatus := podStatus.FindContainerStatusByName(container.Name)
	if containerStatus != nil {
		restartCount = containerStatus.RestartCount + 1
	}

	containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP)
	if err != nil {
		m.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Generate Container Config Failed", err
	}

	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		m.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Create Container Failed", err
	}
	m.recorder.Eventf(ref, api.EventTypeNormal, events.CreatedContainer, "Created container with id %v", containerID)
	if ref != nil {
		m.containerRefManager.SetRef(kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		}, ref)
	}

	// Step 3: start the container.
	err = m.runtimeService.StartContainer(containerID)
	if err != nil {
		m.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToStartContainer, "Failed to start container with id %v with error: %v", containerID, err)
		return "Start Container Failed", err
	}
	m.recorder.Eventf(ref, api.EventTypeNormal, events.StartedContainer, "Started container with id %v", containerID)

	// Step 4: execute the post start hook.
	if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
		kubeContainerID := kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		}
		msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
		if handlerErr != nil {
			err := fmt.Errorf("PostStart handler: %v", handlerErr)
			m.generateContainerEvent(kubeContainerID, api.EventTypeWarning, events.FailedPostStartHook, msg)
			m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil)
			return "PostStart Hook Failed", err
		}
	}

	return "", nil
}
// TODO(yifan): Delete this function when the logic is moved to kubelet.
func (r *runtime) ConvertPodStatusToAPIPodStatus(pod *api.Pod, status *kubecontainer.PodStatus) (*api.PodStatus, error) {
	apiPodStatus := &api.PodStatus{
		PodIP:             status.IP,
		ContainerStatuses: make([]api.ContainerStatus, 0, 1),
	}

	containerStatuses := make(map[string]*api.ContainerStatus)
	for _, c := range status.ContainerStatuses {
		var st api.ContainerState
		switch c.State {
		case kubecontainer.ContainerStateRunning:
			st.Running = &api.ContainerStateRunning{
				StartedAt: unversioned.NewTime(c.StartedAt),
			}
		case kubecontainer.ContainerStateExited:
			st.Terminated = &api.ContainerStateTerminated{
				ExitCode:    c.ExitCode,
				StartedAt:   unversioned.NewTime(c.StartedAt),
				Reason:      c.Reason,
				Message:     c.Message,
				FinishedAt:  unversioned.NewTime(c.FinishedAt),
				ContainerID: c.ID.String(),
			}
		default:
			// Unknown state.
			st.Waiting = &api.ContainerStateWaiting{}
		}

		status, ok := containerStatuses[c.Name]
		if !ok {
			containerStatuses[c.Name] = &api.ContainerStatus{
				Name:         c.Name,
				Image:        c.Image,
				ImageID:      c.ImageID,
				ContainerID:  c.ID.String(),
				RestartCount: c.RestartCount,
				State:        st,
			}
			continue
		}

		// Found multiple container statuses, fill that as last termination state.
		if status.LastTerminationState.Waiting == nil &&
			status.LastTerminationState.Running == nil &&
			status.LastTerminationState.Terminated == nil {
			status.LastTerminationState = st
		}
	}

	for _, c := range pod.Spec.Containers {
		cs, ok := containerStatuses[c.Name]
		if !ok {
			cs = &api.ContainerStatus{
				Name:  c.Name,
				Image: c.Image,
				// TODO(yifan): Add reason and message.
				State: api.ContainerState{Waiting: &api.ContainerStateWaiting{}},
			}
		}
		apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *cs)
	}

	sort.Sort(kubetypes.SortedContainerStatuses(apiPodStatus.ContainerStatuses))
	return apiPodStatus, nil
}
// convertToAPIContainerStatuses converts the given internal container
// statuses into API container statuses.
func (kl *Kubelet) convertToAPIContainerStatuses(pod *api.Pod, podStatus *kubecontainer.PodStatus, previousStatus []api.ContainerStatus, containers []api.Container, hasInitContainers, isInitContainer bool) []api.ContainerStatus {
	convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *api.ContainerStatus {
		cid := cs.ID.String()
		status := &api.ContainerStatus{
			Name:         cs.Name,
			RestartCount: int32(cs.RestartCount),
			Image:        cs.Image,
			ImageID:      cs.ImageID,
			ContainerID:  cid,
		}
		switch cs.State {
		case kubecontainer.ContainerStateRunning:
			status.State.Running = &api.ContainerStateRunning{StartedAt: unversioned.NewTime(cs.StartedAt)}
		case kubecontainer.ContainerStateExited:
			status.State.Terminated = &api.ContainerStateTerminated{
				ExitCode:    int32(cs.ExitCode),
				Reason:      cs.Reason,
				Message:     cs.Message,
				StartedAt:   unversioned.NewTime(cs.StartedAt),
				FinishedAt:  unversioned.NewTime(cs.FinishedAt),
				ContainerID: cid,
			}
		default:
			status.State.Waiting = &api.ContainerStateWaiting{}
		}
		return status
	}

	// Fetch old containers statuses from old pod status.
	oldStatuses := make(map[string]api.ContainerStatus, len(containers))
	for _, status := range previousStatus {
		oldStatuses[status.Name] = status
	}

	// Set all container statuses to default waiting state
	statuses := make(map[string]*api.ContainerStatus, len(containers))
	defaultWaitingState := api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "ContainerCreating"}}
	if hasInitContainers {
		defaultWaitingState = api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "PodInitializing"}}
	}
	for _, container := range containers {
		status := &api.ContainerStatus{
			Name:  container.Name,
			Image: container.Image,
			State: defaultWaitingState,
		}
		// Apply some values from the old statuses as the default values.
		if oldStatus, found := oldStatuses[container.Name]; found {
			status.RestartCount = oldStatus.RestartCount
			status.LastTerminationState = oldStatus.LastTerminationState
		}
		statuses[container.Name] = status
	}

	// Make the latest container status come first.
	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses)))
	// Set container statuses according to the statuses seen in pod status
	containerSeen := map[string]int{}
	for _, cStatus := range podStatus.ContainerStatuses {
		cName := cStatus.Name
		if _, ok := statuses[cName]; !ok {
			// This would also ignore the infra container.
			continue
		}
		if containerSeen[cName] >= 2 {
			continue
		}
		status := convertContainerStatus(cStatus)
		if containerSeen[cName] == 0 {
			statuses[cName] = status
		} else {
			statuses[cName].LastTerminationState = status.State
		}
		containerSeen[cName] = containerSeen[cName] + 1
	}

	// Handle the containers that failed to be started, which should be in Waiting state.
	for _, container := range containers {
		if isInitContainer {
			// If the init container is terminated with exit code 0, it won't be restarted.
			// TODO(random-liu): Handle this in a cleaner way.
			s := podStatus.FindContainerStatusByName(container.Name)
			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
				continue
			}
		}
		// If a container should be restarted in the next syncpod, it is *Waiting*.
		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
			continue
		}
		status := statuses[container.Name]
		reason, message, ok := kl.reasonCache.Get(pod.UID, container.Name)
		if !ok {
			// In fact, we could also apply Waiting state here, but it is less informative,
			// and the container will be restarted soon, so we prefer the original state here.
			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
			// could be:
			// * Waiting: There is no associated historical container and start failure reason record.
			// * Terminated: The container is terminated.
			continue
		}
		if status.State.Terminated != nil {
			status.LastTerminationState = status.State
		}
		status.State = api.ContainerState{
			Waiting: &api.ContainerStateWaiting{
				Reason:  reason.Error(),
				Message: message,
			},
		}
		statuses[container.Name] = status
	}

	var containerStatuses []api.ContainerStatus
	for _, status := range statuses {
		containerStatuses = append(containerStatuses, *status)
	}

	// Sort the container statuses since clients of this interface expect the list
	// of containers in a pod to have a deterministic order.
	if isInitContainer {
		kubetypes.SortInitContainerStatuses(pod, containerStatuses)
	} else {
		sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses))
	}
	return containerStatuses
}
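The two kubetypes sorting helpers used above are not part of this excerpt. A sketch consistent with how they are called (plain name ordering for regular containers, spec ordering for init containers) could look like the following; these are reconstructions, not copies of the real kubetypes package:

// SortedContainerStatuses sorts API container statuses by container name.
// Sketch: assumes name ordering is the deterministic order clients expect.
type SortedContainerStatuses []api.ContainerStatus

func (s SortedContainerStatuses) Len() int           { return len(s) }
func (s SortedContainerStatuses) Less(i, j int) bool { return s[i].Name < s[j].Name }
func (s SortedContainerStatuses) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// SortInitContainerStatuses reorders statuses in place to match the order in which
// the init containers appear in the pod spec. Sketch inferred from the call site above.
func SortInitContainerStatuses(p *api.Pod, statuses []api.ContainerStatus) {
	next := 0
	for _, container := range p.Spec.InitContainers {
		for i := next; i < len(statuses); i++ {
			if statuses[i].Name == container.Name {
				statuses[next], statuses[i] = statuses[i], statuses[next]
				next++
				break
			}
		}
	}
}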
// computePodContainerChanges checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *api.Pod, podStatus *kubecontainer.PodStatus) podContainerSpecChanges {
	glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)

	sandboxChanged, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
	changes := podContainerSpecChanges{
		CreateSandbox:     sandboxChanged,
		SandboxID:         sandboxID,
		Attempt:           attempt,
		ContainersToStart: make(map[int]string),
		ContainersToKeep:  make(map[kubecontainer.ContainerID]int),
		ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
	}

	for index, container := range pod.Spec.Containers {
		if sandboxChanged {
			message := fmt.Sprintf("Container %+v's pod sandbox is dead, the container will be recreated.", container)
			glog.Info(message)
			changes.ContainersToStart[index] = message
			continue
		}

		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
				message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				glog.Info(message)
				changes.ContainersToStart[index] = message
			}
			continue
		}

		expectedHash := kubecontainer.HashContainer(&container)
		containerChanged := containerStatus.Hash != expectedHash
		if containerChanged {
			message := fmt.Sprintf("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.",
				pod.Name, container.Name, containerStatus.Hash, expectedHash)
			glog.Info(message)
			changes.ContainersToStart[index] = message
			continue
		}

		liveness, found := m.livenessManager.Get(containerStatus.ID)
		if !found || liveness == proberesults.Success {
			changes.ContainersToKeep[containerStatus.ID] = index
			continue
		}
		if pod.Spec.RestartPolicy != api.RestartPolicyNever {
			message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name)
			glog.Info(message)
			changes.ContainersToStart[index] = message
		}
	}

	// compute containers to be killed
	runningContainerStatuses := podStatus.GetRunningContainerStatuses()
	for _, containerStatus := range runningContainerStatuses {
		if _, keep := changes.ContainersToKeep[containerStatus.ID]; !keep {
			var podContainer *api.Container
			var killMessage string
			for i, c := range pod.Spec.Containers {
				if c.Name == containerStatus.Name {
					podContainer = &pod.Spec.Containers[i]
					killMessage = changes.ContainersToStart[i]
					break
				}
			}

			changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
				name:      containerStatus.Name,
				container: podContainer,
				message:   killMessage,
			}
		}
	}

	return changes
}