// checkAndKeepInitContainers keeps all successfully completed init containers. If there
// are failing containers, only keep the first failing one.
func checkAndKeepInitContainers(pod *api.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) bool {
	initFailed := false

	for i, container := range pod.Spec.InitContainers {
		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		if containerStatus == nil {

		if containerStatus.State == kubecontainer.ContainerStateRunning {
			initContainersToKeep[containerStatus.ID] = i

		if containerStatus.State == kubecontainer.ContainerStateExited {
			initContainersToKeep[containerStatus.ID] = i

		if isContainerFailed(containerStatus) {
			initFailed = true

	return initFailed
// isPodRunning returns true if all containers of a manifest are running.
func (kl *Kubelet) isPodRunning(pod *api.Pod, status *kubecontainer.PodStatus) bool {
	for _, c := range pod.Spec.Containers {
		cs := status.FindContainerStatusByName(c.Name)
		if cs == nil || cs.State != kubecontainer.ContainerStateRunning {
			glog.Infof("Container %q for pod %q not running", c.Name, format.Pod(pod))
			return false
	return true
// findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers.
// Status is only returned if an init container is failed, in which case next will
// point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
	if len(pod.Spec.InitContainers) == 0 {
		return nil, nil, true

	// If there are failed containers, return the status of the last failed one.
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		container := &pod.Spec.InitContainers[i]
		status := podStatus.FindContainerStatusByName(container.Name)
		if status != nil && isContainerFailed(status) {
			return status, container, false

	// There are no failed containers now.
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		container := &pod.Spec.InitContainers[i]
		status := podStatus.FindContainerStatusByName(container.Name)
		if status == nil {

		// container is still running, return not done.
		if status.State == kubecontainer.ContainerStateRunning {
			return nil, nil, false

		if status.State == kubecontainer.ContainerStateExited {
			// all init containers successful
			if i == (len(pod.Spec.InitContainers) - 1) {
				return nil, nil, true

			// all containers up to i successful, go to i+1
			return nil, &pod.Spec.InitContainers[i+1], false

	return nil, &pod.Spec.InitContainers[0], false
// startContainer starts a container and returns a message indicating why it failed on error.
// It starts the container through the following steps:
// * pull the image
// * create the container
// * start the container
// * run the post start lifecycle hooks (if applicable)
//
// On success it returns ("", nil). On failure it returns the underlying error together
// with a message: the image puller's message for step 1, or a fixed string naming the
// failed step ("Generate Container Config Failed", "Create Container Failed",
// "Start Container Failed", "PostStart Hook Failed") for the later steps.
//
// NOTE(review): as captured here this function is not valid Go — closing braces are
// absent throughout and at least two statements are truncated (flagged inline below).
// Restore the missing lines from version control before building or editing further.
func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, container *v1.Container, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string) (string, error) {
	// Step 1: pull the image.
	// Note the unusual return order: EnsureImageExists yields the error first and the
	// message second.
	err, msg := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
	if err != nil {
		return msg, err

	// Step 2: create the container.
	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		// A failure to build the reference is only logged here.
		// NOTE(review): presumably execution continues with a possibly-nil ref (the
		// `ref != nil` guard further down suggests so) — confirm against the original.
		glog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
	glog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)

	// For a new container, the RestartCount should be 0
	restartCount := 0
	containerStatus := podStatus.FindContainerStatusByName(container.Name)
	if containerStatus != nil {
		// A previous status exists for this container name, so this start counts as
		// one more restart.
		restartCount = containerStatus.RestartCount + 1

	containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Generate Container Config Failed", err
	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToCreateContainer, "Failed to create container with error: %v", err)
		return "Create Container Failed", err
	m.recorder.Eventf(ref, v1.EventTypeNormal, events.CreatedContainer, "Created container with id %v", containerID)
	if ref != nil {
			// NOTE(review): the opening of this statement is missing. The remaining
			// fragment is the tail of a call taking a {Type, ID} struct literal and
			// ref — presumably a call that registers ref for the new container ID;
			// confirm the callee against the original.
			Type: m.runtimeName,
			ID:   containerID,
		}, ref)

	// Step 3: start the container.
	err = m.runtimeService.StartContainer(containerID)
	if err != nil {
		m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedToStartContainer,
			"Failed to start container with id %v with error: %v", containerID, err)
		return "Start Container Failed", err
	m.recorder.Eventf(ref, v1.EventTypeNormal, events.StartedContainer, "Started container with id %v", containerID)

	// Symlink container logs to the legacy container log location for cluster logging
	// support.
	// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
	containerMeta := containerConfig.GetMetadata()
	sandboxMeta := podSandboxConfig.GetMetadata()
	// NOTE(review): the call on the next line ends with a trailing comma and no closing
	// parenthesis — its final argument(s) are missing from this copy.
	legacySymlink := legacyLogSymlink(containerID, containerMeta.GetName(), sandboxMeta.GetName(),
	containerLog := filepath.Join(podSandboxConfig.GetLogDirectory(), containerConfig.GetLogPath())
	// Failing to create the legacy symlink is logged but does not fail the start.
	if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
		glog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v",
			legacySymlink, containerID, containerLog, err)

	// Step 4: execute the post start hook.
	if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
		kubeContainerID := kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		// NOTE(review): the closing brace of the struct literal above is missing.
		msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
		if handlerErr != nil {
			// The hook failed: record a warning event carrying the handler's message,
			// kill the container that was just started, and report the failure.
			err := fmt.Errorf("PostStart handler: %v", handlerErr)
			m.generateContainerEvent(kubeContainerID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
			m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil)
			return "PostStart Hook Failed", err

	return "", nil
// ConvertPodStatusToAPIPodStatus builds an api.PodStatus from the runtime's view of the
// pod. PodIP is copied from status.IP, and one api.ContainerStatus is appended for each
// container in pod.Spec.Containers, in spec order. A container with no runtime status is
// reported in the Waiting state. The only return statement shown returns a nil error.
//
// NOTE(review): as captured here this function is not valid Go — closing braces are
// absent and a few statements appear to be missing (flagged inline below). Restore the
// missing lines from version control before building or editing further.
//
// TODO(yifan): Delete this function when the logic is moved to kubelet.
func (r *Runtime) ConvertPodStatusToAPIPodStatus(pod *api.Pod, status *kubecontainer.PodStatus) (*api.PodStatus, error) {
	apiPodStatus := &api.PodStatus{
		// TODO(yifan): Add reason and message field.
		PodIP: status.IP,

	// Sort in the reverse order of the restart count because the
	// latest one will have the largest restart count.
	// NOTE(review): no sort statement follows this comment in this copy. The loop below
	// treats the first status seen for a name as the current one, so it presumably
	// depends on that ordering — confirm and restore.

	// API statuses indexed by container name.
	containerStatuses := make(map[string]*api.ContainerStatus)
	for _, c := range status.ContainerStatuses {
		// Translate the runtime state into exactly one of Running/Terminated/Waiting.
		var st api.ContainerState
		switch c.State {
		case kubecontainer.ContainerStateRunning:
			st.Running = &api.ContainerStateRunning{
				StartedAt: unversioned.NewTime(c.StartedAt),
		case kubecontainer.ContainerStateExited:
			// && binds tighter than ||, so this reads: policy is Always, OR
			// (policy is OnFailure AND the exit code is non-zero).
			if pod.Spec.RestartPolicy == api.RestartPolicyAlways ||
				pod.Spec.RestartPolicy == api.RestartPolicyOnFailure && c.ExitCode != 0 {
				// TODO(yifan): Add reason and message.
				st.Waiting = &api.ContainerStateWaiting{}
				// NOTE(review): presumably a `break` belongs here so that Terminated
				// is not also set for a container that will be restarted — confirm.
			st.Terminated = &api.ContainerStateTerminated{
				ExitCode:  c.ExitCode,
				StartedAt: unversioned.NewTime(c.StartedAt),
				// TODO(yifan): Add reason, message, finishedAt, signal.
				ContainerID: c.ID.String(),
			// NOTE(review): a `default:` label appears to be missing before the
			// unknown-state handling below.
			// Unknown state.
			// TODO(yifan): Add reason and message.
			st.Waiting = &api.ContainerStateWaiting{}

		// Note: this `status` shadows the function parameter for the rest of the loop body.
		status, ok := containerStatuses[c.Name]
		if !ok {
			// First status seen for this name becomes the container's current status.
			containerStatuses[c.Name] = &api.ContainerStatus{
				Name:         c.Name,
				Image:        c.Image,
				ImageID:      c.ImageID,
				ContainerID:  c.ID.String(),
				RestartCount: c.RestartCount,
				State:        st,
			// NOTE(review): presumably followed by `continue`; otherwise the nil
			// `status` would be dereferenced below — confirm.

		// Found multiple container statuses, fill that as last termination state.
		// Only the first additional status is recorded; later ones are ignored because
		// LastTerminationState is then already populated.
		if status.LastTerminationState.Waiting == nil &&
			status.LastTerminationState.Running == nil &&
			status.LastTerminationState.Terminated == nil {
			status.LastTerminationState = st

	// Emit one status per container in the pod spec, in spec order.
	for _, c := range pod.Spec.Containers {
		cs, ok := containerStatuses[c.Name]
		if !ok {
			// The runtime reported nothing for this container: report it as Waiting.
			cs = &api.ContainerStatus{
				Name:  c.Name,
				Image: c.Image,
				// TODO(yifan): Add reason and message.
				State: api.ContainerState{Waiting: &api.ContainerStateWaiting{}},
		apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *cs)

	return apiPodStatus, nil
// computePodContainerChanges checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *api.Pod, podStatus *kubecontainer.PodStatus) podContainerSpecChanges {
	glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)

	sandboxChanged, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
	changes := podContainerSpecChanges{
		CreateSandbox:        sandboxChanged,
		SandboxID:            sandboxID,
		Attempt:              attempt,
		ContainersToStart:    make(map[int]string),
		ContainersToKeep:     make(map[kubecontainer.ContainerID]int),
		InitContainersToKeep: make(map[kubecontainer.ContainerID]int),
		ContainersToKill:     make(map[kubecontainer.ContainerID]containerToKillInfo),

	// check the status of init containers.
	initFailed := false
	// always reset the init containers if the sandbox is changed.
	if !sandboxChanged {
		// Keep all successfully completed containers. If there are failing containers,
		// only keep the first failing one.
		initFailed = checkAndKeepInitContainers(pod, podStatus, changes.InitContainersToKeep)
	changes.InitFailed = initFailed

	// check the status of containers.
	for index, container := range pod.Spec.Containers {
		containerStatus := podStatus.FindContainerStatusByName(container.Name)
		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
				message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				changes.ContainersToStart[index] = message
		if sandboxChanged {
			if pod.Spec.RestartPolicy != api.RestartPolicyNever {
				message := fmt.Sprintf("Container %+v's pod sandbox is dead, the container will be recreated.", container)
				changes.ContainersToStart[index] = message

		if initFailed {
			// Initialization failed and Container exists.
			// If we have an initialization failure everything will be killed anyway.
			// If RestartPolicy is Always or OnFailure we restart containers that were running before.
			if pod.Spec.RestartPolicy != api.RestartPolicyNever {
				message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name)
				changes.ContainersToStart[index] = message

		expectedHash := kubecontainer.HashContainer(&container)
		containerChanged := containerStatus.Hash != expectedHash
		if containerChanged {
			message := fmt.Sprintf("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.",
				pod.Name, container.Name, containerStatus.Hash, expectedHash)
			changes.ContainersToStart[index] = message

		liveness, found := m.livenessManager.Get(containerStatus.ID)
		if !found || liveness == proberesults.Success {
			changes.ContainersToKeep[containerStatus.ID] = index
		if pod.Spec.RestartPolicy != api.RestartPolicyNever {
			message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name)
			changes.ContainersToStart[index] = message

	// Don't keep init containers if they are the only containers to keep.
	if !sandboxChanged && len(changes.ContainersToStart) == 0 && len(changes.ContainersToKeep) == 0 {
		changes.InitContainersToKeep = make(map[kubecontainer.ContainerID]int)

	// compute containers to be killed
	runningContainerStatuses := podStatus.GetRunningContainerStatuses()
	for _, containerStatus := range runningContainerStatuses {
		_, keep := changes.ContainersToKeep[containerStatus.ID]
		_, keepInit := changes.InitContainersToKeep[containerStatus.ID]
		if !keep && !keepInit {
			var podContainer *api.Container
			var killMessage string
			for i, c := range pod.Spec.Containers {
				if c.Name == containerStatus.Name {
					podContainer = &pod.Spec.Containers[i]
					killMessage = changes.ContainersToStart[i]

			changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
				name:      containerStatus.Name,
				container: podContainer,
				message:   killMessage,

	return changes
// TODO(yifan): Delete this function when the logic is moved to kubelet.
func (r *runtime) ConvertPodStatusToAPIPodStatus(pod *api.Pod, status *kubecontainer.PodStatus) (*api.PodStatus, error) {
	apiPodStatus := &api.PodStatus{
		PodIP:             status.IP,
		ContainerStatuses: make([]api.ContainerStatus, 0, 1),

	containerStatuses := make(map[string]*api.ContainerStatus)
	for _, c := range status.ContainerStatuses {
		var st api.ContainerState
		switch c.State {
		case kubecontainer.ContainerStateRunning:
			st.Running = &api.ContainerStateRunning{
				StartedAt: unversioned.NewTime(c.StartedAt),
		case kubecontainer.ContainerStateExited:
			st.Terminated = &api.ContainerStateTerminated{
				ExitCode:    c.ExitCode,
				StartedAt:   unversioned.NewTime(c.StartedAt),
				Reason:      c.Reason,
				Message:     c.Message,
				FinishedAt:  unversioned.NewTime(c.FinishedAt),
				ContainerID: c.ID.String(),
			// Unknown state.
			st.Waiting = &api.ContainerStateWaiting{}

		status, ok := containerStatuses[c.Name]
		if !ok {
			containerStatuses[c.Name] = &api.ContainerStatus{
				Name:         c.Name,
				Image:        c.Image,
				ImageID:      c.ImageID,
				ContainerID:  c.ID.String(),
				RestartCount: c.RestartCount,
				State:        st,

		// Found multiple container statuses, fill that as last termination state.
		if status.LastTerminationState.Waiting == nil &&
			status.LastTerminationState.Running == nil &&
			status.LastTerminationState.Terminated == nil {
			status.LastTerminationState = st

	for _, c := range pod.Spec.Containers {
		cs, ok := containerStatuses[c.Name]
		if !ok {
			cs = &api.ContainerStatus{
				Name:  c.Name,
				Image: c.Image,
				// TODO(yifan): Add reason and message.
				State: api.ContainerState{Waiting: &api.ContainerStateWaiting{}},
		apiPodStatus.ContainerStatuses = append(apiPodStatus.ContainerStatuses, *cs)


	return apiPodStatus, nil
// convertToAPIContainerStatuses converts the given internal container
// statuses into API container statuses.
//
// Parameters:
//   - pod: the pod the containers belong to; used for restart decisions, the reason
//     cache lookup (pod.UID) and init-container sorting.
//   - podStatus: the runtime's current view of the pod's containers.
//   - previousStatus: earlier API statuses; their RestartCount and LastTerminationState
//     seed the defaults for containers of the same name.
//   - containers: the container specs to produce statuses for.
//   - hasInitContainers: selects the default waiting reason ("PodInitializing" when
//     true, "ContainerCreating" otherwise).
//   - isInitContainer: enables the init-container handling in the last two phases.
//
// It returns one api.ContainerStatus per entry in containers (statuses are keyed by
// container name while being built).
//
// NOTE(review): as captured here this function is not valid Go — closing braces are
// absent and several statements appear to be missing (flagged inline below). Restore
// the missing lines from version control before building or editing further.
func (kl *Kubelet) convertToAPIContainerStatuses(pod *api.Pod, podStatus *kubecontainer.PodStatus, previousStatus []api.ContainerStatus, containers []api.Container, hasInitContainers, isInitContainer bool) []api.ContainerStatus {
	// convertContainerStatus maps a single runtime status to its API form: running and
	// exited map to Running and Terminated; the remaining assignment sets Waiting.
	convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *api.ContainerStatus {
		cid := cs.ID.String()
		status := &api.ContainerStatus{
			Name:         cs.Name,
			RestartCount: int32(cs.RestartCount),
			Image:        cs.Image,
			ImageID:      cs.ImageID,
			ContainerID:  cid,
		switch cs.State {
		case kubecontainer.ContainerStateRunning:
			status.State.Running = &api.ContainerStateRunning{StartedAt: unversioned.NewTime(cs.StartedAt)}
		case kubecontainer.ContainerStateExited:
			status.State.Terminated = &api.ContainerStateTerminated{
				ExitCode:    int32(cs.ExitCode),
				Reason:      cs.Reason,
				Message:     cs.Message,
				StartedAt:   unversioned.NewTime(cs.StartedAt),
				FinishedAt:  unversioned.NewTime(cs.FinishedAt),
				ContainerID: cid,
			// NOTE(review): a `default:` label appears to be missing before the
			// Waiting assignment below — confirm.
			status.State.Waiting = &api.ContainerStateWaiting{}
		return status

	// Fetch old containers statuses from old pod status.
	oldStatuses := make(map[string]api.ContainerStatus, len(containers))
	for _, status := range previousStatus {
		oldStatuses[status.Name] = status

	// Set all container statuses to default waiting state
	statuses := make(map[string]*api.ContainerStatus, len(containers))
	defaultWaitingState := api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "ContainerCreating"}}
	if hasInitContainers {
		defaultWaitingState = api.ContainerState{Waiting: &api.ContainerStateWaiting{Reason: "PodInitializing"}}

	for _, container := range containers {
		status := &api.ContainerStatus{
			Name:  container.Name,
			Image: container.Image,
			State: defaultWaitingState,
		// Apply some values from the old statuses as the default values.
		if oldStatus, found := oldStatuses[container.Name]; found {
			status.RestartCount = oldStatus.RestartCount
			status.LastTerminationState = oldStatus.LastTerminationState
		statuses[container.Name] = status

	// Make the latest container status comes first.
	// NOTE(review): no sort statement follows the comment above in this copy. The loop
	// below treats the first status seen per name as current and the second as the last
	// termination state, so it presumably relies on that ordering — confirm and restore.
	// Set container statuses according to the statuses seen in pod status
	containerSeen := map[string]int{}
	for _, cStatus := range podStatus.ContainerStatuses {
		cName := cStatus.Name
		if _, ok := statuses[cName]; !ok {
			// This would also ignore the infra container.
			// NOTE(review): the body of this guard is missing; presumably `continue`.
		// NOTE(review): the body of the guard below is missing; presumably `continue`,
		// so that at most two statuses per container name are consumed — confirm.
		if containerSeen[cName] >= 2 {
		status := convertContainerStatus(cStatus)
		if containerSeen[cName] == 0 {
			// First status for this name: it replaces the default entry.
			statuses[cName] = status
		} else {
			// A further status for this name: only its state is kept, as the last
			// termination state.
			statuses[cName].LastTerminationState = status.State
		containerSeen[cName] = containerSeen[cName] + 1

	// Handle the containers failed to be started, which should be in Waiting state.
	for _, container := range containers {
		if isInitContainer {
			// If the init container is terminated with exit code 0, it won't be restarted.
			// TODO(random-liu): Handle this in a cleaner way.
			s := podStatus.FindContainerStatusByName(container.Name)
			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
				// NOTE(review): body missing; presumably `continue`.
		// If a container should be restarted in next syncpod, it is *Waiting*.
		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
			// NOTE(review): body missing; presumably `continue`.
		status := statuses[container.Name]
		reason, message, ok := kl.reasonCache.Get(pod.UID, container.Name)
		if !ok {
			// In fact, we could also apply Waiting state here, but it is less informative,
			// and the container will be restarted soon, so we prefer the original state here.
			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
			// could be:
			//   * Waiting: There is no associated historical container and start failure reason record.
			//   * Terminated: The container is terminated.
			// NOTE(review): the statement for this branch is missing; the comment above
			// implies the original state is kept, so presumably `continue`. As the code
			// stands, `reason` is used below regardless — confirm.
		// Preserve a terminated state as the last termination state before overwriting
		// the current state with Waiting.
		if status.State.Terminated != nil {
			status.LastTerminationState = status.State
		status.State = api.ContainerState{
			Waiting: &api.ContainerStateWaiting{
				Reason:  reason.Error(),
				Message: message,
		statuses[container.Name] = status

	// Flatten the map; map iteration order is unspecified, hence the sort below.
	var containerStatuses []api.ContainerStatus
	for _, status := range statuses {
		containerStatuses = append(containerStatuses, *status)

	// Sort the container statuses since clients of this interface expect the list
	// of containers in a pod has a deterministic order.
	if isInitContainer {
		kubetypes.SortInitContainerStatuses(pod, containerStatuses)
	} else {
		// NOTE(review): the else branch is empty in this copy; presumably the sort for
		// regular (non-init) containers belongs here — confirm and restore.
	return containerStatuses
