// Kill all containers in a pod. Returns the number of containers deleted and an error if one occurs. func (kl *Kubelet) killContainersInPod(pod *api.BoundPod, dockerContainers dockertools.DockerContainers) (int, error) { podFullName := GetPodFullName(pod) count := 0 errs := make(chan error, len(pod.Spec.Containers)) wg := sync.WaitGroup{} for _, container := range pod.Spec.Containers { // TODO: Consider being more aggressive: kill all containers with this pod UID, period. if dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, pod.UID, container.Name); found { count++ wg.Add(1) go func() { err := kl.killContainer(dockerContainer) if err != nil { glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, podFullName) errs <- err } wg.Done() }() } } wg.Wait() close(errs) if len(errs) > 0 { errList := []error{} for err := range errs { errList = append(errList, err) } return -1, fmt.Errorf("failed to delete containers (%v)", errList) } return count, nil }
// Delete all containers in a pod (except the network container) returns the number of containers deleted // and an error if one occurs. func (kl *Kubelet) deleteAllContainers(pod *Pod, podFullName string, dockerContainers dockertools.DockerContainers) (int, error) { count := 0 errs := make(chan error, len(pod.Manifest.Containers)) wg := sync.WaitGroup{} for _, container := range pod.Manifest.Containers { if dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, pod.Manifest.UUID, container.Name); found { count++ wg.Add(1) go func() { err := kl.killContainer(dockerContainer) if err != nil { glog.Errorf("Failed to delete container. (%v) Skipping pod %s", err, podFullName) errs <- err } wg.Done() }() } } wg.Wait() close(errs) if len(errs) > 0 { errList := []error{} for err := range errs { errList = append(errList, err) } return -1, fmt.Errorf("failed to delete containers (%v)", errList) } return count, nil }
// isPodRunning returns true if all containers of a manifest are running. func (kl *Kubelet) isPodRunning(pod api.BoundPod, dockerContainers dockertools.DockerContainers) (bool, error) { for _, container := range pod.Spec.Containers { dockerContainer, found, _ := dockerContainers.FindPodContainer(GetPodFullName(&pod), pod.UID, container.Name) if !found { glog.Infof("container %q not found", container.Name) return false, nil } inspectResult, err := kl.dockerClient.InspectContainer(dockerContainer.ID) if err != nil { glog.Infof("failed to inspect container %q: %v", container.Name, err) return false, err } if !inspectResult.State.Running { glog.Infof("container %q not running: %#v", container.Name, inspectResult.State) return false, nil } } return true, nil }
func (kl *Kubelet) syncPod(pod *api.BoundPod, dockerContainers dockertools.DockerContainers) error { podFullName := GetPodFullName(pod) uuid := pod.UID containersToKeep := make(map[dockertools.DockerID]empty) killedContainers := make(map[dockertools.DockerID]empty) glog.V(4).Infof("Syncing Pod, podFullName: %q, uuid: %q", podFullName, uuid) // Make sure we have a network container var netID dockertools.DockerID if netDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { netID = dockertools.DockerID(netDockerContainer.ID) } else { glog.V(2).Infof("Network container doesn't exist for pod %q, killing and re-creating the pod", podFullName) count, err := kl.killContainersInPod(pod, dockerContainers) if err != nil { return err } netID, err = kl.createNetworkContainer(pod) if err != nil { glog.Errorf("Failed to introspect network container: %v; Skipping pod %q", err, podFullName) return err } if count > 0 { // Re-list everything, otherwise we'll think we're ok. dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } } } containersToKeep[netID] = empty{} podVolumes, err := kl.mountExternalVolumes(pod) if err != nil { glog.Errorf("Unable to mount volumes for pod %q: %v; skipping pod", podFullName, err) return err } podStatus := api.PodStatus{} info, err := kl.GetPodInfo(podFullName, uuid) if err != nil { glog.Errorf("Unable to get pod with name %q and uuid %q info, health checks may be invalid", podFullName, uuid) } netInfo, found := info[networkContainerName] if found { podStatus.PodIP = netInfo.PodIP } for _, container := range pod.Spec.Containers { expectedHash := dockertools.HashContainer(&container) if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found { containerID := dockertools.DockerID(dockerContainer.ID) glog.V(3).Infof("pod %q container %q exists as %v", podFullName, container.Name, containerID) // look for changes in the container. if hash == 0 || hash == expectedHash { // TODO: This should probably be separated out into a separate goroutine. healthy, err := kl.healthy(podFullName, uuid, podStatus, container, dockerContainer) if err != nil { glog.V(1).Infof("health check errored: %v", err) containersToKeep[containerID] = empty{} continue } if healthy == health.Healthy { containersToKeep[containerID] = empty{} continue } glog.V(1).Infof("pod %q container %q is unhealthy. Container will be killed and re-created.", podFullName, container.Name, healthy) } else { glog.V(1).Infof("pod %q container %q hash changed (%d vs %d). Container will be killed and re-created.", podFullName, container.Name, hash, expectedHash) } if err := kl.killContainer(dockerContainer); err != nil { glog.V(1).Infof("Failed to kill container %q: %v", dockerContainer.ID, err) continue } killedContainers[containerID] = empty{} // Also kill associated network container if netContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { if err := kl.killContainer(netContainer); err != nil { glog.V(1).Infof("Failed to kill network container %q: %v", netContainer.ID, err) continue } } } // Check RestartPolicy for container recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name) if err != nil { glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name) // TODO(dawnchen): error handling here? } if len(recentContainers) > 0 && pod.Spec.RestartPolicy.Always == nil { if pod.Spec.RestartPolicy.Never != nil { glog.V(3).Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } if pod.Spec.RestartPolicy.OnFailure != nil { // Check the exit code of last run if recentContainers[0].State.ExitCode == 0 { glog.V(3).Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } } } glog.V(3).Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container) ref, err := containerRef(pod, &container) if err != nil { glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err) } if !api.IsPullNever(container.ImagePullPolicy) { present, err := kl.dockerPuller.IsImagePresent(container.Image) latest := dockertools.RequireLatestImage(container.Image) if err != nil { if ref != nil { record.Eventf(ref, "failed", "failed", "Failed to inspect image %q", container.Image) } glog.Errorf("Failed to inspect image %q: %v; skipping pod %q container %q", container.Image, err, podFullName, container.Name) continue } if api.IsPullAlways(container.ImagePullPolicy) || (api.IsPullIfNotPresent(container.ImagePullPolicy) && (!present || latest)) { if err := kl.pullImage(container.Image, ref); err != nil { continue } } } // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID)) if err != nil { // TODO(bburns) : Perhaps blacklist a container after N failures? glog.Errorf("Error running pod %q container %q: %v", podFullName, container.Name, err) continue } containersToKeep[containerID] = empty{} } // Kill any containers in this pod which were not identified above (guards against duplicates). for id, container := range dockerContainers { curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0]) if curPodFullName == podFullName && curUUID == uuid { // Don't kill containers we want to keep or those we already killed. _, keep := containersToKeep[id] _, killed := killedContainers[id] if !keep && !killed { glog.V(1).Infof("Killing unwanted container in pod %q: %+v", curUUID, container) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } } return nil }
func (kl *Kubelet) syncPod(pod *Pod, dockerContainers dockertools.DockerContainers) error { podFullName := GetPodFullName(pod) uuid := pod.Manifest.UUID containersToKeep := make(map[dockertools.DockerID]empty) killedContainers := make(map[dockertools.DockerID]empty) // Make sure we have a network container var netID dockertools.DockerID if networkDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { netID = dockertools.DockerID(networkDockerContainer.ID) } else { glog.Infof("Network container doesn't exist, creating") count, err := kl.deleteAllContainers(pod, podFullName, dockerContainers) if err != nil { return err } dockerNetworkID, err := kl.createNetworkContainer(pod) if err != nil { glog.Errorf("Failed to introspect network container. (%v) Skipping pod %s", err, podFullName) return err } netID = dockerNetworkID if count > 0 { // relist everything, otherwise we'll think we're ok dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } } } containersToKeep[netID] = empty{} podVolumes, err := kl.mountExternalVolumes(&pod.Manifest) if err != nil { glog.Errorf("Unable to mount volumes for pod %s: (%v) Skipping pod.", podFullName, err) return err } podState := api.PodState{Manifest: api.ContainerManifest{UUID: uuid}} info, err := kl.GetPodInfo(podFullName, uuid) if err != nil { glog.Errorf("Unable to get pod with name %s and uuid %s info, health checks may be invalid.", podFullName, uuid) } netInfo, found := info[networkContainerName] if found && netInfo.NetworkSettings != nil { podState.PodIP = netInfo.NetworkSettings.IPAddress } for _, container := range pod.Manifest.Containers { expectedHash := dockertools.HashContainer(&container) if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found { containerID := dockertools.DockerID(dockerContainer.ID) glog.V(1).Infof("pod %s container %s exists as %v", podFullName, container.Name, containerID) // look for changes in the container. if hash == 0 || hash == expectedHash { // TODO: This should probably be separated out into a separate goroutine. healthy, err := kl.healthy(podFullName, podState, container, dockerContainer) if err != nil { glog.V(1).Infof("health check errored: %v", err) containersToKeep[containerID] = empty{} continue } if healthy == health.Healthy { containersToKeep[containerID] = empty{} continue } glog.V(1).Infof("pod %s container %s is unhealthy.", podFullName, container.Name, healthy) } else { glog.V(1).Infof("container hash changed %d vs %d.", hash, expectedHash) } if err := kl.killContainer(dockerContainer); err != nil { glog.V(1).Infof("Failed to kill container %s: %v", dockerContainer.ID, err) continue } killedContainers[containerID] = empty{} } // Check RestartPolicy for container recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name) if err != nil { glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name) // TODO(dawnchen): error handling here? } if len(recentContainers) > 0 && pod.Manifest.RestartPolicy.Always == nil { if pod.Manifest.RestartPolicy.Never != nil { glog.Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } if pod.Manifest.RestartPolicy.OnFailure != nil { // Check the exit code of last run if recentContainers[0].State.ExitCode == 0 { glog.Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } } } glog.Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container) if err := kl.dockerPuller.Pull(container.Image); err != nil { glog.Errorf("Failed to pull image %s: %v skipping pod %s container %s.", container.Image, err, podFullName, container.Name) continue } // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID)) if err != nil { // TODO(bburns) : Perhaps blacklist a container after N failures? glog.Errorf("Error running pod %s container %s: %v", podFullName, container.Name, err) continue } containersToKeep[containerID] = empty{} } // Kill any containers in this pod which were not identified above (guards against duplicates). for id, container := range dockerContainers { curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0]) if curPodFullName == podFullName && curUUID == uuid { // Don't kill containers we want to keep or those we already killed. _, keep := containersToKeep[id] _, killed := killedContainers[id] if !keep && !killed { err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } } return nil }