// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []Pod) error { glog.Infof("Desired [%s]: %+v", kl.hostname, pods) var err error desiredContainers := make(map[podContainer]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } // Check for any containers that need starting for i := range pods { pod := &pods[i] podFullName := GetPodFullName(pod) uuid := pod.Manifest.UUID // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uuid, networkContainerName}] = empty{} for _, cont := range pod.Manifest.Containers { desiredContainers[podContainer{podFullName, uuid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { err := kl.syncPod(pod, dockerContainers) if err != nil { glog.Errorf("Error syncing pod: %v skipping.", err) } }) } // Kill any containers we don't need existingContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers: %v", err) return err } for _, container := range existingContainers { // Don't kill containers that are in the desired pods. podFullName, uuid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, ok := desiredContainers[podContainer{podFullName, uuid, containerName}]; !ok { err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } // Remove any orphaned volumes. kl.reconcileVolumes(pods) return err }
// runPod runs a single pod and wait until all containers are running. func (kl *Kubelet) runPod(pod api.BoundPod) error { delay := RunOnceRetryDelay retry := 0 for { dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { return fmt.Errorf("failed to get kubelet docker containers: %v", err) } running, err := kl.isPodRunning(pod, dockerContainers) if err != nil { return fmt.Errorf("failed to check pod status: %v", err) } if running { glog.Infof("pod %q containers running", pod.Name) return nil } glog.Infof("pod %q containers not running: syncing", pod.Name) if err = kl.syncPod(&pod, dockerContainers); err != nil { return fmt.Errorf("error syncing pod: %v", err) } if retry >= RunOnceMaxRetries { return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, RunOnceMaxRetries) } // TODO(proppy): health checking would be better than waiting + checking the state at the next iteration. glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay) <-time.After(delay) retry++ delay *= RunOnceRetryDelayBackoff } }
// runPod runs a single pod and wait until all containers are running. func (kl *Kubelet) runPod(pod api.Pod, retryDelay time.Duration) error { delay := retryDelay retry := 0 for { dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { return fmt.Errorf("failed to get kubelet docker containers: %v", err) } running, err := kl.isPodRunning(pod, dockerContainers) if err != nil { return fmt.Errorf("failed to check pod status: %v", err) } if running { glog.Infof("pod %q containers running", pod.Name) return nil } glog.Infof("pod %q containers not running: syncing", pod.Name) // We don't create mirror pods in this mode; pass a dummy boolean value // to sycnPod. if err = kl.syncPod(&pod, false, dockerContainers); err != nil { return fmt.Errorf("error syncing pod: %v", err) } if retry >= RunOnceMaxRetries { return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, RunOnceMaxRetries) } // TODO(proppy): health checking would be better than waiting + checking the state at the next iteration. glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay) time.Sleep(delay) retry++ delay *= RunOnceRetryDelayBackoff } }
// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []api.BoundPod) error { glog.V(4).Infof("Desired: %#v", pods) var err error desiredContainers := make(map[podContainer]empty) desiredPods := make(map[string]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers: %#v", dockerContainers) return err } // Check for any containers that need starting for ix := range pods { pod := &pods[ix] podFullName := GetPodFullName(pod) uuid := pod.UID desiredPods[uuid] = empty{} // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uuid, networkContainerName}] = empty{} for _, cont := range pod.Spec.Containers { desiredContainers[podContainer{podFullName, uuid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { err := kl.syncPod(pod, dockerContainers) if err != nil { glog.Errorf("Error syncing pod, skipping: %s", err) } }) } // Kill any containers we don't need. for _, container := range dockerContainers { // Don't kill containers that are in the desired pods. podFullName, uuid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, found := desiredPods[uuid]; found { // syncPod() will handle this one. continue } pc := podContainer{podFullName, uuid, containerName} if _, ok := desiredContainers[pc]; !ok { glog.V(1).Infof("Killing unwanted container %+v", pc) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container %+v: %s", pc, err) } } } // Remove any orphaned volumes. kl.reconcileVolumes(pods) return err }
// Run a command in a container, returns the combined stdout, stderr as an array of bytes func (kl *Kubelet) RunInContainer(podFullName, uuid, container string, cmd []string) ([]byte, error) { if kl.runner == nil { return nil, fmt.Errorf("no runner specified.") } dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { return nil, err } dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, container) if !found { return nil, fmt.Errorf("container not found (%q)", container) } return kl.runner.RunInContainer(dockerContainer.ID, cmd) }
// GetContainerInfo returns stats (from Cadvisor) for a container. func (kl *Kubelet) GetContainerInfo(podFullName, uuid, containerName string, req *info.ContainerInfoRequest) (*info.ContainerInfo, error) { if kl.cadvisorClient == nil { return nil, nil } dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { return nil, err } dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, containerName) if !found { return nil, errors.New("couldn't find container") } return kl.statsFromContainerPath(fmt.Sprintf("/docker/%s", dockerContainer.ID), req) }
// Get all containers that are evictable. Evictable containers are: not running // and created more than MinAge ago. func (cgc *realContainerGC) evictableContainers() (containersByEvictUnit, []containerGCInfo, error) { containers, err := dockertools.GetKubeletDockerContainers(cgc.dockerClient, true) if err != nil { return containersByEvictUnit{}, []containerGCInfo{}, err } unidentifiedContainers := make([]containerGCInfo, 0) evictUnits := make(containersByEvictUnit) newestGCTime := time.Now().Add(-cgc.policy.MinAge) for _, container := range containers { // Prune out running containers. data, err := cgc.dockerClient.InspectContainer(container.ID) if err != nil { // Container may have been removed already, skip. continue } else if data.State.Running { continue } else if newestGCTime.Before(data.Created) { continue } containerInfo := containerGCInfo{ id: container.ID, name: container.Names[0], createTime: data.Created, } containerName, _, err := dockertools.ParseDockerName(container.Names[0]) if err != nil { unidentifiedContainers = append(unidentifiedContainers, containerInfo) } else { key := evictUnit{ uid: containerName.PodUID, name: containerName.ContainerName, } containerInfo.podNameWithNamespace = containerName.PodFullName containerInfo.containerName = containerName.ContainerName evictUnits[key] = append(evictUnits[key], containerInfo) } } // Sort the containers by age. for uid := range evictUnits { sort.Sort(byCreated(evictUnits[uid])) } return evictUnits, unidentifiedContainers, nil }
// GetKubeletContainerLogs returns logs from the container // The second parameter of GetPodInfo and FindPodContainer methods represents pod UUID, which is allowed to be blank // TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt // or all of them. func (kl *Kubelet) GetKubeletContainerLogs(podFullName, containerName, tail string, follow bool, stdout, stderr io.Writer) error { _, err := kl.GetPodInfo(podFullName, "") if err == dockertools.ErrNoContainersInPod { return fmt.Errorf("pod not found (%q)\n", podFullName) } dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, true) if err != nil { return err } dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, "", containerName) if !found { return fmt.Errorf("container not found (%q)\n", containerName) } return dockertools.GetKubeletDockerContainerLogs(kl.dockerClient, dockerContainer.ID, tail, follow, stdout, stderr) }
// GetContainerInfo returns stats (from Cadvisor) for a container. func (kl *Kubelet) GetContainerInfo(podFullName string, uid types.UID, containerName string, req *cadvisor.ContainerInfoRequest) (*cadvisor.ContainerInfo, error) { cc := kl.GetCadvisorClient() if cc == nil { return nil, nil } dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { return nil, err } dockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uid, containerName) if !found { return nil, fmt.Errorf("couldn't find container") } return kl.statsFromDockerContainer(cc, dockerContainer.ID, req) }
// Destroy existing k8s containers func (k *KubernetesExecutor) killKubeletContainers() { if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil { opts := docker.RemoveContainerOptions{ RemoveVolumes: true, Force: true, } for _, container := range containers { opts.ID = container.ID log.V(2).Infof("Removing container: %v", opts.ID) if err := k.dockerClient.RemoveContainer(opts); err != nil { log.Warning(err) } } } else { log.Warningf("Failed to list kubelet docker containers: %v", err) } }
// TODO: Also enforce a maximum total number of containers. func (kl *Kubelet) GarbageCollectContainers() error { if kl.maxContainerCount == 0 { return nil } containers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, true) if err != nil { return err } uuidToIDMap := map[string][]string{} for _, container := range containers { _, uuid, name, _ := dockertools.ParseDockerName(container.ID) uuidName := uuid + "." + name uuidToIDMap[uuidName] = append(uuidToIDMap[uuidName], container.ID) } for _, list := range uuidToIDMap { if len(list) <= kl.maxContainerCount { continue } if err := kl.purgeOldest(list); err != nil { return err } } return nil }
// syncPod brings the running containers of a single pod in line with the
// pod's desired spec: it ensures the network container exists (killing and
// re-creating the whole pod if it does not), mounts volumes, health-checks
// existing containers, restarts or creates containers subject to the pod's
// RestartPolicy and each container's image pull policy, and finally kills any
// duplicate containers for this pod that were not accounted for above.
func (kl *Kubelet) syncPod(pod *api.BoundPod, dockerContainers dockertools.DockerContainers) error {
	podFullName := GetPodFullName(pod)
	uuid := pod.UID
	// Containers we have decided must survive this sync pass.
	containersToKeep := make(map[dockertools.DockerID]empty)
	// Containers we killed ourselves during this pass, so the final sweep
	// does not try to kill them a second time.
	killedContainers := make(map[dockertools.DockerID]empty)
	glog.V(4).Infof("Syncing Pod, podFullName: %q, uuid: %q", podFullName, uuid)

	// Make sure we have a network container
	var netID dockertools.DockerID
	if netDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found {
		netID = dockertools.DockerID(netDockerContainer.ID)
	} else {
		// Without the network container the pod is rebuilt from scratch.
		glog.V(2).Infof("Network container doesn't exist for pod %q, killing and re-creating the pod", podFullName)
		count, err := kl.killContainersInPod(pod, dockerContainers)
		if err != nil {
			return err
		}
		netID, err = kl.createNetworkContainer(pod)
		if err != nil {
			glog.Errorf("Failed to introspect network container: %v; Skipping pod %q", err, podFullName)
			return err
		}
		if count > 0 {
			// Re-list everything, otherwise we'll think we're ok.
			dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient, false)
			if err != nil {
				glog.Errorf("Error listing containers %#v", dockerContainers)
				return err
			}
		}
	}
	containersToKeep[netID] = empty{}

	podVolumes, err := kl.mountExternalVolumes(pod)
	if err != nil {
		glog.Errorf("Unable to mount volumes for pod %q: %v; skipping pod", podFullName, err)
		return err
	}

	// Pod status (notably the pod IP) feeds the health checks below; failure
	// here only degrades health checking, it does not abort the sync.
	podStatus := api.PodStatus{}
	info, err := kl.GetPodInfo(podFullName, uuid)
	if err != nil {
		glog.Errorf("Unable to get pod with name %q and uuid %q info, health checks may be invalid", podFullName, uuid)
	}
	netInfo, found := info[networkContainerName]
	if found {
		podStatus.PodIP = netInfo.PodIP
	}

	for _, container := range pod.Spec.Containers {
		expectedHash := dockertools.HashContainer(&container)
		if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found {
			containerID := dockertools.DockerID(dockerContainer.ID)
			glog.V(3).Infof("pod %q container %q exists as %v", podFullName, container.Name, containerID)

			// look for changes in the container.
			if hash == 0 || hash == expectedHash {
				// TODO: This should probably be separated out into a separate goroutine.
				healthy, err := kl.healthy(podFullName, uuid, podStatus, container, dockerContainer)
				if err != nil {
					// An errored health check keeps the container: do not
					// kill on a flaky check.
					glog.V(1).Infof("health check errored: %v", err)
					containersToKeep[containerID] = empty{}
					continue
				}
				if healthy == health.Healthy {
					containersToKeep[containerID] = empty{}
					continue
				}
				glog.V(1).Infof("pod %q container %q is unhealthy. Container will be killed and re-created.", podFullName, container.Name, healthy)
			} else {
				glog.V(1).Infof("pod %q container %q hash changed (%d vs %d). Container will be killed and re-created.", podFullName, container.Name, hash, expectedHash)
			}
			if err := kl.killContainer(dockerContainer); err != nil {
				glog.V(1).Infof("Failed to kill container %q: %v", dockerContainer.ID, err)
				continue
			}
			killedContainers[containerID] = empty{}

			// Also kill associated network container
			if netContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found {
				if err := kl.killContainer(netContainer); err != nil {
					glog.V(1).Infof("Failed to kill network container %q: %v", netContainer.ID, err)
					continue
				}
			}
		}

		// Check RestartPolicy for container
		recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name)
		if err != nil {
			glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name)
			// TODO(dawnchen): error handling here?
		}

		if len(recentContainers) > 0 && pod.Spec.RestartPolicy.Always == nil {
			if pod.Spec.RestartPolicy.Never != nil {
				glog.V(3).Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name)
				continue
			}
			if pod.Spec.RestartPolicy.OnFailure != nil {
				// Check the exit code of last run
				if recentContainers[0].State.ExitCode == 0 {
					glog.V(3).Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name)
					continue
				}
			}
		}

		glog.V(3).Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container)
		// A nil ref only disables event recording below; it is not fatal.
		ref, err := containerRef(pod, &container)
		if err != nil {
			glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err)
		}
		if !api.IsPullNever(container.ImagePullPolicy) {
			present, err := kl.dockerPuller.IsImagePresent(container.Image)
			// Images that require the latest tag are re-pulled even when
			// already present under IfNotPresent.
			latest := dockertools.RequireLatestImage(container.Image)
			if err != nil {
				if ref != nil {
					record.Eventf(ref, "failed", "failed", "Failed to inspect image %q", container.Image)
				}
				glog.Errorf("Failed to inspect image %q: %v; skipping pod %q container %q", container.Image, err, podFullName, container.Name)
				continue
			}
			if api.IsPullAlways(container.ImagePullPolicy) || (api.IsPullIfNotPresent(container.ImagePullPolicy) && (!present || latest)) {
				if err := kl.pullImage(container.Image, ref); err != nil {
					continue
				}
			}
		}
		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
		// New containers join the network container's network namespace.
		containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID))
		if err != nil {
			// TODO(bburns) : Perhaps blacklist a container after N failures?
			glog.Errorf("Error running pod %q container %q: %v", podFullName, container.Name, err)
			continue
		}
		containersToKeep[containerID] = empty{}
	}

	// Kill any containers in this pod which were not identified above (guards against duplicates).
	for id, container := range dockerContainers {
		curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0])
		if curPodFullName == podFullName && curUUID == uuid {
			// Don't kill containers we want to keep or those we already killed.
			_, keep := containersToKeep[id]
			_, killed := killedContainers[id]
			if !keep && !killed {
				glog.V(1).Infof("Killing unwanted container in pod %q: %+v", curUUID, container)
				err = kl.killContainer(container)
				if err != nil {
					glog.Errorf("Error killing container: %v", err)
				}
			}
		}
	}

	return nil
}
// syncPod brings the running containers of a single pod in line with the
// pod's manifest: it ensures the network container exists (deleting and
// re-creating the pod's containers if it does not), mounts volumes,
// health-checks existing containers, restarts or creates containers subject
// to the manifest's RestartPolicy, and finally kills any duplicate containers
// for this pod that were not accounted for above.
func (kl *Kubelet) syncPod(pod *Pod, dockerContainers dockertools.DockerContainers) error {
	podFullName := GetPodFullName(pod)
	uuid := pod.Manifest.UUID
	// Containers we have decided must survive this sync pass.
	containersToKeep := make(map[dockertools.DockerID]empty)
	// Containers we killed ourselves during this pass, so the final sweep
	// does not try to kill them a second time.
	killedContainers := make(map[dockertools.DockerID]empty)

	// Make sure we have a network container
	var netID dockertools.DockerID
	if networkDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found {
		netID = dockertools.DockerID(networkDockerContainer.ID)
	} else {
		glog.Infof("Network container doesn't exist, creating")
		// Without the network container the pod is rebuilt from scratch.
		count, err := kl.deleteAllContainers(pod, podFullName, dockerContainers)
		if err != nil {
			return err
		}
		dockerNetworkID, err := kl.createNetworkContainer(pod)
		if err != nil {
			glog.Errorf("Failed to introspect network container. (%v) Skipping pod %s", err, podFullName)
			return err
		}
		netID = dockerNetworkID
		if count > 0 {
			// relist everything, otherwise we'll think we're ok
			dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient)
			if err != nil {
				glog.Errorf("Error listing containers %#v", dockerContainers)
				return err
			}
		}
	}
	containersToKeep[netID] = empty{}

	podVolumes, err := kl.mountExternalVolumes(&pod.Manifest)
	if err != nil {
		glog.Errorf("Unable to mount volumes for pod %s: (%v) Skipping pod.", podFullName, err)
		return err
	}

	// Pod state (notably the pod IP) feeds the health checks below; failure
	// here only degrades health checking, it does not abort the sync.
	podState := api.PodState{Manifest: api.ContainerManifest{UUID: uuid}}
	info, err := kl.GetPodInfo(podFullName, uuid)
	if err != nil {
		glog.Errorf("Unable to get pod with name %s and uuid %s info, health checks may be invalid.", podFullName, uuid)
	}
	netInfo, found := info[networkContainerName]
	if found && netInfo.NetworkSettings != nil {
		podState.PodIP = netInfo.NetworkSettings.IPAddress
	}

	for _, container := range pod.Manifest.Containers {
		expectedHash := dockertools.HashContainer(&container)
		if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found {
			containerID := dockertools.DockerID(dockerContainer.ID)
			glog.V(1).Infof("pod %s container %s exists as %v", podFullName, container.Name, containerID)

			// look for changes in the container.
			if hash == 0 || hash == expectedHash {
				// TODO: This should probably be separated out into a separate goroutine.
				healthy, err := kl.healthy(podFullName, podState, container, dockerContainer)
				if err != nil {
					// An errored health check keeps the container: do not
					// kill on a flaky check.
					glog.V(1).Infof("health check errored: %v", err)
					containersToKeep[containerID] = empty{}
					continue
				}
				if healthy == health.Healthy {
					containersToKeep[containerID] = empty{}
					continue
				}
				glog.V(1).Infof("pod %s container %s is unhealthy.", podFullName, container.Name, healthy)
			} else {
				glog.V(1).Infof("container hash changed %d vs %d.", hash, expectedHash)
			}
			if err := kl.killContainer(dockerContainer); err != nil {
				glog.V(1).Infof("Failed to kill container %s: %v", dockerContainer.ID, err)
				continue
			}
			killedContainers[containerID] = empty{}
		}

		// Check RestartPolicy for container
		recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name)
		if err != nil {
			glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name)
			// TODO(dawnchen): error handling here?
		}

		if len(recentContainers) > 0 && pod.Manifest.RestartPolicy.Always == nil {
			if pod.Manifest.RestartPolicy.Never != nil {
				glog.Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name)
				continue
			}
			if pod.Manifest.RestartPolicy.OnFailure != nil {
				// Check the exit code of last run
				if recentContainers[0].State.ExitCode == 0 {
					glog.Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name)
					continue
				}
			}
		}

		glog.Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container)
		if err := kl.dockerPuller.Pull(container.Image); err != nil {
			glog.Errorf("Failed to pull image %s: %v skipping pod %s container %s.", container.Image, err, podFullName, container.Name)
			continue
		}
		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
		// New containers join the network container's network namespace.
		containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID))
		if err != nil {
			// TODO(bburns) : Perhaps blacklist a container after N failures?
			glog.Errorf("Error running pod %s container %s: %v", podFullName, container.Name, err)
			continue
		}
		containersToKeep[containerID] = empty{}
	}

	// Kill any containers in this pod which were not identified above (guards against duplicates).
	for id, container := range dockerContainers {
		curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0])
		if curPodFullName == podFullName && curUUID == uuid {
			// Don't kill containers we want to keep or those we already killed.
			_, keep := containersToKeep[id]
			_, killed := killedContainers[id]
			if !keep && !killed {
				err = kl.killContainer(container)
				if err != nil {
					glog.Errorf("Error killing container: %v", err)
				}
			}
		}
	}

	return nil
}
// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []api.BoundPod) error { glog.V(4).Infof("Desired: %#v", pods) var err error desiredContainers := make(map[podContainer]empty) desiredPods := make(map[types.UID]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers: %#v", dockerContainers) return err } // Check for any containers that need starting for ix := range pods { pod := &pods[ix] podFullName := GetPodFullName(pod) uid := pod.UID desiredPods[uid] = empty{} // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uid, networkContainerName}] = empty{} for _, cont := range pod.Spec.Containers { desiredContainers[podContainer{podFullName, uid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { if err := kl.syncPod(pod, dockerContainers); err != nil { glog.Errorf("Error syncing pod, skipping: %v", err) record.Eventf(pod, "failedSync", "Error syncing pod, skipping: %v", err) } }) } // Kill any containers we don't need. for _, container := range dockerContainers { // Don't kill containers that are in the desired pods. podFullName, uid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, found := desiredPods[uid]; found { // syncPod() will handle this one. continue } _, _, podAnnotations := ParsePodFullName(podFullName) if source := podAnnotations[ConfigSourceAnnotationKey]; !kl.sourceReady(source) { // If the source for this container is not ready, skip deletion, so that we don't accidentally // delete containers for sources that haven't reported yet. 
glog.V(4).Infof("Skipping delete of container (%q), source (%s) aren't ready yet.", podFullName, source) continue } pc := podContainer{podFullName, uid, containerName} if _, ok := desiredContainers[pc]; !ok { glog.V(1).Infof("Killing unwanted container %+v", pc) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container %+v: %v", pc, err) } } } // Remove any orphaned pods. err = kl.cleanupOrphanedPods(pods) if err != nil { return err } // Remove any orphaned volumes. err = kl.cleanupOrphanedVolumes(pods) if err != nil { return err } return err }