func (self *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) { runningContainers, err := self.containerCache.RunningContainers() if err != nil { glog.Warning("Failed to get running container information while collecting metrics: %v", err) return } // Get a set of running pods. runningPods := make(map[types.UID]struct{}) for _, cont := range runningContainers { _, uid, _, _, err := dockertools.ParseDockerName(cont.Names[0]) if err != nil { continue } runningPods[uid] = struct{}{} } ch <- prometheus.MustNewConstMetric( runningPodCountDesc, prometheus.GaugeValue, float64(len(runningPods))) ch <- prometheus.MustNewConstMetric( runningContainerCountDesc, prometheus.GaugeValue, float64(len(runningContainers))) }
// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []api.BoundPod) error { glog.V(4).Infof("Desired: %#v", pods) var err error desiredContainers := make(map[podContainer]empty) desiredPods := make(map[string]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers: %#v", dockerContainers) return err } // Check for any containers that need starting for ix := range pods { pod := &pods[ix] podFullName := GetPodFullName(pod) uuid := pod.UID desiredPods[uuid] = empty{} // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uuid, networkContainerName}] = empty{} for _, cont := range pod.Spec.Containers { desiredContainers[podContainer{podFullName, uuid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { err := kl.syncPod(pod, dockerContainers) if err != nil { glog.Errorf("Error syncing pod, skipping: %s", err) } }) } // Kill any containers we don't need. for _, container := range dockerContainers { // Don't kill containers that are in the desired pods. podFullName, uuid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, found := desiredPods[uuid]; found { // syncPod() will handle this one. continue } pc := podContainer{podFullName, uuid, containerName} if _, ok := desiredContainers[pc]; !ok { glog.V(1).Infof("Killing unwanted container %+v", pc) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container %+v: %s", pc, err) } } } // Remove any orphaned volumes. kl.reconcileVolumes(pods) return err }
// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []Pod) error { glog.Infof("Desired [%s]: %+v", kl.hostname, pods) var err error desiredContainers := make(map[podContainer]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } // Check for any containers that need starting for i := range pods { pod := &pods[i] podFullName := GetPodFullName(pod) uuid := pod.Manifest.UUID // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uuid, networkContainerName}] = empty{} for _, cont := range pod.Manifest.Containers { desiredContainers[podContainer{podFullName, uuid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { err := kl.syncPod(pod, dockerContainers) if err != nil { glog.Errorf("Error syncing pod: %v skipping.", err) } }) } // Kill any containers we don't need existingContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers: %v", err) return err } for _, container := range existingContainers { // Don't kill containers that are in the desired pods. podFullName, uuid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, ok := desiredContainers[podContainer{podFullName, uuid, containerName}]; !ok { err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } // Remove any orphaned volumes. kl.reconcileVolumes(pods) return err }
// Get all containers that are evictable. Evictable containers are: not running // and created more than MinAge ago. func (cgc *realContainerGC) evictableContainers() (containersByEvictUnit, []containerGCInfo, error) { containers, err := dockertools.GetKubeletDockerContainers(cgc.dockerClient, true) if err != nil { return containersByEvictUnit{}, []containerGCInfo{}, err } unidentifiedContainers := make([]containerGCInfo, 0) evictUnits := make(containersByEvictUnit) newestGCTime := time.Now().Add(-cgc.policy.MinAge) for _, container := range containers { // Prune out running containers. data, err := cgc.dockerClient.InspectContainer(container.ID) if err != nil { // Container may have been removed already, skip. continue } else if data.State.Running { continue } else if newestGCTime.Before(data.Created) { continue } containerInfo := containerGCInfo{ id: container.ID, name: container.Names[0], createTime: data.Created, } containerName, _, err := dockertools.ParseDockerName(container.Names[0]) if err != nil { unidentifiedContainers = append(unidentifiedContainers, containerInfo) } else { key := evictUnit{ uid: containerName.PodUID, name: containerName.ContainerName, } containerInfo.podNameWithNamespace = containerName.PodFullName containerInfo.containerName = containerName.ContainerName evictUnits[key] = append(evictUnits[key], containerInfo) } } // Sort the containers by age. for uid := range evictUnits { sort.Sort(byCreated(evictUnits[uid])) } return evictUnits, unidentifiedContainers, nil }
func (kl *Kubelet) killContainerByID(ID, name string) error { glog.Infof("Killing: %s", ID) err := kl.dockerClient.StopContainer(ID, 10) if len(name) == 0 { return err } podFullName, uuid, containerName, _ := dockertools.ParseDockerName(name) kl.LogEvent(&api.Event{ Event: "STOP", Manifest: &api.ContainerManifest{ //TODO: This should be reported using either the apiserver schema or the kubelet schema ID: podFullName, UUID: uuid, }, Container: &api.Container{ Name: containerName, }, }) return err }
// TODO: Also enforce a maximum total number of containers. func (kl *Kubelet) GarbageCollectContainers() error { if kl.maxContainerCount == 0 { return nil } containers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, true) if err != nil { return err } uuidToIDMap := map[string][]string{} for _, container := range containers { _, uuid, name, _ := dockertools.ParseDockerName(container.ID) uuidName := uuid + "." + name uuidToIDMap[uuidName] = append(uuidToIDMap[uuidName], container.ID) } for _, list := range uuidToIDMap { if len(list) <= kl.maxContainerCount { continue } if err := kl.purgeOldest(list); err != nil { return err } } return nil }
func (kl *Kubelet) syncPod(pod *api.BoundPod, dockerContainers dockertools.DockerContainers) error { podFullName := GetPodFullName(pod) uuid := pod.UID containersToKeep := make(map[dockertools.DockerID]empty) killedContainers := make(map[dockertools.DockerID]empty) glog.V(4).Infof("Syncing Pod, podFullName: %q, uuid: %q", podFullName, uuid) // Make sure we have a network container var netID dockertools.DockerID if netDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { netID = dockertools.DockerID(netDockerContainer.ID) } else { glog.V(2).Infof("Network container doesn't exist for pod %q, killing and re-creating the pod", podFullName) count, err := kl.killContainersInPod(pod, dockerContainers) if err != nil { return err } netID, err = kl.createNetworkContainer(pod) if err != nil { glog.Errorf("Failed to introspect network container: %v; Skipping pod %q", err, podFullName) return err } if count > 0 { // Re-list everything, otherwise we'll think we're ok. dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } } } containersToKeep[netID] = empty{} podVolumes, err := kl.mountExternalVolumes(pod) if err != nil { glog.Errorf("Unable to mount volumes for pod %q: %v; skipping pod", podFullName, err) return err } podStatus := api.PodStatus{} info, err := kl.GetPodInfo(podFullName, uuid) if err != nil { glog.Errorf("Unable to get pod with name %q and uuid %q info, health checks may be invalid", podFullName, uuid) } netInfo, found := info[networkContainerName] if found { podStatus.PodIP = netInfo.PodIP } for _, container := range pod.Spec.Containers { expectedHash := dockertools.HashContainer(&container) if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found { containerID := dockertools.DockerID(dockerContainer.ID) glog.V(3).Infof("pod %q container %q exists as %v", podFullName, container.Name, containerID) // look for changes in the container. if hash == 0 || hash == expectedHash { // TODO: This should probably be separated out into a separate goroutine. healthy, err := kl.healthy(podFullName, uuid, podStatus, container, dockerContainer) if err != nil { glog.V(1).Infof("health check errored: %v", err) containersToKeep[containerID] = empty{} continue } if healthy == health.Healthy { containersToKeep[containerID] = empty{} continue } glog.V(1).Infof("pod %q container %q is unhealthy. Container will be killed and re-created.", podFullName, container.Name, healthy) } else { glog.V(1).Infof("pod %q container %q hash changed (%d vs %d). Container will be killed and re-created.", podFullName, container.Name, hash, expectedHash) } if err := kl.killContainer(dockerContainer); err != nil { glog.V(1).Infof("Failed to kill container %q: %v", dockerContainer.ID, err) continue } killedContainers[containerID] = empty{} // Also kill associated network container if netContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { if err := kl.killContainer(netContainer); err != nil { glog.V(1).Infof("Failed to kill network container %q: %v", netContainer.ID, err) continue } } } // Check RestartPolicy for container recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name) if err != nil { glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name) // TODO(dawnchen): error handling here? } if len(recentContainers) > 0 && pod.Spec.RestartPolicy.Always == nil { if pod.Spec.RestartPolicy.Never != nil { glog.V(3).Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } if pod.Spec.RestartPolicy.OnFailure != nil { // Check the exit code of last run if recentContainers[0].State.ExitCode == 0 { glog.V(3).Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } } } glog.V(3).Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container) ref, err := containerRef(pod, &container) if err != nil { glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err) } if !api.IsPullNever(container.ImagePullPolicy) { present, err := kl.dockerPuller.IsImagePresent(container.Image) latest := dockertools.RequireLatestImage(container.Image) if err != nil { if ref != nil { record.Eventf(ref, "failed", "failed", "Failed to inspect image %q", container.Image) } glog.Errorf("Failed to inspect image %q: %v; skipping pod %q container %q", container.Image, err, podFullName, container.Name) continue } if api.IsPullAlways(container.ImagePullPolicy) || (api.IsPullIfNotPresent(container.ImagePullPolicy) && (!present || latest)) { if err := kl.pullImage(container.Image, ref); err != nil { continue } } } // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID)) if err != nil { // TODO(bburns) : Perhaps blacklist a container after N failures? glog.Errorf("Error running pod %q container %q: %v", podFullName, container.Name, err) continue } containersToKeep[containerID] = empty{} } // Kill any containers in this pod which were not identified above (guards against duplicates). for id, container := range dockerContainers { curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0]) if curPodFullName == podFullName && curUUID == uuid { // Don't kill containers we want to keep or those we already killed. _, keep := containersToKeep[id] _, killed := killedContainers[id] if !keep && !killed { glog.V(1).Infof("Killing unwanted container in pod %q: %+v", curUUID, container) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } } return nil }
func (kl *Kubelet) syncPod(pod *Pod, dockerContainers dockertools.DockerContainers) error { podFullName := GetPodFullName(pod) uuid := pod.Manifest.UUID containersToKeep := make(map[dockertools.DockerID]empty) killedContainers := make(map[dockertools.DockerID]empty) // Make sure we have a network container var netID dockertools.DockerID if networkDockerContainer, found, _ := dockerContainers.FindPodContainer(podFullName, uuid, networkContainerName); found { netID = dockertools.DockerID(networkDockerContainer.ID) } else { glog.Infof("Network container doesn't exist, creating") count, err := kl.deleteAllContainers(pod, podFullName, dockerContainers) if err != nil { return err } dockerNetworkID, err := kl.createNetworkContainer(pod) if err != nil { glog.Errorf("Failed to introspect network container. (%v) Skipping pod %s", err, podFullName) return err } netID = dockerNetworkID if count > 0 { // relist everything, otherwise we'll think we're ok dockerContainers, err = dockertools.GetKubeletDockerContainers(kl.dockerClient) if err != nil { glog.Errorf("Error listing containers %#v", dockerContainers) return err } } } containersToKeep[netID] = empty{} podVolumes, err := kl.mountExternalVolumes(&pod.Manifest) if err != nil { glog.Errorf("Unable to mount volumes for pod %s: (%v) Skipping pod.", podFullName, err) return err } podState := api.PodState{Manifest: api.ContainerManifest{UUID: uuid}} info, err := kl.GetPodInfo(podFullName, uuid) if err != nil { glog.Errorf("Unable to get pod with name %s and uuid %s info, health checks may be invalid.", podFullName, uuid) } netInfo, found := info[networkContainerName] if found && netInfo.NetworkSettings != nil { podState.PodIP = netInfo.NetworkSettings.IPAddress } for _, container := range pod.Manifest.Containers { expectedHash := dockertools.HashContainer(&container) if dockerContainer, found, hash := dockerContainers.FindPodContainer(podFullName, uuid, container.Name); found { containerID := dockertools.DockerID(dockerContainer.ID) glog.V(1).Infof("pod %s container %s exists as %v", podFullName, container.Name, containerID) // look for changes in the container. if hash == 0 || hash == expectedHash { // TODO: This should probably be separated out into a separate goroutine. healthy, err := kl.healthy(podFullName, podState, container, dockerContainer) if err != nil { glog.V(1).Infof("health check errored: %v", err) containersToKeep[containerID] = empty{} continue } if healthy == health.Healthy { containersToKeep[containerID] = empty{} continue } glog.V(1).Infof("pod %s container %s is unhealthy.", podFullName, container.Name, healthy) } else { glog.V(1).Infof("container hash changed %d vs %d.", hash, expectedHash) } if err := kl.killContainer(dockerContainer); err != nil { glog.V(1).Infof("Failed to kill container %s: %v", dockerContainer.ID, err) continue } killedContainers[containerID] = empty{} } // Check RestartPolicy for container recentContainers, err := dockertools.GetRecentDockerContainersWithNameAndUUID(kl.dockerClient, podFullName, uuid, container.Name) if err != nil { glog.Errorf("Error listing recent containers with name and uuid:%s--%s--%s", podFullName, uuid, container.Name) // TODO(dawnchen): error handling here? } if len(recentContainers) > 0 && pod.Manifest.RestartPolicy.Always == nil { if pod.Manifest.RestartPolicy.Never != nil { glog.Infof("Already ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } if pod.Manifest.RestartPolicy.OnFailure != nil { // Check the exit code of last run if recentContainers[0].State.ExitCode == 0 { glog.Infof("Already successfully ran container with name %s--%s--%s, do nothing", podFullName, uuid, container.Name) continue } } } glog.Infof("Container with name %s--%s--%s doesn't exist, creating %#v", podFullName, uuid, container.Name, container) if err := kl.dockerPuller.Pull(container.Image); err != nil { glog.Errorf("Failed to pull image %s: %v skipping pod %s container %s.", container.Image, err, podFullName, container.Name) continue } // TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container containerID, err := kl.runContainer(pod, &container, podVolumes, "container:"+string(netID)) if err != nil { // TODO(bburns) : Perhaps blacklist a container after N failures? glog.Errorf("Error running pod %s container %s: %v", podFullName, container.Name, err) continue } containersToKeep[containerID] = empty{} } // Kill any containers in this pod which were not identified above (guards against duplicates). for id, container := range dockerContainers { curPodFullName, curUUID, _, _ := dockertools.ParseDockerName(container.Names[0]) if curPodFullName == podFullName && curUUID == uuid { // Don't kill containers we want to keep or those we already killed. _, keep := containersToKeep[id] _, killed := killedContainers[id] if !keep && !killed { err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container: %v", err) } } } } return nil }
// SyncPods synchronizes the configured list of pods (desired state) with the host current state. func (kl *Kubelet) SyncPods(pods []api.BoundPod) error { glog.V(4).Infof("Desired: %#v", pods) var err error desiredContainers := make(map[podContainer]empty) desiredPods := make(map[types.UID]empty) dockerContainers, err := dockertools.GetKubeletDockerContainers(kl.dockerClient, false) if err != nil { glog.Errorf("Error listing containers: %#v", dockerContainers) return err } // Check for any containers that need starting for ix := range pods { pod := &pods[ix] podFullName := GetPodFullName(pod) uid := pod.UID desiredPods[uid] = empty{} // Add all containers (including net) to the map. desiredContainers[podContainer{podFullName, uid, networkContainerName}] = empty{} for _, cont := range pod.Spec.Containers { desiredContainers[podContainer{podFullName, uid, cont.Name}] = empty{} } // Run the sync in an async manifest worker. kl.podWorkers.Run(podFullName, func() { if err := kl.syncPod(pod, dockerContainers); err != nil { glog.Errorf("Error syncing pod, skipping: %v", err) record.Eventf(pod, "failedSync", "Error syncing pod, skipping: %v", err) } }) } // Kill any containers we don't need. for _, container := range dockerContainers { // Don't kill containers that are in the desired pods. podFullName, uid, containerName, _ := dockertools.ParseDockerName(container.Names[0]) if _, found := desiredPods[uid]; found { // syncPod() will handle this one. continue } _, _, podAnnotations := ParsePodFullName(podFullName) if source := podAnnotations[ConfigSourceAnnotationKey]; !kl.sourceReady(source) { // If the source for this container is not ready, skip deletion, so that we don't accidentally // delete containers for sources that haven't reported yet. glog.V(4).Infof("Skipping delete of container (%q), source (%s) aren't ready yet.", podFullName, source) continue } pc := podContainer{podFullName, uid, containerName} if _, ok := desiredContainers[pc]; !ok { glog.V(1).Infof("Killing unwanted container %+v", pc) err = kl.killContainer(container) if err != nil { glog.Errorf("Error killing container %+v: %v", pc, err) } } } // Remove any orphaned pods. err = kl.cleanupOrphanedPods(pods) if err != nil { return err } // Remove any orphaned volumes. err = kl.cleanupOrphanedVolumes(pods) if err != nil { return err } return err }