func (p *podWorkers) managePodLoop(podUpdates <-chan workUpdate) {
	var minRuntimeCacheTime time.Time
	for newWork := range podUpdates {
		func() {
			defer p.checkForUpdates(newWork.pod.UID, newWork.updateCompleteFn)
			// We would like to have the state of the containers from at least
			// the moment when we finished the previous processing of that pod.
			if err := p.runtimeCache.ForceUpdateIfOlder(minRuntimeCacheTime); err != nil {
				glog.Errorf("Error updating the container runtime cache: %v", err)
				return
			}
			pods, err := p.runtimeCache.GetPods()
			if err != nil {
				glog.Errorf("Error getting pods while syncing pod: %v", err)
				return
			}

			err = p.syncPodFn(newWork.pod, newWork.mirrorPod,
				kubecontainer.Pods(pods).FindPodByID(newWork.pod.UID), newWork.updateType)
			if err != nil {
				glog.Errorf("Error syncing pod %s, skipping: %v", newWork.pod.UID, err)
				p.recorder.Eventf(newWork.pod, "FailedSync", "Error syncing pod, skipping: %v", err)
				return
			}
			minRuntimeCacheTime = time.Now()

			newWork.updateCompleteFn()
		}()
	}
}
// runPod runs a single pod and waits until all containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		pods, err := kl.containerRuntime.GetPods(false)
		if err != nil {
			return fmt.Errorf("failed to get kubelet pods: %v", err)
		}
		p := container.Pods(pods).FindPodByID(pod.UID)
		running, err := kl.isPodRunning(pod, p)
		if err != nil {
			return fmt.Errorf("failed to check pod status: %v", err)
		}
		if running {
			glog.Infof("pod %q containers running", pod.Name)
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", pod.Name)
		// We don't create mirror pods in this mode; pass a nil mirror pod
		// to syncPod.
		if err = kl.syncPod(pod, nil, p, kubetypes.SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod: %v", err)
		}
		if retry >= RunOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, RunOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay)
		time.Sleep(delay)
		retry++
		delay *= RunOnceRetryDelayBackoff
	}
}
// findContainer finds and returns the container with the given pod ID, full name, and container name.
// It returns nil if not found.
func (kl *Kubelet) findContainer(podFullName string, podUID types.UID, containerName string) (*kubecontainer.Container, error) {
	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	return pod.FindContainerByName(containerName), nil
}
// UpdatePod is the fake implementation used in tests; it runs syncPodFn
// synchronously instead of dispatching the update to a worker goroutine.
func (f *fakePodWorkers) UpdatePod(pod *api.Pod, mirrorPod *api.Pod, updateComplete func()) {
	pods, err := f.runtimeCache.GetPods()
	if err != nil {
		f.t.Errorf("Unexpected error: %v", err)
	}
	if err := f.syncPodFn(pod, mirrorPod, kubecontainer.Pods(pods).FindPodByID(pod.UID), SyncPodUpdate); err != nil {
		f.t.Errorf("Unexpected error: %v", err)
	}
}
// GetPodStatus currently invokes GetPods() to return the status.
// TODO(yifan): Split the get status logic from GetPods().
func (r *runtime) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
	pods, err := r.GetPods(true)
	if err != nil {
		return nil, err
	}
	p := kubecontainer.Pods(pods).FindPodByID(pod.UID)
	if len(p.Containers) == 0 {
		return nil, fmt.Errorf("cannot find status for pod: %q", kubecontainer.BuildPodFullName(pod.Name, pod.Namespace))
	}
	return &p.Status, nil
}
// PortForward connects to the pod's port and copies data between the port
// and the stream.
func (kl *Kubelet) PortForward(podFullName string, podUID types.UID, port uint16, stream io.ReadWriteCloser) error {
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	if pod.IsEmpty() {
		return fmt.Errorf("pod not found (%q)", podFullName)
	}
	return kl.runner.PortForward(&pod, port, stream)
}
func (p *podWorkers) managePodLoop(podUpdates <-chan workUpdate) {
	var minRuntimeCacheTime time.Time
	for newWork := range podUpdates {
		err := func() (err error) {
			podID := newWork.pod.UID
			if p.podCache != nil {
				// This is a blocking call that would return only if the cache
				// has an entry for the pod that is newer than minRuntimeCacheTime.
				// This ensures the worker doesn't start syncing until after the
				// cache is at least newer than the finished time of the
				// previous sync.
				// TODO: We don't consume the return PodStatus yet, but we
				// should pass it to syncPod() eventually.
				p.podCache.GetNewerThan(podID, minRuntimeCacheTime)
			}
			// TODO: Deprecate the runtime cache.
			// We would like to have the state of the containers from at least
			// the moment when we finished the previous processing of that pod.
			if err := p.runtimeCache.ForceUpdateIfOlder(minRuntimeCacheTime); err != nil {
				glog.Errorf("Error updating the container runtime cache: %v", err)
				return err
			}
			pods, err := p.runtimeCache.GetPods()
			if err != nil {
				glog.Errorf("Error getting pods while syncing pod: %v", err)
				return err
			}

			err = p.syncPodFn(newWork.pod, newWork.mirrorPod,
				kubecontainer.Pods(pods).FindPodByID(newWork.pod.UID), newWork.updateType)
			minRuntimeCacheTime = time.Now()
			if err != nil {
				glog.Errorf("Error syncing pod %s, skipping: %v", newWork.pod.UID, err)
				p.recorder.Eventf(newWork.pod, api.EventTypeWarning, kubecontainer.FailedSync, "Error syncing pod, skipping: %v", err)
				return err
			}

			newWork.updateCompleteFn()
			return nil
		}()
		p.wrapUp(newWork.pod.UID, err)
	}
}
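The podCache.GetNewerThan call above blocks until the cache holds an entry for the pod that is newer than minRuntimeCacheTime, which is what keeps the worker from syncing against stale status. Below is a minimal, self-contained sketch of that "block until the entry is newer than a given time" pattern; the cache type, its string keys, and the entry struct are hypothetical simplifications for illustration, not the kubelet's actual cache implementation.

package main

import (
	"fmt"
	"sync"
	"time"
)

type entry struct {
	modified time.Time
	data     string
}

type cache struct {
	mu      sync.Mutex
	cond    *sync.Cond
	entries map[string]entry
}

func newCache() *cache {
	c := &cache{entries: map[string]entry{}}
	c.cond = sync.NewCond(&c.mu)
	return c
}

// Set stores a new value for id and wakes up any blocked readers.
func (c *cache) Set(id, data string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries[id] = entry{modified: time.Now(), data: data}
	c.cond.Broadcast()
}

// GetNewerThan blocks until the entry for id is newer than minTime, then returns it.
func (c *cache) GetNewerThan(id string, minTime time.Time) string {
	c.mu.Lock()
	defer c.mu.Unlock()
	for {
		if e, ok := c.entries[id]; ok && e.modified.After(minTime) {
			return e.data
		}
		c.cond.Wait()
	}
}

func main() {
	c := newCache()
	start := time.Now()
	go func() {
		time.Sleep(50 * time.Millisecond)
		c.Set("pod-1", "running")
	}()
	// Blocks until the goroutine above stores an entry newer than start.
	fmt.Println(c.GetNewerThan("pod-1", start))
}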
// runPod runs a single pod and waits until all containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		pods, err := kl.containerRuntime.GetPods(false)
		if err != nil {
			return fmt.Errorf("failed to get kubelet pods: %v", err)
		}
		p := container.Pods(pods).FindPodByID(pod.UID)
		running, err := kl.isPodRunning(pod, p)
		if err != nil {
			return fmt.Errorf("failed to check pod status: %v", err)
		}
		if running {
			glog.Infof("pod %q containers running", pod.Name)
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", pod.Name)

		status, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
		if err != nil {
			glog.Errorf("Unable to get status for pod %q: %v", pod.Name, err)
		}

		glog.Infof("Creating a mirror pod for static pod %q", format.Pod(pod))
		if err := kl.podManager.CreateMirrorPod(pod); err != nil {
			glog.Errorf("Failed creating a mirror pod %q: %v", format.Pod(pod), err)
		}
		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)

		if err = kl.syncPod(pod, mirrorPod, status, kubetypes.SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod: %v", err)
		}
		if retry >= runOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, runOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay)
		time.Sleep(delay)
		retry++
		delay *= runOnceRetryDelayBackoff
	}
}
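runPod retries the sync with a multiplicative backoff between attempts until the containers report running or the retry budget is exhausted. The helper below is a hedged, generic sketch of that loop; retryUntil, its parameters, and the values used in main are illustrative assumptions, not the kubelet's runOnce* constants or API.

package main

import (
	"errors"
	"fmt"
	"time"
)

// retryUntil calls check until it reports success, sleeping between attempts
// and multiplying the delay by factor each time, up to maxRetries retries.
func retryUntil(check func() (bool, error), delay time.Duration, factor int, maxRetries int) error {
	for retry := 0; ; retry++ {
		ok, err := check()
		if err != nil {
			return err
		}
		if ok {
			return nil
		}
		if retry >= maxRetries {
			return errors.New("condition not met after max retries")
		}
		time.Sleep(delay)
		delay *= time.Duration(factor)
	}
}

func main() {
	attempts := 0
	err := retryUntil(func() (bool, error) {
		attempts++
		return attempts >= 3, nil // succeed on the third attempt
	}, 10*time.Millisecond, 2, 5)
	fmt.Println(attempts, err) // 3 <nil>
}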
// GetContainerInfo returns stats (from Cadvisor) for a container.
func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, containerName string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) {
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.runtimeCache.GetPods()
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	container := pod.FindContainerByName(containerName)
	if container == nil {
		return nil, kubecontainer.ErrContainerNotFound
	}

	ci, err := kl.cadvisor.DockerContainer(container.ID.ID, req)
	if err != nil {
		return nil, err
	}
	return &ci, nil
}
// GetContainerInfo returns stats (from Cadvisor) for a container.
func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, containerName string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) {
	cadvisorID := podFullName
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.runtimeCache.GetPods()
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)

	// Get container stats
	if len(containerName) > 0 {
		container := pod.FindContainerByName(containerName)
		if container == nil {
			return nil, kubecontainer.ErrContainerNotFound
		}
		cadvisorID = container.ID.ID
	}

	var ci cadvisorapi.ContainerInfo
	switch kl.containerRuntime.Type() {
	case "docker":
		ci, err = kl.cadvisor.DockerContainer(cadvisorID, req)
	case "hyper":
		// TODO(feisky): Hyper container stats is not supported
		cadvisorID = podFullName
		ci, err = kl.cadvisor.HyperContainer(cadvisorID, req)
	default:
		err = fmt.Errorf("Container runtime %v not supported", kl.containerRuntime.Type())
	}
	if err != nil {
		return nil, err
	}
	return &ci, nil
}
// relist queries the container runtime for the list of pods/containers, compares
// it with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
	glog.V(5).Infof("GenericPLEG: Relisting")
	timestamp := time.Now()

	if !g.lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(g.lastRelistTime))
	}

	defer func() {
		// Update the relist time.
		g.lastRelistTime = timestamp
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()

	// Get all the pods.
	podList, err := g.runtime.GetPods(true)
	if err != nil {
		glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
		return
	}
	pods := kubecontainer.Pods(podList)
	g.podRecords.setCurrent(pods)

	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			e := computeEvent(oldPod, pod, &container.ID)
			updateEvents(eventsByPodID, e)
		}
	}

	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detected again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err := g.updateCache(pod, pid); err != nil {
				glog.Errorf("PLEG: Ignoring events for pod %s/%s: %v", pod.Name, pod.Namespace, err)
				continue
			}
		}
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)
		for i := range events {
			// Filter out events that are not reliable and no other components use yet.
			if events[i].Type == ContainerChanged || events[i].Type == ContainerRemoved {
				continue
			}
			g.eventChannel <- events[i]
		}
	}

	if g.cacheEnabled() {
		// Update the cache timestamp. This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}
}
// relist queries the container runtime for the list of pods/containers, compares
// it with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
	glog.V(5).Infof("GenericPLEG: Relisting")

	if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(lastRelistTime))
	}

	timestamp := g.clock.Now()
	// Update the relist time.
	g.updateRelisTime(timestamp)
	defer func() {
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()

	// Get all the pods.
	podList, err := g.runtime.GetPods(true)
	if err != nil {
		glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
		return
	}
	pods := kubecontainer.Pods(podList)
	g.podRecords.setCurrent(pods)

	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			events := computeEvents(oldPod, pod, &container.ID)
			for _, e := range events {
				updateEvents(eventsByPodID, e)
			}
		}
	}

	var needsReinspection map[types.UID]*kubecontainer.Pod
	if g.cacheEnabled() {
		needsReinspection = make(map[types.UID]*kubecontainer.Pod)
	}

	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detected again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err := g.updateCache(pod, pid); err != nil {
				glog.Errorf("PLEG: Ignoring events for pod %s/%s: %v", pod.Name, pod.Namespace, err)
				// make sure we try to reinspect the pod during the next relisting
				needsReinspection[pid] = pod
				continue
			} else if _, found := g.podsToReinspect[pid]; found {
				// this pod was in the list to reinspect and we did so because it had events, so remove it
				// from the list (we don't want the reinspection code below to inspect it a second time in
				// this relist execution)
				delete(g.podsToReinspect, pid)
			}
		}
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)
		for i := range events {
			// Filter out events that are not reliable and no other components use yet.
			if events[i].Type == ContainerChanged {
				continue
			}
			g.eventChannel <- events[i]
		}
	}

	if g.cacheEnabled() {
		// reinspect any pods that failed inspection during the previous relist
		if len(g.podsToReinspect) > 0 {
			glog.V(5).Infof("GenericPLEG: Reinspecting pods that previously failed inspection")
			for pid, pod := range g.podsToReinspect {
				if err := g.updateCache(pod, pid); err != nil {
					glog.Errorf("PLEG: pod %s/%s failed reinspection: %v", pod.Name, pod.Namespace, err)
					needsReinspection[pid] = pod
				}
			}
		}

		// Update the cache timestamp. This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}

	// make sure we retain the list of pods that need reinspecting the next time relist is called
	g.podsToReinspect = needsReinspection
}
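relist drives event generation by diffing each container's old and current state across consecutive relists. The snippet below is a hypothetical, much-simplified sketch of that diffing idea; the State and Event types and the computeEvent mapping are illustrative assumptions and do not mirror the PLEG's actual types or its per-pod bookkeeping.

package main

import "fmt"

type State string

const (
	Running     State = "running"
	Exited      State = "exited"
	NonExistent State = "non-existent"
)

type Event struct {
	ContainerID string
	Type        string
}

// computeEvent maps a container state transition to a lifecycle event,
// returning nil when nothing changed.
func computeEvent(id string, old, current State) *Event {
	if old == current {
		return nil
	}
	switch current {
	case Running:
		return &Event{ContainerID: id, Type: "ContainerStarted"}
	case Exited:
		return &Event{ContainerID: id, Type: "ContainerDied"}
	case NonExistent:
		return &Event{ContainerID: id, Type: "ContainerRemoved"}
	default:
		return &Event{ContainerID: id, Type: "ContainerChanged"}
	}
}

func main() {
	fmt.Println(*computeEvent("abc", Running, Exited)) // {abc ContainerDied}
}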