Example #1
func (p *podWorkers) managePodLoop(podUpdates <-chan workUpdate) {
	var minRuntimeCacheTime time.Time
	for newWork := range podUpdates {
		func() {
			defer p.checkForUpdates(newWork.pod.UID, newWork.updateCompleteFn)
			// We would like to have the state of the containers from at least
			// the moment when we finished the previous processing of that pod.
			if err := p.runtimeCache.ForceUpdateIfOlder(minRuntimeCacheTime); err != nil {
				glog.Errorf("Error updating the container runtime cache: %v", err)
				return
			}
			pods, err := p.runtimeCache.GetPods()
			if err != nil {
				glog.Errorf("Error getting pods while syncing pod: %v", err)
				return
			}

			err = p.syncPodFn(newWork.pod, newWork.mirrorPod,
				kubecontainer.Pods(pods).FindPodByID(newWork.pod.UID), newWork.updateType)
			if err != nil {
				glog.Errorf("Error syncing pod %s, skipping: %v", newWork.pod.UID, err)
				p.recorder.Eventf(newWork.pod, "FailedSync", "Error syncing pod, skipping: %v", err)
				return
			}
			minRuntimeCacheTime = time.Now()

			newWork.updateCompleteFn()
		}()
	}
}
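The loop above drains workUpdate items from the per-pod channel. Below is a minimal sketch of what that struct plausibly contains; the field names come from how managePodLoop reads newWork, while the concrete types are assumptions rather than the authoritative definition.

// Sketch only: field names are taken from the usage in managePodLoop above;
// the types are assumed, not confirmed by the source.
type workUpdate struct {
	// pod is the desired pod state the worker should sync toward.
	pod *api.Pod
	// mirrorPod is the mirror pod for a static pod, if one exists.
	mirrorPod *api.Pod
	// updateType distinguishes the kind of sync being requested.
	updateType kubetypes.SyncPodType
	// updateCompleteFn is invoked once the sync attempt has finished.
	updateCompleteFn func()
}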
Example #2
// runPod runs a single pod and waits until all containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		pods, err := kl.containerRuntime.GetPods(false)
		if err != nil {
			return fmt.Errorf("failed to get kubelet pods: %v", err)
		}
		p := container.Pods(pods).FindPodByID(pod.UID)
		running, err := kl.isPodRunning(pod, p)
		if err != nil {
			return fmt.Errorf("failed to check pod status: %v", err)
		}
		if running {
			glog.Infof("pod %q containers running", pod.Name)
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", pod.Name)
		// We don't create mirror pods in this mode; pass a nil mirror pod
		// to syncPod.
		if err = kl.syncPod(pod, nil, p, kubetypes.SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod: %v", err)
		}
		if retry >= RunOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, RunOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay)
		time.Sleep(delay)
		retry++
		delay *= RunOnceRetryDelayBackoff
	}
}
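runPod caps its retries and backs off exponentially through RunOnceMaxRetries and RunOnceRetryDelayBackoff. The constants below are an illustrative sketch only; the RunOnceRetryDelay name and all of the values are assumptions, not the kubelet's actual settings.

// Illustrative values only; the real constants are defined elsewhere in the
// kubelet and may differ.
const (
	RunOnceMaxRetries        = 10              // give up after this many sync attempts
	RunOnceRetryDelay        = 1 * time.Second // hypothetical initial value passed as retryDelay
	RunOnceRetryDelayBackoff = 2               // the delay doubles after every failed check
)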
Example #3
// findContainer finds and returns the container with the given pod ID, full name, and container name.
// It returns nil if not found.
func (kl *Kubelet) findContainer(podFullName string, podUID types.UID, containerName string) (*kubecontainer.Container, error) {
	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	return pod.FindContainerByName(containerName), nil
}
func (f *fakePodWorkers) UpdatePod(pod *api.Pod, mirrorPod *api.Pod, updateComplete func()) {
	pods, err := f.runtimeCache.GetPods()
	if err != nil {
		f.t.Errorf("Unexpected error: %v", err)
	}
	if err := f.syncPodFn(pod, mirrorPod, kubecontainer.Pods(pods).FindPodByID(pod.UID), SyncPodUpdate); err != nil {
		f.t.Errorf("Unexpected error: %v", err)
	}
}
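Nearly every example resolves a runtime pod through kubecontainer.Pods(pods).FindPodByID or FindPod and then looks up a container by name. The following is a hedged sketch of those helpers, with behavior inferred from the call sites (linear scans that return an empty Pod or a nil container when nothing matches); the real kubecontainer package may differ in detail.

// Pods is a list of runtime pods with lookup helpers; the signatures below are
// inferred from the call sites in these examples.
type Pods []*Pod

// FindPodByID returns the pod with the given UID, or an empty Pod if none matches.
func (p Pods) FindPodByID(podUID types.UID) Pod {
	for i := range p {
		if p[i].ID == podUID {
			return *p[i]
		}
	}
	return Pod{}
}

// FindPod prefers a full-name match when podFullName is set, otherwise falls
// back to the UID lookup.
func (p Pods) FindPod(podFullName string, podUID types.UID) Pod {
	if len(podFullName) > 0 {
		for i := range p {
			if BuildPodFullName(p[i].Name, p[i].Namespace) == podFullName {
				return *p[i]
			}
		}
		return Pod{}
	}
	return p.FindPodByID(podUID)
}

// FindContainerByName returns the named container, or nil if it is not present.
func (p *Pod) FindContainerByName(containerName string) *Container {
	for _, c := range p.Containers {
		if c.Name == containerName {
			return c
		}
	}
	return nil
}

// IsEmpty reports whether the lookup found nothing, as checked in Example #6.
func (p *Pod) IsEmpty() bool {
	return p.ID == "" && p.Name == "" && len(p.Containers) == 0
}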
Example #5
// GetPodStatus currently invokes GetPods() to return the status.
// TODO(yifan): Split the get status logic from GetPods().
func (r *runtime) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
	pods, err := r.GetPods(true)
	if err != nil {
		return nil, err
	}
	p := kubecontainer.Pods(pods).FindPodByID(pod.UID)
	if len(p.Containers) == 0 {
		return nil, fmt.Errorf("cannot find status for pod: %q", kubecontainer.BuildPodFullName(pod.Name, pod.Namespace))
	}
	return &p.Status, nil
}
Example #6
// PortForward connects to the pod's port and copies data between the port
// and the stream.
func (kl *Kubelet) PortForward(podFullName string, podUID types.UID, port uint16, stream io.ReadWriteCloser) error {
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	if pod.IsEmpty() {
		return fmt.Errorf("pod not found (%q)", podFullName)
	}
	return kl.runner.PortForward(&pod, port, stream)
}
Example #7
func (p *podWorkers) managePodLoop(podUpdates <-chan workUpdate) {
	var minRuntimeCacheTime time.Time

	for newWork := range podUpdates {
		err := func() (err error) {
			podID := newWork.pod.UID
			if p.podCache != nil {
				// This is a blocking call that would return only if the cache
				// has an entry for the pod that is newer than
				// minRuntimeCacheTime. This ensures the worker doesn't start
				// syncing until after the cache is at least newer than the
				// finished time of the previous sync.
				// TODO: We don't consume the return PodStatus yet, but we
				// should pass it to syncPod() eventually.
				p.podCache.GetNewerThan(podID, minRuntimeCacheTime)
			}
			// TODO: Deprecate the runtime cache.
			// We would like to have the state of the containers from at least
			// the moment when we finished the previous processing of that pod.
			if err := p.runtimeCache.ForceUpdateIfOlder(minRuntimeCacheTime); err != nil {
				glog.Errorf("Error updating the container runtime cache: %v", err)
				return err
			}
			pods, err := p.runtimeCache.GetPods()
			if err != nil {
				glog.Errorf("Error getting pods while syncing pod: %v", err)
				return err
			}

			err = p.syncPodFn(newWork.pod, newWork.mirrorPod,
				kubecontainer.Pods(pods).FindPodByID(newWork.pod.UID), newWork.updateType)
			minRuntimeCacheTime = time.Now()
			if err != nil {
				glog.Errorf("Error syncing pod %s, skipping: %v", newWork.pod.UID, err)
				p.recorder.Eventf(newWork.pod, api.EventTypeWarning, kubecontainer.FailedSync, "Error syncing pod, skipping: %v", err)
				return err
			}
			newWork.updateCompleteFn()
			return nil
		}()
		p.wrapUp(newWork.pod.UID, err)
	}
}
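Example #7 depends on podCache.GetNewerThan blocking until the cache holds data stamped after minRuntimeCacheTime. The sketch below illustrates those semantics with a condition variable; it is not the kubelet's actual cache implementation.

// Illustration of the blocking GetNewerThan semantics described in the comment
// above; the real pod cache is implemented differently.
type cacheEntry struct {
	status    *kubecontainer.PodStatus
	err       error
	timestamp time.Time
}

type podStatusCache struct {
	mu      sync.Mutex
	cond    *sync.Cond
	entries map[types.UID]cacheEntry
}

func newPodStatusCache() *podStatusCache {
	c := &podStatusCache{entries: map[types.UID]cacheEntry{}}
	c.cond = sync.NewCond(&c.mu)
	return c
}

// GetNewerThan blocks until the entry for id is newer than minTime, then
// returns the cached status and error from that inspection.
func (c *podStatusCache) GetNewerThan(id types.UID, minTime time.Time) (*kubecontainer.PodStatus, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	for {
		if e, ok := c.entries[id]; ok && e.timestamp.After(minTime) {
			return e.status, e.err
		}
		c.cond.Wait() // woken whenever Set stores fresher data
	}
}

// Set records the latest inspection result and wakes any blocked readers.
func (c *podStatusCache) Set(id types.UID, status *kubecontainer.PodStatus, err error, ts time.Time) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries[id] = cacheEntry{status: status, err: err, timestamp: ts}
	c.cond.Broadcast()
}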
Example #8
// runPod runs a single pod and waits until all containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		pods, err := kl.containerRuntime.GetPods(false)
		if err != nil {
			return fmt.Errorf("failed to get kubelet pods: %v", err)
		}
		p := container.Pods(pods).FindPodByID(pod.UID)
		running, err := kl.isPodRunning(pod, p)
		if err != nil {
			return fmt.Errorf("failed to check pod status: %v", err)
		}
		if running {
			glog.Infof("pod %q containers running", pod.Name)
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", pod.Name)

		status, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
		if err != nil {
			glog.Errorf("Unable to get status for pod %q: %v", pod.Name, err)
		}
		glog.Infof("Creating a mirror pod for static pod %q", format.Pod(pod))
		if err := kl.podManager.CreateMirrorPod(pod); err != nil {
			glog.Errorf("Failed creating a mirror pod %q: %v", format.Pod(pod), err)
		}
		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)

		if err = kl.syncPod(pod, mirrorPod, status, kubetypes.SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod: %v", err)
		}
		if retry >= runOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, runOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay)
		time.Sleep(delay)
		retry++
		delay *= runOnceRetryDelayBackoff
	}
}
Example #9
// GetContainerInfo returns stats (from Cadvisor) for a container.
func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, containerName string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) {

	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.runtimeCache.GetPods()
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	container := pod.FindContainerByName(containerName)
	if container == nil {
		return nil, kubecontainer.ErrContainerNotFound
	}

	ci, err := kl.cadvisor.DockerContainer(container.ID.ID, req)
	if err != nil {
		return nil, err
	}
	return &ci, nil
}
Example #10
// GetContainerInfo returns stats (from Cadvisor) for a container.
func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, containerName string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) {
	cadvisorID := podFullName
	podUID = kl.podManager.TranslatePodUID(podUID)
	pods, err := kl.runtimeCache.GetPods()
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)

	// Get container stats
	if len(containerName) > 0 {
		container := pod.FindContainerByName(containerName)
		if container == nil {
			return nil, kubecontainer.ErrContainerNotFound
		}

		cadvisorID = container.ID.ID
	}

	var ci cadvisorapi.ContainerInfo
	switch kl.containerRuntime.Type() {
	case "docker":
		ci, err = kl.cadvisor.DockerContainer(cadvisorID, req)
	case "hyper":
		// TODO(feisky): Hyper container stats is not supported
		cadvisorID = podFullName
		ci, err = kl.cadvisor.HyperContainer(cadvisorID, req)
	default:
		err = fmt.Errorf("Container runtime %v not supported", kl.containerRuntime.Type())
	}

	if err != nil {
		return nil, err
	}

	return &ci, nil
}
Example #11
// relist queries the container runtime for the list of pods/containers, compares
// them with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
	glog.V(5).Infof("GenericPLEG: Relisting")
	timestamp := time.Now()

	if !g.lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(g.lastRelistTime))
	}
	defer func() {
		// Update the relist time.
		g.lastRelistTime = timestamp
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()

	// Get all the pods.
	podList, err := g.runtime.GetPods(true)
	if err != nil {
		glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
		return
	}
	pods := kubecontainer.Pods(podList)
	g.podRecords.setCurrent(pods)

	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			e := computeEvent(oldPod, pod, &container.ID)
			updateEvents(eventsByPodID, e)
		}
	}

	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detected again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err := g.updateCache(pod, pid); err != nil {
				glog.Errorf("PLEG: Ignoring events for pod %s/%s: %v", pod.Name, pod.Namespace, err)
				continue
			}
		}
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)
		for i := range events {
			// Filter out events that are not reliable and that no other components use yet.
			if events[i].Type == ContainerChanged || events[i].Type == ContainerRemoved {
				continue
			}
			g.eventChannel <- events[i]
		}
	}

	if g.cacheEnabled() {
		// Update the cache timestamp.  This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}
}
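Both relist variants rely on g.podRecords to diff the previous listing against the current one. Below is a hedged sketch of that bookkeeping, with the method set inferred from the calls above (setCurrent, getOld, getCurrent, update); the real structure may differ.

// Sketch of the old/current bookkeeping inferred from how relist uses
// g.podRecords; not the authoritative definition.
type podRecord struct {
	old     *kubecontainer.Pod // pod as seen in the previous relist
	current *kubecontainer.Pod // pod as seen in the relist in progress
}

type podRecords map[types.UID]*podRecord

func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
	if r, ok := pr[id]; ok {
		return r.old
	}
	return nil
}

func (pr podRecords) getCurrent(id types.UID) *kubecontainer.Pod {
	if r, ok := pr[id]; ok {
		return r.current
	}
	return nil
}

// setCurrent replaces the "current" side of every record with the fresh listing.
func (pr podRecords) setCurrent(pods kubecontainer.Pods) {
	for id := range pr {
		pr[id].current = nil
	}
	for _, pod := range pods {
		if r, ok := pr[pod.ID]; ok {
			r.current = pod
		} else {
			pr[pod.ID] = &podRecord{current: pod}
		}
	}
}

// update promotes "current" to "old" once the pod's events have been handled,
// dropping the record entirely when the pod has disappeared.
func (pr podRecords) update(id types.UID) {
	r, ok := pr[id]
	if !ok {
		return
	}
	if r.current == nil {
		delete(pr, id)
		return
	}
	r.old = r.current
	r.current = nil
}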
Example #12
// relist queries the container runtime for the list of pods/containers, compares
// them with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) relist() {
	glog.V(5).Infof("GenericPLEG: Relisting")

	if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInMicroseconds(lastRelistTime))
	}

	timestamp := g.clock.Now()
	// Update the relist time.
	g.updateRelistTime(timestamp)
	defer func() {
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()

	// Get all the pods.
	podList, err := g.runtime.GetPods(true)
	if err != nil {
		glog.Errorf("GenericPLEG: Unable to retrieve pods: %v", err)
		return
	}
	pods := kubecontainer.Pods(podList)
	g.podRecords.setCurrent(pods)

	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			events := computeEvents(oldPod, pod, &container.ID)
			for _, e := range events {
				updateEvents(eventsByPodID, e)
			}
		}
	}

	var needsReinspection map[types.UID]*kubecontainer.Pod
	if g.cacheEnabled() {
		needsReinspection = make(map[types.UID]*kubecontainer.Pod)
	}

	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detected again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err := g.updateCache(pod, pid); err != nil {
				glog.Errorf("PLEG: Ignoring events for pod %s/%s: %v", pod.Name, pod.Namespace, err)

				// make sure we try to reinspect the pod during the next relisting
				needsReinspection[pid] = pod

				continue
			} else if _, found := g.podsToReinspect[pid]; found {
				// this pod was in the list to reinspect and we did so because it had events, so remove it
				// from the list (we don't want the reinspection code below to inspect it a second time in
				// this relist execution)
				delete(g.podsToReinspect, pid)
			}
		}
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)
		for i := range events {
			// Filter out events that are not reliable and that no other components use yet.
			if events[i].Type == ContainerChanged {
				continue
			}
			g.eventChannel <- events[i]
		}
	}

	if g.cacheEnabled() {
		// reinspect any pods that failed inspection during the previous relist
		if len(g.podsToReinspect) > 0 {
			glog.V(5).Infof("GenericPLEG: Reinspecting pods that previously failed inspection")
			for pid, pod := range g.podsToReinspect {
				if err := g.updateCache(pod, pid); err != nil {
					glog.Errorf("PLEG: pod %s/%s failed reinspection: %v", pod.Name, pod.Namespace, err)
					needsReinspection[pid] = pod
				}
			}
		}

		// Update the cache timestamp.  This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}

	// make sure we retain the list of pods that need reinspecting the next time relist is called
	g.podsToReinspect = needsReinspection
}
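The events fed into eventsByPodID come from comparing a container's state in the old pod snapshot against the new one. Below is a hedged sketch of that mapping; getContainerState is a hypothetical helper and the state set is assumed, so the real computeEvents logic may differ.

// Hypothetical helper: report the state of container cid inside the given pod
// snapshot, or "non-existent" when the pod or container is absent.
func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) string {
	if pod == nil {
		return "non-existent"
	}
	for _, c := range pod.Containers {
		if c.ID == *cid {
			switch c.State {
			case kubecontainer.ContainerStateRunning:
				return "running"
			case kubecontainer.ContainerStateExited:
				return "exited"
			default:
				return "unknown"
			}
		}
	}
	return "non-existent"
}

// Sketch of mapping an old/new state pair to pod lifecycle events; illustrative
// only, not the actual computeEvents implementation.
func computeEventsSketch(oldPod, newPod *kubecontainer.Pod, cid *kubecontainer.ContainerID) []*PodLifecycleEvent {
	var pid types.UID
	switch {
	case oldPod != nil:
		pid = oldPod.ID
	case newPod != nil:
		pid = newPod.ID
	}
	oldState, newState := getContainerState(oldPod, cid), getContainerState(newPod, cid)
	if oldState == newState {
		return nil
	}
	switch newState {
	case "running":
		return []*PodLifecycleEvent{{ID: pid, Type: ContainerStarted, Data: cid.ID}}
	case "exited":
		return []*PodLifecycleEvent{{ID: pid, Type: ContainerDied, Data: cid.ID}}
	case "non-existent":
		return []*PodLifecycleEvent{{ID: pid, Type: ContainerRemoved, Data: cid.ID}}
	default:
		return []*PodLifecycleEvent{{ID: pid, Type: ContainerChanged, Data: cid.ID}}
	}
}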