// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
	if err != nil {
		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
		glog.Error(message)
		return "", message, err
	}

	// Create pod logs directory
	err = m.osInterface.MkdirAll(podSandboxConfig.GetLogDirectory(), 0755)
	if err != nil {
		message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
		glog.Error(message)
		return "", message, err
	}

	podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
	if err != nil {
		message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
		glog.Error(message)
		return "", message, err
	}

	return podSandBoxID, "", nil
}
// runOnce runs a given set of pods and returns their status.
func (kl *Kubelet) runOnce(pods []*api.Pod, retryDelay time.Duration) (results []RunPodResult, err error) {
	ch := make(chan RunPodResult)
	admitted := []*api.Pod{}
	for _, pod := range pods {
		// Check if we can admit the pod.
		if ok, reason, message := kl.canAdmitPod(append(admitted, pod), pod); !ok {
			kl.rejectPod(pod, reason, message)
		} else {
			admitted = append(admitted, pod)
		}
		go func(pod *api.Pod) {
			err := kl.runPod(pod, retryDelay)
			ch <- RunPodResult{pod, err}
		}(pod)
	}

	glog.Infof("waiting for %d pods", len(pods))
	failedPods := []string{}
	for i := 0; i < len(pods); i++ {
		res := <-ch
		results = append(results, res)
		if res.Err != nil {
			// TODO(proppy): report which containers failed the pod.
			glog.Infof("failed to start pod %q: %v", format.Pod(res.Pod), res.Err)
			failedPods = append(failedPods, format.Pod(res.Pod))
		} else {
			glog.Infof("started pod %q", format.Pod(res.Pod))
		}
	}
	if len(failedPods) > 0 {
		return results, fmt.Errorf("error running pods: %v", failedPods)
	}
	glog.Infof("%d pods started", len(pods))
	return results, err
}
func (m *manager) AddPod(pod *v1.Pod) {
	m.workerLock.Lock()
	defer m.workerLock.Unlock()

	key := probeKey{podUID: pod.UID}
	for _, c := range pod.Spec.Containers {
		key.containerName = c.Name

		if c.ReadinessProbe != nil {
			key.probeType = readiness
			if _, ok := m.workers[key]; ok {
				glog.Errorf("Readiness probe already exists! %v - %v", format.Pod(pod), c.Name)
				return
			}
			w := newWorker(m, readiness, pod, c)
			m.workers[key] = w
			go w.run()
		}

		if c.LivenessProbe != nil {
			key.probeType = liveness
			if _, ok := m.workers[key]; ok {
				glog.Errorf("Liveness probe already exists! %v - %v", format.Pod(pod), c.Name)
				return
			}
			w := newWorker(m, liveness, pod, c)
			m.workers[key] = w
			go w.run()
		}
	}
}
// podSandboxChanged checks whether the spec of the pod has changed and returns
// (changed, new attempt, original sandboxID if it exists).
func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *api.Pod, podStatus *kubecontainer.PodStatus) (changed bool, attempt uint32, sandboxID string) {
	if len(podStatus.SandboxStatuses) == 0 {
		glog.V(2).Infof("No sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
		return true, 0, ""
	}

	readySandboxCount := 0
	for _, s := range podStatus.SandboxStatuses {
		if s.GetState() == runtimeApi.PodSandBoxState_READY {
			readySandboxCount++
		}
	}

	// Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
	sandboxStatus := podStatus.SandboxStatuses[0]
	if readySandboxCount > 1 || sandboxStatus.GetState() != runtimeApi.PodSandBoxState_READY {
		glog.V(2).Infof("No ready sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
		return true, sandboxStatus.Metadata.GetAttempt() + 1, sandboxStatus.GetId()
	}

	// Needs to create a new sandbox when the network namespace changed.
	if sandboxStatus.Linux != nil && sandboxStatus.Linux.Namespaces.Options != nil &&
		sandboxStatus.Linux.Namespaces.Options.GetHostNetwork() != kubecontainer.IsHostNetworkPod(pod) {
		glog.V(2).Infof("Sandbox for pod %q has changed. Need to start a new one", format.Pod(pod))
		return true, sandboxStatus.Metadata.GetAttempt() + 1, ""
	}

	return false, sandboxStatus.Metadata.GetAttempt(), sandboxStatus.GetId()
}
// updateStatusInternal updates the internal status cache, and queues an update to the api server if
// necessary. Returns whether an update was triggered.
// This method IS NOT THREAD SAFE and must be called from a locked function.
func (m *manager) updateStatusInternal(pod *api.Pod, status api.PodStatus, forceUpdate bool) bool {
	var oldStatus api.PodStatus
	cachedStatus, isCached := m.podStatuses[pod.UID]
	if isCached {
		oldStatus = cachedStatus.status
	} else if mirrorPod, ok := m.podManager.GetMirrorPodByPod(pod); ok {
		oldStatus = mirrorPod.Status
	} else {
		oldStatus = pod.Status
	}

	// Set ReadyCondition.LastTransitionTime.
	if readyCondition := api.GetPodReadyCondition(status); readyCondition != nil {
		// Need to set LastTransitionTime.
		lastTransitionTime := unversioned.Now()
		oldReadyCondition := api.GetPodReadyCondition(oldStatus)
		if oldReadyCondition != nil && readyCondition.Status == oldReadyCondition.Status {
			lastTransitionTime = oldReadyCondition.LastTransitionTime
		}
		readyCondition.LastTransitionTime = lastTransitionTime
	}

	// Ensure that the start time does not change across updates.
	if oldStatus.StartTime != nil && !oldStatus.StartTime.IsZero() {
		status.StartTime = oldStatus.StartTime
	} else if status.StartTime.IsZero() {
		// If the status has no start time, we need to set an initial time.
		now := unversioned.Now()
		status.StartTime = &now
	}

	normalizeStatus(&status)
	// The intent here is to prevent concurrent updates to a pod's status from
	// clobbering each other so the phase of a pod progresses monotonically.
	if isCached && isStatusEqual(&cachedStatus.status, &status) && !forceUpdate {
		glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", format.Pod(pod), status)
		return false // No new status.
	}

	newStatus := versionedPodStatus{
		status:       status,
		version:      cachedStatus.version + 1,
		podName:      pod.Name,
		podNamespace: pod.Namespace,
	}
	m.podStatuses[pod.UID] = newStatus

	select {
	case m.podStatusChannel <- podStatusSyncRequest{pod.UID, newStatus}:
		return true
	default:
		// Let the periodic syncBatch handle the update if the channel is full.
		// We can't block, since we hold the mutex lock.
		glog.V(4).Infof("Skipping the status update for pod %q for now because the channel is full; status: %+v", format.Pod(pod), status)
		return false
	}
}
func (m *manager) SetContainerReadiness(pod *api.Pod, containerID kubecontainer.ContainerID, ready bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	oldStatus, found := m.podStatuses[pod.UID]
	if !found {
		glog.Warningf("Container readiness changed before pod has synced: %q - %q", format.Pod(pod), containerID.String())
		return
	}

	// Find the container to update.
	containerIndex := -1
	for i, c := range oldStatus.status.ContainerStatuses {
		if c.ContainerID == containerID.String() {
			containerIndex = i
			break
		}
	}
	if containerIndex == -1 {
		glog.Warningf("Container readiness changed for unknown container: %q - %q", format.Pod(pod), containerID.String())
		return
	}

	if oldStatus.status.ContainerStatuses[containerIndex].Ready == ready {
		glog.V(4).Infof("Container readiness unchanged (%v): %q - %q", ready, format.Pod(pod), containerID.String())
		return
	}

	// Make sure we're not updating the cached version.
	clone, err := api.Scheme.DeepCopy(&oldStatus.status)
	if err != nil {
		glog.Errorf("Failed to clone status %+v: %v", oldStatus.status, err)
		return
	}
	status := *clone.(*api.PodStatus)
	status.ContainerStatuses[containerIndex].Ready = ready

	// Update pod condition.
	readyConditionIndex := -1
	for i, condition := range status.Conditions {
		if condition.Type == api.PodReady {
			readyConditionIndex = i
			break
		}
	}
	readyCondition := GeneratePodReadyCondition(&pod.Spec, status.ContainerStatuses, status.Phase)
	if readyConditionIndex != -1 {
		status.Conditions[readyConditionIndex] = readyCondition
	} else {
		glog.Warningf("PodStatus missing PodReady condition: %+v", status)
		status.Conditions = append(status.Conditions, readyCondition)
	}

	m.updateStatusInternal(pod, status)
}
// Iterate through all pods in the desired state of world, and remove them if they
// no longer exist.
func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
	var runningPods []*kubecontainer.Pod

	runningPodsFetched := false
	for _, volumeToMount := range dswp.desiredStateOfWorld.GetVolumesToMount() {
		if _, podExists := dswp.podManager.GetPodByUID(volumeToMount.Pod.UID); podExists {
			continue
		}

		// Once a pod has been deleted from the kubelet pod manager, do not delete
		// it immediately from the volume manager. Instead, check the kubelet
		// containerRuntime to verify that all containers in the pod have been
		// terminated.
		if !runningPodsFetched {
			var getPodsErr error
			runningPods, getPodsErr = dswp.kubeContainerRuntime.GetPods(false)
			if getPodsErr != nil {
				glog.Errorf(
					"kubeContainerRuntime.GetPods returned error %v.",
					getPodsErr)
				continue
			}

			runningPodsFetched = true
			dswp.timeOfLastGetPodStatus = time.Now()
		}

		runningContainers := false
		for _, runningPod := range runningPods {
			if runningPod.ID == volumeToMount.Pod.UID {
				if len(runningPod.Containers) > 0 {
					runningContainers = true
				}
				break
			}
		}

		if runningContainers {
			glog.V(5).Infof(
				"Pod %q has been removed from pod manager. However, it still has one or more containers in the non-exited state. Therefore it will not be removed from volume manager.",
				format.Pod(volumeToMount.Pod))
			continue
		}

		glog.V(5).Infof(
			"Removing volume %q (volSpec=%q) for pod %q from desired state.",
			volumeToMount.VolumeName,
			volumeToMount.VolumeSpec.Name(),
			format.Pod(volumeToMount.Pod))

		dswp.desiredStateOfWorld.DeletePodFromVolume(
			volumeToMount.PodName, volumeToMount.VolumeName)
		dswp.deleteProcessedPod(volumeToMount.PodName)
	}
}
func (m *manager) SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	pod, ok := m.podManager.GetPodByUID(podUID)
	if !ok {
		glog.V(4).Infof("Pod %q has been deleted, no need to update readiness", string(podUID))
		return
	}

	oldStatus, found := m.podStatuses[pod.UID]
	if !found {
		glog.Warningf("Container readiness changed before pod has synced: %q - %q", format.Pod(pod), containerID.String())
		return
	}

	// Find the container to update.
	containerStatus, _, ok := findContainerStatus(&oldStatus.status, containerID.String())
	if !ok {
		glog.Warningf("Container readiness changed for unknown container: %q - %q", format.Pod(pod), containerID.String())
		return
	}

	if containerStatus.Ready == ready {
		glog.V(4).Infof("Container readiness unchanged (%v): %q - %q", ready, format.Pod(pod), containerID.String())
		return
	}

	// Make sure we're not updating the cached version.
	status, err := copyStatus(&oldStatus.status)
	if err != nil {
		return
	}
	containerStatus, _, _ = findContainerStatus(&status, containerID.String())
	containerStatus.Ready = ready

	// Update pod condition.
	readyConditionIndex := -1
	for i, condition := range status.Conditions {
		if condition.Type == api.PodReady {
			readyConditionIndex = i
			break
		}
	}
	readyCondition := GeneratePodReadyCondition(&pod.Spec, status.ContainerStatuses, status.Phase)
	if readyConditionIndex != -1 {
		status.Conditions[readyConditionIndex] = readyCondition
	} else {
		glog.Warningf("PodStatus missing PodReady condition: %+v", status)
		status.Conditions = append(status.Conditions, readyCondition)
	}

	m.updateStatusInternal(pod, status, false)
}
// syncPod syncs the given status with the API server. The caller must not hold the lock.
func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
	if !m.needsUpdate(uid, status) {
		glog.V(1).Infof("Status for pod %q is up-to-date; skipping", uid)
		return
	}

	// TODO: make me easier to express from client code
	pod, err := m.kubeClient.Core().Pods(status.podNamespace).Get(status.podName, metav1.GetOptions{})
	if errors.IsNotFound(err) {
		glog.V(3).Infof("Pod %q (%s) does not exist on the server", status.podName, uid)
		// If the Pod is deleted the status will be cleared in
		// RemoveOrphanedStatuses, so we just ignore the update here.
		return
	}
	if err == nil {
		translatedUID := m.podManager.TranslatePodUID(pod.UID)
		if len(translatedUID) > 0 && translatedUID != uid {
			glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID)
			m.deletePodStatus(uid)
			return
		}
		pod.Status = status.status
		if err := podutil.SetInitContainersStatusesAnnotations(pod); err != nil {
			glog.Error(err)
		}
		// TODO: handle conflict as a retry, make that easier too.
		pod, err = m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod)
		if err == nil {
			glog.V(3).Infof("Status for pod %q updated successfully: %+v", format.Pod(pod), status)
			m.apiStatusVersions[pod.UID] = status.version

			if kubepod.IsMirrorPod(pod) {
				// We don't handle graceful deletion of mirror pods.
				return
			}
			if pod.DeletionTimestamp == nil {
				return
			}
			if !notRunning(pod.Status.ContainerStatuses) {
				glog.V(3).Infof("Pod %q is terminated, but some containers are still running", format.Pod(pod))
				return
			}
			deleteOptions := v1.NewDeleteOptions(0)
			// Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace.
			deleteOptions.Preconditions = v1.NewUIDPreconditions(string(pod.UID))
			if err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions); err == nil {
				glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod))
				m.deletePodStatus(uid)
				return
			}
		}
	}

	// We failed to update status, wait for periodic sync to retry.
	glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
}
// hasHostPortConflicts detects pods with conflicting host ports.
func hasHostPortConflicts(pods []*api.Pod) bool {
	ports := sets.String{}
	for _, pod := range pods {
		if errs := validation.AccumulateUniqueHostPorts(pod.Spec.Containers, &ports, field.NewPath("spec", "containers")); len(errs) > 0 {
			glog.Errorf("Pod %q: HostPort is already allocated, ignoring: %v", format.Pod(pod), errs)
			return true
		}
		if errs := validation.AccumulateUniqueHostPorts(pod.Spec.InitContainers, &ports, field.NewPath("spec", "initContainers")); len(errs) > 0 {
			glog.Errorf("Pod %q: HostPort is already allocated, ignoring: %v", format.Pod(pod), errs)
			return true
		}
	}
	return false
}
// RunPod first creates the unit file for a pod, and then
// starts the unit over d-bus.
func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
	glog.V(4).Infof("Rkt starts to run pod: name %q.", format.Pod(pod))

	name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets)

	// Set container references and generate events.
	// If preparePod fails, then send out 'failed' events for each container.
	// Otherwise, store the container references so we can use them later to send events.
	for i, c := range pod.Spec.Containers {
		ref, err := kubecontainer.GenerateContainerRef(pod, &c)
		if err != nil {
			glog.Errorf("Couldn't make a ref to pod %q, container %v: '%v'", format.Pod(pod), c.Name, err)
			continue
		}
		if prepareErr != nil {
			r.recorder.Eventf(ref, api.EventTypeWarning, kubecontainer.FailedToCreateContainer, "Failed to create rkt container with error: %v", prepareErr)
			continue
		}
		containerID := runtimePod.Containers[i].ID
		r.containerRefManager.SetRef(containerID, ref)
	}

	if prepareErr != nil {
		return prepareErr
	}

	r.generateEvents(runtimePod, "Created", nil)

	// RestartUnit has the same effect as StartUnit if the unit is not running; in addition, it can
	// restart a unit if the unit file has changed and been reloaded.
	reschan := make(chan string)
	_, err := r.systemd.RestartUnit(name, "replace", reschan)
	if err != nil {
		r.generateEvents(runtimePod, "Failed", err)
		return err
	}

	res := <-reschan
	if res != "done" {
		err := fmt.Errorf("Failed to restart unit %q: %s", name, res)
		r.generateEvents(runtimePod, "Failed", err)
		return err
	}

	r.generateEvents(runtimePod, "Started", nil)

	return nil
}
// Admit rejects a pod if it is not safe to admit for node stability.
func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
	m.RLock()
	defer m.RUnlock()
	if len(m.nodeConditions) == 0 {
		return lifecycle.PodAdmitResult{Admit: true}
	}

	// Check the node conditions to identify the resource under pressure.
	// The resource can only be either disk or memory; set the default to disk.
	resource := api.ResourceStorage
	if hasNodeCondition(m.nodeConditions, api.NodeMemoryPressure) {
		resource = api.ResourceMemory
		// The node has memory pressure; admit the pod if it is not best-effort.
		notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
		if notBestEffort {
			return lifecycle.PodAdmitResult{Admit: true}
		}
	}

	// Reject pods when under memory pressure (if the pod is best-effort), or when under disk pressure.
	glog.Warningf("Failed to admit pod %q - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
	return lifecycle.PodAdmitResult{
		Admit:   false,
		Reason:  reason,
		Message: getMessage(resource),
	}
}
// TODO(random-liu): This should be removed soon after rkt implements GetPodStatus.
func ShouldContainerBeRestartedOldVersion(container *api.Container, pod *api.Pod, podStatus *api.PodStatus) bool {
	// Get all dead container status.
	var resultStatus []*api.ContainerStatus
	for i, containerStatus := range podStatus.ContainerStatuses {
		if containerStatus.Name == container.Name && containerStatus.State.Terminated != nil {
			resultStatus = append(resultStatus, &podStatus.ContainerStatuses[i])
		}
	}

	// Check RestartPolicy for dead container.
	if len(resultStatus) > 0 {
		if pod.Spec.RestartPolicy == api.RestartPolicyNever {
			glog.V(4).Infof("Already ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
			return false
		}
		if pod.Spec.RestartPolicy == api.RestartPolicyOnFailure {
			// Check the exit code of last run. Note: This assumes the result is sorted
			// by the created time in reverse order.
			if resultStatus[0].State.Terminated.ExitCode == 0 {
				glog.V(4).Infof("Already successfully ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
				return false
			}
		}
	}
	return true
}
// updateStatusInternal updates the internal status cache, and queues an update to the api server if
// necessary. Returns whether an update was triggered.
// This method IS NOT THREAD SAFE and must be called from a locked function.
func (m *manager) updateStatusInternal(pod *api.Pod, status api.PodStatus) bool {
	// The intent here is to prevent concurrent updates to a pod's status from
	// clobbering each other so the phase of a pod progresses monotonically.
	oldStatus, found := m.podStatuses[pod.UID]
	if found && isStatusEqual(&oldStatus.status, &status) && pod.DeletionTimestamp == nil {
		glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", format.Pod(pod), status)
		return false // No new status.
	}

	newStatus := versionedPodStatus{
		status:       status,
		version:      oldStatus.version + 1,
		podName:      pod.Name,
		podNamespace: pod.Namespace,
	}
	m.podStatuses[pod.UID] = newStatus

	select {
	case m.podStatusChannel <- podStatusSyncRequest{pod.UID, newStatus}:
		return true
	default:
		// Let the periodic syncBatch handle the update if the channel is full.
		// We can't block, since we hold the mutex lock.
		return false
	}
}
// needsReconcile compares the given status with the status in the pod manager (which
// in fact comes from apiserver), and returns whether the status needs to be reconciled with
// the apiserver. When the pod status is inconsistent between the apiserver and the kubelet,
// the kubelet should forcibly send an update to reconcile the inconsistency, because the kubelet
// should be the source of truth of pod status.
// NOTE(random-liu): It's simpler to pass in mirror pod uid and get mirror pod by uid, but
// now the pod manager only supports getting mirror pod by static pod, so we have to pass
// static pod uid here.
// TODO(random-liu): Simplify the logic when mirror pod manager is added.
func (m *manager) needsReconcile(uid types.UID, status api.PodStatus) bool {
	// The pod could be a static pod, so we should translate first.
	pod, ok := m.podManager.GetPodByUID(uid)
	if !ok {
		// Although we get the uid from the pod manager in syncBatch, it could still be deleted before here.
		glog.V(4).Infof("Pod %q has been deleted, no need to reconcile", string(uid))
		return false
	}
	// If the pod is a static pod, we should check its mirror pod, because only the status in the mirror pod is meaningful to us.
	if kubepod.IsStaticPod(pod) {
		mirrorPod, ok := m.podManager.GetMirrorPodByPod(pod)
		if !ok {
			glog.V(4).Infof("Static pod %q has no corresponding mirror pod, no need to reconcile", format.Pod(pod))
			return false
		}
		pod = mirrorPod
	}

	if isStatusEqual(&pod.Status, &status) {
		// If the status from the source is the same as the cached status,
		// reconciliation is not needed. Just return.
		return false
	}
	glog.V(3).Infof("Pod status is inconsistent with cached status, a reconciliation should be triggered:\n %+v", util.ObjectDiff(pod.Status, status))

	return true
}
func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
	var lastSyncTime time.Time
	for update := range podUpdates {
		err := func() error {
			podUID := update.Pod.UID
			// This is a blocking call that would return only if the cache
			// has an entry for the pod that is newer than minRuntimeCacheTime.
			// This ensures the worker doesn't start syncing until
			// after the cache is at least newer than the finished time of
			// the previous sync.
			status, err := p.podCache.GetNewerThan(podUID, lastSyncTime)
			if err != nil {
				return err
			}
			err = p.syncPodFn(syncPodOptions{
				mirrorPod:      update.MirrorPod,
				pod:            update.Pod,
				podStatus:      status,
				killPodOptions: update.KillPodOptions,
				updateType:     update.UpdateType,
			})
			lastSyncTime = time.Now()
			return err
		}()
		// notify the call-back function if the operation succeeded or not
		if update.OnCompleteFunc != nil {
			update.OnCompleteFunc(err)
		}
		if err != nil {
			glog.Errorf("Error syncing pod %s (%q), skipping: %v", update.Pod.UID, format.Pod(update.Pod), err)
			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "Error syncing pod, skipping: %v", err)
		}
		p.wrapUp(update.Pod.UID, err)
	}
}
// markAllPodsNotReady updates the ready status of all pods running on the given node from the master,
// setting it to false. It returns an aggregated error if any status update fails.
func (nc *NodeController) markAllPodsNotReady(nodeName string) error {
	glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
	opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(client.PodHost, nodeName)}
	pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(opts)
	if err != nil {
		return err
	}

	errMsg := []string{}
	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}

		for i, cond := range pod.Status.Conditions {
			if cond.Type == api.PodReady {
				pod.Status.Conditions[i].Status = api.ConditionFalse
				glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
				_, err := nc.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(&pod)
				if err != nil {
					glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
					errMsg = append(errMsg, fmt.Sprintf("%v", err))
				}
				break
			}
		}
	}
	if len(errMsg) == 0 {
		return nil
	}

	return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}
// ShouldContainerBeRestarted checks whether a container needs to be restarted.
// TODO(yifan): Think about how to refactor this.
func ShouldContainerBeRestarted(container *api.Container, pod *api.Pod, podStatus *PodStatus) bool {
	// Get latest container status.
	status := podStatus.FindContainerStatusByName(container.Name)
	// If the container was never started before, we should start it.
	// NOTE(random-liu): If all historical containers were GC'd, we'll also return true here.
	if status == nil {
		return true
	}
	// Check whether container is running
	if status.State == ContainerStateRunning {
		return false
	}
	// Always restart container in unknown state now
	if status.State == ContainerStateUnknown {
		return true
	}
	// Check RestartPolicy for dead container
	if pod.Spec.RestartPolicy == api.RestartPolicyNever {
		glog.V(4).Infof("Already ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
		return false
	}
	if pod.Spec.RestartPolicy == api.RestartPolicyOnFailure {
		// Check the exit code.
		if status.ExitCode == 0 {
			glog.V(4).Infof("Already successfully ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
			return false
		}
	}
	return true
}
// GetPodStatus retrieves the status of the pod, including the
// information of all containers in the pod that are visible in Runtime.
func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
	// Now we retain restart count of container as a container label. Each time a container
	// restarts, pod will read the restart count from the registered dead container, increment
	// it to get the new restart count, and then add a label with the new restart count on
	// the newly started container.
	// However, there are some limitations of this method:
	//	1. When all dead containers were garbage collected, the container status could
	//	   not get the historical value and would be *inaccurate*. Fortunately, the chance
	//	   is really slim.
	//	2. When working with old version containers which have no restart count label,
	//	   we can only assume their restart count is 0.
	// Anyhow, we only promised "best-effort" restart count reporting, we can just ignore
	// these limitations now.
	// TODO: move this comment to SyncPod.
	podSandboxIDs, err := m.getSandboxIDByPodUID(string(uid), nil)
	if err != nil {
		return nil, err
	}

	podFullName := format.Pod(&api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:      name,
			Namespace: namespace,
			UID:       uid,
		},
	})
	glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q", podSandboxIDs, podFullName)

	sandboxStatuses := make([]*runtimeApi.PodSandboxStatus, len(podSandboxIDs))
	podIP := ""
	for idx, podSandboxID := range podSandboxIDs {
		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
		if err != nil {
			glog.Errorf("PodSandboxStatus of sandbox %q for pod %q error: %v", podSandboxID, podFullName, err)
			return nil, err
		}
		sandboxStatuses[idx] = podSandboxStatus

		// Only get pod IP from the latest sandbox
		if idx == 0 && podSandboxStatus.GetState() == runtimeApi.PodSandBoxState_READY {
			podIP = m.determinePodSandboxIP(namespace, name, podSandboxStatus)
		}
	}

	// Get statuses of all containers visible in the pod.
	containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
	if err != nil {
		glog.Errorf("getPodContainerStatuses for pod %q failed: %v", podFullName, err)
		return nil, err
	}

	return &kubecontainer.PodStatus{
		ID:                uid,
		Name:              name,
		Namespace:         namespace,
		IP:                podIP,
		SandboxStatuses:   sandboxStatuses,
		ContainerStatuses: containerStatuses,
	}, nil
}
func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
	timeout := time.Duration(p.TimeoutSeconds) * time.Second
	if p.Exec != nil {
		glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v, Command: %v", pod, container, p.Exec.Command)
		return pb.exec.Probe(pb.newExecInContainer(container, containerID, p.Exec.Command))
	}
	if p.HTTPGet != nil {
		scheme := strings.ToLower(string(p.HTTPGet.Scheme))
		host := p.HTTPGet.Host
		if host == "" {
			host = status.PodIP
		}
		port, err := extractPort(p.HTTPGet.Port, container)
		if err != nil {
			return probe.Unknown, "", err
		}
		path := p.HTTPGet.Path
		glog.V(4).Infof("HTTP-Probe Host: %v://%v, Port: %v, Path: %v", scheme, host, port, path)
		url := formatURL(scheme, host, port, path)
		headers := buildHeader(p.HTTPGet.HTTPHeaders)
		glog.V(4).Infof("HTTP-Probe Headers: %v", headers)
		return pb.http.Probe(url, headers, timeout)
	}
	if p.TCPSocket != nil {
		port, err := extractPort(p.TCPSocket.Port, container)
		if err != nil {
			return probe.Unknown, "", err
		}
		glog.V(4).Infof("TCP-Probe PodIP: %v, Port: %v, Timeout: %v", status.PodIP, port, timeout)
		return pb.tcp.Probe(status.PodIP, port, timeout)
	}
	glog.Warningf("Failed to find probe builder for container: %v", container)
	return probe.Unknown, "", fmt.Errorf("Missing probe handler for %s:%s", format.Pod(pod), container.Name)
}
// If a container is still in backoff, the function will return a brief backoff error and
// a detailed error message.
func (m *kubeGenericRuntimeManager) doBackOff(pod *api.Pod, container *api.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, string, error) {
	var cStatus *kubecontainer.ContainerStatus
	for _, c := range podStatus.ContainerStatuses {
		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
			cStatus = c
			break
		}
	}
	if cStatus == nil {
		return false, "", nil
	}

	glog.Infof("checking backoff for container %q in pod %q", container.Name, format.Pod(pod))
	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
	ts := cStatus.FinishedAt
	// backOff requires a unique key to identify the container.
	key := getStableKey(pod, container)
	if backOff.IsInBackOffSince(key, ts) {
		if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
			m.recorder.Eventf(ref, api.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed container")
		}
		err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod))
		glog.Infof("%s", err.Error())
		return true, err.Error(), kubecontainer.ErrCrashLoopBackOff
	}

	backOff.Next(key, ts)
	return false, "", nil
}
// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *api.Pod, attempt uint32) (string, string, error) {
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
	if err != nil {
		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
		glog.Error(message)
		return "", message, err
	}

	podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
	if err != nil {
		message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
		glog.Error(message)
		return "", message, err
	}

	return podSandBoxID, "", nil
}
// GetContainerLogs uses rkt's GetLogs API to get the logs of the container.
// By default, it returns a snapshot of the container log. Set |follow| to true to
// stream the log. Set |follow| to false and specify the number of lines (e.g.
// "100" or "all") to tail the log.
//
// TODO(yifan): This doesn't work with lkvm stage1 yet.
func (r *Runtime) GetContainerLogs(pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error {
	id, err := parseContainerID(containerID)
	if err != nil {
		return err
	}

	var since int64
	if logOptions.SinceSeconds != nil {
		t := metav1.Now().Add(-time.Duration(*logOptions.SinceSeconds) * time.Second)
		since = t.Unix()
	}
	if logOptions.SinceTime != nil {
		since = logOptions.SinceTime.Unix()
	}
	getLogsRequest := &rktapi.GetLogsRequest{
		PodId:     id.uuid,
		AppName:   id.appName,
		Follow:    logOptions.Follow,
		SinceTime: since,
	}

	if logOptions.TailLines != nil {
		getLogsRequest.Lines = int32(*logOptions.TailLines)
	}

	stream, err := r.apisvc.GetLogs(context.Background(), getLogsRequest)
	if err != nil {
		glog.Errorf("rkt: Failed to create log stream for pod %q: %v", format.Pod(pod), err)
		return err
	}

	for {
		log, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			glog.Errorf("rkt: Failed to receive log for pod %q: %v", format.Pod(pod), err)
			return err
		}
		processLines(log.Lines, logOptions, stdout, stderr)
	}

	return nil
}
// runPod runs a single pod and waits until all of its containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		status, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
		if err != nil {
			return fmt.Errorf("Unable to get status for pod %q: %v", format.Pod(pod), err)
		}

		if kl.isPodRunning(pod, status) {
			glog.Infof("pod %q containers running", format.Pod(pod))
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", format.Pod(pod))

		glog.Infof("Creating a mirror pod for static pod %q", format.Pod(pod))
		if err := kl.podManager.CreateMirrorPod(pod); err != nil {
			glog.Errorf("Failed creating a mirror pod %q: %v", format.Pod(pod), err)
		}
		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
		if err = kl.syncPod(pod, mirrorPod, status, kubetypes.SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod %q: %v", format.Pod(pod), err)
		}
		if retry >= runOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", format.Pod(pod), runOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", format.Pod(pod), delay)
		time.Sleep(delay)
		retry++
		delay *= runOnceRetryDelayBackoff
	}
}
// isPodRunning returns true if all containers of a manifest are running.
func (kl *Kubelet) isPodRunning(pod *api.Pod, status *kubecontainer.PodStatus) bool {
	for _, c := range pod.Spec.Containers {
		cs := status.FindContainerStatusByName(c.Name)
		if cs == nil || cs.State != kubecontainer.ContainerStateRunning {
			glog.Infof("Container %q for pod %q not running", c.Name, format.Pod(pod))
			return false
		}
	}
	return true
}
// deletePods will delete all pods from master running on given node, and return true
// if any pods were deleted, or were found pending deletion.
func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, nodeName, nodeUID string, daemonStore cache.StoreToDaemonSetLister) (bool, error) {
	remaining := false
	selector := fields.OneTermEqualSelector(api.PodHostField, nodeName)
	options := api.ListOptions{FieldSelector: selector}
	pods, err := kubeClient.Core().Pods(api.NamespaceAll).List(options)
	var updateErrList []error

	if err != nil {
		return remaining, err
	}

	if len(pods.Items) > 0 {
		recordNodeEvent(recorder, nodeName, nodeUID, api.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
	}

	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}

		// Set reason and message in the pod object.
		if _, err = setPodTerminationReason(kubeClient, &pod, nodeName); err != nil {
			if errors.IsConflict(err) {
				updateErrList = append(updateErrList, fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err))
				continue
			}
		}
		// if the pod has already been marked for deletion, we still return true that there are remaining pods.
		if pod.DeletionGracePeriodSeconds != nil {
			remaining = true
			continue
		}
		// if the pod is managed by a daemonset, ignore it
		_, err := daemonStore.GetPodDaemonSets(&pod)
		if err == nil { // No error means at least one daemonset was found
			continue
		}

		glog.V(2).Infof("Starting deletion of pod %v", pod.Name)
		recorder.Eventf(&pod, api.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
		if err := kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
			return false, err
		}
		remaining = true
	}

	if len(updateErrList) > 0 {
		return false, utilerrors.NewAggregate(updateErrList)
	}
	return remaining, nil
}
func (vm *volumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
	expectedVolumes := getExpectedVolumes(pod)
	if len(expectedVolumes) == 0 {
		// No volumes to verify
		return nil
	}

	glog.V(3).Infof("Waiting for volumes to attach and mount for pod %q", format.Pod(pod))
	uniquePodName := volumehelper.GetUniquePodName(pod)

	// Some pods expect to have Setup called over and over again to update.
	// Remount plugins for which this is true. (Atomically updating volumes,
	// like Downward API, depend on this to update the contents of the volume).
	vm.desiredStateOfWorldPopulator.ReprocessPod(uniquePodName)
	vm.actualStateOfWorld.MarkRemountRequired(uniquePodName)

	err := wait.Poll(
		podAttachAndMountRetryInterval,
		podAttachAndMountTimeout,
		vm.verifyVolumesMountedFunc(uniquePodName, expectedVolumes))

	if err != nil {
		// Timeout expired
		unmountedVolumes := vm.getUnmountedVolumes(uniquePodName, expectedVolumes)
		if len(unmountedVolumes) == 0 {
			return nil
		}

		return fmt.Errorf(
			"timeout expired waiting for volumes to attach/mount for pod %q/%q. list of unattached/unmounted volumes=%v",
			pod.Namespace,
			pod.Name,
			unmountedVolumes)
	}

	glog.V(3).Infof("All volumes are attached and mounted for pod %q", format.Pod(pod))
	return nil
}
func (m *manager) SetContainerReadiness(pod *api.Pod, containerID kubecontainer.ContainerID, ready bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	oldStatus, found := m.podStatuses[pod.UID]
	if !found {
		glog.Warningf("Container readiness changed before pod has synced: %q - %q", format.Pod(pod), containerID.String())
		return
	}
	status := oldStatus.status

	// Find the container to update.
	containerIndex := -1
	for i, c := range status.ContainerStatuses {
		if c.ContainerID == containerID.String() {
			containerIndex = i
			break
		}
	}
	if containerIndex == -1 {
		glog.Warningf("Container readiness changed for unknown container: %q - %q", format.Pod(pod), containerID.String())
		return
	}

	if status.ContainerStatuses[containerIndex].Ready == ready {
		glog.V(4).Infof("Container readiness unchanged (%v): %q - %q", ready, format.Pod(pod), containerID.String())
		return
	}

	// Make sure we're not updating the cached version.
	status.ContainerStatuses = make([]api.ContainerStatus, len(status.ContainerStatuses))
	copy(status.ContainerStatuses, oldStatus.status.ContainerStatuses)
	status.ContainerStatuses[containerIndex].Ready = ready
	m.updateStatusInternal(pod, status)
}
// isPodRunning returns true if all containers of a manifest are running.
func (kl *Kubelet) isPodRunning(pod *api.Pod, runningPod container.Pod) (bool, error) {
	// TODO(random-liu): Change this to new pod status
	status, err := kl.containerRuntime.GetAPIPodStatus(pod)
	if err != nil {
		glog.Infof("Failed to get the status of pod %q: %v", format.Pod(pod), err)
		return false, err
	}
	for _, st := range status.ContainerStatuses {
		if st.State.Running == nil {
			glog.Infof("Container %q not running: %#v", st.Name, st.State)
			return false, nil
		}
	}
	return true, nil
}
// generateAPIPodStatus creates the final API pod status for a pod, given the
// internal pod status.
func (kl *Kubelet) generateAPIPodStatus(pod *api.Pod, podStatus *kubecontainer.PodStatus) api.PodStatus {
	glog.V(3).Infof("Generating status for %q", format.Pod(pod))

	// Check if an internal module has requested that the pod be evicted.
	for _, podSyncHandler := range kl.PodSyncHandlers {
		if result := podSyncHandler.ShouldEvict(pod); result.Evict {
			return api.PodStatus{
				Phase:   api.PodFailed,
				Reason:  result.Reason,
				Message: result.Message,
			}
		}
	}

	s := kl.convertStatusToAPIStatus(pod, podStatus)

	// Assume info is ready to process
	spec := &pod.Spec
	allStatus := append(append([]api.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
	s.Phase = GetPhase(spec, allStatus)
	kl.probeManager.UpdatePodStatus(pod.UID, s)
	s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(spec, s.InitContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(spec, s.ContainerStatuses, s.Phase))
	// s (the PodStatus we are creating) will not have a PodScheduled condition yet, because convertStatusToAPIStatus()
	// does not create one. If the existing PodStatus has a PodScheduled condition, then copy it into s and make sure
	// it is set to true. If the existing PodStatus does not have a PodScheduled condition, then create one that is set to true.
	if _, oldPodScheduled := api.GetPodCondition(&pod.Status, api.PodScheduled); oldPodScheduled != nil {
		s.Conditions = append(s.Conditions, *oldPodScheduled)
	}
	api.UpdatePodCondition(&pod.Status, &api.PodCondition{
		Type:   api.PodScheduled,
		Status: api.ConditionTrue,
	})

	if !kl.standaloneMode {
		hostIP, err := kl.getHostIPAnyWay()
		if err != nil {
			glog.V(4).Infof("Cannot get host IP: %v", err)
		} else {
			s.HostIP = hostIP.String()
			if podUsesHostNetwork(pod) && s.PodIP == "" {
				s.PodIP = hostIP.String()
			}
		}
	}

	return *s
}