// syncBatch syncs pods statuses with the apiserver. func (s *statusManager) syncBatch() error { syncRequest := <-s.podStatusChannel pod := syncRequest.pod podFullName := kubecontainer.GetPodFullName(pod) status := syncRequest.status var err error statusPod := &api.Pod{ ObjectMeta: pod.ObjectMeta, } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) if err == nil { statusPod.Status = status _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod)) return nil } } // We failed to update status. In order to make sure we retry next time // we delete cached value. This may result in an additional update, but // this is ok. // Doing this synchronously will lead to a deadlock if the podStatusChannel // is full, and the pod worker holding the lock is waiting on this method // to clear the channel. Even if this delete never runs subsequent container // changes on the node should trigger updates. go s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err) }
func (m *manager) AddPod(pod *api.Pod) { m.workerLock.Lock() defer m.workerLock.Unlock() key := probeKey{podUID: pod.UID} for _, c := range pod.Spec.Containers { key.containerName = c.Name if c.ReadinessProbe != nil { key.probeType = readiness if _, ok := m.workers[key]; ok { glog.Errorf("Readiness probe already exists! %v - %v", kubeutil.FormatPodName(pod), c.Name) return } w := newWorker(m, readiness, pod, c) m.workers[key] = w go w.run() } if c.LivenessProbe != nil { key.probeType = liveness if _, ok := m.workers[key]; ok { glog.Errorf("Liveness probe already exists! %v - %v", kubeutil.FormatPodName(pod), c.Name) return } w := newWorker(m, liveness, pod, c) m.workers[key] = w go w.run() } } }
// doProbe probes the container once and records the result. // Returns whether the worker should continue. func (w *worker) doProbe() (keepGoing bool) { defer util.HandleCrash(func(_ interface{}) { keepGoing = true }) status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID) if !ok { // Either the pod has not been created yet, or it was already deleted. glog.V(3).Infof("No status for pod: %v", kubeletutil.FormatPodName(w.pod)) return true } // Worker should terminate if pod is terminated. if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded { glog.V(3).Infof("Pod %v %v, exiting probe worker", kubeletutil.FormatPodName(w.pod), status.Phase) return false } c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name) if !ok { // Either the container has not been created yet, or it was deleted. glog.V(3).Infof("Non-existant container probed: %v - %v", kubeletutil.FormatPodName(w.pod), w.container.Name) return true // Wait for more information. } if w.containerID.String() != c.ContainerID { if !w.containerID.IsEmpty() { w.resultsManager.Remove(w.containerID) } w.containerID = kubecontainer.ParseContainerID(c.ContainerID) } if c.State.Running == nil { glog.V(3).Infof("Non-running container probed: %v - %v", kubeletutil.FormatPodName(w.pod), w.container.Name) if !w.containerID.IsEmpty() { w.resultsManager.Set(w.containerID, results.Failure, w.pod) } // Abort if the container will not be restarted. return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever } if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds { w.resultsManager.Set(w.containerID, w.initialValue, w.pod) return true } // TODO: Move error handling out of prober. result, _ := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID) if result != probe.Unknown { w.resultsManager.Set(w.containerID, result != probe.Failure, w.pod) } return true }
// doProbe probes the container once and records the result. // Returns whether the worker should continue. func doProbe(m *manager, w *worker) (keepGoing bool) { defer util.HandleCrash(func(_ interface{}) { keepGoing = true }) status, ok := m.statusManager.GetPodStatus(w.pod.UID) if !ok { // Either the pod has not been created yet, or it was already deleted. glog.V(3).Infof("No status for pod: %v", kubeutil.FormatPodName(w.pod)) return true } // Worker should terminate if pod is terminated. if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded { glog.V(3).Infof("Pod %v %v, exiting probe worker", kubeutil.FormatPodName(w.pod), status.Phase) return false } c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name) if !ok { // Either the container has not been created yet, or it was deleted. glog.V(3).Infof("Non-existant container probed: %v - %v", kubeutil.FormatPodName(w.pod), w.container.Name) return true // Wait for more information. } if w.containerID != types.UID(c.ContainerID) { if w.containerID != "" { m.readinessCache.removeReadiness(string(w.containerID)) } w.containerID = types.UID(kubecontainer.TrimRuntimePrefix(c.ContainerID)) } if c.State.Running == nil { glog.V(3).Infof("Non-running container probed: %v - %v", kubeutil.FormatPodName(w.pod), w.container.Name) m.readinessCache.setReadiness(string(w.containerID), false) // Abort if the container will not be restarted. return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever } if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds { // Readiness defaults to false during the initial delay. m.readinessCache.setReadiness(string(w.containerID), false) return true } // TODO: Move error handling out of prober. result, _ := m.prober.ProbeReadiness(w.pod, status, w.container, string(w.containerID)) if result != probe.Unknown { m.readinessCache.setReadiness(string(w.containerID), result != probe.Failure) } return true }
// syncBatch syncs pods statuses with the apiserver. func (m *manager) syncBatch() error { syncRequest := <-m.podStatusChannel pod := syncRequest.pod status := syncRequest.status var err error statusPod := &api.Pod{ ObjectMeta: pod.ObjectMeta, } // TODO: make me easier to express from client code statusPod, err = m.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) if errors.IsNotFound(err) { glog.V(3).Infof("Pod %q was deleted on the server", pod.Name) return nil } if err == nil { if len(pod.UID) > 0 && statusPod.UID != pod.UID { glog.V(3).Infof("Pod %q was deleted and then recreated, skipping status update", kubeletUtil.FormatPodName(pod)) return nil } statusPod.Status = status // TODO: handle conflict as a retry, make that easier too. statusPod, err = m.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod)) if pod.DeletionTimestamp == nil { return nil } if !notRunning(pod.Status.ContainerStatuses) { glog.V(3).Infof("Pod %q is terminated, but some pods are still running", pod.Name) return nil } if err := m.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil { glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name) m.DeletePodStatus(pod.UID) return nil } } } // We failed to update status. In order to make sure we retry next time // we delete cached value. This may result in an additional update, but // this is ok. // Doing this synchronously will lead to a deadlock if the podStatusChannel // is full, and the pod worker holding the lock is waiting on this method // to clear the channel. Even if this delete never runs subsequent container // changes on the node should trigger updates. go m.DeletePodStatus(pod.UID) return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err) }
// updateStatusInternal updates the internal status cache, and queues an update to the api server if // necessary. Returns whether an update was triggered. // This method IS NOT THREAD SAFE and must be called from a locked function. func (m *manager) updateStatusInternal(pod *api.Pod, status api.PodStatus) bool { // The intent here is to prevent concurrent updates to a pod's status from // clobbering each other so the phase of a pod progresses monotonically. oldStatus, found := m.podStatuses[pod.UID] if found && isStatusEqual(&oldStatus.status, &status) && pod.DeletionTimestamp == nil { glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", kubeletutil.FormatPodName(pod), status) return false // No new status. } newStatus := versionedPodStatus{ status: status, version: oldStatus.version + 1, podName: pod.Name, podNamespace: pod.Namespace, } m.podStatuses[pod.UID] = newStatus select { case m.podStatusChannel <- podStatusSyncRequest{pod.UID, newStatus}: return true default: // Let the periodic syncBatch handle the update if the channel is full. // We can't block, since we hold the mutex lock. return false } }
// syncPod syncs the given status with the API server. The caller must not hold the lock. func (m *manager) syncPod(uid types.UID, status versionedPodStatus) { if !m.needsUpdate(uid, status) { glog.Warningf("Status is up-to-date; skipping: %q %+v", uid, status) return } // TODO: make me easier to express from client code pod, err := m.kubeClient.Pods(status.podNamespace).Get(status.podName) if errors.IsNotFound(err) { glog.V(3).Infof("Pod %q (%s) was deleted on the server", status.podName, uid) m.deletePodStatus(uid) return } if err == nil { translatedUID := m.podManager.TranslatePodUID(pod.UID) if len(translatedUID) > 0 && translatedUID != uid { glog.V(3).Infof("Pod %q was deleted and then recreated, skipping status update", kubeletutil.FormatPodName(pod)) m.deletePodStatus(uid) return } pod.Status = status.status // TODO: handle conflict as a retry, make that easier too. pod, err = m.kubeClient.Pods(pod.Namespace).UpdateStatus(pod) if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", kubeletutil.FormatPodName(pod)) m.apiStatusVersions[uid] = status.version if pod.DeletionTimestamp == nil { return } if !notRunning(pod.Status.ContainerStatuses) { glog.V(3).Infof("Pod %q is terminated, but some containers are still running", kubeletutil.FormatPodName(pod)) return } if err := m.kubeClient.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err == nil { glog.V(3).Infof("Pod %q fully terminated and removed from etcd", kubeletutil.FormatPodName(pod)) m.deletePodStatus(uid) return } } } // We failed to update status, wait for periodic sync to retry. glog.Warningf("Failed to updated status for pod %q: %v", kubeletutil.FormatPodName(pod), err) }
func (m *manager) SetPodStatus(pod *api.Pod, status api.PodStatus) { m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() oldStatus, found := m.podStatuses[pod.UID] // ensure that the start time does not change across updates. if found && oldStatus.StartTime != nil { status.StartTime = oldStatus.StartTime } // Set ReadyCondition.LastTransitionTime. // Note we cannot do this while generating the status since we do not have oldStatus // at that time for mirror pods. if readyCondition := api.GetPodReadyCondition(status); readyCondition != nil { // Need to set LastTransitionTime. lastTransitionTime := unversioned.Now() if found { oldReadyCondition := api.GetPodReadyCondition(oldStatus) if oldReadyCondition != nil && readyCondition.Status == oldReadyCondition.Status { lastTransitionTime = oldReadyCondition.LastTransitionTime } } readyCondition.LastTransitionTime = lastTransitionTime } // if the status has no start time, we need to set an initial time // TODO(yujuhong): Consider setting StartTime when generating the pod // status instead, which would allow manager to become a simple cache // again. if status.StartTime.IsZero() { if pod.Status.StartTime.IsZero() { // the pod did not have a previously recorded value so set to now now := unversioned.Now() status.StartTime = &now } else { // the pod had a recorded value, but the kubelet restarted so we need to rebuild cache // based on last observed value status.StartTime = pod.Status.StartTime } } // TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary. // The intent here is to prevent concurrent updates to a pod's status from // clobbering each other so the phase of a pod progresses monotonically. // Currently this routine is not called for the same pod from multiple // workers and/or the kubelet but dropping the lock before sending the // status down the channel feels like an easy way to get a bullet in foot. if !found || !isStatusEqual(&oldStatus, &status) || pod.DeletionTimestamp != nil { m.podStatuses[pod.UID] = status m.podStatusChannel <- podStatusSyncRequest{pod, status} } else { glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", kubeletUtil.FormatPodName(pod), status) } }
// RunPod first creates the unit file for a pod, and then // starts the unit over d-bus. func (r *runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error { glog.V(4).Infof("Rkt starts to run pod: name %q.", kubeletUtil.FormatPodName(pod)) name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets) // Set container references and generate events. // If preparedPod fails, then send out 'failed' events for each container. // Otherwise, store the container references so we can use them later to send events. for i, c := range pod.Spec.Containers { ref, err := kubecontainer.GenerateContainerRef(pod, &c) if err != nil { glog.Errorf("Couldn't make a ref to pod %q, container %v: '%v'", kubeletUtil.FormatPodName(pod), c.Name, err) continue } if prepareErr != nil { r.recorder.Eventf(ref, "Failed", "Failed to create rkt container with error: %v", prepareErr) continue } containerID := string(runtimePod.Containers[i].ID) r.containerRefManager.SetRef(containerID, ref) } if prepareErr != nil { return prepareErr } r.generateEvents(runtimePod, "Created", nil) // TODO(yifan): This is the old version of go-systemd. Should update when libcontainer updates // its version of go-systemd. // RestartUnit has the same effect as StartUnit if the unit is not running, besides it can restart // a unit if the unit file is changed and reloaded. if _, err := r.systemd.RestartUnit(name, "replace"); err != nil { r.generateEvents(runtimePod, "Failed", err) return err } r.generateEvents(runtimePod, "Started", nil) return nil }
func (m *manager) SetContainerReadiness(pod *api.Pod, containerID kubecontainer.ContainerID, ready bool) { m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() oldStatus, found := m.podStatuses[pod.UID] if !found { glog.Warningf("Container readiness changed before pod has synced: %q - %q", kubeletutil.FormatPodName(pod), containerID.String()) return } status := oldStatus.status // Find the container to update. containerIndex := -1 for i, c := range status.ContainerStatuses { if c.ContainerID == containerID.String() { containerIndex = i break } } if containerIndex == -1 { glog.Warningf("Container readiness changed for unknown container: %q - %q", kubeletutil.FormatPodName(pod), containerID.String()) return } if status.ContainerStatuses[containerIndex].Ready == ready { glog.V(4).Infof("Container readiness unchanged (%v): %q - %q", ready, kubeletutil.FormatPodName(pod), containerID.String()) return } // Make sure we're not updating the cached version. status.ContainerStatuses = make([]api.ContainerStatus, len(status.ContainerStatuses)) copy(status.ContainerStatuses, oldStatus.status.ContainerStatuses) status.ContainerStatuses[containerIndex].Ready = ready m.updateStatusInternal(pod, status) }
// updateStatusInternal updates the internal status cache, and returns a versioned status if an // update is necessary. This method IS NOT THREAD SAFE and must be called from a locked function. func (m *manager) updateStatusInternal(pod *api.Pod, status api.PodStatus) *versionedPodStatus { // The intent here is to prevent concurrent updates to a pod's status from // clobbering each other so the phase of a pod progresses monotonically. oldStatus, found := m.podStatuses[pod.UID] if !found || !isStatusEqual(&oldStatus.status, &status) || pod.DeletionTimestamp != nil { newStatus := versionedPodStatus{ status: status, version: oldStatus.version + 1, podName: pod.Name, podNamespace: pod.Namespace, } m.podStatuses[pod.UID] = newStatus return &newStatus } else { glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", kubeletutil.FormatPodName(pod), status) return nil // No new status. } }
func (m *manager) TerminatePods(pods []*api.Pod) bool { allSent := true m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() for _, pod := range pods { for i := range pod.Status.ContainerStatuses { pod.Status.ContainerStatuses[i].State = api.ContainerState{ Terminated: &api.ContainerStateTerminated{}, } } if sent := m.updateStatusInternal(pod, pod.Status); !sent { glog.V(4).Infof("Termination notice for %q was dropped because the status channel is full", kubeletutil.FormatPodName(pod)) allSent = false } } return allSent }
func (m *manager) TerminatePods(pods []*api.Pod) bool { sent := true m.podStatusesLock.Lock() defer m.podStatusesLock.Unlock() for _, pod := range pods { for i := range pod.Status.ContainerStatuses { pod.Status.ContainerStatuses[i].State = api.ContainerState{ Terminated: &api.ContainerStateTerminated{}, } } select { case m.podStatusChannel <- podStatusSyncRequest{pod, pod.Status}: default: sent = false glog.V(4).Infof("Termination notice for %q was dropped because the status channel is full", kubeletUtil.FormatPodName(pod)) } } return sent }
// SyncPod syncs the running pod to match the specified desired pod. func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret, backOff *util.Backoff) error { podFullName := kubeletUtil.FormatPodName(pod) if len(runningPod.Containers) == 0 { glog.V(4).Infof("Pod %q is not running, will start it", podFullName) return r.RunPod(pod, pullSecrets) } // Add references to all containers. unidentifiedContainers := make(map[types.UID]*kubecontainer.Container) for _, c := range runningPod.Containers { unidentifiedContainers[c.ID] = c } restartPod := false for _, container := range pod.Spec.Containers { expectedHash := kubecontainer.HashContainer(&container) c := runningPod.FindContainerByName(container.Name) if c == nil { if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus, r.readinessManager) { glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container) // TODO(yifan): Containers in one pod are fate-sharing at this moment, see: // https://github.com/appc/spec/issues/276. restartPod = true break } continue } // TODO: check for non-root image directives. See ../docker/manager.go#SyncPod // TODO(yifan): Take care of host network change. containerChanged := c.Hash != 0 && c.Hash != expectedHash if containerChanged { glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, c.Hash, expectedHash) restartPod = true break } result, err := r.prober.Probe(pod, podStatus, container, string(c.ID), c.Created) // TODO(vmarmol): examine this logic. if err == nil && result != probe.Success { glog.Infof("Pod %q container %q is unhealthy (probe result: %v), it will be killed and re-created.", podFullName, container.Name, result) restartPod = true break } if err != nil { glog.V(2).Infof("Probe container %q failed: %v", container.Name, err) } delete(unidentifiedContainers, c.ID) } // If there is any unidentified containers, restart the pod. if len(unidentifiedContainers) > 0 { restartPod = true } if restartPod { if err := r.KillPod(pod, runningPod); err != nil { return err } if err := r.RunPod(pod, pullSecrets); err != nil { return err } } return nil }
// preparePod will: // // 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid. // 2. Create the unit file and save it under systemdUnitDir. // // On success, it will return a string that represents name of the unit file // and the runtime pod. func (r *runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *kubecontainer.Pod, error) { // Generate the pod manifest from the pod spec. manifest, err := r.makePodManifest(pod, pullSecrets) if err != nil { return "", nil, err } manifestFile, err := ioutil.TempFile("", fmt.Sprintf("manifest-%s-", pod.Name)) if err != nil { return "", nil, err } defer func() { manifestFile.Close() if err := os.Remove(manifestFile.Name()); err != nil { glog.Warningf("rkt: Cannot remove temp manifest file %q: %v", manifestFile.Name(), err) } }() data, err := json.Marshal(manifest) if err != nil { return "", nil, err } // Since File.Write returns error if the written length is less than len(data), // so check error is enough for us. if _, err := manifestFile.Write(data); err != nil { return "", nil, err } // Run 'rkt prepare' to get the rkt UUID. cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()} if r.config.Stage1Image != "" { cmds = append(cmds, "--stage1-image", r.config.Stage1Image) } output, err := r.runCommand(cmds...) if err != nil { return "", nil, err } if len(output) != 1 { return "", nil, fmt.Errorf("invalid output from 'rkt prepare': %v", output) } uuid := output[0] glog.V(4).Infof("'rkt prepare' returns %q", uuid) // Create systemd service file for the rkt pod. runtimePod := apiPodToRuntimePod(uuid, pod) b, err := json.Marshal(runtimePod) if err != nil { return "", nil, err } var runPrepared string if pod.Spec.HostNetwork { runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false %s", r.rktBinAbsPath, uuid) } else { runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false --private-net %s", r.rktBinAbsPath, uuid) } // TODO handle pod.Spec.HostPID // TODO handle pod.Spec.HostIPC units := []*unit.UnitOption{ newUnitOption(unitKubernetesSection, unitRktID, uuid), newUnitOption(unitKubernetesSection, unitPodName, string(b)), // This makes the service show up for 'systemctl list-units' even if it exits successfully. newUnitOption("Service", "RemainAfterExit", "true"), newUnitOption("Service", "ExecStart", runPrepared), // This enables graceful stop. newUnitOption("Service", "KillMode", "mixed"), } // Check if there's old rkt pod corresponding to the same pod, if so, update the restart count. var restartCount int var needReload bool serviceName := makePodServiceFileName(pod.UID) if _, err := os.Stat(serviceFilePath(serviceName)); err == nil { // Service file already exists, that means the pod is being restarted. needReload = true _, info, err := r.readServiceFile(serviceName) if err != nil { glog.Warningf("rkt: Cannot get old pod's info from service file %q: (%v), will ignore it", serviceName, err) restartCount = 0 } else { restartCount = info.restartCount + 1 } } units = append(units, newUnitOption(unitKubernetesSection, unitRestartCount, strconv.Itoa(restartCount))) glog.V(4).Infof("rkt: Creating service file %q for pod %q", serviceName, kubeletUtil.FormatPodName(pod)) serviceFile, err := os.Create(serviceFilePath(serviceName)) if err != nil { return "", nil, err } defer serviceFile.Close() _, err = io.Copy(serviceFile, unit.Serialize(units)) if err != nil { return "", nil, err } if needReload { if err := r.systemd.Reload(); err != nil { return "", nil, err } } return serviceName, runtimePod, nil }
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest. func (r *runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appcschema.PodManifest, error) { var globalPortMappings []kubecontainer.PortMapping manifest := appcschema.BlankPodManifest() for _, c := range pod.Spec.Containers { if err := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil { return nil, err } imgManifest, err := r.getImageManifest(c.Image) if err != nil { return nil, err } if imgManifest.App == nil { imgManifest.App = new(appctypes.App) } img, err := r.getImageByName(c.Image) if err != nil { return nil, err } hash, err := appctypes.NewHash(img.ID) if err != nil { return nil, err } opts, err := r.generator.GenerateRunContainerOptions(pod, &c) if err != nil { return nil, err } globalPortMappings = append(globalPortMappings, opts.PortMappings...) if err := setApp(imgManifest.App, &c, opts); err != nil { return nil, err } name, err := appctypes.SanitizeACName(c.Name) if err != nil { return nil, err } appName := appctypes.MustACName(name) manifest.Apps = append(manifest.Apps, appcschema.RuntimeApp{ Name: *appName, Image: appcschema.RuntimeImage{ID: *hash}, App: imgManifest.App, }) } volumeMap, ok := r.volumeGetter.GetVolumes(pod.UID) if !ok { return nil, fmt.Errorf("cannot get the volumes for pod %q", kubeletUtil.FormatPodName(pod)) } // Set global volumes. for name, volume := range volumeMap { volName, err := appctypes.NewACName(name) if err != nil { return nil, fmt.Errorf("cannot use the volume's name %q as ACName: %v", name, err) } manifest.Volumes = append(manifest.Volumes, appctypes.Volume{ Name: *volName, Kind: "host", Source: volume.GetPath(), }) } // Set global ports. for _, port := range globalPortMappings { name, err := appctypes.SanitizeACName(port.Name) if err != nil { return nil, fmt.Errorf("cannot use the port's name %q as ACName: %v", port.Name, err) } portName := appctypes.MustACName(name) manifest.Ports = append(manifest.Ports, appctypes.ExposedPort{ Name: *portName, HostPort: uint(port.HostPort), }) } // TODO(yifan): Set pod-level isolators once it's supported in kubernetes. return manifest, nil }
// SyncPod syncs the running pod to match the specified desired pod. func (r *Runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret, backOff *util.Backoff) error { podFullName := kubeletutil.FormatPodName(pod) // Add references to all containers. unidentifiedContainers := make(map[kubecontainer.ContainerID]*kubecontainer.Container) for _, c := range runningPod.Containers { unidentifiedContainers[c.ID] = c } restartPod := false for _, container := range pod.Spec.Containers { expectedHash := kubecontainer.HashContainer(&container) c := runningPod.FindContainerByName(container.Name) if c == nil { if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus) { glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container) // TODO(yifan): Containers in one pod are fate-sharing at this moment, see: // https://github.com/appc/spec/issues/276. restartPod = true break } continue } // TODO: check for non-root image directives. See ../docker/manager.go#SyncPod // TODO(yifan): Take care of host network change. containerChanged := c.Hash != 0 && c.Hash != expectedHash if containerChanged { glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, c.Hash, expectedHash) restartPod = true break } liveness, found := r.livenessManager.Get(c.ID) if found && liveness != proberesults.Success && pod.Spec.RestartPolicy != api.RestartPolicyNever { glog.Infof("Pod %q container %q is unhealthy, it will be killed and re-created.", podFullName, container.Name) restartPod = true break } delete(unidentifiedContainers, c.ID) } // If there is any unidentified containers, restart the pod. if len(unidentifiedContainers) > 0 { restartPod = true } if restartPod { // Kill the pod only if the pod is actually running. if len(runningPod.Containers) > 0 { if err := r.KillPod(pod, runningPod); err != nil { return err } } if err := r.RunPod(pod, pullSecrets); err != nil { return err } } return nil }
// doProbe probes the container once and records the result. // Returns whether the worker should continue. func (w *worker) doProbe() (keepGoing bool) { defer util.HandleCrash(func(_ interface{}) { keepGoing = true }) status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID) if !ok { // Either the pod has not been created yet, or it was already deleted. glog.V(3).Infof("No status for pod: %v", kubeletutil.FormatPodName(w.pod)) return true } // Worker should terminate if pod is terminated. if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded { glog.V(3).Infof("Pod %v %v, exiting probe worker", kubeletutil.FormatPodName(w.pod), status.Phase) return false } c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name) if !ok { // Either the container has not been created yet, or it was deleted. glog.V(3).Infof("Non-existant container probed: %v - %v", kubeletutil.FormatPodName(w.pod), w.container.Name) return true // Wait for more information. } if w.containerID.String() != c.ContainerID { if !w.containerID.IsEmpty() { w.resultsManager.Remove(w.containerID) } w.containerID = kubecontainer.ParseContainerID(c.ContainerID) w.resultsManager.Set(w.containerID, w.initialValue, w.pod) } if c.State.Running == nil { glog.V(3).Infof("Non-running container probed: %v - %v", kubeletutil.FormatPodName(w.pod), w.container.Name) if !w.containerID.IsEmpty() { w.resultsManager.Set(w.containerID, results.Failure, w.pod) } // Abort if the container will not be restarted. return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != api.RestartPolicyNever } if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds { return true } result, err := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID) if err != nil { // Prober error, throw away the result. return true } if w.lastResult == result { w.resultRun++ } else { w.lastResult = result w.resultRun = 1 } if (result == results.Failure && w.resultRun < w.spec.FailureThreshold) || (result == results.Success && w.resultRun < w.spec.SuccessThreshold) { // Success or failure is below threshold - leave the probe state unchanged. return true } w.resultsManager.Set(w.containerID, result, w.pod) return true }
// recordFirstSeenTime records the first seen time of this pod. func recordFirstSeenTime(pod *api.Pod) { glog.V(4).Infof("Receiving a new pod %q", kubeletUtil.FormatPodName(pod)) pod.Annotations[kubeletTypes.ConfigFirstSeenAnnotationKey] = kubeletTypes.NewTimestamp().GetString() }