// syncBatch syncs pod statuses with the apiserver.
func (m *manager) syncBatch() error {
    syncRequest := <-m.podStatusChannel
    pod := syncRequest.pod
    status := syncRequest.status

    var err error
    statusPod := &api.Pod{
        ObjectMeta: pod.ObjectMeta,
    }
    // TODO: make me easier to express from client code
    statusPod, err = m.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name)
    if errors.IsNotFound(err) {
        glog.V(3).Infof("Pod %q was deleted on the server", pod.Name)
        return nil
    }
    if err == nil {
        if len(pod.UID) > 0 && statusPod.UID != pod.UID {
            glog.V(3).Infof("Pod %q was deleted and then recreated, skipping status update", kubeletUtil.FormatPodName(pod))
            return nil
        }
        statusPod.Status = status
        // TODO: handle conflict as a retry, make that easier too.
        statusPod, err = m.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
        if err == nil {
            glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod))

            if pod.DeletionTimestamp == nil {
                return nil
            }
            if !notRunning(pod.Status.ContainerStatuses) {
                glog.V(3).Infof("Pod %q is terminated, but some containers are still running", pod.Name)
                return nil
            }
            if err := m.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil {
                glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name)
                m.DeletePodStatus(pod.UID)
                return nil
            }
        }
    }

    // We failed to update status. In order to make sure we retry next time
    // we delete the cached value. This may result in an additional update, but
    // this is ok.
    // Doing this synchronously will lead to a deadlock if the podStatusChannel
    // is full, and the pod worker holding the lock is waiting on this method
    // to clear the channel. Even if this delete never runs, subsequent container
    // changes on the node should trigger updates.
    go m.DeletePodStatus(pod.UID)
    return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err)
}
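syncBatch relies on a notRunning helper that is not shown above. A minimal sketch consistent with how it is used (report "not running" only when no container status is still running) might look like the following; it is an illustration, not necessarily the exact implementation:

// notRunning returns true if every container status is terminated or waiting,
// i.e. nothing in the pod is still running. An empty list also counts as not running.
func notRunning(statuses []api.ContainerStatus) bool {
    for _, status := range statuses {
        if status.State.Terminated == nil && status.State.Waiting == nil {
            return false
        }
    }
    return true
}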
// RunPod first creates the unit file for a pod, and then
// starts the unit over d-bus.
func (r *runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
    glog.V(4).Infof("Rkt starts to run pod: name %q.", kubeletUtil.FormatPodName(pod))

    name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets)

    // Set container references and generate events.
    // If preparePod fails, then send out 'failed' events for each container.
    // Otherwise, store the container references so we can use them later to send events.
    for i, c := range pod.Spec.Containers {
        ref, err := kubecontainer.GenerateContainerRef(pod, &c)
        if err != nil {
            glog.Errorf("Couldn't make a ref to pod %q, container %v: '%v'", kubeletUtil.FormatPodName(pod), c.Name, err)
            continue
        }
        if prepareErr != nil {
            r.recorder.Eventf(ref, "Failed", "Failed to create rkt container with error: %v", prepareErr)
            continue
        }
        containerID := string(runtimePod.Containers[i].ID)
        r.containerRefManager.SetRef(containerID, ref)
    }

    if prepareErr != nil {
        return prepareErr
    }

    r.generateEvents(runtimePod, "Created", nil)

    // TODO(yifan): This is the old version of go-systemd. Should update when libcontainer updates
    // its version of go-systemd.
    // RestartUnit has the same effect as StartUnit if the unit is not running; in addition, it can
    // restart a unit if the unit file has been changed and reloaded.
    if _, err := r.systemd.RestartUnit(name, "replace"); err != nil {
        r.generateEvents(runtimePod, "Failed", err)
        return err
    }

    r.generateEvents(runtimePod, "Started", nil)

    return nil
}
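RunPod calls a generateEvents helper that is not shown here. A hypothetical sketch of what such a helper could look like, assuming the containerRefManager exposes a GetRef lookup matching the SetRef call above (illustrative only, not the actual implementation):

// generateEvents emits an event with the given reason for every container in the
// runtime pod whose reference was stored earlier via SetRef.
// (Illustrative sketch, not the actual implementation.)
func (r *runtime) generateEvents(runtimePod *kubecontainer.Pod, reason string, failure error) {
    for _, c := range runtimePod.Containers {
        ref, ok := r.containerRefManager.GetRef(string(c.ID))
        if !ok {
            glog.Warningf("No ref for container %q", c.ID)
            continue
        }
        if failure != nil {
            r.recorder.Eventf(ref, reason, "rkt container %q: %v", c.Name, failure)
            continue
        }
        r.recorder.Eventf(ref, reason, "rkt container %q", c.Name)
    }
}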
func (m *manager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
    m.podStatusesLock.Lock()
    defer m.podStatusesLock.Unlock()
    oldStatus, found := m.podStatuses[pod.UID]

    // ensure that the start time does not change across updates.
    if found && oldStatus.StartTime != nil {
        status.StartTime = oldStatus.StartTime
    }

    // if the status has no start time, we need to set an initial time
    // TODO(yujuhong): Consider setting StartTime when generating the pod
    // status instead, which would allow manager to become a simple cache
    // again.
    if status.StartTime.IsZero() {
        if pod.Status.StartTime.IsZero() {
            // the pod did not have a previously recorded value, so set it to now
            now := unversioned.Now()
            status.StartTime = &now
        } else {
            // the pod had a recorded value, but the kubelet restarted, so we need to
            // rebuild the cache based on the last observed value
            status.StartTime = pod.Status.StartTime
        }
    }

    // TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary.
    // The intent here is to prevent concurrent updates to a pod's status from
    // clobbering each other so the phase of a pod progresses monotonically.
    // Currently this routine is not called for the same pod from multiple
    // workers and/or the kubelet, but dropping the lock before sending the
    // status down the channel feels like an easy way to get a bullet in the foot.
    if !found || !isStatusEqual(&oldStatus, &status) || pod.DeletionTimestamp != nil {
        m.podStatuses[pod.UID] = status
        m.podStatusChannel <- podStatusSyncRequest{pod, status}
    } else {
        glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", kubeletUtil.FormatPodName(pod), status)
    }
}
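SetPodStatus only enqueues a podStatusSyncRequest; something must drain m.podStatusChannel by calling syncBatch repeatedly. A minimal sketch of that consumer loop, assuming a Start method on the manager and a util.Until-style run-forever helper (illustrative, not the exact implementation):

// Start drains the status channel in a long-running goroutine. Each iteration of
// syncBatch receives one queued request and pushes it to the apiserver.
func (m *manager) Start() {
    go util.Until(func() {
        if err := m.syncBatch(); err != nil {
            glog.Warningf("Failed to update pod status: %v", err)
        }
    }, 0, util.NeverStop)
}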
func (m *manager) TerminatePods(pods []*api.Pod) bool {
    sent := true
    m.podStatusesLock.Lock()
    defer m.podStatusesLock.Unlock()
    for _, pod := range pods {
        for i := range pod.Status.ContainerStatuses {
            pod.Status.ContainerStatuses[i].State = api.ContainerState{
                Terminated: &api.ContainerStateTerminated{},
            }
        }
        select {
        case m.podStatusChannel <- podStatusSyncRequest{pod, pod.Status}:
        default:
            sent = false
            glog.V(4).Infof("Termination notice for %q was dropped because the status channel is full", kubeletUtil.FormatPodName(pod))
        }
    }
    return sent
}
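Because TerminatePods reports whether every termination notice made it onto the channel, a caller that must not lose notices can retry. A hypothetical usage sketch in the same package (the helper name and its backoff are assumptions, not taken from the kubelet):

// ensureTerminationDelivered keeps resending termination notices until the status
// channel has room for all of them. (Hypothetical helper, for illustration only.)
func ensureTerminationDelivered(m *manager, pods []*api.Pod) {
    for !m.TerminatePods(pods) {
        time.Sleep(100 * time.Millisecond) // hypothetical backoff between attempts
    }
}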
// recordFirstSeenTime records the first seen time of this pod.
func recordFirstSeenTime(pod *api.Pod) {
    glog.V(4).Infof("Receiving a new pod %q", kubeletUtil.FormatPodName(pod))
    if pod.Annotations == nil {
        // Guard against a nil annotations map; writing to a nil map would panic.
        pod.Annotations = make(map[string]string)
    }
    pod.Annotations[kubelet.ConfigFirstSeenAnnotationKey] = kubeletTypes.NewTimestamp().GetString()
}
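The annotation written here is meant to be read back later, for example to measure how long a pod takes from first sight to running. A sketch of the reverse operation, assuming kubeletTypes provides a ConvertToTimestamp counterpart to NewTimestamp (illustrative only):

// getFirstSeenTime parses the first-seen annotation back into a time.Time.
// The zero time is returned when the annotation is missing. (Illustrative sketch.)
func getFirstSeenTime(pod *api.Pod) time.Time {
    if s, ok := pod.Annotations[kubelet.ConfigFirstSeenAnnotationKey]; ok {
        return kubeletTypes.ConvertToTimestamp(s).Get()
    }
    return time.Time{}
}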
// SyncPod syncs the running pod to match the specified desired pod.
func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret, backOff *util.Backoff) error {
    podFullName := kubeletUtil.FormatPodName(pod)
    if len(runningPod.Containers) == 0 {
        glog.V(4).Infof("Pod %q is not running, will start it", podFullName)
        return r.RunPod(pod, pullSecrets)
    }

    // Add references to all containers.
    unidentifiedContainers := make(map[types.UID]*kubecontainer.Container)
    for _, c := range runningPod.Containers {
        unidentifiedContainers[c.ID] = c
    }

    restartPod := false
    for _, container := range pod.Spec.Containers {
        expectedHash := kubecontainer.HashContainer(&container)

        c := runningPod.FindContainerByName(container.Name)
        if c == nil {
            if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus, r.readinessManager) {
                glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
                // TODO(yifan): Containers in one pod are fate-sharing at this moment, see:
                // https://github.com/appc/spec/issues/276.
                restartPod = true
                break
            }
            continue
        }

        // TODO: check for non-root image directives. See ../docker/manager.go#SyncPod
        // TODO(yifan): Take care of host network change.
        containerChanged := c.Hash != 0 && c.Hash != expectedHash
        if containerChanged {
            glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, c.Hash, expectedHash)
            restartPod = true
            break
        }

        result, err := r.prober.Probe(pod, podStatus, container, string(c.ID), c.Created)
        // TODO(vmarmol): examine this logic.
        if err == nil && result != probe.Success {
            glog.Infof("Pod %q container %q is unhealthy (probe result: %v), it will be killed and re-created.", podFullName, container.Name, result)
            restartPod = true
            break
        }

        if err != nil {
            glog.V(2).Infof("Probe container %q failed: %v", container.Name, err)
        }
        delete(unidentifiedContainers, c.ID)
    }

    // If there are any unidentified containers, restart the pod.
    if len(unidentifiedContainers) > 0 {
        restartPod = true
    }

    if restartPod {
        if err := r.KillPod(pod, runningPod); err != nil {
            return err
        }
        if err := r.RunPod(pod, pullSecrets); err != nil {
            return err
        }
    }

    return nil
}
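SyncPod decides to kill and re-create the pod when a running container's recorded hash no longer matches the hash of its desired spec. A small, self-contained illustration of that mechanism (the container name and image values are made up):

// Any change to the container spec changes its hash, which SyncPod treats as
// "the running container no longer matches the desired spec".
func specChanged() bool {
    c := api.Container{Name: "nginx", Image: "nginx:1.7"}
    oldHash := kubecontainer.HashContainer(&c)

    c.Image = "nginx:1.9" // simulate an updated pod spec
    newHash := kubecontainer.HashContainer(&c)

    return oldHash != newHash // true: the pod would be restarted
}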
// preparePod will:
//
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
// 2. Create the unit file and save it under systemdUnitDir.
//
// On success, it will return a string that represents the name of the unit file
// and the runtime pod.
func (r *runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *kubecontainer.Pod, error) {
    // Generate the pod manifest from the pod spec.
    manifest, err := r.makePodManifest(pod, pullSecrets)
    if err != nil {
        return "", nil, err
    }
    manifestFile, err := ioutil.TempFile("", fmt.Sprintf("manifest-%s-", pod.Name))
    if err != nil {
        return "", nil, err
    }
    defer func() {
        manifestFile.Close()
        if err := os.Remove(manifestFile.Name()); err != nil {
            glog.Warningf("rkt: Cannot remove temp manifest file %q: %v", manifestFile.Name(), err)
        }
    }()

    data, err := json.Marshal(manifest)
    if err != nil {
        return "", nil, err
    }
    // File.Write returns an error if the written length is less than len(data),
    // so checking the error is enough for us.
    if _, err := manifestFile.Write(data); err != nil {
        return "", nil, err
    }

    // Run 'rkt prepare' to get the rkt UUID.
    cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
    if r.config.Stage1Image != "" {
        cmds = append(cmds, "--stage1-image", r.config.Stage1Image)
    }
    output, err := r.runCommand(cmds...)
    if err != nil {
        return "", nil, err
    }
    if len(output) != 1 {
        return "", nil, fmt.Errorf("invalid output from 'rkt prepare': %v", output)
    }
    uuid := output[0]
    glog.V(4).Infof("'rkt prepare' returns %q", uuid)

    // Create systemd service file for the rkt pod.
    runtimePod := apiPodToRuntimePod(uuid, pod)
    b, err := json.Marshal(runtimePod)
    if err != nil {
        return "", nil, err
    }

    var runPrepared string
    if pod.Spec.HostNetwork {
        runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false %s", r.rktBinAbsPath, uuid)
    } else {
        runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false --private-net %s", r.rktBinAbsPath, uuid)
    }

    // TODO: handle pod.Spec.HostPID
    // TODO: handle pod.Spec.HostIPC
    units := []*unit.UnitOption{
        newUnitOption(unitKubernetesSection, unitRktID, uuid),
        newUnitOption(unitKubernetesSection, unitPodName, string(b)),
        // This makes the service show up for 'systemctl list-units' even if it exits successfully.
        newUnitOption("Service", "RemainAfterExit", "true"),
        newUnitOption("Service", "ExecStart", runPrepared),
        // This enables graceful stop.
        newUnitOption("Service", "KillMode", "mixed"),
    }

    // Check if there's an old rkt pod corresponding to the same pod; if so, update the restart count.
    var restartCount int
    var needReload bool
    serviceName := makePodServiceFileName(pod.UID)
    if _, err := os.Stat(serviceFilePath(serviceName)); err == nil {
        // Service file already exists, which means the pod is being restarted.
        needReload = true
        _, info, err := r.readServiceFile(serviceName)
        if err != nil {
            glog.Warningf("rkt: Cannot get old pod's info from service file %q: (%v), will ignore it", serviceName, err)
            restartCount = 0
        } else {
            restartCount = info.restartCount + 1
        }
    }
    units = append(units, newUnitOption(unitKubernetesSection, unitRestartCount, strconv.Itoa(restartCount)))

    glog.V(4).Infof("rkt: Creating service file %q for pod %q", serviceName, kubeletUtil.FormatPodName(pod))
    serviceFile, err := os.Create(serviceFilePath(serviceName))
    if err != nil {
        return "", nil, err
    }
    defer serviceFile.Close()

    _, err = io.Copy(serviceFile, unit.Serialize(units))
    if err != nil {
        return "", nil, err
    }

    if needReload {
        if err := r.systemd.Reload(); err != nil {
            return "", nil, err
        }
    }
    return serviceName, runtimePod, nil
}
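preparePod leans on a few small helpers that are not shown above: makePodServiceFileName, serviceFilePath, and newUnitOption. Sketches of what they plausibly look like, assuming a kubernetesUnitPrefix constant and a systemdServiceDir path (these names are assumptions for illustration, not confirmed by the excerpt):

// makePodServiceFileName derives a deterministic unit name from the pod UID so the
// same pod always maps to the same service file. (Illustrative sketch.)
func makePodServiceFileName(uid types.UID) string {
    return fmt.Sprintf("%s%s.service", kubernetesUnitPrefix, uid)
}

// serviceFilePath returns the absolute path of the unit file under the systemd
// service directory. (Illustrative sketch.)
func serviceFilePath(serviceName string) string {
    return path.Join(systemdServiceDir, serviceName)
}

// newUnitOption is a small convenience wrapper around go-systemd's unit.UnitOption.
func newUnitOption(section, name, value string) *unit.UnitOption {
    return &unit.UnitOption{Section: section, Name: name, Value: value}
}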
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest.
func (r *runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appcschema.PodManifest, error) {
    var globalPortMappings []kubecontainer.PortMapping
    manifest := appcschema.BlankPodManifest()

    for _, c := range pod.Spec.Containers {
        if err := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
            return nil, err
        }
        imgManifest, err := r.getImageManifest(c.Image)
        if err != nil {
            return nil, err
        }

        if imgManifest.App == nil {
            imgManifest.App = new(appctypes.App)
        }

        img, err := r.getImageByName(c.Image)
        if err != nil {
            return nil, err
        }
        hash, err := appctypes.NewHash(img.ID)
        if err != nil {
            return nil, err
        }

        opts, err := r.generator.GenerateRunContainerOptions(pod, &c)
        if err != nil {
            return nil, err
        }

        globalPortMappings = append(globalPortMappings, opts.PortMappings...)

        if err := setApp(imgManifest.App, &c, opts); err != nil {
            return nil, err
        }

        name, err := appctypes.SanitizeACName(c.Name)
        if err != nil {
            return nil, err
        }
        appName := appctypes.MustACName(name)

        manifest.Apps = append(manifest.Apps, appcschema.RuntimeApp{
            Name:  *appName,
            Image: appcschema.RuntimeImage{ID: *hash},
            App:   imgManifest.App,
        })
    }

    volumeMap, ok := r.volumeGetter.GetVolumes(pod.UID)
    if !ok {
        return nil, fmt.Errorf("cannot get the volumes for pod %q", kubeletUtil.FormatPodName(pod))
    }

    // Set global volumes.
    for name, volume := range volumeMap {
        volName, err := appctypes.NewACName(name)
        if err != nil {
            return nil, fmt.Errorf("cannot use the volume's name %q as ACName: %v", name, err)
        }
        manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
            Name:   *volName,
            Kind:   "host",
            Source: volume.GetPath(),
        })
    }

    // Set global ports.
    for _, port := range globalPortMappings {
        name, err := appctypes.SanitizeACName(port.Name)
        if err != nil {
            return nil, fmt.Errorf("cannot use the port's name %q as ACName: %v", port.Name, err)
        }
        portName := appctypes.MustACName(name)
        manifest.Ports = append(manifest.Ports, appctypes.ExposedPort{
            Name:     *portName,
            HostPort: uint(port.HostPort),
        })
    }
    // TODO(yifan): Set pod-level isolators once it's supported in kubernetes.
    return manifest, nil
}