Example #1
// syncBatch syncs pods statuses with the apiserver.
func (m *manager) syncBatch() error {
	syncRequest := <-m.podStatusChannel
	pod := syncRequest.pod
	status := syncRequest.status

	var err error
	statusPod := &api.Pod{
		ObjectMeta: pod.ObjectMeta,
	}
	// TODO: make me easier to express from client code
	statusPod, err = m.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name)
	if errors.IsNotFound(err) {
		glog.V(3).Infof("Pod %q was deleted on the server", pod.Name)
		return nil
	}
	if err == nil {
		if len(pod.UID) > 0 && statusPod.UID != pod.UID {
			glog.V(3).Infof("Pod %q was deleted and then recreated, skipping status update", kubeletUtil.FormatPodName(pod))
			return nil
		}
		statusPod.Status = status
		// TODO: handle conflict as a retry, make that easier too.
		statusPod, err = m.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
		if err == nil {
			glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod))

			if pod.DeletionTimestamp == nil {
				return nil
			}
			if !notRunning(pod.Status.ContainerStatuses) {
				glog.V(3).Infof("Pod %q is terminated, but some pods are still running", pod.Name)
				return nil
			}
			if err := m.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil {
				glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name)
				m.DeletePodStatus(pod.UID)
				return nil
			}
		}
	}

	// We failed to update the status. To make sure we retry next time, we
	// delete the cached value. This may result in an additional update, but
	// that is OK.
	// Doing this synchronously would lead to a deadlock if the podStatusChannel
	// is full and the pod worker holding the lock is waiting on this method
	// to clear the channel. Even if this delete never runs, subsequent container
	// changes on the node should trigger updates.
	go m.DeletePodStatus(pod.UID)
	return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err)
}
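syncBatch relies on a notRunning helper that is not shown above. A minimal sketch of what it plausibly looks like, inferred from the call on pod.Status.ContainerStatuses (not necessarily the original implementation):

// notRunning reports whether no container in the list is running, i.e. every
// status is either terminated or waiting. An empty list counts as not running.
func notRunning(statuses []api.ContainerStatus) bool {
	for _, status := range statuses {
		if status.State.Terminated == nil && status.State.Waiting == nil {
			return false
		}
	}
	return true
}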
Example #2
// RunPod first creates the unit file for a pod, and then
// starts the unit over d-bus.
func (r *runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
	glog.V(4).Infof("Rkt starts to run pod: name %q.", kubeletUtil.FormatPodName(pod))

	name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets)

	// Set container references and generate events.
	// If preparePod failed, send out 'failed' events for each container.
	// Otherwise, store the container references so we can use them later to send events.
	for i, c := range pod.Spec.Containers {
		ref, err := kubecontainer.GenerateContainerRef(pod, &c)
		if err != nil {
			glog.Errorf("Couldn't make a ref to pod %q, container %v: '%v'", kubeletUtil.FormatPodName(pod), c.Name, err)
			continue
		}
		if prepareErr != nil {
			r.recorder.Eventf(ref, "Failed", "Failed to create rkt container with error: %v", prepareErr)
			continue
		}
		containerID := string(runtimePod.Containers[i].ID)
		r.containerRefManager.SetRef(containerID, ref)
	}

	if prepareErr != nil {
		return prepareErr
	}

	r.generateEvents(runtimePod, "Created", nil)

	// TODO(yifan): This is the old version of go-systemd. Should update when libcontainer updates
	// its version of go-systemd.
	// RestartUnit has the same effect as StartUnit if the unit is not running; in addition, it
	// can restart a unit whose unit file has changed and been reloaded.
	if _, err := r.systemd.RestartUnit(name, "replace"); err != nil {
		r.generateEvents(runtimePod, "Failed", err)
		return err
	}

	r.generateEvents(runtimePod, "Started", nil)

	return nil
}
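RunPod reports lifecycle transitions through r.generateEvents, which is not shown here. A rough sketch of how such a fan-out could look, assuming the references stored via r.containerRefManager.SetRef can be looked up again with a GetRef counterpart (the lookup and the message formats are assumptions for illustration):

// generateEvents emits one event per container in the runtime pod, attaching
// the optional failure to the message.
func (r *runtime) generateEvents(runtimePod *kubecontainer.Pod, reason string, failure error) {
	for _, c := range runtimePod.Containers {
		ref, ok := r.containerRefManager.GetRef(string(c.ID))
		if !ok {
			continue
		}
		if failure != nil {
			r.recorder.Eventf(ref, reason, "rkt container %q: %v", c.Name, failure)
			continue
		}
		r.recorder.Eventf(ref, reason, "rkt container %q", c.Name)
	}
}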
Example #3
func (m *manager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()
	oldStatus, found := m.podStatuses[pod.UID]

	// ensure that the start time does not change across updates.
	if found && oldStatus.StartTime != nil {
		status.StartTime = oldStatus.StartTime
	}

	// if the status has no start time, we need to set an initial time
	// TODO(yujuhong): Consider setting StartTime when generating the pod
	// status instead, which would allow manager to become a simple cache
	// again.
	if status.StartTime.IsZero() {
		if pod.Status.StartTime.IsZero() {
			// the pod did not have a previously recorded value so set to now
			now := unversioned.Now()
			status.StartTime = &now
		} else {
			// the pod had a recorded value, but the kubelet restarted so we need to rebuild cache
			// based on last observed value
			status.StartTime = pod.Status.StartTime
		}
	}

	// TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary.
	// The intent here is to prevent concurrent updates to a pod's status from
	// clobbering each other so the phase of a pod progresses monotonically.
	// Currently this routine is not called for the same pod from multiple
	// workers and/or the kubelet, but dropping the lock before sending the
	// status down the channel feels like an easy way to shoot ourselves in the foot.
	if !found || !isStatusEqual(&oldStatus, &status) || pod.DeletionTimestamp != nil {
		m.podStatuses[pod.UID] = status
		m.podStatusChannel <- podStatusSyncRequest{pod, status}
	} else {
		glog.V(3).Infof("Ignoring same status for pod %q, status: %+v", kubeletUtil.FormatPodName(pod), status)
	}
}
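SetPodStatus deduplicates updates with isStatusEqual, which is not part of this example. A plausible minimal implementation is a deep comparison of the two statuses using the standard reflect package (the original may use a semantic comparison instead):

// isStatusEqual reports whether two pod statuses are deeply equal.
func isStatusEqual(oldStatus, status *api.PodStatus) bool {
	return reflect.DeepEqual(oldStatus, status)
}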
Example #4
func (m *manager) TerminatePods(pods []*api.Pod) bool {
	sent := true
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()
	for _, pod := range pods {
		for i := range pod.Status.ContainerStatuses {
			pod.Status.ContainerStatuses[i].State = api.ContainerState{
				Terminated: &api.ContainerStateTerminated{},
			}
		}
		select {
		case m.podStatusChannel <- podStatusSyncRequest{pod, pod.Status}:
		default:
			sent = false
			glog.V(4).Infof("Termination notice for %q was dropped because the status channel is full", kubeletUtil.FormatPodName(pod))
		}
	}
	return sent
}
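Both SetPodStatus and TerminatePods push podStatusSyncRequest values into podStatusChannel, and syncBatch in Example #1 reads the pod and status fields back out. Inferred from those call sites, the request type is a simple pair:

// podStatusSyncRequest carries a pod together with the status to sync with
// the apiserver.
type podStatusSyncRequest struct {
	pod    *api.Pod
	status api.PodStatus
}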
Example #5
// recordFirstSeenTime records the first seen time of this pod.
func recordFirstSeenTime(pod *api.Pod) {
	glog.V(4).Infof("Receiving a new pod %q", kubeletUtil.FormatPodName(pod))
	// Writing to a nil annotations map would panic, so make sure it exists.
	if pod.Annotations == nil {
		pod.Annotations = make(map[string]string)
	}
	pod.Annotations[kubelet.ConfigFirstSeenAnnotationKey] = kubeletTypes.NewTimestamp().GetString()
}
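The annotation written here is a serialized timestamp, so consumers need the inverse conversion. A hypothetical reader, assuming kubeletTypes offers a ConvertToTimestamp counterpart to NewTimestamp (both the helper name and the parsing call are assumptions, as is the time import):

// getFirstSeenTime reads back the annotation set by recordFirstSeenTime.
func getFirstSeenTime(pod *api.Pod) (time.Time, bool) {
	s, ok := pod.Annotations[kubelet.ConfigFirstSeenAnnotationKey]
	if !ok {
		return time.Time{}, false
	}
	return kubeletTypes.ConvertToTimestamp(s).Get(), true
}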
Example #6
// SyncPod syncs the running pod to match the specified desired pod.
func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret, backOff *util.Backoff) error {
	podFullName := kubeletUtil.FormatPodName(pod)
	if len(runningPod.Containers) == 0 {
		glog.V(4).Infof("Pod %q is not running, will start it", podFullName)
		return r.RunPod(pod, pullSecrets)
	}

	// Track every running container; any that remain unidentified after the
	// loop below will force a pod restart.
	unidentifiedContainers := make(map[types.UID]*kubecontainer.Container)
	for _, c := range runningPod.Containers {
		unidentifiedContainers[c.ID] = c
	}

	restartPod := false
	for _, container := range pod.Spec.Containers {
		expectedHash := kubecontainer.HashContainer(&container)

		c := runningPod.FindContainerByName(container.Name)
		if c == nil {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus, r.readinessManager) {
				glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				// TODO(yifan): Containers in one pod are fate-sharing at this moment, see:
				// https://github.com/appc/spec/issues/276.
				restartPod = true
				break
			}
			continue
		}

		// TODO: check for non-root image directives.  See ../docker/manager.go#SyncPod

		// TODO(yifan): Take care of host network change.
		containerChanged := c.Hash != 0 && c.Hash != expectedHash
		if containerChanged {
			glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, c.Hash, expectedHash)
			restartPod = true
			break
		}

		result, err := r.prober.Probe(pod, podStatus, container, string(c.ID), c.Created)
		// TODO(vmarmol): examine this logic.
		if err == nil && result != probe.Success {
			glog.Infof("Pod %q container %q is unhealthy (probe result: %v), it will be killed and re-created.", podFullName, container.Name, result)
			restartPod = true
			break
		}

		if err != nil {
			glog.V(2).Infof("Probe container %q failed: %v", container.Name, err)
		}
		delete(unidentifiedContainers, c.ID)
	}

	// If there are any unidentified containers, restart the pod.
	if len(unidentifiedContainers) > 0 {
		restartPod = true
	}

	if restartPod {
		if err := r.KillPod(pod, runningPod); err != nil {
			return err
		}
		if err := r.RunPod(pod, pullSecrets); err != nil {
			return err
		}
	}
	return nil
}
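The containerChanged check compares c.Hash against kubecontainer.HashContainer(&container), a hash over the container spec: any spec change produces a new value and forces a restart. A sketch of such a hash, using hash/adler32 and fmt from the standard library (the encoding here is an illustrative choice, not necessarily what kubecontainer.HashContainer does):

// hashContainer folds the container spec into a uint64 so that any change to
// the spec changes the value.
func hashContainer(container *api.Container) uint64 {
	h := adler32.New()
	fmt.Fprintf(h, "%#v", *container)
	return uint64(h.Sum32())
}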
Example #7
// preparePod will:
//
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
// 2. Create the unit file and save it under systemdUnitDir.
//
// On success, it returns the name of the unit file
// and the runtime pod.
func (r *runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *kubecontainer.Pod, error) {
	// Generate the pod manifest from the pod spec.
	manifest, err := r.makePodManifest(pod, pullSecrets)
	if err != nil {
		return "", nil, err
	}
	manifestFile, err := ioutil.TempFile("", fmt.Sprintf("manifest-%s-", pod.Name))
	if err != nil {
		return "", nil, err
	}
	defer func() {
		manifestFile.Close()
		if err := os.Remove(manifestFile.Name()); err != nil {
			glog.Warningf("rkt: Cannot remove temp manifest file %q: %v", manifestFile.Name(), err)
		}
	}()

	data, err := json.Marshal(manifest)
	if err != nil {
		return "", nil, err
	}
	// File.Write returns an error if it writes fewer than len(data) bytes,
	// so checking the error is enough for us.
	if _, err := manifestFile.Write(data); err != nil {
		return "", nil, err
	}

	// Run 'rkt prepare' to get the rkt UUID.
	cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
	if r.config.Stage1Image != "" {
		cmds = append(cmds, "--stage1-image", r.config.Stage1Image)
	}
	output, err := r.runCommand(cmds...)
	if err != nil {
		return "", nil, err
	}
	if len(output) != 1 {
		return "", nil, fmt.Errorf("invalid output from 'rkt prepare': %v", output)
	}
	uuid := output[0]
	glog.V(4).Infof("'rkt prepare' returns %q", uuid)

	// Create systemd service file for the rkt pod.
	runtimePod := apiPodToRuntimePod(uuid, pod)
	b, err := json.Marshal(runtimePod)
	if err != nil {
		return "", nil, err
	}

	var runPrepared string
	if pod.Spec.HostNetwork {
		runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false %s", r.rktBinAbsPath, uuid)
	} else {
		runPrepared = fmt.Sprintf("%s run-prepared --mds-register=false --private-net %s", r.rktBinAbsPath, uuid)
	}

	// TODO handle pod.Spec.HostPID
	// TODO handle pod.Spec.HostIPC

	units := []*unit.UnitOption{
		newUnitOption(unitKubernetesSection, unitRktID, uuid),
		newUnitOption(unitKubernetesSection, unitPodName, string(b)),
		// This makes the service show up for 'systemctl list-units' even if it exits successfully.
		newUnitOption("Service", "RemainAfterExit", "true"),
		newUnitOption("Service", "ExecStart", runPrepared),
		// This enables graceful stop.
		newUnitOption("Service", "KillMode", "mixed"),
	}

	// Check if there's an old rkt pod corresponding to the same pod; if so, update the restart count.
	var restartCount int
	var needReload bool
	serviceName := makePodServiceFileName(pod.UID)
	if _, err := os.Stat(serviceFilePath(serviceName)); err == nil {
		// Service file already exists, that means the pod is being restarted.
		needReload = true
		_, info, err := r.readServiceFile(serviceName)
		if err != nil {
			glog.Warningf("rkt: Cannot get old pod's info from service file %q: (%v), will ignore it", serviceName, err)
			restartCount = 0
		} else {
			restartCount = info.restartCount + 1
		}
	}
	units = append(units, newUnitOption(unitKubernetesSection, unitRestartCount, strconv.Itoa(restartCount)))

	glog.V(4).Infof("rkt: Creating service file %q for pod %q", serviceName, kubeletUtil.FormatPodName(pod))
	serviceFile, err := os.Create(serviceFilePath(serviceName))
	if err != nil {
		return "", nil, err
	}
	defer serviceFile.Close()

	_, err = io.Copy(serviceFile, unit.Serialize(units))
	if err != nil {
		return "", nil, err
	}

	if needReload {
		if err := r.systemd.Reload(); err != nil {
			return "", nil, err
		}
	}
	return serviceName, runtimePod, nil
}
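preparePod leans on a few small helpers that are not included in this example. Plausible sketches for two of them, inferred from their call sites (the "k8s_" unit file prefix is an assumption; unit.UnitOption is go-systemd's option type, which unit.Serialize above consumes):

// newUnitOption wraps go-systemd's unit.UnitOption.
func newUnitOption(section, name, value string) *unit.UnitOption {
	return &unit.UnitOption{Section: section, Name: name, Value: value}
}

// makePodServiceFileName derives a stable unit file name from the pod UID;
// the prefix here is illustrative.
func makePodServiceFileName(uid types.UID) string {
	return fmt.Sprintf("k8s_%s.service", uid)
}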
Example #8
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest.
func (r *runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appcschema.PodManifest, error) {
	var globalPortMappings []kubecontainer.PortMapping
	manifest := appcschema.BlankPodManifest()

	for _, c := range pod.Spec.Containers {
		if err := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
			return nil, err
		}
		imgManifest, err := r.getImageManifest(c.Image)
		if err != nil {
			return nil, err
		}

		if imgManifest.App == nil {
			imgManifest.App = new(appctypes.App)
		}

		img, err := r.getImageByName(c.Image)
		if err != nil {
			return nil, err
		}
		hash, err := appctypes.NewHash(img.ID)
		if err != nil {
			return nil, err
		}

		opts, err := r.generator.GenerateRunContainerOptions(pod, &c)
		if err != nil {
			return nil, err
		}

		globalPortMappings = append(globalPortMappings, opts.PortMappings...)

		if err := setApp(imgManifest.App, &c, opts); err != nil {
			return nil, err
		}

		name, err := appctypes.SanitizeACName(c.Name)
		if err != nil {
			return nil, err
		}
		appName := appctypes.MustACName(name)

		manifest.Apps = append(manifest.Apps, appcschema.RuntimeApp{
			Name:  *appName,
			Image: appcschema.RuntimeImage{ID: *hash},
			App:   imgManifest.App,
		})
	}

	volumeMap, ok := r.volumeGetter.GetVolumes(pod.UID)
	if !ok {
		return nil, fmt.Errorf("cannot get the volumes for pod %q", kubeletUtil.FormatPodName(pod))
	}

	// Set global volumes.
	for name, volume := range volumeMap {
		volName, err := appctypes.NewACName(name)
		if err != nil {
			return nil, fmt.Errorf("cannot use the volume's name %q as ACName: %v", name, err)
		}
		manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
			Name:   *volName,
			Kind:   "host",
			Source: volume.GetPath(),
		})
	}

	// Set global ports.
	for _, port := range globalPortMappings {
		name, err := appctypes.SanitizeACName(port.Name)
		if err != nil {
			return nil, fmt.Errorf("cannot use the port's name %q as ACName: %v", port.Name, err)
		}
		portName := appctypes.MustACName(name)
		manifest.Ports = append(manifest.Ports, appctypes.ExposedPort{
			Name:     *portName,
			HostPort: uint(port.HostPort),
		})
	}
	// TODO(yifan): Set pod-level isolators once it's supported in kubernetes.
	return manifest, nil
}
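The port loop above only reads Name and HostPort from the kubecontainer.PortMapping values collected out of GenerateRunContainerOptions. Inferred from that usage, the mapping type looks roughly like this (the fields beyond Name and HostPort are assumptions):

// PortMapping describes one container port exposed on the host.
type PortMapping struct {
	Name          string
	HostPort      int
	ContainerPort int
	Protocol      api.Protocol
	HostIP        string
}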