func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) { podFullName := kubecontainer.GetPodFullName(pod) s.podStatusesLock.Lock() defer s.podStatusesLock.Unlock() oldStatus, found := s.podStatuses[podFullName] // ensure that the start time does not change across updates. if found && oldStatus.StartTime != nil { status.StartTime = oldStatus.StartTime } // if the status has no start time, we need to set an initial time // TODO(yujuhong): Consider setting StartTime when generating the pod // status instead, which would allow statusManager to become a simple cache // again. if status.StartTime.IsZero() { if pod.Status.StartTime.IsZero() { // the pod did not have a previously recorded value so set to now now := util.Now() status.StartTime = &now } else { // the pod had a recorded value, but the kubelet restarted so we need to rebuild cache // based on last observed value status.StartTime = pod.Status.StartTime } } if !found || !reflect.DeepEqual(oldStatus, status) { s.podStatuses[podFullName] = status s.podStatusChannel <- podStatusSyncRequest{pod, status} } else { glog.V(3).Infof("Ignoring same pod status for %s - old: %s new: %s", podFullName, oldStatus, status) } }
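A minimal standalone sketch of the StartTime resolution above (hypothetical names, plain time.Time instead of util.Time): a previously cached start time wins, otherwise the pod's last observed start time is reused, otherwise the time defaults to now.

package statussketch

import "time"

// resolveStartTime mirrors the three-way fallback in SetPodStatus: a usable
// cached start time wins, then the pod's previously observed start time, then "now".
// This is an illustrative helper, not part of the kubelet.
func resolveStartTime(cached, observed *time.Time, now time.Time) *time.Time {
    if cached != nil && !cached.IsZero() {
        return cached
    }
    if observed != nil && !observed.IsZero() {
        return observed
    }
    return &now
}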
// syncBatch syncs pod statuses with the apiserver. func (s *statusManager) syncBatch() error { syncRequest := <-s.podStatusChannel pod := syncRequest.pod podFullName := kubecontainer.GetPodFullName(pod) status := syncRequest.status var err error statusPod := &api.Pod{ ObjectMeta: pod.ObjectMeta, } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) if err == nil { statusPod.Status = status _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", pod.Name) return nil } } // We failed to update status. In order to make sure we retry next time // we delete the cached value. This may result in an additional update, but // this is ok. s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err) }
// syncBatch syncs pod statuses with the apiserver. func (s *statusManager) syncBatch() error { syncRequest := <-s.podStatusChannel pod := syncRequest.pod podFullName := kubecontainer.GetPodFullName(pod) status := syncRequest.status var err error statusPod := &api.Pod{ ObjectMeta: pod.ObjectMeta, } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) if err == nil { statusPod.Status = status _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", pod.Name) return nil } } // We failed to update status. In order to make sure we retry next time // we delete the cached value. This may result in an additional update, but // this is ok. // Doing this synchronously will lead to a deadlock if the podStatusChannel // is full, and the pod worker holding the lock is waiting on this method // to clear the channel. Even if this delete never runs, subsequent container // changes on the node should trigger updates. go s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", pod.Name, err) }
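The deadlock the comment above describes can be shown in miniature. The sketch below is hypothetical (none of these types are the kubelet's): a producer blocks on a full buffered channel while still holding the cache lock, so a consumer that synchronously takes the same lock before draining the channel further can wedge; dispatching the cleanup on a goroutine, as syncBatch now does with DeletePodStatus, breaks the cycle.

package main

import (
    "fmt"
    "sync"
)

// statusCache stands in for statusManager: a mutex-guarded map plus a small
// buffered channel of sync requests.
type statusCache struct {
    mu      sync.Mutex
    entries map[string]int
    ch      chan string
}

// set updates the cache and enqueues a sync request. The send can block on a
// full channel while the lock is still held, just like SetPodStatus.
func (c *statusCache) set(key string, v int) {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.entries[key] = v
    c.ch <- key
}

// drop removes a cached entry; it needs the same lock that set holds.
func (c *statusCache) drop(key string) {
    c.mu.Lock()
    defer c.mu.Unlock()
    delete(c.entries, key)
}

// syncOne receives one request and, on a simulated failure, drops the cached
// entry asynchronously. Calling c.drop(key) synchronously here could deadlock
// if another producer refills the channel and blocks in set while holding mu.
func (c *statusCache) syncOne() {
    key := <-c.ch
    go c.drop(key) // the fix: never wait for the lock while the channel backs up
    fmt.Println("synced", key)
}

func main() {
    c := &statusCache{entries: map[string]int{}, ch: make(chan string, 1)}
    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            c.set(fmt.Sprintf("pod-%d", i), i)
        }(i)
    }
    for i := 0; i < 3; i++ {
        c.syncOne()
    }
    wg.Wait()
}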
// handleExec handles requests to run a command inside a container. func (s *Server) handleExec(w http.ResponseWriter, req *http.Request) { u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } podNamespace, podID, uid, container, err := parseContainerCoordinates(u.Path) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } stdinStream, stdoutStream, stderrStream, errorStream, conn, tty, ok := s.createStreams(w, req) if conn != nil { defer conn.Close() } if !ok { return } err = s.host.ExecInContainer(kubecontainer.GetPodFullName(pod), uid, container, u.Query()[api.ExecCommandParamm], stdinStream, stdoutStream, stderrStream, tty) if err != nil { msg := fmt.Sprintf("Error executing command in container: %v", err) glog.Error(msg) errorStream.Write([]byte(msg)) } }
// handleRun handles requests to run a command inside a container. func (s *Server) handleRun(w http.ResponseWriter, req *http.Request) { u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } podNamespace, podID, uid, container, err := parseContainerCoordinates(u.Path) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } command := strings.Split(u.Query().Get("cmd"), " ") data, err := s.host.RunInContainer(kubecontainer.GetPodFullName(pod), uid, container, command) if err != nil { s.error(w, err) return } w.Header().Add("Content-type", "text/plain") w.Write(data) }
// handlePodStatus handles podInfo requests against the Kubelet func (s *Server) handlePodStatus(w http.ResponseWriter, req *http.Request, versioned bool) { u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } podID := u.Query().Get("podID") podNamespace := u.Query().Get("podNamespace") if len(podID) == 0 { http.Error(w, "Missing 'podID=' query entry.", http.StatusBadRequest) return } if len(podNamespace) == 0 { http.Error(w, "Missing 'podNamespace=' query entry.", http.StatusBadRequest) return } pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } status, err := s.host.GetPodStatus(kubecontainer.GetPodFullName(pod)) if err != nil { s.error(w, err) return } data, err := exportPodStatus(status, versioned) if err != nil { s.error(w, err) return } w.Header().Add("Content-type", "application/json") w.Write(data) }
func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventRecorder) (filtered []*api.Pod) { names := util.StringSet{} for i, pod := range pods { var errlist []error if errs := validation.ValidatePod(pod); len(errs) != 0 { errlist = append(errlist, errs...) // If validation fails, don't trust it any further - // even Name could be bad. } else { name := kubecontainer.GetPodFullName(pod) if names.Has(name) { errlist = append(errlist, fielderrors.NewFieldDuplicate("name", pod.Name)) } else { names.Insert(name) } } if len(errlist) > 0 { name := bestPodIdentString(pod) err := utilerrors.NewAggregate(errlist) glog.Warningf("Pod[%d] (%s) from %s failed validation, ignoring: %v", i+1, name, source, err) recorder.Eventf(pod, "failedValidation", "Error validating pod %s from %s, ignoring: %v", name, source, err) continue } filtered = append(filtered, pod) } return }
func (fmc *fakeMirrorClient) CreateMirrorPod(pod *api.Pod) error { fmc.mirrorPodLock.Lock() defer fmc.mirrorPodLock.Unlock() podFullName := kubecontainer.GetPodFullName(pod) fmc.mirrorPods.Insert(podFullName) fmc.createCounts[podFullName]++ return nil }
func TestNewStatus(t *testing.T) { syncer := newTestStatusManager() syncer.SetPodStatus(testPod, getRandomPodStatus()) verifyUpdates(t, syncer, 1) status, _ := syncer.GetPodStatus(kubecontainer.GetPodFullName(testPod)) if status.StartTime.IsZero() { t.Errorf("SetPodStatus did not set a proper start time value") } }
func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) { podFullName := kubecontainer.GetPodFullName(pod) s.podStatusesLock.Lock() defer s.podStatusesLock.Unlock() oldStatus, found := s.podStatuses[podFullName] if !found || !reflect.DeepEqual(oldStatus, status) { s.podStatuses[podFullName] = status s.podStatusChannel <- podStatusSyncRequest{pod, status} } else { glog.V(3).Infof("Ignoring same pod status for %s - old: %s new: %s", podFullName, oldStatus, status) } }
// isPodRunning returns true if all containers of a manifest are running. func (kl *Kubelet) isPodRunning(pod *api.Pod, runningPod container.Pod) (bool, error) { status, err := kl.containerRuntime.GetPodStatus(pod) if err != nil { glog.Infof("Failed to get the status of pod %q: %v", kubecontainer.GetPodFullName(pod), err) return false, err } for _, st := range status.ContainerStatuses { if st.State.Running == nil { glog.Infof("Container %q not running: %#v", st.Name, st.State) return false, nil } } return true, nil }
// If the UID belongs to a mirror pod, this maps it to the UID of its static pod. // Otherwise, it returns the original UID. All public-facing functions should // perform this translation for UIDs because a user may provide a mirror pod UID, // which is not recognized by internal Kubelet functions. func (pm *basicPodManager) TranslatePodUID(uid types.UID) types.UID { if uid == "" { return uid } pm.lock.RLock() defer pm.lock.RUnlock() if mirrorPod, ok := pm.mirrorPodByUID[uid]; ok { podFullName := kubecontainer.GetPodFullName(mirrorPod) if pod, ok := pm.podByFullName[podFullName]; ok { return pod.UID } } return uid }
func TestChangedStatusKeepsStartTime(t *testing.T) { syncer := newTestStatusManager() now := util.Now() firstStatus := getRandomPodStatus() firstStatus.StartTime = &now syncer.SetPodStatus(testPod, firstStatus) syncer.SetPodStatus(testPod, getRandomPodStatus()) verifyUpdates(t, syncer, 2) finalStatus, _ := syncer.GetPodStatus(kubecontainer.GetPodFullName(testPod)) if finalStatus.StartTime.IsZero() { t.Errorf("StartTime should not be zero") } if !finalStatus.StartTime.Time.Equal(now.Time) { t.Errorf("Expected %v, but got %v", now.Time, finalStatus.StartTime.Time) } }
func TestNewStatusPreservesPodStartTime(t *testing.T) { syncer := newTestStatusManager() pod := &api.Pod{ ObjectMeta: api.ObjectMeta{ UID: "12345678", Name: "foo", Namespace: "new", }, Status: api.PodStatus{}, } now := util.Now() startTime := util.NewTime(now.Time.Add(-1 * time.Minute)) pod.Status.StartTime = &startTime syncer.SetPodStatus(pod, getRandomPodStatus()) status, _ := syncer.GetPodStatus(kubecontainer.GetPodFullName(pod)) if !status.StartTime.Time.Equal(startTime.Time) { t.Errorf("Unexpected start time, expected %v, actual %v", startTime, status.StartTime) } }
// probeReadiness probes and sets the readiness of a container. // If the initial delay on the readiness probe has not passed, we set readiness to false. func (pb *prober) probeReadiness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) { var ready probe.Result var output string var err error p := container.ReadinessProbe if p == nil { ready = probe.Success } else if time.Now().Unix()-createdAt < p.InitialDelaySeconds { ready = probe.Failure } else { ready, output, err = pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries) } ctrName := fmt.Sprintf("%s:%s", kubecontainer.GetPodFullName(pod), container.Name) if err != nil || ready == probe.Failure { // Readiness failed in one way or another. pb.readinessManager.SetReadiness(containerID, false) ref, ok := pb.refManager.GetRef(containerID) if !ok { glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name) } if err != nil { glog.V(1).Infof("readiness probe for %q errored: %v", ctrName, err) if ok { pb.recorder.Eventf(ref, "unhealthy", "Readiness probe errored: %v", err) } return } else { // ready != probe.Success glog.V(1).Infof("Readiness probe for %q failed (%v): %s", ctrName, ready, output) if ok { pb.recorder.Eventf(ref, "unhealthy", "Readiness probe failed: %s", output) } return } } if ready == probe.Success { pb.readinessManager.SetReadiness(containerID, true) } glog.V(3).Infof("Readiness probe for %q succeeded", ctrName) }
func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) { podFullName := kubecontainer.GetPodFullName(pod) s.podStatusesLock.Lock() defer s.podStatusesLock.Unlock() oldStatus, found := s.podStatuses[podFullName] // ensure that the start time does not change across updates. if found && oldStatus.StartTime != nil { status.StartTime = oldStatus.StartTime } // if the status has no start time, we need to set an initial time // TODO(yujuhong): Consider setting StartTime when generating the pod // status instead, which would allow statusManager to become a simple cache // again. if status.StartTime.IsZero() { if pod.Status.StartTime.IsZero() { // the pod did not have a previously recorded value so set to now now := util.Now() status.StartTime = &now } else { // the pod had a recorded value, but the kubelet restarted so we need to rebuild cache // based on last observed value status.StartTime = pod.Status.StartTime } } // TODO: Holding a lock during blocking operations is dangerous. Refactor so this isn't necessary. // The intent here is to prevent concurrent updates to a pod's status from // clobbering each other so the phase of a pod progresses monotonically. // Currently this routine is not called for the same pod from multiple // workers and/or the kubelet but dropping the lock before sending the // status down the channel feels like an easy way to get a bullet in foot. if !found || !isStatusEqual(&oldStatus, &status) { s.podStatuses[podFullName] = status s.podStatusChannel <- podStatusSyncRequest{pod, status} } else { glog.V(3).Infof("Ignoring same pod status for %s - old: %s new: %s", podFullName, oldStatus, status) } }
func (pm *basicPodManager) setPods(newPods []*api.Pod) { podByUID := make(map[types.UID]*api.Pod) mirrorPodByUID := make(map[types.UID]*api.Pod) podByFullName := make(map[string]*api.Pod) mirrorPodByFullName := make(map[string]*api.Pod) for _, pod := range newPods { podFullName := kubecontainer.GetPodFullName(pod) if isMirrorPod(pod) { mirrorPodByUID[pod.UID] = pod mirrorPodByFullName[podFullName] = pod } else { podByUID[pod.UID] = pod podByFullName[podFullName] = pod } } pm.podByUID = podByUID pm.podByFullName = podByFullName pm.mirrorPodByUID = mirrorPodByUID pm.mirrorPodByFullName = mirrorPodByFullName }
// probeLiveness probes the liveness of a container. // If the initial delay on the liveness probe has not passed since container creation, the probe returns probe.Success. func (pb *prober) probeLiveness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) { var live probe.Result var output string var err error p := container.LivenessProbe if p == nil { return probe.Success, nil } if time.Now().Unix()-createdAt < p.InitialDelaySeconds { return probe.Success, nil } else { live, output, err = pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries) } ctrName := fmt.Sprintf("%s:%s", kubecontainer.GetPodFullName(pod), container.Name) if err != nil || live != probe.Success { // Liveness failed in one way or another. ref, ok := pb.refManager.GetRef(containerID) if !ok { glog.Warningf("No ref for pod %q - '%v'", containerID, container.Name) } if err != nil { glog.V(1).Infof("Liveness probe for %q errored: %v", ctrName, err) if ok { pb.recorder.Eventf(ref, "unhealthy", "Liveness probe errored: %v", err) } return probe.Unknown, err } else { // live != probe.Success glog.V(1).Infof("Liveness probe for %q failed (%v): %s", ctrName, live, output) if ok { pb.recorder.Eventf(ref, "unhealthy", "Liveness probe failed: %s", output) } return live, nil } } glog.V(3).Infof("Liveness probe for %q succeeded", ctrName) return probe.Success, nil }
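Both probe paths above gate on the same initial-delay check, differing only in the default they assume while the delay has not elapsed (readiness defaults to Failure, liveness to Success). A hypothetical standalone version of that gate:

package probesketch

import "time"

// withinInitialDelay reports whether a probe should still be skipped, given
// the container creation time in Unix seconds and the configured delay.
// This is an illustrative helper, not part of the prober.
func withinInitialDelay(createdAt, initialDelaySeconds int64, now time.Time) bool {
    return now.Unix()-createdAt < initialDelaySeconds
}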
// SyncPod syncs the running pod to match the specified desired pod. func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error { podFullName := kubecontainer.GetPodFullName(pod) if len(runningPod.Containers) == 0 { glog.V(4).Infof("Pod %q is not running, will start it", podFullName) return r.RunPod(pod) } // Add references to all containers. unidentifiedContainers := make(map[types.UID]*kubecontainer.Container) for _, c := range runningPod.Containers { unidentifiedContainers[c.ID] = c } restartPod := false for _, container := range pod.Spec.Containers { expectedHash := kubecontainer.HashContainer(&container) c := runningPod.FindContainerByName(container.Name) if c == nil { if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus, r.readinessManager) { glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container) // TODO(yifan): Containers in one pod are fate-sharing at this moment, see: // https://github.com/appc/spec/issues/276. restartPod = true break } continue } // TODO(yifan): Take care of host network change. containerChanged := c.Hash != 0 && c.Hash != expectedHash if containerChanged { glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, c.Hash, expectedHash) restartPod = true break } result, err := r.prober.Probe(pod, podStatus, container, string(c.ID), c.Created) // TODO(vmarmol): examine this logic. if err == nil && result != probe.Success { glog.Infof("Pod %q container %q is unhealthy (probe result: %v), it will be killed and re-created.", podFullName, container.Name, result) restartPod = true break } if err != nil { glog.V(2).Infof("Probe container %q failed: %v", container.Name, err) } delete(unidentifiedContainers, c.ID) } // If there are any unidentified containers, restart the pod. if len(unidentifiedContainers) > 0 { restartPod = true } if restartPod { // TODO(yifan): Handle network plugin. if err := r.KillPod(runningPod); err != nil { return err } if err := r.RunPod(pod); err != nil { return err } } return nil }
// GetPodStatus returns docker related status for all containers in the pod as // well as the infrastructure container. func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) { podFullName := kubecontainer.GetPodFullName(pod) uid := pod.UID manifest := pod.Spec oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers)) lastObservedTime := make(map[string]util.Time, len(pod.Spec.Containers)) for _, status := range pod.Status.ContainerStatuses { oldStatuses[status.Name] = status if status.LastTerminationState.Termination != nil { lastObservedTime[status.Name] = status.LastTerminationState.Termination.FinishedAt } } var podStatus api.PodStatus statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers)) expectedContainers := make(map[string]api.Container) for _, container := range manifest.Containers { expectedContainers[container.Name] = container } expectedContainers[PodInfraContainerName] = api.Container{} containers, err := dm.client.ListContainers(docker.ListContainersOptions{All: true}) if err != nil { return nil, err } containerDone := util.NewStringSet() // Loop through list of running and exited docker containers to construct // the statuses. We assume docker returns a list of containers sorted in // reverse by time. for _, value := range containers { if len(value.Names) == 0 { continue } dockerName, _, err := ParseDockerName(value.Names[0]) if err != nil { continue } if dockerName.PodFullName != podFullName { continue } if uid != "" && dockerName.PodUID != uid { continue } dockerContainerName := dockerName.ContainerName c, found := expectedContainers[dockerContainerName] if !found { continue } terminationMessagePath := c.TerminationMessagePath if containerDone.Has(dockerContainerName) { continue } var terminationState *api.ContainerState = nil // Inspect the container. result := dm.inspectContainer(value.ID, dockerContainerName, terminationMessagePath) if result.err != nil { return nil, result.err } else if result.status.State.Termination != nil { terminationState = &result.status.State } if containerStatus, found := statuses[dockerContainerName]; found { if containerStatus.LastTerminationState.Termination == nil && terminationState != nil { // Populate the last termination state. containerStatus.LastTerminationState = *terminationState } count := true // Only count dead containers terminated after last time we observed, if lastObservedTime, ok := lastObservedTime[dockerContainerName]; ok { if terminationState != nil && terminationState.Termination.FinishedAt.After(lastObservedTime.Time) { count = false } else { // The container finished before the last observation. No // need to examine/count the older containers. Mark the // container name as done. containerDone.Insert(dockerContainerName) } } if count { containerStatus.RestartCount += 1 } continue } if dockerContainerName == PodInfraContainerName { // Found network container if result.status.State.Running != nil { podStatus.PodIP = result.ip } } else { // Add user container information. if oldStatus, found := oldStatuses[dockerContainerName]; found { // Use the last observed restart count if it's available. result.status.RestartCount = oldStatus.RestartCount } statuses[dockerContainerName] = &result.status } } // Handle the containers for which we cannot find any associated active or // dead docker containers. 
for _, container := range manifest.Containers { if _, found := statuses[container.Name]; found { continue } var containerStatus api.ContainerStatus containerStatus.Name = container.Name containerStatus.Image = container.Image if oldStatus, found := oldStatuses[container.Name]; found { // Some states may be lost due to GC; apply the last observed // values if possible. containerStatus.RestartCount = oldStatus.RestartCount containerStatus.LastTerminationState = oldStatus.LastTerminationState } // Check whether the image is ready on the node. // TODO: If we integrate DockerPuller into DockerManager, we can // record the pull failure and eliminate the image checking below. image := container.Image // TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists _, err := dm.client.InspectImage(image) if err == nil { containerStatus.State.Waiting = &api.ContainerStateWaiting{ Reason: fmt.Sprintf("Image: %s is ready, container is creating", image), } } else if err == docker.ErrNoSuchImage { containerStatus.State.Waiting = &api.ContainerStateWaiting{ Reason: fmt.Sprintf("Image: %s is not ready on the node", image), } } statuses[container.Name] = &containerStatus } podStatus.ContainerStatuses = make([]api.ContainerStatus, 0) for containerName, status := range statuses { if status.State.Waiting != nil { // For containers in the waiting state, fill in a specific reason if it is recorded. if reason, ok := dm.reasonCache.Get(uid, containerName); ok { status.State.Waiting.Reason = reason } } podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status) } return &podStatus, nil }
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest. // TODO(yifan): Use the RunContainerOptions generated by GenerateRunContainerOptions(). func (r *runtime) makePodManifest(pod *api.Pod) (*appcschema.PodManifest, error) { var globalPortMappings []kubecontainer.PortMapping manifest := appcschema.BlankPodManifest() for _, c := range pod.Spec.Containers { imgManifest, err := r.getImageManifest(c.Image) if err != nil { return nil, err } if imgManifest.App == nil { return nil, fmt.Errorf("no app section in image manifest for image: %q", c.Image) } img, err := r.getImageByName(c.Image) if err != nil { return nil, err } hash, err := appctypes.NewHash(img.id) if err != nil { return nil, err } opts, err := r.generator.GenerateRunContainerOptions(pod, &c) if err != nil { return nil, err } globalPortMappings = append(globalPortMappings, opts.PortMappings...) if err := setApp(imgManifest.App, &c, opts); err != nil { return nil, err } manifest.Apps = append(manifest.Apps, appcschema.RuntimeApp{ // TODO(yifan): We should allow app name to be different with // image name. See https://github.com/coreos/rkt/pull/640. Name: imgManifest.Name, Image: appcschema.RuntimeImage{ID: *hash}, App: imgManifest.App, }) } volumeMap, ok := r.volumeGetter.GetVolumes(pod.UID) if !ok { return nil, fmt.Errorf("cannot get the volumes for pod %q", kubecontainer.GetPodFullName(pod)) } // Set global volumes. for name, volume := range volumeMap { volName, err := appctypes.NewACName(name) if err != nil { return nil, fmt.Errorf("cannot use the volume's name %q as ACName: %v", name, err) } manifest.Volumes = append(manifest.Volumes, appctypes.Volume{ Name: *volName, Kind: "host", Source: volume.GetPath(), }) } // Set global ports. for _, port := range globalPortMappings { name, err := appctypes.SanitizeACName(port.Name) if err != nil { return nil, fmt.Errorf("cannot use the port's name %q as ACName: %v", port.Name, err) } portName := appctypes.MustACName(name) manifest.Ports = append(manifest.Ports, appctypes.ExposedPort{ Name: *portName, HostPort: uint(port.HostPort), }) } // TODO(yifan): Set pod-level isolators once it's supported in kubernetes. return manifest, nil }
// serveStats implements stats logic. func (s *Server) serveStats(w http.ResponseWriter, req *http.Request) { // Stats requests are in the following forms: // // /stats/ : Root container stats // /stats/container/ : Non-Kubernetes container stats (returns a map) // /stats/<pod name>/<container name> : Stats for Kubernetes pod/container // /stats/<namespace>/<pod name>/<uid>/<container name> : Stats for Kubernetes namespace/pod/uid/container components := strings.Split(strings.TrimPrefix(path.Clean(req.URL.Path), "/"), "/") var stats interface{} var err error var query StatsRequest query.NumStats = 60 err = json.NewDecoder(req.Body).Decode(&query) if err != nil && err != io.EOF { s.error(w, err) return } cadvisorRequest := cadvisorApi.ContainerInfoRequest{ NumStats: query.NumStats, Start: query.Start, End: query.End, } switch len(components) { case 1: // Root container stats. var statsMap map[string]*cadvisorApi.ContainerInfo statsMap, err = s.host.GetRawContainerInfo("/", &cadvisorRequest, false) stats = statsMap["/"] case 2: // Non-Kubernetes container stats. if components[1] != "container" { http.Error(w, fmt.Sprintf("unknown stats request type %q", components[1]), http.StatusNotFound) return } containerName := path.Join("/", query.ContainerName) stats, err = s.host.GetRawContainerInfo(containerName, &cadvisorRequest, query.Subcontainers) case 3: // Backward compatibility without uid information, does not support namespace pod, ok := s.host.GetPodByName(api.NamespaceDefault, components[1]) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } stats, err = s.host.GetContainerInfo(kubecontainer.GetPodFullName(pod), "", components[2], &cadvisorRequest) case 5: pod, ok := s.host.GetPodByName(components[1], components[2]) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } stats, err = s.host.GetContainerInfo(kubecontainer.GetPodFullName(pod), types.UID(components[3]), components[4], &cadvisorRequest) default: http.Error(w, fmt.Sprintf("Unknown resource: %v", components), http.StatusNotFound) return } switch err { case nil: break case ErrContainerNotFound: http.Error(w, err.Error(), http.StatusNotFound) return default: s.error(w, err) return } if stats == nil { fmt.Fprint(w, "{}") return } data, err := json.Marshal(stats) if err != nil { s.error(w, err) return } w.Header().Add("Content-type", "application/json") w.Write(data) return }
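For reference, a small standalone illustration (hypothetical, not part of the server) of how the path splitting above maps the documented request forms onto the components slice whose length drives the switch:

package main

import (
    "fmt"
    "path"
    "strings"
)

// splitStatsPath applies the same cleanup and split as serveStats.
func splitStatsPath(p string) []string {
    return strings.Split(strings.TrimPrefix(path.Clean(p), "/"), "/")
}

func main() {
    fmt.Println(splitStatsPath("/stats/"))                            // [stats]                   -> root container stats
    fmt.Println(splitStatsPath("/stats/container/"))                  // [stats container]         -> non-Kubernetes container stats
    fmt.Println(splitStatsPath("/stats/mypod/mycontainer"))           // [stats mypod mycontainer] -> legacy pod/container form
    fmt.Println(splitStatsPath("/stats/ns/mypod/poduid/mycontainer")) // five components           -> namespace/pod/uid/container
}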
func (dm *DockerManager) runContainer(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions, ref *api.ObjectReference) (string, error) { dockerName := KubeletContainerName{ PodFullName: kubecontainer.GetPodFullName(pod), PodUID: pod.UID, ContainerName: container.Name, } exposedPorts, portBindings := makePortsAndBindings(container) // TODO(vmarmol): Handle better. // Cap hostname at 63 chars (the specification is 64 bytes, which is 63 chars plus the null terminating char). const hostnameMaxLen = 63 containerHostname := pod.Name if len(containerHostname) > hostnameMaxLen { containerHostname = containerHostname[:hostnameMaxLen] } dockerOpts := docker.CreateContainerOptions{ Name: BuildDockerName(dockerName, container), Config: &docker.Config{ Env: opts.Envs, ExposedPorts: exposedPorts, Hostname: containerHostname, Image: container.Image, Memory: container.Resources.Limits.Memory().Value(), CPUShares: milliCPUToShares(container.Resources.Limits.Cpu().MilliValue()), WorkingDir: container.WorkingDir, }, } setEntrypointAndCommand(container, &dockerOpts) glog.V(3).Infof("Container %v/%v/%v: setting entrypoint \"%v\" and command \"%v\"", pod.Namespace, pod.Name, container.Name, dockerOpts.Config.Entrypoint, dockerOpts.Config.Cmd) dockerContainer, err := dm.client.CreateContainer(dockerOpts) if err != nil { if ref != nil { dm.recorder.Eventf(ref, "failed", "Failed to create docker container with error: %v", err) } return "", err } if ref != nil { dm.recorder.Eventf(ref, "created", "Created with docker id %v", dockerContainer.ID) } // We create and mount the log file here (not in the kubelet) because // the file's location depends on the ID of the container, and we need to create and // mount the file before actually starting the container. // TODO(yifan): Consider pulling this logic out since we might need to reuse it in // other container runtimes. if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 { containerLogPath := path.Join(opts.PodContainerDir, dockerContainer.ID) fs, err := os.Create(containerLogPath) if err != nil { // TODO: Clean up the previously created dir? Return the error? glog.Errorf("Error on creating termination-log file %q: %v", containerLogPath, err) } else { fs.Close() // Close immediately; we're just doing a `touch` here b := fmt.Sprintf("%s:%s", containerLogPath, container.TerminationMessagePath) opts.Binds = append(opts.Binds, b) } } privileged := false if capabilities.Get().AllowPrivileged { privileged = container.Privileged } else if container.Privileged { return "", fmt.Errorf("container requested privileged mode, but it is disallowed globally.") } capAdd, capDrop := makeCapabilites(container.Capabilities.Add, container.Capabilities.Drop) hc := &docker.HostConfig{ PortBindings: portBindings, Binds: opts.Binds, NetworkMode: opts.NetMode, IpcMode: opts.IpcMode, Privileged: privileged, CapAdd: capAdd, CapDrop: capDrop, } if len(opts.DNS) > 0 { hc.DNS = opts.DNS } if len(opts.DNSSearch) > 0 { hc.DNSSearch = opts.DNSSearch } if err = dm.client.StartContainer(dockerContainer.ID, hc); err != nil { if ref != nil { dm.recorder.Eventf(ref, "failed", "Failed to start with docker id %v with error: %v", dockerContainer.ID, err) } return "", err } if ref != nil { dm.recorder.Eventf(ref, "started", "Started with docker id %v", dockerContainer.ID) } return dockerContainer.ID, nil }
func (s *Server) handlePortForward(w http.ResponseWriter, req *http.Request) { u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } podNamespace, podID, uid, err := parsePodCoordinates(u.Path) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } streamChan := make(chan httpstream.Stream, 1) upgrader := spdy.NewResponseUpgrader() conn := upgrader.UpgradeResponse(w, req, func(stream httpstream.Stream) error { portString := stream.Headers().Get(api.PortHeader) port, err := strconv.ParseUint(portString, 10, 16) if err != nil { return fmt.Errorf("Unable to parse '%s' as a port: %v", portString, err) } if port < 1 { return fmt.Errorf("Port '%d' must be greater than 0", port) } streamChan <- stream return nil }) if conn == nil { return } defer conn.Close() conn.SetIdleTimeout(s.host.StreamingConnectionIdleTimeout()) var dataStreamLock sync.Mutex dataStreamChans := make(map[string]chan httpstream.Stream) Loop: for { select { case <-conn.CloseChan(): break Loop case stream := <-streamChan: streamType := stream.Headers().Get(api.StreamType) port := stream.Headers().Get(api.PortHeader) dataStreamLock.Lock() switch streamType { case "error": ch := make(chan httpstream.Stream) dataStreamChans[port] = ch go waitForPortForwardDataStreamAndRun(kubecontainer.GetPodFullName(pod), uid, stream, ch, s.host) case "data": ch, ok := dataStreamChans[port] if ok { ch <- stream delete(dataStreamChans, port) } else { glog.Errorf("Unable to locate data stream channel for port %s", port) } default: glog.Errorf("streamType header must be 'error' or 'data', got: '%s'", streamType) stream.Reset() } dataStreamLock.Unlock() } } }
// handleExec handles requests to run a command inside a container. func (s *Server) handleExec(w http.ResponseWriter, req *http.Request) { u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } podNamespace, podID, uid, container, err := parseContainerCoordinates(u.Path) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, "Pod does not exist", http.StatusNotFound) return } req.ParseForm() // start at 1 for error stream expectedStreams := 1 if req.FormValue(api.ExecStdinParam) == "1" { expectedStreams++ } if req.FormValue(api.ExecStdoutParam) == "1" { expectedStreams++ } tty := req.FormValue(api.ExecTTYParam) == "1" if !tty && req.FormValue(api.ExecStderrParam) == "1" { expectedStreams++ } if expectedStreams == 1 { http.Error(w, "You must specify at least 1 of stdin, stdout, stderr", http.StatusBadRequest) return } streamCh := make(chan httpstream.Stream) upgrader := spdy.NewResponseUpgrader() conn := upgrader.UpgradeResponse(w, req, func(stream httpstream.Stream) error { streamCh <- stream return nil }) // from this point on, we can no longer call methods on w if conn == nil { // The upgrader is responsible for notifying the client of any errors that // occurred during upgrading. All we can do is return here at this point // if we weren't successful in upgrading. return } defer conn.Close() conn.SetIdleTimeout(s.host.StreamingConnectionIdleTimeout()) // TODO make it configurable? expired := time.NewTimer(streamCreationTimeout) var errorStream, stdinStream, stdoutStream, stderrStream httpstream.Stream receivedStreams := 0 WaitForStreams: for { select { case stream := <-streamCh: streamType := stream.Headers().Get(api.StreamType) switch streamType { case api.StreamTypeError: errorStream = stream defer errorStream.Reset() receivedStreams++ case api.StreamTypeStdin: stdinStream = stream receivedStreams++ case api.StreamTypeStdout: stdoutStream = stream receivedStreams++ case api.StreamTypeStderr: stderrStream = stream receivedStreams++ default: glog.Errorf("Unexpected stream type: '%s'", streamType) } if receivedStreams == expectedStreams { break WaitForStreams } case <-expired.C: // TODO find a way to return the error to the user. Maybe use a separate // stream to report errors? glog.Error("Timed out waiting for client to create streams") return } } if stdinStream != nil { // close our half of the input stream, since we won't be writing to it stdinStream.Close() } err = s.host.ExecInContainer(kubecontainer.GetPodFullName(pod), uid, container, u.Query()[api.ExecCommandParamm], stdinStream, stdoutStream, stderrStream, tty) if err != nil { msg := fmt.Sprintf("Error executing command in container: %v", err) glog.Error(msg) errorStream.Write([]byte(msg)) } }
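A hypothetical standalone restatement of the stream-count arithmetic above: the error stream is always expected, stdin and stdout each add one, and stderr only counts when the client did not request a TTY.

package execsketch

// expectedStreamCount is an illustrative helper mirroring handleExec's counting;
// it is not part of the server.
func expectedStreamCount(stdin, stdout, stderr, tty bool) int {
    n := 1 // error stream
    if stdin {
        n++
    }
    if stdout {
        n++
    }
    if !tty && stderr {
        n++
    }
    return n
}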
// async continuation of LaunchTask func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) { deleteTask := func() { k.lock.Lock() defer k.lock.Unlock() delete(k.tasks, taskId) k.resetSuicideWatch(driver) } // TODO(k8s): use Pods interface for binding once clusters are upgraded // return b.Pods(binding.Namespace).Bind(binding) if pod.Spec.NodeName == "" { //HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go binding := &api.Binding{ ObjectMeta: api.ObjectMeta{ Namespace: pod.Namespace, Name: pod.Name, Annotations: make(map[string]string), }, Target: api.ObjectReference{ Kind: "Node", Name: pod.Annotations[meta.BindingHostKey], }, } // forward the annotations that the scheduler wants to apply for k, v := range pod.Annotations { binding.Annotations[k] = v } // create binding on apiserver log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations) ctx := api.WithNamespace(api.NewContext(), binding.Namespace) err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error() if err != nil { deleteTask() k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, messages.CreateBindingFailure)) return } } else { // post annotations update to apiserver patch := struct { Metadata struct { Annotations map[string]string `json:"annotations"` } `json:"metadata"` }{} patch.Metadata.Annotations = pod.Annotations patchJson, _ := json.Marshal(patch) log.V(4).Infof("Patching annotations %v of pod %v/%v: %v", pod.Annotations, pod.Namespace, pod.Name, string(patchJson)) err := k.client.Patch(api.MergePatchType).RequestURI(pod.SelfLink).Body(patchJson).Do().Error() if err != nil { log.Errorf("Error updating annotations of ready-to-launch pod %v/%v: %v", pod.Namespace, pod.Name, err) deleteTask() k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, messages.AnnotationUpdateFailure)) return } } podFullName := container.GetPodFullName(pod) // allow a recently failed-over scheduler the chance to recover the task/pod binding: // it may have failed and recovered before the apiserver is able to report the updated // binding information. replays of this status event will signal to the scheduler that // the apiserver should be up-to-date. data, err := json.Marshal(api.PodStatusResult{ ObjectMeta: api.ObjectMeta{ Name: podFullName, SelfLink: "/podstatusresult", }, }) if err != nil { deleteTask() log.Errorf("failed to marshal pod status result: %v", err) k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, err.Error())) return } k.lock.Lock() defer k.lock.Unlock() // Add the task. task, found := k.tasks[taskId] if !found { log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId) k.reportLostTask(driver, taskId, messages.LaunchTaskFailed) return } //TODO(jdef) check for duplicate pod name, if found send TASK_ERROR // from here on, we need to delete containers associated with the task // upon it going into a terminal state task.podName = podFullName k.pods[podFullName] = pod // send the latest snapshot of the set of pods to the kubelet via the pod update channel. // this results in the kubelet spinning up the new pod. 
update := kubelet.PodUpdate{Op: kubelet.SET} for _, p := range k.pods { update.Pods = append(update.Pods, p) } k.updateChan <- update statusUpdate := &mesos.TaskStatus{ TaskId: mutil.NewTaskID(taskId), State: mesos.TaskState_TASK_STARTING.Enum(), Message: proto.String(messages.CreateBindingSuccess), Data: data, } k.sendStatus(driver, statusUpdate) // Delay reporting 'task running' until container is up. psf := podStatusFunc(func() (*api.PodStatus, error) { status, err := k.podStatusFunc(k.kl, pod) if err != nil { return nil, err } status.Phase = kubelet.GetPhase(&pod.Spec, status.ContainerStatuses) hostIP, err := k.kl.GetHostIP() if err != nil { log.Errorf("Cannot get host IP: %v", err) } else { status.HostIP = hostIP.String() } return status, nil }) go k._launchTask(driver, taskId, podFullName, psf) }
// handleContainerLogs handles containerLogs request against the Kubelet func (s *Server) handleContainerLogs(w http.ResponseWriter, req *http.Request) { defer req.Body.Close() u, err := url.ParseRequestURI(req.RequestURI) if err != nil { s.error(w, err) return } parts := strings.Split(u.Path, "/") // req URI: /containerLogs/<podNamespace>/<podID>/<containerName> var podNamespace, podID, containerName string if len(parts) == 5 { podNamespace = parts[2] podID = parts[3] containerName = parts[4] } else { http.Error(w, "Unexpected path for command running", http.StatusBadRequest) return } if len(podID) == 0 { http.Error(w, `{"message": "Missing podID."}`, http.StatusBadRequest) return } if len(containerName) == 0 { http.Error(w, `{"message": "Missing container name."}`, http.StatusBadRequest) return } if len(podNamespace) == 0 { http.Error(w, `{"message": "Missing podNamespace."}`, http.StatusBadRequest) return } uriValues := u.Query() follow, _ := strconv.ParseBool(uriValues.Get("follow")) previous, _ := strconv.ParseBool(uriValues.Get("previous")) tail := uriValues.Get("tail") pod, ok := s.host.GetPodByName(podNamespace, podID) if !ok { http.Error(w, fmt.Sprintf("Pod %q does not exist", podID), http.StatusNotFound) return } // Check if containerName is valid. containerExists := false for _, container := range pod.Spec.Containers { if container.Name == containerName { containerExists = true } } if !containerExists { http.Error(w, fmt.Sprintf("Container %q not found in Pod %q", containerName, podID), http.StatusNotFound) return } if _, ok := w.(http.Flusher); !ok { s.error(w, fmt.Errorf("unable to convert %v into http.Flusher", w)) return } fw := flushwriter.Wrap(w) w.Header().Set("Transfer-Encoding", "chunked") w.WriteHeader(http.StatusOK) err = s.host.GetKubeletContainerLogs(kubecontainer.GetPodFullName(pod), containerName, tail, follow, previous, fw, fw) if err != nil { s.error(w, err) return } }
func (s *podStorage) merge(source string, change interface{}) (adds, updates, deletes *kubelet.PodUpdate) { s.podLock.Lock() defer s.podLock.Unlock() adds = &kubelet.PodUpdate{Op: kubelet.ADD} updates = &kubelet.PodUpdate{Op: kubelet.UPDATE} deletes = &kubelet.PodUpdate{Op: kubelet.REMOVE} pods := s.pods[source] if pods == nil { pods = make(map[string]*api.Pod) } update := change.(kubelet.PodUpdate) switch update.Op { case kubelet.ADD, kubelet.UPDATE: if update.Op == kubelet.ADD { glog.V(4).Infof("Adding new pods from source %s : %v", source, update.Pods) } else { glog.V(4).Infof("Updating pods from source %s : %v", source, update.Pods) } filtered := filterInvalidPods(update.Pods, source, s.recorder) for _, ref := range filtered { name := kubecontainer.GetPodFullName(ref) if existing, found := pods[name]; found { if !reflect.DeepEqual(existing.Spec, ref.Spec) { // this is an update existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } // this is a no-op continue } // this is an add if ref.Annotations == nil { ref.Annotations = make(map[string]string) } ref.Annotations[kubelet.ConfigSourceAnnotationKey] = source recordFirstSeenTime(ref) pods[name] = ref adds.Pods = append(adds.Pods, ref) } case kubelet.REMOVE: glog.V(4).Infof("Removing a pod %v", update) for _, value := range update.Pods { name := kubecontainer.GetPodFullName(value) if existing, found := pods[name]; found { // this is a delete delete(pods, name) deletes.Pods = append(deletes.Pods, existing) continue } // this is a no-op } case kubelet.SET: glog.V(4).Infof("Setting pods for source %s : %v", source, update) s.markSourceSet(source) // Clear the old map entries by just creating a new map oldPods := pods pods = make(map[string]*api.Pod) filtered := filterInvalidPods(update.Pods, source, s.recorder) for _, ref := range filtered { name := kubecontainer.GetPodFullName(ref) if existing, found := oldPods[name]; found { pods[name] = existing if !reflect.DeepEqual(existing.Spec, ref.Spec) { // this is an update existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } // this is a no-op continue } if ref.Annotations == nil { ref.Annotations = make(map[string]string) } ref.Annotations[kubelet.ConfigSourceAnnotationKey] = source recordFirstSeenTime(ref) pods[name] = ref adds.Pods = append(adds.Pods, ref) } for name, existing := range oldPods { if _, found := pods[name]; !found { // this is a delete deletes.Pods = append(deletes.Pods, existing) } } default: glog.Warningf("Received invalid update type: %v", update) } s.pods[source] = pods return adds, updates, deletes }