func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventRecorder) (filtered []*api.Pod) {
	names := sets.String{}
	for i, pod := range pods {
		var errlist []error
		if errs := validation.ValidatePod(pod); len(errs) != 0 {
			errlist = append(errlist, errs...)
			// If validation fails, don't trust it any further -
			// even Name could be bad.
		} else {
			name := kubecontainer.GetPodFullName(pod)
			if names.Has(name) {
				errlist = append(errlist, fielderrors.NewFieldDuplicate("name", pod.Name))
			} else {
				names.Insert(name)
			}
		}
		if len(errlist) > 0 {
			name := bestPodIdentString(pod)
			err := utilerrors.NewAggregate(errlist)
			glog.Warningf("Pod[%d] (%s) from %s failed validation, ignoring: %v", i+1, name, source, err)
			recorder.Eventf(pod, "FailedValidation", "Error validating pod %s from %s, ignoring: %v", name, source, err)
			continue
		}
		filtered = append(filtered, pod)
	}
	return
}
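// Hypothetical usage sketch (not from the source tree): filterInvalidPods keeps
// the first pod for a given full name and drops later duplicates, emitting a
// FailedValidation event for each rejected pod. makeValidPod is an assumed
// helper that returns a pod passing validation.ValidatePod, and `recorder` is
// whatever record.EventRecorder stub the caller provides.
func exampleFilterInvalidPods(recorder record.EventRecorder) {
	a := makeValidPod("web", "ns") // hypothetical helper
	b := makeValidPod("web", "ns") // same name+namespace => same pod full name
	filtered := filterInvalidPods([]*api.Pod{a, b}, "file", recorder)
	fmt.Println(len(filtered)) // 1: `b` was rejected with a duplicate "name" field error
}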
func (fmc *fakeMirrorClient) CreateMirrorPod(pod *api.Pod) error {
	fmc.mirrorPodLock.Lock()
	defer fmc.mirrorPodLock.Unlock()
	podFullName := kubecontainer.GetPodFullName(pod)
	fmc.mirrorPods.Insert(podFullName)
	fmc.createCounts[podFullName]++
	return nil
}
func (pm *basicPodManager) UpdatePod(pod *api.Pod) {
	pm.lock.Lock()
	defer pm.lock.Unlock()
	podFullName := kubecontainer.GetPodFullName(pod)
	if isMirrorPod(pod) {
		pm.mirrorPodByUID[pod.UID] = pod
		pm.mirrorPodByFullName[podFullName] = pod
	} else {
		pm.podByUID[pod.UID] = pod
		pm.podByFullName[podFullName] = pod
	}
}
func (pm *basicPodManager) DeletePod(pod *api.Pod) {
	pm.lock.Lock()
	defer pm.lock.Unlock()
	podFullName := kubecontainer.GetPodFullName(pod)
	if isMirrorPod(pod) {
		delete(pm.mirrorPodByUID, pod.UID)
		delete(pm.mirrorPodByFullName, podFullName)
	} else {
		delete(pm.podByUID, pod.UID)
		delete(pm.podByFullName, podFullName)
	}
}
// getPortForward handles a new restful port forward request. It determines the
// pod name and uid and then calls ServePortForward.
func (s *Server) getPortForward(request *restful.Request, response *restful.Response) {
	podNamespace, podID, uid := getPodCoordinates(request)
	pod, ok := s.host.GetPodByName(podNamespace, podID)
	if !ok {
		response.WriteError(http.StatusNotFound, fmt.Errorf("pod does not exist"))
		return
	}
	podName := kubecontainer.GetPodFullName(pod)

	ServePortForward(response.ResponseWriter, request.Request, s.host, podName, uid, s.host.StreamingConnectionIdleTimeout(), defaultStreamCreationTimeout)
}
// isPodRunning returns true if all containers of the pod are running.
func (kl *Kubelet) isPodRunning(pod *api.Pod, runningPod container.Pod) (bool, error) {
	status, err := kl.containerRuntime.GetPodStatus(pod)
	if err != nil {
		glog.Infof("Failed to get the status of pod %q: %v", kubecontainer.GetPodFullName(pod), err)
		return false, err
	}
	for _, st := range status.ContainerStatuses {
		if st.State.Running == nil {
			glog.Infof("Container %q not running: %#v", st.Name, st.State)
			return false, nil
		}
	}
	return true, nil
}
// getRun handles requests to run a command inside a container.
func (s *Server) getRun(request *restful.Request, response *restful.Response) {
	podNamespace, podID, uid, container := getContainerCoordinates(request)
	pod, ok := s.host.GetPodByName(podNamespace, podID)
	if !ok {
		response.WriteError(http.StatusNotFound, fmt.Errorf("pod does not exist"))
		return
	}
	command := strings.Split(request.QueryParameter("cmd"), " ")
	data, err := s.host.RunInContainer(kubecontainer.GetPodFullName(pod), uid, container, command)
	if err != nil {
		response.WriteError(http.StatusInternalServerError, err)
		return
	}
	response.Write(data)
}
// TranslatePodUID returns the UID of the static pod if the given UID belongs to
// its mirror pod; otherwise it returns the UID unchanged. All public-facing
// functions should perform this translation, because users may provide a mirror
// pod UID, which is not recognized by internal Kubelet functions.
func (pm *basicPodManager) TranslatePodUID(uid types.UID) types.UID {
	if uid == "" {
		return uid
	}

	pm.lock.RLock()
	defer pm.lock.RUnlock()
	if mirrorPod, ok := pm.mirrorPodByUID[uid]; ok {
		podFullName := kubecontainer.GetPodFullName(mirrorPod)
		if pod, ok := pm.podByFullName[podFullName]; ok {
			return pod.UID
		}
	}
	return uid
}
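// Sketch of the UID translation round-trip (illustrative, not from the source).
// pm is assumed to be a freshly constructed basicPodManager, and isMirrorPod is
// assumed to key off the kubelet's mirror-pod config annotation; the exact key
// and value below are assumptions.
func exampleTranslatePodUID(pm *basicPodManager) {
	static := &api.Pod{ObjectMeta: api.ObjectMeta{UID: "static-uid", Name: "web", Namespace: "ns"}}
	mirror := &api.Pod{ObjectMeta: api.ObjectMeta{
		UID:       "mirror-uid",
		Name:      "web", // same full name as the static pod
		Namespace: "ns",
		// Assumed mirror-pod marker consulted by isMirrorPod.
		Annotations: map[string]string{"kubernetes.io/config.mirror": "mirror"},
	}}
	pm.UpdatePod(static)
	pm.UpdatePod(mirror)
	fmt.Println(pm.TranslatePodUID("mirror-uid")) // "static-uid"
	fmt.Println(pm.TranslatePodUID("other-uid"))  // unknown UIDs pass through unchanged
}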
// probeReadiness probes and sets the readiness of a container.
// If the initial delay on the readiness probe has not passed, we set readiness to false.
func (pb *prober) probeReadiness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) {
	var ready probe.Result
	var output string
	var err error
	p := container.ReadinessProbe
	if p == nil {
		ready = probe.Success
	} else if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
		ready = probe.Failure
	} else {
		ready, output, err = pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
	}
	ctrName := fmt.Sprintf("%s:%s", kubecontainer.GetPodFullName(pod), container.Name)
	if err != nil || ready == probe.Failure {
		// Readiness failed in one way or another.
		pb.readinessManager.SetReadiness(containerID, false)
		ref, ok := pb.refManager.GetRef(containerID)
		if !ok {
			glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name)
		}
		if err != nil {
			glog.V(1).Infof("Readiness probe for %q errored: %v", ctrName, err)
			if ok {
				pb.recorder.Eventf(ref, "Unhealthy", "Readiness probe errored: %v", err)
			}
			return
		}
		// ready != probe.Success
		glog.V(1).Infof("Readiness probe for %q failed (%v): %s", ctrName, ready, output)
		if ok {
			pb.recorder.Eventf(ref, "Unhealthy", "Readiness probe failed: %s", output)
		}
		return
	}
	if ready == probe.Success {
		pb.readinessManager.SetReadiness(containerID, true)
	}
	glog.V(3).Infof("Readiness probe for %q succeeded", ctrName)
}
// runPod runs a single pod and waits until all of its containers are running.
func (kl *Kubelet) runPod(pod *api.Pod, retryDelay time.Duration) error {
	delay := retryDelay
	retry := 0
	for {
		pods, err := kl.containerRuntime.GetPods(false)
		if err != nil {
			return fmt.Errorf("failed to get kubelet pods: %v", err)
		}
		p := container.Pods(pods).FindPodByID(pod.UID)
		running, err := kl.isPodRunning(pod, p)
		if err != nil {
			return fmt.Errorf("failed to check pod status: %v", err)
		}
		if running {
			glog.Infof("pod %q containers running", pod.Name)
			return nil
		}
		glog.Infof("pod %q containers not running: syncing", pod.Name)

		podFullName := kubecontainer.GetPodFullName(pod)
		glog.Infof("Creating a mirror pod for static pod %q", podFullName)
		if err := kl.podManager.CreateMirrorPod(pod); err != nil {
			glog.Errorf("Failed creating a mirror pod %q: %v", podFullName, err)
		}
		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
		if err = kl.syncPod(pod, mirrorPod, p, SyncPodUpdate); err != nil {
			return fmt.Errorf("error syncing pod: %v", err)
		}
		if retry >= RunOnceMaxRetries {
			return fmt.Errorf("timeout error: pod %q containers not running after %d retries", pod.Name, RunOnceMaxRetries)
		}
		// TODO(proppy): health checking would be better than waiting + checking the state at the next iteration.
		glog.Infof("pod %q containers synced, waiting for %v", pod.Name, delay)
		time.Sleep(delay)
		retry++
		delay *= RunOnceRetryDelayBackoff
	}
}
func (pm *basicPodManager) setPods(newPods []*api.Pod) {
	podByUID := make(map[types.UID]*api.Pod)
	mirrorPodByUID := make(map[types.UID]*api.Pod)
	podByFullName := make(map[string]*api.Pod)
	mirrorPodByFullName := make(map[string]*api.Pod)

	for _, pod := range newPods {
		podFullName := kubecontainer.GetPodFullName(pod)
		if isMirrorPod(pod) {
			mirrorPodByUID[pod.UID] = pod
			mirrorPodByFullName[podFullName] = pod
		} else {
			podByUID[pod.UID] = pod
			podByFullName[podFullName] = pod
		}
	}

	pm.podByUID = podByUID
	pm.podByFullName = podByFullName
	pm.mirrorPodByUID = mirrorPodByUID
	pm.mirrorPodByFullName = mirrorPodByFullName
}
// getExec handles requests to run a command inside a container, streaming
// stdin/stdout/stderr over the negotiated connection.
func (s *Server) getExec(request *restful.Request, response *restful.Response) {
	podNamespace, podID, uid, container := getContainerCoordinates(request)
	pod, ok := s.host.GetPodByName(podNamespace, podID)
	if !ok {
		response.WriteError(http.StatusNotFound, fmt.Errorf("pod does not exist"))
		return
	}
	stdinStream, stdoutStream, stderrStream, errorStream, conn, tty, ok := s.createStreams(request, response)
	if conn != nil {
		defer conn.Close()
	}
	if !ok {
		// error is handled in the createStreams function
		return
	}
	// NB: ExecCommandParamm is the constant's actual (typo'd) upstream name.
	cmd := request.Request.URL.Query()[api.ExecCommandParamm]
	err := s.host.ExecInContainer(kubecontainer.GetPodFullName(pod), uid, container, cmd, stdinStream, stdoutStream, stderrStream, tty)
	if err != nil {
		msg := fmt.Sprintf("Error executing command in container: %v", err)
		glog.Error(msg)
		errorStream.Write([]byte(msg))
	}
}
// probeLiveness probes the liveness of a container.
// If the initial delay on the liveness probe has not passed since container
// creation, the probe returns probe.Success.
func (pb *prober) probeLiveness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
	var live probe.Result
	var output string
	var err error
	p := container.LivenessProbe
	if p == nil {
		return probe.Success, nil
	}
	if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
		return probe.Success, nil
	}
	live, output, err = pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
	ctrName := fmt.Sprintf("%s:%s", kubecontainer.GetPodFullName(pod), container.Name)
	if err != nil || live != probe.Success {
		// Liveness failed in one way or another.
		ref, ok := pb.refManager.GetRef(containerID)
		if !ok {
			glog.Warningf("No ref for pod %q - '%v'", containerID, container.Name)
		}
		if err != nil {
			glog.V(1).Infof("Liveness probe for %q errored: %v", ctrName, err)
			if ok {
				pb.recorder.Eventf(ref, "Unhealthy", "Liveness probe errored: %v", err)
			}
			return probe.Unknown, err
		}
		// live != probe.Success
		glog.V(1).Infof("Liveness probe for %q failed (%v): %s", ctrName, live, output)
		if ok {
			pb.recorder.Eventf(ref, "Unhealthy", "Liveness probe failed: %s", output)
		}
		return live, nil
	}
	glog.V(3).Infof("Liveness probe for %q succeeded", ctrName)
	return probe.Success, nil
}
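// Both probes above gate on the same initial-delay check, but they fail open in
// opposite directions: during the delay, probeLiveness reports probe.Success
// (assume alive) while probeReadiness reports probe.Failure (not ready yet).
// A standalone restatement of the gate (names here are illustrative):
func initialDelayPassed(createdAt, initialDelaySeconds int64) bool {
	return time.Now().Unix()-createdAt >= initialDelaySeconds
}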
// getContainerLogs handles containerLogs requests against the Kubelet.
func (s *Server) getContainerLogs(request *restful.Request, response *restful.Response) {
	podNamespace := request.PathParameter("podNamespace")
	podID := request.PathParameter("podID")
	containerName := request.PathParameter("containerName")

	if len(podID) == 0 {
		// TODO: Why return JSON when the rest return plaintext errors?
		response.WriteError(http.StatusBadRequest, fmt.Errorf(`{"message": "Missing podID."}`))
		return
	}
	if len(containerName) == 0 {
		// TODO: Why return JSON when the rest return plaintext errors?
		response.WriteError(http.StatusBadRequest, fmt.Errorf(`{"message": "Missing container name."}`))
		return
	}
	if len(podNamespace) == 0 {
		// TODO: Why return JSON when the rest return plaintext errors?
		response.WriteError(http.StatusBadRequest, fmt.Errorf(`{"message": "Missing podNamespace."}`))
		return
	}

	query := request.Request.URL.Query()
	// backwards compatibility for the "tail" query parameter
	if tail := request.QueryParameter("tail"); len(tail) > 0 {
		query["tailLines"] = []string{tail}
		// "all" is the same as omitting tail
		if tail == "all" {
			delete(query, "tailLines")
		}
	}
	// container logs on the kubelet are locked to v1
	versioned := &v1.PodLogOptions{}
	if err := api.Scheme.Convert(&query, versioned); err != nil {
		response.WriteError(http.StatusBadRequest, fmt.Errorf(`{"message": "Unable to decode query."}`))
		return
	}
	out, err := api.Scheme.ConvertToVersion(versioned, "")
	if err != nil {
		response.WriteError(http.StatusBadRequest, fmt.Errorf(`{"message": "Unable to convert request query."}`))
		return
	}
	logOptions := out.(*api.PodLogOptions)
	logOptions.TypeMeta = unversioned.TypeMeta{}
	if errs := validation.ValidatePodLogOptions(logOptions); len(errs) > 0 {
		response.WriteError(apierrs.StatusUnprocessableEntity, fmt.Errorf(`{"message": "Invalid request."}`))
		return
	}

	pod, ok := s.host.GetPodByName(podNamespace, podID)
	if !ok {
		response.WriteError(http.StatusNotFound, fmt.Errorf("Pod %q does not exist", podID))
		return
	}
	// Check if containerName is valid.
	containerExists := false
	for _, container := range pod.Spec.Containers {
		if container.Name == containerName {
			containerExists = true
			break
		}
	}
	if !containerExists {
		response.WriteError(http.StatusNotFound, fmt.Errorf("Container %q not found in Pod %q", containerName, podID))
		return
	}

	if _, ok := response.ResponseWriter.(http.Flusher); !ok {
		response.WriteError(http.StatusInternalServerError, fmt.Errorf("unable to convert %v into http.Flusher", response))
		return
	}
	fw := flushwriter.Wrap(response.ResponseWriter)
	if logOptions.LimitBytes != nil {
		fw = limitwriter.New(fw, *logOptions.LimitBytes)
	}
	response.Header().Set("Transfer-Encoding", "chunked")
	response.WriteHeader(http.StatusOK)
	if err := s.host.GetKubeletContainerLogs(kubecontainer.GetPodFullName(pod), containerName, logOptions, fw, fw); err != nil {
		if err != limitwriter.ErrMaximumWrite {
			response.WriteError(http.StatusInternalServerError, err)
		}
		return
	}
}
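// The "tail" shim above rewrites the legacy query parameter into v1's tailLines
// before conversion. A stdlib-only restatement of that mapping (illustrative):
func exampleTailCompat(rawQuery string) url.Values {
	query, _ := url.ParseQuery(rawQuery)
	if tail := query.Get("tail"); len(tail) > 0 {
		query["tailLines"] = []string{tail}
		// "all" is the same as omitting tail
		if tail == "all" {
			delete(query, "tailLines")
		}
	}
	return query
}

// exampleTailCompat("tail=100") yields tailLines=[100]; "tail=all" yields no tailLines.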
func (pm *basicPodManager) GetPodByMirrorPod(mirrorPod *api.Pod) (*api.Pod, bool) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()
	pod, ok := pm.podByFullName[kubecontainer.GetPodFullName(mirrorPod)]
	return pod, ok
}
func (s *podStorage) merge(source string, change interface{}) (adds, updates, deletes *kubelet.PodUpdate) {
	s.podLock.Lock()
	defer s.podLock.Unlock()

	adds = &kubelet.PodUpdate{Op: kubelet.ADD, Source: source}
	updates = &kubelet.PodUpdate{Op: kubelet.UPDATE, Source: source}
	deletes = &kubelet.PodUpdate{Op: kubelet.REMOVE, Source: source}

	pods := s.pods[source]
	if pods == nil {
		pods = make(map[string]*api.Pod)
	}

	update := change.(kubelet.PodUpdate)
	switch update.Op {
	case kubelet.ADD, kubelet.UPDATE:
		if update.Op == kubelet.ADD {
			glog.V(4).Infof("Adding new pods from source %s : %v", source, update.Pods)
		} else {
			glog.V(4).Infof("Updating pods from source %s : %v", source, update.Pods)
		}
		filtered := filterInvalidPods(update.Pods, source, s.recorder)
		for _, ref := range filtered {
			name := kubecontainer.GetPodFullName(ref)
			// Annotate the pod with the source before any comparison.
			if ref.Annotations == nil {
				ref.Annotations = make(map[string]string)
			}
			ref.Annotations[kubelet.ConfigSourceAnnotationKey] = source
			if existing, found := pods[name]; found {
				if checkAndUpdatePod(existing, ref) {
					// this is an update
					updates.Pods = append(updates.Pods, existing)
					continue
				}
				// this is a no-op
				continue
			}
			// this is an add
			recordFirstSeenTime(ref)
			pods[name] = ref
			adds.Pods = append(adds.Pods, ref)
		}

	case kubelet.REMOVE:
		glog.V(4).Infof("Removing a pod %v", update)
		for _, value := range update.Pods {
			name := kubecontainer.GetPodFullName(value)
			if existing, found := pods[name]; found {
				// this is a delete
				delete(pods, name)
				deletes.Pods = append(deletes.Pods, existing)
				continue
			}
			// this is a no-op
		}

	case kubelet.SET:
		glog.V(4).Infof("Setting pods for source %s", source)
		s.markSourceSet(source)
		// Clear the old map entries by just creating a new map
		oldPods := pods
		pods = make(map[string]*api.Pod)

		filtered := filterInvalidPods(update.Pods, source, s.recorder)
		for _, ref := range filtered {
			name := kubecontainer.GetPodFullName(ref)
			// Annotate the pod with the source before any comparison.
			if ref.Annotations == nil {
				ref.Annotations = make(map[string]string)
			}
			ref.Annotations[kubelet.ConfigSourceAnnotationKey] = source
			if existing, found := oldPods[name]; found {
				pods[name] = existing
				if checkAndUpdatePod(existing, ref) {
					// this is an update
					updates.Pods = append(updates.Pods, existing)
					continue
				}
				// this is a no-op
				continue
			}
			recordFirstSeenTime(ref)
			pods[name] = ref
			adds.Pods = append(adds.Pods, ref)
		}
		for name, existing := range oldPods {
			if _, found := pods[name]; !found {
				// this is a delete
				deletes.Pods = append(deletes.Pods, existing)
			}
		}

	default:
		glog.Warningf("Received invalid update type: %v", update)
	}

	s.pods[source] = pods
	return adds, updates, deletes
}
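// Sketch of the SET semantics above (illustrative; `a` and `b` are assumed to be
// distinct pods that pass validation). A SET replaces the source's entire
// snapshot, so anything missing from the new snapshot comes back as a delete:
func exampleMergeSet(s *podStorage, a, b *api.Pod) {
	adds, _, _ := s.merge("file", kubelet.PodUpdate{Op: kubelet.SET, Pods: []*api.Pod{a, b}})
	// adds.Pods == [a, b]
	_, _, deletes := s.merge("file", kubelet.PodUpdate{Op: kubelet.SET, Pods: []*api.Pod{b}})
	// deletes.Pods == [a]: present in the old snapshot, absent from the new one
	_, _ = adds, deletes
}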
// serveStats implements stats logic.
func (s *Server) serveStats(w http.ResponseWriter, req *http.Request) {
	// Stats requests are in the following forms:
	//
	// /stats/                                              : Root container stats
	// /stats/container/                                    : Non-Kubernetes container stats (returns a map)
	// /stats/<pod name>/<container name>                   : Stats for Kubernetes pod/container
	// /stats/<namespace>/<pod name>/<uid>/<container name> : Stats for Kubernetes namespace/pod/uid/container
	components := strings.Split(strings.TrimPrefix(path.Clean(req.URL.Path), "/"), "/")
	var stats interface{}
	var err error
	var query StatsRequest
	query.NumStats = 60

	err = json.NewDecoder(req.Body).Decode(&query)
	if err != nil && err != io.EOF {
		s.error(w, err)
		return
	}
	cadvisorRequest := cadvisorApi.ContainerInfoRequest{
		NumStats: query.NumStats,
		Start:    query.Start,
		End:      query.End,
	}

	switch len(components) {
	case 1:
		// Root container stats.
		var statsMap map[string]*cadvisorApi.ContainerInfo
		statsMap, err = s.host.GetRawContainerInfo("/", &cadvisorRequest, false)
		stats = statsMap["/"]
	case 2:
		// Non-Kubernetes container stats.
		if components[1] != "container" {
			http.Error(w, fmt.Sprintf("unknown stats request type %q", components[1]), http.StatusNotFound)
			return
		}
		containerName := path.Join("/", query.ContainerName)
		stats, err = s.host.GetRawContainerInfo(containerName, &cadvisorRequest, query.Subcontainers)
	case 3:
		// Backward compatibility without uid information, does not support namespace
		pod, ok := s.host.GetPodByName(api.NamespaceDefault, components[1])
		if !ok {
			http.Error(w, "Pod does not exist", http.StatusNotFound)
			return
		}
		stats, err = s.host.GetContainerInfo(kubecontainer.GetPodFullName(pod), "", components[2], &cadvisorRequest)
	case 5:
		pod, ok := s.host.GetPodByName(components[1], components[2])
		if !ok {
			http.Error(w, "Pod does not exist", http.StatusNotFound)
			return
		}
		stats, err = s.host.GetContainerInfo(kubecontainer.GetPodFullName(pod), types.UID(components[3]), components[4], &cadvisorRequest)
	default:
		http.Error(w, fmt.Sprintf("Unknown resource: %v", components), http.StatusNotFound)
		return
	}
	switch err {
	case nil:
		break
	case ErrContainerNotFound:
		http.Error(w, err.Error(), http.StatusNotFound)
		return
	default:
		s.error(w, err)
		return
	}
	if stats == nil {
		fmt.Fprint(w, "{}")
		return
	}
	data, err := json.Marshal(stats)
	if err != nil {
		s.error(w, err)
		return
	}
	w.Header().Add("Content-type", "application/json")
	w.Write(data)
}
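// Client-side sketch of the fully qualified stats form (illustrative;
// kubeletAddr is an assumed host:port). An empty body is acceptable because the
// handler treats io.EOF from the decoder as "use the defaults" (60 samples):
func exampleStatsRequest(kubeletAddr, namespace, podName, uid, containerName string) (*http.Response, error) {
	endpoint := fmt.Sprintf("http://%s/stats/%s/%s/%s/%s", kubeletAddr, namespace, podName, uid, containerName)
	return http.Post(endpoint, "application/json", nil)
}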
// launchTask is the async continuation of LaunchTask.
func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) {
	deleteTask := func() {
		k.lock.Lock()
		defer k.lock.Unlock()
		delete(k.tasks, taskId)
		k.resetSuicideWatch(driver)
	}

	// TODO(k8s): use Pods interface for binding once clusters are upgraded
	// return b.Pods(binding.Namespace).Bind(binding)
	if pod.Spec.NodeName == "" {
		//HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go
		binding := &api.Binding{
			ObjectMeta: api.ObjectMeta{
				Namespace:   pod.Namespace,
				Name:        pod.Name,
				Annotations: make(map[string]string),
			},
			Target: api.ObjectReference{
				Kind: "Node",
				Name: pod.Annotations[meta.BindingHostKey],
			},
		}

		// forward the annotations that the scheduler wants to apply
		for k, v := range pod.Annotations {
			binding.Annotations[k] = v
		}

		// create binding on apiserver
		log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations)
		ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
		err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
		if err != nil {
			deleteTask()
			k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, messages.CreateBindingFailure))
			return
		}
	} else {
		// post annotations update to apiserver
		patch := struct {
			Metadata struct {
				Annotations map[string]string `json:"annotations"`
			} `json:"metadata"`
		}{}
		patch.Metadata.Annotations = pod.Annotations
		patchJson, _ := json.Marshal(patch)
		log.V(4).Infof("Patching annotations %v of pod %v/%v: %v", pod.Annotations, pod.Namespace, pod.Name, string(patchJson))
		err := k.client.Patch(api.MergePatchType).RequestURI(pod.SelfLink).Body(patchJson).Do().Error()
		if err != nil {
			log.Errorf("Error updating annotations of ready-to-launch pod %v/%v: %v", pod.Namespace, pod.Name, err)
			deleteTask()
			k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, messages.AnnotationUpdateFailure))
			return
		}
	}

	podFullName := container.GetPodFullName(pod)

	// allow a recently failed-over scheduler the chance to recover the task/pod binding:
	// it may have failed and recovered before the apiserver is able to report the updated
	// binding information. replays of this status event will signal to the scheduler that
	// the apiserver should be up-to-date.
	data, err := json.Marshal(api.PodStatusResult{
		ObjectMeta: api.ObjectMeta{
			Name:     podFullName,
			SelfLink: "/podstatusresult",
		},
	})
	if err != nil {
		deleteTask()
		log.Errorf("failed to marshal pod status result: %v", err)
		k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, err.Error()))
		return
	}

	k.lock.Lock()
	defer k.lock.Unlock()

	// Add the task.
	task, found := k.tasks[taskId]
	if !found {
		log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId)
		k.reportLostTask(driver, taskId, messages.LaunchTaskFailed)
		return
	}

	//TODO(jdef) check for duplicate pod name, if found send TASK_ERROR

	// from here on, we need to delete containers associated with the task
	// upon it going into a terminal state
	task.podName = podFullName
	k.pods[podFullName] = pod

	// send the new pod to the kubelet which will spin it up
	update := kubelet.PodUpdate{
		Op:   kubelet.ADD,
		Pods: []*api.Pod{pod},
	}
	k.updateChan <- update

	statusUpdate := &mesos.TaskStatus{
		TaskId:  mutil.NewTaskID(taskId),
		State:   mesos.TaskState_TASK_STARTING.Enum(),
		Message: proto.String(messages.CreateBindingSuccess),
		Data:    data,
	}
	k.sendStatus(driver, statusUpdate)

	// Delay reporting 'task running' until container is up.
	psf := podStatusFunc(func() (*api.PodStatus, error) {
		status, err := k.podStatusFunc(k.kl, pod)
		if err != nil {
			return nil, err
		}
		status.Phase = kubelet.GetPhase(&pod.Spec, status.ContainerStatuses)
		hostIP, err := k.kl.GetHostIP()
		if err != nil {
			log.Errorf("Cannot get host IP: %v", err)
		} else {
			status.HostIP = hostIP.String()
		}
		return status, nil
	})
	go k._launchTask(driver, taskId, podFullName, psf)
}