func TestParsePodFullName(t *testing.T) {
	type nameTuple struct {
		Name      string
		Namespace string
	}
	successfulCases := map[string]nameTuple{
		"bar_foo":         {Name: "bar", Namespace: "foo"},
		"bar.org_foo.com": {Name: "bar.org", Namespace: "foo.com"},
		"bar-bar_foo":     {Name: "bar-bar", Namespace: "foo"},
	}
	failedCases := []string{"barfoo", "bar_foo_foo", ""}

	for podFullName, expected := range successfulCases {
		name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
		if err != nil {
			t.Errorf("unexpected error when parsing the full name: %v", err)
			continue
		}
		if name != expected.Name || namespace != expected.Namespace {
			t.Errorf("expected name %q, namespace %q; got name %q, namespace %q",
				expected.Name, expected.Namespace, name, namespace)
		}
	}
	for _, podFullName := range failedCases {
		_, _, err := kubecontainer.ParsePodFullName(podFullName)
		if err == nil {
			t.Errorf("expected error when parsing the full name, got none")
		}
	}
}
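// For reference: a minimal sketch of the contract the test above exercises.
// This is reconstructed from the test cases and is not necessarily the
// verbatim kubecontainer.ParsePodFullName implementation. The full name is
// "<name>_<namespace>"; both parts must be non-empty and there must be exactly
// one separator. (stdlib imports: fmt, strings)
func parsePodFullNameSketch(podFullName string) (name, namespace string, err error) {
	parts := strings.Split(podFullName, "_")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", fmt.Errorf("failed to parse the pod full name %q", podFullName)
	}
	return parts[0], parts[1], nil
}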
func (r *runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error {
	podInfos, err := r.hyperClient.ListPods()
	if err != nil {
		return err
	}

	for _, pod := range podInfos {
		// Skip pods that are not managed by kubelet.
		_, _, err := kubecontainer.ParsePodFullName(pod.PodName)
		if err != nil {
			continue
		}

		// Skip running pods.
		if pod.Status == StatusRunning {
			continue
		}

		// TODO: Replace lastTime with the pod's exit time.
		lastTime, err := parseTimeString(pod.PodInfo.Status.StartTime)
		if err != nil {
			lastTime = time.Now().Add(-1 * time.Hour)
		}

		if lastTime.Before(time.Now().Add(-gcPolicy.MinAge)) {
			// Remove log symlinks.
			for _, c := range pod.PodInfo.Status.Status {
				_, _, _, containerName, _, _, err := r.parseHyperContainerFullName(c.Name)
				if err != nil {
					continue
				}

				symlinkFile := LogSymlink(r.containerLogsDir, pod.PodName, containerName, c.ContainerID)
				err = os.Remove(symlinkFile)
				if err != nil && !os.IsNotExist(err) {
					glog.Warningf("Failed to remove container log symlink %q: %v", symlinkFile, err)
				}
			}

			// Remove the pod.
			cmds := []string{"rm", pod.PodID}
			_, err = r.runCommand(cmds...)
			if err != nil {
				glog.Warningf("Hyper GarbageCollect: remove pod %s failed, error: %s", pod.PodID, err)
				return err
			}
		}
	}

	return nil
}
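// LogSymlink is not defined in this section. A minimal sketch of the naming
// convention the GC loop above appears to rely on, modeled on the kubelet's
// "<podFullName>_<containerName>-<containerID>.log" layout; treat the exact
// format as an assumption and verify against the real helper.
// (stdlib imports: fmt, path)
func logSymlinkSketch(containerLogsDir, podFullName, containerName, containerID string) string {
	return path.Join(containerLogsDir, fmt.Sprintf("%s_%s-%s.log", podFullName, containerName, containerID))
}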
// DeleteMirrorPod deletes a mirror pod.
func (mc *basicMirrorClient) DeleteMirrorPod(podFullName string) error {
	if mc.apiserverClient == nil {
		return nil
	}
	name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
	if err != nil {
		glog.Errorf("Failed to parse a pod full name %q", podFullName)
		return err
	}
	glog.V(4).Infof("Deleting a mirror pod %q", podFullName)
	if err := mc.apiserverClient.Pods(namespace).Delete(name, api.NewDeleteOptions(0)); err != nil {
		glog.Errorf("Failed deleting a mirror pod %q: %v", podFullName, err)
	}
	return nil
}
// DeleteMirrorPod deletes a mirror pod through the API server; a missing pod
// is not treated as an error.
func (mc *basicMirrorClient) DeleteMirrorPod(podFullName string) error {
	if mc.apiserverClient == nil {
		return nil
	}
	name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
	if err != nil {
		glog.Errorf("Failed to parse a pod full name %q", podFullName)
		return err
	}
	glog.V(2).Infof("Deleting a mirror pod %q", podFullName)
	// TODO(random-liu): Delete the mirror pod with a UID precondition in the mirror pod manager.
	if err := mc.apiserverClient.Core().Pods(namespace).Delete(name, v1.NewDeleteOptions(0)); err != nil && !errors.IsNotFound(err) {
		glog.Errorf("Failed deleting a mirror pod %q: %v", podFullName, err)
	}
	return nil
}
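// Usage sketch: callers build the full name with the inverse helper,
// kubecontainer.BuildPodFullName, which joins name and namespace with "_"
// (the format ParsePodFullName splits). The pod name below is hypothetical.
func deleteMirrorPodExample(mc *basicMirrorClient) error {
	podFullName := kubecontainer.BuildPodFullName("mypod", "default") // "mypod_default"
	return mc.DeleteMirrorPod(podFullName)
}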
// GetKubeletContainerLogs returns logs from the container.
// TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt
// or all of them.
func (kl *Kubelet) GetKubeletContainerLogs(podFullName, containerName string, logOptions *api.PodLogOptions, stdout, stderr io.Writer) error {
	// Pod workers periodically write status to statusManager. If status is not
	// cached there, something is wrong (or kubelet just restarted and hasn't
	// caught up yet). Just assume the pod is not ready yet.
	name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
	if err != nil {
		return fmt.Errorf("unable to parse pod full name %q: %v", podFullName, err)
	}

	pod, ok := kl.GetPodByName(namespace, name)
	if !ok {
		return fmt.Errorf("pod %q cannot be found - no logs available", name)
	}

	podUID := pod.UID
	if mirrorPod, ok := kl.podManager.GetMirrorPodByPod(pod); ok {
		podUID = mirrorPod.UID
	}
	podStatus, found := kl.statusManager.GetPodStatus(podUID)
	if !found {
		// If there is no cached status, use the status from the
		// apiserver. This is useful if kubelet has recently been
		// restarted.
		podStatus = pod.Status
	}

	// TODO: Consolidate the logic here with kuberuntime.GetContainerLogs, here we convert container name to containerID,
	// but inside kuberuntime we convert container id back to container name and restart count.
	// TODO: After separate container log lifecycle management, we should get log based on the existing log files
	// instead of container status.
	containerID, err := kl.validateContainerLogStatus(pod.Name, &podStatus, containerName, logOptions.Previous)
	if err != nil {
		return err
	}

	// Do a zero-byte write to stdout before handing off to the container runtime.
	// This ensures at least one Write call is made to the writer when copying starts,
	// even if we then block waiting for log output from the container.
	if _, err := stdout.Write([]byte{}); err != nil {
		return err
	}

	return kl.containerRuntime.GetContainerLogs(pod, containerID, logOptions, stdout, stderr)
}
func dockerContainersToPod(containers []*docker.APIContainers) kubecontainer.Pod {
	var pod kubecontainer.Pod
	for _, c := range containers {
		dockerName, hash, err := ParseDockerName(c.Names[0])
		if err != nil {
			continue
		}
		pod.Containers = append(pod.Containers, &kubecontainer.Container{
			ID:    kubecontainer.ContainerID{Type: "docker", ID: c.ID},
			Name:  dockerName.ContainerName,
			Hash:  hash,
			Image: c.Image,
		})
		// TODO(yifan): Only one evaluation is enough.
		pod.ID = dockerName.PodUID
		name, namespace, _ := kubecontainer.ParsePodFullName(dockerName.PodFullName)
		pod.Name = name
		pod.Namespace = namespace
	}
	return pod
}
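// For orientation: ParseDockerName (not shown here) reverses the docker
// container naming scheme, which is approximately
//
//	k8s_<containerName>.<hash>_<podName>_<namespace>_<podUID>_<suffix>
//
// Treat the exact field order as an assumption; the relevant point is that the
// recovered PodFullName is "<podName>_<namespace>", the same format that
// kubecontainer.ParsePodFullName splits above.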
func (r *runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error {
	podInfos, err := r.hyperClient.ListPods()
	if err != nil {
		return err
	}

	for _, pod := range podInfos {
		// Skip pods that are not managed by kubelet.
		_, _, err := kubecontainer.ParsePodFullName(pod.PodName)
		if err != nil {
			continue
		}

		// Skip running pods.
		if pod.Status == StatusRunning {
			continue
		}

		// TODO: Replace lastTime with the pod's exit time.
		lastTime, err := parseTimeString(pod.PodInfo.Status.StartTime)
		if err != nil {
			lastTime = time.Now().Add(-1 * time.Hour)
		}

		if lastTime.Before(time.Now().Add(-gcPolicy.MinAge)) {
			cmds := []string{"rm", pod.PodID}
			_, err = r.runCommand(cmds...)
			if err != nil {
				glog.Warningf("Hyper GarbageCollect: remove pod %s failed, error: %s", pod.PodID, err)
				return err
			}
		}
	}

	return nil
}
// syncProxyRules is where all of the haproxy-setting calls happen.
// It assumes proxier.mu is held.
func (proxier *Proxier) syncProxyRules() {
	if proxier.disableHyperInternalService {
		return
	}
	// Don't sync rules until we've received services and endpoints.
	if !proxier.haveReceivedEndpointsUpdate || !proxier.haveReceivedServiceUpdate {
		glog.V(2).Info("Not syncing proxy rules until Services and Endpoints have been received from master")
		return
	}
	glog.V(3).Infof("Syncing proxy rules")

	// Get existing pods.
	podList, err := proxier.hyperClient.ListPods()
	if err != nil {
		glog.Warningf("Can not get pod list: %v", err)
		return
	}

	// For each pod, set up the services in the pod's namespace.
	for _, podInfo := range podList {
		_, podNamespace, err := kubecontainer.ParsePodFullName(podInfo.PodName)
		if err != nil {
			glog.Warningf("Pod %s is not managed by kubernetes", podInfo.PodName)
			continue
		}

		// Build the services of the same namespace (assume all services within
		// the same namespace will be consumed).
		consumedServices := make([]*grpctypes.UserService, 0, 1)
		for _, svcInfo := range proxier.serviceMap {
			if svcInfo.namespace != podNamespace {
				continue
			}

			svc := &grpctypes.UserService{
				ServicePort: svcInfo.port,
				ServiceIP:   svcInfo.clusterIP.String(),
				Protocol:    strings.ToLower(string(svcInfo.protocol)),
			}

			hosts := make([]*grpctypes.UserServiceBackend, 0, 1)
			for _, ep := range svcInfo.endpoints {
				hostport := strings.Split(ep, ":")
				port, _ := strconv.ParseInt(hostport[1], 10, 0)
				hosts = append(hosts, &grpctypes.UserServiceBackend{
					HostIP:   hostport[0],
					HostPort: int32(port),
				})
			}
			svc.Hosts = hosts

			consumedServices = append(consumedServices, svc)
		}
		glog.V(4).Infof("Services consumed by pod %s: %v", podInfo.PodName, consumedServices)

		// Update existing services.
		if len(consumedServices) == 0 {
			// The service list can't be empty for kubernetes, so fake one if needed.
			consumedServices = append(consumedServices, &grpctypes.UserService{
				ServiceIP:   "127.0.0.2",
				ServicePort: 65534,
			})
		}

		err = proxier.hyperClient.UpdateServices(podInfo.PodID, consumedServices)
		if err != nil {
			glog.Warningf("Updating service for hyper pod %s failed: %v", podInfo.PodName, err)
		}
	}
}
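// The endpoint handling above assumes every entry is "host:port" and silently
// drops ParseInt errors. A more defensive variant, shown only as a sketch
// (not the upstream code), would use net.SplitHostPort and surface errors.
// (stdlib imports: net, strconv)
func parseEndpointSketch(ep string) (string, int32, error) {
	host, portStr, err := net.SplitHostPort(ep)
	if err != nil {
		return "", 0, err
	}
	port, err := strconv.ParseInt(portStr, 10, 32)
	if err != nil {
		return "", 0, err
	}
	return host, int32(port), nil
}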
// reconcileNonTerminalTask reconciles an unknown (from the perspective of our
// registry) non-terminal task.
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParsePodFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION &&
		taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			// TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should already have attempted recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
			podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			_, err := k.taskRegistry.Register(t, nil)
			if err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
				return
			} else {
				k.taskRegistry.UpdateStatus(taskStatus)
			}
			return
		} else if err != nil {
			// should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			// this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
			// metadata is not appropriate for task reconstruction -- which should almost certainly never
			// be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
			// we were failed over.
			// kill this task, allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}

	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}
// GetPods returns a list of containers grouped by pods. The boolean parameter
// specifies whether the runtime returns all containers, including those
// already exited and dead (used for garbage collection).
func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
	podInfos, err := r.hyperClient.ListPods()
	if err != nil {
		return nil, err
	}

	var kubepods []*kubecontainer.Pod
	for _, podInfo := range podInfos {
		var pod kubecontainer.Pod
		var containers []*kubecontainer.Container

		if !all && podInfo.Status != StatusRunning {
			continue
		}

		podID := podInfo.PodInfo.Spec.Labels["UID"]
		podName, podNamespace, err := kubecontainer.ParsePodFullName(podInfo.PodName)
		if err != nil {
			glog.V(5).Infof("Hyper: pod %s is not managed by kubelet", podInfo.PodName)
			continue
		}

		pod.ID = types.UID(podID)
		pod.Name = podName
		pod.Namespace = podNamespace

		for _, cinfo := range podInfo.PodInfo.Spec.Containers {
			var container kubecontainer.Container
			container.ID = kubecontainer.ContainerID{Type: typeHyper, ID: cinfo.ContainerID}
			container.Image = cinfo.Image

			for _, cstatus := range podInfo.PodInfo.Status.Status {
				if cstatus.ContainerID == cinfo.ContainerID {
					switch cstatus.Phase {
					case StatusRunning:
						container.State = kubecontainer.ContainerStateRunning
					default:
						container.State = kubecontainer.ContainerStateExited
					}

					createdAt, err := parseTimeString(cstatus.Running.StartedAt)
					if err == nil {
						container.Created = createdAt.Unix()
					}
				}
			}

			_, _, _, containerName, _, containerHash, err := r.parseHyperContainerFullName(cinfo.Name)
			if err != nil {
				glog.V(5).Infof("Hyper: container %s is not managed by kubelet", cinfo.Name)
				continue
			}
			container.Name = containerName

			hash, err := strconv.ParseUint(containerHash, 16, 64)
			if err == nil {
				container.Hash = hash
			}

			containers = append(containers, &container)
		}
		pod.Containers = containers

		kubepods = append(kubepods, &pod)
	}

	return kubepods, nil
}
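// parseTimeString is hyper-specific and not shown in this section. A minimal
// sketch, assuming hyper reports RFC 3339-style timestamps (an assumption;
// check the actual runtime helper, which may use a different layout).
// (stdlib import: time)
func parseTimeStringSketch(str string) (time.Time, error) {
	if str == "" {
		return time.Time{}, nil
	}
	return time.Parse(time.RFC3339, str)
}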
func (r *runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
	podInfos, err := r.hyperClient.ListPods()
	if err != nil {
		return err
	}

	for _, pod := range podInfos {
		// Skip pods that are not managed by kubelet.
		podName, podNamespace, err := kubecontainer.ParsePodFullName(pod.PodName)
		if err != nil {
			continue
		}

		// Skip running pods.
		if pod.Status == StatusRunning {
			continue
		}

		lastTime, err := parseTimeString(pod.PodInfo.Status.FinishTime)
		if err != nil {
			continue
		}

		if lastTime.Before(time.Now().Add(-gcPolicy.MinAge)) {
			// Remove log symlinks.
			for _, c := range pod.PodInfo.Status.ContainerStatus {
				_, _, _, containerName, _, _, err := r.parseHyperContainerFullName(c.Name)
				if err != nil {
					continue
				}

				symlinkFile := LogSymlink(r.containerLogsDir, pod.PodName, containerName, c.ContainerID)
				err = os.Remove(symlinkFile)
				if err != nil && !os.IsNotExist(err) {
					glog.Warningf("Failed to remove container log symlink %q: %v", symlinkFile, err)
				}
			}

			// TODO(harryz) use allSourcesReady to prevent aggressive actions

			// Remove the pod.
			err = r.hyperClient.RemovePod(pod.PodID)
			if err != nil {
				glog.Warningf("Hyper GarbageCollect: remove pod %s failed, error: %s", pod.PodID, err)
				return err
			}

			// KillPod is only called for running pods, so tear down the
			// network here for non-running pods.
			err = r.networkPlugin.TearDownPod(podNamespace, podName, kubecontainer.ContainerID{}, "hyper")
			if err != nil {
				glog.Warningf("Hyper: networkPlugin.TearDownPod failed, error: %v", err)
			}

			// Delete the pod spec file.
			specFileName := path.Join(hyperPodSpecDir, pod.PodName)
			_, err = os.Stat(specFileName)
			if err == nil {
				e := os.Remove(specFileName)
				if e != nil {
					glog.Warningf("Hyper: delete spec file for %s failed, error: %v", pod.PodName, e)
				}
			}
		}
	}

	// Remove dead symlinks - should only happen on upgrade
	// from a k8s version without proper log symlink cleanup.
	logSymlinks, _ := filepath.Glob(path.Join(r.containerLogsDir, "*.log"))
	for _, logSymlink := range logSymlinks {
		if _, err = os.Stat(logSymlink); os.IsNotExist(err) {
			err = os.Remove(logSymlink)
			if err != nil {
				glog.Warningf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
			}
		}
	}

	return nil
}