// AddPod creates a probe worker for every container probe defined in the pod
// and starts it. It should be called once for each pod added to the manager.
func (m *manager) AddPod(pod *api.Pod) {
	m.workerLock.Lock()
	defer m.workerLock.Unlock()

	key := probeKey{podUID: pod.UID}
	for _, c := range pod.Spec.Containers {
		key.containerName = c.Name

		if c.ReadinessProbe != nil {
			key.probeType = readiness
			if _, ok := m.workers[key]; ok {
				glog.Errorf("Readiness probe already exists! %v - %v", format.Pod(pod), c.Name)
				return
			}
			w := newWorker(m, readiness, pod, c)
			m.workers[key] = w
			go w.run()
		}

		if c.LivenessProbe != nil {
			key.probeType = liveness
			if _, ok := m.workers[key]; ok {
				glog.Errorf("Liveness probe already exists! %v - %v", format.Pod(pod), c.Name)
				return
			}
			w := newWorker(m, liveness, pod, c)
			m.workers[key] = w
			go w.run()
		}
	}
}
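// Illustrative, stand-alone sketch (not part of the kubelet's prober package) of the
// pattern AddPod uses above: a worker registry keyed by a composite struct, guarded by
// a mutex, starting one goroutine per worker and refusing duplicates. All names here
// (workerRegistry, probeWorker, workerKey) are hypothetical.
package main

import (
	"fmt"
	"sync"
	"time"
)

type workerKey struct {
	podUID        string
	containerName string
	probeType     string
}

type probeWorker struct {
	key workerKey
}

func (w *probeWorker) run() {
	// A real worker would loop, probing until told to stop; the sketch just logs once.
	fmt.Printf("worker started: %+v\n", w.key)
}

type workerRegistry struct {
	mu      sync.Mutex
	workers map[workerKey]*probeWorker
}

// add registers and starts a worker for key, rejecting duplicates the same way AddPod does.
func (r *workerRegistry) add(key workerKey) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if _, ok := r.workers[key]; ok {
		return fmt.Errorf("probe worker already exists for %+v", key)
	}
	w := &probeWorker{key: key}
	r.workers[key] = w
	go w.run()
	return nil
}

func main() {
	r := &workerRegistry{workers: map[workerKey]*probeWorker{}}
	_ = r.add(workerKey{podUID: "uid-1", containerName: "app", probeType: "readiness"})
	// Give the worker a moment to start; a real manager tracks and stops its workers explicitly.
	time.Sleep(10 * time.Millisecond)
}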
// runProbe dispatches to the exec, HTTP, or TCP prober according to which handler
// the probe spec defines, and returns the probe result.
func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
	timeout := time.Duration(p.TimeoutSeconds) * time.Second
	if p.Exec != nil {
		glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v, Command: %v", pod, container, p.Exec.Command)
		return pb.exec.Probe(pb.newExecInContainer(container, containerID, p.Exec.Command))
	}
	if p.HTTPGet != nil {
		scheme := strings.ToLower(string(p.HTTPGet.Scheme))
		host := p.HTTPGet.Host
		if host == "" {
			host = status.PodIP
		}
		port, err := extractPort(p.HTTPGet.Port, container)
		if err != nil {
			return probe.Unknown, "", err
		}
		path := p.HTTPGet.Path
		glog.V(4).Infof("HTTP-Probe Host: %v://%v, Port: %v, Path: %v", scheme, host, port, path)
		url := formatURL(scheme, host, port, path)
		headers := buildHeader(p.HTTPGet.HTTPHeaders)
		glog.V(4).Infof("HTTP-Probe Headers: %v", headers)
		return pb.http.Probe(url, headers, timeout)
	}
	if p.TCPSocket != nil {
		port, err := extractPort(p.TCPSocket.Port, container)
		if err != nil {
			return probe.Unknown, "", err
		}
		glog.V(4).Infof("TCP-Probe PodIP: %v, Port: %v, Timeout: %v", status.PodIP, port, timeout)
		return pb.tcp.Probe(status.PodIP, port, timeout)
	}
	glog.Warningf("Failed to find probe builder for container: %v", container)
	return probe.Unknown, "", fmt.Errorf("Missing probe handler for %s:%s", format.Pod(pod), container.Name)
}
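// Illustrative sketch, standard library only: roughly what an HTTP prober such as
// pb.http.Probe has to do under the hood -- build the URL from scheme/host/port/path,
// issue a GET bounded by the probe timeout, and map the status code to a result.
// Treating a 2xx/3xx status as healthy is an assumption of this sketch; the real
// prober's success criteria live in its own package. probeHTTP is a hypothetical name.
package main

import (
	"fmt"
	"net/http"
	"net/url"
	"time"
)

// probeHTTP returns nil if the endpoint answers with a 2xx or 3xx status within the
// timeout, and an error otherwise.
func probeHTTP(scheme, host string, port int, path string, timeout time.Duration) error {
	u := url.URL{Scheme: scheme, Host: fmt.Sprintf("%s:%d", host, port), Path: path}
	client := &http.Client{Timeout: timeout}
	resp, err := client.Get(u.String())
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusBadRequest {
		return nil
	}
	return fmt.Errorf("unhealthy status: %s", resp.Status)
}

func main() {
	if err := probeHTTP("http", "127.0.0.1", 8080, "/healthz", 1*time.Second); err != nil {
		fmt.Println("probe failed:", err)
		return
	}
	fmt.Println("probe succeeded")
}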
// markAllPodsNotReady updates the ready status of all pods running on the given node,
// as seen by the master. It returns nil only if every update succeeds.
func (nc *NodeController) markAllPodsNotReady(nodeName string) error {
	glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName)
	opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName)}
	pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(opts)
	if err != nil {
		return err
	}

	errMsg := []string{}
	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}

		for i, cond := range pod.Status.Conditions {
			if cond.Type == api.PodReady {
				pod.Status.Conditions[i].Status = api.ConditionFalse
				glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
				_, err := nc.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(&pod)
				if err != nil {
					glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err)
					errMsg = append(errMsg, fmt.Sprintf("%v", err))
				}
				break
			}
		}
	}
	if len(errMsg) == 0 {
		return nil
	}
	return fmt.Errorf("%v", strings.Join(errMsg, "; "))
}
// ShouldContainerBeRestarted checks whether a container needs to be restarted.
// TODO(yifan): Think about how to refactor this.
func ShouldContainerBeRestarted(container *api.Container, pod *api.Pod, podStatus *PodStatus) bool {
	// Get latest container status.
	status := podStatus.FindContainerStatusByName(container.Name)
	// If the container was never started before, we should start it.
	// NOTE(random-liu): If all historical containers were GC'd, we'll also return true here.
	if status == nil {
		return true
	}
	// Check whether the container is running.
	if status.State == ContainerStateRunning {
		return false
	}
	// Always restart a container in an unknown state.
	if status.State == ContainerStateUnknown {
		return true
	}
	// Check the RestartPolicy for a dead container.
	if pod.Spec.RestartPolicy == api.RestartPolicyNever {
		glog.V(4).Infof("Already ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
		return false
	}
	if pod.Spec.RestartPolicy == api.RestartPolicyOnFailure {
		// Check the exit code.
		if status.ExitCode == 0 {
			glog.V(4).Infof("Already successfully ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
			return false
		}
	}
	return true
}
// GetContainerLogs uses rkt's GetLogs API to get the logs of the container.
// By default, it returns a snapshot of the container log. Set |follow| to true to
// stream the log. Set |follow| to false and specify the number of lines (e.g.
// "100" or "all") to tail the log.
//
// TODO(yifan): This doesn't work with lkvm stage1 yet.
func (r *Runtime) GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) error {
	id, err := parseContainerID(containerID)
	if err != nil {
		return err
	}

	var since int64
	if logOptions.SinceSeconds != nil {
		t := unversioned.Now().Add(-time.Duration(*logOptions.SinceSeconds) * time.Second)
		since = t.Unix()
	}
	if logOptions.SinceTime != nil {
		since = logOptions.SinceTime.Unix()
	}
	getLogsRequest := &rktapi.GetLogsRequest{
		PodId:     id.uuid,
		AppName:   id.appName,
		Follow:    logOptions.Follow,
		SinceTime: since,
	}

	if logOptions.TailLines != nil {
		getLogsRequest.Lines = int32(*logOptions.TailLines)
	}

	stream, err := r.apisvc.GetLogs(context.Background(), getLogsRequest)
	if err != nil {
		glog.Errorf("rkt: Failed to create log stream for pod %q: %v", format.Pod(pod), err)
		return err
	}

	for {
		log, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			glog.Errorf("rkt: Failed to receive log for pod %q: %v", format.Pod(pod), err)
			return err
		}
		processLines(log.Lines, logOptions, stdout, stderr)
	}
	return nil
}
// WaitForAttachAndMount blocks until all of the pod's volumes are attached and mounted,
// or the attach/mount timeout expires.
func (vm *volumeManager) WaitForAttachAndMount(pod *api.Pod) error {
	expectedVolumes := getExpectedVolumes(pod)
	if len(expectedVolumes) == 0 {
		// No volumes to verify
		return nil
	}

	glog.V(3).Infof("Waiting for volumes to attach and mount for pod %q", format.Pod(pod))
	uniquePodName := volumehelper.GetUniquePodName(pod)

	// Some pods expect to have Setup called over and over again to update.
	// Remount plugins for which this is true. (Atomically updating volumes,
	// like Downward API, depend on this to update the contents of the volume.)
	vm.desiredStateOfWorldPopulator.ReprocessPod(uniquePodName)
	vm.actualStateOfWorld.MarkRemountRequired(uniquePodName)

	err := wait.Poll(
		podAttachAndMountRetryInterval,
		podAttachAndMountTimeout,
		vm.verifyVolumesMountedFunc(uniquePodName, expectedVolumes))

	if err != nil {
		// Timeout expired
		unmountedVolumes := vm.getUnmountedVolumes(uniquePodName, expectedVolumes)
		if len(unmountedVolumes) == 0 {
			return nil
		}

		return fmt.Errorf(
			"timeout expired waiting for volumes to attach/mount for pod %q/%q. list of unattached/unmounted volumes=%v",
			pod.Name,
			pod.Namespace,
			unmountedVolumes)
	}

	glog.V(3).Infof("All volumes are attached and mounted for pod %q", format.Pod(pod))
	return nil
}
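// Illustrative sketch, standard library only: an approximation of what wait.Poll provides
// in WaitForAttachAndMount above -- re-run a condition function on an interval until it
// reports done or the timeout expires. pollUntil and the mountsReady condition are
// hypothetical names for this example, not the real wait package API.
package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntil calls condition every interval until it returns true, returns an error,
// or the timeout elapses.
func pollUntil(interval, timeout time.Duration, condition func() (bool, error)) error {
	deadline := time.Now().Add(timeout)
	for {
		done, err := condition()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for the condition")
		}
		time.Sleep(interval)
	}
}

func main() {
	attempts := 0
	// Pretend the volumes become ready on the third check.
	mountsReady := func() (bool, error) {
		attempts++
		return attempts >= 3, nil
	}
	if err := pollUntil(100*time.Millisecond, 2*time.Second, mountsReady); err != nil {
		fmt.Println("volumes not ready:", err)
		return
	}
	fmt.Println("all volumes mounted after", attempts, "checks")
}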
// Admit rejects a pod if it's not safe to admit for node stability.
func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
	m.RLock()
	defer m.RUnlock()
	if len(m.nodeConditions) == 0 {
		return lifecycle.PodAdmitResult{Admit: true}
	}
	notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
	if notBestEffort {
		return lifecycle.PodAdmitResult{Admit: true}
	}
	glog.Warningf("Failed to admit pod %v - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
	// we reject all best effort pods until we are stable.
	return lifecycle.PodAdmitResult{
		Admit:   false,
		Reason:  reason,
		Message: message,
	}
}
// probe probes the container.
func (pb *prober) probe(probeType probeType, pod *api.Pod, status api.PodStatus, container api.Container, containerID kubecontainer.ContainerID) (results.Result, error) {
	var probeSpec *api.Probe
	switch probeType {
	case readiness:
		probeSpec = container.ReadinessProbe
	case liveness:
		probeSpec = container.LivenessProbe
	default:
		return results.Failure, fmt.Errorf("Unknown probe type: %q", probeType)
	}

	ctrName := fmt.Sprintf("%s:%s", format.Pod(pod), container.Name)
	if probeSpec == nil {
		glog.Warningf("%s probe for %s is nil", probeType, ctrName)
		return results.Success, nil
	}

	result, output, err := pb.runProbeWithRetries(probeSpec, pod, status, container, containerID, maxProbeRetries)
	if err != nil || result != probe.Success {
		// Probe failed in one way or another.
		ref, hasRef := pb.refManager.GetRef(containerID)
		if !hasRef {
			glog.Warningf("No ref for container %q (%s)", containerID.String(), ctrName)
		}
		if err != nil {
			glog.V(1).Infof("%s probe for %q errored: %v", probeType, ctrName, err)
			if hasRef {
				pb.recorder.Eventf(ref, api.EventTypeWarning, kubecontainer.ContainerUnhealthy, "%s probe errored: %v", probeType, err)
			}
		} else { // result != probe.Success
			glog.V(1).Infof("%s probe for %q failed (%v): %s", probeType, ctrName, result, output)
			if hasRef {
				pb.recorder.Eventf(ref, api.EventTypeWarning, kubecontainer.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
			}
		}
		return results.Failure, err
	}
	glog.V(3).Infof("%s probe for %q succeeded", probeType, ctrName)
	return results.Success, nil
}
// The label kubernetesPodLabel was added a long time ago (#7421); it serialized the whole api.Pod into a docker label.
// We want to remove this label because it serializes too much useless information. However, the kubelet may still have
// to work with old containers that only carry this label until the old label is completely deprecated. Until then, to
// ensure correctness, we have to fall back to the old label whenever the newly added labels are not available.
// TODO(random-liu): Remove this function when we can completely remove label kubernetesPodLabel, probably after
// dropping support for v1.1.
func supplyContainerInfoWithOldLabel(labels map[string]string, containerInfo *labelledContainerInfo) {
	// Get api.Pod from the old label
	var pod *api.Pod
	data, found := labels[kubernetesPodLabel]
	if !found {
		// Don't report any error here, because it's normal that a container has no pod label, especially
		// as we gradually deprecate the old label.
		return
	}
	pod = &api.Pod{}
	if err := runtime.DecodeInto(api.Codecs.UniversalDecoder(), []byte(data), pod); err != nil {
		// If the pod label can't be parsed, we should report an error.
		logError(containerInfo, kubernetesPodLabel, err)
		return
	}
	if containerInfo.PodDeletionGracePeriod == nil {
		containerInfo.PodDeletionGracePeriod = pod.DeletionGracePeriodSeconds
	}
	if containerInfo.PodTerminationGracePeriod == nil {
		containerInfo.PodTerminationGracePeriod = pod.Spec.TerminationGracePeriodSeconds
	}

	// Get api.Container from api.Pod
	var container *api.Container
	for i := range pod.Spec.Containers {
		if pod.Spec.Containers[i].Name == containerInfo.Name {
			container = &pod.Spec.Containers[i]
			break
		}
	}
	if container == nil {
		glog.Errorf("Unable to find container %q in pod %q", containerInfo.Name, format.Pod(pod))
		return
	}
	if containerInfo.PreStopHandler == nil && container.Lifecycle != nil {
		containerInfo.PreStopHandler = container.Lifecycle.PreStop
	}
}
// calcAndStoreStats calculates PodVolumeStats for a given pod and writes the result to the s.latest cache.
func (s *volumeStatCalculator) calcAndStoreStats() {
	// Find all Volumes for the Pod
	volumes, found := s.statsProvider.ListVolumesForPod(s.pod.UID)
	if !found {
		return
	}

	// Call GetMetrics on each Volume and copy the result to a new VolumeStats.FsStats
	stats := make([]stats.VolumeStats, 0, len(volumes))
	for name, v := range volumes {
		metric, err := v.GetMetrics()
		if err != nil {
			// Expected for Volumes that don't support Metrics
			// TODO: Disambiguate unsupported from errors
			// See issue #20676
			glog.V(4).Infof("Failed to calculate volume metrics for pod %s volume %s: %+v", format.Pod(s.pod), name, err)
			continue
		}
		stats = append(stats, s.parsePodVolumeStats(name, metric))
	}

	// Store the new stats
	s.latest.Store(PodVolumeStats{Volumes: stats})
}
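// Illustrative sketch: caching the most recently computed stats with sync/atomic.Value,
// which is the pattern the s.latest.Store call above suggests -- the writer Stores a
// fresh snapshot, readers Load it without taking a lock. The volumeStats/statsCache
// types here are simplified stand-ins for this example, not the kubelet's own types.
package main

import (
	"fmt"
	"sync/atomic"
)

type volumeStats struct {
	Name      string
	UsedBytes int64
}

type podVolumeStats struct {
	Volumes []volumeStats
}

type statsCache struct {
	latest atomic.Value // always holds a podVolumeStats value
}

// store replaces the cached snapshot with a newly computed one.
func (c *statsCache) store(s podVolumeStats) { c.latest.Store(s) }

// load returns the latest snapshot, or false if nothing has been stored yet.
func (c *statsCache) load() (podVolumeStats, bool) {
	v := c.latest.Load()
	if v == nil {
		return podVolumeStats{}, false
	}
	return v.(podVolumeStats), true
}

func main() {
	c := &statsCache{}
	c.store(podVolumeStats{Volumes: []volumeStats{{Name: "data", UsedBytes: 4096}}})
	if s, ok := c.load(); ok {
		fmt.Printf("latest: %+v\n", s)
	}
}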
func TestLabels(t *testing.T) {
	restartCount := 5
	deletionGracePeriod := int64(10)
	terminationGracePeriod := int64(10)
	lifecycle := &api.Lifecycle{
		// PostStart is left nil.
		PreStop: &api.Handler{
			Exec: &api.ExecAction{
				Command: []string{"action1", "action2"},
			},
			HTTPGet: &api.HTTPGetAction{
				Path:   "path",
				Host:   "host",
				Port:   intstr.FromInt(8080),
				Scheme: "scheme",
			},
			TCPSocket: &api.TCPSocketAction{
				Port: intstr.FromString("80"),
			},
		},
	}
	container := &api.Container{
		Name: "test_container",
		TerminationMessagePath: "/somepath",
		Lifecycle:              lifecycle,
	}
	pod := &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:                       "test_pod",
			Namespace:                  "test_pod_namespace",
			UID:                        "test_pod_uid",
			DeletionGracePeriodSeconds: &deletionGracePeriod,
		},
		Spec: api.PodSpec{
			Containers:                    []api.Container{*container},
			TerminationGracePeriodSeconds: &terminationGracePeriod,
		},
	}
	expected := &labelledContainerInfo{
		PodName:                   pod.Name,
		PodNamespace:              pod.Namespace,
		PodUID:                    pod.UID,
		PodDeletionGracePeriod:    pod.DeletionGracePeriodSeconds,
		PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds,
		Name:                      container.Name,
		Hash:                      strconv.FormatUint(kubecontainer.HashContainer(container), 16),
		RestartCount:              restartCount,
		TerminationMessagePath:    container.TerminationMessagePath,
		PreStopHandler:            container.Lifecycle.PreStop,
	}

	// Test that we can get the right information back from the labels.
	labels := newLabels(container, pod, restartCount, false)
	containerInfo := getContainerInfoFromLabel(labels)
	if !reflect.DeepEqual(containerInfo, expected) {
		t.Errorf("expected %v, got %v", expected, containerInfo)
	}

	// Test that when DeletionGracePeriodSeconds, TerminationGracePeriodSeconds and Lifecycle are nil,
	// the information retrieved from the labels is also nil.
	container.Lifecycle = nil
	pod.DeletionGracePeriodSeconds = nil
	pod.Spec.TerminationGracePeriodSeconds = nil
	expected.PodDeletionGracePeriod = nil
	expected.PodTerminationGracePeriod = nil
	expected.PreStopHandler = nil
	// Because the container has changed, the Hash should be updated.
	expected.Hash = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
	labels = newLabels(container, pod, restartCount, false)
	containerInfo = getContainerInfoFromLabel(labels)
	if !reflect.DeepEqual(containerInfo, expected) {
		t.Errorf("expected %v, got %v", expected, containerInfo)
	}

	// Test that when DeletionGracePeriodSeconds, TerminationGracePeriodSeconds and Lifecycle are nil,
	// but the old label kubernetesPodLabel is set, the information retrieved from the labels is also set.
	pod.DeletionGracePeriodSeconds = &deletionGracePeriod
	pod.Spec.TerminationGracePeriodSeconds = &terminationGracePeriod
	container.Lifecycle = lifecycle
	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	if err != nil {
		t.Fatalf("Failed to encode pod %q into string: %v", format.Pod(pod), err)
	}
	labels[kubernetesPodLabel] = string(data)
	expected.PodDeletionGracePeriod = pod.DeletionGracePeriodSeconds
	expected.PodTerminationGracePeriod = pod.Spec.TerminationGracePeriodSeconds
	expected.PreStopHandler = container.Lifecycle.PreStop
	// Do not update expected.Hash here: we reuse the labels from the previous case directly, so the
	// kubernetesContainerHashLabel never changes in this test and expected.Hash shouldn't change either.
	containerInfo = getContainerInfoFromLabel(labels)
	if !reflect.DeepEqual(containerInfo, expected) {
		t.Errorf("expected %v, got %v", expected, containerInfo)
	}
}
// synchronize is the main control loop that enforces eviction thresholds.
func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
	// if we have nothing to do, just return
	thresholds := m.config.Thresholds
	if len(thresholds) == 0 {
		return
	}

	// make observations and get a function to derive pod usage stats relative to those observations.
	observations, statsFunc, err := makeSignalObservations(m.summaryProvider)
	if err != nil {
		glog.Errorf("eviction manager: unexpected err: %v", err)
		return
	}

	now := m.clock.Now()

	// determine the set of thresholds met independent of grace period
	thresholds = thresholdsMet(thresholds, observations)

	// track when a threshold was first observed
	thresholdsFirstObservedAt := thresholdsFirstObservedAt(thresholds, m.thresholdsFirstObservedAt, now)

	// the set of node conditions that are triggered by currently observed thresholds
	nodeConditions := nodeConditions(thresholds)

	// track when a node condition was last observed
	nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)

	// node conditions report true if they have been observed within the transition period window
	nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)

	// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
	thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)

	// update internal state
	m.Lock()
	m.nodeConditions = nodeConditions
	m.thresholdsFirstObservedAt = thresholdsFirstObservedAt
	m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt
	m.Unlock()

	// determine the set of resources under starvation
	starvedResources := reclaimResources(thresholds)
	if len(starvedResources) == 0 {
		glog.V(3).Infof("eviction manager: no resources are starved")
		return
	}

	// rank the resources to reclaim by eviction priority
	sort.Sort(byEvictionPriority(starvedResources))
	resourceToReclaim := starvedResources[0]
	glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)

	// determine if this is a soft or hard eviction associated with the resource
	softEviction := isSoftEviction(thresholds, resourceToReclaim)

	// record an event about the resources we are now attempting to reclaim via eviction
	m.recorder.Eventf(m.nodeRef, api.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

	// rank the pods for eviction
	rank, ok := resourceToRankFunc[resourceToReclaim]
	if !ok {
		glog.Errorf("eviction manager: no ranking function for resource %s", resourceToReclaim)
		return
	}

	// the only candidates viable for eviction are the pods that have something running.
	activePods := podFunc()
	if len(activePods) == 0 {
		glog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict")
		return
	}

	// rank the running pods for eviction for the specified resource
	rank(activePods, statsFunc)

	glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))

	// we kill at most a single pod during each eviction interval
	for i := range activePods {
		pod := activePods[i]
		status := api.PodStatus{
			Phase:   api.PodFailed,
			Message: message,
			Reason:  reason,
		}
		// record that we are evicting the pod
		m.recorder.Eventf(pod, api.EventTypeWarning, reason, message)
		gracePeriodOverride := int64(0)
		if softEviction {
			gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
		}
		// this is a blocking call and should only return when the pod and its containers are killed.
		err := m.killPodFunc(pod, status, &gracePeriodOverride)
		if err != nil {
			glog.Infof("eviction manager: pod %s failed to evict: %v", format.Pod(pod), err)
			continue
		}
		// success, so we return and wait until the next housekeeping interval
		glog.Infof("eviction manager: pod %s evicted successfully", format.Pod(pod))
		return
	}
	glog.Infof("eviction manager: unable to evict any pods from the node")
}
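// Illustrative sketch of the sort.Sort(byEvictionPriority(...)) call in synchronize above:
// a named slice type implementing sort.Interface so starved resources can be ranked before
// choosing what to reclaim first. The ordering rule used here (memory before everything
// else) is an assumption for the example, not necessarily the eviction manager's policy.
package main

import (
	"fmt"
	"sort"
)

type resource string

// byEvictionPriority orders resources so that the most critical one sorts first.
type byEvictionPriority []resource

func (a byEvictionPriority) Len() int      { return len(a) }
func (a byEvictionPriority) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byEvictionPriority) Less(i, j int) bool {
	// Assumed rule for this sketch: memory pressure is reclaimed before anything else.
	return a[i] == "memory" && a[j] != "memory"
}

func main() {
	starved := []resource{"nodefs", "memory", "imagefs"}
	sort.Sort(byEvictionPriority(starved))
	fmt.Println("reclaim first:", starved[0]) // memory
}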
// recordFirstSeenTime records the first seen time of this pod.
func recordFirstSeenTime(pod *api.Pod) {
	glog.V(4).Infof("Receiving a new pod %q", format.Pod(pod))
	pod.Annotations[kubetypes.ConfigFirstSeenAnnotationKey] = kubetypes.NewTimestamp().GetString()
}
// Run records which lifecycle handler would have been invoked instead of executing it, for use in tests.
func (hr *FakeHandlerRunner) Run(containerID kubecontainer.ContainerID, pod *api.Pod, container *api.Container, handler *api.Handler) (string, error) {
	hr.Lock()
	defer hr.Unlock()

	if hr.Err != nil {
		return "", hr.Err
	}

	switch {
	case handler.Exec != nil:
		hr.HandlerRuns = append(hr.HandlerRuns, fmt.Sprintf("exec on pod: %v, container: %v: %v", format.Pod(pod), container.Name, containerID.String()))
	case handler.HTTPGet != nil:
		hr.HandlerRuns = append(hr.HandlerRuns, fmt.Sprintf("http-get on pod: %v, container: %v: %v", format.Pod(pod), container.Name, containerID.String()))
	case handler.TCPSocket != nil:
		hr.HandlerRuns = append(hr.HandlerRuns, fmt.Sprintf("tcp-socket on pod: %v, container: %v: %v", format.Pod(pod), container.Name, containerID.String()))
	default:
		return "", fmt.Errorf("Invalid handler: %v", handler)
	}
	return "", nil
}
// newLabels builds the set of docker labels the kubelet attaches to a container so that
// pod and container metadata can be recovered from the container later.
func newLabels(container *api.Container, pod *api.Pod, restartCount int, enableCustomMetrics bool) map[string]string {
	labels := map[string]string{}
	labels[types.KubernetesPodNameLabel] = pod.Name
	labels[types.KubernetesPodNamespaceLabel] = pod.Namespace
	labels[types.KubernetesPodUIDLabel] = string(pod.UID)
	if pod.DeletionGracePeriodSeconds != nil {
		labels[kubernetesPodDeletionGracePeriodLabel] = strconv.FormatInt(*pod.DeletionGracePeriodSeconds, 10)
	}
	if pod.Spec.TerminationGracePeriodSeconds != nil {
		labels[kubernetesPodTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
	}

	labels[types.KubernetesContainerNameLabel] = container.Name
	labels[kubernetesContainerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
	labels[kubernetesContainerRestartCountLabel] = strconv.Itoa(restartCount)
	labels[kubernetesContainerTerminationMessagePathLabel] = container.TerminationMessagePath
	if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
		// Using JSON encoding so that the PreStop handler object is readable after writing as a label
		rawPreStop, err := json.Marshal(container.Lifecycle.PreStop)
		if err != nil {
			glog.Errorf("Unable to marshal lifecycle PreStop handler for container %q of pod %q: %v", container.Name, format.Pod(pod), err)
		} else {
			labels[kubernetesContainerPreStopHandlerLabel] = string(rawPreStop)
		}
	}

	if enableCustomMetrics {
		path, err := custommetrics.GetCAdvisorCustomMetricsDefinitionPath(container)
		if path != nil && err == nil {
			labels[cadvisorPrometheusMetricsLabel] = *path
		}
	}

	return labels
}
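// Illustrative sketch of the label round-trip performed by newLabels (write side) and
// getContainerInfoFromLabel (read side): marshal the PreStop handler to JSON so it fits
// into a string-valued container label, then unmarshal it when inspecting the container
// later. The execHandler type and the label key are simplified stand-ins, not the real
// api.Handler or the kubelet's label names.
package main

import (
	"encoding/json"
	"fmt"
)

type execHandler struct {
	Command []string `json:"command"`
}

func main() {
	labels := map[string]string{}

	// Write side: encode the handler and store it as a label value.
	preStop := execHandler{Command: []string{"/bin/sh", "-c", "echo bye"}}
	raw, err := json.Marshal(preStop)
	if err != nil {
		fmt.Println("unable to marshal PreStop handler:", err)
		return
	}
	labels["example.io/container.preStopHandler"] = string(raw)

	// Read side: decode the label back into a struct.
	var decoded execHandler
	if err := json.Unmarshal([]byte(labels["example.io/container.preStopHandler"]), &decoded); err != nil {
		fmt.Println("unable to unmarshal PreStop handler:", err)
		return
	}
	fmt.Printf("decoded PreStop command: %v\n", decoded.Command)
}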
// Run executes the exec or HTTP lifecycle handler for a container and returns a
// human-readable message describing any failure.
func (hr *HandlerRunner) Run(containerID kubecontainer.ContainerID, pod *api.Pod, container *api.Container, handler *api.Handler) (string, error) {
	switch {
	case handler.Exec != nil:
		var (
			buffer bytes.Buffer
			msg    string
		)
		output := ioutils.WriteCloserWrapper(&buffer)
		err := hr.commandRunner.ExecInContainer(containerID, handler.Exec.Command, nil, output, output, false)
		if err != nil {
			// Assign to the outer msg so the failure message is actually returned to the caller.
			msg = fmt.Sprintf("Exec lifecycle hook (%v) for Container %q in Pod %q failed - %q", handler.Exec.Command, container.Name, format.Pod(pod), buffer.String())
			glog.V(1).Infof(msg)
		}
		return msg, err
	case handler.HTTPGet != nil:
		msg, err := hr.runHTTPHandler(pod, container, handler)
		if err != nil {
			// Reassign rather than shadow msg so the failure message is returned.
			msg = fmt.Sprintf("Http lifecycle hook (%s) for Container %q in Pod %q failed - %q", handler.HTTPGet.Path, container.Name, format.Pod(pod), msg)
			glog.V(1).Infof(msg)
		}
		return msg, err
	default:
		err := fmt.Errorf("Invalid handler: %v", handler)
		msg := fmt.Sprintf("Cannot run handler: %v", err)
		glog.Errorf(msg)
		return msg, err
	}
}