func TestDoNotDeleteMirrorPods(t *testing.T) {
	staticPod := getTestPod()
	staticPod.Annotations = map[string]string{kubetypes.ConfigSourceAnnotationKey: "file"}
	mirrorPod := getTestPod()
	mirrorPod.UID = "mirror-12345678"
	mirrorPod.Annotations = map[string]string{
		kubetypes.ConfigSourceAnnotationKey: "api",
		kubetypes.ConfigMirrorAnnotationKey: "mirror",
	}
	// Set the deletion timestamp.
	mirrorPod.DeletionTimestamp = new(unversioned.Time)
	client := fake.NewSimpleClientset(mirrorPod)
	m := newTestManager(client)
	m.podManager.AddPod(staticPod)
	m.podManager.AddPod(mirrorPod)
	// Verify setup.
	assert.True(t, kubepod.IsStaticPod(staticPod), "SetUp error: staticPod")
	assert.True(t, kubepod.IsMirrorPod(mirrorPod), "SetUp error: mirrorPod")
	assert.Equal(t, m.podManager.TranslatePodUID(mirrorPod.UID), staticPod.UID)

	status := getRandomPodStatus()
	now := unversioned.Now()
	status.StartTime = &now
	m.SetPodStatus(staticPod, status)

	m.testSyncBatch()
	// Expect not to see a delete action.
	verifyActions(t, m.kubeClient, []core.Action{
		core.GetActionImpl{ActionImpl: core.ActionImpl{Verb: "get", Resource: "pods"}},
		core.UpdateActionImpl{ActionImpl: core.ActionImpl{Verb: "update", Resource: "pods", Subresource: "status"}},
	})
}
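// The tests in this file rely on a small getTestPod fixture helper that is
// not shown in this excerpt. The sketch below is an assumed, minimal version
// (the UID/Name/Namespace values are illustrative, not authoritative); it
// returns a fresh pod each time so tests can mutate UID and annotations
// independently. The newer TestStaticPod variant below would use the v1.Pod
// equivalent of the same fixture.
func getTestPod() *api.Pod {
	return &api.Pod{
		ObjectMeta: api.ObjectMeta{
			UID:       "12345678",
			Name:      "foo",
			Namespace: "new",
		},
	}
}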
func TestStaticPodStatus(t *testing.T) {
	staticPod := getTestPod()
	staticPod.Annotations = map[string]string{kubetypes.ConfigSourceAnnotationKey: "file"}
	mirrorPod := getTestPod()
	mirrorPod.UID = "mirror-12345678"
	mirrorPod.Annotations = map[string]string{
		kubetypes.ConfigSourceAnnotationKey: "api",
		kubetypes.ConfigMirrorAnnotationKey: "mirror",
	}
	client := fake.NewSimpleClientset(mirrorPod)
	m := newTestManager(client)
	m.podManager.AddPod(staticPod)
	m.podManager.AddPod(mirrorPod)
	// Verify setup.
	assert.True(t, kubepod.IsStaticPod(staticPod), "SetUp error: staticPod")
	assert.True(t, kubepod.IsMirrorPod(mirrorPod), "SetUp error: mirrorPod")
	assert.Equal(t, m.podManager.TranslatePodUID(mirrorPod.UID), staticPod.UID)

	status := getRandomPodStatus()
	now := unversioned.Now()
	status.StartTime = &now

	m.SetPodStatus(staticPod, status)
	retrievedStatus := expectPodStatus(t, m, staticPod)
	normalizeStatus(&status)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)
	retrievedStatus, _ = m.GetPodStatus(mirrorPod.UID)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)

	// Should translate mirrorPod / staticPod UID.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{
		core.GetActionImpl{ActionImpl: core.ActionImpl{Verb: "get", Resource: "pods"}},
		core.UpdateActionImpl{ActionImpl: core.ActionImpl{Verb: "update", Resource: "pods", Subresource: "status"}},
	})
	updateAction := client.Actions()[1].(core.UpdateActionImpl)
	updatedPod := updateAction.Object.(*api.Pod)
	assert.Equal(t, mirrorPod.UID, updatedPod.UID, "Expected mirrorPod (%q), but got %q", mirrorPod.UID, updatedPod.UID)
	assert.True(t, isStatusEqual(&status, &updatedPod.Status), "Expected: %+v, Got: %+v", status, updatedPod.Status)
	client.ClearActions()

	// No changes.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{})

	// Mirror pod identity changes.
	m.podManager.DeletePod(mirrorPod)
	mirrorPod.UID = "new-mirror-pod"
	mirrorPod.Status = api.PodStatus{}
	m.podManager.AddPod(mirrorPod)

	// Expect update to new mirrorPod.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{
		core.GetActionImpl{ActionImpl: core.ActionImpl{Verb: "get", Resource: "pods"}},
		core.UpdateActionImpl{ActionImpl: core.ActionImpl{Verb: "update", Resource: "pods", Subresource: "status"}},
	})
	updateAction = client.Actions()[1].(core.UpdateActionImpl)
	updatedPod = updateAction.Object.(*api.Pod)
	assert.Equal(t, mirrorPod.UID, updatedPod.UID, "Expected mirrorPod (%q), but got %q", mirrorPod.UID, updatedPod.UID)
	assert.True(t, isStatusEqual(&status, &updatedPod.Status), "Expected: %+v, Got: %+v", status, updatedPod.Status)
}
// needsReconcile compares the given status with the status in the pod manager (which
// in fact comes from apiserver), and returns whether the status needs to be reconciled
// with the apiserver. When the pod status is inconsistent between the apiserver and the
// kubelet, the kubelet should forcibly send an update to reconcile the inconsistency,
// because the kubelet should be the source of truth of the pod status.
// NOTE(random-liu): It's simpler to pass in the mirror pod uid and get the mirror pod by uid,
// but the pod manager currently only supports getting the mirror pod by static pod, so we
// have to pass the static pod uid here.
// TODO(random-liu): Simplify the logic when mirror pod manager is added.
func (m *manager) needsReconcile(uid types.UID, status api.PodStatus) bool {
	// The pod could be a static pod, so we should translate first.
	pod, ok := m.podManager.GetPodByUID(uid)
	if !ok {
		glog.V(4).Infof("Pod %q has been deleted, no need to reconcile", string(uid))
		return false
	}
	// If the pod is a static pod, we should check its mirror pod, because only the status in the mirror pod is meaningful to us.
	if kubepod.IsStaticPod(pod) {
		mirrorPod, ok := m.podManager.GetMirrorPodByPod(pod)
		if !ok {
			glog.V(4).Infof("Static pod %q has no corresponding mirror pod, no need to reconcile", format.Pod(pod))
			return false
		}
		pod = mirrorPod
	}

	podStatus, err := copyStatus(&pod.Status)
	if err != nil {
		return false
	}
	normalizeStatus(pod, &podStatus)

	if isStatusEqual(&podStatus, &status) {
		// If the status from the source is the same as the cached status,
		// reconciliation is not needed. Just return.
		return false
	}
	glog.V(3).Infof("Pod status is inconsistent with cached status for pod %q, a reconciliation should be triggered:\n %+v", format.Pod(pod),
		diff.ObjectDiff(podStatus, status))

	return true
}
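// needsReconcile leans on isStatusEqual (also used throughout the tests) and
// copyStatus, which are not shown in this excerpt. A minimal sketch, assuming
// the comparison is a semantic deep-equality check on already-normalized
// statuses and that copyStatus is a deep copy that only fails if the
// conversion machinery errors:
func isStatusEqual(oldStatus, status *api.PodStatus) bool {
	return api.Semantic.DeepEqual(status, oldStatus)
}

func copyStatus(source *api.PodStatus) (api.PodStatus, error) {
	clone, err := api.Scheme.DeepCopy(source)
	if err != nil {
		glog.Errorf("Failed to clone status %+v: %v", source, err)
		return api.PodStatus{}, err
	}
	status := *clone.(*api.PodStatus)
	return status, nil
}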
func TestStaticPodStatus(t *testing.T) {
	staticPod := *testPod
	staticPod.Annotations = map[string]string{kubetypes.ConfigSourceAnnotationKey: "file"}
	mirrorPod := *testPod
	mirrorPod.UID = "mirror-12345678"
	mirrorPod.Annotations = map[string]string{
		kubetypes.ConfigSourceAnnotationKey: "api",
		kubetypes.ConfigMirrorAnnotationKey: "mirror",
	}
	client := testclient.NewSimpleFake(&mirrorPod)
	m := newTestManager(client)
	m.podManager.AddPod(&staticPod)
	m.podManager.AddPod(&mirrorPod)
	// Verify setup.
	assert.True(t, kubepod.IsStaticPod(&staticPod), "SetUp error: staticPod")
	assert.True(t, kubepod.IsMirrorPod(&mirrorPod), "SetUp error: mirrorPod")
	assert.Equal(t, m.podManager.TranslatePodUID(mirrorPod.UID), staticPod.UID)

	status := getRandomPodStatus()
	now := unversioned.Now()
	status.StartTime = &now

	m.SetPodStatus(&staticPod, status)
	retrievedStatus, _ := m.GetPodStatus(staticPod.UID)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)
	retrievedStatus, _ = m.GetPodStatus(mirrorPod.UID)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)

	// Should translate mirrorPod / staticPod UID.
	m.syncBatch()
	verifyActions(t, m.kubeClient, []testclient.Action{
		testclient.GetActionImpl{ActionImpl: testclient.ActionImpl{Verb: "get", Resource: "pods"}},
		testclient.UpdateActionImpl{ActionImpl: testclient.ActionImpl{Verb: "update", Resource: "pods", Subresource: "status"}},
	})
	updateAction := client.Actions()[1].(testclient.UpdateActionImpl)
	updatedPod := updateAction.Object.(*api.Pod)
	assert.Equal(t, mirrorPod.UID, updatedPod.UID, "Expected mirrorPod (%q), but got %q", mirrorPod.UID, updatedPod.UID)
	assert.True(t, isStatusEqual(&status, &updatedPod.Status), "Expected: %+v, Got: %+v", status, updatedPod.Status)
	client.ClearActions()

	otherPod := &api.Pod{
		ObjectMeta: api.ObjectMeta{
			UID:       "other-87654321",
			Name:      "other",
			Namespace: "new",
		},
	}
	m.podManager.AddPod(otherPod)
	m.SetPodStatus(otherPod, getRandomPodStatus())
	m.syncBatch()
	verifyActions(t, m.kubeClient, []testclient.Action{
		testclient.GetActionImpl{ActionImpl: testclient.ActionImpl{Verb: "get", Resource: "pods"}},
	})
	_, found := m.GetPodStatus(otherPod.UID)
	assert.False(t, found, "otherPod status should have been deleted")
}
func TestStaticPod(t *testing.T) {
	staticPod := getTestPod()
	staticPod.Annotations = map[string]string{kubetypes.ConfigSourceAnnotationKey: "file"}
	mirrorPod := getTestPod()
	mirrorPod.UID = "mirror-12345678"
	mirrorPod.Annotations = map[string]string{
		kubetypes.ConfigSourceAnnotationKey: "api",
		kubetypes.ConfigMirrorAnnotationKey: "mirror",
	}
	client := fake.NewSimpleClientset(mirrorPod)
	m := newTestManager(client)

	// Create the static pod
	m.podManager.AddPod(staticPod)
	assert.True(t, kubepod.IsStaticPod(staticPod), "SetUp error: staticPod")

	status := getRandomPodStatus()
	now := metav1.Now()
	status.StartTime = &now
	m.SetPodStatus(staticPod, status)

	// Should be able to get the static pod status from status manager
	retrievedStatus := expectPodStatus(t, m, staticPod)
	normalizeStatus(staticPod, &status)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)

	// Should not sync pod because there is no corresponding mirror pod for the static pod.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{})
	client.ClearActions()

	// Create the mirror pod
	m.podManager.AddPod(mirrorPod)
	assert.True(t, kubepod.IsMirrorPod(mirrorPod), "SetUp error: mirrorPod")
	assert.Equal(t, m.podManager.TranslatePodUID(mirrorPod.UID), staticPod.UID)

	// Should be able to get the mirror pod status from status manager
	retrievedStatus, _ = m.GetPodStatus(mirrorPod.UID)
	assert.True(t, isStatusEqual(&status, &retrievedStatus), "Expected: %+v, Got: %+v", status, retrievedStatus)

	// Should sync pod because the corresponding mirror pod is created
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{
		core.GetActionImpl{ActionImpl: core.ActionImpl{Verb: "get", Resource: schema.GroupVersionResource{Resource: "pods"}}},
		core.UpdateActionImpl{ActionImpl: core.ActionImpl{Verb: "update", Resource: schema.GroupVersionResource{Resource: "pods"}, Subresource: "status"}},
	})
	updateAction := client.Actions()[1].(core.UpdateActionImpl)
	updatedPod := updateAction.Object.(*v1.Pod)
	assert.Equal(t, mirrorPod.UID, updatedPod.UID, "Expected mirrorPod (%q), but got %q", mirrorPod.UID, updatedPod.UID)
	assert.True(t, isStatusEqual(&status, &updatedPod.Status), "Expected: %+v, Got: %+v", status, updatedPod.Status)
	client.ClearActions()

	// Should not sync pod because nothing is changed.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{})

	// Change mirror pod identity.
	m.podManager.DeletePod(mirrorPod)
	mirrorPod.UID = "new-mirror-pod"
	mirrorPod.Status = v1.PodStatus{}
	m.podManager.AddPod(mirrorPod)

	// Should not update to mirror pod, because UID has changed.
	m.testSyncBatch()
	verifyActions(t, m.kubeClient, []core.Action{
		core.GetActionImpl{ActionImpl: core.ActionImpl{Verb: "get", Resource: schema.GroupVersionResource{Resource: "pods"}}},
	})
}
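// verifyActions and expectPodStatus, used by every test above, are assumed to
// look roughly like the sketch below: verifyActions only checks the number of
// recorded fake-client actions and their verb/resource/subresource, and
// expectPodStatus fails the test when no status has been cached for the pod.
// Types here follow the newer v1/schema-based variant of the tests.
func verifyActions(t *testing.T, kubeClient clientset.Interface, expectedActions []core.Action) {
	actions := kubeClient.(*fake.Clientset).Actions()
	if len(actions) != len(expectedActions) {
		t.Fatalf("unexpected actions, got: %+v, expected: %+v", actions, expectedActions)
		return
	}
	for i := 0; i < len(actions); i++ {
		e := expectedActions[i]
		a := actions[i]
		if !a.Matches(e.GetVerb(), e.GetResource().Resource) || a.GetSubresource() != e.GetSubresource() {
			t.Errorf("unexpected action, got: %+v, expected: %+v", a, e)
		}
	}
}

func expectPodStatus(t *testing.T, m *manager, pod *v1.Pod) v1.PodStatus {
	status, ok := m.GetPodStatus(pod.UID)
	if !ok {
		t.Fatalf("Expected PodStatus for %q but got none", pod.UID)
	}
	return status
}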
// synchronize is the main control loop that enforces eviction thresholds.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) {
	// if we have nothing to do, just return
	thresholds := m.config.Thresholds
	if len(thresholds) == 0 {
		return
	}

	// build the ranking functions (if not yet known)
	// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
	if len(m.resourceToRankFunc) == 0 || len(m.resourceToNodeReclaimFuncs) == 0 {
		// this may error if cadvisor has yet to complete housekeeping, so we will just try again in next pass.
		hasDedicatedImageFs, err := diskInfoProvider.HasDedicatedImageFs()
		if err != nil {
			return
		}
		m.resourceToRankFunc = buildResourceToRankFunc(hasDedicatedImageFs)
		m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, hasDedicatedImageFs)
	}

	// make observations and get a function to derive pod usage stats relative to those observations.
	observations, statsFunc, err := makeSignalObservations(m.summaryProvider)
	if err != nil {
		glog.Errorf("eviction manager: unexpected err: %v", err)
		return
	}

	// attempt to create a threshold notifier to improve eviction response time
	if m.config.KernelMemcgNotification && !m.notifiersInitialized {
		glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
		m.notifiersInitialized = true
		// start soft memory notification
		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
			glog.Infof("soft memory eviction threshold crossed at %s", desc)
			// TODO wait grace period for soft memory limit
			m.synchronize(diskInfoProvider, podFunc)
		})
		if err != nil {
			glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
		}
		// start hard memory notification
		err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
			glog.Infof("hard memory eviction threshold crossed at %s", desc)
			m.synchronize(diskInfoProvider, podFunc)
		})
		if err != nil {
			glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
		}
	}

	// determine the set of thresholds met independent of grace period
	thresholds = thresholdsMet(thresholds, observations, false)

	// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
	if len(m.thresholdsMet) > 0 {
		thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
		thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
	}

	// determine the set of thresholds whose stats have been updated since the last sync
	thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)

	// track when a threshold was first observed
	now := m.clock.Now()
	thresholdsFirstObservedAt := thresholdsFirstObservedAt(thresholds, m.thresholdsFirstObservedAt, now)

	// the set of node conditions that are triggered by currently observed thresholds
	nodeConditions := nodeConditions(thresholds)

	// track when a node condition was last observed
	nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)

	// a node condition reports true if it has been observed within the transition period window
	nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)

	// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
	thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)

	// update internal state
	m.Lock()
	m.nodeConditions = nodeConditions
	m.thresholdsFirstObservedAt = thresholdsFirstObservedAt
	m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt
	m.thresholdsMet = thresholds
	m.lastObservations = observations
	m.Unlock()

	// determine the set of resources under starvation
	starvedResources := getStarvedResources(thresholds)
	if len(starvedResources) == 0 {
		glog.V(3).Infof("eviction manager: no resources are starved")
		return
	}

	// rank the resources to reclaim by eviction priority
	sort.Sort(byEvictionPriority(starvedResources))
	resourceToReclaim := starvedResources[0]
	glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)

	// determine if this is a soft or hard eviction associated with the resource
	softEviction := isSoftEvictionThresholds(thresholds, resourceToReclaim)

	// record an event about the resources we are now attempting to reclaim via eviction
	m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

	// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
	if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
		glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
		return
	}

	glog.Infof("eviction manager: must evict pod(s) to reclaim %v", resourceToReclaim)

	// rank the pods for eviction
	rank, ok := m.resourceToRankFunc[resourceToReclaim]
	if !ok {
		glog.Errorf("eviction manager: no ranking function for resource %s", resourceToReclaim)
		return
	}

	// the only candidates viable for eviction are those pods that had anything running.
	activePods := podFunc()
	if len(activePods) == 0 {
		glog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict")
		return
	}

	// rank the running pods for eviction for the specified resource
	rank(activePods, statsFunc)
	glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))

	// we kill at most a single pod during each eviction interval
	for i := range activePods {
		pod := activePods[i]
		if kubepod.IsStaticPod(pod) {
			// The eviction manager doesn't evict static pods. To stop a static
			// pod, the admin needs to remove the manifest from kubelet's
			// --config directory.
			// TODO(39124): This is a short term fix, we can't assume static pods
			// are always well behaved.
			glog.Infof("eviction manager: NOT evicting static pod %v", pod.Name)
			continue
		}
		status := v1.PodStatus{
			Phase:   v1.PodFailed,
			Message: fmt.Sprintf(message, resourceToReclaim),
			Reason:  reason,
		}
		// record that we are evicting the pod
		m.recorder.Eventf(pod, v1.EventTypeWarning, reason, fmt.Sprintf(message, resourceToReclaim))
		gracePeriodOverride := int64(0)
		if softEviction {
			gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
		}
		// this is a blocking call and should only return when the pod and its containers are killed.
		err := m.killPodFunc(pod, status, &gracePeriodOverride)
		if err != nil {
			glog.Infof("eviction manager: pod %s failed to evict %v", format.Pod(pod), err)
			continue
		}
		// success, so we return until the next housekeeping interval
		glog.Infof("eviction manager: pod %s evicted successfully", format.Pod(pod))
		return
	}
	glog.Infof("eviction manager: unable to evict any pods from the node")
}
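// synchronize above sorts the starved resources with byEvictionPriority
// before picking the first one to reclaim. A minimal sketch of that ordering,
// assuming memory is always ranked ahead of every other resource so it is
// reclaimed first:
type byEvictionPriority []v1.ResourceName

func (a byEvictionPriority) Len() int      { return len(a) }
func (a byEvictionPriority) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less ranks memory before all other resources.
func (a byEvictionPriority) Less(i, j int) bool {
	return a[i] == v1.ResourceMemory
}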