// logPodStartupStatus periodically logs the startup status of pods matching
// observedLabels in namespace ns. On every tick of period it computes and logs
// the current status; when stopCh is closed it logs one final status and returns.
func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
	label := labels.SelectorFromSet(labels.Set(observedLabels))
	podStore := testutils.NewPodStore(c, ns, label, fields.Everything())
	defer podStore.Stop()
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			pods := podStore.List()
			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
			framework.Logf(startupStatus.String("Density"))
		case <-stopCh:
			pods := podStore.List()
			startupStatus := testutils.ComputeRCStartupStatus(pods, expectedPods)
			framework.Logf(startupStatus.String("Density"))
			return
		}
	}
}
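// Usage sketch (illustrative only, not part of the test itself): the caller is
// expected to run this logger in a goroutine while pods are being created, then
// close stopCh to trigger the final report. The label map and period below are
// assumptions for the sake of the example.
//
//	stopCh := make(chan struct{})
//	go logPodStartupStatus(c, expectedPods, ns, map[string]string{"type": "densityPod"}, 10*time.Second, stopCh)
//	// ... create the pods under test ...
//	close(stopCh)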
// rebootNode takes the given node on the given provider through the following
// steps using client c:
//  - ensures the node is ready
//  - ensures all pods on the node are running and ready
//  - reboots the node (by executing rebootCmd over ssh)
//  - ensures the node reaches some non-ready state
//  - ensures the node becomes ready again
//  - ensures all pods on the node become running and ready again
//
// It returns true only if all of the steps pass; at the first failed step,
// it returns false and does not run the rest.
func rebootNode(c clientset.Interface, provider, name, rebootCmd string) bool {
	// Setup
	ns := api.NamespaceSystem
	ps := testutils.NewPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(api.PodHostField, name))
	defer ps.Stop()

	// Get the node initially.
	framework.Logf("Getting %s", name)
	node, err := c.Core().Nodes().Get(name, metav1.GetOptions{})
	if err != nil {
		framework.Logf("Couldn't get node %s", name)
		return false
	}

	// Node sanity check: ensure it is "ready".
	if !framework.WaitForNodeToBeReady(c, name, framework.NodeReadyInitialTimeout) {
		return false
	}

	// Get all the pods on the node that don't have a liveness probe set.
	// A liveness probe may cause a pod to be restarted during the node reboot,
	// in which case the pod may not be running afterwards.
	pods := ps.List()
	podNames := []string{}
	for _, p := range pods {
		probe := false
		for _, c := range p.Spec.Containers {
			if c.LivenessProbe != nil {
				probe = true
				break
			}
		}
		if !probe {
			podNames = append(podNames, p.ObjectMeta.Name)
		}
	}
	framework.Logf("Node %s has %d assigned pods with no liveness probes: %v", name, len(podNames), podNames)

	// For each pod, we do a sanity check to ensure it's running / healthy
	// or succeeded now, as that's what we'll be checking later.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, framework.PodReadyBeforeTimeout) {
		printStatusAndLogsForNotReadyPods(c, ns, podNames, pods)
		return false
	}

	// Reboot the node.
	if err = framework.IssueSSHCommand(rebootCmd, provider, node); err != nil {
		framework.Logf("Error while issuing ssh command: %v", err)
		return false
	}

	// Wait for some kind of "not ready" status.
	if !framework.WaitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) {
		return false
	}

	// Wait for some kind of "ready" status.
	if !framework.WaitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) {
		return false
	}

	// Ensure all of the pods that we found on this node before the reboot are
	// running / healthy, or succeeded.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, rebootPodReadyAgainTimeout) {
		newPods := ps.List()
		printStatusAndLogsForNotReadyPods(c, ns, podNames, newPods)
		return false
	}

	framework.Logf("Reboot successful on node %s", name)
	return true
}
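// Usage sketch (illustrative; the helper names below are assumptions, not
// necessarily what the surrounding test uses): reboot every schedulable node
// in parallel and collect a per-node pass/fail result.
//
//	nodelist := framework.GetReadySchedulableNodesOrDie(c)
//	result := make([]bool, len(nodelist.Items))
//	var wg sync.WaitGroup
//	wg.Add(len(nodelist.Items))
//	for ix := range nodelist.Items {
//		go func(ix int) {
//			defer wg.Done()
//			result[ix] = rebootNode(c, framework.TestContext.Provider, nodelist.Items[ix].ObjectMeta.Name, rebootCmd)
//		}(ix)
//	}
//	wg.Wait()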
		}
		results = append(results, p)
	}
	return results
}

var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
	f := framework.NewDefaultFramework("restart")
	var ps *testutils.PodStore

	BeforeEach(func() {
		// This test requires the ability to restart all nodes, so the provider
		// check must be identical to that call.
		framework.SkipUnlessProviderIs("gce", "gke")
		ps = testutils.NewPodStore(f.Client, api.NamespaceSystem, labels.Everything(), fields.Everything())
	})

	AfterEach(func() {
		if ps != nil {
			ps.Stop()
		}
	})

	It("should restart all nodes and ensure all nodes and pods recover", func() {
		nn := framework.TestContext.CloudConfig.NumNodes

		By("ensuring all nodes are ready")
		nodeNamesBefore, err := framework.CheckNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn)
		Expect(err).NotTo(HaveOccurred())
		framework.Logf("Got the following nodes before restart: %v", nodeNamesBefore)