// rebootNode takes the named node on provider through the following steps using c:
//  - ensures the node is ready
//  - ensures all pods on the node are running and ready
//  - reboots the node (by executing rebootCmd over ssh)
//  - ensures the node reaches some non-ready state
//  - ensures the node becomes ready again
//  - ensures all pods on the node become running and ready again
//
// It returns true only if all of the steps pass; at the first failed step,
// it returns false and does not run the rest.
func rebootNode(c *client.Client, provider, name, rebootCmd string) bool {
	// Setup
	ns := api.NamespaceSystem
	ps := framework.NewPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(api.PodHostField, name))
	defer ps.Stop()

	// Get the node initially.
	framework.Logf("Getting %s", name)
	node, err := c.Nodes().Get(name)
	if err != nil {
		framework.Logf("Couldn't get node %s", name)
		return false
	}

	// Node sanity check: ensure it is "ready".
	if !framework.WaitForNodeToBeReady(c, name, framework.NodeReadyInitialTimeout) {
		return false
	}

	// Get all the pods on the node that don't have a liveness probe set.
	// A liveness probe may cause a pod to be restarted during the node reboot,
	// in which case the pod may not be running.
	pods := ps.List()
	podNames := []string{}
	for _, p := range pods {
		probe := false
		for _, container := range p.Spec.Containers {
			if container.LivenessProbe != nil {
				probe = true
				break
			}
		}
		if !probe {
			podNames = append(podNames, p.ObjectMeta.Name)
		}
	}
	framework.Logf("Node %s has %d assigned pods with no liveness probes: %v", name, len(podNames), podNames)

	// For each pod, we do a sanity check to ensure it's running / healthy
	// or succeeded now, as that's what we'll be checking later.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, framework.PodReadyBeforeTimeout) {
		printStatusAndLogsForNotReadyPods(c, ns, podNames, pods)
		return false
	}

	// Reboot the node.
	if err = framework.IssueSSHCommand(rebootCmd, provider, node); err != nil {
		framework.Logf("Error while issuing ssh command: %v", err)
		return false
	}

	// Wait for some kind of "not ready" status.
	if !framework.WaitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) {
		return false
	}

	// Wait for some kind of "ready" status.
	if !framework.WaitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) {
		return false
	}

	// Ensure all of the pods that we found on this node before the reboot are
	// running / healthy, or succeeded.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, rebootPodReadyAgainTimeout) {
		newPods := ps.List()
		printStatusAndLogsForNotReadyPods(c, ns, podNames, newPods)
		return false
	}

	framework.Logf("Reboot successful on node %s", name)
	return true
}
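
// The sketch below is a hypothetical driver for rebootNode and is not part of
// the original excerpt. It assumes that a helper such as
// framework.GetReadySchedulableNodesOrDie is available in this version of the
// e2e framework and that "sync" is imported. It reboots every schedulable node
// in parallel and fails the test if any single reboot does not complete cleanly.
func testReboot(c *client.Client, provider, rebootCmd string) {
	// List the nodes we intend to reboot.
	nodelist := framework.GetReadySchedulableNodesOrDie(c)

	// Reboot the nodes in parallel and collect a per-node pass/fail result.
	result := make([]bool, len(nodelist.Items))
	wg := sync.WaitGroup{}
	wg.Add(len(nodelist.Items))
	for ix := range nodelist.Items {
		go func(ix int) {
			defer wg.Done()
			n := nodelist.Items[ix]
			result[ix] = rebootNode(c, provider, n.ObjectMeta.Name, rebootCmd)
		}(ix)
	}
	wg.Wait()

	// Fail the test if any node did not come back healthy.
	failed := []string{}
	for ix, ok := range result {
		if !ok {
			failed = append(failed, nodelist.Items[ix].ObjectMeta.Name)
		}
	}
	if len(failed) > 0 {
		framework.Failf("At least one node failed to reboot in the allotted time: %v", failed)
	}
}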
By("ensuring all nodes are ready") nodeNamesBefore, err := framework.CheckNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn) Expect(err).NotTo(HaveOccurred()) framework.Logf("Got the following nodes before restart: %v", nodeNamesBefore) By("ensuring all pods are running and ready") allPods := ps.List() pods := filterIrrelevantPods(allPods) podNamesBefore := make([]string, len(pods)) for i, p := range pods { podNamesBefore[i] = p.ObjectMeta.Name } ns := api.NamespaceSystem if !framework.CheckPodsRunningReadyOrSucceeded(f.Client, ns, podNamesBefore, framework.PodReadyBeforeTimeout) { framework.Failf("At least one pod wasn't running and ready or succeeded at test start.") } By("restarting all of the nodes") err = restartNodes(f, nodeNamesBefore) Expect(err).NotTo(HaveOccurred()) By("ensuring all nodes are ready after the restart") nodeNamesAfter, err := framework.CheckNodesReady(f.Client, framework.RestartNodeReadyAgainTimeout, nn) Expect(err).NotTo(HaveOccurred()) framework.Logf("Got the following nodes after restart: %v", nodeNamesAfter) // Make sure that we have the same number of nodes. We're not checking // that the names match because that's implementation specific. By("ensuring the same number of nodes exist after the restart")