// kubeletCommand performs `start`, `restart`, or `stop` on the kubelet running on the node of the target pod. // Allowed kubeltOps are `kStart`, `kStop`, and `kRestart` func kubeletCommand(kOp kubeletOpt, c clientset.Interface, pod *v1.Pod) { nodeIP, err := framework.GetHostExternalAddress(c, pod) Expect(err).NotTo(HaveOccurred()) nodeIP = nodeIP + ":22" sshResult, err := framework.SSH("sudo /etc/init.d/kubelet "+string(kOp), nodeIP, framework.TestContext.Provider) Expect(err).NotTo(HaveOccurred()) framework.LogSSHResult(sshResult) // On restart, waiting for node NotReady prevents a race condition where the node takes a few moments to leave the // Ready state which in turn short circuits WaitForNodeToBeReady() if kOp == kStop || kOp == kRestart { if ok := framework.WaitForNodeToBeNotReady(c, pod.Spec.NodeName, NodeStateTimeout); !ok { framework.Failf("Node %s failed to enter NotReady state", pod.Spec.NodeName) } } if kOp == kStart || kOp == kRestart { if ok := framework.WaitForNodeToBeReady(c, pod.Spec.NodeName, NodeStateTimeout); !ok { framework.Failf("Node %s failed to enter Ready state", pod.Spec.NodeName) } } }
// rebootNode takes node name on provider through the following steps using c: // - ensures the node is ready // - ensures all pods on the node are running and ready // - reboots the node (by executing rebootCmd over ssh) // - ensures the node reaches some non-ready state // - ensures the node becomes ready again // - ensures all pods on the node become running and ready again // // It returns true through result only if all of the steps pass; at the first // failed step, it will return false through result and not run the rest. func rebootNode(c *client.Client, provider, name, rebootCmd string) bool { // Setup ns := api.NamespaceSystem ps := framework.NewPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(api.PodHostField, name)) defer ps.Stop() // Get the node initially. framework.Logf("Getting %s", name) node, err := c.Nodes().Get(name) if err != nil { framework.Logf("Couldn't get node %s", name) return false } // Node sanity check: ensure it is "ready". if !framework.WaitForNodeToBeReady(c, name, framework.NodeReadyInitialTimeout) { return false } // Get all the pods on the node that don't have liveness probe set. // Liveness probe may cause restart of a pod during node reboot, and the pod may not be running. pods := ps.List() podNames := []string{} for _, p := range pods { probe := false for _, c := range p.Spec.Containers { if c.LivenessProbe != nil { probe = true break } } if !probe { podNames = append(podNames, p.ObjectMeta.Name) } } framework.Logf("Node %s has %d assigned pods with no liveness probes: %v", name, len(podNames), podNames) // For each pod, we do a sanity check to ensure it's running / healthy // or succeeded now, as that's what we'll be checking later. if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, framework.PodReadyBeforeTimeout) { printStatusAndLogsForNotReadyPods(c, ns, podNames, pods) return false } // Reboot the node. if err = framework.IssueSSHCommand(rebootCmd, provider, node); err != nil { framework.Logf("Error while issuing ssh command: %v", err) return false } // Wait for some kind of "not ready" status. if !framework.WaitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) { return false } // Wait for some kind of "ready" status. if !framework.WaitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) { return false } // Ensure all of the pods that we found on this node before the reboot are // running / healthy, or succeeded. if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, rebootPodReadyAgainTimeout) { newPods := ps.List() printStatusAndLogsForNotReadyPods(c, ns, podNames, newPods) return false } framework.Logf("Reboot successful on node %s", name) return true }