// Blocks outgoing network traffic on 'node'. Then verifies that 'podNameToDisappear',
// which belongs to replication controller 'rcName', really disappeared.
// Finally, it checks that the replication controller recreates the
// pods on another node and that now the number of replicas is equal to 'replicas'.
// At the end (even in case of errors), the network traffic is brought back to normal.
// This function executes commands on a node so it will work only for some
// environments.
func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int32, podNameToDisappear string, node *api.Node) {
	host := getNodeExternalIP(node)
	master := getMaster(c)
	By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
	defer func() {
		// This code will execute even if setting the iptables rule failed.
		// It is on purpose because we may have an error even if the new rule
		// had been inserted. (yes, we could look at the error code and ssh error
		// separately, but I prefer to stay on the safe side).
		By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
		framework.UnblockNetwork(host, master)
	}()

	framework.Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
		framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
	}
	framework.BlockNetwork(host, master)

	framework.Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
		framework.Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
	}

	framework.Logf("Waiting for pod %s to be removed", podNameToDisappear)
	err := framework.WaitForRCPodToDisappear(c, ns, rcName, podNameToDisappear)
	Expect(err).NotTo(HaveOccurred())

	By("verifying whether the pod from the unreachable node is recreated")
	err = framework.VerifyPods(c, ns, rcName, true, replicas)
	Expect(err).NotTo(HaveOccurred())

	// network traffic is unblocked in a deferred function
}
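// The snippet further below calls testUnderTemporaryNetworkFailure, which is not included
// in this excerpt. The following is a rough, assumed sketch (not the verbatim upstream
// helper): it factors the block/unblock and readiness-wait logic of
// performTemporaryNetworkFailure above into a reusable wrapper that runs an arbitrary
// test body while the node is partitioned from the master. The signature and parameter
// names are assumptions based on how the helper is invoked below.
func testUnderTemporaryNetworkFailure(c *client.Client, ns string, node *api.Node, testFunc func()) {
	host := getNodeExternalIP(node)
	master := getMaster(c)
	By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
	defer func() {
		// Always restore connectivity, even if blocking the network or the test body failed.
		By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
		framework.UnblockNetwork(host, master)
	}()

	framework.Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
		framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
	}
	framework.BlockNetwork(host, master)

	framework.Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
		framework.Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
	}

	// Run the caller-supplied assertions while the node is partitioned.
	testFunc()
	// Network traffic is unblocked in the deferred function above.
}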
	pods, err := c.Core().Pods(ns).List(options) // list pods after all have been scheduled
	Expect(err).NotTo(HaveOccurred())
	nodeName := pods.Items[0].Spec.NodeName

	node, err := c.Core().Nodes().Get(nodeName)
	Expect(err).NotTo(HaveOccurred())

	// This creates a temporary network partition, verifies that 'podNameToDisappear',
	// which belongs to replication controller 'rcName', really disappeared (because its
	// grace period is set to 0).
	// Finally, it checks that the replication controller recreates the
	// pods on another node and that now the number of replicas is equal to 'replicas'.
	By(fmt.Sprintf("blocking network traffic from node %s", node.Name))
	testUnderTemporaryNetworkFailure(c, ns, node, func() {
		framework.Logf("Waiting for pod %s to be removed", pods.Items[0].Name)
		err := framework.WaitForRCPodToDisappear(c, ns, name, pods.Items[0].Name)
		Expect(err).NotTo(HaveOccurred())

		By("verifying whether the pod from the unreachable node is recreated")
		err = framework.VerifyPods(c, ns, name, true, replicas)
		Expect(err).NotTo(HaveOccurred())
	})

	framework.Logf("Waiting %v for node %s to be ready once temporary network failure ends", resizeNodeReadyTimeout, node.Name)
	if !framework.WaitForNodeToBeReady(c, node.Name, resizeNodeReadyTimeout) {
		framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
	}

	// sleep a bit, to allow Watch in NodeController to catch up.
	time.Sleep(5 * time.Second)
			selectorKey, selectorValue = sentinelRC, "true"
			label = labels.SelectorFromSet(labels.Set(map[string]string{selectorKey: selectorValue}))
			err = framework.WaitForPodsWithLabelRunning(c, ns, label)
			Expect(err).NotTo(HaveOccurred())
			forEachPod(selectorKey, selectorValue, func(pod api.Pod) {
				if pod.Name != bootstrapPodName {
					_, err := framework.LookForStringInLog(ns, pod.Name, "sentinel", expectedOnSentinel, serverStartTimeout)
					Expect(err).NotTo(HaveOccurred())
				}
			})
		}
		checkAllLogs()

		By("turning down bootstrap")
		framework.RunKubectlOrDie("delete", "-f", bootstrapYaml, nsFlag)
		err = framework.WaitForRCPodToDisappear(c, ns, redisRC, bootstrapPodName)
		Expect(err).NotTo(HaveOccurred())

		By("waiting for the new master election")
		checkAllLogs()
	})
})

framework.KubeDescribe("Spark", func() {
	It("should start spark master, driver and workers", func() {
		mkpath := func(file string) string {
			return filepath.Join(framework.TestContext.RepoRoot, "examples/spark", file)
		}

		// TODO: Add Zeppelin and Web UI to this example.
		serviceYaml := mkpath("spark-master-service.yaml")
		masterYaml := mkpath("spark-master-controller.yaml")