func masterExec(cmd string) { stdout, stderr, code, err := ssh.SSH(cmd, getMasterHost()+":22", testContext.Provider) Expect(err).NotTo(HaveOccurred()) if code != 0 { Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr) } }
func issueSSHCommand(node *api.Node, provider, cmd string) error { Logf("Getting external IP address for %s", node.Name) host := "" for _, a := range node.Status.Addresses { if a.Type == api.NodeExternalIP { host = a.Address + ":22" break } } if host == "" { return fmt.Errorf("couldn't find external IP address for node %s", node.Name) } Logf("Calling %s on %s", cmd, node.Name) if _, _, code, err := ssh.SSH(cmd, host, provider); code != 0 || err != nil { return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err) } return nil }
func logCore(cmds []command, hosts []string, dir, provider string) { wg := &sync.WaitGroup{} // Run commands on all nodes via SSH. for _, cmd := range cmds { fmt.Printf("SSH'ing to all nodes and running %s\n", cmd.cmd) for _, host := range hosts { wg.Add(1) go func(cmd command, host string) { defer wg.Done() logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component) fmt.Printf("Writing to %s.\n", logfile) stdout, stderr, _, err := ssh.SSH(cmd.cmd, host, provider) if err != nil { fmt.Printf("Error running command: %v\n", err) } if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0777); err != nil { fmt.Printf("Error writing logfile: %v\n", err) } }(cmd, host) } } wg.Wait() }
// Blocks outgoing network traffic on 'node'. Then verifies that 'podNameToDisappear', // that belongs to replication controller 'rcName', really disappeared. // Finally, it checks that the replication controller recreates the // pods on another node and that now the number of replicas is equal 'replicas'. // At the end (even in case of errors), the network traffic is brought back to normal. // This function executes commands on a node so it will work only for some // environments. func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int, podNameToDisappear string, node *api.Node) { Logf("Getting external IP address for %s", node.Name) host := "" for _, a := range node.Status.Addresses { if a.Type == api.NodeExternalIP { host = a.Address + ":22" break } } if host == "" { Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses) } By(fmt.Sprintf("block network traffic from node %s to the master", node.Name)) // TODO marekbiskup 2015-06-19 #10085 // The use of MasterName will cause iptables to do a DNS lookup to // resolve the name to an IP address, which will slow down the test // and cause it to fail if DNS is absent or broken. // Use the IP address instead. destination := testContext.CloudConfig.MasterName if providerIs("aws") { // This is the (internal) IP address used on AWS for the master // TODO: Use IP address for all clouds? // TODO: Avoid hard-coding this destination = "172.20.0.9" } iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump DROP", destination) defer func() { // This code will execute even if setting the iptables rule failed. // It is on purpose because we may have an error even if the new rule // had been inserted. (yes, we could look at the error code and ssh error // separately, but I prefer to stay on the safe side). By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name)) undropCmd := fmt.Sprintf("sudo iptables --delete %s", iptablesRule) // Undrop command may fail if the rule has never been created. // In such case we just lose 30 seconds, but the cluster is healthy. // But if the rule had been created and removing it failed, the node is broken and // not coming back. Subsequent tests will run or fewer nodes (some of the tests // may fail). Manual intervention is required in such case (recreating the // cluster solves the problem too). err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) { _, _, code, err := ssh.SSH(undropCmd, host, testContext.Provider) if code == 0 && err == nil { return true, nil } else { Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v", undropCmd, node.Name, code, err) return false, nil } }) if err != nil { Failf("Failed to remove the iptable DROP rule. Manual intervention is "+ "required on node %s: remove rule %s, if exists", node.Name, iptablesRule) } }() // The command will block all outgoing network traffic from the node to the master // When multi-master is implemented, this test will have to be improved to block // network traffic to all masters. // We could also block network traffic from the master(s)s to this node, // but blocking it one way is sufficient for this test. dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule) if _, _, code, err := ssh.SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil { Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v", dropCmd, node.Name, code, err) } Logf("Waiting for node %s to be not ready", node.Name) waitForNodeToBe(c, node.Name, false, 2*time.Minute) Logf("Waiting for pod %s to be removed", podNameToDisappear) waitForRCPodToDisappear(c, ns, rcName, podNameToDisappear) By("verifying whether the pod from the unreachable node is recreated") err := verifyPods(c, ns, rcName, true, replicas) Expect(err).NotTo(HaveOccurred()) // network traffic is unblocked in a defered function }
expectedStderr string expectedCode int expectedError error }{ {`echo "Hello"`, true, "Hello", "", 0, nil}, // Same as previous, but useful for test output diagnostics. {`echo "Hello from $(whoami)@$(hostname)"`, false, "", "", 0, nil}, {`echo "foo" | grep "bar"`, true, "", "", 1, nil}, {`echo "Out" && echo "Error" >&2 && exit 7`, true, "Out", "Error", 7, nil}, } // Run commands on all nodes via SSH. for _, testCase := range testCases { By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd)) for _, host := range hosts { stdout, stderr, code, err := ssh.SSH(testCase.cmd, host, testContext.Provider) stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr) if err != testCase.expectedError { Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError) } if testCase.checkStdout && stdout != testCase.expectedStdout { Failf("Ran %s on %s, got stdout '%s', expected '%s'", testCase.cmd, host, stdout, testCase.expectedStdout) } if stderr != testCase.expectedStderr { Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr) } if code != testCase.expectedCode { Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode) } } }