Example #1
0
func masterExec(cmd string) {
	stdout, stderr, code, err := ssh.SSH(cmd, getMasterHost()+":22", testContext.Provider)
	Expect(err).NotTo(HaveOccurred())
	if code != 0 {
		Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)
	}
}
Example #2
0
// issueSSHCommand runs cmd over SSH on port 22 of the first address in
// node.Status.Addresses whose type is api.NodeExternalIP.
//
// It returns an error when the node has no such address, when the SSH call
// itself fails, or when the command exits with a non-zero code. On success it
// returns nil; the command's stdout and stderr are discarded.
func issueSSHCommand(node *api.Node, provider, cmd string) error {
	Logf("Getting external IP address for %s", node.Name)
	var host string
	for _, addr := range node.Status.Addresses {
		if addr.Type != api.NodeExternalIP {
			continue
		}
		// Only the first external address is used.
		host = addr.Address + ":22"
		break
	}
	if len(host) == 0 {
		return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
	}

	Logf("Calling %s on %s", cmd, node.Name)
	_, _, exitCode, err := ssh.SSH(cmd, host, provider)
	if err == nil && exitCode == 0 {
		return nil
	}
	return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, exitCode, err)
}
Example #3
0
// logCore runs every command in cmds on every host in hosts over SSH and
// stores each command's combined stdout and stderr in its own log file,
// named "<dir>/<host>-<component>.log".
//
// All (command, host) pairs are executed concurrently, one goroutine per pair,
// and logCore returns only after every goroutine has finished. The operation
// is best-effort: SSH and file-write errors are printed to standard output and
// do not stop the remaining work, and whatever output was captured is still
// written to the log file even when the SSH call reported an error.
func logCore(cmds []command, hosts []string, dir, provider string) {
	wg := &sync.WaitGroup{}
	// Run commands on all nodes via SSH.
	for _, cmd := range cmds {
		fmt.Printf("SSH'ing to all nodes and running %s\n", cmd.cmd)
		for _, host := range hosts {
			wg.Add(1)
			// cmd and host are passed as arguments so each goroutine works on
			// its own copy rather than on the shared loop variables.
			go func(cmd command, host string) {
				defer wg.Done()
				logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
				fmt.Printf("Writing to %s.\n", logfile)
				stdout, stderr, _, err := ssh.SSH(cmd.cmd, host, provider)
				if err != nil {
					fmt.Printf("Error running command: %v\n", err)
				}
				// Log files are plain data: owner read/write, everyone else
				// read-only. They must not be executable or world-writable.
				if err := ioutil.WriteFile(logfile, []byte(stdout+stderr), 0644); err != nil {
					fmt.Printf("Error writing logfile: %v\n", err)
				}
			}(cmd, host)
		}
	}
	wg.Wait()
}
Example #4
0
// performTemporaryNetworkFailure blocks outgoing network traffic from 'node'
// to the master, then verifies that 'podNameToDisappear', which belongs to
// replication controller 'rcName', really disappeared. Finally, it checks that
// the replication controller recreates the pods and that the number of
// replicas is again equal to 'replicas'.
// At the end (even in case of errors), the network traffic is brought back to
// normal. This function executes commands on the node over SSH, so it will
// work only in some environments.
func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int, podNameToDisappear string, node *api.Node) {
	Logf("Getting external IP address for %s", node.Name)
	var host string
	for _, addr := range node.Status.Addresses {
		if addr.Type != api.NodeExternalIP {
			continue
		}
		// The first external address is the SSH target.
		host = addr.Address + ":22"
		break
	}
	if len(host) == 0 {
		Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
	}
	By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))

	// TODO marekbiskup 2015-06-19 #10085
	// Using MasterName makes iptables perform a DNS lookup to resolve the
	// name to an IP address, which slows the test down and makes it fail
	// when DNS is absent or broken. Use the IP address instead.

	destination := testContext.CloudConfig.MasterName
	if providerIs("aws") {
		// This is the (internal) IP address used on AWS for the master.
		// TODO: Use IP address for all clouds?
		// TODO: Avoid hard-coding this
		destination = "172.20.0.9"
	}

	iptablesRule := "OUTPUT --destination " + destination + " --jump DROP"
	defer func() {
		// This cleanup runs even if inserting the iptables rule failed. That
		// is on purpose: an error may be reported even though the rule was
		// in fact inserted. (The exit code and the ssh error could be examined
		// separately, but staying on the safe side is preferred.)

		By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
		undropCmd := "sudo iptables --delete " + iptablesRule
		// The undrop command may fail if the rule was never created. In that
		// case we just lose 30 seconds, but the cluster is healthy.
		// If the rule was created and removing it failed, however, the node
		// is broken and not coming back. Subsequent tests will run on fewer
		// nodes (and some of them may fail). Manual intervention is required
		// in that case (recreating the cluster solves the problem too).
		removeRule := func() (bool, error) {
			_, _, exitCode, sshErr := ssh.SSH(undropCmd, host, testContext.Provider)
			if sshErr == nil && exitCode == 0 {
				return true, nil
			}
			Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v",
				undropCmd, node.Name, exitCode, sshErr)
			// Never abort the poll early; keep retrying until the timeout.
			return false, nil
		}
		if pollErr := wait.Poll(100*time.Millisecond, 30*time.Second, removeRule); pollErr != nil {
			Failf("Failed to remove the iptable DROP rule. Manual intervention is "+
				"required on node %s: remove rule %s, if exists", node.Name, iptablesRule)
		}
	}()

	// This command blocks all outgoing network traffic from the node to the
	// master. When multi-master is implemented, this test will have to be
	// improved to block network traffic to all masters.
	// We could also block network traffic from the master(s) to this node,
	// but blocking it one way is sufficient for this test.
	dropCmd := "sudo iptables --insert " + iptablesRule
	_, _, exitCode, err := ssh.SSH(dropCmd, host, testContext.Provider)
	if err != nil || exitCode != 0 {
		Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
			dropCmd, node.Name, exitCode, err)
	}

	Logf("Waiting for node %s to be not ready", node.Name)
	waitForNodeToBe(c, node.Name, false, 2*time.Minute)

	Logf("Waiting for pod %s to be removed", podNameToDisappear)
	waitForRCPodToDisappear(c, ns, rcName, podNameToDisappear)

	By("verifying whether the pod from the unreachable node is recreated")
	err = verifyPods(c, ns, rcName, true, replicas)
	Expect(err).NotTo(HaveOccurred())

	// Network traffic is unblocked in the deferred function above.
}
Example #5
0
			expectedStderr string
			expectedCode   int
			expectedError  error
		}{
			{`echo "Hello"`, true, "Hello", "", 0, nil},
			// Same as previous, but useful for test output diagnostics.
			{`echo "Hello from $(whoami)@$(hostname)"`, false, "", "", 0, nil},
			{`echo "foo" | grep "bar"`, true, "", "", 1, nil},
			{`echo "Out" && echo "Error" >&2 && exit 7`, true, "Out", "Error", 7, nil},
		}

		// Run commands on all nodes via SSH.
		for _, testCase := range testCases {
			By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
			for _, host := range hosts {
				stdout, stderr, code, err := ssh.SSH(testCase.cmd, host, testContext.Provider)
				stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr)
				if err != testCase.expectedError {
					Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
				}
				if testCase.checkStdout && stdout != testCase.expectedStdout {
					Failf("Ran %s on %s, got stdout '%s', expected '%s'", testCase.cmd, host, stdout, testCase.expectedStdout)
				}
				if stderr != testCase.expectedStderr {
					Failf("Ran %s on %s, got stderr '%s', expected '%s'", testCase.cmd, host, stderr, testCase.expectedStderr)
				}
				if code != testCase.expectedCode {
					Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode)
				}
			}
		}