Beispiel #1
0
// checkNodesReady waits up to nt for expect nodes accessed by c to be ready,
// returning an error if this doesn't happen in time. It returns the names of
// nodes it finds.
func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
	// First, keep getting all of the nodes until we get the number we expect.
	var nodeList *api.NodeList
	var errLast error
	start := time.Now()
	found := wait.Poll(framework.Poll, nt, func() (bool, error) {
		// A rolling-update (GCE/GKE implementation of restart) can complete before the apiserver
		// knows about all of the nodes. Thus, we retry the list nodes call
		// until we get the expected number of nodes.
		nodeList, errLast = c.Nodes().List(api.ListOptions{
			FieldSelector: fields.Set{"spec.unschedulable": "false"}.AsSelector()})
		if errLast != nil {
			return false, nil
		}
		if len(nodeList.Items) != expect {
			errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
				expect, len(nodeList.Items), time.Since(start))
			framework.Logf("%v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	nodeNames := make([]string, len(nodeList.Items))
	for i, n := range nodeList.Items {
		nodeNames[i] = n.ObjectMeta.Name
	}
	if !found {
		return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
			expect, nt, errLast)
	}
	framework.Logf("Successfully found %d nodes", expect)

	// Next, ensure in parallel that all the nodes are ready. We subtract the
	// time we spent waiting above.
	timeout := nt - time.Since(start)
	result := make(chan bool, len(nodeList.Items))
	for _, n := range nodeNames {
		n := n
		go func() { result <- framework.WaitForNodeToBeReady(c, n, timeout) }()
	}
	failed := false
	// TODO(mbforbes): Change to `for range` syntax once we support only Go
	// >= 1.4.
	for i := range nodeList.Items {
		_ = i
		if !<-result {
			failed = true
		}
	}
	if failed {
		return nodeNames, fmt.Errorf("at least one node failed to be ready")
	}
	return nodeNames, nil
}
// kubeletCommand performs `start`, `restart`, or `stop` on the kubelet running on the node of the target pod.
// Allowed kubeltOps are `kStart`, `kStop`, and `kRestart`
func kubeletCommand(kOp kubeletOpt, c clientset.Interface, pod *v1.Pod) {
	nodeIP, err := framework.GetHostExternalAddress(c, pod)
	Expect(err).NotTo(HaveOccurred())
	nodeIP = nodeIP + ":22"
	sshResult, err := framework.SSH("sudo /etc/init.d/kubelet "+string(kOp), nodeIP, framework.TestContext.Provider)
	Expect(err).NotTo(HaveOccurred())
	framework.LogSSHResult(sshResult)

	// On restart, waiting for node NotReady prevents a race condition where the node takes a few moments to leave the
	// Ready state which in turn short circuits WaitForNodeToBeReady()
	if kOp == kStop || kOp == kRestart {
		if ok := framework.WaitForNodeToBeNotReady(c, pod.Spec.NodeName, NodeStateTimeout); !ok {
			framework.Failf("Node %s failed to enter NotReady state", pod.Spec.NodeName)
		}
	}
	if kOp == kStart || kOp == kRestart {
		if ok := framework.WaitForNodeToBeReady(c, pod.Spec.NodeName, NodeStateTimeout); !ok {
			framework.Failf("Node %s failed to enter Ready state", pod.Spec.NodeName)
		}
	}
}
Beispiel #3
0
// rebootNode takes node name on provider through the following steps using c:
//  - ensures the node is ready
//  - ensures all pods on the node are running and ready
//  - reboots the node (by executing rebootCmd over ssh)
//  - ensures the node reaches some non-ready state
//  - ensures the node becomes ready again
//  - ensures all pods on the node become running and ready again
//
// It returns true through result only if all of the steps pass; at the first
// failed step, it will return false through result and not run the rest.
func rebootNode(c *client.Client, provider, name, rebootCmd string) bool {
	// Setup
	ns := api.NamespaceSystem
	ps := framework.NewPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(api.PodHostField, name))
	defer ps.Stop()

	// Get the node initially.
	framework.Logf("Getting %s", name)
	node, err := c.Nodes().Get(name)
	if err != nil {
		framework.Logf("Couldn't get node %s", name)
		return false
	}

	// Node sanity check: ensure it is "ready".
	if !framework.WaitForNodeToBeReady(c, name, framework.NodeReadyInitialTimeout) {
		return false
	}

	// Get all the pods on the node that don't have liveness probe set.
	// Liveness probe may cause restart of a pod during node reboot, and the pod may not be running.
	pods := ps.List()
	podNames := []string{}
	for _, p := range pods {
		probe := false
		for _, c := range p.Spec.Containers {
			if c.LivenessProbe != nil {
				probe = true
				break
			}
		}
		if !probe {
			podNames = append(podNames, p.ObjectMeta.Name)
		}
	}
	framework.Logf("Node %s has %d assigned pods with no liveness probes: %v", name, len(podNames), podNames)

	// For each pod, we do a sanity check to ensure it's running / healthy
	// or succeeded now, as that's what we'll be checking later.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, framework.PodReadyBeforeTimeout) {
		printStatusAndLogsForNotReadyPods(c, ns, podNames, pods)
		return false
	}

	// Reboot the node.
	if err = framework.IssueSSHCommand(rebootCmd, provider, node); err != nil {
		framework.Logf("Error while issuing ssh command: %v", err)
		return false
	}

	// Wait for some kind of "not ready" status.
	if !framework.WaitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) {
		return false
	}

	// Wait for some kind of "ready" status.
	if !framework.WaitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) {
		return false
	}

	// Ensure all of the pods that we found on this node before the reboot are
	// running / healthy, or succeeded.
	if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, rebootPodReadyAgainTimeout) {
		newPods := ps.List()
		printStatusAndLogsForNotReadyPods(c, ns, podNames, newPods)
		return false
	}

	framework.Logf("Reboot successful on node %s", name)
	return true
}
Beispiel #4
0
		diskName, err := createPDWithRetry()
		framework.ExpectNoError(err, "Error creating a pd")

		host0Pod := testPDPod([]string{diskName}, host0Name, false, 1)
		originalCount := len(nodes.Items)
		containerName := "mycontainer"
		nodeToDelete := &nodes.Items[0]
		defer func() error {
			By("Cleaning up PD-RW test env")
			detachAndDeletePDs(diskName, []types.NodeName{host0Name})
			nodeToDelete.ObjectMeta.SetResourceVersion("0")
			// need to set the resource version or else the Create() fails
			_, err := nodeClient.Create(nodeToDelete)
			framework.ExpectNoError(err, "Unable to re-create the deleted node")
			framework.ExpectNoError(WaitForGroupSize(framework.TestContext.CloudConfig.NodeInstanceGroup, int32(initialGroupSize)), "Unable to get the node group back to the original size")
			framework.WaitForNodeToBeReady(f.ClientSet, nodeToDelete.Name, nodeStatusTimeout)
			if len(nodes.Items) != originalCount {
				return fmt.Errorf("The node count is not back to original count")
			}
			return nil
		}()

		By("submitting host0Pod to kubernetes")
		_, err = podClient.Create(host0Pod)
		framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0pod: %v", err))

		framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name))

		testFile := "/testpd1/tracker"
		testFileContents := fmt.Sprintf("%v", mathrand.Int())
Beispiel #5
0
				Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")

				By("choose a node with at least one pod - we will block some network traffic on this node")
				label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
				options := api.ListOptions{LabelSelector: label}
				pods, err := c.Pods(ns).List(options) // list pods after all have been scheduled
				Expect(err).NotTo(HaveOccurred())
				nodeName := pods.Items[0].Spec.NodeName

				node, err := c.Nodes().Get(nodeName)
				Expect(err).NotTo(HaveOccurred())

				By(fmt.Sprintf("block network traffic from node %s", node.Name))
				performTemporaryNetworkFailure(c, ns, name, replicas, pods.Items[0].Name, node)
				framework.Logf("Waiting %v for node %s to be ready once temporary network failure ends", resizeNodeReadyTimeout, node.Name)
				if !framework.WaitForNodeToBeReady(c, node.Name, resizeNodeReadyTimeout) {
					framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
				}

				// sleep a bit, to allow Watch in NodeController to catch up.
				time.Sleep(5 * time.Second)

				By("verify whether new pods can be created on the re-attached node")
				// increasing the RC size is not a valid way to test this
				// since we have no guarantees the pod will be scheduled on our node.
				additionalPod := "additionalpod"
				err = newPodOnNode(c, ns, additionalPod, node.Name)
				Expect(err).NotTo(HaveOccurred())
				err = framework.VerifyPods(c, ns, additionalPod, true, 1)
				Expect(err).NotTo(HaveOccurred())