Example #1
func checkExistingRCRecovers(f Framework) {
	By("assert that the pre-existing replication controller recovers")
	podClient := f.Client.Pods(f.Namespace.Name)
	rcSelector := labels.Set{"name": "baz"}.AsSelector()

	By("deleting pods from existing replication controller")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		if err != nil {
			Logf("apiserver returned error, as expected before recovery: %v", err)
			return false, nil
		}
		if len(pods.Items) == 0 {
			return false, nil
		}
		for _, pod := range pods.Items {
			err = podClient.Delete(pod.Name, api.NewDeleteOptions(0))
			Expect(err).NotTo(HaveOccurred())
		}
		Logf("apiserver has recovered")
		return true, nil
	}))

	By("waiting for replication controller to recover")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
		for _, pod := range pods.Items {
			if pod.DeletionTimestamp == nil && api.IsPodReady(&pod) {
				return true, nil
			}
		}
		return false, nil
	}))
}
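Every example on this page relies on the same wait.Poll contract from the pkg/util/wait package: the condition function is called repeatedly, once per interval, until it returns (true, nil) to signal success, returns a non-nil error to abort polling immediately, or the timeout expires, in which case Poll itself returns an error. A minimal sketch of that contract, assuming the time and wait imports used throughout these examples; checkSomething is a hypothetical helper:

func waitForSomething() error {
	return wait.Poll(500*time.Millisecond, 30*time.Second, func() (bool, error) {
		ready, err := checkSomething() // hypothetical check against the apiserver
		if err != nil {
			// A non-nil error stops polling at once and is returned by Poll.
			return false, err
		}
		// (false, nil) means "not done yet, try again after the interval";
		// (true, nil) ends the poll successfully.
		return ready, nil
	})
}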
Example #2
func runSchedulerNoPhantomPodsTest(client *client.Client) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name:  "c1",
					Image: "kubernetes/pause",
					Ports: []api.ContainerPort{
						{ContainerPort: 1234, HostPort: 9999},
					},
					ImagePullPolicy: api.PullIfNotPresent,
				},
			},
		},
	}

	// Assuming we only have two kubelets, the third pod here won't schedule
	// if the scheduler doesn't correctly handle the delete for the second
	// pod.
	pod.ObjectMeta.Name = "phantom.foo"
	foo, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, foo.Namespace, foo.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	pod.ObjectMeta.Name = "phantom.bar"
	bar, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, bar.Namespace, bar.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	// Delete a pod to free up room.
	glog.Infof("Deleting pod %v", bar.Name)
	err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(0))
	if err != nil {
		glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
	}

	pod.ObjectMeta.Name = "phantom.baz"
	baz, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
		if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil {
			glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod)
		} else {
			glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
		}
	}

	glog.Info("Scheduler doesn't make phantom pods: test passed.")
}
Example #3
// pollForReadyPods polls oldRc and newRc each interval and returns the old
// and new ready counts for their pods. Once a pod has been observed as ready,
// it stays counted as ready even if it later becomes not ready.
func (r *RollingUpdater) pollForReadyPods(interval, timeout time.Duration, oldRc, newRc *api.ReplicationController) (int, int, error) {
	controllers := []*api.ReplicationController{oldRc, newRc}
	oldReady := 0
	newReady := 0
	err := wait.Poll(interval, timeout, func() (done bool, err error) {
		anyReady := false
		for _, controller := range controllers {
			selector := labels.Set(controller.Spec.Selector).AsSelector()
			pods, err := r.c.Pods(controller.Namespace).List(selector, fields.Everything())
			if err != nil {
				return false, err
			}
			for _, pod := range pods.Items {
				if api.IsPodReady(&pod) {
					switch controller.Name {
					case oldRc.Name:
						oldReady++
					case newRc.Name:
						newReady++
					}
					anyReady = true
				}
			}
		}
		if anyReady {
			return true, nil
		}
		return false, nil
	})
	return oldReady, newReady, err
}
Example #4
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
	var errLast error
	var templ string
	key := "instanceTemplate"
	if wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mikedanese): make this hit the compute API directly instead of
		// shelling out to gcloud.
		// An `instance-groups managed describe` call outputs what we want to stdout.
		output, _, err := retryCmd("gcloud", "compute", "instance-groups", "managed",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			"describe",
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			testContext.CloudConfig.NodeInstanceGroup)
		if err != nil {
			errLast = fmt.Errorf("gcloud compute instance-groups managed describe call failed with err: %v", err)
			return false, nil
		}

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "instanceTemplate: url/to/<templ>" and
		// return <templ>.
		if val := parseKVLines(output, key); len(val) > 0 {
			url := strings.Split(val, "/")
			templ = url[len(url)-1]
			Logf("MIG group %s using template: %s", testContext.CloudConfig.NodeInstanceGroup, templ)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
		return false, nil
	}) != nil {
		return "", fmt.Errorf("migTemplate() failed with last error: %v", errLast)
	}
	return templ, nil
}
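migTemplate, like several helpers below (migRollingUpdatePoll, checkNodesReady, migRollingUpdateStart), uses the same errLast idiom: a transient failure returns (false, nil) so polling continues, and the most recent failure is kept so the eventual timeout error explains why the last attempt failed. A stripped-down sketch of the idiom, with doWork as a hypothetical flaky call and poll/singleCallTimeout taken from the surrounding test context:

func pollWithLastError() error {
	var errLast error
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		if err := doWork(); err != nil {
			errLast = err // remember why this attempt failed, but keep polling
			return false, nil
		}
		return true, nil
	}); err != nil {
		return fmt.Errorf("polling failed: %v; last error: %v", err, errLast)
	}
	return nil
}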
Example #5
// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
	// Two keys and a val.
	status, progress, done := "status", "statusMessage", "ROLLED_OUT"
	start, timeout := time.Now(), nt*time.Duration(testContext.CloudConfig.NumNodes)
	var errLast error
	Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
	if wait.Poll(restartPoll, timeout, func() (bool, error) {
		// A `rolling-updates describe` call outputs what we want to stdout.
		output, _, err := retryCmd("gcloud", append(migUdpateCmdBase(),
			"rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe",
			id)...)
		if err != nil {
			errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
			Logf("%v", errLast)
			return false, nil
		}

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "status: <status>" and see whether it's
		// done.
		Logf("Waiting for MIG rolling update: %s (%v elapsed)",
			parseKVLines(output, progress), time.Since(start))
		if st := parseKVLines(output, status); st == done {
			return true, nil
		}
		return false, nil
	}) != nil {
		return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
	}
	Logf("MIG rolling update complete after %v", time.Since(start))
	return nil
}
Example #6
func assertFilesExist(fileNames []string, fileDir string, pod *api.Pod, client *client.Client) {
	var failed []string

	expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
		failed = []string{}
		for _, fileName := range fileNames {
			if _, err := client.Get().
				Prefix("proxy").
				Resource("pods").
				Namespace(pod.Namespace).
				Name(pod.Name).
				Suffix(fileDir, fileName).
				Do().Raw(); err != nil {
				Logf("Unable to read %s from pod %s: %v", fileName, pod.Name, err)
				failed = append(failed, fileName)
			}
		}
		if len(failed) == 0 {
			return true, nil
		}
		Logf("Lookups using %s failed for: %v\n", pod.Name, failed)
		return false, nil
	}))
	Expect(len(failed)).To(Equal(0))
}
Example #7
// Wait till the passFunc confirms that the object it expects to see is in the store.
// Used to observe reflected events.
func waitForReflection(s cache.Store, key string, passFunc func(n interface{}) bool) error {
	return wait.Poll(time.Millisecond*10, time.Second*20, func() (bool, error) {
		if n, _, err := s.GetByKey(key); err == nil && passFunc(n) {
			return true, nil
		}
		return false, nil
	})
}
Example #8
// Wait for job to reach completions.
func waitForJobFinish(c *client.Client, ns, jobName string, completions int) error {
	return wait.Poll(poll, jobTimeout, func() (bool, error) {
		curr, err := c.Extensions().Jobs(ns).Get(jobName)
		if err != nil {
			return false, err
		}
		return curr.Status.Succeeded == completions, nil
	})
}
Example #9
func waitForUpToDateCache(cacher *storage.Cacher, resourceVersion uint64) error {
	ready := func() (bool, error) {
		result, err := cacher.LastSyncResourceVersion()
		if err != nil {
			return false, err
		}
		return result == resourceVersion, nil
	}
	return wait.Poll(10*time.Millisecond, util.ForeverTestTimeout, ready)
}
Example #10
// checkNodesReady waits up to nt for expect nodes accessed by c to be ready,
// returning an error if this doesn't happen in time. It returns the names of
// nodes it finds.
func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
	// First, keep getting all of the nodes until we get the number we expect.
	var nodeList *api.NodeList
	var errLast error
	start := time.Now()
	found := wait.Poll(poll, nt, func() (bool, error) {
		// Even though listNodes(...) has its own retries, a rolling-update
		// (GCE/GKE implementation of restart) can complete before the apiserver
		// knows about all of the nodes. Thus, we retry the list nodes call
		// until we get the expected number of nodes.
		nodeList, errLast = listNodes(c, labels.Everything(), fields.Everything())
		if errLast != nil {
			return false, nil
		}
		if len(nodeList.Items) != expect {
			errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
				expect, len(nodeList.Items), time.Since(start))
			Logf("%v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	nodeNames := make([]string, len(nodeList.Items))
	for i, n := range nodeList.Items {
		nodeNames[i] = n.ObjectMeta.Name
	}
	if !found {
		return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
			expect, nt, errLast)
	}
	Logf("Successfully found %d nodes", expect)

	// Next, ensure in parallel that all the nodes are ready. We subtract the
	// time we spent waiting above.
	timeout := nt - time.Since(start)
	result := make(chan bool, len(nodeList.Items))
	for _, n := range nodeNames {
		n := n
		go func() { result <- waitForNodeToBeReady(c, n, timeout) }()
	}
	failed := false
	// TODO(mbforbes): Change to `for range` syntax once we support only Go
	// >= 1.4.
	for i := range nodeList.Items {
		_ = i
		if !<-result {
			failed = true
		}
	}
	if failed {
		return nodeNames, fmt.Errorf("at least one node failed to be ready")
	}
	return nodeNames, nil
}
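checkNodesReady also shows a time-budgeting pattern: the first wait.Poll consumes part of the overall deadline nt, and only the remainder (nt - time.Since(start)) is handed to the per-node readiness checks, so the whole function still honors the caller's budget. A minimal sketch of that idea, with phaseOne and phaseTwo standing in for the two waits (wait.ConditionFunc is the condition type Poll expects):

func withinBudget(budget time.Duration, phaseOne, phaseTwo wait.ConditionFunc) error {
	start := time.Now()
	if err := wait.Poll(time.Second, budget, phaseOne); err != nil {
		return err
	}
	// Whatever the first phase did not consume is left for the second phase.
	remaining := budget - time.Since(start)
	return wait.Poll(time.Second, remaining, phaseTwo)
}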
Example #11
func (s *ingManager) test(path string) error {
	url := fmt.Sprintf("%v/hostName", path)
	httpClient := &http.Client{}
	return wait.Poll(pollInterval, serviceRespondingTimeout, func() (bool, error) {
		body, err := simpleGET(httpClient, url, "")
		if err != nil {
			Logf("%v\n%v\n%v", url, body, err)
			return false, nil
		}
		return true, nil
	})
}
Example #12
func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}

		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
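Note how findToken is written as an ordinary condition function, so the same check can either be polled (when shouldWait is true) or run exactly once. A condensed sketch of that shape; the check argument is any func() (bool, error), matching wait.ConditionFunc:

func runOnceOrWait(shouldWait bool, check func() (bool, error)) error {
	if shouldWait {
		// Poll the same condition until it passes or the timeout expires.
		return wait.Poll(time.Second, 10*time.Second, check)
	}
	ok, err := check()
	if err != nil {
		return err
	}
	if !ok {
		return fmt.Errorf("condition not yet satisfied")
	}
	return nil
}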
Example #13
// retryCmd runs the given command with args and retries it for up to
// singleCallTimeout if it returns an error. It returns stdout and stderr.
func retryCmd(command string, args ...string) (string, string, error) {
	var err error
	stdout, stderr := "", ""
	wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		stdout, stderr, err = runCmd(command, args...)
		if err != nil {
			Logf("Got %v", err)
			return false, nil
		}
		return true, nil
	})
	return stdout, stderr, err
}
Example #14
// Scale updates a Job to a new size, with optional precondition check (if preconditions is not nil),
// optional retries (if retry is not nil), and then optionally waits for parallelism to reach desired
// number, which can be less than requested based on job's current progress.
func (scaler *JobScaler) Scale(namespace, name string, newSize uint, preconditions *ScalePrecondition, retry, waitForReplicas *RetryParams) error {
	if preconditions == nil {
		preconditions = &ScalePrecondition{-1, ""}
	}
	if retry == nil {
		// Make it try only once, immediately
		retry = &RetryParams{Interval: time.Millisecond, Timeout: time.Millisecond}
	}
	cond := ScaleCondition(scaler, preconditions, namespace, name, newSize)
	if err := wait.Poll(retry.Interval, retry.Timeout, cond); err != nil {
		return err
	}
	if waitForReplicas != nil {
		job, err := scaler.c.Extensions().Jobs(namespace).Get(name)
		if err != nil {
			return err
		}
		return wait.Poll(waitForReplicas.Interval, waitForReplicas.Timeout,
			client.JobHasDesiredParallelism(scaler.c, job))
	}
	return nil
}
Example #15
func (h *haproxyControllerTester) start(namespace string) (err error) {

	// Create a replication controller with the given configuration.
	rc := rcFromManifest(h.cfg)
	rc.Namespace = namespace
	rc.Spec.Template.Labels["rcName"] = rc.Name

	// Add the --namespace arg.
	// TODO: Remove this when we have proper namespace support.
	for i, c := range rc.Spec.Template.Spec.Containers {
		rc.Spec.Template.Spec.Containers[i].Args = append(
			c.Args, fmt.Sprintf("--namespace=%v", namespace))
		Logf("Container args %+v", rc.Spec.Template.Spec.Containers[i].Args)
	}

	rc, err = h.client.ReplicationControllers(rc.Namespace).Create(rc)
	if err != nil {
		return
	}
	if err = waitForRCPodsRunning(h.client, namespace, h.rcName); err != nil {
		return
	}
	h.rcName = rc.Name
	h.rcNamespace = rc.Namespace

	// Find the pods of the rc we just created.
	labelSelector := labels.SelectorFromSet(
		labels.Set(map[string]string{"rcName": h.rcName}))
	pods, err := h.client.Pods(h.rcNamespace).List(
		labelSelector, fields.Everything())
	if err != nil {
		return err
	}

	// Find the external addresses of the nodes the pods are running on.
	for _, p := range pods.Items {
		wait.Poll(pollInterval, serviceRespondingTimeout, func() (bool, error) {
			address, err := getHostExternalAddress(h.client, &p)
			if err != nil {
				Logf("%v", err)
				return false, nil
			}
			h.address = append(h.address, address)
			return true, nil
		})
	}
	if len(h.address) == 0 {
		return fmt.Errorf("No external ips found for loadbalancer %v", h.getName())
	}
	return nil
}
Example #16
// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
// as the new template, waiting up to nt per node, and returns the ID of that
// update.
func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
	var errLast error
	var id string
	prefix, suffix := "Started [", "]."
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mikedanese): make this hit the compute API directly instead of
		//                 shelling out to gcloud.
		// NOTE(mikedanese): If you are changing this gcloud command, update
		//                 cluster/gce/upgrade.sh to match this EXACTLY.
		// A `rolling-updates start` call outputs what we want to stderr.
		_, output, err := retryCmd("gcloud", append(migUdpateCmdBase(),
			"rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"start",
			// Required args.
			fmt.Sprintf("--group=%s", testContext.CloudConfig.NodeInstanceGroup),
			fmt.Sprintf("--template=%s", templ),
			// Optional args to fine-tune behavior.
			fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
			// NOTE: We can speed up this process by increasing
			//       --max-num-concurrent-instances.
			fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
			fmt.Sprintf("--max-num-failed-instances=%d", 0),
			fmt.Sprintf("--min-instance-update-time=%ds", 0))...)
		if err != nil {
			errLast = fmt.Errorf("rolling-updates call failed with err: %v", err)
			return false, nil
		}

		// The 'start' call probably succeeded; parse the output and try to find
		// the line that looks like "Started [url/to/<id>]." and return <id>.
		for _, line := range strings.Split(output, "\n") {
			// As a sanity check, ensure the line starts with prefix and ends
			// with suffix.
			if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
				continue
			}
			url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
			id = url[len(url)-1]
			Logf("Started MIG rolling update; ID: %s", id)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
			prefix, suffix, output)
		return false, nil
	}); err != nil {
		return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
	}
	return id, nil
}
Example #17
func runReplicationControllerTest(c *client.Client) {
	clientAPIVersion := c.APIVersion()
	data, err := ioutil.ReadFile("cmd/integration/" + clientAPIVersion + "-controller.json")
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	var controller api.ReplicationController
	if err := api.Scheme.DecodeInto(data, &controller); err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}

	glog.Infof("Creating replication controllers")
	updated, err := c.ReplicationControllers("test").Create(&controller)
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	glog.Infof("Done creating replication controllers")

	// In practice the controller doesn't need 60s to create a handful of pods, but network latencies on CI
	// systems have been observed to vary unpredictably, so give the controller enough time to create pods.
	// Our e2e scalability tests will catch controllers that are *actually* slow.
	if err := wait.Poll(time.Second, time.Second*60, client.ControllerHasDesiredReplicas(c, updated)); err != nil {
		glog.Fatalf("FAILED: pods never created %v", err)
	}

	// Poll till we can retrieve the status of all pods matching the given label selector from their minions.
	// This involves 3 operations:
	//	- The scheduler must assign all pods to a minion
	//	- The assignment must reflect in a `List` operation against the apiserver, for labels matching the selector
	//  - We need to be able to query the kubelet on that minion for information about the pod
	if err := wait.Poll(
		time.Second, time.Second*30, podsOnMinions(c, "test", labels.Set(updated.Spec.Selector).AsSelector())); err != nil {
		glog.Fatalf("FAILED: pods never started running %v", err)
	}

	glog.Infof("Pods created")
}
Example #18
func (config *KubeProxyTestConfig) waitForLoadBalancerIngressSetup() {
	err := wait.Poll(2*time.Second, 120*time.Second, func() (bool, error) {
		service, err := config.getServiceClient().Get(loadBalancerServiceName)
		if err != nil {
			return false, err
		}
		if len(service.Status.LoadBalancer.Ingress) > 0 {
			return true, nil
		}
		// The load balancer ingress is not provisioned yet; returning an error
		// here would abort wait.Poll, so keep polling instead.
		return false, nil
	})
	Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Failed to setup Load Balancer Service. err:%v", err))
	config.loadBalancerService, _ = config.getServiceClient().Get(loadBalancerServiceName)
}
Example #19
// Wait for all pods to become Running.  Only use when pods will run for a long time, or it will be racy.
func waitForAllPodsRunning(c *client.Client, ns, jobName string, parallelism int) error {
	label := labels.SelectorFromSet(labels.Set(map[string]string{jobSelectorKey: jobName}))
	return wait.Poll(poll, jobTimeout, func() (bool, error) {
		pods, err := c.Pods(ns).List(label, fields.Everything())
		if err != nil {
			return false, err
		}
		count := 0
		for _, p := range pods.Items {
			if p.Status.Phase == api.PodRunning {
				count++
			}
		}
		return count == parallelism, nil
	})
}
Example #20
func waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error)) error {
	if op == nil {
		return fmt.Errorf("operation must not be nil")
	}

	if opIsDone(op) {
		return getErrorFromOp(op)
	}

	opName := op.Name
	return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
		pollOp, err := getOperation(opName)
		if err != nil {
			glog.Warningf("GCE poll operation failed: %v", err)
		}
		return opIsDone(pollOp), getErrorFromOp(pollOp)
	})
}
Example #21
func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
	var err error

	By("Creating testing namespaces")
	wg := &sync.WaitGroup{}
	for n := 0; n < totalNS; n += 1 {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			defer GinkgoRecover()
			_, err = createTestingNS(fmt.Sprintf("nslifetest-%v", n), c)
			Expect(err).NotTo(HaveOccurred())
		}(n)
	}
	wg.Wait()

	// Wait 10 seconds, then send delete requests for all the namespaces.
	By("Waiting 10 seconds")
	time.Sleep(time.Duration(10 * time.Second))
	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
	Expect(err).NotTo(HaveOccurred())
	Expect(len(deleted)).To(Equal(totalNS))

	By("Waiting for namespaces to vanish")
	// Now poll until all namespaces have been eradicated.
	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
		func() (bool, error) {
			var cnt = 0
			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
			if err != nil {
				return false, err
			}
			for _, item := range nsList.Items {
				if strings.Contains(item.Name, "nslifetest") {
					cnt++
				}
			}
			if cnt > maxAllowedAfterDel {
				Logf("Remaining namespaces : %v", cnt)
				return false, nil
			}
			return true, nil
		}))
}
Example #22
func (gce *GCECloud) AddSSHKeyToAllInstances(user string, keyData []byte) error {
	return wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
		project, err := gce.service.Projects.Get(gce.projectID).Do()
		if err != nil {
			glog.Errorf("Could not get project: %v", err)
			return false, nil
		}
		keyString := fmt.Sprintf("%s:%s %s@%s", user, strings.TrimSpace(string(keyData)), user, user)
		found := false
		for _, item := range project.CommonInstanceMetadata.Items {
			if item.Key == "sshKeys" {
				if strings.Contains(item.Value, keyString) {
					// We've already added the key
					glog.Info("SSHKey already in project metadata")
					return true, nil
				}
				item.Value = item.Value + "\n" + keyString
				found = true
				break
			}
		}
		if !found {
			// This is super unlikely, so log.
			glog.Infof("Failed to find sshKeys metadata, creating a new item")
			project.CommonInstanceMetadata.Items = append(project.CommonInstanceMetadata.Items,
				&compute.MetadataItems{
					Key:   "sshKeys",
					Value: keyString,
				})
		}
		op, err := gce.service.Projects.SetCommonInstanceMetadata(gce.projectID, project.CommonInstanceMetadata).Do()
		if err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		if err := gce.waitForGlobalOp(op); err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		glog.Infof("Successfully added sshKey to project metadata")
		return true, nil
	})
}
Example #23
func getServiceAccount(c *client.Client, ns string, name string, shouldWait bool) (*api.ServiceAccount, error) {
	if !shouldWait {
		return c.ServiceAccounts(ns).Get(name)
	}

	var user *api.ServiceAccount
	var err error
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		user, err = c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return true, nil
	})
	return user, err
}
Example #24
// getNodePort waits for the Service and returns its first node port.
func getNodePort(client *client.Client, ns, name string) (nodePort int64, err error) {
	var svc *api.Service
	glog.Infof("Waiting for %v/%v", ns, name)
	wait.Poll(1*time.Second, 5*time.Minute, func() (bool, error) {
		svc, err = client.Services(ns).Get(name)
		if err != nil {
			return false, nil
		}
		for _, p := range svc.Spec.Ports {
			if p.NodePort != 0 {
				nodePort = int64(p.NodePort)
				glog.Infof("Node port %v", nodePort)
				break
			}
		}
		return true, nil
	})
	return
}
Example #25
// cleanupWithClients performs cleanup tasks after the rolling update. Update
// process related annotations are removed from oldRc and newRc. The
// CleanupPolicy on config is executed.
func (r *RollingUpdater) cleanupWithClients(oldRc, newRc *api.ReplicationController, config *RollingUpdaterConfig) error {
	// Clean up annotations
	var err error
	newRc, err = r.c.ReplicationControllers(r.ns).Get(newRc.Name)
	if err != nil {
		return err
	}
	delete(newRc.Annotations, sourceIdAnnotation)
	delete(newRc.Annotations, desiredReplicasAnnotation)

	newRc, err = r.c.ReplicationControllers(r.ns).Update(newRc)
	if err != nil {
		return err
	}
	if err = wait.Poll(config.Interval, config.Timeout, client.ControllerHasDesiredReplicas(r.c, newRc)); err != nil {
		return err
	}
	newRc, err = r.c.ReplicationControllers(r.ns).Get(newRc.Name)
	if err != nil {
		return err
	}

	switch config.CleanupPolicy {
	case DeleteRollingUpdateCleanupPolicy:
		// delete old rc
		fmt.Fprintf(config.Out, "Update succeeded. Deleting %s\n", oldRc.Name)
		return r.c.ReplicationControllers(r.ns).Delete(oldRc.Name)
	case RenameRollingUpdateCleanupPolicy:
		// delete old rc
		fmt.Fprintf(config.Out, "Update succeeded. Deleting old controller: %s\n", oldRc.Name)
		if err := r.c.ReplicationControllers(r.ns).Delete(oldRc.Name); err != nil {
			return err
		}
		fmt.Fprintf(config.Out, "Renaming %s to %s\n", newRc.Name, oldRc.Name)
		return Rename(r.c, newRc, oldRc.Name)
	case PreserveRollingUpdateCleanupPolicy:
		return nil
	default:
		return nil
	}
}
Example #26
func testMasterUpgrade(ip, v string, mUp func(v string) error) {
	Logf("Starting async validation")
	httpClient := http.Client{Timeout: 2 * time.Second}
	done := make(chan struct{}, 1)
	// Let's make sure we've finished the heartbeat before shutting things down.
	var wg sync.WaitGroup
	go util.Until(func() {
		defer GinkgoRecover()
		wg.Add(1)
		defer wg.Done()

		if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
			r, err := httpClient.Get("http://" + ip)
			if err != nil {
				Logf("Error reaching %s: %v", ip, err)
				return false, nil
			}
			if r.StatusCode < http.StatusOK || r.StatusCode >= http.StatusNotFound {
				Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
				return false, nil
			}
			return true, nil
		}); err != nil {
			// We log the error here because the test will fail at the very end
			// because this validation runs in another goroutine. Without this,
			// a failure is very confusing to track down because from the logs
			// everything looks fine.
			msg := fmt.Sprintf("Failed to contact service during master upgrade: %v", err)
			Logf(msg)
			Failf(msg)
		}
	}, 200*time.Millisecond, done)

	Logf("Starting master upgrade")
	expectNoError(mUp(v))
	done <- struct{}{}
	Logf("Stopping async validation")
	wg.Wait()
	Logf("Master upgrade complete")
}
Example #27
func setDaemonSetNodeLabels(c *client.Client, nodeName string, labels map[string]string) (*api.Node, error) {
	nodeClient := c.Nodes()
	var newNode *api.Node
	var newLabels map[string]string
	err := wait.Poll(updateRetryPeriod, updateRetryTimeout, func() (bool, error) {
		node, err := nodeClient.Get(nodeName)
		if err != nil {
			return false, err
		}

		// remove all labels this test is creating
		daemonSetLabels, otherLabels := separateDaemonSetNodeLabels(node.Labels)
		if reflect.DeepEqual(daemonSetLabels, labels) {
			newNode = node
			return true, nil
		}
		node.Labels = otherLabels
		for k, v := range labels {
			node.Labels[k] = v
		}
		newNode, err = nodeClient.Update(node)
		if err == nil {
			newLabels, _ = separateDaemonSetNodeLabels(newNode.Labels)
			return true, err
		}
		if se, ok := err.(*apierrs.StatusError); ok && se.ErrStatus.Reason == unversioned.StatusReasonConflict {
			Logf("failed to update node due to resource version conflict")
			return false, nil
		}
		return false, err
	})
	if err != nil {
		return nil, err
	} else if len(newLabels) != len(labels) {
		return nil, fmt.Errorf("Could not set daemon set test labels as expected.")
	}

	return newNode, nil
}
Example #28
// waitUp polls the daemon's /healthz endpoint until it returns a 200 or the poll hits pollTimeout.
func (r *restartDaemonConfig) waitUp() {
	Logf("Checking if %v is up by polling for a 200 on its /healthz endpoint", r)
	healthzCheck := fmt.Sprintf(
		"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)

	err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
		stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck)
		expectNoError(err)
		if code == 0 {
			httpCode, err := strconv.Atoi(stdout)
			if err != nil {
				Logf("Unable to parse healthz http return code: %v", err)
			} else if httpCode == 200 {
				return true, nil
			}
		}
		Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
			r.nodeName, healthzCheck, code, stdout, stderr)
		return false, nil
	})
	expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)
}
Example #29
// waitTillNPodsRunningOnNodes polls the /runningpods endpoint on kubelet until
// it finds targetNumPods pods that match the given criteria (namespace and
// podNamePrefix). Note that we usually use label selector to filter pods that
// belong to the same RC. However, we use podNamePrefix with namespace here
// because pods returned from /runningpods do not contain the original label
// information; they are reconstructed by examining the container runtime. In
// the scope of this test, we do not expect pod naming conflicts so
// podNamePrefix should be sufficient to identify the pods.
func waitTillNPodsRunningOnNodes(c *client.Client, nodeNames sets.String, podNamePrefix string, namespace string, targetNumPods int, timeout time.Duration) error {
	return wait.Poll(pollInterval, timeout, func() (bool, error) {
		matchCh := make(chan sets.String, len(nodeNames))
		for _, item := range nodeNames.List() {
			// Launch a goroutine per node to check the pods running on the nodes.
			nodeName := item
			go func() {
				matchCh <- getPodMatches(c, nodeName, podNamePrefix, namespace)
			}()
		}

		seen := sets.NewString()
		for i := 0; i < len(nodeNames.List()); i++ {
			seen = seen.Union(<-matchCh)
		}
		if seen.Len() == targetNumPods {
			return true, nil
		}
		Logf("Waiting for %d pods to be running on the node; %d are currently running;", targetNumPods, seen.Len())
		return false, nil
	})
}
Example #30
// updateWithRetries applies the given update to rc and retries until the push to the apiserver succeeds or the poll times out.
func updateWithRetries(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, applyUpdate updateFunc) (*api.ReplicationController, error) {
	var err error
	oldRc := rc
	err = wait.Poll(10*time.Millisecond, 1*time.Minute, func() (bool, error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, err = rcClient.Update(rc); err == nil {
			// rc contains the latest controller post update
			return true, nil
		}
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, err = rcClient.Get(oldRc.Name); err != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// The Get passed: rc contains the latest controller, expect a poll for the update.
		return false, nil
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
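updateWithRetries is the usual conflict-retry shape: apply the mutation, attempt the Update, and on failure re-Get the object so the next attempt works on a fresh copy. A hypothetical caller might look like the sketch below; it assumes updateFunc is the func(*api.ReplicationController) type used above and that Spec.Replicas is a plain int in this API vintage:

func scaleTo(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, replicas int) (*api.ReplicationController, error) {
	// The mutation closure is re-applied on every retry, so it always operates
	// on the freshest copy of the controller fetched by updateWithRetries.
	return updateWithRetries(rcClient, rc, func(rc *api.ReplicationController) {
		rc.Spec.Replicas = replicas
	})
}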