func checkExistingRCRecovers(f Framework) {
	By("assert that the pre-existing replication controller recovers")
	podClient := f.Client.Pods(f.Namespace.Name)
	rcSelector := labels.Set{"name": "baz"}.AsSelector()

	By("deleting pods from existing replication controller")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		if err != nil {
			Logf("apiserver returned error, as expected before recovery: %v", err)
			return false, nil
		}
		if len(pods.Items) == 0 {
			return false, nil
		}
		for _, pod := range pods.Items {
			err = podClient.Delete(pod.Name, api.NewDeleteOptions(0))
			Expect(err).NotTo(HaveOccurred())
		}
		Logf("apiserver has recovered")
		return true, nil
	}))

	By("waiting for replication controller to recover")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
		for _, pod := range pods.Items {
			if api.IsPodReady(&pod) {
				return true, nil
			}
		}
		return false, nil
	}))
}
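Every snippet on this page is built around the same wait.Poll contract: the condition callback returns (true, nil) to finish successfully, (false, nil) to be polled again after the interval, and a non-nil error to abort the wait immediately. A minimal standalone sketch of that contract follows; the import path for the wait package is an assumption and should be adjusted to your tree.

package main

import (
	"fmt"
	"time"

	// Assumed import path for the wait package used in these examples.
	"github.com/qingyuan/qingyuan/pkg/util/wait"
)

func main() {
	deadline := time.Now().Add(2 * time.Second)
	// Poll every 500ms for up to 10s until the condition reports done.
	err := wait.Poll(500*time.Millisecond, 10*time.Second, func() (bool, error) {
		if time.Now().After(deadline) {
			return true, nil // done: stop polling successfully
		}
		return false, nil // not done yet: poll again after the interval
	})
	fmt.Println("wait.Poll returned:", err)
}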
func runSchedulerNoPhantomPodsTest(client *client.Client) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name:  "c1",
					Image: "qingyuan/pause",
					Ports: []api.ContainerPort{
						{ContainerPort: 1234, HostPort: 9999},
					},
					ImagePullPolicy: api.PullIfNotPresent,
				},
			},
		},
	}

	// Assuming we only have two kubelets, the third pod here won't schedule
	// if the scheduler doesn't correctly handle the delete for the second
	// pod.
	pod.ObjectMeta.Name = "phantom.foo"
	foo, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, foo.Namespace, foo.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	pod.ObjectMeta.Name = "phantom.bar"
	bar, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, bar.Namespace, bar.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	// Delete a pod to free up room.
	glog.Infof("Deleting pod %v", bar.Name)
	err = client.Pods(api.NamespaceDefault).Delete(bar.Name, nil)
	if err != nil {
		glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
	}

	pod.ObjectMeta.Name = "phantom.baz"
	baz, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
		glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
	}

	glog.Info("Scheduler doesn't make phantom pods: test passed.")
}
Example #3
func testNotReachable(ip string, port int) {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for non-reachability check (%s)", url)
	}
	if port == 0 {
		Failf("Got port==0 for non-reachability check (%s)", url)
	}

	By(fmt.Sprintf("Waiting up to %v for %s to be *not* reachable", podStartTimeout, url))
	expectNoError(wait.Poll(poll, podStartTimeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Successfully waited for the url %s to be unreachable.", url)
			return true, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Expecting %s to be unreachable but was reachable and got an error reading response: %v", url, err)
			return false, nil
		}
		Logf("Able to reach service %s when should no longer have been reachable, status:%d and body: %s", url, resp.Status, string(body))
		return false, nil
	}))
}
Example #4
func testReachable(ip string, port int) {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for reachability check (%s)", url)
	}
	if port == 0 {
		Failf("Got port==0 for reachability check (%s)", url)
	}

	By(fmt.Sprintf("Waiting up to %v for %s to be reachable", podStartTimeout, url))
	start := time.Now()
	expectNoError(wait.Poll(poll, podStartTimeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Got error waiting for reachability of %s: %v (%v)", url, err, time.Since(start))
			return false, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Got error reading response from %s: %v", url, err)
			return false, nil
		}
		if resp.StatusCode != 200 {
			return false, fmt.Errorf("received non-success return status %q trying to access %s; got body: %s", resp.Status, url, string(body))
		}
		if !strings.Contains(string(body), "test-webserver") {
			return false, fmt.Errorf("received response body without expected substring 'test-webserver': %s", string(body))
		}
		Logf("Successfully reached %v", url)
		return true, nil
	}))
}
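A hedged usage sketch for the two helpers above, roughly how a service e2e test might drive them; the function name, the deleteService callback, and the address/port are placeholders, not values taken from this page.

// Illustrative only: exercise a service endpoint before and after teardown.
// serviceIP, port 80, and deleteService are placeholders for whatever the
// calling test actually exposes.
func checkServiceReachability(serviceIP string, deleteService func()) {
	// While the service exists, it should answer with the "test-webserver"
	// body that testReachable looks for.
	testReachable(serviceIP, 80)

	// Tear the service down, then confirm the address eventually stops answering.
	deleteService()
	testNotReachable(serviceIP, 80)
}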
Example #5
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
	var errLast error
	var templ string
	key := "instanceTemplate"
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mbforbes): make this hit the compute API directly instead of
		// shelling out to gcloud.
		o, err := exec.Command("gcloud", "preview", "managed-instance-groups",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe",
			testContext.CloudConfig.NodeInstanceGroup).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("gcloud preview managed-instance-groups describe call failed with err: %v", err)
			return false, nil
		}
		output := string(o)

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "instanceTemplate: url/to/<templ>" and
		// return <templ>.
		if val := parseKVLines(output, key); len(val) > 0 {
			url := strings.Split(val, "/")
			templ = url[len(url)-1]
			Logf("MIG group %s using template: %s", testContext.CloudConfig.NodeInstanceGroup, templ)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
		return false, nil
	}) != nil {
		return "", fmt.Errorf("migTemplate() failed with last error: %v", errLast)
	}
	return templ, nil
}
Example #6
// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
	// Two keys and a val.
	status, progress, done := "status", "statusMessage", "ROLLED_OUT"
	start, timeout := time.Now(), nt*time.Duration(testContext.CloudConfig.NumNodes)
	var errLast error
	Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if wait.Poll(restartPoll, timeout, func() (bool, error) {
		o, err := exec.Command("gcloud", "preview", "rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe",
			id).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
			Logf("%v", errLast)
			return false, nil
		}
		output := string(o)

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "status: <status>" and see whether it's
		// done.
		Logf("Waiting for MIG rolling update: %s (%v elapsed)",
			parseKVLines(output, progress), time.Since(start))
		if st := parseKVLines(output, status); st == done {
			return true, nil
		}
		return false, nil
	}) != nil {
		return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
	}
	Logf("MIG rolling update complete after %v", time.Since(start))
	return nil
}
Example #7
func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {

	var err error

	for n := 0; n < totalNS; n += 1 {
		_, err = createTestingNS(fmt.Sprintf("nslifetest-%v", n), c)
		Expect(err).NotTo(HaveOccurred())
	}

	// Wait 10 seconds, then send delete requests for all of the namespaces.
	time.Sleep(time.Duration(10 * time.Second))
	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	for _, item := range nsList.Items {
		if strings.Contains(item.Name, "nslifetest") {
			if err := c.Namespaces().Delete(item.Name); err != nil {
				Failf("Failed deleting error ::: --- %v ", err)
			}
		}
		Logf("namespace : %v api call to delete is complete ", item)
	}

	// Now poll until all namespaces have been eradicated.
	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
		func() (bool, error) {
			if rem, err := countRemaining(c, "nslifetest"); err != nil || rem > maxAllowedAfterDel {
				Logf("Remaining namespaces : %v", rem)
				return false, err
			} else {
				return true, nil
			}
		}))

}
Example #8
func assertFilesExist(fileNames []string, fileDir string, pod *api.Pod, client *client.Client) {
	var failed []string

	expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
		failed = []string{}
		for _, fileName := range fileNames {
			if _, err := client.Get().
				Prefix("proxy").
				Resource("pods").
				Namespace(pod.Namespace).
				Name(pod.Name).
				Suffix(fileDir, fileName).
				Do().Raw(); err != nil {
				Logf("Unable to read %s from pod %s: %v", fileName, pod.Name, err)
				failed = append(failed, fileName)
			}
		}
		if len(failed) == 0 {
			return true, nil
		}
		Logf("Lookups using %s failed for: %v\n", pod.Name, failed)
		return false, nil
	}))
	Expect(len(failed)).To(Equal(0))
}
// Wait till the passFunc confirms that the object it expects to see is in the store.
// Used to observe reflected events.
func waitForReflection(s cache.Store, key string, passFunc func(n interface{}) bool) error {
	return wait.Poll(time.Millisecond*10, time.Second*20, func() (bool, error) {
		if n, _, err := s.GetByKey(key); err == nil && passFunc(n) {
			return true, nil
		}
		return false, nil
	})
}
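A short sketch of how waitForReflection might be driven, in the spirit of the scheduler test further down this page; it assumes a cache.Store that a reflector is already filling with *api.Node objects, and the wrapper name is hypothetical.

// Illustrative only: block until the node with the given key is present in
// the store and marked schedulable. Assumes nodeStore is fed by a reflector.
func waitForSchedulableNode(nodeStore cache.Store, nodeKey string) error {
	return waitForReflection(nodeStore, nodeKey, func(obj interface{}) bool {
		node, ok := obj.(*api.Node)
		return ok && !node.Spec.Unschedulable
	})
}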
func (r *RollingUpdater) updateAndWait(rc *api.ReplicationController, interval, timeout time.Duration) (*api.ReplicationController, error) {
	rc, err := r.c.UpdateReplicationController(r.ns, rc)
	if err != nil {
		return nil, err
	}
	if err = wait.Poll(interval, timeout, r.c.ControllerHasDesiredReplicas(rc)); err != nil {
		return nil, err
	}
	return r.c.GetReplicationController(r.ns, rc.ObjectMeta.Name)
}
Example #11
// checkNodesReady waits up to nt for the expected number of nodes (expect)
// accessible via c to be ready, returning an error if this doesn't happen in
// time. It returns the names of the nodes it finds.
func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
	// First, keep getting all of the nodes until we get the number we expect.
	var nodeList *api.NodeList
	var errLast error
	start := time.Now()
	found := wait.Poll(poll, nt, func() (bool, error) {
		// Even though listNodes(...) has its own retries, a rolling-update
		// (GCE/GKE implementation of restart) can complete before the apiserver
		// knows about all of the nodes. Thus, we retry the list nodes call
		// until we get the expected number of nodes.
		nodeList, errLast = listNodes(c, labels.Everything(), fields.Everything())
		if errLast != nil {
			return false, nil
		}
		if len(nodeList.Items) != expect {
			errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
				expect, len(nodeList.Items), time.Since(start))
			Logf("%v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	nodeNames := make([]string, len(nodeList.Items))
	for i, n := range nodeList.Items {
		nodeNames[i] = n.ObjectMeta.Name
	}
	if !found {
		return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
			expect, nt, errLast)
	}
	Logf("Successfully found %d nodes", expect)

	// Next, ensure in parallel that all the nodes are ready. We subtract the
	// time we spent waiting above.
	timeout := nt - time.Since(start)
	result := make(chan bool, len(nodeList.Items))
	for _, n := range nodeNames {
		n := n
		go func() { result <- waitForNodeToBeReady(c, n, timeout) }()
	}
	failed := false
	// TODO(mbforbes): Change to `for range` syntax once we support only Go
	// >= 1.4.
	for i := range nodeList.Items {
		_ = i
		if !<-result {
			failed = true
		}
	}
	if failed {
		return nodeNames, fmt.Errorf("at least one node failed to be ready")
	}
	return nodeNames, nil
}
func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}

		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
// retryCmd runs command with args and retries it for up to singleCallTimeout
// if it returns an error. It returns stdout, stderr, and the last error.
func retryCmd(command string, args ...string) (string, string, error) {
	var err error
	stdout, stderr := "", ""
	wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		stdout, stderr, err = runCmd(command, args...)
		if err != nil {
			Logf("Got %v", err)
			return false, nil
		}
		return true, nil
	})
	return stdout, stderr, err
}
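As the TODOs in the gcloud helpers above note, they could be rewritten on top of retryCmd. A hedged sketch of what that refactor might look like for the describe call in migTemplate; the function name here is hypothetical.

// Illustrative only: the same gcloud describe call as migTemplate, but with
// the retry loop delegated to retryCmd.
func describeNodeInstanceGroup() (string, error) {
	stdout, stderr, err := retryCmd("gcloud", "preview", "managed-instance-groups",
		fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
		fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
		"describe",
		testContext.CloudConfig.NodeInstanceGroup)
	if err != nil {
		return "", fmt.Errorf("gcloud describe failed: %v (stderr: %s)", err, stderr)
	}
	return stdout, nil
}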
Example #14
// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
// as the new template, waiting up to nt per node, and returns the ID of that
// update.
func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
	var errLast error
	var id string
	prefix, suffix := "Started [", "]."
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mbforbes): make this hit the compute API directly instead of
		//                 shelling out to gcloud.
		// NOTE(mbforbes): If you are changing this gcloud command, update
		//                 cluster/gce/upgrade.sh to match this EXACTLY.
		o, err := exec.Command("gcloud", "preview", "rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"start",
			// Required args.
			fmt.Sprintf("--group=%s", testContext.CloudConfig.NodeInstanceGroup),
			fmt.Sprintf("--template=%s", templ),
			// Optional args to fine-tune behavior.
			fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
			// NOTE: We can speed up this process by increasing
			//       --max-num-concurrent-instances.
			fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
			fmt.Sprintf("--max-num-failed-instances=%d", 0),
			fmt.Sprintf("--min-instance-update-time=%ds", 0)).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("gcloud preview rolling-updates call failed with err: %v", err)
			return false, nil
		}
		output := string(o)

		// The 'start' call probably succeeded; parse the output and try to find
		// the line that looks like "Started [url/to/<id>]." and return <id>.
		for _, line := range strings.Split(output, "\n") {
			// As a sanity check, ensure the line starts with prefix and ends
			// with suffix.
			if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
				continue
			}
			url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
			id = url[len(url)-1]
			Logf("Started MIG rolling update; ID: %s", id)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
			prefix, suffix, output)
		return false, nil
	}); err != nil {
		return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
	}
	return id, nil
}
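The three MIG helpers on this page are meant to be chained: resolve the group's current template, start a rolling update, then poll it until it reports ROLLED_OUT. A hedged sketch of that composition; perNodeTimeout is a placeholder for whatever per-node budget the calling test uses, not a constant defined here.

// Illustrative composition of migTemplate, migRollingUpdateStart, and
// migRollingUpdatePoll; perNodeTimeout is a placeholder, not a constant
// defined on this page.
func migRollingUpdateSelf(perNodeTimeout time.Duration) error {
	templ, err := migTemplate()
	if err != nil {
		return fmt.Errorf("getting MIG template: %v", err)
	}
	id, err := migRollingUpdateStart(templ, perNodeTimeout)
	if err != nil {
		return fmt.Errorf("starting MIG rolling update: %v", err)
	}
	return migRollingUpdatePoll(id, perNodeTimeout)
}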
Example #15
func runReplicationControllerTest(c *client.Client) {
	clientAPIVersion := c.APIVersion()
	data, err := ioutil.ReadFile("cmd/integration/" + clientAPIVersion + "-controller.json")
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	var controller api.ReplicationController
	if err := api.Scheme.DecodeInto(data, &controller); err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}

	glog.Infof("Creating replication controllers")
	updated, err := c.ReplicationControllers("test").Create(&controller)
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	glog.Infof("Done creating replication controllers")

	// In practice the controller doesn't need 60s to create a handful of pods, but network latencies on CI
	// systems have been observed to vary unpredictably, so give the controller enough time to create pods.
	// Our e2e scalability tests will catch controllers that are *actually* slow.
	if err := wait.Poll(time.Second, time.Second*60, client.ControllerHasDesiredReplicas(c, updated)); err != nil {
		glog.Fatalf("FAILED: pods never created %v", err)
	}

	// Poll till we can retrieve the status of all pods matching the given label selector from their minions.
	// This involves 3 operations:
	//	- The scheduler must assign all pods to a minion
	//	- The assignment must reflect in a `List` operation against the apiserver, for labels matching the selector
	//  - We need to be able to query the qinglet on that minion for information about the pod
	if err := wait.Poll(
		time.Second, time.Second*30, podsOnMinions(c, "test", labels.Set(updated.Spec.Selector).AsSelector())); err != nil {
		glog.Fatalf("FAILED: pods never started running %v", err)
	}

	glog.Infof("Pods created")
}
func startManagerAndWait(manager *ReplicationManager, pods int, t *testing.T) chan struct{} {
	stopCh := make(chan struct{})
	go manager.Run(1, stopCh)
	err := wait.Poll(10*time.Millisecond, 100*time.Millisecond, func() (bool, error) {
		podList, err := manager.podStore.List(labels.Everything())
		if err != nil {
			return false, err
		}
		return len(podList) == pods, nil
	})
	if err != nil {
		t.Errorf("Failed to observe %d pods in 100ms", pods)
	}
	return stopCh
}
Example #17
func (gce *GCECloud) AddSSHKeyToAllInstances(user string, keyData []byte) error {
	return wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
		project, err := gce.service.Projects.Get(gce.projectID).Do()
		if err != nil {
			glog.Errorf("Could not get project: %v", err)
			return false, nil
		}
		keyString := fmt.Sprintf("%s:%s %s@%s", user, strings.TrimSpace(string(keyData)), user, user)
		found := false
		for _, item := range project.CommonInstanceMetadata.Items {
			if item.Key == "sshKeys" {
				if strings.Contains(item.Value, keyString) {
					// We've already added the key
					glog.Info("SSHKey already in project metadata")
					return true, nil
				}
				item.Value = item.Value + "\n" + keyString
				found = true
				break
			}
		}
		if !found {
			// This is super unlikely, so log.
			glog.Infof("Failed to find sshKeys metadata, creating a new item")
			project.CommonInstanceMetadata.Items = append(project.CommonInstanceMetadata.Items,
				&compute.MetadataItems{
					Key:   "sshKeys",
					Value: keyString,
				})
		}
		op, err := gce.service.Projects.SetCommonInstanceMetadata(gce.projectID, project.CommonInstanceMetadata).Do()
		if err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		if err := gce.waitForGlobalOp(op); err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		glog.Infof("Successfully added sshKey to project metadata")
		return true, nil
	})
}
func getServiceAccount(c *client.Client, ns string, name string, shouldWait bool) (*api.ServiceAccount, error) {
	if !shouldWait {
		return c.ServiceAccounts(ns).Get(name)
	}

	var user *api.ServiceAccount
	var err error
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		user, err = c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return true, nil
	})
	return user, err
}
// realVersion turns a version constant--one accepted by cluster/gce/upgrade.sh--
// into a deployable version string.
//
// NOTE: KEEP THIS LIST UP-TO-DATE WITH THE CODE BELOW.
// The version strings supported are:
// - "latest_stable"   (returns a string like "0.18.2")
// - "latest_release"  (returns a string like "0.19.1")
// - "latest_ci"       (returns a string like "0.19.1-669-gabac8c8")
func realVersion(s string) (string, error) {
	bucket, file := "", ""
	switch s {
	// NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE cluster/gce/upgrade.sh
	case "latest_stable":
		bucket, file = "release", "stable"
	case "latest_release":
		bucket, file = "release", "latest"
	case "latest_ci":
		bucket, file = "ci", "latest"
	default:
		return "", fmt.Errorf("version %s is not supported", s)
	}

	url := fmt.Sprintf(versionURLFmt, bucket, file)
	var v string
	Logf("Fetching version from %s", url)
	c := &http.Client{Timeout: 2 * time.Second}
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		r, err := c.Get(url)
		if err != nil {
			Logf("Error reaching %s: %v", url, err)
			return false, nil
		}
		if r.StatusCode != http.StatusOK {
			Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
			return false, nil
		}
		defer r.Body.Close()
		b, err := ioutil.ReadAll(r.Body)
		if err != nil {
			Logf("Could not read response body: %v", err)
			return false, nil
		}
		v = strings.TrimSpace(string(b))
		return true, nil
	}); err != nil {
		return "", fmt.Errorf("failed to fetch real version from %s", url)
	}
	// Versions start with "v", so remove that.
	return strings.TrimPrefix(v, "v"), nil
}
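A short usage sketch for realVersion; the masterUpgrade callback it feeds is hypothetical and stands in for whatever actually performs the upgrade.

// Illustrative only: resolve a symbolic version before upgrading.
// masterUpgrade is a placeholder, not a helper defined on this page.
func upgradeMasterToLatestRelease(masterUpgrade func(v string) error) {
	v, err := realVersion("latest_release")
	if err != nil {
		Failf("Could not resolve version %q: %v", "latest_release", err)
	}
	Logf("Upgrading master to %q", v)
	expectNoError(masterUpgrade(v))
}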
func testMasterUpgrade(ip, v string, mUp func(v string) error) {
	Logf("Starting async validation")
	httpClient := http.Client{Timeout: 2 * time.Second}
	done := make(chan struct{}, 1)
	// Let's make sure we've finished the heartbeat before shutting things down.
	var wg sync.WaitGroup
	go util.Until(func() {
		defer GinkgoRecover()
		wg.Add(1)
		defer wg.Done()

		if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
			r, err := httpClient.Get("http://" + ip)
			if err != nil {
				Logf("Error reaching %s: %v", ip, err)
				return false, nil
			}
			if r.StatusCode < http.StatusOK || r.StatusCode >= http.StatusNotFound {
				Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
				return false, nil
			}
			return true, nil
		}); err != nil {
			// We log the error here because the test will fail at the very end
			// because this validation runs in another goroutine. Without this,
			// a failure is very confusing to track down because from the logs
			// everything looks fine.
			msg := fmt.Sprintf("Failed to contact service during master upgrade: %v", err)
			Logf(msg)
			Failf(msg)
		}
	}, 200*time.Millisecond, done)

	Logf("Starting master upgrade")
	expectNoError(mUp(v))
	done <- struct{}{}
	Logf("Stopping async validation")
	wg.Wait()
	Logf("Master upgrade complete")
}
Example #21
func newEtcdClient(etcdServer string) (*etcd.Client, error) {
	var (
		client *etcd.Client
		err    error
	)
	for attempt := 1; attempt <= maxConnectAttempts; attempt++ {
		if _, err = tools.GetEtcdVersion(etcdServer); err == nil {
			break
		}
		if attempt == maxConnectAttempts {
			break
		}
		glog.Infof("[Attempt: %d] Attempting access to etcd after 5 second sleep", attempt)
		time.Sleep(5 * time.Second)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to connect to etcd server: %v, error: %v", etcdServer, err)
	}
	glog.Infof("Etcd server found: %v", etcdServer)

	// loop until we have > 0 machines && machines[0] != ""
	poll, timeout := 1*time.Second, 10*time.Second
	if err := wait.Poll(poll, timeout, func() (bool, error) {
		if client = etcd.NewClient([]string{etcdServer}); client == nil {
			return false, fmt.Errorf("etcd.NewClient returned nil")
		}
		client.SyncCluster()
		machines := client.GetCluster()
		if len(machines) == 0 || len(machines[0]) == 0 {
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, fmt.Errorf("Timed out after %s waiting for at least 1 synchronized etcd server in the cluster. Error: %v", timeout, err)
	}
	return client, nil
}
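A minimal call-site sketch for newEtcdClient; the server URL and the wrapper name are placeholders.

// Illustrative only: the etcd URL is a placeholder for whatever --etcd-server
// flag or config value the binary actually receives.
func mustNewEtcdClient() *etcd.Client {
	c, err := newEtcdClient("http://127.0.0.1:4001")
	if err != nil {
		glog.Fatalf("Failed to create etcd client: %v", err)
	}
	return c
}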
// updateWithRetries applies the given update function to rc and pushes it to
// the apiserver, retrying with a freshly fetched controller if the update fails.
func updateWithRetries(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, applyUpdate updateFunc) (*api.ReplicationController, error) {
	// Each update could take ~100ms, so give it 0.5 second
	var err error
	oldRc := rc
	err = wait.Poll(10*time.Millisecond, 500*time.Millisecond, func() (bool, error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, err = rcClient.Update(rc); err == nil {
			// rc contains the latest controller post update
			return true, nil
		}
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, err = rcClient.Get(oldRc.Name); err != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// The Get passed: rc contains the latest controller, expect a poll for the update.
		return false, nil
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
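The applyUpdate closure is what makes updateWithRetries reusable: it is re-applied to whichever copy of the controller the retry loop currently holds, so the mutation survives a conflict-driven re-get. A hedged sketch that scales a controller; the function name is hypothetical.

// Illustrative only: set the replica count on rc, retrying the push through
// updateWithRetries so a stale resourceVersion does not abort the scale.
func scaleRC(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, replicas int) (*api.ReplicationController, error) {
	return updateWithRetries(rcClient, rc, func(cur *api.ReplicationController) {
		cur.Spec.Replicas = replicas
	})
}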
Example #23
// waitForNPods polls the pod store ps until it finds expect pods, returning
// their names if it can do so before timeout.
func waitForNPods(ps *podStore, expect int, timeout time.Duration) ([]string, error) {
	// Loop until we find expect pods or timeout is passed.
	var pods []*api.Pod
	var errLast error
	found := wait.Poll(poll, timeout, func() (bool, error) {
		pods = ps.List()
		if len(pods) != expect {
			errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods))
			Logf("Error getting pods: %v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	// Extract the names of all found pods.
	podNames := make([]string, len(pods))
	for i, p := range pods {
		podNames[i] = p.ObjectMeta.Name
	}
	if !found {
		return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v",
			expect, timeout, errLast)
	}
	return podNames, nil
}
	BeforeEach(func() {
		framework.beforeEach()
		podClient = framework.Client.Pods(framework.Namespace.Name)
	})

	AfterEach(framework.afterEach)

	It("with readiness probe should not be ready before initial delay and never restart", func() {
		p, err := podClient.Create(makePodSpec(probe.withInitialDelay().build(), nil))
		expectNoError(err)
		startTime := time.Now()

		expectNoError(wait.Poll(poll, 90*time.Second, func() (bool, error) {
			p, err := podClient.Get(p.Name)
			if err != nil {
				return false, err
			}
			return api.IsPodReady(p), nil
		}))

		if time.Since(startTime) < 30*time.Second {
			Failf("Pod became ready before it's initial delay")
		}

		p, err = podClient.Get(p.Name)
		expectNoError(err)

		isReady, err := podRunningReady(p)
		expectNoError(err)
		Expect(isReady).To(BeTrue())
func TestServiceAccountTokenAutoCreate(t *testing.T) {
	c, _, stopFunc := startServiceAccountTestServer(t)
	defer stopFunc()

	ns := "test-service-account-token-creation"
	name := "my-service-account"

	// Create namespace
	_, err := c.Namespaces().Create(&api.Namespace{ObjectMeta: api.ObjectMeta{Name: ns}})
	if err != nil {
		t.Fatalf("could not create namespace: %v", err)
	}

	// Create service account
	serviceAccount, err := c.ServiceAccounts(ns).Create(&api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: name}})
	if err != nil {
		t.Fatalf("Service Account not created: %v", err)
	}

	// Get token
	token1Name, token1, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}

	// Delete token
	err = c.Secrets(ns).Delete(token1Name)
	if err != nil {
		t.Fatalf("Could not delete token: %v", err)
	}

	// Get recreated token
	token2Name, token2, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}
	if token1Name == token2Name {
		t.Fatalf("Expected new auto-created token name")
	}
	if token1 == token2 {
		t.Fatalf("Expected new auto-created token value")
	}

	// Trigger creation of a new referenced token
	serviceAccount, err = c.ServiceAccounts(ns).Get(name)
	if err != nil {
		t.Fatal(err)
	}
	serviceAccount.Secrets = []api.ObjectReference{}
	_, err = c.ServiceAccounts(ns).Update(serviceAccount)
	if err != nil {
		t.Fatal(err)
	}

	// Get rotated token
	token3Name, token3, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}
	if token3Name == token2Name {
		t.Fatalf("Expected new auto-created token name")
	}
	if token3 == token2 {
		t.Fatalf("Expected new auto-created token value")
	}

	// Delete service account
	err = c.ServiceAccounts(ns).Delete(name)
	if err != nil {
		t.Fatal(err)
	}

	// Wait for tokens to be deleted
	tokensToCleanup := util.NewStringSet(token1Name, token2Name, token3Name)
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		// Get all secrets in the namespace
		secrets, err := c.Secrets(ns).List(labels.Everything(), fields.Everything())
		// Retrieval errors should fail
		if err != nil {
			return false, err
		}
		for _, s := range secrets.Items {
			if tokensToCleanup.Has(s.Name) {
				// Still waiting for tokens to be cleaned up
				return false, nil
			}
		}
		// All clean
		return true, nil
	})
	if err != nil {
		t.Fatalf("Error waiting for tokens to be deleted: %v", err)
	}
}
Example #26
			Failf("Failed to get pod: %v", err)
		}
		fmt.Printf("%+v\n", podWithUid)
		var events *api.EventList
		// Check for scheduler event about the pod.
		By("checking for scheduler event about the pod")
		expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
			events, err := c.Events(api.NamespaceDefault).List(
				labels.Everything(),
				fields.Set{
					"involvedObject.kind":      "Pod",
					"involvedObject.uid":       string(podWithUid.UID),
					"involvedObject.namespace": api.NamespaceDefault,
					"source":                   "scheduler",
				}.AsSelector(),
			)
			if err != nil {
				return false, err
			}
			if len(events.Items) > 0 {
				fmt.Println("Saw scheduler event for our pod.")
				return true, nil
			}
			return false, nil
		}))
		// Check for qinglet event about the pod.
		By("checking for qinglet event about the pod")
		expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
			events, err = c.Events(api.NamespaceDefault).List(
				labels.Everything(),
				fields.Set{
func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore cache.Store) {
	goodCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionTrue,
		Reason:            fmt.Sprintf("schedulable condition"),
		LastHeartbeatTime: util.Time{time.Now()},
	}
	badCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionUnknown,
		Reason:            fmt.Sprintf("unschedulable condition"),
		LastHeartbeatTime: util.Time{time.Now()},
	}
	// Create a new schedulable node, since we're first going to apply
	// the unschedulable condition and verify that pods aren't scheduled.
	node := &api.Node{
		ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-node"},
		Spec:       api.NodeSpec{Unschedulable: false},
		Status: api.NodeStatus{
			Capacity: api.ResourceList{
				api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
			Conditions: []api.NodeCondition{goodCondition},
		},
	}
	nodeKey, err := cache.MetaNamespaceKeyFunc(node)
	if err != nil {
		t.Fatalf("Couldn't retrieve key for node %v", node.Name)
	}

	// The test does the following for each nodeStateManager in this list:
	//	1. Create a new node
	//	2. Apply the makeUnSchedulable function
	//	3. Create a new pod
	//  4. Check that the pod doesn't get assigned to the node
	//  5. Apply the schedulable function
	//  6. Check that the pod *does* get assigned to the node
	//  7. Delete the pod and node.

	nodeModifications := []nodeStateManager{
		// Test node.Spec.Unschedulable=true/false
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = true
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=true: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					// An unschedulable node should get deleted from the store
					return node == nil
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = false
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=false: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Spec.Unschedulable == false
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
				}
			},
		},
		// Test node.Status.Conditions=ConditionTrue/Unknown
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{badCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with bad status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionUnknown
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{goodCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with healthy status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionTrue
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
		},
	}

	for i, mod := range nodeModifications {
		unSchedNode, err := restClient.Nodes().Create(node)
		if err != nil {
			t.Fatalf("Failed to create node: %v", err)
		}

		// Apply the unschedulable modification to the node, and wait for the reflection
		mod.makeUnSchedulable(t, unSchedNode, nodeStore, restClient)

		// Create the new pod, note that this needs to happen post unschedulable
		// modification or we have a race in the test.
		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-pod"},
			Spec: api.PodSpec{
				Containers: []api.Container{{Name: "container", Image: "qingyuan/pause:go"}},
			},
		}
		myPod, err := restClient.Pods(api.NamespaceDefault).Create(pod)
		if err != nil {
			t.Fatalf("Failed to create pod: %v", err)
		}

		// There are no schedulable nodes - the pod shouldn't be scheduled.
		err = wait.Poll(time.Second, time.Second*10, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err == nil {
			t.Errorf("Pod scheduled successfully on unschedulable nodes")
		}
		if err != wait.ErrWaitTimeout {
			t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
		} else {
			t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
		}

		// Apply the schedulable modification to the node, and wait for the reflection
		schedNode, err := restClient.Nodes().Get(unSchedNode.Name)
		if err != nil {
			t.Fatalf("Failed to get node: %v", err)
		}
		mod.makeSchedulable(t, schedNode, nodeStore, restClient)

		// Wait until the pod is scheduled.
		err = wait.Poll(time.Second, time.Second*10, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err != nil {
			t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
		} else {
			t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
		}

		err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, nil)
		if err != nil {
			t.Errorf("Failed to delete pod: %v", err)
		}
		err = restClient.Nodes().Delete(schedNode.Name)
		if err != nil {
			t.Errorf("Failed to delete node: %v", err)
		}
	}
}
Example #28
		By("verifying the pod is in qingyuan")
		pods, err := podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything())
		Expect(err).NotTo(HaveOccurred())
		Expect(len(pods.Items)).To(Equal(1))

		// Standard get, update retry loop
		expectNoError(wait.Poll(time.Millisecond*500, time.Second*30, func() (bool, error) {
			By("updating the pod")
			value = strconv.Itoa(time.Now().Nanosecond())
			if pod == nil { // on retries we need to re-get
				pod, err = podClient.Get(name)
				if err != nil {
					return false, fmt.Errorf("failed to get pod: %v", err)
				}
			}
			pod.Labels["time"] = value
			pod, err = podClient.Update(pod)
			if err == nil {
				Logf("Successfully updated pod")
				return true, nil
			}
			if errors.IsConflict(err) {
				Logf("Conflicting update to pod, re-get and re-update: %v", err)
				pod = nil // re-get it when we retry
				return false, nil
			}
			return false, fmt.Errorf("failed to update pod: %v", err)
		}))

		expectNoError(waitForPodRunning(c, pod.Name))

		By("verifying the updated pod is in qingyuan")
		pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything())
Example #29
// A basic test to check the deployment of an image using
// a replication controller. The image serves its hostname
// which is checked for each replica.
func ServeImageOrFail(c *client.Client, test string, image string) {
	ns := api.NamespaceDefault
	name := "my-hostname-" + test + "-" + string(util.NewUUID())
	replicas := 2

	// Create a replication controller for a service
	// that serves its hostname.
	// The source for the Docker container qingyuan/serve_hostname is
	// in contrib/for-demos/serve_hostname
	By(fmt.Sprintf("Creating replication controller %s", name))
	controller, err := c.ReplicationControllers(ns).Create(&api.ReplicationController{
		ObjectMeta: api.ObjectMeta{
			Name: name,
		},
		Spec: api.ReplicationControllerSpec{
			Replicas: replicas,
			Selector: map[string]string{
				"name": name,
			},
			Template: &api.PodTemplateSpec{
				ObjectMeta: api.ObjectMeta{
					Labels: map[string]string{"name": name},
				},
				Spec: api.PodSpec{
					Containers: []api.Container{
						{
							Name:  name,
							Image: image,
							Ports: []api.ContainerPort{{ContainerPort: 9376}},
						},
					},
				},
			},
		},
	})
	Expect(err).NotTo(HaveOccurred())
	// Cleanup the replication controller when we are done.
	defer func() {
		// Resize the replication controller to zero to get rid of pods.
		By("Cleaning up the replication controller")
		rcReaper, err := qingctl.ReaperFor("ReplicationController", c)
		if err != nil {
			Logf("Failed to cleanup replication controller %v: %v.", controller.Name, err)
		}
		if _, err = rcReaper.Stop(ns, controller.Name, 0, nil); err != nil {
			Logf("Failed to stop replication controller %v: %v.", controller.Name, err)
		}
	}()

	// List the pods, making sure we observe all the replicas.
	listTimeout := time.Minute
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
	pods, err := c.Pods(ns).List(label, fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	t := time.Now()
	for {
		Logf("Controller %s: Found %d pods out of %d", name, len(pods.Items), replicas)
		if len(pods.Items) == replicas {
			break
		}
		if time.Since(t) > listTimeout {
			Failf("Controller %s: Gave up waiting for %d pods to come up after seeing only %d pods after %v seconds",
				name, replicas, len(pods.Items), time.Since(t).Seconds())
		}
		time.Sleep(5 * time.Second)
		pods, err = c.Pods(ns).List(label, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
	}

	By("Ensuring each pod is running")

	// Wait for the pods to enter the running state. Waiting loops until the pods
	// are running so non-running pods cause a timeout for this test.
	for _, pod := range pods.Items {
		err = waitForPodRunning(c, pod.Name)
		Expect(err).NotTo(HaveOccurred())
	}

	// Verify that something is listening.
	By("Trying to dial each unique pod")
	retryTimeout := 2 * time.Minute
	retryInterval := 5 * time.Second
	err = wait.Poll(retryInterval, retryTimeout, podResponseChecker{c, ns, label, name, true, pods}.checkAllResponses)
	if err != nil {
		Failf("Did not get expected responses within the timeout period of %.2f seconds.", retryTimeout.Seconds())
	}
}
	f := NewFramework("svcaccounts")

	It("should mount an API token into pods", func() {
		var tokenName string
		var tokenContent string

		// Standard get, update retry loop
		expectNoError(wait.Poll(time.Millisecond*500, time.Second*10, func() (bool, error) {
			By("getting the auto-created API token")
			tokenSelector := fields.SelectorFromSet(map[string]string{client.SecretType: string(api.SecretTypeServiceAccountToken)})
			secrets, err := f.Client.Secrets(f.Namespace.Name).List(labels.Everything(), tokenSelector)
			if err != nil {
				return false, err
			}
			if len(secrets.Items) == 0 {
				return false, nil
			}
			if len(secrets.Items) > 1 {
				return false, fmt.Errorf("Expected 1 token secret, got %d", len(secrets.Items))
			}
			tokenName = secrets.Items[0].Name
			tokenContent = string(secrets.Items[0].Data[api.ServiceAccountTokenKey])
			return true, nil
		}))

		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{
				Name: "pod-service-account-" + string(util.NewUUID()),
			},
			Spec: api.PodSpec{
				Containers: []api.Container{