func checkExistingRCRecovers(f Framework) {
	By("assert that the pre-existing replication controller recovers")
	podClient := f.Client.Pods(f.Namespace.Name)
	rcSelector := labels.Set{"name": "baz"}.AsSelector()

	By("deleting pods from existing replication controller")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		if err != nil {
			Logf("apiserver returned error, as expected before recovery: %v", err)
			return false, nil
		}
		if len(pods.Items) == 0 {
			return false, nil
		}
		for _, pod := range pods.Items {
			err = podClient.Delete(pod.Name, api.NewDeleteOptions(0))
			Expect(err).NotTo(HaveOccurred())
		}
		Logf("apiserver has recovered")
		return true, nil
	}))

	By("waiting for replication controller to recover")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
		for _, pod := range pods.Items {
			if api.IsPodReady(&pod) {
				return true, nil
			}
		}
		return false, nil
	}))
}
func runSchedulerNoPhantomPodsTest(client *client.Client) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name:  "c1",
					Image: "qingyuan/pause",
					Ports: []api.ContainerPort{
						{ContainerPort: 1234, HostPort: 9999},
					},
					ImagePullPolicy: api.PullIfNotPresent,
				},
			},
		},
	}

	// Assuming we only have two kubelets, the third pod here won't schedule
	// if the scheduler doesn't correctly handle the delete for the second
	// pod.
	pod.ObjectMeta.Name = "phantom.foo"
	foo, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, foo.Namespace, foo.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running: %v", err)
	}

	pod.ObjectMeta.Name = "phantom.bar"
	bar, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, bar.Namespace, bar.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running: %v", err)
	}

	// Delete a pod to free up room.
	glog.Infof("Deleting pod %v", bar.Name)
	err = client.Pods(api.NamespaceDefault).Delete(bar.Name, nil)
	if err != nil {
		glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
	}

	pod.ObjectMeta.Name = "phantom.baz"
	baz, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
		glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
	}

	glog.Info("Scheduler doesn't make phantom pods: test passed.")
}
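// The podRunning helper used by the polls above is not shown in this section.
// A minimal sketch of what such a wait.ConditionFunc might look like, assuming
// the same client, api, and wait packages these tests already import; the real
// helper may differ.
func podRunningSketch(c *client.Client, namespace, name string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.Pods(namespace).Get(name)
		if err != nil {
			// Treat lookup errors as "not running yet" so the poll retries.
			return false, nil
		}
		return pod.Status.Phase == api.PodRunning, nil
	}
}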
func testNotReachable(ip string, port int) {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for non-reachability check (%s)", url)
	}
	if port == 0 {
		Failf("Got port==0 for non-reachability check (%s)", url)
	}

	By(fmt.Sprintf("Waiting up to %v for %s to be *not* reachable", podStartTimeout, url))
	expectNoError(wait.Poll(poll, podStartTimeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Successfully waited for the url %s to be unreachable.", url)
			return true, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Expecting %s to be unreachable but was reachable and got an error reading response: %v", url, err)
			return false, nil
		}
		Logf("Able to reach service %s when it should no longer have been reachable; status: %s, body: %s", url, resp.Status, string(body))
		return false, nil
	}))
}
func testReachable(ip string, port int) {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for reachability check (%s)", url)
	}
	if port == 0 {
		Failf("Got port==0 for reachability check (%s)", url)
	}

	By(fmt.Sprintf("Waiting up to %v for %s to be reachable", podStartTimeout, url))
	start := time.Now()
	expectNoError(wait.Poll(poll, podStartTimeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Got error waiting for reachability of %s: %v (%v)", url, err, time.Since(start))
			return false, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Got error reading response from %s: %v", url, err)
			return false, nil
		}
		if resp.StatusCode != 200 {
			return false, fmt.Errorf("received non-success return status %q trying to access %s; got body: %s", resp.Status, url, string(body))
		}
		if !strings.Contains(string(body), "test-webserver") {
			return false, fmt.Errorf("received response body without expected substring 'test-webserver': %s", string(body))
		}
		Logf("Successfully reached %v", url)
		return true, nil
	}))
}
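// Both reachability checks above go through httpGetNoConnectionPool, which is
// not shown here. The point of such a helper is to keep a cached keep-alive
// connection from masking a service that has actually gone away. A hypothetical
// sketch using only the standard net/http package; the real helper may differ.
func httpGetNoConnectionPoolSketch(url string) (*http.Response, error) {
	tr := &http.Transport{
		DisableKeepAlives: true, // force a fresh TCP connection on every poll
	}
	client := &http.Client{
		Transport: tr,
		Timeout:   5 * time.Second, // assumed per-request timeout
	}
	return client.Get(url)
}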
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
	var errLast error
	var templ string
	key := "instanceTemplate"
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mbforbes): make this hit the compute API directly instead of
		// shelling out to gcloud.
		o, err := exec.Command("gcloud", "preview", "managed-instance-groups",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe", testContext.CloudConfig.NodeInstanceGroup).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("gcloud preview managed-instance-groups describe call failed with err: %v", err)
			return false, nil
		}
		output := string(o)

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "instanceTemplate: url/to/<templ>" and
		// return <templ>.
		if val := parseKVLines(output, key); len(val) > 0 {
			url := strings.Split(val, "/")
			templ = url[len(url)-1]
			Logf("MIG group %s using template: %s", testContext.CloudConfig.NodeInstanceGroup, templ)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
		return false, nil
	}) != nil {
		return "", fmt.Errorf("migTemplate() failed with last error: %v", errLast)
	}
	return templ, nil
}
// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
	// Two keys and a val.
	status, progress, done := "status", "statusMessage", "ROLLED_OUT"
	start, timeout := time.Now(), nt*time.Duration(testContext.CloudConfig.NumNodes)
	var errLast error
	Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if wait.Poll(restartPoll, timeout, func() (bool, error) {
		o, err := exec.Command("gcloud", "preview", "rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe", id).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("error calling rolling-updates describe %s: %v", id, err)
			Logf("%v", errLast)
			return false, nil
		}
		output := string(o)

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "status: <status>" and see whether it's
		// done.
		Logf("Waiting for MIG rolling update: %s (%v elapsed)",
			parseKVLines(output, progress), time.Since(start))
		if st := parseKVLines(output, status); st == done {
			return true, nil
		}
		return false, nil
	}) != nil {
		return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
	}
	Logf("MIG rolling update complete after %v", time.Since(start))
	return nil
}
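// migTemplate and migRollingUpdatePoll both lean on parseKVLines to pull a
// value out of gcloud's "key: value" output; that helper is not part of this
// section. A hypothetical sketch of the behavior these callers assume:
func parseKVLinesSketch(output, key string) string {
	prefix := key + ":"
	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, prefix) {
			return strings.TrimSpace(strings.TrimPrefix(line, prefix))
		}
	}
	// No matching line found.
	return ""
}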
func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
	var err error

	for n := 0; n < totalNS; n++ {
		_, err = createTestingNS(fmt.Sprintf("nslifetest-%v", n), c)
		Expect(err).NotTo(HaveOccurred())
	}

	// Wait 10 seconds, then send delete requests for all the namespaces.
	time.Sleep(10 * time.Second)
	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	for _, item := range nsList.Items {
		if strings.Contains(item.Name, "nslifetest") {
			if err := c.Namespaces().Delete(item.Name); err != nil {
				Failf("Failed deleting namespace %v: %v", item.Name, err)
			}
		}
		Logf("namespace %v: delete API call is complete", item.Name)
	}

	// Now poll until all the namespaces have been eradicated.
	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
		func() (bool, error) {
			if rem, err := countRemaining(c, "nslifetest"); err != nil || rem > maxAllowedAfterDel {
				Logf("Remaining namespaces: %v", rem)
				return false, err
			}
			return true, nil
		}))
}
func assertFilesExist(fileNames []string, fileDir string, pod *api.Pod, client *client.Client) {
	var failed []string

	expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
		failed = []string{}
		for _, fileName := range fileNames {
			if _, err := client.Get().
				Prefix("proxy").
				Resource("pods").
				Namespace(pod.Namespace).
				Name(pod.Name).
				Suffix(fileDir, fileName).
				Do().Raw(); err != nil {
				Logf("Unable to read %s from pod %s: %v", fileName, pod.Name, err)
				failed = append(failed, fileName)
			}
		}
		if len(failed) == 0 {
			return true, nil
		}
		Logf("Lookups using %s failed for: %v\n", pod.Name, failed)
		return false, nil
	}))
	Expect(len(failed)).To(Equal(0))
}
// waitForReflection waits until passFunc confirms that the object it expects
// to see is in the store. Used to observe reflected events.
func waitForReflection(s cache.Store, key string, passFunc func(n interface{}) bool) error {
	return wait.Poll(time.Millisecond*10, time.Second*20, func() (bool, error) {
		if n, _, err := s.GetByKey(key); err == nil && passFunc(n) {
			return true, nil
		}
		return false, nil
	})
}
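// Hypothetical usage sketch (not from the original source): callers typically
// pair waitForReflection with a mutation they just made through the API, as the
// unschedulable-node test later in this section does. The store, key, and t
// parameters stand in for whatever the surrounding test already has.
func waitForDeletionSketch(t *testing.T, s cache.Store, key string) {
	if err := waitForReflection(s, key, func(n interface{}) bool {
		return n == nil // wait until the store has dropped the deleted object
	}); err != nil {
		t.Fatalf("store never observed the deletion: %v", err)
	}
}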
func (r *RollingUpdater) updateAndWait(rc *api.ReplicationController, interval, timeout time.Duration) (*api.ReplicationController, error) {
	rc, err := r.c.UpdateReplicationController(r.ns, rc)
	if err != nil {
		return nil, err
	}
	if err = wait.Poll(interval, timeout, r.c.ControllerHasDesiredReplicas(rc)); err != nil {
		return nil, err
	}
	return r.c.GetReplicationController(r.ns, rc.ObjectMeta.Name)
}
// checkNodesReady waits up to nt for expect nodes accessed by c to become
// ready. It returns the names of the nodes it finds, and an error if they
// aren't all ready in time.
func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
	// First, keep getting all of the nodes until we get the number we expect.
	var nodeList *api.NodeList
	var errLast error
	start := time.Now()
	found := wait.Poll(poll, nt, func() (bool, error) {
		// Even though listNodes(...) has its own retries, a rolling-update
		// (GCE/GKE implementation of restart) can complete before the apiserver
		// knows about all of the nodes. Thus, we retry the list nodes call
		// until we get the expected number of nodes.
		nodeList, errLast = listNodes(c, labels.Everything(), fields.Everything())
		if errLast != nil {
			return false, nil
		}
		if len(nodeList.Items) != expect {
			errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
				expect, len(nodeList.Items), time.Since(start))
			Logf("%v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	nodeNames := make([]string, len(nodeList.Items))
	for i, n := range nodeList.Items {
		nodeNames[i] = n.ObjectMeta.Name
	}
	if !found {
		return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
			expect, nt, errLast)
	}
	Logf("Successfully found %d nodes", expect)

	// Next, ensure in parallel that all the nodes are ready. We subtract the
	// time we spent waiting above.
	timeout := nt - time.Since(start)
	result := make(chan bool, len(nodeList.Items))
	for _, n := range nodeNames {
		n := n
		go func() { result <- waitForNodeToBeReady(c, n, timeout) }()
	}
	failed := false
	// TODO(mbforbes): Change to `for range` syntax once we support only Go
	// >= 1.4.
	for i := range nodeList.Items {
		_ = i
		if !<-result {
			failed = true
		}
	}
	if failed {
		return nodeNames, fmt.Errorf("at least one node failed to be ready")
	}
	return nodeNames, nil
}
func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}

		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
// retryCmd runs command with args and retries it for up to singleCallTimeout
// if it returns an error. It returns stdout and stderr.
func retryCmd(command string, args ...string) (string, string, error) {
	var err error
	stdout, stderr := "", ""
	wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		stdout, stderr, err = runCmd(command, args...)
		if err != nil {
			Logf("Got %v", err)
			return false, nil
		}
		return true, nil
	})
	return stdout, stderr, err
}
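// Hypothetical usage sketch (not from the original source): retryCmd is the
// helper the TODOs in the MIG functions above want to migrate to. Note that
// stdout and stderr are returned even when err is non-nil, so callers can
// report output from the last failed attempt.
func describeInstanceGroupSketch() (string, error) {
	stdout, stderr, err := retryCmd("gcloud", "preview", "managed-instance-groups",
		fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
		fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
		"describe", testContext.CloudConfig.NodeInstanceGroup)
	if err != nil {
		return "", fmt.Errorf("describe failed after retries: %v (stderr: %s)", err, stderr)
	}
	return stdout, nil
}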
// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
// as the new template, waiting up to nt per node, and returns the ID of that
// update.
func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
	var errLast error
	var id string
	prefix, suffix := "Started [", "]."
	// TODO(mbforbes): Refactor this to use cluster_upgrade.go:retryCmd(...)
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mbforbes): make this hit the compute API directly instead of
		// shelling out to gcloud.
		// NOTE(mbforbes): If you are changing this gcloud command, update
		// cluster/gce/upgrade.sh to match this EXACTLY.
		o, err := exec.Command("gcloud", "preview", "rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"start",
			// Required args.
			fmt.Sprintf("--group=%s", testContext.CloudConfig.NodeInstanceGroup),
			fmt.Sprintf("--template=%s", templ),
			// Optional args to fine-tune behavior.
			fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
			// NOTE: We can speed up this process by increasing
			// --max-num-concurrent-instances.
			fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
			fmt.Sprintf("--max-num-failed-instances=%d", 0),
			fmt.Sprintf("--min-instance-update-time=%ds", 0)).CombinedOutput()
		if err != nil {
			errLast = fmt.Errorf("gcloud preview rolling-updates call failed with err: %v", err)
			return false, nil
		}
		output := string(o)

		// The 'start' call probably succeeded; parse the output and try to find
		// the line that looks like "Started [url/to/<id>]." and return <id>.
		for _, line := range strings.Split(output, "\n") {
			// As a sanity check, ensure the line starts with prefix and ends
			// with suffix.
			if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
				continue
			}
			url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
			id = url[len(url)-1]
			Logf("Started MIG rolling update; ID: %s", id)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
			prefix, suffix, output)
		return false, nil
	}); err != nil {
		return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
	}
	return id, nil
}
func runReplicationControllerTest(c *client.Client) {
	clientAPIVersion := c.APIVersion()
	data, err := ioutil.ReadFile("cmd/integration/" + clientAPIVersion + "-controller.json")
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	var controller api.ReplicationController
	if err := api.Scheme.DecodeInto(data, &controller); err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}

	glog.Infof("Creating replication controllers")
	updated, err := c.ReplicationControllers("test").Create(&controller)
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	glog.Infof("Done creating replication controllers")

	// In practice the controller doesn't need 60s to create a handful of pods, but network latencies on CI
	// systems have been observed to vary unpredictably, so give the controller enough time to create pods.
	// Our e2e scalability tests will catch controllers that are *actually* slow.
	if err := wait.Poll(time.Second, time.Second*60, client.ControllerHasDesiredReplicas(c, updated)); err != nil {
		glog.Fatalf("FAILED: pods never created %v", err)
	}

	// Poll till we can retrieve the status of all pods matching the given label selector from their minions.
	// This involves 3 operations:
	//   - The scheduler must assign all pods to a minion
	//   - The assignment must reflect in a `List` operation against the apiserver, for labels matching the selector
	//   - We need to be able to query the qinglet on that minion for information about the pod
	if err := wait.Poll(
		time.Second, time.Second*30, podsOnMinions(c, "test", labels.Set(updated.Spec.Selector).AsSelector())); err != nil {
		glog.Fatalf("FAILED: pods never started running %v", err)
	}

	glog.Infof("Pods created")
}
func startManagerAndWait(manager *ReplicationManager, pods int, t *testing.T) chan struct{} {
	stopCh := make(chan struct{})
	go manager.Run(1, stopCh)
	err := wait.Poll(10*time.Millisecond, 100*time.Millisecond, func() (bool, error) {
		podList, err := manager.podStore.List(labels.Everything())
		if err != nil {
			return false, err
		}
		return len(podList) == pods, nil
	})
	if err != nil {
		t.Errorf("Failed to observe %d pods in 100ms", pods)
	}
	return stopCh
}
func (gce *GCECloud) AddSSHKeyToAllInstances(user string, keyData []byte) error {
	return wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
		project, err := gce.service.Projects.Get(gce.projectID).Do()
		if err != nil {
			glog.Errorf("Could not get project: %v", err)
			return false, nil
		}
		keyString := fmt.Sprintf("%s:%s %s@%s", user, strings.TrimSpace(string(keyData)), user, user)
		found := false
		for _, item := range project.CommonInstanceMetadata.Items {
			if item.Key == "sshKeys" {
				if strings.Contains(item.Value, keyString) {
					// We've already added the key
					glog.Info("SSHKey already in project metadata")
					return true, nil
				}
				item.Value = item.Value + "\n" + keyString
				found = true
				break
			}
		}
		if !found {
			// This is super unlikely, so log.
			glog.Infof("Failed to find sshKeys metadata, creating a new item")
			project.CommonInstanceMetadata.Items = append(project.CommonInstanceMetadata.Items,
				&compute.MetadataItems{
					Key:   "sshKeys",
					Value: keyString,
				})
		}
		op, err := gce.service.Projects.SetCommonInstanceMetadata(gce.projectID, project.CommonInstanceMetadata).Do()
		if err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		if err := gce.waitForGlobalOp(op); err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		glog.Infof("Successfully added sshKey to project metadata")
		return true, nil
	})
}
func getServiceAccount(c *client.Client, ns string, name string, shouldWait bool) (*api.ServiceAccount, error) {
	if !shouldWait {
		return c.ServiceAccounts(ns).Get(name)
	}

	var user *api.ServiceAccount
	var err error
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		user, err = c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return true, nil
	})
	return user, err
}
// realVersion turns a version constant--one accepted by cluster/gce/upgrade.sh--
// into a deployable version string.
//
// NOTE: KEEP THIS LIST UP-TO-DATE WITH THE CODE BELOW.
// The version strings supported are:
// - "latest_stable"  (returns a string like "0.18.2")
// - "latest_release" (returns a string like "0.19.1")
// - "latest_ci"      (returns a string like "0.19.1-669-gabac8c8")
func realVersion(s string) (string, error) {
	bucket, file := "", ""
	switch s {
	// NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE cluster/gce/upgrade.sh
	case "latest_stable":
		bucket, file = "release", "stable"
	case "latest_release":
		bucket, file = "release", "latest"
	case "latest_ci":
		bucket, file = "ci", "latest"
	default:
		return "", fmt.Errorf("version %s is not supported", s)
	}

	url := fmt.Sprintf(versionURLFmt, bucket, file)
	var v string
	Logf("Fetching version from %s", url)
	c := &http.Client{Timeout: 2 * time.Second}
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		r, err := c.Get(url)
		if err != nil {
			Logf("Error reaching %s: %v", url, err)
			return false, nil
		}
		if r.StatusCode != http.StatusOK {
			Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
			return false, nil
		}
		defer r.Body.Close()
		b, err := ioutil.ReadAll(r.Body)
		if err != nil {
			Logf("Could not read response body: %v", err)
			return false, nil
		}
		v = strings.TrimSpace(string(b))
		return true, nil
	}); err != nil {
		return "", fmt.Errorf("failed to fetch real version from %s", url)
	}
	// Versions start with "v", so remove that.
	return strings.TrimPrefix(v, "v"), nil
}
func testMasterUpgrade(ip, v string, mUp func(v string) error) {
	Logf("Starting async validation")
	httpClient := http.Client{Timeout: 2 * time.Second}
	done := make(chan struct{}, 1)
	// Let's make sure we've finished the heartbeat before shutting things down.
	var wg sync.WaitGroup
	go util.Until(func() {
		defer GinkgoRecover()
		wg.Add(1)
		defer wg.Done()
		if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
			r, err := httpClient.Get("http://" + ip)
			if err != nil {
				Logf("Error reaching %s: %v", ip, err)
				return false, nil
			}
			if r.StatusCode < http.StatusOK || r.StatusCode >= http.StatusNotFound {
				Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
				return false, nil
			}
			return true, nil
		}); err != nil {
			// We log the error here because the test will fail at the very end
			// because this validation runs in another goroutine. Without this,
			// a failure is very confusing to track down because from the logs
			// everything looks fine.
			msg := fmt.Sprintf("Failed to contact service during master upgrade: %v", err)
			Logf(msg)
			Failf(msg)
		}
	}, 200*time.Millisecond, done)

	Logf("Starting master upgrade")
	expectNoError(mUp(v))
	done <- struct{}{}
	Logf("Stopping async validation")
	wg.Wait()
	Logf("Master upgrade complete")
}
func newEtcdClient(etcdServer string) (*etcd.Client, error) {
	var (
		client *etcd.Client
		err    error
	)
	for attempt := 1; attempt <= maxConnectAttempts; attempt++ {
		if _, err = tools.GetEtcdVersion(etcdServer); err == nil {
			break
		}
		if attempt == maxConnectAttempts {
			break
		}
		glog.Infof("[Attempt: %d] Attempting access to etcd after 5 second sleep", attempt)
		time.Sleep(5 * time.Second)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to connect to etcd server: %v, error: %v", etcdServer, err)
	}
	glog.Infof("Etcd server found: %v", etcdServer)

	// loop until we have > 0 machines && machines[0] != ""
	poll, timeout := 1*time.Second, 10*time.Second
	if err := wait.Poll(poll, timeout, func() (bool, error) {
		if client = etcd.NewClient([]string{etcdServer}); client == nil {
			return false, fmt.Errorf("etcd.NewClient returned nil")
		}
		client.SyncCluster()
		machines := client.GetCluster()
		if len(machines) == 0 || len(machines[0]) == 0 {
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, fmt.Errorf("Timed out after %s waiting for at least 1 synchronized etcd server in the cluster. Error: %v", timeout, err)
	}
	return client, nil
}
// updateWithRetries applies the given rc as an update, retrying if the
// apiserver rejects the first attempt.
func updateWithRetries(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, applyUpdate updateFunc) (*api.ReplicationController, error) {
	// Each update could take ~100ms, so give it 0.5 seconds.
	var err error
	oldRc := rc
	err = wait.Poll(10*time.Millisecond, 500*time.Millisecond, func() (bool, error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, err = rcClient.Update(rc); err == nil {
			// rc contains the latest controller post update
			return true, nil
		}
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, err = rcClient.Get(oldRc.Name); err != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// The Get passed: rc contains the latest controller, expect a poll for the update.
		return false, nil
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
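// Hypothetical usage sketch (not from the original source): a caller passes a
// closure that re-applies its change to whichever controller object the helper
// hands back, so the mutation survives a conflict-driven re-Get. The replica
// count here is just an example value.
func scaleToThreeSketch(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController) (*api.ReplicationController, error) {
	return updateWithRetries(rcClient, rc, func(rc *api.ReplicationController) {
		rc.Spec.Replicas = 3 // re-applied on every retry against the freshest rc
	})
}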
// waitForNPods polls ps until it finds expect pods, returning their names if
// it can do so before timeout.
func waitForNPods(ps *podStore, expect int, timeout time.Duration) ([]string, error) {
	// Loop until we find expect pods or timeout is passed.
	var pods []*api.Pod
	var errLast error
	found := wait.Poll(poll, timeout, func() (bool, error) {
		pods = ps.List()
		if len(pods) != expect {
			errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods))
			Logf("Error getting pods: %v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil

	// Extract the names of all found pods.
	podNames := make([]string, len(pods))
	for i, p := range pods {
		podNames[i] = p.ObjectMeta.Name
	}
	if !found {
		return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v", expect, timeout, errLast)
	}
	return podNames, nil
}
BeforeEach(func() {
	framework.beforeEach()
	podClient = framework.Client.Pods(framework.Namespace.Name)
})

AfterEach(framework.afterEach)

It("with readiness probe should not be ready before initial delay and never restart", func() {
	p, err := podClient.Create(makePodSpec(probe.withInitialDelay().build(), nil))
	expectNoError(err)
	startTime := time.Now()

	expectNoError(wait.Poll(poll, 90*time.Second, func() (bool, error) {
		p, err := podClient.Get(p.Name)
		if err != nil {
			return false, err
		}
		return api.IsPodReady(p), nil
	}))

	if time.Since(startTime) < 30*time.Second {
		Failf("Pod became ready before its initial delay")
	}

	p, err = podClient.Get(p.Name)
	expectNoError(err)
	isReady, err := podRunningReady(p)
	expectNoError(err)
	Expect(isReady).To(BeTrue())
func TestServiceAccountTokenAutoCreate(t *testing.T) {
	c, _, stopFunc := startServiceAccountTestServer(t)
	defer stopFunc()

	ns := "test-service-account-token-creation"
	name := "my-service-account"

	// Create namespace
	_, err := c.Namespaces().Create(&api.Namespace{ObjectMeta: api.ObjectMeta{Name: ns}})
	if err != nil {
		t.Fatalf("could not create namespace: %v", err)
	}

	// Create service account
	serviceAccount, err := c.ServiceAccounts(ns).Create(&api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: name}})
	if err != nil {
		t.Fatalf("Service Account not created: %v", err)
	}

	// Get token
	token1Name, token1, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}

	// Delete token
	err = c.Secrets(ns).Delete(token1Name)
	if err != nil {
		t.Fatalf("Could not delete token: %v", err)
	}

	// Get recreated token
	token2Name, token2, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}
	if token1Name == token2Name {
		t.Fatalf("Expected new auto-created token name")
	}
	if token1 == token2 {
		t.Fatalf("Expected new auto-created token value")
	}

	// Trigger creation of a new referenced token
	serviceAccount, err = c.ServiceAccounts(ns).Get(name)
	if err != nil {
		t.Fatal(err)
	}
	serviceAccount.Secrets = []api.ObjectReference{}
	_, err = c.ServiceAccounts(ns).Update(serviceAccount)
	if err != nil {
		t.Fatal(err)
	}

	// Get rotated token
	token3Name, token3, err := getReferencedServiceAccountToken(c, ns, name, true)
	if err != nil {
		t.Fatal(err)
	}
	if token3Name == token2Name {
		t.Fatalf("Expected new auto-created token name")
	}
	if token3 == token2 {
		t.Fatalf("Expected new auto-created token value")
	}

	// Delete service account
	err = c.ServiceAccounts(ns).Delete(name)
	if err != nil {
		t.Fatal(err)
	}

	// Wait for tokens to be deleted
	tokensToCleanup := util.NewStringSet(token1Name, token2Name, token3Name)
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		// Get all secrets in the namespace
		secrets, err := c.Secrets(ns).List(labels.Everything(), fields.Everything())
		// Retrieval errors should fail
		if err != nil {
			return false, err
		}
		for _, s := range secrets.Items {
			if tokensToCleanup.Has(s.Name) {
				// Still waiting for tokens to be cleaned up
				return false, nil
			}
		}
		// All clean
		return true, nil
	})
	if err != nil {
		t.Fatalf("Error waiting for tokens to be deleted: %v", err)
	}
}
Failf("Failed to get pod: %v", err) } fmt.Printf("%+v\n", podWithUid) var events *api.EventList // Check for scheduler event about the pod. By("checking for scheduler event about the pod") expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) { events, err := c.Events(api.NamespaceDefault).List( labels.Everything(), fields.Set{ "involvedObject.kind": "Pod", "involvedObject.uid": string(podWithUid.UID), "involvedObject.namespace": api.NamespaceDefault, "source": "scheduler", }.AsSelector(), ) if err != nil { return false, err } if len(events.Items) > 0 { fmt.Println("Saw scheduler event for our pod.") return true, nil } return false, nil })) // Check for qinglet event about the pod. By("checking for qinglet event about the pod") expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) { events, err = c.Events(api.NamespaceDefault).List( labels.Everything(), fields.Set{
func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore cache.Store) {
	goodCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionTrue,
		Reason:            fmt.Sprintf("schedulable condition"),
		LastHeartbeatTime: util.Time{time.Now()},
	}
	badCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionUnknown,
		Reason:            fmt.Sprintf("unschedulable condition"),
		LastHeartbeatTime: util.Time{time.Now()},
	}
	// Create a new schedulable node, since we're first going to apply
	// the unschedulable condition and verify that pods aren't scheduled.
	node := &api.Node{
		ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-node"},
		Spec:       api.NodeSpec{Unschedulable: false},
		Status: api.NodeStatus{
			Capacity: api.ResourceList{
				api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
			Conditions: []api.NodeCondition{goodCondition},
		},
	}
	nodeKey, err := cache.MetaNamespaceKeyFunc(node)
	if err != nil {
		t.Fatalf("Couldn't retrieve key for node %v", node.Name)
	}

	// The test does the following for each nodeStateManager in this list:
	// 1. Create a new node
	// 2. Apply the makeUnSchedulable function
	// 3. Create a new pod
	// 4. Check that the pod doesn't get assigned to the node
	// 5. Apply the schedulable function
	// 6. Check that the pod *does* get assigned to the node
	// 7. Delete the pod and node.
	nodeModifications := []nodeStateManager{
		// Test node.Spec.Unschedulable=true/false
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = true
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=true: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					// An unschedulable node should get deleted from the store
					return node == nil
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = false
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=false: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Spec.Unschedulable == false
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
				}
			},
		},
		// Test node.Status.Conditions=ConditionTrue/Unknown
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{badCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with bad status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionUnknown
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{goodCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with healthy status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionTrue
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
		},
	}

	for i, mod := range nodeModifications {
		unSchedNode, err := restClient.Nodes().Create(node)
		if err != nil {
			t.Fatalf("Failed to create node: %v", err)
		}

		// Apply the unschedulable modification to the node, and wait for the reflection
		mod.makeUnSchedulable(t, unSchedNode, nodeStore, restClient)

		// Create the new pod, note that this needs to happen post unschedulable
		// modification or we have a race in the test.
		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-pod"},
			Spec: api.PodSpec{
				Containers: []api.Container{{Name: "container", Image: "qingyuan/pause:go"}},
			},
		}
		myPod, err := restClient.Pods(api.NamespaceDefault).Create(pod)
		if err != nil {
			t.Fatalf("Failed to create pod: %v", err)
		}

		// There are no schedulable nodes - the pod shouldn't be scheduled.
		err = wait.Poll(time.Second, time.Second*10, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err == nil {
			t.Errorf("Pod scheduled successfully on unschedulable nodes")
		}
		if err != wait.ErrWaitTimeout {
			t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
		} else {
			t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
		}

		// Apply the schedulable modification to the node, and wait for the reflection
		schedNode, err := restClient.Nodes().Get(unSchedNode.Name)
		if err != nil {
			t.Fatalf("Failed to get node: %v", err)
		}
		mod.makeSchedulable(t, schedNode, nodeStore, restClient)

		// Wait until the pod is scheduled.
		err = wait.Poll(time.Second, time.Second*10, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err != nil {
			t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
		} else {
			t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
		}

		err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, nil)
		if err != nil {
			t.Errorf("Failed to delete pod: %v", err)
		}
		err = restClient.Nodes().Delete(schedNode.Name)
		if err != nil {
			t.Errorf("Failed to delete node: %v", err)
		}
	}
}
By("verifying the pod is in qingyuan") pods, err := podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything()) Expect(len(pods.Items)).To(Equal(1)) // Standard get, update retry loop expectNoError(wait.Poll(time.Millisecond*500, time.Second*30, func() (bool, error) { By("updating the pod") value = strconv.Itoa(time.Now().Nanosecond()) if pod == nil { // on retries we need to re-get pod, err = podClient.Get(name) if err != nil { return false, fmt.Errorf("failed to get pod: %v", err) } } pod.Labels["time"] = value pod, err = podClient.Update(pod) if err == nil { Logf("Successfully updated pod") return true, nil } if errors.IsConflict(err) { Logf("Conflicting update to pod, re-get and re-update: %v", err) pod = nil // re-get it when we retry return false, nil } return false, fmt.Errorf("failed to update pod: %v", err) })) expectNoError(waitForPodRunning(c, pod.Name)) By("verifying the updated pod is in qingyuan") pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything())
// A basic test to check the deployment of an image using
// a replication controller. The image serves its hostname
// which is checked for each replica.
func ServeImageOrFail(c *client.Client, test string, image string) {
	ns := api.NamespaceDefault
	name := "my-hostname-" + test + "-" + string(util.NewUUID())
	replicas := 2

	// Create a replication controller for a service
	// that serves its hostname.
	// The source for the Docker container qingyuan/serve_hostname is
	// in contrib/for-demos/serve_hostname
	By(fmt.Sprintf("Creating replication controller %s", name))
	controller, err := c.ReplicationControllers(ns).Create(&api.ReplicationController{
		ObjectMeta: api.ObjectMeta{
			Name: name,
		},
		Spec: api.ReplicationControllerSpec{
			Replicas: replicas,
			Selector: map[string]string{
				"name": name,
			},
			Template: &api.PodTemplateSpec{
				ObjectMeta: api.ObjectMeta{
					Labels: map[string]string{"name": name},
				},
				Spec: api.PodSpec{
					Containers: []api.Container{
						{
							Name:  name,
							Image: image,
							Ports: []api.ContainerPort{{ContainerPort: 9376}},
						},
					},
				},
			},
		},
	})
	Expect(err).NotTo(HaveOccurred())

	// Cleanup the replication controller when we are done.
	defer func() {
		// Resize the replication controller to zero to get rid of pods.
		By("Cleaning up the replication controller")
		rcReaper, err := qingctl.ReaperFor("ReplicationController", c)
		if err != nil {
			Logf("Failed to cleanup replication controller %v: %v.", controller.Name, err)
		}
		if _, err = rcReaper.Stop(ns, controller.Name, 0, nil); err != nil {
			Logf("Failed to stop replication controller %v: %v.", controller.Name, err)
		}
	}()

	// List the pods, making sure we observe all the replicas.
	listTimeout := time.Minute
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
	pods, err := c.Pods(ns).List(label, fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	t := time.Now()
	for {
		Logf("Controller %s: Found %d pods out of %d", name, len(pods.Items), replicas)
		if len(pods.Items) == replicas {
			break
		}
		if time.Since(t) > listTimeout {
			Failf("Controller %s: Gave up waiting for %d pods to come up after seeing only %d pods after %v seconds",
				name, replicas, len(pods.Items), time.Since(t).Seconds())
		}
		time.Sleep(5 * time.Second)
		pods, err = c.Pods(ns).List(label, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
	}

	By("Ensuring each pod is running")
	// Wait for the pods to enter the running state. Waiting loops until the pods
	// are running so non-running pods cause a timeout for this test.
	for _, pod := range pods.Items {
		err = waitForPodRunning(c, pod.Name)
		Expect(err).NotTo(HaveOccurred())
	}

	// Verify that something is listening.
	By("Trying to dial each unique pod")
	retryTimeout := 2 * time.Minute
	retryInterval := 5 * time.Second
	err = wait.Poll(retryInterval, retryTimeout, podResponseChecker{c, ns, label, name, true, pods}.checkAllResponses)
	if err != nil {
		Failf("Did not get expected responses within the timeout period of %.2f seconds.", retryTimeout.Seconds())
	}
}
f := NewFramework("svcaccounts")

It("should mount an API token into pods", func() {
	var tokenName string
	var tokenContent string

	// Standard get, update retry loop
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*10, func() (bool, error) {
		By("getting the auto-created API token")
		tokenSelector := fields.SelectorFromSet(map[string]string{client.SecretType: string(api.SecretTypeServiceAccountToken)})
		secrets, err := f.Client.Secrets(f.Namespace.Name).List(labels.Everything(), tokenSelector)
		if err != nil {
			return false, err
		}
		if len(secrets.Items) == 0 {
			return false, nil
		}
		if len(secrets.Items) > 1 {
			return false, fmt.Errorf("Expected 1 token secret, got %d", len(secrets.Items))
		}
		tokenName = secrets.Items[0].Name
		tokenContent = string(secrets.Items[0].Data[api.ServiceAccountTokenKey])
		return true, nil
	}))

	pod := &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name: "pod-service-account-" + string(util.NewUUID()),
		},
		Spec: api.PodSpec{
			Containers: []api.Container{