func checkExistingRCRecovers(f Framework) {
	By("assert that the pre-existing replication controller recovers")
	podClient := f.Client.Pods(f.Namespace.Name)
	rcSelector := labels.Set{"name": "baz"}.AsSelector()

	By("deleting pods from existing replication controller")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		if err != nil {
			Logf("apiserver returned error, as expected before recovery: %v", err)
			return false, nil
		}
		if len(pods.Items) == 0 {
			return false, nil
		}
		for _, pod := range pods.Items {
			err = podClient.Delete(pod.Name, api.NewDeleteOptions(0))
			Expect(err).NotTo(HaveOccurred())
		}
		Logf("apiserver has recovered")
		return true, nil
	}))

	By("waiting for replication controller to recover")
	expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
		pods, err := podClient.List(rcSelector, fields.Everything())
		Expect(err).NotTo(HaveOccurred())
		for _, pod := range pods.Items {
			if pod.DeletionTimestamp == nil && api.IsPodReady(&pod) {
				return true, nil
			}
		}
		return false, nil
	}))
}
func runSchedulerNoPhantomPodsTest(client *client.Client) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name:  "c1",
					Image: "kubernetes/pause",
					Ports: []api.ContainerPort{
						{ContainerPort: 1234, HostPort: 9999},
					},
					ImagePullPolicy: api.PullIfNotPresent,
				},
			},
		},
	}

	// Assuming we only have two kubelets, the third pod here won't schedule
	// if the scheduler doesn't correctly handle the delete for the second
	// pod.
	pod.ObjectMeta.Name = "phantom.foo"
	foo, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, foo.Namespace, foo.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	pod.ObjectMeta.Name = "phantom.bar"
	bar, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, bar.Namespace, bar.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	// Delete a pod to free up room.
	glog.Infof("Deleting pod %v", bar.Name)
	err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(0))
	if err != nil {
		glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
	}

	pod.ObjectMeta.Name = "phantom.baz"
	baz, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
		if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil {
			glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod)
		} else {
			glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
		}
	}

	glog.Info("Scheduler doesn't make phantom pods: test passed.")
}
// pollForReadyPods polls oldRc and newRc each interval and returns the old
// and new ready counts for their pods. If a pod is observed as being ready,
// it's considered ready even if it later becomes notReady.
func (r *RollingUpdater) pollForReadyPods(interval, timeout time.Duration, oldRc, newRc *api.ReplicationController) (int, int, error) {
	controllers := []*api.ReplicationController{oldRc, newRc}
	oldReady := 0
	newReady := 0
	err := wait.Poll(interval, timeout, func() (done bool, err error) {
		anyReady := false
		for _, controller := range controllers {
			selector := labels.Set(controller.Spec.Selector).AsSelector()
			pods, err := r.c.Pods(controller.Namespace).List(selector, fields.Everything())
			if err != nil {
				return false, err
			}
			for _, pod := range pods.Items {
				if api.IsPodReady(&pod) {
					switch controller.Name {
					case oldRc.Name:
						oldReady++
					case newRc.Name:
						newReady++
					}
					anyReady = true
				}
			}
		}
		if anyReady {
			return true, nil
		}
		return false, nil
	})
	return oldReady, newReady, err
}
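// A hypothetical call site (not from the original source; the updater value
// and the two controllers are assumed to already exist): poll for up to two
// minutes and report the cumulative ready counts observed for each side.
func examplePollForReadyPods(r *RollingUpdater, oldRc, newRc *api.ReplicationController) error {
	oldReady, newReady, err := r.pollForReadyPods(time.Second, 2*time.Minute, oldRc, newRc)
	if err != nil {
		return err
	}
	fmt.Printf("ready pods observed: old=%d new=%d\n", oldReady, newReady)
	return nil
}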
// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
	var errLast error
	var templ string
	key := "instanceTemplate"
	if wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mikedanese): make this hit the compute API directly instead of
		// shelling out to gcloud.
		// An `instance-groups managed describe` call outputs what we want to stdout.
		output, _, err := retryCmd("gcloud", "compute", "instance-groups", "managed",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			"describe",
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			testContext.CloudConfig.NodeInstanceGroup)
		if err != nil {
			errLast = fmt.Errorf("gcloud compute instance-groups managed describe call failed with err: %v", err)
			return false, nil
		}

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "instanceTemplate: url/to/<templ>" and
		// return <templ>.
		if val := parseKVLines(output, key); len(val) > 0 {
			url := strings.Split(val, "/")
			templ = url[len(url)-1]
			Logf("MIG group %s using template: %s", testContext.CloudConfig.NodeInstanceGroup, templ)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find %s in output to get MIG template. Output: %s", key, output)
		return false, nil
	}) != nil {
		return "", fmt.Errorf("migTemplate() failed with last error: %v", errLast)
	}
	return templ, nil
}
// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
	// Two keys and a val.
	status, progress, done := "status", "statusMessage", "ROLLED_OUT"
	start, timeout := time.Now(), nt*time.Duration(testContext.CloudConfig.NumNodes)
	var errLast error
	Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
	if wait.Poll(restartPoll, timeout, func() (bool, error) {
		// A `rolling-updates describe` call outputs what we want to stdout.
		output, _, err := retryCmd("gcloud", append(migUdpateCmdBase(),
			"rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"describe",
			id)...)
		if err != nil {
			errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
			Logf("%v", errLast)
			return false, nil
		}

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "status: <status>" and see whether it's
		// done.
		Logf("Waiting for MIG rolling update: %s (%v elapsed)",
			parseKVLines(output, progress), time.Since(start))
		if st := parseKVLines(output, status); st == done {
			return true, nil
		}
		return false, nil
	}) != nil {
		return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
	}
	Logf("MIG rolling update complete after %v", time.Since(start))
	return nil
}
func assertFilesExist(fileNames []string, fileDir string, pod *api.Pod, client *client.Client) {
	var failed []string

	expectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
		failed = []string{}
		for _, fileName := range fileNames {
			if _, err := client.Get().
				Prefix("proxy").
				Resource("pods").
				Namespace(pod.Namespace).
				Name(pod.Name).
				Suffix(fileDir, fileName).
				Do().Raw(); err != nil {
				Logf("Unable to read %s from pod %s: %v", fileName, pod.Name, err)
				failed = append(failed, fileName)
			}
		}
		if len(failed) == 0 {
			return true, nil
		}
		Logf("Lookups using %s failed for: %v\n", pod.Name, failed)
		return false, nil
	}))
	Expect(len(failed)).To(Equal(0))
}
// Wait till the passFunc confirms that the object it expects to see is in the store.
// Used to observe reflected events.
func waitForReflection(s cache.Store, key string, passFunc func(n interface{}) bool) error {
	return wait.Poll(time.Millisecond*10, time.Second*20, func() (bool, error) {
		if n, _, err := s.GetByKey(key); err == nil && passFunc(n) {
			return true, nil
		}
		return false, nil
	})
}
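// An illustrative passFunc (assumed, not from the source): wait until the
// store's copy of a pod reports a node assignment. The key would typically
// be "<namespace>/<name>" for a store keyed by MetaNamespaceKeyFunc.
func exampleWaitForScheduledPod(s cache.Store, key string) error {
	return waitForReflection(s, key, func(n interface{}) bool {
		pod, ok := n.(*api.Pod)
		return ok && pod.Spec.NodeName != ""
	})
}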
// Wait for job to reach completions.
func waitForJobFinish(c *client.Client, ns, jobName string, completions int) error {
	return wait.Poll(poll, jobTimeout, func() (bool, error) {
		curr, err := c.Extensions().Jobs(ns).Get(jobName)
		if err != nil {
			return false, err
		}
		return curr.Status.Succeeded == completions, nil
	})
}
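// A minimal usage sketch (the job name is made up): block until the job
// records the expected number of successful completions.
func exampleWaitForJob(c *client.Client) error {
	return waitForJobFinish(c, api.NamespaceDefault, "example-job", 5)
}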
func waitForUpToDateCache(cacher *storage.Cacher, resourceVersion uint64) error {
	ready := func() (bool, error) {
		result, err := cacher.LastSyncResourceVersion()
		if err != nil {
			return false, err
		}
		return result == resourceVersion, nil
	}
	return wait.Poll(10*time.Millisecond, util.ForeverTestTimeout, ready)
}
// checkNodesReady waits up to nt for the expected number of nodes (expect)
// accessed by c to be ready, returning an error if this doesn't happen in
// time. It returns the names of the nodes it finds.
func checkNodesReady(c *client.Client, nt time.Duration, expect int) ([]string, error) {
	// First, keep getting all of the nodes until we get the number we expect.
	var nodeList *api.NodeList
	var errLast error
	start := time.Now()
	found := wait.Poll(poll, nt, func() (bool, error) {
		// Even though listNodes(...) has its own retries, a rolling-update
		// (GCE/GKE implementation of restart) can complete before the apiserver
		// knows about all of the nodes. Thus, we retry the list nodes call
		// until we get the expected number of nodes.
		nodeList, errLast = listNodes(c, labels.Everything(), fields.Everything())
		if errLast != nil {
			return false, nil
		}
		if len(nodeList.Items) != expect {
			errLast = fmt.Errorf("expected to find %d nodes but found only %d (%v elapsed)",
				expect, len(nodeList.Items), time.Since(start))
			Logf("%v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	nodeNames := make([]string, len(nodeList.Items))
	for i, n := range nodeList.Items {
		nodeNames[i] = n.ObjectMeta.Name
	}
	if !found {
		return nodeNames, fmt.Errorf("couldn't find %d nodes within %v; last error: %v",
			expect, nt, errLast)
	}
	Logf("Successfully found %d nodes", expect)

	// Next, ensure in parallel that all the nodes are ready. We subtract the
	// time we spent waiting above.
	timeout := nt - time.Since(start)
	result := make(chan bool, len(nodeList.Items))
	for _, n := range nodeNames {
		n := n
		go func() { result <- waitForNodeToBeReady(c, n, timeout) }()
	}
	failed := false
	// TODO(mbforbes): Change to `for range` syntax once we support only Go
	// >= 1.4.
	for i := range nodeList.Items {
		_ = i
		if !<-result {
			failed = true
		}
	}
	if failed {
		return nodeNames, fmt.Errorf("at least one node failed to be ready")
	}
	return nodeNames, nil
}
func (s *ingManager) test(path string) error {
	url := fmt.Sprintf("%v/hostName", path)
	httpClient := &http.Client{}
	return wait.Poll(pollInterval, serviceRespondingTimeout, func() (bool, error) {
		body, err := simpleGET(httpClient, url, "")
		if err != nil {
			Logf("%v\n%v\n%v", url, body, err)
			return false, nil
		}
		return true, nil
	})
}
func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}

		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
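// Hypothetical caller (the service account name is illustrative): fetch the
// default account's token, waiting for the token controller to populate it.
func exampleGetToken(c *client.Client) (string, error) {
	_, token, err := getReferencedServiceAccountToken(c, api.NamespaceDefault, "default", true)
	return token, err
}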
// retryCmd runs cmd using args and retries it for up to singleCallTimeout if
// it returns an error. It returns stdout and stderr.
func retryCmd(command string, args ...string) (string, string, error) {
	var err error
	stdout, stderr := "", ""
	wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		stdout, stderr, err = runCmd(command, args...)
		if err != nil {
			Logf("Got %v", err)
			return false, nil
		}
		return true, nil
	})
	return stdout, stderr, err
}
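// A sketch of a retryCmd call (the gcloud subcommand is illustrative): the
// helper absorbs transient failures and returns the final stdout/stderr pair.
func exampleListInstances() (string, error) {
	stdout, stderr, err := retryCmd("gcloud", "compute", "instances", "list")
	if err != nil {
		return "", fmt.Errorf("listing instances failed: %v (stderr: %v)", err, stderr)
	}
	return stdout, nil
}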
// Scale updates a Job to a new size, with optional precondition check (if preconditions is not nil),
// optional retries (if retry is not nil), and then optionally waits for parallelism to reach desired
// number, which can be less than requested based on job's current progress.
func (scaler *JobScaler) Scale(namespace, name string, newSize uint, preconditions *ScalePrecondition, retry, waitForReplicas *RetryParams) error {
	if preconditions == nil {
		preconditions = &ScalePrecondition{-1, ""}
	}
	if retry == nil {
		// Make it try only once, immediately
		retry = &RetryParams{Interval: time.Millisecond, Timeout: time.Millisecond}
	}
	cond := ScaleCondition(scaler, preconditions, namespace, name, newSize)
	if err := wait.Poll(retry.Interval, retry.Timeout, cond); err != nil {
		return err
	}
	if waitForReplicas != nil {
		job, err := scaler.c.Extensions().Jobs(namespace).Get(name)
		if err != nil {
			return err
		}
		return wait.Poll(waitForReplicas.Interval, waitForReplicas.Timeout,
			client.JobHasDesiredParallelism(scaler.c, job))
	}
	return nil
}
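// An assumed invocation (names and timings are made up): scale a job to 3
// with no precondition, retrying the update each second and then waiting up
// to a minute for the job's parallelism to settle.
func exampleScaleJob(scaler *JobScaler) error {
	retry := &RetryParams{Interval: time.Second, Timeout: time.Minute}
	waitForReplicas := &RetryParams{Interval: time.Second, Timeout: time.Minute}
	return scaler.Scale(api.NamespaceDefault, "example-job", 3, nil, retry, waitForReplicas)
}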
func (h *haproxyControllerTester) start(namespace string) (err error) {
	// Create a replication controller with the given configuration.
	rc := rcFromManifest(h.cfg)
	rc.Namespace = namespace
	rc.Spec.Template.Labels["rcName"] = rc.Name

	// Add the --namespace arg.
	// TODO: Remove this when we have proper namespace support.
	for i, c := range rc.Spec.Template.Spec.Containers {
		rc.Spec.Template.Spec.Containers[i].Args = append(
			c.Args, fmt.Sprintf("--namespace=%v", namespace))
		Logf("Container args %+v", rc.Spec.Template.Spec.Containers[i].Args)
	}

	rc, err = h.client.ReplicationControllers(rc.Namespace).Create(rc)
	if err != nil {
		return
	}
	// Record the name/namespace before waiting, so that the wait below polls
	// the controller we actually created rather than an empty name.
	h.rcName = rc.Name
	h.rcNamespace = rc.Namespace
	if err = waitForRCPodsRunning(h.client, namespace, h.rcName); err != nil {
		return
	}

	// Find the pods of the rc we just created.
	labelSelector := labels.SelectorFromSet(
		labels.Set(map[string]string{"rcName": h.rcName}))
	pods, err := h.client.Pods(h.rcNamespace).List(
		labelSelector, fields.Everything())
	if err != nil {
		return err
	}

	// Find the external addresses of the nodes the pods are running on.
	for _, p := range pods.Items {
		wait.Poll(pollInterval, serviceRespondingTimeout, func() (bool, error) {
			address, err := getHostExternalAddress(h.client, &p)
			if err != nil {
				Logf("%v", err)
				return false, nil
			}
			h.address = append(h.address, address)
			return true, nil
		})
	}
	if len(h.address) == 0 {
		return fmt.Errorf("No external ips found for loadbalancer %v", h.getName())
	}
	return nil
}
// migRollingUpdateStart (GCE/GKE-only) starts a MIG rolling update using templ
// as the new template, waiting up to nt per node, and returns the ID of that
// update.
func migRollingUpdateStart(templ string, nt time.Duration) (string, error) {
	var errLast error
	var id string
	prefix, suffix := "Started [", "]."
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		// TODO(mikedanese): make this hit the compute API directly instead of
		// shelling out to gcloud.
		// NOTE(mikedanese): If you are changing this gcloud command, update
		// cluster/gce/upgrade.sh to match this EXACTLY.
		// A `rolling-updates start` call outputs what we want to stderr.
		_, output, err := retryCmd("gcloud", append(migUdpateCmdBase(),
			"rolling-updates",
			fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", testContext.CloudConfig.Zone),
			"start",
			// Required args.
			fmt.Sprintf("--group=%s", testContext.CloudConfig.NodeInstanceGroup),
			fmt.Sprintf("--template=%s", templ),
			// Optional args to fine-tune behavior.
			fmt.Sprintf("--instance-startup-timeout=%ds", int(nt.Seconds())),
			// NOTE: We can speed up this process by increasing
			//       --max-num-concurrent-instances.
			fmt.Sprintf("--max-num-concurrent-instances=%d", 1),
			fmt.Sprintf("--max-num-failed-instances=%d", 0),
			fmt.Sprintf("--min-instance-update-time=%ds", 0))...)
		if err != nil {
			errLast = fmt.Errorf("rolling-updates call failed with err: %v", err)
			return false, nil
		}

		// The 'start' call probably succeeded; parse the output and try to find
		// the line that looks like "Started [url/to/<id>]." and return <id>.
		for _, line := range strings.Split(output, "\n") {
			// As a sanity check, ensure the line starts with prefix and ends
			// with suffix.
			if strings.Index(line, prefix) != 0 || strings.Index(line, suffix) != len(line)-len(suffix) {
				continue
			}
			url := strings.Split(strings.TrimSuffix(strings.TrimPrefix(line, prefix), suffix), "/")
			id = url[len(url)-1]
			Logf("Started MIG rolling update; ID: %s", id)
			return true, nil
		}
		errLast = fmt.Errorf("couldn't find line like '%s ... %s' in output to MIG rolling-update start. Output: %s",
			prefix, suffix, output)
		return false, nil
	}); err != nil {
		return "", fmt.Errorf("migRollingUpdateStart() failed with last error: %v", errLast)
	}
	return id, nil
}
func runReplicationControllerTest(c *client.Client) {
	clientAPIVersion := c.APIVersion()
	data, err := ioutil.ReadFile("cmd/integration/" + clientAPIVersion + "-controller.json")
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	var controller api.ReplicationController
	if err := api.Scheme.DecodeInto(data, &controller); err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}

	glog.Infof("Creating replication controllers")
	updated, err := c.ReplicationControllers("test").Create(&controller)
	if err != nil {
		glog.Fatalf("Unexpected error: %v", err)
	}
	glog.Infof("Done creating replication controllers")

	// In practice the controller doesn't need 60s to create a handful of pods, but network latencies on CI
	// systems have been observed to vary unpredictably, so give the controller enough time to create pods.
	// Our e2e scalability tests will catch controllers that are *actually* slow.
	if err := wait.Poll(time.Second, time.Second*60, client.ControllerHasDesiredReplicas(c, updated)); err != nil {
		glog.Fatalf("FAILED: pods never created %v", err)
	}

	// Poll till we can retrieve the status of all pods matching the given label selector from their minions.
	// This involves 3 operations:
	//   - The scheduler must assign all pods to a minion
	//   - The assignment must reflect in a `List` operation against the apiserver, for labels matching the selector
	//   - We need to be able to query the kubelet on that minion for information about the pod
	if err := wait.Poll(
		time.Second, time.Second*30, podsOnMinions(c, "test", labels.Set(updated.Spec.Selector).AsSelector())); err != nil {
		glog.Fatalf("FAILED: pods never started running %v", err)
	}

	glog.Infof("Pods created")
}
func (config *KubeProxyTestConfig) waitForLoadBalancerIngressSetup() {
	err := wait.Poll(2*time.Second, 120*time.Second, func() (bool, error) {
		service, err := config.getServiceClient().Get(loadBalancerServiceName)
		if err != nil {
			return false, err
		}
		// Keep polling until the ingress point shows up; returning an error
		// here would abort the poll on the first attempt.
		return len(service.Status.LoadBalancer.Ingress) > 0, nil
	})
	Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Load Balancer Service ingress was not set up. err: %v", err))
	config.loadBalancerService, _ = config.getServiceClient().Get(loadBalancerServiceName)
}
// Wait for all pods to become Running. Only use when pods will run for a long time, or it will be racy.
func waitForAllPodsRunning(c *client.Client, ns, jobName string, parallelism int) error {
	label := labels.SelectorFromSet(labels.Set(map[string]string{jobSelectorKey: jobName}))
	return wait.Poll(poll, jobTimeout, func() (bool, error) {
		pods, err := c.Pods(ns).List(label, fields.Everything())
		if err != nil {
			return false, err
		}
		count := 0
		for _, p := range pods.Items {
			if p.Status.Phase == api.PodRunning {
				count++
			}
		}
		return count == parallelism, nil
	})
}
func waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error)) error {
	if op == nil {
		return fmt.Errorf("operation must not be nil")
	}
	if opIsDone(op) {
		return getErrorFromOp(op)
	}

	opName := op.Name
	return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
		pollOp, err := getOperation(opName)
		if err != nil {
			glog.Warningf("GCE poll operation failed: %v", err)
		}
		return opIsDone(pollOp), getErrorFromOp(pollOp)
	})
}
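// Illustrative wiring (the service/projectID/zone fields follow the usage
// elsewhere in this file but are otherwise an assumption): poll a zonal
// operation to completion by closing over the operations client.
func exampleWaitForZoneOp(gce *GCECloud, op *compute.Operation) error {
	return waitForOp(op, func(operationName string) (*compute.Operation, error) {
		return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, operationName).Do()
	})
}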
func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
	By("Creating testing namespaces")
	wg := &sync.WaitGroup{}
	for n := 0; n < totalNS; n++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			defer GinkgoRecover()
			// Use a goroutine-local err so concurrent creations don't race on
			// a shared variable.
			_, err := createTestingNS(fmt.Sprintf("nslifetest-%v", n), c)
			Expect(err).NotTo(HaveOccurred())
		}(n)
	}
	wg.Wait()

	// Wait 10 seconds, then send delete requests for all the namespaces.
	By("Waiting 10 seconds")
	time.Sleep(time.Duration(10 * time.Second))
	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
	Expect(err).NotTo(HaveOccurred())
	Expect(len(deleted)).To(Equal(totalNS))

	// Now poll until all the namespaces have been eradicated.
	By("Waiting for namespaces to vanish")
	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
		func() (bool, error) {
			var cnt = 0
			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
			if err != nil {
				return false, err
			}
			for _, item := range nsList.Items {
				if strings.Contains(item.Name, "nslifetest") {
					cnt++
				}
			}
			if cnt > maxAllowedAfterDel {
				Logf("Remaining namespaces : %v", cnt)
				return false, nil
			}
			return true, nil
		}))
}
func (gce *GCECloud) AddSSHKeyToAllInstances(user string, keyData []byte) error {
	return wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
		project, err := gce.service.Projects.Get(gce.projectID).Do()
		if err != nil {
			glog.Errorf("Could not get project: %v", err)
			return false, nil
		}
		keyString := fmt.Sprintf("%s:%s %s@%s", user, strings.TrimSpace(string(keyData)), user, user)
		found := false
		for _, item := range project.CommonInstanceMetadata.Items {
			if item.Key == "sshKeys" {
				if strings.Contains(item.Value, keyString) {
					// We've already added the key
					glog.Info("SSHKey already in project metadata")
					return true, nil
				}
				item.Value = item.Value + "\n" + keyString
				found = true
				break
			}
		}
		if !found {
			// This is super unlikely, so log.
			glog.Infof("Failed to find sshKeys metadata, creating a new item")
			project.CommonInstanceMetadata.Items = append(project.CommonInstanceMetadata.Items,
				&compute.MetadataItems{
					Key:   "sshKeys",
					Value: keyString,
				})
		}
		op, err := gce.service.Projects.SetCommonInstanceMetadata(gce.projectID, project.CommonInstanceMetadata).Do()
		if err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		if err := gce.waitForGlobalOp(op); err != nil {
			glog.Errorf("Could not Set Metadata: %v", err)
			return false, nil
		}
		glog.Infof("Successfully added sshKey to project metadata")
		return true, nil
	})
}
func getServiceAccount(c *client.Client, ns string, name string, shouldWait bool) (*api.ServiceAccount, error) {
	if !shouldWait {
		return c.ServiceAccounts(ns).Get(name)
	}

	var user *api.ServiceAccount
	var err error
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		user, err = c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return true, nil
	})
	return user, err
}
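// A hypothetical call (names assumed): block for up to ten seconds until the
// "default" service account appears in a freshly created namespace.
func exampleGetDefaultAccount(c *client.Client, ns string) (*api.ServiceAccount, error) {
	return getServiceAccount(c, ns, "default", true /* shouldWait */)
}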
// getNodePort waits for the Service, and returns its first node port.
func getNodePort(client *client.Client, ns, name string) (nodePort int64, err error) {
	var svc *api.Service
	glog.Infof("Waiting for %v/%v", ns, name)
	wait.Poll(1*time.Second, 5*time.Minute, func() (bool, error) {
		svc, err = client.Services(ns).Get(name)
		if err != nil {
			return false, nil
		}
		for _, p := range svc.Spec.Ports {
			if p.NodePort != 0 {
				nodePort = int64(p.NodePort)
				glog.Infof("Node port %v", nodePort)
				break
			}
		}
		return true, nil
	})
	return
}
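// An illustrative lookup (the service name is made up): resolve the first
// node port of a NodePort service so a test can hit it on any node.
func exampleGetNodePort(client *client.Client) (int64, error) {
	return getNodePort(client, api.NamespaceDefault, "example-nodeport-svc")
}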
// cleanupWithClients performs cleanup tasks after the rolling update. Update
// process related annotations are removed from oldRc and newRc. The
// CleanupPolicy on config is executed.
func (r *RollingUpdater) cleanupWithClients(oldRc, newRc *api.ReplicationController, config *RollingUpdaterConfig) error {
	// Clean up annotations
	var err error
	newRc, err = r.c.ReplicationControllers(r.ns).Get(newRc.Name)
	if err != nil {
		return err
	}
	delete(newRc.Annotations, sourceIdAnnotation)
	delete(newRc.Annotations, desiredReplicasAnnotation)

	newRc, err = r.c.ReplicationControllers(r.ns).Update(newRc)
	if err != nil {
		return err
	}
	if err = wait.Poll(config.Interval, config.Timeout, client.ControllerHasDesiredReplicas(r.c, newRc)); err != nil {
		return err
	}
	newRc, err = r.c.ReplicationControllers(r.ns).Get(newRc.Name)
	if err != nil {
		return err
	}

	switch config.CleanupPolicy {
	case DeleteRollingUpdateCleanupPolicy:
		// delete old rc
		fmt.Fprintf(config.Out, "Update succeeded. Deleting %s\n", oldRc.Name)
		return r.c.ReplicationControllers(r.ns).Delete(oldRc.Name)
	case RenameRollingUpdateCleanupPolicy:
		// delete old rc
		fmt.Fprintf(config.Out, "Update succeeded. Deleting old controller: %s\n", oldRc.Name)
		if err := r.c.ReplicationControllers(r.ns).Delete(oldRc.Name); err != nil {
			return err
		}
		fmt.Fprintf(config.Out, "Renaming %s to %s\n", newRc.Name, oldRc.Name)
		return Rename(r.c, newRc, oldRc.Name)
	case PreserveRollingUpdateCleanupPolicy:
		return nil
	default:
		return nil
	}
}
func testMasterUpgrade(ip, v string, mUp func(v string) error) {
	Logf("Starting async validation")
	httpClient := http.Client{Timeout: 2 * time.Second}
	done := make(chan struct{}, 1)
	// Let's make sure we've finished the heartbeat before shutting things down.
	var wg sync.WaitGroup
	go util.Until(func() {
		defer GinkgoRecover()
		wg.Add(1)
		defer wg.Done()
		if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
			r, err := httpClient.Get("http://" + ip)
			if err != nil {
				Logf("Error reaching %s: %v", ip, err)
				return false, nil
			}
			if r.StatusCode < http.StatusOK || r.StatusCode >= http.StatusNotFound {
				Logf("Bad response; status: %d, response: %v", r.StatusCode, r)
				return false, nil
			}
			return true, nil
		}); err != nil {
			// We log the error here because the test will fail at the very end
			// because this validation runs in another goroutine. Without this,
			// a failure is very confusing to track down because from the logs
			// everything looks fine.
			msg := fmt.Sprintf("Failed to contact service during master upgrade: %v", err)
			Logf(msg)
			Failf(msg)
		}
	}, 200*time.Millisecond, done)

	Logf("Starting master upgrade")
	expectNoError(mUp(v))
	done <- struct{}{}
	Logf("Stopping async validation")
	wg.Wait()
	Logf("Master upgrade complete")
}
func setDaemonSetNodeLabels(c *client.Client, nodeName string, labels map[string]string) (*api.Node, error) {
	nodeClient := c.Nodes()
	var newNode *api.Node
	var newLabels map[string]string
	err := wait.Poll(updateRetryPeriod, updateRetryTimeout, func() (bool, error) {
		node, err := nodeClient.Get(nodeName)
		if err != nil {
			return false, err
		}

		// remove all labels this test is creating
		daemonSetLabels, otherLabels := separateDaemonSetNodeLabels(node.Labels)
		if reflect.DeepEqual(daemonSetLabels, labels) {
			newNode = node
			return true, nil
		}
		node.Labels = otherLabels
		for k, v := range labels {
			node.Labels[k] = v
		}
		newNode, err = nodeClient.Update(node)
		if err == nil {
			newLabels, _ = separateDaemonSetNodeLabels(newNode.Labels)
			return true, err
		}
		if se, ok := err.(*apierrs.StatusError); ok && se.ErrStatus.Reason == unversioned.StatusReasonConflict {
			Logf("failed to update node due to resource version conflict")
			return false, nil
		}
		return false, err
	})
	if err != nil {
		return nil, err
	} else if len(newLabels) != len(labels) {
		return nil, fmt.Errorf("Could not set daemon set test labels as expected.")
	}
	return newNode, nil
}
// waitUp polls healthz of the daemon till it returns "ok" or the polling hits the pollTimeout
func (r *restartDaemonConfig) waitUp() {
	Logf("Checking if %v is up by polling for a 200 on its /healthz endpoint", r)
	healthzCheck := fmt.Sprintf(
		"curl -s -o /dev/null -I -w \"%%{http_code}\" http://localhost:%v/healthz", r.healthzPort)

	err := wait.Poll(r.pollInterval, r.pollTimeout, func() (bool, error) {
		stdout, stderr, code, err := nodeExec(r.nodeName, healthzCheck)
		expectNoError(err)
		if code == 0 {
			httpCode, err := strconv.Atoi(stdout)
			if err != nil {
				Logf("Unable to parse healthz http return code: %v", err)
			} else if httpCode == 200 {
				return true, nil
			}
		}
		Logf("node %v exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v",
			r.nodeName, healthzCheck, code, stdout, stderr)
		return false, nil
	})
	expectNoError(err, "%v did not respond with a 200 via %v within %v", r, healthzCheck, r.pollTimeout)
}
// waitTillNPodsRunningOnNodes polls the /runningpods endpoint on kubelet until
// it finds targetNumPods pods that match the given criteria (namespace and
// podNamePrefix). Note that we usually use label selector to filter pods that
// belong to the same RC. However, we use podNamePrefix with namespace here
// because pods returned from /runningpods do not contain the original label
// information; they are reconstructed by examining the container runtime. In
// the scope of this test, we do not expect pod naming conflicts so
// podNamePrefix should be sufficient to identify the pods.
func waitTillNPodsRunningOnNodes(c *client.Client, nodeNames sets.String, podNamePrefix string, namespace string, targetNumPods int, timeout time.Duration) error {
	return wait.Poll(pollInterval, timeout, func() (bool, error) {
		matchCh := make(chan sets.String, len(nodeNames))
		for _, item := range nodeNames.List() {
			// Launch a goroutine per node to check the pods running on the nodes.
			nodeName := item
			go func() {
				matchCh <- getPodMatches(c, nodeName, podNamePrefix, namespace)
			}()
		}

		seen := sets.NewString()
		for i := 0; i < len(nodeNames.List()); i++ {
			seen = seen.Union(<-matchCh)
		}
		if seen.Len() == targetNumPods {
			return true, nil
		}
		Logf("Waiting for %d pods to be running on the node; %d are currently running;",
			targetNumPods, seen.Len())
		return false, nil
	})
}
// updateWithRetries applies the given update function to rc and retries the
// resulting update against the apiserver until it succeeds or times out.
func updateWithRetries(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, applyUpdate updateFunc) (*api.ReplicationController, error) {
	var err error
	oldRc := rc
	err = wait.Poll(10*time.Millisecond, 1*time.Minute, func() (bool, error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, err = rcClient.Update(rc); err == nil {
			// rc contains the latest controller post update
			return true, nil
		}
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, err = rcClient.Get(oldRc.Name); err != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// The Get passed: rc contains the latest controller, expect a poll for the update.
		return false, nil
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
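// An illustrative applyUpdate (assumed, not from the source): bump the
// replica count; the helper re-reads the controller and retries when the
// update is rejected, e.g. on a stale resource version.
func exampleSetReplicas(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, n int) (*api.ReplicationController, error) {
	return updateWithRetries(rcClient, rc, func(rc *api.ReplicationController) {
		rc.Spec.Replicas = n
	})
}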