func runPortForward(ns, podName string, port int) *portForwardCommand {
	cmd := framework.KubectlCmd("port-forward", fmt.Sprintf("--namespace=%v", ns), podName, fmt.Sprintf(":%d", port))
	// This is somewhat ugly but is the only way to retrieve the port that was picked
	// by the port-forward command. We don't want to hard code the port as we have no
	// way of guaranteeing we can pick one that isn't in use, particularly on Jenkins.
	framework.Logf("starting port-forward command and streaming output")
	_, stderr, err := framework.StartCmdAndStreamOutput(cmd)
	if err != nil {
		framework.Failf("Failed to start port-forward command: %v", err)
	}

	buf := make([]byte, 128)
	var n int
	framework.Logf("reading from `kubectl port-forward` command's stderr")
	if n, err = stderr.Read(buf); err != nil {
		framework.Failf("Failed to read from kubectl port-forward stderr: %v", err)
	}

	portForwardOutput := string(buf[:n])
	match := portForwardRegexp.FindStringSubmatch(portForwardOutput)
	if len(match) != 2 {
		framework.Failf("Failed to parse kubectl port-forward output: %s", portForwardOutput)
	}

	listenPort, err := strconv.Atoi(match[1])
	if err != nil {
		framework.Failf("Error converting %s to an int: %v", match[1], err)
	}

	return &portForwardCommand{
		cmd:  cmd,
		port: listenPort,
	}
}
// Since the GCL API is not easily reachable from outside the cluster,
// we use the gcloud command to perform the search with the given filter.
func readFilteredEntriesFromGcl(filter string) ([]string, error) {
	framework.Logf("Reading entries from GCL with filter '%v'", filter)
	argList := []string{
		"beta",
		"logging",
		"read",
		filter,
		"--format",
		"json",
		"--project",
		framework.TestContext.CloudConfig.ProjectID,
	}
	output, err := exec.Command("gcloud", argList...).CombinedOutput()
	if err != nil {
		return nil, err
	}

	var entries []*LogEntry
	if err = json.Unmarshal(output, &entries); err != nil {
		return nil, err
	}
	framework.Logf("Read %d entries from GCL", len(entries))

	var result []string
	for _, entry := range entries {
		if entry.TextPayload != "" {
			result = append(result, entry.TextPayload)
		}
	}

	return result, nil
}
func waitForPodsOrDie(cs *kubernetes.Clientset, ns string, n int) {
	By("Waiting for all pods to be running")
	err := wait.PollImmediate(framework.Poll, schedulingTimeout, func() (bool, error) {
		pods, err := cs.Core().Pods(ns).List(v1.ListOptions{LabelSelector: "foo=bar"})
		if err != nil {
			return false, err
		}
		if pods == nil {
			return false, fmt.Errorf("pods is nil")
		}
		if len(pods.Items) < n {
			framework.Logf("pods: %v < %v", len(pods.Items), n)
			return false, nil
		}
		ready := 0
		for i := 0; i < n; i++ {
			if pods.Items[i].Status.Phase == v1.PodRunning {
				ready++
			}
		}
		if ready < n {
			framework.Logf("running pods: %v < %v", ready, n)
			return false, nil
		}
		return true, nil
	})
	framework.ExpectNoError(err, "Waiting for pods in namespace %q to be ready", ns)
}
// waitForPDDetach waits for the specified PD to detach from the specified node.
func waitForPDDetach(diskName string, nodeName types.NodeName) error {
	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
		framework.Logf("Waiting for GCE PD %q to detach from node %q.", diskName, nodeName)
		gceCloud, err := getGCECloud()
		if err != nil {
			return err
		}

		for start := time.Now(); time.Since(start) < gcePDDetachTimeout; time.Sleep(gcePDDetachPollTime) {
			diskAttached, err := gceCloud.DiskIsAttached(diskName, nodeName)
			if err != nil {
				framework.Logf("Error waiting for PD %q to detach from node %q. 'DiskIsAttached(...)' failed with %v", diskName, nodeName, err)
				return err
			}

			if !diskAttached {
				// Specified disk does not appear to be attached to specified node
				framework.Logf("GCE PD %q appears to have successfully detached from %q.", diskName, nodeName)
				return nil
			}

			framework.Logf("Waiting for GCE PD %q to detach from %q.", diskName, nodeName)
		}

		return fmt.Errorf("Gave up waiting for GCE PD %q to detach from %q after %v", diskName, nodeName, gcePDDetachTimeout)
	}

	return nil
}
// registerClusters cannot be moved to util, as By and Expect must be called from within a Ginkgo test unit.
func registerClusters(clusters map[string]*cluster, userAgentName, federationName string, f *framework.Framework) string {
	contexts := f.GetUnderlyingFederatedContexts()

	for _, context := range contexts {
		createClusterObjectOrFail(f, &context)
	}

	By("Obtaining a list of all the clusters")
	clusterList := waitForAllClustersReady(f, len(contexts))

	framework.Logf("Checking that %d clusters are Ready", len(contexts))
	for _, context := range contexts {
		clusterIsReadyOrFail(f, &context)
	}
	framework.Logf("%d clusters are Ready", len(contexts))

	primaryClusterName := clusterList.Items[0].Name
	By(fmt.Sprintf("Labeling %q as the first cluster", primaryClusterName))
	for i, c := range clusterList.Items {
		framework.Logf("Creating a clientset for the cluster %s", c.Name)
		Expect(framework.TestContext.KubeConfig).ToNot(Equal(""), "KubeConfig must be specified to load clusters' client config")
		clusters[c.Name] = &cluster{c.Name, createClientsetForCluster(c, i, userAgentName), false, nil}
	}
	createNamespaceInClusters(clusters, f)
	return primaryClusterName
}
// performTemporaryNetworkFailure blocks outgoing network traffic on 'node'. It then verifies that
// 'podNameToDisappear', which belongs to replication controller 'rcName', really disappeared.
// Finally, it checks that the replication controller recreates the pods on another node and that
// the number of replicas is again equal to 'replicas'.
// At the end (even in case of errors), the network traffic is brought back to normal.
// This function executes commands on a node, so it only works in some environments.
func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int32, podNameToDisappear string, node *api.Node) {
	host := getNodeExternalIP(node)
	master := getMaster(c)
	By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
	defer func() {
		// This code will execute even if setting the iptables rule failed.
		// It is on purpose because we may have an error even if the new rule
		// had been inserted. (yes, we could look at the error code and ssh error
		// separately, but I prefer to stay on the safe side).
		By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
		framework.UnblockNetwork(host, master)
	}()

	framework.Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
		framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
	}
	framework.BlockNetwork(host, master)

	framework.Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
		framework.Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
	}

	framework.Logf("Waiting for pod %s to be removed", podNameToDisappear)
	err := framework.WaitForRCPodToDisappear(c, ns, rcName, podNameToDisappear)
	Expect(err).NotTo(HaveOccurred())

	By("verifying whether the pod from the unreachable node is recreated")
	err = framework.VerifyPods(c, ns, rcName, true, replicas)
	Expect(err).NotTo(HaveOccurred())

	// network traffic is unblocked in a deferred function
}
// logAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit.
func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary, memLimits framework.ResourceUsagePerContainer, testInfo map[string]string, isVerify bool) {
	nodeName := framework.TestContext.NodeName

	// Obtain memory PerfData
	usagePerContainer, err := rc.GetLatest()
	Expect(err).NotTo(HaveOccurred())
	framework.Logf("%s", formatResourceUsageStats(usagePerContainer))

	usagePerNode := make(framework.ResourceUsagePerNode)
	usagePerNode[nodeName] = usagePerContainer

	// Obtain CPU PerfData
	cpuSummary := rc.GetCPUSummary()
	framework.Logf("%s", formatCPUSummary(cpuSummary))

	cpuSummaryPerNode := make(framework.NodesCPUSummary)
	cpuSummaryPerNode[nodeName] = cpuSummary

	// Print resource usage
	framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo))
	framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo))

	// Verify resource usage
	if isVerify {
		verifyMemoryLimits(f.Client, memLimits, usagePerNode)
		verifyCPULimits(cpuLimits, cpuSummaryPerNode)
	}
}
// setKubeletAPIQPSLimit sets the Kubelet API QPS via ConfigMap. The Kubelet will restart with the new QPS.
func setKubeletAPIQPSLimit(f *framework.Framework, newAPIQPS int32) {
	const restartGap = 40 * time.Second

	resp := pollConfigz(2*time.Minute, 5*time.Second)
	kubeCfg, err := decodeConfigz(resp)
	framework.ExpectNoError(err)
	framework.Logf("Old QPS limit is: %d\n", kubeCfg.KubeAPIQPS)

	// Set the new API QPS limit.
	kubeCfg.KubeAPIQPS = newAPIQPS
	// TODO(coufon): createConfigMap should first check whether the ConfigMap already exists and, if so, use updateConfigMap.
	// Calling createConfigMap twice results in an error. That is fine for the benchmark test because we only run one test on a new node.
	_, err = createConfigMap(f, kubeCfg)
	framework.ExpectNoError(err)

	// Wait for the Kubelet to restart.
	time.Sleep(restartGap)

	// Check that the new QPS has been set.
	resp = pollConfigz(2*time.Minute, 5*time.Second)
	kubeCfg, err = decodeConfigz(resp)
	framework.ExpectNoError(err)
	framework.Logf("New QPS limit is: %d\n", kubeCfg.KubeAPIQPS)

	// TODO(coufon): check the test result to see if we need to retry here.
	if kubeCfg.KubeAPIQPS != newAPIQPS {
		framework.Failf("Failed to set new kubelet API QPS limit.")
	}
}
// testUnderTemporaryNetworkFailure blocks outgoing network traffic on 'node' and then runs testFunc.
// At the end (even in case of errors), the network traffic is brought back to normal.
// This function executes commands on a node, so it only works in some environments.
func testUnderTemporaryNetworkFailure(c clientset.Interface, ns string, node *api.Node, testFunc func()) {
	host := framework.GetNodeExternalIP(node)
	master := framework.GetMasterAddress(c)
	By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
	defer func() {
		// This code will execute even if setting the iptables rule failed.
		// It is on purpose because we may have an error even if the new rule
		// had been inserted. (yes, we could look at the error code and ssh error
		// separately, but I prefer to stay on the safe side).
		By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
		framework.UnblockNetwork(host, master)
	}()

	framework.Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
		framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
	}
	framework.BlockNetwork(host, master)

	framework.Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
	if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
		framework.Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
	}

	testFunc()
	// network traffic is unblocked in a deferred function
}
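// Hypothetical usage sketch (not part of the original source): a test could wrap its checks
// in testUnderTemporaryNetworkFailure so that the deferred UnblockNetwork call restores
// traffic even if the checks fail. The client, namespace, and node are assumed to come
// from the surrounding test setup.
func exampleTemporaryNetworkFailure(c clientset.Interface, ns string, node *api.Node) {
	testUnderTemporaryNetworkFailure(c, ns, node, func() {
		// Run checks that should hold while the node is partitioned from the master.
		framework.Logf("node %s is partitioned from the master; running checks", node.Name)
	})
}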
func singleServiceLatency(f *framework.Framework, name string, q *endpointQueries) (time.Duration, error) {
	// Make a service that points to that pod.
	svc := &api.Service{
		ObjectMeta: api.ObjectMeta{
			GenerateName: "latency-svc-",
		},
		Spec: api.ServiceSpec{
			Ports:           []api.ServicePort{{Protocol: api.ProtocolTCP, Port: 80}},
			Selector:        map[string]string{"name": name},
			Type:            api.ServiceTypeClusterIP,
			SessionAffinity: api.ServiceAffinityNone,
		},
	}
	startTime := time.Now()
	gotSvc, err := f.Client.Services(f.Namespace.Name).Create(svc)
	if err != nil {
		return 0, err
	}
	framework.Logf("Created: %v", gotSvc.Name)
	defer f.Client.Services(gotSvc.Namespace).Delete(gotSvc.Name)

	if e := q.request(gotSvc.Name); e == nil {
		return 0, fmt.Errorf("Never got a result for endpoint %v", gotSvc.Name)
	}
	stopTime := time.Now()
	d := stopTime.Sub(startTime)
	framework.Logf("Got endpoints: %v [%v]", gotSvc.Name, d)
	return d, nil
}
// testHostIP tests that a pod gets a host IP.
func testHostIP(c *client.Client, ns string, pod *api.Pod) {
	podClient := c.Pods(ns)
	By("creating pod")
	defer podClient.Delete(pod.Name, api.NewDeleteOptions(0))
	if _, err := podClient.Create(pod); err != nil {
		framework.Failf("Failed to create pod: %v", err)
	}
	By("ensuring that pod is running and has a hostIP")
	// Wait for the pods to enter the running state. Waiting loops until the pods
	// are running so non-running pods cause a timeout for this test.
	err := framework.WaitForPodRunningInNamespace(c, pod.Name, ns)
	Expect(err).NotTo(HaveOccurred())
	// Try to make sure we get a hostIP for each pod.
	hostIPTimeout := 2 * time.Minute
	t := time.Now()
	for {
		p, err := podClient.Get(pod.Name)
		Expect(err).NotTo(HaveOccurred())
		if p.Status.HostIP != "" {
			framework.Logf("Pod %s has hostIP: %s", p.Name, p.Status.HostIP)
			break
		}
		if time.Since(t) >= hostIPTimeout {
			framework.Failf("Gave up waiting for hostIP of pod %s after %v seconds", p.Name, time.Since(t).Seconds())
		}
		framework.Logf("Retrying to get the hostIP of pod %s", p.Name)
		time.Sleep(5 * time.Second)
	}
}
// Delete the PVC and wait for the PV to become Available again. Validate that the PV
// has recycled (assumption here about reclaimPolicy). Caller tells this func which
// phase value to expect for the pv bound to the to-be-deleted claim.
func deletePVCandValidatePV(c clientset.Interface, ns string, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume, expctPVPhase v1.PersistentVolumePhase) {
	pvname := pvc.Spec.VolumeName
	framework.Logf("Deleting PVC %v to trigger recycling of PV %v", pvc.Name, pvname)
	deletePersistentVolumeClaim(c, pvc.Name, ns)

	// Check that the PVC is really deleted.
	pvc, err := c.Core().PersistentVolumeClaims(ns).Get(pvc.Name, metav1.GetOptions{})
	Expect(apierrs.IsNotFound(err)).To(BeTrue())

	// Wait for the PV's phase to return to Available.
	framework.Logf("Waiting for recycling process to complete.")
	err = framework.WaitForPersistentVolumePhase(expctPVPhase, c, pv.Name, 1*time.Second, 300*time.Second)
	Expect(err).NotTo(HaveOccurred())

	// Examine the PV's ClaimRef and UID and compare them to the expected values.
	pv, err = c.Core().PersistentVolumes().Get(pv.Name, metav1.GetOptions{})
	Expect(err).NotTo(HaveOccurred())
	cr := pv.Spec.ClaimRef
	if expctPVPhase == v1.VolumeAvailable {
		if cr != nil { // may be ok if cr != nil
			Expect(len(cr.UID)).To(BeZero())
		}
	} else if expctPVPhase == v1.VolumeBound {
		Expect(cr).NotTo(BeNil())
		Expect(len(cr.UID)).NotTo(BeZero())
	}

	framework.Logf("PV %v now in %q phase", pv.Name, expctPVPhase)
}
// deletePod deletes the passed-in pod.
func deletePod(f *framework.Framework, c *client.Client, ns string, pod *api.Pod) error {
	framework.Logf("Deleting pod %v", pod.Name)
	err := c.Pods(ns).Delete(pod.Name, nil)
	if err != nil {
		return fmt.Errorf("Pod %v encountered a delete error: %v", pod.Name, err)
	}

	// Wait for the pod to terminate.
	err = f.WaitForPodTerminated(pod.Name, "")
	if err != nil && !apierrs.IsNotFound(err) {
		return fmt.Errorf("Pod %v will not terminate: %v", pod.Name, err)
	}

	// Re-get the pod to double check that it has been deleted; expect an error.
	// Note: Get() writes a log error if the pod is not found.
	_, err = c.Pods(ns).Get(pod.Name)
	if err == nil {
		return fmt.Errorf("Pod %v has been deleted but can still be re-Get'd", pod.Name)
	}
	if !apierrs.IsNotFound(err) {
		return fmt.Errorf("Pod %v has been deleted but still exists: %v", pod.Name, err)
	}
	framework.Logf("Ignore \"not found\" error above. Pod %v successfully deleted", pod.Name)
	return nil
}
// Stop attempts to gracefully stop `kubectl port-forward`, only killing it if necessary.
// This helps avoid spdy goroutine leaks in the Kubelet.
func (c *portForwardCommand) Stop() {
	// SIGINT signals that kubectl port-forward should gracefully terminate.
	if err := c.cmd.Process.Signal(syscall.SIGINT); err != nil {
		framework.Logf("error sending SIGINT to kubectl port-forward: %v", err)
	}

	// Try to wait for a clean exit.
	done := make(chan error)
	go func() {
		done <- c.cmd.Wait()
	}()

	expired := time.NewTimer(wait.ForeverTestTimeout)
	defer expired.Stop()

	select {
	case err := <-done:
		if err == nil {
			// success
			return
		}
		framework.Logf("error waiting for kubectl port-forward to exit: %v", err)
	case <-expired.C:
		framework.Logf("timed out waiting for kubectl port-forward to exit")
	}

	framework.Logf("trying to forcibly kill kubectl port-forward")
	framework.TryKill(c.cmd)
}
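// Hypothetical usage sketch (not part of the original source): runPortForward and Stop are
// intended to be paired, with Stop deferred so the `kubectl port-forward` process is always
// cleaned up even if the test fails. The namespace, pod name, and port are placeholders.
func examplePortForward(ns, podName string, port int) {
	pf := runPortForward(ns, podName, port)
	defer pf.Stop()
	framework.Logf("kubectl port-forward is listening on local port %d", pf.port)
	// Dial 127.0.0.1:pf.port here to exercise the forwarded pod port.
}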
// LogLatest logs the most recent resource usage summary collected by the ResourceCollector.
func (r *ResourceCollector) LogLatest() {
	summary, err := r.GetLatest()
	if err != nil {
		framework.Logf("%v", err)
	}
	framework.Logf("%s", formatResourceUsageStats(summary))
}
// createSecret creates a secret containing TLS certificates for the given Ingress.
// If a secret with the same name already exists in the namespace of the
// Ingress, it's updated.
func createSecret(kubeClient clientset.Interface, ing *extensions.Ingress) (host string, rootCA, privKey []byte, err error) {
	var k, c bytes.Buffer
	tls := ing.Spec.TLS[0]
	host = strings.Join(tls.Hosts, ",")
	framework.Logf("Generating RSA cert for host %v", host)

	if err = generateRSACerts(host, true, &k, &c); err != nil {
		return
	}
	cert := c.Bytes()
	key := k.Bytes()
	secret := &v1.Secret{
		ObjectMeta: v1.ObjectMeta{
			Name: tls.SecretName,
		},
		Data: map[string][]byte{
			v1.TLSCertKey:       cert,
			v1.TLSPrivateKeyKey: key,
		},
	}
	var s *v1.Secret
	if s, err = kubeClient.Core().Secrets(ing.Namespace).Get(tls.SecretName, metav1.GetOptions{}); err == nil {
		// TODO: Retry the update. We don't really expect anything to conflict though.
		framework.Logf("Updating secret %v in ns %v with hosts %v for ingress %v", secret.Name, secret.Namespace, host, ing.Name)
		s.Data = secret.Data
		_, err = kubeClient.Core().Secrets(ing.Namespace).Update(s)
	} else {
		framework.Logf("Creating secret %v in ns %v with hosts %v for ingress %v", secret.Name, secret.Namespace, host, ing.Name)
		_, err = kubeClient.Core().Secrets(ing.Namespace).Create(secret)
	}
	return host, cert, key, err
}
func deploymentReachedCompletion(dc *deployapi.DeploymentConfig, rcs []kapi.ReplicationController, pods []kapi.Pod) (bool, error) {
	if len(rcs) == 0 {
		return false, nil
	}
	rc := rcs[len(rcs)-1]
	version := deployutil.DeploymentVersionFor(&rc)
	if version != dc.Status.LatestVersion {
		return false, nil
	}

	status := rc.Annotations[deployapi.DeploymentStatusAnnotation]
	if deployapi.DeploymentStatus(status) != deployapi.DeploymentStatusComplete {
		return false, nil
	}
	expectedReplicas := dc.Spec.Replicas
	if dc.Spec.Test {
		expectedReplicas = 0
	}
	if rc.Spec.Replicas != int32(expectedReplicas) {
		return false, fmt.Errorf("deployment is complete but doesn't have expected spec replicas: %d %d", rc.Spec.Replicas, expectedReplicas)
	}
	if rc.Status.Replicas != int32(expectedReplicas) {
		e2e.Logf("POSSIBLE_ANOMALY: deployment is complete but doesn't have expected status replicas: %d %d", rc.Status.Replicas, expectedReplicas)
		return false, nil
	}
	e2e.Logf("Latest rollout of dc/%s (rc/%s) is complete.", dc.Name, rc.Name)
	return true, nil
}
// gcloudList unmarshals the JSON output of gcloud into the given out interface.
func gcloudList(resource, regex, project string, out interface{}) {
	// gcloud prints a message to stderr if it has an available update,
	// so we only look at stdout.
	command := []string{
		"compute", resource, "list",
		fmt.Sprintf("--regexp=%v", regex),
		fmt.Sprintf("--project=%v", project),
		"-q", "--format=json",
	}
	output, err := exec.Command("gcloud", command...).Output()
	if err != nil {
		errCode := -1
		errMsg := ""
		if exitErr, ok := err.(utilexec.ExitError); ok {
			errCode = exitErr.ExitStatus()
			errMsg = exitErr.Error()
			if osExitErr, ok := err.(*exec.ExitError); ok {
				errMsg = fmt.Sprintf("%v, stderr %v", errMsg, string(osExitErr.Stderr))
			}
		}
		framework.Logf("Error running gcloud command 'gcloud %s': err: %v, output: %v, status: %d, msg: %v", strings.Join(command, " "), err, string(output), errCode, errMsg)
	}
	if err := json.Unmarshal([]byte(output), out); err != nil {
		framework.Logf("Error unmarshalling gcloud output for %v: %v, output: %v", resource, err, string(output))
	}
}
// updateNodeLabels updates the labels of the nodes given by nodeNames.
// If a given label already exists, it is overwritten. If a label to remove doesn't exist,
// it is silently ignored.
// TODO: migrate to use framework.AddOrUpdateLabelOnNode/framework.RemoveLabelOffNode
func updateNodeLabels(c clientset.Interface, nodeNames sets.String, toAdd, toRemove map[string]string) {
	const maxRetries = 5
	for nodeName := range nodeNames {
		var node *v1.Node
		var err error
		for i := 0; i < maxRetries; i++ {
			node, err = c.Core().Nodes().Get(nodeName, metav1.GetOptions{})
			if err != nil {
				framework.Logf("Error getting node %s: %v", nodeName, err)
				continue
			}
			if toAdd != nil {
				for k, v := range toAdd {
					node.ObjectMeta.Labels[k] = v
				}
			}
			if toRemove != nil {
				for k := range toRemove {
					delete(node.ObjectMeta.Labels, k)
				}
			}
			_, err = c.Core().Nodes().Update(node)
			if err != nil {
				framework.Logf("Error updating node %s: %v", nodeName, err)
			} else {
				break
			}
		}
		Expect(err).NotTo(HaveOccurred())
	}
}
// createIngress creates the Ingress and associated service/rc.
// Required: ing.yaml, rc.yaml, svc.yaml must exist in manifestPath
// Optional: secret.yaml, ingAnnotations
// If ingAnnotations is specified it will overwrite any annotations in ing.yaml
func (j *testJig) createIngress(manifestPath, ns string, ingAnnotations map[string]string) {
	mkpath := func(file string) string {
		return filepath.Join(framework.TestContext.RepoRoot, manifestPath, file)
	}

	framework.Logf("creating replication controller")
	framework.RunKubectlOrDie("create", "-f", mkpath("rc.yaml"), fmt.Sprintf("--namespace=%v", ns))

	framework.Logf("creating service")
	framework.RunKubectlOrDie("create", "-f", mkpath("svc.yaml"), fmt.Sprintf("--namespace=%v", ns))

	if exists(mkpath("secret.yaml")) {
		framework.Logf("creating secret")
		framework.RunKubectlOrDie("create", "-f", mkpath("secret.yaml"), fmt.Sprintf("--namespace=%v", ns))
	}
	j.ing = ingFromManifest(mkpath("ing.yaml"))
	j.ing.Namespace = ns
	j.ing.Annotations = map[string]string{ingressClass: j.class}
	for k, v := range ingAnnotations {
		j.ing.Annotations[k] = v
	}
	framework.Logf("creating %v ingress", j.ing.Name)
	var err error
	j.ing, err = j.client.Extensions().Ingresses(ns).Create(j.ing)
	framework.ExpectNoError(err)
}
func verifyPDContentsViaContainer(f *framework.Framework, podName, containerName string, fileAndContentToVerify map[string]string) {
	for filePath, expectedContents := range fileAndContentToVerify {
		var value string
		// Add a retry to avoid transient failures while reading the content.
		for i := 0; i < maxReadRetry; i++ {
			v, err := f.ReadFileViaContainer(podName, containerName, filePath)
			value = v
			if err != nil {
				framework.Logf("Error reading file: %v", err)
			}
			framework.ExpectNoError(err)
			framework.Logf("Read file %q with content: %v (iteration %d)", filePath, v, i)
			if strings.TrimSpace(v) != strings.TrimSpace(expectedContents) {
				framework.Logf("Warning: read content <%q> does not match expected content <%q>.", v, expectedContents)
				size, err := f.CheckFileSizeViaContainer(podName, containerName, filePath)
				if err != nil {
					framework.Logf("Error checking file size: %v", err)
				}
				framework.Logf("Check file %q size: %q", filePath, size)
			} else {
				break
			}
		}
		Expect(strings.TrimSpace(value)).To(Equal(strings.TrimSpace(expectedContents)))
	}
}
func (j *testJig) waitForIngress() {
	// Wait for the loadbalancer IP.
	address, err := framework.WaitForIngressAddress(j.client, j.ing.Namespace, j.ing.Name, lbPollTimeout)
	if err != nil {
		framework.Failf("Ingress failed to acquire an IP address within %v", lbPollTimeout)
	}
	j.address = address
	framework.Logf("Found address %v for ingress %v", j.address, j.ing.Name)
	timeoutClient := &http.Client{Timeout: reqTimeout}

	// Check that all rules respond to a simple GET.
	for _, rules := range j.ing.Spec.Rules {
		proto := "http"
		if len(j.ing.Spec.TLS) > 0 {
			knownHosts := sets.NewString(j.ing.Spec.TLS[0].Hosts...)
			if knownHosts.Has(rules.Host) {
				timeoutClient.Transport, err = buildTransport(rules.Host, j.getRootCA(j.ing.Spec.TLS[0].SecretName))
				framework.ExpectNoError(err)
				proto = "https"
			}
		}
		for _, p := range rules.IngressRuleValue.HTTP.Paths {
			j.curlServiceNodePort(j.ing.Namespace, p.Backend.ServiceName, int(p.Backend.ServicePort.IntVal))
			route := fmt.Sprintf("%v://%v%v", proto, address, p.Path)
			framework.Logf("Testing route %v host %v with simple GET", route, rules.Host)
			framework.ExpectNoError(pollURL(route, rules.Host, lbPollTimeout, j.pollInterval, timeoutClient, false))
		}
	}
}
func (p *statefulSetTester) waitForRunning(numPets int32, ps *apps.StatefulSet, shouldBeReady bool) {
	pollErr := wait.PollImmediate(statefulsetPoll, statefulsetTimeout, func() (bool, error) {
		podList := p.getPodList(ps)
		if int32(len(podList.Items)) < numPets {
			framework.Logf("Found %d stateful pods, waiting for %d", len(podList.Items), numPets)
			return false, nil
		}
		if int32(len(podList.Items)) > numPets {
			return false, fmt.Errorf("Too many pods scheduled, expected %d got %d", numPets, len(podList.Items))
		}
		for _, p := range podList.Items {
			isReady := v1.IsPodReady(&p)
			desiredReadiness := shouldBeReady == isReady
			framework.Logf("Waiting for pod %v to enter %v - Ready=%v, currently %v - Ready=%v", p.Name, v1.PodRunning, shouldBeReady, p.Status.Phase, isReady)
			if p.Status.Phase != v1.PodRunning || !desiredReadiness {
				return false, nil
			}
		}
		return true, nil
	})
	if pollErr != nil {
		framework.Failf("Failed waiting for pods to enter running: %v", pollErr)
	}
}
func nodeUpgradeGCE(rawV string) error {
	// TODO(ihmccreery) This code path should be identical to how a user
	// would trigger a node update; right now it's very different.
	v := "v" + rawV

	framework.Logf("Getting the node template before the upgrade")
	tmplBefore, err := migTemplate()
	if err != nil {
		return fmt.Errorf("error getting the node template before the upgrade: %v", err)
	}

	framework.Logf("Preparing node upgrade by creating new instance template for %q", v)
	stdout, _, err := runCmd(path.Join(framework.TestContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
	if err != nil {
		cleanupNodeUpgradeGCE(tmplBefore)
		return fmt.Errorf("error preparing node upgrade: %v", err)
	}
	tmpl := strings.TrimSpace(stdout)

	framework.Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout)
	if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil {
		cleanupNodeUpgradeGCE(tmplBefore)
		return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err)
	}
	return nil
}
func createClusterObjectOrFail(f *framework.Framework, context *framework.E2EContext) {
	framework.Logf("Creating cluster object: %s (%s, secret: %s)", context.Name, context.Cluster.Cluster.Server, context.Name)
	cluster := federationapi.Cluster{
		ObjectMeta: v1.ObjectMeta{
			Name: context.Name,
		},
		Spec: federationapi.ClusterSpec{
			ServerAddressByClientCIDRs: []federationapi.ServerAddressByClientCIDR{
				{
					ClientCIDR:    "0.0.0.0/0",
					ServerAddress: context.Cluster.Cluster.Server,
				},
			},
			SecretRef: &v1.LocalObjectReference{
				// Note: Name must correlate with federation build script secret name,
				// which currently matches the cluster name.
				// See federation/cluster/common.sh:132
				Name: context.Name,
			},
		},
	}
	_, err := f.FederationClientset_1_5.Federation().Clusters().Create(&cluster)
	framework.ExpectNoError(err, fmt.Sprintf("creating cluster: %+v", err))
	framework.Logf("Successfully created cluster object: %s (%s, secret: %s)", context.Name, context.Cluster.Cluster.Server, context.Name)
}
func validate(f *framework.Framework, svcNameWant, rcNameWant string, ingress api.LoadBalancerIngress, podsWant int) error {
	framework.Logf("Beginning cluster validation")
	// Verify RC.
	rcs, err := f.Client.ReplicationControllers(f.Namespace.Name).List(api.ListOptions{})
	if err != nil {
		return fmt.Errorf("error listing RCs: %v", err)
	}
	if len(rcs.Items) != 1 {
		return fmt.Errorf("wanted 1 RC with name %s, got %d", rcNameWant, len(rcs.Items))
	}
	if got := rcs.Items[0].Name; got != rcNameWant {
		return fmt.Errorf("wanted RC name %q, got %q", rcNameWant, got)
	}

	// Verify pods.
	if err := framework.VerifyPods(f.Client, f.Namespace.Name, rcNameWant, false, podsWant); err != nil {
		return fmt.Errorf("failed to find %d %q pods: %v", podsWant, rcNameWant, err)
	}

	// Verify service.
	svc, err := f.Client.Services(f.Namespace.Name).Get(svcNameWant)
	if err != nil {
		return fmt.Errorf("error getting service %s: %v", svcNameWant, err)
	}
	if svcNameWant != svc.Name {
		return fmt.Errorf("wanted service name %q, got %q", svcNameWant, svc.Name)
	}
	// TODO(mikedanese): Make testLoadBalancerReachable return an error.
	testLoadBalancerReachable(ingress, 80)

	framework.Logf("Cluster validation succeeded")
	return nil
}
func verifyMemoryLimits(c clientset.Interface, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
	if expected == nil {
		return
	}
	var errList []string
	for nodeName, nodeSummary := range actual {
		var nodeErrs []string
		for cName, expectedResult := range expected {
			container, ok := nodeSummary[cName]
			if !ok {
				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
				continue
			}

			expectedValue := expectedResult.MemoryRSSInBytes
			actualValue := container.MemoryRSSInBytes
			if expectedValue != 0 && actualValue > expectedValue {
				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected RSS memory (MB) < %d; got %d", cName, expectedValue, actualValue))
			}
		}
		if len(nodeErrs) > 0 {
			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
			heapStats, err := framework.GetKubeletHeapStats(c, nodeName)
			if err != nil {
				framework.Logf("Unable to get heap stats from %q", nodeName)
			} else {
				framework.Logf("Heap stats on %q\n:%v", nodeName, heapStats)
			}
		}
	}
	if len(errList) > 0 {
		framework.Failf("Memory usage exceeding limits:\n %s", strings.Join(errList, "\n"))
	}
}
// migRollingUpdatePoll (GCE/GKE-only) polls the progress of the MIG rolling
// update with ID id until it is complete. It returns an error if this takes
// longer than nt times the number of nodes.
func migRollingUpdatePoll(id string, nt time.Duration) error {
	// Two keys and a val.
	status, progress, done := "status", "statusMessage", "ROLLED_OUT"
	start, timeout := time.Now(), nt*time.Duration(framework.TestContext.CloudConfig.NumNodes)
	var errLast error
	framework.Logf("Waiting up to %v for MIG rolling update to complete.", timeout)
	if wait.Poll(restartPoll, timeout, func() (bool, error) {
		// A `rolling-updates describe` call outputs what we want to stdout.
		output, _, err := retryCmd("gcloud", "alpha", "compute",
			"rolling-updates",
			fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
			fmt.Sprintf("--zone=%s", framework.TestContext.CloudConfig.Zone),
			"describe",
			id)
		if err != nil {
			errLast = fmt.Errorf("Error calling rolling-updates describe %s: %v", id, err)
			framework.Logf("%v", errLast)
			return false, nil
		}

		// The 'describe' call probably succeeded; parse the output and try to
		// find the line that looks like "status: <status>" and see whether it's
		// done.
		framework.Logf("Waiting for MIG rolling update: %s (%v elapsed)", framework.ParseKVLines(output, progress), time.Since(start))
		if st := framework.ParseKVLines(output, status); st == done {
			return true, nil
		}
		return false, nil
	}) != nil {
		return fmt.Errorf("timeout waiting %v for MIG rolling update to complete. Last error: %v", timeout, errLast)
	}
	framework.Logf("MIG rolling update complete after %v", time.Since(start))
	return nil
}
// setupProviderConfig validates and sets up cloudConfig based on framework.TestContext.Provider.
func setupProviderConfig() error {
	switch framework.TestContext.Provider {
	case "":
		glog.Info("The --provider flag is not set. Treating as a conformance test. Some tests may not be run.")
	case "gce", "gke":
		var err error
		framework.Logf("Fetching cloud provider for %q\r\n", framework.TestContext.Provider)
		var tokenSource oauth2.TokenSource
		tokenSource = nil
		if cloudConfig.ServiceAccount != "" {
			// Use the specified service account for auth.
			framework.Logf("Using service account %q as token source.", cloudConfig.ServiceAccount)
			tokenSource = google.ComputeTokenSource(cloudConfig.ServiceAccount)
		}
		zone := framework.TestContext.CloudConfig.Zone
		region, err := gcecloud.GetGCERegion(zone)
		if err != nil {
			return fmt.Errorf("error parsing GCE/GKE region from zone %q: %v", zone, err)
		}
		managedZones := []string{zone} // Only single-zone for now
		cloudConfig.Provider, err = gcecloud.CreateGCECloud(framework.TestContext.CloudConfig.ProjectID, region, zone, managedZones, "" /* networkUrl */, nil /* nodeTags */, "" /* nodeInstancePrefix */, tokenSource, false /* useMetadataServer */)
		if err != nil {
			return fmt.Errorf("Error building GCE/GKE provider: %v", err)
		}
	case "aws":
		if cloudConfig.Zone == "" {
			return fmt.Errorf("gce-zone must be specified for AWS")
		}
	}
	return nil
}
// dialFromNode executes a tcp or udp request based on protocol via kubectl exec
// in a test container running with host networking.
// - minTries is the minimum number of curl attempts required before declaring
//   success. Set to 0 if you'd like to return as soon as all endpoints respond
//   at least once.
// - maxTries is the maximum number of curl attempts. If this many attempts pass
//   and we don't see all expected endpoints, the test fails.
// maxTries == minTries will confirm that we see the expected endpoints and no
// more for maxTries. Use this if you want to eg: fail a readiness check on a
// pod and confirm it doesn't show up as an endpoint.
func (config *NetworkingTestConfig) dialFromNode(protocol, targetIP string, targetPort, maxTries, minTries int, expectedEps sets.String) {
	var cmd string
	if protocol == "udp" {
		cmd = fmt.Sprintf("echo 'hostName' | timeout -t 3 nc -w 1 -u %s %d", targetIP, targetPort)
	} else {
		cmd = fmt.Sprintf("curl -q -s --connect-timeout 1 http://%s:%d/hostName", targetIP, targetPort)
	}

	// TODO: This simply tells us that we can reach the endpoints. Check that
	// the probability of hitting a specific endpoint is roughly the same as
	// hitting any other.
	eps := sets.NewString()

	filterCmd := fmt.Sprintf("%s | grep -v '^\\s*$'", cmd)
	for i := 0; i < maxTries; i++ {
		stdout, err := framework.RunHostCmd(config.ns, config.hostTestContainerPod.Name, filterCmd)
		if err != nil {
			// A failure to kubectl exec counts as a try, not a hard fail.
			// Also note that we will keep failing for maxTries in tests where
			// we confirm unreachability.
			framework.Logf("Failed to execute %v: %v", filterCmd, err)
		} else {
			eps.Insert(strings.TrimSpace(stdout))
		}
		framework.Logf("Waiting for %+v endpoints, got endpoints %+v", expectedEps.Difference(eps), eps)

		// Check against i+1 so we exit if minTries == maxTries.
		if (eps.Equal(expectedEps) || eps.Len() == 0 && expectedEps.Len() == 0) && i+1 >= minTries {
			return
		}
	}

	config.diagnoseMissingEndpoints(eps)
	framework.Failf("Failed to find expected endpoints:\nTries %d\nCommand %v\nretrieved %v\nexpected %v\n", minTries, cmd, eps, expectedEps)
}