func waitForPodRunning(c *client.Client, pod *api.Pod, out io.Writer) (status api.PodPhase, err error) {
	for {
		pod, err := c.Pods(pod.Namespace).Get(pod.Name)
		if err != nil {
			return api.PodUnknown, err
		}
		ready := false
		if pod.Status.Phase == api.PodRunning {
			ready = true
			for _, status := range pod.Status.ContainerStatuses {
				if !status.Ready {
					ready = false
					break
				}
			}
			if ready {
				return api.PodRunning, nil
			}
		}
		if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
			return pod.Status.Phase, nil
		}
		fmt.Fprintf(out, "Waiting for pod %s/%s to be running, status is %s, pod ready: %v\n", pod.Namespace, pod.Name, pod.Status.Phase, ready)
		time.Sleep(2 * time.Second)
	}
}
// testHostIP tests that a pod gets a host IP
func testHostIP(c *client.Client, ns string, pod *api.Pod) {
	podClient := c.Pods(ns)
	By("creating pod")
	defer podClient.Delete(pod.Name, api.NewDeleteOptions(0))
	if _, err := podClient.Create(pod); err != nil {
		Failf("Failed to create pod: %v", err)
	}
	By("ensuring that pod is running and has a hostIP")
	// Wait for the pods to enter the running state. Waiting loops until the pods
	// are running so non-running pods cause a timeout for this test.
	err := waitForPodRunningInNamespace(c, pod.Name, ns)
	Expect(err).NotTo(HaveOccurred())
	// Try to make sure we get a hostIP for each pod.
	hostIPTimeout := 2 * time.Minute
	t := time.Now()
	for {
		p, err := podClient.Get(pod.Name)
		Expect(err).NotTo(HaveOccurred())
		if p.Status.HostIP != "" {
			Logf("Pod %s has hostIP: %s", p.Name, p.Status.HostIP)
			break
		}
		if time.Since(t) >= hostIPTimeout {
			Failf("Gave up waiting for hostIP of pod %s after %v seconds", p.Name, time.Since(t).Seconds())
		}
		Logf("Retrying to get the hostIP of pod %s", p.Name)
		time.Sleep(5 * time.Second)
	}
}
// Retrieves metrics information.
func getMetrics(c *client.Client) (string, error) {
	body, err := c.Get().AbsPath("/metrics").DoRaw()
	if err != nil {
		return "", err
	}
	return string(body), nil
}
func CreateNewControllerFromCurrentController(c *client.Client, namespace, oldName, newName, image, deploymentKey string) (*api.ReplicationController, error) {
	// load the old RC into the "new" RC
	newRc, err := c.ReplicationControllers(namespace).Get(oldName)
	if err != nil {
		return nil, err
	}

	if len(newRc.Spec.Template.Spec.Containers) > 1 {
		// TODO: support multi-container image update.
		return nil, goerrors.New("Image update is not supported for multi-container pods")
	}
	if len(newRc.Spec.Template.Spec.Containers) == 0 {
		return nil, goerrors.New(fmt.Sprintf("Pod has no containers! (%v)", newRc))
	}
	newRc.Spec.Template.Spec.Containers[0].Image = image

	newHash, err := api.HashObject(newRc, c.Codec)
	if err != nil {
		return nil, err
	}

	if len(newName) == 0 {
		newName = fmt.Sprintf("%s-%s", newRc.Name, newHash)
	}
	newRc.Name = newName

	newRc.Spec.Selector[deploymentKey] = newHash
	newRc.Spec.Template.Labels[deploymentKey] = newHash
	// Clear resource version after hashing so that identical updates get different hashes.
	newRc.ResourceVersion = ""
	return newRc, nil
}
func CheckCadvisorHealthOnAllNodes(c *client.Client, timeout time.Duration) {
	By("getting list of nodes")
	nodeList, err := c.Nodes().List(labels.Everything(), fields.Everything())
	expectNoError(err)
	var errors []error
	retries := maxRetries
	for {
		errors = []error{}
		for _, node := range nodeList.Items {
			// cadvisor is not accessible directly unless its port (4194 by default) is exposed.
			// Here, we access the '/stats/' REST endpoint on the kubelet, which polls cadvisor internally.
			statsResource := fmt.Sprintf("api/v1/proxy/nodes/%s/stats/", node.Name)
			By(fmt.Sprintf("Querying stats from node %s using url %s", node.Name, statsResource))
			_, err = c.Get().AbsPath(statsResource).Timeout(timeout).Do().Raw()
			if err != nil {
				errors = append(errors, err)
			}
		}
		if len(errors) == 0 {
			return
		}
		if retries--; retries <= 0 {
			break
		}
		Logf("failed to retrieve kubelet stats -\n %v", errors)
		time.Sleep(sleepDuration)
	}
	Failf("Failed after retrying %d times for cadvisor to be healthy on all nodes. Errors:\n%v", maxRetries, errors)
}
func runMasterServiceTest(client *client.Client) {
	time.Sleep(12 * time.Second)
	svcList, err := client.Services(api.NamespaceDefault).List(labels.Everything())
	if err != nil {
		glog.Fatalf("unexpected error listing services: %v", err)
	}
	var foundRW bool
	found := sets.String{}
	for i := range svcList.Items {
		found.Insert(svcList.Items[i].Name)
		if svcList.Items[i].Name == "kubernetes" {
			foundRW = true
		}
	}
	if foundRW {
		ep, err := client.Endpoints(api.NamespaceDefault).Get("kubernetes")
		if err != nil {
			glog.Fatalf("unexpected error listing endpoints for kubernetes service: %v", err)
		}
		if countEndpoints(ep) == 0 {
			glog.Fatalf("no endpoints for kubernetes service: %v", ep)
		}
	} else {
		glog.Errorf("no RW service found: %v", found)
		glog.Fatal("Kubernetes service test failed")
	}
	glog.Infof("Master service test passed.")
}
// StartPods checks for numPods in TestNS. If they exist, it no-ops; otherwise it starts up
// a temp rc, scales it to match numPods, then deletes the rc leaving behind the pods.
func StartPods(numPods int, host string, restClient *client.Client) error {
	start := time.Now()
	defer func() {
		glog.Infof("StartPods took %v with numPods %d", time.Since(start), numPods)
	}()
	hostField := fields.OneTermEqualSelector(client.PodHost, host)
	pods, err := restClient.Pods(TestNS).List(labels.Everything(), hostField)
	if err != nil || len(pods.Items) == numPods {
		return err
	}
	glog.Infof("Found %d pods that match host %v, require %d", len(pods.Items), hostField, numPods)

	// For the sake of simplicity, assume all pods in TestNS have selectors matching TestRCManifest.
	controller := RCFromManifest(TestRCManifest)

	// Make the rc unique to the given host.
	controller.Spec.Replicas = numPods
	controller.Spec.Template.Spec.NodeName = host
	controller.Name = controller.Name + host
	controller.Spec.Selector["host"] = host
	controller.Spec.Template.Labels["host"] = host

	if rc, err := StartRC(controller, restClient); err != nil {
		return err
	} else {
		// Delete the rc, otherwise when we restart master components for the next benchmark
		// the rc controller will race with the pods controller in the rc manager.
		return restClient.ReplicationControllers(TestNS).Delete(rc.Name)
	}
}
func runServiceAndRCForResourceConsumer(c *client.Client, ns, name string, replicas int, cpuLimitMillis, memLimitMb int64) {
	_, err := c.Services(ns).Create(&api.Service{
		ObjectMeta: api.ObjectMeta{
			Name: name,
		},
		Spec: api.ServiceSpec{
			Ports: []api.ServicePort{{
				Port:       port,
				TargetPort: util.NewIntOrStringFromInt(targetPort),
			}},
			Selector: map[string]string{
				"name": name,
			},
		},
	})
	expectNoError(err)
	config := RCConfig{
		Client:    c,
		Image:     image,
		Name:      name,
		Namespace: ns,
		Timeout:   timeoutRC,
		Replicas:  replicas,
		CpuLimit:  cpuLimitMillis,
		MemLimit:  memLimitMb * 1024 * 1024, // MemLimit is in bytes
	}
	expectNoError(RunRC(config))
	// Make sure endpoints are propagated.
	// TODO(piosz): replace sleep with endpoints watch.
	time.Sleep(10 * time.Second)
}
func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error) {
	expectedPods := []string{}
	// Iterate over the labels that identify the replication controllers that we
	// want to check. rcLabels contains the values for the k8s-app key that
	// identify the replication controllers that we want to check. Using a label
	// rather than an explicit name is preferred because the names will typically have
	// a version suffix e.g. heapster-monitoring-v1 and this will change after a rolling
	// update e.g. to heapster-monitoring-v2. By using a label query we can check for the
	// situation when both a heapster-monitoring-v1 and a heapster-monitoring-v2 replication
	// controller are running (which would be an error except during a rolling update).
	for _, rcLabel := range rcLabels {
		rcList, err := c.ReplicationControllers(api.NamespaceSystem).List(labels.Set{"k8s-app": rcLabel}.AsSelector())
		if err != nil {
			return nil, err
		}
		if len(rcList.Items) != 1 {
			return nil, fmt.Errorf("expected to find one replica for RC with label %s but got %d", rcLabel, len(rcList.Items))
		}
		for _, rc := range rcList.Items {
			podList, err := c.Pods(api.NamespaceSystem).List(labels.Set(rc.Spec.Selector).AsSelector(), fields.Everything())
			if err != nil {
				return nil, err
			}
			for _, pod := range podList.Items {
				if pod.DeletionTimestamp != nil {
					continue
				}
				expectedPods = append(expectedPods, string(pod.UID))
			}
		}
	}
	return expectedPods, nil
}
func podsCreated(c *client.Client, ns, name string, replicas int) (*api.PodList, error) {
	timeout := 2 * time.Minute
	// List the pods, making sure we observe all the replicas.
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
		pods, err := c.Pods(ns).List(label, fields.Everything())
		if err != nil {
			return nil, err
		}

		created := []api.Pod{}
		for _, pod := range pods.Items {
			if pod.DeletionTimestamp != nil {
				continue
			}
			created = append(created, pod)
		}
		Logf("Pod name %s: Found %d pods out of %d", name, len(created), replicas)

		if len(created) == replicas {
			pods.Items = created
			return pods, nil
		}
	}
	return nil, fmt.Errorf("Pod name %s: Gave up waiting %v for %d pods to come up", name, timeout, replicas)
}
// Creates a replication controller that serves its hostname and a service on top of it.
func startServeHostnameService(c *client.Client, ns, name string, port, replicas int) ([]string, string, error) {
	podNames := make([]string, replicas)

	By("creating service " + name + " in namespace " + ns)
	_, err := c.Services(ns).Create(&api.Service{
		ObjectMeta: api.ObjectMeta{
			Name: name,
		},
		Spec: api.ServiceSpec{
			Ports: []api.ServicePort{{
				Port:       port,
				TargetPort: util.NewIntOrStringFromInt(9376),
				Protocol:   "TCP",
			}},
			Selector: map[string]string{
				"name": name,
			},
		},
	})
	if err != nil {
		return podNames, "", err
	}

	var createdPods []*api.Pod
	maxContainerFailures := 0
	config := RCConfig{
		Client:               c,
		Image:                "gcr.io/google_containers/serve_hostname:1.1",
		Name:                 name,
		Namespace:            ns,
		PollInterval:         3 * time.Second,
		Timeout:              30 * time.Second,
		Replicas:             replicas,
		CreatedPods:          &createdPods,
		MaxContainerFailures: &maxContainerFailures,
	}
	err = RunRC(config)
	if err != nil {
		return podNames, "", err
	}

	if len(createdPods) != replicas {
		return podNames, "", fmt.Errorf("Incorrect number of running pods: %v", len(createdPods))
	}

	for i := range createdPods {
		podNames[i] = createdPods[i].ObjectMeta.Name
	}
	sort.StringSlice(podNames).Sort()

	service, err := c.Services(ns).Get(name)
	if err != nil {
		return podNames, "", err
	}
	if service.Spec.ClusterIP == "" {
		return podNames, "", fmt.Errorf("Service IP is blank for %v", name)
	}
	serviceIP := service.Spec.ClusterIP
	return podNames, serviceIP, nil
}
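// The following is a hypothetical usage sketch, not part of the original helpers: it shows
// how startServeHostnameService and stopServeHostnameService (defined further below) might
// be paired in a test. The service name, port, and replica count are illustrative assumptions.
func exampleServeHostnameLifecycle(c *client.Client, ns string) error {
	// Bring up the RC plus service; name/port/replicas are assumed values for illustration.
	podNames, serviceIP, err := startServeHostnameService(c, ns, "verify-hostnames", 80, 3)
	if err != nil {
		return err
	}
	// Always tear down the RC and service, even if later checks fail.
	defer func() {
		if cleanupErr := stopServeHostnameService(c, ns, "verify-hostnames"); cleanupErr != nil {
			Logf("Failed to stop serve-hostname service: %v", cleanupErr)
		}
	}()
	Logf("Service %s/%s has cluster IP %s backed by pods %v", ns, "verify-hostnames", serviceIP, podNames)
	return nil
}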
func runSchedulerNoPhantomPodsTest(client *client.Client) {
	pod := &api.Pod{
		Spec: api.PodSpec{
			Containers: []api.Container{
				{
					Name:  "c1",
					Image: "kubernetes/pause",
					Ports: []api.ContainerPort{
						{ContainerPort: 1234, HostPort: 9999},
					},
					ImagePullPolicy: api.PullIfNotPresent,
				},
			},
		},
	}

	// Assuming we only have two kubelets, the third pod here won't schedule
	// if the scheduler doesn't correctly handle the delete for the second
	// pod.
	pod.ObjectMeta.Name = "phantom.foo"
	foo, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, foo.Namespace, foo.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	pod.ObjectMeta.Name = "phantom.bar"
	bar, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*30, podRunning(client, bar.Namespace, bar.Name)); err != nil {
		glog.Fatalf("FAILED: pod never started running %v", err)
	}

	// Delete a pod to free up room.
	glog.Infof("Deleting pod %v", bar.Name)
	err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(0))
	if err != nil {
		glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
	}

	pod.ObjectMeta.Name = "phantom.baz"
	baz, err := client.Pods(api.NamespaceDefault).Create(pod)
	if err != nil {
		glog.Fatalf("Failed to create pod: %v, %v", pod, err)
	}
	if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
		if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil {
			glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod)
		} else {
			glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
		}
	}

	glog.Info("Scheduler doesn't make phantom pods: test passed.")
}
// Performs a GET on a node proxy endpoint given the node name and REST client.
func nodeProxyRequest(c *client.Client, node, endpoint string) client.Result {
	return c.Get().
		Prefix("proxy").
		Resource("nodes").
		Name(fmt.Sprintf("%v:%v", node, ports.KubeletPort)).
		Suffix(endpoint).
		Do()
}
// Waits for the job to reach the given number of completions.
func waitForJobFinish(c *client.Client, ns, jobName string, completions int) error {
	return wait.Poll(poll, jobTimeout, func() (bool, error) {
		curr, err := c.Extensions().Jobs(ns).Get(jobName)
		if err != nil {
			return false, err
		}
		return curr.Status.Succeeded == completions, nil
	})
}
func newPodOnNode(c *client.Client, namespace, podName, nodeName string) error {
	pod, err := c.Pods(namespace).Create(podOnNode(podName, nodeName, serveHostnameImage))
	if err == nil {
		Logf("Created pod %s on node %s", pod.ObjectMeta.Name, nodeName)
	} else {
		Logf("Failed to create pod %s on node %s: %v", podName, nodeName, err)
	}
	return err
}
func cleanupPods(c *client.Client, ns string) {
	By("Removing all pods in namespace " + ns)
	pods, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
	expectNoError(err)
	opt := api.NewDeleteOptions(0)
	for _, p := range pods.Items {
		expectNoError(c.Pods(ns).Delete(p.ObjectMeta.Name, opt))
	}
}
func resizeRC(c *client.Client, ns, name string, replicas int) error {
	rc, err := c.ReplicationControllers(ns).Get(name)
	if err != nil {
		return err
	}
	rc.Spec.Replicas = replicas
	_, err = c.ReplicationControllers(rc.Namespace).Update(rc)
	return err
}
func stopServeHostnameService(c *client.Client, ns, name string) error {
	if err := DeleteRC(c, ns, name); err != nil {
		return err
	}
	if err := c.Services(ns).Delete(name); err != nil {
		return err
	}
	return nil
}
func pickNode(c *client.Client) (string, error) {
	nodes, err := c.Nodes().List(labels.Everything(), fields.Everything())
	if err != nil {
		return "", err
	}
	if len(nodes.Items) == 0 {
		return "", fmt.Errorf("no nodes exist, can't test node proxy")
	}
	return nodes.Items[0].Name, nil
}
func getAllNodesInCluster(c *client.Client) ([]string, error) {
	nodeList, err := c.Nodes().List(labels.Everything(), fields.Everything())
	if err != nil {
		return nil, err
	}
	result := []string{}
	for _, node := range nodeList.Items {
		result = append(result, node.Name)
	}
	return result, nil
}
// Resets latency metrics in apiserver.
func resetMetrics(c *client.Client) error {
	Logf("Resetting latency metrics in apiserver...")
	body, err := c.Get().AbsPath("/resetMetrics").DoRaw()
	if err != nil {
		return err
	}
	if string(body) != "metrics reset\n" {
		return fmt.Errorf("Unexpected response: %q", string(body))
	}
	return nil
}
func getNodePublicIps(c *client.Client) ([]string, error) {
	nodes, err := c.Nodes().List(labels.Everything(), fields.Everything())
	if err != nil {
		return nil, err
	}

	ips := collectAddresses(nodes, api.NodeExternalIP)
	if len(ips) == 0 {
		ips = collectAddresses(nodes, api.NodeLegacyHostIP)
	}
	return ips, nil
}
func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}
		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
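// Hypothetical usage sketch (not part of the original helpers): wait for the "default"
// service account in a namespace to be provisioned with a token secret, then log which
// secret holds it. The service account name and the logging are illustrative assumptions.
func exampleWaitForDefaultToken(c *client.Client, ns string) error {
	tokenName, token, err := getReferencedServiceAccountToken(c, ns, "default", true)
	if err != nil {
		return err
	}
	Logf("Service account %s/default references token secret %s (%d bytes)", ns, tokenName, len(token))
	return nil
}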
// StartRC creates the given rc if it doesn't already exist, then updates it via kubectl's scaler.
func StartRC(controller *api.ReplicationController, restClient *client.Client) (*api.ReplicationController, error) {
	created, err := restClient.ReplicationControllers(controller.Namespace).Get(controller.Name)
	if err != nil {
		glog.Infof("Rc %v doesn't exist, creating", controller.Name)
		created, err = restClient.ReplicationControllers(controller.Namespace).Create(controller)
		if err != nil {
			return nil, err
		}
	}
	// If we just created an rc, wait till it creates its replicas.
	return ScaleRC(created.Name, created.Namespace, controller.Spec.Replicas, restClient)
}
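// Hypothetical usage sketch (not part of the original helpers): load the shared test RC
// manifest, pin it to a single host, and hand it to StartRC, mirroring what StartPods
// does above. The replica count and host handling are illustrative assumptions.
func exampleStartPinnedRC(restClient *client.Client, host string) (*api.ReplicationController, error) {
	controller := RCFromManifest(TestRCManifest)
	controller.Name = controller.Name + host
	controller.Spec.Replicas = 5 // assumed size
	controller.Spec.Template.Spec.NodeName = host
	return StartRC(controller, restClient)
}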
// getContainerRestarts returns the count of container restarts across all pods matching the given labelSelector,
// and a list of node names across which these containers restarted.
func getContainerRestarts(c *client.Client, ns string, labelSelector labels.Selector) (int, []string) {
	pods, err := c.Pods(ns).List(labelSelector, fields.Everything())
	expectNoError(err)
	failedContainers := 0
	containerRestartNodes := sets.NewString()
	for _, p := range pods.Items {
		for _, v := range FailedContainers(&p) {
			failedContainers = failedContainers + v.restarts
			containerRestartNodes.Insert(p.Spec.NodeName)
		}
	}
	return failedContainers, containerRestartNodes.List()
}
func translatePodNameToUIDOrFail(c *client.Client, ns string, expectedEndpoints PortsByPodName) PortsByPodUID {
	portsByUID := make(PortsByPodUID)

	for name, portList := range expectedEndpoints {
		pod, err := c.Pods(ns).Get(name)
		if err != nil {
			Failf("failed to get pod %s, that's pretty weird. validation failed: %s", name, err)
		}
		portsByUID[pod.ObjectMeta.UID] = portList
	}
	// Logf("successfully translated pod names to UIDs: %v -> %v on namespace %s", expectedEndpoints, portsByUID, ns)
	return portsByUID
}
func clearDaemonSetNodeLabels(c *client.Client) error {
	nodeClient := c.Nodes()
	nodeList, err := nodeClient.List(labels.Everything(), fields.Everything())
	if err != nil {
		return err
	}
	for _, node := range nodeList.Items {
		_, err := setDaemonSetNodeLabels(c, node.Name, map[string]string{})
		if err != nil {
			return err
		}
	}
	return nil
}
func makeRequestToGuestbook(c *client.Client, cmd, value string, ns string) (string, error) {
	result, err := c.Get().
		Prefix("proxy").
		Namespace(ns).
		Resource("services").
		Name("frontend").
		Suffix("/guestbook.php").
		Param("cmd", cmd).
		Param("key", "messages").
		Param("value", value).
		Do().
		Raw()
	return string(result), err
}
func handleAttachReplicationController(c *client.Client, controller *api.ReplicationController, opts *AttachOptions) error {
	var pods *api.PodList
	for pods == nil || len(pods.Items) == 0 {
		var err error
		if pods, err = c.Pods(controller.Namespace).List(labels.SelectorFromSet(controller.Spec.Selector), fields.Everything()); err != nil {
			return err
		}
		if len(pods.Items) == 0 {
			fmt.Fprint(opts.Out, "Waiting for pod to be scheduled\n")
			time.Sleep(2 * time.Second)
		}
	}
	pod := &pods.Items[0]
	return handleAttachPod(c, pod, opts)
}
func runAPIVersionsTest(c *client.Client) {
	v, err := c.ServerAPIVersions()
	clientVersion := c.APIVersion()
	if err != nil {
		glog.Fatalf("failed to get api versions: %v", err)
	}
	// Verify that the server supports the API version used by the client.
	for _, version := range v.Versions {
		if version == clientVersion {
			glog.Infof("Version test passed")
			return
		}
	}
	glog.Fatalf("Server does not support APIVersion used by client. Server supported APIVersions: '%v', client APIVersion: '%v'", v.Versions, clientVersion)
}