// readTransactions reads # of transactions from the k8petstore web server endpoint.
// For more details see the source of the k8petstore web server.
func readTransactions(c clientset.Interface, ns string) (error, int) {
	proxyRequest, errProxy := framework.GetServicesProxyRequest(c, c.Core().RESTClient().Get())
	if errProxy != nil {
		return errProxy, -1
	}

	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()

	body, err := proxyRequest.Namespace(ns).
		Context(ctx).
		Name("frontend").
		Suffix("llen").
		DoRaw()
	if err != nil {
		if ctx.Err() != nil {
			framework.Failf("Failed to read petstore transactions: %v", err)
		}
		return err, -1
	}
	totalTrans, err := strconv.Atoi(string(body))
	return err, totalTrans
}
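// A minimal usage sketch (not part of the original file) showing how a caller
// might poll readTransactions until the transaction count reaches a threshold.
// The helper name, the 10s poll interval, and the minExpected parameter are
// illustrative assumptions, not values taken from the k8petstore test itself.
func waitForTransactions(c clientset.Interface, ns string, minExpected int, timeout time.Duration) error {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) {
		err, totalTrans := readTransactions(c, ns)
		if err != nil {
			// Transient proxy errors are expected while the service warms up.
			continue
		}
		if totalTrans >= minExpected {
			return nil
		}
	}
	return fmt.Errorf("timed out waiting for %d transactions", minExpected)
}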
// ClusterLevelLoggingWithKibana is an end to end test that checks to see if Kibana is alive.
func ClusterLevelLoggingWithKibana(f *framework.Framework) {
	// graceTime is how long to keep retrying requests for status information.
	const graceTime = 20 * time.Minute

	// Check for the existence of the Kibana service.
	By("Checking the Kibana service exists.")
	s := f.ClientSet.Core().Services(api.NamespaceSystem)
	// Make a few attempts to connect. This makes the test robust against
	// being run as the first e2e test just after the e2e cluster has been created.
	var err error
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		if _, err = s.Get("kibana-logging", metav1.GetOptions{}); err == nil {
			break
		}
		framework.Logf("Attempt to check for the existence of the Kibana service failed after %v", time.Since(start))
	}
	Expect(err).NotTo(HaveOccurred())

	// Wait for the Kibana pod(s) to enter the running state.
	By("Checking to make sure the Kibana pods are running")
	label := labels.SelectorFromSet(labels.Set(map[string]string{kibanaKey: kibanaValue}))
	options := v1.ListOptions{LabelSelector: label.String()}
	pods, err := f.ClientSet.Core().Pods(api.NamespaceSystem).List(options)
	Expect(err).NotTo(HaveOccurred())
	for _, pod := range pods.Items {
		err = framework.WaitForPodRunningInNamespace(f.ClientSet, &pod)
		Expect(err).NotTo(HaveOccurred())
	}

	By("Checking to make sure we get a response from the Kibana UI.")
	err = nil
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.ClientSet, f.ClientSet.Core().RESTClient().Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			err = errProxy
			continue
		}

		ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
		defer cancel()

		// Query against the root URL for Kibana.
		_, err = proxyRequest.Namespace(api.NamespaceSystem).
			Context(ctx).
			Name("kibana-logging").
			DoRaw()
		if err != nil {
			if ctx.Err() != nil {
				framework.Failf("After %v proxy call to kibana-logging failed: %v", time.Since(start), err)
				break
			}
			framework.Logf("After %v proxy call to kibana-logging failed: %v", time.Since(start), err)
			continue
		}
		break
	}
	Expect(err).NotTo(HaveOccurred())
}
// sendOneConsumeMemRequest sends POST request for memory consumption
func (rc *ResourceConsumer) sendOneConsumeMemRequest(megabytes int, durationSec int) {
	defer GinkgoRecover()
	proxyRequest, err := framework.GetServicesProxyRequest(rc.framework.Client, rc.framework.Client.Post())
	framework.ExpectNoError(err)
	_, err = proxyRequest.Namespace(rc.framework.Namespace.Name).
		Name(rc.name).
		Suffix("ConsumeMem").
		Param("megabytes", strconv.Itoa(megabytes)).
		Param("durationSec", strconv.Itoa(durationSec)).
		DoRaw()
	framework.ExpectNoError(err)
}
// sendConsumeMemRequest sends POST request for memory consumption
func (rc *ResourceConsumer) sendConsumeMemRequest(megabytes int) {
	proxyRequest, err := framework.GetServicesProxyRequest(rc.framework.ClientSet, rc.framework.ClientSet.Core().RESTClient().Post())
	framework.ExpectNoError(err)
	req := proxyRequest.Namespace(rc.framework.Namespace.Name).
		Name(rc.controllerName).
		Suffix("ConsumeMem").
		Param("megabytes", strconv.Itoa(megabytes)).
		Param("durationSec", strconv.Itoa(rc.consumptionTimeInSeconds)).
		Param("requestSizeMegabytes", strconv.Itoa(rc.requestSizeInMegabytes))
	framework.Logf("URL: %v", *req.URL())
	_, err = req.DoRaw()
	framework.ExpectNoError(err)
}
// sendOneConsumeCustomMetric sends POST request for custom metric consumption
func (rc *ResourceConsumer) sendOneConsumeCustomMetric(delta int, durationSec int) {
	defer GinkgoRecover()
	proxyRequest, err := framework.GetServicesProxyRequest(rc.framework.Client, rc.framework.Client.Post())
	framework.ExpectNoError(err)
	_, err = proxyRequest.Namespace(rc.framework.Namespace.Name).
		Name(rc.name).
		Suffix("BumpMetric").
		Param("metric", customMetricName).
		Param("delta", strconv.Itoa(delta)).
		Param("durationSec", strconv.Itoa(durationSec)).
		DoRaw()
	framework.ExpectNoError(err)
}
// sendConsumeCustomMetric sends POST request for custom metric consumption
func (rc *ResourceConsumer) sendConsumeCustomMetric(delta int) {
	proxyRequest, err := framework.GetServicesProxyRequest(rc.framework.ClientSet, rc.framework.ClientSet.Core().RESTClient().Post())
	framework.ExpectNoError(err)
	req := proxyRequest.Namespace(rc.framework.Namespace.Name).
		Name(rc.controllerName).
		Suffix("BumpMetric").
		Param("metric", customMetricName).
		Param("delta", strconv.Itoa(delta)).
		Param("durationSec", strconv.Itoa(rc.consumptionTimeInSeconds)).
		Param("requestSizeMetrics", strconv.Itoa(rc.requestSizeCustomMetric))
	framework.Logf("URL: %v", *req.URL())
	_, err = req.DoRaw()
	framework.ExpectNoError(err)
}
// readTransactions reads # of transactions from the k8petstore web server endpoint.
// For more details see the source of the k8petstore web server.
func readTransactions(c clientset.Interface, ns string) (error, int) {
	proxyRequest, errProxy := framework.GetServicesProxyRequest(c, c.Core().RESTClient().Get())
	if errProxy != nil {
		return errProxy, -1
	}
	body, err := proxyRequest.Namespace(ns).
		Name("frontend").
		Suffix("llen").
		DoRaw()
	if err != nil {
		return err, -1
	}
	totalTrans, err := strconv.Atoi(string(body))
	return err, totalTrans
}
// sendConsumeCPURequest sends POST request for CPU consumption
func (rc *ResourceConsumer) sendConsumeCPURequest(millicores int) {
	proxyRequest, err := framework.GetServicesProxyRequest(rc.framework.ClientSet, rc.framework.ClientSet.Core().RESTClient().Post())
	framework.ExpectNoError(err)

	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()

	req := proxyRequest.Namespace(rc.framework.Namespace.Name).
		Context(ctx).
		Name(rc.controllerName).
		Suffix("ConsumeCPU").
		Param("millicores", strconv.Itoa(millicores)).
		Param("durationSec", strconv.Itoa(rc.consumptionTimeInSeconds)).
		Param("requestSizeMillicores", strconv.Itoa(rc.requestSizeInMillicores))
	framework.Logf("URL: %v", *req.URL())
	_, err = req.DoRaw()
	framework.ExpectNoError(err)
}
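// Hedged sketch of how a send method like sendConsumeCPURequest is typically
// driven: a goroutine re-issues the request on a ticker so the target pod keeps
// consuming CPU for the duration of the test. The method name, the stopCh
// parameter, and the ticker interval are illustrative assumptions, not fields
// or behavior taken from the real ResourceConsumer.
func (rc *ResourceConsumer) makeConsumeCPURequestsSketch(millicores int, stopCh <-chan struct{}) {
	ticker := time.NewTicker(time.Duration(rc.consumptionTimeInSeconds) * time.Second)
	defer ticker.Stop()
	for {
		rc.sendConsumeCPURequest(millicores)
		select {
		case <-ticker.C:
			// Re-send before the previous consumption window expires.
		case <-stopCh:
			return
		}
	}
}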
func makeHttpRequestToService(c *client.Client, ns, service, path string, timeout time.Duration) (string, error) {
	var result []byte
	var err error
	for t := time.Now(); time.Since(t) < timeout; time.Sleep(framework.Poll) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(c, c.Get())
		if errProxy != nil {
			break
		}
		result, err = proxyRequest.Namespace(ns).
			Name(service).
			Suffix(path).
			Do().
			Raw()
		if err != nil {
			break
		}
	}
	return string(result), err
}
func makeHttpRequestToService(c clientset.Interface, ns, service, path string, timeout time.Duration) (string, error) {
	var result []byte
	var err error
	for t := time.Now(); time.Since(t) < timeout; time.Sleep(framework.Poll) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(c, c.Core().RESTClient().Get())
		if errProxy != nil {
			break
		}

		ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
		defer cancel()

		result, err = proxyRequest.Namespace(ns).
			Context(ctx).
			Name(service).
			Suffix(path).
			Do().
			Raw()
		if err != nil {
			break
		}
	}
	return string(result), err
}
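// Illustrative call site for makeHttpRequestToService above (a sketch; the
// service name "frontend", the "hostname" path, and the 2-minute timeout are
// assumptions made for this example, not values from the original tests):
//
//	content, err := makeHttpRequestToService(f.ClientSet, f.Namespace.Name,
//		"frontend", "hostname", 2*time.Minute)
//	if err != nil {
//		framework.Failf("Request to frontend service failed: %v", err)
//	}
//	framework.Logf("Service responded with: %q", content)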
func main() {
	flag.Parse()

	glog.Infof("Starting serve_hostnames soak test with queries=%d and podsPerNode=%d upTo=%d",
		*queriesAverage, *podsPerNode, *upTo)

	var spec string
	if *gke != "" {
		spec = filepath.Join(os.Getenv("HOME"), ".config", "gcloud", "kubernetes", "kubeconfig")
	} else {
		spec = filepath.Join(os.Getenv("HOME"), ".kube", "config")
	}
	settings, err := clientcmd.LoadFromFile(spec)
	if err != nil {
		glog.Fatalf("Error loading configuration: %v", err.Error())
	}
	if *gke != "" {
		settings.CurrentContext = *gke
	}
	config, err := clientcmd.NewDefaultClientConfig(*settings, &clientcmd.ConfigOverrides{}).ClientConfig()
	if err != nil {
		glog.Fatalf("Failed to construct config: %v", err)
	}

	c, err := client.New(config)
	if err != nil {
		glog.Fatalf("Failed to make client: %v", err)
	}

	var nodes *api.NodeList
	for start := time.Now(); time.Since(start) < nodeListTimeout; time.Sleep(2 * time.Second) {
		nodes, err = c.Nodes().List(api.ListOptions{})
		if err == nil {
			break
		}
		glog.Warningf("Failed to list nodes: %v", err)
	}
	if err != nil {
		glog.Fatalf("Giving up trying to list nodes: %v", err)
	}

	if len(nodes.Items) == 0 {
		glog.Fatalf("Failed to find any nodes.")
	}

	glog.Infof("Found %d nodes on this cluster:", len(nodes.Items))
	for i, node := range nodes.Items {
		glog.Infof("%d: %s", i, node.Name)
	}

	queries := *queriesAverage * len(nodes.Items) * *podsPerNode

	// Create the namespace
	got, err := c.Namespaces().Create(&api.Namespace{ObjectMeta: api.ObjectMeta{GenerateName: "serve-hostnames-"}})
	if err != nil {
		glog.Fatalf("Failed to create namespace: %v", err)
	}
	ns := got.Name
	defer func(ns string) {
		if err := c.Namespaces().Delete(ns); err != nil {
			glog.Warningf("Failed to delete namespace %s: %v", ns, err)
		} else {
			// wait until the namespace disappears
			for i := 0; i < int(namespaceDeleteTimeout/time.Second); i++ {
				if _, err := c.Namespaces().Get(ns); err != nil {
					if errors.IsNotFound(err) {
						return
					}
				}
				time.Sleep(time.Second)
			}
		}
	}(ns)
	glog.Infof("Created namespace %s", ns)

	// Create a service for these pods.
	glog.Infof("Creating service %s/serve-hostnames", ns)
	// Make several attempts to create a service.
	var svc *api.Service
	for start := time.Now(); time.Since(start) < serviceCreateTimeout; time.Sleep(2 * time.Second) {
		t := time.Now()
		svc, err = c.Services(ns).Create(&api.Service{
			ObjectMeta: api.ObjectMeta{
				Name: "serve-hostnames",
				Labels: map[string]string{
					"name": "serve-hostname",
				},
			},
			Spec: api.ServiceSpec{
				Ports: []api.ServicePort{{
					Protocol:   "TCP",
					Port:       9376,
					TargetPort: intstr.FromInt(9376),
				}},
				Selector: map[string]string{
					"name": "serve-hostname",
				},
			},
		})
		glog.V(4).Infof("Service create %s/serve-hostnames took %v", ns, time.Since(t))
		if err == nil {
			break
		}
		glog.Warningf("After %v failed to create service %s/serve-hostnames: %v", time.Since(start), ns, err)
	}
	if err != nil {
		glog.Warningf("Unable to create service %s/serve-hostnames: %v", ns, err)
		return
	}
	// Clean up service
	defer func() {
		glog.Infof("Cleaning up service %s/serve-hostnames", ns)
		// Make several attempts to delete the service.
		for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
			if err := c.Services(ns).Delete(svc.Name); err == nil {
				return
			}
			glog.Warningf("After %v unable to delete service %s/%s: %v", time.Since(start), ns, svc.Name, err)
		}
	}()

	// Put serve-hostname pods on each node.
	podNames := []string{}
	for i, node := range nodes.Items {
		for j := 0; j < *podsPerNode; j++ {
			podName := fmt.Sprintf("serve-hostname-%d-%d", i, j)
			podNames = append(podNames, podName)
			// Make several attempts
			for start := time.Now(); time.Since(start) < podCreateTimeout; time.Sleep(2 * time.Second) {
				glog.Infof("Creating pod %s/%s on node %s", ns, podName, node.Name)
				t := time.Now()
				_, err = c.Pods(ns).Create(&api.Pod{
					ObjectMeta: api.ObjectMeta{
						Name: podName,
						Labels: map[string]string{
							"name": "serve-hostname",
						},
					},
					Spec: api.PodSpec{
						Containers: []api.Container{
							{
								Name:  "serve-hostname",
								Image: "gcr.io/google_containers/serve_hostname:v1.4",
								Ports: []api.ContainerPort{{ContainerPort: 9376}},
							},
						},
						NodeName: node.Name,
					},
				})
				glog.V(4).Infof("Pod create %s/%s request took %v", ns, podName, time.Since(t))
				if err == nil {
					break
				}
				glog.Warningf("After %s failed to create pod %s/%s: %v", time.Since(start), ns, podName, err)
			}
			if err != nil {
				glog.Warningf("Failed to create pod %s/%s: %v", ns, podName, err)
				return
			}
		}
	}
	// Clean up the pods
	defer func() {
		glog.Info("Cleaning up pods")
		// Make several attempts to delete the pods.
		for _, podName := range podNames {
			for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
				if err = c.Pods(ns).Delete(podName, nil); err == nil {
					break
				}
				glog.Warningf("After %v failed to delete pod %s/%s: %v", time.Since(start), ns, podName, err)
			}
		}
	}()

	glog.Info("Waiting for the serve-hostname pods to be ready")
	for _, podName := range podNames {
		var pod *api.Pod
		for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) {
			pod, err = c.Pods(ns).Get(podName)
			if err != nil {
				glog.Warningf("Get pod %s/%s failed, ignoring for %v: %v", ns, podName, podStartTimeout, err)
				continue
			}
			if pod.Status.Phase == api.PodRunning {
				break
			}
		}
		if pod.Status.Phase != api.PodRunning {
			glog.Warningf("Gave up waiting on pod %s/%s to be running (saw %v)", ns, podName, pod.Status.Phase)
		} else {
			glog.Infof("%s/%s is running", ns, podName)
		}
	}

	proxyRequest, errProxy := e2e.GetServicesProxyRequest(c, c.Get())
	if errProxy != nil {
		glog.Warningf("Get services proxy request failed: %v", errProxy)
		return
	}

	// Wait for the endpoints to propagate.
	for start := time.Now(); time.Since(start) < endpointTimeout; time.Sleep(10 * time.Second) {
		hostname, err := proxyRequest.
			Namespace(ns).
			Name("serve-hostnames").
			DoRaw()
		if err != nil {
			glog.Infof("After %v while making a proxy call got error %v", time.Since(start), err)
			continue
		}
		var r unversioned.Status
		if err := runtime.DecodeInto(api.Codecs.UniversalDecoder(), hostname, &r); err != nil {
			break
		}
		if r.Status == unversioned.StatusFailure {
			glog.Infof("After %v got status %v", time.Since(start), string(hostname))
			continue
		}
		break
	}

	// Repeatedly make requests.
	for iteration := 0; iteration != *upTo; iteration++ {
		responseChan := make(chan string, queries)
		// Use a channel of size *maxPar to throttle the number
		// of in-flight requests to avoid overloading the service.
		inFlight := make(chan struct{}, *maxPar)
		start := time.Now()
		for q := 0; q < queries; q++ {
			go func(i int, query int) {
				inFlight <- struct{}{}
				t := time.Now()
				hostname, err := proxyRequest.
					Namespace(ns).
					Name("serve-hostnames").
					DoRaw()
				glog.V(4).Infof("Proxy call in namespace %s took %v", ns, time.Since(t))
				if err != nil {
					glog.Warningf("Call failed during iteration %d query %d : %v", i, query, err)
					// If the query failed return a string which starts with a character
					// that can't be part of a hostname.
					responseChan <- fmt.Sprintf("!failed in iteration %d to issue query %d: %v", i, query, err)
				} else {
					responseChan <- string(hostname)
				}
				<-inFlight
			}(iteration, q)
		}

		responses := make(map[string]int, *podsPerNode*len(nodes.Items))
		missing := 0
		for q := 0; q < queries; q++ {
			r := <-responseChan
			glog.V(4).Infof("Got response from %s", r)
			responses[r]++
			// If the returned hostname starts with '!' then it indicates
			// an error response.
			if len(r) > 0 && r[0] == '!' {
				glog.V(3).Infof("Got response %s", r)
				missing++
			}
		}
		if missing > 0 {
			glog.Warningf("Missing %d responses out of %d", missing, queries)
		}
		// Report any nodes that did not respond.
		for n, node := range nodes.Items {
			for i := 0; i < *podsPerNode; i++ {
				name := fmt.Sprintf("serve-hostname-%d-%d", n, i)
				if _, ok := responses[name]; !ok {
					glog.Warningf("No response from pod %s on node %s at iteration %d", name, node.Name, iteration)
				}
			}
		}
		glog.Infof("Iteration %d took %v for %d queries (%.2f QPS) with %d missing",
			iteration, time.Since(start), queries-missing,
			float64(queries-missing)/time.Since(start).Seconds(), missing)
	}
}
It("should check that the kubernetes-dashboard instance is alive", func() { framework.Skipf("UI is disabled") By("Checking whether the kubernetes-dashboard service exists.") err := framework.WaitForService(f.Client, uiNamespace, uiServiceName, true, framework.Poll, framework.ServiceStartTimeout) Expect(err).NotTo(HaveOccurred()) By("Checking to make sure the kubernetes-dashboard pods are running") selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": uiAppName})) err = framework.WaitForPodsWithLabelRunning(f.Client, uiNamespace, selector) Expect(err).NotTo(HaveOccurred()) By("Checking to make sure we get a response from the kubernetes-dashboard.") err = wait.Poll(framework.Poll, serverStartTimeout, func() (bool, error) { var status int proxyRequest, errProxy := framework.GetServicesProxyRequest(f.Client, f.Client.Get()) if errProxy != nil { framework.Logf("Get services proxy request failed: %v", errProxy) } // Query against the proxy URL for the kube-ui service. err := proxyRequest.Namespace(uiNamespace). Name(uiServiceName). Timeout(framework.SingleCallTimeout). Do(). StatusCode(&status). Error() if status != http.StatusOK { framework.Logf("Unexpected status from kubernetes-dashboard: %v", status) } else if err != nil { framework.Logf("Request to kube-ui failed: %v", err) }
// Ensures that elasticsearch is running and ready to serve requests
func checkElasticsearchReadiness(f *framework.Framework) error {
	// Check for the existence of the Elasticsearch service.
	By("Checking the Elasticsearch service exists.")
	s := f.ClientSet.Core().Services(api.NamespaceSystem)
	// Make a few attempts to connect. This makes the test robust against
	// being run as the first e2e test just after the e2e cluster has been created.
	var err error
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		if _, err = s.Get("elasticsearch-logging"); err == nil {
			break
		}
		framework.Logf("Attempt to check for the existence of the Elasticsearch service failed after %v", time.Since(start))
	}
	Expect(err).NotTo(HaveOccurred())

	// Wait for the Elasticsearch pods to enter the running state.
	By("Checking to make sure the Elasticsearch pods are running")
	label := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "elasticsearch-logging"}))
	options := api.ListOptions{LabelSelector: label}
	pods, err := f.ClientSet.Core().Pods(api.NamespaceSystem).List(options)
	Expect(err).NotTo(HaveOccurred())
	for _, pod := range pods.Items {
		err = framework.WaitForPodRunningInNamespace(f.ClientSet, &pod)
		Expect(err).NotTo(HaveOccurred())
	}

	By("Checking to make sure we are talking to an Elasticsearch service.")
	// Perform a few checks to make sure this looks like an Elasticsearch cluster.
	var statusCode int
	err = nil
	var body []byte
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(10 * time.Second) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.ClientSet, f.ClientSet.Core().RESTClient().Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			continue
		}
		// Query against the root URL for Elasticsearch.
		response := proxyRequest.Namespace(api.NamespaceSystem).
			Name("elasticsearch-logging").
			Do()
		err = response.Error()
		response.StatusCode(&statusCode)
		if err != nil {
			framework.Logf("After %v proxy call to elasticsearch-logging failed: %v", time.Since(start), err)
			continue
		}
		if int(statusCode) != 200 {
			framework.Logf("After %v Elasticsearch cluster has a bad status: %v", time.Since(start), statusCode)
			continue
		}
		break
	}
	Expect(err).NotTo(HaveOccurred())
	if int(statusCode) != 200 {
		framework.Failf("Elasticsearch cluster has a bad status: %v", statusCode)
	}

	// Now assume we really are talking to an Elasticsearch instance.
	// Check the cluster health.
	By("Checking health of Elasticsearch service.")
	healthy := false
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.ClientSet, f.ClientSet.Core().RESTClient().Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			continue
		}
		body, err = proxyRequest.Namespace(api.NamespaceSystem).
			Name("elasticsearch-logging").
			Suffix("_cluster/health").
			Param("level", "indices").
			DoRaw()
		if err != nil {
			continue
		}
		health := make(map[string]interface{})
		err := json.Unmarshal(body, &health)
		if err != nil {
			framework.Logf("Bad json response from elasticsearch: %v", err)
			continue
		}
		statusIntf, ok := health["status"]
		if !ok {
			framework.Logf("No status field found in cluster health response: %v", health)
			continue
		}
		status := statusIntf.(string)
		if status != "green" && status != "yellow" {
			framework.Logf("Cluster health has bad status: %v", health)
			continue
		}
		if err == nil && ok {
			healthy = true
			break
		}
	}
	if !healthy {
		return fmt.Errorf("After %v elasticsearch cluster is not healthy", graceTime)
	}
	return nil
}
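// A sketch of how a readiness helper like checkElasticsearchReadiness might be
// wired into a Ginkgo suite. The BeforeEach placement and the in-scope `f`
// framework variable are assumptions made for illustration, not taken from the
// original file.
var _ = BeforeEach(func() {
	// Run the readiness checks up front so that an unready Elasticsearch
	// surfaces as a setup error rather than a flaky assertion later on.
	err := checkElasticsearchReadiness(f)
	framework.ExpectNoError(err, "Elasticsearch is not ready to serve requests")
})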
// getMissingLinesCountElasticsearch returns the number of log lines from the
// expected range [0, expectedCount) that were not observed in Elasticsearch.
func getMissingLinesCountElasticsearch(f *framework.Framework, expectedCount int) (int, error) {
	proxyRequest, errProxy := framework.GetServicesProxyRequest(f.ClientSet, f.ClientSet.Core().RESTClient().Get())
	if errProxy != nil {
		return 0, fmt.Errorf("Failed to get services proxy request: %v", errProxy)
	}

	// Ask Elasticsearch to return all the log lines that were tagged with the
	// pod name. Ask for ten times as many log lines because duplication is possible.
	body, err := proxyRequest.Namespace(api.NamespaceSystem).
		Name("elasticsearch-logging").
		Suffix("_search").
		// TODO: Change filter to only match records from current test run
		// after fluent-plugin-kubernetes_metadata_filter is enabled
		// and optimize current query
		Param("q", fmt.Sprintf("tag:*%s*", synthLoggerPodName)).
		Param("size", strconv.Itoa(expectedCount*10)).
		DoRaw()
	if err != nil {
		return 0, fmt.Errorf("Failed to make proxy call to elasticsearch-logging: %v", err)
	}

	var response map[string]interface{}
	err = json.Unmarshal(body, &response)
	if err != nil {
		return 0, fmt.Errorf("Failed to unmarshal response: %v", err)
	}

	hits, ok := response["hits"].(map[string]interface{})
	if !ok {
		return 0, fmt.Errorf("response[hits] not of the expected type: %T", response["hits"])
	}

	h, ok := hits["hits"].([]interface{})
	if !ok {
		return 0, fmt.Errorf("Hits not of the expected type: %T", hits["hits"])
	}

	// Initialize data-structure for observing counts.
	counts := make(map[int]int)

	// Iterate over the hits and populate the observed array.
	for _, e := range h {
		l, ok := e.(map[string]interface{})
		if !ok {
			framework.Logf("Element of hit not of expected type: %T", e)
			continue
		}
		source, ok := l["_source"].(map[string]interface{})
		if !ok {
			framework.Logf("_source not of the expected type: %T", l["_source"])
			continue
		}
		msg, ok := source["log"].(string)
		if !ok {
			framework.Logf("Log not of the expected type: %T", source["log"])
			continue
		}
		lineNumber, err := strconv.Atoi(strings.TrimSpace(msg))
		if err != nil {
			framework.Logf("Log line %s is not a number", msg)
			continue
		}
		if lineNumber < 0 || lineNumber >= expectedCount {
			framework.Logf("Number %d is not valid, expected number from range [0, %d)", lineNumber, expectedCount)
			continue
		}
		// Record the observation of a log line
		// Duplicates are possible and fine, fluentd has at-least-once delivery
		counts[lineNumber]++
	}

	return expectedCount - len(counts), nil
}
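// A minimal polling sketch around getMissingLinesCountElasticsearch: keep
// querying until every expected line has been ingested or the deadline passes.
// The helper name and the 25s poll interval are illustrative assumptions.
func waitForLogsIngestion(f *framework.Framework, expectedCount int, timeout time.Duration) error {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(25 * time.Second) {
		missing, err := getMissingLinesCountElasticsearch(f, expectedCount)
		if err != nil {
			framework.Logf("Failed to get missing lines count -- may retry: %v", err)
			continue
		}
		if missing == 0 {
			return nil
		}
		framework.Logf("Still missing %d of %d log lines", missing, expectedCount)
	}
	return fmt.Errorf("some log lines are still missing after %v", timeout)
}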
// ClusterLevelLoggingWithElasticsearch is an end to end test for cluster level logging.
func ClusterLevelLoggingWithElasticsearch(f *framework.Framework) {
	// graceTime is how long to keep retrying requests for status information.
	const graceTime = 5 * time.Minute
	// ingestionTimeout is how long to keep retrying to wait for all the
	// logs to be ingested.
	const ingestionTimeout = 10 * time.Minute

	// Check for the existence of the Elasticsearch service.
	By("Checking the Elasticsearch service exists.")
	s := f.Client.Services(api.NamespaceSystem)
	// Make a few attempts to connect. This makes the test robust against
	// being run as the first e2e test just after the e2e cluster has been created.
	var err error
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		if _, err = s.Get("elasticsearch-logging"); err == nil {
			break
		}
		framework.Logf("Attempt to check for the existence of the Elasticsearch service failed after %v", time.Since(start))
	}
	Expect(err).NotTo(HaveOccurred())

	// Wait for the Elasticsearch pods to enter the running state.
	By("Checking to make sure the Elasticsearch pods are running")
	label := labels.SelectorFromSet(labels.Set(map[string]string{k8sAppKey: esValue}))
	options := api.ListOptions{LabelSelector: label}
	pods, err := f.Client.Pods(api.NamespaceSystem).List(options)
	Expect(err).NotTo(HaveOccurred())
	for _, pod := range pods.Items {
		err = framework.WaitForPodRunningInNamespace(f.Client, &pod)
		Expect(err).NotTo(HaveOccurred())
	}

	By("Checking to make sure we are talking to an Elasticsearch service.")
	// Perform a few checks to make sure this looks like an Elasticsearch cluster.
	var statusCode float64
	var esResponse map[string]interface{}
	err = nil
	var body []byte
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(10 * time.Second) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.Client, f.Client.Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			continue
		}
		// Query against the root URL for Elasticsearch.
		body, err = proxyRequest.Namespace(api.NamespaceSystem).
			Name("elasticsearch-logging").
			DoRaw()
		if err != nil {
			framework.Logf("After %v proxy call to elasticsearch-logging failed: %v", time.Since(start), err)
			continue
		}
		esResponse, err = bodyToJSON(body)
		if err != nil {
			framework.Logf("After %v failed to convert Elasticsearch JSON response %v to map[string]interface{}: %v", time.Since(start), string(body), err)
			continue
		}
		statusIntf, ok := esResponse["status"]
		if !ok {
			framework.Logf("After %v Elasticsearch response has no status field: %v", time.Since(start), esResponse)
			continue
		}
		statusCode, ok = statusIntf.(float64)
		if !ok {
			// Assume this is a string returning Failure. Retry.
			framework.Logf("After %v expected status to be a float64 but got %v of type %T", time.Since(start), statusIntf, statusIntf)
			continue
		}
		if int(statusCode) != 200 {
			framework.Logf("After %v Elasticsearch cluster has a bad status: %v", time.Since(start), statusCode)
			continue
		}
		break
	}
	Expect(err).NotTo(HaveOccurred())
	if int(statusCode) != 200 {
		framework.Failf("Elasticsearch cluster has a bad status: %v", statusCode)
	}

	// Check to see if we have a cluster_name field.
	clusterName, ok := esResponse["cluster_name"]
	if !ok {
		framework.Failf("No cluster_name field in Elasticsearch response: %v", esResponse)
	}
	if clusterName != "kubernetes-logging" {
		framework.Failf("Connected to wrong cluster %q (expecting kubernetes-logging)", clusterName)
	}

	// Now assume we really are talking to an Elasticsearch instance.
	// Check the cluster health.
	By("Checking health of Elasticsearch service.")
	healthy := false
	for start := time.Now(); time.Since(start) < graceTime; time.Sleep(5 * time.Second) {
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.Client, f.Client.Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			continue
		}
		body, err = proxyRequest.Namespace(api.NamespaceSystem).
			Name("elasticsearch-logging").
			Suffix("_cluster/health").
			Param("level", "indices").
			DoRaw()
		if err != nil {
			continue
		}
		health, err := bodyToJSON(body)
		if err != nil {
			framework.Logf("Bad json response from elasticsearch: %v", err)
			continue
		}
		statusIntf, ok := health["status"]
		if !ok {
			framework.Logf("No status field found in cluster health response: %v", health)
			continue
		}
		status := statusIntf.(string)
		if status != "green" && status != "yellow" {
			framework.Logf("Cluster health has bad status: %v", health)
			continue
		}
		if err == nil && ok {
			healthy = true
			break
		}
	}
	if !healthy {
		framework.Failf("After %v elasticsearch cluster is not healthy", graceTime)
	}

	// Obtain a list of nodes so we can place one synthetic logger on each node.
	nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
	nodeCount := len(nodes.Items)
	if nodeCount == 0 {
		framework.Failf("Failed to find any nodes")
	}
	framework.Logf("Found %d nodes.", len(nodes.Items))

	// Filter out unhealthy nodes.
	// Previous tests may have caused failures of some nodes. Let's skip
	// 'Not Ready' nodes, just in case (there is no need to fail the test).
	framework.FilterNodes(nodes, func(node api.Node) bool {
		return framework.IsNodeConditionSetAsExpected(&node, api.NodeReady, true)
	})
	if len(nodes.Items) < 2 {
		framework.Failf("Less than two nodes were found Ready: %d", len(nodes.Items))
	}
	framework.Logf("Found %d healthy nodes.", len(nodes.Items))

	// Wait for the Fluentd pods to enter the running state.
	By("Checking to make sure the Fluentd pods are running on each healthy node")
	label = labels.SelectorFromSet(labels.Set(map[string]string{k8sAppKey: fluentdValue}))
	options = api.ListOptions{LabelSelector: label}
	fluentdPods, err := f.Client.Pods(api.NamespaceSystem).List(options)
	Expect(err).NotTo(HaveOccurred())
	for _, pod := range fluentdPods.Items {
		if nodeInNodeList(pod.Spec.NodeName, nodes) {
			err = framework.WaitForPodRunningInNamespace(f.Client, &pod)
			Expect(err).NotTo(HaveOccurred())
		}
	}

	// Check if each healthy node has fluentd running on it
	for _, node := range nodes.Items {
		exists := false
		for _, pod := range fluentdPods.Items {
			if pod.Spec.NodeName == node.Name {
				exists = true
				break
			}
		}
		if !exists {
			framework.Failf("Node %v does not have fluentd pod running on it.", node.Name)
		}
	}

	// Create a unique root name for the resources in this test to permit
	// parallel executions of this test.
	// Use a unique namespace for the resources created in this test.
	ns := f.Namespace.Name
	name := "synthlogger"
	// Form a unique name to taint log lines to be collected.
	// Replace '-' characters with '_' to prevent the analyzer from breaking apart names.
	taintName := strings.Replace(ns+name, "-", "_", -1)
	framework.Logf("Tainting log lines with %v", taintName)

	// podNames records the names of the synthetic logging pods that are created in the
	// loop below.
	var podNames []string
	// countTo is the number of log lines emitted (and checked) for each synthetic logging pod.
	const countTo = 100
	// Instantiate a synthetic logger pod on each node.
	for i, node := range nodes.Items {
		podName := fmt.Sprintf("%s-%d", name, i)
		_, err := f.Client.Pods(ns).Create(&api.Pod{
			ObjectMeta: api.ObjectMeta{
				Name:   podName,
				Labels: map[string]string{"name": name},
			},
			Spec: api.PodSpec{
				Containers: []api.Container{
					{
						Name:  "synth-logger",
						Image: "gcr.io/google_containers/ubuntu:14.04",
						// notice: the subshell syntax is escaped with `$$`
						Command: []string{"bash", "-c", fmt.Sprintf("i=0; while ((i < %d)); do echo \"%d %s $i %s\"; i=$$(($i+1)); done", countTo, i, taintName, podName)},
					},
				},
				NodeName:      node.Name,
				RestartPolicy: api.RestartPolicyNever,
			},
		})
		Expect(err).NotTo(HaveOccurred())
		podNames = append(podNames, podName)
	}

	// Cleanup the pods when we are done.
	defer func() {
		for _, pod := range podNames {
			if err = f.Client.Pods(ns).Delete(pod, nil); err != nil {
				framework.Logf("Failed to delete pod %s: %v", pod, err)
			}
		}
	}()

	// Wait for the synthetic logging pods to finish.
	By("Waiting for the pods to succeed.")
	for _, pod := range podNames {
		err = framework.WaitForPodSuccessInNamespace(f.Client, pod, "synth-logger", ns)
		Expect(err).NotTo(HaveOccurred())
	}

	// Make several attempts to observe the logs ingested into Elasticsearch.
	By("Checking all the log lines were ingested into Elasticsearch")
	totalMissing := 0
	expected := nodeCount * countTo
	missingPerNode := []int{}
	for start := time.Now(); time.Since(start) < ingestionTimeout; time.Sleep(25 * time.Second) {
		// Debugging code to report the status of the elasticsearch logging endpoints.
		selector := labels.Set{k8sAppKey: esValue}.AsSelector()
		options := api.ListOptions{LabelSelector: selector}
		esPods, err := f.Client.Pods(api.NamespaceSystem).List(options)
		if err != nil {
			framework.Logf("Attempt to list Elasticsearch nodes encountered a problem -- may retry: %v", err)
			continue
		} else {
			for i, pod := range esPods.Items {
				framework.Logf("pod %d: %s PodIP %s phase %s condition %+v", i, pod.Name, pod.Status.PodIP,
					pod.Status.Phase, pod.Status.Conditions)
			}
		}
		proxyRequest, errProxy := framework.GetServicesProxyRequest(f.Client, f.Client.Get())
		if errProxy != nil {
			framework.Logf("After %v failed to get services proxy request: %v", time.Since(start), errProxy)
			continue
		}

		// Ask Elasticsearch to return all the log lines that were tagged with the underscore
		// version of the name. Ask for twice as many log lines as we expect to check for
		// duplication bugs.
		body, err = proxyRequest.Namespace(api.NamespaceSystem).
			Name("elasticsearch-logging").
			Suffix("_search").
			Param("q", fmt.Sprintf("log:%s", taintName)).
			Param("size", strconv.Itoa(2*expected)).
			DoRaw()
		if err != nil {
			framework.Logf("After %v failed to make proxy call to elasticsearch-logging: %v", time.Since(start), err)
			continue
		}

		response, err := bodyToJSON(body)
		if err != nil {
			framework.Logf("After %v failed to unmarshal response: %v", time.Since(start), err)
			framework.Logf("Body: %s", string(body))
			continue
		}
		hits, ok := response["hits"].(map[string]interface{})
		if !ok {
			framework.Logf("response[hits] not of the expected type: %T", response["hits"])
			continue
		}
		totalF, ok := hits["total"].(float64)
		if !ok {
			framework.Logf("After %v hits[total] not of the expected type: %T", time.Since(start), hits["total"])
			continue
		}
		total := int(totalF)
		if total != expected {
			framework.Logf("After %v expecting to find %d log lines but saw %d", time.Since(start), expected, total)
		}
		h, ok := hits["hits"].([]interface{})
		if !ok {
			framework.Logf("After %v hits not of the expected type: %T", time.Since(start), hits["hits"])
			continue
		}
		// Initialize data-structure for observing counts.
		observed := make([][]int, nodeCount)
		for i := range observed {
			observed[i] = make([]int, countTo)
		}
		// Iterate over the hits and populate the observed array.
		for _, e := range h {
			l, ok := e.(map[string]interface{})
			if !ok {
				framework.Logf("element of hit not of expected type: %T", e)
				continue
			}
			source, ok := l["_source"].(map[string]interface{})
			if !ok {
				framework.Logf("_source not of the expected type: %T", l["_source"])
				continue
			}
			msg, ok := source["log"].(string)
			if !ok {
				framework.Logf("log not of the expected type: %T", source["log"])
				continue
			}
			words := strings.Split(msg, " ")
			if len(words) != 4 {
				framework.Logf("Malformed log line: %s", msg)
				continue
			}
			n, err := strconv.ParseUint(words[0], 10, 0)
			if err != nil {
				framework.Logf("Expecting number of node as first field of %s", msg)
				continue
			}
			if n < 0 || int(n) >= nodeCount {
				framework.Logf("Node count index out of range: %d", nodeCount)
				continue
			}
			index, err := strconv.ParseUint(words[2], 10, 0)
			if err != nil {
				framework.Logf("Expecting number as third field of %s", msg)
				continue
			}
			if index < 0 || index >= countTo {
				framework.Logf("Index value out of range: %d", index)
				continue
			}
			if words[1] != taintName {
				framework.Logf("Elasticsearch query returned unexpected log line: %s", msg)
				continue
			}
			// Record the observation of a log line from node n at the given index.
			observed[n][index]++
		}
		// Make sure we correctly observed the expected log lines from each node.
		totalMissing = 0
		missingPerNode = make([]int, nodeCount)
		incorrectCount := false
		for n := range observed {
			for i, c := range observed[n] {
				if c == 0 {
					totalMissing++
					missingPerNode[n]++
				}
				if c < 0 || c > 1 {
					framework.Logf("Got incorrect count for node %d index %d: %d", n, i, c)
					incorrectCount = true
				}
			}
		}
		if incorrectCount {
			framework.Logf("After %v Elasticsearch still returns duplicated log lines", time.Since(start))
			continue
		}
		if totalMissing != 0 {
			framework.Logf("After %v still missing %d log lines", time.Since(start), totalMissing)
			continue
		}
		framework.Logf("After %s found all %d log lines", time.Since(start), expected)
		return
	}

	for n := range missingPerNode {
		if missingPerNode[n] > 0 {
			framework.Logf("Node %d %s is missing %d logs", n, nodes.Items[n].Name, missingPerNode[n])
			opts := &api.PodLogOptions{}
			body, err = f.Client.Pods(ns).GetLogs(podNames[n], opts).DoRaw()
			if err != nil {
				framework.Logf("Cannot get logs from pod %v", podNames[n])
				continue
			}
			framework.Logf("Pod %s has the following logs: %s", podNames[n], body)

			for _, pod := range fluentdPods.Items {
				if pod.Spec.NodeName == nodes.Items[n].Name {
					body, err = f.Client.Pods(api.NamespaceSystem).GetLogs(pod.Name, opts).DoRaw()
					if err != nil {
						framework.Logf("Cannot get logs from pod %v", pod.Name)
						break
					}
					framework.Logf("Fluentd Pod %s on node %s has the following logs: %s", pod.Name, nodes.Items[n].Name, body)
					break
				}
			}
		}
	}
	framework.Failf("Failed to find all %d log lines", expected)
}
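// bodyToJSON is called throughout the test above but is not shown in this
// excerpt. A plausible minimal implementation, given how its results are used
// (an assumption, not the original helper):
func bodyToJSON(body []byte) (map[string]interface{}, error) {
	var r map[string]interface{}
	if err := json.Unmarshal(body, &r); err != nil {
		// Surface the raw body to make malformed responses easier to debug.
		framework.Logf("Bad JSON: %s", string(body))
		return nil, err
	}
	return r, nil
}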
	f := framework.NewDefaultFramework(uiServiceName)

	It("should check that the kubernetes-dashboard instance is alive", func() {
		By("Checking whether the kubernetes-dashboard service exists.")
		err := framework.WaitForService(f.ClientSet, uiNamespace, uiServiceName, true, framework.Poll, framework.ServiceStartTimeout)
		Expect(err).NotTo(HaveOccurred())

		By("Checking to make sure the kubernetes-dashboard pods are running")
		selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": uiAppName}))
		err = testutils.WaitForPodsWithLabelRunning(f.ClientSet, uiNamespace, selector)
		Expect(err).NotTo(HaveOccurred())

		By("Checking to make sure we get a response from the kubernetes-dashboard.")
		err = wait.Poll(framework.Poll, serverStartTimeout, func() (bool, error) {
			var status int
			proxyRequest, errProxy := framework.GetServicesProxyRequest(f.ClientSet, f.ClientSet.Core().RESTClient().Get())
			if errProxy != nil {
				framework.Logf("Get services proxy request failed: %v", errProxy)
			}
			// Query against the proxy URL for the kube-ui service.
			err := proxyRequest.Namespace(uiNamespace).
				Name(uiServiceName).
				Timeout(framework.SingleCallTimeout).
				Do().
				StatusCode(&status).
				Error()
			if status != http.StatusOK {
				framework.Logf("Unexpected status from kubernetes-dashboard: %v", status)
			} else if err != nil {
				framework.Logf("Request to kube-ui failed: %v", err)
			}