func runResourceTrackingTest(f *framework.Framework, podsPerNode int, nodeNames sets.String, rm *framework.ResourceMonitor,
	expectedCPU map[string]map[float64]float64, expectedMemory framework.ResourceUsagePerContainer) {
	numNodes := nodeNames.Len()
	totalPods := podsPerNode * numNodes
	By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
	rcName := fmt.Sprintf("resource%d-%s", totalPods, string(uuid.NewUUID()))

	// TODO: Use a more realistic workload
	Expect(framework.RunRC(testutils.RCConfig{
		Client:         f.ClientSet,
		InternalClient: f.InternalClientset,
		Name:           rcName,
		Namespace:      f.Namespace.Name,
		Image:          framework.GetPauseImageName(f.ClientSet),
		Replicas:       totalPods,
	})).NotTo(HaveOccurred())

	// Log once and flush the stats.
	rm.LogLatest()
	rm.Reset()

	By("Start monitoring resource usage")
	// Periodically dump the cpu summary until the deadline is met.
	// Note that without calling framework.ResourceMonitor.Reset(), the stats
	// would occupy increasingly more memory. This should be fine
	// for the current test duration, but we should reclaim the
	// entries if we plan to monitor longer (e.g., 8 hours).
	deadline := time.Now().Add(monitoringTime)
	for time.Now().Before(deadline) {
		timeLeft := deadline.Sub(time.Now())
		framework.Logf("Still running...%v left", timeLeft)
		if timeLeft < reportingPeriod {
			time.Sleep(timeLeft)
		} else {
			time.Sleep(reportingPeriod)
		}
		logPodsOnNodes(f.ClientSet, nodeNames.List())
	}

	By("Reporting overall resource usage")
	logPodsOnNodes(f.ClientSet, nodeNames.List())
	usageSummary, err := rm.GetLatest()
	Expect(err).NotTo(HaveOccurred())
	// TODO(random-liu): Remove the original log when we migrate to new perfdash
	framework.Logf("%s", rm.FormatResourceUsage(usageSummary))
	// Log perf result
	framework.PrintPerfData(framework.ResourceUsageToPerfData(rm.GetMasterNodeLatest(usageSummary)))
	verifyMemoryLimits(f.ClientSet, expectedMemory, usageSummary)

	cpuSummary := rm.GetCPUSummary()
	framework.Logf("%s", rm.FormatCPUSummary(cpuSummary))
	// Log perf result
	framework.PrintPerfData(framework.CPUUsageToPerfData(rm.GetMasterNodeCPUSummary(cpuSummary)))
	verifyCPULimits(expectedCPU, cpuSummary)

	By("Deleting the RC")
	framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, rcName)
}
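// The sketch below is not part of the original file; it only illustrates one
// way runResourceTrackingTest might be driven from a Ginkgo spec. The
// resourceTest struct and the rTests table are illustrative assumptions; only
// f, nodeNames, and rm correspond to values set up elsewhere in the test.
//
//	type resourceTest struct {
//		podsPerNode int
//		cpuLimits   map[string]map[float64]float64
//		memLimits   framework.ResourceUsagePerContainer
//	}
//
//	for _, testArg := range rTests {
//		itArg := testArg
//		name := fmt.Sprintf("resource tracking for %d pods per node", itArg.podsPerNode)
//		It(name, func() {
//			runResourceTrackingTest(f, itArg.podsPerNode, nodeNames, rm, itArg.cpuLimits, itArg.memLimits)
//		})
//	}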
		Client:         f.ClientSet,
		InternalClient: f.InternalClientset,
		Name:           rcName,
		Namespace:      f.Namespace.Name,
		Image:          framework.GetPauseImageName(f.ClientSet),
		Replicas:       totalPods,
		NodeSelector:   nodeLabels,
	})).NotTo(HaveOccurred())

	// Perform a sanity check so that we know all desired pods are
	// running on the nodes according to kubelet. The timeout is set to
	// only 30 seconds here because framework.RunRC already waited for all pods to
	// transition to the running status.
	Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, f.Namespace.Name, totalPods, time.Second*30)).NotTo(HaveOccurred())
	if resourceMonitor != nil {
		resourceMonitor.LogLatest()
	}

	By("Deleting the RC")
	framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, rcName)
	// Check that the pods really are gone by querying /runningpods on the
	// node. The /runningpods handler checks the container runtime (or its
	// cache) and returns a list of running pods. Some possible causes of
	// failures are:
	//   - kubelet deadlock
	//   - a bug in graceful termination (if it is enabled)
	//   - docker slow to delete pods (or resource problems causing slowness)
	start := time.Now()
	Expect(waitTillNPodsRunningOnNodes(f.ClientSet, nodeNames, rcName, f.Namespace.Name, 0, itArg.timeout)).NotTo(HaveOccurred())
	framework.Logf("Deleting %d pods on %d nodes completed in %v after the RC was deleted", totalPods, len(nodeNames),
		time.Since(start))