func (cli *HeapsterMetricsClient) GetNodeMetrics(nodeName string, selector labels.Selector) ([]metricsapi.NodeMetrics, error) { params := map[string]string{"labelSelector": selector.String()} path, err := nodeMetricsUrl(nodeName) if err != nil { return []metricsapi.NodeMetrics{}, err } resultRaw, err := GetHeapsterMetrics(cli, path, params) if err != nil { return []metricsapi.NodeMetrics{}, err } metrics := make([]metricsapi.NodeMetrics, 0) if len(nodeName) == 0 { metricsList := metricsapi.NodeMetricsList{} err = json.Unmarshal(resultRaw, &metricsList) if err != nil { return []metricsapi.NodeMetrics{}, fmt.Errorf("failed to unmarshall heapster response: %v", err) } metrics = append(metrics, metricsList.Items...) } else { var singleMetric metricsapi.NodeMetrics err = json.Unmarshal(resultRaw, &singleMetric) if err != nil { return []metricsapi.NodeMetrics{}, fmt.Errorf("failed to unmarshall heapster response: %v", err) } metrics = append(metrics, singleMetric) } return metrics, nil }
func (cli *HeapsterMetricsClient) GetPodMetrics(namespace string, podName string, allNamespaces bool, selector labels.Selector) ([]metricsapi.PodMetrics, error) { if allNamespaces { namespace = api.NamespaceAll } path, err := podMetricsUrl(namespace, podName) if err != nil { return []metricsapi.PodMetrics{}, err } params := map[string]string{"labelSelector": selector.String()} allMetrics := make([]metricsapi.PodMetrics, 0) resultRaw, err := GetHeapsterMetrics(cli, path, params) if err != nil { return []metricsapi.PodMetrics{}, err } if len(podName) == 0 { metrics := metricsapi.PodMetricsList{} err = json.Unmarshal(resultRaw, &metrics) if err != nil { return []metricsapi.PodMetrics{}, fmt.Errorf("failed to unmarshall heapster response: %v", err) } allMetrics = append(allMetrics, metrics.Items...) } else { var singleMetric metricsapi.PodMetrics err = json.Unmarshal(resultRaw, &singleMetric) if err != nil { return []metricsapi.PodMetrics{}, fmt.Errorf("failed to unmarshall heapster response: %v", err) } allMetrics = append(allMetrics, singleMetric) } return allMetrics, nil }
// LabelsSelectorParam adds the given selector as a query parameter func (r *Request) LabelsSelectorParam(s labels.Selector) *Request { if r.err != nil { return r } if s == nil { return r } if s.Empty() { return r } return r.setParam(metav1.LabelSelectorQueryParam(r.content.GroupVersion.String()), s.String()) }
// getContainerRestarts returns the count of container restarts across all pods matching the given labelSelector, // and a list of nodenames across which these containers restarted. func getContainerRestarts(c clientset.Interface, ns string, labelSelector labels.Selector) (int, []string) { options := v1.ListOptions{LabelSelector: labelSelector.String()} pods, err := c.Core().Pods(ns).List(options) framework.ExpectNoError(err) failedContainers := 0 containerRestartNodes := sets.NewString() for _, p := range pods.Items { for _, v := range testutils.FailedContainers(&p) { failedContainers = failedContainers + v.Restarts containerRestartNodes.Insert(p.Spec.NodeName) } } return failedContainers, containerRestartNodes.List() }
// filterLabels returns a list of pods which have labels. func filterLabels(selectors map[string]string, cli clientset.Interface, ns string) (*v1.PodList, error) { var err error var selector labels.Selector var pl *v1.PodList // List pods based on selectors. This might be a tiny optimization rather then filtering // everything manually. if len(selectors) > 0 { selector = labels.SelectorFromSet(labels.Set(selectors)) options := v1.ListOptions{LabelSelector: selector.String()} pl, err = cli.Core().Pods(ns).List(options) } else { pl, err = cli.Core().Pods(ns).List(v1.ListOptions{}) } return pl, err }
func (h *HeapsterMetricsClient) GetResourceMetric(resource v1.ResourceName, namespace string, selector labels.Selector) (PodResourceInfo, time.Time, error) { metricPath := fmt.Sprintf("/apis/metrics/v1alpha1/namespaces/%s/pods", namespace) params := map[string]string{"labelSelector": selector.String()} resultRaw, err := h.services. ProxyGet(h.heapsterScheme, h.heapsterService, h.heapsterPort, metricPath, params). DoRaw() if err != nil { return nil, time.Time{}, fmt.Errorf("failed to get heapster service: %v", err) } glog.V(4).Infof("Heapster metrics result: %s", string(resultRaw)) metrics := metricsapi.PodMetricsList{} err = json.Unmarshal(resultRaw, &metrics) if err != nil { return nil, time.Time{}, fmt.Errorf("failed to unmarshal heapster response: %v", err) } if len(metrics.Items) == 0 { return nil, time.Time{}, fmt.Errorf("no metrics returned from heapster") } res := make(PodResourceInfo, len(metrics.Items)) for _, m := range metrics.Items { podSum := int64(0) missing := len(m.Containers) == 0 for _, c := range m.Containers { resValue, found := c.Usage[v1.ResourceName(resource)] if !found { missing = true glog.V(2).Infof("missing resource metric %v for container %s in pod %s/%s", resource, c.Name, namespace, m.Name) continue } podSum += resValue.MilliValue() } if !missing { res[m.Name] = int64(podSum) } } timestamp := metrics.Items[0].Timestamp.Time return res, timestamp, nil }
func NewPodStore(c clientset.Interface, namespace string, label labels.Selector, field fields.Selector) *PodStore { lw := &cache.ListWatch{ ListFunc: func(options v1.ListOptions) (runtime.Object, error) { options.LabelSelector = label.String() options.FieldSelector = field.String() obj, err := c.Core().Pods(namespace).List(options) return runtime.Object(obj), err }, WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { options.LabelSelector = label.String() options.FieldSelector = field.String() return c.Core().Pods(namespace).Watch(options) }, } store := cache.NewStore(cache.MetaNamespaceKeyFunc) stopCh := make(chan struct{}) reflector := cache.NewReflector(lw, &v1.Pod{}, store, 0) reflector.RunUntil(stopCh) return &PodStore{Store: store, stopCh: stopCh, Reflector: reflector} }
// Wait up to 10 minutes for all matching pods to become Running and at least one // matching pod exists. func WaitForPodsWithLabelRunning(c clientset.Interface, ns string, label labels.Selector) error { running := false PodStore := NewPodStore(c, ns, label, fields.Everything()) defer PodStore.Stop() waitLoop: for start := time.Now(); time.Since(start) < 10*time.Minute; time.Sleep(5 * time.Second) { pods := PodStore.List() if len(pods) == 0 { continue waitLoop } for _, p := range pods { if p.Status.Phase != v1.PodRunning { continue waitLoop } } running = true break } if !running { return fmt.Errorf("Timeout while waiting for pods with labels %q to be running", label.String()) } return nil }
func (h *HeapsterMetricsClient) GetRawMetric(metricName string, namespace string, selector labels.Selector) (PodMetricsInfo, time.Time, error) { podList, err := h.podsGetter.Pods(namespace).List(v1.ListOptions{LabelSelector: selector.String()}) if err != nil { return nil, time.Time{}, fmt.Errorf("failed to get pod list while fetching metrics: %v", err) } if len(podList.Items) == 0 { return nil, time.Time{}, fmt.Errorf("no pods matched the provided selector") } podNames := make([]string, len(podList.Items)) for i, pod := range podList.Items { podNames[i] = pod.Name } now := time.Now() startTime := now.Add(heapsterQueryStart) metricPath := fmt.Sprintf("/api/v1/model/namespaces/%s/pod-list/%s/metrics/%s", namespace, strings.Join(podNames, ","), metricName) resultRaw, err := h.services. ProxyGet(h.heapsterScheme, h.heapsterService, h.heapsterPort, metricPath, map[string]string{"start": startTime.Format(time.RFC3339)}). DoRaw() if err != nil { return nil, time.Time{}, fmt.Errorf("failed to get heapster service: %v", err) } var metrics heapster.MetricResultList err = json.Unmarshal(resultRaw, &metrics) if err != nil { return nil, time.Time{}, fmt.Errorf("failed to unmarshal heapster response: %v", err) } glog.V(4).Infof("Heapster metrics result: %s", string(resultRaw)) if len(metrics.Items) != len(podNames) { // if we get too many metrics or two few metrics, we have no way of knowing which metric goes to which pod // (note that Heapster returns *empty* metric items when a pod does not exist or have that metric, so this // does not cover the "missing metric entry" case) return nil, time.Time{}, fmt.Errorf("requested metrics for %v pods, got metrics for %v", len(podNames), len(metrics.Items)) } var timestamp *time.Time res := make(PodMetricsInfo, len(metrics.Items)) for i, podMetrics := range metrics.Items { val, podTimestamp, hadMetrics := collapseTimeSamples(podMetrics, time.Minute) if hadMetrics { res[podNames[i]] = val if timestamp == nil || podTimestamp.Before(*timestamp) { timestamp = &podTimestamp } } } if timestamp == nil { timestamp = &time.Time{} } return res, *timestamp, nil }
// GetResourceReplicas calculates the desired replica count based on a target resource utilization percentage // of the given resource for pods matching the given selector in the given namespace, and the current replica count func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUtilization int32, resource v1.ResourceName, namespace string, selector labels.Selector) (replicaCount int32, utilization int32, timestamp time.Time, err error) { metrics, timestamp, err := c.metricsClient.GetResourceMetric(resource, namespace, selector) if err != nil { return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err) } podList, err := c.podsGetter.Pods(namespace).List(v1.ListOptions{LabelSelector: selector.String()}) if err != nil { return 0, 0, time.Time{}, fmt.Errorf("unable to get pods while calculating replica count: %v", err) } if len(podList.Items) == 0 { return 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count") } requests := make(map[string]int64, len(podList.Items)) readyPodCount := 0 unreadyPods := sets.NewString() missingPods := sets.NewString() for _, pod := range podList.Items { podSum := int64(0) for _, container := range pod.Spec.Containers { if containerRequest, ok := container.Resources.Requests[resource]; ok { podSum += containerRequest.MilliValue() } else { return 0, 0, time.Time{}, fmt.Errorf("missing request for %s on container %s in pod %s/%s", resource, container.Name, namespace, pod.Name) } } requests[pod.Name] = podSum if pod.Status.Phase != v1.PodRunning || !v1.IsPodReady(&pod) { // save this pod name for later, but pretend it doesn't exist for now unreadyPods.Insert(pod.Name) delete(metrics, pod.Name) continue } if _, found := metrics[pod.Name]; !found { // save this pod name for later, but pretend it doesn't exist for now missingPods.Insert(pod.Name) continue } readyPodCount++ } if len(metrics) == 0 { return 0, 0, time.Time{}, fmt.Errorf("did not receive metrics for any ready pods") } usageRatio, utilization, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization) if err != nil { return 0, 0, time.Time{}, err } rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0 if !rebalanceUnready && len(missingPods) == 0 { if math.Abs(1.0-usageRatio) <= tolerance { // return the current replicas if the change would be too small return currentReplicas, utilization, timestamp, nil } // if we don't have any unready or missing pods, we can calculate the new replica count now return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, timestamp, nil } if len(missingPods) > 0 { if usageRatio < 1.0 { // on a scale-down, treat missing pods as using 100% of the resource request for podName := range missingPods { metrics[podName] = requests[podName] } } else if usageRatio > 1.0 { // on a scale-up, treat missing pods as using 0% of the resource request for podName := range missingPods { metrics[podName] = 0 } } } if rebalanceUnready { // on a scale-up, treat unready pods as using 0% of the resource request for podName := range unreadyPods { metrics[podName] = 0 } } // re-run the utilization calculation with our new numbers newUsageRatio, _, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization) if err != nil { return 0, utilization, time.Time{}, err } if math.Abs(1.0-newUsageRatio) <= tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) { // return the current replicas if the change would be too small, // or if the new usage ratio would cause a change in scale direction return currentReplicas, utilization, timestamp, nil } // return the result, where the number of replicas considered is // however many replicas factored into our calculation return int32(math.Ceil(newUsageRatio * float64(len(metrics)))), utilization, timestamp, nil }