func TestFindUnneededNodes(t *testing.T) { p1 := BuildTestPod("p1", 100, 0) p1.Spec.NodeName = "n1" p2 := BuildTestPod("p2", 300, 0) p2.Spec.NodeName = "n2" p2.Annotations = map[string]string{ "kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicaSet\"}}", } p3 := BuildTestPod("p3", 400, 0) p3.Annotations = map[string]string{ "kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicaSet\"}}", } p3.Spec.NodeName = "n3" p4 := BuildTestPod("p4", 2000, 0) p4.Annotations = map[string]string{ "kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicaSet\"}}", } p4.Spec.NodeName = "n4" n1 := BuildTestNode("n1", 1000, 10) n2 := BuildTestNode("n2", 1000, 10) n3 := BuildTestNode("n3", 1000, 10) n4 := BuildTestNode("n4", 10000, 10) context := AutoscalingContext{ PredicateChecker: simulator.NewTestPredicateChecker(), ScaleDownUtilizationThreshold: 0.35, } result, hints, utilization := FindUnneededNodes(context, []*apiv1.Node{n1, n2, n3, n4}, map[string]time.Time{}, []*apiv1.Pod{p1, p2, p3, p4}, make(map[string]string), simulator.NewUsageTracker(), time.Now()) assert.Equal(t, 1, len(result)) addTime, found := result["n2"] assert.True(t, found) assert.Contains(t, hints, p2.Namespace+"/"+p2.Name) assert.Equal(t, 4, len(utilization)) result["n1"] = time.Now() result2, hints, utilization := FindUnneededNodes(context, []*apiv1.Node{n1, n2, n3, n4}, result, []*apiv1.Pod{p1, p2, p3, p4}, hints, simulator.NewUsageTracker(), time.Now()) assert.Equal(t, 1, len(result2)) addTime2, found := result2["n2"] assert.True(t, found) assert.Equal(t, addTime, addTime2) assert.Equal(t, 4, len(utilization)) }
// In order to meet interface criteria for LeaderElectionConfig we need to // take stop channell as an argument. However, since we are committing a suicide // after loosing mastership we can safely ignore it. func run(_ <-chan struct{}) { kubeClient := createKubeClient() predicateChecker, err := simulator.NewPredicateChecker(kubeClient) if err != nil { glog.Fatalf("Failed to create predicate checker: %v", err) } unschedulablePodLister := kube_util.NewUnschedulablePodLister(kubeClient, kube_api.NamespaceAll) scheduledPodLister := kube_util.NewScheduledPodLister(kubeClient) nodeLister := kube_util.NewNodeLister(kubeClient) lastScaleUpTime := time.Now() lastScaleDownFailedTrial := time.Now() unneededNodes := make(map[string]time.Time) podLocationHints := make(map[string]string) usageTracker := simulator.NewUsageTracker() recorder := createEventRecorder(kubeClient) var cloudProvider cloudprovider.CloudProvider if *cloudProviderFlag == "gce" { // GCE Manager var gceManager *gce.GceManager var gceError error if *cloudConfig != "" { config, fileErr := os.Open(*cloudConfig) if fileErr != nil { glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err) } defer config.Close() gceManager, gceError = gce.CreateGceManager(config) } else { gceManager, gceError = gce.CreateGceManager(nil) } if gceError != nil { glog.Fatalf("Failed to create GCE Manager: %v", err) } cloudProvider, err = gce.BuildGceCloudProvider(gceManager, nodeGroupsFlag) if err != nil { glog.Fatalf("Failed to create GCE cloud provider: %v", err) } } if *cloudProviderFlag == "aws" { var awsManager *aws.AwsManager var awsError error if *cloudConfig != "" { config, fileErr := os.Open(*cloudConfig) if fileErr != nil { glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err) } defer config.Close() awsManager, awsError = aws.CreateAwsManager(config) } else { awsManager, awsError = aws.CreateAwsManager(nil) } if awsError != nil { glog.Fatalf("Failed to create AWS Manager: %v", err) } cloudProvider, err = aws.BuildAwsCloudProvider(awsManager, nodeGroupsFlag) if err != nil { glog.Fatalf("Failed to create AWS cloud provider: %v", err) } } for { select { case <-time.After(*scanInterval): { loopStart := time.Now() updateLastTime("main") nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckGroupsAndNodes(nodes, cloudProvider); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } allUnschedulablePods, err := unschedulablePodLister.List() if err != nil { glog.Errorf("Failed to list unscheduled pods: %v", err) continue } allScheduled, err := scheduledPodLister.List() if err != nil { glog.Errorf("Failed to list scheduled pods: %v", err) continue } // We need to reset all pods that have been marked as unschedulable not after // the newest node became available for the scheduler. allNodesAvailableTime := GetAllNodesAvailableTime(nodes) podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime) ResetPodScheduledCondition(kubeClient, podsToReset) // We need to check whether pods marked as unschedulable are actually unschedulable. // This should prevent from adding unnecessary nodes. Example of such situation: // - CA and Scheduler has slightly different configuration // - Scheduler can't schedule a pod and marks it as unschedulable // - CA added a node which should help the pod // - Scheduler doesn't schedule the pod on the new node // because according to it logic it doesn't fit there // - CA see the pod is still unschedulable, so it adds another node to help it // // With the check enabled the last point won't happen because CA will ignore a pod // which is supposed to schedule on an existing node. // // Without below check cluster might be unnecessary scaled up to the max allowed size // in the describe situation. schedulablePodsPresent := false if *verifyUnschedulablePods { newUnschedulablePodsToHelp := FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker) if len(newUnschedulablePodsToHelp) != len(unschedulablePodsToHelp) { glog.V(2).Info("Schedulable pods present") schedulablePodsPresent = true } unschedulablePodsToHelp = newUnschedulablePodsToHelp } if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else if *maxNodesTotal > 0 && len(nodes) >= *maxNodesTotal { glog.V(1).Info("Max total nodes in cluster reached") } else { scaleUpStart := time.Now() updateLastTime("scaleup") scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, cloudProvider, kubeClient, predicateChecker, recorder, *maxNodesTotal) updateDuration("scaleup", scaleUpStart) if err != nil { glog.Errorf("Failed to scale up: %v", err) continue } else { if scaledUp { lastScaleUpTime = time.Now() // No scale down in this iteration. continue } } } if *scaleDownEnabled { unneededStart := time.Now() // In dry run only utilization is updated calculateUnneededOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) || lastScaleDownFailedTrial.Add(*scaleDownTrialInterval).After(time.Now()) || schedulablePodsPresent glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+ "lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly, lastScaleUpTime, lastScaleDownFailedTrial, schedulablePodsPresent) updateLastTime("findUnneeded") glog.V(4).Infof("Calculating unneded nodes") usageTracker.CleanUp(time.Now().Add(-(*scaleDownUnneededTime))) unneededNodes, podLocationHints = FindUnneededNodes( nodes, unneededNodes, *scaleDownUtilizationThreshold, allScheduled, predicateChecker, podLocationHints, usageTracker, time.Now()) updateDuration("findUnneeded", unneededStart) for key, val := range unneededNodes { if glog.V(4) { glog.V(4).Infof("%s is unneeded since %s duration %s", key, val.String(), time.Now().Sub(val).String()) } } if !calculateUnneededOnly { glog.V(4).Infof("Starting scale down") scaleDownStart := time.Now() updateLastTime("scaledown") result, err := ScaleDown( nodes, unneededNodes, *scaleDownUnneededTime, allScheduled, cloudProvider, kubeClient, predicateChecker, podLocationHints, usageTracker) updateDuration("scaledown", scaleDownStart) // TODO: revisit result handling if err != nil { glog.Errorf("Failed to scale down: %v", err) } else { if result == ScaleDownError || result == ScaleDownNoNodeDeleted { lastScaleDownFailedTrial = time.Now() } } } } updateDuration("main", loopStart) } } } }
func main() { glog.Infof("Cluster Autoscaler %s", ClusterAutoscalerVersion) flag.Var(&nodeGroupsFlag, "nodes", "sets min,max size and other configuration data for a node group in a format accepted by cloud provider."+ "Can be used multiple times. Format: <min>:<max>:<other...>") flag.Parse() go func() { http.Handle("/metrics", prometheus.Handler()) err := http.ListenAndServe(*address, nil) glog.Fatalf("Failed to start metrics: %v", err) }() url, err := url.Parse(*kubernetes) if err != nil { glog.Fatalf("Failed to parse Kuberentes url: %v", err) } // Configuration kubeConfig, err := config.GetKubeClientConfig(url) if err != nil { glog.Fatalf("Failed to build Kuberentes client configuration: %v", err) } kubeClient := kube_client.NewOrDie(kubeConfig) predicateChecker, err := simulator.NewPredicateChecker(kubeClient) if err != nil { glog.Fatalf("Failed to create predicate checker: %v", err) } unschedulablePodLister := NewUnschedulablePodLister(kubeClient) scheduledPodLister := NewScheduledPodLister(kubeClient) nodeLister := NewNodeLister(kubeClient) lastScaleUpTime := time.Now() lastScaleDownFailedTrial := time.Now() unneededNodes := make(map[string]time.Time) podLocationHints := make(map[string]string) usageTracker := simulator.NewUsageTracker() eventBroadcaster := kube_record.NewBroadcaster() eventBroadcaster.StartLogging(glog.Infof) eventBroadcaster.StartRecordingToSink(kubeClient.Events("")) recorder := eventBroadcaster.NewRecorder(kube_api.EventSource{Component: "cluster-autoscaler"}) var cloudProvider cloudprovider.CloudProvider if *cloudProviderFlag == "gce" { // GCE Manager var gceManager *gce.GceManager var gceError error if *cloudConfig != "" { config, fileErr := os.Open(*cloudConfig) if fileErr != nil { glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err) } defer config.Close() gceManager, gceError = gce.CreateGceManager(config) } else { gceManager, gceError = gce.CreateGceManager(nil) } if gceError != nil { glog.Fatalf("Failed to create GCE Manager: %v", err) } cloudProvider, err = gce.BuildGceCloudProvider(gceManager, nodeGroupsFlag) if err != nil { glog.Fatalf("Failed to create GCE cloud provider: %v", err) } } for { select { case <-time.After(*scanInterval): { loopStart := time.Now() updateLastTime("main") nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckGroupsAndNodes(nodes, cloudProvider); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } allUnschedulablePods, err := unschedulablePodLister.List() if err != nil { glog.Errorf("Failed to list unscheduled pods: %v", err) continue } allScheduled, err := scheduledPodLister.List() if err != nil { glog.Errorf("Failed to list scheduled pods: %v", err) continue } // We need to reset all pods that have been marked as unschedulable not after // the newest node became available for the scheduler. allNodesAvailableTime := GetAllNodesAvailableTime(nodes) podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime) ResetPodScheduledCondition(kubeClient, podsToReset) // We need to check whether pods marked as unschedulable are actually unschedulable. // This should prevent from adding unnecessary nodes. Example of such situation: // - CA and Scheduler has slightly different configuration // - Scheduler can't schedule a pod and marks it as unschedulable // - CA added a node which should help the pod // - Scheduler doesn't schedule the pod on the new node // because according to it logic it doesn't fit there // - CA see the pod is still unschedulable, so it adds another node to help it // // With the check enabled the last point won't happen because CA will ignore a pod // which is supposed to schedule on an existing node. // // Without below check cluster might be unnecessary scaled up to the max allowed size // in the describe situation. schedulablePodsPresent := false if *verifyUnschedulablePods { newUnschedulablePodsToHelp := FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker) if len(newUnschedulablePodsToHelp) != len(unschedulablePodsToHelp) { glog.V(2).Info("Schedulable pods present") schedulablePodsPresent = true } unschedulablePodsToHelp = newUnschedulablePodsToHelp } if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else { scaleUpStart := time.Now() updateLastTime("scaleup") scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, cloudProvider, kubeClient, predicateChecker, recorder) updateDuration("scaleup", scaleUpStart) if err != nil { glog.Errorf("Failed to scale up: %v", err) continue } else { if scaledUp { lastScaleUpTime = time.Now() // No scale down in this iteration. continue } } } if *scaleDownEnabled { unneededStart := time.Now() // In dry run only utilization is updated calculateUnneededOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) || lastScaleDownFailedTrial.Add(*scaleDownTrialInterval).After(time.Now()) || schedulablePodsPresent glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+ "lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly, lastScaleUpTime, lastScaleDownFailedTrial, schedulablePodsPresent) updateLastTime("findUnneeded") glog.V(4).Infof("Calculating unneded nodes") usageTracker.CleanUp(time.Now().Add(-(*scaleDownUnneededTime))) unneededNodes, podLocationHints = FindUnneededNodes( nodes, unneededNodes, *scaleDownUtilizationThreshold, allScheduled, predicateChecker, podLocationHints, usageTracker, time.Now()) updateDuration("findUnneeded", unneededStart) for key, val := range unneededNodes { if glog.V(4) { glog.V(4).Infof("%s is unneeded since %s duration %s", key, val.String(), time.Now().Sub(val).String()) } } if !calculateUnneededOnly { glog.V(4).Infof("Starting scale down") scaleDownStart := time.Now() updateLastTime("scaledown") result, err := ScaleDown( nodes, unneededNodes, *scaleDownUnneededTime, allScheduled, cloudProvider, kubeClient, predicateChecker, podLocationHints, usageTracker) updateDuration("scaledown", scaleDownStart) // TODO: revisit result handling if err != nil { glog.Errorf("Failed to scale down: %v", err) } else { if result == ScaleDownError || result == ScaleDownNoNodeDeleted { lastScaleDownFailedTrial = time.Now() } } } } updateDuration("main", loopStart) } } } }