func main() { flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+ "Can be used multiple times. Format: <min>:<max>:<migurl>") flag.Parse() go func() { http.Handle("/metrics", prometheus.Handler()) err := http.ListenAndServe(*address, nil) glog.Fatalf("Failed to start metrics: %v", err) }() url, err := url.Parse(*kubernetes) if err != nil { glog.Fatalf("Failed to parse Kuberentes url: %v", err) } // Configuration kubeConfig, err := config.GetKubeClientConfig(url) if err != nil { glog.Fatalf("Failed to build Kuberentes client configuration: %v", err) } migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag)) for i := range migConfigFlag { migConfigs = append(migConfigs, &migConfigFlag[i]) } // GCE Manager var gceManager *gce.GceManager var gceError error if *cloudConfig != "" { config, fileErr := os.Open(*cloudConfig) if fileErr != nil { glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err) } defer config.Close() gceManager, gceError = gce.CreateGceManager(migConfigs, config) } else { gceManager, gceError = gce.CreateGceManager(migConfigs, nil) } if gceError != nil { glog.Fatalf("Failed to create GCE Manager: %v", err) } kubeClient := kube_client.NewOrDie(kubeConfig) predicateChecker, err := simulator.NewPredicateChecker(kubeClient) if err != nil { glog.Fatalf("Failed to create predicate checker: %v", err) } unschedulablePodLister := NewUnschedulablePodLister(kubeClient) scheduledPodLister := NewScheduledPodLister(kubeClient) nodeLister := NewNodeLister(kubeClient) lastScaleUpTime := time.Now() lastScaleDownFailedTrial := time.Now() unneededNodes := make(map[string]time.Time) eventBroadcaster := kube_record.NewBroadcaster() eventBroadcaster.StartLogging(glog.Infof) eventBroadcaster.StartRecordingToSink(kubeClient.Events("")) recorder := eventBroadcaster.NewRecorder(kube_api.EventSource{Component: "cluster-autoscaler"}) for { select { case <-time.After(*scanInterval): { loopStart := time.Now() updateLastTime("main") nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckMigsAndNodes(nodes, gceManager); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } allUnschedulablePods, err := unschedulablePodLister.List() if err != nil { glog.Errorf("Failed to list unscheduled pods: %v", err) continue } allScheduled, err := scheduledPodLister.List() if err != nil { glog.Errorf("Failed to list scheduled pods: %v", err) continue } // We need to reset all pods that have been marked as unschedulable not after // the newest node became available for the scheduler. allNodesAvailableTime := GetAllNodesAvailableTime(nodes) podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime) ResetPodScheduledCondition(kubeClient, podsToReset) // We need to check whether pods marked as unschedulable are actually unschedulable. // This should prevent from adding unnecessary nodes. Example of such situation: // - CA and Scheduler has slightly different configuration // - Scheduler can't schedule a pod and marks it as unschedulable // - CA added a node which should help the pod // - Scheduler doesn't schedule the pod on the new node // because according to it logic it doesn't fit there // - CA see the pod is still unschedulable, so it adds another node to help it // // With the check enabled the last point won't happen because CA will ignore a pod // which is supposed to schedule on an existing node. // // Without below check cluster might be unnecessary scaled up to the max allowed size // in the describe situation. schedulablePodsPresent := false if *verifyUnschedulablePods { newUnschedulablePodsToHelp := FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker) if len(newUnschedulablePodsToHelp) != len(unschedulablePodsToHelp) { glog.V(2).Info("Schedulable pods present") schedulablePodsPresent = true } unschedulablePodsToHelp = newUnschedulablePodsToHelp } if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else { scaleUpStart := time.Now() updateLastTime("scaleup") scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, migConfigs, gceManager, kubeClient, predicateChecker, recorder) updateDuration("scaleup", scaleUpStart) if err != nil { glog.Errorf("Failed to scale up: %v", err) continue } else { if scaledUp { lastScaleUpTime = time.Now() // No scale down in this iteration. continue } } } if *scaleDownEnabled { unneededStart := time.Now() // In dry run only utilization is updated calculateUnneededOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) || lastScaleDownFailedTrial.Add(*scaleDownTrialInterval).After(time.Now()) || schedulablePodsPresent glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+ "lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly, lastScaleUpTime, lastScaleDownFailedTrial, schedulablePodsPresent) updateLastTime("findUnneeded") glog.V(4).Infof("Calculating unneded nodes") unneededNodes = FindUnneededNodes( nodes, unneededNodes, *scaleDownUtilizationThreshold, allScheduled, predicateChecker) updateDuration("findUnneeded", unneededStart) for key, val := range unneededNodes { if glog.V(4) { glog.V(4).Infof("%s is unneeded since %s duration %s", key, val.String(), time.Now().Sub(val).String()) } } if !calculateUnneededOnly { glog.V(4).Infof("Starting scale down") scaleDownStart := time.Now() updateLastTime("scaledown") result, err := ScaleDown( nodes, unneededNodes, *scaleDownUnneededTime, allScheduled, gceManager, kubeClient, predicateChecker) updateDuration("scaledown", scaleDownStart) // TODO: revisit result handling if err != nil { glog.Errorf("Failed to scale down: %v", err) } else { if result == ScaleDownNodeDeleted { // Clean the map with unneeded nodes to be super sure that the simulated // deletions are made in the new context. unneededNodes = make(map[string]time.Time, len(unneededNodes)) } else { if result == ScaleDownError || result == ScaleDownNoNodeDeleted { lastScaleDownFailedTrial = time.Now() } } } } } updateDuration("main", loopStart) } } } }
func main() { flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+ "Can be used multiple times. Format: <min>:<max>:<migurl>") flag.Parse() url, err := url.Parse(*kubernetes) if err != nil { glog.Fatalf("Failed to parse Kuberentes url: %v", err) } kubeConfig, err := config.GetKubeClientConfig(url) if err != nil { glog.Fatalf("Failed to build Kuberentes client configuration: %v", err) } kubeClient := kube_client.NewOrDie(kubeConfig) unschedulablePodLister := NewUnschedulablePodLister(kubeClient) nodeLister := NewNodeLister(kubeClient) migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag)) for i := range migConfigFlag { migConfigs = append(migConfigs, &migConfigFlag[i]) } gceManager, err := gce.CreateGceManager(migConfigs) if err != nil { glog.Fatalf("Failed to create GCE Manager %v", err) } predicateChecker := simulator.NewPredicateChecker() for { select { case <-time.After(time.Minute): { nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckMigsAndNodes(nodes, gceManager); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } pods, err := unschedulablePodLister.List() if err != nil { glog.Errorf("Failed to list unscheduled pods: %v", err) continue } // We need to reset all pods that have been marked as unschedulable not after // the newest node became available for the scheduler. allNodesAvailableTime := GetAllNodesAvailableTime(nodes) resetOldPods(kubeClient, pods, allNodesAvailableTime) // From now on we only care about unschedulable pods that were marked after the newest // node became available for the scheduler. pods = filterOldPods(pods, allNodesAvailableTime) if len(pods) == 0 { glog.V(1).Info("No unschedulable pods") continue } for _, pod := range pods { glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name) } expansionOptions := make([]ExpansionOption, 0) nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient) if err != nil { glog.Errorf("Failed to build node infors for migs: %v", err) continue } for _, migConfig := range migConfigs { currentSize, err := gceManager.GetMigSize(migConfig) if err != nil { glog.Errorf("Failed to get MIG size: %v", err) continue } if currentSize >= int64(migConfig.MaxSize) { // skip this mig. glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url()) continue } option := ExpansionOption{ migConfig: migConfig, estimator: estimator.NewBasicNodeEstimator(), } migHelpsSomePods := false nodeInfo, found := nodeInfos[migConfig.Url()] if !found { glog.Errorf("No node info for: %s", migConfig.Url()) continue } for _, pod := range pods { err = predicateChecker.CheckPredicates(pod, nodeInfo) if err == nil { migHelpsSomePods = true option.estimator.Add(pod) } else { glog.V(2).Infof("Scale-up predicate failed: %v", err) } } if migHelpsSomePods { expansionOptions = append(expansionOptions, option) } } // Pick some expansion option. bestOption := BestExpansionOption(expansionOptions) if bestOption != nil { glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url()) nodeInfo, found := nodeInfos[bestOption.migConfig.Url()] if !found { glog.Errorf("No sample node for: %s", bestOption.migConfig.Url()) continue } node := nodeInfo.Node() estimate, report := bestOption.estimator.Estimate(node) glog.V(1).Info(bestOption.estimator.GetDebug()) glog.V(1).Info(report) glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url()) currentSize, err := gceManager.GetMigSize(bestOption.migConfig) if err != nil { glog.Errorf("Failed to get MIG size: %v", err) continue } newSize := currentSize + int64(estimate) if newSize >= int64(bestOption.migConfig.MaxSize) { newSize = int64(bestOption.migConfig.MaxSize) } glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize) if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil { glog.Errorf("Failed to set MIG size: %v", err) } } } } } }
func main() { flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+ "Can be used multiple times. Format: <min>:<max>:<migurl>") flag.Parse() url, err := url.Parse(*kubernetes) if err != nil { glog.Fatalf("Failed to parse Kuberentes url: %v", err) } // Configuration kubeConfig, err := config.GetKubeClientConfig(url) if err != nil { glog.Fatalf("Failed to build Kuberentes client configuration: %v", err) } migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag)) for i := range migConfigFlag { migConfigs = append(migConfigs, &migConfigFlag[i]) } gceManager, err := gce.CreateGceManager(migConfigs) if err != nil { glog.Fatalf("Failed to create GCE Manager: %v", err) } kubeClient := kube_client.NewOrDie(kubeConfig) predicateChecker, err := simulator.NewPredicateChecker(kubeClient) if err != nil { glog.Fatalf("Failed to create predicate checker: %v", err) } unschedulablePodLister := NewUnschedulablePodLister(kubeClient) scheduledPodLister := NewScheduledPodLister(kubeClient) nodeLister := NewNodeLister(kubeClient) lastScaleUpTime := time.Now() lastScaleDownFailedTrial := time.Now() underutilizedNodes := make(map[string]time.Time) eventBroadcaster := kube_record.NewBroadcaster() eventBroadcaster.StartLogging(glog.Infof) eventBroadcaster.StartRecordingToSink(kubeClient.Events("")) recorder := eventBroadcaster.NewRecorder(kube_api.EventSource{Component: "cluster-autoscaler"}) for { select { case <-time.After(time.Minute): { nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckMigsAndNodes(nodes, gceManager); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } allUnschedulablePods, err := unschedulablePodLister.List() if err != nil { glog.Errorf("Failed to list unscheduled pods: %v", err) continue } allScheduled, err := scheduledPodLister.List() if err != nil { glog.Errorf("Failed to list scheduled pods: %v", err) continue } // We need to reset all pods that have been marked as unschedulable not after // the newest node became available for the scheduler. allNodesAvailableTime := GetAllNodesAvailableTime(nodes) podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime) ResetPodScheduledCondition(kubeClient, podsToReset) // We need to check whether pods marked as unschedulable are actually unschedulable. // This should prevent from adding unnecessary nodes. Example of such situation: // - CA and Scheduler has slightly different configuration // - Scheduler can't schedule a pod and marks it as unschedulable // - CA added a node which should help the pod // - Scheduler doesn't schedule the pod on the new node // because according to it logic it doesn't fit there // - CA see the pod is still unschedulable, so it adds another node to help it // // With the check enabled the last point won't happen because CA will ignore a pod // which is supposed to schedule on an existing node. // // Without below check cluster might be unnecessary scaled up to the max allowed size // in the describe situation. if *verifyUnschedulablePods { unschedulablePodsToHelp = FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker) } if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else { scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, migConfigs, gceManager, kubeClient, predicateChecker, recorder) if err != nil { glog.Errorf("Failed to scale up: %v", err) continue } else { if scaledUp { lastScaleUpTime = time.Now() // No scale down in this iteration. continue } } } if *scaleDownEnabled { // In dry run only utilization is updated calculateUtilizationOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) || lastScaleDownFailedTrial.Add(*scaleDownTrialFrequency).After(time.Now()) underutilizedNodes = CalculateUnderutilizedNodes( nodes, underutilizedNodes, *scaleDownUtilizationThreshold, allScheduled, kubeClient, predicateChecker) if !calculateUtilizationOnly { result, err := ScaleDown( nodes, underutilizedNodes, *scaleDownUnderutilizedTime, allScheduled, gceManager, kubeClient, predicateChecker) if err != nil { glog.Errorf("Failed to scale down: %v", err) } else { if result != ScaleDownNodeDeleted { lastScaleDownFailedTrial = time.Now() } } } } } } } }
func main() { flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+ "Can be used multiple times. Format: <min>:<max>:<migurl>") flag.Parse() url, err := url.Parse(*kubernetes) if err != nil { glog.Fatalf("Failed to parse Kuberentes url: %v", err) } kubeConfig, err := config.GetKubeClientConfig(url) if err != nil { glog.Fatalf("Failed to build Kuberentes client configuration: %v", err) } kubeClient := kube_client.NewOrDie(kubeConfig) unscheduledPodLister := NewUnscheduledPodLister(kubeClient) nodeLister := NewNodeLister(kubeClient) migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag)) for i := range migConfigFlag { migConfigs = append(migConfigs, &migConfigFlag[i]) } gceManager, err := gce.CreateGceManager(migConfigs) if err != nil { glog.Fatalf("Failed to create GCE Manager %v", err) } predicateChecker := simulator.NewPredicateChecker() for { select { case <-time.After(time.Minute): { pods, err := unscheduledPodLister.List() if err != nil { glog.Errorf("Failed to list pods: %v", err) continue } if len(pods) == 0 { glog.V(1).Info("No unscheduled pods") continue } for _, pod := range pods { glog.V(1).Infof("Pod %s/%s is not scheduled", pod.Namespace, pod.Name) } nodes, err := nodeLister.List() if err != nil { glog.Errorf("Failed to list nodes: %v", err) continue } if len(nodes) == 0 { glog.Errorf("No nodes in the cluster") continue } if err := CheckMigsAndNodes(nodes, gceManager); err != nil { glog.Warningf("Cluster is not ready for autoscaling: %v", err) continue } // Checks if scheduler tried to schedule the pods after thew newest node was added. newestNode := GetNewestNode(nodes) if newestNode == nil { glog.Errorf("No newest node") continue } oldestSchedulingTrial := GetOldestFailedSchedulingTrail(pods) if oldestSchedulingTrial == nil { glog.Errorf("No oldest unschedueled trial: %v", err) continue } // TODO: Find better way to check if all pods were checked after the newest node // was added. if newestNode.CreationTimestamp.After(oldestSchedulingTrial.Add(-1 * time.Minute)) { // Lets give scheduler another chance. glog.V(1).Infof("One of the pods have not been tried after adding %s", newestNode.Name) continue } expansionOptions := make([]ExpansionOption, 0) nodeInfos, sampleNodes, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient) if err != nil { glog.Errorf("Failed to build node infors for migs: %v", err) continue } for _, migConfig := range migConfigs { currentSize, err := gceManager.GetMigSize(migConfig) if err != nil { glog.Errorf("Failed to get MIG size: %v", err) continue } if currentSize >= int64(migConfig.MaxSize) { // skip this mig. glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url()) continue } option := ExpansionOption{ migConfig: migConfig, estimator: estimator.NewBasicNodeEstimator(), } migHelpsSomePods := false nodeInfo, found := nodeInfos[migConfig.Url()] if !found { glog.Errorf("No node info for: %s", migConfig.Url()) continue } node, found := sampleNodes[migConfig.Url()] if !found { glog.Errorf("No sample node for: %s", migConfig.Url()) continue } for _, pod := range pods { err = predicateChecker.CheckPredicates(pod, node, nodeInfo) if err == nil { migHelpsSomePods = true option.estimator.Add(pod) } else { glog.V(2).Infof("Scale-up predicate failed: %v", err) } } if migHelpsSomePods { expansionOptions = append(expansionOptions, option) } } // Pick some expansion option. bestOption := BestExpansionOption(expansionOptions) if bestOption != nil { glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url()) node, found := sampleNodes[bestOption.migConfig.Url()] if !found { glog.Errorf("No sample node for: %s", bestOption.migConfig.Url()) continue } estimate := bestOption.estimator.Estimate(node) glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url()) currentSize, err := gceManager.GetMigSize(bestOption.migConfig) if err != nil { glog.Errorf("Failed to get MIG size: %v", err) continue } newSize := currentSize + int64(estimate) if newSize >= int64(bestOption.migConfig.MaxSize) { newSize = int64(bestOption.migConfig.MaxSize) } glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize) if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil { glog.Errorf("Failed to set MIG size: %v", err) } } } } } }