// ScaleUp tries to scale the cluster up. Return true if it found a way to increase the size,
// false if it didn't and error if an error occurred. Assumes that all nodes in the cluster are
// ready and in sync with instance groups.
func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, cloudProvider cloudprovider.CloudProvider, kubeClient *kube_client.Client,
	predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder, maxNodesTotal int,
	estimatorName string) (bool, error) {

	// From now on we only care about unschedulable pods that were marked after the newest
	// node became available for the scheduler.
	if len(unschedulablePods) == 0 {
		glog.V(1).Info("No unschedulable pods")
		return false, nil
	}

	for _, pod := range unschedulablePods {
		glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
	}

	expansionOptions := make([]ExpansionOption, 0)
	nodeInfos, err := GetNodeInfosForGroups(nodes, cloudProvider, kubeClient)
	if err != nil {
		return false, fmt.Errorf("failed to build node infos for node groups: %v", err)
	}

	podsRemainUnshedulable := make(map[*kube_api.Pod]struct{})
	for _, nodeGroup := range cloudProvider.NodeGroups() {

		currentSize, err := nodeGroup.TargetSize()
		if err != nil {
			glog.Errorf("Failed to get node group size: %v", err)
			continue
		}
		if currentSize >= nodeGroup.MaxSize() {
			// skip this node group.
			glog.V(4).Infof("Skipping node group %s - max size reached", nodeGroup.Id())
			continue
		}

		option := ExpansionOption{
			nodeGroup: nodeGroup,
			pods:      make([]*kube_api.Pod, 0),
		}

		nodeInfo, found := nodeInfos[nodeGroup.Id()]
		if !found {
			glog.Errorf("No node info for: %s", nodeGroup.Id())
			continue
		}

		for _, pod := range unschedulablePods {
			err = predicateChecker.CheckPredicates(pod, nodeInfo)
			if err == nil {
				option.pods = append(option.pods, pod)
			} else {
				glog.V(2).Infof("Scale-up predicate failed: %v", err)
				podsRemainUnshedulable[pod] = struct{}{}
			}
		}

		if len(option.pods) > 0 {
			if estimatorName == BinpackingEstimatorName {
				binpackingEstimator := estimator.NewBinpackingNodeEstimator(predicateChecker)
				option.nodeCount = binpackingEstimator.Estimate(option.pods, nodeInfo)
			} else if estimatorName == BasicEstimatorName {
				basicEstimator := estimator.NewBasicNodeEstimator()
				for _, pod := range option.pods {
					basicEstimator.Add(pod)
				}
				option.nodeCount, option.debug = basicEstimator.Estimate(nodeInfo.Node())
			} else {
				glog.Fatalf("Unrecognized estimator: %s", estimatorName)
			}
			expansionOptions = append(expansionOptions, option)
		}
	}

	// Pick some expansion option.
	bestOption := BestExpansionOption(expansionOptions)
	if bestOption != nil && bestOption.nodeCount > 0 {
		glog.V(1).Infof("Best option to resize: %s", bestOption.nodeGroup.Id())
		if len(bestOption.debug) > 0 {
			glog.V(1).Info(bestOption.debug)
		}
		glog.V(1).Infof("Estimated %d nodes needed in %s", bestOption.nodeCount, bestOption.nodeGroup.Id())

		currentSize, err := bestOption.nodeGroup.TargetSize()
		if err != nil {
			return false, fmt.Errorf("failed to get node group size: %v", err)
		}
		newSize := currentSize + bestOption.nodeCount
		if newSize >= bestOption.nodeGroup.MaxSize() {
			glog.V(1).Infof("Capping size to MAX (%d)", bestOption.nodeGroup.MaxSize())
			newSize = bestOption.nodeGroup.MaxSize()
		}

		if maxNodesTotal > 0 && len(nodes)+(newSize-currentSize) > maxNodesTotal {
			glog.V(1).Infof("Capping size to max cluster total size (%d)", maxNodesTotal)
			newSize = maxNodesTotal - len(nodes) + currentSize
			if newSize < currentSize {
				return false, fmt.Errorf("max node total count already reached")
			}
		}

		glog.V(0).Infof("Scale-up: setting group %s size to %d", bestOption.nodeGroup.Id(), newSize)

		if err := bestOption.nodeGroup.IncreaseSize(newSize - currentSize); err != nil {
			return false, fmt.Errorf("failed to increase node group size: %v", err)
		}

		for _, pod := range bestOption.pods {
			recorder.Eventf(pod, kube_api.EventTypeNormal, "TriggeredScaleUp",
				"pod triggered scale-up, group: %s, sizes (current/new): %d/%d", bestOption.nodeGroup.Id(), currentSize, newSize)
		}

		return true, nil
	}

	for pod := range podsRemainUnshedulable {
		recorder.Event(pod, kube_api.EventTypeNormal, "NotTriggerScaleUp",
			"pod didn't trigger scale-up (it wouldn't fit if a new node is added)")
	}

	return false, nil
}
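// The generic ScaleUp above relies on an ExpansionOption value and a BestExpansionOption
// helper defined elsewhere in the package. A minimal sketch, reconstructed only from how
// they are used in this snippet; the field set and the trivial selection strategy are
// assumptions, not the actual upstream definitions.
type ExpansionOption struct {
	nodeGroup cloudprovider.NodeGroup // group that would be resized
	nodeCount int                     // estimated number of nodes to add
	debug     string                  // optional estimator debug output
	pods      []*kube_api.Pod         // pods that would fit on a new node in this group
}

// BestExpansionOption picks one of the viable options. Any selection strategy would do
// for this sketch, so an arbitrary element is returned; nil means there is nothing to expand.
func BestExpansionOption(options []ExpansionOption) *ExpansionOption {
	if len(options) == 0 {
		return nil
	}
	return &options[0]
}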
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kubernetes url: %v", err)
	}

	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kubernetes client configuration: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)

	unschedulablePodLister := NewUnschedulablePodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager %v", err)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		select {
		case <-time.After(time.Minute):
			{
				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				pods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				// We need to reset all pods that were marked as unschedulable before the newest
				// node became available for the scheduler.
				allNodesAvailableTime := GetAllNodesAvailableTime(nodes)
				resetOldPods(kubeClient, pods, allNodesAvailableTime)

				// From now on we only care about unschedulable pods that were marked after the newest
				// node became available for the scheduler.
				pods = filterOldPods(pods, allNodesAvailableTime)
				if len(pods) == 0 {
					glog.V(1).Info("No unschedulable pods")
					continue
				}

				for _, pod := range pods {
					glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
				}

				expansionOptions := make([]ExpansionOption, 0)
				nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
				if err != nil {
					glog.Errorf("Failed to build node infos for migs: %v", err)
					continue
				}

				for _, migConfig := range migConfigs {

					currentSize, err := gceManager.GetMigSize(migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					if currentSize >= int64(migConfig.MaxSize) {
						// skip this mig.
						glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
						continue
					}

					option := ExpansionOption{
						migConfig: migConfig,
						estimator: estimator.NewBasicNodeEstimator(),
					}
					migHelpsSomePods := false

					nodeInfo, found := nodeInfos[migConfig.Url()]
					if !found {
						glog.Errorf("No node info for: %s", migConfig.Url())
						continue
					}

					for _, pod := range pods {
						err = predicateChecker.CheckPredicates(pod, nodeInfo)
						if err == nil {
							migHelpsSomePods = true
							option.estimator.Add(pod)
						} else {
							glog.V(2).Infof("Scale-up predicate failed: %v", err)
						}
					}

					if migHelpsSomePods {
						expansionOptions = append(expansionOptions, option)
					}
				}

				// Pick some expansion option.
				bestOption := BestExpansionOption(expansionOptions)
				if bestOption != nil {
					glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())

					nodeInfo, found := nodeInfos[bestOption.migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", bestOption.migConfig.Url())
						continue
					}
					node := nodeInfo.Node()
					estimate, report := bestOption.estimator.Estimate(node)
					glog.V(1).Info(bestOption.estimator.GetDebug())
					glog.V(1).Info(report)
					glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

					currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					newSize := currentSize + int64(estimate)
					if newSize >= int64(bestOption.migConfig.MaxSize) {
						newSize = int64(bestOption.migConfig.MaxSize)
					}
					glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

					if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
						glog.Errorf("Failed to set MIG size: %v", err)
					}
				}
			}
		}
	}
}
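// In the MIG-based code above (and in the ScaleUp variant below), ExpansionOption pairs a
// MIG config with a basic node estimator instead of a node group and a node count. A sketch
// inferred from the usage in these snippets; the field names match how the struct is used
// here but the actual upstream definition may differ.
type ExpansionOption struct {
	migConfig *config.MigConfig             // MIG that would be resized
	estimator *estimator.BasicNodeEstimator // accumulates the pods this MIG could help schedule
}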
// ScaleUp tries to scale the cluster up. Return true if it found a way to increase the size,
// false if it didn't and error if an error occurred.
func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, migConfigs []*config.MigConfig,
	gceManager *gce.GceManager, kubeClient *kube_client.Client, predicateChecker *simulator.PredicateChecker,
	recorder kube_record.EventRecorder) (bool, error) {

	// From now on we only care about unschedulable pods that were marked after the newest
	// node became available for the scheduler.
	if len(unschedulablePods) == 0 {
		glog.V(1).Info("No unschedulable pods")
		return false, nil
	}

	for _, pod := range unschedulablePods {
		glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
	}

	expansionOptions := make([]ExpansionOption, 0)
	nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
	if err != nil {
		return false, fmt.Errorf("failed to build node infos for migs: %v", err)
	}

	podsRemainUnshedulable := make(map[*kube_api.Pod]struct{})
	for _, migConfig := range migConfigs {

		currentSize, err := gceManager.GetMigSize(migConfig)
		if err != nil {
			glog.Errorf("Failed to get MIG size: %v", err)
			continue
		}
		if currentSize >= int64(migConfig.MaxSize) {
			// skip this mig.
			glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
			continue
		}

		option := ExpansionOption{
			migConfig: migConfig,
			estimator: estimator.NewBasicNodeEstimator(),
		}
		migHelpsSomePods := false

		nodeInfo, found := nodeInfos[migConfig.Url()]
		if !found {
			glog.Errorf("No node info for: %s", migConfig.Url())
			continue
		}

		for _, pod := range unschedulablePods {
			err = predicateChecker.CheckPredicates(pod, nodeInfo)
			if err == nil {
				migHelpsSomePods = true
				option.estimator.Add(pod)
			} else {
				glog.V(2).Infof("Scale-up predicate failed: %v", err)
				podsRemainUnshedulable[pod] = struct{}{}
			}
		}

		if migHelpsSomePods {
			expansionOptions = append(expansionOptions, option)
		}
	}

	// Pick some expansion option.
	bestOption := BestExpansionOption(expansionOptions)
	if bestOption != nil && bestOption.estimator.GetCount() > 0 {
		glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())

		nodeInfo, found := nodeInfos[bestOption.migConfig.Url()]
		if !found {
			return false, fmt.Errorf("no sample node for: %s", bestOption.migConfig.Url())
		}
		node := nodeInfo.Node()
		estimate, report := bestOption.estimator.Estimate(node)
		glog.V(1).Info(bestOption.estimator.GetDebug())
		glog.V(1).Info(report)
		glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

		currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
		if err != nil {
			return false, fmt.Errorf("failed to get MIG size: %v", err)
		}
		newSize := currentSize + int64(estimate)
		if newSize >= int64(bestOption.migConfig.MaxSize) {
			glog.V(1).Infof("Capping size to MAX (%d)", bestOption.migConfig.MaxSize)
			newSize = int64(bestOption.migConfig.MaxSize)
		}
		glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

		if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
			return false, fmt.Errorf("failed to set MIG size: %v", err)
		}

		for pod := range bestOption.estimator.FittingPods {
			recorder.Eventf(pod, kube_api.EventTypeNormal, "TriggeredScaleUp",
				"pod triggered scale-up, mig: %s, sizes (current/new): %d/%d", bestOption.migConfig.Name, currentSize, newSize)
		}

		return true, nil
	}

	for pod := range podsRemainUnshedulable {
		recorder.Event(pod, kube_api.EventTypeNormal, "NotTriggerScaleUp",
			"pod didn't trigger scale-up (it wouldn't fit if a new node is added)")
	}

	return false, nil
}
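// A sketch of how the MIG-based ScaleUp above might be driven from a periodic loop, in the
// spirit of the main functions in this file. The function name runAutoscalerLoop, the lister
// parameter types (inferred from the NewUnschedulablePodLister/NewNodeLister constructors),
// and the recorder argument are illustrative assumptions rather than the actual wiring.
func runAutoscalerLoop(unschedulablePodLister *UnschedulablePodLister, nodeLister *NodeLister,
	migConfigs []*config.MigConfig, gceManager *gce.GceManager, kubeClient *kube_client.Client,
	predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder) {
	for {
		select {
		case <-time.After(time.Minute):
			nodes, err := nodeLister.List()
			if err != nil || len(nodes) == 0 {
				glog.Errorf("Failed to list nodes: %v", err)
				continue
			}
			pods, err := unschedulablePodLister.List()
			if err != nil {
				glog.Errorf("Failed to list unschedulable pods: %v", err)
				continue
			}
			scaledUp, err := ScaleUp(pods, nodes, migConfigs, gceManager, kubeClient, predicateChecker, recorder)
			if err != nil {
				glog.Errorf("Scale-up failed: %v", err)
			} else if scaledUp {
				glog.V(1).Info("Scale-up triggered")
			}
		}
	}
}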
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kubernetes url: %v", err)
	}

	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kubernetes client configuration: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)

	unscheduledPodLister := NewUnscheduledPodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager %v", err)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		select {
		case <-time.After(time.Minute):
			{
				pods, err := unscheduledPodLister.List()
				if err != nil {
					glog.Errorf("Failed to list pods: %v", err)
					continue
				}
				if len(pods) == 0 {
					glog.V(1).Info("No unscheduled pods")
					continue
				}

				for _, pod := range pods {
					glog.V(1).Infof("Pod %s/%s is not scheduled", pod.Namespace, pod.Name)
				}

				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				// Checks if the scheduler tried to schedule the pods after the newest node was added.
				newestNode := GetNewestNode(nodes)
				if newestNode == nil {
					glog.Errorf("No newest node")
					continue
				}
				oldestSchedulingTrial := GetOldestFailedSchedulingTrail(pods)
				if oldestSchedulingTrial == nil {
					glog.Errorf("No oldest unscheduled trial: %v", err)
					continue
				}
				// TODO: Find a better way to check if all pods were checked after the newest node
				// was added.
				if newestNode.CreationTimestamp.After(oldestSchedulingTrial.Add(-1 * time.Minute)) {
					// Let's give the scheduler another chance.
					glog.V(1).Infof("One of the pods has not been tried after adding %s", newestNode.Name)
					continue
				}

				expansionOptions := make([]ExpansionOption, 0)
				nodeInfos, sampleNodes, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
				if err != nil {
					glog.Errorf("Failed to build node infos for migs: %v", err)
					continue
				}

				for _, migConfig := range migConfigs {

					currentSize, err := gceManager.GetMigSize(migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					if currentSize >= int64(migConfig.MaxSize) {
						// skip this mig.
						glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
						continue
					}

					option := ExpansionOption{
						migConfig: migConfig,
						estimator: estimator.NewBasicNodeEstimator(),
					}
					migHelpsSomePods := false

					nodeInfo, found := nodeInfos[migConfig.Url()]
					if !found {
						glog.Errorf("No node info for: %s", migConfig.Url())
						continue
					}
					node, found := sampleNodes[migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", migConfig.Url())
						continue
					}

					for _, pod := range pods {
						err = predicateChecker.CheckPredicates(pod, node, nodeInfo)
						if err == nil {
							migHelpsSomePods = true
							option.estimator.Add(pod)
						} else {
							glog.V(2).Infof("Scale-up predicate failed: %v", err)
						}
					}

					if migHelpsSomePods {
						expansionOptions = append(expansionOptions, option)
					}
				}

				// Pick some expansion option.
				bestOption := BestExpansionOption(expansionOptions)
				if bestOption != nil {
					glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())

					node, found := sampleNodes[bestOption.migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", bestOption.migConfig.Url())
						continue
					}
					estimate := bestOption.estimator.Estimate(node)
					glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

					currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					newSize := currentSize + int64(estimate)
					if newSize >= int64(bestOption.migConfig.MaxSize) {
						newSize = int64(bestOption.migConfig.MaxSize)
					}
					glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

					if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
						glog.Errorf("Failed to set MIG size: %v", err)
					}
				}
			}
		}
	}
}
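// The --nodes flag registered in the main functions above takes values of the form
// <min>:<max>:<migurl>. A sketch of a flag.Value implementation that could back such a flag;
// the migSpec/migSpecFlag types and their fields are purely illustrative, and the real
// parsing lives in the config package and maps onto config.MigConfig. Requires fmt,
// strconv and strings.
type migSpec struct {
	minSize int
	maxSize int
	url     string
}

type migSpecFlag []migSpec

func (f *migSpecFlag) String() string {
	return fmt.Sprint(*f)
}

func (f *migSpecFlag) Set(value string) error {
	parts := strings.SplitN(value, ":", 3)
	if len(parts) != 3 {
		return fmt.Errorf("expected <min>:<max>:<migurl>, got %q", value)
	}
	min, err := strconv.Atoi(parts[0])
	if err != nil {
		return fmt.Errorf("invalid min size %q: %v", parts[0], err)
	}
	max, err := strconv.Atoi(parts[1])
	if err != nil {
		return fmt.Errorf("invalid max size %q: %v", parts[1], err)
	}
	if min > max {
		return fmt.Errorf("min size %d greater than max size %d", min, max)
	}
	*f = append(*f, migSpec{minSize: min, maxSize: max, url: parts[2]})
	return nil
}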