Example #1
0
// ScaleUp tries to scale the cluster up. It returns true if it found a way to increase the size,
// false if it didn't, and an error if one occurred. It assumes that all nodes in the cluster are
// ready and in sync with instance groups.
//
// Every node group that is below its max size and has a node info is checked against the
// unschedulable pods with predicateChecker. A group that accepts at least one pod becomes an
// expansion option whose node count comes from the estimator selected by estimatorName. The option
// returned by BestExpansionOption is then grown, capped by the group's max size and, when
// maxNodesTotal > 0, by the cluster-wide node limit.
//
// On a successful resize the pods of the chosen option get a "TriggeredScaleUp" event. When no
// resize is attempted, pods that failed the predicates for every examined group get a
// "NotTriggerScaleUp" event.
func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, cloudProvider cloudprovider.CloudProvider, kubeClient *kube_client.Client,
	predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder, maxNodesTotal int,
	estimatorName string) (bool, error) {

	// Nothing to do when there are no unschedulable pods.
	if len(unschedulablePods) == 0 {
		glog.V(1).Info("No unschedulable pods")
		return false, nil
	}

	for _, pod := range unschedulablePods {
		glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
	}

	expansionOptions := make([]ExpansionOption, 0)
	nodeInfos, err := GetNodeInfosForGroups(nodes, cloudProvider, kubeClient)
	if err != nil {
		return false, fmt.Errorf("failed to build node infos for node groups: %v", err)
	}

	// podsRemainUnschedulable maps a pod to true while it has failed the predicates for every
	// node group examined so far, and to false as soon as one group can take it. Tracking both
	// states keeps a pod that fits some group from being reported as not fitting anywhere.
	podsRemainUnschedulable := make(map[*kube_api.Pod]bool)
	for _, nodeGroup := range cloudProvider.NodeGroups() {

		currentSize, err := nodeGroup.TargetSize()
		if err != nil {
			glog.Errorf("Failed to get node group size: %v", err)
			continue
		}
		if currentSize >= nodeGroup.MaxSize() {
			// skip this node group.
			glog.V(4).Infof("Skipping node group %s - max size reached", nodeGroup.Id())
			continue
		}

		option := ExpansionOption{
			nodeGroup: nodeGroup,
			pods:      make([]*kube_api.Pod, 0),
		}

		nodeInfo, found := nodeInfos[nodeGroup.Id()]
		if !found {
			glog.Errorf("No node info for: %s", nodeGroup.Id())
			continue
		}

		for _, pod := range unschedulablePods {
			if err := predicateChecker.CheckPredicates(pod, nodeInfo); err != nil {
				glog.V(2).Infof("Scale-up predicate failed: %v", err)
				// Only mark the pod when no earlier group has already accepted it.
				if _, seen := podsRemainUnschedulable[pod]; !seen {
					podsRemainUnschedulable[pod] = true
				}
				continue
			}
			option.pods = append(option.pods, pod)
			podsRemainUnschedulable[pod] = false
		}
		if len(option.pods) == 0 {
			continue
		}

		// Ask the configured estimator how many nodes of this group the fitting pods need.
		switch estimatorName {
		case BinpackingEstimatorName:
			binpackingEstimator := estimator.NewBinpackingNodeEstimator(predicateChecker)
			option.nodeCount = binpackingEstimator.Estimate(option.pods, nodeInfo)
		case BasicEstimatorName:
			basicEstimator := estimator.NewBasicNodeEstimator()
			for _, pod := range option.pods {
				basicEstimator.Add(pod)
			}
			option.nodeCount, option.debug = basicEstimator.Estimate(nodeInfo.Node())
		default:
			glog.Fatalf("Unrecognized estimator: %s", estimatorName)
		}
		expansionOptions = append(expansionOptions, option)
	}

	// Pick some expansion option.
	bestOption := BestExpansionOption(expansionOptions)
	if bestOption == nil || bestOption.nodeCount <= 0 {
		// No resize will be attempted: tell the pods that fit nowhere why nothing happened.
		for pod, unschedulable := range podsRemainUnschedulable {
			if !unschedulable {
				continue
			}
			recorder.Event(pod, kube_api.EventTypeNormal, "NotTriggerScaleUp",
				"pod didn't trigger scale-up (it wouldn't fit if a new node is added)")
		}
		return false, nil
	}

	glog.V(1).Infof("Best option to resize: %s", bestOption.nodeGroup.Id())
	if len(bestOption.debug) > 0 {
		glog.V(1).Info(bestOption.debug)
	}
	glog.V(1).Infof("Estimated %d nodes needed in %s", bestOption.nodeCount, bestOption.nodeGroup.Id())

	// Re-read the target size so the delta is computed against the current value.
	currentSize, err := bestOption.nodeGroup.TargetSize()
	if err != nil {
		return false, fmt.Errorf("failed to get node group size: %v", err)
	}
	newSize := currentSize + bestOption.nodeCount
	if newSize >= bestOption.nodeGroup.MaxSize() {
		glog.V(1).Infof("Capping size to MAX (%d)", bestOption.nodeGroup.MaxSize())
		newSize = bestOption.nodeGroup.MaxSize()
	}

	if maxNodesTotal > 0 && len(nodes)+(newSize-currentSize) > maxNodesTotal {
		glog.V(1).Infof("Capping size to max cluster total size (%d)", maxNodesTotal)
		newSize = maxNodesTotal - len(nodes) + currentSize
		// newSize == currentSize means the cluster is exactly at the limit; asking the group
		// to grow by zero nodes is not a scale-up, so report the limit instead.
		if newSize <= currentSize {
			return false, fmt.Errorf("max node total count already reached")
		}
	}

	glog.V(0).Infof("Scale-up: setting group %s size to %d", bestOption.nodeGroup.Id(), newSize)

	if err := bestOption.nodeGroup.IncreaseSize(newSize - currentSize); err != nil {
		return false, fmt.Errorf("failed to increase node group size: %v", err)
	}

	for _, pod := range bestOption.pods {
		recorder.Eventf(pod, kube_api.EventTypeNormal, "TriggeredScaleUp",
			"pod triggered scale-up, group: %s, sizes (current/new): %d/%d", bestOption.nodeGroup.Id(), currentSize, newSize)
	}

	return true, nil
}
Example #2
0
// main wires up the autoscaler: it registers and parses the flags, creates the Kubernetes client,
// the pod and node listers and the GCE manager, and then loops forever, attempting one scale-up
// pass after every one-minute wait.
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	apiURL, parseErr := url.Parse(*kubernetes)
	if parseErr != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", parseErr)
	}
	clientConfig, configErr := config.GetKubeClientConfig(apiURL)
	if configErr != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", configErr)
	}

	kubeClient := kube_client.NewOrDie(clientConfig)
	podLister := NewUnschedulablePodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	// Collect pointers into the flag slice so later code works on the configured entries.
	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for idx := 0; idx < len(migConfigFlag); idx++ {
		migConfigs = append(migConfigs, &migConfigFlag[idx])
	}

	gceManager, gceErr := gce.CreateGceManager(migConfigs)
	if gceErr != nil {
		glog.Fatalf("Failed to create GCE Manager %v", gceErr)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		// One wait per pass; every `continue` below comes back to this receive.
		<-time.After(time.Minute)

		nodes, err := nodeLister.List()
		if err != nil {
			glog.Errorf("Failed to list nodes: %v", err)
			continue
		}
		if len(nodes) == 0 {
			glog.Errorf("No nodes in the cluster")
			continue
		}

		if checkErr := CheckMigsAndNodes(nodes, gceManager); checkErr != nil {
			glog.Warningf("Cluster is not ready for autoscaling: %v", checkErr)
			continue
		}

		pods, err := podLister.List()
		if err != nil {
			glog.Errorf("Failed to list unscheduled pods: %v", err)
			continue
		}

		// Pods marked unschedulable no later than the moment the newest node became
		// available to the scheduler are reset here ...
		availableSince := GetAllNodesAvailableTime(nodes)
		resetOldPods(kubeClient, pods, availableSince)

		// ... and dropped from this pass, leaving only the pods marked after that moment.
		pods = filterOldPods(pods, availableSince)
		if len(pods) == 0 {
			glog.V(1).Info("No unschedulable pods")
			continue
		}
		for _, pod := range pods {
			glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
		}

		nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
		if err != nil {
			glog.Errorf("Failed to build node infors for migs: %v", err)
			continue
		}

		// Build one candidate per MIG that is below its max size and accepts at least one pod.
		candidates := []ExpansionOption{}
		for _, mig := range migConfigs {
			size, sizeErr := gceManager.GetMigSize(mig)
			if sizeErr != nil {
				glog.Errorf("Failed to get MIG size: %v", sizeErr)
				continue
			}
			if size >= int64(mig.MaxSize) {
				// This MIG cannot grow any further.
				glog.V(4).Infof("Skipping MIG %s - max size reached", mig.Url())
				continue
			}

			candidate := ExpansionOption{
				migConfig: mig,
				estimator: estimator.NewBasicNodeEstimator(),
			}

			info, ok := nodeInfos[mig.Url()]
			if !ok {
				glog.Errorf("No node info for: %s", mig.Url())
				continue
			}

			helpsSomePod := false
			for _, pod := range pods {
				if predErr := predicateChecker.CheckPredicates(pod, info); predErr != nil {
					glog.V(2).Infof("Scale-up predicate failed: %v", predErr)
					continue
				}
				helpsSomePod = true
				candidate.estimator.Add(pod)
			}
			if helpsSomePod {
				candidates = append(candidates, candidate)
			}
		}

		// Pick some expansion option.
		best := BestExpansionOption(candidates)
		if best == nil {
			continue
		}
		glog.V(1).Infof("Best option to resize: %s", best.migConfig.Url())
		bestInfo, ok := nodeInfos[best.migConfig.Url()]
		if !ok {
			glog.Errorf("No sample node for: %s", best.migConfig.Url())
			continue
		}
		estimate, report := best.estimator.Estimate(bestInfo.Node())
		glog.V(1).Info(best.estimator.GetDebug())
		glog.V(1).Info(report)
		glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, best.migConfig.Url())

		size, err := gceManager.GetMigSize(best.migConfig)
		if err != nil {
			glog.Errorf("Failed to get MIG size: %v", err)
			continue
		}
		// Grow by the estimate, but never beyond the MIG's configured maximum.
		target := size + int64(estimate)
		if limit := int64(best.migConfig.MaxSize); target >= limit {
			target = limit
		}
		glog.V(1).Infof("Setting %s size to %d", best.migConfig.Url(), target)

		if setErr := gceManager.SetMigSize(best.migConfig, target); setErr != nil {
			glog.Errorf("Failed to set MIG size: %v", setErr)
		}
	}
}
Example #3
0
// ScaleUp tries to scale the cluster up. It returns true if it found a way to increase the size,
// false if it didn't, and an error if one occurred.
//
// Every MIG that is below its max size and has a node info is checked against the unschedulable
// pods with predicateChecker. A MIG that accepts at least one pod becomes an expansion option whose
// estimator has been fed the fitting pods. The option returned by BestExpansionOption is resized to
// its current size plus the estimate, capped at the MIG's max size.
//
// On a successful resize the estimator's fitting pods get a "TriggeredScaleUp" event. When no
// resize is attempted, pods that failed the predicates for every examined MIG get a
// "NotTriggerScaleUp" event.
func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, migConfigs []*config.MigConfig,
	gceManager *gce.GceManager, kubeClient *kube_client.Client,
	predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder) (bool, error) {

	// Nothing to do when there are no unschedulable pods.
	if len(unschedulablePods) == 0 {
		glog.V(1).Info("No unschedulable pods")
		return false, nil
	}

	for _, pod := range unschedulablePods {
		glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
	}

	expansionOptions := make([]ExpansionOption, 0)
	nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
	if err != nil {
		return false, fmt.Errorf("failed to build node infors for migs: %v", err)
	}

	// podsRemainUnschedulable maps a pod to true while it has failed the predicates for every
	// MIG examined so far, and to false as soon as one MIG can take it. Tracking both states
	// keeps a pod that fits some MIG from being reported as not fitting anywhere.
	podsRemainUnschedulable := make(map[*kube_api.Pod]bool)
	for _, migConfig := range migConfigs {

		currentSize, err := gceManager.GetMigSize(migConfig)
		if err != nil {
			glog.Errorf("Failed to get MIG size: %v", err)
			continue
		}
		if currentSize >= int64(migConfig.MaxSize) {
			// skip this mig.
			glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
			continue
		}

		option := ExpansionOption{
			migConfig: migConfig,
			estimator: estimator.NewBasicNodeEstimator(),
		}
		migHelpsSomePods := false

		nodeInfo, found := nodeInfos[migConfig.Url()]
		if !found {
			glog.Errorf("No node info for: %s", migConfig.Url())
			continue
		}

		for _, pod := range unschedulablePods {
			if err := predicateChecker.CheckPredicates(pod, nodeInfo); err != nil {
				glog.V(2).Infof("Scale-up predicate failed: %v", err)
				// Only mark the pod when no earlier MIG has already accepted it.
				if _, seen := podsRemainUnschedulable[pod]; !seen {
					podsRemainUnschedulable[pod] = true
				}
				continue
			}
			migHelpsSomePods = true
			option.estimator.Add(pod)
			podsRemainUnschedulable[pod] = false
		}
		if migHelpsSomePods {
			expansionOptions = append(expansionOptions, option)
		}
	}

	// Pick some expansion option.
	bestOption := BestExpansionOption(expansionOptions)
	if bestOption != nil && bestOption.estimator.GetCount() > 0 {
		glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())
		nodeInfo, found := nodeInfos[bestOption.migConfig.Url()]
		if !found {
			return false, fmt.Errorf("no sample node for: %s", bestOption.migConfig.Url())
		}
		node := nodeInfo.Node()
		estimate, report := bestOption.estimator.Estimate(node)
		glog.V(1).Info(bestOption.estimator.GetDebug())
		glog.V(1).Info(report)
		glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

		// Re-read the MIG size so the new size is computed against the current value.
		currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
		if err != nil {
			return false, fmt.Errorf("failed to get MIG size: %v", err)
		}
		newSize := currentSize + int64(estimate)
		if newSize >= int64(bestOption.migConfig.MaxSize) {
			glog.V(1).Infof("Capping size to MAX (%d)", bestOption.migConfig.MaxSize)
			newSize = int64(bestOption.migConfig.MaxSize)
		}
		glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

		if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
			return false, fmt.Errorf("failed to set MIG size: %v", err)
		}

		for pod := range bestOption.estimator.FittingPods {
			recorder.Eventf(pod, kube_api.EventTypeNormal, "TriggeredScaleUp",
				"pod triggered scale-up, mig: %s, sizes (current/new): %d/%d", bestOption.migConfig.Name, currentSize, newSize)
		}

		return true, nil
	}

	// No resize was attempted: tell the pods that fit nowhere why nothing happened.
	for pod, unschedulable := range podsRemainUnschedulable {
		if !unschedulable {
			continue
		}
		recorder.Event(pod, kube_api.EventTypeNormal, "NotTriggerScaleUp",
			"pod didn't trigger scale-up (it wouldn't fit if a new node is added)")
	}

	return false, nil
}
// main wires up the autoscaler: it registers and parses the flags, creates the Kubernetes client,
// the pod and node listers and the GCE manager, and then loops forever, attempting one scale-up
// pass after every one-minute wait.
//
// A pass is abandoned (and retried after the next wait) when there are no unscheduled pods, when
// listing fails, when CheckMigsAndNodes reports the cluster is not ready, or when the newest node
// was created later than one minute before the oldest failed scheduling trial of the pods.
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	// Named kubeURL so the local does not shadow the net/url package.
	kubeURL, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", err)
	}
	kubeConfig, err := config.GetKubeClientConfig(kubeURL)
	if err != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)
	unscheduledPodLister := NewUnscheduledPodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	// Collect pointers into the flag slice so later code works on the configured entries.
	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager %v", err)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		// One wait per pass; every `continue` below comes back to this receive.
		<-time.After(time.Minute)

		pods, err := unscheduledPodLister.List()
		if err != nil {
			glog.Errorf("Failed to list pods: %v", err)
			continue
		}
		if len(pods) == 0 {
			glog.V(1).Info("No unscheduled pods")
			continue
		}

		for _, pod := range pods {
			glog.V(1).Infof("Pod %s/%s is not scheduled", pod.Namespace, pod.Name)
		}

		nodes, err := nodeLister.List()
		if err != nil {
			glog.Errorf("Failed to list nodes: %v", err)
			continue
		}
		if len(nodes) == 0 {
			glog.Errorf("No nodes in the cluster")
			continue
		}

		if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
			glog.Warningf("Cluster is not ready for autoscaling: %v", err)
			continue
		}

		// Checks if scheduler tried to schedule the pods after the newest node was added.
		newestNode := GetNewestNode(nodes)
		if newestNode == nil {
			glog.Errorf("No newest node")
			continue
		}
		oldestSchedulingTrial := GetOldestFailedSchedulingTrail(pods)
		if oldestSchedulingTrial == nil {
			// There is no error value to report here: err is nil at this point because
			// listing the nodes succeeded.
			glog.Errorf("No oldest unschedueled trial")
			continue
		}

		// TODO: Find better way to check if all pods were checked after the newest node
		// was added.
		if newestNode.CreationTimestamp.After(oldestSchedulingTrial.Add(-1 * time.Minute)) {
			// Lets give scheduler another chance.
			glog.V(1).Infof("One of the pods have not been tried after adding %s", newestNode.Name)
			continue
		}

		expansionOptions := make([]ExpansionOption, 0)
		nodeInfos, sampleNodes, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
		if err != nil {
			glog.Errorf("Failed to build node infors for migs: %v", err)
			continue
		}

		// Build one option per MIG that is below its max size and accepts at least one pod.
		for _, migConfig := range migConfigs {

			currentSize, err := gceManager.GetMigSize(migConfig)
			if err != nil {
				glog.Errorf("Failed to get MIG size: %v", err)
				continue
			}
			if currentSize >= int64(migConfig.MaxSize) {
				// skip this mig.
				glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
				continue
			}

			option := ExpansionOption{
				migConfig: migConfig,
				estimator: estimator.NewBasicNodeEstimator(),
			}
			migHelpsSomePods := false

			nodeInfo, found := nodeInfos[migConfig.Url()]
			if !found {
				glog.Errorf("No node info for: %s", migConfig.Url())
				continue
			}

			node, found := sampleNodes[migConfig.Url()]
			if !found {
				glog.Errorf("No sample node for: %s", migConfig.Url())
				continue
			}

			for _, pod := range pods {
				if err := predicateChecker.CheckPredicates(pod, node, nodeInfo); err != nil {
					glog.V(2).Infof("Scale-up predicate failed: %v", err)
					continue
				}
				migHelpsSomePods = true
				option.estimator.Add(pod)
			}
			if migHelpsSomePods {
				expansionOptions = append(expansionOptions, option)
			}
		}

		// Pick some expansion option.
		bestOption := BestExpansionOption(expansionOptions)
		if bestOption == nil {
			continue
		}
		glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())
		node, found := sampleNodes[bestOption.migConfig.Url()]
		if !found {
			glog.Errorf("No sample node for: %s", bestOption.migConfig.Url())
			continue
		}
		estimate := bestOption.estimator.Estimate(node)
		glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

		currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
		if err != nil {
			glog.Errorf("Failed to get MIG size: %v", err)
			continue
		}
		// Grow by the estimate, but never beyond the MIG's configured maximum.
		newSize := currentSize + int64(estimate)
		if newSize >= int64(bestOption.migConfig.MaxSize) {
			newSize = int64(bestOption.migConfig.MaxSize)
		}
		glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

		if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
			glog.Errorf("Failed to set MIG size: %v", err)
		}
	}
}