Example #1
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	go func() {
		http.Handle("/metrics", prometheus.Handler())
		err := http.ListenAndServe(*address, nil)
		glog.Fatalf("Failed to start metrics: %v", err)
	}()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", err)
	}

	// Configuration
	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", err)
	}
	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	// GCE Manager
	var gceManager *gce.GceManager
	var gceError error
	if *cloudConfig != "" {
		config, fileErr := os.Open(*cloudConfig)
		if fileErr != nil {
			glog.Fatalf("Couldn't open cloud provider configuration %s: %#v", *cloudConfig, err)
		}
		defer config.Close()
		gceManager, gceError = gce.CreateGceManager(migConfigs, config)
	} else {
		gceManager, gceError = gce.CreateGceManager(migConfigs, nil)
	}
	if gceError != nil {
		glog.Fatalf("Failed to create GCE Manager: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)

	predicateChecker, err := simulator.NewPredicateChecker(kubeClient)
	if err != nil {
		glog.Fatalf("Failed to create predicate checker: %v", err)
	}
	unschedulablePodLister := NewUnschedulablePodLister(kubeClient)
	scheduledPodLister := NewScheduledPodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	lastScaleUpTime := time.Now()
	lastScaleDownFailedTrial := time.Now()
	unneededNodes := make(map[string]time.Time)

	eventBroadcaster := kube_record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))
	recorder := eventBroadcaster.NewRecorder(kube_api.EventSource{Component: "cluster-autoscaler"})

	for {
		select {
		case <-time.After(*scanInterval):
			{
				loopStart := time.Now()
				updateLastTime("main")

				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				allUnschedulablePods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				allScheduled, err := scheduledPodLister.List()
				if err != nil {
					glog.Errorf("Failed to list scheduled pods: %v", err)
					continue
				}

				// We need to reset all pods that were marked as unschedulable no later than
				// the time when the newest node became available to the scheduler, so that
				// the scheduler retries them against the current set of nodes.
				allNodesAvailableTime := GetAllNodesAvailableTime(nodes)
				podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime)
				ResetPodScheduledCondition(kubeClient, podsToReset)

				// We need to check whether pods marked as unschedulable are actually unschedulable.
				// This prevents adding unnecessary nodes. Example of such a situation:
				// - CA and the scheduler have slightly different configurations
				// - the scheduler can't schedule a pod and marks it as unschedulable
				// - CA adds a node which should help the pod
				// - the scheduler doesn't schedule the pod on the new node
				//   because according to its logic the pod doesn't fit there
				// - CA sees the pod is still unschedulable, so it adds another node to help it
				//
				// With the check enabled the last point won't happen because CA will ignore a pod
				// that is supposed to be schedulable on an existing node.
				//
				// Without the check below, the cluster might be unnecessarily scaled up to the
				// maximum allowed size in the situation described above.
				schedulablePodsPresent := false
				if *verifyUnschedulablePods {
					newUnschedulablePodsToHelp := FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker)

					if len(newUnschedulablePodsToHelp) != len(unschedulablePodsToHelp) {
						glog.V(2).Info("Schedulable pods present")
						schedulablePodsPresent = true
					}
					unschedulablePodsToHelp = newUnschedulablePodsToHelp
				}

				if len(unschedulablePodsToHelp) == 0 {
					glog.V(1).Info("No unschedulable pods")
				} else {
					scaleUpStart := time.Now()
					updateLastTime("scaleup")
					scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, migConfigs, gceManager, kubeClient, predicateChecker, recorder)

					updateDuration("scaleup", scaleUpStart)

					if err != nil {
						glog.Errorf("Failed to scale up: %v", err)
						continue
					} else {
						if scaledUp {
							lastScaleUpTime = time.Now()
							// No scale down in this iteration.
							continue
						}
					}
				}

				if *scaleDownEnabled {
					unneededStart := time.Now()

					// In dry run only the set of unneeded nodes is updated; no nodes are deleted.
					calculateUnneededOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) ||
						lastScaleDownFailedTrial.Add(*scaleDownTrialInterval).After(time.Now()) ||
						schedulablePodsPresent

					glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+
						"lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly,
						lastScaleUpTime, lastScaleDownFailedTrial, schedulablePodsPresent)

					updateLastTime("findUnneeded")
					glog.V(4).Infof("Calculating unneded nodes")

					unneededNodes = FindUnneededNodes(
						nodes,
						unneededNodes,
						*scaleDownUtilizationThreshold,
						allScheduled,
						predicateChecker)

					updateDuration("findUnneeded", unneededStart)

					if glog.V(4) {
						for key, val := range unneededNodes {
							glog.V(4).Infof("%s is unneeded since %s duration %s", key, val.String(), time.Now().Sub(val).String())
						}
					}

					if !calculateUnneededOnly {
						glog.V(4).Infof("Starting scale down")

						scaleDownStart := time.Now()
						updateLastTime("scaledown")

						result, err := ScaleDown(
							nodes,
							unneededNodes,
							*scaleDownUnneededTime,
							allScheduled,
							gceManager, kubeClient, predicateChecker)

						updateDuration("scaledown", scaleDownStart)

						// TODO: revisit result handling
						if err != nil {
							glog.Errorf("Failed to scale down: %v", err)
						} else if result == ScaleDownNodeDeleted {
							// Reset the unneeded-node map so the simulated deletions
							// are recomputed against the new cluster state.
							unneededNodes = make(map[string]time.Time, len(unneededNodes))
						} else if result == ScaleDownError || result == ScaleDownNoNodeDeleted {
							lastScaleDownFailedTrial = time.Now()
						}
					}
				}
				updateDuration("main", loopStart)
			}
		}
	}
}
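
The verifyUnschedulablePods comment block above is the key safeguard against a feedback loop between CA and the scheduler. The following standalone sketch illustrates the idea behind FilterOutSchedulable with deliberately simplified pod/node types and a CPU-only fit check; the greedy placement and the field names are assumptions for illustration, not the real Kubernetes objects or predicate logic.

package main

import "fmt"

// pod and node are tiny stand-ins for the Kubernetes objects:
// only a name and a CPU amount in millicores.
type pod struct {
	name     string
	cpuMilli int64
}

type node struct {
	name         string
	freeCPUMilli int64
}

// filterOutSchedulable keeps only the pods that fit on none of the existing nodes.
// A pod that fits is "reserved" on that node so its capacity is not counted twice,
// mirroring the intent of the check: those pods are the scheduler's problem, not CA's.
func filterOutSchedulable(pods []pod, nodes []*node) []pod {
	stillUnschedulable := make([]pod, 0, len(pods))
	for _, p := range pods {
		placed := false
		for _, n := range nodes {
			if p.cpuMilli <= n.freeCPUMilli {
				n.freeCPUMilli -= p.cpuMilli // reserve capacity for this pod
				placed = true
				break
			}
		}
		if !placed {
			stillUnschedulable = append(stillUnschedulable, p)
		}
	}
	return stillUnschedulable
}

func main() {
	nodes := []*node{{name: "n1", freeCPUMilli: 200}}
	pods := []pod{{name: "small", cpuMilli: 100}, {name: "big", cpuMilli: 500}}
	// Only "big" really needs a new node; "small" still fits on n1 and is filtered out.
	fmt.Println(filterOutSchedulable(pods, nodes))
}
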
Example #2
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", err)
	}
	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)
	unschedulablePodLister := NewUnschedulablePodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager %v", err)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		select {
		case <-time.After(time.Minute):
			{
				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				pods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				// We need to reset all pods that were marked as unschedulable no later than
				// the time when the newest node became available to the scheduler.
				allNodesAvailableTime := GetAllNodesAvailableTime(nodes)
				resetOldPods(kubeClient, pods, allNodesAvailableTime)

				// From now on we only care about unschedulable pods that were marked after the newest
				// node became available for the scheduler.
				pods = filterOldPods(pods, allNodesAvailableTime)
				if len(pods) == 0 {
					glog.V(1).Info("No unschedulable pods")
					continue
				}
				for _, pod := range pods {
					glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
				}

				expansionOptions := make([]ExpansionOption, 0)
				nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
				if err != nil {
					glog.Errorf("Failed to build node infors for migs: %v", err)
					continue
				}

				for _, migConfig := range migConfigs {

					currentSize, err := gceManager.GetMigSize(migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					if currentSize >= int64(migConfig.MaxSize) {
						// skip this mig.
						glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
						continue
					}

					option := ExpansionOption{
						migConfig: migConfig,
						estimator: estimator.NewBasicNodeEstimator(),
					}
					migHelpsSomePods := false

					nodeInfo, found := nodeInfos[migConfig.Url()]
					if !found {
						glog.Errorf("No node info for: %s", migConfig.Url())
						continue
					}

					for _, pod := range pods {
						err = predicateChecker.CheckPredicates(pod, nodeInfo)
						if err == nil {
							migHelpsSomePods = true
							option.estimator.Add(pod)
						} else {
							glog.V(2).Infof("Scale-up predicate failed: %v", err)
						}
					}
					if migHelpsSomePods {
						expansionOptions = append(expansionOptions, option)
					}
				}

				// Pick some expansion option.
				bestOption := BestExpansionOption(expansionOptions)
				if bestOption != nil {
					glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())
					nodeInfo, found := nodeInfos[bestOption.migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", bestOption.migConfig.Url())
						continue
					}
					node := nodeInfo.Node()
					estimate, report := bestOption.estimator.Estimate(node)
					glog.V(1).Info(bestOption.estimator.GetDebug())
					glog.V(1).Info(report)
					glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

					currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					newSize := currentSize + int64(estimate)
					if newSize >= int64(bestOption.migConfig.MaxSize) {
						newSize = int64(bestOption.migConfig.MaxSize)
					}
					glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

					if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
						glog.Errorf("Failed to set MIG size: %v", err)
					}
				}
			}
		}
	}
}
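
Example #2 ends by picking one expansion option and clamping the new MIG size to the configured maximum. The sketch below reproduces that tail end in isolation; the "helps the most pods" selection rule is an assumed heuristic (the source does not show how BestExpansionOption chooses), and migConfig/expansionOption are reduced to the fields the sketch needs.

package main

import "fmt"

// migConfig and expansionOption are trimmed to what this sketch needs.
type migConfig struct {
	url     string
	maxSize int64
}

type expansionOption struct {
	mig        *migConfig
	podsHelped int   // how many pending pods this MIG could host
	estimate   int64 // how many extra nodes the estimator asked for
}

// bestExpansionOption picks the option that helps the most pods, or nil if there is none.
// This selection rule is an assumption for illustration only.
func bestExpansionOption(options []expansionOption) *expansionOption {
	var best *expansionOption
	for i := range options {
		if best == nil || options[i].podsHelped > best.podsHelped {
			best = &options[i]
		}
	}
	return best
}

// targetSize adds the estimate to the current size but never exceeds the MIG's maximum,
// matching the clamping done in Example #2 before SetMigSize is called.
func targetSize(current int64, opt *expansionOption) int64 {
	newSize := current + opt.estimate
	if newSize > opt.mig.maxSize {
		newSize = opt.mig.maxSize
	}
	return newSize
}

func main() {
	opts := []expansionOption{
		{mig: &migConfig{url: "mig-a", maxSize: 10}, podsHelped: 3, estimate: 4},
		{mig: &migConfig{url: "mig-b", maxSize: 5}, podsHelped: 1, estimate: 1},
	}
	best := bestExpansionOption(opts)
	// mig-a wins and its target size is capped at maxSize: "resize mig-a from 8 to 10".
	fmt.Printf("resize %s from 8 to %d\n", best.mig.url, targetSize(8, best))
}
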
Example #3
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", err)
	}

	// Configuration
	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", err)
	}
	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)

	predicateChecker, err := simulator.NewPredicateChecker(kubeClient)
	if err != nil {
		glog.Fatalf("Failed to create predicate checker: %v", err)
	}
	unschedulablePodLister := NewUnschedulablePodLister(kubeClient)
	scheduledPodLister := NewScheduledPodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	lastScaleUpTime := time.Now()
	lastScaleDownFailedTrial := time.Now()
	underutilizedNodes := make(map[string]time.Time)

	eventBroadcaster := kube_record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))
	recorder := eventBroadcaster.NewRecorder(kube_api.EventSource{Component: "cluster-autoscaler"})

	for {
		select {
		case <-time.After(time.Minute):
			{
				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				allUnschedulablePods, err := unschedulablePodLister.List()
				if err != nil {
					glog.Errorf("Failed to list unscheduled pods: %v", err)
					continue
				}

				allScheduled, err := scheduledPodLister.List()
				if err != nil {
					glog.Errorf("Failed to list scheduled pods: %v", err)
					continue
				}

				// We need to reset all pods that were marked as unschedulable no later than
				// the time when the newest node became available to the scheduler, so that
				// the scheduler retries them against the current set of nodes.
				allNodesAvailableTime := GetAllNodesAvailableTime(nodes)
				podsToReset, unschedulablePodsToHelp := SlicePodsByPodScheduledTime(allUnschedulablePods, allNodesAvailableTime)
				ResetPodScheduledCondition(kubeClient, podsToReset)

				// We need to check whether pods marked as unschedulable are actually unschedulable.
				// This prevents adding unnecessary nodes. Example of such a situation:
				// - CA and the scheduler have slightly different configurations
				// - the scheduler can't schedule a pod and marks it as unschedulable
				// - CA adds a node which should help the pod
				// - the scheduler doesn't schedule the pod on the new node
				//   because according to its logic the pod doesn't fit there
				// - CA sees the pod is still unschedulable, so it adds another node to help it
				//
				// With the check enabled the last point won't happen because CA will ignore a pod
				// that is supposed to be schedulable on an existing node.
				//
				// Without the check below, the cluster might be unnecessarily scaled up to the
				// maximum allowed size in the situation described above.
				if *verifyUnschedulablePods {
					unschedulablePodsToHelp = FilterOutSchedulable(unschedulablePodsToHelp, nodes, allScheduled, predicateChecker)
				}

				if len(unschedulablePodsToHelp) == 0 {
					glog.V(1).Info("No unschedulable pods")
				} else {
					scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, migConfigs, gceManager, kubeClient, predicateChecker, recorder)
					if err != nil {
						glog.Errorf("Failed to scale up: %v", err)
						continue
					} else {
						if scaledUp {
							lastScaleUpTime = time.Now()
							// No scale down in this iteration.
							continue
						}
					}
				}

				if *scaleDownEnabled {
					// In dry run only utilization is updated
					calculateUtilizationOnly := lastScaleUpTime.Add(*scaleDownDelay).After(time.Now()) ||
						lastScaleDownFailedTrial.Add(*scaleDownTrialFrequency).After(time.Now())

					underutilizedNodes = CalculateUnderutilizedNodes(
						nodes,
						underutilizedNodes,
						*scaleDownUtilizationThreshold,
						allScheduled,
						kubeClient,
						predicateChecker)

					if !calculateUtilizationOnly {
						result, err := ScaleDown(
							nodes,
							underutilizedNodes,
							*scaleDownUnderutilizedTime,
							allScheduled,
							gceManager, kubeClient, predicateChecker)
						if err != nil {
							glog.Errorf("Failed to scale down: %v", err)
						} else {
							if result != ScaleDownNodeDeleted {
								lastScaleDownFailedTrial = time.Now()
							}
						}
					}
				}
			}
		}
	}
}
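
The calculateUtilizationOnly flag above gates real scale-down behind two cool-downs: one after the last scale-up and one after the last failed scale-down attempt. A minimal sketch of that gating, with illustrative durations rather than the flag values from the source:

package main

import (
	"fmt"
	"time"
)

// scaleDownAllowed reports whether a real scale-down may be attempted now, or whether
// the loop should only refresh its utilization bookkeeping (the "dry run" branch).
func scaleDownAllowed(now, lastScaleUp, lastFailedScaleDown time.Time,
	scaleDownDelay, retryInterval time.Duration) bool {
	if lastScaleUp.Add(scaleDownDelay).After(now) {
		return false // too soon after a scale-up
	}
	if lastFailedScaleDown.Add(retryInterval).After(now) {
		return false // still backing off after a failed scale-down attempt
	}
	return true
}

func main() {
	now := time.Now()
	// Scaled up 5 minutes ago with a 10-minute delay: only update utilization (false).
	fmt.Println(scaleDownAllowed(now, now.Add(-5*time.Minute), now.Add(-20*time.Minute),
		10*time.Minute, 10*time.Minute))
	// Both cool-downs have expired: a real scale-down may be attempted (true).
	fmt.Println(scaleDownAllowed(now, now.Add(-30*time.Minute), now.Add(-20*time.Minute),
		10*time.Minute, 10*time.Minute))
}
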
Example #4
func main() {
	flag.Var(&migConfigFlag, "nodes", "sets min,max size and url of a MIG to be controlled by Cluster Autoscaler. "+
		"Can be used multiple times. Format: <min>:<max>:<migurl>")
	flag.Parse()

	url, err := url.Parse(*kubernetes)
	if err != nil {
		glog.Fatalf("Failed to parse Kuberentes url: %v", err)
	}
	kubeConfig, err := config.GetKubeClientConfig(url)
	if err != nil {
		glog.Fatalf("Failed to build Kuberentes client configuration: %v", err)
	}

	kubeClient := kube_client.NewOrDie(kubeConfig)
	unscheduledPodLister := NewUnscheduledPodLister(kubeClient)
	nodeLister := NewNodeLister(kubeClient)

	migConfigs := make([]*config.MigConfig, 0, len(migConfigFlag))
	for i := range migConfigFlag {
		migConfigs = append(migConfigs, &migConfigFlag[i])
	}

	gceManager, err := gce.CreateGceManager(migConfigs)
	if err != nil {
		glog.Fatalf("Failed to create GCE Manager %v", err)
	}

	predicateChecker := simulator.NewPredicateChecker()

	for {
		select {
		case <-time.After(time.Minute):
			{
				pods, err := unscheduledPodLister.List()
				if err != nil {
					glog.Errorf("Failed to list pods: %v", err)
					continue
				}
				if len(pods) == 0 {
					glog.V(1).Info("No unscheduled pods")
					continue
				}

				for _, pod := range pods {
					glog.V(1).Infof("Pod %s/%s is not scheduled", pod.Namespace, pod.Name)
				}

				nodes, err := nodeLister.List()
				if err != nil {
					glog.Errorf("Failed to list nodes: %v", err)
					continue
				}
				if len(nodes) == 0 {
					glog.Errorf("No nodes in the cluster")
					continue
				}

				if err := CheckMigsAndNodes(nodes, gceManager); err != nil {
					glog.Warningf("Cluster is not ready for autoscaling: %v", err)
					continue
				}

				// Check whether the scheduler tried to schedule the pods after the newest node was added.
				newestNode := GetNewestNode(nodes)
				if newestNode == nil {
					glog.Errorf("No newest node")
					continue
				}
				oldestSchedulingTrial := GetOldestFailedSchedulingTrail(pods)
				if oldestSchedulingTrial == nil {
					glog.Errorf("No oldest unschedueled trial: %v", err)
					continue
				}

				// TODO: Find better way to check if all pods were checked after the newest node
				// was added.
				if newestNode.CreationTimestamp.After(oldestSchedulingTrial.Add(-1 * time.Minute)) {
					// Let's give the scheduler another chance.
					glog.V(1).Infof("One of the pods has not been tried after adding %s", newestNode.Name)
					continue
				}

				expansionOptions := make([]ExpansionOption, 0)
				nodeInfos, sampleNodes, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
				if err != nil {
					glog.Errorf("Failed to build node infors for migs: %v", err)
					continue
				}

				for _, migConfig := range migConfigs {

					currentSize, err := gceManager.GetMigSize(migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					if currentSize >= int64(migConfig.MaxSize) {
						// skip this mig.
						glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
						continue
					}

					option := ExpansionOption{
						migConfig: migConfig,
						estimator: estimator.NewBasicNodeEstimator(),
					}
					migHelpsSomePods := false

					nodeInfo, found := nodeInfos[migConfig.Url()]
					if !found {
						glog.Errorf("No node info for: %s", migConfig.Url())
						continue
					}

					node, found := sampleNodes[migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", migConfig.Url())
						continue
					}

					for _, pod := range pods {
						err = predicateChecker.CheckPredicates(pod, node, nodeInfo)
						if err == nil {
							migHelpsSomePods = true
							option.estimator.Add(pod)
						} else {
							glog.V(2).Infof("Scale-up predicate failed: %v", err)
						}
					}
					if migHelpsSomePods {
						expansionOptions = append(expansionOptions, option)
					}
				}

				// Pick some expansion option.
				bestOption := BestExpansionOption(expansionOptions)
				if bestOption != nil {
					glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())
					node, found := sampleNodes[bestOption.migConfig.Url()]
					if !found {
						glog.Errorf("No sample node for: %s", bestOption.migConfig.Url())
						continue
					}
					estimate := bestOption.estimator.Estimate(node)
					glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

					currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
					if err != nil {
						glog.Errorf("Failed to get MIG size: %v", err)
						continue
					}
					newSize := currentSize + int64(estimate)
					if newSize >= int64(bestOption.migConfig.MaxSize) {
						newSize = int64(bestOption.migConfig.MaxSize)
					}
					glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

					if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
						glog.Errorf("Failed to set MIG size: %v", err)
					}
				}
			}
		}
	}
}
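
The freshness check around GetNewestNode and GetOldestFailedSchedulingTrail is what keeps Example #4 from scaling up before the scheduler has even tried the newest node. A self-contained sketch of that comparison, using plain time.Time values instead of Kubernetes objects and the same one-minute grace period:

package main

import (
	"fmt"
	"time"
)

// allPodsTriedAfterNewestNode returns true when the oldest failed scheduling attempt
// happened at least gracePeriod after the newest node appeared, i.e. every pending pod
// has already been retried against the new capacity.
func allPodsTriedAfterNewestNode(newestNodeCreated, oldestFailedTrial time.Time,
	gracePeriod time.Duration) bool {
	return !newestNodeCreated.After(oldestFailedTrial.Add(-gracePeriod))
}

func main() {
	nodeCreated := time.Now().Add(-3 * time.Minute)
	oldestTrial := time.Now().Add(-30 * time.Second)
	// The oldest failed attempt came well after the node was added, so the new capacity
	// evidently did not help and a scale-up is warranted: prints true.
	fmt.Println(allPodsTriedAfterNewestNode(nodeCreated, oldestTrial, time.Minute))
}
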