// CheckMigsAndNodes checks if all migs have all required nodes.
func CheckMigsAndNodes(nodes []*kube_api.Node, gceManager *gce.GceManager) error {
	migCount := make(map[string]int)
	migs := make(map[string]*config.MigConfig)
	for _, node := range nodes {
		instanceConfig, err := config.InstanceConfigFromProviderId(node.Spec.ProviderID)
		if err != nil {
			return err
		}
		migConfig, err := gceManager.GetMigForInstance(instanceConfig)
		if err != nil {
			return err
		}
		url := migConfig.Url()
		migCount[url]++
		migs[url] = migConfig
	}
	for url, mig := range migs {
		size, err := gceManager.GetMigSize(mig)
		if err != nil {
			return err
		}
		count := migCount[url]
		if size != int64(count) {
			return fmt.Errorf("wrong number of nodes for mig: %s expected: %d actual: %d", url, size, count)
		}
	}
	return nil
}
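// runConsistencyCheck is a minimal usage sketch, not part of the original autoscaler:
// it shows how a caller might list the cluster's nodes and feed them to
// CheckMigsAndNodes. The function name is hypothetical, and the Nodes().List call
// assumes the pre-client-go unversioned client API; treat both as assumptions.
func runConsistencyCheck(kubeClient *kube_client.Client, gceManager *gce.GceManager) error {
	nodeList, err := kubeClient.Nodes().List(kube_api.ListOptions{})
	if err != nil {
		return fmt.Errorf("failed to list nodes: %v", err)
	}
	nodes := make([]*kube_api.Node, 0, len(nodeList.Items))
	for i := range nodeList.Items {
		nodes = append(nodes, &nodeList.Items[i])
	}
	return CheckMigsAndNodes(nodes, gceManager)
}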
// GetNodeInfosForMigs finds NodeInfos for all migs used to manage the given nodes.
// It also returns a mig-to-sample-node mapping.
func GetNodeInfosForMigs(nodes []*kube_api.Node, gceManager *gce.GceManager, kubeClient *kube_client.Client) (map[string]*schedulercache.NodeInfo, map[string]*kube_api.Node, error) {
	sampleNodes := make(map[string]*kube_api.Node)
	for _, node := range nodes {
		instanceConfig, err := config.InstanceConfigFromProviderId(node.Spec.ProviderID)
		if err != nil {
			return map[string]*schedulercache.NodeInfo{}, map[string]*kube_api.Node{}, err
		}
		migConfig, err := gceManager.GetMigForInstance(instanceConfig)
		if err != nil {
			return map[string]*schedulercache.NodeInfo{}, map[string]*kube_api.Node{}, err
		}
		url := migConfig.Url()
		sampleNodes[url] = node
	}
	result := make(map[string]*schedulercache.NodeInfo)
	for url, node := range sampleNodes {
		nodeInfo, err := simulator.BuildNodeInfoForNode(node.Name, kubeClient)
		if err != nil {
			return map[string]*schedulercache.NodeInfo{}, map[string]*kube_api.Node{}, err
		}
		result[url] = nodeInfo
	}
	return result, sampleNodes, nil
}
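// logMigCapacity is a usage sketch (the function name is hypothetical): the first map
// returned by GetNodeInfosForMigs is keyed by MIG URL, so capacity questions about a
// MIG reduce to inspecting the NodeInfo built from one of its registered sample nodes.
func logMigCapacity(nodes []*kube_api.Node, gceManager *gce.GceManager, kubeClient *kube_client.Client) error {
	nodeInfos, sampleNodes, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
	if err != nil {
		return err
	}
	for url, nodeInfo := range nodeInfos {
		glog.V(4).Infof("MIG %s: sample node %s, allocatable %v",
			url, sampleNodes[url].Name, nodeInfo.Node().Status.Allocatable)
	}
	return nil
}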
// ScaleUp tries to scale the cluster up. It returns true if it found a way to increase the size,
// false if it didn't, and an error if one occurred.
func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, migConfigs []*config.MigConfig,
	gceManager *gce.GceManager, kubeClient *kube_client.Client, predicateChecker *simulator.PredicateChecker,
	recorder kube_record.EventRecorder) (bool, error) {

	// From now on we only care about unschedulable pods that were marked after the newest
	// node became available for the scheduler.
	if len(unschedulablePods) == 0 {
		glog.V(1).Info("No unschedulable pods")
		return false, nil
	}

	for _, pod := range unschedulablePods {
		glog.V(1).Infof("Pod %s/%s is unschedulable", pod.Namespace, pod.Name)
	}

	expansionOptions := make([]ExpansionOption, 0)
	nodeInfos, err := GetNodeInfosForMigs(nodes, gceManager, kubeClient)
	if err != nil {
		return false, fmt.Errorf("failed to build node infos for migs: %v", err)
	}

	podsRemainUnschedulable := make(map[*kube_api.Pod]struct{})
	for _, migConfig := range migConfigs {
		currentSize, err := gceManager.GetMigSize(migConfig)
		if err != nil {
			glog.Errorf("Failed to get MIG size: %v", err)
			continue
		}
		if currentSize >= int64(migConfig.MaxSize) {
			// Skip this mig - it is already at its maximum size.
			glog.V(4).Infof("Skipping MIG %s - max size reached", migConfig.Url())
			continue
		}

		option := ExpansionOption{
			migConfig: migConfig,
			estimator: estimator.NewBasicNodeEstimator(),
		}
		migHelpsSomePods := false

		nodeInfo, found := nodeInfos[migConfig.Url()]
		if !found {
			glog.Errorf("No node info for: %s", migConfig.Url())
			continue
		}
		for _, pod := range unschedulablePods {
			err = predicateChecker.CheckPredicates(pod, nodeInfo)
			if err == nil {
				migHelpsSomePods = true
				option.estimator.Add(pod)
			} else {
				glog.V(2).Infof("Scale-up predicate failed: %v", err)
				podsRemainUnschedulable[pod] = struct{}{}
			}
		}
		if migHelpsSomePods {
			expansionOptions = append(expansionOptions, option)
		}
	}

	// Pick some expansion option.
	bestOption := BestExpansionOption(expansionOptions)
	if bestOption != nil && bestOption.estimator.GetCount() > 0 {
		glog.V(1).Infof("Best option to resize: %s", bestOption.migConfig.Url())
		nodeInfo, found := nodeInfos[bestOption.migConfig.Url()]
		if !found {
			return false, fmt.Errorf("no sample node for: %s", bestOption.migConfig.Url())
		}
		node := nodeInfo.Node()
		estimate, report := bestOption.estimator.Estimate(node)
		glog.V(1).Info(bestOption.estimator.GetDebug())
		glog.V(1).Info(report)
		glog.V(1).Infof("Estimated %d nodes needed in %s", estimate, bestOption.migConfig.Url())

		currentSize, err := gceManager.GetMigSize(bestOption.migConfig)
		if err != nil {
			return false, fmt.Errorf("failed to get MIG size: %v", err)
		}
		newSize := currentSize + int64(estimate)
		if newSize >= int64(bestOption.migConfig.MaxSize) {
			glog.V(1).Infof("Capping size to MAX (%d)", bestOption.migConfig.MaxSize)
			newSize = int64(bestOption.migConfig.MaxSize)
		}
		glog.V(1).Infof("Setting %s size to %d", bestOption.migConfig.Url(), newSize)

		if err := gceManager.SetMigSize(bestOption.migConfig, newSize); err != nil {
			return false, fmt.Errorf("failed to set MIG size: %v", err)
		}

		for pod := range bestOption.estimator.FittingPods {
			recorder.Eventf(pod, kube_api.EventTypeNormal, "TriggeredScaleUp",
				"pod triggered scale-up, mig: %s, sizes (current/new): %d/%d",
				bestOption.migConfig.Name, currentSize, newSize)
		}

		return true, nil
	}

	for pod := range podsRemainUnschedulable {
		recorder.Event(pod, kube_api.EventTypeNormal, "NotTriggerScaleUp",
			"pod didn't trigger scale-up (it wouldn't fit if a new node were added)")
	}

	return false, nil
}
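// maybeScaleUp is a usage sketch, not the original control loop: the function name and
// the lastScaleUp cool-down bookkeeping are assumptions. It illustrates how a caller
// would typically invoke ScaleUp once per iteration and, after a successful resize,
// record the time so that scale-down can be deferred while new nodes register.
func maybeScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node,
	migConfigs []*config.MigConfig, gceManager *gce.GceManager, kubeClient *kube_client.Client,
	predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder,
	lastScaleUp *time.Time) {

	scaledUp, err := ScaleUp(unschedulablePods, nodes, migConfigs, gceManager, kubeClient,
		predicateChecker, recorder)
	if err != nil {
		glog.Errorf("Scale-up failed: %v", err)
		return
	}
	if scaledUp {
		// Give the new nodes time to register before considering scale-down.
		*lastScaleUp = time.Now()
	}
}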
// ScaleDown tries to scale down the cluster. It returns a ScaleDownResult indicating whether any node
// was removed, and an error if one occurred.
func ScaleDown(
	nodes []*kube_api.Node,
	unneededNodes map[string]time.Time,
	unneededTime time.Duration,
	pods []*kube_api.Pod,
	gceManager *gce.GceManager,
	client *kube_client.Client,
	predicateChecker *simulator.PredicateChecker) (ScaleDownResult, error) {

	now := time.Now()
	candidates := make([]*kube_api.Node, 0)
	for _, node := range nodes {
		if val, found := unneededNodes[node.Name]; found {
			glog.V(2).Infof("%s was unneeded for %s", node.Name, now.Sub(val).String())

			// Check how long the node has been underutilized.
			if !val.Add(unneededTime).Before(now) {
				continue
			}

			// Check the mig size.
			instance, err := config.InstanceConfigFromProviderId(node.Spec.ProviderID)
			if err != nil {
				glog.Errorf("Error while parsing provider ID of %s: %v", node.Name, err)
				continue
			}
			migConfig, err := gceManager.GetMigForInstance(instance)
			if err != nil {
				glog.Errorf("Error while checking mig config for instance %v: %v", instance, err)
				continue
			}
			size, err := gceManager.GetMigSize(migConfig)
			if err != nil {
				glog.Errorf("Error while checking mig size for instance %v: %v", instance, err)
				continue
			}
			if size <= int64(migConfig.MinSize) {
				glog.V(1).Infof("Skipping %s - mig min size reached", node.Name)
				continue
			}

			candidates = append(candidates, node)
		}
	}
	if len(candidates) == 0 {
		glog.Infof("No candidates for scale down")
		return ScaleDownNoUnneeded, nil
	}

	nodesToRemove, err := simulator.FindNodesToRemove(candidates, nodes, pods, client, predicateChecker, 1, false)
	if err != nil {
		return ScaleDownError, fmt.Errorf("find node to remove failed: %v", err)
	}
	if len(nodesToRemove) == 0 {
		glog.V(1).Infof("No node to remove")
		return ScaleDownNoNodeDeleted, nil
	}
	nodeToRemove := nodesToRemove[0]
	glog.Infof("Removing %s", nodeToRemove.Name)

	instanceConfig, err := config.InstanceConfigFromProviderId(nodeToRemove.Spec.ProviderID)
	if err != nil {
		return ScaleDownError, fmt.Errorf("failed to get instance config for %s: %v", nodeToRemove.Name, err)
	}

	err = gceManager.DeleteInstances([]*config.InstanceConfig{instanceConfig})
	if err != nil {
		return ScaleDownError, fmt.Errorf("failed to delete %v: %v", instanceConfig, err)
	}

	return ScaleDownNodeDeleted, nil
}
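// handleScaleDown is a usage sketch (the function name and the bookkeeping-reset
// pattern are assumptions; the ScaleDownResult values are the ones returned above).
// After a successful deletion the unneeded-node timestamps refer to a node set that
// no longer exists, so a caller would typically start tracking from scratch.
func handleScaleDown(nodes []*kube_api.Node, unneededNodes map[string]time.Time,
	unneededTime time.Duration, pods []*kube_api.Pod, gceManager *gce.GceManager,
	client *kube_client.Client, predicateChecker *simulator.PredicateChecker) map[string]time.Time {

	result, err := ScaleDown(nodes, unneededNodes, unneededTime, pods, gceManager, client, predicateChecker)
	if err != nil {
		glog.Errorf("Scale-down failed: %v", err)
		return unneededNodes
	}
	if result == ScaleDownNodeDeleted {
		// Drop stale timestamps; the cluster state has changed under us.
		return make(map[string]time.Time)
	}
	return unneededNodes
}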