// ScaleDown tries to scale down the cluster. It returns ScaleDownResult indicating if any node was // removed and error if such occured. func ScaleDown( nodes []*kube_api.Node, unneededNodes map[string]time.Time, unneededTime time.Duration, pods []*kube_api.Pod, cloudProvider cloudprovider.CloudProvider, client *kube_client.Client, predicateChecker *simulator.PredicateChecker, oldHints map[string]string, usageTracker *simulator.UsageTracker, recorder kube_record.EventRecorder) (ScaleDownResult, error) { now := time.Now() candidates := make([]*kube_api.Node, 0) for _, node := range nodes { if val, found := unneededNodes[node.Name]; found { glog.V(2).Infof("%s was unneeded for %s", node.Name, now.Sub(val).String()) // Check how long the node was underutilized. if !val.Add(unneededTime).Before(now) { continue } nodeGroup, err := cloudProvider.NodeGroupForNode(node) if err != nil { glog.Errorf("Error while checking node group for %s: %v", node.Name, err) continue } if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() { glog.V(4).Infof("Skipping %s - no node group config", node.Name) continue } size, err := nodeGroup.TargetSize() if err != nil { glog.Errorf("Error while checking node group size %s: %v", nodeGroup.Id(), err) continue } if size <= nodeGroup.MinSize() { glog.V(1).Infof("Skipping %s - node group min size reached", node.Name) continue } candidates = append(candidates, node) } } if len(candidates) == 0 { glog.Infof("No candidates for scale down") return ScaleDownNoUnneeded, nil } // We look for only 1 node so new hints may be incomplete. nodesToRemove, _, err := simulator.FindNodesToRemove(candidates, nodes, pods, client, predicateChecker, 1, false, oldHints, usageTracker, time.Now()) if err != nil { return ScaleDownError, fmt.Errorf("Find node to remove failed: %v", err) } if len(nodesToRemove) == 0 { glog.V(1).Infof("No node to remove") return ScaleDownNoNodeDeleted, nil } nodeToRemove := nodesToRemove[0] glog.Infof("Removing %s", nodeToRemove.Name) nodeGroup, err := cloudProvider.NodeGroupForNode(nodeToRemove) if err != nil { return ScaleDownError, fmt.Errorf("failed to node group for %s: %v", nodeToRemove.Name, err) } if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() { return ScaleDownError, fmt.Errorf("picked node that doesn't belong to a node group: %s", nodeToRemove.Name) } err = nodeGroup.DeleteNodes([]*kube_api.Node{nodeToRemove}) simulator.RemoveNodeFromTracker(usageTracker, nodeToRemove.Name, unneededNodes) if err != nil { return ScaleDownError, fmt.Errorf("Failed to delete %s: %v", nodeToRemove.Name, err) } recorder.Eventf(nodeToRemove, kube_api.EventTypeNormal, "ScaleDown", "node removed by cluster autoscaler") return ScaleDownNodeDeleted, nil }
// ScaleDown tries to scale down the cluster. It returns ScaleDownResult indicating if any node was // removed and error if such occured. func ScaleDown( context AutoscalingContext, nodes []*apiv1.Node, lastUtilizationMap map[string]float64, unneededNodes map[string]time.Time, pods []*apiv1.Pod, oldHints map[string]string, usageTracker *simulator.UsageTracker, ) (ScaleDownResult, error) { now := time.Now() candidates := make([]*apiv1.Node, 0) for _, node := range nodes { if val, found := unneededNodes[node.Name]; found { glog.V(2).Infof("%s was unneeded for %s", node.Name, now.Sub(val).String()) // Check how long the node was underutilized. if !val.Add(context.ScaleDownUnneededTime).Before(now) { continue } nodeGroup, err := context.CloudProvider.NodeGroupForNode(node) if err != nil { glog.Errorf("Error while checking node group for %s: %v", node.Name, err) continue } if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() { glog.V(4).Infof("Skipping %s - no node group config", node.Name) continue } size, err := nodeGroup.TargetSize() if err != nil { glog.Errorf("Error while checking node group size %s: %v", nodeGroup.Id(), err) continue } if size <= nodeGroup.MinSize() { glog.V(1).Infof("Skipping %s - node group min size reached", node.Name) continue } candidates = append(candidates, node) } } if len(candidates) == 0 { glog.Infof("No candidates for scale down") return ScaleDownNoUnneeded, nil } // Trying to delete empty nodes in bulk. If there are no empty nodes then CA will // try to delete not-so-empty nodes, possibly killing some pods and allowing them // to recreate on other nodes. emptyNodes := getEmptyNodes(candidates, pods, context.MaxEmptyBulkDelete, context.CloudProvider) if len(emptyNodes) > 0 { confirmation := make(chan error, len(emptyNodes)) for _, node := range emptyNodes { glog.V(0).Infof("Scale-down: removing empty node %s", node.Name) simulator.RemoveNodeFromTracker(usageTracker, node.Name, unneededNodes) go func(nodeToDelete *apiv1.Node) { confirmation <- deleteNodeFromCloudProvider(nodeToDelete, context.CloudProvider, context.Recorder) }(node) } var finalError error for range emptyNodes { if err := <-confirmation; err != nil { glog.Errorf("Problem with empty node deletion: %v", err) finalError = err } } if finalError == nil { return ScaleDownNodeDeleted, nil } return ScaleDownError, fmt.Errorf("failed to delete at least one empty node: %v", finalError) } // We look for only 1 node so new hints may be incomplete. nodesToRemove, _, err := simulator.FindNodesToRemove(candidates, nodes, pods, context.ClientSet, context.PredicateChecker, 1, false, oldHints, usageTracker, time.Now()) if err != nil { return ScaleDownError, fmt.Errorf("Find node to remove failed: %v", err) } if len(nodesToRemove) == 0 { glog.V(1).Infof("No node to remove") return ScaleDownNoNodeDeleted, nil } toRemove := nodesToRemove[0] utilization := lastUtilizationMap[toRemove.Node.Name] podNames := make([]string, 0, len(toRemove.PodsToReschedule)) for _, pod := range toRemove.PodsToReschedule { podNames = append(podNames, pod.Namespace+"/"+pod.Name) } glog.V(0).Infof("Scale-down: removing node %s, utilization: %v, pods to reschedule: ", toRemove.Node.Name, utilization, strings.Join(podNames, ",")) // Nothing super-bad should happen if the node is removed from tracker prematurely. simulator.RemoveNodeFromTracker(usageTracker, toRemove.Node.Name, unneededNodes) err = deleteNode(context, toRemove.Node, toRemove.PodsToReschedule) if err != nil { return ScaleDownError, fmt.Errorf("Failed to delete %s: %v", toRemove.Node.Name, err) } return ScaleDownNodeDeleted, nil }