// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'nodeInfo' has information about the pods already scheduled on the node.
// TODO: Use Node() from nodeInfo instead of passing it.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}
}
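calculateUnusedScore is referenced above but not shown. A minimal sketch, assuming it maps the unused share of a resource onto the scheduler's 0-10 scale (the exact cut-offs and logging are assumptions):

// calculateUnusedScore (sketch, assumed behavior): returns a 0-10 score where 10
// means the resource is entirely unused and 0 means it is fully (or over-) requested.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		// Requests already exceed capacity; such a node should get the lowest score.
		glog.V(2).Infof("Combined requested resources %d exceed capacity %d on node %s", requested, capacity, node)
		return 0
	}
	// Unused fraction of capacity, scaled to 0-10.
	return ((capacity - requested) * 10) / capacity
}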
func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	var predicateFails []algorithm.PredicateFailureReason
	allowedPodNumber := nodeInfo.AllowedPodNumber()
	if len(nodeInfo.Pods())+1 > allowedPodNumber {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourcePods, 1, int64(len(nodeInfo.Pods())), int64(allowedPodNumber)))
	}

	var podRequest *schedulercache.Resource
	if predicateMeta, ok := meta.(*predicateMetadata); ok {
		podRequest = predicateMeta.podRequest
	} else {
		// We couldn't parse metadata - fall back to computing it.
		podRequest = GetResourceRequest(pod)
	}
	if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && len(podRequest.OpaqueIntResources) == 0 {
		return len(predicateFails) == 0, predicateFails, nil
	}

	allocatable := nodeInfo.AllocatableResource()
	if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceCPU, podRequest.MilliCPU, nodeInfo.RequestedResource().MilliCPU, allocatable.MilliCPU))
	}
	if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
	}
	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
	}
	for rName, rQuant := range podRequest.OpaqueIntResources {
		if allocatable.OpaqueIntResources[rName] < rQuant+nodeInfo.RequestedResource().OpaqueIntResources[rName] {
			predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.OpaqueIntResources[rName], nodeInfo.RequestedResource().OpaqueIntResources[rName], allocatable.OpaqueIntResources[rName]))
		}
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
			podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
	}
	return len(predicateFails) == 0, predicateFails, nil
}
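When the precomputed predicate metadata is unavailable, PodFitsResources falls back to GetResourceRequest, which is not shown here. A minimal sketch, assuming it simply sums the per-container requests of the pod (handling of init containers and opaque integer resources is omitted and left as an assumption):

// GetResourceRequest (sketch, assumed shape): aggregates the resource requests of
// all containers in the pod into a single schedulercache.Resource.
func GetResourceRequest(pod *api.Pod) *schedulercache.Resource {
	result := schedulercache.Resource{}
	for _, container := range pod.Spec.Containers {
		for rName, rQuantity := range container.Resources.Requests {
			switch rName {
			case api.ResourceMemory:
				result.Memory += rQuantity.Value()
			case api.ResourceCPU:
				result.MilliCPU += rQuantity.MilliValue()
			case api.ResourceNvidiaGPU:
				result.NvidiaGPU += rQuantity.Value()
			default:
				// Opaque integer resources and other resource names are not handled in this sketch.
			}
		}
	}
	return &result
}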
func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	allowedPodNumber := nodeInfo.AllowedPodNumber()
	if len(nodeInfo.Pods())+1 > allowedPodNumber {
		return false, newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), int64(allowedPodNumber))
	}

	var podRequest *schedulercache.Resource
	if predicateMeta, ok := meta.(*predicateMetadata); ok {
		podRequest = predicateMeta.podRequest
	} else {
		// We couldn't parse metadata - fall back to computing it.
		podRequest = getResourceRequest(pod)
	}
	if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 {
		return true, nil
	}

	allocatable := nodeInfo.AllocatableResource()
	if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
		return false, newInsufficientResourceError(cpuResourceName, podRequest.MilliCPU, nodeInfo.RequestedResource().MilliCPU, allocatable.MilliCPU)
	}
	if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
		return false, newInsufficientResourceError(memoryResourceName, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory)
	}
	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
		return false, newInsufficientResourceError(nvidiaGpuResourceName, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU)
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
			podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
	}
	return true, nil
}
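This early-return variant reports failures through newInsufficientResourceError, whose definition is not included above. A minimal sketch of the error it presumably constructs (field names and the api.ResourceName parameter type are assumptions):

// InsufficientResourceError (sketch, assumed shape): records which resource was
// insufficient and the requested/used/capacity amounts involved.
type InsufficientResourceError struct {
	ResourceName api.ResourceName
	Requested    int64
	Used         int64
	Capacity     int64
}

func newInsufficientResourceError(resourceName api.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
	return &InsufficientResourceError{
		ResourceName: resourceName,
		Requested:    requested,
		Used:         used,
		Capacity:     capacity,
	}
}

// Error makes the type satisfy the error interface expected by the predicate.
func (e *InsufficientResourceError) Error() string {
	return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
		e.ResourceName, e.Requested, e.Used, e.Capacity)
}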
func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
	score := int(0)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		// If requested >= capacity, the corresponding host should never be preferred.
		score = 0
	} else {
		// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
		// 0-10, with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
		// 10 leads to the score, which also scales from 0 to 10, with 10 representing well balanced.
		diff := math.Abs(cpuFraction - memoryFraction)
		score = int(10 - diff*10)
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			score,
		)
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}
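fractionOfCapacity is not shown above. A minimal sketch, assuming it returns the requested amount as a fraction of allocatable capacity and treats zero capacity as fully requested so such nodes are never preferred:

// fractionOfCapacity (sketch, assumed behavior): ratio of requested to capacity,
// with zero capacity treated as a fully requested resource.
func fractionOfCapacity(requested, capacity int64) float64 {
	if capacity == 0 {
		return 1
	}
	return float64(requested) / float64(capacity)
}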