// Calculates a score for each pod and returns a slice of podInfo structures.
// Score is defined as cpu_sum/node_capacity + mem_sum/node_capacity.
// Pods that have bigger requirements should be processed first, thus have higher scores.
func calculatePodScore(pods []*apiv1.Pod, nodeTemplate *schedulercache.NodeInfo) []*podInfo {
	podInfos := make([]*podInfo, 0, len(pods))

	for _, pod := range pods {
		cpuSum := resource.Quantity{}
		memorySum := resource.Quantity{}

		for _, container := range pod.Spec.Containers {
			if request, ok := container.Resources.Requests[apiv1.ResourceCPU]; ok {
				cpuSum.Add(request)
			}
			if request, ok := container.Resources.Requests[apiv1.ResourceMemory]; ok {
				memorySum.Add(request)
			}
		}
		score := float64(0)
		if cpuAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceCPU]; ok && cpuAllocatable.MilliValue() > 0 {
			score += float64(cpuSum.MilliValue()) / float64(cpuAllocatable.MilliValue())
		}
		if memAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceMemory]; ok && memAllocatable.Value() > 0 {
			score += float64(memorySum.Value()) / float64(memAllocatable.Value())
		}

		podInfos = append(podInfos, &podInfo{
			score: score,
			pod:   pod,
		})
	}
	return podInfos
}
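// Illustrative sketch, not part of the original source: the doc comment above says
// that pods with bigger requirements should be processed first, so the caller is
// expected to order the returned slice by descending score. A minimal way to do that,
// assuming Go 1.8+'s sort.Slice is available (the helper name is an assumption):
func sortPodInfosByScore(podInfos []*podInfo) {
	sort.Slice(podInfos, func(i, j int) bool {
		// Higher score (bigger requirements) first.
		return podInfos[i].score > podInfos[j].score
	})
}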
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node.
func ComputeTaintTolerationPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var tolerationList []api.Toleration
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		tolerationList = priorityMeta.podTolerations
	} else {
		var err error
		tolerationList, err = getTolerationListFromPod(pod)
		if err != nil {
			return schedulerapi.HostPriority{}, err
		}
	}

	taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return schedulerapi.HostPriority{}, err
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
	}, nil
}
func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	controllerRef := priorityutil.GetControllerRef(pod)
	if controllerRef != nil {
		// Ignore pods that are owned by a controller other than ReplicationController
		// or ReplicaSet.
		if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
			controllerRef = nil
		}
	}
	if controllerRef == nil {
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}

	avoids, err := v1.GetAvoidPodsFromNodeAnnotations(node.Annotations)
	if err != nil {
		// If we cannot get the annotation, assume it's schedulable there.
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}
	for i := range avoids.PreferAvoidPods {
		avoid := &avoids.PreferAvoidPods[i]
		if controllerRef != nil {
			if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
				return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
			}
		}
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, err
	}

	// Check whether the current node matches the inter-pod affinity scheduling constraints.
	// Hard inter-pod affinity is not symmetric, so check it only when affinity.PodAffinity exists.
	if affinity.PodAffinity != nil {
		if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
			return false, ErrPodAffinityNotMatch
		}
	}

	// Hard inter-pod anti-affinity is symmetric, so we should always check it.
	if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity) {
		return false, ErrPodAffinityNotMatch
	}

	return true, nil
}
// Calculates the "most requested" priority from the resources used on a node.
// 'podRequests' holds the resource requests of the pod being scheduled; 'nodeInfo' has
// information about the node's allocatable resources and the pods currently scheduled on it.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}
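// Illustrative sketch, not part of the original source: calculateUsedScore is referenced
// above but not shown. Assuming the usual "most requested" convention, it scales
// requested/capacity onto a 0-10 score and treats zero capacity or over-commitment as 0.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		// Requests already exceed what the node can offer; never prefer this node.
		glog.V(2).Infof("Combined requested resources %d exceed capacity %d on node %s", requested, capacity, node)
		return 0
	}
	return (requested * 10) / capacity
}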
func PodFitsResources(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allocatable := node.Status.Allocatable
	allowedPodNumber := allocatable.Pods().Value()
	if int64(len(nodeInfo.Pods()))+1 > allowedPodNumber {
		return false,
			newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
	}
	podRequest := getResourceRequest(pod)
	if podRequest.milliCPU == 0 && podRequest.memory == 0 {
		return true, nil
	}

	totalMilliCPU := allocatable.Cpu().MilliValue()
	totalMemory := allocatable.Memory().Value()

	if totalMilliCPU < podRequest.milliCPU+nodeInfo.RequestedResource().MilliCPU {
		return false,
			newInsufficientResourceError(cpuResourceName, podRequest.milliCPU, nodeInfo.RequestedResource().MilliCPU, totalMilliCPU)
	}
	if totalMemory < podRequest.memory+nodeInfo.RequestedResource().Memory {
		return false,
			newInsufficientResourceError(memoryResoureceName, podRequest.memory, nodeInfo.RequestedResource().Memory, totalMemory)
	}
	glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
		podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
	return true, nil
}
// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
// reporting a memory pressure condition.
func CheckNodeMemoryPressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var podBestEffort bool

	predicateMeta, ok := meta.(*predicateMetadata)
	if ok {
		podBestEffort = predicateMeta.podBestEffort
	} else {
		// We couldn't parse metadata - fallback to computing it.
		podBestEffort = isPodBestEffort(pod)
	}

	// The pod is not a BestEffort pod.
	if !podBestEffort {
		return true, nil
	}

	// Is the node under memory pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeMemoryPressure && cond.Status == api.ConditionTrue {
			return false, ErrNodeUnderMemoryPressure
		}
	}

	return true, nil
}
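// Illustrative sketch, not part of the original source: isPodBestEffort is used above
// but not shown. Assuming it delegates to the kubelet QoS helpers (the qos import path
// and constant are assumptions for this sketch), it would look roughly like this.
func isPodBestEffort(pod *api.Pod) bool {
	// A pod is BestEffort when none of its containers declare requests or limits.
	return qos.GetPodQOS(pod) == qos.BestEffort
}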
func (c *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if !c.satisfiesExistingPodsAntiAffinity(pod, meta, node) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	// Now check if <pod> requirements will be satisfied on this node.
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, nil, err
	}
	if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
		return true, nil, nil
	}
	if !c.satisfiesPodsAffinityAntiAffinity(pod, node, affinity) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied",
			podName(pod), node.Name)
	}
	return true, nil, nil
}
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
}
func matchesPredicate(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil
	}
	return false, algorithmpredicates.ErrFakePredicate
}
func PodSelectorMatches(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found: %q", nodeName)
	}
	if PodMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
func matchesPredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil
}
func PodFitsHost(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrPodNotMatchHostName}, nil
}
func PodFitsHost(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil
	}
	return false, ErrPodNotMatchHostName
}
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	if checker.NodeMatchPodAffinityAntiAffinity(pod, allPods, node) {
		return true, nil
	}
	return false, ErrPodAffinityNotMatch
}
// CheckNodeInodePressurePredicate checks if a pod can be scheduled on a node
// reporting an inode pressure condition.
func CheckNodeInodePressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	// Is the node under inode pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeInodePressure && cond.Status == api.ConditionTrue {
			return false, []algorithm.PredicateFailureReason{ErrNodeUnderInodePressure}, nil
		}
	}

	return true, nil, nil
}
// ImageLocalityPriorityMap is a priority function that favors nodes that already have the requested pod's container images.
// It detects whether the requested images are present on a node and then calculates a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger the sum of their sizes, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var sumSize int64
	for i := range pod.Spec.Containers {
		sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: calculateScoreFromSize(sumSize),
	}, nil
}
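// Illustrative sketch, not part of the original source: calculateScoreFromSize is referenced
// above but not shown. A plausible implementation maps the summed image size linearly onto
// the 0-10 range between a lower and an upper threshold; the minImgSize and maxImgSize
// constants below are assumptions made for this example, not the scheduler's actual values.
func calculateScoreFromSize(sumSize int64) int {
	const (
		minImgSize int64 = 23 * 1024 * 1024   // assumed lower threshold (~23 MB)
		maxImgSize int64 = 1000 * 1024 * 1024 // assumed upper threshold (~1 GB)
	)
	switch {
	case sumSize == 0 || sumSize < minImgSize:
		// None of the images are present, or their total size is too small to matter.
		return 0
	case sumSize >= maxImgSize:
		// The images already on the node are large; give this node the highest priority.
		return 10
	default:
		// Linear interpolation between the thresholds, yielding scores 1-10.
		return int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
	}
}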
// CheckPredicates checks if the given pod can be placed on the given node.
func (p *PredicateChecker) CheckPredicates(pod *kube_api.Pod, nodeInfo *schedulercache.NodeInfo) error {
	// TODO(fgrzadkowski): Use full list of predicates.
	match, err := predicates.GeneralPredicates(pod, nodeInfo)
	nodename := "unknown"
	if nodeInfo.Node() != nil {
		nodename = nodeInfo.Node().Name
	}
	if err != nil {
		return fmt.Errorf("cannot put %s on %s due to %v", pod.Name, nodename, err)
	}
	if !match {
		return fmt.Errorf("cannot put %s on %s", pod.Name, nodename)
	}
	return nil
}
func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allowedPodNumber := nodeInfo.AllowedPodNumber()
	if len(nodeInfo.Pods())+1 > allowedPodNumber {
		return false,
			newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), int64(allowedPodNumber))
	}

	var podRequest *resourceRequest
	predicateMeta, ok := meta.(*predicateMetadata)
	if ok {
		podRequest = predicateMeta.podRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		podRequest = getResourceRequest(pod)
	}
	if podRequest.milliCPU == 0 && podRequest.memory == 0 && podRequest.nvidiaGPU == 0 {
		return true, nil
	}

	allocatable := node.Status.Allocatable
	totalMilliCPU := allocatable.Cpu().MilliValue()
	totalMemory := allocatable.Memory().Value()
	totalNvidiaGPU := allocatable.NvidiaGPU().Value()

	if totalMilliCPU < podRequest.milliCPU+nodeInfo.RequestedResource().MilliCPU {
		return false,
			newInsufficientResourceError(cpuResourceName, podRequest.milliCPU, nodeInfo.RequestedResource().MilliCPU, totalMilliCPU)
	}
	if totalMemory < podRequest.memory+nodeInfo.RequestedResource().Memory {
		return false,
			newInsufficientResourceError(memoryResourceName, podRequest.memory, nodeInfo.RequestedResource().Memory, totalMemory)
	}
	if totalNvidiaGPU < podRequest.nvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
		return false,
			newInsufficientResourceError(nvidiaGpuResourceName, podRequest.nvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, totalNvidiaGPU)
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
			podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
	}
	return true, nil
}
// CheckNodeLabelPresence checks whether all of the specified labels exist on a node or not, regardless of their value.
// If "presence" is false, it returns false if any of the requested labels matches any of the node's labels,
// otherwise it returns true.
// If "presence" is true, it returns false if any of the requested labels does not match any of the node's labels,
// otherwise it returns true.
//
// Consider the cases where the nodes are placed in regions/zones/racks and these are identified by labels.
// In some cases, it is required that only nodes that are part of ANY of the defined regions/zones/racks be selected.
//
// Alternatively, eliminating nodes that have a certain label, regardless of value, is also useful.
// A node may have a label with "retiring" as the key and the date as the value,
// and it may be desirable to avoid scheduling new pods on this node.
func (n *NodeLabelChecker) CheckNodeLabelPresence(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var exists bool
	nodeLabels := labels.Set(node.Labels)
	for _, label := range n.labels {
		exists = nodeLabels.Has(label)
		if (exists && !n.presence) || (!exists && n.presence) {
			return false, ErrNodeLabelPresenceViolated
		}
	}
	return true, nil
}
// CheckPredicates checks if the given pod can be placed on the given node.
func (p *PredicateChecker) CheckPredicates(pod *kube_api.Pod, nodeInfo *schedulercache.NodeInfo) error {
	for _, predicate := range p.predicates {
		match, err := predicate(pod, nodeInfo)
		nodename := "unknown"
		if nodeInfo.Node() != nil {
			nodename = nodeInfo.Node().Name
		}
		if err != nil {
			return fmt.Errorf("cannot put %s on %s due to %v", pod.Name, nodename, err)
		}
		if !match {
			return fmt.Errorf("cannot put %s on %s", pod.Name, nodename)
		}
	}
	return nil
}
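// Illustrative sketch, not part of the original source: a caller could use PredicateChecker
// to find any node that can accommodate a pod. The findNodeForPod name and the nodeInfos
// map are assumptions made for this example.
func findNodeForPod(checker *PredicateChecker, pod *kube_api.Pod, nodeInfos map[string]*schedulercache.NodeInfo) (string, error) {
	for name, nodeInfo := range nodeInfos {
		// A nil error means every configured predicate accepted this node.
		if err := checker.CheckPredicates(pod, nodeInfo); err == nil {
			return name, nil
		}
	}
	return "", fmt.Errorf("no node can accommodate pod %s", pod.Name)
}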
// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, it prioritizes nodes that have the specified label, regardless of value.
// If presence is false, it prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	exists := labels.Set(node.Labels).Has(n.label)
	score := 0
	if (exists && n.presence) || (!exists && !n.presence) {
		score = 10
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}
func PodFitsResources(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	var predicateFails []algorithm.PredicateFailureReason
	allowedPodNumber := nodeInfo.AllowedPodNumber()
	if len(nodeInfo.Pods())+1 > allowedPodNumber {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourcePods, 1, int64(len(nodeInfo.Pods())), int64(allowedPodNumber)))
	}

	var podRequest *schedulercache.Resource
	if predicateMeta, ok := meta.(*predicateMetadata); ok {
		podRequest = predicateMeta.podRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		podRequest = GetResourceRequest(pod)
	}
	if podRequest.MilliCPU == 0 && podRequest.Memory == 0 && podRequest.NvidiaGPU == 0 && len(podRequest.OpaqueIntResources) == 0 {
		return len(predicateFails) == 0, predicateFails, nil
	}

	allocatable := nodeInfo.AllocatableResource()
	if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceCPU, podRequest.MilliCPU, nodeInfo.RequestedResource().MilliCPU, allocatable.MilliCPU))
	}
	if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
	}
	if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
		predicateFails = append(predicateFails, NewInsufficientResourceError(api.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
	}
	for rName, rQuant := range podRequest.OpaqueIntResources {
		if allocatable.OpaqueIntResources[rName] < rQuant+nodeInfo.RequestedResource().OpaqueIntResources[rName] {
			predicateFails = append(predicateFails, NewInsufficientResourceError(rName, podRequest.OpaqueIntResources[rName], nodeInfo.RequestedResource().OpaqueIntResources[rName], allocatable.OpaqueIntResources[rName]))
		}
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
			podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
	}
	return len(predicateFails) == 0, predicateFails, nil
}
// CalculateNodeAffinityPriorityMap prioritizes nodes according to the node affinity scheduling preferences
// indicated in PreferredDuringSchedulingIgnoredDuringExecution. Each time a node matches a preferredSchedulingTerm,
// preferredSchedulingTerm.Weight is added to its score. Thus, the more preferredSchedulingTerms
// the node satisfies, and the higher the weights of those satisfied terms, the higher the
// score the node gets.
func CalculateNodeAffinityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var affinity *api.Affinity
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		affinity = priorityMeta.affinity
	} else {
		// We couldn't parse metadata - fallback to computing it.
		var err error
		affinity, err = api.GetAffinityFromPodAnnotations(pod.Annotations)
		if err != nil {
			return schedulerapi.HostPriority{}, err
		}
	}

	var count int32
	// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
	// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
	// empty PreferredSchedulingTerm matches all objects.
	if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
		// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
		for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
			preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
			if preferredSchedulingTerm.Weight == 0 {
				continue
			}

			// TODO: Avoid computing it for all nodes if this becomes a performance problem.
			nodeSelector, err := api.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
			if err != nil {
				return schedulerapi.HostPriority{}, err
			}
			if nodeSelector.Matches(labels.Set(node.Labels)) {
				count += preferredSchedulingTerm.Weight
			}
		}
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int(count),
	}, nil
}
func (t *TolerationMatch) PodToleratesNodeTaints(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return false, err
	}

	tolerations, err := api.GetTolerationsFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, err
	}

	if tolerationsToleratesTaints(tolerations, taints) {
		return true, nil
	}
	return false, ErrTaintsTolerationsNotMatch
}
func calculateBalancedResourceAllocation(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
	memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
	score := int(0)
	if cpuFraction >= 1 || memoryFraction >= 1 {
		// If requested >= capacity, the corresponding host should never be preferred.
		score = 0
	} else {
		// The lower and upper bounds of the difference between cpuFraction and memoryFraction are -1 and 1,
		// respectively. Multiplying the absolute value of the difference by 10 scales the value to
		// 0-10, with 0 representing a well-balanced allocation and 10 a poorly balanced one. Subtracting it from
		// 10 yields a score that also scales from 0 to 10, with 10 representing a well-balanced allocation.
		diff := math.Abs(cpuFraction - memoryFraction)
		score = int(10 - diff*10)
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}
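// Illustrative sketch, not part of the original source: fractionOfCapacity is used above
// but not shown. Assuming it is the obvious ratio helper, zero capacity is treated as
// fully used so that such a node is never preferred.
func fractionOfCapacity(requested, capacity int64) float64 {
	if capacity == 0 {
		return 1
	}
	return float64(requested) / float64(capacity)
}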
// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
// reporting a memory pressure condition.
func CheckNodeMemoryPressurePredicate(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	// The pod is not a BestEffort pod.
	if !isPodBestEffort(pod) {
		return true, nil
	}

	// Is the node under memory pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeMemoryPressure && cond.Status == api.ConditionTrue {
			return false, ErrNodeUnderMemoryPressure
		}
	}

	return true, nil
}
func PodToleratesNodeTaints(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	taints, err := v1.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return false, nil, err
	}

	tolerations, err := v1.GetTolerationsFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, nil, err
	}

	if tolerationsToleratesTaints(tolerations, taints) {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrTaintsTolerationsNotMatch}, nil
}
func PodToleratesNodeTaints(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return false, err
	}

	tolerations, err := api.GetTolerationsFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, err
	}

	if tolerationsToleratesTaints(tolerations, taints) {
		return true, nil
	}
	return false, ErrTaintsTolerationsNotMatch
}
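// Illustrative sketch, not part of the original source: tolerationsToleratesTaints is used
// by the taint-toleration predicates above but not shown. The matching rules below are an
// assumption about the usual semantics: every taint that is not PreferNoSchedule must be
// tolerated by at least one of the pod's tolerations.
func tolerationsToleratesTaints(tolerations []api.Toleration, taints []api.Taint) bool {
	// An empty taint list is tolerated by everything.
	if len(taints) == 0 {
		return true
	}
	// A non-empty taint list cannot be tolerated by an empty toleration list.
	if len(tolerations) == 0 {
		return false
	}
	for i := range taints {
		taint := &taints[i]
		// Skip taints with effect PreferNoSchedule; those only influence priorities, not predicates.
		if taint.Effect == api.TaintEffectPreferNoSchedule {
			continue
		}
		tolerated := false
		for j := range tolerations {
			toleration := &tolerations[j]
			// Assumed matching rule: the key must match, the effect must match (or be empty),
			// and either the operator is Exists or the values are equal.
			if toleration.Key == taint.Key &&
				(len(toleration.Effect) == 0 || toleration.Effect == taint.Effect) &&
				(toleration.Operator == api.TolerationOpExists || toleration.Value == taint.Value) {
				tolerated = true
				break
			}
		}
		if !tolerated {
			return false
		}
	}
	return true
}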