func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}
	controllerRef := priorityutil.GetControllerRef(pod)
	if controllerRef != nil {
		// Ignore pods that are owned by a controller other than a ReplicationController
		// or ReplicaSet.
		if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
			controllerRef = nil
		}
	}
	if controllerRef == nil {
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}

	avoids, err := v1.GetAvoidPodsFromNodeAnnotations(node.Annotations)
	if err != nil {
		// If we cannot get the annotation, assume the pod is schedulable there.
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}
	for i := range avoids.PreferAvoidPods {
		avoid := &avoids.PreferAvoidPods[i]
		if controllerRef != nil {
			if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
				return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
			}
		}
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
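// For reference, the avoid-pods preference above is read from a node annotation. The JSON
// shape below follows the AvoidPods API type, but treat the annotation key, field names, and
// sample values as an illustration rather than an authoritative schema.
const examplePreferAvoidPodsAnnotation = `{
  "preferAvoidPods": [
    {
      "podSignature": {
        "podController": {
          "apiVersion": "v1",
          "kind": "ReplicationController",
          "name": "foo",
          "uid": "abcdef12-3456-7890-abcd-ef1234567890",
          "controller": true
        }
      },
      "reason": "some reason",
      "message": "some message"
    }
  ]
}` // assumed to be stored under the scheduler.alpha.kubernetes.io/preferAvoidPods annotation key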
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on
// the number of intolerable taints on the node.
func ComputeTaintTolerationPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}
	var tolerationList []api.Toleration
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		tolerationList = priorityMeta.podTolerations
	} else {
		var err error
		tolerationList, err = getTolerationListFromPod(pod)
		if err != nil {
			return schedulerapi.HostPriority{}, err
		}
	}

	taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return schedulerapi.HostPriority{}, err
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
	}, nil
}
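// A hedged sketch of the counting helper used above: it counts taints with effect
// PreferNoSchedule that none of the pod's tolerations tolerate. The inline toleration
// match is a simplification of the real helper's matching rules (for example, it does not
// treat an empty toleration key with operator Exists as a wildcard).
func countIntolerableTaintsPreferNoScheduleSketch(taints []api.Taint, tolerations []api.Toleration) int {
	intolerable := 0
	for _, taint := range taints {
		// Only PreferNoSchedule taints influence this priority; NoSchedule taints are
		// handled by a predicate instead.
		if taint.Effect != api.TaintEffectPreferNoSchedule {
			continue
		}
		tolerated := false
		for _, t := range tolerations {
			if t.Key == taint.Key && (t.Effect == taint.Effect || t.Effect == "") &&
				(t.Operator == api.TolerationOpExists || t.Value == taint.Value) {
				tolerated = true
				break
			}
		}
		if !tolerated {
			intolerable++
		}
	}
	return intolerable
}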
// CheckNodeDiskPressurePredicate checks if a pod can be scheduled on a node
// reporting a disk pressure condition.
func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	// Is the node under disk pressure?
	if nodeInfo.DiskPressureCondition() == v1.ConditionTrue {
		return false, []algorithm.PredicateFailureReason{ErrNodeUnderDiskPressure}, nil
	}
	return true, nil, nil
}
// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
// reporting a memory pressure condition.
func CheckNodeMemoryPressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var podBestEffort bool
	predicateMeta, ok := meta.(*predicateMetadata)
	if ok {
		podBestEffort = predicateMeta.podBestEffort
	} else {
		// We couldn't parse metadata - fall back to computing it.
		podBestEffort = isPodBestEffort(pod)
	}

	// Only BestEffort pods are rejected under memory pressure.
	if !podBestEffort {
		return true, nil
	}

	// Is the node under memory pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeMemoryPressure && cond.Status == api.ConditionTrue {
			return false, ErrNodeUnderMemoryPressure
		}
	}
	return true, nil
}
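// A simplified sketch of the isPodBestEffort fallback used above: a pod is BestEffort when
// none of its containers specify any resource requests or limits. The real helper delegates
// to the shared QoS classification code, so treat this as an approximation for illustration.
func isPodBestEffortSketch(pod *api.Pod) bool {
	for _, container := range pod.Spec.Containers {
		if len(container.Resources.Requests) != 0 || len(container.Resources.Limits) != 0 {
			return false
		}
	}
	return true
}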
func (c *MaxPDVolumeCountChecker) predicate(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	newVolumes := make(map[string]bool)
	if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
		return false, err
	}

	// quick return
	if len(newVolumes) == 0 {
		return true, nil
	}

	// count unique volumes
	existingVolumes := make(map[string]bool)
	for _, existingPod := range nodeInfo.Pods() {
		if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, existingVolumes); err != nil {
			return false, err
		}
	}
	numExistingVolumes := len(existingVolumes)

	// filter out already-mounted volumes
	for k := range existingVolumes {
		if _, ok := newVolumes[k]; ok {
			delete(newVolumes, k)
		}
	}
	numNewVolumes := len(newVolumes)

	if numExistingVolumes+numNewVolumes > c.maxVolumes {
		return false, nil
	}

	return true, nil
}
// calculatePodScore calculates a score for each pod and returns podInfo structures.
// Score is defined as cpu_sum/node_capacity + mem_sum/node_capacity.
// Pods that have bigger requirements should be processed first, thus have higher scores.
func calculatePodScore(pods []*apiv1.Pod, nodeTemplate *schedulercache.NodeInfo) []*podInfo {
	podInfos := make([]*podInfo, 0, len(pods))

	for _, pod := range pods {
		cpuSum := resource.Quantity{}
		memorySum := resource.Quantity{}

		for _, container := range pod.Spec.Containers {
			if request, ok := container.Resources.Requests[apiv1.ResourceCPU]; ok {
				cpuSum.Add(request)
			}
			if request, ok := container.Resources.Requests[apiv1.ResourceMemory]; ok {
				memorySum.Add(request)
			}
		}
		score := float64(0)
		if cpuAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceCPU]; ok && cpuAllocatable.MilliValue() > 0 {
			score += float64(cpuSum.MilliValue()) / float64(cpuAllocatable.MilliValue())
		}
		if memAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceMemory]; ok && memAllocatable.Value() > 0 {
			score += float64(memorySum.Value()) / float64(memAllocatable.Value())
		}

		podInfos = append(podInfos, &podInfo{
			score: score,
			pod:   pod,
		})
	}
	return podInfos
}
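// Worked example (illustrative numbers, not taken from the source): against a node template
// with 4000m CPU and 8Gi memory allocatable, a pod requesting 500m CPU and 1Gi memory scores
// 500/4000 + 1/8 = 0.25, while a pod requesting 2000m CPU and 4Gi memory scores 1.0, so the
// larger pod is processed first. A hedged sketch of that ordering step, which is assumed to
// happen in the caller (requires the standard library sort package, Go 1.8+ for sort.Slice):
func sortPodInfosByScoreDesc(podInfos []*podInfo) {
	sort.Slice(podInfos, func(i, j int) bool {
		// Larger resource footprint first.
		return podInfos[i].score > podInfos[j].score
	})
}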
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, err
	}

	// Check whether the current node matches the pod's inter-pod affinity scheduling constraints.
	// Hard inter-pod affinity is not symmetric, so check it only when affinity.PodAffinity exists.
	if affinity.PodAffinity != nil {
		if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
			return false, ErrPodAffinityNotMatch
		}
	}

	// Hard inter-pod anti-affinity is symmetric, so we should always check it.
	if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity) {
		return false, ErrPodAffinityNotMatch
	}

	return true, nil
}
func (c *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if !c.satisfiesExistingPodsAntiAffinity(pod, meta, node) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	// Now check if <pod>'s requirements will be satisfied on this node.
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, nil, err
	}
	if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
		return true, nil, nil
	}
	if !c.satisfiesPodsAffinityAntiAffinity(pod, node, affinity) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is a visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied",
			podName(pod), node.Name)
	}
	return true, nil, nil
}
// calculateResourceOccupancy calculates the resource occupancy on a node. 'node' has
// information about the resources on the node; 'nodeInfo' tracks the pods currently
// scheduled on that node.
func calculateResourceOccupancy(pod *api.Pod, node api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
	totalMilliCPU := nodeInfo.NonZeroRequest().MilliCPU
	totalMemory := nodeInfo.NonZeroRequest().Memory
	capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
	capacityMemory := node.Status.Allocatable.Memory().Value()

	// Add the resources requested by the current pod being scheduled.
	// This also helps differentiate between differently sized, but empty, nodes.
	for _, container := range pod.Spec.Containers {
		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
		totalMilliCPU += cpu
		totalMemory += memory
	}

	cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
	memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
	glog.V(10).Infof(
		"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
		pod.Name, node.Name,
		capacityMilliCPU, capacityMemory,
		totalMilliCPU, totalMemory,
		cpuScore, memoryScore,
	)

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}
}
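// A minimal sketch of the "least requested" scoring helper used above, assuming the
// conventional formula (capacity - requested) * 10 / capacity. The real helper also takes
// the node name for logging and may log when requests exceed capacity.
func calculateScoreSketch(requested, capacity int64) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		// Over-committed node: treat it as the worst possible score.
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}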
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises an error if there is an unreplicated pod and the force option was
// not specified. Based on kubectl drain code. It makes the assumption that RC, DS, Jobs
// and RS were deleted along with their pods (no abandoned pods with a dangling created-by
// annotation). Useful for fast checks. Doesn't check if the controllers that created these
// pods still exist.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool) ([]*api.Pod, error) {
	return drain.GetPodsForDeletionOnNodeDrain(
		nodeInfo.Pods(),
		api.Codecs.UniversalDecoder(),
		skipNodesWithSystemPods,
		skipNodesWithLocalStorage,
		false,
		nil,
		0)
}
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, ErrDiskConflict
			}
		}
	}
	return true, nil
}
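// A hedged sketch of the isVolumeConflict helper referenced by the NoDiskConflict variants
// in this section, covering the three rules described in their doc comment. The field names
// follow the api.Volume source types, but treat the exact matching details (for example the
// RBD monitor comparison) as an approximation, not the upstream implementation.
func isVolumeConflictSketch(volume api.Volume, pod *api.Pod) bool {
	for _, existing := range pod.Spec.Volumes {
		// GCE PD: the same disk conflicts only if at least one of the mounts is read-write.
		if volume.GCEPersistentDisk != nil && existing.GCEPersistentDisk != nil {
			disk, existingDisk := volume.GCEPersistentDisk, existing.GCEPersistentDisk
			if disk.PDName == existingDisk.PDName && !(disk.ReadOnly && existingDisk.ReadOnly) {
				return true
			}
		}
		// AWS EBS: any two mounts of the same volume ID conflict.
		if volume.AWSElasticBlockStore != nil && existing.AWSElasticBlockStore != nil {
			if volume.AWSElasticBlockStore.VolumeID == existing.AWSElasticBlockStore.VolumeID {
				return true
			}
		}
		// Ceph RBD: conflict if the two volumes share a monitor and use the same pool and image.
		if volume.RBD != nil && existing.RBD != nil {
			if shareMonitorSketch(volume.RBD.CephMonitors, existing.RBD.CephMonitors) &&
				volume.RBD.RBDPool == existing.RBD.RBDPool && volume.RBD.RBDImage == existing.RBD.RBDImage {
				return true
			}
		}
	}
	return false
}

// shareMonitorSketch reports whether the two monitor lists have at least one entry in common.
func shareMonitorSketch(a, b []string) bool {
	for _, ma := range a {
		for _, mb := range b {
			if ma == mb {
				return true
			}
		}
	}
	return false
}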
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
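// A simplified sketch of the node-selector check used by the PodSelectorMatches variants in
// this section: it only handles the pod.Spec.NodeSelector map and ignores node-affinity
// expressions, which the real helper also evaluates. Treat it as an illustration only.
func podMatchesNodeLabelsSketch(pod *api.Pod, node *api.Node) bool {
	if len(pod.Spec.NodeSelector) == 0 {
		// A pod with no node selector can go anywhere.
		return true
	}
	selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
	return selector.Matches(labels.Set(node.Labels))
}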
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, []algorithm.PredicateFailureReason{ErrDiskConflict}, nil
			}
		}
	}
	return true, nil, nil
}
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, nil
			}
		}
	}
	return true, nil
}
func matchesPredicate(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil
	}
	return false, algorithmpredicates.ErrFakePredicate
}
func PodSelectorMatches(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found: %q", nodeName)
	}
	if PodMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
}
func matchesPredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil
}
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises an error if there is an unreplicated pod and the force option was
// not specified. Based on kubectl drain code. It makes the assumption that RC, DS, Jobs
// and RS were deleted along with their pods (no abandoned pods with a dangling created-by
// annotation). Useful for fast checks.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, force bool, skipNodesWithSystemPods bool,
	skipNodesWithLocalStorage bool, decoder runtime.Decoder) ([]*api.Pod, error) {
	pods := make([]*api.Pod, 0)
	unreplicatedPodNames := []string{}

	for _, pod := range nodeInfo.Pods() {
		_, found := pod.ObjectMeta.Annotations[types.ConfigMirrorAnnotationKey]
		if found {
			// Skip mirror pods.
			continue
		}
		replicated := false
		daemonsetPod := false

		creatorRef, found := pod.ObjectMeta.Annotations[controller.CreatedByAnnotation]
		if found {
			var sr api.SerializedReference
			if err := runtime.DecodeInto(decoder, []byte(creatorRef), &sr); err != nil {
				return []*api.Pod{}, err
			}
			if sr.Reference.Kind == "ReplicationController" {
				replicated = true
			} else if sr.Reference.Kind == "DaemonSet" {
				daemonsetPod = true
			} else if sr.Reference.Kind == "Job" {
				replicated = true
			} else if sr.Reference.Kind == "ReplicaSet" {
				replicated = true
			}
		}
		if !daemonsetPod && pod.Namespace == "kube-system" && skipNodesWithSystemPods {
			return []*api.Pod{}, fmt.Errorf("non-daemonset, non-mirrored, kube-system pod present: %s", pod.Name)
		}
		if !daemonsetPod && hasLocalStorage(pod) && skipNodesWithLocalStorage {
			return []*api.Pod{}, fmt.Errorf("pod with local storage present: %s", pod.Name)
		}
		switch {
		case daemonsetPod:
			break
		case !replicated:
			unreplicatedPodNames = append(unreplicatedPodNames, pod.Name)
			if force {
				pods = append(pods, pod)
			}
		default:
			pods = append(pods, pod)
		}
	}
	if !force && len(unreplicatedPodNames) > 0 {
		return []*api.Pod{}, fmt.Errorf("unreplicated pods present")
	}
	return pods, nil
}
// DetailedGetPodsForMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises an error if there is an unreplicated pod and the force option was not
// specified. Based on kubectl drain code. It checks whether the RC, DS, Jobs and RS that
// created these pods still exist.
func DetailedGetPodsForMove(nodeInfo *schedulercache.NodeInfo, skipNodesWithSystemPods bool,
	skipNodesWithLocalStorage bool, client *unversionedclient.Client, minReplicaCount int32) ([]*api.Pod, error) {
	return drain.GetPodsForDeletionOnNodeDrain(
		nodeInfo.Pods(),
		api.Codecs.UniversalDecoder(),
		skipNodesWithSystemPods,
		skipNodesWithLocalStorage,
		true,
		client,
		minReplicaCount)
}
func PodFitsHost(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil
	}
	return false, ErrPodNotMatchHostName
}
func PodFitsHost(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrPodNotMatchHostName}, nil
}
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises an error if there is an unreplicated pod and the force option was
// not specified. Based on kubectl drain code. It makes the assumption that RC, DS, Jobs
// and RS were deleted along with their pods (no abandoned pods with a dangling created-by
// annotation). Useful for fast checks.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, force bool, skipNodesWithSystemPods bool,
	skipNodesWithLocalStorage bool) ([]*api.Pod, error) {
	pods := make([]*api.Pod, 0)
	unreplicatedPodNames := []string{}

	for _, pod := range nodeInfo.Pods() {
		if IsMirrorPod(pod) {
			continue
		}
		replicated := false
		daemonsetPod := false

		creatorKind, err := CreatorRefKind(pod)
		if err != nil {
			return []*api.Pod{}, err
		}
		if creatorKind == "ReplicationController" {
			replicated = true
		} else if creatorKind == "DaemonSet" {
			daemonsetPod = true
		} else if creatorKind == "Job" {
			replicated = true
		} else if creatorKind == "ReplicaSet" {
			replicated = true
		}
		if !daemonsetPod && pod.Namespace == "kube-system" && skipNodesWithSystemPods {
			return []*api.Pod{}, fmt.Errorf("non-daemonset, non-mirrored, kube-system pod present: %s", pod.Name)
		}
		if !daemonsetPod && hasLocalStorage(pod) && skipNodesWithLocalStorage {
			return []*api.Pod{}, fmt.Errorf("pod with local storage present: %s", pod.Name)
		}
		switch {
		case daemonsetPod:
			break
		case !replicated:
			unreplicatedPodNames = append(unreplicatedPodNames, pod.Name)
			if force {
				pods = append(pods, pod)
			}
		default:
			pods = append(pods, pod)
		}
	}
	if !force && len(unreplicatedPodNames) > 0 {
		return []*api.Pod{}, fmt.Errorf("unreplicated pods present")
	}
	return pods, nil
}
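// A hedged sketch of the hasLocalStorage helper used by the FastGetPodsToMove variants above,
// assuming "local storage" means EmptyDir or HostPath volumes; the real helper may apply
// additional exceptions, so treat this as an illustration.
func hasLocalStorageSketch(pod *api.Pod) bool {
	for _, volume := range pod.Spec.Volumes {
		if volume.EmptyDir != nil || volume.HostPath != nil {
			return true
		}
	}
	return false
}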
func (r *NodeStatus) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	info, err := r.info.GetNodeInfo(nodeName)
	if err != nil {
		return false, err
	}

	// TODO: move the following podNumber check to podFitsResourcesInternal when Kubelet allows podNumber check (See #20263).
	allocatable := info.Status.Allocatable
	allowedPodNumber := allocatable.Pods().Value()
	if int64(len(nodeInfo.Pods()))+1 > allowedPodNumber {
		return false, newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
	}

	return podFitsResourcesInternal(pod, nodeName, nodeInfo, info)
}
// calculateUsedPriority calculates the resources used on a node. 'podRequests' holds the
// requests of the pod being scheduled; 'nodeInfo' tracks the node's allocatable resources
// and the pods currently scheduled on it.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is a visible performance gain from it.
		glog.Infof(
			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}
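// A minimal sketch of the "most requested" scoring helper used above, assuming the
// conventional formula requested * 10 / capacity (the mirror image of the least-requested
// score). The real helper also takes the node name for logging and may guard against
// over-committed requests differently.
func calculateUsedScoreSketch(requested, capacity int64) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		return 0
	}
	return (requested * 10) / capacity
}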
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	if checker.NodeMatchPodAffinityAntiAffinity(pod, allPods, node) {
		return true, nil
	}
	return false, ErrPodAffinityNotMatch
}
// ImageLocalityPriorityMap is a priority function that favors nodes that already have the requested pod's container images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var sumSize int64
	for i := range pod.Spec.Containers {
		sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: calculateScoreFromSize(sumSize),
	}, nil
}
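// A hedged sketch of the calculateScoreFromSize mapping used above: totals below a minimum
// threshold score 0, totals above a maximum score 10, and everything in between is scaled
// roughly linearly. The threshold constants here (23MB and 1000MB) are assumptions made for
// illustration, not values taken from the source.
const (
	assumedMinImgSize int64 = 23 * 1024 * 1024
	assumedMaxImgSize int64 = 1000 * 1024 * 1024
)

func calculateScoreFromSizeSketch(sumSize int64) int {
	switch {
	case sumSize == 0 || sumSize < assumedMinImgSize:
		// None of the requested images are present, or their total size is too small to matter.
		return 0
	case sumSize >= assumedMaxImgSize:
		// Enough image data is already on the node to give it the highest priority.
		return 10
	default:
		// Scale the remaining range onto 1..10.
		return int((10*(sumSize-assumedMinImgSize))/(assumedMaxImgSize-assumedMinImgSize)) + 1
	}
}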
// CheckPredicates checks if the given pod can be placed on the given node.
func (p *PredicateChecker) CheckPredicates(pod *kube_api.Pod, nodeInfo *schedulercache.NodeInfo) error {
	// TODO(fgrzadkowski): Use full list of predicates.
	match, err := predicates.GeneralPredicates(pod, nodeInfo)
	nodename := "unknown"
	if nodeInfo.Node() != nil {
		nodename = nodeInfo.Node().Name
	}
	if err != nil {
		return fmt.Errorf("cannot put %s on %s due to %v", pod.Name, nodename, err)
	}
	if !match {
		return fmt.Errorf("cannot put %s on %s", pod.Name, nodename)
	}
	return nil
}
// CheckNodeInodePressurePredicate checks if a pod can be scheduled on a node
// reporting an inode pressure condition.
func CheckNodeInodePressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	// Is the node under inode pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeInodePressure && cond.Status == api.ConditionTrue {
			return false, []algorithm.PredicateFailureReason{ErrNodeUnderInodePressure}, nil
		}
	}
	return true, nil, nil
}
// CheckNodeLabelPresence checks whether all of the specified labels exist on a node, regardless of their value.
// If "presence" is false, then it returns false if any of the requested labels matches any of the node's labels,
// otherwise it returns true.
// If "presence" is true, then it returns false if any of the requested labels does not match any of the node's labels,
// otherwise it returns true.
//
// Consider the cases where the nodes are placed in regions/zones/racks and these are identified by labels.
// In some cases, it is required that only nodes that are part of ANY of the defined regions/zones/racks be selected.
//
// Alternately, eliminating nodes that have a certain label, regardless of value, is also useful.
// A node may have a label with "retiring" as the key and the date as the value,
// and it may be desirable to avoid scheduling new pods on this node.
func (n *NodeLabelChecker) CheckNodeLabelPresence(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var exists bool
	nodeLabels := labels.Set(node.Labels)
	for _, label := range n.labels {
		exists = nodeLabels.Has(label)
		if (exists && !n.presence) || (!exists && n.presence) {
			return false, ErrNodeLabelPresenceViolated
		}
	}
	return true, nil
}
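// A hedged usage sketch for the checker above: it is typically constructed from a label list
// and a presence flag. The constructor name NewNodeLabelPredicate and its exact signature are
// assumptions for illustration; check the surrounding package for the real factory function.
func exampleRetiringNodeFilter() algorithm.FitPredicate {
	// presence=false: reject any node that carries the "retiring" label, whatever its value.
	return NewNodeLabelPredicate([]string{"retiring"}, false)
}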