// getMatchingAntiAffinityTerms returns the anti-affinity terms of the existing pods
// whose namespaces and selector match the given pod, together with the node each of
// those existing pods is running on.
func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) ([]matchingPodAntiAffinityTerm, error) {
	var result []matchingPodAntiAffinityTerm
	for _, existingPod := range allPods {
		affinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
		if err != nil {
			return nil, err
		}
		if affinity != nil && affinity.PodAntiAffinity != nil {
			existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
			if err != nil {
				return nil, err
			}
			for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
				term := term // copy so the stored pointer does not alias the loop variable
				namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
				selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
				if err != nil {
					return nil, err
				}
				if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
					result = append(result, matchingPodAntiAffinityTerm{term: &term, node: existingPodNode})
				}
			}
		}
	}
	return result, nil
}
// getMatchingAntiAffinityTerms performs the same matching as the method above, but
// walks the per-node caches in parallel. Workers append their partial results under a
// mutex, and only the first error encountered is returned.
func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) ([]matchingPodAntiAffinityTerm, error) {
	allNodeNames := make([]string, 0, len(nodeInfoMap))
	for name := range nodeInfoMap {
		allNodeNames = append(allNodeNames, name)
	}

	var lock sync.Mutex
	var result []matchingPodAntiAffinityTerm
	var firstError error
	appendResult := func(toAppend []matchingPodAntiAffinityTerm) {
		lock.Lock()
		defer lock.Unlock()
		result = append(result, toAppend...)
	}
	catchError := func(err error) {
		lock.Lock()
		defer lock.Unlock()
		if firstError == nil {
			firstError = err
		}
	}

	processNode := func(i int) {
		nodeInfo := nodeInfoMap[allNodeNames[i]]
		node := nodeInfo.Node()
		if node == nil {
			catchError(fmt.Errorf("node not found"))
			return
		}
		var nodeResult []matchingPodAntiAffinityTerm
		for _, existingPod := range nodeInfo.PodsWithAffinity() {
			affinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
			if err != nil {
				catchError(err)
				return
			}
			if affinity == nil {
				continue
			}
			for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
				term := term // copy so the stored pointer does not alias the loop variable
				namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
				selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
				if err != nil {
					catchError(err)
					return
				}
				if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
					nodeResult = append(nodeResult, matchingPodAntiAffinityTerm{term: &term, node: node})
				}
			}
		}
		if len(nodeResult) > 0 {
			appendResult(nodeResult)
		}
	}
	workqueue.Parallelize(16, len(allNodeNames), processNode)
	return result, firstError
}
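// The function above relies on workqueue.Parallelize plus a shared mutex to fan work
// out across nodes while collecting partial results and only the first error. Below is
// a minimal, self-contained sketch of that concurrency pattern using only the standard
// library; parallelProcess and its string results are illustrative stand-ins, not
// scheduler APIs.
package main

import (
	"fmt"
	"sync"
)

// parallelProcess runs process(i) for i in [0, n) on a fixed pool of workers,
// appends each worker's partial results under a mutex, and keeps the first error.
func parallelProcess(n, workers int, process func(i int) ([]string, error)) ([]string, error) {
	var (
		mu         sync.Mutex
		results    []string
		firstError error
	)
	indices := make(chan int)
	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range indices {
				partial, err := process(i)
				mu.Lock()
				if err != nil && firstError == nil {
					firstError = err
				}
				results = append(results, partial...)
				mu.Unlock()
			}
		}()
	}
	for i := 0; i < n; i++ {
		indices <- i
	}
	close(indices)
	wg.Wait()
	return results, firstError
}

func main() {
	out, err := parallelProcess(8, 4, func(i int) ([]string, error) {
		return []string{fmt.Sprintf("node-%d", i)}, nil
	})
	fmt.Println(len(out), err)
}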
// Checks if scheduling the pod onto this node would break any of the pod's own
// affinity or anti-affinity rules.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node *v1.Node, affinity *v1.Affinity) bool {
	allPods, err := c.podLister.List(labels.Everything())
	if err != nil {
		return false
	}

	// Check all affinity terms.
	for _, term := range getPodAffinityTerms(affinity.PodAffinity) {
		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, allPods, node, &term)
		if err != nil {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v",
				podName(pod), node.Name, term, err)
			return false
		}
		if !termMatches {
			// If the requirement matches the pod's own labels and namespace, and there are
			// no other such pods anywhere, then disregard the requirement. This is necessary to
			// not block forever because the first pod of the collection can't be scheduled.
			if matchingPodExists {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v",
					podName(pod), node.Name, term, err)
				return false
			}
			namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
			selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
			if err != nil {
				glog.V(10).Infof("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
				return false
			}
			match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
			if !match {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v",
					podName(pod), node.Name, term, err)
				return false
			}
		}
	}

	// Check all anti-affinity terms.
	for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, allPods, node, &term)
		if err != nil || termMatches {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
				podName(pod), node.Name, term, err)
			return false
		}
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is a visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod affinity/anti-affinity constraints satisfied.",
			podName(pod), node.Name)
	}
	return true
}
// podMatchesNamespaceAndSelector checks whether the given pod is in one of the
// namespaces of the affinity term and matches the term's label selector.
// TODO: Share it with predicates by moving it to a better location.
// TODO: Can we avoid error handling here - this is only a matter of a non-parsable selector?
func podMatchesNamespaceAndSelector(pod *api.Pod, affinityPod *api.Pod, term *api.PodAffinityTerm) (bool, error) {
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(affinityPod, *term)
	if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
		return false, nil
	}

	selector, err := unversioned.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
		return false, err
	}
	return true, nil
}
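// A minimal standalone sketch of the namespace-plus-selector check performed above,
// written against the newer apimachinery helpers (the function above uses the older
// pkg/api and unversioned packages). matchesNamespaceAndSelector and the literals in
// main are illustrative, not scheduler code.
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
)

func matchesNamespaceAndSelector(podNamespace string, podLabels map[string]string,
	termNamespaces sets.String, termSelector *metav1.LabelSelector) (bool, error) {
	// An empty namespace set is treated as "any namespace", mirroring the
	// len(namespaces) != 0 guard above.
	if termNamespaces.Len() != 0 && !termNamespaces.Has(podNamespace) {
		return false, nil
	}
	selector, err := metav1.LabelSelectorAsSelector(termSelector)
	if err != nil {
		return false, err
	}
	return selector.Matches(labels.Set(podLabels)), nil
}

func main() {
	ok, err := matchesNamespaceAndSelector(
		"default",
		map[string]string{"app": "web"},
		sets.NewString("default"),
		&metav1.LabelSelector{MatchLabels: map[string]string{"app": "web"}},
	)
	fmt.Println(ok, err) // true <nil>
}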
// NodeMatchesHardPodAffinity checks whether the given node has pods which satisfy all
// the required pod affinity scheduling rules; if so, it returns true.
func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinity *api.PodAffinity) bool {
	var podAffinityTerms []api.PodAffinityTerm
	if len(podAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
		podAffinityTerms = podAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	}
	// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
	//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
	//	podAffinityTerms = append(podAffinityTerms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
	//}

	for _, podAffinityTerm := range podAffinityTerms {
		podAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAffinityTerm)
		if err != nil {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, an error occurred when checking existing pods on the node for PodAffinityTerm %v, err: %v",
				podName(pod), node.Name, podAffinityTerm, err)
			return false
		}

		if !podAffinityTermMatches {
			// TODO: Think about whether this can be simplified once we have controllerRef.
			// Check whether this is the special case in which the requiredDuringScheduling affinity
			// requirement can be disregarded: if the requirement matches the pod's own labels and
			// namespace, and there are no other such pods anywhere, then disregard the requirement.
			// This allows rules like "schedule all of the pods of this collection to the same zone"
			// to not block forever because the first pod of the collection can't be scheduled.
			names := priorityutil.GetNamespacesFromPodAffinityTerm(pod, podAffinityTerm)
			labelSelector, err := unversioned.LabelSelectorAsSelector(podAffinityTerm.LabelSelector)
			if err != nil || !names.Has(pod.Namespace) || !labelSelector.Matches(labels.Set(pod.Labels)) {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v, err: %+v",
					podName(pod), node.Name, podAffinityTerm, err)
				return false
			}

			// The affinity is to put the pod together with other pods from its same service or controller.
			filteredPods := priorityutil.FilterPodsByNameSpaces(names, allPods)
			for _, filteredPod := range filteredPods {
				// If an existing pod from the same service or RC is found anywhere,
				// the affinity scheduling rules cannot be disregarded.
				if labelSelector.Matches(labels.Set(filteredPod.Labels)) {
					glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v",
						podName(pod), node.Name, podAffinityTerm)
					return false
				}
			}
		}
	}
	// All the required pod affinity scheduling rules are satisfied.
	glog.V(10).Infof("All the required pod affinity scheduling rules are satisfied for Pod %+v, on node %v", podName(pod), node.Name)
	return true
}
// processTerm adds the given weight to the score of every node that shares the term's
// topology key value with fixedNode, provided podToCheck matches the term's namespaces
// and label selector.
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(podDefiningAffinityTerm, term)
	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil {
		p.setError(err)
		return
	}
	match := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, namespaces, selector)
	if match {
		func() {
			p.Lock()
			defer p.Unlock()
			for _, node := range p.nodes {
				if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
					p.counts[node.Name] += weight
				}
			}
		}()
	}
}
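// A simplified standalone model of the scoring loop above: every node whose value for
// the term's topology key equals fixedNode's value gets the term's weight added to its
// count. Node labels are plain maps here, and the empty-topology-key / failure-domain
// handling of NodesHaveSameTopologyKey is ignored; all names below are illustrative.
package main

import "fmt"

func addWeightForTopology(counts map[string]float64, nodeLabels map[string]map[string]string,
	fixedNode, topologyKey string, weight float64) {
	fixedValue, ok := nodeLabels[fixedNode][topologyKey]
	if !ok {
		return // the fixed node does not carry the topology key at all
	}
	for name, nl := range nodeLabels {
		if v, ok := nl[topologyKey]; ok && v == fixedValue {
			counts[name] += weight
		}
	}
}

func main() {
	counts := map[string]float64{}
	nodeLabels := map[string]map[string]string{
		"node-a": {"failure-domain.beta.kubernetes.io/zone": "zone-1"},
		"node-b": {"failure-domain.beta.kubernetes.io/zone": "zone-1"},
		"node-c": {"failure-domain.beta.kubernetes.io/zone": "zone-2"},
	}
	addWeightForTopology(counts, nodeLabels, "node-a", "failure-domain.beta.kubernetes.io/zone", 1.0)
	fmt.Println(counts) // node-a and node-b gain weight, node-c does not
}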
// anyPodMatchesPodAffinityTerm checks if any of the given pods matches the specific podAffinityTerm.
// The first return value indicates whether a matching pod exists on a node that matches the topology key,
// while the second return value indicates whether a matching pod exists anywhere.
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods []*v1.Pod, node *v1.Node, term *v1.PodAffinityTerm) (bool, bool, error) {
	matchingPodExists := false
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil {
		return false, false, err
	}
	for _, existingPod := range allPods {
		match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
		if match {
			matchingPodExists = true
			existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
			if err != nil {
				return false, matchingPodExists, err
			}
			if c.failureDomains.NodesHaveSameTopologyKey(node, existingPodNode, term.TopologyKey) {
				return true, matchingPodExists, nil
			}
		}
	}
	return false, matchingPodExists, nil
}
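// A simplified standalone model of the two booleans returned above: the first says
// whether a matching pod exists in the same topology domain as the candidate node, the
// second whether a matching pod exists anywhere in the cluster. Pods are reduced to a
// label map plus the topology value of their node; everything below is illustrative,
// not scheduler code.
package main

import "fmt"

type fakePod struct {
	labels map[string]string
	zone   string // stand-in for the topology key value of the pod's node
}

func anyPodMatches(pods []fakePod, key, value, nodeZone string) (onTopology, anywhere bool) {
	for _, p := range pods {
		if p.labels[key] != value {
			continue
		}
		anywhere = true
		if p.zone == nodeZone {
			onTopology = true
			return
		}
	}
	return
}

func main() {
	pods := []fakePod{{labels: map[string]string{"app": "db"}, zone: "zone-2"}}
	// A matching pod exists somewhere, but not in the candidate node's zone.
	fmt.Println(anyPodMatches(pods, "app", "db", "zone-1")) // false true
}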
// NodeMatchesHardPodAntiAffinity checks whether the given node has pods which satisfy
// all the required pod anti-affinity scheduling rules, and whether putting the pod onto
// the node would break any anti-affinity rules indicated by the existing pods.
// It returns true only if both checks pass.
func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAntiAffinity *api.PodAntiAffinity) bool {
	var podAntiAffinityTerms []api.PodAffinityTerm
	if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
		podAntiAffinityTerms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	}
	// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
	//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
	//	podAntiAffinityTerms = append(podAntiAffinityTerms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
	//}

	// For each podAntiAffinityTerm, if the pod matches the term (which would break the
	// anti-affinity), don't schedule the pod onto this node.
	for _, podAntiAffinityTerm := range podAntiAffinityTerms {
		podAntiAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAntiAffinityTerm)
		if err != nil || podAntiAffinityTermMatches {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because not all the existing pods on this node satisfy the PodAntiAffinityTerm %v, err: %v",
				podName(pod), node.Name, podAntiAffinityTerm, err)
			return false
		}
	}

	// Check if scheduling the pod onto this node would break any anti-affinity rules
	// indicated by the existing pods on the node. If it would, don't schedule the pod
	// onto this node.
	for _, ep := range allPods {
		epAffinity, err := api.GetAffinityFromPodAnnotations(ep.Annotations)
		if err != nil {
			glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(ep), err)
			return false
		}
		if epAffinity != nil && epAffinity.PodAntiAffinity != nil {
			var epAntiAffinityTerms []api.PodAffinityTerm
			if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
				epAntiAffinityTerms = epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
			}
			// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
			//if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
			//	epAntiAffinityTerms = append(epAntiAffinityTerms, epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
			//}

			for _, epAntiAffinityTerm := range epAntiAffinityTerms {
				labelSelector, err := unversioned.LabelSelectorAsSelector(epAntiAffinityTerm.LabelSelector)
				if err != nil {
					glog.V(10).Infof("Failed to get label selector from anti-affinity term %+v of existing pod %+v, err: %+v",
						epAntiAffinityTerm, podName(ep), err)
					return false
				}

				names := priorityutil.GetNamespacesFromPodAffinityTerm(ep, epAntiAffinityTerm)
				if (len(names) == 0 || names.Has(pod.Namespace)) && labelSelector.Matches(labels.Set(pod.Labels)) {
					epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
					if err != nil || checker.failureDomains.NodesHaveSameTopologyKey(node, epNode, epAntiAffinityTerm.TopologyKey) {
						glog.V(10).Infof("Cannot schedule Pod %+v onto node %v because the pod would break the PodAntiAffinityTerm %+v of existing pod %+v, err: %v",
							podName(pod), node.Name, epAntiAffinityTerm, podName(ep), err)
						return false
					}
				}
			}
		}
	}
	// All the required pod anti-affinity scheduling rules are satisfied.
	glog.V(10).Infof("Can schedule Pod %+v on node %v because all the required pod anti-affinity scheduling rules are satisfied",
		podName(pod), node.Name)
	return true
}