Example #1
0
func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) ([]matchingPodAntiAffinityTerm, error) {
	var result []matchingPodAntiAffinityTerm
	for _, existingPod := range allPods {
		affinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
		if err != nil {
			return nil, err
		}
		if affinity != nil && affinity.PodAntiAffinity != nil {
			existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
			if err != nil {
				return nil, err
			}
			for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
				term := term // copy the loop variable so &term below does not alias a shared variable
				namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
				selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
				if err != nil {
					return nil, err
				}
				match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
				if match {
					result = append(result, matchingPodAntiAffinityTerm{term: &term, node: existingPodNode})
				}
			}
		}
	}
	return result, nil
}
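For reference, here is a minimal, self-contained sketch of the namespace-and-selector check that priorityutil.PodMatchesTermsNamespaceAndSelector performs in these examples. It is illustrative only: podMatchesTerm and the plain-map types are hypothetical stand-ins, and the label selector is reduced to required key/value pairs (real LabelSelectors also support match expressions).

package main

import "fmt"

// podMatchesTerm is an illustrative stand-in for the namespace-and-selector
// check: a pod matches a term when the term's namespace set is empty or
// contains the pod's namespace, and the pod's labels satisfy the selector,
// here simplified to a set of required key/value pairs.
func podMatchesTerm(podNamespace string, podLabels map[string]string,
	termNamespaces map[string]bool, requiredLabels map[string]string) bool {
	// An empty namespace set means "no namespace restriction".
	if len(termNamespaces) > 0 && !termNamespaces[podNamespace] {
		return false
	}
	for k, v := range requiredLabels {
		if podLabels[k] != v {
			return false
		}
	}
	return true
}

func main() {
	existing := map[string]string{"app": "web", "tier": "frontend"}
	fmt.Println(podMatchesTerm("default", existing,
		map[string]bool{"default": true}, map[string]string{"app": "web"})) // true
	fmt.Println(podMatchesTerm("kube-system", existing,
		map[string]bool{"default": true}, map[string]string{"app": "web"})) // false
}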
Example #2
0
func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) ([]matchingPodAntiAffinityTerm, error) {
	allNodeNames := make([]string, 0, len(nodeInfoMap))
	for name := range nodeInfoMap {
		allNodeNames = append(allNodeNames, name)
	}

	var lock sync.Mutex
	var result []matchingPodAntiAffinityTerm
	var firstError error
	appendResult := func(toAppend []matchingPodAntiAffinityTerm) {
		lock.Lock()
		defer lock.Unlock()
		result = append(result, toAppend...)
	}
	catchError := func(err error) {
		lock.Lock()
		defer lock.Unlock()
		if firstError == nil {
			firstError = err
		}
	}

	processNode := func(i int) {
		nodeInfo := nodeInfoMap[allNodeNames[i]]
		node := nodeInfo.Node()
		if node == nil {
			catchError(fmt.Errorf("node not found"))
			return
		}
		var nodeResult []matchingPodAntiAffinityTerm
		for _, existingPod := range nodeInfo.PodsWithAffinity() {
			affinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
			if err != nil {
				catchError(err)
				return
			}
			if affinity == nil {
				continue
			}
			for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
				term := term // copy the loop variable so &term below does not alias a shared variable
				namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
				selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
				if err != nil {
					catchError(err)
					return
				}
				match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
				if match {
					nodeResult = append(nodeResult, matchingPodAntiAffinityTerm{term: &term, node: node})
				}
			}
		}
		if len(nodeResult) > 0 {
			appendResult(nodeResult)
		}
	}
	workqueue.Parallelize(16, len(allNodeNames), processNode)
	return result, firstError
}
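The function above fans work out per node and collects partial results under a mutex, keeping only the first error. Below is a stripped-down sketch of that pattern, assuming a plain sync.WaitGroup in place of workqueue.Parallelize; it uses no Kubernetes types and, unlike Parallelize, does not cap the number of concurrent workers.

package main

import (
	"fmt"
	"sync"
)

// collectParallel processes items concurrently, appends partial results under
// a mutex, and remembers only the first error, mirroring the appendResult /
// catchError structure of Example #2. Illustrative only.
func collectParallel(items []string, process func(string) ([]string, error)) ([]string, error) {
	var (
		mu         sync.Mutex
		results    []string
		firstError error
		wg         sync.WaitGroup
	)
	for _, item := range items {
		item := item // per-iteration copy (matters before Go 1.22)
		wg.Add(1)
		go func() {
			defer wg.Done()
			out, err := process(item)
			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				if firstError == nil {
					firstError = err
				}
				return
			}
			results = append(results, out...)
		}()
	}
	wg.Wait()
	return results, firstError
}

func main() {
	nodes := []string{"node-a", "node-b", "node-c"}
	matches, err := collectParallel(nodes, func(name string) ([]string, error) {
		// A real worker would scan the node's pods for matching anti-affinity terms.
		return []string{name + "/matching-term"}, nil
	})
	fmt.Println(matches, err)
}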
Example #3
0
// Checks if scheduling the pod onto this node would break any of this pod's affinity or anti-affinity rules.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node *v1.Node, affinity *v1.Affinity) bool {
	allPods, err := c.podLister.List(labels.Everything())
	if err != nil {
		return false
	}

	// Check all affinity terms.
	for _, term := range getPodAffinityTerms(affinity.PodAffinity) {
		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, allPods, node, &term)
		if err != nil {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v",
				podName(pod), node.Name, term, err)
			return false
		}
		if !termMatches {
			// If the requirement matches the pod's own labels and namespace, and there are
			// no other such pods, then disregard the requirement. This is necessary to
			// not block forever because the first pod of the collection can't be scheduled.
			if matchingPodExists {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v",
					podName(pod), node.Name, term, err)
				return false
			}
			namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
			selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
			if err != nil {
				glog.V(10).Infof("Cannot parse selector on term %v for pod %v. Details %v",
					term, podName(pod), err)
				return false
			}
			match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
			if !match {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v",
					podName(pod), node.Name, term, err)
				return false
			}
		}
	}

	// Check all anti-affinity terms.
	for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, allPods, node, &term)
		if err != nil || termMatches {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v, err: %v",
				podName(pod), node.Name, term, err)
			return false
		}
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod afinnity/anti-affinity constraints satisfied.",
			podName(pod), node.Name)
	}
	return true
}
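The "first pod of the collection" carve-out in the affinity loop above can be distilled to three booleans. The following sketch is illustrative only; affinityTermAllows is a hypothetical helper, not part of the scheduler.

package main

import "fmt"

// affinityTermAllows distills the special case in Example #3: a required
// affinity term that matches nothing on this node still allows scheduling,
// but only when no matching pod exists anywhere and the incoming pod would
// satisfy its own term (so the collection's first pod is not blocked forever).
func affinityTermAllows(termMatchesOnNode, matchingPodExistsAnywhere, podMatchesItsOwnTerm bool) bool {
	if termMatchesOnNode {
		return true
	}
	if matchingPodExistsAnywhere {
		// Some pod satisfies the term, just not within this node's topology domain.
		return false
	}
	// First pod of the collection: disregard the term so it can be scheduled at all.
	return podMatchesItsOwnTerm
}

func main() {
	fmt.Println(affinityTermAllows(false, false, true)) // true: first pod of the collection
	fmt.Println(affinityTermAllows(false, true, true))  // false: a matching pod exists elsewhere
}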
// TODO: Share it with predicates by moving to better location.
// TODO: Can we avoid error handling here - this is only a matter of non-parsable selector?
func podMatchesNamespaceAndSelector(pod *api.Pod, affinityPod *api.Pod, term *api.PodAffinityTerm) (bool, error) {
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(affinityPod, *term)
	if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
		return false, nil
	}

	selector, err := unversioned.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
		return false, err
	}
	return true, nil
}
Example #5
0
// Checks whether the given node has existing pods that satisfy all of the pod's required
// affinity scheduling rules, returning true if they do.
func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinity *api.PodAffinity) bool {
	var podAffinityTerms []api.PodAffinityTerm
	if len(podAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
		podAffinityTerms = podAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	}
	// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
	//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
	//	podAffinityTerms = append(podAffinityTerms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
	//}

	for _, podAffinityTerm := range podAffinityTerms {
		podAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAffinityTerm)
		if err != nil {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, an error ocurred when checking existing pods on the node for PodAffinityTerm %v err: %v",
				podName(pod), node.Name, podAffinityTerm, err)
			return false
		}

		if !podAffinityTermMatches {
			// TODO: Think about whether this can be simplified once we have controllerRef
			// Check if it is in special case that the requiredDuringScheduling affinity requirement can be disregarded.
			// If the requiredDuringScheduling affinity requirement matches a pod's own labels and namespace, and there are no other such pods
			// anywhere, then disregard the requirement.
			// This allows rules like "schedule all of the pods of this collection to the same zone" to not block forever
			// because the first pod of the collection can't be scheduled.
			names := priorityutil.GetNamespacesFromPodAffinityTerm(pod, podAffinityTerm)
			labelSelector, err := unversioned.LabelSelectorAsSelector(podAffinityTerm.LabelSelector)
			if err != nil || !names.Has(pod.Namespace) || !labelSelector.Matches(labels.Set(pod.Labels)) {
				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v, err: %+v",
					podName(pod), node.Name, podAffinityTerm, err)
				return false
			}

			// The intent of the affinity is to co-locate the pod with other pods from the same service or controller.
			filteredPods := priorityutil.FilterPodsByNameSpaces(names, allPods)
			for _, filteredPod := range filteredPods {
				// if found an existing pod from same service or RC,
				// the affinity scheduling rules cannot be disregarded.
				if labelSelector.Matches(labels.Set(filteredPod.Labels)) {
					glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v",
						podName(pod), node.Name, podAffinityTerm)
					return false
				}
			}
		}
	}
	// all the required pod affinity scheduling rules satisfied
	glog.V(10).Infof("All the required pod affinity scheduling rules are satisfied for Pod %+v, on node %v", podName(pod), node.Name)
	return true
}
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(podDefiningAffinityTerm, term)
	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil {
		p.setError(err)
		return
	}
	match := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, namespaces, selector)
	if match {
		func() {
			p.Lock()
			defer p.Unlock()
			for _, node := range p.nodes {
				if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
					p.counts[node.Name] += weight
				}
			}
		}()
	}
}
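To make the scoring step concrete, here is a sketch of what processTerm does per matching term, with Kubernetes node objects replaced by plain label maps. addTermWeight is a hypothetical name, and the topology comparison is a simplification of failureDomains.NodesHaveSameTopologyKey.

package main

import "fmt"

// addTermWeight adds the term's weight to every candidate node whose
// topology-key value equals the fixed node's value, mirroring the loop
// inside processTerm. Illustrative only.
func addTermWeight(counts map[string]float64, candidateNodeLabels map[string]map[string]string,
	fixedNodeLabels map[string]string, topologyKey string, weight float64) {
	fixedValue, ok := fixedNodeLabels[topologyKey]
	if !ok {
		return
	}
	for name, nodeLabels := range candidateNodeLabels {
		if v, ok := nodeLabels[topologyKey]; ok && v == fixedValue {
			counts[name] += weight
		}
	}
}

func main() {
	counts := map[string]float64{}
	nodes := map[string]map[string]string{
		"node-a": {"failure-domain.beta.kubernetes.io/zone": "us-east-1a"},
		"node-b": {"failure-domain.beta.kubernetes.io/zone": "us-east-1b"},
	}
	addTermWeight(counts, nodes,
		map[string]string{"failure-domain.beta.kubernetes.io/zone": "us-east-1a"},
		"failure-domain.beta.kubernetes.io/zone", 10)
	fmt.Println(counts) // map[node-a:10]
}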
Example #7
0
// anyPodMatchesPodAffinityTerm checks whether any of the given pods matches the specified podAffinityTerm.
// The first return value indicates whether a matching pod exists on a node that matches the topology key,
// while the second return value indicates whether a matching pod exists anywhere.
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods []*v1.Pod, node *v1.Node, term *v1.PodAffinityTerm) (bool, bool, error) {
	matchingPodExists := false
	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
	if err != nil {
		return false, false, err
	}
	for _, existingPod := range allPods {
		match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
		if match {
			matchingPodExists = true
			existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
			if err != nil {
				return false, matchingPodExists, err
			}
			if c.failureDomains.NodesHaveSameTopologyKey(node, existingPodNode, term.TopologyKey) {
				return true, matchingPodExists, nil
			}
		}
	}
	return false, matchingPodExists, nil
}
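The topology check used above (failureDomains.NodesHaveSameTopologyKey) can be illustrated with plain label maps. This is a simplified sketch under the assumption that both nodes must carry the key with equal values; the scheduler's helper has additional handling (for example around empty topology keys and configured failure domains) that this version ignores.

package main

import "fmt"

// sameTopologyDomain reports whether two nodes fall into the same topology
// domain for a key: both must carry the label and the values must match.
// Illustrative reduction only.
func sameTopologyDomain(nodeALabels, nodeBLabels map[string]string, topologyKey string) bool {
	av, aok := nodeALabels[topologyKey]
	bv, bok := nodeBLabels[topologyKey]
	return aok && bok && av == bv
}

func main() {
	a := map[string]string{"kubernetes.io/hostname": "node-a", "zone": "z1"}
	b := map[string]string{"kubernetes.io/hostname": "node-b", "zone": "z1"}
	fmt.Println(sameTopologyDomain(a, b, "zone"))                   // true
	fmt.Println(sameTopologyDomain(a, b, "kubernetes.io/hostname")) // false
}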
Example #8
0
// Checks whether scheduling the pod onto the given node satisfies all of the pod's
// required anti-affinity scheduling rules, and whether doing so would break any
// anti-affinity rules declared by the pods already running on the node.
// Returns true only when both conditions hold.
func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAntiAffinity *api.PodAntiAffinity) bool {
	var podAntiAffinityTerms []api.PodAffinityTerm
	if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
		podAntiAffinityTerms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	}
	// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
	//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
	//	podAntiAffinityTerms = append(podAntiAffinityTerms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
	//}

	// For each podAntiAffinityTerm in podAntiAffinityTerms: if the pod matches the term
	// (which would break the anti-affinity), don't schedule the pod onto this node.
	for _, podAntiAffinityTerm := range podAntiAffinityTerms {
		podAntiAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAntiAffinityTerm)
		if err != nil || podAntiAffinityTermMatches {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because an existing pod on the node matches the PodAntiAffinityTerm %v, err: %v",
				podName(pod), node.Name, podAntiAffinityTerm, err)
			return false
		}
	}

	// Check if scheduling the pod onto this node would break
	// any anti-affinity rules indicated by the existing pods on the node.
	// If it would break, system should not schedule pod onto this node.
	for _, ep := range allPods {
		epAffinity, err := api.GetAffinityFromPodAnnotations(ep.Annotations)
		if err != nil {
			glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(pod), err)
			return false
		}
		if epAffinity.PodAntiAffinity != nil {
			var epAntiAffinityTerms []api.PodAffinityTerm
			if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
				epAntiAffinityTerms = epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
			}
			// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
			//if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
			//	epAntiAffinityTerms = append(epAntiAffinityTerms, epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
			//}

			for _, epAntiAffinityTerm := range epAntiAffinityTerms {
				labelSelector, err := unversioned.LabelSelectorAsSelector(epAntiAffinityTerm.LabelSelector)
				if err != nil {
					glog.V(10).Infof("Failed to get label selector from anti-affinityterm %+v of existing pod %+v, err: %+v", epAntiAffinityTerm, podName(pod), err)
					return false
				}

				names := priorityutil.GetNamespacesFromPodAffinityTerm(ep, epAntiAffinityTerm)
				if (len(names) == 0 || names.Has(pod.Namespace)) && labelSelector.Matches(labels.Set(pod.Labels)) {
					epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
					if err != nil || checker.failureDomains.NodesHaveSameTopologyKey(node, epNode, epAntiAffinityTerm.TopologyKey) {
						glog.V(10).Infof("Cannot schedule Pod %+v, onto node %v because the pod would break the PodAntiAffinityTerm %+v, of existing pod %+v, err: %v",
							podName(pod), node.Name, epAntiAffinityTerm, podName(ep), err)
						return false
					}
				}
			}
		}
	}
	// all the required pod anti-affinity scheduling rules are satisfied
	glog.V(10).Infof("Can schedule Pod %+v, on node %v because all the required pod anti-affinity scheduling rules are satisfied", podName(pod), node.Name)
	return true
}
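Condensing the second loop above, the per-term violation test amounts to a namespace/label match against the incoming pod plus a topology-domain match between the candidate node and the existing pod's node. The sketch below is illustrative only, with hypothetical names and plain maps standing in for the Kubernetes types.

package main

import "fmt"

// violatesExistingAntiAffinityTerm reports whether placing the incoming pod
// would break one of an existing pod's anti-affinity terms: the incoming pod
// must match the term's namespaces and labels, and the candidate node must
// share the term's topology domain with the node running the existing pod.
func violatesExistingAntiAffinityTerm(
	podNamespace string, podLabels map[string]string,
	termNamespaces map[string]bool, termRequiredLabels map[string]string,
	topologyKey string, candidateNodeLabels, existingPodNodeLabels map[string]string,
) bool {
	if len(termNamespaces) > 0 && !termNamespaces[podNamespace] {
		return false
	}
	for k, v := range termRequiredLabels {
		if podLabels[k] != v {
			return false
		}
	}
	cv, cok := candidateNodeLabels[topologyKey]
	ev, eok := existingPodNodeLabels[topologyKey]
	return cok && eok && cv == ev
}

func main() {
	fmt.Println(violatesExistingAntiAffinityTerm(
		"default", map[string]string{"app": "db"},
		map[string]bool{"default": true}, map[string]string{"app": "db"},
		"zone",
		map[string]string{"zone": "z1"}, // candidate node
		map[string]string{"zone": "z1"}, // node running the existing pod
	)) // true: scheduling here would violate the existing pod's anti-affinity
}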