func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	controllerRef := priorityutil.GetControllerRef(pod)
	if controllerRef != nil {
		// Ignore pods that are owned by a controller other than a ReplicationController
		// or a ReplicaSet.
		if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
			controllerRef = nil
		}
	}
	if controllerRef == nil {
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}

	avoids, err := v1.GetAvoidPodsFromNodeAnnotations(node.Annotations)
	if err != nil {
		// If we cannot get the annotation, assume the pod is schedulable there.
		return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
	}
	for i := range avoids.PreferAvoidPods {
		avoid := &avoids.PreferAvoidPods[i]
		if controllerRef != nil {
			if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
				return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
			}
		}
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
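The example above relies on priorityutil.GetControllerRef to find the pod's managing controller. A minimal sketch of what such a helper might look like, using a simplified local stand-in for the API's OwnerReference type (the helper name and struct below are illustrative assumptions, not the upstream code):

// ownerReference is a simplified stand-in for the API's OwnerReference type.
type ownerReference struct {
	Kind       string
	UID        string
	Controller *bool
}

// getControllerRef returns the owner reference marked as the managing
// controller, or nil if the pod has no controlling owner.
func getControllerRef(owners []ownerReference) *ownerReference {
	for i := range owners {
		if owners[i].Controller != nil && *owners[i].Controller {
			return &owners[i]
		}
	}
	return nil
}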
Example #2
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var tolerationList []api.Toleration
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		tolerationList = priorityMeta.podTolerations
	} else {
		var err error
		tolerationList, err = getTolerationListFromPod(pod)
		if err != nil {
			return schedulerapi.HostPriority{}, err
		}
	}

	taints, err := api.GetTaintsFromNodeAnnotations(node.Annotations)
	if err != nil {
		return schedulerapi.HostPriority{}, err
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
	}, nil
}
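The node's score here is the raw count produced by countIntolerableTaintsPreferNoSchedule, which is not shown. A minimal sketch of that counting logic, using simplified local taint and toleration types as stand-ins for the API types (the "Exists" and "PreferNoSchedule" values mirror the usual toleration-matching rules, but the exact matching details are an assumption):

type taint struct {
	Key, Value, Effect string
}

type toleration struct {
	Key, Operator, Value, Effect string
}

// tolerates reports whether a single toleration matches the given taint.
func tolerates(tol toleration, t taint) bool {
	if tol.Effect != "" && tol.Effect != t.Effect {
		return false
	}
	if tol.Key != "" && tol.Key != t.Key {
		return false
	}
	// An "Exists" operator matches any value; otherwise the values must be equal.
	return tol.Operator == "Exists" || tol.Value == t.Value
}

// countIntolerableTaintsPreferNoSchedule counts the PreferNoSchedule taints that
// none of the pod's tolerations tolerate; more intolerable taints means a worse
// raw score, which the priority's reduce step typically normalizes across nodes.
func countIntolerableTaintsPreferNoSchedule(taints []taint, tolerations []toleration) int {
	count := 0
	for _, t := range taints {
		if t.Effect != "PreferNoSchedule" {
			continue
		}
		tolerated := false
		for _, tol := range tolerations {
			if tolerates(tol, t) {
				tolerated = true
				break
			}
		}
		if !tolerated {
			count++
		}
	}
	return count
}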
Example #3
// CheckNodeDiskPressurePredicate checks if a pod can be scheduled on a node
// reporting disk pressure condition.
func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	// is node under pressure?
	if nodeInfo.DiskPressureCondition() == v1.ConditionTrue {
		return false, []algorithm.PredicateFailureReason{ErrNodeUnderDiskPressure}, nil
	}
	return true, nil, nil
}
Example #4
// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
// reporting memory pressure condition.
func CheckNodeMemoryPressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var podBestEffort bool

	predicateMeta, ok := meta.(*predicateMetadata)
	if ok {
		podBestEffort = predicateMeta.podBestEffort
	} else {
		// We couldn't parse metadata - fallback to computing it.
		podBestEffort = isPodBestEffort(pod)
	}

	// pod is not a BestEffort pod
	if !podBestEffort {
		return true, nil
	}

	// is node under pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeMemoryPressure && cond.Status == api.ConditionTrue {
			return false, ErrNodeUnderMemoryPressure
		}
	}

	return true, nil
}
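The fallback path above recomputes whether the pod is BestEffort. A minimal sketch of that check with a simplified container stand-in (the type and function names are illustrative): a pod lands in the BestEffort QoS class only when none of its containers declare any resource requests or limits.

// simpleContainer is a simplified stand-in for the API container type.
type simpleContainer struct {
	Requests map[string]string
	Limits   map[string]string
}

// isBestEffort reports whether no container sets requests or limits, which is
// what places the pod in the BestEffort QoS class.
func isBestEffort(containers []simpleContainer) bool {
	for _, c := range containers {
		if len(c.Requests) > 0 || len(c.Limits) > 0 {
			return false
		}
	}
	return true
}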
Example #5
func (c *MaxPDVolumeCountChecker) predicate(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	newVolumes := make(map[string]bool)
	if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
		return false, err
	}

	// quick return
	if len(newVolumes) == 0 {
		return true, nil
	}

	// count unique volumes
	existingVolumes := make(map[string]bool)
	for _, existingPod := range nodeInfo.Pods() {
		if err := c.filterVolumes(existingPod.Spec.Volumes, existingPod.Namespace, existingVolumes); err != nil {
			return false, err
		}
	}
	numExistingVolumes := len(existingVolumes)

	// filter out already-mounted volumes
	for k := range existingVolumes {
		if _, ok := newVolumes[k]; ok {
			delete(newVolumes, k)
		}
	}

	numNewVolumes := len(newVolumes)

	if numExistingVolumes+numNewVolumes > c.maxVolumes {
		return false, nil
	}

	return true, nil
}
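filterVolumes is not shown above; its job is to record, per pod, the unique identifiers of the volumes this checker counts against the limit. A minimal sketch for the AWS EBS case, using a simplified volume type and ignoring the PVC-to-PV resolution the real checker also performs (names below are illustrative):

// ebsVolume is a simplified stand-in for a pod volume backed by AWS EBS.
type ebsVolume struct {
	VolumeID string
}

// filterEBSVolumes records each referenced EBS volume ID exactly once, so the
// caller counts unique volumes rather than individual volume mounts.
func filterEBSVolumes(volumes []ebsVolume, filteredVolumes map[string]bool) {
	for _, v := range volumes {
		if v.VolumeID != "" {
			filteredVolumes[v.VolumeID] = true
		}
	}
}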
Example #6
// calculatePodScore calculates a score for each pod and returns a slice of podInfo structures.
// Score is defined as cpu_sum/node_capacity + mem_sum/node_capacity.
// Pods that have bigger requirements should be processed first, thus have higher scores.
func calculatePodScore(pods []*apiv1.Pod, nodeTemplate *schedulercache.NodeInfo) []*podInfo {
	podInfos := make([]*podInfo, 0, len(pods))

	for _, pod := range pods {
		cpuSum := resource.Quantity{}
		memorySum := resource.Quantity{}

		for _, container := range pod.Spec.Containers {
			if request, ok := container.Resources.Requests[apiv1.ResourceCPU]; ok {
				cpuSum.Add(request)
			}
			if request, ok := container.Resources.Requests[apiv1.ResourceMemory]; ok {
				memorySum.Add(request)
			}
		}
		score := float64(0)
		if cpuAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceCPU]; ok && cpuAllocatable.MilliValue() > 0 {
			score += float64(cpuSum.MilliValue()) / float64(cpuAllocatable.MilliValue())
		}
		if memAllocatable, ok := nodeTemplate.Node().Status.Allocatable[apiv1.ResourceMemory]; ok && memAllocatable.Value() > 0 {
			score += float64(memorySum.Value()) / float64(memAllocatable.Value())
		}

		podInfos = append(podInfos, &podInfo{
			score: score,
			pod:   pod,
		})
	}
	return podInfos
}
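Because bigger pods should be handled first, callers typically sort the returned slice by score in descending order. A short sketch of that step, assuming the podInfo type used above and the standard library's sort package:

// sortPodInfosByScore orders the slice so the most demanding pods come first.
func sortPodInfosByScore(podInfos []*podInfo) {
	sort.Slice(podInfos, func(i, j int) bool {
		return podInfos[i].score > podInfos[j].score
	})
}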
Example #7
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, err
	}

	// Check if the current node matches the inter-pod affinity scheduling constraints.
	// Hard inter-pod affinity is not symmetric, check only when affinity.PodAffinity exists.
	if affinity.PodAffinity != nil {
		if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
			return false, ErrPodAffinityNotMatch
		}
	}

	// Hard inter-pod anti-affinity is symmetric, we should always check it.
	if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity) {
		return false, ErrPodAffinityNotMatch
	}

	return true, nil
}
Example #8
func (c *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if !c.satisfiesExistingPodsAntiAffinity(pod, meta, node) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	// Now check if <pod> requirements will be satisfied on this node.
	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
	if err != nil {
		return false, nil, err
	}
	if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
		return true, nil, nil
	}
	if !c.satisfiesPodsAffinityAntiAffinity(pod, node, affinity) {
		return false, []algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, nil
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied",
			podName(pod), node.Name)
	}
	return true, nil, nil
}
Example #9
// Calculate the resource occupancy on a node. 'node' has information about the resources on the node.
// nodeInfo aggregates the resource requests of the pods currently scheduled on the node.
func calculateResourceOccupancy(pod *api.Pod, node api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
	totalMilliCPU := nodeInfo.NonZeroRequest().MilliCPU
	totalMemory := nodeInfo.NonZeroRequest().Memory
	capacityMilliCPU := node.Status.Allocatable.Cpu().MilliValue()
	capacityMemory := node.Status.Allocatable.Memory().Value()

	// Add the resources requested by the current pod being scheduled.
	// This also helps differentiate between differently sized, but empty, nodes.
	for _, container := range pod.Spec.Containers {
		cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
		totalMilliCPU += cpu
		totalMemory += memory
	}

	cpuScore := calculateScore(totalMilliCPU, capacityMilliCPU, node.Name)
	memoryScore := calculateScore(totalMemory, capacityMemory, node.Name)
	glog.V(10).Infof(
		"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
		pod.Name, node.Name,
		capacityMilliCPU, capacityMemory,
		totalMilliCPU, totalMemory,
		cpuScore, memoryScore,
	)

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}
}
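calculateScore is the least-requested scoring helper referenced above (the real one also takes the node name for logging). A minimal sketch of the usual formula, hedged as an assumption about its exact shape: the score scales linearly from 10 for an idle node down to 0 for a fully requested one.

// calculateLeastRequestedScore maps the requested amount onto a 0-10 scale,
// where an idle node scores 10 and a fully requested node scores 0.
func calculateLeastRequestedScore(requested, capacity int64) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		// Over-committed nodes get the lowest score.
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}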
Example #10
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises error if there is an unreplicated pod and force option was not specified.
// Based on kubectl drain code. It makes an assumption that RC, DS, Jobs and RS were deleted
// along with their pods (no abandoned pods with dangling created-by annotation). Useful for fast
// checks. Doesn't check whether the controllers that created these pods still exist.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool) ([]*api.Pod, error) {
	return drain.GetPodsForDeletionOnNodeDrain(
		nodeInfo.Pods(),
		api.Codecs.UniversalDecoder(),
		skipNodesWithSystemPods,
		skipNodesWithLocalStorage,
		false,
		nil,
		0)
}
Example #11
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids any two pods that share at least one monitor and use the same pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, ErrDiskConflict
			}
		}
	}
	return true, nil
}
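isVolumeConflict is where the per-volume rules from the comment live. A minimal sketch of the GCE PD and AWS EBS cases, using simplified local types as stand-ins for the API volume sources (the Ceph RBD rule, which also compares monitors, pools, and images, is omitted):

type gcePD struct {
	PDName   string
	ReadOnly bool
}

type awsEBS struct {
	VolumeID string
}

// gcePDConflict: two pods may share a GCE PD only if both mounts are read-only.
func gcePDConflict(a, b *gcePD) bool {
	if a == nil || b == nil || a.PDName != b.PDName {
		return false
	}
	return !(a.ReadOnly && b.ReadOnly)
}

// awsEBSConflict: any two pods mounting the same EBS volume ID conflict.
func awsEBSConflict(a, b *awsEBS) bool {
	return a != nil && b != nil && a.VolumeID == b.VolumeID
}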
Example #12
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
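podMatchesNodeLabels reduces, at its core, to checking the pod's nodeSelector against the node's labels (the real helper may also evaluate node-affinity rules). A minimal sketch of that core check, with plain maps standing in for the API fields:

// matchesNodeSelector reports whether every key/value pair in the pod's
// nodeSelector is present, with the same value, in the node's labels.
func matchesNodeSelector(nodeSelector, nodeLabels map[string]string) bool {
	for key, want := range nodeSelector {
		if got, ok := nodeLabels[key]; !ok || got != want {
			return false
		}
	}
	return true
}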
Example #13
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids any two pods that share at least one monitor and use the same pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, []algorithm.PredicateFailureReason{ErrDiskConflict}, nil
			}
		}
	}
	return true, nil, nil
}
Example #14
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids any two pods that share at least one monitor and use the same pool and image.
// TODO: migrate this into some per-volume specific code?
func NoDiskConflict(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	for _, v := range pod.Spec.Volumes {
		for _, ev := range nodeInfo.Pods() {
			if isVolumeConflict(v, ev) {
				return false, nil
			}
		}
	}
	return true, nil
}
Example #15
func matchesPredicate(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil
	}
	return false, algorithmpredicates.ErrFakePredicate
}
Example #16
func PodSelectorMatches(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found: %q", nodeName)
	}
	if PodMatchesNodeLabels(pod, node) {
		return true, nil
	}
	return false, ErrNodeSelectorNotMatch
}
Example #17
func PodSelectorMatches(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if podMatchesNodeLabels(pod, node) {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
}
Example #18
func matchesPredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Name == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil
}
Example #19
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises error if there is an unreplicated pod and force option was not specified.
// Based on kubectl drain code. It makes an assumption that RC, DS, Jobs and RS were deleted
// along with their pods (no abandoned pods with dangling created-by annotation). Useful for fast
// checks.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, force bool,
	skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool, decoder runtime.Decoder) ([]*api.Pod, error) {
	pods := make([]*api.Pod, 0)
	unreplicatedPodNames := []string{}
	for _, pod := range nodeInfo.Pods() {
		_, found := pod.ObjectMeta.Annotations[types.ConfigMirrorAnnotationKey]
		if found {
			// Skip mirror pod
			continue
		}
		replicated := false
		daemonsetPod := false

		creatorRef, found := pod.ObjectMeta.Annotations[controller.CreatedByAnnotation]
		if found {
			var sr api.SerializedReference
			if err := runtime.DecodeInto(decoder, []byte(creatorRef), &sr); err != nil {
				return []*api.Pod{}, err
			}
			if sr.Reference.Kind == "ReplicationController" {
				replicated = true
			} else if sr.Reference.Kind == "DaemonSet" {
				daemonsetPod = true
			} else if sr.Reference.Kind == "Job" {
				replicated = true
			} else if sr.Reference.Kind == "ReplicaSet" {
				replicated = true
			}
		}

		if !daemonsetPod && pod.Namespace == "kube-system" && skipNodesWithSystemPods {
			return []*api.Pod{}, fmt.Errorf("non-daemonset, non-mirrored, kube-system pod present: %s", pod.Name)
		}

		if !daemonsetPod && hasLocalStorage(pod) && skipNodesWithLocalStorage {
			return []*api.Pod{}, fmt.Errorf("pod with local storage present: %s", pod.Name)
		}

		switch {
		case daemonsetPod:
			break
		case !replicated:
			unreplicatedPodNames = append(unreplicatedPodNames, pod.Name)
			if force {
				pods = append(pods, pod)
			}
		default:
			pods = append(pods, pod)
		}
	}
	if !force && len(unreplicatedPodNames) > 0 {
		return []*api.Pod{}, fmt.Errorf("unreplicated pods present")
	}
	return pods, nil
}
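hasLocalStorage is the other helper this drain check leans on. A minimal sketch, assuming it treats EmptyDir and HostPath volumes as node-local storage (the exact set of volume types is an assumption), with a simplified volume stand-in:

// localVolume is a simplified stand-in for a pod volume spec.
type localVolume struct {
	EmptyDir bool
	HostPath bool
}

// hasLocalStorageVolumes reports whether any volume keeps data on the node
// itself, which would be lost if the pod were moved elsewhere.
func hasLocalStorageVolumes(volumes []localVolume) bool {
	for _, v := range volumes {
		if v.EmptyDir || v.HostPath {
			return true
		}
	}
	return false
}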
Example #20
// DetailedGetPodsForMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises error if there is an unreplicated pod and force option was not specified.
// Based on kubectl drain code. It checks whether RC, DS, Jobs and RS that created these pods
// still exist.
func DetailedGetPodsForMove(nodeInfo *schedulercache.NodeInfo, skipNodesWithSystemPods bool,
	skipNodesWithLocalStorage bool, client *unversionedclient.Client, minReplicaCount int32) ([]*api.Pod, error) {
	return drain.GetPodsForDeletionOnNodeDrain(
		nodeInfo.Pods(),
		api.Codecs.UniversalDecoder(),
		skipNodesWithSystemPods,
		skipNodesWithLocalStorage,
		true,
		client,
		minReplicaCount)
}
Example #21
func PodFitsHost(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil
	}
	return false, ErrPodNotMatchHostName
}
Example #22
func PodFitsHost(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	if len(pod.Spec.NodeName) == 0 {
		return true, nil, nil
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	if pod.Spec.NodeName == node.Name {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrPodNotMatchHostName}, nil
}
Example #23
// FastGetPodsToMove returns a list of pods that should be moved elsewhere if the node
// is drained. Raises error if there is an unreplicated pod and force option was not specified.
// Based on kubectl drain code. It makes an assumption that RC, DS, Jobs and RS were deleted
// along with their pods (no abandoned pods with dangling created-by annotation). Useful for fast
// checks.
func FastGetPodsToMove(nodeInfo *schedulercache.NodeInfo, force bool,
	skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool) ([]*api.Pod, error) {
	pods := make([]*api.Pod, 0)
	unreplicatedPodNames := []string{}
	for _, pod := range nodeInfo.Pods() {
		if IsMirrorPod(pod) {
			continue
		}

		replicated := false
		daemonsetPod := false

		creatorKind, err := CreatorRefKind(pod)
		if err != nil {
			return []*api.Pod{}, err
		}
		if creatorKind == "ReplicationController" {
			replicated = true
		} else if creatorKind == "DaemonSet" {
			daemonsetPod = true
		} else if creatorKind == "Job" {
			replicated = true
		} else if creatorKind == "ReplicaSet" {
			replicated = true
		}

		if !daemonsetPod && pod.Namespace == "kube-system" && skipNodesWithSystemPods {
			return []*api.Pod{}, fmt.Errorf("non-daemonset, non-mirrored, kube-system pod present: %s", pod.Name)
		}

		if !daemonsetPod && hasLocalStorage(pod) && skipNodesWithLocalStorage {
			return []*api.Pod{}, fmt.Errorf("pod with local storage present: %s", pod.Name)
		}

		switch {
		case daemonsetPod:
			break
		case !replicated:
			unreplicatedPodNames = append(unreplicatedPodNames, pod.Name)
			if force {
				pods = append(pods, pod)
			}
		default:
			pods = append(pods, pod)
		}
	}
	if !force && len(unreplicatedPodNames) > 0 {
		return []*api.Pod{}, fmt.Errorf("unreplicated pods present")
	}
	return pods, nil
}
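CreatorRefKind factors out the created-by decoding that the earlier FastGetPodsToMove variant performs inline. A minimal sketch that mirrors that inline logic (the decoder parameter is an assumption; the real helper may obtain it differently):

// creatorRefKind returns the Kind of the controller recorded in the pod's
// created-by annotation, or "" if the annotation is absent.
func creatorRefKind(pod *api.Pod, decoder runtime.Decoder) (string, error) {
	creatorRef, found := pod.ObjectMeta.Annotations[controller.CreatedByAnnotation]
	if !found {
		return "", nil
	}
	var sr api.SerializedReference
	if err := runtime.DecodeInto(decoder, []byte(creatorRef), &sr); err != nil {
		return "", err
	}
	return sr.Reference.Kind, nil
}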
Example #24
func (r *NodeStatus) PodFitsResources(pod *api.Pod, nodeName string, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	info, err := r.info.GetNodeInfo(nodeName)
	if err != nil {
		return false, err
	}
	// TODO: move the following podNumber check to podFitsResourcesInternal when Kubelet allows podNumber check (See #20263).
	allocatable := info.Status.Allocatable
	allowedPodNumber := allocatable.Pods().Value()
	if int64(len(nodeInfo.Pods()))+1 > allowedPodNumber {
		return false,
			newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
	}
	return podFitsResourcesInternal(pod, nodeName, nodeInfo, info)
}
Example #25
// Calculate the resources used on a node. nodeInfo tracks the node's allocatable resources and the
// requests of the pods currently scheduled on it; podRequests holds the requests of the pod being scheduled.
func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
	totalResources.Memory += nodeInfo.NonZeroRequest().Memory

	cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
	memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.V(10).Infof(
			"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			totalResources.MilliCPU, totalResources.Memory,
			cpuScore, memoryScore,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}, nil
}
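calculateUsedScore is the most-requested counterpart of the least-requested helper: the more of the node's allocatable resources are requested, the higher the score (the real helper also takes the node name for logging). A minimal sketch, hedged as an assumption about the exact formula:

// calculateMostRequestedScore maps the requested amount onto a 0-10 scale,
// where a fully requested node scores 10 and an idle node scores 0.
func calculateMostRequestedScore(requested, capacity int64) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		// Requests beyond capacity are treated as not fitting at all.
		return 0
	}
	return (requested * 10) / capacity
}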
Example #26
func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}
	allPods, err := checker.podLister.List(labels.Everything())
	if err != nil {
		return false, err
	}
	if checker.NodeMatchPodAffinityAntiAffinity(pod, allPods, node) {
		return true, nil
	}
	return false, ErrPodAffinityNotMatch
}
Example #27
// ImageLocalityPriorityMap is a priority function that favors nodes that already have the requested pod's container images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	var sumSize int64
	for i := range pod.Spec.Containers {
		sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: calculateScoreFromSize(sumSize),
	}, nil
}
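calculateScoreFromSize turns the summed image size into the 0-10 score. A minimal sketch of one plausible mapping, with illustrative thresholds that are assumptions rather than the upstream constants: sizes below the lower bound score 0, sizes above the upper bound score 10, and sizes in between scale linearly.

const (
	// Illustrative thresholds; the real constants may differ.
	minImageSize int64 = 23 * 1024 * 1024   // below this, image locality is ignored
	maxImageSize int64 = 1000 * 1024 * 1024 // at or above this, the node gets full score
)

// scoreFromImageSize maps the total size of already-present images onto 0-10.
func scoreFromImageSize(sumSize int64) int {
	switch {
	case sumSize <= minImageSize:
		return 0
	case sumSize >= maxImageSize:
		return 10
	default:
		return int(10 * (sumSize - minImageSize) / (maxImageSize - minImageSize))
	}
}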
Example #28
// CheckPredicates checks if the given pod can be placed on the given node.
func (p *PredicateChecker) CheckPredicates(pod *kube_api.Pod, nodeInfo *schedulercache.NodeInfo) error {
	// TODO(fgrzadkowski): Use full list of predicates.
	match, err := predicates.GeneralPredicates(pod, nodeInfo)
	nodename := "unknown"
	if nodeInfo.Node() != nil {
		nodename = nodeInfo.Node().Name
	}
	if err != nil {
		return fmt.Errorf("cannot put %s on %s due to %v", pod.Name, nodename, err)
	}
	if !match {
		return fmt.Errorf("cannot put %s on %s", pod.Name, nodename)
	}
	return nil
}
Example #29
// CheckNodeInodePressurePredicate checks if a pod can be scheduled on a node
// reporting inode pressure condition.
func CheckNodeInodePressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}

	// is node under pressure?
	for _, cond := range node.Status.Conditions {
		if cond.Type == api.NodeInodePressure && cond.Status == api.ConditionTrue {
			return false, []algorithm.PredicateFailureReason{ErrNodeUnderInodePressure}, nil
		}
	}

	return true, nil, nil
}
Example #30
// CheckNodeLabelPresence checks whether all of the specified labels exist on a node, regardless of their value.
// If "presence" is false, then returns false if any of the requested labels matches any of the node's labels,
// otherwise returns true.
// If "presence" is true, then returns false if any of the requested labels does not match any of the node's labels,
// otherwise returns true.
//
// Consider the cases where the nodes are placed in regions/zones/racks and these are identified by labels
// In some cases, it is required that only nodes that are part of ANY of the defined regions/zones/racks be selected
//
// Alternatively, eliminating nodes that have a certain label, regardless of value, is also useful
// A node may have a label with "retiring" as key and the date as the value
// and it may be desirable to avoid scheduling new pods on this node
func (n *NodeLabelChecker) CheckNodeLabelPresence(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
	node := nodeInfo.Node()
	if node == nil {
		return false, fmt.Errorf("node not found")
	}

	var exists bool
	nodeLabels := labels.Set(node.Labels)
	for _, label := range n.labels {
		exists = nodeLabels.Has(label)
		if (exists && !n.presence) || (!exists && n.presence) {
			return false, ErrNodeLabelPresenceViolated
		}
	}
	return true, nil
}
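A short usage sketch of the presence semantics described above, assuming a hypothetical constructor (the real package may expose a different one): with presence set to true the node must carry every listed label, and with presence set to false it must carry none of them.

// newNodeLabelChecker is a hypothetical constructor for illustration only.
func newNodeLabelChecker(labels []string, presence bool) *NodeLabelChecker {
	return &NodeLabelChecker{labels: labels, presence: presence}
}

// Require that every candidate node carries a "zone" label:
//   requireZone := newNodeLabelChecker([]string{"zone"}, true)
// Avoid any node that carries a "retiring" label, whatever its value:
//   avoidRetiring := newNodeLabelChecker([]string{"retiring"}, false)
// Each checker's CheckNodeLabelPresence is then run like any other predicate.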