Example No. 1
// MapPodsToMachines obtains a list of pods and pivots that list into a map where the keys are host names
// and each value is the list of pods running on that host.
func MapPodsToMachines(lister algorithm.PodLister) (map[string][]*api.Pod, error) {
	machineToPods := map[string][]*api.Pod{}
	// TODO: perform more targeted query...
	pods, err := lister.List(labels.Everything())
	if err != nil {
		return map[string][]*api.Pod{}, err
	}
	for _, scheduledPod := range pods {
		host := scheduledPod.Spec.NodeName
		machineToPods[host] = append(machineToPods[host], scheduledPod)
	}
	return machineToPods, nil
}
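
The pivot itself does not depend on the Kubernetes API types, so the core idea can be shown with a minimal standalone sketch; miniPod and the node names below are illustrative stand-ins for api.Pod, not the real types:

package main

import "fmt"

// miniPod is an illustrative stand-in for api.Pod carrying only the
// fields the pivot needs.
type miniPod struct {
	Name     string
	NodeName string
}

// mapPodsToMachines mirrors the grouping step above: a flat list of
// scheduled pods becomes a map keyed by node name.
func mapPodsToMachines(pods []miniPod) map[string][]miniPod {
	machineToPods := map[string][]miniPod{}
	for _, p := range pods {
		machineToPods[p.NodeName] = append(machineToPods[p.NodeName], p)
	}
	return machineToPods
}

func main() {
	pods := []miniPod{
		{Name: "web-1", NodeName: "node-a"},
		{Name: "web-2", NodeName: "node-b"},
		{Name: "db-1", NodeName: "node-a"},
	}
	for node, onNode := range mapPodsToMachines(pods) {
		fmt.Printf("%s hosts %d pod(s)\n", node, len(onNode))
	}
	// Expected output (map iteration order may vary):
	// node-a hosts 2 pod(s)
	// node-b hosts 1 pod(s)
}

Iteration order over the resulting map is not deterministic, so callers that need stable output should sort the host names first.
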
Example No. 2
// CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service
// on machines with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (algorithm.HostPriorityList, error) {
	var nsServicePods []*api.Pod

	services, err := s.serviceLister.GetPodServices(pod)
	if err == nil {
		// just use the first service and get the other pods within the service
		// TODO: a separate predicate can be created that tries to handle all services for the pod
		selector := labels.SelectorFromSet(services[0].Spec.Selector)
		pods, err := podLister.List(selector)
		if err != nil {
			return nil, err
		}
		// consider only the pods that belong to the same namespace
		for _, nsPod := range pods {
			if nsPod.Namespace == pod.Namespace {
				nsServicePods = append(nsServicePods, nsPod)
			}
		}
	}

	nodes, err := nodeLister.List()
	if err != nil {
		return nil, err
	}

	// separate out the nodes that have the label from the ones that don't
	otherNodes := []string{}
	labeledNodes := map[string]string{}
	for _, node := range nodes.Items {
		if labels.Set(node.Labels).Has(s.label) {
			label := labels.Set(node.Labels).Get(s.label)
			labeledNodes[node.Name] = label
		} else {
			otherNodes = append(otherNodes, node.Name)
		}
	}

	podCounts := map[string]int{}
	for _, pod := range nsServicePods {
		label, exists := labeledNodes[pod.Spec.NodeName]
		if !exists {
			continue
		}
		podCounts[label]++
	}

	numServicePods := len(nsServicePods)
	result := []algorithm.HostPriority{}
	//score int - scale of 0-10
	// 0 being the lowest priority and 10 being the highest
	for node := range labeledNodes {
		// initializing to the default/max node score of 10
		fScore := float32(10)
		if numServicePods > 0 {
			fScore = 10 * (float32(numServicePods-podCounts[labeledNodes[node]]) / float32(numServicePods))
		}
		result = append(result, algorithm.HostPriority{Host: node, Score: int(fScore)})
	}
	// add the open nodes with a score of 0
	for _, node := range otherNodes {
		result = append(result, algorithm.HostPriority{Host: node, Score: 0})
	}

	return result, nil
}
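
The scoring step boils down to one formula: a labeled node's score is 10 * (numServicePods - podCounts[labelValue]) / numServicePods, and unlabeled nodes are pinned to 0. A standalone sketch of just that arithmetic, with hypothetical counts:

package main

import "fmt"

// antiAffinityScore reproduces only the scoring arithmetic from
// CalculateAntiAffinityPriority: 10 when none of the service's pods share
// the node's label value, 0 when all of them do. The counts passed in
// main are hypothetical.
func antiAffinityScore(numServicePods, podsWithSameLabelValue int) int {
	if numServicePods == 0 {
		return 10 // default/max score when the service has no other pods
	}
	return int(10 * float32(numServicePods-podsWithSameLabelValue) / float32(numServicePods))
}

func main() {
	fmt.Println(antiAffinityScore(4, 3)) // 2  -> crowded label value, low priority
	fmt.Println(antiAffinityScore(4, 1)) // 7  -> sparsely used label value, higher priority
	fmt.Println(antiAffinityScore(0, 0)) // 10 -> no service pods yet, max priority
}
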
Example No. 3
// CalculateSpreadPriority spreads pods by minimizing the number of pods belonging to the same service or replication controller.
// It counts the number of pods that run under the same Services or RCs as the pod being scheduled and tries to minimize the number of conflicts,
// i.e. it pushes the scheduler towards a node hosting the smallest number of pods which match the same Service or RC selectors as the current pod.
func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (algorithm.HostPriorityList, error) {
	var maxCount int
	var nsPods []*api.Pod

	selectors := make([]labels.Selector, 0)
	services, err := s.serviceLister.GetPodServices(pod)
	if err == nil {
		for _, service := range services {
			selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
		}
	}
	controllers, err := s.controllerLister.GetPodControllers(pod)
	if err == nil {
		for _, controller := range controllers {
			selectors = append(selectors, labels.SelectorFromSet(controller.Spec.Selector))
		}
	}

	if len(selectors) > 0 {
		pods, err := podLister.List(labels.Everything())
		if err != nil {
			return nil, err
		}
		// consider only the pods that belong to the same namespace
		for _, nsPod := range pods {
			if nsPod.Namespace == pod.Namespace {
				nsPods = append(nsPods, nsPod)
			}
		}
	}

	nodes, err := nodeLister.List()
	if err != nil {
		return nil, err
	}

	counts := map[string]int{}
	if len(nsPods) > 0 {
		for _, pod := range nsPods {
			matches := false
			for _, selector := range selectors {
				if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
					matches = true
					break
				}
			}
			if matches {
				counts[pod.Spec.NodeName]++
				// Compute the maximum number of pods hosted on any node
				if counts[pod.Spec.NodeName] > maxCount {
					maxCount = counts[pod.Spec.NodeName]
				}
			}
		}
	}

	result := []algorithm.HostPriority{}
	//score int - scale of 0-10
	// 0 being the lowest priority and 10 being the highest
	for _, node := range nodes.Items {
		// initializing to the default/max node score of 10
		fScore := float32(10)
		if maxCount > 0 {
			fScore = 10 * (float32(maxCount-counts[node.Name]) / float32(maxCount))
		}
		result = append(result, algorithm.HostPriority{Host: node.Name, Score: int(fScore)})
		glog.V(10).Infof(
			"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
		)
	}
	return result, nil
}
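
What distinguishes this priority from the anti-affinity one above is the selector-collection step: every matching Service and RC contributes a set-based selector, and an existing pod is counted against its node as soon as any of those selectors matches the pod's labels. A minimal sketch of that matching decision, using plain maps as illustrative stand-ins for labels.Selector and labels.Set:

package main

import "fmt"

// matchesAny mirrors the "any selector matches" loop in
// CalculateSpreadPriority. A set-based selector matches when every
// key/value pair it requires is present on the pod's labels.
func matchesAny(selectors []map[string]string, podLabels map[string]string) bool {
	for _, selector := range selectors {
		matched := true
		for k, v := range selector {
			if podLabels[k] != v {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}

func main() {
	// Hypothetical selectors collected from the pod's Services and RCs.
	selectors := []map[string]string{
		{"app": "web"},
		{"app": "web", "tier": "frontend"},
	}
	fmt.Println(matchesAny(selectors, map[string]string{"app": "web"}))   // true
	fmt.Println(matchesAny(selectors, map[string]string{"app": "cache"})) // false
}

The zone-aware variant below keeps this matching logic unchanged and extends the scoring: matching-pod counts are additionally aggregated per zone and blended into the per-node score.
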
// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller.
// When a pod is scheduled, it looks for services or RCs that match the pod, then finds existing pods that match those selectors.
// It favors nodes that have fewer existing matching pods.
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service selectors or RC selectors as the pod being scheduled.
// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
	var nsPods []*api.Pod

	selectors := make([]labels.Selector, 0)
	services, err := s.serviceLister.GetPodServices(pod)
	if err == nil {
		for _, service := range services {
			selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
		}
	}
	controllers, err := s.controllerLister.GetPodControllers(pod)
	if err == nil {
		for _, controller := range controllers {
			selectors = append(selectors, labels.SelectorFromSet(controller.Spec.Selector))
		}
	}

	if len(selectors) > 0 {
		pods, err := podLister.List(labels.Everything())
		if err != nil {
			return nil, err
		}
		// consider only the pods that belong to the same namespace
		for _, nsPod := range pods {
			if nsPod.Namespace == pod.Namespace {
				nsPods = append(nsPods, nsPod)
			}
		}
	}

	nodes, err := nodeLister.List()
	if err != nil {
		return nil, err
	}

	// Count similar pods by node
	countsByNodeName := map[string]int{}
	for _, pod := range nsPods {
		// When we are replacing a failed pod, we often see the previous deleted version
		// while scheduling the replacement.  Ignore the previous deleted version for spreading
		// purposes (it can still be considered for resource restrictions etc.)
		if pod.DeletionTimestamp != nil {
			glog.V(2).Infof("skipping pending-deleted pod: %s/%s", pod.Namespace, pod.Name)
			continue
		}
		matches := false
		for _, selector := range selectors {
			if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
				matches = true
				break
			}
		}
		if !matches {
			continue
		}

		countsByNodeName[pod.Spec.NodeName]++
	}

	// Aggregate by-node information
	// Compute the maximum number of pods hosted on any node
	maxCountByNodeName := 0
	for _, count := range countsByNodeName {
		if count > maxCountByNodeName {
			maxCountByNodeName = count
		}
	}

	// Count similar pods by zone, if zone information is present
	countsByZone := map[string]int{}
	for i := range nodes.Items {
		node := &nodes.Items[i]

		count, found := countsByNodeName[node.Name]
		if !found {
			continue
		}

		zoneId := getZoneKey(node)
		if zoneId == "" {
			continue
		}

		countsByZone[zoneId] += count
	}

	// Aggregate by-zone information
	// Compute the maximum number of pods hosted in any zone
	haveZones := len(countsByZone) != 0
	maxCountByZone := 0
	for _, count := range countsByZone {
		if count > maxCountByZone {
			maxCountByZone = count
		}
	}

	result := []schedulerapi.HostPriority{}
	//score int - scale of 0-maxPriority
	// 0 being the lowest priority and maxPriority being the highest
	for i := range nodes.Items {
		node := &nodes.Items[i]
		// initializing to the default/max node score of maxPriority
		fScore := float32(maxPriority)
		if maxCountByNodeName > 0 {
			fScore = maxPriority * (float32(maxCountByNodeName-countsByNodeName[node.Name]) / float32(maxCountByNodeName))
		}

		// If there is zone information present, incorporate it
		if haveZones {
			zoneId := getZoneKey(node)
			if zoneId != "" {
				zoneScore := maxPriority * (float32(maxCountByZone-countsByZone[zoneId]) / float32(maxCountByZone))
				fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
			}
		}

		result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
		glog.V(10).Infof(
			"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
		)
	}
	return result, nil
}
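
When zone information is present, the final score is a weighted blend of the per-node and per-zone spreading scores: fScore = (1 - zoneWeighting) * nodeScore + zoneWeighting * zoneScore. The sketch below reproduces only that arithmetic; the maxPriority and zoneWeighting values and the counts in main are assumptions for illustration, since the listing above does not show those constants:

package main

import "fmt"

// Constants mirroring the ones referenced (but not shown) in the listing.
// Their exact values here are assumptions for illustration.
const (
	maxPriority   = 10
	zoneWeighting = 2.0 / 3.0
)

// blendedSpreadScore reproduces only the scoring arithmetic of the
// zone-aware CalculateSpreadPriority: a linear per-node score and a
// linear per-zone score, combined with zoneWeighting.
func blendedSpreadScore(nodeCount, maxNodeCount, zoneCount, maxZoneCount int) int {
	fScore := float32(maxPriority)
	if maxNodeCount > 0 {
		fScore = maxPriority * (float32(maxNodeCount-nodeCount) / float32(maxNodeCount))
	}
	if maxZoneCount > 0 {
		zoneScore := maxPriority * (float32(maxZoneCount-zoneCount) / float32(maxZoneCount))
		fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
	}
	return int(fScore)
}

func main() {
	// A node holding 1 of 2 matching pods, in a zone that already holds
	// 3 of the 4 matching pods: the zone term drags the score down.
	fmt.Println(blendedSpreadScore(1, 2, 3, 4)) // 3
	// An empty node in an empty zone keeps the maximum score.
	fmt.Println(blendedSpreadScore(0, 2, 0, 4)) // 10
}
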