// removePods unschedules pods for all scheduled nodes not selected
// by ds.nodeSelector
func (ds *daemonSet) removePods() error {
	podLocations, err := ds.CurrentPods()
	if err != nil {
		return util.Errorf("Error retrieving pod locations from daemon set: %v", err)
	}
	currentNodes := podLocations.Nodes()

	eligible, err := ds.EligibleNodes()
	if err != nil {
		return util.Errorf("Error retrieving eligible nodes for daemon set: %v", err)
	}

	// Get the difference in nodes that we need to unschedule on and then sort them
	// for deterministic ordering
	toUnscheduleSorted := types.NewNodeSet(currentNodes...).Difference(types.NewNodeSet(eligible...)).ListNodes()
	ds.logger.NoFields().Infof("Need to unschedule %d nodes", len(toUnscheduleSorted))

	ds.cancelReplication()

	for _, node := range toUnscheduleSorted {
		err := ds.unschedule(node)
		if err != nil {
			return util.Errorf("Error unscheduling node: %v", err)
		}
	}

	ds.logger.Infof("Need to schedule %d nodes", len(currentNodes)-len(toUnscheduleSorted))
	if len(currentNodes)-len(toUnscheduleSorted) > 0 {
		return ds.PublishToReplication()
	}
	return nil
}
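// Illustrative sketch (not part of the daemon set code): the set arithmetic used
// by removePods, assuming the p2 pkg/types NodeSet API shown above and fmt for
// output. Nodes that are currently scheduled but no longer eligible under the
// node selector are exactly the ones to unschedule.
func exampleUnscheduleDifference() {
	current := []types.NodeName{"node1", "node2", "node3"}
	eligible := []types.NodeName{"node2", "node3", "node4"}

	// current - eligible = nodes to unschedule, listed in deterministic order
	toUnscheduleSorted := types.NewNodeSet(current...).Difference(types.NewNodeSet(eligible...)).ListNodes()
	fmt.Println(toUnscheduleSorted) // [node1]
}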
// addPods schedules pods for all unscheduled nodes selected by ds.nodeSelector
func (ds *daemonSet) addPods() error {
	podLocations, err := ds.CurrentPods()
	if err != nil {
		return util.Errorf("Error retrieving pod locations from daemon set: %v", err)
	}
	currentNodes := podLocations.Nodes()

	eligible, err := ds.EligibleNodes()
	if err != nil {
		return util.Errorf("Error retrieving eligible nodes for daemon set: %v", err)
	}
	// TODO: Grab a lock here for the pod_id before adding something to check
	// contention and then disable

	// Get the difference in nodes that we need to schedule on and then sort them
	// for deterministic ordering
	toScheduleSorted := types.NewNodeSet(eligible...).Difference(types.NewNodeSet(currentNodes...)).ListNodes()
	ds.logger.NoFields().Infof("Need to label %d nodes", len(toScheduleSorted))

	for _, node := range toScheduleSorted {
		err := ds.labelPod(node)
		if err != nil {
			return util.Errorf("Error labeling node: %v", err)
		}
	}

	ds.logger.Infof("Need to schedule %v nodes", len(currentNodes))
	if len(currentNodes) > 0 {
		return ds.PublishToReplication()
	}
	return nil
}
func (rc *replicationController) removePods(current types.PodLocations) error {
	currentNodes := current.Nodes()
	eligible, err := rc.eligibleNodes()
	if err != nil {
		return err
	}

	// If we need to downsize the number of nodes, prefer any in current that are not eligible anymore.
	// TODO: evaluate changes to 'eligible' more frequently
	preferred := types.NewNodeSet(currentNodes...).Difference(types.NewNodeSet(eligible...))
	rest := types.NewNodeSet(currentNodes...).Difference(preferred)

	toUnschedule := len(current) - rc.ReplicasDesired
	rc.logger.NoFields().Infof("Need to unschedule %d nodes out of %s", toUnschedule, current)

	for i := 0; i < toUnschedule; i++ {
		unscheduleFrom, ok := preferred.PopAny()
		if !ok {
			var ok bool
			unscheduleFrom, ok = rest.PopAny()
			if !ok {
				// This should be mathematically impossible unless replicasDesired was negative
				return util.Errorf(
					"Unable to unschedule enough nodes to meet replicas desired: %d replicas desired, %d current.",
					rc.ReplicasDesired,
					len(current),
				)
			}
		}
		err := rc.unschedule(unscheduleFrom)
		if err != nil {
			return err
		}
	}
	return nil
}
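// Illustrative sketch (not part of the RC code) of the downsizing preference
// above, using the same NodeSet API: nodes that are still scheduled but no
// longer eligible sit in 'preferred' and are popped first; still-eligible
// nodes in 'rest' are only unscheduled once 'preferred' is empty.
func exampleDownsizePreference() {
	current := []types.NodeName{"node1", "node2", "node3"}
	eligible := []types.NodeName{"node2", "node3"}

	preferred := types.NewNodeSet(current...).Difference(types.NewNodeSet(eligible...)) // {node1}
	rest := types.NewNodeSet(current...).Difference(preferred)                          // {node2, node3}

	// Unschedule a single node: node1 comes out of preferred before rest is touched
	if node, ok := preferred.PopAny(); ok {
		fmt.Println("unschedule", node)
	} else if node, ok := rest.PopAny(); ok {
		fmt.Println("unschedule", node)
	}
}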
// makeNodeChanges returns the nodes to be removed and the nodes to be added
func makeNodeChanges(oldNodeLabels []labels.Labeled, newNodeLabels []labels.Labeled) ([]types.NodeName, []types.NodeName) {
	var oldNodeNames []types.NodeName
	var newNodeNames []types.NodeName

	for _, node := range oldNodeLabels {
		oldNodeNames = append(oldNodeNames, types.NodeName(node.ID))
	}

	for _, node := range newNodeLabels {
		newNodeNames = append(newNodeNames, types.NodeName(node.ID))
	}

	toRemove := types.NewNodeSet(oldNodeNames...).Difference(types.NewNodeSet(newNodeNames...)).ListNodes()
	toAdd := types.NewNodeSet(newNodeNames...).Difference(types.NewNodeSet(oldNodeNames...)).ListNodes()
	return toRemove, toAdd
}
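// Hypothetical usage of makeNodeChanges: node1 falls out of the label query
// results and node3 newly appears, so node1 is returned in toRemove and node3
// in toAdd. Only the Labeled.ID field is populated here; real label results
// carry more data.
func exampleNodeChanges() {
	oldNodeLabels := []labels.Labeled{{ID: "node1"}, {ID: "node2"}}
	newNodeLabels := []labels.Labeled{{ID: "node2"}, {ID: "node3"}}

	toRemove, toAdd := makeNodeChanges(oldNodeLabels, newNodeLabels)
	fmt.Println(toRemove) // [node1]
	fmt.Println(toAdd)    // [node3]
}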
func (rc *replicationController) addPods(current types.PodLocations) error {
	currentNodes := current.Nodes()
	eligible, err := rc.eligibleNodes()
	if err != nil {
		return err
	}

	// TODO: With Docker or runc we would not be constrained to running only once per node.
	// So it may be the case that we need to make the Scheduler interface smarter and use it here.
	possible := types.NewNodeSet(eligible...).Difference(types.NewNodeSet(currentNodes...))

	// Users want deterministic ordering of nodes being populated to a new
	// RC. Move nodes in sorted order by hostname to achieve this
	possibleSorted := possible.ListNodes()
	toSchedule := rc.ReplicasDesired - len(currentNodes)

	rc.logger.NoFields().Infof("Need to schedule %d nodes out of %s", toSchedule, possible)

	for i := 0; i < toSchedule; i++ {
		if len(possibleSorted) < i+1 {
			errMsg := fmt.Sprintf(
				"Not enough nodes to meet desire: %d replicas desired, %d currentNodes, %d eligible. Scheduled on %d nodes instead.",
				rc.ReplicasDesired, len(currentNodes), len(eligible), i,
			)
			err := rc.alerter.Alert(rc.alertInfo(errMsg))
			if err != nil {
				rc.logger.WithError(err).Errorln("Unable to send alert")
			}
			return util.Errorf(errMsg)
		}
		scheduleOn := possibleSorted[i]

		err := rc.schedule(scheduleOn)
		if err != nil {
			return err
		}
	}
	return nil
}
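// Worked example (hypothetical numbers, not part of the RC code) of the
// shortfall handling in addPods: with 5 replicas desired, 2 nodes already
// scheduled, and 4 eligible nodes, possible = eligible - current has only 2
// members while toSchedule = 5 - 2 = 3, so the loop runs out of nodes at
// i == 2, which is where the RC would alert and return an error.
func exampleScheduleShortfall() {
	replicasDesired := 5
	current := []types.NodeName{"node1", "node2"}
	eligible := []types.NodeName{"node1", "node2", "node3", "node4"}

	possibleSorted := types.NewNodeSet(eligible...).Difference(types.NewNodeSet(current...)).ListNodes()
	toSchedule := replicasDesired - len(current)

	for i := 0; i < toSchedule; i++ {
		if len(possibleSorted) < i+1 {
			fmt.Printf("scheduled on %d nodes instead of %d\n", i, toSchedule)
			return
		}
		fmt.Println("would schedule on", possibleSorted[i])
	}
}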
// clearPods unschedules pods for all the nodes that have been scheduled by
// this daemon set by using CurrentPods()
// This should only be used when a daemon set is deleted
func (ds *daemonSet) clearPods() error {
	podLocations, err := ds.CurrentPods()
	if err != nil {
		return util.Errorf("Error retrieving pod locations from daemon set: %v", err)
	}
	currentNodes := podLocations.Nodes()

	// Sort the current nodes for deterministic unscheduling order
	toUnscheduleSorted := types.NewNodeSet(currentNodes...).ListNodes()
	ds.logger.NoFields().Infof("Need to unschedule %d nodes", len(toUnscheduleSorted))

	ds.cancelReplication()

	for _, node := range toUnscheduleSorted {
		err := ds.unschedule(node)
		if err != nil {
			return util.Errorf("Error unscheduling node: %v", err)
		}
	}
	return nil
}
// Naive implementation of a guard: this checks whether any of the scheduled nodes
// are used by two daemon sets. It does not pre-emptively catch overlaps by selector
// because of how Kubernetes selectors work.
//
// It also naively checks the selectors themselves for overlap, e.g. when both
// label selectors are labels.Everything()
//
// Returns [ daemon set contended, contention exists, error ]
func (dsf *Farm) dsContends(dsFields *ds_fields.DaemonSet) (ds_fields.ID, bool, error) {
	// This daemon set does not contend if it is disabled
	if dsFields.Disabled {
		return "", false, nil
	}

	// Get all eligible nodes for this daemon set by looking at the labels.NODE tree
	eligibleNodes, err := dsf.scheduler.EligibleNodes(dsFields.Manifest, dsFields.NodeSelector)
	if err != nil {
		return "", false, util.Errorf("Error retrieving eligible nodes for daemon set: %v", err)
	}

	// If this daemon set has a node selector set to Everything, check the labels
	// of other daemon sets
	for _, child := range dsf.children {
		everythingSelector := klabels.Everything().String()
		if !child.ds.IsDisabled() && child.ds.PodID() == dsFields.PodID && child.ds.ID() != dsFields.ID {
			// Naively check if both selectors are the Everything selector plus
			// something else or if both selectors are the same
			//
			// This will still think that the following two selectors contend:
			// { az = zone_one, az != zone_one } and {} (the everything selector)
			// even though the first selector doesn't select anything
			//
			// If either the child or the current daemon set has the everything selector
			// then they contend
			if dsFields.NodeSelector.String() == everythingSelector ||
				child.ds.GetNodeSelector().String() == everythingSelector {
				dsf.raiseContentionAlert(child.ds, *dsFields)
				return child.ds.ID(), true, nil
			}

			// If both daemon sets have the same selector, then they contend
			//
			// This will still think that the following two selectors contend:
			// { az = zone_one, az != zone_one } and { az = zone_one, az != zone_one }
			// even though they don't select anything
			if dsFields.NodeSelector.String() == child.ds.GetNodeSelector().String() {
				dsf.raiseContentionAlert(child.ds, *dsFields)
				return child.ds.ID(), true, nil
			}

			// Get the child's eligible nodes and intersect them with ours to see
			// if there is any overlap
			//
			// NOTE: This is naive; it does not account for new nodes, so any alerts
			// we get will be caused by nodes added by human or machine error, or by
			// starting up a daemon set farm where contention already exists
			scheduledNodes, err := child.ds.EligibleNodes()
			if err != nil {
				return "", false, util.Errorf("Error getting scheduled nodes: %v", err)
			}
			intersectedNodes := types.NewNodeSet(eligibleNodes...).Intersection(types.NewNodeSet(scheduledNodes...))
			if intersectedNodes.Len() > 0 {
				dsf.raiseContentionAlert(child.ds, *dsFields)
				return child.ds.ID(), true, nil
			}
		}
	}
	return "", false, nil
}
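// Illustrative sketch (not part of the farm code) of the naive selector checks
// above, assuming the vendored Kubernetes labels package is imported as klabels.
// Two daemon sets running the same pod contend if either node selector is the
// everything selector, or if both selectors render to the same string.
func selectorsNaivelyContend(a, b klabels.Selector) bool {
	everythingSelector := klabels.Everything().String()
	if a.String() == everythingSelector || b.String() == everythingSelector {
		return true
	}
	return a.String() == b.String()
}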