func (ce *ConstraintEnforcer) shutdownNoncompliantTasks(node *api.Node) { // If the availability is "drain", the orchestrator will // shut down all tasks. // If the availability is "pause", we shouldn't touch // the tasks on this node. if node.Spec.Availability != api.NodeAvailabilityActive { return } var ( tasks []*api.Task err error ) ce.store.View(func(tx store.ReadTx) { tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID)) }) if err != nil { log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID) } var availableMemoryBytes, availableNanoCPUs int64 if node.Description != nil && node.Description.Resources != nil { availableMemoryBytes = node.Description.Resources.MemoryBytes availableNanoCPUs = node.Description.Resources.NanoCPUs } removeTasks := make(map[string]*api.Task) // TODO(aaronl): The set of tasks removed will be // nondeterministic because it depends on the order of // the slice returned from FindTasks. We could do // a separate pass over the tasks for each type of // resource, and sort by the size of the reservation // to remove the most resource-intensive tasks. for _, t := range tasks { if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateRunning { continue } // Ensure that the task still meets scheduling // constraints. if t.Spec.Placement != nil && len(t.Spec.Placement.Constraints) != 0 { constraints, _ := constraint.Parse(t.Spec.Placement.Constraints) if !constraint.NodeMatches(constraints, node) { removeTasks[t.ID] = t continue } } // Ensure that the task assigned to the node // still satisfies the resource limits. if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil { if t.Spec.Resources.Reservations.MemoryBytes > availableMemoryBytes { removeTasks[t.ID] = t continue } if t.Spec.Resources.Reservations.NanoCPUs > availableNanoCPUs { removeTasks[t.ID] = t continue } availableMemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes availableNanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs } } if len(removeTasks) != 0 { _, err := ce.store.Batch(func(batch *store.Batch) error { for _, t := range removeTasks { err := batch.Update(func(tx store.Tx) error { t = store.GetTask(tx, t.ID) if t == nil || t.DesiredState > api.TaskStateRunning { return nil } t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) }) if err != nil { log.L.WithError(err).Errorf("failed to shut down task %s", t.ID) } } return nil }) if err != nil { log.L.WithError(err).Errorf("failed to shut down tasks") } } }
func (g *Orchestrator) reconcileServices(ctx context.Context, serviceIDs []string) { nodeCompleted := make(map[string]map[string]struct{}) nodeTasks := make(map[string]map[string][]*api.Task) g.store.View(func(tx store.ReadTx) { for _, serviceID := range serviceIDs { tasks, err := store.FindTasks(tx, store.ByServiceID(serviceID)) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices failed finding tasks for service %s", serviceID) continue } // a node may have completed this service nodeCompleted[serviceID] = make(map[string]struct{}) // nodeID -> task list nodeTasks[serviceID] = make(map[string][]*api.Task) for _, t := range tasks { if isTaskRunning(t) { // Collect all running instances of this service nodeTasks[serviceID][t.NodeID] = append(nodeTasks[serviceID][t.NodeID], t) } else { // for finished tasks, check restartPolicy if isTaskCompleted(t, orchestrator.RestartCondition(t)) { nodeCompleted[serviceID][t.NodeID] = struct{}{} } } } } }) _, err := g.store.Batch(func(batch *store.Batch) error { var updateTasks []orchestrator.Slot for _, serviceID := range serviceIDs { if _, exists := nodeTasks[serviceID]; !exists { continue } service := g.globalServices[serviceID] for nodeID, node := range g.nodes { meetsConstraints := constraint.NodeMatches(service.constraints, node) ntasks := nodeTasks[serviceID][nodeID] delete(nodeTasks[serviceID], nodeID) // if restart policy considers this node has finished its task // it should remove all running tasks if _, exists := nodeCompleted[serviceID][nodeID]; exists || !meetsConstraints { g.removeTasks(ctx, batch, ntasks) continue } if node.Spec.Availability == api.NodeAvailabilityPause { // the node is paused, so we won't add or update // any tasks continue } // this node needs to run 1 copy of the task if len(ntasks) == 0 { g.addTask(ctx, batch, service.Service, nodeID) } else { updateTasks = append(updateTasks, ntasks) } } if len(updateTasks) > 0 { g.updater.Update(ctx, g.cluster, service.Service, updateTasks) } // Remove any tasks assigned to nodes not found in g.nodes. // These must be associated with nodes that are drained, or // nodes that no longer exist. for _, ntasks := range nodeTasks[serviceID] { g.removeTasks(ctx, batch, ntasks) } } return nil }) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices transaction failed") } }
// reconcileServicesOneNode checks the specified services on one node func (g *Orchestrator) reconcileServicesOneNode(ctx context.Context, serviceIDs []string, nodeID string) { node, exists := g.nodes[nodeID] if !exists { return } // whether each service has completed on the node completed := make(map[string]bool) // tasks by service tasks := make(map[string][]*api.Task) var ( tasksOnNode []*api.Task err error ) g.store.View(func(tx store.ReadTx) { tasksOnNode, err = store.FindTasks(tx, store.ByNodeID(nodeID)) }) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: reconcile failed finding tasks on node %s", nodeID) return } for _, serviceID := range serviceIDs { for _, t := range tasksOnNode { if t.ServiceID != serviceID { continue } if isTaskRunning(t) { tasks[serviceID] = append(tasks[serviceID], t) } else { if isTaskCompleted(t, orchestrator.RestartCondition(t)) { completed[serviceID] = true } } } } _, err = g.store.Batch(func(batch *store.Batch) error { for _, serviceID := range serviceIDs { service, exists := g.globalServices[serviceID] if !exists { continue } if !constraint.NodeMatches(service.constraints, node) { continue } // if restart policy considers this node has finished its task // it should remove all running tasks if completed[serviceID] { g.removeTasks(ctx, batch, tasks[serviceID]) continue } if node.Spec.Availability == api.NodeAvailabilityPause { // the node is paused, so we won't add or update tasks continue } if len(tasks) == 0 { g.addTask(ctx, batch, service.Service, nodeID) } else { // If task is out of date, update it. This can happen // on node reconciliation if, for example, we pause a // node, update the service, and then activate the node // later. // We don't use g.updater here for two reasons: // - This is not a rolling update. Since it was not // triggered directly by updating the service, it // should not observe the rolling update parameters // or show status in UpdateStatus. // - Calling Update cancels any current rolling updates // for the service, such as one triggered by service // reconciliation. var ( dirtyTasks []*api.Task cleanTasks []*api.Task ) for _, t := range tasks[serviceID] { if orchestrator.IsTaskDirty(service.Service, t) { dirtyTasks = append(dirtyTasks, t) } else { cleanTasks = append(cleanTasks, t) } } if len(cleanTasks) == 0 { g.addTask(ctx, batch, service.Service, nodeID) } else { dirtyTasks = append(dirtyTasks, cleanTasks[1:]...) } g.removeTasks(ctx, batch, dirtyTasks) } } return nil }) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServiceOneNode batch failed") } }
// Check returns true if the task's constraint is supported by the given node. func (f *ConstraintFilter) Check(n *NodeInfo) bool { return constraint.NodeMatches(f.constraints, n.Node) }