// applyPlan is used to apply the plan result and to return the alloc index func (s *Server) applyPlan(result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) { req := structs.AllocUpdateRequest{} for _, updateList := range result.NodeUpdate { req.Alloc = append(req.Alloc, updateList...) } for _, allocList := range result.NodeAllocation { req.Alloc = append(req.Alloc, allocList...) } req.Alloc = append(req.Alloc, result.FailedAllocs...) // Set the time the alloc was applied for the first time. This can be used // to approximate the scheduling time. now := time.Now().UTC().UnixNano() for _, alloc := range req.Alloc { if alloc.CreateTime == 0 { alloc.CreateTime = now } } // Dispatch the Raft transaction future, err := s.raftApplyFuture(structs.AllocUpdateRequestType, &req) if err != nil { return nil, err } // Optimistically apply to our state view if snap != nil { nextIdx := s.raft.AppliedIndex() + 1 if err := snap.UpsertAllocs(nextIdx, req.Alloc); err != nil { return future, err } } return future, nil }
// updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = n.srv.raft.Leader() // Reply with config information required for future RPC requests reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers)) for k, v := range n.srv.localPeers { reply.Servers = append(reply.Servers, &structs.NodeServerInfo{ RPCAdvertiseAddr: k, RPCMajorVersion: int32(v.MajorVersion), RPCMinorVersion: int32(v.MinorVersion), Datacenter: v.Datacenter, }) } // TODO(sean@): Use an indexed node count instead // // Snapshot is used only to iterate over all nodes to create a node // count to send back to Nomad Clients in their heartbeat so Clients // can estimate the size of the cluster. iter, err := snap.Nodes() if err == nil { for { raw := iter.Next() if raw == nil { break } reply.NumNodes++ } } return nil }
// applyPlan is used to apply the plan result and to return the alloc index func (s *Server) applyPlan(result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) { req := structs.AllocUpdateRequest{} for _, updateList := range result.NodeUpdate { req.Alloc = append(req.Alloc, updateList...) } for _, allocList := range result.NodeAllocation { req.Alloc = append(req.Alloc, allocList...) } req.Alloc = append(req.Alloc, result.FailedAllocs...) // Dispatch the Raft transaction future, err := s.raftApplyFuture(structs.AllocUpdateRequestType, &req) if err != nil { return nil, err } // Optimistically apply to our state view if snap != nil { nextIdx := s.raft.AppliedIndex() + 1 if err := snap.UpsertAllocs(nextIdx, req.Alloc); err != nil { return future, err } } return future, nil }
// evaluateNodePlan is used to evalute the plan for a single node, // returning if the plan is valid or if an error is encountered func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, error) { // If this is an evict-only plan, it always 'fits' since we are removing things. if len(plan.NodeAllocation[nodeID]) == 0 { return true, nil } // Get the node itself node, err := snap.NodeByID(nodeID) if err != nil { return false, fmt.Errorf("failed to get node '%s': %v", nodeID, err) } // If the node does not exist or is not ready for schduling it is not fit // XXX: There is a potential race between when we do this check and when // the Raft commit happens. if node == nil || node.Status != structs.NodeStatusReady || node.Drain { return false, nil } // Get the existing allocations existingAlloc, err := snap.AllocsByNode(nodeID) if err != nil { return false, fmt.Errorf("failed to get existing allocations for '%s': %v", nodeID, err) } // Filter on alloc state existingAlloc = structs.FilterTerminalAllocs(existingAlloc) // Determine the proposed allocation by first removing allocations // that are planned evictions and adding the new allocations. proposed := existingAlloc var remove []*structs.Allocation if update := plan.NodeUpdate[nodeID]; len(update) > 0 { remove = append(remove, update...) } if updated := plan.NodeAllocation[nodeID]; len(updated) > 0 { for _, alloc := range updated { remove = append(remove, alloc) } } proposed = structs.RemoveAllocs(existingAlloc, remove) proposed = append(proposed, plan.NodeAllocation[nodeID]...) // Check if these allocations fit fit, _, _, err := structs.AllocsFit(node, proposed, nil) return fit, err }
// applyPlan is used to apply the plan result and to return the alloc index func (s *Server) applyPlan(job *structs.Job, result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) { // Determine the miniumum number of updates, could be more if there // are multiple updates per node minUpdates := len(result.NodeUpdate) minUpdates += len(result.NodeAllocation) minUpdates += len(result.FailedAllocs) // Setup the update request req := structs.AllocUpdateRequest{ Job: job, Alloc: make([]*structs.Allocation, 0, minUpdates), } for _, updateList := range result.NodeUpdate { req.Alloc = append(req.Alloc, updateList...) } for _, allocList := range result.NodeAllocation { req.Alloc = append(req.Alloc, allocList...) } req.Alloc = append(req.Alloc, result.FailedAllocs...) // Set the time the alloc was applied for the first time. This can be used // to approximate the scheduling time. now := time.Now().UTC().UnixNano() for _, alloc := range req.Alloc { if alloc.CreateTime == 0 { alloc.CreateTime = now } } // Dispatch the Raft transaction future, err := s.raftApplyFuture(structs.AllocUpdateRequestType, &req) if err != nil { return nil, err } // Optimistically apply to our state view if snap != nil { nextIdx := s.raft.AppliedIndex() + 1 if err := snap.UpsertAllocs(nextIdx, req.Alloc); err != nil { return future, err } } return future, nil }
// evaluatePlan is used to determine what portions of a plan // can be applied if any. Returns if there should be a plan application // which may be partial or if there was an error func evaluatePlan(snap *state.StateSnapshot, plan *structs.Plan) (*structs.PlanResult, error) { defer metrics.MeasureSince([]string{"nomad", "plan", "evaluate"}, time.Now()) // Create a result holder for the plan result := &structs.PlanResult{ NodeUpdate: make(map[string][]*structs.Allocation), NodeAllocation: make(map[string][]*structs.Allocation), FailedAllocs: plan.FailedAllocs, } // Collect all the nodeIDs nodeIDs := make(map[string]struct{}) for nodeID := range plan.NodeUpdate { nodeIDs[nodeID] = struct{}{} } for nodeID := range plan.NodeAllocation { nodeIDs[nodeID] = struct{}{} } // Check each allocation to see if it should be allowed for nodeID := range nodeIDs { // Evaluate the plan for this node fit, err := evaluateNodePlan(snap, plan, nodeID) if err != nil { return nil, err } if !fit { // Scheduler must have stale data, RefreshIndex should force // the latest view of allocations and nodes allocIndex, err := snap.Index("allocs") if err != nil { return nil, err } nodeIndex, err := snap.Index("nodes") if err != nil { return nil, err } result.RefreshIndex = maxUint64(nodeIndex, allocIndex) // If we require all-at-once scheduling, there is no point // to continue the evaluation, as we've already failed. if plan.AllAtOnce { result.NodeUpdate = nil result.NodeAllocation = nil return result, nil } // Skip this node, since it cannot be used. continue } // Add this to the plan result if nodeUpdate := plan.NodeUpdate[nodeID]; len(nodeUpdate) > 0 { result.NodeUpdate[nodeID] = nodeUpdate } if nodeAlloc := plan.NodeAllocation[nodeID]; len(nodeAlloc) > 0 { result.NodeAllocation[nodeID] = nodeAlloc } } return result, nil }
// evaluatePlan is used to determine what portions of a plan // can be applied if any. Returns if there should be a plan application // which may be partial or if there was an error func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.Plan) (*structs.PlanResult, error) { defer metrics.MeasureSince([]string{"nomad", "plan", "evaluate"}, time.Now()) // Create a result holder for the plan result := &structs.PlanResult{ NodeUpdate: make(map[string][]*structs.Allocation), NodeAllocation: make(map[string][]*structs.Allocation), } // Collect all the nodeIDs nodeIDs := make(map[string]struct{}) nodeIDList := make([]string, 0, len(plan.NodeUpdate)+len(plan.NodeAllocation)) for nodeID := range plan.NodeUpdate { if _, ok := nodeIDs[nodeID]; !ok { nodeIDs[nodeID] = struct{}{} nodeIDList = append(nodeIDList, nodeID) } } for nodeID := range plan.NodeAllocation { if _, ok := nodeIDs[nodeID]; !ok { nodeIDs[nodeID] = struct{}{} nodeIDList = append(nodeIDList, nodeID) } } // Setup a multierror to handle potentially getting many // errors since we are processing in parallel. var mErr multierror.Error partialCommit := false // handleResult is used to process the result of evaluateNodePlan handleResult := func(nodeID string, fit bool, err error) (cancel bool) { // Evaluate the plan for this node if err != nil { mErr.Errors = append(mErr.Errors, err) return true } if !fit { // Set that this is a partial commit partialCommit = true // If we require all-at-once scheduling, there is no point // to continue the evaluation, as we've already failed. if plan.AllAtOnce { result.NodeUpdate = nil result.NodeAllocation = nil return true } // Skip this node, since it cannot be used. return } // Add this to the plan result if nodeUpdate := plan.NodeUpdate[nodeID]; len(nodeUpdate) > 0 { result.NodeUpdate[nodeID] = nodeUpdate } if nodeAlloc := plan.NodeAllocation[nodeID]; len(nodeAlloc) > 0 { result.NodeAllocation[nodeID] = nodeAlloc } return } // Get the pool channels req := pool.RequestCh() resp := pool.ResultCh() outstanding := 0 didCancel := false // Evalute each node in the plan, handling results as they are ready to // avoid blocking. for len(nodeIDList) > 0 { nodeID := nodeIDList[0] select { case req <- evaluateRequest{snap, plan, nodeID}: outstanding++ nodeIDList = nodeIDList[1:] case r := <-resp: outstanding-- // Handle a result that allows us to cancel evaluation, // which may save time processing additional entries. if cancel := handleResult(r.nodeID, r.fit, r.err); cancel { didCancel = true break } } } // Drain the remaining results for outstanding > 0 { r := <-resp if !didCancel { if cancel := handleResult(r.nodeID, r.fit, r.err); cancel { didCancel = true } } outstanding-- } // If the plan resulted in a partial commit, we need to determine // a minimum refresh index to force the scheduler to work on a more // up-to-date state to avoid the failures. if partialCommit { allocIndex, err := snap.Index("allocs") if err != nil { mErr.Errors = append(mErr.Errors, err) } nodeIndex, err := snap.Index("nodes") if err != nil { mErr.Errors = append(mErr.Errors, err) } result.RefreshIndex = maxUint64(nodeIndex, allocIndex) if result.RefreshIndex == 0 { err := fmt.Errorf("partialCommit with RefreshIndex of 0 (%d node, %d alloc)", nodeIndex, allocIndex) mErr.Errors = append(mErr.Errors, err) } } return result, mErr.ErrorOrNil() }