// Deregister is used to remove a client from the client. If a client should // just be made unavailable for scheduling, a status update is prefered. func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Deregister", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now()) // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID for client deregistration") } // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err) return err } // Clear the heartbeat timer if any n.srv.clearHeartbeatTimer(args.NodeID) // Create the evaluations for this node evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } // Setup the reply reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex reply.NodeModifyIndex = index reply.Index = index return nil }
// updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { reply.LeaderRPCAddr = n.srv.raft.Leader() // Reply with config information required for future RPC requests reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers)) for k, v := range n.srv.localPeers { reply.Servers = append(reply.Servers, &structs.NodeServerInfo{ RPCAdvertiseAddr: k, RPCMajorVersion: int32(v.MajorVersion), RPCMinorVersion: int32(v.MinorVersion), Datacenter: v.Datacenter, }) } // TODO(sean@): Use an indexed node count instead // // Snapshot is used only to iterate over all nodes to create a node // count to send back to Nomad Clients in their heartbeat so Clients // can estimate the size of the cluster. iter, err := snap.Nodes() if err == nil { for { raw := iter.Next() if raw == nil { break } reply.NumNodes++ } } return nil }
// Deregister is used to remove a client from the cluster. If a client should // just be made unavailable for scheduling, a status update is preferred. func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Deregister", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now()) // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID for client deregistration") } // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err) return err } // Clear the heartbeat timer if any n.srv.clearHeartbeatTimer(args.NodeID) // Create the evaluations for this node evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } // Determine if there are any Vault accessors on the node accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err) return err } if len(accessors) != 0 { if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err) return err } } // Setup the reply reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex reply.NodeModifyIndex = index reply.Index = index return nil }
// Evaluate is used to force a re-evaluation of the node func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now()) // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID for evaluation") } // Look for the node snap, err := n.srv.fsm.State().Snapshot() if err != nil { return err } node, err := snap.NodeByID(args.NodeID) if err != nil { return err } if node == nil { return fmt.Errorf("node not found") } // Create the evaluation evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex // Set the reply index reply.Index = evalIndex n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) return err } return nil }
// Register is used to upsert a client that is available for scheduling func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Register", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now()) // Validate the arguments if args.Node == nil { return fmt.Errorf("missing node for client registration") } if args.Node.ID == "" { return fmt.Errorf("missing node ID for client registration") } if args.Node.Datacenter == "" { return fmt.Errorf("missing datacenter for client registration") } if args.Node.Name == "" { return fmt.Errorf("missing node name for client registration") } // Default the status if none is given if args.Node.Status == "" { args.Node.Status = structs.NodeStatusInit } if !structs.ValidNodeStatus(args.Node.Status) { return fmt.Errorf("invalid status for node") } // Compute the node class if err := args.Node.ComputeClass(); err != nil { return fmt.Errorf("failed to computed node class: %v", err) } // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err) return err } reply.NodeModifyIndex = index // Check if we should trigger evaluations if structs.ShouldDrainNode(args.Node.Status) { evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex } // Check if we need to setup a heartbeat if !args.Node.TerminalStatus() { ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) return err } reply.HeartbeatTTL = ttl } // Set the reply index reply.Index = index return nil }
// UpdateStatus is used to update the status of a client node func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now()) // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID for client deregistration") } if !structs.ValidNodeStatus(args.Status) { return fmt.Errorf("invalid status for node") } // Look for the node snap, err := n.srv.fsm.State().Snapshot() if err != nil { return err } node, err := snap.NodeByID(args.NodeID) if err != nil { return err } if node == nil { return fmt.Errorf("node not found") } // Commit this update via Raft var index uint64 if node.Status != args.Status { _, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err) return err } reply.NodeModifyIndex = index } // Check if we should trigger evaluations initToReady := node.Status == structs.NodeStatusInit && args.Status == structs.NodeStatusReady terminalToReady := node.Status == structs.NodeStatusDown && args.Status == structs.NodeStatusReady transitionToReady := initToReady || terminalToReady if structs.ShouldDrainNode(args.Status) || transitionToReady { evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex } // Check if we need to setup a heartbeat if args.Status != structs.NodeStatusDown { ttl, err := n.srv.resetHeartbeatTimer(args.NodeID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) return err } reply.HeartbeatTTL = ttl } // Set the reply index reply.Index = index return nil }
// Register is used to upsert a client that is available for scheduling func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Register", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now()) // Validate the arguments if args.Node == nil { return fmt.Errorf("missing node for client registration") } if args.Node.ID == "" { return fmt.Errorf("missing node ID for client registration") } if args.Node.Datacenter == "" { return fmt.Errorf("missing datacenter for client registration") } if args.Node.Name == "" { return fmt.Errorf("missing node name for client registration") } // Default the status if none is given if args.Node.Status == "" { args.Node.Status = structs.NodeStatusInit } if !structs.ValidNodeStatus(args.Node.Status) { return fmt.Errorf("invalid status for node") } // Set the timestamp when the node is registered args.Node.StatusUpdatedAt = time.Now().Unix() // Compute the node class if err := args.Node.ComputeClass(); err != nil { return fmt.Errorf("failed to computed node class: %v", err) } // Look for the node so we can detect a state transistion snap, err := n.srv.fsm.State().Snapshot() if err != nil { return err } originalNode, err := snap.NodeByID(args.Node.ID) if err != nil { return err } // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err) return err } reply.NodeModifyIndex = index // Check if we should trigger evaluations originalStatus := structs.NodeStatusInit if originalNode != nil { originalStatus = originalNode.Status } transitionToReady := transitionedToReady(args.Node.Status, originalStatus) if structs.ShouldDrainNode(args.Node.Status) || transitionToReady { evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex } // Check if we need to setup a heartbeat if !args.Node.TerminalStatus() { ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) return err } reply.HeartbeatTTL = ttl } // Set the reply index reply.Index = index snap, err = n.srv.fsm.State().Snapshot() if err != nil { return err } n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) return err } return nil }
// UpdateStatus is used to update the status of a client node func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done { return err } defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now()) // Verify the arguments if args.NodeID == "" { return fmt.Errorf("missing node ID for client status update") } if !structs.ValidNodeStatus(args.Status) { return fmt.Errorf("invalid status for node") } // Look for the node snap, err := n.srv.fsm.State().Snapshot() if err != nil { return err } node, err := snap.NodeByID(args.NodeID) if err != nil { return err } if node == nil { return fmt.Errorf("node not found") } // XXX: Could use the SecretID here but have to update the heartbeat system // to track SecretIDs. // Update the timestamp of when the node status was updated node.StatusUpdatedAt = time.Now().Unix() // Commit this update via Raft var index uint64 if node.Status != args.Status { _, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err) return err } reply.NodeModifyIndex = index } // Check if we should trigger evaluations transitionToReady := transitionedToReady(args.Status, node.Status) if structs.ShouldDrainNode(args.Status) || transitionToReady { evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) return err } reply.EvalIDs = evalIDs reply.EvalCreateIndex = evalIndex } // Check if we need to setup a heartbeat switch args.Status { case structs.NodeStatusDown: // Determine if there are any Vault accessors on the node accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err) return err } if len(accessors) != 0 { if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err) return err } } default: ttl, err := n.srv.resetHeartbeatTimer(args.NodeID) if err != nil { n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) return err } reply.HeartbeatTTL = ttl } // Set the reply index and leader reply.Index = index n.srv.peerLock.RLock() defer n.srv.peerLock.RUnlock() if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) return err } return nil }