// RemoveNode removes a Node referenced by NodeID with the given NodeSpec. // - Returns NotFound if the Node is not found. // - Returns FailedPrecondition if the Node has manager role (and is part of the memberlist) or is not shut down. // - Returns InvalidArgument if NodeID or NodeVersion is not valid. // - Returns an error if the delete fails. func (s *Server) RemoveNode(ctx context.Context, request *api.RemoveNodeRequest) (*api.RemoveNodeResponse, error) { if request.NodeID == "" { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } err := s.store.Update(func(tx store.Tx) error { node := store.GetNode(tx, request.NodeID) if node == nil { return grpc.Errorf(codes.NotFound, "node %s not found", request.NodeID) } if node.Spec.Role == api.NodeRoleManager { if s.raft == nil { return grpc.Errorf(codes.FailedPrecondition, "node %s is a manager but cannot access node information from the raft memberlist", request.NodeID) } if member := s.raft.GetMemberByNodeID(request.NodeID); member != nil { return grpc.Errorf(codes.FailedPrecondition, "node %s is a cluster manager and is a member of the raft cluster. It must be demoted to worker before removal", request.NodeID) } } if !request.Force && node.Status.State == api.NodeStatus_READY { return grpc.Errorf(codes.FailedPrecondition, "node %s is not down and can't be removed", request.NodeID) } return store.DeleteNode(tx, request.NodeID) }) if err != nil { return nil, err } return &api.RemoveNodeResponse{}, nil }
func TestRenewTLSConfigWithNoNode(t *testing.T) { t.Parallel() tc := testutils.NewTestCA(t) defer tc.Stop() ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Get a new nodeConfig with a TLS cert that has the default Cert duration nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole) assert.NoError(t, err) // Create a new RootCA, and change the policy to issue 6 minute certificates. // Because of the default backdate of 5 minutes, this issues certificates // valid for 1 minute. newRootCA, err := ca.NewRootCA(tc.RootCA.Cert, tc.RootCA.Key, ca.DefaultNodeCertExpiration) assert.NoError(t, err) newRootCA.Signer.SetPolicy(&cfconfig.Signing{ Default: &cfconfig.SigningProfile{ Usage: []string{"signing", "key encipherment", "server auth", "client auth"}, Expiry: 6 * time.Minute, }, }) // Create a new CSR and overwrite the key on disk csr, key, err := ca.GenerateNewCSR() assert.NoError(t, err) // Issue a new certificate with the same details as the current config, but with 1 min expiration time c := nodeConfig.ClientTLSCreds signedCert, err := newRootCA.ParseValidateAndSignCSR(csr, c.NodeID(), c.Role(), c.Organization()) assert.NoError(t, err) assert.NotNil(t, signedCert) // Overwrite the certificate on disk with one that expires in 1 minute err = ioutils.AtomicWriteFile(tc.Paths.Node.Cert, signedCert, 0644) assert.NoError(t, err) err = ioutils.AtomicWriteFile(tc.Paths.Node.Key, key, 0600) assert.NoError(t, err) // Delete the node from the backend store err = tc.MemoryStore.Update(func(tx store.Tx) error { node := store.GetNode(tx, nodeConfig.ClientTLSCreds.NodeID()) assert.NotNil(t, node) return store.DeleteNode(tx, nodeConfig.ClientTLSCreds.NodeID()) }) assert.NoError(t, err) renew := make(chan struct{}) updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.Remotes, renew) select { case <-time.After(10 * time.Second): assert.Fail(t, "TestRenewTLSConfig timed-out") case certUpdate := <-updates: assert.Error(t, certUpdate.Err) assert.Contains(t, certUpdate.Err.Error(), "not found when attempting to renew certificate") } }
// RemoveNode updates a Node referenced by NodeID with the given NodeSpec. // - Returns NotFound if the Node is not found. // - Returns FailedPrecondition if the Node has manager role or not shut down. // - Returns InvalidArgument if NodeID or NodeVersion is not valid. // - Returns an error if the delete fails. func (s *Server) RemoveNode(ctx context.Context, request *api.RemoveNodeRequest) (*api.RemoveNodeResponse, error) { if request.NodeID == "" { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } if s.raft != nil { memberlist := s.raft.GetMemberlist() for _, member := range memberlist { if member.NodeID == request.NodeID { return nil, grpc.Errorf(codes.FailedPrecondition, "node %s is a cluster manager and is part of the quorum. It must be demoted to worker before removal", request.NodeID) } } } err := s.store.Update(func(tx store.Tx) error { node := store.GetNode(tx, request.NodeID) if node == nil { return grpc.Errorf(codes.NotFound, "node %s not found", request.NodeID) } if node.Spec.Role == api.NodeRoleManager { return grpc.Errorf(codes.FailedPrecondition, "node %s role is set to manager. It should be demoted to worker for safe removal", request.NodeID) } if node.Status.State == api.NodeStatus_READY { return grpc.Errorf(codes.FailedPrecondition, "node %s is not down and can't be removed", request.NodeID) } return store.DeleteNode(tx, request.NodeID) }) if err != nil { return nil, err } return &api.RemoveNodeResponse{}, nil }
func TestDrain(t *testing.T) { ctx := context.Background() initialService := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnNone, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 6, }, }, }, } initialNodeSet := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, }, // We should NOT kick out tasks on UNKNOWN nodes. { ID: "id3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name3", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, }, { ID: "id4", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name4", }, Availability: api.NodeAvailabilityPause, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, Availability: api.NodeAvailabilityDrain, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ // Task not assigned to any node { ID: "id0", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 1, ServiceAnnotations: api.Annotations{ Name: "name0", }, ServiceID: "id1", }, // Tasks assigned to the nodes defined above { ID: "id1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 2, ServiceAnnotations: api.Annotations{ Name: "name1", }, ServiceID: "id1", NodeID: "id1", }, { ID: "id2", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 3, ServiceAnnotations: api.Annotations{ Name: "name2", }, ServiceID: "id1", NodeID: "id2", }, { ID: "id3", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 4, ServiceAnnotations: api.Annotations{ Name: "name3", }, ServiceID: "id1", NodeID: "id3", }, { ID: "id4", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 5, ServiceAnnotations: api.Annotations{ Name: "name4", }, ServiceID: "id1", NodeID: "id4", }, { ID: "id5", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 6, ServiceAnnotations: api.Annotations{ Name: "name5", }, ServiceID: "id1", NodeID: "id5", }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepopulate service assert.NoError(t, store.CreateService(tx, initialService)) // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() go func() { assert.NoError(t, orchestrator.Run(ctx)) }() // id2 and id5 should be killed immediately deletion1 := watchShutdownTask(t, watch) deletion2 := watchShutdownTask(t, watch) assert.Regexp(t, "id(2|5)", deletion1.ID) assert.Regexp(t, "id(2|5)", deletion1.NodeID) assert.Regexp(t, "id(2|5)", deletion2.ID) assert.Regexp(t, "id(2|5)", deletion2.NodeID) // Create a new task, assigned to node id2 err = s.Update(func(tx store.Tx) error { task := initialTaskSet[2].Copy() task.ID = "newtask" task.NodeID = "id2" assert.NoError(t, store.CreateTask(tx, task)) return nil }) assert.NoError(t, err) deletion3 := watchShutdownTask(t, watch) assert.Equal(t, "newtask", deletion3.ID) assert.Equal(t, "id2", deletion3.NodeID) // Set node id4 to the DRAINED state err = s.Update(func(tx store.Tx) error { n := initialNodeSet[3].Copy() n.Spec.Availability = api.NodeAvailabilityDrain assert.NoError(t, store.UpdateNode(tx, n)) return nil }) assert.NoError(t, err) deletion4 := watchShutdownTask(t, watch) assert.Equal(t, "id4", deletion4.ID) assert.Equal(t, "id4", deletion4.NodeID) // Delete node id1 err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNode(tx, "id1")) return nil }) assert.NoError(t, err) deletion5 := watchShutdownTask(t, watch) assert.Equal(t, "id1", deletion5.ID) assert.Equal(t, "id1", deletion5.NodeID) }
func TestScheduler(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ { ID: "id1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStateAssigned, }, NodeID: initialNodeSet[0].ID, }, { ID: "id2", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, { ID: "id3", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() assignment1 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task assert.Regexp(t, assignment1.NodeID, "(id2|id3)") assignment2 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task if assignment1.NodeID == "id2" { assert.Equal(t, "id3", assignment2.NodeID) } else { assert.Equal(t, "id2", assignment2.NodeID) } err = s.Update(func(tx store.Tx) error { // Update each node to make sure this doesn't mess up the // scheduler's state. for _, n := range initialNodeSet { assert.NoError(t, store.UpdateNode(tx, n)) } return nil }) assert.NoError(t, err) err = s.Update(func(tx store.Tx) error { // Delete the task associated with node 1 so it's now the most lightly // loaded node. assert.NoError(t, store.DeleteTask(tx, "id1")) // Create a new task. It should get assigned to id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment3 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment3.NodeID) // Update a task to make it unassigned. It should get assigned by the // scheduler. err = s.Update(func(tx store.Tx) error { // Remove assignment from task id4. It should get assigned // to node id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.UpdateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment4 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment4.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then remove it. No tasks should ever // be assigned to it. node := &api.Node{ ID: "removednode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "removednode", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.DeleteNode(tx, node.ID)) // Create an unassigned task. task := &api.Task{ ID: "removednode", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "removednode", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, task)) return nil }) assert.NoError(t, err) assignmentRemovedNode := watchAssignment(t, watch) assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node. It should be used for the next // assignment. n4 := &api.Node{ ID: "id4", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name4", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n4)) // Create an unassigned task. t5 := &api.Task{ ID: "id5", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name5", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t5)) return nil }) assert.NoError(t, err) assignment5 := watchAssignment(t, watch) assert.Equal(t, "id4", assignment5.NodeID) err = s.Update(func(tx store.Tx) error { // Create a non-ready node. It should NOT be used for the next // assignment. n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, n5)) // Create an unassigned task. t6 := &api.Task{ ID: "id6", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name6", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil }) assert.NoError(t, err) assignment6 := watchAssignment(t, watch) assert.NotEqual(t, "id5", assignment6.NodeID) err = s.Update(func(tx store.Tx) error { // Update node id5 to put it in the READY state. n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.UpdateNode(tx, n5)) // Create an unassigned task. Should be assigned to the // now-ready node. t7 := &api.Task{ ID: "id7", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name7", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t7)) return nil }) assert.NoError(t, err) assignment7 := watchAssignment(t, watch) assert.Equal(t, "id5", assignment7.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then immediately take it down. The next // unassigned task should NOT be assigned to it. n6 := &api.Node{ ID: "id6", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name6", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n6)) n6.Status.State = api.NodeStatus_DOWN assert.NoError(t, store.UpdateNode(tx, n6)) // Create an unassigned task. t8 := &api.Task{ ID: "id8", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name8", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t8)) return nil }) assert.NoError(t, err) assignment8 := watchAssignment(t, watch) assert.NotEqual(t, "id6", assignment8.NodeID) }
func deleteNode(t *testing.T, s *store.MemoryStore, node *api.Node) { s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNode(tx, node.ID)) return nil }) }
// RemoveNode removes a Node referenced by NodeID with the given NodeSpec. // - Returns NotFound if the Node is not found. // - Returns FailedPrecondition if the Node has manager role (and is part of the memberlist) or is not shut down. // - Returns InvalidArgument if NodeID or NodeVersion is not valid. // - Returns an error if the delete fails. func (s *Server) RemoveNode(ctx context.Context, request *api.RemoveNodeRequest) (*api.RemoveNodeResponse, error) { if request.NodeID == "" { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } err := s.store.Update(func(tx store.Tx) error { node := store.GetNode(tx, request.NodeID) if node == nil { return grpc.Errorf(codes.NotFound, "node %s not found", request.NodeID) } if node.Spec.Role == api.NodeRoleManager { if s.raft == nil { return grpc.Errorf(codes.FailedPrecondition, "node %s is a manager but cannot access node information from the raft memberlist", request.NodeID) } if member := s.raft.GetMemberByNodeID(request.NodeID); member != nil { return grpc.Errorf(codes.FailedPrecondition, "node %s is a cluster manager and is a member of the raft cluster. It must be demoted to worker before removal", request.NodeID) } } if !request.Force && node.Status.State == api.NodeStatus_READY { return grpc.Errorf(codes.FailedPrecondition, "node %s is not down and can't be removed", request.NodeID) } // lookup the cluster clusters, err := store.FindClusters(tx, store.ByName("default")) if err != nil { return err } if len(clusters) != 1 { return grpc.Errorf(codes.Internal, "could not fetch cluster object") } cluster := clusters[0] removedNode := &api.RemovedNode{ID: node.ID} // Set an expiry time for this RemovedNode if a certificate // exists and can be parsed. if len(node.Certificate.Certificate) != 0 { certBlock, _ := pem.Decode(node.Certificate.Certificate) if certBlock != nil { X509Cert, err := x509.ParseCertificate(certBlock.Bytes) if err == nil && !X509Cert.NotAfter.IsZero() { expiry, err := ptypes.TimestampProto(X509Cert.NotAfter) if err == nil { removedNode.Expiry = expiry } } } } cluster.RemovedNodes = append(cluster.RemovedNodes, removedNode) if err := store.UpdateCluster(tx, cluster); err != nil { return err } return store.DeleteNode(tx, request.NodeID) }) if err != nil { return nil, err } return &api.RemoveNodeResponse{}, nil }