func createNode(s *store.MemoryStore, nodeID, role string, csr, cert []byte) error { apiRole, _ := ca.FormatRole(role) err := s.Update(func(tx store.Tx) error { node := &api.Node{ ID: nodeID, Certificate: api.Certificate{ CSR: csr, CN: nodeID, Role: apiRole, Status: api.IssuanceStatus{ State: api.IssuanceStateIssued, }, Certificate: cert, }, Spec: api.NodeSpec{ Role: apiRole, Membership: api.NodeMembershipAccepted, }, } return store.CreateNode(tx, node) }) return err }
func TestSchedulerNoReadyNodes(t *testing.T) { ctx := context.Background() initialTask := &api.Task{ ID: "id1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial task assert.NoError(t, store.CreateTask(tx, initialTask)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() failure := watchAssignmentFailure(t, watch) assert.Equal(t, "no suitable node", failure.Status.Message) err = s.Update(func(tx store.Tx) error { // Create a ready node. The task should get assigned to this // node. node := &api.Node{ ID: "newnode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "newnode", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, node)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "newnode", assignment.NodeID) }
func createNode(t *testing.T, ts *testServer, id string, role api.NodeRole, membership api.NodeSpec_Membership) *api.Node { node := &api.Node{ ID: id, Spec: api.NodeSpec{ Role: role, Membership: membership, }, } err := ts.Store.Update(func(tx store.Tx) error { return store.CreateNode(tx, node) }) assert.NoError(t, err) return node }
func TestSchedulerPluginConstraint(t *testing.T) { ctx := context.Background() // Node1: vol plugin1 n1 := &api.Node{ ID: "node1_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node1", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } // Node2: vol plugin1, vol plugin2 n2 := &api.Node{ ID: "node2_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node2", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, { Type: "Volume", Name: "plugin2", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } // Node3: vol plugin1, network plugin1 n3 := &api.Node{ ID: "node3_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node3", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, { Type: "Network", Name: "plugin1", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } volumeOptionsDriver := func(driver string) *api.Mount_VolumeOptions { return &api.Mount_VolumeOptions{ DriverConfig: &api.Driver{ Name: driver, }, } } // Task1: vol plugin1 t1 := &api.Task{ ID: "task1_ID", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: "testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } // Task2: vol plugin1, vol plugin2 t2 := &api.Task{ ID: "task2_ID", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: 
"testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, { Source: "testVol2", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin2"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } // Task3: vol plugin1, network plugin1 t3 := &api.Task{ ID: "task3_ID", DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: &api.Network{ ID: "testNwID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "testVol1", }, }, DriverState: &api.Driver{ Name: "plugin1", }, }, }, }, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: "testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() // Add initial node and task err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t1)) assert.NoError(t, store.CreateNode(tx, n1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // t1 should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, assignment.NodeID, "node1_ID") // Create t2; it should stay in the pending state because there is // no node that with volume plugin `plugin2` err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t2)) return nil }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { task := store.GetTask(tx, "task2_ID") if task.Status.State >= api.TaskStateAssigned { t.Fatalf("task 'task2_ID' should not have been 
assigned to node %v", task.NodeID) } }) // Now add the second node err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, n2)) return nil }) assert.NoError(t, err) // Check that t2 has been assigned assignment1 := watchAssignment(t, watch) assert.Equal(t, assignment1.ID, "task2_ID") assert.Equal(t, assignment1.NodeID, "node2_ID") // Create t3; it should stay in the pending state because there is // no node that with network plugin `plugin1` err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t3)) return nil }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { task := store.GetTask(tx, "task3_ID") if task.Status.State >= api.TaskStateAssigned { t.Fatal("task 'task3_ID' should not have been assigned") } }) // Now add the node3 err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, n3)) return nil }) assert.NoError(t, err) // Check that t3 has been assigned assignment2 := watchAssignment(t, watch) assert.Equal(t, assignment2.ID, "task3_ID") assert.Equal(t, assignment2.NodeID, "node3_ID") }
func TestPreassignedTasks(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "node1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ { ID: "task1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, { ID: "task2", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, NodeID: initialNodeSet[0].ID, }, { ID: "task3", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, NodeID: initialNodeSet[0].ID, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() //preassigned tasks would be processed first assignment1 := watchAssignment(t, watch) // task2 and task3 are preassigned to node1 assert.Equal(t, assignment1.NodeID, "node1") assert.Regexp(t, assignment1.ID, "(task2|task3)") assignment2 := watchAssignment(t, watch) if assignment1.ID == "task2" { assert.Equal(t, "task3", assignment2.ID) } else { assert.Equal(t, "task2", assignment2.ID) } // task1 would be assigned to node2 because node1 has 2 tasks already assignment3 := watchAssignment(t, watch) 
assert.Equal(t, assignment3.ID, "task1") assert.Equal(t, assignment3.NodeID, "node2") }
func TestSchedulerResourceConstraint(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. underprovisionedNode := &api.Node{ ID: "underprovisioned", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "underprovisioned", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } initialTask := &api.Task{ ID: "id1", Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 2e9, }, }, }, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStateAllocated, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateTask(tx, initialTask)) assert.NoError(t, store.CreateNode(tx, underprovisionedNode)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() err = s.Update(func(tx store.Tx) error { // Create a node with enough memory. The task should get // assigned to this node. node := &api.Node{ ID: "bignode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "bignode", }, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 4e9, MemoryBytes: 8e9, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, node)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "bignode", assignment.NodeID) }
func TestDrain(t *testing.T) { ctx := context.Background() initialService := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnNone, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 6, }, }, }, } initialNodeSet := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, }, // We should NOT kick out tasks on UNKNOWN nodes. { ID: "id3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name3", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, }, { ID: "id4", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name4", }, Availability: api.NodeAvailabilityPause, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, Availability: api.NodeAvailabilityDrain, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ // Task not assigned to any node { ID: "id0", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 1, ServiceAnnotations: api.Annotations{ Name: "name0", }, ServiceID: "id1", }, // Tasks assigned to the nodes defined above { ID: "id1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 2, ServiceAnnotations: api.Annotations{ Name: "name1", }, ServiceID: "id1", NodeID: "id1", }, { ID: "id2", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 3, ServiceAnnotations: api.Annotations{ Name: "name2", }, ServiceID: 
"id1", NodeID: "id2", }, { ID: "id3", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 4, ServiceAnnotations: api.Annotations{ Name: "name3", }, ServiceID: "id1", NodeID: "id3", }, { ID: "id4", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 5, ServiceAnnotations: api.Annotations{ Name: "name4", }, ServiceID: "id1", NodeID: "id4", }, { ID: "id5", Status: api.TaskStatus{ State: api.TaskStateNew, }, Slot: 6, ServiceAnnotations: api.Annotations{ Name: "name5", }, ServiceID: "id1", NodeID: "id5", }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepopulate service assert.NoError(t, store.CreateService(tx, initialService)) // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() go func() { assert.NoError(t, orchestrator.Run(ctx)) }() // id2 and id5 should be killed immediately deletion1 := watchShutdownTask(t, watch) deletion2 := watchShutdownTask(t, watch) assert.Regexp(t, "id(2|5)", deletion1.ID) assert.Regexp(t, "id(2|5)", deletion1.NodeID) assert.Regexp(t, "id(2|5)", deletion2.ID) assert.Regexp(t, "id(2|5)", deletion2.NodeID) // Create a new task, assigned to node id2 err = s.Update(func(tx store.Tx) error { task := initialTaskSet[2].Copy() task.ID = "newtask" task.NodeID = "id2" assert.NoError(t, store.CreateTask(tx, task)) return nil }) assert.NoError(t, err) deletion3 := watchShutdownTask(t, watch) assert.Equal(t, "newtask", deletion3.ID) assert.Equal(t, "id2", deletion3.NodeID) // Set node id4 to the DRAINED state err = s.Update(func(tx store.Tx) error { n := initialNodeSet[3].Copy() n.Spec.Availability = api.NodeAvailabilityDrain 
assert.NoError(t, store.UpdateNode(tx, n)) return nil }) assert.NoError(t, err) deletion4 := watchShutdownTask(t, watch) assert.Equal(t, "id4", deletion4.ID) assert.Equal(t, "id4", deletion4.NodeID) // Delete node id1 err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNode(tx, "id1")) return nil }) assert.NoError(t, err) deletion5 := watchShutdownTask(t, watch) assert.Equal(t, "id1", deletion5.ID) assert.Equal(t, "id1", deletion5.NodeID) }
// becomeLeader starts the subsystems that are run on the leader. func (m *Manager) becomeLeader(ctx context.Context) { s := m.RaftNode.MemoryStore() rootCA := m.config.SecurityConfig.RootCA() nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID() raftCfg := raft.DefaultRaftConfig() raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick) raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick) clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization() initialCAConfig := ca.DefaultCAConfig() initialCAConfig.ExternalCAs = m.config.ExternalCAs s.Update(func(tx store.Tx) error { // Add a default cluster object to the // store. Don't check the error because // we expect this to fail unless this // is a brand new cluster. store.CreateCluster(tx, defaultClusterObject(clusterID, initialCAConfig, raftCfg, rootCA)) // Add Node entry for ourself, if one // doesn't exist already. store.CreateNode(tx, managerNode(nodeID)) return nil }) // Attempt to rotate the key-encrypting-key of the root CA key-material err := m.rotateRootCAKEK(ctx, clusterID) if err != nil { log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed") } m.replicatedOrchestrator = orchestrator.NewReplicatedOrchestrator(s) m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s) m.taskReaper = orchestrator.NewTaskReaper(s) m.scheduler = scheduler.New(s) m.keyManager = keymanager.New(s, keymanager.DefaultConfig()) // TODO(stevvooe): Allocate a context that can be used to // shutdown underlying manager processes when leadership is // lost. m.allocator, err = allocator.New(s) if err != nil { log.G(ctx).WithError(err).Error("failed to create allocator") // TODO(stevvooe): It doesn't seem correct here to fail // creating the allocator but then use it anyway. 
} if m.keyManager != nil { go func(keyManager *keymanager.KeyManager) { if err := keyManager.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("keymanager failed with an error") } }(m.keyManager) } go func(d *dispatcher.Dispatcher) { if err := d.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("Dispatcher exited with an error") } }(m.Dispatcher) go func(server *ca.Server) { if err := server.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("CA signer exited with an error") } }(m.caserver) // Start all sub-components in separate goroutines. // TODO(aluzzardi): This should have some kind of error handling so that // any component that goes down would bring the entire manager down. if m.allocator != nil { go func(allocator *allocator.Allocator) { if err := allocator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("allocator exited with an error") } }(m.allocator) } go func(scheduler *scheduler.Scheduler) { if err := scheduler.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("scheduler exited with an error") } }(m.scheduler) go func(taskReaper *orchestrator.TaskReaper) { taskReaper.Run() }(m.taskReaper) go func(orchestrator *orchestrator.ReplicatedOrchestrator) { if err := orchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error") } }(m.replicatedOrchestrator) go func(globalOrchestrator *orchestrator.GlobalOrchestrator) { if err := globalOrchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("global orchestrator exited with an error") } }(m.globalOrchestrator) }
func TestUpdateNodeDemote(t *testing.T) { tc := cautils.NewTestCA(nil, cautils.AcceptancePolicy(true, true, "")) ts := newTestServer(t) nodes, clockSource := raftutils.NewRaftCluster(t, tc) defer raftutils.TeardownCluster(t, nodes) // Assign one of the raft node to the test server ts.Server.raft = nodes[1].Node ts.Server.store = nodes[1].MemoryStore() // Create a node object for each of the managers assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) return nil })) // Stop Node 3 (1 node out of 3) nodes[3].Server.Stop() nodes[3].Shutdown() // Node 3 should be listed as Unreachable assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 3 { return fmt.Errorf("expected 3 nodes, got %d", len(members)) } if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE { return fmt.Errorf("expected node 3 to be unreachable") } return nil })) // Try to demote Node 2, this should fail because of the quorum safeguard r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec := r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version := &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(), Spec: 
spec, NodeVersion: version, }) assert.Error(t, err) assert.Equal(t, codes.FailedPrecondition, grpc.Code(err)) // Restart Node 3 nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false) raftutils.WaitForCluster(t, clockSource, nodes) // Node 3 should be listed as Reachable assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 3 { return fmt.Errorf("expected 3 nodes, got %d", len(members)) } if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE { return fmt.Errorf("expected node 3 to be reachable") } return nil })) // Try to demote Node 3, this should succeed r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.NoError(t, err) newCluster := map[uint64]*raftutils.TestNode{ 1: nodes[1], 2: nodes[2], } raftutils.WaitForCluster(t, clockSource, newCluster) // Server should list 2 members assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 2 { return fmt.Errorf("expected 2 nodes, got %d", len(members)) } return nil })) // Try to demote Node 2 r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.NoError(t, err) newCluster = map[uint64]*raftutils.TestNode{ 1: nodes[1], } 
raftutils.WaitForCluster(t, clockSource, newCluster) // New server should list 1 member assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 1 { return fmt.Errorf("expected 1 node, got %d", len(members)) } return nil })) // Make sure we can't demote the last manager. r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.Error(t, err) assert.Equal(t, codes.FailedPrecondition, grpc.Code(err)) }
func TestHA(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "id1", Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id3", Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id4", Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id5", Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } taskTemplate1 := &api.Task{ DesiredState: api.TaskStateRunning, ServiceID: "service1", Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", }, }, }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } taskTemplate2 := &api.Task{ DesiredState: api.TaskStateRunning, ServiceID: "service2", Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:2", }, }, }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() t1Instances := 18 err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks from template 1 for i := 0; i != t1Instances; i++ { taskTemplate1.ID = fmt.Sprintf("t1id%d", i) assert.NoError(t, store.CreateTask(tx, taskTemplate1)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() t1Assignments := make(map[string]int) for i := 0; i != t1Instances; i++ { assignment := watchAssignment(t, watch) if !strings.HasPrefix(assignment.ID, "t1") { t.Fatal("got assignment for different kind of task") } t1Assignments[assignment.NodeID]++ } assert.Len(t, t1Assignments, 5) nodesWith3T1Tasks := 0 nodesWith4T1Tasks := 0 for nodeID, taskCount := range t1Assignments { if taskCount == 3 { nodesWith3T1Tasks++ } else 
if taskCount == 4 { nodesWith4T1Tasks++ } else { t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID) } } assert.Equal(t, 3, nodesWith4T1Tasks) assert.Equal(t, 2, nodesWith3T1Tasks) t2Instances := 2 // Add a new service with two instances. They should fill the nodes // that only have two tasks. err = s.Update(func(tx store.Tx) error { for i := 0; i != t2Instances; i++ { taskTemplate2.ID = fmt.Sprintf("t2id%d", i) assert.NoError(t, store.CreateTask(tx, taskTemplate2)) } return nil }) assert.NoError(t, err) t2Assignments := make(map[string]int) for i := 0; i != t2Instances; i++ { assignment := watchAssignment(t, watch) if !strings.HasPrefix(assignment.ID, "t2") { t.Fatal("got assignment for different kind of task") } t2Assignments[assignment.NodeID]++ } assert.Len(t, t2Assignments, 2) for nodeID := range t2Assignments { assert.Equal(t, 3, t1Assignments[nodeID]) } // Scale up service 1 to 21 tasks. It should cover the two nodes that // service 2 was assigned to, and also one other node. 
err = s.Update(func(tx store.Tx) error { for i := t1Instances; i != t1Instances+3; i++ { taskTemplate1.ID = fmt.Sprintf("t1id%d", i) assert.NoError(t, store.CreateTask(tx, taskTemplate1)) } return nil }) assert.NoError(t, err) var sharedNodes [2]string for i := 0; i != 3; i++ { assignment := watchAssignment(t, watch) if !strings.HasPrefix(assignment.ID, "t1") { t.Fatal("got assignment for different kind of task") } if t1Assignments[assignment.NodeID] == 5 { t.Fatal("more than one new task assigned to the same node") } t1Assignments[assignment.NodeID]++ if t2Assignments[assignment.NodeID] != 0 { if sharedNodes[0] == "" { sharedNodes[0] = assignment.NodeID } else if sharedNodes[1] == "" { sharedNodes[1] = assignment.NodeID } else { t.Fatal("all three assignments went to nodes with service2 tasks") } } } assert.NotEmpty(t, sharedNodes[0]) assert.NotEmpty(t, sharedNodes[1]) assert.NotEqual(t, sharedNodes[0], sharedNodes[1]) nodesWith4T1Tasks = 0 nodesWith5T1Tasks := 0 for nodeID, taskCount := range t1Assignments { if taskCount == 4 { nodesWith4T1Tasks++ } else if taskCount == 5 { nodesWith5T1Tasks++ } else { t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID) } } assert.Equal(t, 4, nodesWith4T1Tasks) assert.Equal(t, 1, nodesWith5T1Tasks) // Add another task from service2. It must not land on the node that // has 5 service1 tasks. err = s.Update(func(tx store.Tx) error { taskTemplate2.ID = "t2id4" assert.NoError(t, store.CreateTask(tx, taskTemplate2)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) if assignment.ID != "t2id4" { t.Fatal("got assignment for different task") } if t2Assignments[assignment.NodeID] != 0 { t.Fatal("was scheduled on a node that already has a service2 task") } if t1Assignments[assignment.NodeID] == 5 { t.Fatal("was scheduled on the node that has the most service1 tasks") } t2Assignments[assignment.NodeID]++ // Remove all tasks on node id1. 
err = s.Update(func(tx store.Tx) error { tasks, err := store.FindTasks(tx, store.ByNodeID("id1")) assert.NoError(t, err) for _, task := range tasks { assert.NoError(t, store.DeleteTask(tx, task.ID)) } return nil }) assert.NoError(t, err) t1Assignments["id1"] = 0 t2Assignments["id1"] = 0 // Add four instances of service1 and two instances of service2. // All instances of service1 should land on node "id1", and one // of the two service2 instances should as well. // Put these in a map to randomize the order in which they are // created. err = s.Update(func(tx store.Tx) error { tasksMap := make(map[string]*api.Task) for i := 22; i <= 25; i++ { taskTemplate1.ID = fmt.Sprintf("t1id%d", i) tasksMap[taskTemplate1.ID] = taskTemplate1.Copy() } for i := 5; i <= 6; i++ { taskTemplate2.ID = fmt.Sprintf("t2id%d", i) tasksMap[taskTemplate2.ID] = taskTemplate2.Copy() } for _, task := range tasksMap { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) for i := 0; i != 4+2; i++ { assignment := watchAssignment(t, watch) if strings.HasPrefix(assignment.ID, "t1") { t1Assignments[assignment.NodeID]++ } else if strings.HasPrefix(assignment.ID, "t2") { t2Assignments[assignment.NodeID]++ } } assert.Equal(t, 4, t1Assignments["id1"]) assert.Equal(t, 1, t2Assignments["id1"]) }
// IssueNodeCertificate is responsible for gatekeeping both certificate requests from new nodes in the swarm,
// and authorizing certificate renewals.
// If a node presented a valid certificate, the corresponding certificate is set in a RENEW state.
// If a node failed to present a valid certificate, we check for a valid join token and set the
// role accordingly. A new random node ID is generated, and the corresponding node entry is created.
// IssueNodeCertificate is the only place where new node entries to raft should be created.
//
// The request is handled in this order, returning at the first step that applies:
//  1. An empty CSR is rejected with codes.InvalidArgument.
//  2. An error from s.isRunningLocked is returned as-is.
//  3. A request whose context carries a RemoteNodeInfo with a non-empty NodeID under
//     LocalRequestKey is treated as a renewal for that node ID.
//  4. A caller authorized as a worker, and then as a manager, is treated as a renewal
//     for the node ID returned by the authorization.
//  5. Otherwise the request token is compared against the manager and worker join
//     tokens; with no match the request is rejected with codes.InvalidArgument.
//  6. A node entry with a freshly generated ID is created in the store and its ID is
//     returned together with NodeMembershipAccepted.
func (s *Server) IssueNodeCertificate(ctx context.Context, request *api.IssueNodeCertificateRequest) (*api.IssueNodeCertificateResponse, error) {
	// First, let's see if the remote node is presenting a non-empty CSR
	if len(request.CSR) == 0 {
		return nil, grpc.Errorf(codes.InvalidArgument, codes.InvalidArgument.String())
	}

	if _, err := s.isRunningLocked(); err != nil {
		return nil, err
	}

	var (
		blacklistedCerts map[string]*api.BlacklistedCertificate
		clusters         []*api.Cluster
		err              error
	)

	s.store.View(func(readTx store.ReadTx) {
		clusters, err = store.FindClusters(readTx, store.ByName("default"))
	})

	// Not having a cluster object yet means we can't check
	// the blacklist.
	// A lookup error is treated the same way: blacklistedCerts simply stays nil.
	if err == nil && len(clusters) == 1 {
		blacklistedCerts = clusters[0].BlacklistedCertificates
	}

	// Renewing the cert with a local (unix socket) is always valid.
	localNodeInfo := ctx.Value(LocalRequestKey)
	if localNodeInfo != nil {
		nodeInfo, ok := localNodeInfo.(RemoteNodeInfo)
		if ok && nodeInfo.NodeID != "" {
			return s.issueRenewCertificate(ctx, nodeInfo.NodeID, request.CSR)
		}
	}

	// If the remote node is a worker (either forwarded by a manager, or calling directly),
	// issue a renew worker certificate entry with the correct ID
	nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{WorkerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
	if err == nil {
		return s.issueRenewCertificate(ctx, nodeID, request.CSR)
	}

	// If the remote node is a manager (either forwarded by another manager, or calling directly),
	// issue a renew certificate entry with the correct ID
	nodeID, err = AuthorizeForwardedRoleAndOrg(ctx, []string{ManagerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
	if err == nil {
		return s.issueRenewCertificate(ctx, nodeID, request.CSR)
	}

	// The remote node didn't successfully present a valid MTLS certificate, let's issue a
	// certificate with a new random ID
	// -1 is a sentinel meaning "no join token matched"; it is checked with role < 0 below.
	role := api.NodeRole(-1)

	// The join tokens are read while holding s.mu; the manager token is checked first.
	s.mu.Lock()
	if subtle.ConstantTimeCompare([]byte(s.joinTokens.Manager), []byte(request.Token)) == 1 {
		role = api.NodeRoleManager
	} else if subtle.ConstantTimeCompare([]byte(s.joinTokens.Worker), []byte(request.Token)) == 1 {
		role = api.NodeRoleWorker
	}
	s.mu.Unlock()

	if role < 0 {
		return nil, grpc.Errorf(codes.InvalidArgument, "A valid join token is necessary to join this cluster")
	}

	// Max number of collisions of ID or CN to tolerate before giving up
	maxRetries := 3
	// Generate a random ID for this new node
	// The loop index starts at 0 and the loop gives up once i == maxRetries after a
	// collision, so up to maxRetries+1 IDs are tried.
	for i := 0; ; i++ {
		nodeID = identity.NewID()

		// Create a new node
		// Note: this err is scoped to the loop body and shadows the outer err.
		err := s.store.Update(func(tx store.Tx) error {
			node := &api.Node{
				Role: role,
				ID:   nodeID,
				Certificate: api.Certificate{
					CSR:  request.CSR,
					CN:   nodeID,
					Role: role,
					Status: api.IssuanceStatus{
						State: api.IssuanceStatePending,
					},
				},
				Spec: api.NodeSpec{
					DesiredRole:  role,
					Membership:   api.NodeMembershipAccepted,
					Availability: request.Availability,
				},
			}

			return store.CreateNode(tx, node)
		})
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id":   nodeID,
				"node.role": role,
				"method":    "IssueNodeCertificate",
			}).Debugf("new certificate entry added")
			break
		}
		// Only an ID collision is retried; any other store error aborts the request.
		if err != store.ErrExist {
			return nil, err
		}
		if i == maxRetries {
			return nil, err
		}
		log.G(ctx).WithFields(logrus.Fields{
			"node.id":   nodeID,
			"node.role": role,
			"method":    "IssueNodeCertificate",
		}).Errorf("randomly generated node ID collided with an existing one - retrying")
	}

	return &api.IssueNodeCertificateResponse{
		NodeID:         nodeID,
		NodeMembership: api.NodeMembershipAccepted,
	}, nil
}
func testUpdateNodeDemote(leader bool, t *testing.T) { tc := cautils.NewTestCA(nil) defer tc.Stop() ts := newTestServer(t) defer ts.Stop() nodes, clockSource := raftutils.NewRaftCluster(t, tc) defer raftutils.TeardownCluster(t, nodes) // Assign one of the raft node to the test server ts.Server.raft = nodes[1].Node ts.Server.store = nodes[1].MemoryStore() // Create a node object for each of the managers assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) return nil })) // Stop Node 3 (1 node out of 3) nodes[3].Server.Stop() nodes[3].ShutdownRaft() // Node 3 should be listed as Unreachable assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 3 { return fmt.Errorf("expected 3 nodes, got %d", len(members)) } if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE { return fmt.Errorf("expected node 3 to be unreachable") } return nil })) // Try to demote Node 2, this should fail because of the quorum safeguard r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec := r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version := &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: 
nodes[2].SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.Error(t, err) assert.Equal(t, codes.FailedPrecondition, grpc.Code(err)) // Restart Node 3 nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false) raftutils.WaitForCluster(t, clockSource, nodes) // Node 3 should be listed as Reachable assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 3 { return fmt.Errorf("expected 3 nodes, got %d", len(members)) } if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE { return fmt.Errorf("expected node 3 to be reachable") } return nil })) // Try to demote Node 3, this should succeed r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.NoError(t, err) newCluster := map[uint64]*raftutils.TestNode{ 1: nodes[1], 2: nodes[2], } raftutils.WaitForCluster(t, clockSource, newCluster) // Server should list 2 members assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := nodes[1].GetMemberlist() if len(members) != 2 { return fmt.Errorf("expected 2 nodes, got %d", len(members)) } return nil })) var demoteNode, lastNode *raftutils.TestNode if leader { demoteNode = nodes[1] lastNode = nodes[2] } else { demoteNode = nodes[2] lastNode = nodes[1] } // Try to demote a Node and scale down to 1 r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: demoteNode.SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = 
ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: demoteNode.SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.NoError(t, err) // Update the server ts.Server.raft = lastNode.Node ts.Server.store = lastNode.MemoryStore() newCluster = map[uint64]*raftutils.TestNode{ 1: lastNode, } raftutils.WaitForCluster(t, clockSource, newCluster) assert.NoError(t, raftutils.PollFunc(clockSource, func() error { members := lastNode.GetMemberlist() if len(members) != 1 { return fmt.Errorf("expected 1 node, got %d", len(members)) } return nil })) // Make sure we can't demote the last manager. r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Role = api.NodeRoleWorker version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.Error(t, err) assert.Equal(t, codes.FailedPrecondition, grpc.Code(err)) // Propose a change in the spec and check if the remaining node can still process updates r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) spec = r.Node.Spec.Copy() spec.Availability = api.NodeAvailabilityDrain version = &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID(), Spec: spec, NodeVersion: version, }) assert.NoError(t, err) // Get node information and check that the availability is set to drain r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()}) assert.NoError(t, err) assert.Equal(t, r.Node.Spec.Availability, api.NodeAvailabilityDrain) }
func TestUpdateNode(t *testing.T) { tc := cautils.NewTestCA(nil) defer tc.Stop() ts := newTestServer(t) defer ts.Stop() nodes := make(map[uint64]*raftutils.TestNode) nodes[1], _ = raftutils.NewInitNode(t, tc, nil) defer raftutils.TeardownCluster(t, nodes) nodeID := nodes[1].SecurityConfig.ClientTLSCreds.NodeID() // Assign one of the raft node to the test server ts.Server.raft = nodes[1].Node ts.Server.store = nodes[1].MemoryStore() _, err := ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: &api.NodeSpec{ Availability: api.NodeAvailabilityDrain, }, NodeVersion: &api.Version{}, }) assert.Error(t, err) assert.Equal(t, codes.NotFound, grpc.Code(err)) // Create a node object for the manager assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) return nil })) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{}) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: "invalid", Spec: &api.NodeSpec{}, NodeVersion: &api.Version{}}) assert.Error(t, err) assert.Equal(t, codes.NotFound, grpc.Code(err)) r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID}) assert.NoError(t, err) if !assert.NotNil(t, r) { assert.FailNow(t, "got unexpected nil response from GetNode") } assert.NotNil(t, r.Node) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID}) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) spec := r.Node.Spec.Copy() spec.Availability = api.NodeAvailabilityDrain _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: spec, }) assert.Error(t, err) 
assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: spec, NodeVersion: &r.Node.Meta.Version, }) assert.NoError(t, err) r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID}) assert.NoError(t, err) if !assert.NotNil(t, r) { assert.FailNow(t, "got unexpected nil response from GetNode") } assert.NotNil(t, r.Node) assert.NotNil(t, r.Node.Spec) assert.Equal(t, api.NodeAvailabilityDrain, r.Node.Spec.Availability) version := &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version}) assert.NoError(t, err) // Perform an update with the "old" version. _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version}) assert.Error(t, err) }
func TestListManagerNodes(t *testing.T) { t.Parallel() tc := cautils.NewTestCA(nil) defer tc.Stop() ts := newTestServer(t) defer ts.Stop() nodes, clockSource := raftutils.NewRaftCluster(t, tc) defer raftutils.TeardownCluster(t, nodes) // Create a node object for each of the managers assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID()})) assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()})) assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()})) return nil })) // Assign one of the raft node to the test server ts.Server.raft = nodes[1].Node ts.Server.store = nodes[1].MemoryStore() // There should be 3 reachable managers listed r, err := ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{}) assert.NoError(t, err) assert.NotNil(t, r) managers := getMap(t, r.Nodes) assert.Len(t, ts.Server.raft.GetMemberlist(), 3) assert.Len(t, r.Nodes, 3) // Node 1 should be the leader for i := 1; i <= 3; i++ { if i == 1 { assert.True(t, managers[nodes[uint64(i)].Config.ID].Leader) continue } assert.False(t, managers[nodes[uint64(i)].Config.ID].Leader) } // All nodes should be reachable for i := 1; i <= 3; i++ { assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability) } // Add two more nodes to the cluster raftutils.AddRaftNode(t, clockSource, nodes, tc) raftutils.AddRaftNode(t, clockSource, nodes, tc) raftutils.WaitForCluster(t, clockSource, nodes) // Add node entries for these assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[4].SecurityConfig.ClientTLSCreds.NodeID()})) assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[5].SecurityConfig.ClientTLSCreds.NodeID()})) return nil })) // There should be 5 reachable managers listed 
r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{}) assert.NoError(t, err) assert.NotNil(t, r) managers = getMap(t, r.Nodes) assert.Len(t, ts.Server.raft.GetMemberlist(), 5) assert.Len(t, r.Nodes, 5) for i := 1; i <= 5; i++ { assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability) } // Stops 2 nodes nodes[4].Server.Stop() nodes[4].ShutdownRaft() nodes[5].Server.Stop() nodes[5].ShutdownRaft() // Node 4 and Node 5 should be listed as Unreachable assert.NoError(t, raftutils.PollFunc(clockSource, func() error { r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{}) if err != nil { return err } managers = getMap(t, r.Nodes) if len(r.Nodes) != 5 { return fmt.Errorf("expected 5 nodes, got %d", len(r.Nodes)) } if managers[nodes[4].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE { return fmt.Errorf("expected node 4 to be unreachable") } if managers[nodes[5].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE { return fmt.Errorf("expected node 5 to be unreachable") } return nil })) // Restart the 2 nodes nodes[4] = raftutils.RestartNode(t, clockSource, nodes[4], false) nodes[5] = raftutils.RestartNode(t, clockSource, nodes[5], false) raftutils.WaitForCluster(t, clockSource, nodes) assert.Len(t, ts.Server.raft.GetMemberlist(), 5) // All the nodes should be reachable again assert.NoError(t, raftutils.PollFunc(clockSource, func() error { r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{}) if err != nil { return err } managers = getMap(t, r.Nodes) for i := 1; i <= 5; i++ { if managers[nodes[uint64(i)].Config.ID].Reachability != api.RaftMemberStatus_REACHABLE { return fmt.Errorf("node %x is unreachable", nodes[uint64(i)].Config.ID) } } return nil })) // Switch the raft node used by the server ts.Server.raft = nodes[2].Node // Stop node 1 (leader) nodes[1].Server.Stop() nodes[1].ShutdownRaft() newCluster := map[uint64]*raftutils.TestNode{ 2: nodes[2], 3: 
nodes[3], 4: nodes[4], 5: nodes[5], } // Wait for the re-election to occur raftutils.WaitForCluster(t, clockSource, newCluster) // Node 1 should not be the leader anymore assert.NoError(t, raftutils.PollFunc(clockSource, func() error { r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{}) if err != nil { return err } managers = getMap(t, r.Nodes) if managers[nodes[1].Config.ID].Leader { return fmt.Errorf("expected node 1 not to be the leader") } if managers[nodes[1].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE { return fmt.Errorf("expected node 1 to be unreachable") } return nil })) // Restart node 1 nodes[1].ShutdownRaft() nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false) raftutils.WaitForCluster(t, clockSource, nodes) // Ensure that node 1 is not the leader assert.False(t, managers[nodes[uint64(1)].Config.ID].Leader) // Check that another node got the leader status var leader uint64 leaderCount := 0 for i := 1; i <= 5; i++ { if managers[nodes[uint64(i)].Config.ID].Leader { leader = nodes[uint64(i)].Config.ID leaderCount++ } } // There should be only one leader after node 1 recovery and it // should be different than node 1 assert.Equal(t, 1, leaderCount) assert.NotEqual(t, leader, nodes[1].Config.ID) }
func TestManager(t *testing.T) { ctx := context.Background() temp, err := ioutil.TempFile("", "test-socket") assert.NoError(t, err) assert.NoError(t, temp.Close()) assert.NoError(t, os.Remove(temp.Name())) defer os.RemoveAll(temp.Name()) stateDir, err := ioutil.TempDir("", "test-raft") assert.NoError(t, err) defer os.RemoveAll(stateDir) tc := testutils.NewTestCA(t, func(p ca.CertPaths) *ca.KeyReadWriter { return ca.NewKeyReadWriter(p, []byte("kek"), nil) }) defer tc.Stop() agentSecurityConfig, err := tc.NewNodeConfig(ca.WorkerRole) assert.NoError(t, err) agentDiffOrgSecurityConfig, err := tc.NewNodeConfigOrg(ca.WorkerRole, "another-org") assert.NoError(t, err) managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole) assert.NoError(t, err) m, err := New(&Config{ RemoteAPI: RemoteAddrs{ListenAddr: "127.0.0.1:0"}, ControlAPI: temp.Name(), StateDir: stateDir, SecurityConfig: managerSecurityConfig, AutoLockManagers: true, UnlockKey: []byte("kek"), }) assert.NoError(t, err) assert.NotNil(t, m) tcpAddr := m.Addr() done := make(chan error) defer close(done) go func() { done <- m.Run(ctx) }() opts := []grpc.DialOption{ grpc.WithTimeout(10 * time.Second), grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds), } conn, err := grpc.Dial(tcpAddr, opts...) assert.NoError(t, err) defer func() { assert.NoError(t, conn.Close()) }() // We have to send a dummy request to verify if the connection is actually up. client := api.NewDispatcherClient(conn) _, err = client.Heartbeat(ctx, &api.HeartbeatRequest{}) assert.Equal(t, dispatcher.ErrNodeNotRegistered.Error(), grpc.ErrorDesc(err)) _, err = client.Session(ctx, &api.SessionRequest{}) assert.NoError(t, err) // Try to have a client in a different org access this manager opts = []grpc.DialOption{ grpc.WithTimeout(10 * time.Second), grpc.WithTransportCredentials(agentDiffOrgSecurityConfig.ClientTLSCreds), } conn2, err := grpc.Dial(tcpAddr, opts...) 
assert.NoError(t, err) defer func() { assert.NoError(t, conn2.Close()) }() client = api.NewDispatcherClient(conn2) _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) assert.Contains(t, grpc.ErrorDesc(err), "Permission denied: unauthorized peer role: rpc error: code = 7 desc = Permission denied: remote certificate not part of organization") // Verify that requests to the various GRPC services running on TCP // are rejected if they don't have certs. opts = []grpc.DialOption{ grpc.WithTimeout(10 * time.Second), grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})), } noCertConn, err := grpc.Dial(tcpAddr, opts...) assert.NoError(t, err) defer func() { assert.NoError(t, noCertConn.Close()) }() client = api.NewDispatcherClient(noCertConn) _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request") controlClient := api.NewControlClient(noCertConn) _, err = controlClient.ListNodes(context.Background(), &api.ListNodesRequest{}) assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request") raftClient := api.NewRaftMembershipClient(noCertConn) _, err = raftClient.Join(context.Background(), &api.JoinRequest{}) assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request") opts = []grpc.DialOption{ grpc.WithTimeout(10 * time.Second), grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds), } controlConn, err := grpc.Dial(tcpAddr, opts...) 
assert.NoError(t, err) defer func() { assert.NoError(t, controlConn.Close()) }() // check that the kek is added to the config var cluster api.Cluster m.raftNode.MemoryStore().View(func(tx store.ReadTx) { clusters, err := store.FindClusters(tx, store.All) require.NoError(t, err) require.Len(t, clusters, 1) cluster = *clusters[0] }) require.NotNil(t, cluster) require.Len(t, cluster.UnlockKeys, 1) require.Equal(t, &api.EncryptionKey{ Subsystem: ca.ManagerRole, Key: []byte("kek"), }, cluster.UnlockKeys[0]) // Test removal of the agent node agentID := agentSecurityConfig.ClientTLSCreds.NodeID() assert.NoError(t, m.raftNode.MemoryStore().Update(func(tx store.Tx) error { return store.CreateNode(tx, &api.Node{ ID: agentID, Certificate: api.Certificate{ Role: api.NodeRoleWorker, CN: agentID, }, }, ) })) controlClient = api.NewControlClient(controlConn) _, err = controlClient.RemoveNode(context.Background(), &api.RemoveNodeRequest{ NodeID: agentID, Force: true, }, ) assert.NoError(t, err) client = api.NewDispatcherClient(conn) _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) assert.Contains(t, grpc.ErrorDesc(err), "removed from swarm") m.Stop(ctx) // After stopping we should MAY receive an error from ListenAndServe if // all this happened before WaitForLeader completed, so don't check the // error. <-done }
func TestReplicatedScaleDown(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() s1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 6, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s1)) nodes := []*api.Node{ { ID: "node1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name3", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } for _, node := range nodes { assert.NoError(t, store.CreateNode(tx, node)) } // task1 is assigned to node1 // task2 - task3 are assigned to node2 // task4 - task6 are assigned to node3 // task7 is unassigned tasks := []*api.Task{ { ID: "task1", Slot: 1, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateStarting, }, ServiceAnnotations: api.Annotations{ Name: "task1", }, ServiceID: "id1", NodeID: "node1", }, { ID: "task2", Slot: 2, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, ServiceID: "id1", NodeID: "node2", }, { ID: "task3", Slot: 3, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task3", }, ServiceID: 
"id1", NodeID: "node2", }, { ID: "task4", Slot: 4, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task4", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task5", Slot: 5, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task5", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task6", Slot: 6, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task6", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task7", Slot: 7, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceAnnotations: api.Annotations{ Name: "task7", }, ServiceID: "id1", }, } for _, task := range tasks { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() // Replicas was set to 6, but we started with 7 tasks. task7 should // be the one the orchestrator chose to shut down because it was not // assigned yet. observedShutdown := watchShutdownTask(t, watch) assert.Equal(t, "task7", observedShutdown.ID) // Now scale down to 2 instances. err = s.Update(func(tx store.Tx) error { s1.Spec.Mode = &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, } assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Tasks should be shut down in a way that balances the remaining tasks. // node2 and node3 should be preferred over node1 because node1's task // is not running yet. 
shutdowns := make(map[string]int) for i := 0; i != 4; i++ { observedShutdown := watchShutdownTask(t, watch) shutdowns[observedShutdown.NodeID]++ } assert.Equal(t, 1, shutdowns["node1"]) assert.Equal(t, 1, shutdowns["node2"]) assert.Equal(t, 2, shutdowns["node3"]) // There should be remaining tasks on node2 and node3. s.View(func(readTx store.ReadTx) { tasks, err := store.FindTasks(readTx, store.ByDesiredState(api.TaskStateRunning)) require.NoError(t, err) require.Len(t, tasks, 2) if tasks[0].NodeID == "node2" { assert.Equal(t, "node3", tasks[1].NodeID) } else { assert.Equal(t, "node3", tasks[0].NodeID) assert.Equal(t, "node2", tasks[1].NodeID) } }) }
func benchScheduler(b *testing.B, nodes, tasks int, networkConstraints bool) { ctx := context.Background() for iters := 0; iters < b.N; iters++ { b.StopTimer() s := store.NewMemoryStore(nil) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) go func() { _ = scheduler.Run(ctx) }() // Let the scheduler get started runtime.Gosched() _ = s.Update(func(tx store.Tx) error { // Create initial nodes and tasks for i := 0; i < nodes; i++ { n := &api.Node{ ID: identity.NewID(), Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name" + strconv.Itoa(i), Labels: make(map[string]string), }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{}, }, } // Give every third node a special network if i%3 == 0 { n.Description.Engine.Plugins = []api.PluginDescription{ { Name: "network", Type: "Network", }, } } err := store.CreateNode(tx, n) if err != nil { panic(err) } } for i := 0; i < tasks; i++ { id := "task" + strconv.Itoa(i) t := &api.Task{ ID: id, DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: id, }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } if networkConstraints { t.Networks = []*api.NetworkAttachment{ { Network: &api.Network{ DriverState: &api.Driver{ Name: "network", }, }, }, } } err := store.CreateTask(tx, t) if err != nil { panic(err) } } b.StartTimer() return nil }) for i := 0; i != tasks; i++ { <-watch } scheduler.Stop() cancel() s.Close() } }
func TestScheduler(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ { ID: "id1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStateAssigned, }, NodeID: initialNodeSet[0].ID, }, { ID: "id2", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, { ID: "id3", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() assignment1 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task assert.Regexp(t, assignment1.NodeID, "(id2|id3)") assignment2 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task if assignment1.NodeID == "id2" { assert.Equal(t, "id3", assignment2.NodeID) } else { assert.Equal(t, "id2", assignment2.NodeID) } err = 
s.Update(func(tx store.Tx) error { // Update each node to make sure this doesn't mess up the // scheduler's state. for _, n := range initialNodeSet { assert.NoError(t, store.UpdateNode(tx, n)) } return nil }) assert.NoError(t, err) err = s.Update(func(tx store.Tx) error { // Delete the task associated with node 1 so it's now the most lightly // loaded node. assert.NoError(t, store.DeleteTask(tx, "id1")) // Create a new task. It should get assigned to id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment3 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment3.NodeID) // Update a task to make it unassigned. It should get assigned by the // scheduler. err = s.Update(func(tx store.Tx) error { // Remove assignment from task id4. It should get assigned // to node id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.UpdateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment4 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment4.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then remove it. No tasks should ever // be assigned to it. node := &api.Node{ ID: "removednode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "removednode", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.DeleteNode(tx, node.ID)) // Create an unassigned task. 
task := &api.Task{ ID: "removednode", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "removednode", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, task)) return nil }) assert.NoError(t, err) assignmentRemovedNode := watchAssignment(t, watch) assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node. It should be used for the next // assignment. n4 := &api.Node{ ID: "id4", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name4", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n4)) // Create an unassigned task. t5 := &api.Task{ ID: "id5", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name5", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t5)) return nil }) assert.NoError(t, err) assignment5 := watchAssignment(t, watch) assert.Equal(t, "id4", assignment5.NodeID) err = s.Update(func(tx store.Tx) error { // Create a non-ready node. It should NOT be used for the next // assignment. n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, n5)) // Create an unassigned task. t6 := &api.Task{ ID: "id6", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name6", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil }) assert.NoError(t, err) assignment6 := watchAssignment(t, watch) assert.NotEqual(t, "id5", assignment6.NodeID) err = s.Update(func(tx store.Tx) error { // Update node id5 to put it in the READY state. 
n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.UpdateNode(tx, n5)) // Create an unassigned task. Should be assigned to the // now-ready node. t7 := &api.Task{ ID: "id7", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name7", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t7)) return nil }) assert.NoError(t, err) assignment7 := watchAssignment(t, watch) assert.Equal(t, "id5", assignment7.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then immediately take it down. The next // unassigned task should NOT be assigned to it. n6 := &api.Node{ ID: "id6", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name6", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n6)) n6.Status.State = api.NodeStatus_DOWN assert.NoError(t, store.UpdateNode(tx, n6)) // Create an unassigned task. t8 := &api.Task{ ID: "id8", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name8", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t8)) return nil }) assert.NoError(t, err) assignment8 := watchAssignment(t, watch) assert.NotEqual(t, "id6", assignment8.NodeID) }
func TestSchedulerFaultyNode(t *testing.T) { ctx := context.Background() taskTemplate := &api.Task{ ServiceID: "service1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial nodes, and one task assigned to node id1 assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() for i := 0; i != 8; i++ { // Simulate a task failure cycle newTask := taskTemplate.Copy() newTask.ID = identity.NewID() err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, newTask)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, newTask.ID, assignment.ID) if i < 5 { // The first 5 attempts should be assigned to node id2 because // it has no replicas of the service. assert.Equal(t, "id2", assignment.NodeID) } else { // The next ones should be assigned to id1, since we'll // flag id2 as potentially faulty. 
assert.Equal(t, "id1", assignment.NodeID) } err = s.Update(func(tx store.Tx) error { newTask := store.GetTask(tx, newTask.ID) require.NotNil(t, newTask) newTask.Status.State = api.TaskStateFailed assert.NoError(t, store.UpdateTask(tx, newTask)) return nil }) assert.NoError(t, err) } }
func TestConstraintEnforcer(t *testing.T) { nodes := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Role: api.NodeRoleWorker, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, }, } tasks := []*api.Task{ { ID: "id0", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Placement: &api.Placement{ Constraints: []string{"node.role == manager"}, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, }, NodeID: "id1", }, { ID: "id1", DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateNew, }, NodeID: "id1", }, { ID: "id2", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Placement: &api.Placement{ Constraints: []string{"node.role == worker"}, }, }, Status: api.TaskStatus{ State: api.TaskStateRunning, }, NodeID: "id1", }, { ID: "id3", DesiredState: api.TaskStateNew, Status: api.TaskStatus{ State: api.TaskStateNew, }, NodeID: "id2", }, { ID: "id4", DesiredState: api.TaskStateReady, Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 9e8, }, }, }, Status: api.TaskStatus{ State: api.TaskStatePending, }, NodeID: "id2", }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range nodes { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range tasks { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() constraintEnforcer := New(s) defer constraintEnforcer.Stop() go 
constraintEnforcer.Run() // id0 should be killed immediately shutdown1 := testutils.WatchShutdownTask(t, watch) assert.Equal(t, "id0", shutdown1.ID) // Change node id1 to a manager err = s.Update(func(tx store.Tx) error { node := store.GetNode(tx, "id1") if node == nil { t.Fatal("could not get node id1") } node.Role = api.NodeRoleManager assert.NoError(t, store.UpdateNode(tx, node)) return nil }) assert.NoError(t, err) shutdown2 := testutils.WatchShutdownTask(t, watch) assert.Equal(t, "id2", shutdown2.ID) // Change resources on node id2 err = s.Update(func(tx store.Tx) error { node := store.GetNode(tx, "id2") if node == nil { t.Fatal("could not get node id2") } node.Description.Resources.MemoryBytes = 5e8 assert.NoError(t, store.UpdateNode(tx, node)) return nil }) assert.NoError(t, err) shutdown3 := testutils.WatchShutdownTask(t, watch) assert.Equal(t, "id4", shutdown3.ID) }
func TestSchedulerResourceConstraintHA(t *testing.T) { // node 1 starts with 1 task, node 2 starts with 3 tasks. // however, node 1 only has enough memory to schedule one more task. ctx := context.Background() node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ MemoryBytes: 1e9, }, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ MemoryBytes: 1e11, }, }, } taskTemplate := &api.Task{ DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 5e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) // preassigned tasks task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) task2 := taskTemplate.Copy() task2.ID = "id2" task2.NodeID = "id2" task2.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task2)) task3 := taskTemplate.Copy() task3.ID = "id3" task3.NodeID = "id2" task3.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task3)) task4 := taskTemplate.Copy() task4.ID = "id4" task4.NodeID = "id2" task4.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task4)) // tasks to assign task5 := taskTemplate.Copy() task5.ID = 
"id5" assert.NoError(t, store.CreateTask(tx, task5)) task6 := taskTemplate.Copy() task6.ID = "id6" assert.NoError(t, store.CreateTask(tx, task6)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() assignment1 := watchAssignment(t, watch) if assignment1.ID != "id5" && assignment1.ID != "id6" { t.Fatal("assignment for unexpected task") } assignment2 := watchAssignment(t, watch) if assignment1.ID == "id5" { assert.Equal(t, "id6", assignment2.ID) } else { assert.Equal(t, "id5", assignment2.ID) } if assignment1.NodeID == "id1" { assert.Equal(t, "id2", assignment2.NodeID) } else { assert.Equal(t, "id1", assignment2.NodeID) } }
func TestSchedulerResourceConstraint(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. underprovisionedNode := &api.Node{ ID: "underprovisioned", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "underprovisioned", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } // Non-ready nodes that satisfy the constraints but shouldn't be used nonready1 := &api.Node{ ID: "nonready1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "nonready1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 2e9, MemoryBytes: 2e9, }, }, } nonready2 := &api.Node{ ID: "nonready2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "nonready2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 2e9, MemoryBytes: 2e9, }, }, } initialTask := &api.Task{ ID: "id1", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 2e9, }, }, }, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateTask(tx, initialTask)) assert.NoError(t, store.CreateNode(tx, underprovisionedNode)) assert.NoError(t, store.CreateNode(tx, nonready1)) assert.NoError(t, store.CreateNode(tx, nonready2)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer 
scheduler.Stop() failure := watchAssignmentFailure(t, watch) assert.Equal(t, "no suitable node (2 nodes not available for new tasks; insufficient resources on 1 node)", failure.Status.Message) err = s.Update(func(tx store.Tx) error { // Create a node with enough memory. The task should get // assigned to this node. node := &api.Node{ ID: "bignode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "bignode", }, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 4e9, MemoryBytes: 8e9, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, node)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "bignode", assignment.NodeID) }
// IssueNodeCertificate is responsible for gatekeeping both certificate requests from new nodes in the swarm, // and authorizing certificate renewals. // If a node presented a valid certificate, the corresponding certificate is set in a RENEW state. // If a node failed to present a valid certificate, we enforce all the policies currently configured in // the swarm for node acceptance: check for the validity of the presented secret and check what is the // acceptance state the certificate should be put in (PENDING or ACCEPTED). // After going through the configured policies, a new random node ID is generated, and the corresponding node // entry is created. IssueNodeCertificate is the only place where new node entries to raft should be created. func (s *Server) IssueNodeCertificate(ctx context.Context, request *api.IssueNodeCertificateRequest) (*api.IssueNodeCertificateResponse, error) { // First, let's see if the remote node is proposing to be added as a valid node, and with a non-empty CSR if len(request.CSR) == 0 || (request.Role != api.NodeRoleWorker && request.Role != api.NodeRoleManager) { return nil, grpc.Errorf(codes.InvalidArgument, codes.InvalidArgument.String()) } if err := s.addTask(); err != nil { return nil, err } defer s.doneTask() // If the remote node is an Agent (either forwarded by a manager, or calling directly), // issue a renew agent certificate entry with the correct ID nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{AgentRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization()) if err == nil { return s.issueRenewCertificate(ctx, nodeID, request.CSR) } // If the remote node is a Manager (either forwarded by another manager, or calling directly), // issue a renew certificate entry with the correct ID nodeID, err = AuthorizeForwardedRoleAndOrg(ctx, []string{ManagerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization()) if err == nil { return s.issueRenewCertificate(ctx, nodeID, request.CSR) } // 
The remote node didn't successfully present a valid MTLS certificate, let's issue a PENDING // certificate with a new random ID nodeMembership := api.NodeMembershipPending // If there are acceptance policies configured in the system, we should enforce them policy := s.getRolePolicy(request.Role) if policy != nil { // If the policy has a Secret set, let's verify it if policy.Secret != nil { if err := checkSecretValidity(policy, request.Secret); err != nil { return nil, grpc.Errorf(codes.InvalidArgument, "A valid secret token is necessary to join this cluster: %v", err) } } // Check to see if our autoacceptance policy allows this node to be issued without manual intervention if policy.Autoaccept { nodeMembership = api.NodeMembershipAccepted } } // Max number of collisions of ID or CN to tolerate before giving up maxRetries := 3 // Generate a random ID for this new node for i := 0; ; i++ { nodeID = identity.NewNodeID() // Create a new node err := s.store.Update(func(tx store.Tx) error { node := &api.Node{ ID: nodeID, Certificate: api.Certificate{ CSR: request.CSR, CN: nodeID, Role: request.Role, Status: api.IssuanceStatus{ State: api.IssuanceStatePending, }, }, Spec: api.NodeSpec{ Role: request.Role, Membership: nodeMembership, }, } return store.CreateNode(tx, node) }) if err == nil { log.G(ctx).WithFields(logrus.Fields{ "node.id": nodeID, "node.role": request.Role, "method": "IssueNodeCertificate", }).Debugf("new certificate entry added") break } if err != store.ErrExist { return nil, err } if i == maxRetries { return nil, err } log.G(ctx).WithFields(logrus.Fields{ "node.id": nodeID, "node.role": request.Role, "method": "IssueNodeCertificate", }).Errorf("randomly generated node ID collided with an existing one - retrying") } return &api.IssueNodeCertificateResponse{ NodeID: nodeID, }, nil }
func addNode(t *testing.T, s *store.MemoryStore, node *api.Node) { s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, node)) return nil }) }
func TestSchedulerResourceConstraintDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } bigTask1 := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } bigTask2 := bigTask1.Copy() bigTask2.ID = "id2" s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, bigTask1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // The task fits, so it should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, "id1", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) err = s.Update(func(tx store.Tx) error { // Add a second task. It shouldn't get assigned because of // resource constraints. 
return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.ByNodeID(node.ID)) assert.NoError(t, err) assert.Len(t, tasks, 1) }) err = s.Update(func(tx store.Tx) error { // The task becomes dead updatedTask := store.GetTask(tx, bigTask1.ID) updatedTask.Status.State = api.TaskStateShutdown return store.UpdateTask(tx, updatedTask) }) assert.NoError(t, err) // With the first task no longer consuming resources, the second // one can be scheduled. assignment = watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
// IssueNodeCertificate is responsible for gatekeeping both certificate requests from new nodes in the swarm, // and authorizing certificate renewals. // If a node presented a valid certificate, the corresponding certificate is set in a RENEW state. // If a node failed to present a valid certificate, we check for a valid join token and set the // role accordingly. A new random node ID is generated, and the corresponding node entry is created. // IssueNodeCertificate is the only place where new node entries to raft should be created. func (s *Server) IssueNodeCertificate(ctx context.Context, request *api.IssueNodeCertificateRequest) (*api.IssueNodeCertificateResponse, error) { // First, let's see if the remote node is presenting a non-empty CSR if len(request.CSR) == 0 { return nil, grpc.Errorf(codes.InvalidArgument, codes.InvalidArgument.String()) } if err := s.addTask(); err != nil { return nil, err } defer s.doneTask() // If the remote node is an Agent (either forwarded by a manager, or calling directly), // issue a renew agent certificate entry with the correct ID nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{AgentRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization()) if err == nil { return s.issueRenewCertificate(ctx, nodeID, request.CSR) } // If the remote node is a Manager (either forwarded by another manager, or calling directly), // issue a renew certificate entry with the correct ID nodeID, err = AuthorizeForwardedRoleAndOrg(ctx, []string{ManagerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization()) if err == nil { return s.issueRenewCertificate(ctx, nodeID, request.CSR) } // The remote node didn't successfully present a valid MTLS certificate, let's issue a // certificate with a new random ID role := api.NodeRole(-1) s.mu.Lock() if subtle.ConstantTimeCompare([]byte(s.joinTokens.Manager), []byte(request.Token)) == 1 { role = api.NodeRoleManager } else if 
subtle.ConstantTimeCompare([]byte(s.joinTokens.Worker), []byte(request.Token)) == 1 { role = api.NodeRoleWorker } s.mu.Unlock() if role < 0 { return nil, grpc.Errorf(codes.InvalidArgument, "A valid join token is necessary to join this cluster") } // Max number of collisions of ID or CN to tolerate before giving up maxRetries := 3 // Generate a random ID for this new node for i := 0; ; i++ { nodeID = identity.NewID() // Create a new node err := s.store.Update(func(tx store.Tx) error { node := &api.Node{ ID: nodeID, Certificate: api.Certificate{ CSR: request.CSR, CN: nodeID, Role: role, Status: api.IssuanceStatus{ State: api.IssuanceStatePending, }, }, Spec: api.NodeSpec{ Role: role, Membership: api.NodeMembershipAccepted, }, } return store.CreateNode(tx, node) }) if err == nil { log.G(ctx).WithFields(logrus.Fields{ "node.id": nodeID, "node.role": role, "method": "IssueNodeCertificate", }).Debugf("new certificate entry added") break } if err != store.ErrExist { return nil, err } if i == maxRetries { return nil, err } log.G(ctx).WithFields(logrus.Fields{ "node.id": nodeID, "node.role": role, "method": "IssueNodeCertificate", }).Errorf("randomly generated node ID collided with an existing one - retrying") } return &api.IssueNodeCertificateResponse{ NodeID: nodeID, NodeMembership: api.NodeMembershipAccepted, }, nil }
func TestSchedulerPreexistingDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run two tasks at once. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } deadTask := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", NodeID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStateShutdown, }, } bigTask2 := deadTask.Copy() bigTask2.ID = "id2" bigTask2.Status.State = api.TaskStatePending s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, deadTask)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() err = s.Update(func(tx store.Tx) error { // Add a second task. It should get assigned because the task // using the resources is past the running state. return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. // // TODO(aluzzardi): /!\ This function is *way* too complex. /!\ // It needs to be split into smaller manageable functions. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() // Harakiri. go func() { select { case <-ctx.Done(): case <-m.stopped: ctxCancel() } }() leadershipCh, cancel := m.RaftNode.SubscribeLeadership() defer cancel() go func() { for leadershipEvent := range leadershipCh { // read out and discard all of the messages when we've stopped // don't acquire the mutex yet. if stopped is closed, we don't need // this stops this loop from starving Run()'s attempt to Lock select { case <-m.stopped: continue default: // do nothing, we're not stopped } // we're not stopping so NOW acquire the mutex m.mu.Lock() newState := leadershipEvent.(raft.LeadershipState) if newState == raft.IsLeader { s := m.RaftNode.MemoryStore() rootCA := m.config.SecurityConfig.RootCA() nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID() raftCfg := raft.DefaultRaftConfig() raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick) raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick) clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization() initialCAConfig := ca.DefaultCAConfig() initialCAConfig.ExternalCAs = m.config.ExternalCAs s.Update(func(tx store.Tx) error { // Add a default cluster object to the // store. Don't check the error because // we expect this to fail unless this // is a brand new cluster. 
store.CreateCluster(tx, &api.Cluster{ ID: clusterID, Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: store.DefaultClusterName, }, Orchestration: api.OrchestrationConfig{ TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit, }, Dispatcher: api.DispatcherConfig{ HeartbeatPeriod: ptypes.DurationProto(dispatcher.DefaultHeartBeatPeriod), }, Raft: raftCfg, CAConfig: initialCAConfig, }, RootCA: api.RootCA{ CAKey: rootCA.Key, CACert: rootCA.Cert, CACertHash: rootCA.Digest.String(), JoinTokens: api.JoinTokens{ Worker: ca.GenerateJoinToken(rootCA), Manager: ca.GenerateJoinToken(rootCA), }, }, }) // Add Node entry for ourself, if one // doesn't exist already. store.CreateNode(tx, &api.Node{ ID: nodeID, Certificate: api.Certificate{ CN: nodeID, Role: api.NodeRoleManager, Status: api.IssuanceStatus{ State: api.IssuanceStateIssued, }, }, Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, }) return nil }) // Attempt to rotate the key-encrypting-key of the root CA key-material err := m.rotateRootCAKEK(ctx, clusterID) if err != nil { log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed") } m.replicatedOrchestrator = orchestrator.NewReplicatedOrchestrator(s) m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s) m.taskReaper = orchestrator.NewTaskReaper(s) m.scheduler = scheduler.New(s) m.keyManager = keymanager.New(m.RaftNode.MemoryStore(), keymanager.DefaultConfig()) // TODO(stevvooe): Allocate a context that can be used to // shutdown underlying manager processes when leadership is // lost. m.allocator, err = allocator.New(s) if err != nil { log.G(ctx).WithError(err).Error("failed to create allocator") // TODO(stevvooe): It doesn't seem correct here to fail // creating the allocator but then use it anyway. 
} if m.keyManager != nil { go func(keyManager *keymanager.KeyManager) { if err := keyManager.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("keymanager failed with an error") } }(m.keyManager) } go func(d *dispatcher.Dispatcher) { if err := d.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("Dispatcher exited with an error") } }(m.Dispatcher) go func(server *ca.Server) { if err := server.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("CA signer exited with an error") } }(m.caserver) // Start all sub-components in separate goroutines. // TODO(aluzzardi): This should have some kind of error handling so that // any component that goes down would bring the entire manager down. if m.allocator != nil { go func(allocator *allocator.Allocator) { if err := allocator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("allocator exited with an error") } }(m.allocator) } go func(scheduler *scheduler.Scheduler) { if err := scheduler.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("scheduler exited with an error") } }(m.scheduler) go func(taskReaper *orchestrator.TaskReaper) { taskReaper.Run() }(m.taskReaper) go func(orchestrator *orchestrator.ReplicatedOrchestrator) { if err := orchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error") } }(m.replicatedOrchestrator) go func(globalOrchestrator *orchestrator.GlobalOrchestrator) { if err := globalOrchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("global orchestrator exited with an error") } }(m.globalOrchestrator) } else if newState == raft.IsFollower { m.Dispatcher.Stop() m.caserver.Stop() if m.allocator != nil { m.allocator.Stop() m.allocator = nil } m.replicatedOrchestrator.Stop() m.replicatedOrchestrator = nil m.globalOrchestrator.Stop() m.globalOrchestrator = nil m.taskReaper.Stop() m.taskReaper = nil m.scheduler.Stop() m.scheduler = nil if m.keyManager != nil { m.keyManager.Stop() m.keyManager = nil } } m.mu.Unlock() } 
}() proxyOpts := []grpc.DialOption{ grpc.WithTimeout(5 * time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...) m.connSelector = cs // We need special connSelector for controlapi because it provides automatic // leader tracking. // Other APIs are using connSelector which errors out on leader change, but // allows to react quickly to reelections. controlAPIProxyOpts := []grpc.DialOption{ grpc.WithBackoffMaxDelay(time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...) authorize := func(ctx context.Context, roles []string) error { // Authorize the remote roles, ensure they can only be forwarded by managers _, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization()) return err } baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA()) healthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, 
ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) // localProxyControlAPI is a special kind of proxy. It is only wired up // to receive requests from a trusted local socket, and these requests // don't use TLS, therefore the requests it handles locally should // bypass authorization. When it proxies, it sends them as requests from // this manager rather than forwarded requests (it has no TLS // information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) errServe := make(chan error, 2) for proto, l := range m.listeners { go func(proto string, lis net.Listener) { ctx := log.WithLogger(ctx, log.G(ctx).WithFields( logrus.Fields{ "proto": lis.Addr().Network(), "addr": lis.Addr().String()})) if proto == "unix" { log.G(ctx).Info("Listening for local connections") // we need to disallow double closes because UnixListener.Close // can delete unix-socket file of newer listener. grpc calls // Close twice indeed: in Serve and in Stop. 
errServe <- m.localserver.Serve(&closeOnceListener{Listener: lis}) } else { log.G(ctx).Info("Listening for connections") errServe <- m.server.Serve(lis) } }(proto, l) } // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) if err := m.RaftNode.JoinAndStart(); err != nil { for _, lis := range m.listeners { lis.Close() } return fmt.Errorf("can't initialize raft node: %v", err) } close(m.started) go func() { err := m.RaftNode.Run(ctx) if err != nil { log.G(ctx).Error(err) m.Stop(ctx) } }() if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil { m.server.Stop() return err } c, err := raft.WaitForCluster(ctx, m.RaftNode) if err != nil { m.server.Stop() return err } raftConfig := c.Spec.Raft if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-errServe select { // check to see if stopped was posted to. if so, we're in the process of // stopping, or done and that's why we got the error. if stopping is // deliberate, stopped will ALWAYS be closed before the error is trigger, // so this path will ALWAYS be taken if the stop was deliberate case <-m.stopped: // shutdown was requested, do not return an error // but first, we wait to acquire a mutex to guarantee that stopping is // finished. as long as we acquire the mutex BEFORE we return, we know // that stopping is stopped. 
m.mu.Lock() m.mu.Unlock() return nil // otherwise, we'll get something from errServe, which indicates that an // error in serving has actually occurred and this isn't a planned shutdown default: return err } }
func TestNodeCertificateAccept(t *testing.T) { tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy()) defer tc.Stop() csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node) assert.NoError(t, err) testNode := &api.Node{ ID: "nodeID", Spec: api.NodeSpec{ Membership: api.NodeMembershipAccepted, Role: api.NodeRoleWorker, }, Certificate: api.Certificate{ CN: "nodeID", CSR: csr, Status: api.IssuanceStatus{State: api.IssuanceStatePending}, }, } err = tc.MemoryStore.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, testNode)) return nil }) assert.NoError(t, err) statusRequest := &api.NodeCertificateStatusRequest{NodeID: "nodeID"} resp, err := tc.NodeCAClients[1].NodeCertificateStatus(context.Background(), statusRequest) assert.NoError(t, err) assert.NotEmpty(t, resp.Certificate) assert.NotEmpty(t, resp.Status) assert.NotNil(t, resp.Certificate.Certificate) assert.Equal(t, api.IssuanceStateIssued, resp.Status.State) tc.MemoryStore.View(func(readTx store.ReadTx) { storeNodes, err := store.FindNodes(readTx, store.All) assert.NoError(t, err) assert.NotEmpty(t, storeNodes) var found bool for _, node := range storeNodes { if node.ID == "nodeID" { assert.Equal(t, api.IssuanceStateIssued, node.Certificate.Status.State) found = true } } assert.True(t, found) }) // Try it one more time for Worker, this time end-to-end role := api.NodeRoleWorker issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role} issueResponse, err := tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest) require.NoError(t, err) assert.NotNil(t, issueResponse.NodeID) assert.Equal(t, api.NodeMembershipAccepted, issueResponse.NodeMembership) // Try it one more time for Worker, this time end-to-end with manager role = api.NodeRoleManager issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role} issueResponse, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest) require.NoError(t, err) assert.NotNil(t, 
issueResponse.NodeID) assert.Equal(t, api.NodeMembershipPending, issueResponse.NodeMembership) }