func TestRaftLeaderDown(t *testing.T) { t.Parallel() nodes, clockSource := raftutils.NewRaftCluster(t, tc) defer raftutils.TeardownCluster(t, nodes) // Stop node 1 nodes[1].Stop() newCluster := map[uint64]*raftutils.TestNode{ 2: nodes[2], 3: nodes[3], } // Wait for the re-election to occur raftutils.WaitForCluster(t, clockSource, newCluster) // Leader should not be 1 assert.NotEqual(t, nodes[2].Leader(), nodes[1].Config.ID) // Ensure that node 2 and node 3 have the same leader assert.Equal(t, nodes[3].Leader(), nodes[2].Leader()) // Find the leader node and a follower node var ( leaderNode *raftutils.TestNode followerNode *raftutils.TestNode ) for i, n := range newCluster { if n.Config.ID == n.Leader() { leaderNode = n if i == 2 { followerNode = newCluster[3] } else { followerNode = newCluster[2] } } } require.NotNil(t, leaderNode) require.NotNil(t, followerNode) // Propose a value value, err := raftutils.ProposeValue(t, leaderNode) assert.NoError(t, err, "failed to propose value") // The value should be replicated on all remaining nodes raftutils.CheckValue(t, clockSource, leaderNode, value) assert.Equal(t, len(leaderNode.GetMemberlist()), 3) raftutils.CheckValue(t, clockSource, followerNode, value) assert.Equal(t, len(followerNode.GetMemberlist()), 3) }
func TestRaftLeaderLeave(t *testing.T) { t.Parallel() nodes, clockSource := raftutils.NewRaftCluster(t, tc) // node 1 is the leader assert.Equal(t, nodes[1].Leader(), nodes[1].Config.ID) // Try to leave the raft // Use gRPC instead of calling handler directly because of // authorization check. client, err := nodes[1].ConnectToMember(nodes[1].Address, 10*time.Second) assert.NoError(t, err) defer client.Conn.Close() raftClient := api.NewRaftMembershipClient(client.Conn) ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) resp, err := raftClient.Leave(ctx, &api.LeaveRequest{Node: &api.RaftMember{RaftID: nodes[1].Config.ID}}) assert.NoError(t, err, "error sending message to leave the raft") assert.NotNil(t, resp, "leave response message is nil") newCluster := map[uint64]*raftutils.TestNode{ 2: nodes[2], 3: nodes[3], } // Wait for election tick raftutils.WaitForCluster(t, clockSource, newCluster) // Leader should not be 1 assert.NotEqual(t, nodes[2].Leader(), nodes[1].Config.ID) assert.Equal(t, nodes[2].Leader(), nodes[3].Leader()) leader := nodes[2].Leader() // Find the leader node and a follower node var ( leaderNode *raftutils.TestNode followerNode *raftutils.TestNode ) for i, n := range nodes { if n.Config.ID == leader { leaderNode = n if i == 2 { followerNode = nodes[3] } else { followerNode = nodes[2] } } } require.NotNil(t, leaderNode) require.NotNil(t, followerNode) // Propose a value value, err := raftutils.ProposeValue(t, leaderNode) assert.NoError(t, err, "failed to propose value") // The value should be replicated on all remaining nodes raftutils.CheckValue(t, clockSource, leaderNode, value) assert.Equal(t, len(leaderNode.GetMemberlist()), 2) raftutils.CheckValue(t, clockSource, followerNode, value) assert.Equal(t, len(followerNode.GetMemberlist()), 2) raftutils.TeardownCluster(t, newCluster) }
// testUpdateNodeDemote exercises demoting managers down to a single-node
// cluster. When leader is true the final demotion targets the raft leader
// (nodes[1]); otherwise it targets a follower (nodes[2]). It verifies that:
//   - demotion is refused while it would break quorum (node 3 down),
//   - demotion succeeds once all members are reachable again,
//   - the last remaining manager can never be demoted,
//   - the surviving single-node cluster still processes spec updates.
func testUpdateNodeDemote(leader bool, t *testing.T) {
	tc := cautils.NewTestCA(nil)
	defer tc.Stop()
	ts := newTestServer(t)
	defer ts.Stop()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Assign one of the raft node to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	// Create a node object for each of the managers
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		return nil
	}))

	// Stop Node 3 (1 node out of 3)
	nodes[3].Server.Stop()
	nodes[3].ShutdownRaft()

	// Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Try to demote Node 2, this should fail because of the quorum safeguard:
	// with node 3 already down, removing node 2 as a manager would leave a
	// single reachable manager out of three.
	r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec := r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version := &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))

	// Restart Node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 3 should be listed as Reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Try to demote Node 3, this should succeed (quorum of 2/3 survives)
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
	}
	raftutils.WaitForCluster(t, clockSource, newCluster)

	// Server should list 2 members
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 2 {
			return fmt.Errorf("expected 2 nodes, got %d", len(members))
		}
		return nil
	}))

	// Pick which of the two remaining managers is demoted next, depending
	// on whether this run tests leader or follower demotion.
	var demoteNode, lastNode *raftutils.TestNode
	if leader {
		demoteNode = nodes[1]
		lastNode = nodes[2]
	} else {
		demoteNode = nodes[2]
		lastNode = nodes[1]
	}

	// Try to demote a Node and scale down to 1
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: demoteNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      demoteNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	// Update the server to point at the surviving node, since the one it
	// was attached to may just have been demoted.
	ts.Server.raft = lastNode.Node
	ts.Server.store = lastNode.MemoryStore()

	newCluster = map[uint64]*raftutils.TestNode{
		1: lastNode,
	}
	raftutils.WaitForCluster(t, clockSource, newCluster)

	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := lastNode.GetMemberlist()
		if len(members) != 1 {
			return fmt.Errorf("expected 1 node, got %d", len(members))
		}
		return nil
	}))

	// Make sure we can't demote the last manager.
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      lastNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))

	// Propose a change in the spec and check if the remaining node can still process updates
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Availability = api.NodeAvailabilityDrain
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      lastNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	// Get node information and check that the availability is set to drain
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	assert.Equal(t, r.Node.Spec.Availability, api.NodeAvailabilityDrain)
}