// TestRaftQuorumRecovery checks that a five-node cluster which has lost its
// majority (nodes 1-3) regains quorum once node 3 is restarted, and can then
// commit new proposals.
func TestRaftQuorumRecovery(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Lose a majority
	for i := uint64(1); i <= 3; i++ {
		nodes[i].Server.Stop()
		nodes[i].Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restore the majority by restarting node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)

	delete(nodes, 1)
	delete(nodes, 2)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, raftutils.Leader(nodes))
	assert.NoError(t, err)

	for _, node := range nodes {
		raftutils.CheckValue(t, clockSource, node, value)
	}
}
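
// TestRaftFollowerLeave has a follower leave a five-node cluster through the
// RaftMembership gRPC API, then verifies that the remaining four members
// agree on the new member list and still replicate proposed values.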
func TestRaftFollowerLeave(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Node 5 leaves the cluster.
	// Use gRPC instead of calling the handler directly because of the
	// authorization check.
	cc, err := dial(nodes[1], nodes[1].Address)
	assert.NoError(t, err)
	raftClient := api.NewRaftMembershipClient(cc)
	defer cc.Close()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	resp, err := raftClient.Leave(ctx, &api.LeaveRequest{Node: &api.RaftMember{RaftID: nodes[5].Config.ID}})
	assert.NoError(t, err, "error sending message to leave the raft")
	assert.NotNil(t, resp, "leave response message is nil")

	raftutils.ShutdownNode(nodes[5])
	delete(nodes, 5)

	raftutils.WaitForPeerNumber(t, clockSource, nodes, 4)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime)
	assert.NoError(t, err, "failed to propose value")

	// The value should be replicated on every remaining node
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	assert.Len(t, nodes[1].GetMemberlist(), 4)

	raftutils.CheckValue(t, clockSource, nodes[2], value)
	assert.Len(t, nodes[2].GetMemberlist(), 4)

	raftutils.CheckValue(t, clockSource, nodes[3], value)
	assert.Len(t, nodes[3].GetMemberlist(), 4)

	raftutils.CheckValue(t, clockSource, nodes[4], value)
	assert.Len(t, nodes[4].GetMemberlist(), 4)
}
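
// TestRaftQuorumFailure verifies that proposals fail once a five-node
// cluster has lost its majority (nodes 3-5), and that the value is not
// replicated to the surviving nodes.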
func TestRaftQuorumFailure(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Lose a majority
	for i := uint64(3); i <= 5; i++ {
		nodes[i].Server.Stop()
		nodes[i].Shutdown()
	}

	// Propose a value
	_, err := raftutils.ProposeValue(t, nodes[1])
	assert.Error(t, err)

	// The value should not be replicated: we have no majority
	raftutils.CheckNoValue(t, clockSource, nodes[2])
	raftutils.CheckNoValue(t, clockSource, nodes[1])
}
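
// TestRaftNewNodeGetsData verifies that a node joining an existing cluster
// receives the values that were committed before it joined.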
func TestRaftNewNodeGetsData(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1])
	assert.NoError(t, err, "failed to propose value")

	// Add a new node
	raftutils.AddRaftNode(t, clockSource, nodes, tc)

	time.Sleep(500 * time.Millisecond)

	// The value should be replicated on every node
	for _, node := range nodes {
		raftutils.CheckValue(t, clockSource, node, value)
		assert.Len(t, node.GetMemberlist(), 4)
	}
}
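
// TestStress repeatedly proposes values while randomly killing and
// restarting nodes, then checks that every value stored in raft was one of
// the values proposed (or proposed but timed out) during the run.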
func TestStress(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// number of nodes that are currently running
	nup := len(nodes)
	// set of nodes that are down
	idleNodes := map[int]struct{}{}
	// IDs that were proposed successfully or timed out
	pIDs := []string{}

	leader := -1
	for iters := 0; iters < 1000; iters++ {
		// Keep proposing new values and killing the leader
		for i := 1; i <= 5; i++ {
			if nodes[uint64(i)] != nil {
				id := strconv.Itoa(iters)
				_, err := raftutils.ProposeValue(t, nodes[uint64(i)], id)

				if err == nil {
					pIDs = append(pIDs, id)
					// If the proposal succeeded, at least 3 nodes are running
					assert.True(t, nup >= 3)
					// Only the leader can propose a value
					assert.True(t, leader == i || leader == -1)

					// Update the leader
					leader = i
					break
				} else if strings.Contains(err.Error(), "context deadline exceeded") {
					// Although the proposal timed out, record the value anyway:
					// it may still be committed to the Raft store later.
					pIDs = append(pIDs, id)
				}
			}
		}

		if rand.Intn(100) < 10 {
			// Advance the clock so a potential election finishes quickly
			clockSource.Increment(200 * time.Millisecond)
			time.Sleep(10 * time.Millisecond)
		} else {
			ms := rand.Intn(10)
			clockSource.Increment(time.Duration(ms) * time.Millisecond)
		}

		if leader != -1 {
			// The last proposal succeeded, so try to kill a random node
			s := rand.Intn(5) + 1
			if _, ok := idleNodes[s]; !ok {
				id := uint64(s)
				nodes[id].Server.Stop()
				nodes[id].Shutdown()
				idleNodes[s] = struct{}{}
				nup--
				if s == leader {
					// The leader was killed
					leader = -1
				}
			}
		}

		if nup < 3 {
			// Quorum is lost, so try to bring a node back up
			s := rand.Intn(5) + 1
			if _, ok := idleNodes[s]; ok {
				id := uint64(s)
				nodes[id] = raftutils.RestartNode(t, clockSource, nodes[id], false)
				delete(idleNodes, s)
				nup++
			}
		}
	}

	// Bring back all nodes and propose the final value
	for i := range idleNodes {
		id := uint64(i)
		nodes[id] = raftutils.RestartNode(t, clockSource, nodes[id], false)
	}
	raftutils.WaitForCluster(t, clockSource, nodes)
	id := strconv.Itoa(1000)
	val, err := raftutils.ProposeValue(t, raftutils.Leader(nodes), id)
	assert.NoError(t, err, "failed to propose value")
	pIDs = append(pIDs, id)

	// Advance the clock to let the cluster stabilize
	time.Sleep(500 * time.Millisecond)
	clockSource.Increment(500 * time.Millisecond)

	ids, values := raftutils.GetAllValuesOnNode(t, clockSource, nodes[1])

	// Since the cluster is stable, the final value must be in the Raft store
	found := false
	for _, value := range values {
		if reflect.DeepEqual(value, val) {
			found = true
			break
		}
	}
	assert.True(t, found)

	// All nodes must have the same values
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, ids, values)

	// ids should be a subset of pIDs
	for _, id := range ids {
		found = false
		for _, pid := range pIDs {
			if id == pid {
				found = true
				break
			}
		}
		assert.True(t, found)
	}
}
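
// TestRaftSnapshot proposes enough values to trigger snapshots, verifies
// that every node (including one added later) ends up with exactly one
// snapshot file, and that a second round of proposals replaces it.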
func TestRaftSnapshot(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 9, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5", "id6", "id7", "id8", "id9", "id10", "id11", "id12"}
	values := make([]*api.Node, len(nodeIDs))
	snapshotFilenames := make(map[uint64]string, 4)

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// None of the nodes should have snapshot files yet
	for _, node := range nodes {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Check that all nodes have all the data.
	// This also acts as a synchronization point so that the next value we
	// propose will arrive as a separate message to the raft state machine,
	// and it is guaranteed to have the right cluster settings when
	// deciding whether to create a new snapshot.
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:3], values)

	// Propose a 4th value
	values[3], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// All nodes should now have a snapshot file
	for nodeID, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
			}
			snapshotFilenames[nodeID] = dirents[0].Name()
			return nil
		}))
	}

	// Add a node to the cluster
	raftutils.AddRaftNode(t, clockSource, nodes, tc)

	// It should get a copy of the snapshot
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[4].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(dirents))
		}
		snapshotFilenames[4] = dirents[0].Name()
		return nil
	}))

	// It should know about the other nodes
	stripMembers := func(memberList map[uint64]*api.RaftMember) map[uint64]*api.RaftMember {
		raftNodes := make(map[uint64]*api.RaftMember)
		for k, v := range memberList {
			raftNodes[k] = &api.RaftMember{
				RaftID: v.RaftID,
				Addr:   v.Addr,
			}
		}
		return raftNodes
	}
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[4].GetMemberlist()))

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:4], values)

	// Propose more values to provoke a second snapshot
	for i := 4; i != len(nodeIDs); i++ {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[i])
		assert.NoError(t, err, "failed to propose value")
	}

	// All nodes should have a snapshot under a *different* name
	for nodeID, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeID)
			}
			if dirents[0].Name() == snapshotFilenames[nodeID] {
				return fmt.Errorf("snapshot %s did not get replaced", snapshotFilenames[nodeID])
			}
			return nil
		}))
	}

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
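
// TestCanRemoveMember verifies that RemoveMember refuses any removal that
// would break quorum, but allows removing a member (reachable or not) when
// quorum is preserved.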
func TestCanRemoveMember(t *testing.T) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Stop node 2 and node 3 (2 nodes out of 3)
	nodes[2].Server.Stop()
	nodes[2].Shutdown()
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 2 and node 3 should be listed as unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 2 to be unreachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Removing any node should fail
	for i := 1; i <= 3; i++ {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		err := nodes[1].RemoveMember(ctx, uint64(i))
		cancel()
		assert.Error(t, err)
		assert.Equal(t, raft.ErrCannotRemoveMember, err)
		members := nodes[1].GetMemberlist()
		assert.Len(t, members, 3)
	}

	// Restart node 2 and node 3
	nodes[2] = raftutils.RestartNode(t, clockSource, nodes[2], false)
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 2 and node 3 should be listed as reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 2 to be reachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Stop node 3 (1 node out of 3)
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 3 should be listed as unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Removing node 2 should fail (this would break the quorum)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	err := nodes[1].RemoveMember(ctx, nodes[2].Config.ID)
	cancel()
	assert.Error(t, err)
	assert.Equal(t, raft.ErrCannotRemoveMember, err)
	members := nodes[1].GetMemberlist()
	assert.Len(t, members, 3)

	// Removing node 3 works fine because it is already unreachable
	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[3].Config.ID)
	cancel()
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[3].Config.ID])
	assert.Len(t, members, 2)

	// Add back node 3
	raftutils.ShutdownNode(nodes[3])
	delete(nodes, 3)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)

	// Node 2 and node 3 should be listed as reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 2 to be reachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Removing node 3 should succeed
	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[3].Config.ID)
	cancel()
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[3].Config.ID])
	assert.Len(t, members, 2)

	// Removing node 2 should succeed
	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[2].Config.ID)
	cancel()
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[2].Config.ID])
	assert.Len(t, members, 1)
}
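
// TestListManagerNodes lists the cluster through the control API and checks
// that leadership and reachability are reported correctly as nodes are
// added, stopped, restarted, and a new leader is elected.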
func TestListManagerNodes(t *testing.T) {
	t.Parallel()

	tc := cautils.NewTestCA(nil)
	defer tc.Stop()
	ts := newTestServer(t)
	defer ts.Stop()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Create a node object for each of the managers
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()}))
		return nil
	}))

	// Assign one of the raft nodes to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	// There should be 3 reachable managers listed
	r, err := ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.NoError(t, err)
	assert.NotNil(t, r)
	managers := getMap(t, r.Nodes)
	assert.Len(t, ts.Server.raft.GetMemberlist(), 3)
	assert.Len(t, r.Nodes, 3)

	// Node 1 should be the leader
	for i := 1; i <= 3; i++ {
		if i == 1 {
			assert.True(t, managers[nodes[uint64(i)].Config.ID].Leader)
			continue
		}
		assert.False(t, managers[nodes[uint64(i)].Config.ID].Leader)
	}

	// All nodes should be reachable
	for i := 1; i <= 3; i++ {
		assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability)
	}

	// Add two more nodes to the cluster
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Add node entries for these
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[4].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[5].SecurityConfig.ClientTLSCreds.NodeID()}))
		return nil
	}))

	// There should be 5 reachable managers listed
	r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.NoError(t, err)
	assert.NotNil(t, r)
	managers = getMap(t, r.Nodes)
	assert.Len(t, ts.Server.raft.GetMemberlist(), 5)
	assert.Len(t, r.Nodes, 5)
	for i := 1; i <= 5; i++ {
		assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability)
	}

	// Stop 2 nodes
	nodes[4].Server.Stop()
	nodes[4].ShutdownRaft()
	nodes[5].Server.Stop()
	nodes[5].ShutdownRaft()

	// Node 4 and node 5 should be listed as unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}
		managers = getMap(t, r.Nodes)
		if len(r.Nodes) != 5 {
			return fmt.Errorf("expected 5 nodes, got %d", len(r.Nodes))
		}
		if managers[nodes[4].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 4 to be unreachable")
		}
		if managers[nodes[5].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 5 to be unreachable")
		}
		return nil
	}))

	// Restart the 2 nodes
	nodes[4] = raftutils.RestartNode(t, clockSource, nodes[4], false)
	nodes[5] = raftutils.RestartNode(t, clockSource, nodes[5], false)
	raftutils.WaitForCluster(t, clockSource, nodes)
	assert.Len(t, ts.Server.raft.GetMemberlist(), 5)

	// All the nodes should be reachable again
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}
		managers = getMap(t, r.Nodes)
		for i := 1; i <= 5; i++ {
			if managers[nodes[uint64(i)].Config.ID].Reachability != api.RaftMemberStatus_REACHABLE {
				return fmt.Errorf("node %x is unreachable", nodes[uint64(i)].Config.ID)
			}
		}
		return nil
	}))

	// Switch the raft node used by the server
	ts.Server.raft = nodes[2].Node

	// Stop node 1 (the leader)
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	newCluster := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
		4: nodes[4],
		5: nodes[5],
	}

	// Wait for the re-election to occur
	raftutils.WaitForCluster(t, clockSource, newCluster)

	// Node 1 should no longer be the leader
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}
		managers = getMap(t, r.Nodes)
		if managers[nodes[1].Config.ID].Leader {
			return fmt.Errorf("expected node 1 not to be the leader")
		}
		if managers[nodes[1].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 1 to be unreachable")
		}
		return nil
	}))

	// Restart node 1
	nodes[1].ShutdownRaft()
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Ensure that node 1 is not the leader
	assert.False(t, managers[nodes[uint64(1)].Config.ID].Leader)

	// Check that another node took over the leadership
	var leader uint64
	leaderCount := 0
	for i := 1; i <= 5; i++ {
		if managers[nodes[uint64(i)].Config.ID].Leader {
			leader = nodes[uint64(i)].Config.ID
			leaderCount++
		}
	}

	// There should be only one leader after node 1 recovers, and it
	// should be different from node 1
	assert.Equal(t, 1, leaderCount)
	assert.NotEqual(t, leader, nodes[1].Config.ID)
}
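
// getMap, used by TestListManagerNodes above, is defined elsewhere in this
// package. As a rough illustration of what the test assumes it does (index
// each listed node's manager status by raft ID so assertions can look
// members up directly), a hypothetical equivalent might look like the
// sketch below. The helper name and the assumption that api.ManagerStatus
// carries the raft ID are illustrative, not the package's actual code.
func managerStatusByRaftID(nodes []*api.Node) map[uint64]*api.ManagerStatus {
	statuses := make(map[uint64]*api.ManagerStatus)
	for _, n := range nodes {
		// Nodes that are not managers have no manager status to index.
		if n.ManagerStatus != nil {
			statuses[n.ManagerStatus.RaftID] = n.ManagerStatus
		}
	}
	return statuses
}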