func TestRaftSnapshotRestart(t *testing.T) {
	t.Parallel()

	// Bring up a 3 node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 10, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5", "id6", "id7"}
	values := make([]*api.Node, len(nodeIDs))

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// Take down node 3
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Propose a 4th value before the snapshot
	values[3], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// Remaining nodes shouldn't have snapshot files yet
	for _, node := range []*raftutils.TestNode{nodes[1], nodes[2]} {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Add a node to the cluster before the snapshot. This is the event
	// that triggers the snapshot.
	nodes[4] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2], 4: nodes[4]})

	// Remaining nodes should now have a snapshot file
	for nodeIdx, node := range []*raftutils.TestNode{nodes[1], nodes[2]} {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeIdx+1)
			}
			return nil
		}))
	}
	raftutils.CheckValuesOnNodes(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2]}, nodeIDs[:4], values[:4])

	// Propose a 5th value
	values[4], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[4])
	require.NoError(t, err)

	// Add another node to the cluster
	nodes[5] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2], 4: nodes[4], 5: nodes[5]})

	// New node should get a copy of the snapshot
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[5].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(dirents))
		}
		return nil
	}))

	dirents, err := ioutil.ReadDir(filepath.Join(nodes[5].StateDir, "snap"))
	assert.NoError(t, err)
	assert.Len(t, dirents, 1)
	raftutils.CheckValuesOnNodes(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2]}, nodeIDs[:5], values[:5])

	// It should know about the other nodes, including the one that was just added
	stripMembers := func(memberList map[uint64]*api.RaftMember) map[uint64]*api.RaftMember {
		raftNodes := make(map[uint64]*api.RaftMember)
		for k, v := range memberList {
			raftNodes[k] = &api.RaftMember{
				RaftID: v.RaftID,
				Addr:   v.Addr,
			}
		}
		return raftNodes
	}
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[4].GetMemberlist()))

	// Restart node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 3 should know about other nodes, including the new one
	assert.Len(t, nodes[3].GetMemberlist(), 5)
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[3].GetMemberlist()))

	// Propose yet another value, to make sure the rejoined node is still
	// receiving new logs
	values[5], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, nodeIDs[5])
	require.NoError(t, err)

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:6], values[:6])

	// Restart node 3 again. It should load the snapshot.
	nodes[3].Server.Stop()
	nodes[3].Shutdown()
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	assert.Len(t, nodes[3].GetMemberlist(), 5)
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[3].GetMemberlist()))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:6], values[:6])

	// Propose again. Just to check consensus after this latest restart.
	values[6], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, nodeIDs[6])
	require.NoError(t, err)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
func TestRaftForceNewCluster(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id1")
	assert.NoError(t, err, "failed to propose value")

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	toClean := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
	}
	raftutils.TeardownCluster(t, toClean)
	delete(nodes, 2)
	delete(nodes, 3)

	// Only restart the first node with the force-new-cluster option
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain only one node (self)
	assert.Equal(t, len(nodes[1].GetMemberlist()), 1)

	// Add 2 more members
	nodes[2] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
		3: nodes[3],
	}
	defer raftutils.TeardownCluster(t, newCluster)

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
func TestRaftSnapshotForceNewCluster(t *testing.T) {
	t.Parallel()

	// Bring up a 3 node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 10, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5"}

	// Propose 3 values.
	for _, nodeID := range nodeIDs[:3] {
		_, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// Remove one of the original nodes.
	// Use gRPC instead of calling the handler directly because of the
	// authorization check.
	cc, err := dial(nodes[1], nodes[1].Address)
	assert.NoError(t, err)
	raftClient := api.NewRaftMembershipClient(cc)
	defer cc.Close()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	resp, err := raftClient.Leave(ctx, &api.LeaveRequest{Node: &api.RaftMember{RaftID: nodes[2].Config.ID}})
	assert.NoError(t, err, "error sending message to leave the raft")
	assert.NotNil(t, resp, "leave response message is nil")

	raftutils.ShutdownNode(nodes[2])
	delete(nodes, 2)

	// Nodes shouldn't have snapshot files yet
	for _, node := range nodes {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap-v3-encrypted"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Trigger a snapshot, with a 4th proposal
	_, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// Nodes should now have a snapshot file
	for nodeIdx, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap-v3-encrypted"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeIdx+1)
			}
			return nil
		}))
	}

	// Join another node
	nodes[4] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Only restart the first node with the force-new-cluster option
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	delete(nodes, 3)
	delete(nodes, 4)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain exactly one node (self)
	memberlist := nodes[1].GetMemberlist()
	require.Len(t, memberlist, 1)

	// Propose a 5th value
	_, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[4])
	require.NoError(t, err)
}