func testRaftRestartCluster(t *testing.T, stagger bool) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id1")
	assert.NoError(t, err, "failed to propose value")

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restart all nodes
	i := 0
	for k, node := range nodes {
		if stagger && i != 0 {
			raftutils.AdvanceTicks(clockSource, 1)
		}
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
		i++
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
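// testRaftRestartCluster takes a stagger flag, so it reads as a shared helper
// rather than a test in its own right. A sketch of the thin wrappers that would
// exercise both paths (the wrapper names are assumptions, not part of this
// excerpt):

func TestRaftRestartClusterSimultaneously(t *testing.T) {
	t.Parallel()
	testRaftRestartCluster(t, false)
}

func TestRaftRestartClusterStaggered(t *testing.T) {
	t.Parallel()
	testRaftRestartCluster(t, true)
}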
func TestRaftQuorumRecovery(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Lose a majority
	for i := uint64(1); i <= 3; i++ {
		nodes[i].Server.Stop()
		nodes[i].Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restore the majority by restarting node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)

	delete(nodes, 1)
	delete(nodes, 2)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime)
	assert.NoError(t, err)

	for _, node := range nodes {
		raftutils.CheckValue(t, clockSource, node, value)
	}
}
// This test rotates the encryption key and restarts the node - the intent is to
// try to trigger race conditions, since with more than one node consensus may
// take longer.
func TestRaftEncryptionKeyRotationStress(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)
	leader := nodes[1]

	// constantly propose values
	done, stop, restart, clusterReady := make(chan struct{}), make(chan struct{}), make(chan struct{}), make(chan struct{})
	go func() {
		counter := len(nodes)
		for {
			select {
			case <-stop:
				close(done)
				return
			case <-restart:
				// the node restarts may trigger a leadership change, so wait until the cluster has 3
				// nodes again and a leader is selected before proposing more values
				<-clusterReady
				leader = raftutils.Leader(nodes)
			default:
				counter++
				raftutils.ProposeValue(t, leader, DefaultProposalTime, fmt.Sprintf("id%d", counter))
			}
		}
	}()

	for i := 0; i < 30; i++ {
		// rotate the encryption key
		nodes[3].KeyRotator.QueuePendingKey([]byte(fmt.Sprintf("newKey%d", i)))
		nodes[3].KeyRotator.RotationNotify() <- struct{}{}

		require.NoError(t, raftutils.PollFunc(clockSource, func() error {
			if nodes[3].KeyRotator.GetKeys().PendingDEK == nil {
				return nil
			}
			return fmt.Errorf("not done rotating yet")
		}))

		// restart the node and wait for everything to settle and a leader to be elected
		nodes[3].Server.Stop()
		nodes[3].ShutdownRaft()
		restart <- struct{}{}
		nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
		raftutils.AdvanceTicks(clockSource, 1)
		raftutils.WaitForCluster(t, clockSource, nodes)
		clusterReady <- struct{}{}
	}

	close(stop)
	<-done
}
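// The KeyRotator above is exercised through three calls: QueuePendingKey stages
// a new DEK, RotationNotify kicks the raft node into rotating, and
// GetKeys().PendingDEK drains to nil once the logs have been re-encrypted. A
// minimal statement of that contract, as inferred from this test alone
// (hypothetical; the real interface may carry more methods):
//
//	type keyRotator interface {
//		QueuePendingKey(key []byte)    // stage a new DEK to rotate to
//		RotationNotify() chan struct{} // signal the node to start rotating
//		GetKeys() raft.EncryptionKeys  // PendingDEK is nil once rotation is done
//	}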
func TestRaftUnreachableNode(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Add a new node
	nodes[2] = raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address})
	err := nodes[2].JoinAndStart(ctx)
	require.NoError(t, err, "can't join cluster")

	go nodes[2].Run(ctx)

	// Stop the raft server of the second node on purpose after joining
	nodes[2].Server.Stop()
	nodes[2].Listener.Close()

	raftutils.AdvanceTicks(clockSource, 5)
	time.Sleep(100 * time.Millisecond)

	wrappedListener := raftutils.RecycleWrappedListener(nodes[2].Listener)
	securityConfig := nodes[2].SecurityConfig
	serverOpts := []grpc.ServerOption{grpc.Creds(securityConfig.ServerTLSCreds)}
	s := grpc.NewServer(serverOpts...)
	nodes[2].Server = s
	raft.Register(s, nodes[2].Node)

	go func() {
		// After stopping, we should receive an error from Serve
		assert.Error(t, s.Serve(wrappedListener))
	}()

	raftutils.WaitForCluster(t, clockSource, nodes)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime)
	assert.NoError(t, err, "failed to propose value")

	// All nodes should have the value in the physical store
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	raftutils.CheckValue(t, clockSource, nodes[2], value)
}
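// TestRaftUnreachableNode above revives a closed listener via
// raftutils.RecycleWrappedListener. A minimal sketch of the underlying idea
// (hypothetical; the real testutils wrapper may differ, and this assumes "net"
// is imported): Close only marks the wrapper closed, leaving the underlying
// socket open so it can be handed to a fresh wrapper and served again.

type recyclableListener struct {
	net.Listener               // underlying socket; never actually closed here
	closed chan struct{}       // signals that this wrapper was "closed"
}

func (l *recyclableListener) Close() error {
	select {
	case <-l.closed:
	default:
		close(l.closed)
	}
	return nil // keep the real socket open for recycling
}

// recycle hands the still-open socket to a brand-new wrapper.
func recycle(old *recyclableListener) *recyclableListener {
	return &recyclableListener{Listener: old.Listener, closed: make(chan struct{})}
}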
func TestRaftWipedState(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Stop node 3
	nodes[3].Server.Stop()
	nodes[3].ShutdownRaft()

	// Remove its state
	os.RemoveAll(nodes[3].StateDir)

	raftutils.AdvanceTicks(clockSource, 5)

	// Restart node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)

	// Make sure this doesn't panic.
	raftutils.PollFuncWithTimeout(clockSource, func() error { return errors.New("keep the poll going") }, time.Second)
}
func TestRaftForceNewCluster(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id1")
	assert.NoError(t, err, "failed to propose value")

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	toClean := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
	}
	raftutils.TeardownCluster(t, toClean)
	delete(nodes, 2)
	delete(nodes, 3)

	// Only restart the first node with the force-new-cluster option
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain only one node (self)
	assert.Equal(t, len(nodes[1].GetMemberlist()), 1)

	// Add 2 more members
	nodes[2] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
		3: nodes[3],
	}
	defer raftutils.TeardownCluster(t, newCluster)

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
func TestGCWAL(t *testing.T) {
	t.Parallel()

	// Additional log entries from cluster setup, leader election
	extraLogEntries := 5
	// Number of large entries to propose
	proposals := 8

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// Snapshot should have been triggered just as the WAL rotated, so
	// both WAL files should be preserved
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 2 {
			return fmt.Errorf("expected 2 WAL files, found %d", walCount)
		}
		return nil
	}))

	raftutils.TeardownCluster(t, nodes)

	// Repeat this test, but trigger the snapshot after the WAL has rotated
	proposals++
	nodes, clockSource = raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// This time only one WAL file should be saved.
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 1 {
			return fmt.Errorf("expected 1 WAL file, found %d", walCount)
		}
		return nil
	}))

	// Restart the whole cluster
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	for k, node := range nodes {
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Is the data intact after restart?
	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals {
					err = fmt.Errorf("expected %d nodes, got %d", proposals, len(allNodes))
					return
				}
			})
			return err
		}))
	}

	// It should still be possible to propose values
	_, err := raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "newnode")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals+1 {
					err = fmt.Errorf("expected %d nodes, got %d", proposals+1, len(allNodes))
					return
				}
			})
			return err
		}))
	}
}
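// proposeLargeValue is referenced by TestGCWAL but not defined in this excerpt.
// A sketch of what it plausibly looks like, assuming it mirrors
// raftutils.ProposeValue while padding the node with ~10kB of data so a handful
// of proposals is enough to rotate the WAL (the padding field and exact size
// are assumptions):

func proposeLargeValue(t *testing.T, raftNode *raftutils.TestNode, proposalTime time.Duration, nodeID ...string) (*api.Node, error) {
	nodeIDStr := "id1"
	if len(nodeID) != 0 {
		nodeIDStr = nodeID[0]
	}

	// Pad the proposal so each log entry is large.
	padding := make([]byte, 10000)
	for i := range padding {
		padding[i] = 'a'
	}

	node := &api.Node{
		ID: nodeIDStr,
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Name:   nodeIDStr,
				Labels: map[string]string{"largeprop": string(padding)},
			},
		},
	}

	storeActions := []api.StoreAction{
		{
			Action: api.StoreActionKindCreate,
			Target: &api.StoreAction_Node{Node: node},
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), proposalTime)
	defer cancel()

	err := raftNode.ProposeValue(ctx, storeActions, func() {
		assert.NoError(t, raftNode.MemoryStore().ApplyStoreActions(storeActions))
	})
	if err != nil {
		return nil, err
	}
	return node, nil
}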