Example #1
func testRaftRestartCluster(t *testing.T, stagger bool) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], "id1")
	assert.NoError(t, err, "failed to propose value")

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restart all nodes
	i := 0
	for k, node := range nodes {
		if stagger && i != 0 {
			raftutils.AdvanceTicks(clockSource, 1)
		}
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
		i++
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
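
The stagger parameter controls whether the nodes come back simultaneously or one tick apart. Upstream, this helper is driven by two thin wrapper tests; a minimal sketch of what they look like (the wrapper names are illustrative):

func TestRaftRestartClusterSimultaneously(t *testing.T) {
	t.Parallel()
	testRaftRestartCluster(t, false)
}

func TestRaftRestartClusterStaggered(t *testing.T) {
	t.Parallel()
	testRaftRestartCluster(t, true)
}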
Example #2
func TestRaftQuorumRecovery(t *testing.T) {
	t.Parallel()

	// Bring up a 5-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Lose a majority
	for i := uint64(1); i <= 3; i++ {
		nodes[i].Server.Stop()
		nodes[i].Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restore the majority by restarting node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)

	delete(nodes, 1)
	delete(nodes, 2)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, raftutils.Leader(nodes))
	assert.NoError(t, err)

	for _, node := range nodes {
		raftutils.CheckValue(t, clockSource, node, value)
	}
}
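
Several of these examples retry assertions with raftutils.PollFunc while driving a fake clock so raft's internal timers can fire. A hypothetical sketch of such a helper, assuming the fakeclock package used elsewhere in these tests:

// pollFunc is an illustrative stand-in for raftutils.PollFunc: retry f,
// nudging the fake clock forward between attempts, until f succeeds or the
// retry budget runs out.
func pollFunc(clockSource *fakeclock.FakeClock, f func() error) error {
	var err error
	for i := 0; i < 100; i++ {
		if err = f(); err == nil {
			return nil
		}
		clockSource.Increment(time.Second)
		time.Sleep(10 * time.Millisecond) // let background goroutines observe the tick
	}
	return err
}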
Example #3
// This test rotates the encryption key and restarts the node - the intent is to try to
// trigger race conditions when there is more than one node, since consensus may take longer.
func TestRaftEncryptionKeyRotationStress(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)
	leader := nodes[1]

	// constantly propose values
	done, stop, restart, clusterReady := make(chan struct{}), make(chan struct{}), make(chan struct{}), make(chan struct{})
	go func() {
		counter := len(nodes)
		for {
			select {
			case <-stop:
				close(done)
				return
			case <-restart:
				// the node restarts may trigger a leadership change, so wait until the cluster has 3
				// nodes again and a leader is selected before proposing more values
				<-clusterReady
				leader = raftutils.Leader(nodes)
			default:
				counter++
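				// The return values of ProposeValue are deliberately
				// discarded: while node 3 is down or a new leader is being
				// elected, proposals can legitimately fail.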
				raftutils.ProposeValue(t, leader, DefaultProposalTime, fmt.Sprintf("id%d", counter))
			}
		}
	}()

	for i := 0; i < 30; i++ {
		// rotate the encryption key
		nodes[3].KeyRotator.QueuePendingKey([]byte(fmt.Sprintf("newKey%d", i)))
		nodes[3].KeyRotator.RotationNotify() <- struct{}{}

		require.NoError(t, raftutils.PollFunc(clockSource, func() error {
			if nodes[3].KeyRotator.GetKeys().PendingDEK == nil {
				return nil
			}
			return fmt.Errorf("not done rotating yet")
		}))

		// restart the node and wait for everything to settle and a leader to be elected
		nodes[3].Server.Stop()
		nodes[3].ShutdownRaft()
		restart <- struct{}{}
		nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
		raftutils.AdvanceTicks(clockSource, 1)

		raftutils.WaitForCluster(t, clockSource, nodes)
		clusterReady <- struct{}{}
	}

	close(stop)
	<-done
}
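
The KeyRotator above is a test double for swarmkit's encryption-key rotator. The surface this stress test relies on looks roughly like the interface below; the method names follow the calls made in the test, everything else is an assumption:

// keyRotator captures the operations the stress test exercises: GetKeys
// exposes the current and pending data-encryption keys, QueuePendingKey
// stages a new pending key, and RotationNotify returns the channel used to
// kick the raft loop into performing the rotation.
type keyRotator interface {
	GetKeys() raft.EncryptionKeys
	QueuePendingKey(key []byte)
	RotationNotify() chan struct{}
}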
Example #4
func TestRaftUnreachableNode(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Add a new node
	nodes[2] = raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address})

	err := nodes[2].JoinAndStart(ctx)
	require.NoError(t, err, "can't join cluster")

	go nodes[2].Run(ctx)

	// Deliberately stop the second node's Raft server after it joins
	nodes[2].Server.Stop()
	nodes[2].Listener.Close()

	raftutils.AdvanceTicks(clockSource, 5)
	time.Sleep(100 * time.Millisecond)

	wrappedListener := raftutils.RecycleWrappedListener(nodes[2].Listener)
	securityConfig := nodes[2].SecurityConfig
	serverOpts := []grpc.ServerOption{grpc.Creds(securityConfig.ServerTLSCreds)}
	s := grpc.NewServer(serverOpts...)

	nodes[2].Server = s
	raft.Register(s, nodes[2].Node)

	go func() {
		// After stopping, we should receive an error from Serve
		assert.Error(t, s.Serve(wrappedListener))
	}()

	raftutils.WaitForCluster(t, clockSource, nodes)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime)
	assert.NoError(t, err, "failed to propose value")

	// All nodes should have the value in the physical store
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	raftutils.CheckValue(t, clockSource, nodes[2], value)
}
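
RecycleWrappedListener is what lets the test rebuild the gRPC server on the same address. A conceptual sketch of the idea, not swarmkit's actual implementation: wrap the listener so Close does not release the underlying socket, then hand the same socket to the replacement server.

// wrappedListener intercepts Close so the underlying TCP socket, and
// therefore the node's advertised address, survives a server restart.
type wrappedListener struct {
	net.Listener
}

// Close pretends to close; the socket stays open so it can be recycled.
func (l *wrappedListener) Close() error {
	return nil
}

// recycle wraps the same socket again for the next server.
func recycle(l *wrappedListener) net.Listener {
	return &wrappedListener{Listener: l.Listener}
}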
Example #5
func TestRaftWipedState(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Stop node 3
	nodes[3].Server.Stop()
	nodes[3].ShutdownRaft()

	// Remove its state
	os.RemoveAll(nodes[3].StateDir)

	raftutils.AdvanceTicks(clockSource, 5)

	// Restart node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)

	// Make sure this doesn't panic.
	raftutils.PollFuncWithTimeout(clockSource, func() error { return errors.New("keep the poll going") }, time.Second)
}
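
PollFuncWithTimeout is used here purely to keep the fake clock ticking for a second while verifying that the restarted, wiped node does not panic; the callback never succeeds by design. A sketch of such a helper, under the same assumptions as the pollFunc sketch earlier:

// pollFuncWithTimeout retries f while advancing the fake clock, giving up
// once the wall-clock deadline passes. The last error is returned so callers
// can tell a timeout from success.
func pollFuncWithTimeout(clockSource *fakeclock.FakeClock, f func() error, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	var err error
	for {
		if err = f(); err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return err
		}
		clockSource.Increment(time.Second)
		time.Sleep(10 * time.Millisecond)
	}
}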
Example #6
func TestRaftUnreachableNode(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	ctx := context.Background()
	// Add a new node, but don't start its server yet
	n := raftutils.NewNode(t, clockSource, tc, raft.NewNodeOptions{JoinAddr: nodes[1].Address})
	go n.Run(ctx)

	raftutils.AdvanceTicks(clockSource, 5)
	time.Sleep(100 * time.Millisecond)

	raft.Register(n.Server, n.Node)

	// Now start the new node's server
	go func() {
		// After stopping, we should receive an error from Serve
		assert.Error(t, n.Server.Serve(n.Listener))
	}()

	nodes[2] = n
	raftutils.WaitForCluster(t, clockSource, nodes)

	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1])
	assert.NoError(t, err, "failed to propose value")

	// All nodes should have the value in the physical store
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	raftutils.CheckValue(t, clockSource, nodes[2], value)
}
Example #7
func TestRaftForceNewCluster(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], "id1")
	assert.NoError(t, err, "failed to propose value")

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Len(t, nodes[uint64(i)].GetMemberlist(), 3)
	}

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	toClean := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
	}
	raftutils.TeardownCluster(t, toClean)
	delete(nodes, 2)
	delete(nodes, 3)

	// Only restart the first node with force-new-cluster option
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain only one node (self)
	assert.Len(t, nodes[1].GetMemberlist(), 1)

	// Add 2 more members
	nodes[2] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
		3: nodes[3],
	}
	defer raftutils.TeardownCluster(t, newCluster)

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Len(t, nodes[uint64(i)].GetMemberlist(), 3)
	}

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
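
The true passed to RestartNode requests a force-new-cluster boot: the node replays its WAL, but the persisted membership is rewritten so the local node is the only voter left, which is why the memberlist shrinks to one before the two join nodes are added back. Conceptually (an illustration, not swarmkit's code):

// keepOnlySelf models the membership rewrite a force-new-cluster restart
// performs: every member except the local node is dropped, so the survivor
// boots as a single-node cluster with a quorum of one.
type member struct {
	RaftID uint64
	Addr   string
}

func keepOnlySelf(members []member, selfID uint64) []member {
	kept := members[:0]
	for _, m := range members {
		if m.RaftID == selfID {
			kept = append(kept, m)
		}
	}
	return kept
}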
Example #8
func TestGCWAL(t *testing.T) {
	t.Parallel()

	// Additional log entries generated by cluster setup and leader election
	extraLogEntries := 5
	// Number of large entries to propose
	proposals := 8

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// Snapshot should have been triggered just as the WAL rotated, so
	// both WAL files should be preserved
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 2 {
			return fmt.Errorf("expected 2 WAL files, found %d", walCount)
		}
		return nil
	}))

	raftutils.TeardownCluster(t, nodes)

	// Repeat this test, but trigger the snapshot after the WAL has rotated
	proposals++
	nodes, clockSource = raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// This time only one WAL file should be saved.
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}

		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 1 {
			return fmt.Errorf("expected 1 WAL file, found %d", walCount)
		}
		return nil
	}))

	// Restart the whole cluster
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	for k, node := range nodes {
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Is the data intact after restart?
	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals {
					err = fmt.Errorf("expected %d nodes, got %d", proposals, len(allNodes))
					return
				}
			})
			return err
		}))
	}

	// It should still be possible to propose values
	_, err := raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "newnode")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals+1 {
					err = fmt.Errorf("expected %d nodes, got %d", proposals+1, len(allNodes))
					return
				}
			})
			return err
		}))
	}
}
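
proposeLargeValue is referenced above but not shown. A sketch of what such a helper could look like, modeled on how these tests use ProposeValue; the 10 KB padding size, the label key, and the callback contract are assumptions:

// proposeLargeValue proposes a node padded with a large label value so each
// raft entry is big enough to force WAL rotation within a few proposals.
func proposeLargeValue(t *testing.T, raftNode *raftutils.TestNode, proposalTime time.Duration, nodeID ...string) (*api.Node, error) {
	nodeIDStr := "id1"
	if len(nodeID) != 0 {
		nodeIDStr = nodeID[0]
	}
	padding := make([]byte, 10000)
	for i := range padding {
		padding[i] = 'a'
	}
	node := &api.Node{
		ID: nodeIDStr,
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Name: nodeIDStr,
				// The label value carries the bulk of the entry's size.
				Labels: map[string]string{"largeprop": string(padding)},
			},
		},
	}
	storeActions := []api.StoreAction{
		{
			Action: api.StoreActionKindCreate,
			Target: &api.StoreAction_Node{Node: node},
		},
	}
	ctx, cancel := context.WithTimeout(context.Background(), proposalTime)
	defer cancel()
	err := raftNode.ProposeValue(ctx, storeActions, func() {
		// Mirror the committed proposal into the local store.
		assert.NoError(t, raftNode.MemoryStore().ApplyStoreActions(storeActions))
	})
	if err != nil {
		return nil, err
	}
	return node, nil
}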