Example no. 1
func TestClusterStoreWithPasshphraseForRootCA(t *testing.T) {
	if !*integrationTests {
		t.Skip("integration test")
	}
	// Start with a passphrase set from the very beginning
	os.Setenv(ca.PassphraseENVVar, "password1")
	defer os.Setenv(ca.PassphraseENVVar, "")
	defer os.Setenv(ca.PassphraseENVVarPrev, "")

	mCount, aCount := 5, 15
	c := createManagersCluster(t, mCount, aCount)
	require.NoError(t, testutils.PollFunc(nil, c.pollRegister))

	// Get the leader
	leader, err := c.leader()
	assert.NoError(t, err)

	// check key material in store
	var clusters []*api.Cluster
	leader.m.RaftNode.MemoryStore().View(func(tx store.ReadTx) {
		clusters, err = store.FindClusters(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, clusters, 1, "there should be one cluster")
	assert.NotNil(t, clusters[0].RootCA.CACert)
	assert.NotNil(t, clusters[0].RootCA.CAKey)
	assert.Contains(t, string(clusters[0].RootCA.CAKey), "Proc-Type: 4,ENCRYPTED")
}
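
Both this test and Example no. 6 below assert that the stored CA key carries the legacy PEM header "Proc-Type: 4,ENCRYPTED". A minimal, hypothetical helper (not part of swarmkit) showing how that marker can be checked with the standard library instead of a substring match:

import "encoding/pem"

// isEncryptedPEMKey reports whether a PEM-encoded private key carries the
// legacy "Proc-Type: 4,ENCRYPTED" header - the same marker the assertions
// above look for with assert.Contains.
func isEncryptedPEMKey(keyPEM []byte) bool {
	block, _ := pem.Decode(keyPEM)
	if block == nil {
		return false
	}
	return block.Headers["Proc-Type"] == "4,ENCRYPTED"
}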
Example no. 2
func testRaftRestartCluster(t *testing.T, stagger bool) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], "id1")
	assert.NoError(t, err, "failed to propose value")

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	// Restart all nodes
	i := 0
	for k, node := range nodes {
		if stagger && i != 0 {
			raftutils.AdvanceTicks(clockSource, 1)
		}
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
		i++
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
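
These tests rely on raftutils.PollFunc and testutils.PollFunc to wait for eventually-consistent state. A stripped-down sketch of that retry pattern, independent of the swarmkit helpers (the name, timeout, and interval here are illustrative; the real helpers also drive a fake clock so raft timers advance deterministically):

import (
	"fmt"
	"time"
)

// pollUntil retries check until it returns nil or the deadline passes,
// returning the last error on timeout.
func pollUntil(timeout, interval time.Duration, check func() error) error {
	deadline := time.Now().Add(timeout)
	for {
		err := check()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition not met before deadline: %v", err)
		}
		time.Sleep(interval)
	}
}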
Example no. 3
// This test rotates the encryption key and restarts the node - the intent is to try to trigger
// race conditions when there is more than one node, since consensus may then take longer.
func TestRaftEncryptionKeyRotationStress(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)
	leader := nodes[1]

	// constantly propose values
	done, stop, restart, clusterReady := make(chan struct{}), make(chan struct{}), make(chan struct{}), make(chan struct{})
	go func() {
		counter := len(nodes)
		for {
			select {
			case <-stop:
				close(done)
				return
			case <-restart:
				// the node restarts may trigger a leadership change, so wait until the cluster has 3
				// nodes again and a leader is selected before proposing more values
				<-clusterReady
				leader = raftutils.Leader(nodes)
			default:
				counter += 1
				raftutils.ProposeValue(t, leader, DefaultProposalTime, fmt.Sprintf("id%d", counter))
			}
		}
	}()

	for i := 0; i < 30; i++ {
		// rotate the encryption key
		nodes[3].KeyRotator.QueuePendingKey([]byte(fmt.Sprintf("newKey%d", i)))
		nodes[3].KeyRotator.RotationNotify() <- struct{}{}

		require.NoError(t, raftutils.PollFunc(clockSource, func() error {
			if nodes[3].KeyRotator.GetKeys().PendingDEK == nil {
				return nil
			}
			return fmt.Errorf("not done rotating yet")
		}))

		// restart the node and wait for everything to settle and a leader to be elected
		nodes[3].Server.Stop()
		nodes[3].ShutdownRaft()
		restart <- struct{}{}
		nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
		raftutils.AdvanceTicks(clockSource, 1)

		raftutils.WaitForCluster(t, clockSource, nodes)
		clusterReady <- struct{}{}
	}

	close(stop)
	<-done
}
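
The stop/done/restart/clusterReady channels above coordinate a background proposer with the node restarts in the main loop. A self-contained sketch of the same pattern with generic names (nothing here is swarmkit API):

// runProposer keeps calling propose until stop is closed. When restart fires
// it blocks until ready signals, mirroring how the test pauses proposals
// until the cluster has re-elected a leader.
func runProposer(propose func(i int), stop, restart, ready <-chan struct{}) <-chan struct{} {
	done := make(chan struct{})
	go func() {
		defer close(done)
		for i := 0; ; i++ {
			select {
			case <-stop:
				return
			case <-restart:
				<-ready
			default:
				propose(i)
			}
		}
	}()
	return done
}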
Example no. 4
func TestCluster(t *testing.T) {
	if !*integrationTests {
		t.Skip("integration test")
	}
	c := createManagersCluster(t, 5, 15)
	defer c.Close()
	assert.NoError(t, testutils.PollFunc(nil, c.pollRegister))
	m := c.ms[0]
	nCount := m.m.Dispatcher.NodeCount()
	assert.Equal(t, 15, nCount)
}
Example no. 5
func TestCanRemoveMember(t *testing.T) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Stop node 2 and node 3 (2 nodes out of 3)
	nodes[2].Server.Stop()
	nodes[2].Shutdown()
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 2 and Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 2 to be unreachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Removing node 3 should fail
	ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
	err := nodes[1].RemoveMember(ctx, 3)
	assert.Error(t, err)
	assert.Equal(t, err, raft.ErrCannotRemoveMember)
	members := nodes[1].GetMemberlist()
	assert.Equal(t, len(members), 3)

	// Restart node 2 and node 3
	nodes[2] = raftutils.RestartNode(t, clockSource, nodes[2], false)
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Removing node 3 should succeed
	ctx, _ = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[3].Config.ID)
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[3].Config.ID])
	assert.Equal(t, len(members), 2)

	// Removing node 2 should fail
	ctx, _ = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[2].Config.ID)
	assert.Error(t, err)
	assert.Equal(t, err, raft.ErrCannotRemoveMember)
	assert.Equal(t, len(members), 2)
}
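
The contexts created with context.WithTimeout in this test (and in several below) discard the cancel function, which go vet flags because the timer is never released early. A purely illustrative sketch of the more idiomatic form; the tests still pass as written:

import (
	"context"
	"time"
)

// withDeadline runs fn under a timeout and releases the timer as soon as fn
// returns, instead of discarding the cancel function.
func withDeadline(timeout time.Duration, fn func(ctx context.Context) error) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	return fn(ctx)
}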
Example no. 6
func TestClusterStorePasshphraseRotationForRootCA(t *testing.T) {
	if !*integrationTests {
		t.Skip("integration test")
	}
	os.Setenv(ca.PassphraseENVVar, "password1")
	defer os.Setenv(ca.PassphraseENVVar, "")
	defer os.Setenv(ca.PassphraseENVVarPrev, "")

	mCount, aCount := 5, 15
	c := createManagersCluster(t, mCount, aCount)
	require.NoError(t, testutils.PollFunc(nil, c.pollRegister))

	// Get the leader
	leader, err := c.leader()
	assert.NoError(t, err)

	// check key material in store
	var clusters []*api.Cluster
	leader.m.RaftNode.MemoryStore().View(func(tx store.ReadTx) {
		clusters, err = store.FindClusters(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, clusters, 1, "there should be one cluster")
	assert.NotNil(t, clusters[0].RootCA.CACert)
	assert.NotNil(t, clusters[0].RootCA.CAKey)
	assert.Contains(t, string(clusters[0].RootCA.CAKey), "Proc-Type: 4,ENCRYPTED")

	firstEncryptedKey := clusters[0].RootCA.CAKey

	// Set an ENV passphrase and kill the current leader
	os.Setenv(ca.PassphraseENVVarPrev, "password1")
	os.Setenv(ca.PassphraseENVVar, "password2")
	require.NoError(t, c.destroyLeader())

	// ensure that the cluster converges to the expected number of agents; we need a big timeout because of heartbeat times
	require.NoError(t, testutils.PollFuncWithTimeout(nil, c.pollRegister, 30*time.Second))

	// Get the new leader
	leader, err = c.leader()
	assert.NoError(t, err)
	// check key material in store
	leader.m.RaftNode.MemoryStore().View(func(tx store.ReadTx) {
		clusters, err = store.FindClusters(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, clusters, 1, "there should be one cluster")
	assert.NotNil(t, clusters[0].RootCA.CACert)
	assert.NotNil(t, clusters[0].RootCA.CAKey)
	assert.Contains(t, string(clusters[0].RootCA.CAKey), "Proc-Type: 4,ENCRYPTED")
	assert.NotEqual(t, firstEncryptedKey, clusters[0].RootCA.CAKey)
}
Example no. 7
func TestClusterReelection(t *testing.T) {
	if !*integrationTests {
		t.Skip("integration test")
	}
	mCount, aCount := 5, 15
	c := createManagersCluster(t, mCount, aCount)
	require.NoError(t, testutils.PollFunc(nil, c.pollRegister))

	require.NoError(t, c.destroyLeader())
	// let's also take some agents down in the meantime
	require.NoError(t, c.destroyAgents(5))
	// ensure that the cluster converges to the expected number of agents; we need a big timeout because of heartbeat times
	require.NoError(t, testutils.PollFuncWithTimeout(nil, c.pollRegister, 30*time.Second))

	leader, err := c.leader()
	assert.NoError(t, err)

	// check nodes in store
	var nodes []*api.Node
	leader.m.RaftNode.MemoryStore().View(func(tx store.ReadTx) {
		ns, err := store.FindNodes(tx, store.All)
		assert.NoError(t, err)
		for _, n := range ns {
			if n.Spec.Role == api.NodeRoleWorker {
				nodes = append(nodes, n)
			}
		}
	})
	assert.NoError(t, err)
	assert.Len(t, nodes, aCount, "all worker nodes should be in the store")
	var downAgentsCount int
	for _, node := range nodes {
		if node.Status.State == api.NodeStatus_DOWN {
			downAgentsCount++
			continue
		}
		assert.Equal(t, api.NodeStatus_READY, node.Status.State, "there should be only down and ready nodes at this point")
	}
	assert.Equal(t, 5, downAgentsCount, "unexpected number of down agents")
}
Example no. 8
func TestCanRemoveMember(t *testing.T) {
	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Stop node 2 and node 3 (2 nodes out of 3)
	nodes[2].Server.Stop()
	nodes[2].Shutdown()
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 2 and Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 2 to be unreachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Removing all nodes should fail
	for i := 1; i <= 3; i++ {
		ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
		err := nodes[1].RemoveMember(ctx, uint64(i))
		assert.Error(t, err)
		assert.Equal(t, err, raft.ErrCannotRemoveMember)
		members := nodes[1].GetMemberlist()
		assert.Equal(t, len(members), 3)
	}

	// Restart node 2 and node 3
	nodes[2] = raftutils.RestartNode(t, clockSource, nodes[2], false)
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 2 and Node 3 should be listed as Reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 2 to be reachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Stop Node 3 (1 node out of 3)
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Removing node 2 should fail (this would break the quorum)
	ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
	err := nodes[1].RemoveMember(ctx, nodes[2].Config.ID)
	assert.Error(t, err)
	assert.Equal(t, err, raft.ErrCannotRemoveMember)
	members := nodes[1].GetMemberlist()
	assert.Equal(t, len(members), 3)

	// Removing node 3 works fine because it is already unreachable
	ctx, _ = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[3].Config.ID)
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[3].Config.ID])
	assert.Equal(t, len(members), 2)

	// Add back node 3
	raftutils.ShutdownNode(nodes[3])
	delete(nodes, 3)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)

	// Node 2 and Node 3 should be listed as Reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[2].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 2 to be reachable")
		}
		if members[nodes[3].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Removing node 3 should succeed
	ctx, _ = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[3].Config.ID)
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[3].Config.ID])
	assert.Equal(t, len(members), 2)

	// Removing node 2 should succeed
	ctx, _ = context.WithTimeout(context.Background(), 10*time.Second)
	err = nodes[1].RemoveMember(ctx, nodes[2].Config.ID)
	assert.NoError(t, err)
	members = nodes[1].GetMemberlist()
	assert.Nil(t, members[nodes[2].Config.ID])
	assert.Equal(t, len(members), 1)
}
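
The assertions in this version of TestCanRemoveMember are consistent with a reachability-based quorum safeguard: a removal is refused when the members that would remain reachable could not form a majority of the shrunken cluster. A hypothetical sketch of that arithmetic (the function and parameter names are illustrative, not swarmkit's implementation; note that the shorter variant in Example no. 5 is stricter and also rejects the final removal down to one member):

// canRemoveMember reports whether removing one member still leaves a
// reachable majority; the quorum of n members is n/2 + 1.
func canRemoveMember(totalMembers, reachableMembers int, removingReachable bool) bool {
	remainingTotal := totalMembers - 1
	remainingReachable := reachableMembers
	if removingReachable {
		remainingReachable--
	}
	quorum := remainingTotal/2 + 1
	return remainingReachable >= quorum
}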
Example no. 9
func TestGetRemoteCA(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	shaHash := sha256.New()
	shaHash.Write(tc.RootCA.Cert)
	md := shaHash.Sum(nil)
	mdStr := hex.EncodeToString(md)

	d, err := digest.Parse("sha256:" + mdStr)
	require.NoError(t, err)

	downloadedRootCA, err := ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
	require.NoError(t, err)
	require.Equal(t, downloadedRootCA.Cert, tc.RootCA.Cert)

	// update the test CA to include a multi-certificate bundle as the root - the digest
	// we use to verify with must be the digest of the whole bundle
	tmpDir, err := ioutil.TempDir("", "GetRemoteCA")
	require.NoError(t, err)
	defer os.RemoveAll(tmpDir)
	paths := ca.NewConfigPaths(tmpDir)

	otherRootCA, err := ca.CreateRootCA("other", paths.RootCA)
	require.NoError(t, err)

	comboCertBundle := append(tc.RootCA.Cert, otherRootCA.Cert...)
	require.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		cluster := store.GetCluster(tx, tc.Organization)
		cluster.RootCA.CACert = comboCertBundle
		cluster.RootCA.CAKey = tc.RootCA.Key
		return store.UpdateCluster(tx, cluster)
	}))
	require.NoError(t, raftutils.PollFunc(nil, func() error {
		_, err := ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
		if err == nil {
			return fmt.Errorf("testca's rootca hasn't updated yet")
		}
		require.Contains(t, err.Error(), "remote CA does not match fingerprint")
		return nil
	}))

	// If we provide the right digest, the root CA is updated and we can validate
	// certs signed by either one
	d = digest.FromBytes(comboCertBundle)
	downloadedRootCA, err = ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
	require.NoError(t, err)
	require.Equal(t, comboCertBundle, downloadedRootCA.Cert)
	require.Equal(t, 2, len(downloadedRootCA.Pool.Subjects()))

	for _, rootCA := range []ca.RootCA{tc.RootCA, otherRootCA} {
		krw := ca.NewKeyReadWriter(paths.Node, nil, nil)
		_, err := rootCA.IssueAndSaveNewCertificates(krw, "cn", "ou", "org")
		require.NoError(t, err)

		certPEM, _, err := krw.Read()
		require.NoError(t, err)

		cert, err := helpers.ParseCertificatesPEM(certPEM)
		require.NoError(t, err)

		chains, err := cert[0].Verify(x509.VerifyOptions{
			Roots: downloadedRootCA.Pool,
		})
		require.NoError(t, err)
		require.Len(t, chains, 1)
	}
}
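
The first half of the test hand-rolls the digest with sha256 and hex before parsing it back, while the bundle check uses digest.FromBytes; the two constructions yield the same value. A small sketch of that equivalence (assuming the opencontainers go-digest package the test already uses; certDigest is a made-up name):

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"

	"github.com/opencontainers/go-digest"
)

// certDigest builds the digest both ways and confirms they agree, which is
// why GetRemoteCA can be verified against either construction.
func certDigest(certPEM []byte) (digest.Digest, error) {
	sum := sha256.Sum256(certPEM)
	parsed, err := digest.Parse("sha256:" + hex.EncodeToString(sum[:]))
	if err != nil {
		return "", err
	}
	if parsed != digest.FromBytes(certPEM) {
		return "", fmt.Errorf("digest mismatch")
	}
	return parsed, nil
}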
Example no. 10
func TestGCWAL(t *testing.T) {
	t.Parallel()

	// Additional log entries from cluster setup, leader election
	extraLogEntries := 5
	// Number of large entries to propose
	proposals := 8

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// Snapshot should have been triggered just as the WAL rotated, so
	// both WAL files should be preserved
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 2 {
			return fmt.Errorf("expected 2 WAL files, found %d", walCount)
		}
		return nil
	}))

	raftutils.TeardownCluster(t, nodes)

	// Repeat this test, but trigger the snapshot after the WAL has rotated
	proposals++
	nodes, clockSource = raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: uint64(proposals + extraLogEntries), LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	for i := 0; i != proposals; i++ {
		_, err := proposeLargeValue(t, nodes[1], DefaultProposalTime, fmt.Sprintf("id%d", i))
		assert.NoError(t, err, "failed to propose value")
	}

	time.Sleep(250 * time.Millisecond)

	// This time only one WAL file should be saved.
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "snap"))
		if err != nil {
			return err
		}

		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
		}

		dirents, err = ioutil.ReadDir(filepath.Join(nodes[1].StateDir, "wal"))
		if err != nil {
			return err
		}
		var walCount int
		for _, f := range dirents {
			if strings.HasSuffix(f.Name(), ".wal") {
				walCount++
			}
		}
		if walCount != 1 {
			return fmt.Errorf("expected 1 WAL file, found %d", walCount)
		}
		return nil
	}))

	// Restart the whole cluster
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	for k, node := range nodes {
		nodes[k] = raftutils.RestartNode(t, clockSource, node, false)
	}
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Is the data intact after restart?
	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals {
					err = fmt.Errorf("expected %d nodes, got %d", proposals, len(allNodes))
					return
				}
			})
			return err
		}))
	}

	// It should still be possible to propose values
	_, err := raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, "newnode")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != proposals+1 {
					err = fmt.Errorf("expected %d nodes, got %d", proposals, len(allNodes))
					return
				}
			})
			return err
		}))
	}
}
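
The WAL-counting loops above repeat the same directory scan twice. A small helper sketch of that pattern (illustrative only; the test keeps it inline):

import (
	"io/ioutil"
	"strings"
)

// countFilesWithSuffix returns how many entries in dir have the given name
// suffix, e.g. ".wal" under <state-dir>/wal.
func countFilesWithSuffix(dir, suffix string) (int, error) {
	dirents, err := ioutil.ReadDir(dir)
	if err != nil {
		return 0, err
	}
	count := 0
	for _, f := range dirents {
		if strings.HasSuffix(f.Name(), suffix) {
			count++
		}
	}
	return count, nil
}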
Example no. 11
func TestRaftSnapshot(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 9, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5", "id6", "id7", "id8", "id9", "id10", "id11", "id12"}
	values := make([]*api.Node, len(nodeIDs))
	snapshotFilenames := make(map[uint64]string, 4)

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// None of the nodes should have snapshot files yet
	for _, node := range nodes {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Check all nodes have all the data.
	// This also acts as a synchronization point so that the next value we
	// propose will arrive as a separate message to the raft state machine,
	// and it is guaranteed to have the right cluster settings when
	// deciding whether to create a new snapshot.
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:3], values)

	// Propose a 4th value
	values[3], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// All nodes should now have a snapshot file
	for nodeID, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d", len(dirents))
			}
			snapshotFilenames[nodeID] = dirents[0].Name()
			return nil
		}))
	}

	// Add a node to the cluster
	raftutils.AddRaftNode(t, clockSource, nodes, tc)

	// It should get a copy of the snapshot
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[4].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(dirents))
		}
		snapshotFilenames[4] = dirents[0].Name()
		return nil
	}))

	// It should know about the other nodes
	stripMembers := func(memberList map[uint64]*api.RaftMember) map[uint64]*api.RaftMember {
		raftNodes := make(map[uint64]*api.RaftMember)
		for k, v := range memberList {
			raftNodes[k] = &api.RaftMember{
				RaftID: v.RaftID,
				Addr:   v.Addr,
			}
		}
		return raftNodes
	}
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[4].GetMemberlist()))

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:4], values)

	// Propose more values to provoke a second snapshot
	for i := 4; i != len(nodeIDs); i++ {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[i])
		assert.NoError(t, err, "failed to propose value")
	}

	// All nodes should have a snapshot under a *different* name
	for nodeID, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeID)
			}
			if dirents[0].Name() == snapshotFilenames[nodeID] {
				return fmt.Errorf("snapshot %s did not get replaced", snapshotFilenames[nodeID])
			}
			return nil
		}))
	}

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
Example no. 12
func TestRaftSnapshotRestart(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 10, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5", "id6", "id7"}
	values := make([]*api.Node, len(nodeIDs))

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// Take down node 3
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Propose a 4th value before the snapshot
	values[3], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// Remaining nodes shouldn't have snapshot files yet
	for _, node := range []*raftutils.TestNode{nodes[1], nodes[2]} {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Add a node to the cluster before the snapshot. This is the event
	// that triggers the snapshot.
	nodes[4] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2], 4: nodes[4]})

	// Remaining nodes should now have a snapshot file
	for nodeIdx, node := range []*raftutils.TestNode{nodes[1], nodes[2]} {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeIdx+1)
			}
			return nil
		}))
	}
	raftutils.CheckValuesOnNodes(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2]}, nodeIDs[:4], values[:4])

	// Propose a 5th value
	values[4], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[4])
	require.NoError(t, err)

	// Add another node to the cluster
	nodes[5] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2], 4: nodes[4], 5: nodes[5]})

	// New node should get a copy of the snapshot
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		dirents, err := ioutil.ReadDir(filepath.Join(nodes[5].StateDir, "snap"))
		if err != nil {
			return err
		}
		if len(dirents) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(dirents))
		}
		return nil
	}))

	dirents, err := ioutil.ReadDir(filepath.Join(nodes[5].StateDir, "snap"))
	assert.NoError(t, err)
	assert.Len(t, dirents, 1)
	raftutils.CheckValuesOnNodes(t, clockSource, map[uint64]*raftutils.TestNode{1: nodes[1], 2: nodes[2]}, nodeIDs[:5], values[:5])

	// It should know about the other nodes, including the one that was just added
	stripMembers := func(memberList map[uint64]*api.RaftMember) map[uint64]*api.RaftMember {
		raftNodes := make(map[uint64]*api.RaftMember)
		for k, v := range memberList {
			raftNodes[k] = &api.RaftMember{
				RaftID: v.RaftID,
				Addr:   v.Addr,
			}
		}
		return raftNodes
	}
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[4].GetMemberlist()))

	// Restart node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 3 should know about other nodes, including the new one
	assert.Len(t, nodes[3].GetMemberlist(), 5)
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[3].GetMemberlist()))

	// Propose yet another value, to make sure the rejoined node is still
	// receiving new logs
	values[5], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, nodeIDs[5])
	require.NoError(t, err)

	// All nodes should have all the data
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:6], values[:6])

	// Restart node 3 again. It should load the snapshot.
	nodes[3].Server.Stop()
	nodes[3].Shutdown()
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	assert.Len(t, nodes[3].GetMemberlist(), 5)
	assert.Equal(t, stripMembers(nodes[1].GetMemberlist()), stripMembers(nodes[3].GetMemberlist()))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs[:6], values[:6])

	// Propose again. Just to check consensus after this latest restart.
	values[6], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), DefaultProposalTime, nodeIDs[6])
	require.NoError(t, err)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
Example no. 13
func TestListManagerNodes(t *testing.T) {
	t.Parallel()

	tc := cautils.NewTestCA(nil)
	defer tc.Stop()
	ts := newTestServer(t)
	defer ts.Stop()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Create a node object for each of the managers
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()}))
		return nil
	}))

	// Assign one of the raft nodes to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	// There should be 3 reachable managers listed
	r, err := ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.NoError(t, err)
	assert.NotNil(t, r)
	managers := getMap(t, r.Nodes)
	assert.Len(t, ts.Server.raft.GetMemberlist(), 3)
	assert.Len(t, r.Nodes, 3)

	// Node 1 should be the leader
	for i := 1; i <= 3; i++ {
		if i == 1 {
			assert.True(t, managers[nodes[uint64(i)].Config.ID].Leader)
			continue
		}
		assert.False(t, managers[nodes[uint64(i)].Config.ID].Leader)
	}

	// All nodes should be reachable
	for i := 1; i <= 3; i++ {
		assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability)
	}

	// Add two more nodes to the cluster
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.AddRaftNode(t, clockSource, nodes, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Add node entries for these
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[4].SecurityConfig.ClientTLSCreds.NodeID()}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{ID: nodes[5].SecurityConfig.ClientTLSCreds.NodeID()}))
		return nil
	}))

	// There should be 5 reachable managers listed
	r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.NoError(t, err)
	assert.NotNil(t, r)
	managers = getMap(t, r.Nodes)
	assert.Len(t, ts.Server.raft.GetMemberlist(), 5)
	assert.Len(t, r.Nodes, 5)
	for i := 1; i <= 5; i++ {
		assert.Equal(t, api.RaftMemberStatus_REACHABLE, managers[nodes[uint64(i)].Config.ID].Reachability)
	}

	// Stop 2 nodes
	nodes[4].Server.Stop()
	nodes[4].ShutdownRaft()
	nodes[5].Server.Stop()
	nodes[5].ShutdownRaft()

	// Node 4 and Node 5 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}

		managers = getMap(t, r.Nodes)

		if len(r.Nodes) != 5 {
			return fmt.Errorf("expected 5 nodes, got %d", len(r.Nodes))
		}

		if managers[nodes[4].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 4 to be unreachable")
		}

		if managers[nodes[5].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 5 to be unreachable")
		}

		return nil
	}))

	// Restart the 2 nodes
	nodes[4] = raftutils.RestartNode(t, clockSource, nodes[4], false)
	nodes[5] = raftutils.RestartNode(t, clockSource, nodes[5], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	assert.Len(t, ts.Server.raft.GetMemberlist(), 5)
	// All the nodes should be reachable again
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}
		managers = getMap(t, r.Nodes)
		for i := 1; i <= 5; i++ {
			if managers[nodes[uint64(i)].Config.ID].Reachability != api.RaftMemberStatus_REACHABLE {
				return fmt.Errorf("node %x is unreachable", nodes[uint64(i)].Config.ID)
			}
		}
		return nil
	}))

	// Switch the raft node used by the server
	ts.Server.raft = nodes[2].Node

	// Stop node 1 (leader)
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	newCluster := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
		4: nodes[4],
		5: nodes[5],
	}

	// Wait for the re-election to occur
	raftutils.WaitForCluster(t, clockSource, newCluster)

	// Node 1 should not be the leader anymore
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		r, err = ts.Client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}

		managers = getMap(t, r.Nodes)

		if managers[nodes[1].Config.ID].Leader {
			return fmt.Errorf("expected node 1 not to be the leader")
		}

		if managers[nodes[1].Config.ID].Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 1 to be unreachable")
		}

		return nil
	}))

	// Restart node 1
	nodes[1].ShutdownRaft()
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Ensure that node 1 is not the leader
	assert.False(t, managers[nodes[uint64(1)].Config.ID].Leader)

	// Check that another node got the leader status
	var leader uint64
	leaderCount := 0
	for i := 1; i <= 5; i++ {
		if managers[nodes[uint64(i)].Config.ID].Leader {
			leader = nodes[uint64(i)].Config.ID
			leaderCount++
		}
	}

	// There should be only one leader after node 1 recovery and it
	// should be different from node 1
	assert.Equal(t, 1, leaderCount)
	assert.NotEqual(t, leader, nodes[1].Config.ID)
}
Example no. 14
// This test rotates the encryption key several times and verifies at each step that the rotation
// behaves as expected: snapshots are taken, restarts require the right keys, and stalled rotations recover
func TestRaftEncryptionKeyRotationWait(t *testing.T) {
	t.Parallel()
	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock

	raftConfig := raft.DefaultRaftConfig()
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, &raftConfig)
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3"}
	values := make([]*api.Node, len(nodeIDs))

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		require.NoError(t, err, "failed to propose value")
	}

	snapDir := filepath.Join(nodes[1].StateDir, "snap-v3-encrypted")

	startingKeys := nodes[1].KeyRotator.GetKeys()

	// rotate the encryption key
	nodes[1].KeyRotator.QueuePendingKey([]byte("key2"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	// the rotation should trigger a snapshot, which should notify the rotator when it's done
	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(snapshots))
		}
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		return nil
	}))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// Propose a 4th value
	nodeIDs = append(nodeIDs, "id4")
	v, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id4")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	// Try to restart node 1. Without the new unlock key, it can't actually start
	n, ctx := raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(startingKeys))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read snapshots")

	// with the right key, it can start, even if the right key is only the pending key
	newKeys := startingKeys
	newKeys.PendingDEK = []byte("key2")
	nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(newKeys)
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)

	raftutils.WaitForCluster(t, clockSource, nodes)

	// as soon as we joined, it should have finished rotating the key
	require.False(t, nodes[1].KeyRotator.NeedsRotation())
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// break snapshotting, and ensure that key rotation never finishes
	tempSnapDir := filepath.Join(nodes[1].StateDir, "snap-backup")
	require.NoError(t, os.Rename(snapDir, tempSnapDir))
	require.NoError(t, ioutil.WriteFile(snapDir, []byte("this is no longer a directory"), 0644))

	nodes[1].KeyRotator.QueuePendingKey([]byte("key3"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	time.Sleep(250 * time.Millisecond)

	// rotation has not finished, because we cannot take a snapshot
	require.True(t, nodes[1].KeyRotator.NeedsRotation())

	// Propose a 5th value, so we have WALs written with the new key
	nodeIDs = append(nodeIDs, "id5")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id5")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	// restore the snapshot dir
	require.NoError(t, os.RemoveAll(snapDir))
	require.NoError(t, os.Rename(tempSnapDir, snapDir))

	// Now the wals are a mix of key2 and key3 - we can't actually start with either key
	singleKey := raft.EncryptionKeys{CurrentDEK: []byte("key2")}
	n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read all the WALs, even if we can read the snapshot")
	singleKey = raft.EncryptionKeys{CurrentDEK: []byte("key3")}
	n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read all the WALs, and also not the snapshot")

	nodes[1], ctx = raftutils.CopyNode(t, clockSource, nodes[1], false,
		raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{
			CurrentDEK: []byte("key2"),
			PendingDEK: []byte("key3"),
		}))
	require.NoError(t, nodes[1].Node.JoinAndStart(ctx))

	// we can load, but we still need a snapshot because rotation hasn't finished
	snapshots, err := storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1, "expected 1 snapshot")
	require.True(t, nodes[1].KeyRotator.NeedsRotation())
	currSnapshot := snapshots[0]

	// start the node - everything should fix itself
	go nodes[1].Node.Run(ctx)
	raftutils.WaitForCluster(t, clockSource, nodes)

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots))
		}
		if snapshots[0] == currSnapshot {
			return fmt.Errorf("new snapshot not done yet")
		}
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		currSnapshot = snapshots[0]
		return nil
	}))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// If we can't update the keys, we wait for the next snapshot to do so
	nodes[1].KeyRotator.SetUpdateFunc(func() error { return fmt.Errorf("nope!") })
	nodes[1].KeyRotator.QueuePendingKey([]byte("key4"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots))
		}
		if snapshots[0] == currSnapshot {
			return fmt.Errorf("new snapshot not done yet")
		}
		currSnapshot = snapshots[0]
		return nil
	}))
	require.True(t, nodes[1].KeyRotator.NeedsRotation())

	// Fix updating the key rotator, and propose a 6th value - this should trigger the key
	// rotation to finish
	nodes[1].KeyRotator.SetUpdateFunc(nil)
	nodeIDs = append(nodeIDs, "id6")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id6")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		return nil
	}))

	// no new snapshot
	snapshots, err = storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1)
	require.Equal(t, currSnapshot, snapshots[0])

	// Even if something goes wrong with getting keys and NeedsRotation returns a false positive,
	// nothing happens if there is no PendingDEK.

	fakeTrue := true
	nodes[1].KeyRotator.SetNeedsRotation(&fakeTrue)
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	// propose another value
	nodeIDs = append(nodeIDs, "id7")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id7")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// no new snapshot
	snapshots, err = storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1)
	require.Equal(t, currSnapshot, snapshots[0])

	// and when we restart, we can restart with the original key (the WAL written for the newly
	// proposed value is written with the old key)
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{
		CurrentDEK: []byte("key4"),
	})
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)
	raftutils.WaitForCluster(t, clockSource, nodes)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
Example no. 15
func TestUpdateNodeDemote(t *testing.T) {
	tc := cautils.NewTestCA(nil, cautils.AcceptancePolicy(true, true, ""))
	ts := newTestServer(t)

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Assign one of the raft nodes to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	// Create a node object for each of the managers
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		return nil
	}))

	// Stop Node 3 (1 node out of 3)
	nodes[3].Server.Stop()
	nodes[3].Shutdown()

	// Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Try to demote Node 2, this should fail because of the quorum safeguard
	r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec := r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version := &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))

	// Restart Node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 3 should be listed as Reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Try to demote Node 3, this should succeed
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
	}

	raftutils.WaitForCluster(t, clockSource, newCluster)

	// Server should list 2 members
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 2 {
			return fmt.Errorf("expected 2 nodes, got %d", len(members))
		}
		return nil
	}))

	// Try to demote Node 2
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	newCluster = map[uint64]*raftutils.TestNode{
		1: nodes[1],
	}

	raftutils.WaitForCluster(t, clockSource, newCluster)

	// New server should list 1 member
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 1 {
			return fmt.Errorf("expected 1 node, got %d", len(members))
		}
		return nil
	}))

	// Make sure we can't demote the last manager.
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[1].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))
}
Example no. 16
func TestRaftForceNewCluster(t *testing.T) {
	t.Parallel()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)

	// Propose a value
	values := make([]*api.Node, 2)
	var err error
	values[0], err = raftutils.ProposeValue(t, nodes[1], "id1")
	assert.NoError(t, err, "failed to propose value")

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Stop all nodes
	for _, node := range nodes {
		node.Server.Stop()
		node.Shutdown()
	}

	raftutils.AdvanceTicks(clockSource, 5)

	toClean := map[uint64]*raftutils.TestNode{
		2: nodes[2],
		3: nodes[3],
	}
	raftutils.TeardownCluster(t, toClean)
	delete(nodes, 2)
	delete(nodes, 3)

	// Only restart the first node with force-new-cluster option
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain only one node (self)
	assert.Equal(t, len(nodes[1].GetMemberlist()), 1)

	// Add 2 more members
	nodes[2] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
		3: nodes[3],
	}
	defer raftutils.TeardownCluster(t, newCluster)

	// The memberlist should contain 3 members on each node
	for i := 1; i <= 3; i++ {
		assert.Equal(t, len(nodes[uint64(i)].GetMemberlist()), 3)
	}

	// Propose another value
	values[1], err = raftutils.ProposeValue(t, raftutils.Leader(nodes), "id2")
	assert.NoError(t, err, "failed to propose value")

	for _, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			var err error
			node.MemoryStore().View(func(tx store.ReadTx) {
				var allNodes []*api.Node
				allNodes, err = store.FindNodes(tx, store.All)
				if err != nil {
					return
				}
				if len(allNodes) != 2 {
					err = fmt.Errorf("expected 2 nodes, got %d", len(allNodes))
					return
				}

				for i, nodeID := range []string{"id1", "id2"} {
					n := store.GetNode(tx, nodeID)
					if !reflect.DeepEqual(n, values[i]) {
						err = fmt.Errorf("node %s did not match expected value", nodeID)
						return
					}
				}
			})
			return err
		}))
	}
}
Example no. 17
func testUpdateNodeDemote(leader bool, t *testing.T) {
	tc := cautils.NewTestCA(nil)
	defer tc.Stop()
	ts := newTestServer(t)
	defer ts.Stop()

	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
	defer raftutils.TeardownCluster(t, nodes)

	// Assign one of the raft nodes to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	// Create a node object for each of the managers
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		return nil
	}))

	// Stop Node 3 (1 node out of 3)
	nodes[3].Server.Stop()
	nodes[3].ShutdownRaft()

	// Node 3 should be listed as Unreachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
			return fmt.Errorf("expected node 3 to be unreachable")
		}
		return nil
	}))

	// Try to demote Node 2, this should fail because of the quorum safeguard
	r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[2].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec := r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version := &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[2].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))

	// Restart Node 3
	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Node 3 should be listed as Reachable
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 3 {
			return fmt.Errorf("expected 3 nodes, got %d", len(members))
		}
		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
			return fmt.Errorf("expected node 3 to be reachable")
		}
		return nil
	}))

	// Try to demote Node 3, this should succeed
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodes[3].SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodes[3].SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	newCluster := map[uint64]*raftutils.TestNode{
		1: nodes[1],
		2: nodes[2],
	}

	raftutils.WaitForCluster(t, clockSource, newCluster)

	// Server should list 2 members
	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := nodes[1].GetMemberlist()
		if len(members) != 2 {
			return fmt.Errorf("expected 2 nodes, got %d", len(members))
		}
		return nil
	}))

	var demoteNode, lastNode *raftutils.TestNode
	if leader {
		demoteNode = nodes[1]
		lastNode = nodes[2]
	} else {
		demoteNode = nodes[2]
		lastNode = nodes[1]
	}

	// Try to demote a Node and scale down to 1
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: demoteNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      demoteNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	// Update the server
	ts.Server.raft = lastNode.Node
	ts.Server.store = lastNode.MemoryStore()

	newCluster = map[uint64]*raftutils.TestNode{
		1: lastNode,
	}

	raftutils.WaitForCluster(t, clockSource, newCluster)

	assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
		members := lastNode.GetMemberlist()
		if len(members) != 1 {
			return fmt.Errorf("expected 1 node, got %d", len(members))
		}
		return nil
	}))

	// Make sure we can't demote the last manager.
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Role = api.NodeRoleWorker
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      lastNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.FailedPrecondition, grpc.Code(err))

	// Propose a change in the spec and check if the remaining node can still process updates
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	spec = r.Node.Spec.Copy()
	spec.Availability = api.NodeAvailabilityDrain
	version = &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      lastNode.SecurityConfig.ClientTLSCreds.NodeID(),
		Spec:        spec,
		NodeVersion: version,
	})
	assert.NoError(t, err)

	// Get node information and check that the availability is set to drain
	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: lastNode.SecurityConfig.ClientTLSCreds.NodeID()})
	assert.NoError(t, err)
	assert.Equal(t, r.Node.Spec.Availability, api.NodeAvailabilityDrain)
}
Example no. 18
func TestNewNodeCertificateRequiresToken(t *testing.T) {
	t.Parallel()

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	csr, _, err := ca.GenerateNewCSR()
	assert.NoError(t, err)

	// Issuance fails if no token is provided
	role := api.NodeRoleManager
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// Issuance fails if the wrong token is provided
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// Issuance succeeds if correct token is provided
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	// Rotate manager and worker tokens
	var (
		newManagerToken string
		newWorkerToken  string
	)
	assert.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		clusters, _ := store.FindClusters(tx, store.ByName(store.DefaultClusterName))
		newWorkerToken = ca.GenerateJoinToken(&tc.RootCA)
		clusters[0].RootCA.JoinTokens.Worker = newWorkerToken
		newManagerToken = ca.GenerateJoinToken(&tc.RootCA)
		clusters[0].RootCA.JoinTokens.Manager = newManagerToken
		return store.UpdateCluster(tx, clusters[0])
	}))

	// updating the join token may take a little while to register on the CA server, so poll
	assert.NoError(t, raftutils.PollFunc(nil, func() error {
		// Old token should fail
		role = api.NodeRoleManager
		issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
		_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
		if err == nil {
			return fmt.Errorf("join token not updated yet")
		}
		return nil
	}))

	// Old token should fail
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// New token should succeed
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newManagerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newWorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)
}
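
The assertions above match the full rendered gRPC error string ("rpc error: code = 3 desc = ..."), which is brittle across gRPC versions. A hedged alternative sketch that compares only the status code (code 3 is codes.InvalidArgument); this is a suggestion, not what the test does:

import (
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// isInvalidArgument reports whether err carries the InvalidArgument gRPC
// status, which is what "code = 3" in the strings above encodes.
func isInvalidArgument(err error) bool {
	return status.Code(err) == codes.InvalidArgument
}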
Example no. 19
func TestRaftSnapshotForceNewCluster(t *testing.T) {
	t.Parallel()

	// Bring up a 3-node cluster
	nodes, clockSource := raftutils.NewRaftCluster(t, tc, &api.RaftConfig{SnapshotInterval: 10, LogEntriesForSlowFollowers: 0})
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3", "id4", "id5"}

	// Propose 3 values.
	for _, nodeID := range nodeIDs[:3] {
		_, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		assert.NoError(t, err, "failed to propose value")
	}

	// Remove one of the original nodes

	// Use gRPC instead of calling the handler directly because of the
	// authorization check.
	cc, err := dial(nodes[1], nodes[1].Address)
	assert.NoError(t, err)
	raftClient := api.NewRaftMembershipClient(cc)
	defer cc.Close()
	ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
	resp, err := raftClient.Leave(ctx, &api.LeaveRequest{Node: &api.RaftMember{RaftID: nodes[2].Config.ID}})
	assert.NoError(t, err, "error sending message to leave the raft")
	assert.NotNil(t, resp, "leave response message is nil")

	raftutils.ShutdownNode(nodes[2])
	delete(nodes, 2)

	// Nodes shouldn't have snapshot files yet
	for _, node := range nodes {
		dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap-v3-encrypted"))
		assert.NoError(t, err)
		assert.Len(t, dirents, 0)
	}

	// Trigger a snapshot, with a 4th proposal
	_, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[3])
	assert.NoError(t, err, "failed to propose value")

	// Nodes should now have a snapshot file
	for nodeIdx, node := range nodes {
		assert.NoError(t, raftutils.PollFunc(clockSource, func() error {
			dirents, err := ioutil.ReadDir(filepath.Join(node.StateDir, "snap-v3-encrypted"))
			if err != nil {
				return err
			}
			if len(dirents) != 1 {
				return fmt.Errorf("expected 1 snapshot, found %d on node %d", len(dirents), nodeIdx+1)
			}
			return nil
		}))
	}

	// Join another node
	nodes[4] = raftutils.NewJoinNode(t, clockSource, nodes[1].Address, tc)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// Only restart the first node with force-new-cluster option
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], true)
	delete(nodes, 3)
	delete(nodes, 4)
	raftutils.WaitForCluster(t, clockSource, nodes)

	// The memberlist should contain exactly one node (self)
	memberlist := nodes[1].GetMemberlist()
	require.Len(t, memberlist, 1)

	// Propose a 5th value
	_, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeIDs[4])
	require.NoError(t, err)
}