Example #1
0
func TestRaftUnreachableNode(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Add a new node
	nodes[2] = raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address})

	err := nodes[2].JoinAndStart(ctx)
	require.NoError(t, err, "can't join cluster")

	go nodes[2].Run(ctx)

	// Stop the Raft server of second node on purpose after joining
	nodes[2].Server.Stop()
	nodes[2].Listener.Close()

	raftutils.AdvanceTicks(clockSource, 5)
	time.Sleep(100 * time.Millisecond)

	wrappedListener := raftutils.RecycleWrappedListener(nodes[2].Listener)
	securityConfig := nodes[2].SecurityConfig
	serverOpts := []grpc.ServerOption{grpc.Creds(securityConfig.ServerTLSCreds)}
	s := grpc.NewServer(serverOpts...)

	nodes[2].Server = s
	raft.Register(s, nodes[2].Node)

	go func() {
		// After stopping, we should receive an error from Serve
		assert.Error(t, s.Serve(wrappedListener))
	}()

	raftutils.WaitForCluster(t, clockSource, nodes)
	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime)
	assert.NoError(t, err, "failed to propose value")

	// All nodes should have the value in the physical store
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	raftutils.CheckValue(t, clockSource, nodes[2], value)
}
Example #2
0
func TestRaftJoinWithIncorrectAddress(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	// Try joining a new node with an incorrect address
	n := raftutils.NewNode(t, clockSource, tc, raft.NewNodeOptions{JoinAddr: nodes[1].Address, Addr: "1.2.3.4:1234"})

	err := n.JoinAndStart()
	assert.NotNil(t, err)
	assert.Equal(t, grpc.ErrorDesc(err), raft.ErrHealthCheckFailure.Error())

	// Check if first node still has only itself registered in the memberlist
	assert.Equal(t, len(nodes[1].GetMemberlist()), 1)
}
Example #3
0
func TestRaftJoinWithIncorrectAddress(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)
	defer raftutils.ShutdownNode(nodes[1])

	// Try joining a new node with an incorrect address
	n := raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address, Addr: "1.2.3.4:1234"})
	defer raftutils.CleanupNonRunningNode(n)

	err := n.JoinAndStart(context.Background())
	assert.NotNil(t, err)
	assert.Contains(t, grpc.ErrorDesc(err), "could not connect to prospective new cluster member using its advertised address")

	// Check if first node still has only itself registered in the memberlist
	assert.Len(t, nodes[1].GetMemberlist(), 1)
}
Example #4
0
func TestRaftUnreachableNode(t *testing.T) {
	t.Parallel()

	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil)

	ctx := context.Background()
	// Add a new node, but don't start its server yet
	n := raftutils.NewNode(t, clockSource, tc, raft.NewNodeOptions{JoinAddr: nodes[1].Address})
	go n.Run(ctx)

	raftutils.AdvanceTicks(clockSource, 5)
	time.Sleep(100 * time.Millisecond)

	raft.Register(n.Server, n.Node)

	// Now start the new node's server
	go func() {
		// After stopping, we should receive an error from Serve
		assert.Error(t, n.Server.Serve(n.Listener))
	}()

	nodes[2] = n
	raftutils.WaitForCluster(t, clockSource, nodes)

	defer raftutils.TeardownCluster(t, nodes)

	// Propose a value
	value, err := raftutils.ProposeValue(t, nodes[1])
	assert.NoError(t, err, "failed to propose value")

	// All nodes should have the value in the physical store
	raftutils.CheckValue(t, clockSource, nodes[1], value)
	raftutils.CheckValue(t, clockSource, nodes[2], value)
}
Example #5
0
// This test rotates the encryption key and waits for the expected thing to happen
func TestRaftEncryptionKeyRotationWait(t *testing.T) {
	t.Parallel()
	nodes := make(map[uint64]*raftutils.TestNode)
	var clockSource *fakeclock.FakeClock

	raftConfig := raft.DefaultRaftConfig()
	nodes[1], clockSource = raftutils.NewInitNode(t, tc, &raftConfig)
	defer raftutils.TeardownCluster(t, nodes)

	nodeIDs := []string{"id1", "id2", "id3"}
	values := make([]*api.Node, len(nodeIDs))

	// Propose 3 values
	var err error
	for i, nodeID := range nodeIDs[:3] {
		values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID)
		require.NoError(t, err, "failed to propose value")
	}

	snapDir := filepath.Join(nodes[1].StateDir, "snap-v3-encrypted")

	startingKeys := nodes[1].KeyRotator.GetKeys()

	// rotate the encryption key
	nodes[1].KeyRotator.QueuePendingKey([]byte("key2"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	// the rotation should trigger a snapshot, which should notify the rotator when it's done
	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshot, found %d on new node", len(snapshots))
		}
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		return nil
	}))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// Propose a 4th value
	nodeIDs = append(nodeIDs, "id4")
	v, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id4")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	// Try to restart node 1. Without the new unlock key, it can't actually start
	n, ctx := raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(startingKeys))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read snapshots")

	// with the right key, it can start, even if the right key is only the pending key
	newKeys := startingKeys
	newKeys.PendingDEK = []byte("key2")
	nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(newKeys)
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)

	raftutils.WaitForCluster(t, clockSource, nodes)

	// as soon as we joined, it should have finished rotating the key
	require.False(t, nodes[1].KeyRotator.NeedsRotation())
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// break snapshotting, and ensure that key rotation never finishes
	tempSnapDir := filepath.Join(nodes[1].StateDir, "snap-backup")
	require.NoError(t, os.Rename(snapDir, tempSnapDir))
	require.NoError(t, ioutil.WriteFile(snapDir, []byte("this is no longer a directory"), 0644))

	nodes[1].KeyRotator.QueuePendingKey([]byte("key3"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	time.Sleep(250 * time.Millisecond)

	// rotation has not been finished, because we cannot take a snapshot
	require.True(t, nodes[1].KeyRotator.NeedsRotation())

	// Propose a 5th value, so we have WALs written with the new key
	nodeIDs = append(nodeIDs, "id5")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id5")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	// restore the snapshot dir
	require.NoError(t, os.RemoveAll(snapDir))
	require.NoError(t, os.Rename(tempSnapDir, snapDir))

	// Now the wals are a mix of key2 and key3 - we can't actually start with either key
	singleKey := raft.EncryptionKeys{CurrentDEK: []byte("key2")}
	n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read all the WALs, even if we can read the snapshot")
	singleKey = raft.EncryptionKeys{CurrentDEK: []byte("key3")}
	n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey))
	require.Error(t, n.Node.JoinAndStart(ctx),
		"should not have been able to restart since we can't read all the WALs, and also not the snapshot")

	nodes[1], ctx = raftutils.CopyNode(t, clockSource, nodes[1], false,
		raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{
			CurrentDEK: []byte("key2"),
			PendingDEK: []byte("key3"),
		}))
	require.NoError(t, nodes[1].Node.JoinAndStart(ctx))

	// we can load, but we still need a snapshot because rotation hasn't finished
	snapshots, err := storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1, "expected 1 snapshot")
	require.True(t, nodes[1].KeyRotator.NeedsRotation())
	currSnapshot := snapshots[0]

	// start the node - everything should fix itself
	go nodes[1].Node.Run(ctx)
	raftutils.WaitForCluster(t, clockSource, nodes)

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots))
		}
		if snapshots[0] == currSnapshot {
			return fmt.Errorf("new snapshot not done yet")
		}
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		currSnapshot = snapshots[0]
		return nil
	}))
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// If we can't update the keys, we wait for the next snapshot to do so
	nodes[1].KeyRotator.SetUpdateFunc(func() error { return fmt.Errorf("nope!") })
	nodes[1].KeyRotator.QueuePendingKey([]byte("key4"))
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		snapshots, err := storage.ListSnapshots(snapDir)
		if err != nil {
			return err
		}
		if len(snapshots) != 1 {
			return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots))
		}
		if snapshots[0] == currSnapshot {
			return fmt.Errorf("new snapshot not done yet")
		}
		currSnapshot = snapshots[0]
		return nil
	}))
	require.True(t, nodes[1].KeyRotator.NeedsRotation())

	// Fix updating the key rotator, and propose a 6th value - this should trigger the key
	// rotation to finish
	nodes[1].KeyRotator.SetUpdateFunc(nil)
	nodeIDs = append(nodeIDs, "id6")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id6")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	require.NoError(t, raftutils.PollFunc(clockSource, func() error {
		if nodes[1].KeyRotator.NeedsRotation() {
			return fmt.Errorf("rotation never finished")
		}
		return nil
	}))

	// no new snapshot
	snapshots, err = storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1)
	require.Equal(t, currSnapshot, snapshots[0])

	// Even if something goes wrong with getting keys, and needs rotation returns a false positive,
	// if there's no PendingDEK nothing happens.

	fakeTrue := true
	nodes[1].KeyRotator.SetNeedsRotation(&fakeTrue)
	nodes[1].KeyRotator.RotationNotify() <- struct{}{}

	// propose another value
	nodeIDs = append(nodeIDs, "id7")
	v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id7")
	require.NoError(t, err, "failed to propose value")
	values = append(values, v)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)

	// no new snapshot
	snapshots, err = storage.ListSnapshots(snapDir)
	require.NoError(t, err)
	require.Len(t, snapshots, 1)
	require.Equal(t, currSnapshot, snapshots[0])

	// and when we restart, we can restart with the original key (the WAL written for the new proposed value)
	// is written with the old key
	nodes[1].Server.Stop()
	nodes[1].ShutdownRaft()

	nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{
		CurrentDEK: []byte("key4"),
	})
	nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false)
	raftutils.WaitForCluster(t, clockSource, nodes)
	raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values)
}
Example #6
0
func TestUpdateNode(t *testing.T) {
	tc := cautils.NewTestCA(nil)
	defer tc.Stop()
	ts := newTestServer(t)
	defer ts.Stop()

	nodes := make(map[uint64]*raftutils.TestNode)
	nodes[1], _ = raftutils.NewInitNode(t, tc, nil)
	defer raftutils.TeardownCluster(t, nodes)

	nodeID := nodes[1].SecurityConfig.ClientTLSCreds.NodeID()

	// Assign one of the raft node to the test server
	ts.Server.raft = nodes[1].Node
	ts.Server.store = nodes[1].MemoryStore()

	_, err := ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID: nodeID,
		Spec: &api.NodeSpec{
			Availability: api.NodeAvailabilityDrain,
		},
		NodeVersion: &api.Version{},
	})
	assert.Error(t, err)
	assert.Equal(t, codes.NotFound, grpc.Code(err))

	// Create a node object for the manager
	assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, &api.Node{
			ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(),
			Spec: api.NodeSpec{
				Role:       api.NodeRoleManager,
				Membership: api.NodeMembershipAccepted,
			},
		}))
		return nil
	}))

	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{})
	assert.Error(t, err)
	assert.Equal(t, codes.InvalidArgument, grpc.Code(err))

	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: "invalid", Spec: &api.NodeSpec{}, NodeVersion: &api.Version{}})
	assert.Error(t, err)
	assert.Equal(t, codes.NotFound, grpc.Code(err))

	r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID})
	assert.NoError(t, err)
	if !assert.NotNil(t, r) {
		assert.FailNow(t, "got unexpected nil response from GetNode")
	}
	assert.NotNil(t, r.Node)

	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID})
	assert.Error(t, err)
	assert.Equal(t, codes.InvalidArgument, grpc.Code(err))

	spec := r.Node.Spec.Copy()
	spec.Availability = api.NodeAvailabilityDrain
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID: nodeID,
		Spec:   spec,
	})
	assert.Error(t, err)
	assert.Equal(t, codes.InvalidArgument, grpc.Code(err))

	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{
		NodeID:      nodeID,
		Spec:        spec,
		NodeVersion: &r.Node.Meta.Version,
	})
	assert.NoError(t, err)

	r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID})
	assert.NoError(t, err)
	if !assert.NotNil(t, r) {
		assert.FailNow(t, "got unexpected nil response from GetNode")
	}
	assert.NotNil(t, r.Node)
	assert.NotNil(t, r.Node.Spec)
	assert.Equal(t, api.NodeAvailabilityDrain, r.Node.Spec.Availability)

	version := &r.Node.Meta.Version
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version})
	assert.NoError(t, err)

	// Perform an update with the "old" version.
	_, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version})
	assert.Error(t, err)
}