func TestRaftUnreachableNode(t *testing.T) { t.Parallel() nodes := make(map[uint64]*raftutils.TestNode) var clockSource *fakeclock.FakeClock nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil) ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Add a new node nodes[2] = raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address}) err := nodes[2].JoinAndStart(ctx) require.NoError(t, err, "can't join cluster") go nodes[2].Run(ctx) // Stop the Raft server of second node on purpose after joining nodes[2].Server.Stop() nodes[2].Listener.Close() raftutils.AdvanceTicks(clockSource, 5) time.Sleep(100 * time.Millisecond) wrappedListener := raftutils.RecycleWrappedListener(nodes[2].Listener) securityConfig := nodes[2].SecurityConfig serverOpts := []grpc.ServerOption{grpc.Creds(securityConfig.ServerTLSCreds)} s := grpc.NewServer(serverOpts...) nodes[2].Server = s raft.Register(s, nodes[2].Node) go func() { // After stopping, we should receive an error from Serve assert.Error(t, s.Serve(wrappedListener)) }() raftutils.WaitForCluster(t, clockSource, nodes) defer raftutils.TeardownCluster(t, nodes) // Propose a value value, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime) assert.NoError(t, err, "failed to propose value") // All nodes should have the value in the physical store raftutils.CheckValue(t, clockSource, nodes[1], value) raftutils.CheckValue(t, clockSource, nodes[2], value) }
func TestRaftJoinWithIncorrectAddress(t *testing.T) { t.Parallel() nodes := make(map[uint64]*raftutils.TestNode) var clockSource *fakeclock.FakeClock nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil) // Try joining a new node with an incorrect address n := raftutils.NewNode(t, clockSource, tc, raft.NewNodeOptions{JoinAddr: nodes[1].Address, Addr: "1.2.3.4:1234"}) err := n.JoinAndStart() assert.NotNil(t, err) assert.Equal(t, grpc.ErrorDesc(err), raft.ErrHealthCheckFailure.Error()) // Check if first node still has only itself registered in the memberlist assert.Equal(t, len(nodes[1].GetMemberlist()), 1) }
func TestRaftJoinWithIncorrectAddress(t *testing.T) { t.Parallel() nodes := make(map[uint64]*raftutils.TestNode) var clockSource *fakeclock.FakeClock nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil) defer raftutils.ShutdownNode(nodes[1]) // Try joining a new node with an incorrect address n := raftutils.NewNode(t, clockSource, tc, raft.NodeOptions{JoinAddr: nodes[1].Address, Addr: "1.2.3.4:1234"}) defer raftutils.CleanupNonRunningNode(n) err := n.JoinAndStart(context.Background()) assert.NotNil(t, err) assert.Contains(t, grpc.ErrorDesc(err), "could not connect to prospective new cluster member using its advertised address") // Check if first node still has only itself registered in the memberlist assert.Len(t, nodes[1].GetMemberlist(), 1) }
func TestRaftUnreachableNode(t *testing.T) { t.Parallel() nodes := make(map[uint64]*raftutils.TestNode) var clockSource *fakeclock.FakeClock nodes[1], clockSource = raftutils.NewInitNode(t, tc, nil) ctx := context.Background() // Add a new node, but don't start its server yet n := raftutils.NewNode(t, clockSource, tc, raft.NewNodeOptions{JoinAddr: nodes[1].Address}) go n.Run(ctx) raftutils.AdvanceTicks(clockSource, 5) time.Sleep(100 * time.Millisecond) raft.Register(n.Server, n.Node) // Now start the new node's server go func() { // After stopping, we should receive an error from Serve assert.Error(t, n.Server.Serve(n.Listener)) }() nodes[2] = n raftutils.WaitForCluster(t, clockSource, nodes) defer raftutils.TeardownCluster(t, nodes) // Propose a value value, err := raftutils.ProposeValue(t, nodes[1]) assert.NoError(t, err, "failed to propose value") // All nodes should have the value in the physical store raftutils.CheckValue(t, clockSource, nodes[1], value) raftutils.CheckValue(t, clockSource, nodes[2], value) }
// This test rotates the encryption key and waits for the expected thing to happen func TestRaftEncryptionKeyRotationWait(t *testing.T) { t.Parallel() nodes := make(map[uint64]*raftutils.TestNode) var clockSource *fakeclock.FakeClock raftConfig := raft.DefaultRaftConfig() nodes[1], clockSource = raftutils.NewInitNode(t, tc, &raftConfig) defer raftutils.TeardownCluster(t, nodes) nodeIDs := []string{"id1", "id2", "id3"} values := make([]*api.Node, len(nodeIDs)) // Propose 3 values var err error for i, nodeID := range nodeIDs[:3] { values[i], err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, nodeID) require.NoError(t, err, "failed to propose value") } snapDir := filepath.Join(nodes[1].StateDir, "snap-v3-encrypted") startingKeys := nodes[1].KeyRotator.GetKeys() // rotate the encryption key nodes[1].KeyRotator.QueuePendingKey([]byte("key2")) nodes[1].KeyRotator.RotationNotify() <- struct{}{} // the rotation should trigger a snapshot, which should notify the rotator when it's done require.NoError(t, raftutils.PollFunc(clockSource, func() error { snapshots, err := storage.ListSnapshots(snapDir) if err != nil { return err } if len(snapshots) != 1 { return fmt.Errorf("expected 1 snapshot, found %d on new node", len(snapshots)) } if nodes[1].KeyRotator.NeedsRotation() { return fmt.Errorf("rotation never finished") } return nil })) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) // Propose a 4th value nodeIDs = append(nodeIDs, "id4") v, err := raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id4") require.NoError(t, err, "failed to propose value") values = append(values, v) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) nodes[1].Server.Stop() nodes[1].ShutdownRaft() // Try to restart node 1. Without the new unlock key, it can't actually start n, ctx := raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(startingKeys)) require.Error(t, n.Node.JoinAndStart(ctx), "should not have been able to restart since we can't read snapshots") // with the right key, it can start, even if the right key is only the pending key newKeys := startingKeys newKeys.PendingDEK = []byte("key2") nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(newKeys) nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false) raftutils.WaitForCluster(t, clockSource, nodes) // as soon as we joined, it should have finished rotating the key require.False(t, nodes[1].KeyRotator.NeedsRotation()) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) // break snapshotting, and ensure that key rotation never finishes tempSnapDir := filepath.Join(nodes[1].StateDir, "snap-backup") require.NoError(t, os.Rename(snapDir, tempSnapDir)) require.NoError(t, ioutil.WriteFile(snapDir, []byte("this is no longer a directory"), 0644)) nodes[1].KeyRotator.QueuePendingKey([]byte("key3")) nodes[1].KeyRotator.RotationNotify() <- struct{}{} time.Sleep(250 * time.Millisecond) // rotation has not been finished, because we cannot take a snapshot require.True(t, nodes[1].KeyRotator.NeedsRotation()) // Propose a 5th value, so we have WALs written with the new key nodeIDs = append(nodeIDs, "id5") v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id5") require.NoError(t, err, "failed to propose value") values = append(values, v) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) nodes[1].Server.Stop() nodes[1].ShutdownRaft() // restore the snapshot dir require.NoError(t, os.RemoveAll(snapDir)) require.NoError(t, os.Rename(tempSnapDir, snapDir)) // Now the wals are a mix of key2 and key3 - we can't actually start with either key singleKey := raft.EncryptionKeys{CurrentDEK: []byte("key2")} n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey)) require.Error(t, n.Node.JoinAndStart(ctx), "should not have been able to restart since we can't read all the WALs, even if we can read the snapshot") singleKey = raft.EncryptionKeys{CurrentDEK: []byte("key3")} n, ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(singleKey)) require.Error(t, n.Node.JoinAndStart(ctx), "should not have been able to restart since we can't read all the WALs, and also not the snapshot") nodes[1], ctx = raftutils.CopyNode(t, clockSource, nodes[1], false, raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{ CurrentDEK: []byte("key2"), PendingDEK: []byte("key3"), })) require.NoError(t, nodes[1].Node.JoinAndStart(ctx)) // we can load, but we still need a snapshot because rotation hasn't finished snapshots, err := storage.ListSnapshots(snapDir) require.NoError(t, err) require.Len(t, snapshots, 1, "expected 1 snapshot") require.True(t, nodes[1].KeyRotator.NeedsRotation()) currSnapshot := snapshots[0] // start the node - everything should fix itself go nodes[1].Node.Run(ctx) raftutils.WaitForCluster(t, clockSource, nodes) require.NoError(t, raftutils.PollFunc(clockSource, func() error { snapshots, err := storage.ListSnapshots(snapDir) if err != nil { return err } if len(snapshots) != 1 { return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots)) } if snapshots[0] == currSnapshot { return fmt.Errorf("new snapshot not done yet") } if nodes[1].KeyRotator.NeedsRotation() { return fmt.Errorf("rotation never finished") } currSnapshot = snapshots[0] return nil })) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) // If we can't update the keys, we wait for the next snapshot to do so nodes[1].KeyRotator.SetUpdateFunc(func() error { return fmt.Errorf("nope!") }) nodes[1].KeyRotator.QueuePendingKey([]byte("key4")) nodes[1].KeyRotator.RotationNotify() <- struct{}{} require.NoError(t, raftutils.PollFunc(clockSource, func() error { snapshots, err := storage.ListSnapshots(snapDir) if err != nil { return err } if len(snapshots) != 1 { return fmt.Errorf("expected 1 snapshots, found %d on new node", len(snapshots)) } if snapshots[0] == currSnapshot { return fmt.Errorf("new snapshot not done yet") } currSnapshot = snapshots[0] return nil })) require.True(t, nodes[1].KeyRotator.NeedsRotation()) // Fix updating the key rotator, and propose a 6th value - this should trigger the key // rotation to finish nodes[1].KeyRotator.SetUpdateFunc(nil) nodeIDs = append(nodeIDs, "id6") v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id6") require.NoError(t, err, "failed to propose value") values = append(values, v) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) require.NoError(t, raftutils.PollFunc(clockSource, func() error { if nodes[1].KeyRotator.NeedsRotation() { return fmt.Errorf("rotation never finished") } return nil })) // no new snapshot snapshots, err = storage.ListSnapshots(snapDir) require.NoError(t, err) require.Len(t, snapshots, 1) require.Equal(t, currSnapshot, snapshots[0]) // Even if something goes wrong with getting keys, and needs rotation returns a false positive, // if there's no PendingDEK nothing happens. fakeTrue := true nodes[1].KeyRotator.SetNeedsRotation(&fakeTrue) nodes[1].KeyRotator.RotationNotify() <- struct{}{} // propose another value nodeIDs = append(nodeIDs, "id7") v, err = raftutils.ProposeValue(t, nodes[1], DefaultProposalTime, "id7") require.NoError(t, err, "failed to propose value") values = append(values, v) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) // no new snapshot snapshots, err = storage.ListSnapshots(snapDir) require.NoError(t, err) require.Len(t, snapshots, 1) require.Equal(t, currSnapshot, snapshots[0]) // and when we restart, we can restart with the original key (the WAL written for the new proposed value) // is written with the old key nodes[1].Server.Stop() nodes[1].ShutdownRaft() nodes[1].KeyRotator = raftutils.NewSimpleKeyRotator(raft.EncryptionKeys{ CurrentDEK: []byte("key4"), }) nodes[1] = raftutils.RestartNode(t, clockSource, nodes[1], false) raftutils.WaitForCluster(t, clockSource, nodes) raftutils.CheckValuesOnNodes(t, clockSource, nodes, nodeIDs, values) }
func TestUpdateNode(t *testing.T) { tc := cautils.NewTestCA(nil) defer tc.Stop() ts := newTestServer(t) defer ts.Stop() nodes := make(map[uint64]*raftutils.TestNode) nodes[1], _ = raftutils.NewInitNode(t, tc, nil) defer raftutils.TeardownCluster(t, nodes) nodeID := nodes[1].SecurityConfig.ClientTLSCreds.NodeID() // Assign one of the raft node to the test server ts.Server.raft = nodes[1].Node ts.Server.store = nodes[1].MemoryStore() _, err := ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: &api.NodeSpec{ Availability: api.NodeAvailabilityDrain, }, NodeVersion: &api.Version{}, }) assert.Error(t, err) assert.Equal(t, codes.NotFound, grpc.Code(err)) // Create a node object for the manager assert.NoError(t, nodes[1].MemoryStore().Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, &api.Node{ ID: nodes[1].SecurityConfig.ClientTLSCreds.NodeID(), Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, })) return nil })) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{}) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: "invalid", Spec: &api.NodeSpec{}, NodeVersion: &api.Version{}}) assert.Error(t, err) assert.Equal(t, codes.NotFound, grpc.Code(err)) r, err := ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID}) assert.NoError(t, err) if !assert.NotNil(t, r) { assert.FailNow(t, "got unexpected nil response from GetNode") } assert.NotNil(t, r.Node) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID}) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) spec := r.Node.Spec.Copy() spec.Availability = api.NodeAvailabilityDrain _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: spec, }) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{ NodeID: nodeID, Spec: spec, NodeVersion: &r.Node.Meta.Version, }) assert.NoError(t, err) r, err = ts.Client.GetNode(context.Background(), &api.GetNodeRequest{NodeID: nodeID}) assert.NoError(t, err) if !assert.NotNil(t, r) { assert.FailNow(t, "got unexpected nil response from GetNode") } assert.NotNil(t, r.Node) assert.NotNil(t, r.Node.Spec) assert.Equal(t, api.NodeAvailabilityDrain, r.Node.Spec.Availability) version := &r.Node.Meta.Version _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version}) assert.NoError(t, err) // Perform an update with the "old" version. _, err = ts.Client.UpdateNode(context.Background(), &api.UpdateNodeRequest{NodeID: nodeID, Spec: &r.Node.Spec, NodeVersion: version}) assert.Error(t, err) }