// TestNodeJoin verifies a new node is able to join a bootstrapped // cluster consisting of one node. func TestNodeJoin(t *testing.T) { defer leaktest.AfterTest(t) engineStopper := stop.NewStopper() defer engineStopper.Stop() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper) stopper := stop.NewStopper() _, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper) if err != nil { t.Fatal(err) } stopper.Stop() // Set an aggressive gossip interval to make sure information is exchanged tout de suite. testContext.GossipInterval = gossip.TestInterval // Start the bootstrap node. engines1 := []engine.Engine{e} addr1 := util.CreateTestAddr("tcp") server1, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t) defer stopper1.Stop() // Create a new node. engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)} server2, node2, stopper2 := createAndStartTestNode(util.CreateTestAddr("tcp"), engines2, server1.Addr(), t) defer stopper2.Stop() // Verify new node is able to bootstrap its store. if err := util.IsTrueWithin(func() bool { return node2.lSender.GetStoreCount() == 1 }, 50*time.Millisecond); err != nil { t.Fatal(err) } // Verify node1 sees node2 via gossip and vice versa. node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID) node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID) if err := util.IsTrueWithin(func() bool { nodeDesc1 := &roachpb.NodeDescriptor{} if err := node1.ctx.Gossip.GetInfoProto(node2Key, nodeDesc1); err != nil { return false } if addr2 := nodeDesc1.Address.AddressField; addr2 != server2.Addr().String() { t.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2, server2.Addr().String()) } nodeDesc2 := &roachpb.NodeDescriptor{} if err := node2.ctx.Gossip.GetInfoProto(node1Key, nodeDesc2); err != nil { return false } if addr1 := nodeDesc2.Address.AddressField; addr1 != server1.Addr().String() { t.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1, server1.Addr().String()) } return true }, 50*time.Millisecond); err != nil { t.Error(err) } }
// TestNodeJoin verifies a new node is able to join a bootstrapped // cluster consisting of one node. func TestNodeJoin(t *testing.T) { defer leaktest.AfterTest(t)() engineStopper := stop.NewStopper() defer engineStopper.Stop() e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper) if _, err := bootstrapCluster([]engine.Engine{e}, kv.NewTxnMetrics(metric.NewRegistry())); err != nil { t.Fatal(err) } // Start the bootstrap node. engines1 := []engine.Engine{e} addr1 := util.CreateTestAddr("tcp") _, server1Addr, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t) defer stopper1.Stop() // Create a new node. engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)} addr2 := util.CreateTestAddr("tcp") _, server2Addr, node2, stopper2 := createAndStartTestNode(addr2, engines2, server1Addr, t) defer stopper2.Stop() // Verify new node is able to bootstrap its store. util.SucceedsSoon(t, func() error { if sc := node2.stores.GetStoreCount(); sc != 1 { return util.Errorf("GetStoreCount() expected 1; got %d", sc) } return nil }) // Verify node1 sees node2 via gossip and vice versa. node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID) node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID) util.SucceedsSoon(t, func() error { var nodeDesc1 roachpb.NodeDescriptor if err := node1.ctx.Gossip.GetInfoProto(node2Key, &nodeDesc1); err != nil { return err } if addr2Str, server2AddrStr := nodeDesc1.Address.String(), server2Addr.String(); addr2Str != server2AddrStr { return util.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2Str, server2AddrStr) } var nodeDesc2 roachpb.NodeDescriptor if err := node2.ctx.Gossip.GetInfoProto(node1Key, &nodeDesc2); err != nil { return err } if addr1Str, server1AddrStr := nodeDesc2.Address.String(), server1Addr.String(); addr1Str != server1AddrStr { return util.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1Str, server1AddrStr) } return nil }) }
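// Both versions of TestNodeJoin above poll cluster state until it converges
// (util.IsTrueWithin in the older one, util.SucceedsSoon in the newer one)
// rather than sleeping for a fixed interval. The helper below is a minimal,
// self-contained sketch of that retry-until-success idea using only the
// standard library; the timeout handling and backoff values are assumptions
// for illustration, not the actual util.SucceedsSoon implementation.
package retryutil

import (
	"fmt"
	"time"
)

// succeedsSoon polls fn until it returns nil or the timeout elapses, backing
// off between attempts.
func succeedsSoon(timeout time.Duration, fn func() error) error {
	deadline := time.Now().Add(timeout)
	wait := time.Millisecond
	for {
		err := fn()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition failed to hold within %s: %s", timeout, err)
		}
		time.Sleep(wait)
		if wait < 100*time.Millisecond {
			wait *= 2 // simple capped exponential backoff
		}
	}
}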
// createTestStoreWithEngine creates a test store using the given engine and clock. // The caller is responsible for closing the store on exit. func createTestStoreWithEngine(t *testing.T, eng engine.Engine, clock *hlc.Clock, bootstrap bool, sCtx *storage.StoreContext) (*storage.Store, *stop.Stopper) { stopper := stop.NewStopper() rpcContext := rpc.NewContext(&base.Context{}, clock, stopper) if sCtx == nil { // make a copy ctx := storage.TestStoreContext sCtx = &ctx } nodeDesc := &proto.NodeDescriptor{NodeID: 1} sCtx.Gossip = gossip.New(rpcContext, gossip.TestInterval, gossip.TestBootstrap) localSender := kv.NewLocalSender() rpcSend := func(_ rpc.Options, _ string, _ []net.Addr, getArgs func(addr net.Addr) gogoproto.Message, getReply func() gogoproto.Message, _ *rpc.Context) ([]gogoproto.Message, error) { call := proto.Call{ Args: getArgs(nil /* net.Addr */).(proto.Request), Reply: getReply().(proto.Response), } localSender.Send(context.Background(), call) return []gogoproto.Message{call.Reply}, call.Reply.Header().GoError() } // Mostly makes sure that we don't see a warning per request. { if err := sCtx.Gossip.AddInfoProto(gossip.MakeNodeIDKey(nodeDesc.NodeID), nodeDesc, time.Hour); err != nil { t.Fatal(err) } if err := sCtx.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatal(err) } } distSender := kv.NewDistSender(&kv.DistSenderContext{ Clock: clock, RPCSend: rpcSend, // defined above RangeDescriptorDB: localSender, // for descriptor lookup }, sCtx.Gossip) sender := kv.NewTxnCoordSender(distSender, clock, false, nil, stopper) sCtx.Clock = clock sCtx.DB = client.NewDB(sender) sCtx.Transport = multiraft.NewLocalRPCTransport(stopper) // TODO(bdarnell): arrange to have the transport closed. store := storage.NewStore(*sCtx, eng, nodeDesc) if bootstrap { if err := store.Bootstrap(proto.StoreIdent{NodeID: 1, StoreID: 1}, stopper); err != nil { t.Fatal(err) } } localSender.AddStore(store) if bootstrap { if err := store.BootstrapRange(sql.GetInitialSystemValues()); err != nil { t.Fatal(err) } } if err := store.Start(stopper); err != nil { t.Fatal(err) } return store, stopper }
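// A hypothetical caller of createTestStoreWithEngine above, shown only to make
// its contract explicit: it is the returned stopper, not the store itself,
// that must be stopped when the test is done. The surrounding setup (engine,
// clock) is assumed.
func useTestStore(t *testing.T, eng engine.Engine, clock *hlc.Clock) {
	store, stopper := createTestStoreWithEngine(t, eng, clock, true /* bootstrap */, nil /* default StoreContext */)
	defer stopper.Stop()
	_ = store // run assertions against the store here
}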
func makeTestGossip(t *testing.T) (*gossip.Gossip, func()) {
	n := simulation.NewNetwork(1, "tcp", gossip.TestInterval)
	g := n.Nodes[0].Gossip
	permConfig := &config.PermConfig{
		Read:  []string{""},
		Write: []string{""},
	}
	configMap, err := config.NewPrefixConfigMap([]*config.PrefixConfig{
		{proto.KeyMin, nil, permConfig},
	})
	if err != nil {
		t.Fatalf("failed to make prefix config map, err: %s", err.Error())
	}
	if err := g.AddInfo(gossip.KeySentinel, "cluster1", time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfo(gossip.KeyConfigPermission, configMap, time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfo(gossip.KeyFirstRangeDescriptor, testRangeDescriptor, time.Hour); err != nil {
		t.Fatal(err)
	}
	nodeIDKey := gossip.MakeNodeIDKey(1)
	if err := g.AddInfo(nodeIDKey, &proto.NodeDescriptor{
		NodeID:  1,
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
		Attrs:   proto.Attributes{Attrs: []string{"attr1", "attr2"}},
	}, time.Hour); err != nil {
		t.Fatal(err)
	}
	return g, n.Stop
}
// TestSendRPCRetry verifies that when sendRPC fails on the first replica's
// address but succeeds on the second, the reply from the second address is
// returned to the caller.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	if err := g.SetNodeDescriptor(&proto.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = proto.RangeDescriptor{
		RaftID:   1,
		StartKey: proto.Key("a"),
		EndKey:   proto.Key("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &proto.NodeDescriptor{
			NodeID: proto.NodeID(i),
			Address: proto.Addr{
				Network: addr.Network(),
				Address: addr.String(),
			},
		}
		if err := g.AddInfo(gossip.MakeNodeIDKey(proto.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, proto.Replica{
			NodeID:  proto.NodeID(i),
			StoreID: proto.StoreID(i),
		})
	}
	// Define our rpcSend stub which returns success on the second address.
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) interface{}, getReply func() interface{}, _ *rpc.Context) ([]interface{}, error) {
		if method == "Node.Scan" {
			// The reply from the first address fails.
			_ = getReply()
			// The reply from the second address succeeds.
			reply := getReply()
			reply.(*proto.ScanResponse).Rows = append([]proto.KeyValue{}, proto.KeyValue{Key: proto.Key("b"), Value: proto.Value{}})
			return []interface{}{reply}, nil
		}
		return nil, util.Errorf("unexpected method %v", method)
	}
	ctx := &DistSenderContext{
		rpcSend: testFn,
		rangeDescriptorDB: mockRangeDescriptorDB(func(_ proto.Key, _ lookupOptions) ([]proto.RangeDescriptor, error) {
			return []proto.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	call := proto.ScanCall(proto.Key("a"), proto.Key("d"), 1)
	sr := call.Reply.(*proto.ScanResponse)
	ds.Send(context.Background(), call)
	if err := sr.GoError(); err != nil {
		t.Fatal(err)
	}
	if l := len(sr.Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
// TestSendRPCRetry verifies that when sendRPC fails on the first replica's
// address but succeeds on the second, the reply from the second address is
// returned to the caller.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	g.SetNodeID(1)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{
			NodeID:  roachpb.NodeID(i),
			StoreID: roachpb.StoreID(i),
		})
	}
	// Define our rpcSend stub which returns success on the second address.
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		if method == "Node.Batch" {
			// The reply from the first address fails.
			_ = getReply()
			// The reply from the second address succeeds.
			batchReply := getReply().(*roachpb.BatchResponse)
			reply := &roachpb.ScanResponse{}
			batchReply.Add(reply)
			reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}})
			return []proto.Message{batchReply}, nil
		}
		return nil, util.Errorf("unexpected method %v", method)
	}
	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			return []roachpb.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 1)
	sr, err := client.SendWrapped(ds, nil, scan)
	if err != nil {
		t.Fatal(err)
	}
	if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
// GossipNode gossips the node's address, which is necessary before
// any messages can be sent to it. Normally done automatically by
// AddNode.
func (rttc *raftTransportTestContext) GossipNode(nodeID roachpb.NodeID, addr net.Addr) {
	if err := rttc.gossip.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		rttc.t.Fatal(err)
	}
}
// isNetworkConnected returns true if the network is fully connected
// with no partitions (i.e. every node knows every other node's
// network address).
func (n *Network) isNetworkConnected() bool {
	for _, leftNode := range n.Nodes {
		for _, rightNode := range n.Nodes {
			if _, err := leftNode.Gossip.GetInfo(gossip.MakeNodeIDKey(rightNode.Gossip.GetNodeID())); err != nil {
				return false
			}
		}
	}
	return true
}
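// isNetworkConnected above treats "every node can fetch every other node's
// node-ID key" as the connectivity criterion. For illustration only, a
// per-node gossip key is simply a string derived from the node ID; the exact
// "node:<id>" format below is an assumption, not necessarily what
// gossip.MakeNodeIDKey produces.
package gossipkeys

import "fmt"

// makeNodeIDKeySketch builds an illustrative gossip key for a node ID.
func makeNodeIDKeySketch(nodeID int32) string {
	return fmt.Sprintf("node:%d", nodeID)
}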
// TestSendRPCRetry verifies that when sendRPC fails on the first replica's
// address but succeeds on the second, the reply from the second address is
// returned to the caller.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	g.SetNodeID(1)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{
			NodeID:  roachpb.NodeID(i),
			StoreID: roachpb.StoreID(i),
		})
	}
	var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, _ *rpc.Context) (proto.Message, error) {
		batchReply := &roachpb.BatchResponse{}
		reply := &roachpb.ScanResponse{}
		batchReply.Add(reply)
		reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}})
		return batchReply, nil
	}
	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			return []roachpb.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 1)
	sr, err := client.SendWrapped(ds, nil, scan)
	if err != nil {
		t.Fatal(err)
	}
	if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
func TestOwnNodeCertain(t *testing.T) { defer leaktest.AfterTest(t)() g, s := makeTestGossip(t) defer s() const expNodeID = 42 nd := &roachpb.NodeDescriptor{ NodeID: expNodeID, Address: util.MakeUnresolvedAddr("tcp", "foobar:1234"), } g.ResetNodeID(nd.NodeID) if err := g.SetNodeDescriptor(nd); err != nil { t.Fatal(err) } if err := g.AddInfoProto(gossip.MakeNodeIDKey(expNodeID), nd, time.Hour); err != nil { t.Fatal(err) } act := make(map[roachpb.NodeID]roachpb.Timestamp) var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, ba roachpb.BatchRequest, _ *rpc.Context) (*roachpb.BatchResponse, error) { for k, v := range ba.Txn.ObservedTimestamps { act[k] = v } return ba.CreateReply(), nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { return []roachpb.RangeDescriptor{testRangeDescriptor}, nil }), } expTS := roachpb.ZeroTimestamp.Add(1, 2) ds := NewDistSender(ctx, g) v := roachpb.MakeValueFromString("value") put := roachpb.NewPut(roachpb.Key("a"), v) if _, err := client.SendWrappedWith(ds, nil, roachpb.Header{ // MaxTimestamp is set very high so that all uncertainty updates have // effect. Txn: &roachpb.Transaction{OrigTimestamp: expTS, MaxTimestamp: roachpb.MaxTimestamp}, }, put); err != nil { t.Fatalf("put encountered error: %s", err) } exp := map[roachpb.NodeID]roachpb.Timestamp{ expNodeID: expTS, } if !reflect.DeepEqual(exp, act) { t.Fatalf("wanted %v, got %v", exp, act) } }
// getNodeDescriptor returns ds.nodeDescriptor, but makes an attempt to load
// it from the Gossip network if a nil value is found.
// We must jump through hoops here to get the node descriptor because it's not available
// until after the node has joined the gossip network and been allowed to initialize
// its stores.
func (ds *DistSender) getNodeDescriptor() *proto.NodeDescriptor {
	if desc := atomic.LoadPointer(&ds.nodeDescriptor); desc != nil {
		return (*proto.NodeDescriptor)(desc)
	}
	ownNodeID := ds.gossip.GetNodeID()
	if ownNodeID > 0 {
		nodeDesc := &proto.NodeDescriptor{}
		if err := ds.gossip.GetInfoProto(gossip.MakeNodeIDKey(ownNodeID), nodeDesc); err == nil {
			atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(nodeDesc))
			return nodeDesc
		}
	}
	log.Infof("unable to determine this node's attributes for replica " +
		"selection; node is most likely bootstrapping")
	return nil
}
func TestOwnNodeCertain(t *testing.T) { defer leaktest.AfterTest(t) g, s := makeTestGossip(t) defer s() const expNodeID = 42 nd := &roachpb.NodeDescriptor{ NodeID: expNodeID, Address: util.MakeUnresolvedAddr("tcp", "foobar:1234"), } g.SetNodeID(nd.NodeID) if err := g.SetNodeDescriptor(nd); err != nil { t.Fatal(err) } if err := g.AddInfoProto(gossip.MakeNodeIDKey(expNodeID), nd, time.Hour); err != nil { t.Fatal(err) } var act roachpb.NodeList var testFn rpcSendFn = func(_ rpc.Options, _ string, _ []net.Addr, getArgs func(addr net.Addr) proto.Message, _ func() proto.Message, _ *rpc.Context) ([]proto.Message, error) { ba := getArgs(nil).(*roachpb.BatchRequest) for _, nodeID := range ba.Txn.CertainNodes.Nodes { act.Add(roachpb.NodeID(nodeID)) } return []proto.Message{ba.CreateReply()}, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { return []roachpb.RangeDescriptor{testRangeDescriptor}, nil }), } ds := NewDistSender(ctx, g) v := roachpb.MakeValueFromString("value") put := roachpb.NewPut(roachpb.Key("a"), v) if _, err := client.SendWrappedWith(ds, nil, roachpb.Header{ Txn: &roachpb.Transaction{}, }, put); err != nil { t.Fatalf("put encountered error: %s", err) } if expNodes := []roachpb.NodeID{expNodeID}; !reflect.DeepEqual(act.Nodes, expNodes) { t.Fatalf("got %v, expected %v", act.Nodes, expNodes) } }
func makeTestGossip(t *testing.T) (*gossip.Gossip, func()) {
	n := simulation.NewNetwork(1)
	g := n.Nodes[0].Gossip
	if err := g.AddInfo(gossip.KeySentinel, nil, time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testRangeDescriptor, time.Hour); err != nil {
		t.Fatal(err)
	}
	nodeIDKey := gossip.MakeNodeIDKey(1)
	if err := g.AddInfoProto(nodeIDKey, &roachpb.NodeDescriptor{
		NodeID:  1,
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
		Attrs:   roachpb.Attributes{Attrs: []string{"attr1", "attr2"}},
	}, time.Hour); err != nil {
		t.Fatal(err)
	}
	return g, n.Stop
}
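// A typical caller of the helper above, mirroring the tests in this file: the
// second return value tears down the simulated network and is deferred right
// away. The test body itself is elided.
func exampleMakeTestGossipUse(t *testing.T) {
	g, stopNetwork := makeTestGossip(t)
	defer stopNetwork()
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// ... exercise the DistSender against g ...
}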
// getNodeDescriptor returns ds.nodeDescriptor, but makes an attempt to load
// it from the Gossip network if a nil value is found.
// We must jump through hoops here to get the node descriptor because it's not available
// until after the node has joined the gossip network and been allowed to initialize
// its stores.
func (ds *DistSender) getNodeDescriptor() *roachpb.NodeDescriptor {
	if desc := atomic.LoadPointer(&ds.nodeDescriptor); desc != nil {
		return (*roachpb.NodeDescriptor)(desc)
	}
	if ds.gossip == nil {
		return nil
	}
	ownNodeID := ds.gossip.GetNodeID()
	if ownNodeID > 0 {
		// TODO(tschottdorf): Consider instead adding the NodeID of the
		// coordinator to the header, so we can get this from incoming
		// requests. Just in case we want to mostly eliminate gossip here.
		nodeDesc := &roachpb.NodeDescriptor{}
		if err := ds.gossip.GetInfoProto(gossip.MakeNodeIDKey(ownNodeID), nodeDesc); err == nil {
			atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(nodeDesc))
			return nodeDesc
		}
	}
	log.Infof("unable to determine this node's attributes for replica " +
		"selection; node is most likely bootstrapping")
	return nil
}
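// getNodeDescriptor above caches the descriptor in an unsafe.Pointer field so
// that concurrent readers never block. The snippet below is a self-contained
// sketch of that same lazy, lock-free caching pattern using only the standard
// library; the descriptor type and lookup function are stand-ins, not the
// DistSender's actual types.
package lazycache

import (
	"sync/atomic"
	"unsafe"
)

type descriptor struct{ nodeID int32 }

type descriptorCache struct {
	ptr unsafe.Pointer // holds a *descriptor once a lookup succeeds
}

// get returns the cached descriptor, or invokes lookup and caches a non-nil
// result. Multiple goroutines may race to store; any stored value is valid.
func (c *descriptorCache) get(lookup func() *descriptor) *descriptor {
	if p := atomic.LoadPointer(&c.ptr); p != nil {
		return (*descriptor)(p)
	}
	d := lookup()
	if d != nil {
		atomic.StorePointer(&c.ptr, unsafe.Pointer(d))
	}
	return d
}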
// TestSendRPCOrder verifies that sendRPC correctly takes into account the // leader, attributes and required consistency to determine where to send // remote requests. func TestSendRPCOrder(t *testing.T) { defer leaktest.AfterTest(t) g, s := makeTestGossip(t) defer s() rangeID := roachpb.RangeID(99) nodeAttrs := map[int32][]string{ 1: {}, // The local node, set in each test case. 2: {"us", "west", "gpu"}, 3: {"eu", "dublin", "pdu2", "gpu"}, 4: {"us", "east", "gpu"}, 5: {"us", "east", "gpu", "flaky"}, } // Gets filled below to identify the replica by its address. addrToNode := make(map[string]int32) makeVerifier := func(expOrder rpc.OrderingPolicy, expAddrs []int32) func(rpc.Options, []net.Addr) error { return func(o rpc.Options, addrs []net.Addr) error { if o.Ordering != expOrder { return util.Errorf("unexpected ordering, wanted %v, got %v", expOrder, o.Ordering) } var actualAddrs []int32 for i, a := range addrs { if len(expAddrs) <= i { return util.Errorf("got unexpected address: %s", a) } if expAddrs[i] == 0 { actualAddrs = append(actualAddrs, 0) } else { actualAddrs = append(actualAddrs, addrToNode[a.String()]) } } if !reflect.DeepEqual(expAddrs, actualAddrs) { return util.Errorf("expected %d, but found %d", expAddrs, actualAddrs) } return nil } } testCases := []struct { args roachpb.Request attrs []string order rpc.OrderingPolicy expReplica []int32 leader int32 // 0 for not caching a leader. // Naming is somewhat off, as eventually consistent reads usually // do not have to go to the leader when a node has a read lease. // Would really want CONSENSUS here, but that is not implemented. // Likely a test setup here will never have a read lease, but good // to keep in mind. consistent bool }{ // Inconsistent Scan without matching attributes. { args: &roachpb.ScanRequest{}, attrs: []string{}, order: rpc.OrderRandom, expReplica: []int32{1, 2, 3, 4, 5}, }, // Inconsistent Scan with matching attributes. // Should move the two nodes matching the attributes to the front and // go stable. { args: &roachpb.ScanRequest{}, attrs: nodeAttrs[5], order: rpc.OrderStable, // Compare only the first two resulting addresses. expReplica: []int32{5, 4, 0, 0, 0}, }, // Scan without matching attributes that requires but does not find // a leader. { args: &roachpb.ScanRequest{}, attrs: []string{}, order: rpc.OrderRandom, expReplica: []int32{1, 2, 3, 4, 5}, consistent: true, }, // Put without matching attributes that requires but does not find leader. // Should go random and not change anything. { args: &roachpb.PutRequest{}, attrs: []string{"nomatch"}, order: rpc.OrderRandom, expReplica: []int32{1, 2, 3, 4, 5}, }, // Put with matching attributes but no leader. // Should move the two nodes matching the attributes to the front and // go stable. { args: &roachpb.PutRequest{}, attrs: append(nodeAttrs[5], "irrelevant"), // Compare only the first two resulting addresses. order: rpc.OrderStable, expReplica: []int32{5, 4, 0, 0, 0}, }, // Put with matching attributes that finds the leader (node 3). // Should address the leader and the two nodes matching the attributes // (the last and second to last) in that order. { args: &roachpb.PutRequest{}, attrs: append(nodeAttrs[5], "irrelevant"), // Compare only the first resulting addresses as we have a leader // and that means we're only trying to send there. order: rpc.OrderStable, expReplica: []int32{2, 5, 4, 0, 0}, leader: 2, }, // Inconsistent Get without matching attributes but leader (node 3). Should just // go random as the leader does not matter. 
{ args: &roachpb.GetRequest{}, attrs: []string{}, order: rpc.OrderRandom, expReplica: []int32{1, 2, 3, 4, 5}, leader: 2, }, } descriptor := roachpb.RangeDescriptor{ StartKey: roachpb.RKeyMin, EndKey: roachpb.RKeyMax, RangeID: rangeID, Replicas: nil, } // Stub to be changed in each test case. var verifyCall func(rpc.Options, []net.Addr) error var testFn rpcSendFn = func(opts rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) { if err := verifyCall(opts, addrs); err != nil { return nil, err } return []proto.Message{getArgs(addrs[0]).(*roachpb.BatchRequest).CreateReply()}, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: mockRangeDescriptorDB(func(roachpb.RKey, bool, bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { return []roachpb.RangeDescriptor{descriptor}, nil }), } ds := NewDistSender(ctx, g) for n, tc := range testCases { verifyCall = makeVerifier(tc.order, tc.expReplica) descriptor.Replicas = nil // could do this once above, but more convenient here for i := int32(1); i <= 5; i++ { addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i)) addrToNode[addr.String()] = i nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(i), Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), Attrs: roachpb.Attributes{ Attrs: nodeAttrs[i], }, } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil { t.Fatal(err) } descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{ NodeID: roachpb.NodeID(i), StoreID: roachpb.StoreID(i), }) } { // The local node needs to get its attributes during sendRPC. nd := &roachpb.NodeDescriptor{ NodeID: 6, Attrs: roachpb.Attributes{ Attrs: tc.attrs, }, } g.SetNodeID(nd.NodeID) if err := g.SetNodeDescriptor(nd); err != nil { t.Fatal(err) } } ds.leaderCache.Update(roachpb.RangeID(rangeID), roachpb.ReplicaDescriptor{}) if tc.leader > 0 { ds.leaderCache.Update(roachpb.RangeID(rangeID), descriptor.Replicas[tc.leader-1]) } args := tc.args args.Header().Key = roachpb.Key("a") if roachpb.IsRange(args) { args.Header().EndKey = roachpb.Key("b") } consistency := roachpb.CONSISTENT if !tc.consistent { consistency = roachpb.INCONSISTENT } // Kill the cached NodeDescriptor, enforcing a lookup from Gossip. ds.nodeDescriptor = nil if _, err := client.SendWrappedWith(ds, nil, roachpb.Header{ RangeID: rangeID, // Not used in this test, but why not. ReadConsistency: consistency, }, args); err != nil { t.Errorf("%d: %s", n, err) } } }
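// TestSendRPCOrder above checks that replicas whose attributes match the local
// node's attributes are moved to the front of the candidate list. The helper
// below is an illustrative, standalone version of one such ordering rule
// (longest shared attribute prefix first); it is a sketch of the idea the test
// exercises, not the DistSender's actual ordering code.
package ordering

import "sort"

type replicaInfo struct {
	storeID int
	attrs   []string
}

// commonPrefixLen counts how many leading attributes two lists share.
func commonPrefixLen(a, b []string) int {
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

// orderByAttrs stably sorts replicas so that those sharing more leading
// attributes with the local node come first.
func orderByAttrs(replicas []replicaInfo, localAttrs []string) {
	sort.SliceStable(replicas, func(i, j int) bool {
		return commonPrefixLen(replicas[i].attrs, localAttrs) >
			commonPrefixLen(replicas[j].attrs, localAttrs)
	})
}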
// TestMultiRangeSplitEndTransaction verifies that when a chunk of batch looks // like it's going to be dispatched to more than one range, it will be split // up if it it contains EndTransaction. func TestMultiRangeSplitEndTransaction(t *testing.T) { defer leaktest.AfterTest(t) g, s := makeTestGossip(t) defer s() testCases := []struct { put1, put2, et roachpb.Key exp [][]roachpb.Method }{ { // Everything hits the first range, so we get a 1PC txn. roachpb.Key("a1"), roachpb.Key("a2"), roachpb.Key("a3"), [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTransaction}}, }, { // Only EndTransaction hits the second range. roachpb.Key("a1"), roachpb.Key("a2"), roachpb.Key("b"), [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTransaction}}, }, { // One write hits the second range, so EndTransaction has to be split off. // In this case, going in the usual order without splitting off // would actually be fine, but it doesn't seem worth optimizing at // this point. roachpb.Key("a1"), roachpb.Key("b1"), roachpb.Key("a1"), [][]roachpb.Method{{roachpb.Put, roachpb.Noop}, {roachpb.Noop, roachpb.Put}, {roachpb.EndTransaction}}, }, { // Both writes go to the second range, but not EndTransaction. roachpb.Key("b1"), roachpb.Key("b2"), roachpb.Key("a1"), [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTransaction}}, }, } if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil { t.Fatal(err) } nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { t.Fatal(err) } // Fill mockRangeDescriptorDB with two descriptors. var descriptor1 = roachpb.RangeDescriptor{ RangeID: 1, StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("b"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } var descriptor2 = roachpb.RangeDescriptor{ RangeID: 2, StartKey: roachpb.RKey("b"), EndKey: roachpb.RKeyMax, Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { desc := descriptor1 if !key.Less(roachpb.RKey("b")) { desc = descriptor2 } return []roachpb.RangeDescriptor{desc}, nil }) for _, test := range testCases { var act [][]roachpb.Method var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, ga func(addr net.Addr) proto.Message, _ func() proto.Message, _ *rpc.Context) ([]proto.Message, error) { ba := ga(testAddress).(*roachpb.BatchRequest) var cur []roachpb.Method for _, union := range ba.Requests { cur = append(cur, union.GetInner().Method()) } act = append(act, cur) return []proto.Message{ba.CreateReply()}, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: descDB, } ds := NewDistSender(ctx, g) // Send a batch request containing two puts. var ba roachpb.BatchRequest ba.Txn = &roachpb.Transaction{Name: "test"} val := roachpb.MakeValueFromString("val") ba.Add(roachpb.NewPut(roachpb.Key(test.put1), val).(*roachpb.PutRequest)) ba.Add(roachpb.NewPut(roachpb.Key(test.put2), val).(*roachpb.PutRequest)) ba.Add(&roachpb.EndTransactionRequest{Span: roachpb.Span{Key: test.et}}) _, pErr := ds.Send(context.Background(), ba) if err := pErr.GoError(); err != nil { t.Fatal(err) } if !reflect.DeepEqual(test.exp, act) { t.Fatalf("expected %v, got %v", test.exp, act) } } }
// TestInOrderDelivery verifies that for a given pair of nodes, raft // messages are delivered in order. func TestInOrderDelivery(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper) g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap) server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext) if err := server.Start(); err != nil { t.Fatal(err) } defer server.Close() const numMessages = 100 protoNodeID := proto.NodeID(1) raftNodeID := proto.MakeRaftNodeID(protoNodeID, 1) serverTransport, err := newRPCTransport(g, server, nodeRPCContext) if err != nil { t.Fatal(err) } defer serverTransport.Close() serverChannel := newChannelServer(numMessages, 10*time.Millisecond) if err := serverTransport.Listen(raftNodeID, serverChannel); err != nil { t.Fatal(err) } addr := server.Addr() if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID), &proto.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } clientNodeID := proto.MakeRaftNodeID(2, 2) clientTransport, err := newRPCTransport(g, nil, nodeRPCContext) if err != nil { t.Fatal(err) } defer clientTransport.Close() for i := 0; i < numMessages; i++ { req := &multiraft.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ To: uint64(raftNodeID), From: uint64(clientNodeID), Commit: uint64(i), }, } if err := clientTransport.Send(req); err != nil { t.Errorf("failed to send message %d: %s", i, err) } } for i := 0; i < numMessages; i++ { req := <-serverChannel.ch if req.Message.Commit != uint64(i) { t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i) } } }
// TestTruncateWithSpanAndDescriptor verifies that a batch request is truncated with a // range span and the range of a descriptor found in cache. func TestTruncateWithSpanAndDescriptor(t *testing.T) { defer leaktest.AfterTest(t) g, s := makeTestGossip(t) defer s() g.SetNodeID(1) if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil { t.Fatal(err) } nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { t.Fatal(err) } // Fill mockRangeDescriptorDB with two descriptors. When a // range descriptor is looked up by key "b", return the second // descriptor whose range is ["a", "c") and partially overlaps // with the first descriptor's range. var descriptor1 = roachpb.RangeDescriptor{ RangeID: 1, StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("b"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } var descriptor2 = roachpb.RangeDescriptor{ RangeID: 2, StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { desc := descriptor1 if key.Equal(roachpb.RKey("b")) { desc = descriptor2 } return []roachpb.RangeDescriptor{desc}, nil }) // Define our rpcSend stub which checks the span of the batch // requests. The first request should be the point request on // "a". The second request should be on "b". first := true var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) { if method != "Node.Batch" { return nil, util.Errorf("unexpected method %v", method) } ba := getArgs(testAddress).(*roachpb.BatchRequest) rs := keys.Range(*ba) if first { if !(rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next())) { t.Errorf("Unexpected span [%s,%s)", rs.Key, rs.EndKey) } first = false } else { if !(rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next())) { t.Errorf("Unexpected span [%s,%s)", rs.Key, rs.EndKey) } } batchReply := getReply().(*roachpb.BatchResponse) reply := &roachpb.PutResponse{} batchReply.Add(reply) return []proto.Message{batchReply}, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: descDB, } ds := NewDistSender(ctx, g) // Send a batch request contains two puts. In the first // attempt, the range of the descriptor found in the cache is // ["a", "b"). The request is truncated to contain only the put // on "a". // // In the second attempt, The range of the descriptor found in // the cache is ["a", c"), but the put on "a" will not be // resent. The request is truncated to contain only the put on "b". ba := roachpb.BatchRequest{} ba.Txn = &roachpb.Transaction{Name: "test"} val := roachpb.MakeValueFromString("val") ba.Add(roachpb.NewPut(keys.RangeTreeNodeKey(roachpb.RKey("a")), val).(*roachpb.PutRequest)) ba.Add(roachpb.NewPut(keys.RangeTreeNodeKey(roachpb.RKey("b")), val).(*roachpb.PutRequest)) _, pErr := ds.Send(context.Background(), ba) if err := pErr.GoError(); err != nil { t.Fatal(err) } }
func TestSendAndReceive(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper) g := gossip.New(nodeRPCContext, nil, stopper) g.SetNodeID(roachpb.NodeID(1)) // Create several servers, each of which has two stores (A raft // node ID addresses a store). Node 1 has stores 1 and 2, node 2 has // stores 3 and 4, etc. // // We suppose that range 1 is replicated across the odd-numbered // stores in reverse order to ensure that the various IDs are not // equal: replica 1 is store 5, replica 2 is store 3, and replica 3 // is store 1. const numNodes = 3 const storesPerNode = 2 nextNodeID := roachpb.NodeID(2) nextStoreID := roachpb.StoreID(2) // Per-node state. transports := map[roachpb.NodeID]*storage.RaftTransport{} // Per-store state. storeNodes := map[roachpb.StoreID]roachpb.NodeID{} channels := map[roachpb.StoreID]channelServer{} replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{ 1: 3, 3: 2, 5: 1, } messageTypes := []raftpb.MessageType{ raftpb.MsgSnap, raftpb.MsgHeartbeat, } for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ { nodeID := nextNodeID nextNodeID++ grpcServer := rpc.NewServer(nodeRPCContext) ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr) if err != nil { t.Fatal(err) } addr := ln.Addr() // Have to call g.SetNodeID before call g.AddInfo. g.ResetNodeID(roachpb.NodeID(nodeID)) if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID), &roachpb.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } transport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext) transports[nodeID] = transport for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ { storeID := nextStoreID nextStoreID++ storeNodes[storeID] = nodeID channel := newChannelServer(numNodes*storesPerNode*len(messageTypes), 0) transport.Listen(storeID, channel.RaftMessage) channels[storeID] = channel } } messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int) // Each store sends one snapshot and one heartbeat to each store, including // itself. for toStoreID, toNodeID := range storeNodes { if _, ok := messageTypeCounts[toStoreID]; !ok { messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int) } for fromStoreID, fromNodeID := range storeNodes { baseReq := storage.RaftMessageRequest{ Message: raftpb.Message{ From: uint64(fromStoreID), To: uint64(toStoreID), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: fromNodeID, StoreID: fromStoreID, }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: toNodeID, StoreID: toStoreID, }, } for _, messageType := range messageTypes { req := baseReq req.Message.Type = messageType if err := transports[fromNodeID].Send(&req); err != nil { t.Errorf("unable to send %s from %d to %d: %s", req.Message.Type, fromNodeID, toNodeID, err) } messageTypeCounts[toStoreID][req.Message.Type]++ } } } // Read all the messages from the channels. Note that the transport // does not guarantee in-order delivery between independent // transports, so we just verify that the right number of messages // end up in each channel. 
for toStoreID := range storeNodes { func() { for len(messageTypeCounts[toStoreID]) > 0 { req := <-channels[toStoreID].ch if req.Message.To != uint64(toStoreID) { t.Errorf("got unexpected message %v on channel %d", req, toStoreID) } if typeCounts, ok := messageTypeCounts[toStoreID]; ok { if _, ok := typeCounts[req.Message.Type]; ok { typeCounts[req.Message.Type]-- if typeCounts[req.Message.Type] == 0 { delete(typeCounts, req.Message.Type) } } else { t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type) } } else { t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID) } } delete(messageTypeCounts, toStoreID) }() select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %v on channel %d", req, toStoreID) case <-time.After(100 * time.Millisecond): } } if len(messageTypeCounts) > 0 { t.Errorf("remaining messages expected: %v", messageTypeCounts) } // Real raft messages have different node/store/replica IDs. // Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3) fromStoreID := roachpb.StoreID(3) toStoreID := roachpb.StoreID(5) expReq := &storage.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ Type: raftpb.MsgApp, From: uint64(replicaIDs[fromStoreID]), To: uint64(replicaIDs[toStoreID]), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[fromStoreID], StoreID: fromStoreID, ReplicaID: replicaIDs[fromStoreID], }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[toStoreID], StoreID: toStoreID, ReplicaID: replicaIDs[toStoreID], }, } if err := transports[storeNodes[fromStoreID]].Send(expReq); err != nil { t.Errorf("unable to send message from %d to %d: %s", fromStoreID, toStoreID, err) } if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) { t.Errorf("got unexpected message %v on channel %d", req, toStoreID) } select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %v on channel %d", req, toStoreID) default: } }
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)

	grpcServer := rpc.NewServer(nodeRPCContext)
	ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(2)
	serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage)
	addr := ln.Addr()
	// The gossip NodeID must be set before calling AddInfoProto below.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext)

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			GroupID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if err := clientTransport.Send(req); err != nil {
			t.Errorf("failed to send message %d: %s", i, err)
		}
	}
	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
// TestTruncateWithLocalSpanAndDescriptor verifies that a batch request with local keys // is truncated with a range span and the range of a descriptor found in cache. func TestTruncateWithLocalSpanAndDescriptor(t *testing.T) { defer leaktest.AfterTest(t)() g, s := makeTestGossip(t) defer s() if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil { t.Fatal(err) } nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { t.Fatal(err) } // Fill mockRangeDescriptorDB with two descriptors. var descriptor1 = roachpb.RangeDescriptor{ RangeID: 1, StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("b"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } var descriptor2 = roachpb.RangeDescriptor{ RangeID: 2, StartKey: roachpb.RKey("b"), EndKey: roachpb.RKey("c"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } var descriptor3 = roachpb.RangeDescriptor{ RangeID: 3, StartKey: roachpb.RKey("c"), EndKey: roachpb.RKeyMax, Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { switch { case !key.Less(roachpb.RKey("c")): return []roachpb.RangeDescriptor{descriptor3}, nil case !key.Less(roachpb.RKey("b")): return []roachpb.RangeDescriptor{descriptor2}, nil default: return []roachpb.RangeDescriptor{descriptor1}, nil } }) // Define our rpcSend stub which checks the span of the batch // requests. requests := 0 sendStub := func(_ SendOptions, _ ReplicaSlice, ba roachpb.BatchRequest, _ *rpc.Context) (*roachpb.BatchResponse, error) { h := ba.Requests[0].GetInner().Header() switch requests { case 0: wantStart := keys.RangeDescriptorKey(roachpb.RKey("a")) wantEnd := keys.MakeRangeKeyPrefix(roachpb.RKey("b")) if !(h.Key.Equal(wantStart) && h.EndKey.Equal(wantEnd)) { t.Errorf("Unexpected span [%s,%s), want [%s,%s)", h.Key, h.EndKey, wantStart, wantEnd) } case 1: wantStart := keys.MakeRangeKeyPrefix(roachpb.RKey("b")) wantEnd := keys.MakeRangeKeyPrefix(roachpb.RKey("c")) if !(h.Key.Equal(wantStart) && h.EndKey.Equal(wantEnd)) { t.Errorf("Unexpected span [%s,%s), want [%s,%s)", h.Key, h.EndKey, wantStart, wantEnd) } case 2: wantStart := keys.MakeRangeKeyPrefix(roachpb.RKey("c")) wantEnd := keys.RangeDescriptorKey(roachpb.RKey("c")) if !(h.Key.Equal(wantStart) && h.EndKey.Equal(wantEnd)) { t.Errorf("Unexpected span [%s,%s), want [%s,%s)", h.Key, h.EndKey, wantStart, wantEnd) } } requests++ batchReply := &roachpb.BatchResponse{} reply := &roachpb.ScanResponse{} batchReply.Add(reply) return batchReply, nil } ctx := &DistSenderContext{ RPCSend: sendStub, RangeDescriptorDB: descDB, } ds := NewDistSender(ctx, g) // Send a batch request contains two scans. In the first // attempt, the range of the descriptor found in the cache is // ["", "b"). The request is truncated to contain only the scan // on local keys that address up to "b". // // In the second attempt, The range of the descriptor found in // the cache is ["b", "d"), The request is truncated to contain // only the scan on local keys that address from "b" to "d". 
ba := roachpb.BatchRequest{} ba.Txn = &roachpb.Transaction{Name: "test"} ba.Add(roachpb.NewScan(keys.RangeDescriptorKey(roachpb.RKey("a")), keys.RangeDescriptorKey(roachpb.RKey("c")), 0)) if _, pErr := ds.Send(context.Background(), ba); pErr != nil { t.Fatal(pErr) } if want := 3; requests != want { t.Errorf("expected request to be split into %d parts, found %d", want, requests) } }
// TestSequenceUpdate verifies txn sequence number is incremented // on successive commands. func TestSequenceUpdate(t *testing.T) { defer leaktest.AfterTest(t)() g, s := makeTestGossip(t) defer s() if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil { t.Fatal(err) } nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { t.Fatal(err) } descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { return []roachpb.RangeDescriptor{ { RangeID: 1, StartKey: roachpb.RKeyMin, EndKey: roachpb.RKeyMax, Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, }, }, nil }) var expSequence uint32 var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, ba roachpb.BatchRequest, _ *rpc.Context) (*roachpb.BatchResponse, error) { expSequence++ if expSequence != ba.Txn.Sequence { t.Errorf("expected sequence %d; got %d", expSequence, ba.Txn.Sequence) } br := ba.CreateReply() br.Txn = ba.Txn return br, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: descDB, } ds := NewDistSender(ctx, g) // Send 5 puts and verify sequence number increase. txn := &roachpb.Transaction{Name: "test"} for i := 0; i < 5; i++ { var ba roachpb.BatchRequest ba.Txn = txn ba.Add(roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("foo")).(*roachpb.PutRequest)) br, pErr := ds.Send(context.Background(), ba) if pErr != nil { t.Fatal(pErr) } txn = br.Txn } }
func TestSendAndReceive(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper) g := gossip.New(nodeRPCContext, gossip.TestInterval, gossip.TestBootstrap) // Create several servers, each of which has two stores (A multiraft node ID addresses // a store). const numServers = 3 const storesPerServer = 2 const numStores = numServers * storesPerServer // servers has length numServers. servers := []*rpc.Server{} // All the rest have length numStores (note that several stores share a transport). nextNodeID := proto.NodeID(1) nodeIDs := []proto.RaftNodeID{} transports := []multiraft.Transport{} channels := []channelServer{} for serverIndex := 0; serverIndex < numServers; serverIndex++ { server := rpc.NewServer(util.CreateTestAddr("tcp"), nodeRPCContext) if err := server.Start(); err != nil { t.Fatal(err) } defer server.Close() transport, err := newRPCTransport(g, server, nodeRPCContext) if err != nil { t.Fatalf("Unexpected error creating transport, Error: %s", err) } defer transport.Close() for store := 0; store < storesPerServer; store++ { protoNodeID := nextNodeID nodeID := proto.MakeRaftNodeID(protoNodeID, 1) nextNodeID++ channel := newChannelServer(10, 0) if err := transport.Listen(nodeID, channel); err != nil { t.Fatal(err) } addr := server.Addr() if err := g.AddInfoProto(gossip.MakeNodeIDKey(protoNodeID), &proto.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } nodeIDs = append(nodeIDs, nodeID) transports = append(transports, transport) channels = append(channels, channel) } servers = append(servers, server) } // Each store sends one message to each store. for from := 0; from < numStores; from++ { for to := 0; to < numStores; to++ { req := &multiraft.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ From: uint64(nodeIDs[from]), To: uint64(nodeIDs[to]), Type: raftpb.MsgHeartbeat, }, } if err := transports[from].Send(req); err != nil { t.Errorf("Unable to send message from %d to %d: %s", nodeIDs[from], nodeIDs[to], err) } } } // Read all the messages from the channels. Note that the transport // does not guarantee in-order delivery between independent // transports, so we just verify that the right number of messages // end up in each channel. for to := 0; to < numStores; to++ { for from := 0; from < numStores; from++ { select { case req := <-channels[to].ch: if req.Message.To != uint64(nodeIDs[to]) { t.Errorf("invalid message received on channel %d (expected from %d): %+v", nodeIDs[to], nodeIDs[from], req) } case <-time.After(5 * time.Second): t.Fatal("timed out waiting for message") } } select { case req := <-channels[to].ch: t.Errorf("got unexpected message %+v on channel %d", req, nodeIDs[to]) default: } } }
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(nodeRPCContext, gossip.TestBootstrap, stopper)
	g.SetNodeID(roachpb.NodeID(1))
	rpcServer := rpc.NewServer(nodeRPCContext)
	grpcServer := grpc.NewServer()
	tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), util.CreateTestAddr("tcp"), tlsConfig)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(2)
	serverTransport := newRPCTransport(g, grpcServer, nodeRPCContext)
	defer serverTransport.Close()
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	if err := serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage); err != nil {
		t.Fatal(err)
	}
	addr := ln.Addr()
	// The gossip NodeID must be set before calling AddInfoProto below.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := newRPCTransport(g, nil, nodeRPCContext)
	defer clientTransport.Close()

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			GroupID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if err := clientTransport.Send(req); err != nil {
			t.Errorf("failed to send message %d: %s", i, err)
		}
	}
	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
func TestSendAndReceive(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(nodeTestBaseContext, hlc.NewClock(hlc.UnixNano), stopper) g := gossip.New(nodeRPCContext, gossip.TestBootstrap, stopper) g.SetNodeID(roachpb.NodeID(1)) // Create several servers, each of which has two stores (A raft // node ID addresses a store). Node 1 has stores 1 and 2, node 2 has // stores 3 and 4, etc. // // We suppose that range 1 is replicated across the odd-numbered // stores in reverse order to ensure that the various IDs are not // equal: replica 1 is store 5, replica 2 is store 3, and replica 3 // is store 1. const numServers = 3 const storesPerServer = 2 const numStores = numServers * storesPerServer nextNodeID := roachpb.NodeID(2) nextStoreID := roachpb.StoreID(2) // Per-node state. transports := map[roachpb.NodeID]storage.RaftTransport{} // Per-store state. storeNodes := map[roachpb.StoreID]roachpb.NodeID{} channels := map[roachpb.StoreID]channelServer{} replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{ 1: 3, 3: 2, 5: 1, } for serverIndex := 0; serverIndex < numServers; serverIndex++ { nodeID := nextNodeID nextNodeID++ rpcServer := rpc.NewServer(nodeRPCContext) grpcServer := grpc.NewServer() tlsConfig, err := nodeRPCContext.GetServerTLSConfig() if err != nil { t.Fatal(err) } ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), util.CreateTestAddr("tcp"), tlsConfig) if err != nil { t.Fatal(err) } addr := ln.Addr() // Have to call g.SetNodeID before call g.AddInfo g.SetNodeID(roachpb.NodeID(nodeID)) if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID), &roachpb.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } transport := newRPCTransport(g, grpcServer, nodeRPCContext) defer transport.Close() transports[nodeID] = transport for store := 0; store < storesPerServer; store++ { storeID := nextStoreID nextStoreID++ storeNodes[storeID] = nodeID channel := newChannelServer(10, 0) if err := transport.Listen(storeID, channel.RaftMessage); err != nil { t.Fatal(err) } channels[storeID] = channel } } // Heartbeat messages: Each store sends one message to each store. for fromStoreID, fromNodeID := range storeNodes { for toStoreID, toNodeID := range storeNodes { req := &storage.RaftMessageRequest{ GroupID: 0, Message: raftpb.Message{ Type: raftpb.MsgHeartbeat, From: uint64(fromStoreID), To: uint64(toStoreID), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: fromNodeID, StoreID: fromStoreID, ReplicaID: 0, }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: toNodeID, StoreID: toStoreID, ReplicaID: 0, }, } if err := transports[fromNodeID].Send(req); err != nil { t.Errorf("Unable to send message from %d to %d: %s", fromNodeID, toNodeID, err) } } } // Read all the messages from the channels. Note that the transport // does not guarantee in-order delivery between independent // transports, so we just verify that the right number of messages // end up in each channel. 
for toStoreID := range storeNodes { for range storeNodes { select { case req := <-channels[toStoreID].ch: if req.Message.To != uint64(toStoreID) { t.Errorf("invalid message received on channel %d: %+v", toStoreID, req) } case <-time.After(5 * time.Second): t.Fatal("timed out waiting for message") } } select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %+v on channel %d", req, toStoreID) default: } } // Real raft messages have different node/store/replica IDs. // Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3) fromStoreID := roachpb.StoreID(3) toStoreID := roachpb.StoreID(5) req := &storage.RaftMessageRequest{ GroupID: 1, Message: raftpb.Message{ Type: raftpb.MsgApp, From: uint64(replicaIDs[fromStoreID]), To: uint64(replicaIDs[toStoreID]), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[fromStoreID], StoreID: fromStoreID, ReplicaID: replicaIDs[fromStoreID], }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: storeNodes[toStoreID], StoreID: toStoreID, ReplicaID: replicaIDs[toStoreID], }, } if err := transports[storeNodes[fromStoreID]].Send(req); err != nil { t.Errorf("Unable to send message from %d to %d: %s", fromStoreID, toStoreID, err) } select { case req2 := <-channels[toStoreID].ch: if !reflect.DeepEqual(req, req2) { t.Errorf("got unexpected message %+v", req2) } case <-time.After(5 * time.Second): t.Fatal("timed out waiting for message") } select { case req := <-channels[toStoreID].ch: t.Errorf("got unexpected message %+v on channel %d", req, toStoreID) default: } }
// TestSequenceUpdateOnMultiRangeQueryLoop reproduces #3206 and // verifies that the sequence is updated in the DistSender // multi-range-query loop. // // More specifically, the issue was that DistSender might send // multiple batch requests to the same replica when it finds a // post-split range descriptor in the cache while the split has not // yet been fully completed. By giving a higher sequence to the second // request, we can avoid an infinite txn restart error (otherwise // caused by hitting the sequence cache). func TestSequenceUpdateOnMultiRangeQueryLoop(t *testing.T) { defer leaktest.AfterTest(t) g, s := makeTestGossip(t) defer s() if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil { t.Fatal(err) } nd := &roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(1), Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), } if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { t.Fatal(err) } // Fill mockRangeDescriptorDB with two descriptors. var descriptor1 = roachpb.RangeDescriptor{ RangeID: 1, StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("b"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } var descriptor2 = roachpb.RangeDescriptor{ RangeID: 2, StartKey: roachpb.RKey("b"), EndKey: roachpb.RKey("c"), Replicas: []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, }, }, } descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { desc := descriptor1 if key.Equal(roachpb.RKey("b")) { desc = descriptor2 } return []roachpb.RangeDescriptor{desc}, nil }) // Define our rpcSend stub which checks the span of the batch // requests. The first request should be the point request on // "a". The second request should be on "b". The sequence of the // second request will be incremented by one from that of the // first request. first := true var firstSequence uint32 var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) { if method != "Node.Batch" { return nil, util.Errorf("unexpected method %v", method) } ba := getArgs(testAddress).(*roachpb.BatchRequest) rs := keys.Range(*ba) if first { if !(rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next())) { t.Errorf("unexpected span [%s,%s)", rs.Key, rs.EndKey) } first = false firstSequence = ba.Txn.Sequence } else { if !(rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next())) { t.Errorf("unexpected span [%s,%s)", rs.Key, rs.EndKey) } if ba.Txn.Sequence != firstSequence+1 { t.Errorf("unexpected sequence; expected %d, but got %d", firstSequence+1, ba.Txn.Sequence) } } return []proto.Message{ba.CreateReply()}, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: descDB, } ds := NewDistSender(ctx, g) // Send a batch request containing two puts. var ba roachpb.BatchRequest ba.Txn = &roachpb.Transaction{Name: "test"} val := roachpb.MakeValueFromString("val") ba.Add(roachpb.NewPut(roachpb.Key("a"), val).(*roachpb.PutRequest)) ba.Add(roachpb.NewPut(roachpb.Key("b"), val).(*roachpb.PutRequest)) _, pErr := ds.Send(context.Background(), ba) if err := pErr.GoError(); err != nil { t.Fatal(err) } }
// waitForStoreFrozen polls the given stores until they all report having no // unfrozen Replicas (or an error or timeout occurs). func (s *adminServer) waitForStoreFrozen( stream serverpb.Admin_ClusterFreezeServer, stores map[roachpb.StoreID]roachpb.NodeID, wantFrozen bool, ) error { mu := struct { sync.Mutex oks map[roachpb.StoreID]bool }{ oks: make(map[roachpb.StoreID]bool), } opts := base.DefaultRetryOptions() opts.Closer = s.server.stopper.ShouldDrain() opts.MaxRetries = 20 sem := make(chan struct{}, 256) errChan := make(chan error, 1) sendErr := func(err error) { select { case errChan <- err: default: } } numWaiting := len(stores) // loop until this drops to zero var err error for r := retry.Start(opts); r.Next(); { mu.Lock() for storeID, nodeID := range stores { storeID, nodeID := storeID, nodeID // loop-local copies for goroutine var nodeDesc roachpb.NodeDescriptor if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil { sendErr(err) break } addr := nodeDesc.Address.String() if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded { continue } mu.oks[storeID] = false // mark as inflight action := func() (err error) { var resp *roachpb.PollFrozenResponse defer func() { message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID) if err != nil { message += err.Error() } else { numMismatching := len(resp.Results) mu.Lock() if numMismatching == 0 { // If the Store is in the right state, mark it as such. // This means we won't try it again. message += "ready" mu.oks[storeID] = true } else { // Otherwise, forget that we tried the Store so that // the retry loop picks it up again. message += fmt.Sprintf("%d replicas report wrong status", numMismatching) if limit := 10; numMismatching > limit { message += " [truncated]: " resp.Results = resp.Results[:limit] } else { message += ": " } message += fmt.Sprintf("%+v", resp.Results) delete(mu.oks, storeID) } mu.Unlock() err = stream.Send(&serverpb.ClusterFreezeResponse{ Message: message, }) } }() conn, err := s.server.rpcContext.GRPCDial(addr) if err != nil { return err } client := roachpb.NewInternalClient(conn) resp, err = client.PollFrozen(context.Background(), &roachpb.PollFrozenRequest{ StoreRequestHeader: roachpb.StoreRequestHeader{ NodeID: nodeID, StoreID: storeID, }, // If we are looking to freeze everything, we want to // collect thawed Replicas, and vice versa. CollectFrozen: !wantFrozen, }) return err } // Run a limited, non-blocking task. That means the task simply // won't run if the semaphore is full (or the node is draining). // Both are handled by the surrounding retry loop. if !s.server.stopper.RunLimitedAsyncTask(sem, func() { if err := action(); err != nil { sendErr(err) } }) { // Node draining. sendErr(errors.New("node is shutting down")) break } } numWaiting = len(stores) for _, ok := range mu.oks { if ok { // Store has reported that it is frozen. numWaiting-- continue } } mu.Unlock() select { case err = <-errChan: default: } // Keep going unless there's been an error or everyone's frozen. if err != nil || numWaiting == 0 { break } if err := stream.Send(&serverpb.ClusterFreezeResponse{ Message: fmt.Sprintf("waiting for %d store%s to apply operation", numWaiting, util.Pluralize(int64(numWaiting))), }); err != nil { return err } } if err != nil { return err } if numWaiting > 0 { err = fmt.Errorf("timed out waiting for %d store%s to report freeze", numWaiting, util.Pluralize(int64(numWaiting))) } return err }
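// waitForStoreFrozen above combines two small concurrency patterns: a
// capacity-1 error channel whose non-blocking send keeps only the first error,
// and a buffered-channel semaphore that bounds the number of in-flight store
// probes. The function below is a condensed, self-contained sketch of those
// two patterns using only the standard library; the probe callback, the 256
// limit, and all names are illustrative assumptions, not the server code.
package freezer

import "sync"

func probeAll(addrs []string, probe func(addr string) error) error {
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err: // record the first error
		default: // an error is already pending; drop this one
		}
	}
	sem := make(chan struct{}, 256) // at most 256 probes in flight
	var wg sync.WaitGroup
	for _, addr := range addrs {
		addr := addr // loop-local copy for the goroutine
		wg.Add(1)
		sem <- struct{}{} // acquire a slot; blocks while the limit is reached
		go func() {
			defer func() { <-sem; wg.Done() }() // release the slot
			if err := probe(addr); err != nil {
				sendErr(err)
			}
		}()
	}
	wg.Wait()
	select {
	case err := <-errChan:
		return err
	default:
		return nil
	}
}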