// startFakeServerGossips creates local gossip instances and remote // faked gossip instance. The remote gossip instance launches its // faked gossip service just for check the client message. func startFakeServerGossips(t *testing.T) (local *Gossip, remote *fakeGossipServer, stopper *stop.Stopper) { stopper = stop.NewStopper() lRPCContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper) lserver := rpc.NewServer(lRPCContext) lln, err := netutil.ListenAndServeGRPC(stopper, lserver, util.TestAddr) if err != nil { t.Fatal(err) } local = New(lRPCContext, nil, stopper) local.start(lserver, lln.Addr()) rRPCContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper) rserver := rpc.NewServer(rRPCContext) rln, err := netutil.ListenAndServeGRPC(stopper, rserver, util.TestAddr) if err != nil { t.Fatal(err) } remote = newFakeGossipServer(rserver, stopper) addr := rln.Addr() remote.nodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String()) return }
// CreateNode creates a simulation node and starts an RPC server for it. func (n *Network) CreateNode() (*Node, error) { server := rpc.NewServer(n.rpcContext) ln, err := netutil.ListenAndServeGRPC(n.Stopper, server, util.TestAddr) if err != nil { return nil, err } node := &Node{Server: server, Addr: ln.Addr()} node.Gossip = gossip.New(n.rpcContext, nil, n.Stopper) n.Nodes = append(n.Nodes, node) return node, nil }
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	// Long intervals so neither scanner nor consistency checker fires
	// during a test run.
	ctx.ScanInterval = 10 * time.Hour
	ctx.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCtx := makeTestContext()
	g := gossip.New(
		context.Background(),
		nodeRPCContext,
		grpcServer,
		serverCtx.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry())
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(ln.Addr())
	}
	ctx.Gossip = g
	// Retries are aborted once the stopper begins quiescing.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(&kv.DistSenderConfig{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer())
	sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper,
		kv.MakeTxnMetrics())
	ctx.DB = client.NewDB(sender)
	// NOTE(review): a dummy raft transport is used, so this node cannot
	// exchange raft traffic — presumably sufficient for these tests.
	ctx.Transport = storage.NewDummyRaftTransport()
	node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), ctx.Clock, node, stopper
}
// AddNodeWithoutGossip registers a node with the cluster. Nodes must // be added before they can be used in other methods of // raftTransportTestContext. Unless you are testing the effects of // delaying gossip, use AddNode instead. func (rttc *raftTransportTestContext) AddNodeWithoutGossip( nodeID roachpb.NodeID, ) (*storage.RaftTransport, net.Addr) { grpcServer := rpc.NewServer(rttc.nodeRPCContext) ln, err := netutil.ListenAndServeGRPC(rttc.stopper, grpcServer, util.TestAddr) if err != nil { rttc.t.Fatal(err) } transport := storage.NewRaftTransport(storage.GossipAddressResolver(rttc.gossip), grpcServer, rttc.nodeRPCContext) rttc.transports[nodeID] = transport return transport, ln.Addr() }
func newTestServer(t *testing.T, ctx *Context, manual bool) (*grpc.Server, net.Listener) { tlsConfig, err := ctx.GetServerTLSConfig() if err != nil { t.Fatal(err) } s := grpc.NewServer(grpc.Creds(credentials.NewTLS(tlsConfig))) ln, err := netutil.ListenAndServeGRPC(ctx.Stopper, s, util.TestAddr) if err != nil { t.Fatal(err) } return s, ln }
// TestClientRegisterInitNodeID verifies two client's gossip request with NodeID 0. func TestClientRegisterWithInitNodeID(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() // Create three gossip nodes, and connect to the first with NodeID 0. var g []*Gossip var gossipAddr string for i := 0; i < 3; i++ { RPCContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper) server := rpc.NewServer(RPCContext) ln, err := netutil.ListenAndServeGRPC(stopper, server, util.TestAddr) if err != nil { t.Fatal(err) } // Connect to the first gossip node. if gossipAddr == "" { gossipAddr = ln.Addr().String() } var resolvers []resolver.Resolver resolver, err := resolver.NewResolver(RPCContext.Context, gossipAddr) if err != nil { t.Fatal(err) } resolvers = append(resolvers, resolver) gnode := New(RPCContext, server, resolvers, stopper, metric.NewRegistry()) // node ID must be non-zero gnode.SetNodeID(roachpb.NodeID(i + 1)) g = append(g, gnode) gnode.Start(ln.Addr()) } util.SucceedsSoon(t, func() error { // The first gossip node should have two gossip client address // in nodeMap if these three gossip nodes registered success. g[0].mu.Lock() defer g[0].mu.Unlock() if a, e := len(g[0].nodeMap), 2; a != e { return errors.Errorf("expected %s to contain %d nodes, got %d", g[0].nodeMap, e, a) } return nil }) }
// startGossip creates and starts a gossip instance. func startGossip(nodeID roachpb.NodeID, stopper *stop.Stopper, t *testing.T) *Gossip { rpcContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper) server := rpc.NewServer(rpcContext) ln, err := netutil.ListenAndServeGRPC(stopper, server, util.TestAddr) if err != nil { t.Fatal(err) } addr := ln.Addr() g := New(rpcContext, nil, stopper) g.SetNodeID(nodeID) if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{ NodeID: nodeID, Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }); err != nil { t.Fatal(err) } g.start(server, addr) time.Sleep(time.Millisecond) return g }
// TestSendAndReceive exercises the raft transport by sending snapshot
// and heartbeat messages between every pair of stores across several
// nodes, verifying per-channel message counts, and finally sending a
// fully-populated request between two distinct replicas.
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)
	g.SetNodeID(roachpb.NodeID(1))

	// Create several servers, each of which has two stores (A raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := []raftpb.MessageType{
		raftpb.MsgSnap,
		raftpb.MsgHeartbeat,
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		grpcServer := rpc.NewServer(nodeRPCContext)
		ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
		if err != nil {
			t.Fatal(err)
		}
		addr := ln.Addr()
		// Have to call g.SetNodeID before call g.AddInfo.
		g.ResetNodeID(roachpb.NodeID(nodeID))
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
			&roachpb.NodeDescriptor{
				Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
			},
			time.Hour); err != nil {
			t.Fatal(err)
		}

		transports[nodeID] = storage.NewRaftTransport(
			storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
		// This channel is normally unbuffered, but it is also normally serviced by
		// the raft goroutine. Since we don't have that goroutine in this test, we
		// must buffer the channel to prevent snapshots from blocking while we
		// iterate through the recipients in an order that may differ from the
		// sending order.
		sendersPerNode := storesPerNode
		recipientsPerSender := numNodes * storesPerNode
		outboundSnapshotsPerNode := sendersPerNode * recipientsPerSender
		transports[nodeID].SnapshotStatusChan = make(chan storage.RaftSnapshotStatus, outboundSnapshotsPerNode)

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			// Buffer the inbound channel for all messages this store may
			// receive, for the same reason as SnapshotStatusChan above.
			sendersPerRecipient := numNodes * storesPerNode
			inboundMessagesPerStore := sendersPerRecipient * len(messageTypes)
			channels[storeID] = newChannelServer(inboundMessagesPerStore, 0)
			transports[nodeID].Listen(storeID, channels[storeID].RaftMessage)
		}
	}

	// messageTypeCounts tracks, per destination store, how many messages
	// of each type are still expected to arrive.
	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one snapshot and one heartbeat to each store, including
	// itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				RangeID: 1,
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for _, messageType := range messageTypes {
				// Copy baseReq so each send carries its own message type.
				req := baseReq
				req.Message.Type = messageType

				if !transports[fromNodeID].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
					if err != nil && !grpcutil.IsClosedConnection(err) {
						panic(err)
					}
				}).SendAsync(&req) {
					t.Errorf("unable to send %s from %d to %d", req.Message.Type, fromNodeID, toNodeID)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		func() {
			for len(messageTypeCounts[toStoreID]) > 0 {
				req := <-channels[toStoreID].ch
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
				}

				// Each MsgSnap should have a corresponding entry on the
				// sender's SnapshotStatusChan.
				if req.Message.Type == raftpb.MsgSnap {
					st := <-transports[req.FromReplica.NodeID].SnapshotStatusChan
					if st.Err != nil {
						t.Errorf("unexpected error sending snapshot: %s", st.Err)
					}
				}

				// Decrement the expected count for this message type and
				// drop the type once all of its messages have arrived.
				if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
					if _, ok := typeCounts[req.Message.Type]; ok {
						typeCounts[req.Message.Type]--
						if typeCounts[req.Message.Type] == 0 {
							delete(typeCounts, req.Message.Type)
						}
					} else {
						t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID)
				}
			}

			delete(messageTypeCounts, toStoreID)
		}()

		// After draining, no further messages should arrive on this channel.
		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3)
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	expReq := &storage.RaftMessageRequest{
		RangeID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if !transports[storeNodes[fromStoreID]].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
		if err != nil && !grpcutil.IsClosedConnection(err) {
			panic(err)
		}
	}).SendAsync(expReq) {
		t.Errorf("unable to send message from %d to %d", fromStoreID, toStoreID)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}
	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
// TestInOrderDelivery verifies that for a given pair of nodes, raft // messages are delivered in order. func TestInOrderDelivery(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper) g := gossip.New(nodeRPCContext, nil, stopper) grpcServer := rpc.NewServer(nodeRPCContext) ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr) if err != nil { t.Fatal(err) } const numMessages = 100 nodeID := roachpb.NodeID(roachpb.NodeID(2)) serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext) serverChannel := newChannelServer(numMessages, 10*time.Millisecond) serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage) addr := ln.Addr() // Have to set gossip.NodeID before calling gossip.AddInfoXXX. g.SetNodeID(nodeID) if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID), &roachpb.NodeDescriptor{ Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }, time.Hour); err != nil { t.Fatal(err) } clientNodeID := roachpb.NodeID(2) clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext) for i := 0; i < numMessages; i++ { req := &storage.RaftMessageRequest{ RangeID: 1, Message: raftpb.Message{ To: uint64(nodeID), From: uint64(clientNodeID), Commit: uint64(i), }, ToReplica: roachpb.ReplicaDescriptor{ NodeID: nodeID, StoreID: roachpb.StoreID(nodeID), ReplicaID: roachpb.ReplicaID(nodeID), }, FromReplica: roachpb.ReplicaDescriptor{ NodeID: clientNodeID, StoreID: roachpb.StoreID(clientNodeID), ReplicaID: roachpb.ReplicaID(clientNodeID), }, } if !clientTransport.MakeSender(func(err error, _ roachpb.ReplicaDescriptor) { if err != nil && !grpcutil.IsClosedConnection(err) { panic(err) } }).SendAsync(req) { t.Errorf("failed to send message %d", i) } } for i := 0; i < numMessages; i++ { req := <-serverChannel.ch if req.Message.Commit != 
uint64(i) { t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i) } } }