// AddNodeWithoutGossip registers a node with the cluster. Nodes must
// be added before they can be used in other methods of
// raftTransportTestContext. Unless you are testing the effects of
// delaying gossip, use AddNode instead.
func (rttc *raftTransportTestContext) AddNodeWithoutGossip(
	nodeID roachpb.NodeID,
) (*storage.RaftTransport, net.Addr) {
	grpcServer := rpc.NewServer(rttc.nodeRPCContext)
	ln, err := netutil.ListenAndServeGRPC(rttc.stopper, grpcServer, util.TestAddr)
	if err != nil {
		rttc.t.Fatal(err)
	}
	transport := storage.NewRaftTransport(
		storage.GossipAddressResolver(rttc.gossip), grpcServer, rttc.nodeRPCContext)
	rttc.transports[nodeID] = transport
	return transport, ln.Addr()
}
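// A hypothetical usage sketch for the harness above: register a node without
// gossiping its address, then gossip it explicitly once the test is ready for
// the node to become resolvable. newRaftTransportTestContext, Stop, and
// GossipNode are assumed helpers of the same test harness, not shown in this
// excerpt.
func testAddNodeWithoutGossipUsage(t *testing.T) {
	rttc := newRaftTransportTestContext(t) // assumed constructor
	defer rttc.Stop()                      // assumed cleanup helper
	transport, addr := rttc.AddNodeWithoutGossip(roachpb.NodeID(1))
	// Sends addressed to node 1 cannot resolve an address until it is
	// gossiped below.
	rttc.GossipNode(roachpb.NodeID(1), addr) // assumed helper
	_ = transport
}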
// NewServer creates a Server from a server.Context.
func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) {
	if ctx == nil {
		return nil, util.Errorf("ctx must not be nil")
	}
	if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil {
		return nil, util.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err)
	}

	if ctx.Insecure {
		log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := ctx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := ctx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		Tracer:  tracing.NewTracer(),
		ctx:     ctx,
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	s.clock.SetMaxOffset(ctx.MaxOffset)

	s.rpcContext = crpc.NewContext(&ctx.Context, s.clock, stopper)
	stopper.RunWorker(func() {
		s.rpcContext.RemoteClocks.MonitorRemoteOffsets(stopper)
	})

	s.rpc = crpc.NewServer(s.rpcContext)

	s.gossip = gossip.New(s.rpcContext, s.ctx.GossipBootstrapResolvers, stopper)
	s.storePool = storage.NewStorePool(s.gossip, s.clock, ctx.TimeUntilStoreDead, stopper)

	feed := util.NewFeed(stopper)

	// A custom RetryOptions is created which uses stopper.ShouldDrain() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error
	// (HTTP Code 5xx). This is the correct error for a server to return
	// when it is shutting down, and is normally retryable in a cluster
	// environment. However, on a single-node setup (such as a test),
	// retries will never succeed because the only server has been shut
	// down; thus, the DistSender needs to know that it should not retry in
	// this situation.
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	ds := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}, s.gossip)
	txnRegistry := metric.NewRegistry()
	txnMetrics := kv.NewTxnMetrics(txnRegistry)
	sender := kv.NewTxnCoordSender(ds, s.clock, ctx.Linearizable, s.Tracer,
		s.stopper, txnMetrics)
	s.db = client.NewDB(sender)

	s.grpc = grpc.NewServer()
	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(&s.ctx.Context, sender, stopper)
	if err := s.kvDB.RegisterRPC(s.rpc); err != nil {
		return nil, err
	}

	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)
	sqlRegistry := metric.NewRegistry()
	s.sqlExecutor = sql.NewExecutor(*s.db, s.gossip, s.leaseMgr, s.stopper, sqlRegistry)

	s.pgServer = pgwire.MakeServer(&s.ctx.Context, s.sqlExecutor, sqlRegistry)

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Clock:           s.clock,
		DB:              s.db,
		Gossip:          s.gossip,
		Transport:       s.raftTransport,
		ScanInterval:    s.ctx.ScanInterval,
		ScanMaxIdleTime: s.ctx.ScanMaxIdleTime,
		EventFeed:       feed,
		Tracer:          s.Tracer,
		StorePool:       s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
			Mode:           s.ctx.BalanceMode,
		},
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.recorder.AddNodeRegistry("sql.%s", sqlRegistry)
	s.recorder.AddNodeRegistry("txn.%s", txnRegistry)

	s.node = NewNode(nCtx, s.recorder, s.stopper, txnMetrics)
	s.admin = newAdminServer(s.db, s.stopper, s.sqlExecutor)
	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.NewServer(s.tsDB)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx)

	return s, nil
}
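// The Closer-based retry pattern used in NewServer above can be sketched in
// isolation. This is an illustration of the mechanism, not the retry
// package's actual implementation; doRetry and its fixed backoff are
// hypothetical.
func doRetry(closer <-chan struct{}, attempt func() error) error {
	for {
		err := attempt()
		if err == nil {
			return nil
		}
		select {
		case <-closer:
			// The stopper is draining; give up rather than retrying
			// forever against a server that will never come back.
			return err
		case <-time.After(50 * time.Millisecond): // fixed backoff for brevity
		}
	}
}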
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)
	g.SetNodeID(roachpb.NodeID(1))

	// Create several servers, each of which has two stores (a raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := []raftpb.MessageType{
		raftpb.MsgSnap,
		raftpb.MsgHeartbeat,
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		grpcServer := rpc.NewServer(nodeRPCContext)
		ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
		if err != nil {
			t.Fatal(err)
		}
		addr := ln.Addr()

		// Have to call g.ResetNodeID before calling g.AddInfoProto.
		g.ResetNodeID(roachpb.NodeID(nodeID))
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
			&roachpb.NodeDescriptor{
				Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
			},
			time.Hour); err != nil {
			t.Fatal(err)
		}

		transport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
		transports[nodeID] = transport

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channel := newChannelServer(numNodes*storesPerNode*len(messageTypes), 0)
			transport.Listen(storeID, channel.RaftMessage)
			channels[storeID] = channel
		}
	}

	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one snapshot and one heartbeat to each store,
	// including itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for _, messageType := range messageTypes {
				req := baseReq
				req.Message.Type = messageType

				if err := transports[fromNodeID].Send(&req); err != nil {
					t.Errorf("unable to send %s from %d to %d: %s",
						req.Message.Type, fromNodeID, toNodeID, err)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		func() {
			for len(messageTypeCounts[toStoreID]) > 0 {
				req := <-channels[toStoreID].ch
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
				}

				if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
					if _, ok := typeCounts[req.Message.Type]; ok {
						typeCounts[req.Message.Type]--
						if typeCounts[req.Message.Type] == 0 {
							delete(typeCounts, req.Message.Type)
						}
					} else {
						t.Errorf("expected %v to have key %v, but it did not",
							typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not",
						messageTypeCounts, toStoreID)
				}
			}

			delete(messageTypeCounts, toStoreID)
		}()

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1
	// (on store 5, node 3).
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)

	expReq := &storage.RaftMessageRequest{
		GroupID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if err := transports[storeNodes[fromStoreID]].Send(expReq); err != nil {
		t.Errorf("unable to send message from %d to %d: %s", fromStoreID, toStoreID, err)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
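// For reference, a channelServer along the lines these tests assume: it
// buffers incoming raft messages on a channel, optionally sleeping to
// simulate a slow receiver. This is a sketch of the assumed helper, which is
// defined elsewhere in the test file; the field and parameter names here are
// illustrative.
type channelServer struct {
	ch       chan *storage.RaftMessageRequest
	maxSleep time.Duration
}

func newChannelServer(bufSize int, maxSleep time.Duration) channelServer {
	return channelServer{
		ch:       make(chan *storage.RaftMessageRequest, bufSize),
		maxSleep: maxSleep,
	}
}

// RaftMessage is the handler registered via transport.Listen.
func (s channelServer) RaftMessage(req *storage.RaftMessageRequest) error {
	if s.maxSleep != 0 {
		// Simulate a slow receiver to exercise ordering under backpressure.
		time.Sleep(time.Duration(rand.Int63n(int64(s.maxSleep))))
	}
	s.ch <- req
	return nil
}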
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)

	grpcServer := rpc.NewServer(nodeRPCContext)
	ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(2)
	serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage)
	addr := ln.Addr()

	// Have to call g.SetNodeID before calling g.AddInfoProto.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext)

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			GroupID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if err := clientTransport.Send(req); err != nil {
			t.Errorf("failed to send message %d: %s", i, err)
		}
	}

	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
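// GossipAddressResolver, passed to every RaftTransport constructed above,
// resolves a node ID to a network address via gossip. A minimal sketch of the
// idea, assuming gossip exposes a GetNodeIDAddress lookup (the real resolver
// lives in the storage package and may differ in detail):
func gossipAddressResolver(g *gossip.Gossip) func(roachpb.NodeID) (net.Addr, error) {
	return func(nodeID roachpb.NodeID) (net.Addr, error) {
		// Look up the NodeDescriptor gossiped under the node-ID key and
		// return its address.
		return g.GetNodeIDAddress(nodeID)
	}
}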
// NewServer creates a Server from a server.Context.
func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err)
	}

	if srvCtx.Ctx == nil {
		srvCtx.Ctx = context.Background()
	}
	if srvCtx.Ctx.Done() != nil {
		panic("context with cancel or deadline")
	}
	if tracing.TracerFromCtx(srvCtx.Ctx) == nil {
		// TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate
		// context.Context inside Server. We will need to rename server.Context
		// though.
		srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer())
	}

	if srvCtx.Insecure {
		log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := srvCtx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := srvCtx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}

	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass the server's Ctx as a base context for the various server
	// components, but we won't know the node ID until we Start(). At that point
	// it's too late to change the contexts in the components (various background
	// processes will have already started using the contexts).
	//
	// The dynamic value allows us to add the log tag to the context now and
	// update the value asynchronously. It's not significantly more expensive than
	// a regular tag since it's just doing an (atomic) load when a log/trace
	// message is constructed.
	s.nodeLogTagVal.Set(log.DynamicIntValueUnknown)
	srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal)
	s.ctx = srvCtx

	s.clock.SetMaxOffset(srvCtx.MaxOffset)

	s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(s.Ctx(), err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry)
	s.storePool = storage.NewStorePool(
		s.gossip,
		s.clock,
		s.rpcContext,
		srvCtx.ReservationsEnabled,
		srvCtx.TimeUntilStoreDead,
		s.stopper,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error
	// (HTTP Code 5xx). This is the correct error for a server to return
	// when it is shutting down, and is normally retryable in a cluster
	// environment. However, on a single-node setup (such as a test),
	// retries will never succeed because the only server has been shut
	// down; thus, the DistSender needs to know that it should not retry in
	// this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		Ctx:             s.Ctx(),
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics()
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock,
		srvCtx.Linearizable, s.stopper, txnMetrics)
	s.db = client.NewDB(s.txnCoordSender)

	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if srvCtx.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		Context:    s.Ctx(),
		DB:         s.db,
		RPCContext: s.rpcContext,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		Context:      s.Ctx(),
		DB:           s.db,
		Gossip:       s.gossip,
		LeaseManager: s.leaseMgr,
		Clock:        s.clock,
		DistSQLSrv:   s.distSQLServer,
	}
	if srvCtx.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Ctx:                            s.Ctx(),
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.ctx.RaftTickInterval,
		ScanInterval:                   s.ctx.ScanInterval,
		ScanMaxIdleTime:                s.ctx.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.ctx.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
	}
	if srvCtx.TestingKnobs.Store != nil {
		nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterStoresServer(s.grpc, s.node.storesServer)

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.tsDB)

	s.admin = makeAdminServer(s)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores)
	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
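// The dynamic node-ID log tag set up in NewServer above boils down to an
// atomic value that is read each time a log line is rendered. A
// self-contained sketch of the idea; dynamicIntValue is illustrative, not
// the log package's actual type.
type dynamicIntValue struct {
	v int64 // accessed atomically
}

// Set updates the value; NewNode would call this once the node ID is known.
func (d *dynamicIntValue) Set(x int64) { atomic.StoreInt64(&d.v, x) }

// String is called when a log/trace message is constructed, so the tag
// picks up the real node ID as soon as Start() learns it.
func (d *dynamicIntValue) String() string {
	return strconv.FormatInt(atomic.LoadInt64(&d.v), 10)
}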
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(nodeRPCContext, gossip.TestBootstrap, stopper)
	g.SetNodeID(roachpb.NodeID(1))

	// Create several servers, each of which has two stores (a raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numServers = 3
	const storesPerServer = 2
	const numStores = numServers * storesPerServer
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	for serverIndex := 0; serverIndex < numServers; serverIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		rpcServer := rpc.NewServer(nodeRPCContext)
		grpcServer := grpc.NewServer()
		tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
		if err != nil {
			t.Fatal(err)
		}
		ln, err := util.ListenAndServe(stopper,
			grpcutil.GRPCHandlerFunc(grpcServer, rpcServer),
			util.CreateTestAddr("tcp"), tlsConfig)
		if err != nil {
			t.Fatal(err)
		}
		addr := ln.Addr()

		// Have to call g.ResetNodeID before calling g.AddInfoProto.
		g.ResetNodeID(roachpb.NodeID(nodeID))
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
			&roachpb.NodeDescriptor{
				Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
			},
			time.Hour); err != nil {
			t.Fatal(err)
		}

		transport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
		transports[nodeID] = transport

		for store := 0; store < storesPerServer; store++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channel := newChannelServer(10, 0)
			transport.Listen(storeID, channel.RaftMessage)
			channels[storeID] = channel
		}
	}

	// Heartbeat messages: each store sends one message to each store.
	for fromStoreID, fromNodeID := range storeNodes {
		for toStoreID, toNodeID := range storeNodes {
			req := &storage.RaftMessageRequest{
				GroupID: 0,
				Message: raftpb.Message{
					Type: raftpb.MsgHeartbeat,
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:    fromNodeID,
					StoreID:   fromStoreID,
					ReplicaID: 0,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:    toNodeID,
					StoreID:   toStoreID,
					ReplicaID: 0,
				},
			}
			if err := transports[fromNodeID].Send(req); err != nil {
				t.Errorf("unable to send message from %d to %d: %s", fromNodeID, toNodeID, err)
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		for range storeNodes {
			select {
			case req := <-channels[toStoreID].ch:
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("invalid message received on channel %d: %+v", toStoreID, req)
				}
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for message")
			}
		}
		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %+v on channel %d", req, toStoreID)
		default:
		}
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1
	// (on store 5, node 3).
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	req := &storage.RaftMessageRequest{
		GroupID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if err := transports[storeNodes[fromStoreID]].Send(req); err != nil {
		t.Errorf("unable to send message from %d to %d: %s", fromStoreID, toStoreID, err)
	}
	select {
	case req2 := <-channels[toStoreID].ch:
		if !reflect.DeepEqual(req, req2) {
			t.Errorf("got unexpected message %+v", req2)
		}
	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for message")
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %+v on channel %d", req, toStoreID)
	default:
	}
}
// NewServer creates a Server from a server.Context.
func NewServer(ctx Context, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err)
	}

	if ctx.Insecure {
		log.Warning(context.TODO(), "running in insecure mode, this is strongly discouraged. See --insecure.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := ctx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := ctx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		Tracer:  tracing.NewTracer(),
		ctx:     ctx,
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	s.clock.SetMaxOffset(ctx.MaxOffset)

	s.rpcContext = rpc.NewContext(ctx.Context, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(context.TODO(), err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry)
	s.storePool = storage.NewStorePool(
		s.gossip,
		s.clock,
		s.rpcContext,
		ctx.ReservationsEnabled,
		ctx.TimeUntilStoreDead,
		s.stopper,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error
	// (HTTP Code 5xx). This is the correct error for a server to return
	// when it is shutting down, and is normally retryable in a cluster
	// environment. However, on a single-node setup (such as a test),
	// retries will never succeed because the only server has been shut
	// down; thus, the DistSender needs to know that it should not retry in
	// this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	s.distSender = kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}, s.gossip)

	txnMetrics := kv.NewTxnMetrics(s.registry)
	sender := kv.NewTxnCoordSender(s.distSender, s.clock, ctx.Linearizable, s.Tracer,
		s.stopper, txnMetrics)
	s.db = client.NewDB(sender)

	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(s.ctx.Context, sender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if ctx.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *ctx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCtx := distsql.ServerContext{
		Context:    context.Background(),
		DB:         s.db,
		RPCContext: s.rpcContext,
	}
	s.distSQLServer = distsql.NewServer(distSQLCtx)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up Executor
	eCtx := sql.ExecutorContext{
		Context:      context.Background(),
		DB:           s.db,
		Gossip:       s.gossip,
		LeaseManager: s.leaseMgr,
		Clock:        s.clock,
		DistSQLSrv:   s.distSQLServer,
	}
	if ctx.TestingKnobs.SQLExecutor != nil {
		eCtx.TestingKnobs = ctx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		eCtx.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(eCtx, s.stopper, s.registry)
	s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor, s.registry)

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.ctx.RaftTickInterval,
		ScanInterval:                   s.ctx.ScanInterval,
		ScanMaxIdleTime:                s.ctx.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.ctx.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure,
		Tracer:                         s.Tracer,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
	}
	if ctx.TestingKnobs.Store != nil {
		nCtx.TestingKnobs = *ctx.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.rpcContext.RemoteClocks.RegisterMetrics(s.registry)

	s.runtime = status.MakeRuntimeStatSampler(s.clock, s.registry)

	s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	roachpb.RegisterInternalStoresServer(s.grpc, s.node.InternalStoresServer)

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.tsDB)

	s.admin = makeAdminServer(s)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores)
	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
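// The registration loop at the end of NewServer only needs one method from
// its elements. A minimal interface satisfying that loop would look like the
// following; this is illustrative, and the real grpcGatewayServer may declare
// additional methods (e.g. for wiring up the REST gateway).
type grpcGatewayServer interface {
	// RegisterService registers the component's RPC service with the
	// given gRPC server.
	RegisterService(g *grpc.Server)
}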