// TestSendToOneClient verifies that Send correctly sends a request // to one server using the heartbeat RPC. func TestSendToOneClient(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() ctx := newNodeTestContext(nil, stopper) s, ln := newTestServer(t, ctx) roachpb.RegisterInternalServer(s, Node(0)) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 1 * time.Second, Timeout: 10 * time.Second, Trace: sp, } reply, err := sendBatch(opts, []net.Addr{ln.Addr()}, ctx) if err != nil { t.Fatal(err) } if reply == nil { t.Errorf("expected reply") } }
// TestRetryableError verifies that Send returns a retryable error // when it hits an RPC error. func TestRetryableError(t *testing.T) { defer leaktest.AfterTest(t)() clientStopper := stop.NewStopper() defer clientStopper.Stop() clientContext := newNodeTestContext(nil, clientStopper) serverStopper := stop.NewStopper() serverContext := newNodeTestContext(nil, serverStopper) s, ln := newTestServer(t, serverContext) roachpb.RegisterInternalServer(s, Node(0)) conn, err := clientContext.GRPCDial(ln.Addr().String()) if err != nil { t.Fatal(err) } ctx := context.Background() waitForConnState := func(desiredState grpc.ConnectivityState) { clientState, err := conn.State() for clientState != desiredState { if err != nil { t.Fatal(err) } if clientState == grpc.Shutdown { t.Fatalf("%v has unexpectedly shut down", conn) } clientState, err = conn.WaitForStateChange(ctx, clientState) } } // Wait until the client becomes healthy and shut down the server. waitForConnState(grpc.Ready) serverStopper.Stop() // Wait until the client becomes unhealthy. waitForConnState(grpc.TransientFailure) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 100 * time.Millisecond, Timeout: 100 * time.Millisecond, Trace: sp, } if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil { retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("Unexpected error type: %v", err) } if !retryErr.CanRetry() { t.Errorf("Expected retryable error: %v", retryErr) } } else { t.Fatalf("Unexpected success") } }
// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour ctx.ConsistencyCheckInterval = 10 * time.Hour grpcServer := rpc.NewServer(nodeRPCContext) serverCtx := makeTestContext() g := gossip.New( context.Background(), nodeRPCContext, grpcServer, serverCtx.GossipBootstrapResolvers, stopper, metric.NewRegistry()) ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr) if err != nil { t.Fatal(err) } if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() { gossipBS = ln.Addr() } r, err := resolver.NewResolverFromAddress(gossipBS) if err != nil { t.Fatalf("bad gossip address %s: %s", gossipBS, err) } g.SetResolvers([]resolver.Resolver{r}) g.Start(ln.Addr()) } ctx.Gossip = g retryOpts := base.DefaultRetryOptions() retryOpts.Closer = stopper.ShouldQuiesce() distSender := kv.NewDistSender(&kv.DistSenderConfig{ Clock: ctx.Clock, RPCContext: nodeRPCContext, RPCRetryOptions: &retryOpts, }, g) ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer()) sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper, kv.MakeTxnMetrics()) ctx.DB = client.NewDB(sender) ctx.Transport = storage.NewDummyRaftTransport() node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper, kv.MakeTxnMetrics(), sql.MakeEventLogger(nil)) roachpb.RegisterInternalServer(grpcServer, node) return grpcServer, ln.Addr(), ctx.Clock, node, stopper }
// NewServer creates a Server from a server.Context. func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) { if ctx == nil { return nil, util.Errorf("ctx must not be null") } if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil { return nil, util.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err) } if ctx.Insecure { log.Warning("running in insecure mode, this is strongly discouraged. See --insecure.") } // Try loading the TLS configs before anything else. if _, err := ctx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := ctx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ Tracer: tracing.NewTracer(), ctx: ctx, mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } s.clock.SetMaxOffset(ctx.MaxOffset) s.rpcContext = rpc.NewContext(&ctx.Context, s.clock, stopper) s.rpcContext.HeartbeatCB = func() { if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil { log.Fatal(err) } } s.gossip = gossip.New(s.rpcContext, s.ctx.GossipBootstrapResolvers, stopper) s.storePool = storage.NewStorePool(s.gossip, s.clock, ctx.TimeUntilStoreDead, stopper) // A custom RetryOptions is created which uses stopper.ShouldDrain() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown // // Such a loop loop occurs with the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus, thus the // DistSender needs to know that it should not retry in this situation. retryOpts := kv.GetDefaultDistSenderRetryOptions() retryOpts.Closer = stopper.ShouldDrain() ds := kv.NewDistSender(&kv.DistSenderContext{ Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, }, s.gossip) txnRegistry := metric.NewRegistry() txnMetrics := kv.NewTxnMetrics(txnRegistry) sender := kv.NewTxnCoordSender(ds, s.clock, ctx.Linearizable, s.Tracer, s.stopper, txnMetrics) s.db = client.NewDB(sender) s.grpc = rpc.NewServer(s.rpcContext) s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(&s.ctx.Context, sender, stopper) roachpb.RegisterExternalServer(s.grpc, s.kvDB) s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) eCtx := sql.ExecutorContext{ DB: s.db, Gossip: s.gossip, LeaseManager: s.leaseMgr, Clock: s.clock, TestingKnobs: &ctx.TestingKnobs.ExecutorTestingKnobs, } sqlRegistry := metric.NewRegistry() s.sqlExecutor = sql.NewExecutor(eCtx, s.stopper, sqlRegistry) s.pgServer = pgwire.MakeServer(&s.ctx.Context, s.sqlExecutor, sqlRegistry) // TODO(bdarnell): make StoreConfig configurable. nCtx := storage.StoreContext{ Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, ConsistencyCheckInterval: s.ctx.ConsistencyCheckInterval, ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure, Tracer: s.Tracer, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, }, TestingKnobs: ctx.TestingKnobs.StoreTestingKnobs, } s.recorder = status.NewMetricsRecorder(s.clock) s.recorder.AddNodeRegistry("sql.%s", sqlRegistry) s.recorder.AddNodeRegistry("txn.%s", txnRegistry) s.recorder.AddNodeRegistry("clock-offset.%s", s.rpcContext.RemoteClocks.Registry()) s.runtime = status.MakeRuntimeStatSampler(s.clock) s.recorder.AddNodeRegistry("sys.%s", s.runtime.Registry()) s.node = NewNode(nCtx, s.recorder, s.stopper, txnMetrics) roachpb.RegisterInternalServer(s.grpc, s.node) s.admin = newAdminServer(s.db, s.stopper, s.sqlExecutor, ds, s.node) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.NewServer(s.tsDB) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx) return s, nil }
// NewServer creates a Server from a server.Context. func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) { if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil { return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err) } if srvCtx.Ctx == nil { srvCtx.Ctx = context.Background() } if srvCtx.Ctx.Done() != nil { panic("context with cancel or deadline") } if tracing.TracerFromCtx(srvCtx.Ctx) == nil { // TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate // context.Context inside Server. We will need to rename server.Context // though. srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer()) } if srvCtx.Insecure { log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.") } // Try loading the TLS configs before anything else. if _, err := srvCtx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := srvCtx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } // Add a dynamic log tag value for the node ID. // // We need to pass the server's Ctx as a base context for the various server // components, but we won't know the node ID until we Start(). At that point // it's too late to change the contexts in the components (various background // processes will have already started using the contexts). // // The dynamic value allows us to add the log tag to the context now and // update the value asynchronously. It's not significantly more expensive than // a regular tag since it's just doing an (atomic) load when a log/trace // message is constructed. s.nodeLogTagVal.Set(log.DynamicIntValueUnknown) srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal) s.ctx = srvCtx s.clock.SetMaxOffset(srvCtx.MaxOffset) s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper) s.rpcContext.HeartbeatCB = func() { if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil { log.Fatal(s.Ctx(), err) } } s.grpc = rpc.NewServer(s.rpcContext) s.registry = metric.NewRegistry() s.gossip = gossip.New( s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry) s.storePool = storage.NewStorePool( s.gossip, s.clock, s.rpcContext, srvCtx.ReservationsEnabled, srvCtx.TimeUntilStoreDead, s.stopper, ) // A custom RetryOptions is created which uses stopper.ShouldQuiesce() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown // // Such a loop loop occurs with the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus, thus the // DistSender needs to know that it should not retry in this situation. retryOpts := base.DefaultRetryOptions() retryOpts.Closer = s.stopper.ShouldQuiesce() distSenderCfg := kv.DistSenderConfig{ Ctx: s.Ctx(), Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, } s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip) txnMetrics := kv.MakeTxnMetrics() s.registry.AddMetricStruct(txnMetrics) s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock, srvCtx.Linearizable, s.stopper, txnMetrics) s.db = client.NewDB(s.txnCoordSender) s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper) roachpb.RegisterExternalServer(s.grpc, s.kvDB) // Set up Lease Manager var lmKnobs sql.LeaseManagerTestingKnobs if srvCtx.TestingKnobs.SQLLeaseManager != nil { lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs) } s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) // Set up the DistSQL server distSQLCfg := distsql.ServerConfig{ Context: s.Ctx(), DB: s.db, RPCContext: s.rpcContext, } s.distSQLServer = distsql.NewServer(distSQLCfg) distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer) // Set up Executor execCfg := sql.ExecutorConfig{ Context: s.Ctx(), DB: s.db, Gossip: s.gossip, LeaseManager: s.leaseMgr, Clock: s.clock, DistSQLSrv: s.distSQLServer, } if srvCtx.TestingKnobs.SQLExecutor != nil { execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs) } else { execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{} } s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper) s.registry.AddMetricStruct(s.sqlExecutor) s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor) s.registry.AddMetricStruct(s.pgServer.Metrics()) // TODO(bdarnell): make StoreConfig configurable. nCtx := storage.StoreContext{ Ctx: s.Ctx(), Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, RaftTickInterval: s.ctx.RaftTickInterval, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, ConsistencyCheckInterval: s.ctx.ConsistencyCheckInterval, ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, }, } if srvCtx.TestingKnobs.Store != nil { nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs) } s.recorder = status.NewMetricsRecorder(s.clock) s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics()) s.runtime = status.MakeRuntimeStatSampler(s.clock) s.registry.AddMetricStruct(s.runtime) s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr)) roachpb.RegisterInternalServer(s.grpc, s.node) storage.RegisterStoresServer(s.grpc, s.node.storesServer) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.MakeServer(s.tsDB) s.admin = makeAdminServer(s) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores) for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} { gw.RegisterService(s.grpc) } return s, nil }