// recordJoinEvent begins an asynchronous task which attempts to log a "node // join" or "node restart" event. This query will retry until it succeeds or the // server stops. func (n *Node) recordJoinEvent() { if !n.ctx.LogRangeEvents { return } logEventType := sql.EventLogNodeRestart if n.initialBoot { logEventType = sql.EventLogNodeJoin } n.stopper.RunWorker(func() { for r := retry.Start(retry.Options{Closer: n.stopper.ShouldStop()}); r.Next(); { if err := n.ctx.DB.Txn(func(txn *client.Txn) *roachpb.Error { return sql.MakeEventLogger(n.ctx.SQLExecutor.LeaseManager).InsertEventRecord(txn, logEventType, int32(n.Descriptor.NodeID), int32(n.Descriptor.NodeID), struct { Descriptor roachpb.NodeDescriptor ClusterID uuid.UUID StartedAt int64 }{n.Descriptor, n.ClusterID, n.startedAt}, ) }); err != nil { log.Warningc(n.context(context.TODO()), "unable to log %s event for node %d: %s", logEventType, n.Descriptor.NodeID, err) } else { return } } }) }
// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour ctx.ConsistencyCheckInterval = 10 * time.Hour grpcServer := rpc.NewServer(nodeRPCContext) serverCtx := makeTestContext() g := gossip.New( context.Background(), nodeRPCContext, grpcServer, serverCtx.GossipBootstrapResolvers, stopper, metric.NewRegistry()) ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr) if err != nil { t.Fatal(err) } if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() { gossipBS = ln.Addr() } r, err := resolver.NewResolverFromAddress(gossipBS) if err != nil { t.Fatalf("bad gossip address %s: %s", gossipBS, err) } g.SetResolvers([]resolver.Resolver{r}) g.Start(ln.Addr()) } ctx.Gossip = g retryOpts := base.DefaultRetryOptions() retryOpts.Closer = stopper.ShouldQuiesce() distSender := kv.NewDistSender(&kv.DistSenderConfig{ Clock: ctx.Clock, RPCContext: nodeRPCContext, RPCRetryOptions: &retryOpts, }, g) ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer()) sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper, kv.MakeTxnMetrics()) ctx.DB = client.NewDB(sender) ctx.Transport = storage.NewDummyRaftTransport() node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper, kv.MakeTxnMetrics(), sql.MakeEventLogger(nil)) roachpb.RegisterInternalServer(grpcServer, node) return grpcServer, ln.Addr(), ctx.Clock, node, stopper }
// NewServer creates a Server from a server.Context. func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) { if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil { return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err) } if srvCtx.Ctx == nil { srvCtx.Ctx = context.Background() } if srvCtx.Ctx.Done() != nil { panic("context with cancel or deadline") } if tracing.TracerFromCtx(srvCtx.Ctx) == nil { // TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate // context.Context inside Server. We will need to rename server.Context // though. srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer()) } if srvCtx.Insecure { log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.") } // Try loading the TLS configs before anything else. if _, err := srvCtx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := srvCtx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } // Add a dynamic log tag value for the node ID. // // We need to pass the server's Ctx as a base context for the various server // components, but we won't know the node ID until we Start(). At that point // it's too late to change the contexts in the components (various background // processes will have already started using the contexts). // // The dynamic value allows us to add the log tag to the context now and // update the value asynchronously. It's not significantly more expensive than // a regular tag since it's just doing an (atomic) load when a log/trace // message is constructed. s.nodeLogTagVal.Set(log.DynamicIntValueUnknown) srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal) s.ctx = srvCtx s.clock.SetMaxOffset(srvCtx.MaxOffset) s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper) s.rpcContext.HeartbeatCB = func() { if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil { log.Fatal(s.Ctx(), err) } } s.grpc = rpc.NewServer(s.rpcContext) s.registry = metric.NewRegistry() s.gossip = gossip.New( s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry) s.storePool = storage.NewStorePool( s.gossip, s.clock, s.rpcContext, srvCtx.ReservationsEnabled, srvCtx.TimeUntilStoreDead, s.stopper, ) // A custom RetryOptions is created which uses stopper.ShouldQuiesce() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown // // Such a loop loop occurs with the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus, thus the // DistSender needs to know that it should not retry in this situation. retryOpts := base.DefaultRetryOptions() retryOpts.Closer = s.stopper.ShouldQuiesce() distSenderCfg := kv.DistSenderConfig{ Ctx: s.Ctx(), Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, } s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip) txnMetrics := kv.MakeTxnMetrics() s.registry.AddMetricStruct(txnMetrics) s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock, srvCtx.Linearizable, s.stopper, txnMetrics) s.db = client.NewDB(s.txnCoordSender) s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper) roachpb.RegisterExternalServer(s.grpc, s.kvDB) // Set up Lease Manager var lmKnobs sql.LeaseManagerTestingKnobs if srvCtx.TestingKnobs.SQLLeaseManager != nil { lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs) } s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) // Set up the DistSQL server distSQLCfg := distsql.ServerConfig{ Context: s.Ctx(), DB: s.db, RPCContext: s.rpcContext, } s.distSQLServer = distsql.NewServer(distSQLCfg) distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer) // Set up Executor execCfg := sql.ExecutorConfig{ Context: s.Ctx(), DB: s.db, Gossip: s.gossip, LeaseManager: s.leaseMgr, Clock: s.clock, DistSQLSrv: s.distSQLServer, } if srvCtx.TestingKnobs.SQLExecutor != nil { execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs) } else { execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{} } s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper) s.registry.AddMetricStruct(s.sqlExecutor) s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor) s.registry.AddMetricStruct(s.pgServer.Metrics()) // TODO(bdarnell): make StoreConfig configurable. nCtx := storage.StoreContext{ Ctx: s.Ctx(), Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, RaftTickInterval: s.ctx.RaftTickInterval, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, ConsistencyCheckInterval: s.ctx.ConsistencyCheckInterval, ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, }, } if srvCtx.TestingKnobs.Store != nil { nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs) } s.recorder = status.NewMetricsRecorder(s.clock) s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics()) s.runtime = status.MakeRuntimeStatSampler(s.clock) s.registry.AddMetricStruct(s.runtime) s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr)) roachpb.RegisterInternalServer(s.grpc, s.node) storage.RegisterStoresServer(s.grpc, s.node.storesServer) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.MakeServer(s.tsDB) s.admin = makeAdminServer(s) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores) for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} { gw.RegisterService(s.grpc) } return s, nil }
// NewServer creates a Server from a server.Context. func NewServer(ctx Context, stopper *stop.Stopper) (*Server, error) { if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil { return nil, errors.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err) } if ctx.Insecure { log.Warning(context.TODO(), "running in insecure mode, this is strongly discouraged. See --insecure.") } // Try loading the TLS configs before anything else. if _, err := ctx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := ctx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ Tracer: tracing.NewTracer(), ctx: ctx, mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } s.clock.SetMaxOffset(ctx.MaxOffset) s.rpcContext = rpc.NewContext(ctx.Context, s.clock, s.stopper) s.rpcContext.HeartbeatCB = func() { if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil { log.Fatal(context.TODO(), err) } } s.grpc = rpc.NewServer(s.rpcContext) s.registry = metric.NewRegistry() s.gossip = gossip.New(s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry) s.storePool = storage.NewStorePool( s.gossip, s.clock, s.rpcContext, ctx.ReservationsEnabled, ctx.TimeUntilStoreDead, s.stopper, ) // A custom RetryOptions is created which uses stopper.ShouldQuiesce() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown // // Such a loop loop occurs with the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus, thus the // DistSender needs to know that it should not retry in this situation. retryOpts := base.DefaultRetryOptions() retryOpts.Closer = s.stopper.ShouldQuiesce() s.distSender = kv.NewDistSender(&kv.DistSenderContext{ Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, }, s.gossip) txnMetrics := kv.NewTxnMetrics(s.registry) sender := kv.NewTxnCoordSender(s.distSender, s.clock, ctx.Linearizable, s.Tracer, s.stopper, txnMetrics) s.db = client.NewDB(sender) s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(s.ctx.Context, sender, s.stopper) roachpb.RegisterExternalServer(s.grpc, s.kvDB) // Set up Lease Manager var lmKnobs sql.LeaseManagerTestingKnobs if ctx.TestingKnobs.SQLLeaseManager != nil { lmKnobs = *ctx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs) } s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) // Set up the DistSQL server distSQLCtx := distsql.ServerContext{ Context: context.Background(), DB: s.db, RPCContext: s.rpcContext, } s.distSQLServer = distsql.NewServer(distSQLCtx) distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer) // Set up Executor eCtx := sql.ExecutorContext{ Context: context.Background(), DB: s.db, Gossip: s.gossip, LeaseManager: s.leaseMgr, Clock: s.clock, DistSQLSrv: s.distSQLServer, } if ctx.TestingKnobs.SQLExecutor != nil { eCtx.TestingKnobs = ctx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs) } else { eCtx.TestingKnobs = &sql.ExecutorTestingKnobs{} } s.sqlExecutor = sql.NewExecutor(eCtx, s.stopper, s.registry) s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor, s.registry) // TODO(bdarnell): make StoreConfig configurable. nCtx := storage.StoreContext{ Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, RaftTickInterval: s.ctx.RaftTickInterval, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, ConsistencyCheckInterval: s.ctx.ConsistencyCheckInterval, ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure, Tracer: s.Tracer, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, }, } if ctx.TestingKnobs.Store != nil { nCtx.TestingKnobs = *ctx.TestingKnobs.Store.(*storage.StoreTestingKnobs) } s.recorder = status.NewMetricsRecorder(s.clock) s.rpcContext.RemoteClocks.RegisterMetrics(s.registry) s.runtime = status.MakeRuntimeStatSampler(s.clock, s.registry) s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr)) roachpb.RegisterInternalServer(s.grpc, s.node) roachpb.RegisterInternalStoresServer(s.grpc, s.node.InternalStoresServer) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.MakeServer(s.tsDB) s.admin = makeAdminServer(s) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores) for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} { gw.RegisterService(s.grpc) } return s, nil }