// NewServer creates a Server from a server.Context.
func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", cfg.AdvertiseAddr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", cfg.AdvertiseAddr, err)
	}

	if cfg.AmbientCtx.Tracer == nil {
		cfg.AmbientCtx.Tracer = tracing.NewTracer()
	}

	// Try loading the TLS configs before anything else.
	if _, err := cfg.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := cfg.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano, cfg.MaxOffset),
		stopper: stopper,
		cfg:     cfg,
	}

	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass an ambient context to the various server components, but we
	// won't know the node ID until we Start(). At that point it's too late to
	// change the ambient contexts in the components (various background processes
	// will have already started using them).
	//
	// NodeIDContainer allows us to add the log tag to the context now and update
	// the value asynchronously. It's not significantly more expensive than a
	// regular tag since it's just doing an (atomic) load when a log/trace message
	// is constructed. The node ID is set by the Store if this host was
	// bootstrapped; otherwise a new one is allocated in Node.
	s.cfg.AmbientCtx.AddLogTag("n", &s.nodeIDContainer)

	ctx := s.AnnotateCtx(context.Background())
	if s.cfg.Insecure {
		log.Warning(ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}

	s.rpcContext = rpc.NewContext(s.cfg.AmbientCtx, s.cfg.Config, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(ctx, err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.cfg.AmbientCtx,
		&s.nodeIDContainer,
		s.rpcContext,
		s.grpc,
		s.cfg.GossipBootstrapResolvers,
		s.stopper,
		s.registry,
	)
	s.storePool = storage.NewStorePool(
		s.cfg.AmbientCtx,
		s.gossip,
		s.clock,
		s.rpcContext,
		s.cfg.TimeUntilStoreDead,
		s.stopper,
		/* deterministic */ false,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		AmbientCtx:      s.cfg.AmbientCtx,
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics(s.cfg.MetricsSampleInterval)
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(
		s.cfg.AmbientCtx,
		s.distSender,
		s.clock,
		s.cfg.Linearizable,
		s.stopper,
		txnMetrics,
	)
	s.db = client.NewDB(s.txnCoordSender)

	// Use the range lease expiration and renewal durations as the node
	// liveness expiration and heartbeat interval.
	active, renewal := storage.RangeLeaseDurations(
		storage.RaftElectionTimeout(s.cfg.RaftTickInterval, s.cfg.RaftElectionTimeoutTicks))
	s.nodeLiveness = storage.NewNodeLiveness(
		s.cfg.AmbientCtx, s.clock, s.db, s.gossip, active, renewal,
	)
	s.registry.AddMetricStruct(s.nodeLiveness.Metrics())

	s.raftTransport = storage.NewRaftTransport(
		s.cfg.AmbientCtx, storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext,
	)

	s.kvDB = kv.NewDBServer(s.cfg.Config, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up internal memory metrics for use by internal SQL executors.
	s.internalMemMetrics = sql.MakeMemMetrics("internal")
	s.registry.AddMetricStruct(s.internalMemMetrics)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if cfg.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *s.cfg.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(
		&s.nodeIDContainer, *s.db, s.clock, lmKnobs, s.stopper, &s.internalMemMetrics,
	)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		AmbientContext: s.cfg.AmbientCtx,
		DB:             s.db,
		RPCContext:     s.rpcContext,
		Stopper:        s.stopper,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up admin memory metrics for use by admin SQL executors.
	s.adminMemMetrics = sql.MakeMemMetrics("admin")
	s.registry.AddMetricStruct(s.adminMemMetrics)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		AmbientCtx:            s.cfg.AmbientCtx,
		NodeID:                &s.nodeIDContainer,
		DB:                    s.db,
		Gossip:                s.gossip,
		LeaseManager:          s.leaseMgr,
		Clock:                 s.clock,
		DistSQLSrv:            s.distSQLServer,
		MetricsSampleInterval: s.cfg.MetricsSampleInterval,
	}
	if s.cfg.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = s.cfg.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		execCfg.SchemaChangerTestingKnobs =
			s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	} else {
		execCfg.SchemaChangerTestingKnobs = &sql.SchemaChangerTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper, &s.adminMemMetrics)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(
		s.cfg.AmbientCtx, s.cfg.Config, s.sqlExecutor, &s.internalMemMetrics, s.cfg.SQLMemoryPoolSize,
	)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.cfg.AmbientCtx, s.tsDB, s.cfg.TimeSeriesServerConfig, s.stopper)

	// TODO(bdarnell): make StoreConfig configurable.
	storeCfg := storage.StoreConfig{
		AmbientCtx:                     s.cfg.AmbientCtx,
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		NodeLiveness:                   s.nodeLiveness,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.cfg.RaftTickInterval,
		ScanInterval:                   s.cfg.ScanInterval,
		ScanMaxIdleTime:                s.cfg.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.cfg.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.cfg.ConsistencyCheckPanicOnFailure,
		MetricsSampleInterval:          s.cfg.MetricsSampleInterval,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: s.cfg.EventLogEnabled,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
		RangeLeaseActiveDuration:  active,
		RangeLeaseRenewalDuration: renewal,
		TimeSeriesDataStore:       s.tsDB,
	}
	if s.cfg.TestingKnobs.Store != nil {
		storeCfg.TestingKnobs = *s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(
		storeCfg, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr),
	)
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterConsistencyServer(s.grpc, s.node.storesServer)
	storage.RegisterFreezeServer(s.grpc, s.node.storesServer)

	s.admin = newAdminServer(s)
	s.status = newStatusServer(
		s.cfg.AmbientCtx, s.db, s.gossip, s.recorder, s.rpcContext, s.node.stores,
	)
	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
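// Illustrative sketch (not part of the original source): a typical caller
// builds a Config, creates a stop.Stopper, constructs the Server, and then
// starts it. The Config construction is elided because it depends on
// deployment flags; Start is the point at which the dynamic "n" (node ID)
// log tag added above receives its real value. Exact call sites may differ.
//
//	stopper := stop.NewStopper()
//	defer stopper.Stop()
//	s, err := NewServer(cfg, stopper)
//	if err != nil {
//		log.Fatal(context.Background(), err)
//	}
//	if err := s.Start(context.Background()); err != nil {
//		log.Fatal(context.Background(), err)
//	}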
// bootstrapCluster bootstraps multiple stores using the provided
// engines. The first bootstrapped store contains a single range
// spanning all keys. Initial range lookup metadata is populated for
// the range. Returns the newly allocated cluster ID.
func bootstrapCluster(
	cfg storage.StoreConfig, engines []engine.Engine, txnMetrics kv.TxnMetrics,
) (uuid.UUID, error) {
	clusterID := uuid.MakeV4()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Make sure that the store config has a valid clock and that it doesn't
	// try to use gossip, since that can introduce race conditions.
	if cfg.Clock == nil {
		cfg.Clock = hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	}
	cfg.Gossip = nil
	cfg.TestingKnobs = storage.StoreTestingKnobs{}
	cfg.ScanInterval = 10 * time.Minute
	cfg.MetricsSampleInterval = time.Duration(math.MaxInt64)
	cfg.ConsistencyCheckInterval = 10 * time.Minute
	cfg.AmbientCtx.Tracer = tracing.NewTracer()

	// Create a KV DB with a local sender.
	stores := storage.NewStores(cfg.AmbientCtx, cfg.Clock)
	sender := kv.NewTxnCoordSender(cfg.AmbientCtx, stores, cfg.Clock, false, stopper, txnMetrics)
	cfg.DB = client.NewDB(sender)
	cfg.Transport = storage.NewDummyRaftTransport()

	for i, eng := range engines {
		sIdent := roachpb.StoreIdent{
			ClusterID: clusterID,
			NodeID:    FirstNodeID,
			StoreID:   roachpb.StoreID(i + 1),
		}

		// The bootstrapping store will not connect to other nodes so its
		// StoreConfig doesn't really matter.
		s := storage.NewStore(cfg, eng, &roachpb.NodeDescriptor{NodeID: FirstNodeID})

		// Verify the store isn't already part of a cluster.
		if s.Ident.ClusterID != (uuid.UUID{}) {
			return uuid.UUID{}, errors.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID)
		}

		// Bootstrap store to persist the store ident.
		if err := s.Bootstrap(sIdent); err != nil {
			return uuid.UUID{}, err
		}

		// Create first range, writing directly to engine. Note this does
		// not create the range, just its data. Only do this if this is the
		// first store.
		if i == 0 {
			initialValues := GetBootstrapSchema().GetInitialValues()
			if err := s.BootstrapRange(initialValues); err != nil {
				return uuid.UUID{}, err
			}
		}
		if err := s.Start(context.Background(), stopper); err != nil {
			return uuid.UUID{}, err
		}

		stores.AddStore(s)

		ctx := context.TODO()
		// Initialize node and store IDs. Only initialize the node once.
		if i == 0 {
			if nodeID, err := allocateNodeID(ctx, cfg.DB); nodeID != sIdent.NodeID || err != nil {
				return uuid.UUID{}, errors.Errorf("expected to initialize node id allocator to %d, got %d: %s",
					sIdent.NodeID, nodeID, err)
			}
		}
		if storeID, err := allocateStoreIDs(ctx, sIdent.NodeID, 1, cfg.DB); storeID != sIdent.StoreID || err != nil {
			return uuid.UUID{}, errors.Errorf("expected to initialize store id allocator to %d, got %d: %s",
				sIdent.StoreID, storeID, err)
		}
	}
	return clusterID, nil
}
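// Illustrative sketch (assumed, not from the original source): bootstrapping
// a single in-memory engine, as a test might do. The engine.NewInMem call and
// its cache-size argument, along with the metrics sample interval, are
// assumptions for illustration; bootstrapCluster itself fills in the remaining
// StoreConfig defaults as shown above.
//
//	eng := engine.NewInMem(roachpb.Attributes{}, 1<<20 /* cacheSize */)
//	defer eng.Close()
//	clusterID, err := bootstrapCluster(
//		storage.StoreConfig{}, []engine.Engine{eng}, kv.MakeTxnMetrics(time.Second),
//	)
//	if err != nil {
//		log.Fatal(context.Background(), err)
//	}
//	_ = clusterID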