// StartWithStopper is the same as Start, but allows passing a stopper
// explicitly.
func (ts *TestServer) StartWithStopper(stopper *stop.Stopper) error {
	if ts.Ctx == nil {
		ts.Ctx = NewTestContext()
	}

	if stopper == nil {
		stopper = stop.NewStopper()
	}

	// Change the replication requirements so we don't get log spam
	// about ranges not being replicated enough.
	// TODO(marc): set this in the zones table when we have an entry
	// for the default cluster-wide zone config and remove these
	// shenanigans about mutating the global default.
	oldDefaultZC := proto.Clone(config.DefaultZoneConfig).(*config.ZoneConfig)
	config.DefaultZoneConfig.ReplicaAttrs = []roachpb.Attributes{{}}
	stopper.AddCloser(stop.CloserFn(func() {
		config.DefaultZoneConfig = oldDefaultZC
	}))

	var err error
	ts.Server, err = NewServer(ts.Ctx, stopper)
	if err != nil {
		return err
	}

	// Ensure we have the correct number of engines. Add in-memory ones where
	// needed. There must be at least one store/engine.
	if ts.StoresPerNode < 1 {
		ts.StoresPerNode = 1
	}
	for i := len(ts.Ctx.Engines); i < ts.StoresPerNode; i++ {
		ts.Ctx.Engines = append(ts.Ctx.Engines, engine.NewInMem(roachpb.Attributes{}, 100<<20, ts.Server.stopper))
	}

	if !ts.SkipBootstrap {
		stopper := stop.NewStopper()
		_, err := BootstrapCluster("cluster-1", ts.Ctx.Engines, stopper)
		if err != nil {
			return util.Errorf("could not bootstrap cluster: %s", err)
		}
		stopper.Stop()
	}
	if err := ts.Server.Start(true); err != nil {
		return err
	}

	// If enabled, wait for initial splits to complete before returning control.
	// If initial splits do not complete, the server is stopped before
	// returning.
	if config.TestingTableSplitsDisabled() {
		return nil
	}
	if err := ts.WaitForInitialSplits(); err != nil {
		ts.Stop()
		return err
	}

	return nil
}
// TestingSetupZoneConfigHook initializes the zone config hook
// to 'testingZoneConfigHook' which uses 'testingZoneConfig'.
// Settings go back to their previous values when the stopper runs our closer.
func TestingSetupZoneConfigHook(stopper *stop.Stopper) {
	testingLock.Lock()
	defer testingLock.Unlock()
	if testingHasHook {
		panic("TestingSetupZoneConfigHook called without restoring state")
	}
	testingHasHook = true
	testingZoneConfig = map[uint32]*ZoneConfig{}
	testingPreviousHook = ZoneConfigHook
	ZoneConfigHook = testingZoneConfigHook
	testingLargestIDHook = func(maxID uint32) (max uint32) {
		testingLock.Lock()
		defer testingLock.Unlock()
		for id := range testingZoneConfig {
			if maxID > 0 && id > maxID {
				continue
			}
			if id > max {
				max = id
			}
		}
		return
	}

	stopper.AddCloser(stop.CloserFn(testingResetZoneConfigHook))
}
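// Hedged usage sketch (not part of the original source): a test would install
// the hook for the lifetime of its stopper and then register per-ID overrides.
// The TestingSetZoneConfig helper assumed below may be named differently (or
// absent) in this tree; it is shown only to illustrate how testingZoneConfig is
// expected to be populated once the hook is in place.
func exampleZoneConfigHookUsage(stopper *stop.Stopper) {
	TestingSetupZoneConfigHook(stopper)
	// Assumed helper: records the override so that testingZoneConfigHook
	// returns it for object ID 1000 until the stopper runs the closer.
	TestingSetZoneConfig(1000, &ZoneConfig{
		ReplicaAttrs: []roachpb.Attributes{{}, {}, {}},
	})
}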
// Start starts the TestServer by bootstrapping an in-memory store
// (defaults to maximum of 100M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.ServingAddr() after Start() for client connections.
// Use TestServer.Stopper().Stop() to shutdown the server after the test
// completes.
func (ts *TestServer) Start(params base.TestServerArgs) error {
	if ts.Ctx == nil {
		panic("Ctx not set")
	}

	if params.Stopper == nil {
		params.Stopper = stop.NewStopper()
	}

	if !params.PartOfCluster {
		// Change the replication requirements so we don't get log spam about ranges
		// not being replicated enough.
		cfg := config.DefaultZoneConfig()
		cfg.ReplicaAttrs = []roachpb.Attributes{{}}
		fn := config.TestingSetDefaultZoneConfig(cfg)
		params.Stopper.AddCloser(stop.CloserFn(fn))
	}

	// Needs to be called before NewServer to ensure resolvers are initialized.
	if err := ts.Ctx.InitNode(); err != nil {
		return err
	}

	// Ensure we have the correct number of engines. Add in-memory ones where
	// needed. There must be at least one store/engine.
	if params.StoresPerNode < 1 {
		params.StoresPerNode = 1
	}
	for i := len(ts.Ctx.Engines); i < params.StoresPerNode; i++ {
		ts.Ctx.Engines = append(ts.Ctx.Engines, engine.NewInMem(roachpb.Attributes{}, 100<<20, params.Stopper))
	}

	var err error
	ts.Server, err = NewServer(*ts.Ctx, params.Stopper)
	if err != nil {
		return err
	}
	// Our context must be shared with our server.
	ts.Ctx = &ts.Server.ctx

	if err := ts.Server.Start(); err != nil {
		return err
	}

	// If enabled, wait for initial splits to complete before returning control.
	// If initial splits do not complete, the server is stopped before
	// returning.
	if stk, ok := ts.ctx.TestingKnobs.Store.(*storage.StoreTestingKnobs); ok &&
		stk.DisableSplitQueue {
		return nil
	}
	if err := ts.WaitForInitialSplits(); err != nil {
		ts.Stop()
		return err
	}

	return nil
}
// StartTestCluster starts up a TestCluster made up of `nodes` in-memory testing
// servers.
// The cluster should be stopped using cluster.Stop().
func StartTestCluster(t testing.TB, nodes int, args base.TestClusterArgs) *TestCluster {
	if nodes < 1 {
		t.Fatal("invalid cluster size: ", nodes)
	}
	if args.ServerArgs.JoinAddr != "" {
		t.Fatal("can't specify a join addr when starting a cluster")
	}
	if args.ServerArgs.Stopper != nil {
		t.Fatal("can't set individual server stoppers when starting a cluster")
	}
	storeKnobs := args.ServerArgs.Knobs.Store
	if storeKnobs != nil &&
		(storeKnobs.(*storage.StoreTestingKnobs).DisableSplitQueue ||
			storeKnobs.(*storage.StoreTestingKnobs).DisableReplicateQueue) {
		t.Fatal("can't disable an individual server's queues when starting a cluster; " +
			"the cluster controls replication")
	}

	switch args.ReplicationMode {
	case base.ReplicationAuto:
	case base.ReplicationManual:
		if args.ServerArgs.Knobs.Store == nil {
			args.ServerArgs.Knobs.Store = &storage.StoreTestingKnobs{}
		}
		storeKnobs := args.ServerArgs.Knobs.Store.(*storage.StoreTestingKnobs)
		storeKnobs.DisableSplitQueue = true
		storeKnobs.DisableReplicateQueue = true
	default:
		t.Fatal("unexpected replication mode")
	}

	tc := &TestCluster{}
	tc.stopper = stop.NewStopper()

	args.ServerArgs.PartOfCluster = true
	for i := 0; i < nodes; i++ {
		serverArgs := args.ServerArgs
		serverArgs.Stopper = stop.NewStopper()
		if i > 0 {
			serverArgs.JoinAddr = tc.Servers[0].ServingAddr()
		}
		s, conn, _ := serverutils.StartServer(t, serverArgs)
		tc.Servers = append(tc.Servers, s.(*server.TestServer))
		tc.Conns = append(tc.Conns, conn)
		tc.mu.Lock()
		tc.mu.serverStoppers = append(tc.mu.serverStoppers, serverArgs.Stopper)
		tc.mu.Unlock()
	}

	// Create a closer that will stop the individual server stoppers when the
	// cluster stopper is stopped.
	tc.stopper.AddCloser(stop.CloserFn(tc.stopServers))

	tc.waitForStores(t)
	return tc
}
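// Hedged usage sketch (not part of the original source): start a three-node
// cluster and shut it down the way this version's doc comment prescribes
// (cluster.Stop()). The testing.T plumbing is assumed to come from the
// surrounding test.
func exampleStartTestCluster(t *testing.T) {
	tc := StartTestCluster(t, 3, base.TestClusterArgs{
		ReplicationMode: base.ReplicationAuto,
	})
	defer tc.Stop()

	// Manual mode would instead disable the split and replicate queues on
	// every node (per the switch above), leaving replication to the test.
	_ = tc.Conns[0]
}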
// StartTestCluster starts up a TestCluster made up of `nodes` in-memory testing
// servers.
// The cluster should be stopped using cluster.Stopper().Stop().
func StartTestCluster(t testing.TB, nodes int, args ClusterArgs) *TestCluster {
	if nodes < 1 {
		t.Fatal("invalid cluster size: ", nodes)
	}
	if args.ServerArgs.JoinAddr != "" {
		t.Fatal("can't specify a join addr when starting a cluster")
	}
	if args.ServerArgs.Stopper != nil {
		t.Fatal("can't set individual server stoppers when starting a cluster")
	}
	storeKnobs := args.ServerArgs.Knobs.Store
	if storeKnobs != nil &&
		(storeKnobs.(*storage.StoreTestingKnobs).DisableSplitQueue ||
			storeKnobs.(*storage.StoreTestingKnobs).DisableReplicateQueue) {
		t.Fatal("can't disable an individual server's queues when starting a cluster; " +
			"the cluster controls replication")
	}

	if args.Stopper == nil {
		args.Stopper = stop.NewStopper()
		args.ServerArgs.Stopper = args.Stopper
	}

	switch args.ReplicationMode {
	case ReplicationFull:
		// Force all ranges to be replicated everywhere.
		cfg := config.DefaultZoneConfig()
		cfg.ReplicaAttrs = make([]roachpb.Attributes, nodes)
		fn := config.TestingSetDefaultZoneConfig(cfg)
		args.Stopper.AddCloser(stop.CloserFn(fn))
	case ReplicationManual:
		if args.ServerArgs.Knobs.Store == nil {
			args.ServerArgs.Knobs.Store = &storage.StoreTestingKnobs{}
		}
		storeKnobs := args.ServerArgs.Knobs.Store.(*storage.StoreTestingKnobs)
		storeKnobs.DisableSplitQueue = true
		storeKnobs.DisableReplicateQueue = true
	default:
		t.Fatal("unexpected replication mode")
	}

	tc := &TestCluster{}
	args.ServerArgs.PartOfCluster = true
	first, conn, _ := serverutils.StartServer(t, args.ServerArgs)
	tc.Servers = append(tc.Servers, first.(*server.TestServer))
	tc.Conns = append(tc.Conns, conn)
	args.ServerArgs.JoinAddr = first.ServingAddr()
	for i := 1; i < nodes; i++ {
		s, conn, _ := serverutils.StartServer(t, args.ServerArgs)
		tc.Servers = append(tc.Servers, s.(*server.TestServer))
		tc.Conns = append(tc.Conns, conn)
	}

	tc.waitForStores(t)
	return tc
}
// StartWithStopper is the same as Start, but allows passing a stopper
// explicitly.
func (ts *TestServer) StartWithStopper(stopper *stop.Stopper) error {
	if ts.Ctx == nil {
		ts.Ctx = NewTestContext()
	}

	if stopper == nil {
		stopper = stop.NewStopper()
	}

	// Change the replication requirements so we don't get log spam about ranges
	// not being replicated enough.
	cfg := config.DefaultZoneConfig()
	cfg.ReplicaAttrs = []roachpb.Attributes{{}}
	fn := config.TestingSetDefaultZoneConfig(cfg)
	stopper.AddCloser(stop.CloserFn(fn))

	// Needs to be called before NewServer to ensure resolvers are initialized.
	if err := ts.Ctx.InitNode(); err != nil {
		return err
	}

	var err error
	ts.Server, err = NewServer(ts.Ctx, stopper)
	if err != nil {
		return err
	}

	// Ensure we have the correct number of engines. Add in-memory ones where
	// needed. There must be at least one store/engine.
	if ts.StoresPerNode < 1 {
		ts.StoresPerNode = 1
	}
	for i := len(ts.Ctx.Engines); i < ts.StoresPerNode; i++ {
		ts.Ctx.Engines = append(ts.Ctx.Engines, engine.NewInMem(roachpb.Attributes{}, 100<<20, ts.Server.stopper))
	}

	if err := ts.Server.Start(); err != nil {
		return err
	}

	// If enabled, wait for initial splits to complete before returning control.
	// If initial splits do not complete, the server is stopped before
	// returning.
	if config.TestingTableSplitsDisabled() {
		return nil
	}
	if err := ts.WaitForInitialSplits(); err != nil {
		ts.Stop()
		return err
	}

	return nil
}
// New creates an instance of a gossip node.
func New(
	ctx context.Context,
	rpcContext *rpc.Context,
	grpcServer *grpc.Server,
	resolvers []resolver.Resolver,
	stopper *stop.Stopper,
	registry *metric.Registry,
) *Gossip {
	ctx = log.WithEventLog(ctx, "gossip", "gossip")
	g := &Gossip{
		ctx:               ctx,
		Connected:         make(chan struct{}),
		rpcContext:        rpcContext,
		server:            newServer(ctx, stopper, registry),
		outgoing:          makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsOutgoingGauge)),
		bootstrapping:     map[string]struct{}{},
		disconnected:      make(chan *client, 10),
		stalledCh:         make(chan struct{}, 1),
		stallInterval:     defaultStallInterval,
		bootstrapInterval: defaultBootstrapInterval,
		cullInterval:      defaultCullInterval,
		nodeDescs:         map[roachpb.NodeID]*roachpb.NodeDescriptor{},
		resolverAddrs:     map[util.UnresolvedAddr]resolver.Resolver{},
		bootstrapAddrs:    map[util.UnresolvedAddr]struct{}{},
	}
	stopper.AddCloser(stop.CloserFn(func() {
		log.FinishEventLog(ctx)
	}))

	registry.AddMetric(g.outgoing.gauge)
	g.clientsMu.breakers = map[string]*circuit.Breaker{}
	log.Infof(g.ctx, "initial resolvers: %s", resolvers)
	g.SetResolvers(resolvers)

	g.mu.Lock()
	// Add ourselves as a SystemConfig watcher.
	g.mu.is.registerCallback(KeySystemConfig, g.updateSystemConfig)
	// Add ourselves as a node descriptor watcher.
	g.mu.is.registerCallback(MakePrefixPattern(KeyNodeIDPrefix), g.updateNodeAddress)
	g.mu.Unlock()

	RegisterGossipServer(grpcServer, g.server)
	return g
}
func (t *parallelTest) getClient(nodeIdx, clientIdx int) *gosql.DB {
	for len(t.clients[nodeIdx]) <= clientIdx {
		// Add a client.
		pgURL, cleanupFunc := sqlutils.PGUrl(t.T,
			t.cluster.Server(nodeIdx).ServingAddr(),
			security.RootUser,
			"TestParallel")
		db, err := gosql.Open("postgres", pgURL.String())
		if err != nil {
			t.Fatal(err)
		}
		sqlutils.MakeSQLRunner(t, db).Exec("SET DATABASE = test")
		t.cluster.Stopper().AddCloser(
			stop.CloserFn(func() {
				_ = db.Close()
				cleanupFunc()
			}))
		t.clients[nodeIdx] = append(t.clients[nodeIdx], db)
	}
	return t.clients[nodeIdx][clientIdx]
}
// Starts up a cluster made up of `nodes` in-memory testing servers, creates
// database `name`, and returns open gosql.DB connections to each node (to the
// named db), as well as a stopper that stops and cleans up all nodes and
// connections.
func SetupMultinodeTestCluster(
	t testing.TB, nodes int, name string,
) (MultinodeTestCluster, []*gosql.DB, *stop.Stopper) {
	if nodes < 1 {
		t.Fatal("invalid cluster size: ", nodes)
	}
	stopper := stop.NewStopper()

	// Force all ranges to be replicated everywhere. This is needed until #7297 is
	// fixed, otherwise starting a cluster takes forever.
	cfg := config.DefaultZoneConfig()
	cfg.ReplicaAttrs = make([]roachpb.Attributes, nodes)
	fn := config.TestingSetDefaultZoneConfig(cfg)
	stopper.AddCloser(stop.CloserFn(fn))

	var servers []serverutils.TestServerInterface
	var conns []*gosql.DB
	args := base.TestServerArgs{
		Stopper:       stopper,
		PartOfCluster: true,
		UseDatabase:   name,
	}
	first, conn, _ := serverutils.StartServer(t, args)
	servers = append(servers, first)
	conns = append(conns, conn)
	args.JoinAddr = first.ServingAddr()
	for i := 1; i < nodes; i++ {
		s, conn, _ := serverutils.StartServer(t, args)
		servers = append(servers, s)
		conns = append(conns, conn)
	}

	if _, err := conns[0].Exec(fmt.Sprintf(`CREATE DATABASE %s`, name)); err != nil {
		t.Fatal(err)
	}

	testCluster := MultinodeTestCluster{Servers: servers}
	return testCluster, conns, first.Stopper()
}
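// Hedged usage sketch (not part of the original source): the database name and
// table schema below are illustrative only. The returned stopper tears down all
// nodes and connections, matching the function's return contract.
func exampleSetupMultinodeTestCluster(t *testing.T) {
	tc, conns, stopper := SetupMultinodeTestCluster(t, 3, "t")
	defer stopper.Stop()
	_ = tc

	// Each connection already has its UseDatabase set to the named db.
	if _, err := conns[0].Exec(`CREATE TABLE t.kv (k INT PRIMARY KEY, v INT)`); err != nil {
		t.Fatal(err)
	}
}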
// StartServer creates a test server and sets up a gosql DB connection.
// The server should be stopped by calling server.Stopper().Stop().
func StartServer(t testing.TB, params base.TestServerArgs) (
	TestServerInterface, *gosql.DB, *client.DB,
) {
	server, err := StartServerRaw(params)
	if err != nil {
		t.Fatal(err)
	}

	kvClient := server.KVClient().(*client.DB)
	pgURL, cleanupGoDB := sqlutils.PGUrl(
		t, server.ServingAddr(), security.RootUser, "StartServer")
	pgURL.Path = params.UseDatabase
	goDB, err := gosql.Open("postgres", pgURL.String())
	if err != nil {
		t.Fatal(err)
	}
	server.Stopper().AddCloser(
		stop.CloserFn(func() {
			_ = goDB.Close()
			cleanupGoDB()
		}))
	return server, goDB, kvClient
}
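// Hedged usage sketch (not part of the original source): start a server, run a
// trivial SQL statement, and stop it via the documented server.Stopper().Stop().
func exampleStartServer(t *testing.T) {
	s, sqlDB, kvDB := StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	_ = kvDB // the *client.DB handle for KV-level access

	if _, err := sqlDB.Exec(`SELECT 1`); err != nil {
		t.Fatal(err)
	}
}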
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation, and is different from
// contexts used at runtime for server's background work (like `s.Ctx()`).
func (s *Server) Start(ctx context.Context) error {
	// Copy log tags from s.Ctx()
	ctx = log.WithLogTagsFromCtx(ctx, s.Ctx())

	tlsConfig, err := s.ctx.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.ctx.Addr)
	if err != nil {
		return err
	}
	log.Tracef(ctx, "listening on port %s", s.ctx.Addr)
	unresolvedAddr, err := officialAddr(s.ctx.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.ctx.Addr = unresolvedAddr.String()
	s.rpcContext.SetLocalInternalServer(s.node)

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.ctx.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.ctx.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.ctx.HTTPAddr = unresolvedHTTPAddr.String()

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(s.Ctx(), err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			if err := s.pgServer.ServeConn(conn); err != nil &&
				!netutil.IsClosedConnection(err) {
				log.Error(s.Ctx(), err)
			}
		}))
	})

	if len(s.ctx.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.ctx.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(s.Ctx(), err)
			}
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				if err := s.pgServer.ServeConn(conn); err != nil &&
					!netutil.IsClosedConnection(err) {
					log.Error(s.Ctx(), err)
				}
			}))
		})
	}

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAddr)
	log.Trace(ctx, "started gossip")

	if err := s.node.start(ctx, unresolvedAddr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
		return err
	}
	log.Trace(ctx, "started node")

	// Set the NodeID in the base context (which was inherited by the
	// various components of the server).
	s.nodeLogTagVal.Set(int64(s.node.Descriptor.NodeID))

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.ctx.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(s.recorder, s.ctx.MetricsSampleInterval, ts.Resolution10s, s.stopper)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.ctx.MetricsSampleInterval)

	s.sqlExecutor.SetNodeID(s.node.Descriptor.NodeID)
	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := new(sql.SchemaChangeManagerTestingKnobs)
	if s.ctx.TestingKnobs.SQLSchemaChangeManager != nil {
		testingKnobs = s.ctx.TestingKnobs.SQLSchemaChangeManager.(*sql.SchemaChangeManagerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	log.Infof(s.Ctx(), "starting %s server at %s", s.ctx.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(s.Ctx(), "starting grpc/postgres server at %s", unresolvedAddr)
	if len(s.ctx.SocketFile) != 0 {
		log.Infof(s.Ctx(), "starting postgres server at unix:%s", s.ctx.SocketFile)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})
	log.Trace(ctx, "accepting connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &util.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(util.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(util.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(util.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(s.Ctx())
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	conn, err := s.rpcContext.GRPCDial(s.ctx.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminEndpoint, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, s.status)
	s.mux.Handle(healthEndpoint, s.status)
	log.Trace(ctx, "added http endpoints")

	if err := sdnotify.Ready(); err != nil {
		log.Errorf(s.Ctx(), "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Trace(ctx, "server ready")

	return nil
}
// Start starts the server on the specified port, starts gossip and
// initializes the node using the engines from the server's context.
func (s *Server) Start() error {
	tlsConfig, err := s.ctx.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// TODO(tamird): s/308/http.StatusPermanentRedirect/ when it exists.
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, 308)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.ctx.Addr)
	if err != nil {
		return err
	}
	unresolvedAddr, err := officialAddr(s.ctx.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.ctx.Addr = unresolvedAddr.String()
	s.rpcContext.SetLocalInternalServer(s.node)

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := ln.Close(); err != nil {
			log.Fatal(err)
		}
	})

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.ctx.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.ctx.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.ctx.HTTPAddr = unresolvedHTTPAddr.String()

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.ServeWith(pgL, func(conn net.Conn) {
			if err := s.pgServer.ServeConn(conn); err != nil &&
				!netutil.IsClosedConnection(err) {
				log.Error(err)
			}
		}))
	})

	if len(s.ctx.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.ctx.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(err)
			}
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpServer.ServeWith(unixLn, func(conn net.Conn) {
				if err := s.pgServer.ServeConn(conn); err != nil &&
					!netutil.IsClosedConnection(err) {
					log.Error(err)
				}
			}))
		})
	}

	s.gossip.Start(s.grpc, unresolvedAddr)

	if err := s.node.start(unresolvedAddr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
		return err
	}

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.ctx.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(s.recorder, s.ctx.MetricsSampleInterval, ts.Resolution10s, s.stopper)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.ctx.MetricsSampleInterval)

	s.sqlExecutor.SetNodeID(s.node.Descriptor.NodeID)
	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := new(sql.SchemaChangeManagerTestingKnobs)
	if s.ctx.TestingKnobs.SQLSchemaChangeManager != nil {
		testingKnobs = s.ctx.TestingKnobs.SQLSchemaChangeManager.(*sql.SchemaChangeManagerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	log.Infof("starting %s server at %s", s.ctx.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof("starting grpc/postgres server at %s", unresolvedAddr)
	if len(s.ctx.SocketFile) != 0 {
		log.Infof("starting postgres server at unix:%s", s.ctx.SocketFile)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})

	// Initialize grpc-gateway mux and context.
	jsonpb := &util.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(util.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(util.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(util.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(context.Background())
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	var opts []grpc.DialOption
	if s.ctx.Insecure {
		opts = append(opts, grpc.WithInsecure())
	} else {
		tlsConfig, err := s.ctx.GetClientTLSConfig()
		if err != nil {
			return err
		}
		opts = append(
			opts,
			// TODO(tamird): remove this timeout. It is currently necessary because
			// GRPC will not actually bail on a bad certificate error - it will just
			// retry indefinitely. See https://github.com/grpc/grpc-go/issues/622.
			grpc.WithTimeout(base.NetworkTimeout),
			grpc.WithBlock(),
			grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)),
		)
	}
	conn, err := s.rpcContext.GRPCDial(s.ctx.Addr, opts...)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("debug_ui", false)
	if uiDebug {
		uiFileSystem = http.Dir("ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))
	s.mux.Handle(adminEndpoint, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, s.status)
	s.mux.Handle(healthEndpoint, s.status)

	if err := sdnotify.Ready(); err != nil {
		log.Errorf("failed to signal readiness using systemd protocol: %s", err)
	}

	return nil
}
func initBacktrace(logDir string) *stop.Stopper {
	const ptracePath = "/opt/backtrace/bin/ptrace"
	if _, err := os.Stat(ptracePath); err != nil {
		log.Infof(context.TODO(), "backtrace disabled: %s", err)
		return stop.NewStopper()
	}

	if err := bcd.EnableTracing(); err != nil {
		log.Infof(context.TODO(), "unable to enable backtrace: %s", err)
		return stop.NewStopper()
	}

	bcd.UpdateConfig(bcd.GlobalConfig{
		PanicOnKillFailure: true,
		ResendSignal:       true,
		RateLimit:          time.Second * 3,
		SynchronousPut:     true,
	})

	// Use the default tracer implementation.
	// false: Exclude system goroutines.
	tracer := bcd.New(bcd.NewOptions{
		IncludeSystemGs: false,
	})
	if err := tracer.SetOutputPath(logDir, 0755); err != nil {
		log.Infof(context.TODO(), "unable to set output path: %s", err)
		// Not a fatal error, continue.
	}

	// Enable WARNING log output from the tracer.
	tracer.AddOptions(nil, "-L", "WARNING")

	info := build.GetInfo()
	tracer.AddKV(nil, "cgo-compiler", info.CgoCompiler)
	tracer.AddKV(nil, "go-version", info.GoVersion)
	tracer.AddKV(nil, "platform", info.Platform)
	tracer.AddKV(nil, "tag", info.Tag)
	tracer.AddKV(nil, "time", info.Time)

	// Register for traces on signal reception.
	tracer.SetSigset(
		[]os.Signal{
			syscall.SIGABRT,
			syscall.SIGFPE,
			syscall.SIGSEGV,
			syscall.SIGILL,
			syscall.SIGBUS}...)
	bcd.Register(tracer)

	// Hook log.Fatal*.
	log.SetExitFunc(func(code int) {
		_ = bcd.Trace(tracer, fmt.Errorf("exit %d", code), nil)
		os.Exit(code)
	})

	stopper := stop.NewStopper(stop.OnPanic(func(val interface{}) {
		err, ok := val.(error)
		if !ok {
			err = fmt.Errorf("%v", val)
		}
		_ = bcd.Trace(tracer, err, nil)
		panic(val)
	}))

	// Internally, backtrace uses an external program (/opt/backtrace/bin/ptrace)
	// to generate traces. We direct the stdout for this program to a file for
	// debugging our usage of backtrace.
	if f, err := os.OpenFile(filepath.Join(logDir, "backtrace.out"),
		os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666); err != nil {
		log.Infof(context.TODO(), "unable to open: %s", err)
	} else {
		stopper.AddCloser(stop.CloserFn(func() {
			f.Close()
		}))
		tracer.SetPipes(nil, f)
	}

	tracer.SetLogLevel(bcd.LogMax)
	log.Infof(context.TODO(), "backtrace enabled")
	return stopper
}
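// Hedged usage sketch (not part of the original source): the log directory is
// illustrative. A caller keeps the returned stopper for the process lifetime so
// that the closer owning backtrace.out runs at shutdown.
func exampleInitBacktrace() {
	stopper := initBacktrace("/tmp/cockroach-logs")
	defer stopper.Stop()
}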