// TestingSetupZoneConfigHook initializes the zone config hook
// to 'testingZoneConfigHook' which uses 'testingZoneConfig'.
// Settings go back to their previous values when the stopper runs our closer.
func TestingSetupZoneConfigHook(stopper *stop.Stopper) {
	stopper.AddCloser(stop.CloserFn(testingResetZoneConfigHook))

	testingLock.Lock()
	defer testingLock.Unlock()
	if testingHasHook {
		panic("TestingSetupZoneConfigHook called without restoring state")
	}
	testingHasHook = true
	testingZoneConfig = make(zoneConfigMap)
	testingPreviousHook = ZoneConfigHook
	ZoneConfigHook = testingZoneConfigHook
	testingLargestIDHook = func(maxID uint32) (max uint32) {
		testingLock.Lock()
		defer testingLock.Unlock()
		for id := range testingZoneConfig {
			if maxID > 0 && id > maxID {
				continue
			}
			if id > max {
				max = id
			}
		}
		return
	}
}
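// A minimal usage sketch (hypothetical test helper, not part of the source):
// the hook stays installed for the lifetime of a stopper, and the closer
// registered above restores the previous hook when the stopper runs its
// closers.
func exampleZoneConfigHookUsage() {
	stopper := stop.NewStopper()
	// Install the testing hook; testingResetZoneConfigHook runs when the
	// stopper stops.
	TestingSetupZoneConfigHook(stopper)
	// ... exercise code that consults ZoneConfigHook / testingZoneConfig ...
	stopper.Stop() // restores the previous hook via the registered closer
}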
// Start starts the TestServer by bootstrapping an in-memory store
// (defaults to maximum of 100M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.ServingAddr() after Start() for client connections.
// Use TestServer.Stopper().Stop() to shut down the server after the test
// completes.
func (ts *TestServer) Start(params base.TestServerArgs) error {
	if ts.Cfg == nil {
		panic("Cfg not set")
	}

	if params.Stopper == nil {
		params.Stopper = stop.NewStopper()
	}

	if !params.PartOfCluster {
		// Change the replication requirements so we don't get log spam about ranges
		// not being replicated enough.
		cfg := config.DefaultZoneConfig()
		cfg.NumReplicas = 1
		fn := config.TestingSetDefaultZoneConfig(cfg)
		params.Stopper.AddCloser(stop.CloserFn(fn))
	}

	// Needs to be called before NewServer to ensure resolvers are initialized.
	if err := ts.Cfg.InitNode(); err != nil {
		return err
	}

	var err error
	ts.Server, err = NewServer(*ts.Cfg, params.Stopper)
	if err != nil {
		return err
	}

	// Our context must be shared with our server.
	ts.Cfg = &ts.Server.cfg

	if err := ts.Server.Start(context.Background()); err != nil {
		return err
	}

	// If enabled, wait for initial splits to complete before returning control.
	// If initial splits do not complete, the server is stopped before
	// returning.
	if stk, ok := ts.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs); ok &&
		stk.DisableSplitQueue {
		return nil
	}

	if err := ts.WaitForInitialSplits(); err != nil {
		ts.Stop()
		return err
	}

	return nil
}
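// A short sketch of the call pattern described in the doc comment above
// (hypothetical test code): start the server, connect using ServingAddr(),
// and shut everything down through the stopper.
func exampleTestServerStart(ts *TestServer) error {
	if err := ts.Start(base.TestServerArgs{}); err != nil {
		return err
	}
	defer ts.Stopper().Stop() // shuts the server down when the test completes
	addr := ts.ServingAddr()  // use this address for client connections
	_ = addr
	return nil
}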
// New creates an instance of a gossip node.
// The higher level manages the NodeIDContainer instance (which can be shared by
// various server components). The ambient context is expected to already
// contain the node ID.
func New(
	ambient log.AmbientContext,
	nodeID *base.NodeIDContainer,
	rpcContext *rpc.Context,
	grpcServer *grpc.Server,
	resolvers []resolver.Resolver,
	stopper *stop.Stopper,
	registry *metric.Registry,
) *Gossip {
	ambient.SetEventLog("gossip", "gossip")
	g := &Gossip{
		server:            newServer(ambient, nodeID, stopper, registry),
		Connected:         make(chan struct{}),
		rpcContext:        rpcContext,
		outgoing:          makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsOutgoingGauge)),
		bootstrapping:     map[string]struct{}{},
		disconnected:      make(chan *client, 10),
		stalledCh:         make(chan struct{}, 1),
		stallInterval:     defaultStallInterval,
		bootstrapInterval: defaultBootstrapInterval,
		cullInterval:      defaultCullInterval,
		nodeDescs:         map[roachpb.NodeID]*roachpb.NodeDescriptor{},
		resolverAddrs:     map[util.UnresolvedAddr]resolver.Resolver{},
		bootstrapAddrs:    map[util.UnresolvedAddr]roachpb.NodeID{},
	}
	stopper.AddCloser(stop.CloserFn(g.server.AmbientContext.FinishEventLog))

	registry.AddMetric(g.outgoing.gauge)
	g.clientsMu.breakers = map[string]*circuit.Breaker{}

	resolverAddrs := make([]string, len(resolvers))
	for i, resolver := range resolvers {
		resolverAddrs[i] = resolver.Addr()
	}
	ctx := g.AnnotateCtx(context.Background())
	if log.V(1) {
		log.Infof(ctx, "initial resolvers: %v", resolverAddrs)
	}
	g.SetResolvers(resolvers)

	g.mu.Lock()
	// Add ourselves as a SystemConfig watcher.
	g.mu.is.registerCallback(KeySystemConfig, g.updateSystemConfig)
	// Add ourselves as a node descriptor watcher.
	g.mu.is.registerCallback(MakePrefixPattern(KeyNodeIDPrefix), g.updateNodeAddress)
	g.mu.Unlock()

	RegisterGossipServer(grpcServer, g.server)
	return g
}
func (t *parallelTest) getClient(nodeIdx, clientIdx int) *gosql.DB {
	for len(t.clients[nodeIdx]) <= clientIdx {
		// Add a client.
		pgURL, cleanupFunc := sqlutils.PGUrl(t.T,
			t.cluster.Server(nodeIdx).ServingAddr(),
			"TestParallel",
			url.User(security.RootUser))
		db, err := gosql.Open("postgres", pgURL.String())
		if err != nil {
			t.Fatal(err)
		}
		sqlutils.MakeSQLRunner(t, db).Exec("SET DATABASE = test")
		t.cluster.Stopper().AddCloser(
			stop.CloserFn(func() {
				_ = db.Close()
				cleanupFunc()
			}))
		t.clients[nodeIdx] = append(t.clients[nodeIdx], db)
	}
	return t.clients[nodeIdx][clientIdx]
}
func testRepairInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	// Local stopper for the dynamic client and load workers (assumed to be
	// created here; the flattened original used `stopper` without declaring it).
	stopper := stop.NewStopper()
	defer stopper.Stop()

	dc := newDynamicClient(c, stopper)
	stopper.AddCloser(stop.CloserFn(func() { dc.Close(ctx) }))

	// Add some loads.
	for i := 0; i < c.NumNodes()*2; i++ {
		ID := i
		stopper.RunWorker(func() {
			insertLoad(ctx, t, dc, ID)
		})
	}

	// TODO(bram): #5345 add repair mechanism.

	select {
	case <-stopper.ShouldStop():
	case <-time.After(cfg.Duration):
	}
}
// StartServer creates a test server and sets up a gosql DB connection.
// The server should be stopped by calling server.Stopper().Stop().
func StartServer(
	t testing.TB, params base.TestServerArgs,
) (TestServerInterface, *gosql.DB, *client.DB) {
	server, err := StartServerRaw(params)
	if err != nil {
		t.Fatal(err)
	}

	kvClient := server.KVClient().(*client.DB)
	pgURL, cleanupGoDB := sqlutils.PGUrl(
		t, server.ServingAddr(), "StartServer", url.User(security.RootUser))
	pgURL.Path = params.UseDatabase
	goDB, err := gosql.Open("postgres", pgURL.String())
	if err != nil {
		t.Fatal(err)
	}
	server.Stopper().AddCloser(
		stop.CloserFn(func() {
			_ = goDB.Close()
			cleanupGoDB()
		}))
	return server, goDB, kvClient
}
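// A minimal caller sketch (hypothetical test): StartServer returns the server,
// a SQL connection, and a KV client; stopping the server's stopper also closes
// the SQL connection through the closer registered above.
func exampleStartServerUsage(t *testing.T) {
	s, sqlDB, kvDB := StartServer(t, base.TestServerArgs{UseDatabase: "test"})
	defer s.Stopper().Stop() // also closes sqlDB via the registered closer
	_, _ = sqlDB, kvDB
	// ... run SQL queries against sqlDB or KV operations against kvDB ...
}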
func initBacktrace(logDir string) *stop.Stopper {
	const ptracePath = "/opt/backtrace/bin/ptrace"
	if _, err := os.Stat(ptracePath); err != nil {
		log.Infof(context.TODO(), "backtrace disabled: %s", err)
		return stop.NewStopper()
	}

	if err := bcd.EnableTracing(); err != nil {
		log.Infof(context.TODO(), "unable to enable backtrace: %s", err)
		return stop.NewStopper()
	}

	bcd.UpdateConfig(bcd.GlobalConfig{
		PanicOnKillFailure: true,
		ResendSignal:       true,
		RateLimit:          time.Second * 3,
		SynchronousPut:     true,
	})

	// Use the default tracer implementation.
	// false: Exclude system goroutines.
	tracer := bcd.New(bcd.NewOptions{
		IncludeSystemGs: false,
	})
	if err := tracer.SetOutputPath(logDir, 0755); err != nil {
		log.Infof(context.TODO(), "unable to set output path: %s", err)
		// Not a fatal error, continue.
	}

	// Enable WARNING log output from the tracer.
	tracer.AddOptions(nil, "-L", "WARNING")

	info := build.GetInfo()
	tracer.AddKV(nil, "cgo-compiler", info.CgoCompiler)
	tracer.AddKV(nil, "go-version", info.GoVersion)
	tracer.AddKV(nil, "platform", info.Platform)
	tracer.AddKV(nil, "tag", info.Tag)
	tracer.AddKV(nil, "time", info.Time)

	// Register for traces on signal reception.
	tracer.SetSigset(
		[]os.Signal{
			syscall.SIGABRT,
			syscall.SIGFPE,
			syscall.SIGSEGV,
			syscall.SIGILL,
			syscall.SIGBUS}...)
	bcd.Register(tracer)

	// Hook log.Fatal*.
	log.SetExitFunc(func(code int) {
		_ = bcd.Trace(tracer, fmt.Errorf("exit %d", code), nil)
		os.Exit(code)
	})

	stopper := stop.NewStopper(stop.OnPanic(func(val interface{}) {
		err, ok := val.(error)
		if !ok {
			err = fmt.Errorf("%v", val)
		}
		_ = bcd.Trace(tracer, err, nil)
		panic(val)
	}))

	// Internally, backtrace uses an external program (/opt/backtrace/bin/ptrace)
	// to generate traces. We direct the stdout for this program to a file for
	// debugging our usage of backtrace.
	if f, err := os.OpenFile(filepath.Join(logDir, "backtrace.out"),
		os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666); err != nil {
		log.Infof(context.TODO(), "unable to open: %s", err)
	} else {
		stopper.AddCloser(stop.CloserFn(func() {
			f.Close()
		}))
		tracer.SetPipes(nil, f)
	}

	tracer.SetLogLevel(bcd.LogMax)
	log.Infof(context.TODO(), "backtrace enabled")
	return stopper
}
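// A usage sketch (assumed caller, not from the source): the stopper returned
// by initBacktrace owns the backtrace.out file, so the process keeps it alive
// for its lifetime and stops it on shutdown so the registered closer can run.
func exampleInitBacktraceUsage(logDir string) {
	stopper := initBacktrace(logDir)
	defer stopper.Stop() // runs the closer that closes backtrace.out
	// ... start the rest of the process using this stopper ...
}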
// StartTestCluster starts up a TestCluster made up of `nodes` in-memory testing
// servers.
// The cluster should be stopped using cluster.Stop().
func StartTestCluster(t testing.TB, nodes int, args base.TestClusterArgs) *TestCluster {
	if nodes < 1 {
		t.Fatal("invalid cluster size: ", nodes)
	}
	if args.ServerArgs.JoinAddr != "" {
		t.Fatal("can't specify a join addr when starting a cluster")
	}
	if args.ServerArgs.Stopper != nil {
		t.Fatal("can't set individual server stoppers when starting a cluster")
	}
	storeKnobs := args.ServerArgs.Knobs.Store
	if storeKnobs != nil &&
		(storeKnobs.(*storage.StoreTestingKnobs).DisableSplitQueue ||
			storeKnobs.(*storage.StoreTestingKnobs).DisableReplicateQueue) {
		t.Fatal("can't disable an individual server's queues when starting a cluster; " +
			"the cluster controls replication")
	}

	switch args.ReplicationMode {
	case base.ReplicationAuto:
	case base.ReplicationManual:
		if args.ServerArgs.Knobs.Store == nil {
			args.ServerArgs.Knobs.Store = &storage.StoreTestingKnobs{}
		}
		storeKnobs := args.ServerArgs.Knobs.Store.(*storage.StoreTestingKnobs)
		storeKnobs.DisableSplitQueue = true
		storeKnobs.DisableReplicateQueue = true
	default:
		t.Fatal("unexpected replication mode")
	}

	tc := &TestCluster{}
	tc.stopper = stop.NewStopper()

	for i := 0; i < nodes; i++ {
		var serverArgs base.TestServerArgs
		if perNodeServerArgs, ok := args.ServerArgsPerNode[i]; ok {
			serverArgs = perNodeServerArgs
		} else {
			serverArgs = args.ServerArgs
		}
		serverArgs.PartOfCluster = true
		if i > 0 {
			serverArgs.JoinAddr = tc.Servers[0].ServingAddr()
		}
		tc.AddServer(t, serverArgs)
	}

	// Create a closer that will stop the individual server stoppers when the
	// cluster stopper is stopped.
	tc.stopper.AddCloser(stop.CloserFn(tc.stopServers))

	tc.WaitForStores(t, tc.Servers[0].Gossip())

	// TODO(peter): We should replace the hardcoded 3 with the default ZoneConfig
	// replication factor.
	if args.ReplicationMode == base.ReplicationAuto && nodes >= 3 {
		if err := tc.waitForFullReplication(); err != nil {
			t.Fatal(err)
		}
	}
	return tc
}
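// A minimal caller sketch (hypothetical test): start a three-node in-memory
// cluster with automatic replication and stop it with cluster.Stop(), as the
// doc comment above prescribes.
func exampleStartTestClusterUsage(t *testing.T) {
	tc := StartTestCluster(t, 3, base.TestClusterArgs{
		ReplicationMode: base.ReplicationAuto,
	})
	defer tc.Stop() // the cluster stopper's closer stops each server's stopper
	// ... run the test against tc.Servers ...
}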
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation.
func (s *Server) Start(ctx context.Context) error {
	ctx = s.AnnotateCtx(ctx)

	startTime := timeutil.Now()

	tlsConfig, err := s.cfg.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig,
		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
		}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.cfg.Addr)
	if err != nil {
		return err
	}
	log.Eventf(ctx, "listening on port %s", s.cfg.Addr)
	unresolvedListenAddr, err := officialAddr(s.cfg.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.Addr = unresolvedListenAddr.String()
	unresolvedAdvertAddr, err := officialAddr(s.cfg.AdvertiseAddr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.AdvertiseAddr = unresolvedAdvertAddr.String()

	s.rpcContext.SetLocalInternalServer(s.node)

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.cfg.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.cfg.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.cfg.HTTPAddr = unresolvedHTTPAddr.String()

	workersCtx := s.AnnotateCtx(context.Background())

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(workersCtx, err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	s.stopper.RunWorker(func() {
		pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
			if err := s.pgServer.ServeConn(connCtx, conn); err != nil &&
				!netutil.IsClosedConnection(err) {
				// Report the error on this connection's context, so that we
				// know which remote client caused the error when looking at
				// the logs.
				log.Error(connCtx, err)
			}
		}))
	})

	if len(s.cfg.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.cfg.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(workersCtx, err)
			}
		})

		s.stopper.RunWorker(func() {
			pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
				if err := s.pgServer.ServeConn(connCtx, conn); err != nil &&
					!netutil.IsClosedConnection(err) {
					// Report the error on this connection's context, so that we
					// know which remote client caused the error when looking at
					// the logs.
					log.Error(connCtx, err)
				}
			}))
		})
	}

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAdvertAddr)
	log.Event(ctx, "started gossip")

	s.engines, err = s.cfg.CreateEngines()
	if err != nil {
		return errors.Wrap(err, "failed to create engines")
	}
	s.stopper.AddCloser(&s.engines)

	// We might have to sleep a bit to protect against this node producing non-
	// monotonic timestamps. Before restarting, its clock might have been driven
	// by other nodes' fast clocks, but when we restarted, we lost all this
	// information. For example, a client might have written a value at a
	// timestamp that's in the future of the restarted node's clock, and if we
	// don't do something, the same client's read would not return the written
	// value. So, we wait up to MaxOffset; we couldn't have served timestamps more
	// than MaxOffset in the future (assuming that MaxOffset was not changed, see
	// #9733).
	//
	// As an optimization for tests, we don't sleep if all the stores are brand
	// new. In this case, the node will not serve anything anyway until it
	// synchronizes with other nodes.
	{
		anyStoreBootstrapped := false
		for _, e := range s.engines {
			if _, err := storage.ReadStoreIdent(ctx, e); err != nil {
				// NotBootstrappedError is expected.
				if _, ok := err.(*storage.NotBootstrappedError); !ok {
					return err
				}
			} else {
				anyStoreBootstrapped = true
				break
			}
		}
		if anyStoreBootstrapped {
			sleepDuration := s.clock.MaxOffset() - timeutil.Since(startTime)
			if sleepDuration > 0 {
				log.Infof(ctx, "sleeping for %s to guarantee HLC monotonicity", sleepDuration)
				time.Sleep(sleepDuration)
			}
		}
	}

	// Now that we have a monotonic HLC wrt previous incarnations of the process,
	// init all the replicas.
	err = s.node.start(
		ctx,
		unresolvedAdvertAddr,
		s.engines,
		s.cfg.NodeAttributes,
		s.cfg.Locality,
	)
	if err != nil {
		return err
	}
	log.Event(ctx, "started node")

	s.nodeLiveness.StartHeartbeat(ctx, s.stopper)

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.cfg.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(
		s.cfg.AmbientCtx, s.recorder, s.cfg.MetricsSampleInterval, ts.Resolution10s, s.stopper,
	)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.cfg.MetricsSampleInterval)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := &sql.SchemaChangerTestingKnobs{}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		testingKnobs = s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	s.distSQLServer.Start()

	log.Infof(ctx, "starting %s server at %s", s.cfg.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(ctx, "starting grpc/postgres server at %s", unresolvedListenAddr)
	log.Infof(ctx, "advertising CockroachDB node at %s", unresolvedAdvertAddr)
	if len(s.cfg.SocketFile) != 0 {
		log.Infof(ctx, "starting postgres server at unix:%s", s.cfg.SocketFile)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})

	log.Event(ctx, "accepting connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &protoutil.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(protoutil.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(httputil.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(httputil.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(s.AnnotateCtx(context.Background()))
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	conn, err := s.rpcContext.GRPCDial(s.cfg.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("pkg/ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminPrefix, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, gwMux)
	s.mux.Handle("/health", gwMux)
	s.mux.Handle(statusVars, http.HandlerFunc(s.status.handleVars))
	log.Event(ctx, "added http endpoints")

	if err := sdnotify.Ready(); err != nil {
		log.Errorf(ctx, "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Event(ctx, "server ready")

	return nil
}
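// A self-contained sketch (not from the source) of the cmux construction that
// the comment at the top of (*Server).Start describes: a single TCP listener
// is split into a pgwire listener and a catch-all listener, each served by its
// own worker. The names pgMatcher, servePG, and serveGRPC are hypothetical
// stand-ins for pgwire.Match and the real pgwire/gRPC servers.
func exampleCmuxWiring(ln net.Listener, pgMatcher cmux.Matcher, servePG, serveGRPC func(net.Listener)) error {
	m := cmux.New(ln)           // wrap the single TCP listener
	pgL := m.Match(pgMatcher)   // connections whose first bytes look like pgwire
	anyL := m.Match(cmux.Any()) // everything else (gRPC, HTTP/2)

	go servePG(pgL)    // e.g. pgwire.Server.ServeConn per accepted connection
	go serveGRPC(anyL) // e.g. grpc.(*Server).Serve

	// Serve blocks, routing each accepted connection to the first listener
	// whose matcher accepts it.
	return m.Serve()
}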