// CreateNode creates a simulation node and starts an RPC server for it. func (n *Network) CreateNode() (*Node, error) { server := rpc.NewServer(n.rpcContext) ln, err := net.Listen(util.TestAddr.Network(), util.TestAddr.String()) if err != nil { return nil, err } n.Stopper.RunWorker(func() { <-n.Stopper.ShouldQuiesce() netutil.FatalIfUnexpected(ln.Close()) <-n.Stopper.ShouldStop() server.Stop() }) node := &Node{Server: server, Listener: ln, Registry: metric.NewRegistry()} node.Gossip = gossip.New(n.rpcContext, server, nil, n.Stopper, node.Registry) n.Nodes = append(n.Nodes, node) return node, nil }
// StartNode initializes a gossip instance for the simulation node and // starts it. func (n *Network) StartNode(node *Node) error { node.Gossip.Start(node.Addr()) node.Gossip.EnableSimulationCycler(true) n.nodeIDAllocator++ node.Gossip.SetNodeID(n.nodeIDAllocator) if err := node.Gossip.SetNodeDescriptor(&roachpb.NodeDescriptor{ NodeID: node.Gossip.GetNodeID(), Address: util.MakeUnresolvedAddr(node.Addr().Network(), node.Addr().String()), }); err != nil { return err } if err := node.Gossip.AddInfo(node.Addr().String(), encoding.EncodeUint64Ascending(nil, 0), time.Hour); err != nil { return err } n.Stopper.RunWorker(func() { netutil.FatalIfUnexpected(node.Server.Serve(node.Listener)) }) return nil }
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation, and is different from
// contexts used at runtime for server's background work (like `s.Ctx()`).
//
// Start returns the first error encountered while obtaining the TLS config,
// opening a listener, starting the node, dialing the local gRPC endpoint or
// registering a gateway. A failure to signal systemd readiness is only logged.
func (s *Server) Start(ctx context.Context) error {
	// Copy log tags from s.Ctx()
	ctx = log.WithLogTagsFromCtx(ctx, s.Ctx())

	tlsConfig, err := s.ctx.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	// plainRedirectServer answers cleartext HTTP requests with a permanent
	// redirect to the https:// version of the same host and URI. It is only
	// served when a TLS config is present (see below).
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. Two listeners are opened.
	//
	// RPC/SQL listener on s.ctx.Addr (same construction with and without TLS):
	//   net.Listen -> cmux.New
	//                 |
	//                 -  -> pgwire.Match -> pgwire.Server.ServeConn
	//                 -  -> cmux.Any     -> grpc.(*Server).Serve
	//
	// HTTP listener on s.ctx.HTTPAddr, non-TLS case:
	//   net.Listen -> httpServer.Serve
	//
	// HTTP listener on s.ctx.HTTPAddr, TLS case:
	//   net.Listen -> cmux.New
	//                 |
	//                 -  -> cmux.HTTP1 -> plainRedirectServer.Serve
	//                 -  -> cmux.Any   -> tls.NewListener -> httpServer.Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.
	// NOTE(review): the h2c note predates the separate HTTP listener; confirm
	// it still describes the current construction.

	ln, err := net.Listen("tcp", s.ctx.Addr)
	if err != nil {
		return err
	}
	log.Tracef(ctx, "listening on port %s", s.ctx.Addr)
	// Replace the configured address with the one reported by officialAddr.
	// NOTE(review): presumably this resolves the bound listener address (e.g.
	// when an ephemeral port was requested) — confirm against officialAddr.
	unresolvedAddr, err := officialAddr(s.ctx.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.ctx.Addr = unresolvedAddr.String()
	s.rpcContext.SetLocalInternalServer(s.node)

	// Multiplex pgwire and gRPC on the same listener. The muxes only start
	// dispatching once m.Serve() runs further below.
	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.ctx.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.ctx.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.ctx.HTTPAddr = unresolvedHTTPAddr.String()

	// Close the HTTP listener when the stopper begins quiescing. The closure
	// reads the httpLn variable at that time, so in the TLS case it closes the
	// TLS-wrapping listener assigned below.
	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(s.Ctx(), err)
		}
	})

	if tlsConfig != nil {
		// With TLS enabled, split the HTTP port: cleartext HTTP/1 requests are
		// redirected, everything else is treated as TLS.
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	// Two-phase gRPC teardown: stop accepting on quiesce, stop the gRPC
	// server once the stopper signals ShouldStop.
	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	// Serve postgres-protocol connections; errors other than closed
	// connections are logged rather than treated as fatal.
	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			if err := s.pgServer.ServeConn(conn); err != nil && !netutil.IsClosedConnection(err) {
				log.Error(s.Ctx(), err)
			}
		}))
	})

	if len(s.ctx.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.ctx.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(s.Ctx(), err)
			}
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				if err := s.pgServer.ServeConn(conn); err != nil && !netutil.IsClosedConnection(err) {
					log.Error(s.Ctx(), err)
				}
			}))
		})
	}

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAddr)
	log.Trace(ctx, "started gossip")

	if err := s.node.start(ctx, unresolvedAddr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
		return err
	}
	log.Trace(ctx, "started node")

	// Set the NodeID in the base context (which was inherited by the
	// various components of the server).
	s.nodeLogTagVal.Set(int64(s.node.Descriptor.NodeID))

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.ctx.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(s.recorder, s.ctx.MetricsSampleInterval, ts.Resolution10s, s.stopper)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.ctx.MetricsSampleInterval)

	s.sqlExecutor.SetNodeID(s.node.Descriptor.NodeID)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	// Default to empty testing knobs unless the server context supplies some.
	testingKnobs := new(sql.SchemaChangeManagerTestingKnobs)
	if s.ctx.TestingKnobs.SQLSchemaChangeManager != nil {
		testingKnobs = s.ctx.TestingKnobs.SQLSchemaChangeManager.(*sql.SchemaChangeManagerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	log.Infof(s.Ctx(), "starting %s server at %s", s.ctx.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(s.Ctx(), "starting grpc/postgres server at %s", unresolvedAddr)
	if len(s.ctx.SocketFile) != 0 {
		log.Infof(s.Ctx(), "starting postgres server at unix:%s", s.ctx.SocketFile)
	}

	// Start dispatching connections on the RPC/SQL listener. This happens
	// only after the node has been started above.
	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})
	log.Trace(ctx, "accepting connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &util.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       " ",
	}
	protopb := new(util.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(util.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(util.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(util.AltProtoContentType, protopb),
	)
	// The gateway context is cancelled when the stopper runs its closers.
	gwCtx, gwCancel := context.WithCancel(s.Ctx())
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	// The gateway dials this server's own RPC address.
	conn, err := s.rpcContext.GRPCDial(s.ctx.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	// Serve UI assets from the local "ui" directory when COCKROACH_DEBUG_UI
	// is set, otherwise from the assets referenced through the ui package.
	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	// Requests for "/" are rewritten to the debug or release landing page
	// before being handed to the file server.
	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminEndpoint, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, s.status)
	s.mux.Handle(healthEndpoint, s.status)
	log.Trace(ctx, "added http endpoints")

	// Readiness notification is best-effort: a failure is logged, not returned.
	if err := sdnotify.Ready(); err != nil {
		log.Errorf(s.Ctx(), "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Trace(ctx, "server ready")

	return nil
}
// TestHeartbeatHealth verifies that the health status changes after // heartbeats succeed or fail due to transport failures. func TestHeartbeatHealthTransport(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() // Can't be zero because that'd be an empty offset. clock := hlc.NewClock(time.Unix(0, 1).UnixNano) serverCtx := newNodeTestContext(clock, stopper) // newTestServer with a custom listener. tlsConfig, err := serverCtx.GetServerTLSConfig() if err != nil { t.Fatal(err) } s := grpc.NewServer(grpc.Creds(credentials.NewTLS(tlsConfig))) ln, err := net.Listen("tcp", util.TestAddr.String()) if err != nil { t.Fatal(err) } mu := struct { syncutil.Mutex conns []net.Conn }{} connectChan := make(chan struct{}) defer close(connectChan) ln = &interceptingListener{Listener: ln, connectChan: connectChan, connCB: func(conn net.Conn) { mu.Lock() mu.conns = append(mu.conns, conn) mu.Unlock() }} stopper.RunWorker(func() { <-stopper.ShouldQuiesce() netutil.FatalIfUnexpected(ln.Close()) <-stopper.ShouldStop() s.Stop() }) stopper.RunWorker(func() { netutil.FatalIfUnexpected(s.Serve(ln)) }) remoteAddr := ln.Addr().String() RegisterHeartbeatServer(s, &HeartbeatService{ clock: clock, remoteClockMonitor: serverCtx.RemoteClocks, }) clientCtx := newNodeTestContext(clock, stopper) // Make the intervals shorter to speed up the tests. clientCtx.HeartbeatInterval = 1 * time.Millisecond if _, err := clientCtx.GRPCDial(remoteAddr); err != nil { t.Fatal(err) } // Allow the connection to go through. connectChan <- struct{}{} // Everything is normal; should become healthy. util.SucceedsSoon(t, func() error { if !clientCtx.IsConnHealthy(remoteAddr) { return errors.Errorf("expected %s to be healthy", remoteAddr) } return nil }) closeConns := func() { mu.Lock() for _, conn := range mu.conns { if err := conn.Close(); err != nil { t.Fatal(err) } } mu.conns = mu.conns[:0] mu.Unlock() } // Close all the connections. 
closeConns() // Should become unhealthy now that the connection was closed. util.SucceedsSoon(t, func() error { if clientCtx.IsConnHealthy(remoteAddr) { return errors.Errorf("expected %s to be unhealthy", remoteAddr) } return nil }) // Should become healthy again after GRPC reconnects. connectChan <- struct{}{} util.SucceedsSoon(t, func() error { if !clientCtx.IsConnHealthy(remoteAddr) { return errors.Errorf("expected %s to be healthy", remoteAddr) } return nil }) // Close the listener and all the connections. if err := ln.Close(); err != nil { t.Fatal(err) } closeConns() // Should become unhealthy again now that the connection was closed. util.SucceedsSoon(t, func() error { if clientCtx.IsConnHealthy(remoteAddr) { return errors.Errorf("expected %s to be unhealthy", remoteAddr) } return nil }) // Should stay unhealthy despite reconnection attempts. errUnhealthy := errors.New("connection is still unhealthy") if err := util.RetryForDuration(100*clientCtx.HeartbeatInterval, func() error { if clientCtx.IsConnHealthy(remoteAddr) { return errors.Errorf("expected %s to be unhealthy", remoteAddr) } return errUnhealthy }); err != errUnhealthy { t.Fatal(err) } }
// Start starts the server on the specified port, starts gossip and // initializes the node using the engines from the server's context. func (s *Server) Start() error { tlsConfig, err := s.ctx.GetServerTLSConfig() if err != nil { return err } httpServer := netutil.MakeServer(s.stopper, tlsConfig, s) plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // TODO(tamird): s/308/http.StatusPermanentRedirect/ when it exists. http.Redirect(w, r, "https://"+r.Host+r.RequestURI, 308) })) // The following code is a specialization of util/net.go's ListenAndServe // which adds pgwire support. A single port is used to serve all protocols // (pg, http, h2) via the following construction: // // non-TLS case: // net.Listen -> cmux.New // | // - -> pgwire.Match -> pgwire.Server.ServeConn // - -> cmux.Any -> grpc.(*Server).Serve // // TLS case: // net.Listen -> cmux.New // | // - -> pgwire.Match -> pgwire.Server.ServeConn // - -> cmux.Any -> grpc.(*Server).Serve // // Note that the difference between the TLS and non-TLS cases exists due to // Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments // in util.ListenAndServe for an explanation of how h2c is implemented there // and here. 
ln, err := net.Listen("tcp", s.ctx.Addr) if err != nil { return err } unresolvedAddr, err := officialAddr(s.ctx.Addr, ln.Addr()) if err != nil { return err } s.ctx.Addr = unresolvedAddr.String() s.rpcContext.SetLocalInternalServer(s.node) s.stopper.RunWorker(func() { <-s.stopper.ShouldQuiesce() if err := ln.Close(); err != nil { log.Fatal(err) } }) m := cmux.New(ln) pgL := m.Match(pgwire.Match) anyL := m.Match(cmux.Any()) httpLn, err := net.Listen("tcp", s.ctx.HTTPAddr) if err != nil { return err } unresolvedHTTPAddr, err := officialAddr(s.ctx.HTTPAddr, httpLn.Addr()) if err != nil { return err } s.ctx.HTTPAddr = unresolvedHTTPAddr.String() s.stopper.RunWorker(func() { <-s.stopper.ShouldQuiesce() if err := httpLn.Close(); err != nil { log.Fatal(err) } }) if tlsConfig != nil { httpMux := cmux.New(httpLn) clearL := httpMux.Match(cmux.HTTP1()) tlsL := httpMux.Match(cmux.Any()) s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(httpMux.Serve()) }) s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL)) }) httpLn = tls.NewListener(tlsL, tlsConfig) } s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(httpServer.Serve(httpLn)) }) s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(s.grpc.Serve(anyL)) }) s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(httpServer.ServeWith(pgL, func(conn net.Conn) { if err := s.pgServer.ServeConn(conn); err != nil && !netutil.IsClosedConnection(err) { log.Error(err) } })) }) if len(s.ctx.SocketFile) != 0 { // Unix socket enabled: postgres protocol only. 
unixLn, err := net.Listen("unix", s.ctx.SocketFile) if err != nil { return err } s.stopper.RunWorker(func() { <-s.stopper.ShouldQuiesce() if err := unixLn.Close(); err != nil { log.Fatal(err) } }) s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(httpServer.ServeWith(unixLn, func(conn net.Conn) { if err := s.pgServer.ServeConn(conn); err != nil && !netutil.IsClosedConnection(err) { log.Error(err) } })) }) } s.gossip.Start(s.grpc, unresolvedAddr) if err := s.node.start(unresolvedAddr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil { return err } // Begin recording runtime statistics. s.startSampleEnvironment(s.ctx.MetricsSampleInterval) // Begin recording time series data collected by the status monitor. s.tsDB.PollSource(s.recorder, s.ctx.MetricsSampleInterval, ts.Resolution10s, s.stopper) // Begin recording status summaries. s.node.startWriteSummaries(s.ctx.MetricsSampleInterval) s.sqlExecutor.SetNodeID(s.node.Descriptor.NodeID) // Create and start the schema change manager only after a NodeID // has been assigned. testingKnobs := new(sql.SchemaChangeManagerTestingKnobs) if s.ctx.TestingKnobs.SQLSchemaChangeManager != nil { testingKnobs = s.ctx.TestingKnobs.SQLSchemaChangeManager.(*sql.SchemaChangeManagerTestingKnobs) } sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper) log.Infof("starting %s server at %s", s.ctx.HTTPRequestScheme(), unresolvedHTTPAddr) log.Infof("starting grpc/postgres server at %s", unresolvedAddr) if len(s.ctx.SocketFile) != 0 { log.Infof("starting postgres server at unix:%s", s.ctx.SocketFile) } s.stopper.RunWorker(func() { netutil.FatalIfUnexpected(m.Serve()) }) // Initialize grpc-gateway mux and context. 
jsonpb := &util.JSONPb{ EnumsAsInts: true, EmitDefaults: true, Indent: " ", } protopb := new(util.ProtoPb) gwMux := gwruntime.NewServeMux( gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb), gwruntime.WithMarshalerOption(util.JSONContentType, jsonpb), gwruntime.WithMarshalerOption(util.AltJSONContentType, jsonpb), gwruntime.WithMarshalerOption(util.ProtoContentType, protopb), gwruntime.WithMarshalerOption(util.AltProtoContentType, protopb), ) gwCtx, gwCancel := context.WithCancel(context.Background()) s.stopper.AddCloser(stop.CloserFn(gwCancel)) // Setup HTTP<->gRPC handlers. var opts []grpc.DialOption if s.ctx.Insecure { opts = append(opts, grpc.WithInsecure()) } else { tlsConfig, err := s.ctx.GetClientTLSConfig() if err != nil { return err } opts = append( opts, // TODO(tamird): remove this timeout. It is currently necessary because // GRPC will not actually bail on a bad certificate error - it will just // retry indefinitely. See https://github.com/grpc/grpc-go/issues/622. grpc.WithTimeout(base.NetworkTimeout), grpc.WithBlock(), grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)), ) } conn, err := s.rpcContext.GRPCDial(s.ctx.Addr, opts...) 
if err != nil { return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err) } for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} { if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil { return err } } var uiFileSystem http.FileSystem uiDebug := envutil.EnvOrDefaultBool("debug_ui", false) if uiDebug { uiFileSystem = http.Dir("ui") } else { uiFileSystem = &assetfs.AssetFS{ Asset: ui.Asset, AssetDir: ui.AssetDir, AssetInfo: ui.AssetInfo, } } uiFileServer := http.FileServer(uiFileSystem) s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/" { if uiDebug { r.URL.Path = "debug.html" } else { r.URL.Path = "release.html" } } uiFileServer.ServeHTTP(w, r) })) // TODO(marc): when cookie-based authentication exists, // apply it for all web endpoints. s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug)) s.mux.Handle(adminEndpoint, gwMux) s.mux.Handle(ts.URLPrefix, gwMux) s.mux.Handle(statusPrefix, s.status) s.mux.Handle(healthEndpoint, s.status) if err := sdnotify.Ready(); err != nil { log.Errorf("failed to signal readiness using systemd protocol: %s", err) } return nil }