Пример #1
0
// CreateNode creates a simulation node and starts an RPC server for it.
func (n *Network) CreateNode() (*Node, error) {
	server := rpc.NewServer(n.rpcContext)
	ln, err := net.Listen(util.TestAddr.Network(), util.TestAddr.String())
	if err != nil {
		return nil, err
	}
	node := &Node{Server: server, Listener: ln, Registry: metric.NewRegistry()}
	node.Gossip = gossip.NewTest(0, n.rpcContext, server, nil, n.Stopper, node.Registry)
	n.Stopper.RunWorker(func() {
		<-n.Stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(ln.Close())
		<-n.Stopper.ShouldStop()
		server.Stop()
		node.Gossip.EnableSimulationCycler(false)
	})
	n.Nodes = append(n.Nodes, node)
	return node, nil
}
Пример #2
0
// StartNode initializes a gossip instance for the simulation node and
// starts it.
func (n *Network) StartNode(node *Node) error {
	node.Gossip.Start(node.Addr())
	node.Gossip.EnableSimulationCycler(true)
	n.nodeIDAllocator++
	node.Gossip.NodeID.Set(context.TODO(), n.nodeIDAllocator)
	if err := node.Gossip.SetNodeDescriptor(&roachpb.NodeDescriptor{
		NodeID:  node.Gossip.NodeID.Get(),
		Address: util.MakeUnresolvedAddr(node.Addr().Network(), node.Addr().String()),
	}); err != nil {
		return err
	}
	if err := node.Gossip.AddInfo(node.Addr().String(),
		encoding.EncodeUint64Ascending(nil, 0), time.Hour); err != nil {
		return err
	}
	n.Stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(node.Server.Serve(node.Listener))
	})
	return nil
}
Пример #3
0
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation.
func (s *Server) Start(ctx context.Context) error {
	ctx = s.AnnotateCtx(ctx)

	startTime := timeutil.Now()

	tlsConfig, err := s.cfg.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.cfg.Addr)
	if err != nil {
		return err
	}
	log.Eventf(ctx, "listening on port %s", s.cfg.Addr)
	unresolvedListenAddr, err := officialAddr(s.cfg.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.Addr = unresolvedListenAddr.String()
	unresolvedAdvertAddr, err := officialAddr(s.cfg.AdvertiseAddr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.AdvertiseAddr = unresolvedAdvertAddr.String()

	s.rpcContext.SetLocalInternalServer(s.node)

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.cfg.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.cfg.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.cfg.HTTPAddr = unresolvedHTTPAddr.String()

	workersCtx := s.AnnotateCtx(context.Background())

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(workersCtx, err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	s.stopper.RunWorker(func() {
		pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
			if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
				// Report the error on this connection's context, so that we
				// know which remote client caused the error when looking at
				// the logs.
				log.Error(connCtx, err)
			}
		}))
	})

	if len(s.cfg.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.cfg.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(workersCtx, err)
			}
		})

		s.stopper.RunWorker(func() {
			pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
				if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
					// Report the error on this connection's context, so that we
					// know which remote client caused the error when looking at
					// the logs.
					log.Error(connCtx, err)
				}
			}))
		})
	}

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAdvertAddr)
	log.Event(ctx, "started gossip")

	s.engines, err = s.cfg.CreateEngines()
	if err != nil {
		return errors.Wrap(err, "failed to create engines")
	}
	s.stopper.AddCloser(&s.engines)

	// We might have to sleep a bit to protect against this node producing non-
	// monotonic timestamps. Before restarting, its clock might have been driven
	// by other nodes' fast clocks, but when we restarted, we lost all this
	// information. For example, a client might have written a value at a
	// timestamp that's in the future of the restarted node's clock, and if we
	// don't do something, the same client's read would not return the written
	// value. So, we wait up to MaxOffset; we couldn't have served timestamps more
	// than MaxOffset in the future (assuming that MaxOffset was not changed, see
	// #9733).
	//
	// As an optimization for tests, we don't sleep if all the stores are brand
	// new. In this case, the node will not serve anything anyway until it
	// synchronizes with other nodes.
	{
		anyStoreBootstrapped := false
		for _, e := range s.engines {
			if _, err := storage.ReadStoreIdent(ctx, e); err != nil {
				// NotBootstrappedError is expected.
				if _, ok := err.(*storage.NotBootstrappedError); !ok {
					return err
				}
			} else {
				anyStoreBootstrapped = true
				break
			}
		}
		if anyStoreBootstrapped {
			sleepDuration := s.clock.MaxOffset() - timeutil.Since(startTime)
			if sleepDuration > 0 {
				log.Infof(ctx, "sleeping for %s to guarantee HLC monotonicity", sleepDuration)
				time.Sleep(sleepDuration)
			}
		}
	}

	// Now that we have a monotonic HLC wrt previous incarnations of the process,
	// init all the replicas.
	err = s.node.start(
		ctx,
		unresolvedAdvertAddr,
		s.engines,
		s.cfg.NodeAttributes,
		s.cfg.Locality,
	)
	if err != nil {
		return err
	}
	log.Event(ctx, "started node")

	s.nodeLiveness.StartHeartbeat(ctx, s.stopper)

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.cfg.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(
		s.cfg.AmbientCtx, s.recorder, s.cfg.MetricsSampleInterval, ts.Resolution10s, s.stopper,
	)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.cfg.MetricsSampleInterval)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := &sql.SchemaChangerTestingKnobs{}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		testingKnobs = s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	s.distSQLServer.Start()

	log.Infof(ctx, "starting %s server at %s", s.cfg.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(ctx, "starting grpc/postgres server at %s", unresolvedListenAddr)
	log.Infof(ctx, "advertising CockroachDB node at %s", unresolvedAdvertAddr)
	if len(s.cfg.SocketFile) != 0 {
		log.Infof(ctx, "starting postgres server at unix:%s", s.cfg.SocketFile)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})

	log.Event(ctx, "accepting connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &protoutil.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(protoutil.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(httputil.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(httputil.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(s.AnnotateCtx(context.Background()))
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	conn, err := s.rpcContext.GRPCDial(s.cfg.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("pkg/ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminPrefix, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, gwMux)
	s.mux.Handle("/health", gwMux)
	s.mux.Handle(statusVars, http.HandlerFunc(s.status.handleVars))
	log.Event(ctx, "added http endpoints")

	if err := sdnotify.Ready(); err != nil {
		log.Errorf(ctx, "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Event(ctx, "server ready")

	return nil
}
Пример #4
0
// TestHeartbeatHealth verifies that the health status changes after
// heartbeats succeed or fail due to transport failures.
func TestHeartbeatHealthTransport(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Can't be zero because that'd be an empty offset.
	clock := hlc.NewClock(time.Unix(0, 1).UnixNano, time.Nanosecond)

	serverCtx := newNodeTestContext(clock, stopper)
	// newTestServer with a custom listener.
	tlsConfig, err := serverCtx.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	s := grpc.NewServer(grpc.Creds(credentials.NewTLS(tlsConfig)))
	ln, err := net.Listen("tcp", util.TestAddr.String())
	if err != nil {
		t.Fatal(err)
	}
	mu := struct {
		syncutil.Mutex
		conns []net.Conn
	}{}
	connectChan := make(chan struct{})
	defer close(connectChan)
	ln = &interceptingListener{Listener: ln, connectChan: connectChan, connCB: func(conn net.Conn) {
		mu.Lock()
		mu.conns = append(mu.conns, conn)
		mu.Unlock()
	}}
	stopper.RunWorker(func() {
		<-stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(ln.Close())
		<-stopper.ShouldStop()
		s.Stop()
	})

	stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.Serve(ln))
	})

	remoteAddr := ln.Addr().String()

	RegisterHeartbeatServer(s, &HeartbeatService{
		clock:              clock,
		remoteClockMonitor: serverCtx.RemoteClocks,
	})

	clientCtx := newNodeTestContext(clock, stopper)
	// Make the intervals shorter to speed up the tests.
	clientCtx.HeartbeatInterval = 1 * time.Millisecond
	if _, err := clientCtx.GRPCDial(remoteAddr); err != nil {
		t.Fatal(err)
	}
	// Allow the connection to go through.
	connectChan <- struct{}{}

	// Everything is normal; should become healthy.
	testutils.SucceedsSoon(t, func() error {
		if !clientCtx.IsConnHealthy(remoteAddr) {
			return errors.Errorf("expected %s to be healthy", remoteAddr)
		}
		return nil
	})

	closeConns := func() {
		mu.Lock()
		for _, conn := range mu.conns {
			if err := conn.Close(); err != nil {
				t.Fatal(err)
			}
		}
		mu.conns = mu.conns[:0]
		mu.Unlock()
	}

	// Close all the connections.
	closeConns()

	// Should become unhealthy now that the connection was closed.
	testutils.SucceedsSoon(t, func() error {
		if clientCtx.IsConnHealthy(remoteAddr) {
			return errors.Errorf("expected %s to be unhealthy", remoteAddr)
		}
		return nil
	})

	// Should become healthy again after GRPC reconnects.
	connectChan <- struct{}{}
	testutils.SucceedsSoon(t, func() error {
		if !clientCtx.IsConnHealthy(remoteAddr) {
			return errors.Errorf("expected %s to be healthy", remoteAddr)
		}
		return nil
	})

	// Close the listener and all the connections.
	if err := ln.Close(); err != nil {
		t.Fatal(err)
	}
	closeConns()

	// Should become unhealthy again now that the connection was closed.
	testutils.SucceedsSoon(t, func() error {
		if clientCtx.IsConnHealthy(remoteAddr) {
			return errors.Errorf("expected %s to be unhealthy", remoteAddr)
		}
		return nil
	})

	// Should stay unhealthy despite reconnection attempts.
	errUnhealthy := errors.New("connection is still unhealthy")
	if err := util.RetryForDuration(100*clientCtx.HeartbeatInterval, func() error {
		if clientCtx.IsConnHealthy(remoteAddr) {
			return errors.Errorf("expected %s to be unhealthy", remoteAddr)
		}
		return errUnhealthy
	}); err != errUnhealthy {
		t.Fatal(err)
	}
}