// startFakeServerGossips creates a local gossip instance and a remote
// faked gossip instance. The remote gossip instance launches its
// faked gossip service just to check the client messages.
func startFakeServerGossips(
	t *testing.T, localNodeID roachpb.NodeID,
) (*Gossip, *fakeGossipServer, *stop.Stopper) {
	stopper := stop.NewStopper()
	lRPCContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

	lserver := rpc.NewServer(lRPCContext)
	local := NewTest(localNodeID, lRPCContext, lserver, nil, stopper, metric.NewRegistry())
	lln, err := netutil.ListenAndServeGRPC(stopper, lserver, util.IsolatedTestAddr)
	if err != nil {
		t.Fatal(err)
	}
	local.start(lln.Addr())

	rRPCContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

	rserver := rpc.NewServer(rRPCContext)
	rln, err := netutil.ListenAndServeGRPC(stopper, rserver, util.IsolatedTestAddr)
	if err != nil {
		t.Fatal(err)
	}

	remote := newFakeGossipServer(rserver, stopper)
	addr := rln.Addr()
	remote.nodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())
	return local, remote, stopper
}
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(
	addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T,
) (*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	cfg := storage.StoreConfig{}

	stopper := stop.NewStopper()
	cfg.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(log.AmbientContext{}, nodeTestBaseContext, cfg.Clock, stopper)
	cfg.ScanInterval = 10 * time.Hour
	cfg.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCfg := makeTestConfig()
	cfg.Gossip = gossip.NewTest(
		0,
		nodeRPCContext,
		grpcServer,
		serverCfg.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry(),
	)
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		cfg.Gossip.SetResolvers([]resolver.Resolver{r})
		cfg.Gossip.Start(ln.Addr())
	}
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(kv.DistSenderConfig{
		Clock:           cfg.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, cfg.Gossip)
	cfg.AmbientCtx.Tracer = tracing.NewTracer()
	sender := kv.NewTxnCoordSender(
		cfg.AmbientCtx,
		distSender,
		cfg.Clock,
		false,
		stopper,
		kv.MakeTxnMetrics(metric.TestSampleInterval),
	)
	cfg.DB = client.NewDB(sender)
	cfg.Transport = storage.NewDummyRaftTransport()
	cfg.MetricsSampleInterval = metric.TestSampleInterval
	node := NewNode(cfg, status.NewMetricsRecorder(cfg.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(metric.TestSampleInterval), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), cfg.Clock, node, stopper
}
func gossipSucceedsSoon(
	t *testing.T,
	stopper *stop.Stopper,
	disconnected chan *client,
	gossip map[*client]*Gossip,
	f func() error,
) {
	// Use an insecure context since we don't need a valid cert.
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

	for c := range gossip {
		disconnected <- c
	}

	util.SucceedsSoon(t, func() error {
		select {
		case client := <-disconnected:
			// If the client wasn't able to connect, restart it.
			client.start(gossip[client], disconnected, rpcContext, stopper, gossip[client].NodeID.Get(), rpcContext.NewBreaker())
		default:
		}
		return f()
	})
}
func startGossipAtAddr(
	nodeID roachpb.NodeID, addr net.Addr, stopper *stop.Stopper, t *testing.T, registry *metric.Registry,
) *Gossip {
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)
	server := rpc.NewServer(rpcContext)
	g := NewTest(nodeID, rpcContext, server, nil, stopper, registry)
	ln, err := netutil.ListenAndServeGRPC(stopper, server, addr)
	if err != nil {
		t.Fatal(err)
	}
	addr = ln.Addr()
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{
		NodeID:  nodeID,
		Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
	}); err != nil {
		t.Fatal(err)
	}
	g.start(addr)
	time.Sleep(time.Millisecond)
	return g
}
func newKVNative(b *testing.B) kvInterface {
	enableTracing := tracing.Disable()
	s, _, _ := serverutils.StartServer(b, base.TestServerArgs{})

	// TestServer.DB() returns the TxnCoordSender wrapped client. But that isn't
	// a fair comparison with SQL as we want these client requests to be sent
	// over the network.
	sender, err := client.NewSender(
		rpc.NewContext(log.AmbientContext{}, &base.Config{
			User:       security.NodeUser,
			SSLCA:      filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert),
			SSLCert:    filepath.Join(security.EmbeddedCertsDir, "node.crt"),
			SSLCertKey: filepath.Join(security.EmbeddedCertsDir, "node.key"),
		}, nil, s.Stopper()),
		s.ServingAddr())
	if err != nil {
		b.Fatal(err)
	}

	return &kvNative{
		db: client.NewDB(sender),
		doneFn: func() {
			s.Stopper().Stop()
			enableTracing()
		},
	}
}
// NewNetwork creates nodeCount gossip nodes.
func NewNetwork(stopper *stop.Stopper, nodeCount int, createResolvers bool) *Network {
	log.Infof(context.TODO(), "simulating gossip network with %d nodes", nodeCount)

	n := &Network{
		Nodes:   []*Node{},
		Stopper: stopper,
	}
	n.rpcContext = rpc.NewContext(
		log.AmbientContext{},
		&base.Config{Insecure: true},
		hlc.NewClock(hlc.UnixNano, time.Nanosecond),
		n.Stopper,
	)
	var err error
	n.tlsConfig, err = n.rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(context.TODO(), err)
	}

	for i := 0; i < nodeCount; i++ {
		node, err := n.CreateNode()
		if err != nil {
			log.Fatal(context.TODO(), err)
		}
		// Build a resolver for each instance or we'll get data races.
		if createResolvers {
			r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr())
			if err != nil {
				log.Fatalf(context.TODO(), "bad gossip address %s: %s", n.Nodes[0].Addr(), err)
			}
			node.Gossip.SetResolvers([]resolver.Resolver{r})
		}
	}
	return n
}
func TestSpanStatsGRPCResponse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ts := startServer(t)
	defer ts.Stopper().Stop()

	rpcStopper := stop.NewStopper()
	defer rpcStopper.Stop()
	rpcContext := rpc.NewContext(log.AmbientContext{}, ts.RPCContext().Config, ts.Clock(), rpcStopper)
	request := serverpb.SpanStatsRequest{
		NodeID:   "1",
		StartKey: []byte(roachpb.RKeyMin),
		EndKey:   []byte(roachpb.RKeyMax),
	}

	url := ts.ServingAddr()
	conn, err := rpcContext.GRPCDial(url)
	if err != nil {
		t.Fatal(err)
	}
	client := serverpb.NewStatusClient(conn)

	response, err := client.SpanStats(context.Background(), &request)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := int(response.RangeCount), ExpectedInitialRangeCount(); a != e {
		t.Errorf("expected %d ranges, found %d", e, a)
	}
}
func newInsecureRPCContext(stopper *stop.Stopper) *rpc.Context {
	return rpc.NewContext(
		log.AmbientContext{},
		&base.Config{Insecure: true},
		hlc.NewClock(hlc.UnixNano, time.Nanosecond),
		stopper,
	)
}
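// The following is a hedged usage sketch, not code from the source tree: it
// shows how newInsecureRPCContext could back a throwaway gossip instance in a
// test, mirroring the context/server/gossip wiring used by the other helpers
// in this section. The helper name, the node ID, and the choice to never start
// the gRPC server are illustrative assumptions.
func newInsecureTestGossip(stopper *stop.Stopper) *gossip.Gossip {
	rpcContext := newInsecureRPCContext(stopper)
	server := rpc.NewServer(rpcContext) // never started; gossip only needs it to register its service
	return gossip.NewTest(1, rpcContext, server, nil, stopper, metric.NewRegistry())
}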
// Start starts the test cluster by bootstrapping an in-memory store
// (defaults to maximum of 50M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.Addr after Start() for client connections. Use Stop()
// to shut down the server after the test completes.
func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Config, initSender InitSenderFn) {
	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
	nc := &base.NodeIDContainer{}
	ambient.AddLogTag("n", nc)

	nodeID := roachpb.NodeID(1)
	nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID}

	ltc.tester = t
	ltc.Manual = hlc.NewManualClock(0)
	ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano)
	ltc.Stopper = stop.NewStopper()
	rpcContext := rpc.NewContext(ambient, baseCtx, ltc.Clock, ltc.Stopper)
	server := rpc.NewServer(rpcContext) // never started
	ltc.Gossip = gossip.New(ambient, nc, rpcContext, server, nil, ltc.Stopper, metric.NewRegistry())
	ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20)
	ltc.Stopper.AddCloser(ltc.Eng)

	ltc.Stores = storage.NewStores(ambient, ltc.Clock)

	ltc.Sender = initSender(nodeDesc, ambient.Tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper, ltc.Gossip)
	if ltc.DBContext == nil {
		dbCtx := client.DefaultDBContext()
		ltc.DBContext = &dbCtx
	}
	ltc.DB = client.NewDBWithContext(ltc.Sender, *ltc.DBContext)
	transport := storage.NewDummyRaftTransport()
	cfg := storage.TestStoreConfig()
	if ltc.RangeRetryOptions != nil {
		cfg.RangeRetryOptions = *ltc.RangeRetryOptions
	}
	cfg.AmbientCtx = ambient
	cfg.Clock = ltc.Clock
	cfg.DB = ltc.DB
	cfg.Gossip = ltc.Gossip
	cfg.Transport = transport
	cfg.MetricsSampleInterval = metric.TestSampleInterval
	ltc.Store = storage.NewStore(cfg, ltc.Eng, nodeDesc)
	if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Stores.AddStore(ltc.Store)
	if err := ltc.Store.BootstrapRange(nil); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	if err := ltc.Store.Start(context.Background(), ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	nc.Set(context.TODO(), nodeDesc.NodeID)
	if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
		t.Fatalf("unable to set node descriptor: %s", err)
	}
}
// NewClient implements the Cluster interface.
func (f *Farmer) NewClient(t *testing.T, i int) (*client.DB, *stop.Stopper) {
	stopper := stop.NewStopper()
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		Insecure: true,
		User:     security.NodeUser,
	}, nil, stopper)
	sender, err := client.NewSender(rpcContext, f.Addr(i, base.DefaultPort))
	if err != nil {
		t.Fatal(err)
	}
	return client.NewDB(sender), stopper
}
func createTestClientForUser(t *testing.T, stopper *stop.Stopper, addr, user string) *client.DB {
	var ctx base.Config
	ctx.InitDefaults()
	ctx.User = user
	ctx.SSLCA = filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert)
	ctx.SSLCert = filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.crt", user))
	ctx.SSLCertKey = filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.key", user))
	sender, err := client.NewSender(rpc.NewContext(log.AmbientContext{}, &ctx, nil, stopper), addr)
	if err != nil {
		t.Fatal(err)
	}
	return client.NewDB(sender)
}
// NewClient implements the Cluster interface.
func (l *LocalCluster) NewClient(ctx context.Context, i int) (*roachClient.DB, error) {
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		User:       security.NodeUser,
		SSLCA:      filepath.Join(l.CertsDir, security.EmbeddedCACert),
		SSLCert:    filepath.Join(l.CertsDir, security.EmbeddedNodeCert),
		SSLCertKey: filepath.Join(l.CertsDir, security.EmbeddedNodeKey),
	}, hlc.NewClock(hlc.UnixNano, 0), l.stopper)
	conn, err := rpcContext.GRPCDial(l.Nodes[i].Addr(ctx, DefaultTCP).String())
	if err != nil {
		return nil, err
	}
	return roachClient.NewDB(roachClient.NewSender(conn)), nil
}
// NewClient implements the Cluster interface.
func (l *LocalCluster) NewClient(t *testing.T, i int) (*roachClient.DB, *stop.Stopper) {
	stopper := stop.NewStopper()
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		User:       security.NodeUser,
		SSLCA:      filepath.Join(l.CertsDir, security.EmbeddedCACert),
		SSLCert:    filepath.Join(l.CertsDir, security.EmbeddedNodeCert),
		SSLCertKey: filepath.Join(l.CertsDir, security.EmbeddedNodeKey),
	}, hlc.NewClock(hlc.UnixNano, 0), stopper)
	conn, err := rpcContext.GRPCDial(l.Nodes[i].Addr(DefaultTCP).String())
	if err != nil {
		t.Fatal(err)
	}
	return roachClient.NewDB(roachClient.NewSender(conn)), stopper
}
func createTestClientForUser(
	t *testing.T, stopper *stop.Stopper, addr, user string, dbCtx client.DBContext,
) *client.DB {
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		User:       user,
		SSLCA:      filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert),
		SSLCert:    filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.crt", user)),
		SSLCertKey: filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.key", user)),
	}, nil, stopper)
	sender, err := client.NewSender(rpcContext, addr)
	if err != nil {
		t.Fatal(err)
	}
	return client.NewDBWithContext(sender, dbCtx)
}
func createTestClientForUser(
	t *testing.T, s serverutils.TestServerInterface, user string,
) *client.DB {
	var ctx base.Config
	ctx.InitDefaults()
	ctx.User = user
	ctx.SSLCA = filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert)
	ctx.SSLCert = filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.crt", user))
	ctx.SSLCertKey = filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.key", user))
	conn, err := rpc.NewContext(log.AmbientContext{}, &ctx, s.Clock(), s.Stopper()).GRPCDial(s.ServingAddr())
	if err != nil {
		t.Fatal(err)
	}
	return client.NewDB(client.NewSender(conn))
}
func createTestClientForUser(
	t *testing.T, s serverutils.TestServerInterface, user string, dbCtx client.DBContext,
) *client.DB {
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		User:       user,
		SSLCA:      filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert),
		SSLCert:    filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.crt", user)),
		SSLCertKey: filepath.Join(security.EmbeddedCertsDir, fmt.Sprintf("%s.key", user)),
	}, s.Clock(), s.Stopper())
	conn, err := rpcContext.GRPCDial(s.ServingAddr())
	if err != nil {
		t.Fatal(err)
	}
	return client.NewDBWithContext(client.NewSender(conn), dbCtx)
}
func newRaftTransportTestContext(t testing.TB) *raftTransportTestContext {
	rttc := &raftTransportTestContext{
		t:          t,
		stopper:    stop.NewStopper(),
		transports: map[roachpb.NodeID]*storage.RaftTransport{},
	}
	rttc.nodeRPCContext = rpc.NewContext(
		log.AmbientContext{}, testutils.NewNodeTestBaseContext(), nil, rttc.stopper,
	)
	server := rpc.NewServer(rttc.nodeRPCContext) // never started
	rttc.gossip = gossip.NewTest(
		1, rttc.nodeRPCContext, server, nil, rttc.stopper, metric.NewRegistry(),
	)
	return rttc
}
// TestClientRegisterWithInitNodeID verifies two clients' gossip requests with NodeID 0.
func TestClientRegisterWithInitNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Create three gossip nodes, and connect to the first with NodeID 0.
	var g []*Gossip
	var gossipAddr string
	for i := 0; i < 3; i++ {
		RPCContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

		server := rpc.NewServer(RPCContext)
		ln, err := netutil.ListenAndServeGRPC(stopper, server, util.IsolatedTestAddr)
		if err != nil {
			t.Fatal(err)
		}

		// Connect to the first gossip node.
		if gossipAddr == "" {
			gossipAddr = ln.Addr().String()
		}

		var resolvers []resolver.Resolver
		resolver, err := resolver.NewResolver(gossipAddr)
		if err != nil {
			t.Fatal(err)
		}
		resolvers = append(resolvers, resolver)
		// The node ID must be non-zero.
		gnode := NewTest(
			roachpb.NodeID(i+1), RPCContext, server, resolvers, stopper, metric.NewRegistry(),
		)
		g = append(g, gnode)
		gnode.Start(ln.Addr())
	}

	util.SucceedsSoon(t, func() error {
		// The first gossip node should have two gossip client addresses
		// in its nodeMap if all three gossip nodes registered successfully.
		g[0].mu.Lock()
		defer g[0].mu.Unlock()
		if a, e := len(g[0].mu.nodeMap), 2; a != e {
			return errors.Errorf("expected %s to contain %d nodes, got %d", g[0].mu.nodeMap, e, a)
		}
		return nil
	})
}
// TestGossipInfoStore verifies operation of gossip instance infostore.
func TestGossipInfoStore(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)
	g := NewTest(1, rpcContext, rpc.NewServer(rpcContext), nil, stopper, metric.NewRegistry())
	slice := []byte("b")
	if err := g.AddInfo("s", slice, time.Hour); err != nil {
		t.Fatal(err)
	}
	if val, err := g.GetInfo("s"); !bytes.Equal(val, slice) || err != nil {
		t.Errorf("error fetching string: %v", err)
	}
	if _, err := g.GetInfo("s2"); err == nil {
		t.Errorf("expected error fetching nonexistent key \"s2\"")
	}
}
// createTestStorePool creates a stopper, gossip and storePool for use in
// tests. Stopper must be stopped by the caller.
func createTestStorePool(
	timeUntilStoreDead time.Duration,
) (*stop.Stopper, *gossip.Gossip, *hlc.ManualClock, *StorePool) {
	stopper := stop.NewStopper()
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, clock, stopper)
	server := rpc.NewServer(rpcContext) // never started
	g := gossip.NewTest(1, rpcContext, server, nil, stopper, metric.NewRegistry())
	storePool := NewStorePool(
		log.AmbientContext{},
		g,
		clock,
		rpcContext,
		timeUntilStoreDead,
		stopper,
	)
	return stopper, g, mc, storePool
}
// Start starts a cluster. The numWorkers parameter controls the SQL connection
// settings to avoid unnecessary connection creation. The args parameter can be
// used to pass extra arguments to each node.
func (c *Cluster) Start(db string, numWorkers int, args, env []string) {
	c.started = timeutil.Now()

	baseCtx := &base.Config{
		User:     security.NodeUser,
		Insecure: true,
	}
	c.rpcCtx = rpc.NewContext(log.AmbientContext{}, baseCtx, nil, c.stopper)

	for i := range c.Nodes {
		c.Nodes[i] = c.makeNode(i, args, env)
		c.Clients[i] = c.makeClient(i)
		c.Status[i] = c.makeStatus(i)
		c.DB[i] = c.makeDB(i, numWorkers, db)
	}

	log.Infof(context.Background(), "started %.3fs", timeutil.Since(c.started).Seconds())
	c.waitForFullReplication()
}
// createTestStorePool creates a stopper, gossip and storePool for use in
// tests. Stopper must be stopped by the caller.
func createTestStorePool(
	timeUntilStoreDead time.Duration, deterministic bool, defaultNodeLiveness bool,
) (*stop.Stopper, *gossip.Gossip, *hlc.ManualClock, *StorePool, *mockNodeLiveness) {
	stopper := stop.NewStopper()
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, clock, stopper)
	server := rpc.NewServer(rpcContext) // never started
	g := gossip.NewTest(1, rpcContext, server, nil, stopper, metric.NewRegistry())
	mnl := newMockNodeLiveness(defaultNodeLiveness)
	storePool := NewStorePool(
		log.AmbientContext{},
		g,
		clock,
		mnl.nodeLivenessFunc,
		timeUntilStoreDead,
		deterministic,
	)
	return stopper, g, mc, storePool, mnl
}
func TestGossipGetNextBootstrapAddress(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	resolverSpecs := []string{
		"127.0.0.1:9000",
		"127.0.0.1:9001",
		"localhost:9004",
	}

	resolvers := []resolver.Resolver{}
	for _, rs := range resolverSpecs {
		resolver, err := resolver.NewResolver(rs)
		if err == nil {
			resolvers = append(resolvers, resolver)
		}
	}
	if len(resolvers) != 3 {
		t.Errorf("expected 3 resolvers; got %d", len(resolvers))
	}
	server := rpc.NewServer(
		rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper),
	)
	g := NewTest(0, nil, server, resolvers, stop.NewStopper(), metric.NewRegistry())

	// Using specified resolvers, fetch bootstrap addresses 3 times
	// and verify the results match expected addresses.
	expAddresses := []string{
		"127.0.0.1:9000",
		"127.0.0.1:9001",
		"localhost:9004",
	}
	for i := 0; i < len(expAddresses); i++ {
		if addr := g.getNextBootstrapAddress(); addr == nil {
			t.Errorf("%d: unexpected nil addr when expecting %s", i, expAddresses[i])
		} else if addrStr := addr.String(); addrStr != expAddresses[i] {
			t.Errorf("%d: expected addr %s; got %s", i, expAddresses[i], addrStr)
		}
	}
}
func makeDBClient() (*client.DB, *stop.Stopper, error) {
	stopper := stop.NewStopper()
	cfg := &base.Config{
		User:       security.NodeUser,
		SSLCA:      baseCfg.SSLCA,
		SSLCert:    baseCfg.SSLCert,
		SSLCertKey: baseCfg.SSLCertKey,
		Insecure:   baseCfg.Insecure,
	}
	addr, err := addrWithDefaultHost(baseCfg.Addr)
	if err != nil {
		return nil, nil, err
	}
	sender, err := client.NewSender(
		rpc.NewContext(log.AmbientContext{}, cfg, nil, stopper),
		addr,
	)
	if err != nil {
		stopper.Stop()
		return nil, nil, errors.Wrap(err, "failed to initialize KV client")
	}
	return client.NewDB(sender), stopper, nil
}
// TestClientNodeID verifies a client's gossip request with the correct NodeID.
func TestClientNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)()

	localNodeID := roachpb.NodeID(1)
	local, remote, stopper := startFakeServerGossips(t, localNodeID)

	// Use an insecure context. We're talking to a TCP socket which is not in the certs.
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, nil, stopper)

	c := newClient(log.AmbientContext{}, &remote.nodeAddr, makeMetrics())
	disconnected := make(chan *client, 1)
	disconnected <- c

	defer func() {
		stopper.Stop()
		if c != <-disconnected {
			t.Errorf("expected client disconnect after remote close")
		}
	}()

	// A gossip client may fail to start if the grpc connection times out which
	// can happen under load (such as in CircleCI or using `make stress`). So we
	// loop creating clients until success or the test times out.
	for {
		// Wait for c.gossip to start.
		select {
		case receivedNodeID := <-remote.nodeIDChan:
			if receivedNodeID != localNodeID {
				t.Fatalf("client should send NodeID with %v, got %v", localNodeID, receivedNodeID)
			}
			return
		case <-disconnected:
			// The client hasn't been started or failed to start, loop and try again.
			c.start(local, disconnected, rpcContext, stopper, localNodeID, rpcContext.NewBreaker())
		}
	}
}
// newNodeTestContext returns an rpc.Context for testing.
// It is meant to be used by nodes.
func newNodeTestContext(clock *hlc.Clock, stopper *stop.Stopper) *rpc.Context {
	ctx := rpc.NewContext(log.AmbientContext{}, testutils.NewNodeTestBaseContext(), clock, stopper)
	ctx.HeartbeatInterval = 10 * time.Millisecond
	ctx.HeartbeatTimeout = 5 * time.Second
	return ctx
}
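// Hedged usage sketch (not from the source): dialing a locally served test gRPC
// server with contexts built by newNodeTestContext. Every call below mirrors a
// pattern that appears elsewhere in this section (rpc.NewServer,
// netutil.ListenAndServeGRPC, GRPCDial); the helper name dialSelf and the
// *grpc.ClientConn return type are assumptions for illustration only.
func dialSelf(t *testing.T, clock *hlc.Clock, stopper *stop.Stopper) *grpc.ClientConn {
	// Serve an RPC server on an isolated test address.
	serverCtx := newNodeTestContext(clock, stopper)
	s := rpc.NewServer(serverCtx)
	ln, err := netutil.ListenAndServeGRPC(stopper, s, util.IsolatedTestAddr)
	if err != nil {
		t.Fatal(err)
	}
	// Dial it back with a separate client context sharing the same clock.
	clientCtx := newNodeTestContext(clock, stopper)
	conn, err := clientCtx.GRPCDial(ln.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	return conn
}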
func TestStopServer(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Use insecure mode so our servers listen on util.IsolatedTestAddr
	// and they fail cleanly instead of interfering with other tests.
	// See https://github.com/cockroachdb/cockroach/issues/9256
	tc := StartTestCluster(t, 3, base.TestClusterArgs{
		ServerArgs: base.TestServerArgs{
			Insecure: true,
		},
		ReplicationMode: base.ReplicationAuto,
	})
	defer tc.Stopper().Stop()

	// Connect to server 1, ensure it is answering requests over HTTP and GRPC.
	server1 := tc.Server(1)
	var response serverpb.HealthResponse

	httpClient1, err := server1.GetHTTPClient()
	if err != nil {
		t.Fatal(err)
	}
	url := server1.AdminURL() + "/_admin/v1/health"
	if err := httputil.GetJSON(httpClient1, url, &response); err != nil {
		t.Fatal(err)
	}

	rpcContext := rpc.NewContext(
		log.AmbientContext{}, tc.Server(1).RPCContext().Config, tc.Server(1).Clock(), tc.Stopper(),
	)
	conn, err := rpcContext.GRPCDial(server1.ServingAddr())
	if err != nil {
		t.Fatal(err)
	}
	adminClient1 := serverpb.NewAdminClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	if _, err := adminClient1.Health(ctx, &serverpb.HealthRequest{}); err != nil {
		t.Fatal(err)
	}

	// Stop server 1.
	tc.StopServer(1)

	// Verify HTTP and GRPC requests to the server now fail.
	httpErrorText := "connection refused"
	if err := httputil.GetJSON(httpClient1, url, &response); err == nil {
		t.Fatal("Expected HTTP Request to fail after server stopped")
	} else if !testutils.IsError(err, httpErrorText) {
		t.Fatalf("Expected error from server with text %q, got error with text %q", httpErrorText, err.Error())
	}

	grpcErrorText := "rpc error"
	if _, err := adminClient1.Health(ctx, &serverpb.HealthRequest{}); err == nil {
		t.Fatal("Expected GRPC Request to fail after server stopped")
	} else if !testutils.IsError(err, grpcErrorText) {
		t.Fatalf("Expected error from GRPC with text %q, got error with text %q", grpcErrorText, err.Error())
	}

	// Verify that requests to Server 0 still work.
	httpClient1, err = tc.Server(0).GetHTTPClient()
	if err != nil {
		t.Fatal(err)
	}
	url = tc.Server(0).AdminURL() + "/_admin/v1/health"
	if err := httputil.GetJSON(httpClient1, url, &response); err != nil {
		t.Fatal(err)
	}
}
// createCluster generates a new cluster using the provided stopper and the
// number of nodes supplied. Each node will have one store to start.
func createCluster(
	stopper *stop.Stopper,
	nodeCount int,
	epochWriter, actionWriter io.Writer,
	script Script,
	rand *rand.Rand,
) *Cluster {
	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, clock, stopper)
	server := rpc.NewServer(rpcContext)
	// We set the node ID to MaxInt32 for the cluster Gossip instance to prevent
	// conflicts with real node IDs.
	g := gossip.NewTest(math.MaxInt32, rpcContext, server, nil, stopper, metric.NewRegistry())
	// Set the store pool to deterministic so that a run with the exact same
	// input will always produce the same output.
	storePool := storage.NewStorePool(
		log.AmbientContext{},
		g,
		clock,
		rpcContext,
		storage.TestTimeUntilStoreDeadOff,
		stopper,
		/* deterministic */ true,
	)
	c := &Cluster{
		stopper:   stopper,
		clock:     clock,
		rpc:       rpcContext,
		gossip:    g,
		storePool: storePool,
		allocator: storage.MakeAllocator(storePool, storage.AllocatorOptions{
			AllowRebalance: true,
		}),
		storeGossiper:   gossiputil.NewStoreGossiper(g),
		nodes:           make(map[roachpb.NodeID]*Node),
		stores:          make(map[roachpb.StoreID]*Store),
		ranges:          make(map[roachpb.RangeID]*Range),
		rangeIDsByStore: make(map[roachpb.StoreID]roachpb.RangeIDSlice),
		rand:            rand,
		epochWriter:     tabwriter.NewWriter(epochWriter, 8, 1, 2, ' ', 0),
		actionWriter:    tabwriter.NewWriter(actionWriter, 8, 1, 2, ' ', 0),
		script:          script,
		epoch:           -1,
	}

	// Add the nodes.
	for i := 0; i < nodeCount; i++ {
		c.addNewNodeWithStore()
	}

	// Add a single range and add it to the first node's first store.
	firstRange := c.addRange()
	firstRange.addReplica(c.stores[0])

	c.calculateRangeIDsByStore()

	// Output the first epoch header.
	c.epoch = 0
	c.OutputEpochHeader()
	c.OutputEpoch()
	c.flush()

	return c
}
// MakeFarmer creates a terrafarm farmer for use in acceptance tests.
func MakeFarmer(t testing.TB, prefix string, stopper *stop.Stopper) *terrafarm.Farmer {
	SkipUnlessRemote(t)
	if *flagKeyName == "" {
		t.Fatal("-key-name is required") // saves a lot of trouble
	}
	if e := "GOOGLE_PROJECT"; os.Getenv(e) == "" {
		t.Fatalf("%s environment variable must be set for Terraform", e)
	}

	logDir := *flagLogDir
	if logDir == "" {
		var err error
		logDir, err = ioutil.TempDir("", "clustertest_")
		if err != nil {
			t.Fatal(err)
		}
	}
	if !filepath.IsAbs(logDir) {
		logDir = filepath.Join(filepath.Clean(os.ExpandEnv("${PWD}")), logDir)
	}
	stores := "--store=/mnt/data0"
	for j := 1; j < *flagStores; j++ {
		stores += " --store=/mnt/data" + strconv.Itoa(j)
	}

	// Collect the variables to be passed to the Terraform config.
	terraformVars := make(map[string]string)
	if *flagTFCockroachBinary != "" {
		terraformVars["cockroach_binary"] = *flagTFCockroachBinary
	}
	if *flagTFCockroachFlags != "" {
		terraformVars["cockroach_flags"] = *flagTFCockroachFlags
	}
	if *flagTFCockroachEnv != "" {
		terraformVars["cockroach_env"] = *flagTFCockroachEnv
	}
	terraformVars["cockroach_disk_type"] = *flagTFDiskType

	var name string
	if *flagTFReuseCluster == "" {
		// We concatenate a random name to the prefix (for Terraform resource
		// names) to allow multiple instances of the same test to run
		// concurrently. The prefix is also used as the name of the Terraform
		// state file.
		name = prefix
		if name != "" {
			name += "-"
		}
		name += getRandomName()
		// Rudimentary collision control.
		for i := 0; ; i++ {
			newName := name
			if i > 0 {
				newName += strconv.Itoa(i)
			}
			_, err := os.Stat(filepath.Join(*flagCwd, newName+".tfstate"))
			if os.IsNotExist(err) {
				name = newName
				break
			}
		}
	} else {
		name = *flagTFReuseCluster
	}

	if !prefixRE.MatchString(name) {
		t.Fatalf("generated cluster name '%s' must match regex %s", name, prefixRE)
	}

	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{
		Insecure: true,
		User:     security.NodeUser,
	}, hlc.NewClock(hlc.UnixNano, 0), stopper)

	f := &terrafarm.Farmer{
		Output:      os.Stderr,
		Cwd:         *flagCwd,
		LogDir:      logDir,
		KeyName:     *flagKeyName,
		Stores:      strconv.Quote(stores),
		Prefix:      name,
		StateFile:   name + ".tfstate",
		AddVars:     terraformVars,
		KeepCluster: flagTFKeepCluster.String(),
		RPCContext:  rpcContext,
	}
	log.Infof(context.Background(), "logging to %s", logDir)
	return f
}
// NewServer creates a Server from a server.Config.
func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", cfg.AdvertiseAddr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", cfg.AdvertiseAddr, err)
	}

	if cfg.AmbientCtx.Tracer == nil {
		cfg.AmbientCtx.Tracer = tracing.NewTracer()
	}

	// Try loading the TLS configs before anything else.
	if _, err := cfg.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := cfg.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano, cfg.MaxOffset),
		stopper: stopper,
		cfg:     cfg,
	}
	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass an ambient context to the various server components, but we
	// won't know the node ID until we Start(). At that point it's too late to
	// change the ambient contexts in the components (various background processes
	// will have already started using them).
	//
	// NodeIDContainer allows us to add the log tag to the context now and update
	// the value asynchronously. It's not significantly more expensive than a
	// regular tag since it's just doing an (atomic) load when a log/trace message
	// is constructed. The node ID is set by the Store if this host was
	// bootstrapped; otherwise a new one is allocated in Node.
	s.cfg.AmbientCtx.AddLogTag("n", &s.nodeIDContainer)

	ctx := s.AnnotateCtx(context.Background())
	if s.cfg.Insecure {
		log.Warning(ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}

	s.rpcContext = rpc.NewContext(s.cfg.AmbientCtx, s.cfg.Config, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(ctx, err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.cfg.AmbientCtx,
		&s.nodeIDContainer,
		s.rpcContext,
		s.grpc,
		s.cfg.GossipBootstrapResolvers,
		s.stopper,
		s.registry,
	)
	s.storePool = storage.NewStorePool(
		s.cfg.AmbientCtx,
		s.gossip,
		s.clock,
		s.rpcContext,
		s.cfg.TimeUntilStoreDead,
		s.stopper,
		/* deterministic */ false,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		AmbientCtx:      s.cfg.AmbientCtx,
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics(s.cfg.MetricsSampleInterval)
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(
		s.cfg.AmbientCtx,
		s.distSender,
		s.clock,
		s.cfg.Linearizable,
		s.stopper,
		txnMetrics,
	)
	s.db = client.NewDB(s.txnCoordSender)

	// Use the range lease expiration and renewal durations as the node
	// liveness expiration and heartbeat interval.
	active, renewal := storage.RangeLeaseDurations(
		storage.RaftElectionTimeout(s.cfg.RaftTickInterval, s.cfg.RaftElectionTimeoutTicks))
	s.nodeLiveness = storage.NewNodeLiveness(
		s.cfg.AmbientCtx, s.clock, s.db, s.gossip, active, renewal,
	)
	s.registry.AddMetricStruct(s.nodeLiveness.Metrics())

	s.raftTransport = storage.NewRaftTransport(
		s.cfg.AmbientCtx, storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext,
	)

	s.kvDB = kv.NewDBServer(s.cfg.Config, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up internal memory metrics for use by internal SQL executors.
	s.internalMemMetrics = sql.MakeMemMetrics("internal")
	s.registry.AddMetricStruct(s.internalMemMetrics)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if cfg.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *s.cfg.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(&s.nodeIDContainer, *s.db, s.clock, lmKnobs,
		s.stopper, &s.internalMemMetrics)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		AmbientContext: s.cfg.AmbientCtx,
		DB:             s.db,
		RPCContext:     s.rpcContext,
		Stopper:        s.stopper,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up admin memory metrics for use by admin SQL executors.
	s.adminMemMetrics = sql.MakeMemMetrics("admin")
	s.registry.AddMetricStruct(s.adminMemMetrics)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		AmbientCtx:            s.cfg.AmbientCtx,
		NodeID:                &s.nodeIDContainer,
		DB:                    s.db,
		Gossip:                s.gossip,
		LeaseManager:          s.leaseMgr,
		Clock:                 s.clock,
		DistSQLSrv:            s.distSQLServer,
		MetricsSampleInterval: s.cfg.MetricsSampleInterval,
	}
	if s.cfg.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = s.cfg.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		execCfg.SchemaChangerTestingKnobs =
			s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	} else {
		execCfg.SchemaChangerTestingKnobs = &sql.SchemaChangerTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper, &s.adminMemMetrics)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(
		s.cfg.AmbientCtx, s.cfg.Config, s.sqlExecutor, &s.internalMemMetrics, s.cfg.SQLMemoryPoolSize,
	)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.cfg.AmbientCtx, s.tsDB, s.cfg.TimeSeriesServerConfig, s.stopper)

	// TODO(bdarnell): make StoreConfig configurable.
	storeCfg := storage.StoreConfig{
		AmbientCtx:                     s.cfg.AmbientCtx,
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		NodeLiveness:                   s.nodeLiveness,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.cfg.RaftTickInterval,
		ScanInterval:                   s.cfg.ScanInterval,
		ScanMaxIdleTime:                s.cfg.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.cfg.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.cfg.ConsistencyCheckPanicOnFailure,
		MetricsSampleInterval:          s.cfg.MetricsSampleInterval,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: s.cfg.EventLogEnabled,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
		RangeLeaseActiveDuration:  active,
		RangeLeaseRenewalDuration: renewal,
		TimeSeriesDataStore:       s.tsDB,
	}
	if s.cfg.TestingKnobs.Store != nil {
		storeCfg.TestingKnobs = *s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(storeCfg, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterConsistencyServer(s.grpc, s.node.storesServer)
	storage.RegisterFreezeServer(s.grpc, s.node.storesServer)

	s.admin = newAdminServer(s)
	s.status = newStatusServer(
		s.cfg.AmbientCtx, s.db, s.gossip, s.recorder, s.rpcContext, s.node.stores,
	)
	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}