func TestGossipRaceLogStatus(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())

	local.mu.Lock()
	peer := startGossip(2, stopper, t, metric.NewRegistry())
	local.startClient(&peer.is.NodeAddr)
	local.mu.Unlock()

	// Race gossiping against LogStatus.
	gun := make(chan struct{})
	for i := uint8(0); i < 10; i++ {
		go func() {
			<-gun
			local.LogStatus()
		}()
		gun <- struct{}{}
		if err := local.AddInfo(
			strconv.FormatUint(uint64(i), 10),
			[]byte{i},
			time.Hour,
		); err != nil {
			t.Fatal(err)
		}
	}
	close(gun)
}

// TestClientGossip verifies a client can gossip a delta to the server.
func TestClientGossip(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())
	disconnected := make(chan *client, 1)
	c := newClient(&remote.is.NodeAddr, makeMetrics(metric.NewRegistry()))

	defer func() {
		stopper.Stop()
		if c != <-disconnected {
			t.Errorf("expected client disconnect after remote close")
		}
	}()

	if err := local.AddInfo("local-key", nil, time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := remote.AddInfo("remote-key", nil, time.Hour); err != nil {
		t.Fatal(err)
	}

	gossipSucceedsSoon(t, stopper, disconnected, map[*client]*Gossip{
		c: local,
	}, func() error {
		if _, err := remote.GetInfo("local-key"); err != nil {
			return err
		}
		if _, err := local.GetInfo("remote-key"); err != nil {
			return err
		}
		return nil
	})
}

// TestClientDisallowMultipleConns verifies that the server disallows
// multiple connections from the same client node ID.
func TestClientDisallowMultipleConns(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())
	local.mu.Lock()
	remote.mu.Lock()
	rAddr := remote.is.NodeAddr
	// Start two clients from local to remote. RPC client cache is
	// disabled via the context, so we'll start two different outgoing
	// connections.
	local.startClient(&rAddr)
	local.startClient(&rAddr)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage()
	remote.manage()
	util.SucceedsSoon(t, func() error {
		// Verify that the remote server has only a single incoming
		// connection and the local server has only a single outgoing
		// connection.
		local.mu.Lock()
		remote.mu.Lock()
		outgoing := local.outgoing.len()
		incoming := remote.incoming.len()
		local.mu.Unlock()
		remote.mu.Unlock()
		if outgoing == 1 && incoming == 1 &&
			verifyServerMaps(local, 0) && verifyServerMaps(remote, 1) {
			return nil
		}
		return errors.Errorf("incorrect number of incoming (%d) or outgoing (%d) connections", incoming, outgoing)
	})
}

// TestClientDisconnectRedundant verifies that the gossip server
// will drop an outgoing client connection that is already an
// inbound client connection of another node.
func TestClientDisconnectRedundant(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())
	// startClient requires that the locks be held, so acquire them here.
	local.mu.Lock()
	remote.mu.Lock()
	rAddr := remote.mu.is.NodeAddr
	lAddr := local.mu.is.NodeAddr
	local.startClient(&rAddr, remote.mu.is.NodeID)
	remote.startClient(&lAddr, local.mu.is.NodeID)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage()
	remote.manage()
	util.SucceedsSoon(t, func() error {
		// Check which of the clients is connected to the other.
		ok1 := local.findClient(func(c *client) bool {
			return c.addr.String() == rAddr.String()
		}) != nil
		ok2 := remote.findClient(func(c *client) bool {
			return c.addr.String() == lAddr.String()
		}) != nil
		// We expect node 2 to disconnect; if both are still connected,
		// it's possible that node 1 gossiped before node 2 connected, in
		// which case we have to gossip from node 1 to trigger the
		// disconnect redundant client code.
		if ok1 && ok2 {
			if err := local.AddInfo("local-key", nil, time.Second); err != nil {
				t.Fatal(err)
			}
		} else if ok1 && !ok2 && verifyServerMaps(local, 0) && verifyServerMaps(remote, 1) {
			return nil
		}
		return errors.New("local client to remote not yet closed as redundant")
	})
}

// TestClientRetryBootstrap verifies that an initial failure to connect
// to a bootstrap host doesn't stall the bootstrapping process in the
// absence of any additional activity. This can happen during acceptance
// tests if the DNS can't look up hostnames when gossip is started.
func TestClientRetryBootstrap(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())
	remote.mu.Lock()
	rAddr := remote.is.NodeAddr
	remote.mu.Unlock()

	if err := local.AddInfo("local-key", []byte("hello"), 0*time.Second); err != nil {
		t.Fatal(err)
	}

	local.SetBootstrapInterval(10 * time.Millisecond)
	local.SetResolvers([]resolver.Resolver{
		&testResolver{addr: rAddr.String(), numFails: 3, numSuccesses: 1},
	})
	local.bootstrap()
	local.manage()

	util.SucceedsSoon(t, func() error {
		_, err := remote.GetInfo("local-key")
		return err
	})
}

// TestClientForwardUnresolved verifies that a client does not resolve a forward
// address prematurely.
func TestClientForwardUnresolved(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	const nodeID = 1
	local := startGossip(nodeID, stopper, t, metric.NewRegistry())
	local.mu.Lock()
	addr := local.is.NodeAddr
	local.mu.Unlock()
	client := newClient(&addr, makeMetrics(metric.NewRegistry())) // never started

	newAddr := util.UnresolvedAddr{
		NetworkField: "tcp",
		AddressField: "localhost:2345",
	}
	reply := &Response{
		NodeID:          nodeID,
		Addr:            addr,
		AlternateNodeID: nodeID + 1,
		AlternateAddr:   &newAddr,
	}
	if err := client.handleResponse(local, reply); !testutils.IsError(err, "received forward") {
		t.Fatal(err)
	}
	if !proto.Equal(client.forwardAddr, &newAddr) {
		t.Fatalf("unexpected forward address %v, expected %v", client.forwardAddr, &newAddr)
	}
}

// TestClientGossipMetrics verifies that gossip stats are generated.
func TestClientGossipMetrics(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	localRegistry := metric.NewRegistry()
	local := startGossip(1, stopper, t, localRegistry)
	remoteRegistry := metric.NewRegistry()
	remote := startGossip(2, stopper, t, remoteRegistry)

	gossipSucceedsSoon(
		t, stopper, make(chan *client, 2),
		map[*client]*Gossip{
			newClient(&local.is.NodeAddr, makeMetrics(metric.NewRegistry())):  remote,
			newClient(&remote.is.NodeAddr, makeMetrics(metric.NewRegistry())): local,
		},
		func() error {
			if err := local.AddInfo("local-key", nil, time.Hour); err != nil {
				t.Fatal(err)
			}
			if err := remote.AddInfo("remote-key", nil, time.Hour); err != nil {
				t.Fatal(err)
			}

			// Infos/Bytes Sent/Received should not be zero.
			for i, reg := range []*metric.Registry{localRegistry, remoteRegistry} {
				for _, ratesName := range []string{
					InfosSentRatesName,
					InfosReceivedRatesName,
					BytesSentRatesName,
					BytesReceivedRatesName,
				} {
					counterName := ratesName + "-count"
					counter := reg.GetCounter(counterName)
					if counter == nil {
						return errors.Errorf("%d: missing counter %q", i, counterName)
					}
					if count := counter.Count(); count <= 0 {
						return errors.Errorf("%d: expected metrics counter %q > 0; = %d", i, counterName, count)
					}
				}
			}
			// Since there are two gossip nodes, there should be at least one incoming
			// and outgoing connection.
			for i, reg := range []*metric.Registry{localRegistry, remoteRegistry} {
				for _, name := range []string{
					ConnectionsIncomingGaugeName,
					ConnectionsOutgoingGaugeName,
				} {
					gauge := reg.GetGauge(name)
					if gauge == nil {
						return errors.Errorf("%d: missing gauge %q", i, name)
					}
					if count := gauge.Value(); count <= 0 {
						return errors.Errorf("%d: expected metrics gauge %q > 0; = %d", i, name, count)
					}
				}
			}
			return nil
		})
}

func TestGossipOrphanedStallDetection(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	local.SetStallInterval(5 * time.Millisecond)

	// Make sure we have the sentinel to ensure that its absence is not the
	// cause of stall detection.
	if err := local.AddInfo(KeySentinel, nil, time.Hour); err != nil {
		t.Fatal(err)
	}

	peerStopper := stop.NewStopper()
	peer := startGossip(2, peerStopper, t, metric.NewRegistry())
	peerNodeID := peer.GetNodeID()
	peerAddr := peer.GetNodeAddr()

	local.startClient(peerAddr, peerNodeID)

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("%d not yet connected", peerNodeID)
	})

	local.bootstrap()
	local.manage()

	peerStopper.Stop()

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return errors.Errorf("%d still connected", peerNodeID)
			}
		}
		return nil
	})

	peerStopper = stop.NewStopper()
	defer peerStopper.Stop()
	peer = startGossipAtAddr(peerNodeID, peerAddr, peerStopper, t, metric.NewRegistry())

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("%d not yet connected", peerNodeID)
	})
}

// TestClientGossipMetrics verifies that gossip stats are generated.
func TestClientGossipMetrics(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())

	if err := local.AddInfo("local-key", nil, time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := remote.AddInfo("remote-key", nil, time.Hour); err != nil {
		t.Fatal(err)
	}

	gossipSucceedsSoon(
		t, stopper, make(chan *client, 2),
		map[*client]*Gossip{
			newClient(context.TODO(), local.GetNodeAddr(), remote.nodeMetrics): remote,
		},
		func() error {
			// Infos/Bytes Sent/Received should not be zero.
			for i, s := range []*server{local.server, remote.server} {
				for _, rate := range []metric.Rates{
					s.nodeMetrics.InfosSent,
					s.nodeMetrics.InfosReceived,
					s.nodeMetrics.BytesSent,
					s.nodeMetrics.BytesReceived,
				} {
					counter := rate.Counter
					if count := counter.Count(); count <= 0 {
						return errors.Errorf("%d: expected metrics counter %q > 0; = %d", i, counter.GetName(), count)
					}
				}
			}

			// Since there are two gossip nodes, there should be exactly one incoming
			// or outgoing connection due to gossip's connection de-duplication.
			for i, g := range []*Gossip{local, remote} {
				g.mu.Lock()
				defer g.mu.Unlock()
				count := int64(0)
				for _, gauge := range []*metric.Gauge{g.mu.incoming.gauge, g.outgoing.gauge} {
					if gauge == nil {
						return errors.Errorf("%d: missing gauge", i)
					}
					count += gauge.Value()
				}
				const expected = 1
				if count != expected {
					return errors.Errorf("%d: expected metrics incoming + outgoing connection count == %d; = %d", i, expected, count)
				}
			}
			return nil
		})
}

// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	ctx.ScanInterval = 10 * time.Hour
	ctx.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCtx := makeTestContext()
	g := gossip.New(
		context.Background(),
		nodeRPCContext,
		grpcServer,
		serverCtx.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry())
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(ln.Addr())
	}
	ctx.Gossip = g
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(&kv.DistSenderConfig{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer())
	sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper,
		kv.MakeTxnMetrics())
	ctx.DB = client.NewDB(sender)
	ctx.Transport = storage.NewDummyRaftTransport()
	node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), ctx.Clock, node, stopper
}

// TestTxnCoordSenderSingleRoundtripTxn checks that a batch which completely
// holds the writing portion of a Txn (including EndTransaction) does not
// launch a heartbeat goroutine at all.
func TestTxnCoordSenderSingleRoundtripTxn(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		br := ba.CreateReply()
		txnClone := ba.Txn.Clone()
		br.Txn = &txnClone
		br.Txn.Writing = true
		return br, nil
	}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))

	// Stop the stopper manually, prior to trying the transaction. This has the
	// effect of returning a NodeUnavailableError for any attempts at launching
	// a heartbeat goroutine.
	stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	_, pErr := ts.Send(context.Background(), ba)
	if pErr != nil {
		t.Fatal(pErr)
	}
}

// NewStoreStatusMonitor constructs a StoreStatusMonitor with the given ID.
func NewStoreStatusMonitor(id roachpb.StoreID, metaRegistry *metric.Registry) *StoreStatusMonitor {
	registry := metric.NewRegistry()
	// Format as `cr.store.<metric>.<id>` in output, in analogy to the time
	// series data written.
	metaRegistry.MustAdd(storeTimeSeriesPrefix+"%s."+id.String(), registry)
	return &StoreStatusMonitor{
		ID:                   id,
		registry:             registry,
		rangeCount:           registry.Counter("ranges"),
		leaderRangeCount:     registry.Gauge("ranges.leader"),
		replicatedRangeCount: registry.Gauge("ranges.replicated"),
		availableRangeCount:  registry.Gauge("ranges.available"),
		liveBytes:            registry.Gauge("livebytes"),
		keyBytes:             registry.Gauge("keybytes"),
		valBytes:             registry.Gauge("valbytes"),
		intentBytes:          registry.Gauge("intentbytes"),
		liveCount:            registry.Gauge("livecount"),
		keyCount:             registry.Gauge("keycount"),
		valCount:             registry.Gauge("valcount"),
		intentCount:          registry.Gauge("intentcount"),
		intentAge:            registry.Gauge("intentage"),
		gcBytesAge:           registry.Gauge("gcbytesage"),
		lastUpdateNanos:      registry.Gauge("lastupdatenanos"),
		capacity:             registry.Gauge("capacity"),
		available:            registry.Gauge("capacity.available"),
	}
}

// NewExecutor creates an Executor and registers a callback on the
// system config.
func NewExecutor(db client.DB, gossip *gossip.Gossip, leaseMgr *LeaseManager, stopper *stop.Stopper) *Executor {
	registry := metric.NewRegistry()
	exec := &Executor{
		db:       db,
		reCache:  parser.NewRegexpCache(512),
		leaseMgr: leaseMgr,

		registry:      registry,
		latency:       registry.Latency("latency"),
		txnBeginCount: registry.Counter("transaction.begincount"),
		selectCount:   registry.Counter("select.count"),
		updateCount:   registry.Counter("update.count"),
		insertCount:   registry.Counter("insert.count"),
		deleteCount:   registry.Counter("delete.count"),
		ddlCount:      registry.Counter("ddl.count"),
		miscCount:     registry.Counter("misc.count"),
	}
	exec.systemConfigCond = sync.NewCond(&exec.systemConfigMu)

	gossipUpdateC := gossip.RegisterSystemConfigChannel()
	stopper.RunWorker(func() {
		for {
			select {
			case <-gossipUpdateC:
				cfg := gossip.GetSystemConfig()
				exec.updateSystemConfig(cfg)
			case <-stopper.ShouldStop():
				return
			}
		}
	})

	return exec
}

// TestMultiRangeScanWithMaxResults tests that commands which access multiple
// ranges with MaxResults parameter are carried out properly.
func TestMultiRangeScanWithMaxResults(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		splitKeys []roachpb.Key
		keys      []roachpb.Key
	}{
		{[]roachpb.Key{roachpb.Key("m")},
			[]roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}},
		{[]roachpb.Key{roachpb.Key("h"), roachpb.Key("q")},
			[]roachpb.Key{roachpb.Key("b"), roachpb.Key("f"), roachpb.Key("k"),
				roachpb.Key("r"), roachpb.Key("w"), roachpb.Key("y")}},
	}

	for i, tc := range testCases {
		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
		defer s.Stopper().Stop()
		ts := s.(*TestServer)
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = ts.stopper.ShouldDrain()
		ds := kv.NewDistSender(&kv.DistSenderContext{
			Clock:           s.Clock(),
			RPCContext:      s.RPCContext(),
			RPCRetryOptions: &retryOpts,
		}, ts.Gossip())
		tds := kv.NewTxnCoordSender(ds, ts.Clock(), ts.Ctx.Linearizable,
			tracing.NewTracer(), ts.stopper, kv.NewTxnMetrics(metric.NewRegistry()))

		for _, sk := range tc.splitKeys {
			if err := ts.node.ctx.DB.AdminSplit(sk); err != nil {
				t.Fatal(err)
			}
		}

		for _, k := range tc.keys {
			put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
			if _, err := client.SendWrapped(tds, nil, put); err != nil {
				t.Fatal(err)
			}
		}

		// Try every possible ScanRequest startKey.
		for start := 0; start < len(tc.keys); start++ {
			// Try every possible maxResults, from 1 to beyond the size of key array.
			for maxResults := 1; maxResults <= len(tc.keys)-start+1; maxResults++ {
				scan := roachpb.NewScan(tc.keys[start], tc.keys[len(tc.keys)-1].Next(),
					int64(maxResults))
				reply, err := client.SendWrapped(tds, nil, scan)
				if err != nil {
					t.Fatal(err)
				}
				rows := reply.(*roachpb.ScanResponse).Rows
				if start+maxResults <= len(tc.keys) && len(rows) != maxResults {
					t.Errorf("%d: start=%s: expected %d rows, but got %d", i, tc.keys[start], maxResults, len(rows))
				} else if start+maxResults == len(tc.keys)+1 && len(rows) != maxResults-1 {
					t.Errorf("%d: expected %d rows, but got %d", i, maxResults-1, len(rows))
				}
			}
		}
	}
}

func makeTestV3Conn(c net.Conn) v3Conn {
	return makeV3Conn(c,
		sql.NewDummyExecutor(),
		newServerMetrics(metric.NewRegistry()),
		sql.SessionArgs{},
	)
}

// TestCorruptedClusterID verifies that a node fails to start when a
// store's cluster ID is empty.
func TestCorruptedClusterID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	engineStopper := stop.NewStopper()
	e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
	defer engineStopper.Stop()
	if _, err := bootstrapCluster([]engine.Engine{e}, kv.NewTxnMetrics(metric.NewRegistry())); err != nil {
		t.Fatal(err)
	}

	// Set the cluster ID to the empty UUID.
	sIdent := roachpb.StoreIdent{
		ClusterID: *uuid.EmptyUUID,
		NodeID:    1,
		StoreID:   1,
	}
	if err := engine.MVCCPutProto(context.Background(), e, nil, keys.StoreIdentKey(), roachpb.ZeroTimestamp, nil, &sIdent); err != nil {
		t.Fatal(err)
	}

	engines := []engine.Engine{e}
	_, serverAddr, _, node, stopper := createTestNode(util.TestAddr, engines, nil, t)
	stopper.Stop()
	if err := node.start(serverAddr, engines, roachpb.Attributes{}); !testutils.IsError(err, "unidentified store") {
		t.Errorf("unexpected error %v", err)
	}
}

func makeNodeMetrics() nodeMetrics {
	reg := metric.NewRegistry()
	return nodeMetrics{
		registry: reg,
		latency:  reg.Latency("latency"),
		success:  reg.Rates("success"),
		err:      reg.Rates("error"),
	}
}

// NewMetricsRecorder initializes a new MetricsRecorder object that uses the
// given clock.
func NewMetricsRecorder(clock *hlc.Clock) *MetricsRecorder {
	mr := &MetricsRecorder{
		nodeRegistry: metric.NewRegistry(),
	}
	mr.mu.storeRegistries = make(map[roachpb.StoreID]*metric.Registry)
	mr.mu.stores = make(map[roachpb.StoreID]storeMetrics)
	mr.mu.clock = clock
	return mr
}

// newClient creates and returns a client struct.
func newClient(addr net.Addr, nodeMetrics metrics) *client {
	return &client{
		createdAt:             timeutil.Now(),
		addr:                  addr,
		remoteHighWaterStamps: map[roachpb.NodeID]int64{},
		closer:                make(chan struct{}),
		clientMetrics:         makeMetrics(metric.NewRegistry()),
		nodeMetrics:           nodeMetrics,
	}
}

// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created
// LocalTestCluster. Also returns a cleanup function to be executed at the end of the
// test.
func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) {
	s, testSender := createTestDB(t)
	reg := metric.NewRegistry()
	txnMetrics := NewTxnMetrics(reg)
	sender := NewTxnCoordSender(testSender.wrapped, s.Clock, false, tracing.NewTracer(), s.Stopper, txnMetrics)

	return s.Manual, sender, func() {
		teardownHeartbeats(sender)
		s.Stop()
	}
}

// newServer creates and returns a server struct.
func newServer(stopper *stop.Stopper, registry *metric.Registry) *server {
	return &server{
		stopper:       stopper,
		is:            newInfoStore(0, util.UnresolvedAddr{}, stopper),
		incoming:      makeNodeSet(minPeers, registry.Gauge(ConnectionsIncomingGaugeName)),
		nodeMap:       make(map[util.UnresolvedAddr]serverInfo),
		tighten:       make(chan roachpb.NodeID, 1),
		ready:         make(chan struct{}),
		nodeMetrics:   makeMetrics(registry),
		serverMetrics: makeMetrics(metric.NewRegistry()),
	}
}

func newRaftTransportTestContext(t testing.TB) *raftTransportTestContext {
	rttc := &raftTransportTestContext{
		t:          t,
		stopper:    stop.NewStopper(),
		transports: map[roachpb.NodeID]*storage.RaftTransport{},
	}
	rttc.nodeRPCContext = rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, rttc.stopper)
	server := rpc.NewServer(rttc.nodeRPCContext) // never started
	rttc.gossip = gossip.New(rttc.nodeRPCContext, server, nil, rttc.stopper, metric.NewRegistry())
	rttc.gossip.SetNodeID(1)
	return rttc
}

// NewNodeStatusMonitor initializes a new NodeStatusMonitor instance.
func NewNodeStatusMonitor(metaRegistry *metric.Registry) *NodeStatusMonitor {
	registry := metric.NewRegistry()
	return &NodeStatusMonitor{
		stores:       make(map[roachpb.StoreID]*StoreStatusMonitor),
		metaRegistry: metaRegistry,
		registry:     registry,
		mLatency:     registry.Latency("exec.latency"),
		mSuccess:     registry.Rates("exec.success"),
		mError:       registry.Rates("exec.error"),
	}
}

// newRemoteClockMonitor returns a monitor with the given server clock.
func newRemoteClockMonitor(clock *hlc.Clock) *RemoteClockMonitor {
	r := RemoteClockMonitor{
		clock:           clock,
		monitorInterval: defaultHeartbeatInterval * 10,
		registry:        metric.NewRegistry(),
	}
	r.mu.offsets = make(map[string]RemoteOffset)
	r.metrics = remoteClockMetrics{
		clusterOffsetLowerBound: r.registry.Gauge("lower-bound-nanos"),
		clusterOffsetUpperBound: r.registry.Gauge("upper-bound-nanos"),
	}
	return &r
}

// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created
// LocalTestCluster. Also returns a cleanup function to be executed at the end of the
// test.
func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) {
	s := createTestDB(t)
	reg := metric.NewRegistry()
	txnMetrics := NewTxnMetrics(reg)
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	sender := NewTxnCoordSender(s.distSender, clock, false, tracing.NewTracer(), s.Stopper, txnMetrics)

	return manual, sender, func() {
		teardownHeartbeats(sender)
		s.Stop()
	}
}

// newRemoteClockMonitor returns a monitor with the given server clock.
func newRemoteClockMonitor(clock *hlc.Clock, offsetTTL time.Duration) *RemoteClockMonitor {
	r := RemoteClockMonitor{
		clock:     clock,
		offsetTTL: offsetTTL,
		registry:  metric.NewRegistry(),
	}
	r.mu.offsets = make(map[string]RemoteOffset)
	r.metrics = remoteClockMetrics{
		clusterOffsetLowerBound: r.registry.Gauge("lower-bound-nanos"),
		clusterOffsetUpperBound: r.registry.Gauge("upper-bound-nanos"),
	}
	return &r
}

// TestGossipCullNetwork verifies that a client will be culled from
// the network periodically (at cullInterval duration intervals).
func TestGossipCullNetwork(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	local.SetCullInterval(5 * time.Millisecond)

	local.mu.Lock()
	for i := 0; i < minPeers; i++ {
		peer := startGossip(roachpb.NodeID(i+2), stopper, t, metric.NewRegistry())
		local.startClient(&peer.is.NodeAddr)
	}
	local.mu.Unlock()

	const slowGossipDuration = time.Minute

	if err := util.RetryForDuration(slowGossipDuration, func() error {
		if peers := len(local.Outgoing()); peers != minPeers {
			return errors.Errorf("%d of %d peers connected", peers, minPeers)
		}
		return nil
	}); err != nil {
		t.Fatalf("condition failed to evaluate within %s: %s", slowGossipDuration, err)
	}

	local.manage()

	if err := util.RetryForDuration(slowGossipDuration, func() error {
		// Verify that a client is closed within the cull interval.
		if peers := len(local.Outgoing()); peers != minPeers-1 {
			return errors.Errorf("%d of %d peers connected", peers, minPeers-1)
		}
		return nil
	}); err != nil {
		t.Fatalf("condition failed to evaluate within %s: %s", slowGossipDuration, err)
	}
}

// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*rpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	ctx.ScanInterval = 10 * time.Hour
	rpcServer := rpc.NewServer(nodeRPCContext)
	grpcServer := grpc.NewServer()
	tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), addr, tlsConfig)
	if err != nil {
		t.Fatal(err)
	}
	g := gossip.New(nodeRPCContext, testContext.GossipBootstrapResolvers, stopper)
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(grpcServer, ln.Addr())
	}
	ctx.Gossip = g
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	distSender := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	tracer := tracing.NewTracer()
	sender := kv.NewTxnCoordSender(distSender, ctx.Clock, false, tracer, stopper)
	ctx.DB = client.NewDB(sender)
	// TODO(bdarnell): arrange to have the transport closed.
	// (or attach LocalRPCTransport.Close to the stopper)
	ctx.Transport = storage.NewLocalRPCTransport(stopper)
	ctx.EventFeed = util.NewFeed(stopper)
	ctx.Tracer = tracer
	node := NewNode(ctx, metric.NewRegistry(), stopper, nil)
	return rpcServer, ln.Addr(), ctx.Clock, node, stopper
}

// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	testCases := []struct {
		roachpb.Error
		errMsg string
	}{
		{*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"},
		{*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{
			TxnMeta: roachpb.TxnMeta{
				ID: uuid.NewV4(),
			}})), "failed to push"},
		{*roachpb.NewErrorf("testError"), "testError"},
	}
	for i, test := range testCases {
		func() {
			ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
				txn := ba.Txn.Clone()
				txn.Writing = true
				pErr := &roachpb.Error{}
				*pErr = test.Error
				pErr.SetTxn(&txn)
				return nil, pErr
			}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))

			var ba roachpb.BatchRequest
			key := roachpb.Key("test")
			ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.EndTransactionRequest{})
			ba.Txn = &roachpb.Transaction{Name: "test"}
			_, pErr := ts.Send(context.Background(), ba)
			if !testutils.IsPError(pErr, test.errMsg) {
				t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr)
			}

			defer teardownHeartbeats(ts)
			ts.Lock()
			defer ts.Unlock()
			if len(ts.txns) != 1 {
				t.Errorf("%d: expected transaction to be tracked", i)
			}
		}()
	}
}

// TestNodeJoin verifies a new node is able to join a bootstrapped
// cluster consisting of one node.
func TestNodeJoin(t *testing.T) {
	defer leaktest.AfterTest(t)()
	engineStopper := stop.NewStopper()
	defer engineStopper.Stop()
	e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
	if _, err := bootstrapCluster([]engine.Engine{e}, kv.NewTxnMetrics(metric.NewRegistry())); err != nil {
		t.Fatal(err)
	}

	// Start the bootstrap node.
	engines1 := []engine.Engine{e}
	addr1 := util.CreateTestAddr("tcp")
	_, server1Addr, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t)
	defer stopper1.Stop()

	// Create a new node.
	engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)}
	addr2 := util.CreateTestAddr("tcp")
	_, server2Addr, node2, stopper2 := createAndStartTestNode(addr2, engines2, server1Addr, t)
	defer stopper2.Stop()

	// Verify new node is able to bootstrap its store.
	util.SucceedsSoon(t, func() error {
		if sc := node2.stores.GetStoreCount(); sc != 1 {
			return util.Errorf("GetStoreCount() expected 1; got %d", sc)
		}
		return nil
	})

	// Verify node1 sees node2 via gossip and vice versa.
	node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID)
	node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID)
	util.SucceedsSoon(t, func() error {
		var nodeDesc1 roachpb.NodeDescriptor
		if err := node1.ctx.Gossip.GetInfoProto(node2Key, &nodeDesc1); err != nil {
			return err
		}
		if addr2Str, server2AddrStr := nodeDesc1.Address.String(), server2Addr.String(); addr2Str != server2AddrStr {
			return util.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2Str, server2AddrStr)
		}
		var nodeDesc2 roachpb.NodeDescriptor
		if err := node2.ctx.Gossip.GetInfoProto(node1Key, &nodeDesc2); err != nil {
			return err
		}
		if addr1Str, server1AddrStr := nodeDesc2.Address.String(), server1Addr.String(); addr1Str != server1AddrStr {
			return util.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1Str, server1AddrStr)
		}
		return nil
	})
}