Ejemplo n.º 1
0
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		txn := ba.Txn.Clone()
		txn.Writing = true
		pErr := roachpb.NewError(roachpb.NewTransactionRetryError())
		pErr.SetTxn(&txn)
		return nil, pErr
	}), clock, false, tracing.NewTracer(), stopper)
	defer stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	if _, pErr := ts.Send(context.Background(), ba); !testutils.IsPError(pErr, "retry txn") {
		t.Fatalf("unexpected error: %v", pErr)
	}

	defer teardownHeartbeats(ts)
	ts.Lock()
	defer ts.Unlock()
	if len(ts.txns) != 1 {
		t.Fatalf("expected transaction to be tracked")
	}
}
Ejemplo n.º 2
0
// TestMultiRangeScanWithMaxResults tests that commands which access multiple
// ranges with MaxResults parameter are carried out properly.
func TestMultiRangeScanWithMaxResults(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		splitKeys []roachpb.Key
		keys      []roachpb.Key
	}{
		{[]roachpb.Key{roachpb.Key("m")},
			[]roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}},
		{[]roachpb.Key{roachpb.Key("h"), roachpb.Key("q")},
			[]roachpb.Key{roachpb.Key("b"), roachpb.Key("f"), roachpb.Key("k"),
				roachpb.Key("r"), roachpb.Key("w"), roachpb.Key("y")}},
	}

	for i, tc := range testCases {
		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
		defer s.Stopper().Stop()
		ts := s.(*TestServer)
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = ts.stopper.ShouldDrain()
		ds := kv.NewDistSender(&kv.DistSenderContext{
			Clock:           s.Clock(),
			RPCContext:      s.RPCContext(),
			RPCRetryOptions: &retryOpts,
		}, ts.Gossip())
		tds := kv.NewTxnCoordSender(ds, ts.Clock(), ts.Ctx.Linearizable, tracing.NewTracer(),
			ts.stopper, kv.NewTxnMetrics(metric.NewRegistry()))

		for _, sk := range tc.splitKeys {
			if err := ts.node.ctx.DB.AdminSplit(sk); err != nil {
				t.Fatal(err)
			}
		}

		for _, k := range tc.keys {
			put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
			if _, err := client.SendWrapped(tds, nil, put); err != nil {
				t.Fatal(err)
			}
		}

		// Try every possible ScanRequest startKey.
		for start := 0; start < len(tc.keys); start++ {
			// Try every possible maxResults, from 1 to beyond the size of key array.
			for maxResults := 1; maxResults <= len(tc.keys)-start+1; maxResults++ {
				scan := roachpb.NewScan(tc.keys[start], tc.keys[len(tc.keys)-1].Next(),
					int64(maxResults))
				reply, err := client.SendWrapped(tds, nil, scan)
				if err != nil {
					t.Fatal(err)
				}
				rows := reply.(*roachpb.ScanResponse).Rows
				if start+maxResults <= len(tc.keys) && len(rows) != maxResults {
					t.Errorf("%d: start=%s: expected %d rows, but got %d", i, tc.keys[start], maxResults, len(rows))
				} else if start+maxResults == len(tc.keys)+1 && len(rows) != maxResults-1 {
					t.Errorf("%d: expected %d rows, but got %d", i, maxResults-1, len(rows))
				}
			}
		}
	}
}
Ejemplo n.º 3
0
// TestSendToOneClient verifies that Send correctly sends a request
// to one server using the heartbeat RPC.
func TestSendToOneClient(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	ctx := newNodeTestContext(nil, stopper)
	s, ln := newTestServer(t, ctx)
	registerBatch(t, s, 0)

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 1 * time.Second,
		Timeout:         10 * time.Second,
		Trace:           sp,
	}
	reply, err := sendBatch(opts, []net.Addr{ln.Addr()}, ctx)
	if err != nil {
		t.Fatal(err)
	}
	if reply == nil {
		t.Errorf("expected reply")
	}
}
Ejemplo n.º 4
0
// TestTxnCoordSenderSingleRoundtripTxn checks that a batch which completely
// holds the writing portion of a Txn (including EndTransaction) does not
// launch a heartbeat goroutine at all.
func TestTxnCoordSenderSingleRoundtripTxn(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		br := ba.CreateReply()
		txnClone := ba.Txn.Clone()
		br.Txn = &txnClone
		br.Txn.Writing = true
		return br, nil
	}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))

	// Stop the stopper manually, prior to trying the transaction. This has the
	// effect of returning a NodeUnavailableError for any attempts at launching
	// a heartbeat goroutine.
	stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	_, pErr := ts.Send(context.Background(), ba)
	if pErr != nil {
		t.Fatal(pErr)
	}
}
Ejemplo n.º 5
0
// NewDistSender returns a batch.Sender instance which connects to the
// Cockroach cluster via the supplied gossip instance. Supplying a
// DistSenderContext or the fields within is optional. For omitted values, sane
// defaults will be used.
func NewDistSender(ctx *DistSenderContext, gossip *gossip.Gossip) *DistSender {
	if ctx == nil {
		ctx = &DistSenderContext{}
	}
	clock := ctx.Clock
	if clock == nil {
		clock = hlc.NewClock(hlc.UnixNano)
	}
	ds := &DistSender{
		clock:  clock,
		gossip: gossip,
	}
	if ctx.nodeDescriptor != nil {
		atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(ctx.nodeDescriptor))
	}
	rcSize := ctx.RangeDescriptorCacheSize
	if rcSize <= 0 {
		rcSize = defaultRangeDescriptorCacheSize
	}
	rdb := ctx.RangeDescriptorDB
	if rdb == nil {
		rdb = ds
	}
	ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize))
	lcSize := ctx.LeaderCacheSize
	if lcSize <= 0 {
		lcSize = defaultLeaderCacheSize
	}
	ds.leaderCache = newLeaderCache(int(lcSize))
	if ctx.RangeLookupMaxRanges <= 0 {
		ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges
	}
	if ctx.TransportFactory != nil {
		ds.transportFactory = ctx.TransportFactory
	}
	ds.rpcRetryOptions = base.DefaultRetryOptions()
	if ctx.RPCRetryOptions != nil {
		ds.rpcRetryOptions = *ctx.RPCRetryOptions
	}
	if ctx.RPCContext != nil {
		ds.rpcContext = ctx.RPCContext
		if ds.rpcRetryOptions.Closer == nil {
			ds.rpcRetryOptions.Closer = ds.rpcContext.Stopper.ShouldDrain()
		}
	}
	if ctx.Tracer != nil {
		ds.Tracer = ctx.Tracer
	} else {
		ds.Tracer = tracing.NewTracer()
	}
	if ctx.SendNextTimeout != 0 {
		ds.sendNextTimeout = ctx.SendNextTimeout
	} else {
		ds.sendNextTimeout = defaultSendNextTimeout
	}

	return ds
}
Ejemplo n.º 6
0
// TestRetryableError verifies that Send returns a retryable error
// when it hits an RPC error.
func TestRetryableError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	clientStopper := stop.NewStopper()
	defer clientStopper.Stop()
	clientContext := newNodeTestContext(nil, clientStopper)

	serverStopper := stop.NewStopper()
	serverContext := newNodeTestContext(nil, serverStopper)

	s, ln := newTestServer(t, serverContext)
	roachpb.RegisterInternalServer(s, Node(0))

	conn, err := clientContext.GRPCDial(ln.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	ctx := context.Background()
	waitForConnState := func(desiredState grpc.ConnectivityState) {
		clientState, err := conn.State()
		for clientState != desiredState {
			if err != nil {
				t.Fatal(err)
			}
			if clientState == grpc.Shutdown {
				t.Fatalf("%v has unexpectedly shut down", conn)
			}
			clientState, err = conn.WaitForStateChange(ctx, clientState)
		}
	}
	// Wait until the client becomes healthy and shut down the server.
	waitForConnState(grpc.Ready)
	serverStopper.Stop()
	// Wait until the client becomes unhealthy.
	waitForConnState(grpc.TransientFailure)

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Millisecond,
		Timeout:         100 * time.Millisecond,
		Trace:           sp,
	}
	if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}
}
Ejemplo n.º 7
0
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created
// LocalTestCluster. Also returns a cleanup function to be executed at the end of the
// test.
func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) {
	s, testSender := createTestDB(t)
	reg := metric.NewRegistry()
	txnMetrics := NewTxnMetrics(reg)
	sender := NewTxnCoordSender(testSender.wrapped, s.Clock, false, tracing.NewTracer(), s.Stopper, txnMetrics)

	return s.Manual, sender, func() {
		teardownHeartbeats(sender)
		s.Stop()
	}
}
Ejemplo n.º 8
0
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created
// LocalTestCluster. Also returns a cleanup function to be executed at the end of the
// test.
func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) {
	s, testSender := createTestDB(t)
	txnMetrics := MakeTxnMetrics()
	ctx := tracing.WithTracer(context.Background(), tracing.NewTracer())
	sender := NewTxnCoordSender(ctx, testSender.wrapped, s.Clock, false, s.Stopper, txnMetrics)

	return s.Manual, sender, func() {
		teardownHeartbeats(sender)
		s.Stop()
	}
}
Ejemplo n.º 9
0
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	ctx.ScanInterval = 10 * time.Hour
	ctx.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCtx := makeTestContext()
	g := gossip.New(
		context.Background(),
		nodeRPCContext,
		grpcServer,
		serverCtx.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry())
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(ln.Addr())
	}
	ctx.Gossip = g
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(&kv.DistSenderConfig{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer())
	sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper,
		kv.MakeTxnMetrics())
	ctx.DB = client.NewDB(sender)
	ctx.Transport = storage.NewDummyRaftTransport()
	node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), ctx.Clock, node, stopper
}
Ejemplo n.º 10
0
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	testCases := []struct {
		roachpb.Error
		errMsg string
	}{
		{*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"},
		{*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				ID: uuid.NewV4(),
			}})), "failed to push"},
		{*roachpb.NewErrorf("testError"), "testError"},
	}
	for i, test := range testCases {
		func() {
			senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
				txn := ba.Txn.Clone()
				txn.Writing = true
				pErr := &roachpb.Error{}
				*pErr = test.Error
				pErr.SetTxn(&txn)
				return nil, pErr
			}
			ctx := tracing.WithTracer(context.Background(), tracing.NewTracer())
			ts := NewTxnCoordSender(ctx, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics())

			var ba roachpb.BatchRequest
			key := roachpb.Key("test")
			ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.EndTransactionRequest{})
			ba.Txn = &roachpb.Transaction{Name: "test"}
			_, pErr := ts.Send(context.Background(), ba)
			if !testutils.IsPError(pErr, test.errMsg) {
				t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr)
			}

			defer teardownHeartbeats(ts)
			ts.Lock()
			defer ts.Unlock()
			if len(ts.txns) != 1 {
				t.Errorf("%d: expected transaction to be tracked", i)
			}
		}()
	}
}
Ejemplo n.º 11
0
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created
// LocalTestCluster. Also returns a cleanup function to be executed at the end of the
// test.
func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) {
	s := createTestDB(t)
	reg := metric.NewRegistry()
	txnMetrics := NewTxnMetrics(reg)
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	sender := NewTxnCoordSender(s.distSender, clock, false, tracing.NewTracer(), s.Stopper, txnMetrics)

	return manual, sender, func() {
		teardownHeartbeats(sender)
		s.Stop()
	}
}
Ejemplo n.º 12
0
// NewDistSender returns a batch.Sender instance which connects to the
// Cockroach cluster via the supplied gossip instance. Supplying a
// DistSenderContext or the fields within is optional. For omitted values, sane
// defaults will be used.
func NewDistSender(ctx *DistSenderContext, gossip *gossip.Gossip) *DistSender {
	if ctx == nil {
		ctx = &DistSenderContext{}
	}
	clock := ctx.Clock
	if clock == nil {
		clock = hlc.NewClock(hlc.UnixNano)
	}
	ds := &DistSender{
		clock:  clock,
		gossip: gossip,
	}
	if ctx.nodeDescriptor != nil {
		atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(ctx.nodeDescriptor))
	}
	rcSize := ctx.RangeDescriptorCacheSize
	if rcSize <= 0 {
		rcSize = defaultRangeDescriptorCacheSize
	}
	rdb := ctx.RangeDescriptorDB
	if rdb == nil {
		rdb = ds
	}
	ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize))
	lcSize := ctx.LeaderCacheSize
	if lcSize <= 0 {
		lcSize = defaultLeaderCacheSize
	}
	ds.leaderCache = newLeaderCache(int(lcSize))
	if ctx.RangeLookupMaxRanges <= 0 {
		ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges
	}
	ds.rpcSend = send
	if ctx.RPCSend != nil {
		ds.rpcSend = ctx.RPCSend
	}
	if ctx.RPCContext != nil {
		ds.rpcContext = ctx.RPCContext
	}
	ds.rpcRetryOptions = defaultRPCRetryOptions
	if ctx.RPCRetryOptions != nil {
		ds.rpcRetryOptions = *ctx.RPCRetryOptions
	}
	if ctx.Tracer != nil {
		ds.Tracer = ctx.Tracer
	} else {
		ds.Tracer = tracing.NewTracer()
	}

	return ds
}
Ejemplo n.º 13
0
// TestRetryableError verifies that Send returns a retryable error
// when it hits an RPC error.
func TestRetryableError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	clientStopper := stop.NewStopper()
	defer clientStopper.Stop()
	clientContext := newNodeTestContext(nil, clientStopper)
	clientContext.HeartbeatTimeout = 10 * clientContext.HeartbeatInterval

	serverStopper := stop.NewStopper()
	serverContext := newNodeTestContext(nil, serverStopper)

	s, ln := newTestServer(t, serverContext)
	registerBatch(t, s, 0)

	c := rpc.NewClient(ln.Addr(), clientContext)
	// Wait until the client becomes healthy and shut down the server.
	<-c.Healthy()
	serverStopper.Stop()
	// Wait until the client becomes unhealthy.
	func() {
		for r := retry.Start(retry.Options{}); r.Next(); {
			select {
			case <-c.Healthy():
			case <-time.After(1 * time.Nanosecond):
				return
			}
		}
	}()

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Millisecond,
		Timeout:         100 * time.Millisecond,
		Trace:           sp,
	}
	if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}
}
Ejemplo n.º 14
0
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*rpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	ctx.ScanInterval = 10 * time.Hour
	rpcServer := rpc.NewServer(nodeRPCContext)
	grpcServer := grpc.NewServer()
	tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), addr, tlsConfig)
	if err != nil {
		t.Fatal(err)
	}
	g := gossip.New(nodeRPCContext, testContext.GossipBootstrapResolvers, stopper)
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(grpcServer, ln.Addr())
	}
	ctx.Gossip = g
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	distSender := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	tracer := tracing.NewTracer()
	sender := kv.NewTxnCoordSender(distSender, ctx.Clock, false, tracer, stopper)
	ctx.DB = client.NewDB(sender)
	// TODO(bdarnell): arrange to have the transport closed.
	// (or attach LocalRPCTransport.Close to the stopper)
	ctx.Transport = storage.NewLocalRPCTransport(stopper)
	ctx.EventFeed = util.NewFeed(stopper)
	ctx.Tracer = tracer
	node := NewNode(ctx, metric.NewRegistry(), stopper, nil)
	return rpcServer, ln.Addr(), ctx.Clock, node, stopper
}
Ejemplo n.º 15
0
// Start starts the test cluster by bootstrapping an in-memory store
// (defaults to maximum of 50M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.Addr after Start() for client connections. Use Stop()
// to shutdown the server after the test completes.
func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) {
	nodeID := roachpb.NodeID(1)
	nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID}
	tracer := tracing.NewTracer()
	ltc.tester = t
	ltc.Manual = hlc.NewManualClock(0)
	ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano)
	ltc.Stopper = stop.NewStopper()
	rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper)
	server := rpc.NewServer(rpcContext) // never started
	ltc.Gossip = gossip.New(
		context.Background(), rpcContext, server, nil, ltc.Stopper, metric.NewRegistry())
	ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper)

	ltc.Stores = storage.NewStores(ltc.Clock)

	ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper,
		ltc.Gossip)
	if ltc.DBContext == nil {
		dbCtx := client.DefaultDBContext()
		ltc.DBContext = &dbCtx
	}
	ltc.DB = client.NewDBWithContext(ltc.Sender, *ltc.DBContext)
	transport := storage.NewDummyRaftTransport()
	ctx := storage.TestStoreContext()
	if ltc.RangeRetryOptions != nil {
		ctx.RangeRetryOptions = *ltc.RangeRetryOptions
	}
	ctx.Ctx = tracing.WithTracer(context.Background(), tracer)
	ctx.Clock = ltc.Clock
	ctx.DB = ltc.DB
	ctx.Gossip = ltc.Gossip
	ctx.Transport = transport
	ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc)
	if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Stores.AddStore(ltc.Store)
	if err := ltc.Store.BootstrapRange(nil); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	if err := ltc.Store.Start(context.Background(), ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Gossip.SetNodeID(nodeDesc.NodeID)
	if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
		t.Fatalf("unable to set node descriptor: %s", err)
	}
}
Ejemplo n.º 16
0
func TestInvalidAddrLength(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// The provided replicas is nil, so its length will be always less than the
	// specified response number
	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()
	opts := SendOptions{Trace: sp}
	ret, err := send(opts, nil, roachpb.BatchRequest{}, nil)

	// the expected return is nil and SendError
	if _, ok := err.(*roachpb.SendError); !ok || ret != nil {
		t.Fatalf("Shorter replicas should return nil and SendError.")
	}
}
Ejemplo n.º 17
0
// NewTxnCoordSender creates a new TxnCoordSender for use from a KV
// distributed DB instance.
func NewTxnCoordSender(wrapped client.Sender, clock *hlc.Clock, linearizable bool, tracer opentracing.Tracer, stopper *stop.Stopper) *TxnCoordSender {
	if tracer == nil {
		tracer = tracing.NewTracer()
	}
	tc := &TxnCoordSender{
		wrapped:           wrapped,
		clock:             clock,
		heartbeatInterval: storage.DefaultHeartbeatInterval,
		clientTimeout:     defaultClientTimeout,
		txns:              map[uuid.UUID]*txnMetadata{},
		linearizable:      linearizable,
		tracer:            tracer,
		stopper:           stopper,
	}

	tc.stopper.RunWorker(tc.startStats)
	return tc
}
Ejemplo n.º 18
0
// TestUnretryableError verifies that Send returns an unretryable
// error when it hits a critical error.
func TestUnretryableError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeContext := newNodeTestContext(nil, stopper)
	_, ln := newTestServer(t, nodeContext)

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 1 * time.Second,
		Timeout:         10 * time.Second,
		Trace:           sp,
	}

	sendOneFn = func(client *batchClient, timeout time.Duration,
		context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) {
		call := netrpc.Call{
			Reply: &roachpb.BatchResponse{},
		}
		call.Error = errors.New("unretryable")
		done <- &call
	}
	defer func() { sendOneFn = sendOne }()

	_, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext)
	if err == nil {
		t.Fatalf("Unexpected success")
	}
	retryErr, ok := err.(retry.Retryable)
	if !ok {
		t.Fatalf("Unexpected error type: %v", err)
	}
	if retryErr.CanRetry() {
		t.Errorf("Unexpected retryable error: %v", retryErr)
	}
}
Ejemplo n.º 19
0
// Start starts the test cluster by bootstrapping an in-memory store
// (defaults to maximum of 50M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.Addr after Start() for client connections. Use Stop()
// to shutdown the server after the test completes.
func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) {
	nodeID := roachpb.NodeID(1)
	nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID}
	tracer := tracing.NewTracer()
	ltc.tester = t
	ltc.Manual = hlc.NewManualClock(0)
	ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano)
	ltc.Stopper = stop.NewStopper()
	rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper)
	ltc.Gossip = gossip.New(rpcContext, nil, ltc.Stopper)
	ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper)

	ltc.Stores = storage.NewStores(ltc.Clock)

	ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper,
		ltc.Gossip)
	ltc.DB = client.NewDB(ltc.Sender)
	transport := storage.NewDummyRaftTransport()
	ctx := storage.TestStoreContext()
	ctx.Clock = ltc.Clock
	ctx.DB = ltc.DB
	ctx.Gossip = ltc.Gossip
	ctx.Transport = transport
	ctx.Tracer = tracer
	ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc)
	if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Stores.AddStore(ltc.Store)
	if err := ltc.Store.BootstrapRange(nil); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	if err := ltc.Store.Start(ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Gossip.SetNodeID(nodeDesc.NodeID)
	if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
		t.Fatalf("unable to set node descriptor: %s", err)
	}
}
Ejemplo n.º 20
0
// NewDistSender returns a batch.Sender instance which connects to the
// Cockroach cluster via the supplied gossip instance. Supplying a
// DistSenderContext or the fields within is optional. For omitted values, sane
// defaults will be used.
func NewDistSender(cfg *DistSenderConfig, g *gossip.Gossip) *DistSender {
	if cfg == nil {
		cfg = &DistSenderConfig{}
	}

	ds := &DistSender{gossip: g}

	ds.Ctx = cfg.Ctx
	if ds.Ctx == nil {
		ds.Ctx = context.Background()
	}

	if ds.Ctx.Done() != nil {
		panic("context with cancel or deadline")
	}

	if tracing.TracerFromCtx(ds.Ctx) == nil {
		ds.Ctx = tracing.WithTracer(ds.Ctx, tracing.NewTracer())
	}

	ds.clock = cfg.Clock
	if ds.clock == nil {
		ds.clock = hlc.NewClock(hlc.UnixNano)
	}

	if cfg.nodeDescriptor != nil {
		atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(cfg.nodeDescriptor))
	}
	rcSize := cfg.RangeDescriptorCacheSize
	if rcSize <= 0 {
		rcSize = defaultRangeDescriptorCacheSize
	}
	rdb := cfg.RangeDescriptorDB
	if rdb == nil {
		rdb = ds
	}
	ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize))
	lcSize := cfg.LeaseHolderCacheSize
	if lcSize <= 0 {
		lcSize = defaultLeaseHolderCacheSize
	}
	ds.leaseHolderCache = newLeaseHolderCache(int(lcSize))
	if cfg.RangeLookupMaxRanges <= 0 {
		ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges
	}
	if cfg.TransportFactory != nil {
		ds.transportFactory = cfg.TransportFactory
	}
	ds.rpcRetryOptions = base.DefaultRetryOptions()
	if cfg.RPCRetryOptions != nil {
		ds.rpcRetryOptions = *cfg.RPCRetryOptions
	}
	if cfg.RPCContext != nil {
		ds.rpcContext = cfg.RPCContext
		if ds.rpcRetryOptions.Closer == nil {
			ds.rpcRetryOptions.Closer = ds.rpcContext.Stopper.ShouldQuiesce()
		}
	}
	if cfg.SendNextTimeout != 0 {
		ds.sendNextTimeout = cfg.SendNextTimeout
	} else {
		ds.sendNextTimeout = defaultSendNextTimeout
	}

	if g != nil {
		g.RegisterCallback(gossip.KeyFirstRangeDescriptor,
			func(_ string, value roachpb.Value) {
				if log.V(1) {
					var desc roachpb.RangeDescriptor
					if err := value.GetProto(&desc); err != nil {
						log.Errorf(ds.Ctx, "unable to parse gossipped first range descriptor: %s", err)
					} else {
						log.Infof(ds.Ctx,
							"gossipped first range descriptor: %+v", desc.Replicas)
					}
				}
				err := ds.rangeCache.EvictCachedRangeDescriptor(roachpb.RKeyMin, nil, false)
				if err != nil {
					log.Warningf(ds.Ctx, "failed to evict first range descriptor: %s", err)
				}
			})
	}
	return ds
}
Ejemplo n.º 21
0
// TestTxnCoordSenderNoDuplicateIntents verifies that TxnCoordSender does not
// generate duplicate intents and that it merges intents for overlapping ranges.
func TestTxnCoordSenderNoDuplicateIntents(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)

	var expectedIntents []roachpb.Span

	senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (
		*roachpb.BatchResponse, *roachpb.Error) {
		if rArgs, ok := ba.GetArg(roachpb.EndTransaction); ok {
			et := rArgs.(*roachpb.EndTransactionRequest)
			if !reflect.DeepEqual(et.IntentSpans, expectedIntents) {
				t.Errorf("Invalid intents: %+v; expected %+v", et.IntentSpans, expectedIntents)
			}
		}
		br := ba.CreateReply()
		txnClone := ba.Txn.Clone()
		br.Txn = &txnClone
		br.Txn.Writing = true
		return br, nil
	}
	ts := NewTxnCoordSender(senderFn(senderFunc), clock, false, tracing.NewTracer(), stopper,
		NewTxnMetrics(metric.NewRegistry()))

	defer stopper.Stop()
	defer teardownHeartbeats(ts)

	db := client.NewDB(ts)
	txn := client.NewTxn(context.Background(), *db)

	// Write to a, b, u-w before the final batch.

	pErr := txn.Put(roachpb.Key("a"), []byte("value"))
	if pErr != nil {
		t.Fatal(pErr)
	}
	pErr = txn.Put(roachpb.Key("b"), []byte("value"))
	if pErr != nil {
		t.Fatal(pErr)
	}
	pErr = txn.DelRange(roachpb.Key("u"), roachpb.Key("w"))
	if pErr != nil {
		t.Fatal(pErr)
	}

	// The final batch overwrites key a and overlaps part of the u-w range.
	b := txn.NewBatch()
	b.Put(roachpb.Key("b"), []byte("value"))
	b.Put(roachpb.Key("c"), []byte("value"))
	b.DelRange(roachpb.Key("v"), roachpb.Key("z"), false)

	// The expected intents are a, b, c, and u-z.
	expectedIntents = []roachpb.Span{
		{Key: roachpb.Key("a"), EndKey: nil},
		{Key: roachpb.Key("b"), EndKey: nil},
		{Key: roachpb.Key("c"), EndKey: nil},
		{Key: roachpb.Key("u"), EndKey: roachpb.Key("z")},
	}

	pErr = txn.CommitInBatch(b)
	if pErr != nil {
		t.Fatal(pErr)
	}
}
Ejemplo n.º 22
0
// verifyUncertainty writes values to a key in 5ns intervals and then launches
// a transaction at each value's timestamp reading that value with
// the maximumOffset given, verifying in the process that the correct values
// are read (usually after one transaction restart).
func verifyUncertainty(concurrency int, maxOffset time.Duration, t *testing.T) {
	s := createTestDB(t)
	disableOwnNodeCertain(s)
	defer s.Stop()

	key := []byte("key-test")
	// wgStart waits for all transactions to line up, wgEnd has the main
	// function wait for them to finish.
	var wgStart, wgEnd sync.WaitGroup
	wgStart.Add(concurrency + 1)
	wgEnd.Add(concurrency)

	// Initial high offset to allow for future writes.
	s.Clock.SetMaxOffset(999 * time.Nanosecond)
	s.Manual.Set(s.Clock.MaxOffset().Nanoseconds() + 1)
	for i := 0; i < concurrency; i++ {
		value := []byte(fmt.Sprintf("value-%d", i))
		// Values will be written with 5ns spacing.
		futureTS := s.Clock.Now().Add(5, 0)
		s.Clock.Update(futureTS)
		// Expected number of versions skipped.
		skipCount := int(maxOffset) / 5
		if i+skipCount >= concurrency {
			skipCount = concurrency - i - 1
		}
		readValue := []byte(fmt.Sprintf("value-%d", i+skipCount))
		if err := s.DB.Put(key, value); err != nil {
			t.Errorf("%d: got write error: %s", i, err)
		}
		if gr, err := s.DB.Get(key); err != nil {
			t.Fatalf("%d: expected success reading value: %s", i, err)
		} else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), value) {
			t.Fatalf("%d: expected success reading value: %v", i, gr.Value)
		}

		go func(i int) {
			defer wgEnd.Done()
			wgStart.Done()
			// Wait until the other goroutines are running.
			wgStart.Wait()

			txnManual := hlc.NewManualClock(futureTS.WallTime)
			txnClock := hlc.NewClock(txnManual.UnixNano)
			// Make sure to incorporate the logical component if the wall time
			// hasn't changed (i=0). The logical component will change
			// internally in a way we can't track, but we want to be just
			// ahead.
			txnClock.Update(futureTS.Add(0, 999))
			// The written values are spaced out in intervals of 5ns, so
			// setting <5ns here should make do without any restarts while
			// higher values require roughly offset/5 restarts.
			txnClock.SetMaxOffset(maxOffset)

			sender := NewTxnCoordSender(s.distSender, txnClock, false, tracing.NewTracer(), s.Stopper)
			txnDB := client.NewDB(sender)

			if pErr := txnDB.Txn(func(txn *client.Txn) *roachpb.Error {
				// Read within the transaction.
				gr, pErr := txn.Get(key)
				if pErr != nil {
					if _, ok := pErr.GetDetail().(*roachpb.ReadWithinUncertaintyIntervalError); ok {
						return pErr
					}
					return roachpb.NewErrorf("unexpected read error of type %s: %s", reflect.TypeOf(pErr.GetDetail()), pErr)
				}
				if !gr.Exists() {
					return roachpb.NewErrorf("no value read")
				}
				if !bytes.Equal(gr.ValueBytes(), readValue) {
					return roachpb.NewErrorf("%d: read wrong value %v at %s, wanted %q",
						i, gr.Value, futureTS, readValue)
				}
				return nil
			}); pErr != nil {
				t.Error(pErr)
			}
		}(i)
	}
	// Kick the goroutines loose.
	wgStart.Done()
	// Wait for the goroutines to finish.
	wgEnd.Wait()
}
Ejemplo n.º 23
0
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
// response transaction's timestamp and priority as appropriate.
func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	origTS := makeTS(123, 0)
	plus10 := origTS.Add(10, 10)
	plus20 := plus10.Add(10, 0)
	testCases := []struct {
		pErr             *roachpb.Error
		expEpoch         uint32
		expPri           int32
		expTS, expOrigTS roachpb.Timestamp
		nodeSeen         bool
	}{
		{
			// No error, so nothing interesting either.
			pErr:      nil,
			expEpoch:  0,
			expPri:    1,
			expTS:     origTS,
			expOrigTS: origTS,
		},
		{
			// On uncertainty error, new epoch begins and node is seen.
			// Timestamp moves ahead of the existing write.
			pErr: func() *roachpb.Error {
				pErr := roachpb.NewErrorWithTxn(
					roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp),
					&roachpb.Transaction{})
				const nodeID = 1
				pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10)
				pErr.OriginNode = nodeID
				return pErr
			}(),
			expEpoch:  1,
			expPri:    1,
			expTS:     plus10,
			expOrigTS: plus10,
			nodeSeen:  true,
		},
		{
			// On abort, nothing changes but we get a new priority to use for
			// the next attempt.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus20, Priority: 10},
				}),
			expPri: 10,
		},
		{
			// On failed push, new epoch begins just past the pushed timestamp.
			// Additionally, priority ratchets up to just below the pusher's.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
				PusheeTxn: roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			},
				&roachpb.Transaction{}),
			expEpoch:  1,
			expPri:    9,
			expTS:     plus10,
			expOrigTS: plus10,
		},
		{
			// On retry, restart with new epoch, timestamp and priority.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			),
			expEpoch:  1,
			expPri:    10,
			expTS:     plus10,
			expOrigTS: plus10,
		},
	}

	for i, test := range testCases {
		stopper := stop.NewStopper()

		manual := hlc.NewManualClock(origTS.WallTime)
		clock := hlc.NewClock(manual.UnixNano)
		clock.SetMaxOffset(20)

		ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			var reply *roachpb.BatchResponse
			if test.pErr == nil {
				reply = ba.CreateReply()
			}
			return reply, test.pErr
		}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))
		db := client.NewDB(ts)
		txn := client.NewTxn(context.Background(), *db)
		txn.InternalSetPriority(1)
		txn.Proto.Name = "test txn"
		key := roachpb.Key("test-key")
		_, err := txn.Get(key)
		teardownHeartbeats(ts)
		stopper.Stop()

		if test.pErr != nil && err == nil {
			t.Fatalf("expected an error")
		}
		if txn.Proto.Epoch != test.expEpoch {
			t.Errorf("%d: expected epoch = %d; got %d",
				i, test.expEpoch, txn.Proto.Epoch)
		}
		if txn.Proto.Priority != test.expPri {
			t.Errorf("%d: expected priority = %d; got %d",
				i, test.expPri, txn.Proto.Priority)
		}
		if !txn.Proto.Timestamp.Equal(test.expTS) {
			t.Errorf("%d: expected timestamp to be %s; got %s",
				i, test.expTS, txn.Proto.Timestamp)
		}
		if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) {
			t.Errorf("%d: expected orig timestamp to be %s; got %s",
				i, test.expOrigTS, txn.Proto.OrigTimestamp)
		}
		if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
			t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v",
				i, test.nodeSeen, ns)
		}
	}
}
Ejemplo n.º 24
0
// NewServer creates a Server from a server.Context.
func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err)
	}

	if srvCtx.Ctx == nil {
		srvCtx.Ctx = context.Background()
	}
	if srvCtx.Ctx.Done() != nil {
		panic("context with cancel or deadline")
	}
	if tracing.TracerFromCtx(srvCtx.Ctx) == nil {
		// TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate
		// context.Context inside Server. We will need to rename server.Context
		// though.
		srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer())
	}

	if srvCtx.Insecure {
		log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := srvCtx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := srvCtx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass the server's Ctx as a base context for the various server
	// components, but we won't know the node ID until we Start(). At that point
	// it's too late to change the contexts in the components (various background
	// processes will have already started using the contexts).
	//
	// The dynamic value allows us to add the log tag to the context now and
	// update the value asynchronously. It's not significantly more expensive than
	// a regular tag since it's just doing an (atomic) load when a log/trace
	// message is constructed.
	s.nodeLogTagVal.Set(log.DynamicIntValueUnknown)
	srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal)
	s.ctx = srvCtx

	s.clock.SetMaxOffset(srvCtx.MaxOffset)

	s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(s.Ctx(), err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry)
	s.storePool = storage.NewStorePool(
		s.gossip,
		s.clock,
		s.rpcContext,
		srvCtx.ReservationsEnabled,
		srvCtx.TimeUntilStoreDead,
		s.stopper,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		Ctx:             s.Ctx(),
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics()
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock, srvCtx.Linearizable,
		s.stopper, txnMetrics)
	s.db = client.NewDB(s.txnCoordSender)

	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if srvCtx.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		Context:    s.Ctx(),
		DB:         s.db,
		RPCContext: s.rpcContext,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		Context:      s.Ctx(),
		DB:           s.db,
		Gossip:       s.gossip,
		LeaseManager: s.leaseMgr,
		Clock:        s.clock,
		DistSQLSrv:   s.distSQLServer,
	}
	if srvCtx.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Ctx:                            s.Ctx(),
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.ctx.RaftTickInterval,
		ScanInterval:                   s.ctx.ScanInterval,
		ScanMaxIdleTime:                s.ctx.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.ctx.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
	}
	if srvCtx.TestingKnobs.Store != nil {
		nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterStoresServer(s.grpc, s.node.storesServer)

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.tsDB)

	s.admin = makeAdminServer(s)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores)
	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
Ejemplo n.º 25
0
// bootstrapCluster bootstraps a multiple stores using the provided
// engines and cluster ID. The first bootstrapped store contains a
// single range spanning all keys. Initial range lookup metadata is
// populated for the range. Returns the cluster ID.
func bootstrapCluster(engines []engine.Engine) (uuid.UUID, error) {
	clusterID := uuid.MakeV4()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	ctx := storage.StoreContext{}
	ctx.ScanInterval = 10 * time.Minute
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	ctx.Tracer = tracing.NewTracer()
	// Create a KV DB with a local sender.
	stores := storage.NewStores(ctx.Clock)
	sender := kv.NewTxnCoordSender(stores, ctx.Clock, false, ctx.Tracer, stopper)
	ctx.DB = client.NewDB(sender)
	ctx.Transport = storage.NewLocalRPCTransport(stopper)
	for i, eng := range engines {
		sIdent := roachpb.StoreIdent{
			ClusterID: clusterID,
			NodeID:    1,
			StoreID:   roachpb.StoreID(i + 1),
		}

		// The bootstrapping store will not connect to other nodes so its
		// StoreConfig doesn't really matter.
		s := storage.NewStore(ctx, eng, &roachpb.NodeDescriptor{NodeID: 1})

		// Verify the store isn't already part of a cluster.
		if s.Ident.ClusterID != *uuid.EmptyUUID {
			return uuid.UUID{}, util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID)
		}

		// Bootstrap store to persist the store ident.
		if err := s.Bootstrap(sIdent, stopper); err != nil {
			return uuid.UUID{}, err
		}
		// Create first range, writing directly to engine. Note this does
		// not create the range, just its data. Only do this if this is the
		// first store.
		if i == 0 {
			initialValues := GetBootstrapSchema().GetInitialValues()
			if err := s.BootstrapRange(initialValues); err != nil {
				return uuid.UUID{}, err
			}
		}
		if err := s.Start(stopper); err != nil {
			return uuid.UUID{}, err
		}

		stores.AddStore(s)

		// Initialize node and store ids.  Only initialize the node once.
		if i == 0 {
			if nodeID, err := allocateNodeID(ctx.DB); nodeID != sIdent.NodeID || err != nil {
				return uuid.UUID{}, util.Errorf("expected to initialize node id allocator to %d, got %d: %s",
					sIdent.NodeID, nodeID, err)
			}
		}
		if storeID, err := allocateStoreIDs(sIdent.NodeID, 1, ctx.DB); storeID != sIdent.StoreID || err != nil {
			return uuid.UUID{}, util.Errorf("expected to initialize store id allocator to %d, got %d: %s",
				sIdent.StoreID, storeID, err)
		}
	}
	return clusterID, nil
}
Ejemplo n.º 26
0
// NewServer creates a Server from a server.Context.
func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) {
	if ctx == nil {
		return nil, util.Errorf("ctx must not be null")
	}

	if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil {
		return nil, util.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err)
	}

	if ctx.Insecure {
		log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := ctx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := ctx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		Tracer:  tracing.NewTracer(),
		ctx:     ctx,
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	s.clock.SetMaxOffset(ctx.MaxOffset)

	s.rpcContext = crpc.NewContext(&ctx.Context, s.clock, stopper)
	stopper.RunWorker(func() {
		s.rpcContext.RemoteClocks.MonitorRemoteOffsets(stopper)
	})

	s.rpc = crpc.NewServer(s.rpcContext)

	s.gossip = gossip.New(s.rpcContext, s.ctx.GossipBootstrapResolvers, stopper)
	s.storePool = storage.NewStorePool(s.gossip, s.clock, ctx.TimeUntilStoreDead, stopper)

	feed := util.NewFeed(stopper)

	// A custom RetryOptions is created which uses stopper.ShouldDrain() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	ds := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}, s.gossip)
	txnRegistry := metric.NewRegistry()
	txnMetrics := kv.NewTxnMetrics(txnRegistry)
	sender := kv.NewTxnCoordSender(ds, s.clock, ctx.Linearizable, s.Tracer, s.stopper, txnMetrics)
	s.db = client.NewDB(sender)

	s.grpc = grpc.NewServer()
	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(&s.ctx.Context, sender, stopper)
	if err := s.kvDB.RegisterRPC(s.rpc); err != nil {
		return nil, err
	}

	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)
	sqlRegistry := metric.NewRegistry()
	s.sqlExecutor = sql.NewExecutor(*s.db, s.gossip, s.leaseMgr, s.stopper, sqlRegistry)

	s.pgServer = pgwire.MakeServer(&s.ctx.Context, s.sqlExecutor, sqlRegistry)

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Clock:           s.clock,
		DB:              s.db,
		Gossip:          s.gossip,
		Transport:       s.raftTransport,
		ScanInterval:    s.ctx.ScanInterval,
		ScanMaxIdleTime: s.ctx.ScanMaxIdleTime,
		EventFeed:       feed,
		Tracer:          s.Tracer,
		StorePool:       s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
			Mode:           s.ctx.BalanceMode,
		},
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.recorder.AddNodeRegistry("sql.%s", sqlRegistry)
	s.recorder.AddNodeRegistry("txn.%s", txnRegistry)

	s.node = NewNode(nCtx, s.recorder, s.stopper, txnMetrics)
	s.admin = newAdminServer(s.db, s.stopper, s.sqlExecutor)
	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.NewServer(s.tsDB)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx)

	return s, nil
}
Ejemplo n.º 27
0
// Start starts the test cluster by bootstrapping an in-memory store
// (defaults to maximum of 50M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.Addr after Start() for client connections. Use Stop()
// to shutdown the server after the test completes.
func (ltc *LocalTestCluster) Start(t util.Tester) {
	nodeID := roachpb.NodeID(1)
	nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID}
	ltc.tester = t
	ltc.Manual = hlc.NewManualClock(0)
	ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano)
	ltc.Stopper = stop.NewStopper()
	rpcContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), ltc.Clock, ltc.Stopper)
	ltc.Gossip = gossip.New(rpcContext, gossip.TestBootstrap, ltc.Stopper)
	ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper)

	ltc.stores = storage.NewStores(ltc.Clock)
	tracer := tracing.NewTracer()
	var rpcSend rpcSendFn = func(_ SendOptions, _ ReplicaSlice,
		args roachpb.BatchRequest, _ *rpc.Context) (proto.Message, error) {
		if ltc.Latency > 0 {
			time.Sleep(ltc.Latency)
		}
		sp := tracer.StartSpan("node")
		defer sp.Finish()
		ctx := opentracing.ContextWithSpan(context.Background(), sp)
		sp.LogEvent(args.String())
		br, pErr := ltc.stores.Send(ctx, args)
		if br == nil {
			br = &roachpb.BatchResponse{}
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(ltc.stores, br))
		}
		br.Error = pErr
		if pErr != nil {
			sp.LogEvent("error: " + pErr.String())
		}
		return br, nil
	}
	retryOpts := GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = ltc.Stopper.ShouldDrain()
	ltc.distSender = NewDistSender(&DistSenderContext{
		Clock: ltc.Clock,
		RangeDescriptorCacheSize: defaultRangeDescriptorCacheSize,
		RangeLookupMaxRanges:     defaultRangeLookupMaxRanges,
		LeaderCacheSize:          defaultLeaderCacheSize,
		RPCRetryOptions:          &retryOpts,
		nodeDescriptor:           nodeDesc,
		RPCSend:                  rpcSend,    // defined above
		RangeDescriptorDB:        ltc.stores, // for descriptor lookup
	}, ltc.Gossip)

	ltc.Sender = NewTxnCoordSender(ltc.distSender, ltc.Clock, false /* !linearizable */, tracer,
		ltc.Stopper, NewTxnMetrics(metric.NewRegistry()))
	ltc.DB = client.NewDB(ltc.Sender)
	transport := storage.NewDummyRaftTransport()
	ctx := storage.TestStoreContext()
	ctx.Clock = ltc.Clock
	ctx.DB = ltc.DB
	ctx.Gossip = ltc.Gossip
	ctx.Transport = transport
	ctx.Tracer = tracer
	ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc)
	if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.stores.AddStore(ltc.Store)
	if err := ltc.Store.BootstrapRange(nil); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	if err := ltc.Store.Start(ltc.Stopper); err != nil {
		t.Fatalf("unable to start local test cluster: %s", err)
	}
	ltc.Gossip.SetNodeID(nodeDesc.NodeID)
	if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil {
		t.Fatalf("unable to set node descriptor: %s", err)
	}
}
Ejemplo n.º 28
0
// TestClientNotReady verifies that Send gets an RPC error when a client
// does not become ready.
func TestClientNotReady(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Skip("TODO(tamird): #3942")

	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeContext := newNodeTestContext(nil, stopper)

	// Construct a server that listens but doesn't do anything.
	s, ln := newTestServer(t, nodeContext)
	registerBatch(t, s, 50*time.Millisecond)

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Nanosecond,
		Timeout:         100 * time.Nanosecond,
		Trace:           sp,
	}

	// Send RPC to an address where no server is running.
	if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}

	// Send the RPC again with no timeout.
	opts.SendNextTimeout = 0
	opts.Timeout = 0
	c := make(chan error)
	go func() {
		if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext); err == nil {
			c <- util.Errorf("expected error when client is closed")
		} else if !strings.Contains(err.Error(), "failed as client connection was closed") {
			c <- err
		}
		close(c)
	}()

	select {
	case <-c:
		t.Fatalf("Unexpected end of rpc call")
	case <-time.After(1 * time.Millisecond):
	}

	// Grab the client for our invalid address and close it. This will cause the
	// blocked ping RPC to finish.
	rpc.NewClient(ln.Addr(), nodeContext).Close()
	if err := <-c; err != nil {
		t.Fatal(err)
	}
}
Ejemplo n.º 29
0
// TestMultiRangeScanDeleteRange tests that commands which access multiple
// ranges are carried out properly.
func TestMultiRangeScanDeleteRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s := StartTestServer(t)
	defer s.Stop()
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = s.stopper.ShouldDrain()
	ds := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.Clock(),
		RPCContext:      s.RPCContext(),
		RPCRetryOptions: &retryOpts,
	}, s.Gossip())
	tds := kv.NewTxnCoordSender(ds, s.Clock(), testContext.Linearizable, tracing.NewTracer(),
		s.stopper, kv.NewTxnMetrics(metric.NewRegistry()))

	if err := s.node.ctx.DB.AdminSplit("m"); err != nil {
		t.Fatal(err)
	}
	writes := []roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}
	get := &roachpb.GetRequest{
		Span: roachpb.Span{Key: writes[0]},
	}
	get.EndKey = writes[len(writes)-1]
	if _, err := client.SendWrapped(tds, nil, get); err == nil {
		t.Errorf("able to call Get with a key range: %v", get)
	}
	var delTS roachpb.Timestamp
	for i, k := range writes {
		put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
		reply, err := client.SendWrapped(tds, nil, put)
		if err != nil {
			t.Fatal(err)
		}
		scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next(), 0).(*roachpb.ScanRequest)
		// The Put ts may have been pushed by tsCache,
		// so make sure we see their values in our Scan.
		delTS = reply.(*roachpb.PutResponse).Timestamp
		reply, err = client.SendWrappedWith(tds, nil, roachpb.Header{Timestamp: delTS}, scan)
		if err != nil {
			t.Fatal(err)
		}
		sr := reply.(*roachpb.ScanResponse)
		if sr.Txn != nil {
			// This was the other way around at some point in the past.
			// Same below for Delete, etc.
			t.Errorf("expected no transaction in response header")
		}
		if rows := sr.Rows; len(rows) != i+1 {
			t.Fatalf("expected %d rows, but got %d", i+1, len(rows))
		}
	}

	del := &roachpb.DeleteRangeRequest{
		Span: roachpb.Span{
			Key:    writes[0],
			EndKey: roachpb.Key(writes[len(writes)-1]).Next(),
		},
		ReturnKeys: true,
	}
	reply, err := client.SendWrappedWith(tds, nil, roachpb.Header{Timestamp: delTS}, del)
	if err != nil {
		t.Fatal(err)
	}
	dr := reply.(*roachpb.DeleteRangeResponse)
	if dr.Txn != nil {
		t.Errorf("expected no transaction in response header")
	}
	if !reflect.DeepEqual(dr.Keys, writes) {
		t.Errorf("expected %d keys to be deleted, but got %d instead", writes, dr.Keys)
	}

	scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next(), 0).(*roachpb.ScanRequest)
	txn := &roachpb.Transaction{Name: "MyTxn"}
	reply, err = client.SendWrappedWith(tds, nil, roachpb.Header{Txn: txn}, scan)
	if err != nil {
		t.Fatal(err)
	}
	sr := reply.(*roachpb.ScanResponse)
	if txn := sr.Txn; txn == nil || txn.Name != "MyTxn" {
		t.Errorf("wanted Txn to persist, but it changed to %v", txn)
	}
	if rows := sr.Rows; len(rows) > 0 {
		t.Fatalf("scan after delete returned rows: %v", rows)
	}
}
Ejemplo n.º 30
0
// TestComplexScenarios verifies various complex success/failure scenarios by
// mocking sendOne.
func TestComplexScenarios(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeContext := newNodeTestContext(nil, stopper)

	testCases := []struct {
		numServers               int
		numErrors                int
		numRetryableErrors       int
		success                  bool
		isRetryableErrorExpected bool
	}{
		// --- Success scenarios ---
		{1, 0, 0, true, false},
		{5, 0, 0, true, false},
		// There are some errors, but enough RPCs succeed.
		{5, 1, 0, true, false},
		{5, 4, 0, true, false},
		{5, 2, 0, true, false},

		// --- Failure scenarios ---
		// All RPCs fail.
		{5, 5, 0, false, false},
		// All RPCs fail, but some of the errors are retryable.
		{5, 5, 1, false, true},
		{5, 5, 3, false, true},
		// Some RPCs fail, but we do have enough remaining clients and recoverable errors.
		{5, 5, 2, false, true},
	}
	for i, test := range testCases {
		// Copy the values to avoid data race. sendOneFn might
		// be called after this test case finishes.
		numErrors := test.numErrors
		numRetryableErrors := test.numRetryableErrors

		var serverAddrs []net.Addr
		for j := 0; j < test.numServers; j++ {
			_, ln := newTestServer(t, nodeContext)
			serverAddrs = append(serverAddrs, ln.Addr())
		}

		sp := tracing.NewTracer().StartSpan("node test")
		defer sp.Finish()

		opts := SendOptions{
			Ordering:        orderStable,
			SendNextTimeout: 1 * time.Second,
			Timeout:         10 * time.Second,
			Trace:           sp,
		}

		// Mock sendOne.
		sendOneFn = func(client *batchClient, timeout time.Duration,
			context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) {
			addr := client.RemoteAddr()
			addrID := -1
			for serverAddrID, serverAddr := range serverAddrs {
				if serverAddr.String() == addr.String() {
					addrID = serverAddrID
					break
				}
			}
			if addrID == -1 {
				t.Fatalf("%d: %v is not found in serverAddrs: %v", i, addr, serverAddrs)
			}
			call := netrpc.Call{
				Reply: &roachpb.BatchResponse{},
			}
			if addrID < numErrors {
				call.Error = roachpb.NewSendError("test", addrID < numRetryableErrors)
			}
			done <- &call
		}
		defer func() { sendOneFn = sendOne }()

		reply, err := sendBatch(opts, serverAddrs, nodeContext)
		if test.success {
			if reply == nil {
				t.Errorf("%d: expected reply", i)
			}
			continue
		}

		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("%d: Unexpected error type: %v", i, err)
		}
		if retryErr.CanRetry() != test.isRetryableErrorExpected {
			t.Errorf("%d: Unexpected error: %v", i, retryErr)
		}
	}
}