// TestRejectFutureCommand verifies that lease holders reject commands that
// would cause a large time jump.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// several error-handling bodies; confirm against the complete file before
// relying on the control flow shown here.
func TestRejectFutureCommand(t *testing.T) {
	defer leaktest.AfterTest(t)()

	const maxOffset = 100 * time.Millisecond
	// The manual clock starts at 0, so all wall times below are offsets from 0.
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	mtc := multiTestContext{
		clock: clock,
	mtc.Start(t, 1)
	defer mtc.Stop()

	// First do a write. The first write will advance the clock by MaxOffset
	// because of the read cache's low water mark.
	// NOTE(review): despite its name, getArgs holds a put request.
	getArgs := putArgs([]byte("b"), []byte("b"))
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &getArgs); err != nil {
	if now := clock.Now(); now.WallTime != int64(maxOffset) {
		t.Fatalf("expected clock to advance to 100ms; got %s", now)
	// The logical clock has advanced past the physical clock; increment
	// the "physical" clock to catch up.
	// NOTE(review): no statement adjusting the manual clock is visible after
	// this comment in the excerpt.

	startTime := manual.UnixNano()

	// Commands with a future timestamp that is within the MaxOffset
	// bound will be accepted and will cause the clock to advance.
	// Three increments of 5 are sent at startTime+30ms, +60ms and +90ms.
	for i := int64(0); i < 3; i++ {
		incArgs := incrementArgs([]byte("a"), 5)
		ts := hlc.ZeroTimestamp.Add(startTime+((i+1)*30)*int64(time.Millisecond), 0)
		if _, err := client.SendWrappedWith(rg1(mtc.stores[0]), nil, roachpb.Header{Timestamp: ts}, &incArgs); err != nil {
	if now := clock.Now(); now.WallTime != int64(190*time.Millisecond) {
		t.Fatalf("expected clock to advance to 190ms; got %s", now)

	// Once the accumulated offset reaches MaxOffset, commands will be rejected.
	incArgs := incrementArgs([]byte("a"), 11)
	// NOTE(review): the sum is a time.Duration that is then multiplied by
	// time.Millisecond, which yields a timestamp far beyond startTime+maxOffset;
	// confirm that this scaling is intentional.
	ts := hlc.ZeroTimestamp.Add(int64((time.Duration(startTime)+maxOffset+1)*time.Millisecond), 0)
	if _, err := client.SendWrappedWith(rg1(mtc.stores[0]), nil, roachpb.Header{Timestamp: ts}, &incArgs); err == nil {
		t.Fatalf("expected clock offset error but got nil")

	// The clock remained at 190ms and the final command was not executed.
	// NOTE(review): the failure message below says "advance" although this
	// check asserts that the clock did not move.
	if now := clock.Now(); now.WallTime != int64(190*time.Millisecond) {
		t.Errorf("expected clock to advance to 190ms; got %s", now)
	val, _, err := engine.MVCCGet(context.Background(), mtc.engines[0], roachpb.Key("a"), clock.Now(), true, nil)
	if err != nil {
	// 15 is the sum of the three accepted increments of 5; the rejected
	// increment of 11 must not be reflected.
	if v := mustGetInt(val); v != 15 {
		t.Errorf("expected 15, got %v", v)
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// some statement bodies; confirm against the complete file.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()

	testCases := []struct {
		// args is the request sent inside the transaction.
		args    roachpb.Request
		// writing is copied into txn.Writing before the request is sent.
		writing bool
		// ok states whether the send is expected to succeed.
		ok      bool
		{roachpb.NewGet(roachpb.Key("a")), true, false},
		{roachpb.NewGet(roachpb.Key("a")), false, true},
		{roachpb.NewPut(roachpb.Key("a"), roachpb.Value{}), false, false}, // transactional write before begin
		{roachpb.NewPut(roachpb.Key("a"), roachpb.Value{}), true, false},  // must have switched coordinators

	for i, tc := range testCases {
		// Each case uses a fresh transaction anchored at key "a".
		txn := roachpb.NewTransaction("test", roachpb.Key("a"), 1, enginepb.SERIALIZABLE,
			s.Clock.Now(), s.Clock.MaxOffset().Nanoseconds())
		txn.Writing = tc.writing
		reply, pErr := client.SendWrappedWith(sender, nil, roachpb.Header{
			Txn: txn,
		}, tc.args)
		// Fail the case when the observed success differs from the expectation.
		if pErr == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.args, tc.writing, tc.ok, pErr)
		if pErr != nil {

		txn = reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := roachpb.IsTransactionWrite(tc.args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		if !isWrite {
		// Abort for clean shutdown.
		if _, pErr := client.SendWrappedWith(sender, nil, roachpb.Header{
			Txn: txn,
		}, &roachpb.EndTransactionRequest{
			Commit: false,
		}); pErr != nil {
// fillRange writes randomly generated keys, each starting with the given
// prefix, together with random values until the range's key and value bytes
// total at least `bytes`, or until a write fails with a
// RangeKeyMismatchError (which indicates that the range has split).
//
// NOTE(review): this excerpt appears to have lost its closing braces and the
// bodies of its error/exit branches; confirm against the complete file.
func fillRange(store *storage.Store, rangeID roachpb.RangeID, prefix roachpb.Key, bytes int64, t *testing.T) {
	// A fixed seed keeps the generated keys and values reproducible.
	src := rand.New(rand.NewSource(0))
	for {
		// Re-read the range's stats on every iteration to measure progress.
		var ms enginepb.MVCCStats
		if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), rangeID, &ms); err != nil {
		keyBytes, valBytes := ms.KeyBytes, ms.ValBytes
		if keyBytes+valBytes >= bytes {
		// Build the key in a fresh slice so that prefix itself is never
		// appended to: prefix followed by 100 random bytes.
		key := append(append([]byte(nil), prefix...), randutil.RandBytes(src, 100)...)
		key = keys.MakeRowSentinelKey(key)
		// The value length is drawn from [0, 256).
		val := randutil.RandBytes(src, int(src.Int31n(1<<8)))
		pArgs := putArgs(key, val)
		_, pErr := client.SendWrappedWith(store, nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs)
		// When the split occurs in the background, our writes may start failing.
		// We know we can stop writing when this happens.
		if _, ok := pErr.GetDetail().(*roachpb.RangeKeyMismatchError); ok {
		} else if pErr != nil {
// TestComputeStatsForKeySpan splits the keyspace at several keys, increments
// a handful of keys spread over the resulting ranges, and then checks the
// range count and live key count returned by Store.ComputeStatsForKeySpan
// for a number of spans.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// some error-handling bodies; confirm against the complete file.
func TestComputeStatsForKeySpan(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	// Create a number of ranges using splits.
	splitKeys := []string{"a", "c", "e", "g", "i"}
	for _, k := range splitKeys {
		key := []byte(k)
		// Address the split to whichever range currently contains the key.
		repl := mtc.stores[0].LookupReplica(key, roachpb.RKeyMin)
		args := adminSplitArgs(key, key)
		header := roachpb.Header{
			RangeID: repl.RangeID,
		if _, err := client.SendWrappedWith(mtc.stores[0], nil, header, &args); err != nil {

	// Wait for splits to finish.
	// The last split is considered done once the range containing "z"
	// starts at "i".
	util.SucceedsSoon(t, func() error {
		repl := mtc.stores[0].LookupReplica(roachpb.RKey("z"), nil)
		if actualRSpan := repl.Desc().RSpan(); !actualRSpan.Key.Equal(roachpb.RKey("i")) {
			return errors.Errorf("expected range %s to begin at key 'i'", repl)
		return nil

	// Create some keys across the ranges.
	incKeys := []string{"b", "bb", "bbb", "d", "dd", "h"}
	for _, k := range incKeys {
		if _, err := mtc.dbs[0].Inc([]byte(k), 5); err != nil {

	// Verify stats across different spans.
	for _, tcase := range []struct {
		// startKey and endKey delimit the span passed to
		// ComputeStatsForKeySpan.
		startKey       string
		endKey         string
		// expectedRanges is compared against the returned range count.
		expectedRanges int
		// expectedKeys is compared against the returned stats' LiveCount.
		expectedKeys   int64
		{"a", "i", 4, 6},
		{"a", "c", 1, 3},
		{"b", "e", 2, 5},
		{"e", "i", 2, 1},
	} {
		start, end := tcase.startKey, tcase.endKey
		stats, count := mtc.stores[0].ComputeStatsForKeySpan(
			roachpb.RKey(start), roachpb.RKey(end))
		if a, e := count, tcase.expectedRanges; a != e {
			t.Errorf("Expected %d ranges in span [%s - %s], found %d", e, start, end, a)
		if a, e := stats.LiveCount, tcase.expectedKeys; a != e {
			t.Errorf("Expected %d keys in span [%s - %s], found %d", e, start, end, a)
// TestRangeCommandClockUpdate verifies that followers update their
// clocks when executing a command, even if the lease holder's clock is far
// in the future.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// some error-handling bodies; confirm against the complete file.
func TestRangeCommandClockUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	const numNodes = 3
	// One manual clock per node so that each node's time can be moved
	// independently.
	var manuals []*hlc.ManualClock
	var clocks []*hlc.Clock
	for i := 0; i < numNodes; i++ {
		manuals = append(manuals, hlc.NewManualClock(1))
		clocks = append(clocks, hlc.NewClock(manuals[i].UnixNano))
		clocks[i].SetMaxOffset(100 * time.Millisecond)
	mtc := multiTestContext{
		clocks: clocks,
	mtc.Start(t, numNodes)
	defer mtc.Stop()
	mtc.replicateRange(1, 1, 2)

	// Advance the lease holder's clock ahead of the followers (by more than
	// MaxOffset but less than the range lease) and execute a command.
	manuals[0].Increment(int64(500 * time.Millisecond))
	incArgs := incrementArgs([]byte("a"), 5)
	ts := clocks[0].Now()
	if _, err := client.SendWrappedWith(rg1(mtc.stores[0]), nil, roachpb.Header{Timestamp: ts}, &incArgs); err != nil {

	// Wait for that command to execute on all the followers.
	util.SucceedsSoon(t, func() error {
		// Read key "a" directly from every engine; each must report 5.
		values := []int64{}
		for _, eng := range mtc.engines {
			val, _, err := engine.MVCCGet(context.Background(), eng, roachpb.Key("a"), clocks[0].Now(), true, nil)
			if err != nil {
				return err
			values = append(values, mustGetInt(val))
		if !reflect.DeepEqual(values, []int64{5, 5, 5}) {
			return errors.Errorf("expected (5, 5, 5), got %v", values)
		return nil

	// Verify that all the followers have accepted the clock update from
	// node 0 even though it comes from outside the usual max offset.
	now := clocks[0].Now()
	for i, clock := range clocks {
		// Only compare the WallTimes: it's normal for clock 0 to be a few logical ticks ahead.
		if clock.Now().WallTime < now.WallTime {
			t.Errorf("clock %d is behind clock 0: %s vs %s", i, clock.Now(), now)
// writeRandomTimeSeriesDataToRange generates random time series data, sends
// it to the given range as Merge requests under keys derived from keyPrefix,
// and returns a key that lies approximately midway through the written keys.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// some error-handling bodies; confirm against the complete file.
func writeRandomTimeSeriesDataToRange(
	t testing.TB,
	store *storage.Store,
	rangeID roachpb.RangeID,
	keyPrefix []byte,
) (midpoint []byte) {
	// A fixed seed keeps the generated data reproducible.
	src := rand.New(rand.NewSource(0))
	r := ts.Resolution10s
	for i := 0; i < 20; i++ {
		var data []tspb.TimeSeriesData
		// NOTE(review): the loop condition draws a new random bound on every
		// iteration, so the number of series is not a single uniform draw.
		for j := int64(0); j <= src.Int63n(5); j++ {
			d := tspb.TimeSeriesData{
				Name:   "test.random.metric",
				Source: "cpu01",
			// The same per-iteration re-draw applies to the datapoint count.
			for k := int64(0); k <= src.Int63n(10); k++ {
				d.Datapoints = append(d.Datapoints, tspb.TimeSeriesDatapoint{
					// Timestamps are multiples of the key duration in
					// [0, 200) key durations.
					TimestampNanos: src.Int63n(200) * r.KeyDuration(),
					Value:          src.Float64(),
			data = append(data, d)
		for _, d := range data {
			idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
			if err != nil {
			for _, idata := range idatas {
				var value roachpb.Value
				if err := value.SetProto(&idata); err != nil {
				mArgs := roachpb.MergeRequest{
					Span: roachpb.Span{
						// The key is keyPrefix followed by the encoded start
						// timestamp of the internal data.
						Key: encoding.EncodeVarintAscending(keyPrefix, idata.StartTimestampNanos),
					Value: value,
				if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
					RangeID: rangeID,
				}, &mArgs); pErr != nil {
	// Return approximate midway point (100 is midway between random timestamps in range [0,200)).
	midKey := append([]byte(nil), keyPrefix...)
	midKey = encoding.EncodeVarintAscending(midKey, 100*r.KeyDuration())
	return keys.MakeRowSentinelKey(midKey)
// fillTestRange writes random key/value pairs of keySize and valSize bytes
// to rep via put requests.
//
// NOTE(review): the size parameter is not used in the visible body; the loop
// bound comes from snapSize instead. snapSize, keySize, valSize and rangeID
// are not declared in this block and must come from elsewhere. The excerpt
// also appears to have lost its closing braces and error handling.
func fillTestRange(t testing.TB, rep *Replica, size int) {
	// A fixed seed keeps the generated data reproducible.
	src := rand.New(rand.NewSource(0))
	for i := 0; i < snapSize/(keySize+valSize); i++ {
		key := keys.MakeRowSentinelKey(randutil.RandBytes(src, keySize))
		val := randutil.RandBytes(src, valSize)
		pArgs := putArgs(key, val)
		if _, pErr := client.SendWrappedWith(rep, nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs); pErr != nil {
// getTxn sends a HeartbeatTxnRequest for txn's key through coord, with txn
// attached to the request header, and returns the transaction carried in the
// heartbeat response. If the send fails, the error is returned and the
// transaction result is nil.
//
// NOTE(review): this excerpt appears to have lost its closing braces;
// confirm against the complete file.
func getTxn(coord *TxnCoordSender, txn *roachpb.Transaction) (*roachpb.Transaction, *roachpb.Error) {
	hb := &roachpb.HeartbeatTxnRequest{
		Span: roachpb.Span{
			Key: txn.Key,
	reply, pErr := client.SendWrappedWith(coord, nil, roachpb.Header{
		Txn: txn,
	}, hb)
	if pErr != nil {
		return nil, pErr
	return reply.(*roachpb.HeartbeatTxnResponse).Txn, nil
// writeRandomDataToRange sends 100 put requests with random keys (each
// starting with keyPrefix) and random values to the given range.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error handling; confirm against the complete file.
func writeRandomDataToRange(t testing.TB, store *storage.Store, rangeID roachpb.RangeID, keyPrefix []byte) {
	// A fixed seed keeps the generated data reproducible.
	src := rand.New(rand.NewSource(0))
	for i := 0; i < 100; i++ {
		// Copy keyPrefix into a fresh slice before appending so the caller's
		// slice is never written to.
		key := append([]byte(nil), keyPrefix...)
		// The random key suffix has a length in [0, 128).
		key = append(key, randutil.RandBytes(src, int(src.Int31n(1<<7)))...)
		key = keys.MakeRowSentinelKey(key)
		// The value has a length in [0, 256).
		val := randutil.RandBytes(src, int(src.Int31n(1<<8)))
		pArgs := putArgs(key, val)
		if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs); pErr != nil {
// FindRangeLeaseHolder returns the current lease holder for the given range. If
// there is no lease at the time of the call, a replica gets one as a
// side-effect of calling this; if hint is not nil, that replica will be the
// one.
// One of the Stores in the cluster is used as a Sender to send a dummy read
// command, which will either result in success (if a replica on that Node has
// the lease), in a NotLeaseHolderError pointing to the current lease holder (if
// there is an active lease), or in the replica on that store acquiring the
// lease (if there isn't an active lease).
// If an active lease existed for the range, it's extended as a side-effect.
//
// An error is returned if hint names a store without a replica of the range,
// if the hinted store is not found in the cluster, if a NotLeaseHolderError
// arrives without a known lease holder, or if the read fails for any other
// reason.
//
// NOTE(review): this excerpt appears to have lost its closing braces, and the
// visible SendWrappedWith call never receives getReq; confirm against the
// complete file.
func (tc *TestCluster) FindRangeLeaseHolder(
	rangeDesc *roachpb.RangeDescriptor,
	hint *ReplicationTarget,
) (ReplicationTarget, error) {
	var hintReplicaDesc roachpb.ReplicaDescriptor
	if hint != nil {
		var ok bool
		if hintReplicaDesc, ok = rangeDesc.GetReplicaDescriptor(hint.StoreID); !ok {
			return ReplicationTarget{}, errors.Errorf(
				"bad hint; store doesn't have a replica of the range")
	} else {
		// Without a hint, fall back to the first replica in the descriptor.
		hint = &ReplicationTarget{
			NodeID:  rangeDesc.Replicas[0].NodeID,
			StoreID: rangeDesc.Replicas[0].StoreID}
		hintReplicaDesc = rangeDesc.Replicas[0]
	// TODO(andrei): Using a dummy GetRequest for the purpose of figuring out the
	// lease holder is a hack. Instead, we should have a dedicated admin command.
	getReq := roachpb.GetRequest{
		Span: roachpb.Span{
			Key: rangeDesc.StartKey.AsRawKey(),

	store, err := tc.findMemberStore(hint.StoreID)
	if err != nil {
		return ReplicationTarget{}, err
	_, pErr := client.SendWrappedWith(
		store, nil,
		roachpb.Header{RangeID: rangeDesc.RangeID, Replica: hintReplicaDesc},
	if pErr != nil {
		// A NotLeaseHolderError tells us who holds the lease, provided the
		// lease holder is known.
		if nle, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); ok {
			if nle.LeaseHolder == nil {
				return ReplicationTarget{}, errors.Errorf(
					"unexpected NotLeaseHolderError with lease holder unknown")
			return ReplicationTarget{
				NodeID: nle.LeaseHolder.NodeID, StoreID: nle.LeaseHolder.StoreID}, nil
		return ReplicationTarget{}, pErr.GoError()
	// The replica we sent the request to either was already or just became
	// the lease holder.
	return *hint, nil
// fillTestRange writes size/(keySize+valSize) random key/value pairs to rep
// via put requests and then fails the test if the replica's total stats are
// still below size.
//
// NOTE(review): keySize, valSize and rangeID are not declared in this block
// and must come from elsewhere. The excerpt also appears to have lost its
// closing braces and error handling.
func fillTestRange(t testing.TB, rep *Replica, size int64) {
	// A fixed seed keeps the generated data reproducible.
	src := rand.New(rand.NewSource(0))
	for i := int64(0); i < size/int64(keySize+valSize); i++ {
		key := keys.MakeRowSentinelKey(randutil.RandBytes(src, keySize))
		val := randutil.RandBytes(src, valSize)
		pArgs := putArgs(key, val)
		if _, pErr := client.SendWrappedWith(rep, nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs); pErr != nil {
	// NOTE(review): rep.mu.state is read here without any visible locking;
	// confirm whether the caller is expected to hold rep.mu.
	after := rep.mu.state.Stats.Total()
	if after < size {
		t.Fatalf("range not full after filling: wrote %d, but range at %d", size, after)
// process synchronously invokes admin split for each proposed split key.
//
// It first asks the system config for split keys within the range's bounds
// and, if any exist, splits at each of them through sq.db. Otherwise it
// compares the range's total MVCC stats size against the zone's RangeMaxBytes
// and, when the size exceeds that limit, sends an AdminSplitRequest directly
// to the replica. Errors from either path are returned to the caller.
//
// NOTE(review): this excerpt appears to have lost its closing braces;
// confirm against the complete file.
func (sq *splitQueue) process(
	ctx context.Context,
	now hlc.Timestamp,
	rng *Replica,
	sysCfg config.SystemConfig,
) error {
	// First handle case of splitting due to zone config maps.
	desc := rng.Desc()
	splitKeys := sysCfg.ComputeSplitKeys(desc.StartKey, desc.EndKey)
	if len(splitKeys) > 0 {
		log.Infof("splitting %s at keys %v", rng, splitKeys)
		log.Trace(ctx, fmt.Sprintf("splitting at keys %v", splitKeys))
		for _, splitKey := range splitKeys {
			if err := sq.db.AdminSplit(splitKey.AsRawKey()); err != nil {
				return errors.Errorf("unable to split %s at key %q: %s", rng, splitKey, err)
		return nil

	// Next handle case of splitting due to size.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return err
	size := rng.GetMVCCStats().Total()
	// FIXME: why is this implementation not the same as the one above?
	// Split only when the range is strictly larger than the zone's maximum.
	if float64(size)/float64(zone.RangeMaxBytes) > 1 {
		log.Infof("splitting %s size=%d max=%d", rng, size, zone.RangeMaxBytes)
		log.Trace(ctx, fmt.Sprintf("splitting size=%d max=%d", size, zone.RangeMaxBytes))
		// The request carries only the range's start key; no explicit split
		// key is set on it here.
		if _, pErr := client.SendWrappedWith(rng, ctx, roachpb.Header{
			Timestamp: now,
		}, &roachpb.AdminSplitRequest{
			Span: roachpb.Span{Key: desc.StartKey.AsRawKey()},
		}); pErr != nil {
			return pErr.GoError()
	return nil
// BenchmarkReplicaSnapshot fills range 1 with roughly snapSize bytes of
// random key/value data and then measures repeated calls to
// Replica.GetSnapshot.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error-handling bodies; confirm against the complete file.
func BenchmarkReplicaSnapshot(b *testing.B) {
	defer tracing.Disable()()
	defer config.TestingDisableTableSplits()()
	store, stopper, _ := createTestStore(b)
	// We want to manually control the size of the raft log.
	// NOTE(review): the comment above does not describe the statement that
	// follows it; the code it referred to is not visible in this excerpt.
	defer stopper.Stop()

	const rangeID = 1
	const keySize = 1 << 7   // 128 B
	const valSize = 1 << 10  // 1 KiB
	const snapSize = 1 << 25 // 32 MiB

	rep, err := store.GetReplica(rangeID)
	if err != nil {

	// Populate the range with snapSize/(keySize+valSize) fixed-size entries
	// generated from a fixed seed.
	src := rand.New(rand.NewSource(0))
	for i := 0; i < snapSize/(keySize+valSize); i++ {
		key := keys.MakeRowSentinelKey(randutil.RandBytes(src, keySize))
		val := randutil.RandBytes(src, valSize)
		pArgs := putArgs(key, val)
		if _, pErr := client.SendWrappedWith(rep, nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs); pErr != nil {

	// NOTE(review): no b.ResetTimer call is visible before this loop, so the
	// fill phase above may be included in the measured time.
	for i := 0; i < b.N; i++ {
		if _, err := rep.GetSnapshot(); err != nil {
// writeRandomDataToRange sends 100 put requests with random keys (each
// starting with keyPrefix) and random values to the given range, and returns
// a key intended to fall roughly in the middle of the written keys.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error handling; confirm against the complete file.
func writeRandomDataToRange(
	t testing.TB,
	store *storage.Store,
	rangeID roachpb.RangeID,
	keyPrefix []byte,
) (midpoint []byte) {
	// A fixed seed keeps the generated data reproducible.
	src := rand.New(rand.NewSource(0))
	for i := 0; i < 100; i++ {
		// Copy keyPrefix into a fresh slice before appending so the caller's
		// slice is never written to.
		key := append([]byte(nil), keyPrefix...)
		// The random key suffix has a length in [0, 128).
		key = append(key, randutil.RandBytes(src, int(src.Int31n(1<<7)))...)
		key = keys.MakeRowSentinelKey(key)
		// The value has a length in [0, 256).
		val := randutil.RandBytes(src, int(src.Int31n(1<<8)))
		pArgs := putArgs(key, val)
		if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
			RangeID: rangeID,
		}, &pArgs); pErr != nil {
	// Return approximate midway point ("Z" in string "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz").
	// NOTE(review): this presumes randutil.RandBytes draws from that
	// 52-letter alphabet; verify against its implementation.
	midKey := append([]byte(nil), keyPrefix...)
	midKey = append(midKey, []byte("Z")...)
	return keys.MakeRowSentinelKey(midKey)
// TestTxnPutOutOfOrder tests a case where a put operation of an older
// timestamp comes after a put operation of a newer timestamp in a
// txn. The test ensures such an out-of-order put succeeds and
// overrides an old value. The test uses a "Writer" and a "Reader"
// to reproduce an out-of-order put.
// 1) The Writer executes a put operation and writes a write intent with
//    time T in a txn.
// 2) Before the Writer's txn is committed, the Reader sends a high priority
//    get operation with time T+100. This pushes the Writer txn timestamp to
//    T+100 and triggers the restart of the Writer's txn. The original
//    write intent timestamp is also updated to T+100.
// 3) The Writer starts a new epoch of the txn, but before it writes, the
//    Reader sends another high priority get operation with time T+200. This
//    pushes the Writer txn timestamp to T+200 to trigger a restart of the
//    Writer txn. The Writer will not actually restart until it tries to commit
//    the current epoch of the transaction. The Reader updates the timestamp of
//    the write intent to T+200. The test deliberately fails the Reader get
//    operation, and cockroach doesn't update its read timestamp cache.
// 4) The Writer executes the put operation again. This put operation comes
//    out-of-order since its timestamp is T+100, while the intent timestamp
//    updated at Step 3 is T+200.
// 5) The put operation overrides the old value using timestamp T+100.
// 6) When the Writer attempts to commit its txn, the txn will be restarted
//    again at a new epoch timestamp T+200, which will finally succeed.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// many single-statement lines. In particular, the channels created below are
// never sent on or received from in the visible code, epoch is never
// incremented, and no clock advancement follows the "Advance the clock"
// comments. Confirm against the complete file.
func TestTxnPutOutOfOrder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	key := "key"
	// Set up a filter so that the get operation at Step 3 will return an error.
	// numGets counts the non-transactional gets on key seen by the filter.
	var numGets int32

	manualClock := hlc.NewManualClock(0)
	clock := hlc.NewClock(manualClock.UnixNano)
	stopper := stop.NewStopper()
	defer stopper.Stop()
	ctx := storage.TestStoreContext()
	ctx.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			// Only non-transactional GetRequests for the test key are counted.
			if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok &&
				filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) &&
				filterArgs.Hdr.Txn == nil {
				// The Reader executes two get operations, each of which triggers two get requests
				// (the first request fails and triggers txn push, and then the second request
				// succeeds). Returns an error for the fourth get request to avoid timestamp cache
				// update after the third get operation pushes the txn timestamp.
				if atomic.AddInt32(&numGets, 1) == 4 {
					return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn)
			return nil
	// NOTE(review): ctx, which carries the command filter, is not passed to
	// createTestStoreWithEngine in the visible arguments.
	store := createTestStoreWithEngine(t,
		engine.NewInMem(roachpb.Attributes{}, 10<<20, stopper),

	// Put an initial value.
	initVal := []byte("initVal")
	err := store.DB().Put(key, initVal)
	if err != nil {
		t.Fatalf("failed to put: %s", err)

	// Signalling channels between the Writer goroutine and the Reader.
	waitPut := make(chan struct{})
	waitFirstGet := make(chan struct{})
	waitTxnRestart := make(chan struct{})
	waitSecondGet := make(chan struct{})
	waitTxnComplete := make(chan struct{})

	// Start the Writer.
	go func() {
		epoch := -1
		// Start a txn that does read-after-write.
		// The txn will be restarted twice, and the out-of-order put
		// will happen in the second epoch.
		if err := store.DB().Txn(func(txn *client.Txn) error {

			if epoch == 1 {
				// Wait until the second get operation is issued.

			updatedVal := []byte("updatedVal")
			if err := txn.Put(key, updatedVal); err != nil {
				return err

			// Make sure a get will return the value that was just written.
			actual, err := txn.Get(key)
			if err != nil {
				return err
			// NOTE(review): t.Fatalf is invoked from a goroutine other than
			// the one running the test function.
			if !bytes.Equal(actual.ValueBytes(), updatedVal) {
				t.Fatalf("unexpected get result: %s", actual)

			if epoch == 0 {
				// Wait until the first get operation will push the txn timestamp.

			b := txn.NewBatch()
			return txn.CommitInBatch(b)
		}); err != nil {

		// Two restarts are expected, so the final epoch must be 2.
		if epoch != 2 {
			t.Fatalf("unexpected number of txn retries: %d", epoch)



	// Start the Reader.

	// Advance the clock and send a get operation with higher
	// priority to trigger the txn restart.

	priority := roachpb.UserPriority(-math.MaxInt32)
	requestHeader := roachpb.Span{
		Key: roachpb.Key(key),
	ts := clock.Now()
	if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		Timestamp:    ts,
		UserPriority: priority,
	}, &roachpb.GetRequest{Span: requestHeader}); err != nil {
		t.Fatalf("failed to get: %s", err)

	// Wait until the writer restarts the txn.

	// Advance the clock and send a get operation again. This time
	// we use TestingCommandFilter so that a get operation is not
	// processed after the write intent is resolved (to prevent the
	// timestamp cache from being updated).

	ts = clock.Now()
	// This second get must fail: the command filter rejects the fourth
	// matching get request.
	if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		Timestamp:    ts,
		UserPriority: priority,
	}, &roachpb.GetRequest{Span: requestHeader}); err == nil {
		t.Fatal("unexpected success of get")

// TestStoreRangeMergeMetadataCleanup tests that all metadata of a
// subsumed range is cleaned up on merge.
//
// It snapshots the set of engine keys before a split, then splits, writes to
// the right-hand range, merges, and checks that no new key under the
// subsumed range's range-ID prefix is left behind.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error-handling bodies; confirm against the complete file.
func TestStoreRangeMergeMetadataCleanup(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sCtx := storage.TestStoreContext()
	sCtx.TestingKnobs.DisableSplitQueue = true
	store, stopper, _ := createTestStoreWithContext(t, sCtx)
	defer stopper.Stop()

	// scan runs f over every key/value between KeyMin and KeyMax in the
	// store's engine.
	scan := func(f func(roachpb.KeyValue) (bool, error)) {
		if _, err := engine.MVCCIterate(context.Background(), store.Engine(), roachpb.KeyMin, roachpb.KeyMax, hlc.ZeroTimestamp, true, nil, false, f); err != nil {
	content := roachpb.Key("testing!")

	// Write some values left of the proposed split key.
	pArgs := putArgs([]byte("aaa"), content)
	if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil {

	// Collect all the keys.
	preKeys := make(map[string]struct{})
	scan(func(kv roachpb.KeyValue) (bool, error) {
		preKeys[string(kv.Key)] = struct{}{}
		return false, nil

	// Split the range.
	_, bDesc, err := createSplitRanges(store)
	if err != nil {

	// Write some values right of the split key.
	pArgs = putArgs([]byte("ccc"), content)
	if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: bDesc.RangeID,
	}, &pArgs); err != nil {

	// Merge the b range back into the a range.
	args := adminMergeArgs(roachpb.KeyMin)
	if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil {

	// Collect all the keys again.
	postKeys := make(map[string]struct{})
	scan(func(kv roachpb.KeyValue) (bool, error) {
		postKeys[string(kv.Key)] = struct{}{}
		return false, nil

	// Compute the new keys.
	// Removing every pre-split key leaves only keys that appeared afterwards.
	for k := range preKeys {
		delete(postKeys, k)

	// Keep only the subsumed range's local keys.
	localRangeKeyPrefix := string(keys.MakeRangeIDPrefix(bDesc.RangeID))
	for k := range postKeys {
		if !strings.HasPrefix(k, localRangeKeyPrefix) {
			delete(postKeys, k)

	// Anything still in postKeys is leftover metadata of the subsumed range;
	// build a message listing each such key.
	if numKeys := len(postKeys); numKeys > 0 {
		var buf bytes.Buffer
		fmt.Fprintf(&buf, "%d keys were not cleaned up:\n", numKeys)
		for k := range postKeys {
			fmt.Fprintf(&buf, "%q\n", k)
// TestStoreRangeMergeWithData attempts to merge two collocated ranges
// each containing data.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error-handling bodies; confirm against the complete file.
func TestStoreRangeMergeWithData(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sCtx := storage.TestStoreContext()
	sCtx.TestingKnobs.DisableSplitQueue = true
	store, stopper, _ := createTestStoreWithContext(t, sCtx)
	defer stopper.Stop()

	content := roachpb.Key("testing!")

	aDesc, bDesc, err := createSplitRanges(store)
	if err != nil {

	// Write some values left and right of the proposed split key.
	pArgs := putArgs([]byte("aaa"), content)
	if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil {
	pArgs = putArgs([]byte("ccc"), content)
	if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: bDesc.RangeID,
	}, &pArgs); err != nil {

	// Confirm the values are there.
	gArgs := getArgs([]byte("aaa"))
	if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	gArgs = getArgs([]byte("ccc"))
	if reply, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: bDesc.RangeID,
	}, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)

	// Merge the b range back into the a range.
	args := adminMergeArgs(roachpb.KeyMin)
	if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil {

	// Verify no intents remain on range descriptor keys.
	for _, key := range []roachpb.Key{keys.RangeDescriptorKey(aDesc.StartKey), keys.RangeDescriptorKey(bDesc.StartKey)} {
		if _, _, err := engine.MVCCGet(context.Background(), store.Engine(), key, store.Clock().Now(), true, nil); err != nil {

	// Verify the merge by looking up keys from both ranges.
	rangeA := store.LookupReplica([]byte("a"), nil)
	rangeB := store.LookupReplica([]byte("c"), nil)
	rangeADesc := rangeA.Desc()
	rangeBDesc := rangeB.Desc()

	// NOTE(review): the comparison is on the replicas returned by
	// LookupReplica while the failure message prints their descriptors.
	if !reflect.DeepEqual(rangeA, rangeB) {
		t.Fatalf("ranges were not merged %+v=%+v", rangeADesc, rangeBDesc)
	// After the merge the single range must span the whole keyspace.
	if !bytes.Equal(rangeADesc.StartKey, roachpb.RKeyMin) {
		t.Fatalf("The start key is not equal to KeyMin %q=%q", rangeADesc.StartKey, roachpb.RKeyMin)
	if !bytes.Equal(rangeADesc.EndKey, roachpb.RKeyMax) {
		t.Fatalf("The end key is not equal to KeyMax %q=%q", rangeADesc.EndKey, roachpb.RKeyMax)

	// Try to get values from after the merge.
	gArgs = getArgs([]byte("aaa"))
	if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	gArgs = getArgs([]byte("ccc"))
	// From here on the right-hand key is addressed via rangeB.RangeID, i.e.
	// the range found by the post-merge lookup of "c".
	if reply, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: rangeB.RangeID,
	}, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)

	// Put new values after the merge on both sides.
	pArgs = putArgs([]byte("aaaa"), content)
	if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil {
	pArgs = putArgs([]byte("cccc"), content)
	if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: rangeB.RangeID,
	}, &pArgs); err != nil {

	// Try to get the newly placed values.
	gArgs = getArgs([]byte("aaaa"))
	if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	gArgs = getArgs([]byte("cccc"))
	if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
// TestMultiRangeScanWithMaxResults tests that commands which access multiple
// ranges with MaxResults parameter are carried out properly.
//
// NOTE(review): this excerpt appears to have lost its closing braces, part
// of the first test case literal and the error-handling bodies; confirm
// against the complete file.
func TestMultiRangeScanWithMaxResults(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		// splitKeys are the keys at which the keyspace is split.
		splitKeys []roachpb.Key
		// keys are the keys written and subsequently scanned.
		keys      []roachpb.Key
			[]roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}},
		{[]roachpb.Key{roachpb.Key("h"), roachpb.Key("q")},
			[]roachpb.Key{roachpb.Key("b"), roachpb.Key("f"), roachpb.Key("k"),
				roachpb.Key("r"), roachpb.Key("w"), roachpb.Key("y")}},

	for i, tc := range testCases {
		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
		// NOTE(review): this defer is inside the loop, so every server
		// started here keeps running until the test function returns.
		defer s.Stopper().Stop()
		ts := s.(*TestServer)
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = ts.stopper.ShouldQuiesce()
		ds := kv.NewDistSender(&kv.DistSenderConfig{
			Clock:           s.Clock(),
			RPCContext:      s.RPCContext(),
			RPCRetryOptions: &retryOpts,
		}, ts.Gossip())
		ctx := tracing.WithTracer(context.Background(), tracing.NewTracer())
		tds := kv.NewTxnCoordSender(ctx, ds, ts.Clock(), ts.Ctx.Linearizable,
			ts.stopper, kv.MakeTxnMetrics())

		for _, sk := range tc.splitKeys {
			if err := ts.node.ctx.DB.AdminSplit(sk); err != nil {

		// Each key is written with its own bytes as the value.
		for _, k := range tc.keys {
			put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
			if _, err := client.SendWrapped(tds, nil, put); err != nil {

		// Try every possible ScanRequest startKey.
		for start := 0; start < len(tc.keys); start++ {
			// Try every possible maxResults, from 1 to beyond the size of key array.
			for maxResults := 1; maxResults <= len(tc.keys)-start+1; maxResults++ {
				// The scan covers [keys[start], last key] inclusive.
				scan := roachpb.NewScan(tc.keys[start], tc.keys[len(tc.keys)-1].Next())
				reply, err := client.SendWrappedWith(
					tds, nil, roachpb.Header{MaxSpanRequestKeys: int64(maxResults)}, scan,
				if err != nil {
				rows := reply.(*roachpb.ScanResponse).Rows
				// When enough keys remain, exactly maxResults rows are
				// expected; when maxResults exceeds the remaining keys by
				// one, all remaining keys (maxResults-1) are expected.
				if start+maxResults <= len(tc.keys) && len(rows) != maxResults {
					t.Errorf("%d: start=%s: expected %d rows, but got %d", i, tc.keys[start], maxResults, len(rows))
				} else if start+maxResults == len(tc.keys)+1 && len(rows) != maxResults-1 {
					t.Errorf("%d: expected %d rows, but got %d", i, maxResults-1, len(rows))
// TestMultiRangeScanDeleteRange tests that commands which access multiple
// ranges are carried out properly.
//
// NOTE(review): this excerpt appears to have lost its closing braces and
// error-handling bodies; confirm against the complete file.
func TestMultiRangeScanDeleteRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	ts := s.(*TestServer)
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = ts.stopper.ShouldQuiesce()
	ds := kv.NewDistSender(&kv.DistSenderConfig{
		Clock:           s.Clock(),
		RPCContext:      s.RPCContext(),
		RPCRetryOptions: &retryOpts,
	}, ts.Gossip())
	ctx := tracing.WithTracer(context.Background(), tracing.NewTracer())
	tds := kv.NewTxnCoordSender(ctx, ds, s.Clock(), ts.Ctx.Linearizable,
		ts.stopper, kv.MakeTxnMetrics())

	// Split at "m" so that the keys "a" and "z" written below fall on
	// different sides of the split key.
	if err := ts.node.ctx.DB.AdminSplit("m"); err != nil {
	writes := []roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}
	// A Get carrying an EndKey (i.e. a key range) is expected to be rejected.
	get := &roachpb.GetRequest{
		Span: roachpb.Span{Key: writes[0]},
	get.EndKey = writes[len(writes)-1]
	if _, err := client.SendWrapped(tds, nil, get); err == nil {
		t.Errorf("able to call Get with a key range: %v", get)
	// NOTE(review): delTS is never assigned in the visible code, so the
	// DeleteRange below is sent with the zero timestamp.
	var delTS hlc.Timestamp
	for i, k := range writes {
		put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
		reply, err := client.SendWrapped(tds, nil, put)
		if err != nil {
		// After the (i+1)-th write, a scan over all keys must see i+1 rows.
		scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next())
		reply, err = client.SendWrapped(tds, nil, scan)
		if err != nil {
		sr := reply.(*roachpb.ScanResponse)
		if sr.Txn != nil {
			// This was the other way around at some point in the past.
			// Same below for Delete, etc.
			t.Errorf("expected no transaction in response header")
		if rows := sr.Rows; len(rows) != i+1 {
			t.Fatalf("expected %d rows, but got %d", i+1, len(rows))

	// Delete everything that was written, asking for the deleted keys back.
	del := &roachpb.DeleteRangeRequest{
		Span: roachpb.Span{
			Key:    writes[0],
			EndKey: roachpb.Key(writes[len(writes)-1]).Next(),
		ReturnKeys: true,
	reply, err := client.SendWrappedWith(tds, nil, roachpb.Header{Timestamp: delTS}, del)
	if err != nil {
	dr := reply.(*roachpb.DeleteRangeResponse)
	if dr.Txn != nil {
		t.Errorf("expected no transaction in response header")
	// NOTE(review): the %d verbs in the message below are given key slices
	// (writes and dr.Keys), not counts.
	if !reflect.DeepEqual(dr.Keys, writes) {
		t.Errorf("expected %d keys to be deleted, but got %d instead", writes, dr.Keys)

	// A transactional scan after the delete must return no rows and must
	// hand back a transaction that still carries the name "MyTxn".
	scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next())
	txn := &roachpb.Transaction{Name: "MyTxn"}
	reply, err = client.SendWrappedWith(tds, nil, roachpb.Header{Txn: txn}, scan)
	if err != nil {
	sr := reply.(*roachpb.ScanResponse)
	if txn := sr.Txn; txn == nil || txn.Name != "MyTxn" {
		t.Errorf("wanted Txn to persist, but it changed to %v", txn)
	if rows := sr.Rows; len(rows) > 0 {
		t.Fatalf("scan after delete returned rows: %v", rows)
// TestStoreRangeSplitStatsWithMerges starts by splitting the system keys from
// user-space keys and verifying that the user space side of the split (which is
// empty) has empty stats apart from LastUpdateNanos. It then issues a number of
// Merge requests to the user space side, simulating TimeSeries data. Finally,
// the test splits the user space side halfway and verifies the stats on either
// side of the split are equal to a recomputation.
//
// Note that unlike TestStoreRangeSplitStats, we do not check if the two halves of the
// split's stats are equal to the pre-split stats when added, because this will not be
// true of ranges populated with Merge requests. The reason for this is that Merge
// requests' impact on MVCCStats are only estimated. See updateStatsOnMerge.
func TestStoreRangeSplitStatsWithMerges(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sCtx := storage.TestStoreContext()
	// Turn off the split queue via its testing knob; the test issues its own
	// AdminSplit requests below.
	sCtx.TestingKnobs.DisableSplitQueue = true
	store, stopper, manual := createTestStoreWithContext(t, sCtx)
	defer stopper.Stop()

	// Split the range after the last table data key.
	keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1)
	keyPrefix = keys.MakeRowSentinelKey(keyPrefix)
	args := adminSplitArgs(roachpb.KeyMin, keyPrefix)
	if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil {
	// Verify empty range has empty stats.
	rng := store.LookupReplica(keyPrefix, nil)
	// NOTE that this value is expected to change over time, depending on what
	// we store in the sys-local keyspace. Update it accordingly for this test.
	// The only field set here is LastUpdateNanos, taken from the manual clock.
	empty := enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()}
	if err := verifyRangeStats(store.Engine(), rng.RangeID, empty); err != nil {

	// Write random TimeSeries data. The helper returns the key used as the
	// split point below.
	midKey := writeRandomTimeSeriesDataToRange(t, store, rng.RangeID, keyPrefix)

	// Split the range at approximate halfway point.
	args = adminSplitArgs(keyPrefix, midKey)
	if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: rng.RangeID,
	}, &args); pErr != nil {

	// Read both halves' stats from one engine snapshot; the same snapshot is
	// handed to verifyRecomputedStats below.
	snap := store.Engine().NewSnapshot()
	defer snap.Close()
	var msLeft, msRight enginepb.MVCCStats
	if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &msLeft); err != nil {
	rngRight := store.LookupReplica(midKey, nil)
	if err := engine.MVCCGetRangeStats(context.Background(), snap, rngRight.RangeID, &msRight); err != nil {

	// Stats should both have the new timestamp.
	now := manual.UnixNano()
	if lTs := msLeft.LastUpdateNanos; lTs != now {
		t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs)
	if rTs := msRight.LastUpdateNanos; rTs != now {
		t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs)

	// Stats should agree with recomputation.
	if err := verifyRecomputedStats(snap, rng.Desc(), msLeft, now); err != nil {
		t.Fatalf("failed to verify left range's stats after split: %v", err)
	if err := verifyRecomputedStats(snap, rngRight.Desc(), msRight, now); err != nil {
		t.Fatalf("failed to verify right range's stats after split: %v", err)
// TestStoreRangeSplitIdempotency executes a split of a range and verifies
// that the resulting ranges respond to the right key ranges, that their stats
// have been properly accounted for, and that requests can't be replayed.
func TestStoreRangeSplitIdempotency(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer config.TestingDisableTableSplits()()
	store, stopper, _ := createTestStore(t)
	defer stopper.Stop()
	rangeID := roachpb.RangeID(1)
	splitKey := roachpb.Key("m")
	content := roachpb.Key("asdvb")

	// First, write some values left and right of the proposed split key.
	pArgs := putArgs([]byte("c"), content)
	if _, pErr := client.SendWrapped(rg1(store), nil, &pArgs); pErr != nil {
	pArgs = putArgs([]byte("x"), content)
	if _, pErr := client.SendWrapped(rg1(store), nil, &pArgs); pErr != nil {

	// Increments are a good way of testing idempotency. Up here, we
	// address them to the original range, then later to the one that
	// contains the key.
	txn := roachpb.NewTransaction("test", []byte("c"), 10, enginepb.SERIALIZABLE,
		store.Clock().Now(), 0)
	lIncArgs := incrementArgs([]byte("apoptosis"), 100)
	// Each increment gets its own copy of the transaction; the same copies
	// are reused for the replays after the split.
	lTxn := *txn
	if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		Txn: &lTxn,
	}, &lIncArgs); pErr != nil {
	rIncArgs := incrementArgs([]byte("wobble"), 10)
	rTxn := *txn
	if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		Txn: &rTxn,
	}, &rIncArgs); pErr != nil {

	// Get the original stats for key and value bytes.
	var ms enginepb.MVCCStats
	if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), rangeID, &ms); err != nil {
	keyBytes, valBytes := ms.KeyBytes, ms.ValBytes

	// Split the range.
	args := adminSplitArgs(roachpb.KeyMin, splitKey)
	if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil {

	// Verify no intents remain on range descriptor keys.
	splitKeyAddr, err := keys.Addr(splitKey)
	if err != nil {
	for _, key := range []roachpb.Key{keys.RangeDescriptorKey(roachpb.RKeyMin), keys.RangeDescriptorKey(splitKeyAddr)} {
		if _, _, err := engine.MVCCGet(context.Background(), store.Engine(), key, store.Clock().Now(), true, nil); err != nil {

	// The left range must end, and the right range must start, at splitKey;
	// together they must span RKeyMin through RKeyMax.
	rng := store.LookupReplica(roachpb.RKeyMin, nil)
	rngDesc := rng.Desc()
	newRng := store.LookupReplica([]byte("m"), nil)
	newRngDesc := newRng.Desc()
	if !bytes.Equal(newRngDesc.StartKey, splitKey) || !bytes.Equal(splitKey, rngDesc.EndKey) {
		t.Errorf("ranges mismatched, wanted %q=%q=%q", newRngDesc.StartKey, splitKey, rngDesc.EndKey)
	if !bytes.Equal(newRngDesc.EndKey, roachpb.RKeyMax) || !bytes.Equal(rngDesc.StartKey, roachpb.RKeyMin) {
		t.Errorf("new ranges do not cover KeyMin-KeyMax, but only %q-%q", rngDesc.StartKey, newRngDesc.EndKey)

	// Try to get values from both left and right of where the split happened.
	gArgs := getArgs([]byte("c"))
	if reply, pErr := client.SendWrapped(rg1(store), nil, &gArgs); pErr != nil {
	} else if replyBytes, pErr := reply.(*roachpb.GetResponse).Value.GetBytes(); pErr != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)
	// The right-hand key is addressed explicitly to the new range's ID.
	gArgs = getArgs([]byte("x"))
	if reply, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: newRng.RangeID,
	}, &gArgs); pErr != nil {
	} else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil {
	} else if !bytes.Equal(replyBytes, content) {
		t.Fatalf("actual value %q did not match expected value %q", replyBytes, content)

	// Send out an increment request copied from above (same txn/sequence)
	// which remains in the old range. The replay is expected to come back
	// with a TransactionRetryError; anything else is reported as a failure.
	_, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		Txn: &lTxn,
	}, &lIncArgs)
	if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok {
		t.Fatalf("unexpected idempotency failure: %v", pErr)

	// Send out the same increment copied from above (same txn/sequence), but
	// now to the newly created range (which should hold that key).
	_, pErr = client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: newRng.RangeID,
		Txn:     &rTxn,
	}, &rIncArgs)
	if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok {
		t.Fatalf("unexpected idempotency failure: %v", pErr)

	// Compare stats of split ranges to ensure they are non zero and
	// exceed the original range when summed.
	var left, right enginepb.MVCCStats
	if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), rangeID, &left); err != nil {
	lKeyBytes, lValBytes := left.KeyBytes, left.ValBytes
	if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), newRng.RangeID, &right); err != nil {
	rKeyBytes, rValBytes := right.KeyBytes, right.ValBytes

	if lKeyBytes == 0 || rKeyBytes == 0 {
		t.Errorf("expected non-zero key bytes; got %d, %d", lKeyBytes, rKeyBytes)
	if lValBytes == 0 || rValBytes == 0 {
		t.Errorf("expected non-zero val bytes; got %d, %d", lValBytes, rValBytes)
	// The sums must be strictly greater than the pre-split totals captured
	// above; equality is treated as an error too.
	if lKeyBytes+rKeyBytes <= keyBytes {
		t.Errorf("left + right key bytes don't match; %d + %d <= %d", lKeyBytes, rKeyBytes, keyBytes)
	if lValBytes+rValBytes <= valBytes {
		t.Errorf("left + right val bytes don't match; %d + %d <= %d", lValBytes, rValBytes, valBytes)
// TestStoreRangeSplitStats starts by splitting the system keys from user-space
// keys and verifying that the user space side of the split (which is empty)
// has empty stats apart from LastUpdateNanos. It then writes random data to
// the user space side, splits it halfway and verifies the two splits have
// stats that sum exactly to the pre-split stats (ignoring the Sys* fields and
// LastUpdateNanos), carry the current timestamp, and agree with a
// recomputation.
func TestStoreRangeSplitStats(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer config.TestingDisableTableSplits()()
	store, stopper, manual := createTestStore(t)
	defer stopper.Stop()

	// Split the range after the last table data key.
	keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1)
	keyPrefix = keys.MakeRowSentinelKey(keyPrefix)
	args := adminSplitArgs(roachpb.KeyMin, keyPrefix)
	if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil {
	// Verify empty range has empty stats.
	rng := store.LookupReplica(keyPrefix, nil)
	// NOTE that this value is expected to change over time, depending on what
	// we store in the sys-local keyspace. Update it accordingly for this test.
	if err := verifyRangeStats(store.Engine(), rng.RangeID, enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()}); err != nil {

	// Write random data.
	writeRandomDataToRange(t, store, rng.RangeID, keyPrefix)

	// Get the range stats now that we have data.
	snap := store.Engine().NewSnapshot()
	defer snap.Close()
	var ms enginepb.MVCCStats
	if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &ms); err != nil {
	if err := verifyRecomputedStats(snap, rng.Desc(), ms, manual.UnixNano()); err != nil {
		t.Fatalf("failed to verify range's stats before split: %v", err)
	// The replica's in-memory stats must equal what was read from the snapshot.
	if inMemMS := rng.GetMVCCStats(); inMemMS != ms {
		t.Fatalf("in-memory and on-disk diverged:\n%+v\n!=\n%+v", inMemMS, ms)


	// Split the range at approximate halfway point ("Z" in string "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz").
	// midKey is built on a fresh slice so keyPrefix's backing array is not
	// appended to.
	midKey := append([]byte(nil), keyPrefix...)
	midKey = append(midKey, []byte("Z")...)
	midKey = keys.MakeRowSentinelKey(midKey)
	args = adminSplitArgs(keyPrefix, midKey)
	if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
		RangeID: rng.RangeID,
	}, &args); pErr != nil {

	// Take a second snapshot for the post-split state. The earlier deferred
	// Close was bound to the first snapshot, so this one needs its own.
	snap = store.Engine().NewSnapshot()
	defer snap.Close()
	var msLeft, msRight enginepb.MVCCStats
	if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &msLeft); err != nil {
	rngRight := store.LookupReplica(midKey, nil)
	if err := engine.MVCCGetRangeStats(context.Background(), snap, rngRight.RangeID, &msRight); err != nil {

	// The stats should be exactly equal when added.
	expMS := enginepb.MVCCStats{
		LiveBytes:   msLeft.LiveBytes + msRight.LiveBytes,
		KeyBytes:    msLeft.KeyBytes + msRight.KeyBytes,
		ValBytes:    msLeft.ValBytes + msRight.ValBytes,
		IntentBytes: msLeft.IntentBytes + msRight.IntentBytes,
		LiveCount:   msLeft.LiveCount + msRight.LiveCount,
		KeyCount:    msLeft.KeyCount + msRight.KeyCount,
		ValCount:    msLeft.ValCount + msRight.ValCount,
		IntentCount: msLeft.IntentCount + msRight.IntentCount,
	// expMS leaves SysBytes, SysCount and LastUpdateNanos at their zero
	// values, so clear them on the pre-split stats before comparing.
	ms.SysBytes, ms.SysCount = 0, 0
	ms.LastUpdateNanos = 0
	if expMS != ms {
		t.Errorf("expected left plus right ranges to equal original, but\n %+v\n+\n %+v\n!=\n %+v", msLeft, msRight, ms)

	// Stats should both have the new timestamp.
	now := manual.UnixNano()
	if lTs := msLeft.LastUpdateNanos; lTs != now {
		t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs)
	if rTs := msRight.LastUpdateNanos; rTs != now {
		t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs)

	// Stats should agree with recomputation.
	if err := verifyRecomputedStats(snap, rng.Desc(), msLeft, now); err != nil {
		t.Fatalf("failed to verify left range's stats after split: %v", err)
	if err := verifyRecomputedStats(snap, rngRight.Desc(), msRight, now); err != nil {
		t.Fatalf("failed to verify right range's stats after split: %v", err)
// TestRangeLookupUseReverse tests whether the results and the results count
// are correct when scanning in reverse order. Each case issues a reverse
// RangeLookupRequest and compares the returned Ranges and PrefetchedRanges
// against the expected descriptors by start and end key.
func TestRangeLookupUseReverse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sCtx := storage.TestStoreContext()
	sCtx.TestingKnobs.DisableSplitQueue = true
	store, stopper, _ := createTestStoreWithContext(t, sCtx)
	defer stopper.Stop()

	// Init test ranges:
	// ["","a"), ["a","c"), ["c","e"), ["e","g") and ["g","\xff\xff").
	// The splits are listed from the highest key down to the lowest.
	splits := []roachpb.AdminSplitRequest{
		adminSplitArgs(roachpb.Key("g"), roachpb.Key("g")),
		adminSplitArgs(roachpb.Key("e"), roachpb.Key("e")),
		adminSplitArgs(roachpb.Key("c"), roachpb.Key("c")),
		adminSplitArgs(roachpb.Key("a"), roachpb.Key("a")),

	for _, split := range splits {
		_, pErr := client.SendWrapped(rg1(store), nil, &split)
		if pErr != nil {
			t.Fatalf("%q: split unexpected error: %s", split.SplitKey, pErr)

	// Resolve the intents.
	// This is done by scanning the range meta keys until the scan succeeds.
	scanArgs := roachpb.ScanRequest{
		Span: roachpb.Span{
			Key:    keys.RangeMetaKey(roachpb.RKeyMin.Next()),
			EndKey: keys.RangeMetaKey(roachpb.RKeyMax),
	util.SucceedsSoon(t, func() error {
		_, pErr := client.SendWrapped(rg1(store), nil, &scanArgs)
		return pErr.GoError()

	// revScanArgs builds a reverse RangeLookupRequest for the given meta key
	// that asks for at most maxResults ranges.
	revScanArgs := func(key []byte, maxResults int32) *roachpb.RangeLookupRequest {
		return &roachpb.RangeLookupRequest{
			Span: roachpb.Span{
				Key: key,
			MaxRanges: maxResults,
			Reverse:   true,


	// Test cases.
	// expected is compared with the response's Ranges and expectedPre with
	// its PrefetchedRanges.
	testCases := []struct {
		request     *roachpb.RangeLookupRequest
		expected    []roachpb.RangeDescriptor
		expectedPre []roachpb.RangeDescriptor
		// Test key in the middle of the range.
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("f")), 2),
			// ["e","g") and ["c","e").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
		// Test key at the end key of the range.
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("g")), 3),
			// ["e","g"), ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
		// Test another range end key, "e".
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("e")), 2),
			// ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
		// Test Meta2KeyMax.
			request: revScanArgs(keys.Meta2KeyMax, 2),
			// ["g","\xff\xff") and ["e","g").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("g"), EndKey: roachpb.RKey("\xff\xff")},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
		// Test Meta1KeyMax.
			request: revScanArgs(keys.Meta1KeyMax, 1),
			// ["","a")
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("a")},

	for testIdx, test := range testCases {
		// Lookups are sent with INCONSISTENT read consistency.
		resp, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
			ReadConsistency: roachpb.INCONSISTENT,
		}, test.request)
		if pErr != nil {
			t.Fatalf("%d: RangeLookup error: %s", testIdx, pErr)

		rlReply := resp.(*roachpb.RangeLookupResponse)
		// Checks the results count. Ranges and PrefetchedRanges together must
		// add up to exactly MaxRanges.
		// NOTE(review): the failure message reports only len(rlReply.Ranges),
		// while the condition also counts PrefetchedRanges.
		if int32(len(rlReply.Ranges))+int32(len(rlReply.PrefetchedRanges)) != test.request.MaxRanges {
			t.Fatalf("%d: returned results count, expected %d,but got %d", testIdx, test.request.MaxRanges, len(rlReply.Ranges))
		// Checks the range descriptors.
		// Only StartKey and EndKey are compared, position by position.
		for _, rngSlice := range []struct {
			expect, reply []roachpb.RangeDescriptor
			{test.expected, rlReply.Ranges},
			{test.expectedPre, rlReply.PrefetchedRanges},
		} {
			for i, rng := range rngSlice.expect {
				if !(rng.StartKey.Equal(rngSlice.reply[i].StartKey) && rng.EndKey.Equal(rngSlice.reply[i].EndKey)) {
					t.Fatalf("%d: returned range is not correct, expected %v ,but got %v", testIdx, rng, rngSlice.reply[i])
// TestStoreSplitReadRace prevents regression of #3148. It begins a couple of
// read requests and lets them complete while a split is happening; the reads
// hit the second half of the split. If the split happens non-atomically with
// respect to the reads (and in particular their update of the timestamp
// cache), then some of them may not be reflected in the timestamp cache of the
// new range, in which case this test would fail.
func TestStoreSplitReadRace(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer config.TestingDisableTableSplits()()
	splitKey := roachpb.Key("a")
	// key returns splitKey.Next() followed by i formatted as three decimal
	// digits, built on a fresh slice each call.
	key := func(i int) roachpb.Key {
		splitCopy := append([]byte(nil), splitKey.Next()...)
		return append(splitCopy, []byte(fmt.Sprintf("%03d", i))...)

	getContinues := make(chan struct{})
	var getStarted sync.WaitGroup
	sCtx := storage.TestStoreContext()
	// The command filter singles out two kinds of requests: an EndTransaction
	// whose split trigger's updated descriptor ends at splitKey, and Gets on
	// keys prefixed by splitKey.Next(). Everything else passes through.
	sCtx.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			if et, ok := filterArgs.Req.(*roachpb.EndTransactionRequest); ok {
				st := et.InternalCommitTrigger.GetSplitTrigger()
				if st == nil || !st.UpdatedDesc.EndKey.Equal(splitKey) {
					return nil
			} else if filterArgs.Req.Method() == roachpb.Get &&
				bytes.HasPrefix(filterArgs.Req.Header().Key, splitKey.Next()) {
			return nil
	store, stopper, _ := createTestStoreWithContext(t, &sCtx)
	defer stopper.Stop()

	now := store.Clock().Now()
	var wg sync.WaitGroup

	// ts returns the timestamp used for the i-th read: now advanced by
	// 1000+i logical ticks.
	ts := func(i int) hlc.Timestamp {
		return now.Add(0, int32(1000+i))

	const num = 10

	// Issue num Gets, each at its own timestamp ts(i).
	for i := 0; i < num; i++ {
		go func(i int) {
			defer wg.Done()
			args := getArgs(key(i))
			var h roachpb.Header
			h.Timestamp = ts(i)
			if _, pErr := client.SendWrappedWith(rg1(store), nil, h, &args); pErr != nil {


	// Split the range at splitKey.
	func() {
		defer wg.Done()
		args := adminSplitArgs(roachpb.KeyMin, splitKey)
		if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil {


	// Write to every key that was read, at the older timestamp now. Each Put
	// must come back with a timestamp that is not below the matching read's
	// ts(i).
	for i := 0; i < num; i++ {
		var h roachpb.Header
		h.Timestamp = now
		args := putArgs(key(i), []byte("foo"))
		keyAddr, err := keys.Addr(args.Key)
		if err != nil {
		// Address the Put to whichever range currently holds the key.
		h.RangeID = store.LookupReplica(keyAddr, nil).RangeID
		_, respH, pErr := storage.SendWrapped(store, context.Background(), h, &args)
		if pErr != nil {
		if respH.Timestamp.Less(ts(i)) {
			t.Fatalf("%d: expected Put to be forced higher than %s by timestamp caches, but wrote at %s", i, ts(i), respH.Timestamp)
// TestRangeTransferLease exercises AdminTransferLease on a range replicated
// across two stores. It checks that transferring the lease to the current
// holder leaves the lease unchanged, that an unknown target store is rejected,
// that after a transfer the old holder answers reads with a
// NotLeaseHolderError naming the new holder, that the new holder's timestamp
// cache low water equals the new lease's start, and that a transfer issued
// alongside a lease extension still goes through.
func TestRangeTransferLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := storage.TestStoreContext()
	var filterMu syncutil.Mutex
	// filter is swapped in and out during the test; the installed command
	// filter forwards to it whenever it is non-nil.
	var filter func(filterArgs storagebase.FilterArgs) *roachpb.Error
	ctx.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			filterCopy := filter
			if filterCopy != nil {
				return filterCopy(filterArgs)
			return nil
	// When waitForTransferBlocked holds true, the knob below signals on
	// transferBlocked each time it is invoked.
	var waitForTransferBlocked atomic.Value
	transferBlocked := make(chan struct{})
	ctx.TestingKnobs.LeaseTransferBlockedOnExtensionEvent = func(
		_ roachpb.ReplicaDescriptor) {
		if waitForTransferBlocked.Load().(bool) {
			transferBlocked <- struct{}{}
	mtc := &multiTestContext{}
	mtc.storeContext = &ctx
	mtc.Start(t, 2)
	defer mtc.Stop()

	// First, do a write; we'll use it to determine when the dust has settled.
	leftKey := roachpb.Key("a")
	incArgs := incrementArgs(leftKey, 1)
	if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &incArgs); pErr != nil {

	// Get the left range's ID.
	rangeID := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil).RangeID

	// Replicate the left range onto node 1.
	mtc.replicateRange(rangeID, 1)

	replica0 := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil)
	replica1 := mtc.stores[1].LookupReplica(roachpb.RKey("a"), nil)
	gArgs := getArgs(leftKey)
	replica0Desc, err := replica0.GetReplicaDescriptor()
	if err != nil {
	// Check that replica0 can serve reads OK.
	if _, pErr := client.SendWrappedWith(
		mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {

		// Transferring the lease to ourself should be a no-op.
		// Both the lease pointer and the lease value must be unchanged.
		origLeasePtr, _ := replica0.GetLease()
		origLease := *origLeasePtr
		if err := replica0.AdminTransferLease(replica0Desc.StoreID); err != nil {
		newLeasePtr, _ := replica0.GetLease()
		if origLeasePtr != newLeasePtr || origLease != *newLeasePtr {
			t.Fatalf("expected %+v, but found %+v", origLeasePtr, newLeasePtr)

		// An invalid target should result in an error.
		const expected = "unable to find store .* in range"
		if err := replica0.AdminTransferLease(1000); !testutils.IsError(err, expected) {
			t.Fatalf("expected %s, but found %v", expected, err)

	// Move the lease to store 1.
	// Retry until replica1 can report its own replica descriptor.
	var newHolderDesc roachpb.ReplicaDescriptor
	util.SucceedsSoon(t, func() error {
		var err error
		newHolderDesc, err = replica1.GetReplicaDescriptor()
		return err

	if err := replica0.AdminTransferLease(newHolderDesc.StoreID); err != nil {

	// Check that replica0 doesn't serve reads any more.
	replica0Desc, err = replica0.GetReplicaDescriptor()
	if err != nil {
	_, pErr := client.SendWrappedWith(
		mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs)
	// The rejection must be a NotLeaseHolderError whose LeaseHolder is the
	// replica the lease was transferred to.
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr)
	if *(nlhe.LeaseHolder) != newHolderDesc {
		t.Fatalf("expected lease holder %+v, got %+v",
			newHolderDesc, nlhe.LeaseHolder)

	// Check that replica1 now has the lease (or gets it soon).
	util.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			mtc.senders[1], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
			return pErr.GoError()
		return nil

	replica1Lease, _ := replica1.GetLease()

	// Verify the timestamp cache low water. Because we executed a transfer lease
	// request, the low water should be set to the new lease start time which is
	// less than the previous lease's expiration time.
	if lowWater := replica1.GetTimestampCacheLowWater(); lowWater != replica1Lease.Start {
		t.Fatalf("expected timestamp cache low water %s, but found %s",
			replica1Lease.Start, lowWater)

	// Make replica1 extend its lease and transfer the lease immediately after
	// that. Test that the transfer still happens (it'll wait until the extension
	// is done).
	extensionSem := make(chan struct{})
	// This filter only reacts to RequestLeaseRequests seen on store 1 whose
	// lease names newHolderDesc; for those it uninstalls itself and signals on
	// extensionSem.
	filter = func(filterArgs storagebase.FilterArgs) *roachpb.Error {
		if filterArgs.Sid != mtc.stores[1].Ident.StoreID {
			return nil
		llReq, ok := filterArgs.Req.(*roachpb.RequestLeaseRequest)
		if !ok {
			return nil
		if llReq.Lease.Replica == newHolderDesc {
			// Notify the main thread that the extension is in progress and wait for
			// the signal to proceed.
			filter = nil
			extensionSem <- struct{}{}
		return nil
	// Initiate an extension.
	var wg sync.WaitGroup
	go func() {
		defer wg.Done()
		// Move the manual clock to just past shouldRenewTS (one nanosecond of
		// wall time before the lease's StartStasis), then issue a read.
		shouldRenewTS := replica1Lease.StartStasis.Add(-1, 0)
		mtc.manualClock.Set(shouldRenewTS.WallTime + 1)
		if _, pErr := client.SendWrappedWith(
			mtc.senders[1], nil,
			roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {

	// Initiate a transfer.
	go func() {
		defer wg.Done()
		// Transfer back from replica1 to replica0.
		if err := replica1.AdminTransferLease(replica0Desc.StoreID); err != nil {
	// Wait for the transfer to be blocked by the extension.
	// Now unblock the extension.
	extensionSem <- struct{}{}
	// Check that the transfer back to replica0 eventually happens.
	util.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			mtc.senders[0], nil,
			roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
			return pErr.GoError()
		return nil
	filter = nil
// TestMultiRangeScanReverseScanInconsistent verifies that a Scan/ReverseScan
// across ranges that doesn't require read consistency will set a timestamp
// using the clock local to the distributed sender.
func TestMultiRangeScanReverseScanInconsistent(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	db := setupMultipleRanges(t, s, "b")

	// Write keys "a" and "b", the latter of which is the first key in the
	// second range.
	// ts[i] records the server clock reading taken right after key i's write.
	keys := [2]string{"a", "b"}
	ts := [2]hlc.Timestamp{}
	for i, key := range keys {
		b := &client.Batch{}
		b.Put(key, "value")
		if err := db.Run(b); err != nil {
		ts[i] = s.Clock().Now()
		log.Infof("%d: %s %d", i, key, ts[i])
		if i == 0 {
			util.SucceedsSoon(t, func() error {
				// Enforce that when we write the second key, it's written
				// with a strictly higher timestamp. We're dropping logical
				// ticks and the clock may just have been pushed into the
				// future, so that's necessary. See #3122.
				if ts[0].WallTime >= s.Clock().Now().WallTime {
					return errors.New("time stands still")
				return nil

	// Do an inconsistent Scan/ReverseScan from a new DistSender and verify
	// it does the read at its local clock and doesn't receive an
	// OpRequiresTxnError. We set the local clock to the timestamp of
	// just above the first key to verify it's used to read only key "a".
	for i, request := range []roachpb.Request{
		roachpb.NewScan(roachpb.Key("a"), roachpb.Key("c"), 0),
		roachpb.NewReverseScan(roachpb.Key("a"), roachpb.Key("c"), 0),
	} {
		// Each request gets its own DistSender driven by a manual clock fixed
		// one nanosecond of wall time after ts[0].
		manual := hlc.NewManualClock(ts[0].WallTime + 1)
		clock := hlc.NewClock(manual.UnixNano)
		ds := kv.NewDistSender(&kv.DistSenderContext{Clock: clock, RPCContext: s.RPCContext()}, s.(*server.TestServer).Gossip())

		reply, err := client.SendWrappedWith(ds, nil, roachpb.Header{
			ReadConsistency: roachpb.INCONSISTENT,
		}, request)
		if err != nil {

		// Pull the rows out of whichever response type came back.
		var rows []roachpb.KeyValue
		switch r := reply.(type) {
		case *roachpb.ScanResponse:
			rows = r.Rows
		case *roachpb.ReverseScanResponse:
			rows = r.Rows
			t.Fatalf("unexpected response %T: %v", reply, reply)

		// Exactly one row is expected, and it must be the first key ("a").
		if l := len(rows); l != 1 {
			t.Fatalf("%d: expected 1 row; got %d\n%s", i, l, rows)
		if key := string(rows[0].Key); keys[0] != key {
			t.Errorf("expected key %q; got %q", keys[0], key)
// TestLeaseExtensionNotBlockedByRead tests that a lease extension (a
// RequestLeaseRequest that doesn't change the lease holder) is not blocked by
// ongoing reads.
// Note that lease transfers are blocked by reads through their
// PostCommitTrigger.noConcurrentReads.
func TestLeaseExtensionNotBlockedByRead(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// readBlocked is used in both directions: the command filter sends on it
	// once the marked read arrives, and the test sends on it at the end to
	// unblock the read.
	readBlocked := make(chan struct{})
	cmdFilter := func(fArgs storagebase.FilterArgs) *roachpb.Error {
		// The read issued below is recognised by its UserPriority of 42.
		if fArgs.Hdr.UserPriority == 42 {
			// Signal that the read is blocked.
			readBlocked <- struct{}{}
			// Wait for read to be unblocked.
		return nil
	srv, _, _ := serverutils.StartServer(t,
			Knobs: base.TestingKnobs{
				Store: &storage.StoreTestingKnobs{
					TestingCommandFilter: cmdFilter,
	s := srv.(*server.TestServer)
	defer s.Stopper().Stop()

	// Start a read and wait for it to block.
	key := roachpb.Key("a")
	errChan := make(chan error)
	go func() {
		getReq := roachpb.GetRequest{
			Span: roachpb.Span{
				Key: key,
		if _, pErr := client.SendWrappedWith(s.GetDistSender(), nil,
			roachpb.Header{UserPriority: 42},
			&getReq); pErr != nil {
			errChan <- pErr.GoError()

	// Proceed once the read is parked in the filter; an error on errChan
	// arriving first means the read failed instead.
	select {
	case err := <-errChan:
	case <-readBlocked:
		// Send the lease request.
		// We change the key slightly, otherwise the lease request will be blocked
		// by the read through the command queue.
		// TODO(andrei): don't change the key anymore once lease requests don't go
		// through the command queue any more.
		leaseHdrKey := roachpb.Key(append(key, 0x00))
		rKey, err := keys.Addr(leaseHdrKey)
		if err != nil {
		_, repDesc, err := s.Stores().LookupReplica(rKey, nil)
		if err != nil {
		// The requested lease names the replica just looked up and starts at
		// the server clock's current reading, with stasis one second and
		// expiration two seconds after a clock reading.
		leaseReq := roachpb.RequestLeaseRequest{
			Span: roachpb.Span{
				Key: leaseHdrKey,
			Lease: roachpb.Lease{
				Start:       s.Clock().Now(),
				StartStasis: s.Clock().Now().Add(time.Second.Nanoseconds(), 0),
				Expiration:  s.Clock().Now().Add(2*time.Second.Nanoseconds(), 0),
				Replica:     repDesc,
		if _, pErr := client.SendWrapped(s.GetDistSender(), nil, &leaseReq); pErr != nil {
		// Unblock the read.
		readBlocked <- struct{}{}
// TestRequestToUninitializedRange tests the behavior when a request
// is sent to a node which should be a replica of the correct range
// but has not yet received its initial snapshot. This would
// previously panic due to a malformed error response from the server,
// as seen in https://github.com/cockroachdb/cockroach/issues/6027.
// Prior to the other changes in the commit that introduced it, this
// test would reliable trigger the panic from #6027. However, it
// relies on some hacky tricks to both trigger the panic and shut down
// cleanly. If this test needs a lot of maintenance in the future we
// should be willing to get rid of it.
func TestRequestToUninitializedRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	srv, _, _ := serverutils.StartServer(t, base.TestServerArgs{StoresPerNode: 2})
	defer srv.Stopper().Stop()
	s := srv.(*server.TestServer)

	// Choose a range ID that is much larger than any that would be
	// created by initial splits.
	const rangeID = roachpb.RangeID(1000)

	// Set up a range with replicas on two stores of the same node. This
	// ensures that the DistSender will consider both replicas healthy
	// and will try to talk to both (so we can get a non-retryable error
	// from the second store).
	replica1 := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   1,
		ReplicaID: 1,
	replica2 := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   2,
		ReplicaID: 2,

	// HACK: remove the second store from the node to generate a
	// non-retryable error when we try to talk to it.
	store2, err := s.Stores().GetStore(2)
	if err != nil {

	// Create the uninitialized range by sending an isolated raft
	// message to the first store.
	conn, err := s.RPCContext().GRPCDial(s.ServingAddr())
	if err != nil {
	raftClient := storage.NewMultiRaftClient(conn)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	stream, err := raftClient.RaftMessage(ctx)
	if err != nil {
	msg := storage.RaftMessageRequest{
		RangeID:     rangeID,
		ToReplica:   replica1,
		FromReplica: replica2,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			To:   1,
	if err := stream.Send(&msg); err != nil {

	// Make sure the replica was created.
	store1, err := s.Stores().GetStore(1)
	if err != nil {
	util.SucceedsSoon(t, func() error {
		if replica, err := store1.GetReplica(rangeID); err != nil {
			return errors.Errorf("failed to look up replica: %s", err)
		} else if replica.IsInitialized() {
			return errors.Errorf("expected replica to be uninitialized")
		return nil

	// Create our own DistSender so we can force some requests to the
	// bogus range. The DistSender needs to be in scope for its own
	// MockRangeDescriptorDB closure.
	var sender *kv.DistSender
	sender = kv.NewDistSender(&kv.DistSenderContext{
		Clock:      s.Clock(),
		RPCContext: s.RPCContext(),
		RangeDescriptorDB: kv.MockRangeDescriptorDB(
			func(key roachpb.RKey, considerIntents, useReverseScan bool,
			) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
				if key.Equal(roachpb.RKeyMin) {
					// Pass through requests for the first range to the real sender.
					desc, err := sender.FirstRange()
					if err != nil {
						return nil, nil, roachpb.NewError(err)
					return []roachpb.RangeDescriptor{*desc}, nil, nil
				return []roachpb.RangeDescriptor{{
					RangeID:  rangeID,
					StartKey: roachpb.RKey(keys.Meta2Prefix),
					EndKey:   roachpb.RKeyMax,
					Replicas: []roachpb.ReplicaDescriptor{replica1, replica2},
				}}, nil, nil
	}, s.Gossip())
	// Only inconsistent reads triggered the panic in #6027.
	hdr := roachpb.Header{
		ReadConsistency: roachpb.INCONSISTENT,
	req := roachpb.NewGet(roachpb.Key("asdf"))
	// Repeat the test a few times: due to the randomization between the
	// two replicas, each attempt only had a 50% chance of triggering
	// the panic.
	for i := 0; i < 5; i++ {
		_, pErr := client.SendWrappedWith(sender, context.Background(), hdr, req)
		// Each attempt fails with "store 2 not found" because that is the
		// non-retryable error.
		if !testutils.IsPError(pErr, "store 2 not found") {