Example #1
0
// Send forwards the call to the single store. This is a poor man's
// version of kv.TxnCoordSender, but it serves the purposes of
// supporting tests in this package. Transactions are not supported.
// Since kv/ depends on storage/, we can't get access to a
// TxnCoordSender from here.
// TODO(tschottdorf): {kv->storage}.LocalSender
func (db *testSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if et, ok := ba.GetArg(roachpb.EndTransaction); ok {
		return nil, roachpb.NewError(util.Errorf("%s method not supported", et.Method()))
	}
	// Lookup range and direct request.
	key, endKey := keys.Range(ba)
	rng := db.store.LookupReplica(key, endKey)
	if rng == nil {
		return nil, roachpb.NewError(roachpb.NewRangeKeyMismatchError(key, endKey, nil))
	}
	ba.RangeID = rng.Desc().RangeID
	replica := rng.GetReplica()
	if replica == nil {
		return nil, roachpb.NewError(util.Errorf("own replica missing in range"))
	}
	ba.Replica = *replica
	br, pErr := db.store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(db.store, br))
	}
	if pErr != nil {
		return nil, pErr
	}
	return br, nil
}
Example #2
0
// SendCallConverted is a wrapper to go from the (ctx, call) interface to the
// one used by batch.Sender.
// TODO(tschottdorf): remove when new proto.Call is gone.
func SendCallConverted(sender BatchSender, ctx context.Context, call proto.Call) {
	call, unwrap := MaybeWrapCall(call)
	defer unwrap(call)

	{
		br := *call.Args.(*proto.BatchRequest)
		if len(br.Requests) == 0 {
			panic(br)
		}
		br.Key, br.EndKey = keys.Range(br)
		if bytes.Equal(br.Key, proto.KeyMax) {
			panic(br)
		}
	}

	reply, err := sender.SendBatch(ctx, *call.Args.(*proto.BatchRequest))

	if reply != nil {
		call.Reply.Reset() // required for BatchRequest (concats response otherwise)
		gogoproto.Merge(call.Reply, reply)
	}
	if call.Reply.Header().GoError() != nil {
		panic(proto.ErrorUnexpectedlySet)
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
Example #3
0
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	var store *Store
	var err error

	// If we aren't given a Replica, we have to do a little bending over
	// backwards to look one up. This case applies exclusively to unit tests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, err = ls.lookupReplica(rs.Key, rs.EndKey)
		if err == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx,
		log.RangeID, ba.RangeID)

	if err == nil {
		store, err = ls.GetStore(ba.Replica.StoreID)
	}

	if err != nil {
		return nil, roachpb.NewError(err)
	}

	sp, cleanupSp := tracing.SpanFromContext(opStores, store.Tracer(), ctx)
	defer cleanupSp()
	if ba.Txn != nil {
		// For calls that read data within a txn, we keep track of timestamps
		// observed from the various participating nodes' HLC clocks. If we have
		// a timestamp on file for this Node which is smaller than MaxTimestamp,
		// we can lower MaxTimestamp accordingly. If MaxTimestamp drops below
		// OrigTimestamp, we effectively can't see uncertainty restarts any
		// more.
		// Note that it's not an issue if MaxTimestamp propagates back out to
		// the client via a returned Transaction update - when updating a Txn
		// from another, the larger MaxTimestamp wins.
		if maxTS, ok := ba.Txn.GetObservedTimestamp(ba.Replica.NodeID); ok && maxTS.Less(ba.Txn.MaxTimestamp) {
			// Copy-on-write to protect others we might be sharing the Txn with.
			shallowTxn := *ba.Txn
			// The uncertainty window is [OrigTimestamp, maxTS), so if that window
			// is empty, there won't be any uncertainty restarts.
			if !ba.Txn.OrigTimestamp.Less(maxTS) {
				sp.LogEvent("read has no clock uncertainty")
			}
			shallowTxn.MaxTimestamp.Backward(maxTS)
			ba.Txn = &shallowTxn
		}
	}
	br, pErr := store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
Example #4
0
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	sp := tracing.SpanFromContext(ctx)
	var store *Store
	var pErr *roachpb.Error

	// If we aren't given a Replica, we have to do a little bending over
	// backwards to look one up. This case applies exclusively to unit tests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, pErr = ls.lookupReplica(rs.Key, rs.EndKey)
		if pErr == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx,
		log.RangeID, ba.RangeID)

	if pErr == nil {
		store, pErr = ls.GetStore(ba.Replica.StoreID)
	}

	var br *roachpb.BatchResponse
	if pErr != nil {
		return nil, pErr
	}
	// For calls that read data within a txn, we can avoid uncertainty
	// related retries in certain situations. If the node is in
	// "CertainNodes", we need not worry about uncertain reads any
	// more. Setting MaxTimestamp=OrigTimestamp for the operation
	// accomplishes that. See roachpb.Transaction.CertainNodes for details.
	if ba.Txn != nil && ba.Txn.CertainNodes.Contains(ba.Replica.NodeID) {
		// MaxTimestamp = OrigTimestamp corresponds to no clock uncertainty.
		sp.LogEvent("read has no clock uncertainty")
		// Copy-on-write to protect others we might be sharing the Txn with.
		shallowTxn := *ba.Txn
		// We set to OrigTimestamp because that works for both SNAPSHOT and
		// SERIALIZABLE: If we used Timestamp instead, we could run into
		// unnecessary retries at SNAPSHOT. For example, a SNAPSHOT txn at
		// OrigTimestamp = 1000.0, Timestamp = 2000.0, MaxTimestamp = 3000.0
		// will always read at 1000, so a MaxTimestamp of 2000 will still let
		// it restart with uncertainty when it finds a value in (1000, 2000).
		shallowTxn.MaxTimestamp = ba.Txn.OrigTimestamp
		ba.Txn = &shallowTxn
	}
	br, pErr = store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// TestRetryOnWrongReplicaError sets up a DistSender on a minimal gossip
// network and a mock of rpc.Send, and verifies that the DistSender correctly
// retries upon encountering a stale entry in its range descriptor cache.
func TestRetryOnWrongReplicaError(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	// Updated below, after it has first been returned.
	badStartKey := roachpb.RKey("m")
	newRangeDescriptor := testRangeDescriptor
	goodStartKey := newRangeDescriptor.StartKey
	newRangeDescriptor.StartKey = badStartKey
	descStale := true

	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		ba := getArgs(testAddress).(*roachpb.BatchRequest)
		rs := keys.Range(*ba)
		if _, ok := ba.GetArg(roachpb.RangeLookup); ok {
			if !descStale && bytes.HasPrefix(rs.Key, keys.Meta2Prefix) {
				t.Errorf("unexpected extra lookup for non-stale replica descriptor at %s",
					rs.Key)
			}

			br := getReply().(*roachpb.BatchResponse)
			r := &roachpb.RangeLookupResponse{}
			r.Ranges = append(r.Ranges, newRangeDescriptor)
			br.Add(r)
			// If we just returned the stale descriptor, set up returning the
			// good one next time.
			if bytes.HasPrefix(rs.Key, keys.Meta2Prefix) {
				if newRangeDescriptor.StartKey.Equal(badStartKey) {
					newRangeDescriptor.StartKey = goodStartKey
				} else {
					descStale = false
				}
			}
			return []proto.Message{br}, nil
		}
		// When the Scan first turns up, update the descriptor for future
		// range descriptor lookups.
		if !newRangeDescriptor.StartKey.Equal(goodStartKey) {
			return nil, &roachpb.RangeKeyMismatchError{RequestStartKey: rs.Key.AsRawKey(),
				RequestEndKey: rs.EndKey.AsRawKey()}
		}
		return []proto.Message{ba.CreateReply()}, nil
	}

	ctx := &DistSenderContext{
		RPCSend: testFn,
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 0)
	if _, err := client.SendWrapped(ds, nil, scan); err != nil {
		t.Errorf("scan encountered error: %s", err)
	}
}
Example #6
0
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	sp := tracing.SpanFromContext(ctx)
	var store *Store
	var pErr *roachpb.Error

	// If we aren't given a Replica, we have to do a little bending over
	// backwards to look one up. This case applies exclusively to unit tests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, pErr = ls.lookupReplica(rs.Key, rs.EndKey)
		if pErr == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx,
		log.RangeID, ba.RangeID)

	if pErr == nil {
		store, pErr = ls.GetStore(ba.Replica.StoreID)
	}

	var br *roachpb.BatchResponse
	if pErr != nil {
		return nil, pErr
	}
	// For calls that read data within a txn, we can avoid uncertainty
	// related retries in certain situations. If the node is in
	// "CertainNodes", we need not worry about uncertain reads any
	// more. Setting MaxTimestamp=Timestamp for the operation
	// accomplishes that. See roachpb.Transaction.CertainNodes for details.
	if ba.Txn != nil && ba.Txn.CertainNodes.Contains(ba.Replica.NodeID) {
		// MaxTimestamp = Timestamp corresponds to no clock uncertainty.
		sp.LogEvent("read has no clock uncertainty")
		// Copy-on-write to protect others we might be sharing the Txn with.
		shallowTxn := *ba.Txn
		shallowTxn.MaxTimestamp = ba.Txn.Timestamp
		ba.Txn = &shallowTxn
	}
	br, pErr = store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
Example #7
0
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *LocalSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	trace := tracer.FromCtx(ctx)
	var store *storage.Store
	var err error

	// If we aren't given a Replica, we have to do a little bending over
	// backwards to look one up. This case applies exclusively to unit tests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		key, endKey := keys.Range(ba)
		rangeID, repl, err = ls.lookupReplica(key, endKey)
		if err == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx,
		log.RangeID, ba.RangeID)

	if err == nil {
		store, err = ls.GetStore(ba.Replica.StoreID)
	}

	var br *roachpb.BatchResponse
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	// For calls that read data within a txn, we can avoid uncertainty
	// related retries in certain situations. If the node is in
	// "CertainNodes", we need not worry about uncertain reads any
	// more. Setting MaxTimestamp=Timestamp for the operation
	// accomplishes that. See roachpb.Transaction.CertainNodes for details.
	if ba.Txn != nil && ba.Txn.CertainNodes.Contains(ba.Replica.NodeID) {
		// MaxTimestamp = Timestamp corresponds to no clock uncertainty.
		trace.Event("read has no clock uncertainty")
		ba.Txn.MaxTimestamp = ba.Txn.Timestamp
	}
	br, pErr := store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// TestSequenceUpdateOnMultiRangeQueryLoop reproduces #3206 and
// verifies that the sequence is updated in the DistSender
// multi-range-query loop.
//
// More specifically, the issue was that DistSender might send
// multiple batch requests to the same replica when it finds a
// post-split range descriptor in the cache while the split has not
// yet been fully completed. By giving a higher sequence to the second
// request, we can avoid an infinite txn restart error (otherwise
// caused by hitting the sequence cache).
func TestSequenceUpdateOnMultiRangeQueryLoop(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()

	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	nd := &roachpb.NodeDescriptor{
		NodeID:  roachpb.NodeID(1),
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
	}
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
		t.Fatal(err)
	}

	// Fill mockRangeDescriptorDB with two descriptors.
	var descriptor1 = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKeyMin,
		EndKey:   roachpb.RKey("b"),
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	var descriptor2 = roachpb.RangeDescriptor{
		RangeID:  2,
		StartKey: roachpb.RKey("b"),
		EndKey:   roachpb.RKey("c"),
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
		desc := descriptor1
		if key.Equal(roachpb.RKey("b")) {
			desc = descriptor2
		}
		return []roachpb.RangeDescriptor{desc}, nil
	})

	// Define our rpcSend stub which checks the span of the batch
	// requests. The first request should be the point request on
	// "a". The second request should be on "b". The sequence of the
	// second request will be incremented by one from that of the
	// first request.
	first := true
	var firstSequence uint32
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		if method != "Node.Batch" {
			return nil, util.Errorf("unexpected method %v", method)
		}

		ba := getArgs(testAddress).(*roachpb.BatchRequest)
		rs := keys.Range(*ba)
		if first {
			if !(rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next())) {
				t.Errorf("unexpected span [%s,%s)", rs.Key, rs.EndKey)
			}
			first = false
			firstSequence = ba.Txn.Sequence
		} else {
			if !(rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next())) {
				t.Errorf("unexpected span [%s,%s)", rs.Key, rs.EndKey)
			}
			if ba.Txn.Sequence != firstSequence+1 {
				t.Errorf("unexpected sequence; expected %d, but got %d", firstSequence+1, ba.Txn.Sequence)
			}
		}
		return []proto.Message{ba.CreateReply()}, nil
	}

	ctx := &DistSenderContext{
		RPCSend:           testFn,
		RangeDescriptorDB: descDB,
	}
	ds := NewDistSender(ctx, g)

	// Send a batch request containing two puts.
	var ba roachpb.BatchRequest
	ba.Txn = &roachpb.Transaction{Name: "test"}
	val := roachpb.MakeValueFromString("val")
	ba.Add(roachpb.NewPut(roachpb.Key("a"), val).(*roachpb.PutRequest))
	ba.Add(roachpb.NewPut(roachpb.Key("b"), val).(*roachpb.PutRequest))

	_, pErr := ds.Send(context.Background(), ba)
	if err := pErr.GoError(); err != nil {
		t.Fatal(err)
	}
}
// TestTruncateWithSpanAndDescriptor verifies that a batch request is truncated with a
// range span and the range of a descriptor found in cache.
func TestTruncateWithSpanAndDescriptor(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()

	g.SetNodeID(1)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	nd := &roachpb.NodeDescriptor{
		NodeID:  roachpb.NodeID(1),
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
	}
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
		t.Fatal(err)
	}

	// Fill mockRangeDescriptorDB with two descriptors. When a
	// range descriptor is looked up by key "b", return the second
	// descriptor whose range is ["a", "c") and partially overlaps
	// with the first descriptor's range.
	var descriptor1 = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKeyMin,
		EndKey:   roachpb.RKey("b"),
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	var descriptor2 = roachpb.RangeDescriptor{
		RangeID:  2,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("c"),
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	descDB := mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
		desc := descriptor1
		if key.Equal(roachpb.RKey("b")) {
			desc = descriptor2
		}
		return []roachpb.RangeDescriptor{desc}, nil
	})

	// Define our rpcSend stub which checks the span of the batch
	// requests. The first request should be the point request on
	// "a". The second request should be on "b".
	first := true
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		if method != "Node.Batch" {
			return nil, util.Errorf("unexpected method %v", method)
		}

		ba := getArgs(testAddress).(*roachpb.BatchRequest)
		rs := keys.Range(*ba)
		if first {
			if !(rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next())) {
				t.Errorf("Unexpected span [%s,%s)", rs.Key, rs.EndKey)
			}
			first = false
		} else {
			if !(rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next())) {
				t.Errorf("Unexpected span [%s,%s)", rs.Key, rs.EndKey)
			}
		}

		batchReply := getReply().(*roachpb.BatchResponse)
		reply := &roachpb.PutResponse{}
		batchReply.Add(reply)
		return []proto.Message{batchReply}, nil
	}

	ctx := &DistSenderContext{
		RPCSend:           testFn,
		RangeDescriptorDB: descDB,
	}
	ds := NewDistSender(ctx, g)

	// Send a batch request containing two puts. In the first
	// attempt, the range of the descriptor found in the cache is
	// ["a", "b"). The request is truncated to contain only the put
	// on "a".
	//
	// In the second attempt, the range of the descriptor found in
	// the cache is ["a", "c"), but the put on "a" will not be
	// resent. The request is truncated to contain only the put on "b".
	ba := roachpb.BatchRequest{}
	ba.Txn = &roachpb.Transaction{Name: "test"}
	val := roachpb.MakeValueFromString("val")
	ba.Add(roachpb.NewPut(keys.RangeTreeNodeKey(roachpb.RKey("a")), val).(*roachpb.PutRequest))
	ba.Add(roachpb.NewPut(keys.RangeTreeNodeKey(roachpb.RKey("b")), val).(*roachpb.PutRequest))

	_, pErr := ds.Send(context.Background(), ba)
	if err := pErr.GoError(); err != nil {
		t.Fatal(err)
	}
}
Example #10
0
// TestMultiRangeMergeStaleDescriptor simulates the situation in which the
// DistSender executes a multi-range scan which encounters the stale descriptor
// of a range which has since incorporated its right neighbor by means of a
// merge. The test verifies that the DistSender scans the correct key range exactly
// once.
func TestMultiRangeMergeStaleDescriptor(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	// Assume we have two ranges, [a-b) and [b-KeyMax).
	merged := false
	// The stale first range descriptor which is unaware of the merge.
	var FirstRange = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("b"),
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	// The merged descriptor, which will be looked up after having processed
	// the stale range [a,b).
	var mergedRange = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKeyMax,
		Replicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	// Assume we have two key-value pairs, a=1 and c=2.
	existingKVs := []roachpb.KeyValue{
		{Key: roachpb.Key("a"), Value: roachpb.MakeValueFromString("1")},
		{Key: roachpb.Key("c"), Value: roachpb.MakeValueFromString("2")},
	}
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		if method != "Node.Batch" {
			t.Fatalf("unexpected method:%s", method)
		}
		ba := getArgs(testAddress).(*roachpb.BatchRequest)
		rs := keys.Range(*ba)
		batchReply := getReply().(*roachpb.BatchResponse)
		reply := &roachpb.ScanResponse{}
		batchReply.Add(reply)
		results := []roachpb.KeyValue{}
		for _, curKV := range existingKVs {
			if rs.Key.Less(keys.Addr(curKV.Key).Next()) && keys.Addr(curKV.Key).Less(rs.EndKey) {
				results = append(results, curKV)
			}
		}
		reply.Rows = results
		return []proto.Message{batchReply}, nil
	}
	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(key roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			if !merged {
				// Assume a range merge operation happened.
				merged = true
				return []roachpb.RangeDescriptor{FirstRange}, nil
			}
			return []roachpb.RangeDescriptor{mergedRange}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 10).(*roachpb.ScanRequest)
	// Set the Txn info to avoid an OpRequiresTxnError.
	reply, err := client.SendWrappedWith(ds, nil, roachpb.Header{
		Txn: &roachpb.Transaction{},
	}, scan)
	if err != nil {
		t.Fatalf("scan encountered error: %s", err)
	}
	sr := reply.(*roachpb.ScanResponse)
	if !reflect.DeepEqual(existingKVs, sr.Rows) {
		t.Fatalf("expect get %v, actual get %v", existingKVs, sr.Rows)
	}
}
Example #11
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs := keys.Range(ba)
	var br *roachpb.BatchResponse
	// Send the request to one range per iteration.
	for {
		considerIntents := false
		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var needAnother bool
		var pErr *roachpb.Error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()
			desc, needAnother, evictDesc, pErr = ds.getDescriptors(rs, considerIntents, isReverse)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if pErr != nil {
				if pErr.Retryable {
					if log.V(1) {
						log.Warning(pErr)
					}
					continue
				}
				break
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the batch contains more than just an EndTransaction but
				// ends with one, we want the caller to come back with the
				// EndTransaction in a separate call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, rs.EndKey)) || (!isReverse && !desc.ContainsKeyRange(rs.Key, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}

				return ds.sendSingleRange(trace, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", ba, pErr)
			}
			trace.Event(fmt.Sprintf("reply error: %T", pErr.GoError()))

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := pErr.GoError().(type) {
			case *roachpb.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *roachpb.RangeNotFoundError, *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				// On retries, allow [uncommitted] intents on range descriptor
				// lookups to be returned 50% of the time in order to succeed
				// at finding the transaction record pointed to by the intent
				// itself. The 50% probability of returning either the current
				// intent or the previously committed value balances between
				// the two cases where the intent's txn hasn't yet been
				// committed (the previous value is correct), or the intent's
				// txn has been committed (the intent value is correct).
				considerIntents = true
				continue
			case *roachpb.NotLeaderError:
				newLeader := tErr.Leader
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &roachpb.ReplicaDescriptor{}
				}
				ds.updateLeaderCache(roachpb.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(tErr)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(tErr)
					}
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		}

		ba.Txn.Update(curReply.Txn)

		first := br == nil
		if first {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if first {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]roachpb.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetInner()
				if _, ok := args.(*roachpb.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(roachpb.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetInner().(roachpb.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&roachpb.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}
Example #12
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	// TODO(radu): when contexts are properly plumbed, we should be able to get
	// the tracer from ctx, not from the DistSender.
	ctx, cleanup := tracing.EnsureContext(ctx, tracing.TracerFromCtx(ds.Ctx))
	defer cleanup()

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs, err := keys.Range(ba)
	if err != nil {
		return nil, roachpb.NewError(err), false
	}
	var br *roachpb.BatchResponse

	// Send the request to one range per iteration.
	for {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to for
		// each RPC individually). On RPC errors, there's no guarantee that
		// the request hasn't made its way to the target regardless of the
		// error; we'd like the second execution to be caught by the sequence
		// cache if that happens. There is a small chance that we address
		// a range twice in this chunk (stale/suboptimal descriptors due to
		// splits/merges) which leads to a transaction retry.
		// TODO(tschottdorf): it's possible that if we don't evict from the
		//   cache we could be in for a busy loop.
		ba.SetNewRequest()

		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var evictToken *evictionToken
		var needAnother bool
		var pErr *roachpb.Error
		var finished bool
		var numAttempts int
		for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
			numAttempts++
			{
				const magicLogCurAttempt = 20

				var seq int32
				if ba.Txn != nil {
					seq = ba.Txn.Sequence
				}

				if numAttempts%magicLogCurAttempt == 0 || seq%magicLogCurAttempt == 0 {
					// Log a message if a request appears to get stuck for a long
					// time or, potentially, forever. See #8975.
					// The local counter captures this loop here; the Sequence number
					// should capture anything higher up (as it needs to be
					// incremented every time this method is called).
					log.Warningf(
						ctx,
						"%d retries for an RPC at sequence %d, last error was: %s, remaining key ranges %s: %s",
						numAttempts, seq, pErr, rs, ba,
					)
				}
			}
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			log.Trace(ctx, "meta descriptor lookup")
			var err error
			desc, needAnother, evictToken, err = ds.getDescriptors(ctx, rs, evictToken, isReverse)

			// getDescriptors may fail retryably if, for example, the first
			// range isn't available via Gossip. Assume that all errors at
			// this level are retryable. Non-retryable errors would be for
			// things like malformed requests which we should have checked
			// for before reaching this point.
			if err != nil {
				log.Trace(ctx, "range descriptor lookup failed: "+err.Error())
				if log.V(1) {
					log.Warning(ctx, err)
				}
				pErr = roachpb.NewError(err)
				continue
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the batch contains more than just an EndTransaction but
				// ends with one, we want the caller to come back with the
				// EndTransaction in a separate call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			includesFrontOfCurSpan := func(rd *roachpb.RangeDescriptor) bool {
				if isReverse {
					return rd.ContainsExclusiveEndKey(rs.EndKey)
				}
				return rd.ContainsKey(rs.Key)
			}
			if !includesFrontOfCurSpan(desc) {
				if err := evictToken.Evict(ctx); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}
				return ds.sendSingleRange(ctx, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				finished = true
				break
			}

			log.VTracef(1, ctx, "reply error %s: %s", ba, pErr)

			// Error handling: If the error indicates that our range
			// descriptor is out of date, evict it from the cache and try
			// again. Errors that apply only to a single replica were
			// handled in send().
			//
			// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
			// row and the range descriptor hasn't changed, return the error
			// to our caller.
			switch tErr := pErr.GetDetail().(type) {
			case *roachpb.SendError:
				// We've tried all the replicas without success. Either
				// they're all down, or we're using an out-of-date range
				// descriptor. Invalidate the cache and try again with the new
				// metadata.
				if err := evictToken.Evict(ctx); err != nil {
					return nil, roachpb.NewError(err), false
				}
				continue
			case *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a range split. If we have new range
				// descriptors, insert them instead as long as they are different
				// from the last descriptor to avoid endless loops.
				var replacements []roachpb.RangeDescriptor
				different := func(rd *roachpb.RangeDescriptor) bool {
					return !desc.RSpan().Equal(rd.RSpan())
				}
				if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
					replacements = append(replacements, *tErr.MismatchedRange)
				}
				if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
					if includesFrontOfCurSpan(tErr.SuggestedRange) {
						replacements = append(replacements, *tErr.SuggestedRange)
					}
				}
				// Same as Evict() if replacements is empty.
				if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(ctx, tErr)
				}
				continue
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		} else if !finished {
			select {
			case <-ds.rpcRetryOptions.Closer:
				return nil, roachpb.NewError(&roachpb.NodeUnavailableError{}), false
			case <-ctx.Done():
				return nil, roachpb.NewError(ctx.Err()), false
			default:
				log.Fatal(ctx, "exited retry loop with nil error but finished=false")
			}
		}

		ba.UpdateTxn(curReply.Txn)

		if br == nil {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey, err = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key, err = next(ba, desc.EndKey)
		}
		if err != nil {
			return nil, roachpb.NewError(err), false
		}

		if ba.MaxSpanRequestKeys > 0 {
			// Count how many results we received.
			var numResults int64
			for _, resp := range curReply.Responses {
				numResults += resp.GetInner().Header().NumKeys
			}
			if numResults > ba.MaxSpanRequestKeys {
				panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxSpanRequestKeys))
			}
			ba.MaxSpanRequestKeys -= numResults
			if ba.MaxSpanRequestKeys == 0 {
				// prepare the batch response after meeting the max key limit.
				fillSkippedResponses(ba, br, rs)
				// done, exit loop.
				return br, nil, false
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		// The remaining start key must be strictly less than the end key.
		if !rs.Key.Less(rs.EndKey) {
			panic(fmt.Sprintf("start key %s is not less than end key %s", rs.Key, rs.EndKey))
		}

		log.Trace(ctx, "querying next range")
	}
}
Example #13
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc).
func (ds *DistSender) sendChunk(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, error) {
	// TODO(tschottdorf): prepare for removing Key and EndKey from BatchRequest,
	// making sure that anything that relies on them goes bust.
	ba.Key, ba.EndKey = nil, nil

	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	from, to := keys.Range(ba)
	var br *proto.BatchResponse
	// Send the request to one range per iteration.
	for {
		options := lookupOptions{
			useReverseScan: isReverse,
		}

		var curReply *proto.BatchResponse
		var desc *proto.RangeDescriptor
		var needAnother bool
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()

			desc, needAnother, evictDesc, err = ds.getDescriptors(from, to, options)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(retry.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}

			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if needAnother && ba.Txn == nil && ba.IsRange() &&
				ba.ReadConsistency != proto.INCONSISTENT {
				return nil, &proto.OpRequiresTxnError{}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, to)) || (!isReverse && !desc.ContainsKeyRange(from, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, err = func() (*proto.BatchResponse, error) {
				// Truncate the request to our current key range.
				untruncate, numActive, trErr := truncate(&ba, desc, from, to)
				if numActive == 0 {
					untruncate()
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, util.Errorf("truncation resulted in empty batch on [%s,%s): %s",
						from, to, ba)
				}
				defer untruncate()
				if trErr != nil {
					return nil, trErr
				}
				// TODO(tschottdorf): make key range on batch redundant. The
				// requests within dictate it anyways.
				ba.Key, ba.EndKey = keys.Range(ba)
				reply, err := ds.sendAttempt(trace, ba, desc)
				ba.Key, ba.EndKey = nil, nil

				if err != nil {
					if log.V(0 /* TODO(tschottdorf): 1 */) {
						log.Warningf("failed to invoke %s: %s", ba, err)
					}
				}
				return reply, err
			}()
			// If sending succeeded, break this loop.
			if err == nil {
				break
			}

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := err.(type) {
			case *rpc.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(err)
				}
				// For the remainder of this call, we'll assume that intents
				// are fair game. This replaces more complex logic based on
				// the type of request.
				options.considerIntents = true
				continue
			case *proto.NotLeaderError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				newLeader := tErr.GetLeader()
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &proto.Replica{}
				}
				ds.updateLeaderCache(proto.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(err)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					trace.Event(fmt.Sprintf("reply error: %T", err))
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if err != nil {
			return nil, err
		}

		first := br == nil
		if first {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				panic(err)
				// TODO(tschottdorf): return nil, err
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if first {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]proto.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetValue()
				if _, ok := args.(*proto.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(proto.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetValue().(proto.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&proto.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			to = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			from = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}
Example #14
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	ctx, cleanup := tracing.EnsureContext(ctx, ds.Tracer)
	defer cleanup()

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs, err := keys.Range(ba)
	if err != nil {
		return nil, roachpb.NewError(err), false
	}
	var br *roachpb.BatchResponse

	// Send the request to one range per iteration.
	for {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to for
		// each RPC individually). On RPC errors, there's no guarantee that
		// the request hasn't made its way to the target regardless of the
		// error; we'd like the second execution to be caught by the sequence
		// cache if that happens. There is a small chance that we address
		// a range twice in this chunk (stale/suboptimal descriptors due to
		// splits/merges) which leads to a transaction retry.
		// TODO(tschottdorf): it's possible that if we don't evict from the
		//   cache we could be in for a busy loop.
		ba.SetNewRequest()

		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var evictToken evictionToken
		var needAnother bool
		var pErr *roachpb.Error
		var finished bool
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			log.Trace(ctx, "meta descriptor lookup")
			desc, needAnother, evictToken, pErr = ds.getDescriptors(rs, evictToken, isReverse)

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if pErr != nil {
				log.Trace(ctx, "range descriptor lookup failed: "+pErr.String())
				if pErr.Retryable {
					if log.V(1) {
						log.Warning(pErr)
					}
					continue
				}
				break
			} else {
				log.Trace(ctx, "looked up range descriptor")
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the batch contains more than just an EndTransaction but
				// ends with one, we want the caller to come back with the
				// EndTransaction in a separate call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to cover once the request is truncated to
			// match the descriptor. Example: for a reverse scan over [a,g),
			// the first descriptor lookup for "g" may return descriptor
			// [c,d), in which case [d,g) would never be scanned. We evict
			// and retry in such a case.
			includesFrontOfCurSpan := func(rd *roachpb.RangeDescriptor) bool {
				if isReverse {
					// This approach is needed because rs.EndKey is exclusive.
					return rd.ContainsKeyRange(rd.StartKey, rs.EndKey)
				}
				return rd.ContainsKey(rs.Key)
			}
			if !includesFrontOfCurSpan(desc) {
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't back off; retry immediately.
				r.Reset()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}
				return ds.sendSingleRange(ctx, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				finished = true
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", ba, pErr)
			}
			log.Trace(ctx, fmt.Sprintf("reply error: %T", pErr.GetDetail()))

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't back off on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := pErr.GetDetail().(type) {
			case *roachpb.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				if tErr.CanRetry() {
					continue
				}
			case *roachpb.RangeNotFoundError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a rebalance.
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't back off; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				continue
			case *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a range split. If we have new range
				// descriptors, insert them instead as long as they are different
				// from the last descriptor to avoid endless loops.
				var replacements []roachpb.RangeDescriptor
				different := func(rd *roachpb.RangeDescriptor) bool {
					return !desc.RSpan().Equal(rd.RSpan())
				}
				if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
					replacements = append(replacements, *tErr.MismatchedRange)
				}
				if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
					if includesFrontOfCurSpan(tErr.SuggestedRange) {
						replacements = append(replacements, *tErr.SuggestedRange)
					}
				}
				// Same as Evict() if replacements is empty.
				if err := evictToken.EvictAndReplace(replacements...); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't back off; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				continue
			case *roachpb.NotLeaderError:
				newLeader := tErr.Leader
				if newLeader != nil {
					// Verify that leader is a known replica according to the
					// descriptor. If not, we've got a stale range descriptor;
					// evict cache.
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						if err := evictToken.Evict(); err != nil {
							return nil, roachpb.NewError(err), false
						}
					}
				} else {
					// If the new leader is unknown, we were talking to a
					// replica that is partitioned away from the majority. Our
					// range descriptor may be stale, so clear the cache.
					//
					// TODO(bdarnell): An unknown-leader error doesn't
					// necessarily mean our descriptor is stale. Ideally we
					// would treat these errors more like SendError: retry on
					// another node (at a lower level), and then if it reaches
					// this level then we know we've exhausted our options and
					// must clear the cache.
					if err := evictToken.Evict(); err != nil {
						return nil, roachpb.NewError(err), false
					}
					newLeader = &roachpb.ReplicaDescriptor{}
				}
				// Next, cache the new leader.
				ds.updateLeaderCache(roachpb.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(tErr)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(tErr)
					}
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		} else if !finished {
			select {
			case <-ds.rpcRetryOptions.Closer:
				return nil, roachpb.NewError(&roachpb.NodeUnavailableError{}), false
			default:
				log.Fatal("exited retry loop with nil error but finished=false")
			}
		}

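		// Fold the transaction state returned by this range (e.g. timestamp
		// bumps) back into the request so that subsequent ranges see it.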
		ba.Txn.Update(curReply.Txn)

		first := br == nil
		if first {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

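		// If the batch carries an overall limit on scan results, check
		// whether the responses from this range have exhausted it.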
		if ba.MaxScanResults > 0 {
			// Count how many results we received.
			var numResults int64
			for _, resp := range curReply.Responses {
				if cResp, ok := resp.GetInner().(roachpb.Countable); ok {
					numResults += cResp.Count()
				}
			}
			if numResults > ba.MaxScanResults {
				panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxScanResults))
			}
			ba.MaxScanResults -= numResults
			if ba.MaxScanResults == 0 {
				// We are done with this batch. Some requests might have NoopResponses; we must
				// replace them with empty responses of the proper type.
				for i, req := range ba.Requests {
					if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok {
						continue
					}
					union := roachpb.ResponseUnion{}
					var reply roachpb.Response
					if _, ok := req.GetInner().(*roachpb.ScanRequest); ok {
						reply = &roachpb.ScanResponse{}
					} else {
						_ = req.GetInner().(*roachpb.ReverseScanRequest)
						reply = &roachpb.ReverseScanResponse{}
					}
					union.MustSetInner(reply)
					br.Responses[i] = union
				}
				return br, nil, false
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
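			// For example, a ScanRequest whose MaxResults have already been
			// filled by earlier ranges is replaced below by a NoopRequest
			// for the remaining ranges.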
			needAnother = false
			if first {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]roachpb.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetInner()
				if _, ok := args.(*roachpb.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(roachpb.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetInner().(roachpb.Countable)
				if !ok || prevBound <= 0 {
					// The request is of a bounded type but carries no positive
					// bound, so again we will need to query everything we can.
					// The case in which the reply isn't countable occurs when
					// the request wasn't active for that range (it didn't
					// apply to it), so the response is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					union := &ba.Requests[i] // avoid working on copy
					union.MustSetInner(&noopRequest)
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor rather than the
			// EndKey of the previous one, since the latter is unreliable when
			// stale descriptors come into play.
			rs.EndKey, err = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key, err = next(ba, desc.EndKey)
		}
		if err != nil {
			return nil, roachpb.NewError(err), false
		}
		log.Trace(ctx, "querying next range")
	}
}
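
// Illustrative sketch (not part of the original source): one way a caller of
// sendChunk might react to the shouldSplitET return value documented above,
// namely by re-sending the batch without its trailing EndTransaction and then
// sending the EndTransaction on its own. The helper splitEndTxnRequest and
// this exact control flow are assumptions for illustration, not DistSender
// API.
//
//	br, pErr, shouldSplitET := ds.sendChunk(ctx, ba)
//	if shouldSplitET {
//		// Everything before the trailing EndTransaction goes first; the
//		// EndTransaction follows only once the rest has succeeded.
//		prefix, etOnly := splitEndTxnRequest(ba) // assumed helper
//		if br, pErr, _ = ds.sendChunk(ctx, prefix); pErr == nil {
//			var etBr *roachpb.BatchResponse
//			if etBr, pErr, _ = ds.sendChunk(ctx, etOnly); pErr == nil {
//				if err := br.Combine(etBr); err != nil {
//					pErr = roachpb.NewError(err)
//				}
//			}
//		}
//	}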
Example #15
0
// newRSpan returns an rSpan encompassing all the keys in the batch request.
func newRSpan(ba roachpb.BatchRequest) rSpan {
	key, endKey := keys.Range(ba)
	return rSpan{key: key, endKey: endKey}
}