Пример #1
0
// Send implements the client.Sender interface. It verifies
// permissions and looks up the appropriate range based on the
// supplied key and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(_ context.Context, call proto.Call) {
	args := call.Args
	finalReply := call.Reply

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. This undoes that when we return.
	boundedArgs, argsBounded := args.(proto.Bounded)

	if argsBounded {
		defer func(bound int64) {
			boundedArgs.SetBound(bound)
		}(boundedArgs.GetBound())
	}

	defer func(key proto.Key) {
		args.Header().Key = key
	}(args.Header().Key)

	// Retry logic for lookup of range by key and RPCs to range replicas.
	curReply := finalReply
	for {
		call.Reply = curReply
		curReply.Header().Reset()

		var desc, descNext *proto.RangeDescriptor
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors).
			// sendAttempt below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			desc, descNext, err = ds.getDescriptors(call)
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}
			err = func() error {
				// Truncate the request to our current range, making sure not to
				// touch it unless we have to (it is illegal to send EndKey on
				// commands which do not operate on ranges).
				if descNext != nil {
					defer func(endKey proto.Key) {
						args.Header().EndKey = endKey
					}(args.Header().EndKey)
					args.Header().EndKey = desc.EndKey
				}
				leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

				// Try to send the call.
				replicas := newReplicaSlice(ds.gossip, desc)

				// Rearrange the replicas so that those replicas with long common
				// prefix of attributes end up first. If there's no prefix, this is a
				// no-op.
				order := ds.optimizeReplicaOrder(replicas)

				// If this request needs to go to a leader and we know who that is, move
				// it to the front.
				if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
					leader.StoreID > 0 {
					if i := replicas.FindReplica(leader.StoreID); i >= 0 {
						replicas.MoveToFront(i)
						order = rpc.OrderStable
					}
				}

				return ds.sendRPC(desc.RaftID, replicas, order, args, curReply)
			}()
			if err != nil {
				// For an RPC error to occur, we must've been unable to contact any
				// replicas. In this case, likely all nodes are down (or not getting back
				// to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date replicas, so
				// clearing the descriptor here should be a good idea.
				// TODO(tschottdorf): If a replica group goes dead, this will cause clients
				// to put high read pressure on the first range, so there should be some
				// rate limiting here.
				ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
			} else {
				err = curReply.Header().GoError()
			}

			if err != nil {
				if log.V(1) {
					log.Warningf("failed to invoke %s: %s", call.Method(), err)
				}

				// If retryable, allow retry. For range not found or range
				// key mismatch errors, we don't backoff on the retry,
				// but reset the backoff loop so we can retry immediately.
				switch tErr := err.(type) {
				case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
					// Range descriptor might be out of date - evict it.
					ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
					// On addressing errors, don't backoff; retry immediately.
					r.Reset()
					if log.V(1) {
						log.Warning(err)
					}
					continue
				case *proto.NotLeaderError:
					newLeader := tErr.GetLeader()
					// Verify that leader is a known replica according to the
					// descriptor. If not, we've got a stale replica; evict cache.
					// Next, cache the new leader.
					if newLeader != nil {
						if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
							if log.V(1) {
								log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
							}
							ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
						}
					} else {
						newLeader = &proto.Replica{}
					}
					ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader)
					if log.V(1) {
						log.Warning(err)
					}
					r.Reset()
					continue
				case util.Retryable:
					if tErr.CanRetry() {
						if log.V(1) {
							log.Warning(err)
						}
						continue
					}
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if finalReply != curReply {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cFinalReply, ok := finalReply.(proto.Combinable); ok {
				cFinalReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if argsBounded {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		// In next iteration, query next range.
		// It's important that we use the EndKey of the current descriptor
		// as opposed to the StartKey of the next one: if the former is stale,
		// it's possible that the next range has since merged the subsequent
		// one, and unless both descriptors are stale, the next descriptor's
		// StartKey would move us to the beginning of the current range,
		// resulting in a duplicate scan.
		args.Header().Key = desc.EndKey

		// This is a multi-range request, make a new reply object for
		// subsequent iterations of the loop.
		curReply = args.CreateReply()
	}
	call.Reply = finalReply
}
Пример #2
0
// sendAttempt is invoked by Send and handles retry logic and cache eviction
// for a call sent to a single range. It returns a retry status, which is Break
// on success and either Break, Continue or Reset depending on error condition.
// This method is expected to be invoked from within a backoff / retry loop to
// retry the send repeatedly (e.g. to continue processing after a critical node
// becomes available after downtime or the range descriptor is refreshed via
// lookup).
func (ds *DistSender) sendAttempt(desc *proto.RangeDescriptor, call proto.Call) (retry.Status, error) {
	leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those replicas with long common
	// prefix of attributes end up first. If there's no prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	args := call.Args
	reply := call.Reply

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
		leader.StoreID > 0 {
		if i := replicas.FindReplica(leader.StoreID); i >= 0 {
			replicas.MoveToFront(i)
			order = rpc.OrderStable
		}
	}

	err := ds.sendRPC(desc.RaftID, replicas, order, args, reply)
	if err != nil {
		// For an RPC error to occur, we must've been unable to contact any
		// replicas. In this case, likely all nodes are down (or not getting back
		// to us within a reasonable amount of time).
		// We may simply not be trying to talk to the up-to-date replicas, so
		// clearing the descriptor here should be a good idea.
		// TODO(tschottdorf): If a replica group goes dead, this will cause clients
		// to put high read pressure on the first range, so there should be some
		// rate limiting here.
		ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
	} else {
		err = reply.Header().GoError()
	}

	if err != nil {
		if log.V(1) {
			log.Warningf("failed to invoke %s: %s", call.Method(), err)
		}

		// If retryable, allow retry. For range not found or range
		// key mismatch errors, we don't backoff on the retry,
		// but reset the backoff loop so we can retry immediately.
		switch tErr := err.(type) {
		case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it.
			ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
			// On addressing errors, don't backoff; retry immediately.
			return retry.Reset, err
		case *proto.NotLeaderError:
			newLeader := tErr.GetLeader()
			// Verify that leader is a known replica according to the
			// descriptor. If not, we've got a stale replica; evict cache.
			// Next, cache the new leader.
			if newLeader != nil {
				if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
					if log.V(1) {
						log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
					}
					ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
				}
			} else {
				newLeader = &proto.Replica{}
			}
			ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader)
			return retry.Reset, err
		case util.Retryable:
			if tErr.CanRetry() {
				return retry.Continue, err
			}
		}
		return retry.Break, err
	}
	return retry.Break, nil
}
Пример #3
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc).
func (ds *DistSender) sendChunk(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, error) {
	// TODO(tschottdorf): prepare for removing Key and EndKey from BatchRequest,
	// making sure that anything that relies on them goes bust.
	ba.Key, ba.EndKey = nil, nil

	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	from, to := keys.Range(ba)
	var br *proto.BatchResponse
	// Send the request to one range per iteration.
	for {
		options := lookupOptions{
			useReverseScan: isReverse,
		}

		var curReply *proto.BatchResponse
		var desc *proto.RangeDescriptor
		var needAnother bool
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()

			desc, needAnother, evictDesc, err = ds.getDescriptors(from, to, options)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(retry.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}

			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if needAnother && ba.Txn == nil && ba.IsRange() &&
				ba.ReadConsistency != proto.INCONSISTENT {
				return nil, &proto.OpRequiresTxnError{}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, to)) || (!isReverse && !desc.ContainsKeyRange(from, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, err = func() (*proto.BatchResponse, error) {
				// Truncate the request to our current key range.
				untruncate, numActive, trErr := truncate(&ba, desc, from, to)
				if numActive == 0 {
					untruncate()
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, util.Errorf("truncation resulted in empty batch on [%s,%s): %s",
						from, to, ba)
				}
				defer untruncate()
				if trErr != nil {
					return nil, trErr
				}
				// TODO(tschottdorf): make key range on batch redundant. The
				// requests within dictate it anyways.
				ba.Key, ba.EndKey = keys.Range(ba)
				reply, err := ds.sendAttempt(trace, ba, desc)
				ba.Key, ba.EndKey = nil, nil

				if err != nil {
					if log.V(0 /* TODO(tschottdorf): 1 */) {
						log.Warningf("failed to invoke %s: %s", ba, err)
					}
				}
				return reply, err
			}()
			// If sending succeeded, break this loop.
			if err == nil {
				break
			}

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := err.(type) {
			case *rpc.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(err)
				}
				// For the remainder of this call, we'll assume that intents
				// are fair game. This replaces more complex logic based on
				// the type of request.
				options.considerIntents = true
				continue
			case *proto.NotLeaderError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				newLeader := tErr.GetLeader()
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &proto.Replica{}
				}
				ds.updateLeaderCache(proto.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(err)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					trace.Event(fmt.Sprintf("reply error: %T", err))
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if err != nil {
			return nil, err
		}

		first := br == nil
		if first {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				panic(err)
				// TODO(tschottdorf): return nil, err
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if first {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]proto.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetValue()
				if _, ok := args.(*proto.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(proto.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetValue().(proto.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&proto.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			to = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			from = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}
Пример #4
0
// Send implements the client.Sender interface. It verifies
// permissions and looks up the appropriate range based on the
// supplied key and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(ctx context.Context, call proto.Call) {
	args := call.Args

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	trace := tracer.FromCtx(ctx)

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. This undoes that when we return.
	boundedArgs, argsBounded := args.(proto.Bounded)

	if argsBounded {
		defer func(bound int64) {
			boundedArgs.SetBound(bound)
		}(boundedArgs.GetBound())
	}
	_, isReverseScan := call.Args.(*proto.ReverseScanRequest)
	// Restore to the original range if the scan/reverse_scan crosses range boundaries.
	if isReverseScan {
		defer func(key proto.Key) {
			args.Header().EndKey = key
		}(args.Header().EndKey)
	} else {
		defer func(key proto.Key) {
			args.Header().Key = key
		}(args.Header().Key)
	}

	first := true

	// Retry logic for lookup of range by key and RPCs to range replicas.
	for {
		var curReply proto.Response
		var desc, descNext *proto.RangeDescriptor
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			// It is safe to pass call here (with its embedded reply) because
			// the reply is only used to check that it implements
			// proto.Combinable if the request spans multiple ranges.
			desc, descNext, err = ds.getDescriptors(call)
			descDone()
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(retry.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}
			// At this point reply.Header().Error may be non-nil!
			curReply, err = ds.sendAttempt(trace, args, desc)

			descKey := args.Header().Key
			if isReverseScan {
				descKey = args.Header().EndKey
			}

			if err != nil {
				trace.Event(fmt.Sprintf("send error: %T", err))
				// For an RPC error to occur, we must've been unable to contact any
				// replicas. In this case, likely all nodes are down (or not getting back
				// to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date replicas, so
				// clearing the descriptor here should be a good idea.
				// TODO(tschottdorf): If a replica group goes dead, this will cause clients
				// to put high read pressure on the first range, so there should be some
				// rate limiting here.
				ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan)
			} else {
				err = curReply.Header().GoError()
			}

			if err == nil {
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", call.Method(), err)
			}

			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := err.(type) {
			case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				// Range descriptor might be out of date - evict it.
				ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan)
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(err)
				}
				continue
			case *proto.NotLeaderError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				newLeader := tErr.GetLeader()
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan)
					}
				} else {
					newLeader = &proto.Replica{}
				}
				ds.updateLeaderCache(proto.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(err)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					trace.Event(fmt.Sprintf("reply error: %T", err))
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if first {
			// Equivalent of `*call.Reply = curReply`. Generics!
			dst := reflect.ValueOf(call.Reply).Elem()
			dst.Set(reflect.ValueOf(curReply).Elem())
		} else {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cReply, ok := call.Reply.(proto.Combinable); ok {
				cReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		first = false

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if argsBounded {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		if isReverseScan {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one.
			args.Header().EndKey = desc.StartKey
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			args.Header().Key = desc.EndKey
		}
		trace.Event("querying next range")
	}
}