Example #1
// divideAndSendBatchToRanges sends the supplied batch to all of the
// ranges which comprise the span specified by rs. The batch request
// is trimmed against each range which is part of the span and sent
// either serially or in parallel, if possible. isFirst indicates
// whether this is the first time this method has been called on the
// batch. It's specified false where this method is invoked recursively.
func (ds *DistSender) divideAndSendBatchToRanges(
	ctx context.Context, ba roachpb.BatchRequest, rs roachpb.RSpan, isFirst bool,
) (br *roachpb.BatchResponse, pErr *roachpb.Error) {
	// This function builds a channel of responses for each range
	// implicated in the span (rs) and combines them into a single
	// BatchResponse when finished.
	var responseChs []chan response
	defer func() {
		for _, responseCh := range responseChs {
			resp := <-responseCh
			if resp.pErr != nil {
				if pErr == nil {
					pErr = resp.pErr
			if br == nil {
				// First response from a Range.
				br = resp.reply
			} else {
				// This was the second or later call in a cross-Range request.
				// Combine the new response with the existing one.
				if err := br.Combine(resp.reply); err != nil {
					pErr = roachpb.NewError(err)

		// If we experienced an error, don't neglect to update the error's
		// attached transaction with any responses which were received.
		if pErr != nil {
			if br != nil {

	// Get initial seek key depending on direction of iteration.
	var seekKey roachpb.RKey
	isReverse := ba.IsReverse()
	if isReverse {
		seekKey = rs.EndKey
	} else {
		seekKey = rs.Key
	// Send the request to one range per iteration.
	ri := NewRangeIterator(ds, isReverse)
	for ri.Seek(ctx, seekKey); ri.Valid(); ri.Seek(ctx, seekKey) {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to
		// for each RPC individually). On RPC errors, there's no guarantee
		// that the request hasn't made its way to the target regardless
		// of the error; we'd like the second execution to be caught by
		// the sequence cache if that happens. There is a small chance
		// that we address a range twice in this chunk (stale/suboptimal
		// descriptors due to splits/merges) which leads to a transaction
		// retry.
		// TODO(tschottdorf): it's possible that if we don't evict from
		// the cache we could be in for a busy loop.

		responseCh := make(chan response, 1)
		responseChs = append(responseChs, responseCh)

		if isFirst && ri.NeedAnother(rs) {
			// TODO(tschottdorf): we should have a mechanism for discovering
			// range merges (descriptor staleness will mostly go unnoticed),
			// or we'll be turning single-range queries into multi-range
			// queries for no good reason.
			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if ba.Txn == nil && ba.IsPossibleTransaction() && ba.ReadConsistency != roachpb.INCONSISTENT {
				responseCh <- response{pErr: roachpb.NewError(&roachpb.OpRequiresTxnError{})}
			// If the request is more than but ends with EndTransaction, we
			// want the caller to come again with the EndTransaction in an
			// extra call.
			if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
				responseCh <- response{pErr: errNo1PCTxn}

		// Determine next seek key, taking a potentially sparse batch into
		// consideration.
		var err error
		nextRS := rs
		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			seekKey, err = prev(ba, ri.Desc().StartKey)
			nextRS.EndKey = seekKey
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			seekKey, err = next(ba, ri.Desc().EndKey)
			nextRS.Key = seekKey
		if err != nil {
			responseCh <- response{pErr: roachpb.NewError(err)}

		// Send the next partial batch to the first range in the "rs" span.
		// If we're not handling a request which limits responses and we
		// can reserve one of the limited goroutines available for parallel
		// batch RPCs, send asynchronously.
		if ba.MaxSpanRequestKeys == 0 && ri.NeedAnother(rs) && ds.rpcContext != nil &&
			ds.sendPartialBatchAsync(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst, responseCh) {
			// Note that we pass the batch request by value to the parallel
			// goroutine to avoid using the cloned txn.

			// Clone the txn to preserve the current txn sequence for the async call.
			if ba.Txn != nil {
				txnClone := ba.Txn.Clone()
				ba.Txn = &txnClone
		} else {
			// Send synchronously if there is no parallel capacity left, there's a
			// max results limit, or this is the final request in the span.
			resp := ds.sendPartialBatch(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst)
			responseCh <- resp
			if resp.pErr != nil {

			// Check whether we've received enough responses to exit query loop.
			if ba.MaxSpanRequestKeys > 0 {
				var numResults int64
				for _, r := range resp.reply.Responses {
					numResults += r.GetInner().Header().NumKeys
				if numResults > ba.MaxSpanRequestKeys {
					panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxSpanRequestKeys))
				ba.MaxSpanRequestKeys -= numResults
				// Exiting; fill in missing responses.
				if ba.MaxSpanRequestKeys == 0 {
					fillSkippedResponses(ba, resp.reply, seekKey)

		// Check for completion.
		if !ri.NeedAnother(rs) {
		isFirst = false // next range will not be first!
		rs = nextRS

	// We've exited early. Return the range iterator error.
	responseCh := make(chan response, 1)
	responseCh <- response{pErr: ri.Error()}
	responseChs = append(responseChs, responseCh)