Exemplo n.º 1
0
// fillSkippedResponses after meeting the batch key max limit for range
// requests.
func fillSkippedResponses(ba roachpb.BatchRequest, br *roachpb.BatchResponse, nextKey roachpb.RKey) {
	// Some requests might have NoopResponses; we must replace them with empty
	// responses of the proper type.
	for i, req := range ba.Requests {
		if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok {
			continue
		}
		var reply roachpb.Response
		switch t := req.GetInner().(type) {
		case *roachpb.ScanRequest:
			reply = &roachpb.ScanResponse{}

		case *roachpb.ReverseScanRequest:
			reply = &roachpb.ReverseScanResponse{}

		case *roachpb.DeleteRangeRequest:
			reply = &roachpb.DeleteRangeResponse{}

		case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest:
			continue

		default:
			panic(fmt.Sprintf("bad type %T", t))
		}
		union := roachpb.ResponseUnion{}
		union.MustSetInner(reply)
		br.Responses[i] = union
	}
	// Set the ResumeSpan for future batch requests.
	isReverse := ba.IsReverse()
	for i, resp := range br.Responses {
		req := ba.Requests[i].GetInner()
		if !roachpb.IsRange(req) {
			continue
		}
		hdr := resp.GetInner().Header()
		origSpan := req.Header()
		if isReverse {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.Key might be set to the StartKey of a range;
				// correctly set it to the Key of the original request span.
				hdr.ResumeSpan.Key = origSpan.Key
			} else if roachpb.RKey(origSpan.Key).Less(nextKey) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if nextKey.Less(roachpb.RKey(origSpan.EndKey)) {
					// The original span has been partially processed.
					hdr.ResumeSpan.EndKey = nextKey.AsRawKey()
				}
			}
		} else {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.EndKey might be set to the EndKey of a
				// range; correctly set it to the EndKey of the original
				// request span.
				hdr.ResumeSpan.EndKey = origSpan.EndKey
			} else if nextKey.Less(roachpb.RKey(origSpan.EndKey)) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if roachpb.RKey(origSpan.Key).Less(nextKey) {
					// The original span has been partially processed.
					hdr.ResumeSpan.Key = nextKey.AsRawKey()
				}
			}
		}
		br.Responses[i].GetInner().SetHeader(hdr)
	}
}
Exemplo n.º 2
0
// divideAndSendBatchToRanges sends the supplied batch to all of the
// ranges which comprise the span specified by rs. The batch request
// is trimmed against each range which is part of the span and sent
// either serially or in parallel, if possible. isFirst indicates
// whether this is the first time this method has been called on the
// batch. It's specified false where this method is invoked recursively.
func (ds *DistSender) divideAndSendBatchToRanges(
	ctx context.Context, ba roachpb.BatchRequest, rs roachpb.RSpan, isFirst bool,
) (br *roachpb.BatchResponse, pErr *roachpb.Error) {
	// This function builds a channel of responses for each range
	// implicated in the span (rs) and combines them into a single
	// BatchResponse when finished.
	var responseChs []chan response
	defer func() {
		for _, responseCh := range responseChs {
			resp := <-responseCh
			if resp.pErr != nil {
				if pErr == nil {
					pErr = resp.pErr
				}
				continue
			}
			if br == nil {
				// First response from a Range.
				br = resp.reply
			} else {
				// This was the second or later call in a cross-Range request.
				// Combine the new response with the existing one.
				if err := br.Combine(resp.reply); err != nil {
					pErr = roachpb.NewError(err)
					return
				}
				br.Txn.Update(resp.reply.Txn)
			}
		}

		// If we experienced an error, don't neglect to update the error's
		// attached transaction with any responses which were received.
		if pErr != nil {
			if br != nil {
				pErr.UpdateTxn(br.Txn)
			}
		}
	}()

	// Get initial seek key depending on direction of iteration.
	var seekKey roachpb.RKey
	isReverse := ba.IsReverse()
	if isReverse {
		seekKey = rs.EndKey
	} else {
		seekKey = rs.Key
	}
	// Send the request to one range per iteration.
	ri := NewRangeIterator(ds, isReverse)
	for ri.Seek(ctx, seekKey); ri.Valid(); ri.Seek(ctx, seekKey) {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to
		// for each RPC individually). On RPC errors, there's no guarantee
		// that the request hasn't made its way to the target regardless
		// of the error; we'd like the second execution to be caught by
		// the sequence cache if that happens. There is a small chance
		// that we address a range twice in this chunk (stale/suboptimal
		// descriptors due to splits/merges) which leads to a transaction
		// retry.
		//
		// TODO(tschottdorf): it's possible that if we don't evict from
		// the cache we could be in for a busy loop.
		ba.SetNewRequest()

		responseCh := make(chan response, 1)
		responseChs = append(responseChs, responseCh)

		if isFirst && ri.NeedAnother(rs) {
			// TODO(tschottdorf): we should have a mechanism for discovering
			// range merges (descriptor staleness will mostly go unnoticed),
			// or we'll be turning single-range queries into multi-range
			// queries for no good reason.
			//
			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if ba.Txn == nil && ba.IsPossibleTransaction() && ba.ReadConsistency != roachpb.INCONSISTENT {
				responseCh <- response{pErr: roachpb.NewError(&roachpb.OpRequiresTxnError{})}
				return
			}
			// If the request is more than but ends with EndTransaction, we
			// want the caller to come again with the EndTransaction in an
			// extra call.
			if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
				responseCh <- response{pErr: errNo1PCTxn}
				return
			}
		}

		// Determine next seek key, taking a potentially sparse batch into
		// consideration.
		var err error
		nextRS := rs
		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			seekKey, err = prev(ba, ri.Desc().StartKey)
			nextRS.EndKey = seekKey
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			seekKey, err = next(ba, ri.Desc().EndKey)
			nextRS.Key = seekKey
		}
		if err != nil {
			responseCh <- response{pErr: roachpb.NewError(err)}
			return
		}

		// Send the next partial batch to the first range in the "rs" span.
		// If we're not handling a request which limits responses and we
		// can reserve one of the limited goroutines available for parallel
		// batch RPCs, send asynchronously.
		if ba.MaxSpanRequestKeys == 0 && ri.NeedAnother(rs) && ds.rpcContext != nil &&
			ds.sendPartialBatchAsync(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst, responseCh) {
			// Note that we pass the batch request by value to the parallel
			// goroutine to avoid using the cloned txn.

			// Clone the txn to preserve the current txn sequence for the async call.
			if ba.Txn != nil {
				txnClone := ba.Txn.Clone()
				ba.Txn = &txnClone
			}
		} else {
			// Send synchronously if there is no parallel capacity left, there's a
			// max results limit, or this is the final request in the span.
			resp := ds.sendPartialBatch(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst)
			responseCh <- resp
			if resp.pErr != nil {
				return
			}
			ba.UpdateTxn(resp.reply.Txn)

			// Check whether we've received enough responses to exit query loop.
			if ba.MaxSpanRequestKeys > 0 {
				var numResults int64
				for _, r := range resp.reply.Responses {
					numResults += r.GetInner().Header().NumKeys
				}
				if numResults > ba.MaxSpanRequestKeys {
					panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxSpanRequestKeys))
				}
				ba.MaxSpanRequestKeys -= numResults
				// Exiting; fill in missing responses.
				if ba.MaxSpanRequestKeys == 0 {
					fillSkippedResponses(ba, resp.reply, seekKey)
					return
				}
			}
		}

		// Check for completion.
		if !ri.NeedAnother(rs) {
			return
		}
		isFirst = false // next range will not be first!
		rs = nextRS
	}

	// We've exited early. Return the range iterator error.
	responseCh := make(chan response, 1)
	responseCh <- response{pErr: ri.Error()}
	responseChs = append(responseChs, responseCh)
	return
}
Exemplo n.º 3
0
// sendPartialBatch sends the supplied batch to the range specified by
// desc. The batch request is first truncated so that it contains only
// requests which intersect the range descriptor and keys for each
// request are limited to the range's key span. The send occurs in a
// retry loop to handle send failures. On failure to send to any
// replicas, we backoff and retry by refetching the range
// descriptor. If the underlying range seems to have split, we
// recursively invoke divideAndSendBatchToRanges to re-enumerate the
// ranges in the span and resend to each.
func (ds *DistSender) sendPartialBatch(
	ctx context.Context,
	ba roachpb.BatchRequest,
	rs roachpb.RSpan,
	desc *roachpb.RangeDescriptor,
	evictToken *EvictionToken,
	isFirst bool,
) response {
	var reply *roachpb.BatchResponse
	var pErr *roachpb.Error
	isReverse := ba.IsReverse()

	// Truncate the request to range descriptor.
	intersected, err := rs.Intersect(desc)
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}
	truncBA, numActive, err := truncate(ba, intersected)
	if numActive == 0 && err == nil {
		// This shouldn't happen in the wild, but some tests exercise it.
		return response{
			pErr: roachpb.NewErrorf("truncation resulted in empty batch on %s: %s", intersected, ba),
		}
	}
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}

	// Start a retry loop for sending the batch to the range.
	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// If we've cleared the descriptor on a send failure, re-lookup.
		if desc == nil {
			var descKey roachpb.RKey
			if isReverse {
				descKey = intersected.EndKey
			} else {
				descKey = intersected.Key
			}
			desc, evictToken, err = ds.getDescriptor(ctx, descKey, nil, isReverse)
			if err != nil {
				log.ErrEventf(ctx, "range descriptor re-lookup failed: %s", err)
				continue
			}
		}

		reply, pErr = ds.sendSingleRange(ctx, truncBA, desc)

		// If sending succeeded, return immediately.
		if pErr == nil {
			return response{reply: reply}
		}

		log.ErrEventf(ctx, "reply error %s: %s", ba, pErr)

		// Error handling: If the error indicates that our range
		// descriptor is out of date, evict it from the cache and try
		// again. Errors that apply only to a single replica were
		// handled in send().
		//
		// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
		// row and the range descriptor hasn't changed, return the error
		// to our caller.
		switch tErr := pErr.GetDetail().(type) {
		case *roachpb.SendError:
			// We've tried all the replicas without success. Either
			// they're all down, or we're using an out-of-date range
			// descriptor. Invalidate the cache and try again with the new
			// metadata.
			log.Event(ctx, "evicting range descriptor on send error and backoff for re-lookup")
			if err := evictToken.Evict(ctx); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// Clear the descriptor to reload on the next attempt.
			desc = nil
			continue
		case *roachpb.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it. This is
			// likely the result of a range split. If we have new range
			// descriptors, insert them instead as long as they are different
			// from the last descriptor to avoid endless loops.
			var replacements []roachpb.RangeDescriptor
			different := func(rd *roachpb.RangeDescriptor) bool {
				return !desc.RSpan().Equal(rd.RSpan())
			}
			if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
				replacements = append(replacements, *tErr.MismatchedRange)
			}
			if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
				if includesFrontOfCurSpan(isReverse, tErr.SuggestedRange, rs) {
					replacements = append(replacements, *tErr.SuggestedRange)
				}
			}
			// Same as Evict() if replacements is empty.
			if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// On addressing errors (likely a split), we need to re-invoke
			// the range descriptor lookup machinery, so we recurse by
			// sending batch to just the partial span this descriptor was
			// supposed to cover.
			log.VEventf(ctx, 1, "likely split; resending batch to span: %s", tErr)
			reply, pErr = ds.divideAndSendBatchToRanges(ctx, ba, intersected, isFirst)
			return response{reply: reply, pErr: pErr}
		}
		break
	}

	// Propagate error if either the retry closer or context done
	// channels were closed.
	if pErr == nil {
		if pErr = ds.deduceRetryEarlyExitError(ctx); pErr == nil {
			log.Fatal(ctx, "exited retry loop without an error")
		}
	}

	return response{pErr: pErr}
}
Exemplo n.º 4
0
// initAndVerifyBatch initializes timestamp-related information and
// verifies batch constraints before splitting.
func (ds *DistSender) initAndVerifyBatch(
	ctx context.Context, ba *roachpb.BatchRequest,
) *roachpb.Error {
	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if ba.ReadConsistency == roachpb.INCONSISTENT && ba.Timestamp.Equal(hlc.ZeroTimestamp) {
		ba.Timestamp = ds.clock.Now()
	}

	if ba.Txn != nil {
		// Make a copy here since the code below modifies it in different places.
		// TODO(tschottdorf): be smarter about this - no need to do it for
		// requests that don't get split.
		txnClone := ba.Txn.Clone()
		ba.Txn = &txnClone

		if len(ba.Txn.ObservedTimestamps) == 0 {
			// Ensure the local NodeID is marked as free from clock offset;
			// the transaction's timestamp was taken off the local clock.
			if nDesc := ds.getNodeDescriptor(); nDesc != nil {
				// TODO(tschottdorf): future refactoring should move this to txn
				// creation in TxnCoordSender, which is currently unaware of the
				// NodeID (and wraps *DistSender through client.Sender since it
				// also needs test compatibility with *LocalSender).
				//
				// Taking care below to not modify any memory referenced from
				// our BatchRequest which may be shared with others.
				//
				// We already have a clone of our txn (see above), so we can
				// modify it freely.
				//
				// Zero the existing data. That makes sure that if we had
				// something of size zero but with capacity, we don't re-use the
				// existing space (which others may also use). This is just to
				// satisfy paranoia/OCD and not expected to matter in practice.
				ba.Txn.ResetObservedTimestamps()
				// OrigTimestamp is the HLC timestamp at which the Txn started, so
				// this effectively means no more uncertainty on this node.
				ba.Txn.UpdateObservedTimestamp(nDesc.NodeID, ba.Txn.OrigTimestamp)
			}
		}
	}

	if len(ba.Requests) < 1 {
		return roachpb.NewErrorf("empty batch")
	}

	if ba.MaxSpanRequestKeys != 0 {
		// Verify that the batch contains only specific range requests or the
		// Begin/EndTransactionRequest. Verify that a batch with a ReverseScan
		// only contains ReverseScan range requests.
		isReverse := ba.IsReverse()
		for _, req := range ba.Requests {
			inner := req.GetInner()
			switch inner.(type) {
			case *roachpb.ScanRequest, *roachpb.DeleteRangeRequest:
				// Accepted range requests. All other range requests are still
				// not supported.
				// TODO(vivek): don't enumerate all range requests.
				if isReverse {
					return roachpb.NewErrorf("batch with limit contains both forward and reverse scans")
				}

			case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest, *roachpb.ReverseScanRequest:
				continue

			default:
				return roachpb.NewErrorf("batch with limit contains %T request", inner)
			}
		}
	}

	return nil
}