Golang BatchRequest.IsReverse Examples

Programming Language: Golang

Namespace/Package Name: github.com/cockroachdb/cockroach/roachpb

Class/Type: BatchRequest

Method/Function: IsReverse

Examples at hotexamples.com: 5

Golang BatchRequest.IsReverse - 5 examples found. These are the top rated real world Golang examples of github.com/cockroachdb/cockroach/roachpb.BatchRequest.IsReverse extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Add(30)

Txn(25)

Timestamp(16)

RangeID(14)

Requests(12)

GetArg(9)

SetNewRequest(9)

CmdID(7)

Replica(7)

ReadConsistency(7)

UserPriority(7)

IsReadOnly(6)

IsReverse(5)

Split(5)

GetOrCreateCmdID(5)

GetUserPriority(5)

GetIntentSpans(4)

IntentSpanIterate(3)

MaxScanResults(3)

IsPossibleTransaction(3)

GetIntents(3)

Header(2)

GetTxn(2)

IsTransactionWrite(2)

UpdateTxn(2)

Key(2)

MaxSpanRequestKeys(1)

TraceID(1)

Example #1

Show file

File: dist_sender.go Project: xnyan/cockroach

// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs := keys.Range(ba)
	var br *roachpb.BatchResponse
	// Send the request to one range per iteration.
	for {
		considerIntents := false
		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var needAnother bool
		var pErr *roachpb.Error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()
			desc, needAnother, evictDesc, pErr = ds.getDescriptors(rs, considerIntents, isReverse)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if pErr != nil {
				if pErr.Retryable {
					if log.V(1) {
						log.Warning(pErr)
					}
					continue
				}
				break
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the request is more than but ends with EndTransaction, we
				// want the caller to come again with the EndTransaction in an
				// extra call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, rs.EndKey)) || (!isReverse && !desc.ContainsKeyRange(rs.Key, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}

				return ds.sendSingleRange(trace, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", ba, pErr)
			}
			trace.Event(fmt.Sprintf("reply error: %T", pErr.GoError()))

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := pErr.GoError().(type) {
			case *roachpb.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *roachpb.RangeNotFoundError, *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				// On retries, allow [uncommitted] intents on range descriptor
				// lookups to be returned 50% of the time in order to succeed
				// at finding the transaction record pointed to by the intent
				// itself. The 50% probability of returning either the current
				// intent or the previously committed value balances between
				// the two cases where the intent's txn hasn't yet been
				// committed (the previous value is correct), or the intent's
				// txn has been committed (the intent value is correct).
				considerIntents = true
				continue
			case *roachpb.NotLeaderError:
				newLeader := tErr.Leader
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &roachpb.ReplicaDescriptor{}
				}
				ds.updateLeaderCache(roachpb.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(tErr)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(tErr)
					}
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		}

		ba.Txn.Update(curReply.Txn)

		if br == nil {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if br == nil {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]roachpb.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetInner()
				if _, ok := args.(*roachpb.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(roachpb.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetInner().(roachpb.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&roachpb.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}

Example #2

Show file

File: dist_sender.go Project: yaojingguo/cockroach

// fillSkippedResponses after meeting the batch key max limit for range
// requests.
func fillSkippedResponses(
	ba roachpb.BatchRequest, br *roachpb.BatchResponse, nextSpan roachpb.RSpan,
) {
	// Some requests might have NoopResponses; we must replace them with empty
	// responses of the proper type.
	for i, req := range ba.Requests {
		if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok {
			continue
		}
		var reply roachpb.Response
		switch t := req.GetInner().(type) {
		case *roachpb.ScanRequest:
			reply = &roachpb.ScanResponse{}

		case *roachpb.ReverseScanRequest:
			reply = &roachpb.ReverseScanResponse{}

		case *roachpb.DeleteRangeRequest:
			reply = &roachpb.DeleteRangeResponse{}

		case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest:
			continue

		default:
			panic(fmt.Sprintf("bad type %T", t))
		}
		union := roachpb.ResponseUnion{}
		union.MustSetInner(reply)
		br.Responses[i] = union
	}
	// Set the ResumeSpan for future batch requests.
	isReverse := ba.IsReverse()
	for i, resp := range br.Responses {
		req := ba.Requests[i].GetInner()
		if !roachpb.IsRange(req) {
			continue
		}
		hdr := resp.GetInner().Header()
		origSpan := req.Header()
		if isReverse {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.Key might be set to the StartKey of a range;
				// correctly set it to the Key of the original request span.
				hdr.ResumeSpan.Key = origSpan.Key
			} else if roachpb.RKey(origSpan.Key).Less(nextSpan.EndKey) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if nextSpan.EndKey.Less(roachpb.RKey(origSpan.EndKey)) {
					// The original span has been partially processed.
					hdr.ResumeSpan.EndKey = nextSpan.EndKey.AsRawKey()
				}
			}
		} else {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.EndKey might be set to the EndKey of a
				// range; correctly set it to the EndKey of the original
				// request span.
				hdr.ResumeSpan.EndKey = origSpan.EndKey
			} else if nextSpan.Key.Less(roachpb.RKey(origSpan.EndKey)) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if roachpb.RKey(origSpan.Key).Less(nextSpan.Key) {
					// The original span has been partially processed.
					hdr.ResumeSpan.Key = nextSpan.Key.AsRawKey()
				}
			}
		}
		br.Responses[i].GetInner().SetHeader(hdr)
	}
}

Example #3

Show file

File: dist_sender.go Project: yaojingguo/cockroach

// Send implements the batch.Sender interface. It subdivides
// the Batch into batches admissible for sending (preventing certain
// illegal mixtures of requests), executes each individual part
// (which may span multiple ranges), and recombines the response.
// When the request spans ranges, it is split up and the corresponding
// ranges queried serially, in ascending order.
// In particular, the first write in a transaction may not be part of the first
// request sent. This is relevant since the first write is a BeginTransaction
// request, thus opening up a window of time during which there may be intents
// of a transaction, but no entry. Pushing such a transaction will succeed, and
// may lead to the transaction being aborted early.
func (ds *DistSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	tracing.AnnotateTrace()

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if ba.ReadConsistency == roachpb.INCONSISTENT && ba.Timestamp.Equal(hlc.ZeroTimestamp) {
		ba.Timestamp = ds.clock.Now()
	}

	if ba.Txn != nil {
		// Make a copy here since the code below modifies it in different places.
		// TODO(tschottdorf): be smarter about this - no need to do it for
		// requests that don't get split.
		txnClone := ba.Txn.Clone()
		ba.Txn = &txnClone

		if len(ba.Txn.ObservedTimestamps) == 0 {
			// Ensure the local NodeID is marked as free from clock offset;
			// the transaction's timestamp was taken off the local clock.
			if nDesc := ds.getNodeDescriptor(); nDesc != nil {
				// TODO(tschottdorf): future refactoring should move this to txn
				// creation in TxnCoordSender, which is currently unaware of the
				// NodeID (and wraps *DistSender through client.Sender since it
				// also needs test compatibility with *LocalSender).
				//
				// Taking care below to not modify any memory referenced from
				// our BatchRequest which may be shared with others.
				//
				// We already have a clone of our txn (see above), so we can
				// modify it freely.
				//
				// Zero the existing data. That makes sure that if we had
				// something of size zero but with capacity, we don't re-use the
				// existing space (which others may also use). This is just to
				// satisfy paranoia/OCD and not expected to matter in practice.
				ba.Txn.ResetObservedTimestamps()
				// OrigTimestamp is the HLC timestamp at which the Txn started, so
				// this effectively means no more uncertainty on this node.
				ba.Txn.UpdateObservedTimestamp(nDesc.NodeID, ba.Txn.OrigTimestamp)
			}
		}
	}

	if len(ba.Requests) < 1 {
		panic("empty batch")
	}

	if ba.MaxSpanRequestKeys != 0 {
		// Verify that the batch contains only specific range requests or the
		// Begin/EndTransactionRequest. Verify that a batch with a ReverseScan
		// only contains ReverseScan range requests.
		isReverse := ba.IsReverse()
		for _, req := range ba.Requests {
			inner := req.GetInner()
			switch inner.(type) {
			case *roachpb.ScanRequest, *roachpb.DeleteRangeRequest:
				// Accepted range requests. All other range requests are still
				// not supported.
				// TODO(vivek): don't enumerate all range requests.
				if isReverse {
					return nil, roachpb.NewErrorf("batch with limit contains both forward and reverse scans")
				}

			case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest, *roachpb.ReverseScanRequest:
				continue

			default:
				return nil, roachpb.NewErrorf("batch with limit contains %T request", inner)
			}
		}
	}

	var rplChunks []*roachpb.BatchResponse
	parts := ba.Split(false /* don't split ET */)
	if len(parts) > 1 && ba.MaxSpanRequestKeys != 0 {
		// We already verified above that the batch contains only scan requests of the same type.
		// Such a batch should never need splitting.
		panic("batch with MaxSpanRequestKeys needs splitting")
	}
	for len(parts) > 0 {
		part := parts[0]
		ba.Requests = part
		rpl, pErr, shouldSplitET := ds.sendChunk(ctx, ba)
		if shouldSplitET {
			// If we tried to send a single round-trip EndTransaction but
			// it looks like it's going to hit multiple ranges, split it
			// here and try again.
			if len(parts) != 1 {
				panic("EndTransaction not in last chunk of batch")
			}
			parts = ba.Split(true /* split ET */)
			if len(parts) != 2 {
				panic("split of final EndTransaction chunk resulted in != 2 parts")
			}
			continue
		}
		if pErr != nil {
			return nil, pErr
		}
		// Propagate transaction from last reply to next request. The final
		// update is taken and put into the response's main header.
		ba.UpdateTxn(rpl.Txn)
		rplChunks = append(rplChunks, rpl)
		parts = parts[1:]
	}

	reply := rplChunks[0]
	for _, rpl := range rplChunks[1:] {
		reply.Responses = append(reply.Responses, rpl.Responses...)
		reply.CollectedSpans = append(reply.CollectedSpans, rpl.CollectedSpans...)
	}
	reply.BatchResponse_Header = rplChunks[len(rplChunks)-1].BatchResponse_Header
	return reply, nil
}

Example #4

Show file

File: dist_sender.go Project: yaojingguo/cockroach

// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	// TODO(radu): when contexts are properly plumbed, we should be able to get
	// the tracer from ctx, not from the DistSender.
	ctx, cleanup := tracing.EnsureContext(ctx, tracing.TracerFromCtx(ds.Ctx))
	defer cleanup()

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs, err := keys.Range(ba)
	if err != nil {
		return nil, roachpb.NewError(err), false
	}
	var br *roachpb.BatchResponse

	// Send the request to one range per iteration.
	for {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to for
		// each RPC individually). On RPC errors, there's no guarantee that
		// the request hasn't made its way to the target regardless of the
		// error; we'd like the second execution to be caught by the sequence
		// cache if that happens. There is a small chance that that we address
		// a range twice in this chunk (stale/suboptimal descriptors due to
		// splits/merges) which leads to a transaction retry.
		// TODO(tschottdorf): it's possible that if we don't evict from the
		//   cache we could be in for a busy loop.
		ba.SetNewRequest()

		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var evictToken *evictionToken
		var needAnother bool
		var pErr *roachpb.Error
		var finished bool
		var numAttempts int
		for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
			numAttempts++
			{
				const magicLogCurAttempt = 20

				var seq int32
				if ba.Txn != nil {
					seq = ba.Txn.Sequence
				}

				if numAttempts%magicLogCurAttempt == 0 || seq%magicLogCurAttempt == 0 {
					// Log a message if a request appears to get stuck for a long
					// time or, potentially, forever. See #8975.
					// The local counter captures this loop here; the Sequence number
					// should capture anything higher up (as it needs to be
					// incremented every time this method is called).
					log.Warningf(
						ctx,
						"%d retries for an RPC at sequence %d, last error was: %s, remaining key ranges %s: %s",
						numAttempts, seq, pErr, rs, ba,
					)
				}
			}
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			log.Trace(ctx, "meta descriptor lookup")
			var err error
			desc, needAnother, evictToken, err = ds.getDescriptors(ctx, rs, evictToken, isReverse)

			// getDescriptors may fail retryably if, for example, the first
			// range isn't available via Gossip. Assume that all errors at
			// this level are retryable. Non-retryable errors would be for
			// things like malformed requests which we should have checked
			// for before reaching this point.
			if err != nil {
				log.Trace(ctx, "range descriptor lookup failed: "+err.Error())
				if log.V(1) {
					log.Warning(ctx, err)
				}
				pErr = roachpb.NewError(err)
				continue
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the request is more than but ends with EndTransaction, we
				// want the caller to come again with the EndTransaction in an
				// extra call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			includesFrontOfCurSpan := func(rd *roachpb.RangeDescriptor) bool {
				if isReverse {
					return desc.ContainsExclusiveEndKey(rs.EndKey)
				}
				return desc.ContainsKey(rs.Key)
			}
			if !includesFrontOfCurSpan(desc) {
				if err := evictToken.Evict(ctx); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}
				return ds.sendSingleRange(ctx, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				finished = true
				break
			}

			log.VTracef(1, ctx, "reply error %s: %s", ba, pErr)

			// Error handling: If the error indicates that our range
			// descriptor is out of date, evict it from the cache and try
			// again. Errors that apply only to a single replica were
			// handled in send().
			//
			// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
			// row and the range descriptor hasn't changed, return the error
			// to our caller.
			switch tErr := pErr.GetDetail().(type) {
			case *roachpb.SendError:
				// We've tried all the replicas without success. Either
				// they're all down, or we're using an out-of-date range
				// descriptor. Invalidate the cache and try again with the new
				// metadata.
				if err := evictToken.Evict(ctx); err != nil {
					return nil, roachpb.NewError(err), false
				}
				continue
			case *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a range split. If we have new range
				// descriptors, insert them instead as long as they are different
				// from the last descriptor to avoid endless loops.
				var replacements []roachpb.RangeDescriptor
				different := func(rd *roachpb.RangeDescriptor) bool {
					return !desc.RSpan().Equal(rd.RSpan())
				}
				if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
					replacements = append(replacements, *tErr.MismatchedRange)
				}
				if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
					if includesFrontOfCurSpan(tErr.SuggestedRange) {
						replacements = append(replacements, *tErr.SuggestedRange)

					}
				}
				// Same as Evict() if replacements is empty.
				if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(ctx, tErr)
				}
				continue
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		} else if !finished {
			select {
			case <-ds.rpcRetryOptions.Closer:
				return nil, roachpb.NewError(&roachpb.NodeUnavailableError{}), false
			case <-ctx.Done():
				return nil, roachpb.NewError(ctx.Err()), false
			default:
				log.Fatal(ctx, "exited retry loop with nil error but finished=false")
			}
		}

		ba.UpdateTxn(curReply.Txn)

		if br == nil {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey, err = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key, err = next(ba, desc.EndKey)
		}
		if err != nil {
			return nil, roachpb.NewError(err), false
		}

		if ba.MaxSpanRequestKeys > 0 {
			// Count how many results we received.
			var numResults int64
			for _, resp := range curReply.Responses {
				numResults += resp.GetInner().Header().NumKeys
			}
			if numResults > ba.MaxSpanRequestKeys {
				panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxSpanRequestKeys))
			}
			ba.MaxSpanRequestKeys -= numResults
			if ba.MaxSpanRequestKeys == 0 {
				// prepare the batch response after meeting the max key limit.
				fillSkippedResponses(ba, br, rs)
				// done, exit loop.
				return br, nil, false
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		// key cannot be less that the end key.
		if !rs.Key.Less(rs.EndKey) {
			panic(fmt.Sprintf("start key %s is less than %s", rs.Key, rs.EndKey))
		}

		log.Trace(ctx, "querying next range")
	}
}

Example #5

Show file

File: dist_sender.go Project: chzyer-dev/cockroach

// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	ctx, cleanup := tracing.EnsureContext(ctx, ds.Tracer)
	defer cleanup()

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs, err := keys.Range(ba)
	if err != nil {
		return nil, roachpb.NewError(err), false
	}
	var br *roachpb.BatchResponse

	// Send the request to one range per iteration.
	for {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to for
		// each RPC individually). On RPC errors, there's no guarantee that
		// the request hasn't made its way to the target regardless of the
		// error; we'd like the second execution to be caught by the sequence
		// cache if that happens. There is a small chance that that we address
		// a range twice in this chunk (stale/suboptimal descriptors due to
		// splits/merges) which leads to a transaction retry.
		// TODO(tschottdorf): it's possible that if we don't evict from the
		//   cache we could be in for a busy loop.
		ba.SetNewRequest()

		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var evictToken evictionToken
		var needAnother bool
		var pErr *roachpb.Error
		var finished bool
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			log.Trace(ctx, "meta descriptor lookup")
			desc, needAnother, evictToken, pErr = ds.getDescriptors(rs, evictToken, isReverse)

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if pErr != nil {
				log.Trace(ctx, "range descriptor lookup failed: "+pErr.String())
				if pErr.Retryable {
					if log.V(1) {
						log.Warning(pErr)
					}
					continue
				}
				break
			} else {
				log.Trace(ctx, "looked up range descriptor")
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the request is more than but ends with EndTransaction, we
				// want the caller to come again with the EndTransaction in an
				// extra call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			includesFrontOfCurSpan := func(rd *roachpb.RangeDescriptor) bool {
				if isReverse {
					// This approach is needed because rs.EndKey is exclusive.
					return desc.ContainsKeyRange(desc.StartKey, rs.EndKey)
				}
				return desc.ContainsKey(rs.Key)
			}
			if !includesFrontOfCurSpan(desc) {
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}
				return ds.sendSingleRange(ctx, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				finished = true
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", ba, pErr)
			}
			log.Trace(ctx, fmt.Sprintf("reply error: %T", pErr.GetDetail()))

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := pErr.GetDetail().(type) {
			case *roachpb.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				if tErr.CanRetry() {
					continue
				}
			case *roachpb.RangeNotFoundError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a rebalance.
				if err := evictToken.Evict(); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				continue
			case *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it. This is
				// likely the result of a range split. If we have new range
				// descriptors, insert them instead as long as they are different
				// from the last descriptor to avoid endless loops.
				var replacements []roachpb.RangeDescriptor
				different := func(rd *roachpb.RangeDescriptor) bool {
					return !desc.RSpan().Equal(rd.RSpan())
				}
				if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
					replacements = append(replacements, *tErr.MismatchedRange)
				}
				if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
					if includesFrontOfCurSpan(tErr.SuggestedRange) {
						replacements = append(replacements, *tErr.SuggestedRange)

					}
				}
				// Same as Evict() if replacements is empty.
				if err := evictToken.EvictAndReplace(replacements...); err != nil {
					return nil, roachpb.NewError(err), false
				}
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				continue
			case *roachpb.NotLeaderError:
				newLeader := tErr.Leader
				if newLeader != nil {
					// Verify that leader is a known replica according to the
					// descriptor. If not, we've got a stale range descriptor;
					// evict cache.
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						if err := evictToken.Evict(); err != nil {
							return nil, roachpb.NewError(err), false
						}
					}
				} else {
					// If the new leader is unknown, we were talking to a
					// replica that is partitioned away from the majority. Our
					// range descriptor may be stale, so clear the cache.
					//
					// TODO(bdarnell): An unknown-leader error doesn't
					// necessarily mean our descriptor is stale. Ideally we
					// would treat these errors more like SendError: retry on
					// another node (at a lower level), and then if it reaches
					// this level then we know we've exhausted our options and
					// must clear the cache.
					if err := evictToken.Evict(); err != nil {
						return nil, roachpb.NewError(err), false
					}
					newLeader = &roachpb.ReplicaDescriptor{}
				}
				// Next, cache the new leader.
				ds.updateLeaderCache(roachpb.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(tErr)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(tErr)
					}
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		} else if !finished {
			select {
			case <-ds.rpcRetryOptions.Closer:
				return nil, roachpb.NewError(&roachpb.NodeUnavailableError{}), false
			default:
				log.Fatal("exited retry loop with nil error but finished=false")
			}
		}

		ba.Txn.Update(curReply.Txn)

		if br == nil {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		if ba.MaxScanResults > 0 {
			// Count how many results we received.
			var numResults int64
			for _, resp := range curReply.Responses {
				if cResp, ok := resp.GetInner().(roachpb.Countable); ok {
					numResults += cResp.Count()
				}
			}
			if numResults > ba.MaxScanResults {
				panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxScanResults))
			}
			ba.MaxScanResults -= numResults
			if ba.MaxScanResults == 0 {
				// We are done with this batch. Some requests might have NoopResponses; we must
				// replace them with empty responses of the proper type.
				for i, req := range ba.Requests {
					if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok {
						continue
					}
					union := roachpb.ResponseUnion{}
					var reply roachpb.Response
					if _, ok := req.GetInner().(*roachpb.ScanRequest); ok {
						reply = &roachpb.ScanResponse{}
					} else {
						_ = req.GetInner().(*roachpb.ReverseScanRequest)
						reply = &roachpb.ReverseScanResponse{}
					}
					union.MustSetInner(reply)
					br.Responses[i] = union
				}
				return br, nil, false
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if br == nil {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]roachpb.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetInner()
				if _, ok := args.(*roachpb.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(roachpb.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetInner().(roachpb.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					union := &ba.Requests[i] // avoid working on copy
					union.MustSetInner(&noopRequest)
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey, err = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key, err = next(ba, desc.EndKey)
		}
		if err != nil {
			return nil, roachpb.NewError(err), false
		}
		log.Trace(ctx, "querying next range")
	}
}