// sendPartialBatch sends the supplied batch to the range specified by // desc. The batch request is first truncated so that it contains only // requests which intersect the range descriptor and keys for each // request are limited to the range's key span. The send occurs in a // retry loop to handle send failures. On failure to send to any // replicas, we backoff and retry by refetching the range // descriptor. If the underlying range seems to have split, we // recursively invoke divideAndSendBatchToRanges to re-enumerate the // ranges in the span and resend to each. func (ds *DistSender) sendPartialBatch( ctx context.Context, ba roachpb.BatchRequest, rs roachpb.RSpan, desc *roachpb.RangeDescriptor, evictToken *EvictionToken, isFirst bool, ) response { var reply *roachpb.BatchResponse var pErr *roachpb.Error isReverse := ba.IsReverse() // Truncate the request to range descriptor. intersected, err := rs.Intersect(desc) if err != nil { return response{pErr: roachpb.NewError(err)} } truncBA, numActive, err := truncate(ba, intersected) if numActive == 0 && err == nil { // This shouldn't happen in the wild, but some tests exercise it. return response{ pErr: roachpb.NewErrorf("truncation resulted in empty batch on %s: %s", intersected, ba), } } if err != nil { return response{pErr: roachpb.NewError(err)} } // Start a retry loop for sending the batch to the range. for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); { // If we've cleared the descriptor on a send failure, re-lookup. if desc == nil { var descKey roachpb.RKey if isReverse { descKey = intersected.EndKey } else { descKey = intersected.Key } desc, evictToken, err = ds.getDescriptor(ctx, descKey, nil, isReverse) if err != nil { log.ErrEventf(ctx, "range descriptor re-lookup failed: %s", err) continue } } reply, pErr = ds.sendSingleRange(ctx, truncBA, desc) // If sending succeeded, return immediately. if pErr == nil { return response{reply: reply} } log.ErrEventf(ctx, "reply error %s: %s", ba, pErr) // Error handling: If the error indicates that our range // descriptor is out of date, evict it from the cache and try // again. Errors that apply only to a single replica were // handled in send(). // // TODO(bdarnell): Don't retry endlessly. If we fail twice in a // row and the range descriptor hasn't changed, return the error // to our caller. switch tErr := pErr.GetDetail().(type) { case *roachpb.SendError: // We've tried all the replicas without success. Either // they're all down, or we're using an out-of-date range // descriptor. Invalidate the cache and try again with the new // metadata. log.Event(ctx, "evicting range descriptor on send error and backoff for re-lookup") if err := evictToken.Evict(ctx); err != nil { return response{pErr: roachpb.NewError(err)} } // Clear the descriptor to reload on the next attempt. desc = nil continue case *roachpb.RangeKeyMismatchError: // Range descriptor might be out of date - evict it. This is // likely the result of a range split. If we have new range // descriptors, insert them instead as long as they are different // from the last descriptor to avoid endless loops. var replacements []roachpb.RangeDescriptor different := func(rd *roachpb.RangeDescriptor) bool { return !desc.RSpan().Equal(rd.RSpan()) } if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) { replacements = append(replacements, *tErr.MismatchedRange) } if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) { if includesFrontOfCurSpan(isReverse, tErr.SuggestedRange, rs) { replacements = append(replacements, *tErr.SuggestedRange) } } // Same as Evict() if replacements is empty. if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil { return response{pErr: roachpb.NewError(err)} } // On addressing errors (likely a split), we need to re-invoke // the range descriptor lookup machinery, so we recurse by // sending batch to just the partial span this descriptor was // supposed to cover. log.VEventf(ctx, 1, "likely split; resending batch to span: %s", tErr) reply, pErr = ds.divideAndSendBatchToRanges(ctx, ba, intersected, isFirst) return response{reply: reply, pErr: pErr} } break } // Propagate error if either the retry closer or context done // channels were closed. if pErr == nil { if pErr = ds.deduceRetryEarlyExitError(ctx); pErr == nil { log.Fatal(ctx, "exited retry loop without an error") } } return response{pErr: pErr} }