// handlePerReplicaError returns true if the given error is likely to
// be unique to the replica that reported it, and retrying on other
// replicas is likely to produce different results. This method should
// be called only once for each error as it may have side effects such
// as updating caches.
func (ds *DistSender) handlePerReplicaError(
	ctx context.Context, transport Transport, rangeID roachpb.RangeID, pErr *roachpb.Error,
) bool {
	switch tErr := pErr.GetDetail().(type) {
	case *roachpb.RangeNotFoundError:
		return true
	case *roachpb.NodeUnavailableError:
		return true
	case *roachpb.NotLeaseHolderError:
		if tErr.LeaseHolder != nil {
			// If the replica we contacted knows the new lease holder, update the cache.
			leaseHolder := *tErr.LeaseHolder
			ds.updateLeaseHolderCache(ctx, rangeID, leaseHolder)

			// Move the new lease holder to the head of the queue for the next retry.
			transport.MoveToFront(leaseHolder)
		}
		return true
	}
	return false
}

// sendPartialBatch sends the supplied batch to the range specified by
// desc. The batch request is first truncated so that it contains only
// requests which intersect the range descriptor and keys for each
// request are limited to the range's key span. The send occurs in a
// retry loop to handle send failures. On failure to send to any
// replicas, we back off and retry by refetching the range
// descriptor. If the underlying range seems to have split, we
// recursively invoke divideAndSendBatchToRanges to re-enumerate the
// ranges in the span and resend to each.
func (ds *DistSender) sendPartialBatch(
	ctx context.Context,
	ba roachpb.BatchRequest,
	rs roachpb.RSpan,
	desc *roachpb.RangeDescriptor,
	evictToken *EvictionToken,
	isFirst bool,
) response {
	var reply *roachpb.BatchResponse
	var pErr *roachpb.Error
	isReverse := ba.IsReverse()

	// Truncate the request to the range descriptor.
	intersected, err := rs.Intersect(desc)
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}
	truncBA, numActive, err := truncate(ba, intersected)
	if numActive == 0 && err == nil {
		// This shouldn't happen in the wild, but some tests exercise it.
		return response{
			pErr: roachpb.NewErrorf("truncation resulted in empty batch on %s: %s", intersected, ba),
		}
	}
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}

	// Start a retry loop for sending the batch to the range.
	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// If we've cleared the descriptor on a send failure, re-lookup.
		if desc == nil {
			var descKey roachpb.RKey
			if isReverse {
				descKey = intersected.EndKey
			} else {
				descKey = intersected.Key
			}
			desc, evictToken, err = ds.getDescriptor(ctx, descKey, nil, isReverse)
			if err != nil {
				log.ErrEventf(ctx, "range descriptor re-lookup failed: %s", err)
				continue
			}
		}

		reply, pErr = ds.sendSingleRange(ctx, truncBA, desc)

		// If sending succeeded, return immediately.
		if pErr == nil {
			return response{reply: reply}
		}

		log.ErrEventf(ctx, "reply error %s: %s", ba, pErr)

		// Error handling: If the error indicates that our range
		// descriptor is out of date, evict it from the cache and try
		// again. Errors that apply only to a single replica were
		// handled in send().
		//
		// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
		// row and the range descriptor hasn't changed, return the error
		// to our caller.
		switch tErr := pErr.GetDetail().(type) {
		case *roachpb.SendError:
			// We've tried all the replicas without success. Either
			// they're all down, or we're using an out-of-date range
			// descriptor. Invalidate the cache and try again with the new
			// metadata.
			log.Event(ctx, "evicting range descriptor on send error and backoff for re-lookup")
			if err := evictToken.Evict(ctx); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// Clear the descriptor to reload on the next attempt.
			desc = nil
			continue
		case *roachpb.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it. This is
			// likely the result of a range split. If we have new range
			// descriptors, insert them instead as long as they are different
			// from the last descriptor to avoid endless loops.
			var replacements []roachpb.RangeDescriptor
			different := func(rd *roachpb.RangeDescriptor) bool {
				return !desc.RSpan().Equal(rd.RSpan())
			}
			if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
				replacements = append(replacements, *tErr.MismatchedRange)
			}
			if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
				if includesFrontOfCurSpan(isReverse, tErr.SuggestedRange, rs) {
					replacements = append(replacements, *tErr.SuggestedRange)
				}
			}
			// Same as Evict() if replacements is empty.
			if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// On addressing errors (likely a split), we need to re-invoke
			// the range descriptor lookup machinery, so we recurse by
			// sending the batch to just the partial span this descriptor was
			// supposed to cover.
			log.VEventf(ctx, 1, "likely split; resending batch to span: %s", tErr)
			reply, pErr = ds.divideAndSendBatchToRanges(ctx, ba, intersected, isFirst)
			return response{reply: reply, pErr: pErr}
		}
		break
	}

	// Propagate error if either the retry closer or context done
	// channels were closed.
	if pErr == nil {
		if pErr = ds.deduceRetryEarlyExitError(ctx); pErr == nil {
			log.Fatal(ctx, "exited retry loop without an error")
		}
	}

	return response{pErr: pErr}
}

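// The following example is not part of the original source. It is a minimal
// sketch of the replacement filtering performed in the RangeKeyMismatchError
// case above: only descriptors whose span differs from the stale cached one
// are accepted as cache replacements, which avoids an eviction/re-insertion
// loop when the server echoes back the descriptor we already had. The keys
// and the helper's name are made up for illustration.
func exampleDescriptorReplacement() []roachpb.RangeDescriptor {
	// Stale cached descriptor covering ["a", "c"); suppose the range has since
	// split at "b", producing ["a", "b") and ["b", "c").
	stale := roachpb.RangeDescriptor{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}
	left := roachpb.RangeDescriptor{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("b")}
	echoed := stale // a server might also suggest the descriptor we already know

	different := func(rd *roachpb.RangeDescriptor) bool {
		return !stale.RSpan().Equal(rd.RSpan())
	}

	var replacements []roachpb.RangeDescriptor
	for _, rd := range []roachpb.RangeDescriptor{left, echoed} {
		if different(&rd) {
			// Keeps left (["a","b") differs from ["a","c")); drops echoed.
			replacements = append(replacements, rd)
		}
	}
	return replacements
}
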
// processWriteIntentError tries to push the conflicting
// transaction(s) responsible for the given WriteIntentError, and to
// resolve those intents if possible. Returns a new error to be used
// in place of the original.
//
// The returned error may be a copy of the original WriteIntentError,
// with or without the Resolved flag set, which governs the client's
// retry behavior (if the transaction is pushed, the Resolved flag is
// set to tell the client to retry immediately; otherwise it is false
// to cause the client to back off).
func (ir *intentResolver) processWriteIntentError(
	ctx context.Context,
	wiPErr *roachpb.Error,
	args roachpb.Request,
	h roachpb.Header,
	pushType roachpb.PushTxnType,
) *roachpb.Error {
	wiErr, ok := wiPErr.GetDetail().(*roachpb.WriteIntentError)
	if !ok {
		return roachpb.NewErrorf("not a WriteIntentError: %v", wiPErr)
	}

	if log.V(6) {
		log.Infof(ctx, "resolving write intent %s", wiErr)
	}

	method := args.Method()
	readOnly := roachpb.IsReadOnly(args) // TODO(tschottdorf): pass as param

	resolveIntents, pushErr := ir.maybePushTransactions(ctx, wiErr.Intents, h, pushType, false)

	if resErr := ir.resolveIntents(ctx, resolveIntents,
		false /* !wait */, pushType == roachpb.PUSH_ABORT /* poison */); resErr != nil {
		// When resolving without waiting, errors should not
		// usually be returned here, although there are some cases
		// when they may be (especially when a test cluster is in
		// the process of shutting down).
		log.Warningf(ctx, "asynchronous resolveIntents failed: %s", resErr)
	}

	if pushErr != nil {
		if log.V(1) {
			log.Infof(ctx, "on %s: %s", method, pushErr)
		}

		if _, isExpected := pushErr.GetDetail().(*roachpb.TransactionPushError); !isExpected {
			// If an unexpected error occurred, make sure it bubbles up to the
			// client. Examples are timeouts and logic errors.
			return pushErr
		}

		// For write/write conflicts within a transaction, propagate the
		// push failure, not the original write intent error. The push
		// failure will instruct the client to restart the transaction
		// with a backoff.
		if h.Txn != nil && h.Txn.ID != nil && !readOnly {
			return pushErr
		}

		// For read/write conflicts, and non-transactional write/write
		// conflicts, return the write intent error which engages
		// backoff/retry (with !Resolved). We don't need to restart the
		// txn, only resend the read with a backoff.
		return wiPErr
	}

	// We pushed all transactions, so tell the client everything's
	// resolved and it can retry immediately.
	wiErr.Resolved = true
	return wiPErr // references wiErr
}

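// Hypothetical example, not part of the original source: one way a caller's
// retry policy could interpret the Resolved flag described in the comment on
// processWriteIntentError. A resolved WriteIntentError means the conflicting
// transaction was pushed and the request can be retried immediately; an
// unresolved one calls for backoff. The function name and backoff constants
// are invented for illustration and do not reflect the real retry machinery.
func retryDelayForWriteIntent(wiErr *roachpb.WriteIntentError, attempt uint) time.Duration {
	if wiErr.Resolved {
		// Conflicting intents were resolved; an immediate retry should succeed.
		return 0
	}
	// The conflicting transaction is still active; back off exponentially so
	// we don't hammer the contended key.
	const base = 10 * time.Millisecond
	return base << attempt
}
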
func (n *Node) batchInternal(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if peer, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := peer.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	var br *roachpb.BatchResponse

	type snowballInfo struct {
		syncutil.Mutex
		collectedSpans [][]byte
		done           bool
	}
	var snowball *snowballInfo

	if err := n.stopper.RunTaskWithErr(func() error {
		const opName = "node.Batch"
		sp, err := tracing.JoinOrNew(n.storeCfg.AmbientCtx.Tracer, args.TraceContext, opName)
		if err != nil {
			return err
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			sp.LogEvent("delegating to snowball tracing")
			sp.Finish()

			snowball = new(snowballInfo)
			recorder := func(rawSpan basictracer.RawSpan) {
				snowball.Lock()
				defer snowball.Unlock()
				if snowball.done {
					// This is a late span that we must discard because the request was
					// already completed.
					return
				}
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(ctx, err)
				}
				snowball.collectedSpans = append(snowball.collectedSpans, encSp)
			}

			if sp, err = tracing.JoinOrNewSnowball(opName, args.TraceContext, recorder); err != nil {
				return err
			}
		}
		defer sp.Finish()
		traceCtx := opentracing.ContextWithSpan(ctx, sp)
		log.Event(traceCtx, args.Summary())

		tStart := timeutil.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(traceCtx, *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			log.ErrEventf(traceCtx, "%T", pErr.GetDetail())
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(timeutil.Since(tStart), pErr)
		br.Error = pErr
		return nil
	}); err != nil {
		return nil, err
	}

	if snowball != nil {
		snowball.Lock()
		br.CollectedSpans = snowball.collectedSpans
		snowball.done = true
		snowball.Unlock()
	}

	return br, nil
}

// requestLeaseAsync sends a transfer lease or lease request to the
// specified replica. The request is sent in an async task.
func (p *pendingLeaseRequest) requestLeaseAsync(
	repl *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	reqLease roachpb.Lease,
	status LeaseStatus,
	leaseReq roachpb.Request,
) error {
	return repl.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = repl.AnnotateCtx(ctx)
		var pErr *roachpb.Error

		// If requesting an epoch-based lease & current state is expired,
		// potentially heartbeat our own liveness or increment epoch of
		// prior owner. Note we only do this if the previous lease was
		// epoch-based.
		if reqLease.Type() == roachpb.LeaseEpoch && status.state == leaseExpired &&
			status.lease.Type() == roachpb.LeaseEpoch {
			var err error
			// If this replica is previous & next lease holder, manually heartbeat to become live.
			if status.lease.OwnedBy(nextLeaseHolder.StoreID) &&
				repl.store.StoreID() == nextLeaseHolder.StoreID {
				if err = repl.store.cfg.NodeLiveness.Heartbeat(ctx, status.liveness); err != nil {
					log.Error(ctx, err)
				}
			} else if status.liveness.Epoch == *status.lease.Epoch {
				// If not owner, increment epoch if necessary to invalidate lease.
				if err = repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.liveness); err != nil {
					log.Error(ctx, err)
				}
			}
			// Set error for propagation to all waiters below.
			if err != nil {
				pErr = roachpb.NewError(newNotLeaseHolderError(status.lease, repl.store.StoreID(), repl.Desc()))
			}
		}

		// Propose a RequestLease command and wait for it to apply.
		if pErr == nil {
			ba := roachpb.BatchRequest{}
			ba.Timestamp = repl.store.Clock().Now()
			ba.RangeID = repl.RangeID
			ba.Add(leaseReq)
			_, pErr = repl.Send(ctx, ba)
		}
		// We reset our state below regardless of whether we've gotten an error or
		// not, but note that an error is ambiguous - there's no guarantee that the
		// transfer will not still apply. That's OK, however, as the "in transfer"
		// state maintained by the pendingLeaseRequest is not relied on for
		// correctness (see repl.mu.minLeaseProposedTS), and resetting the state
		// is beneficial as it'll allow the replica to attempt to transfer again or
		// extend the existing lease in the future.

		// Send result of lease to all waiter channels.
		repl.mu.Lock()
		defer repl.mu.Unlock()
		for _, llChan := range p.llChans {
			// Don't send the same transaction object twice; this can lead to races.
			if pErr != nil {
				pErrClone := *pErr
				pErrClone.SetTxn(pErr.GetTxn())
				llChan <- &pErrClone
			} else {
				llChan <- nil
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	})
}

// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when appropriate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(
	ctx context.Context,
	startNS int64,
	ba roachpb.BatchRequest,
	br *roachpb.BatchResponse,
	pErr *roachpb.Error,
) *roachpb.Error {
	tc.Lock()
	defer tc.Unlock()

	if ba.Txn == nil {
		// Not a transactional request.
		return pErr
	}

	var newTxn roachpb.Transaction
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else if errTxn := pErr.GetTxn(); errTxn != nil {
		newTxn.Update(errTxn)
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(errors.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxnLocked(ctx, newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp)
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		// Increase timestamp so on restart, we're ahead of any timestamp
		// cache entries or newer versions which caused the restart.
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case *roachpb.WriteTooOldError:
		newTxn.Restart(ba.UserPriority, newTxn.Priority, t.ActualTimestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		// Do not clean up the transaction since we're leaving cancellation of
		// the transaction up to the client. For example, on seeing an error,
		// like TransactionStatusError or ConditionFailedError, the client
		// will call Txn.CleanupOnError() which will cleanup the transaction
		// and its intents. Therefore leave the transaction in the PENDING
		// state and do not call cleanupTxnLocked().
	}

	txnID := *newTxn.ID
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	if txnMeta != nil || pErr == nil || newTxn.Writing {
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		var keys []roachpb.Span
		if txnMeta != nil {
			keys = txnMeta.keys
		}
		ba.IntentSpanIterate(br, func(key, endKey roachpb.Key) {
			keys = append(keys, roachpb.Span{
				Key:    key,
				EndKey: endKey,
			})
		})

		if txnMeta != nil {
			txnMeta.keys = keys
		} else if len(keys) > 0 {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				log.Event(ctx, "coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              newTxn,
					keys:             keys,
					firstUpdateNanos: startNS,
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if err := tc.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
					tc.heartbeatLoop(ctx, txnID)
				}); err != nil {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(err)
				}
			} else {
				// If this was a successful one phase commit, update stats
				// directly as they won't otherwise be updated on heartbeat
				// loop shutdown.
				etArgs, ok := br.Responses[len(br.Responses)-1].GetInner().(*roachpb.EndTransactionResponse)
				tc.updateStats(tc.clock.PhysicalNow()-startNS, 0, newTxn.Status, ok && etArgs.OnePhaseCommit)
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn.Update(&newTxn)
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
	}

	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back. Note that we make sure not to share data with newTxn
		// (which may have made it into txnMeta).
		if br.Txn != nil {
			br.Txn.Update(&newTxn)
		} else {
			clonedTxn := newTxn.Clone()
			br.Txn = &clonedTxn
		}
	} else if pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and data races can occur.
		pErrShallow := *pErr
		pErrShallow.SetTxn(&newTxn) // SetTxn clones newTxn
		pErr = &pErrShallow
	}

	return pErr
}

// Send implements the batch.Sender interface. If the request is part of a
// transaction, the TxnCoordSender adds the transaction to a map of active
// transactions and begins heartbeating it. Every subsequent request for the
// same transaction updates the lastUpdate timestamp to prevent live
// transactions from being considered abandoned and garbage collected.
// Read/write mutating requests have their key or key range added to the
// transaction's interval tree of key ranges for eventual cleanup via resolved
// write intents; they're tagged to an outgoing EndTransaction request, with
// the receiving replica in charge of resolving them.
func (tc *TxnCoordSender) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// Start new or pick up active trace. From here on, there's always an active
	// Trace, though its overhead is small unless it's sampled.
	sp := opentracing.SpanFromContext(ctx)
	var tracer opentracing.Tracer
	if sp == nil {
		tracer = tc.AmbientContext.Tracer
		sp = tracer.StartSpan(opTxnCoordSender)
		defer sp.Finish()
		ctx = opentracing.ContextWithSpan(ctx, sp)
	} else {
		tracer = sp.Tracer()
	}

	startNS := tc.clock.PhysicalNow()

	if ba.Txn != nil {
		// If this request is part of a transaction...
		if err := tc.maybeBeginTxn(&ba); err != nil {
			return nil, roachpb.NewError(err)
		}
		txnID := *ba.Txn.ID
		// Associate the txnID with the trace. We need to do this after the
		// maybeBeginTxn call. We set both a baggage item and a tag because only
		// tags show up in the Lightstep UI.
		txnIDStr := txnID.String()
		sp.SetTag("txnID", txnIDStr)
		sp.SetBaggageItem("txnID", txnIDStr)

		var et *roachpb.EndTransactionRequest
		var hasET bool
		{
			var rArgs roachpb.Request
			rArgs, hasET = ba.GetArg(roachpb.EndTransaction)
			if hasET {
				et = rArgs.(*roachpb.EndTransactionRequest)
				if len(et.Key) != 0 {
					return nil, roachpb.NewErrorf("EndTransaction must not have a Key set")
				}
				et.Key = ba.Txn.Key
				if len(et.IntentSpans) > 0 {
					// TODO(tschottdorf): it may be useful to allow this later.
					// That would be part of a possible plan to allow txns which
					// write on multiple coordinators.
					return nil, roachpb.NewErrorf("client must not pass intents to EndTransaction")
				}
			}
		}

		if pErr := func() *roachpb.Error {
			tc.Lock()
			defer tc.Unlock()
			if pErr := tc.maybeRejectClientLocked(ctx, *ba.Txn); pErr != nil {
				return pErr
			}

			if !hasET {
				return nil
			}
			// Everything below is carried out only when trying to commit.

			// Populate et.IntentSpans, taking into account both any existing
			// and new writes, and taking care to perform proper deduplication.
			txnMeta := tc.txns[txnID]
			distinctSpans := true
			if txnMeta != nil {
				et.IntentSpans = txnMeta.keys
				// Defensively set distinctSpans to false if we had any previous
				// requests in this transaction. This effectively limits the distinct
				// spans optimization to 1pc transactions.
				distinctSpans = len(txnMeta.keys) == 0
			}
			// We can't pass in a batch response here to better limit the key
			// spans as we don't know what is going to be affected. This will
			// affect queries such as `DELETE FROM my.table LIMIT 10` when
			// executed as a 1PC transaction. e.g.: a (BeginTransaction,
			// DeleteRange, EndTransaction) batch.
			ba.IntentSpanIterate(nil, func(key, endKey roachpb.Key) {
				et.IntentSpans = append(et.IntentSpans, roachpb.Span{
					Key:    key,
					EndKey: endKey,
				})
			})
			// TODO(peter): Populate DistinctSpans on all batches, not just batches
			// which contain an EndTransactionRequest.
			var distinct bool
			// The request might already be used by an outgoing goroutine, so
			// we can't safely mutate anything in-place (as MergeSpans does).
			et.IntentSpans = append([]roachpb.Span(nil), et.IntentSpans...)
			et.IntentSpans, distinct = roachpb.MergeSpans(et.IntentSpans)
			ba.Header.DistinctSpans = distinct && distinctSpans
			if len(et.IntentSpans) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state
				// in the client.
				return roachpb.NewErrorf("cannot commit a read-only transaction")
			}
			if txnMeta != nil {
				txnMeta.keys = et.IntentSpans
			}
			return nil
		}(); pErr != nil {
			return nil, pErr
		}

		if hasET && log.V(1) {
			for _, intent := range et.IntentSpans {
				log.Eventf(ctx, "intent: [%s,%s)", intent.Key, intent.EndKey)
			}
		}
	}

	// Embed the trace metadata into the header for use by RPC recipients. We need
	// to do this after the maybeBeginTxn call above.
	// TODO(tschottdorf): To get rid of the spurious alloc below we need to
	// implement the carrier interface on ba.Header or make Span non-nullable,
	// both of which force all of ba on the heap. It's already there, so may
	// not be a big deal, but ba should live on the stack. Also not easy to use
	// a buffer pool here since anything that goes into the RPC layer could be
	// used by goroutines we didn't wait for.
	if ba.TraceContext == nil {
		ba.TraceContext = &tracing.SpanContextCarrier{}
	} else {
		// We didn't make this object but are about to mutate it, so we
		// have to take a copy - the original might already have been
		// passed to the RPC layer.
		ba.TraceContext = protoutil.Clone(ba.TraceContext).(*tracing.SpanContextCarrier)
	}
	if err := tracer.Inject(sp.Context(), basictracer.Delegator, ba.TraceContext); err != nil {
		return nil, roachpb.NewError(err)
	}

	// Send the command through the wrapped sender, taking appropriate measures
	// on error.
	var br *roachpb.BatchResponse
	{
		var pErr *roachpb.Error
		br, pErr = tc.wrapped.Send(ctx, ba)

		if _, ok := pErr.GetDetail().(*roachpb.OpRequiresTxnError); ok {
			// TODO(tschottdorf): needs to keep the trace.
			br, pErr = tc.resendWithTxn(ba)
		}

		if pErr = tc.updateState(ctx, startNS, ba, br, pErr); pErr != nil {
			log.Eventf(ctx, "error: %s", pErr)
			return nil, pErr
		}
	}

	if br.Txn == nil {
		return br, nil
	}

	if _, ok := ba.GetArg(roachpb.EndTransaction); !ok {
		return br, nil
	}

	// If the --linearizable flag is set, we want to make sure that
	// all the clocks in the system are past the commit timestamp
	// of the transaction. This is guaranteed if either
	// - the commit timestamp is MaxOffset behind startNS
	// - MaxOffset ns were spent in this function
	// when returning to the client. Below we choose the option
	// that involves less waiting, which is likely the first one
	// unless a transaction commits with an odd timestamp.
	if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS {
		startNS = tsNS
	}
	sleepNS := tc.clock.MaxOffset() -
		time.Duration(tc.clock.PhysicalNow()-startNS)
	if tc.linearizable && sleepNS > 0 {
		defer func() {
			if log.V(1) {
				log.Infof(ctx, "%v: waiting %s on EndTransaction for linearizability",
					br.Txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
			}
			time.Sleep(sleepNS)
		}()
	}
	if br.Txn.Status != roachpb.PENDING {
		tc.Lock()
		tc.cleanupTxnLocked(ctx, *br.Txn)
		tc.Unlock()
	}
	return br, nil
}

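// Worked example, not part of the original source: the linearizability wait
// computed at the end of Send, with made-up numbers. Given a MaxOffset of
// 500ms and a commit timestamp (or start time, whichever is earlier) that
// lies 200ms in the past, the coordinator still waits roughly 300ms before
// returning, so that every clock in the cluster has passed the commit
// timestamp. The function name and parameters are illustrative only.
func exampleLinearizableWait(maxOffset, elapsedSinceCommit time.Duration) time.Duration {
	// exampleLinearizableWait(500*time.Millisecond, 200*time.Millisecond) == 300ms.
	if sleep := maxOffset - elapsedSinceCommit; sleep > 0 {
		return sleep
	}
	return 0
}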