// Send implements the batch.Sender interface. If the request is part of a
// transaction, the TxnCoordSender adds the transaction to a map of active
// transactions and begins heartbeating it. Every subsequent request for the
// same transaction updates the lastUpdate timestamp to prevent live
// transactions from being considered abandoned and garbage collected.
// Read/write mutating requests have their key or key range added to the
// transaction's interval tree of key ranges for eventual cleanup via resolved
// write intents; they're tagged to an outgoing EndTransaction request, with
// the receiving replica in charge of resolving them.
func (tc *TxnCoordSender) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// Start new or pick up active trace. From here on, there's always an
	// active Trace, though its overhead is small unless it's sampled.
	sp := opentracing.SpanFromContext(ctx)
	var tracer opentracing.Tracer
	if sp == nil {
		tracer = tc.AmbientContext.Tracer
		sp = tracer.StartSpan(opTxnCoordSender)
		defer sp.Finish()
		ctx = opentracing.ContextWithSpan(ctx, sp)
	} else {
		tracer = sp.Tracer()
	}

	startNS := tc.clock.PhysicalNow()

	if ba.Txn != nil {
		// If this request is part of a transaction...
		if err := tc.maybeBeginTxn(&ba); err != nil {
			return nil, roachpb.NewError(err)
		}
		txnID := *ba.Txn.ID

		// Associate the txnID with the trace. We need to do this after the
		// maybeBeginTxn call. We set both a baggage item and a tag because
		// only tags show up in the Lightstep UI.
		txnIDStr := txnID.String()
		sp.SetTag("txnID", txnIDStr)
		sp.SetBaggageItem("txnID", txnIDStr)

		var et *roachpb.EndTransactionRequest
		var hasET bool
		{
			var rArgs roachpb.Request
			rArgs, hasET = ba.GetArg(roachpb.EndTransaction)
			if hasET {
				et = rArgs.(*roachpb.EndTransactionRequest)
				if len(et.Key) != 0 {
					return nil, roachpb.NewErrorf("EndTransaction must not have a Key set")
				}
				et.Key = ba.Txn.Key
				if len(et.IntentSpans) > 0 {
					// TODO(tschottdorf): it may be useful to allow this later.
					// That would be part of a possible plan to allow txns which
					// write on multiple coordinators.
					return nil, roachpb.NewErrorf("client must not pass intents to EndTransaction")
				}
			}
		}

		if pErr := func() *roachpb.Error {
			tc.Lock()
			defer tc.Unlock()
			if pErr := tc.maybeRejectClientLocked(ctx, *ba.Txn); pErr != nil {
				return pErr
			}

			if !hasET {
				return nil
			}
			// Everything below is carried out only when trying to commit.

			// Populate et.IntentSpans, taking into account both any existing
			// and new writes, and taking care to perform proper deduplication.
			txnMeta := tc.txns[txnID]
			distinctSpans := true
			if txnMeta != nil {
				et.IntentSpans = txnMeta.keys
				// Defensively set distinctSpans to false if we had any
				// previous requests in this transaction. This effectively
				// limits the distinct spans optimization to 1pc transactions.
				distinctSpans = len(txnMeta.keys) == 0
			}
			// We can't pass in a batch response here to better limit the key
			// spans as we don't know what is going to be affected. This will
			// affect queries such as `DELETE FROM my.table LIMIT 10` when
			// executed as a 1PC transaction. e.g.: a (BeginTransaction,
			// DeleteRange, EndTransaction) batch.
			ba.IntentSpanIterate(nil, func(key, endKey roachpb.Key) {
				et.IntentSpans = append(et.IntentSpans, roachpb.Span{
					Key:    key,
					EndKey: endKey,
				})
			})
			// TODO(peter): Populate DistinctSpans on all batches, not just
			// batches which contain an EndTransactionRequest.
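			// For intuition, a hypothetical merge below: input spans
			// {a-c}, {b-d}, {e} come back as {a-d}, {e} with distinct=false
			// (the input overlapped); already-disjoint input is returned
			// unchanged with distinct=true.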
			var distinct bool
			// The request might already be used by an outgoing goroutine, so
			// we can't safely mutate anything in-place (as MergeSpans does).
			et.IntentSpans = append([]roachpb.Span(nil), et.IntentSpans...)
			et.IntentSpans, distinct = roachpb.MergeSpans(et.IntentSpans)
			ba.Header.DistinctSpans = distinct && distinctSpans
			if len(et.IntentSpans) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state
				// in the client.
				return roachpb.NewErrorf("cannot commit a read-only transaction")
			}
			if txnMeta != nil {
				txnMeta.keys = et.IntentSpans
			}
			return nil
		}(); pErr != nil {
			return nil, pErr
		}

		if hasET && log.V(1) {
			for _, intent := range et.IntentSpans {
				log.Eventf(ctx, "intent: [%s,%s)", intent.Key, intent.EndKey)
			}
		}
	}

	// Embed the trace metadata into the header for use by RPC recipients. We
	// need to do this after the maybeBeginTxn call above.
	// TODO(tschottdorf): To get rid of the spurious alloc below we need to
	// implement the carrier interface on ba.Header or make Span non-nullable,
	// both of which force all of ba on the Heap. It's already there, so may
	// not be a big deal, but ba should live on the stack. Also not easy to use
	// a buffer pool here since anything that goes into the RPC layer could be
	// used by goroutines we didn't wait for.
	if ba.TraceContext == nil {
		ba.TraceContext = &tracing.SpanContextCarrier{}
	} else {
		// We didn't make this object but are about to mutate it, so we
		// have to take a copy - the original might already have been
		// passed to the RPC layer.
		ba.TraceContext = protoutil.Clone(ba.TraceContext).(*tracing.SpanContextCarrier)
	}
	if err := tracer.Inject(sp.Context(), basictracer.Delegator, ba.TraceContext); err != nil {
		return nil, roachpb.NewError(err)
	}

	// Send the command through the wrapped sender, taking appropriate
	// measures on error.
	var br *roachpb.BatchResponse
	{
		var pErr *roachpb.Error
		br, pErr = tc.wrapped.Send(ctx, ba)

		if _, ok := pErr.GetDetail().(*roachpb.OpRequiresTxnError); ok {
			// TODO(tschottdorf): needs to keep the trace.
			br, pErr = tc.resendWithTxn(ba)
		}

		if pErr = tc.updateState(ctx, startNS, ba, br, pErr); pErr != nil {
			log.Eventf(ctx, "error: %s", pErr)
			return nil, pErr
		}
	}

	if br.Txn == nil {
		return br, nil
	}

	if _, ok := ba.GetArg(roachpb.EndTransaction); !ok {
		return br, nil
	}

	// If the --linearizable flag is set, we want to make sure that all the
	// clocks in the system are past the commit timestamp of the transaction.
	// This is guaranteed if either
	// - the commit timestamp is MaxOffset behind startNS
	// - MaxOffset ns were spent in this function
	// when returning to the client. Below we choose the option that involves
	// less waiting, which is likely the first one unless a transaction commits
	// with an odd timestamp.
	if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS {
		startNS = tsNS
	}
	sleepNS := tc.clock.MaxOffset() -
		time.Duration(tc.clock.PhysicalNow()-startNS)
	if tc.linearizable && sleepNS > 0 {
		defer func() {
			if log.V(1) {
				log.Infof(ctx, "%v: waiting %s on EndTransaction for linearizability",
					br.Txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
			}
			time.Sleep(sleepNS)
		}()
	}
	if br.Txn.Status != roachpb.PENDING {
		tc.Lock()
		tc.cleanupTxnLocked(ctx, *br.Txn)
		tc.Unlock()
	}
	return br, nil
}
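// linearizableSleep is an illustrative sketch (not part of the original API)
// of the wait performed at the end of Send under --linearizable: sleep until
// MaxOffset has elapsed past the earlier of the commit timestamp and the
// moment the request entered Send. A non-positive result means no wait is
// needed.
func linearizableSleep(maxOffset time.Duration, nowNS, startNS, commitNS int64) time.Duration {
	// Use the earlier of the two reference points, mirroring the clamping of
	// startNS against br.Txn.Timestamp.WallTime above.
	if commitNS < startNS {
		startNS = commitNS
	}
	return maxOffset - time.Duration(nowNS-startNS)
}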
// printStatsLoop blocks and periodically logs transaction statistics
// (throughput, success rates, durations, ...). Note that this only captures
// write txns, since read-only txns are stateless as far as TxnCoordSender is
// concerned.
func (tc *TxnCoordSender) printStatsLoop(ctx context.Context) {
	res := time.Millisecond // for duration logging resolution
	var statusLogTimer timeutil.Timer
	defer statusLogTimer.Stop()
	scale := metric.Scale1M
	for {
		statusLogTimer.Reset(statusLogInterval)
		select {
		case <-statusLogTimer.C:
			statusLogTimer.Read = true
			// Take a snapshot of metrics. There's some chance of skew, since
			// the snapshots are not done atomically, but that should be fine
			// for these debug stats.
			metrics := tc.metrics
			durations, durationsWindow := metrics.Durations.Windowed()
			restarts, restartsWindow := metrics.Restarts.Windowed()
			if restartsWindow != durationsWindow {
				log.Fatalf(ctx, "misconfigured windowed histograms: %s != %s",
					restartsWindow, durationsWindow,
				)
			}
			commitRate := metrics.Commits.Rates[scale].Value()
			commit1PCRate := metrics.Commits1PC.Rates[scale].Value()
			abortRate := metrics.Aborts.Rates[scale].Value()
			abandonRate := metrics.Abandons.Rates[scale].Value()

			// Show transaction stats over the last minute. Maybe this should
			// be shorter in the future. We'll revisit if we get sufficient
			// feedback.
			totalRate := commitRate + abortRate + abandonRate
			var pCommitted, pCommitted1PC, pAbandoned, pAborted float64
			if totalRate > 0 {
				pCommitted = 100 * (commitRate / totalRate)
				pCommitted1PC = 100 * (commit1PCRate / totalRate)
				pAborted = 100 * (abortRate / totalRate)
				pAbandoned = 100 * (abandonRate / totalRate)
			}

			dMean := durations.Mean()
			dDev := durations.StdDev()
			dMax := durations.Max()
			rMean := restarts.Mean()
			rDev := restarts.StdDev()
			rMax := restarts.Max()
			num := durations.TotalCount()

			// We could skip calculating everything if !log.V(1), but we want
			// to make sure the code above doesn't silently break.
			if log.V(1) {
				log.Infof(ctx,
					"txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f/%.2f %%cmmt/cmmt1pc/abrt/abnd, "+
						"%s/%s/%s avg/σ/max duration, %.1f/%.1f/%d avg/σ/max restarts (%d samples over %s)",
					totalRate, pCommitted, pCommitted1PC, pAborted, pAbandoned,
					util.TruncateDuration(time.Duration(dMean), res),
					util.TruncateDuration(time.Duration(dDev), res),
					util.TruncateDuration(time.Duration(dMax), res),
					rMean, rDev, rMax, num, restartsWindow,
				)
			}
		case <-tc.stopper.ShouldStop():
			return
		}
	}
}
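// A minimal usage sketch (hypothetical; the actual constructor wiring may
// differ): printStatsLoop is meant to run as a long-lived worker tied to the
// sender's stopper, e.g.
//
//	tc.stopper.RunWorker(func() {
//		tc.printStatsLoop(tc.AmbientContext.AnnotateCtx(context.Background()))
//	})
//
// so that the loop exits cleanly once ShouldStop() fires.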