// withEventLogInternal embeds a trace.EventLog in the context, causing future
// logging and event calls to go to the EventLog. The current context must not
// have an existing open span.
func withEventLogInternal(ctx context.Context, eventLog trace.EventLog) context.Context {
	if opentracing.SpanFromContext(ctx) != nil {
		panic("event log under span")
	}
	val := &ctxEventLog{eventLog: eventLog}
	return context.WithValue(ctx, ctxEventLogKey{}, val)
}
// SpanFromContext wraps opentracing.SpanFromContext so that the returned
// Span is never nil.
func SpanFromContext(ctx context.Context) opentracing.Span {
	sp := opentracing.SpanFromContext(ctx)
	if sp == nil {
		return NoopSpan()
	}
	return sp
}
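// A minimal caller sketch (hypothetical, not from the original source):
// because the wrapper above never returns nil, callers can log to the
// span unconditionally; when ctx carries no span, the call hits a no-op.
func exampleLogToSpan(ctx context.Context) {
	sp := SpanFromContext(ctx)
	sp.LogEvent("doing work") // safe even when ctx has no span
}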
// EnsureContext checks whether the given context.Context contains a Span. If
// not, it creates one using the provided Tracer and embeds it in the returned
// context. The returned closure must be called after the request has been
// fully processed.
func EnsureContext(ctx context.Context, tracer opentracing.Tracer) (context.Context, func()) {
	_, _, funcName := caller.Lookup(1)
	if opentracing.SpanFromContext(ctx) == nil {
		sp := tracer.StartSpan(funcName)
		return opentracing.ContextWithSpan(ctx, sp), sp.Finish
	}
	return ctx, func() {}
}
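// A hypothetical request-handler sketch showing the intended use of
// EnsureContext: deferring the cleanup closure finishes the span (if one
// was created here) once the request has been fully processed.
func exampleHandleRequest(ctx context.Context, tracer opentracing.Tracer) {
	ctx, cleanup := EnsureContext(ctx, tracer)
	defer cleanup()
	// ... process the request using ctx ...
}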
// ChildSpan opens a span as a child of the current span in the context (if
// there is one).
//
// Returns the new context and the new span (if any). The span should be
// closed via FinishSpan.
func ChildSpan(ctx context.Context, opName string) (context.Context, opentracing.Span) {
	span := opentracing.SpanFromContext(ctx)
	if span == nil {
		return ctx, nil
	}
	newSpan := span.Tracer().StartSpan(opName, opentracing.ChildOf(span.Context()))
	return opentracing.ContextWithSpan(ctx, newSpan), newSpan
}
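// A hypothetical caller sketch for ChildSpan. It assumes FinishSpan (the
// helper referenced in the comment above) is a nil-tolerant wrapper around
// Span.Finish, since ChildSpan may return a nil span.
func exampleChildWork(ctx context.Context) {
	ctx, span := ChildSpan(ctx, "child work")
	defer FinishSpan(span) // assumed to be a no-op when span is nil
	// ... do the child work using ctx ...
}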
// SpanFromContext returns the Span obtained from the context or, if none is
// found, a new one started through the tracer. Callers should call (or defer)
// the returned cleanup func as well to ensure that the span is Finish()ed, but
// callers should *not* attempt to call Finish directly -- in the case where the
// span was obtained from the context, it is not the caller's to Finish.
func SpanFromContext(opName string, tracer opentracing.Tracer, ctx context.Context) (opentracing.Span, func()) {
	sp := opentracing.SpanFromContext(ctx)
	if sp == nil {
		sp = tracer.StartSpan(opName)
		return sp, sp.Finish
	}
	return sp, func() {}
}
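// A minimal usage sketch (hypothetical): defer the returned cleanup func
// rather than calling Finish directly, since a span obtained from the
// context is not the caller's to finish.
func exampleSpanFromContext(ctx context.Context, tracer opentracing.Tracer) {
	sp, cleanup := SpanFromContext("example op", tracer, ctx)
	defer cleanup()
	sp.LogEvent("working")
}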
// SetFlowRequestTrace populates req.TraceContext with the context of the
// current Span in the context (if any).
func SetFlowRequestTrace(ctx context.Context, req *SetupFlowRequest) error {
	sp := opentracing.SpanFromContext(ctx)
	if sp == nil {
		return nil
	}
	req.TraceContext = &tracing.SpanContextCarrier{}
	tracer := sp.Tracer()
	return tracer.Inject(sp.Context(), basictracer.Delegator, req.TraceContext)
}
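// A hypothetical caller sketch: a node setting up a remote flow would
// inject the current trace context into the request before sending it
// over RPC, so the remote side can continue the same trace.
func exampleScheduleFlow(ctx context.Context, req *SetupFlowRequest) error {
	if err := SetFlowRequestTrace(ctx, req); err != nil {
		return err
	}
	// ... send req to the remote node over RPC ...
	return nil
}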
// AnnotateCtx annotates a given context with the information in AmbientContext:
//  - the EventLog is embedded in the context if the context doesn't already
//    have an event log or an open trace.
//  - the log tags in AmbientContext are added (if ctx doesn't already have
//    them).
//
// For background operations, context.Background() should be passed; however,
// in that case it is strongly recommended to open a span if possible (using
// AnnotateCtxWithSpan).
func (ac *AmbientContext) AnnotateCtx(ctx context.Context) context.Context {
	// TODO(radu): We could keep a cached context based off of
	// context.Background() to avoid allocations in that case.
	if ac.eventLog != nil && opentracing.SpanFromContext(ctx) == nil &&
		eventLogFromCtx(ctx) == nil {
		ctx = embedCtxEventLog(ctx, ac.eventLog)
	}
	if ac.tags != nil {
		ctx = copyTagChain(ctx, ac.tags)
	}
	return ctx
}
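// A minimal sketch (hypothetical) of a background operation annotating its
// context so that logging picks up the ambient event log and tags:
func exampleBackgroundTask(ac *AmbientContext) {
	ctx := ac.AnnotateCtx(context.Background())
	log.Infof(ctx, "starting background task") // carries the ambient log tags
}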
// getSpanOrEventLog returns the current Span. If there is no Span, it returns
// the current ctxEventLog. If there is neither (or the Span comes from a
// NoopTracer), the last return value is false.
func getSpanOrEventLog(ctx context.Context) (opentracing.Span, *ctxEventLog, bool) {
	if sp := opentracing.SpanFromContext(ctx); sp != nil {
		if sp.Tracer() == noopTracer {
			return nil, nil, false
		}
		return sp, nil, true
	}
	if el := eventLogFromCtx(ctx); el != nil {
		return nil, el, true
	}
	return nil, nil, false
}
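// A hypothetical sketch of the dispatch this helper enables: an event goes
// to the span when one is open, otherwise to the event log, and is dropped
// when neither is present. The direct el.eventLog.Printf call is an
// assumption about ctxEventLog's layout, made for illustration only.
func exampleEvent(ctx context.Context, msg string) {
	if sp, el, ok := getSpanOrEventLog(ctx); ok {
		if sp != nil {
			sp.LogEvent(msg)
		} else {
			el.eventLog.Printf("%s", msg)
		}
	}
}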
func newFlow(flowCtx FlowCtx, flowReg *flowRegistry, simpleFlowConsumer RowReceiver) *Flow {
	if opentracing.SpanFromContext(flowCtx.Context) == nil {
		panic("flow context has no span")
	}
	flowCtx.Context = log.WithLogTagStr(flowCtx.Context, "f", flowCtx.id.Short())
	return &Flow{
		FlowCtx:            flowCtx,
		flowRegistry:       flowReg,
		simpleFlowConsumer: simpleFlowConsumer,
		status:             FlowNotStarted,
	}
}
// sendSingleRange gathers and rearranges the replicas, and makes an RPC call.
func (ds *DistSender) sendSingleRange(
	ctx context.Context, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// Hack: avoid formatting the message passed to Span.LogEvent for
	// opentracing.noopSpans. We can't actually tell if we have a noopSpan, but
	// we can see if the span has a NoopTracer. Note that this particular
	// invocation is expensive because we're pretty-printing keys.
	//
	// TODO(tschottdorf): This hack can go away when something like
	// Span.LogEventf is added.
	sp := opentracing.SpanFromContext(ctx)
	if sp != nil && sp.Tracer() != (opentracing.NoopTracer{}) {
		sp.LogEvent(fmt.Sprintf("sending RPC to [%s, %s)", desc.StartKey, desc.EndKey))
	}

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those replicas with long common
	// prefix of attributes end up first. If there's no prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) {
		if leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID)); leader.StoreID > 0 {
			if i := replicas.FindReplica(leader.StoreID); i >= 0 {
				replicas.MoveToFront(i)
				order = orderStable
			}
		}
	}

	// TODO(tschottdorf): should serialize the trace here, not higher up.
	br, pErr := ds.sendRPC(ctx, desc.RangeID, replicas, order, ba)
	if pErr != nil {
		return nil, pErr
	}
	// If the reply contains a timestamp, update the local HLC with it.
	if br.Error != nil && br.Error.Now != roachpb.ZeroTimestamp {
		ds.clock.Update(br.Error.Now)
	} else if br.Now != roachpb.ZeroTimestamp {
		ds.clock.Update(br.Now)
	}

	// Untangle the error from the received response.
	pErr = br.Error
	br.Error = nil // scrub the response error
	return br, pErr
}
// ForkCtxSpan checks if ctx has a Span open; if it does, it creates a new Span
// that follows from the original Span. This allows the resulting context to be
// used in an async task that might outlive the original operation.
//
// Returns the new context and the new span (if any). The span should be
// closed via FinishSpan.
func ForkCtxSpan(ctx context.Context, opName string) (context.Context, opentracing.Span) {
	if span := opentracing.SpanFromContext(ctx); span != nil {
		if span.BaggageItem(Snowball) == "1" {
			// If we are doing snowball tracing, the span might outlive the snowball
			// tracer (calling the record function when it is no longer legal to do
			// so). Return a context with no span in this case.
			return opentracing.ContextWithSpan(ctx, nil), nil
		}
		tr := span.Tracer()
		newSpan := tr.StartSpan(opName, opentracing.FollowsFrom(span.Context()))
		return opentracing.ContextWithSpan(ctx, newSpan), newSpan
	}
	return ctx, nil
}
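// A hypothetical async-task sketch for ForkCtxSpan: the forked span uses a
// FollowsFrom reference, so the original operation can finish while the
// task is still running. As above, FinishSpan is assumed to tolerate a nil
// span.
func exampleStartAsyncTask(ctx context.Context) {
	taskCtx, span := ForkCtxSpan(ctx, "async task")
	go func() {
		defer FinishSpan(span)
		// ... do work that may outlive the parent operation, using taskCtx ...
		_ = taskCtx
	}()
}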
// Cleanup should be called when the flow completes (after all processors and
// mailboxes exited).
func (f *Flow) Cleanup() {
	if f.status == FlowFinished {
		panic("flow cleanup called twice")
	}
	if log.V(1) {
		log.Infof(f.Context, "cleaning up")
	}
	sp := opentracing.SpanFromContext(f.Context)
	sp.Finish()
	if f.status != FlowNotStarted {
		f.flowRegistry.UnregisterFlow(f.id)
	}
	f.status = FlowFinished
}
// AnnotateCtxWithSpan annotates the given context with the information in
// AmbientContext (see AnnotateCtx) and opens a span.
//
// If the given context has a span, the new span is a child of that span.
// Otherwise, the Tracer in AmbientContext is used to create a new root span.
//
// The caller is responsible for closing the span (via Span.Finish).
func (ac *AmbientContext) AnnotateCtxWithSpan(
	ctx context.Context, opName string,
) (context.Context, opentracing.Span) {
	if ac.tags != nil {
		ctx = copyTagChain(ctx, ac.tags)
	}
	var span opentracing.Span
	if parentSpan := opentracing.SpanFromContext(ctx); parentSpan != nil {
		tracer := parentSpan.Tracer()
		span = tracer.StartSpan(opName, opentracing.ChildOf(parentSpan.Context()))
	} else {
		if ac.Tracer == nil {
			panic("no tracer in AmbientContext for root span")
		}
		span = ac.Tracer.StartSpan(opName)
	}
	return opentracing.ContextWithSpan(ctx, span), span
}
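// A minimal caller sketch (hypothetical): unlike AnnotateCtx, the caller
// here owns the returned span and must finish it.
func exampleAnnotatedOperation(ac *AmbientContext) {
	ctx, span := ac.AnnotateCtxWithSpan(context.Background(), "example op")
	defer span.Finish()
	// ... perform the operation using ctx ...
	_ = ctx
}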
// NewSession creates and initializes a new Session object.
// remote can be nil.
func NewSession(
	ctx context.Context, args SessionArgs, e *Executor, remote net.Addr, memMetrics *MemoryMetrics,
) *Session {
	ctx = e.AnnotateCtx(ctx)
	s := &Session{
		Database:       args.Database,
		SearchPath:     []string{"pg_catalog"},
		User:           args.User,
		Location:       time.UTC,
		virtualSchemas: e.virtualSchemas,
		memMetrics:     memMetrics,
	}
	cfg, cache := e.getSystemConfig()
	s.planner = planner{
		leaseMgr:      e.cfg.LeaseManager,
		systemConfig:  cfg,
		databaseCache: cache,
		session:       s,
		execCfg:       &e.cfg,
	}
	s.PreparedStatements = makePreparedStatements(s)
	s.PreparedPortals = makePreparedPortals(s)

	if opentracing.SpanFromContext(ctx) == nil {
		remoteStr := "<admin>"
		if remote != nil {
			remoteStr = remote.String()
		}
		// Set up an EventLog for session events.
		ctx = log.WithEventLog(ctx, fmt.Sprintf("sql [%s]", args.User), remoteStr)
		s.finishEventLog = true
	}
	s.context, s.cancel = context.WithCancel(ctx)

	return s
}
// Tracef looks for an opentracing.Span in the context and, if one is found,
// formats and logs the given message to it.
func Tracef(ctx context.Context, format string, args ...interface{}) {
	sp := opentracing.SpanFromContext(ctx)
	if sp != nil && sp.Tracer() != noopTracer {
		sp.LogEvent(fmt.Sprintf(format, args...))
	}
}
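// A minimal usage sketch (hypothetical): because Tracef checks for a real
// (non-noop) span before formatting, callers can pass expensive-to-format
// arguments without guarding the call themselves -- the same concern that
// motivated the hand-rolled NoopTracer check in sendSingleRange above.
func exampleTracef(ctx context.Context, desc *roachpb.RangeDescriptor) {
	Tracef(ctx, "sending RPC to [%s, %s)", desc.StartKey, desc.EndKey)
}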
// Send implements the batch.Sender interface. If the request is part of a
// transaction, the TxnCoordSender adds the transaction to a map of active
// transactions and begins heartbeating it. Every subsequent request for the
// same transaction updates the lastUpdate timestamp to prevent live
// transactions from being considered abandoned and garbage collected.
// Read/write mutating requests have their key or key range added to the
// transaction's interval tree of key ranges for eventual cleanup via resolved
// write intents; they're tagged to an outgoing EndTransaction request, with
// the receiving replica in charge of resolving them.
func (tc *TxnCoordSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	{
		// Start new or pick up active trace and embed its trace metadata into
		// header for use by RPC recipients. From here on, there's always an active
		// Trace, though its overhead is small unless it's sampled.
		sp := opentracing.SpanFromContext(ctx)
		if sp == nil {
			sp = tc.tracer.StartSpan(opTxnCoordSender)
			defer sp.Finish()
			ctx = opentracing.ContextWithSpan(ctx, sp)
		}
		// TODO(tschottdorf): To get rid of the spurious alloc below we need to
		// implement the carrier interface on ba.Header or make Span non-nullable,
		// both of which force all of ba on the Heap. It's already there, so may
		// not be a big deal, but ba should live on the stack. Also not easy to use
		// a buffer pool here since anything that goes into the RPC layer could be
		// used by goroutines we didn't wait for.
		if ba.Header.Trace == nil {
			ba.Header.Trace = &tracing.Span{}
		}
		if err := tc.tracer.Inject(sp.Context(), basictracer.Delegator, ba.Trace); err != nil {
			return nil, roachpb.NewError(err)
		}
	}

	startNS := tc.clock.PhysicalNow()

	if ba.Txn != nil {
		// If this request is part of a transaction...
		if err := tc.maybeBeginTxn(&ba); err != nil {
			return nil, roachpb.NewError(err)
		}
		txnID := *ba.Txn.ID
		// Verify that if this Transaction is not read-only, we have it on file.
		// If not, refuse further operations - the transaction was aborted due
		// to a timeout or the client must have issued a write on another
		// coordinator previously.
		if ba.Txn.Writing {
			tc.Lock()
			_, ok := tc.txns[txnID]
			tc.Unlock()
			if !ok {
				pErr := roachpb.NewErrorf("writing transaction timed out, was aborted, " +
					"or ran on multiple coordinators")
				return nil, pErr
			}
		}

		if rArgs, ok := ba.GetArg(roachpb.EndTransaction); ok {
			et := rArgs.(*roachpb.EndTransactionRequest)
			if len(et.Key) != 0 {
				return nil, roachpb.NewErrorf("EndTransaction must not have a Key set")
			}
			et.Key = ba.Txn.Key
			if len(et.IntentSpans) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				return nil, roachpb.NewErrorf("client must not pass intents to EndTransaction")
			}
			tc.Lock()
			txnMeta, metaOK := tc.txns[txnID]
			{
				// Populate et.IntentSpans, taking into account both existing
				// writes (if any) and new writes in this batch, and taking
				// care to perform proper deduplication.
				var keys interval.RangeGroup
				if metaOK {
					keys = txnMeta.keys
				} else {
					keys = interval.NewRangeTree()
				}
				ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
					addKeyRange(keys, key, endKey)
				})
				et.IntentSpans = collectIntentSpans(keys)
			}
			tc.Unlock()

			if len(et.IntentSpans) > 0 {
				// All good, proceed.
			} else if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could lookup the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				return nil, roachpb.NewErrorf("transaction is already committed or aborted")
			}
			if len(et.IntentSpans) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state in
				// the client.
				return nil, roachpb.NewErrorf("cannot commit a read-only transaction")
			}
			if log.V(1) {
				for _, intent := range et.IntentSpans {
					log.Trace(ctx, fmt.Sprintf("intent: [%s,%s)", intent.Key, intent.EndKey))
				}
			}
		}
	}

	// Send the command through wrapped sender, taking appropriate measures
	// on error.
	var br *roachpb.BatchResponse
	{
		var pErr *roachpb.Error
		br, pErr = tc.wrapped.Send(ctx, ba)

		if _, ok := pErr.GetDetail().(*roachpb.OpRequiresTxnError); ok {
			// TODO(tschottdorf): needs to keep the trace.
			br, pErr = tc.resendWithTxn(ba)
		}

		if pErr = tc.updateState(startNS, ctx, ba, br, pErr); pErr != nil {
			log.Trace(ctx, fmt.Sprintf("error: %s", pErr))
			return nil, pErr
		}
	}

	if br.Txn == nil {
		return br, nil
	}

	if _, ok := ba.GetArg(roachpb.EndTransaction); !ok {
		return br, nil
	}

	// If the --linearizable flag is set, we want to make sure that
	// all the clocks in the system are past the commit timestamp
	// of the transaction. This is guaranteed if either
	// - the commit timestamp is MaxOffset behind startNS
	// - MaxOffset ns were spent in this function
	// when returning to the client. Below we choose the option
	// that involves less waiting, which is likely the first one
	// unless a transaction commits with an odd timestamp.
	if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS {
		startNS = tsNS
	}
	sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS)
	if tc.linearizable && sleepNS > 0 {
		defer func() {
			if log.V(1) {
				log.Infof("%v: waiting %s on EndTransaction for linearizability",
					br.Txn.ID.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
			}
			time.Sleep(sleepNS)
		}()
	}
	if br.Txn.Status != roachpb.PENDING {
		tc.cleanupTxn(ctx, *br.Txn)
	}
	return br, nil
}
// withEventLogInternal embeds a trace.EventLog in the context, causing future
// logging and event calls to go to the EventLog. The current context must not
// have an existing open span.
func withEventLogInternal(ctx context.Context, eventLog trace.EventLog) context.Context {
	if opentracing.SpanFromContext(ctx) != nil {
		panic("event log under span")
	}
	return embedCtxEventLog(ctx, &ctxEventLog{eventLog: eventLog})
}
// Send implements the batch.Sender interface. If the request is part of a
// transaction, the TxnCoordSender adds the transaction to a map of active
// transactions and begins heartbeating it. Every subsequent request for the
// same transaction updates the lastUpdate timestamp to prevent live
// transactions from being considered abandoned and garbage collected.
// Read/write mutating requests have their key or key range added to the
// transaction's interval tree of key ranges for eventual cleanup via resolved
// write intents; they're tagged to an outgoing EndTransaction request, with
// the receiving replica in charge of resolving them.
func (tc *TxnCoordSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	{
		// Start new or pick up active trace and embed its trace metadata into
		// header for use by RPC recipients. From here on, there's always an active
		// Trace, though its overhead is small unless it's sampled.
		sp := opentracing.SpanFromContext(ctx)
		// TODO(radu): once contexts are plumbed correctly, we should use the Tracer
		// from ctx.
		tracer := tracing.TracerFromCtx(tc.ctx)
		if sp == nil {
			sp = tracer.StartSpan(opTxnCoordSender)
			defer sp.Finish()
			ctx = opentracing.ContextWithSpan(ctx, sp)
		}
		// TODO(tschottdorf): To get rid of the spurious alloc below we need to
		// implement the carrier interface on ba.Header or make Span non-nullable,
		// both of which force all of ba on the Heap. It's already there, so may
		// not be a big deal, but ba should live on the stack. Also not easy to use
		// a buffer pool here since anything that goes into the RPC layer could be
		// used by goroutines we didn't wait for.
		if ba.Header.Trace == nil {
			ba.Header.Trace = &tracing.Span{}
		} else {
			// We didn't make this object but are about to mutate it, so we
			// have to take a copy - the original might already have been
			// passed to the RPC layer.
			ba.Header.Trace = protoutil.Clone(ba.Header.Trace).(*tracing.Span)
		}
		if err := tracer.Inject(sp.Context(), basictracer.Delegator, ba.Trace); err != nil {
			return nil, roachpb.NewError(err)
		}
	}

	startNS := tc.clock.PhysicalNow()

	if ba.Txn != nil {
		// If this request is part of a transaction...
		if err := tc.maybeBeginTxn(&ba); err != nil {
			return nil, roachpb.NewError(err)
		}
		var et *roachpb.EndTransactionRequest
		var hasET bool
		{
			var rArgs roachpb.Request
			rArgs, hasET = ba.GetArg(roachpb.EndTransaction)
			if hasET {
				et = rArgs.(*roachpb.EndTransactionRequest)
				if len(et.Key) != 0 {
					return nil, roachpb.NewErrorf("EndTransaction must not have a Key set")
				}
				et.Key = ba.Txn.Key
				if len(et.IntentSpans) > 0 {
					// TODO(tschottdorf): it may be useful to allow this later.
					// That would be part of a possible plan to allow txns which
					// write on multiple coordinators.
					return nil, roachpb.NewErrorf("client must not pass intents to EndTransaction")
				}
			}
		}

		if pErr := func() *roachpb.Error {
			tc.Lock()
			defer tc.Unlock()
			if pErr := tc.maybeRejectClientLocked(ctx, *ba.Txn); pErr != nil {
				return pErr
			}
			if !hasET {
				return nil
			}
			// Everything below is carried out only when trying to commit.

			// Populate et.IntentSpans, taking into account both any existing
			// and new writes, and taking care to perform proper deduplication.
			txnMeta := tc.txns[*ba.Txn.ID]
			distinctSpans := true
			if txnMeta != nil {
				et.IntentSpans = txnMeta.keys
				// Defensively set distinctSpans to false if we had any previous
				// requests in this transaction. This effectively limits the distinct
				// spans optimization to 1pc transactions.
				distinctSpans = len(txnMeta.keys) == 0
			}
			ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
				et.IntentSpans = append(et.IntentSpans, roachpb.Span{
					Key:    key,
					EndKey: endKey,
				})
			})
			// TODO(peter): Populate DistinctSpans on all batches, not just batches
			// which contain an EndTransactionRequest.
			var distinct bool
			// The request might already be used by an outgoing goroutine, so
			// we can't safely mutate anything in-place (as MergeSpans does).
			et.IntentSpans = append([]roachpb.Span(nil), et.IntentSpans...)
			et.IntentSpans, distinct = roachpb.MergeSpans(et.IntentSpans)
			ba.Header.DistinctSpans = distinct && distinctSpans
			if len(et.IntentSpans) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state
				// in the client.
				return roachpb.NewErrorf("cannot commit a read-only transaction")
			}
			if txnMeta != nil {
				txnMeta.keys = et.IntentSpans
			}
			return nil
		}(); pErr != nil {
			return nil, pErr
		}

		if hasET && log.V(1) {
			for _, intent := range et.IntentSpans {
				log.Tracef(ctx, "intent: [%s,%s)", intent.Key, intent.EndKey)
			}
		}
	}

	// Send the command through wrapped sender, taking appropriate measures
	// on error.
	var br *roachpb.BatchResponse
	{
		var pErr *roachpb.Error
		br, pErr = tc.wrapped.Send(ctx, ba)

		if _, ok := pErr.GetDetail().(*roachpb.OpRequiresTxnError); ok {
			// TODO(tschottdorf): needs to keep the trace.
			br, pErr = tc.resendWithTxn(ba)
		}

		if pErr = tc.updateState(startNS, ctx, ba, br, pErr); pErr != nil {
			log.Tracef(ctx, "error: %s", pErr)
			return nil, pErr
		}
	}

	if br.Txn == nil {
		return br, nil
	}

	if _, ok := ba.GetArg(roachpb.EndTransaction); !ok {
		return br, nil
	}

	// If the --linearizable flag is set, we want to make sure that
	// all the clocks in the system are past the commit timestamp
	// of the transaction. This is guaranteed if either
	// - the commit timestamp is MaxOffset behind startNS
	// - MaxOffset ns were spent in this function
	// when returning to the client. Below we choose the option
	// that involves less waiting, which is likely the first one
	// unless a transaction commits with an odd timestamp.
	if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS {
		startNS = tsNS
	}
	sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS)
	if tc.linearizable && sleepNS > 0 {
		defer func() {
			if log.V(1) {
				log.Infof(ctx, "%v: waiting %s on EndTransaction for linearizability",
					br.Txn.ID.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
			}
			time.Sleep(sleepNS)
		}()
	}
	if br.Txn.Status != roachpb.PENDING {
		tc.Lock()
		tc.cleanupTxnLocked(ctx, *br.Txn)
		tc.Unlock()
	}
	return br, nil
}
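// A hypothetical illustration of the deduplication step above (not part of
// the original code), assuming roachpb.MergeSpans sorts the spans, merges
// overlapping ones, and reports whether the inputs were already distinct:
func exampleMergeIntentSpans() {
	spans := []roachpb.Span{
		{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
		{Key: roachpb.Key("b"), EndKey: roachpb.Key("d")},
	}
	merged, distinct := roachpb.MergeSpans(spans)
	// merged == [{a, d}]; distinct == false, because the inputs overlapped,
	// so ba.Header.DistinctSpans would be left false for this batch.
	_, _ = merged, distinct
}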
// resetForNewSQLTxn (re)initializes the txnState for a new transaction.
// It creates a new client.Txn and initializes it using the session defaults.
// txnState.State will be set to Open.
func (ts *txnState) resetForNewSQLTxn(e *Executor, s *Session) {
	if ts.sp != nil {
		panic(fmt.Sprintf("txnState.reset() called on ts with active span. How come "+
			"finishSQLTxn() wasn't called previously? ts: %+v", ts))
	}

	// Reset state vars to defaults.
	ts.retrying = false
	ts.retryIntent = false
	ts.autoRetry = false
	ts.commitSeen = false

	// Discard previously collected spans. We start collecting anew on
	// every fresh SQL txn.
	ts.CollectedSpans = nil

	// Create a context for this transaction. It will include a
	// root span that will contain everything executed as part of the
	// upcoming SQL txn, including (automatic or user-directed) retries.
	// The span is closed by finishSQLTxn().
	// TODO(andrei): figure out how to close these spans on server shutdown?
	ctx := s.context
	var sp opentracing.Span
	if traceSQL {
		var err error
		sp, err = tracing.JoinOrNewSnowball("coordinator", nil, func(sp basictracer.RawSpan) {
			ts.CollectedSpans = append(ts.CollectedSpans, sp)
		})
		if err != nil {
			log.Warningf(ctx, "unable to create snowball tracer: %s", err)
			return
		}
	} else if traceSQLFor7881 {
		var err error
		sp, _, err = tracing.NewTracerAndSpanFor7881(func(sp basictracer.RawSpan) {
			ts.CollectedSpans = append(ts.CollectedSpans, sp)
		})
		if err != nil {
			log.Fatalf(ctx, "couldn't create a tracer for debugging #7881: %s", err)
		}
	} else {
		if parentSp := opentracing.SpanFromContext(ctx); parentSp != nil {
			// Create a child span for this SQL txn.
			tracer := parentSp.Tracer()
			sp = tracer.StartSpan("sql txn", opentracing.ChildOf(parentSp.Context()))
		} else {
			// Create a root span for this SQL txn.
			tracer := e.cfg.AmbientCtx.Tracer
			sp = tracer.StartSpan("sql txn")
		}
	}

	// Put the new span in the context.
	ts.sp = sp
	ctx = opentracing.ContextWithSpan(ctx, sp)
	ts.Ctx = ctx

	ts.mon.Start(ctx, &s.mon, mon.BoundAccount{})

	ts.txn = client.NewTxn(ts.Ctx, *e.cfg.DB)
	ts.txn.Proto.Isolation = s.DefaultIsolationLevel
	ts.State = Open

	// Discard the old schemaChangers, if any.
	ts.schemaChangers = schemaChangerCollection{}
}
// Trace looks for an opentracing.Span in the context and, if one is found,
// logs the given message to it.
func Trace(ctx context.Context, msg string) {
	sp := opentracing.SpanFromContext(ctx)
	if sp != nil {
		sp.LogEvent(msg)
	}
}
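// A minimal usage sketch (hypothetical). Unlike Tracef above, Trace takes a
// preformatted message, so any expensive formatting happens before the
// nil-span check; prefer Tracef when the message is costly to build.
func exampleTrace(ctx context.Context) {
	Trace(ctx, "request received")
}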