func (tc *TxnCoordSender) heartbeat(txnID uuid.UUID, trace opentracing.Span, ctx context.Context) bool { tc.Lock() proceed := true txnMeta := tc.txns[txnID.String()] // Before we send a heartbeat, determine whether this transaction // should be considered abandoned. If so, exit heartbeat. if txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow()) { // TODO(tschottdorf): should we be more proactive here? // The client might be continuing the transaction // through another coordinator, but in the most likely // case it's just gone and the open transaction record // could block concurrent operations. if log.V(1) { log.Infof("transaction %s abandoned; stopping heartbeat", txnMeta.txn) } proceed = false } // txnMeta.txn is possibly replaced concurrently, // so grab a copy before unlocking. txn := txnMeta.txn tc.Unlock() if !proceed { return false } hb := &roachpb.HeartbeatTxnRequest{} hb.Key = txn.Key ba := roachpb.BatchRequest{} ba.Timestamp = tc.clock.Now() txnClone := txn.Clone() ba.Txn = &txnClone ba.Add(hb) trace.LogEvent("heartbeat") _, err := tc.wrapped.Send(ctx, ba) // If the transaction is not in pending state, then we can stop // the heartbeat. It's either aborted or committed, and we resolve // write intents accordingly. if err != nil { log.Warningf("heartbeat to %s failed: %s", txn, err) } // TODO(bdarnell): once we have gotten a heartbeat response with // Status != PENDING, future heartbeats are useless. However, we // need to continue the heartbeatLoop until the client either // commits or abandons the transaction. We could save a little // pointless work by restructuring this loop to stop sending // heartbeats between the time that the transaction is aborted and // the client finds out. Furthermore, we could use this information // to send TransactionAbortedErrors to the client so it can restart // immediately instead of running until its EndTransaction. return true }
// heartbeatLoop periodically sends a HeartbeatTxn RPC to an extant // transaction, stopping in the event the transaction is aborted or // committed after attempting to resolve the intents. When the // heartbeat stops, the transaction is unregistered from the // coordinator, func (tc *TxnCoordSender) heartbeatLoop(txnID uuid.UUID) { var tickChan <-chan time.Time { ticker := time.NewTicker(tc.heartbeatInterval) tickChan = ticker.C defer ticker.Stop() } defer func() { tc.Lock() tc.unregisterTxnLocked(txnID) tc.Unlock() }() var closer <-chan struct{} var sp opentracing.Span { tc.Lock() txnMeta := tc.txns[txnID.String()] // do not leak to outer scope closer = txnMeta.txnEnd sp = tc.tracer.StartTrace("heartbeat loop") defer sp.Finish() tc.Unlock() } if closer == nil { // Avoid race in which a Txn is cleaned up before the heartbeat // goroutine gets a chance to start. return } ctx, _ := opentracing.ContextWithSpan(context.Background(), sp) // Loop with ticker for periodic heartbeats. for { select { case <-tickChan: if !tc.heartbeat(txnID, sp, ctx) { return } case <-closer: // Transaction finished normally. return case <-tc.stopper.ShouldDrain(): return } } }
// unregisterTxn deletes a txnMetadata object from the sender // and collects its stats. It assumes the lock is held. func (tc *TxnCoordSender) unregisterTxnLocked(txnID uuid.UUID) { txnIDStr := txnID.String() txnMeta := tc.txns[txnIDStr] // guaranteed to exist if txnMeta == nil { panic(fmt.Sprintf("attempt to unregister non-existent transaction: %s", txnID)) } tc.txnStats.durations = append(tc.txnStats.durations, float64(tc.clock.PhysicalNow()-txnMeta.firstUpdateNanos)) tc.txnStats.restarts = append(tc.txnStats.restarts, float64(txnMeta.txn.Epoch)) switch txnMeta.txn.Status { case roachpb.ABORTED: tc.txnStats.aborted++ case roachpb.PENDING: tc.txnStats.abandoned++ case roachpb.COMMITTED: tc.txnStats.committed++ } txnMeta.keys.Clear() delete(tc.txns, txnIDStr) }