// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry nor opt.AutoCommit
// can be set in this case.
//
// If an error is returned, the txn has been aborted.
func (txn *Txn) Exec(
	opt TxnExecOptions,
	fn func(txn *Txn, opt *TxnExecOptions) *roachpb.Error) *roachpb.Error {
	// Run fn in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	var retryOptions retry.Options
	if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
		panic("asked to retry or commit a txn that is already aborted")
	}
	if opt.AutoRetry {
		retryOptions = txn.db.txnRetryOptions
	}
RetryLoop:
	for r := retry.Start(retryOptions); r.Next(); {
		pErr = fn(txn, &opt)
		if (pErr == nil) && opt.AutoCommit && (txn.Proto.Status == roachpb.PENDING) {
			// fn succeeded, but didn't commit.
			pErr = txn.commit(nil)
		}
		if pErr == nil {
			break
		}
		// Make sure the txn record that pErr carries is for this txn.
		// We check only when txn.Proto.ID has been initialized after an initial successful send.
		if pErr.GetTxn() != nil && txn.Proto.ID != nil {
			if errTxn := pErr.GetTxn(); !errTxn.Equal(&txn.Proto) {
				return roachpb.NewErrorf("mismatching transaction record in the error:\n%s\nv.s.\n%s", errTxn, txn.Proto)
			}
		}
		if !opt.AutoRetry {
			break RetryLoop
		}
		switch pErr.TransactionRestart {
		case roachpb.TransactionRestart_IMMEDIATE:
			r.Reset()
		case roachpb.TransactionRestart_BACKOFF:
		default:
			break RetryLoop
		}
		if log.V(2) {
			log.Infof("automatically retrying transaction: %s because of error: %s",
				txn.DebugName(), pErr)
		}
	}
	if txn != nil {
		// TODO(andrei): don't do Cleanup() on retriable errors here.
		// Let the sql executor do it.
		txn.Cleanup(pErr)
	}
	if pErr != nil {
		pErr.StripErrorTransaction()
	}
	return pErr
}
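// A hedged usage sketch (not taken from the repo): driving Exec with AutoRetry
// and AutoCommit set, so the loop above re-runs fn on retriable errors and
// issues the commit once fn succeeds while the transaction is still PENDING.
// The body of fn is illustrative only.
func exampleExecUsage(txn *Txn) *roachpb.Error {
	return txn.Exec(TxnExecOptions{
		AutoRetry:  true,
		AutoCommit: true,
	}, func(txn *Txn, opt *TxnExecOptions) *roachpb.Error {
		// Application reads and writes would go through txn here; returning a
		// retriable error causes Exec to run this closure again.
		return nil
	})
}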
func (txn *Txn) exec(retryable func(txn *Txn) *roachpb.Error) *roachpb.Error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		pErr = retryable(txn)
		if pErr == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			pErr = txn.commit(nil)
		}
		if pErr != nil {
			// Make sure the txn record that pErr carries is for this txn.
			// We check only when txn.Proto.ID has been initialized after an initial successful send.
			if pErr.GetTxn() != nil && txn.Proto.ID != nil {
				if errTxn := pErr.GetTxn(); !errTxn.Equal(&txn.Proto) {
					return roachpb.NewErrorf("mismatching transaction record in the error:\n%s\nv.s.\n%s", errTxn, txn.Proto)
				}
			}
			switch pErr.TransactionRestart {
			case roachpb.TransactionRestart_IMMEDIATE:
				if log.V(2) {
					log.Warning(pErr)
				}
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				if log.V(2) {
					log.Warning(pErr)
				}
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(pErr)
	return pErr
}
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(
	startNS int64,
	ctx context.Context,
	ba roachpb.BatchRequest,
	br *roachpb.BatchResponse,
	pErr *roachpb.Error) *roachpb.Error {

	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else {
		newTxn.Update(pErr.GetTxn())
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(ctx, *pErr.GetTxn())
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(util.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(ctx, *newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp)
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		// Increase timestamp so on restart, we're ahead of any timestamp
		// cache entries or newer versions which caused the restart.
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case *roachpb.WriteTooOldError:
		newTxn.Restart(ba.UserPriority, newTxn.Priority, t.ActualTimestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		if pErr.GetTxn() != nil {
			if pErr.CanRetry() {
				panic("Retryable internal error must not happen at this level")
			} else {
				// Do not clean up the transaction here since the client might still
				// want to continue the transaction. For example, a client might
				// continue its transaction after receiving ConditionFailedError, which
				// can come from a unique index violation.
			}
		}
	}

	if pErr != nil && pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and then there are races. Fairly sure there isn't one
		// here, but better safe than sorry.
		pErrShallow := *pErr
		pErrShallow.SetTxn(newTxn)
		pErr = &pErrShallow
	}

	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	var intentGroup interval.RangeGroup
	if txnMeta != nil {
		intentGroup = txnMeta.keys
	} else if pErr == nil || newTxn.Writing {
		intentGroup = interval.NewRangeTree()
	}
	if intentGroup != nil {
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
			addKeyRange(intentGroup, key, endKey)
		})

		if txnMeta == nil && intentGroup.Len() > 0 {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				log.Trace(ctx, "coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             intentGroup,
					firstUpdateNanos: startNS,
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(ctx, txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			} else {
				// If this was a successful one phase commit, update stats
				// directly as they won't otherwise be updated on heartbeat
				// loop shutdown.
				etArgs, ok := br.Responses[len(br.Responses)-1].GetInner().(*roachpb.EndTransactionResponse)
				tc.updateStats(tc.clock.PhysicalNow()-startNS, 0, newTxn.Status, ok && etArgs.OnePhaseCommit)
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}
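// A hedged sketch (not the repo's actual helper) of what addKeyRange above is
// assumed to do: normalize a point write into a [key, key.Next()) span and
// merge it into the RangeGroup so overlapping intent spans coalesce. The use
// of interval.Range and interval.Comparable is an assumption about the
// util/interval API, not taken from this excerpt.
func addKeyRangeSketch(g interval.RangeGroup, key, endKey roachpb.Key) {
	if len(endKey) == 0 {
		// Point write: cover just the single key.
		endKey = key.Next()
	}
	g.Add(interval.Range{
		Start: interval.Comparable(key),
		End:   interval.Comparable(endKey),
	})
}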
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest,
	br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	trace := tracer.FromCtx(ctx)
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	// TODO(tamird): remove this clone. It's currently needed to avoid race conditions.
	pErr = proto.Clone(pErr).(*roachpb.Error)
	// TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex);
	// since there's no concept of ownership copy-on-write is always preferable.
	switch t := pErr.GetDetail().(type) {
	case nil:
		newTxn.Update(br.Txn)
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(trace, *pErr.GetTxn())
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.Update(pErr.GetTxn())
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionAbortedError:
		trace.SetError()
		newTxn.Update(pErr.GetTxn())
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		pErr.SetTxn(newTxn)
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(trace, *pErr.GetTxn())
	case *roachpb.TransactionPushError:
		newTxn.Update(t.Txn)
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
		t.Txn = newTxn
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionRetryError:
		newTxn.Update(pErr.GetTxn())
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	default:
		trace.SetError()
	}

	return func() *roachpb.Error {
		if len(newTxn.ID) <= 0 {
			return pErr
		}
		id := string(newTxn.ID)
		tc.Lock()
		defer tc.Unlock()
		txnMeta := tc.txns[id]
		// For successful transactional requests, keep the written intents and
		// the updated transaction record to be sent along with the reply.
		// The transaction metadata is created with the first writing operation.
		// A tricky edge case is that of a transaction which "fails" on the
		// first writing request, but actually manages to write some intents
		// (for example, due to being multi-range). In this case, there will
		// be an error, but the transaction will be marked as Writing and the
		// coordinator must track the state, for the client's retry will be
		// performed with a Writing transaction which the coordinator rejects
		// unless it is tracking it (on top of it making sense to track it;
		// after all, it **has** laid down intents and only the coordinator
		// can augment a potential EndTransaction call). See #3303.
		intents := ba.GetIntentSpans()
		if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
			if txnMeta == nil {
				if !newTxn.Writing {
					panic("txn with intents marked as non-writing")
				}
				// If the transaction is already over, there's no point in
				// launching a one-off coordinator which will shut down right
				// away. If we ended up here with an error, we'll always start
				// the coordinator - the transaction has laid down intents, so
				// we expect it to be committed/aborted at some point in the
				// future.
				if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
					trace.Event("coordinator spawns")
					txnMeta = &txnMetadata{
						txn:              *newTxn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					tc.txns[id] = txnMeta

					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						tc.unregisterTxnLocked(id)
						return roachpb.NewError(&roachpb.NodeUnavailableError{})
					}
				}
			}
		}
		// Update our record of this transaction, even on error.
		if txnMeta != nil {
			txnMeta.txn = *newTxn
			if !txnMeta.txn.Writing {
				panic("tracking a non-writing txn")
			}
			txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			// Adding the intents even on error reduces the likelihood of dangling
			// intents blocking concurrent writers for extended periods of time.
			// See #3346.
			for _, intent := range intents {
				txnMeta.addKeyRange(intent.Key, intent.EndKey)
			}
		}
		if pErr == nil {
			// For successful transactional requests, always send the updated txn
			// record back.
			br.Txn = newTxn
		}
		return pErr
	}()
}
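// A small illustration (values invented) of the uncertainty-restart arithmetic
// in the ReadWithinUncertaintyIntervalError case above: the candidate is
// min(MaxTimestamp, existing write timestamp + 1 logical tick), and the txn
// timestamp only ever moves forward to it. This example is not part of the
// repo; it only exercises the Timestamp helpers used above.
func exampleUncertaintyRestartTimestamps() roachpb.Timestamp {
	existing := roachpb.Timestamp{WallTime: 100, Logical: 0} // newer write seen by the reader
	maxTS := roachpb.Timestamp{WallTime: 150, Logical: 0}    // the txn's MaxTimestamp

	candidateTS := maxTS
	candidateTS.Backward(existing.Add(0, 1)) // min(maxTS, existing+1) == {100, 1}

	txnTS := roachpb.Timestamp{WallTime: 90, Logical: 0} // the txn's current timestamp
	txnTS.Forward(candidateTS)                           // max(txnTS, candidateTS) == {100, 1}
	return txnTS
}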
// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry nor opt.AutoCommit
// can be set in this case.
//
// When this method returns, txn might be in any state; Exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
//
// TODO(andrei): Make Exec() return error; make fn return an error + a retriable
// bit. There's no reason to propagate roachpb.Error (protos) above this point.
func (txn *Txn) Exec(
	opt TxnExecOptions,
	fn func(txn *Txn, opt *TxnExecOptions) *roachpb.Error) *roachpb.Error {
	// Run fn in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	var retryOptions retry.Options
	if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
		panic("asked to retry or commit a txn that is already aborted")
	}
	if opt.AutoRetry {
		retryOptions = txn.db.txnRetryOptions
	}
RetryLoop:
	for r := retry.Start(retryOptions); r.Next(); {
		if txn != nil {
			// If we're looking at a brand new transaction, then communicate
			// what should be used as initial timestamp for the KV txn created
			// by TxnCoordSender.
			if txn.Proto.OrigTimestamp == roachpb.ZeroTimestamp {
				txn.Proto.OrigTimestamp = opt.MinInitialTimestamp
			}
		}

		pErr = fn(txn, &opt)
		if txn != nil {
			txn.retrying = true
			defer func() {
				txn.retrying = false
			}()
		}
		if (pErr == nil) && opt.AutoCommit && (txn.Proto.Status == roachpb.PENDING) {
			// fn succeeded, but didn't commit.
			pErr = txn.Commit()
		}
		if pErr == nil {
			break
		}
		// Make sure the txn record that pErr carries is for this txn.
		// We check only when txn.Proto.ID has been initialized after an initial successful send.
		if pErr.GetTxn() != nil && txn.Proto.ID != nil {
			if errTxn := pErr.GetTxn(); !errTxn.Equal(&txn.Proto) {
				return roachpb.NewErrorf("mismatching transaction record in the error:\n%s\nv.s.\n%s", errTxn, txn.Proto)
			}
		}
		if !opt.AutoRetry {
			break RetryLoop
		}
		switch pErr.TransactionRestart {
		case roachpb.TransactionRestart_IMMEDIATE:
			r.Reset()
		case roachpb.TransactionRestart_BACKOFF:
		default:
			break RetryLoop
		}
		if log.V(2) {
			log.Infof("automatically retrying transaction: %s because of error: %s",
				txn.DebugName(), pErr)
		}
	}
	if pErr != nil {
		pErr.StripErrorTransaction()
	}
	return pErr
}
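// A hedged sketch of the nil-txn mode described in the comment on Exec: after
// a previous batch aborted the transaction, the caller (e.g. the SQL executor)
// can keep feeding statements, typically just a ROLLBACK, by passing a nil txn
// and leaving both AutoRetry and AutoCommit unset. Illustrative only.
func exampleExecAfterAbort() *roachpb.Error {
	var abortedTxn *Txn // nil: the txn has already been aborted
	return abortedTxn.Exec(TxnExecOptions{}, func(txn *Txn, opt *TxnExecOptions) *roachpb.Error {
		// txn is nil here; only logic that tolerates that (the ROLLBACK path)
		// should run.
		return nil
	})
}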
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest,
	br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	sp, cleanupSp := tracing.SpanFromContext(opTxnCoordSender, tc.tracer, ctx)
	defer cleanupSp()

	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else {
		newTxn.Update(pErr.GetTxn())
	}

	// If the request was successful but we're in a transaction which needs to
	// restart but doesn't know it yet, let it restart now (as opposed to
	// waiting until EndTransaction).
	if pErr == nil && newTxn.Isolation == roachpb.SERIALIZABLE &&
		!newTxn.OrigTimestamp.Equal(newTxn.Timestamp) {
		pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn)
		br = nil
	}

	switch t := pErr.GetDetail().(type) {
	case nil:
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(sp, *pErr.GetTxn())
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(util.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(sp, *newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	default:
		if pErr.GetTxn() != nil {
			if pErr.CanRetry() {
				panic("Retryable internal error must not happen at this level")
			} else {
				// Do not clean up the transaction here since the client might still
				// want to continue the transaction. For example, a client might
				// continue its transaction after receiving ConditionFailedError, which
				// can come from a unique index violation.
			}
		}
	}

	if pErr != nil && pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and then there are races. Fairly sure there isn't one
		// here, but better safe than sorry.
		pErrShallow := *pErr
		pErrShallow.SetTxn(newTxn)
		pErr = &pErrShallow
	}

	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	intents := ba.GetIntentSpans()
	if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
		if txnMeta == nil {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				sp.LogEvent("coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			}
		}
	}
	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		for _, intent := range intents {
			txnMeta.addKeyRange(intent.Key, intent.EndKey)
		}
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}