// maybeBeginTxn begins a new transaction if a txn has been specified // in the request but has a nil ID. The new transaction is initialized // using the name and isolation in the otherwise uninitialized txn. // The Priority, if non-zero is used as a minimum. func (tc *TxnCoordSender) maybeBeginTxn(ba *roachpb.BatchRequest) { if ba.Txn == nil { return } if len(ba.Requests) == 0 { panic("empty batch with txn") } if len(ba.Txn.ID) == 0 { // TODO(tschottdorf): should really choose the first txn write here. firstKey := ba.Requests[0].GetInner().Header().Key newTxn := roachpb.NewTransaction(ba.Txn.Name, keys.KeyAddress(firstKey), ba.GetUserPriority(), ba.Txn.Isolation, tc.clock.Now(), tc.clock.MaxOffset().Nanoseconds()) // Use existing priority as a minimum. This is used on transaction // aborts to ratchet priority when creating successor transaction. if newTxn.Priority < ba.Txn.Priority { newTxn.Priority = ba.Txn.Priority } ba.Txn = newTxn } }
// maybeBeginTxn begins a new transaction if a txn has been specified // in the request but has a nil ID. The new transaction is initialized // using the name and isolation in the otherwise uninitialized txn. // The Priority, if non-zero is used as a minimum. // // No transactional writes are allowed unless preceded by a begin // transaction request within the same batch. The exception is if the // transaction is already in state txn.Writing=true. func (tc *TxnCoordSender) maybeBeginTxn(ba *roachpb.BatchRequest) error { if ba.Txn == nil { return nil } if len(ba.Requests) == 0 { return util.Errorf("empty batch with txn") } if len(ba.Txn.ID) == 0 { // Create transaction without a key. The key is set when a begin // transaction request is received. newTxn := roachpb.NewTransaction(ba.Txn.Name, nil, ba.GetUserPriority(), ba.Txn.Isolation, tc.clock.Now(), tc.clock.MaxOffset().Nanoseconds()) // Use existing priority as a minimum. This is used on transaction // aborts to ratchet priority when creating successor transaction. if newTxn.Priority < ba.Txn.Priority { newTxn.Priority = ba.Txn.Priority } ba.Txn = newTxn } // Check for a begin transaction to set txn key based on the key of // the first transactional write. Also enforce that no transactional // writes occur before a begin transaction. var haveBeginTxn bool for _, req := range ba.Requests { args := req.GetInner() if bt, ok := args.(*roachpb.BeginTransactionRequest); ok { if haveBeginTxn || ba.Txn.Writing { return util.Errorf("begin transaction requested twice in the same transaction") } haveBeginTxn = true ba.Txn.Key = bt.Key } if roachpb.IsTransactionWrite(args) && !haveBeginTxn && !ba.Txn.Writing { return util.Errorf("transactional write before begin transaction") } } return nil }
// TODO(tschottdorf): this method is somewhat awkward but unless we want to // give this error back to the client, our options are limited. We'll have to // run the whole thing for them, or any restart will still end up at the client // which will not be prepared to be handed a Txn. func (tc *TxnCoordSender) resendWithTxn(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { // Run a one-off transaction with that single command. if log.V(1) { log.Infof("%s: auto-wrapping in txn and re-executing: ", ba) } tmpDB := client.NewDBWithPriority(tc, ba.GetUserPriority()) var br *roachpb.BatchResponse err := tmpDB.Txn(func(txn *client.Txn) error { txn.SetDebugName("auto-wrap", 0) b := &client.Batch{} for _, arg := range ba.Requests { req := arg.GetInner() b.InternalAddRequest(req) } var err error br, err = txn.CommitInBatchWithResponse(b) return err }) if err != nil { return nil, roachpb.NewError(err) } br.Txn = nil // hide the evidence return br, nil }
// updateState updates the transaction state in both the success and // error cases, applying those updates to the corresponding txnMeta // object when adequate. It also updates certain errors with the // updated transaction for use by client restarts. func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error { trace := tracer.FromCtx(ctx) newTxn := &roachpb.Transaction{} newTxn.Update(ba.GetTxn()) // TODO(tamird): remove this clone. It's currently needed to avoid race conditions. pErr = proto.Clone(pErr).(*roachpb.Error) err := pErr.GoError() // TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex); // since there's no concept of ownership copy-on-write is always preferable. switch t := err.(type) { case nil: newTxn.Update(br.Txn) // Move txn timestamp forward to response timestamp if applicable. // TODO(tschottdorf): see (*Replica).executeBatch and comments within. // Looks like this isn't necessary any more, nor did it prevent a bug // referenced in a TODO there. newTxn.Timestamp.Forward(br.Timestamp) case *roachpb.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider txn dead. defer tc.cleanupTxn(trace, t.Txn) case *roachpb.OpRequiresTxnError: panic("OpRequiresTxnError must not happen at this level") case *roachpb.ReadWithinUncertaintyIntervalError: // Mark the host as certain. See the protobuf comment for // Transaction.CertainNodes for details. if t.NodeID == 0 { panic("no replica set in header on uncertainty restart") } newTxn.Update(&t.Txn) newTxn.CertainNodes.Add(t.NodeID) // If the reader encountered a newer write within the uncertainty // interval, move the timestamp forward, just past that write or // up to MaxTimestamp, whichever comes first. candidateTS := newTxn.MaxTimestamp candidateTS.Backward(t.ExistingTimestamp.Add(0, 1)) newTxn.Timestamp.Forward(candidateTS) newTxn.Restart(ba.GetUserPriority(), newTxn.Priority, newTxn.Timestamp) t.Txn = *newTxn case *roachpb.TransactionAbortedError: trace.SetError() newTxn.Update(&t.Txn) // Increase timestamp if applicable. newTxn.Timestamp.Forward(t.Txn.Timestamp) newTxn.Priority = t.Txn.Priority t.Txn = *newTxn // Clean up the freshly aborted transaction in defer(), avoiding a // race with the state update below. defer tc.cleanupTxn(trace, t.Txn) case *roachpb.TransactionPushError: newTxn.Update(t.Txn) // Increase timestamp if applicable, ensuring that we're // just ahead of the pushee. newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1)) newTxn.Restart(ba.GetUserPriority(), t.PusheeTxn.Priority-1, newTxn.Timestamp) t.Txn = newTxn case *roachpb.TransactionRetryError: newTxn.Update(&t.Txn) newTxn.Restart(ba.GetUserPriority(), t.Txn.Priority, newTxn.Timestamp) t.Txn = *newTxn case roachpb.TransactionRestartError: // Assertion: The above cases should exhaust all ErrorDetails which // carry a Transaction. if pErr.Detail != nil { panic(fmt.Sprintf("unhandled TransactionRestartError %T", err)) } default: trace.SetError() } return func() *roachpb.Error { if len(newTxn.ID) <= 0 { return pErr } id := string(newTxn.ID) tc.Lock() defer tc.Unlock() txnMeta := tc.txns[id] // For successful transactional requests, keep the written intents and // the updated transaction record to be sent along with the reply. // The transaction metadata is created with the first writing operation. // A tricky edge case is that of a transaction which "fails" on the // first writing request, but actually manages to write some intents // (for example, due to being multi-range). In this case, there will // be an error, but the transaction will be marked as Writing and the // coordinator must track the state, for the client's retry will be // performed with a Writing transaction which the coordinator rejects // unless it is tracking it (on top of it making sense to track it; // after all, it **has** laid down intents and only the coordinator // can augment a potential EndTransaction call). // consider re-using those. if intents := ba.GetIntents(); len(intents) > 0 && (err == nil || newTxn.Writing) { if txnMeta == nil { if !newTxn.Writing { panic("txn with intents marked as non-writing") } txnMeta = &txnMetadata{ txn: *newTxn, keys: cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } tc.txns[id] = txnMeta // If the transaction is already over, there's no point in // launching a one-off coordinator which will shut down right // away. If we ended up here with an error, we'll always start // the coordinator - the transaction has laid down intents, so // we expect it to be committed/aborted at some point in the // future. if _, isEnding := ba.GetArg(roachpb.EndTransaction); err != nil || !isEnding { trace.Event("coordinator spawns") if !tc.stopper.RunAsyncTask(func() { tc.heartbeatLoop(id) }) { // The system is already draining and we can't start the // heartbeat. We refuse new transactions for now because // they're likely not going to have all intents committed. // In principle, we can relax this as needed though. tc.unregisterTxnLocked(id) return roachpb.NewError(&roachpb.NodeUnavailableError{}) } } } for _, intent := range intents { txnMeta.addKeyRange(intent.Key, intent.EndKey) } } // Update our record of this transaction, even on error. if txnMeta != nil { txnMeta.txn = *newTxn if !txnMeta.txn.Writing { panic("tracking a non-writing txn") } txnMeta.setLastUpdate(tc.clock.PhysicalNow()) } if err == nil { // For successful transactional requests, always send the updated txn // record back. br.Txn = newTxn } return pErr }() }
// updateState updates the transaction state in both the success and // error cases, applying those updates to the corresponding txnMeta // object when adequate. It also updates certain errors with the // updated transaction for use by client restarts. func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error { trace := tracer.FromCtx(ctx) newTxn := &roachpb.Transaction{} newTxn.Update(ba.GetTxn()) err := pErr.GoError() switch t := err.(type) { case nil: newTxn.Update(br.GetTxn()) // Move txn timestamp forward to response timestamp if applicable. // TODO(tschottdorf): see (*Replica).executeBatch and comments within. // Looks like this isn't necessary any more, nor did it prevent a bug // referenced in a TODO there. newTxn.Timestamp.Forward(br.Timestamp) case *roachpb.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider txn dead. defer tc.cleanupTxn(trace, t.Txn) case *roachpb.OpRequiresTxnError: // TODO(tschottdorf): range-spanning autowrap currently broken. panic("TODO(tschottdorf): disabled") case *roachpb.ReadWithinUncertaintyIntervalError: // Mark the host as certain. See the protobuf comment for // Transaction.CertainNodes for details. if t.NodeID == 0 { panic("no replica set in header on uncertainty restart") } newTxn.CertainNodes.Add(t.NodeID) // If the reader encountered a newer write within the uncertainty // interval, move the timestamp forward, just past that write or // up to MaxTimestamp, whichever comes first. candidateTS := newTxn.MaxTimestamp candidateTS.Backward(t.ExistingTimestamp.Add(0, 1)) newTxn.Timestamp.Forward(candidateTS) newTxn.Restart(ba.GetUserPriority(), newTxn.Priority, newTxn.Timestamp) t.Txn = *newTxn case *roachpb.TransactionAbortedError: // Increase timestamp if applicable. newTxn.Timestamp.Forward(t.Txn.Timestamp) newTxn.Priority = t.Txn.Priority t.Txn = *newTxn // Clean up the freshly aborted transaction in defer(), avoiding a // race with the state update below. defer tc.cleanupTxn(trace, t.Txn) case *roachpb.TransactionPushError: // Increase timestamp if applicable, ensuring that we're // just ahead of the pushee. newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1)) newTxn.Restart(ba.GetUserPriority(), t.PusheeTxn.Priority-1, newTxn.Timestamp) t.Txn = newTxn case *roachpb.TransactionRetryError: // Increase timestamp if applicable. newTxn.Timestamp.Forward(t.Txn.Timestamp) newTxn.Restart(ba.GetUserPriority(), t.Txn.Priority, newTxn.Timestamp) t.Txn = *newTxn case roachpb.TransactionRestartError: // Assertion: The above cases should exhaust all ErrorDetails which // carry a Transaction. if pErr.Detail != nil { panic(fmt.Sprintf("unhandled TransactionRestartError %T", err)) } } return func() *roachpb.Error { if len(newTxn.ID) <= 0 { return pErr } id := string(newTxn.ID) tc.Lock() defer tc.Unlock() txnMeta := tc.txns[id] // For successful transactional requests, keep the written intents and // the updated transaction record to be sent along with the reply. // The transaction metadata is created with the first writing operation // TODO(tschottdorf): already computed the intents prior to sending, // consider re-using those. if intents := ba.GetIntents(); len(intents) > 0 && err == nil { if txnMeta == nil { newTxn.Writing = true txnMeta = &txnMetadata{ txn: *newTxn, keys: cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } tc.txns[id] = txnMeta // If the transaction is already over, there's no point in // launching a one-off coordinator which will shut down right // away. if _, isEnding := ba.GetArg(roachpb.EndTransaction); !isEnding { trace.Event("coordinator spawns") if !tc.stopper.RunAsyncTask(func() { tc.heartbeatLoop(id) }) { // The system is already draining and we can't start the // heartbeat. We refuse new transactions for now because // they're likely not going to have all intents committed. // In principle, we can relax this as needed though. tc.unregisterTxnLocked(id) return roachpb.NewError(&roachpb.NodeUnavailableError{}) } } } for _, intent := range intents { txnMeta.addKeyRange(intent.Key, intent.EndKey) } } // Update our record of this transaction, even on error. if txnMeta != nil { txnMeta.txn.Update(newTxn) // better to replace after #2300 if !txnMeta.txn.Writing { panic("tracking a non-writing txn") } txnMeta.setLastUpdate(tc.clock.PhysicalNow()) } if err == nil { // For successful transactional requests, always send the updated txn // record back. if br.Txn == nil { br.Txn = &roachpb.Transaction{} } *br.Txn = *newTxn } return pErr }() }