// TestTxnCoordSenderErrorWithIntent validates that if a transactional request // returns an error but also indicates a Writing transaction, the coordinator // tracks it just like a successful request. func TestTxnCoordSenderErrorWithIntent(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { txn := ba.Txn.Clone() txn.Writing = true pErr := roachpb.NewError(roachpb.NewTransactionRetryError()) pErr.SetTxn(txn) return nil, pErr }), clock, false, nil, stopper) defer stopper.Stop() var ba roachpb.BatchRequest key := roachpb.Key("test") ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.EndTransactionRequest{}) ba.Txn = &roachpb.Transaction{Name: "test"} if _, pErr := ts.Send(context.Background(), ba); !testutils.IsPError(pErr, "retry txn") { t.Fatalf("unexpected error: %v", pErr) } defer teardownHeartbeats(ts) ts.Lock() defer ts.Unlock() if len(ts.txns) != 1 { t.Fatalf("expected transaction to be tracked") } }
// TestEndWriteRestartReadOnlyTransaction verifies that if // a transaction writes, then restarts and turns read-only, // an explicit EndTransaction call is still sent if retry- // able didn't, regardless of whether there is an error // or not. func TestEndWriteRestartReadOnlyTransaction(t *testing.T) { defer leaktest.AfterTest(t)() for _, success := range []bool{true, false} { expCalls := []roachpb.Method{roachpb.BeginTransaction, roachpb.Put, roachpb.EndTransaction} var calls []roachpb.Method db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { calls = append(calls, ba.Methods()...) return ba.CreateReply(), nil }, nil)) ok := false if err := db.Txn(context.TODO(), func(txn *Txn) error { if !ok { if err := txn.Put("consider", "phlebas"); err != nil { t.Fatal(err) } ok = true // Return an immediate txn retry error. We need to go through the pErr // and back to get a RetryableTxnError. return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError() } if !success { return errors.New("aborting on purpose") } return nil }); err == nil != success { t.Errorf("expected error: %t, got error: %v", !success, err) } if !reflect.DeepEqual(expCalls, calls) { t.Fatalf("expected %v, got %v", expCalls, calls) } } }
func injectErrors( req roachpb.Request, hdr roachpb.Header, magicVals *filterVals, ) error { magicVals.Lock() defer magicVals.Unlock() switch req := req.(type) { case *roachpb.ConditionalPutRequest: for key, count := range magicVals.restartCounts { checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn) if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) { magicVals.restartCounts[key]-- err := roachpb.NewReadWithinUncertaintyIntervalError( hlc.ZeroTimestamp, hlc.ZeroTimestamp) magicVals.failedValues[string(req.Value.RawBytes)] = failureRecord{err, hdr.Txn} return err } } for key, count := range magicVals.abortCounts { checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn) if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) { magicVals.abortCounts[key]-- err := roachpb.NewTransactionAbortedError() magicVals.failedValues[string(req.Value.RawBytes)] = failureRecord{err, hdr.Txn} return err } } // If we're writing a value that's marked for an EndTransaction failure, // keep track of the txn id so we can fail it later on. for key, count := range magicVals.endTxnRestartCounts { if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) { txnID := *hdr.Txn.TxnMeta.ID if _, found := magicVals.txnsToFail[txnID]; found { continue } magicVals.endTxnRestartCounts[key]-- magicVals.txnsToFail[txnID] = true } } return nil case *roachpb.EndTransactionRequest: txnID := *hdr.Txn.TxnMeta.ID if !magicVals.txnsToFail[txnID] { return nil } delete(magicVals.txnsToFail, txnID) // Note that we can't return TransactionAborted errors, although those are // more representative for the errors that EndTransaction might encounter, // because returning those would result in the txn's intents being left // around. return roachpb.NewTransactionRetryError() default: return nil } }
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request // returns an error but also indicates a Writing transaction, the coordinator // tracks it just like a successful request. func TestTxnCoordSenderErrorWithIntent(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) testCases := []struct { roachpb.Error errMsg string }{ {*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"}, {*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{ TxnMeta: enginepb.TxnMeta{ ID: uuid.NewV4(), }})), "failed to push"}, {*roachpb.NewErrorf("testError"), "testError"}, } for i, test := range testCases { func() { senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { txn := ba.Txn.Clone() txn.Writing = true pErr := &roachpb.Error{} *pErr = test.Error pErr.SetTxn(&txn) return nil, pErr } ctx := tracing.WithTracer(context.Background(), tracing.NewTracer()) ts := NewTxnCoordSender(ctx, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics()) var ba roachpb.BatchRequest key := roachpb.Key("test") ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.EndTransactionRequest{}) ba.Txn = &roachpb.Transaction{Name: "test"} _, pErr := ts.Send(context.Background(), ba) if !testutils.IsPError(pErr, test.errMsg) { t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr) } defer teardownHeartbeats(ts) ts.Lock() defer ts.Unlock() if len(ts.txns) != 1 { t.Errorf("%d: expected transaction to be tracked", i) } }() } }
// TestTransactionKeyNotChangedInRestart verifies that if the transaction already has a key (we're // in a restart), the key in the begin transaction request is not changed. func TestTransactionKeyNotChangedInRestart(t *testing.T) { defer leaktest.AfterTest(t)() tries := 0 db := NewDB(newTestSender(nil, func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { var bt *roachpb.BeginTransactionRequest if args, ok := ba.GetArg(roachpb.BeginTransaction); ok { bt = args.(*roachpb.BeginTransactionRequest) } else { t.Fatal("failed to find a begin transaction request") } // In the first try, the transaction key is the key of the first write command. Before the // second try, the transaction key is set to txnKey by the test sender. In the second try, the // transaction key is txnKey. var expectedKey roachpb.Key if tries == 1 { expectedKey = testKey } else { expectedKey = txnKey } if !bt.Key.Equal(expectedKey) { t.Fatalf("expected transaction key %v, got %v", expectedKey, bt.Key) } return ba.CreateReply(), nil })) if err := db.Txn(context.TODO(), func(txn *Txn) error { tries++ b := txn.NewBatch() b.Put("a", "b") if err := txn.Run(b); err != nil { t.Fatal(err) } if tries == 1 { return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError() } return nil }); err != nil { t.Errorf("unexpected error on commit: %s", err) } minimumTries := 2 if tries < minimumTries { t.Errorf("expected try count >= %d, got %d", minimumTries, tries) } }
// Test that a TransactionRetryError will retry the read until it succeeds. The // test is designed so that if the proto timestamps are bumped during retry // a failure will occur. func TestAsOfRetry(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() // Disable one phase commits because they cannot be restarted. params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() const val1 = 1 const val2 = 2 const name = "boulanger" if _, err := sqlDB.Exec(fmt.Sprintf(` CREATE DATABASE d; CREATE TABLE d.t (s STRING PRIMARY KEY, a INT); INSERT INTO d.t (s, a) VALUES ('%v', %v); `, name, val1)); err != nil { t.Fatal(err) } var walltime int64 if err := sqlDB.QueryRow("UPDATE d.t SET a = $1 RETURNING cluster_logical_timestamp()::int", val2).Scan(&walltime); err != nil { t.Fatal(err) } tsVal2 := time.Unix(0, walltime).Format(time.RFC3339Nano) tsVal1 := time.Unix(0, walltime-1).Format(time.RFC3339Nano) // Set up error injection that causes retries. magicVals := createFilterVals(nil, nil) magicVals.restartCounts = map[string]int{ name: 5, } cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { magicVals.Lock() defer magicVals.Unlock() switch req := args.Req.(type) { case *roachpb.ScanRequest: for key, count := range magicVals.restartCounts { checkCorrectTxn(string(req.Key), magicVals, args.Hdr.Txn) if count > 0 && bytes.Contains(req.Key, []byte(key)) { magicVals.restartCounts[key]-- err := roachpb.NewTransactionRetryError() magicVals.failedValues[string(req.Key)] = failureRecord{err, args.Hdr.Txn} txn := args.Hdr.Txn.Clone() txn.Timestamp = txn.Timestamp.Add(0, 1) return roachpb.NewErrorWithTxn(err, &txn) } } } return nil }, false) var i int // Query with tsVal1 which should return the first value. Since tsVal1 is just // one nanosecond before tsVal2, any proto timestamp bumping will return val2 // and error. 
// Must specify the WHERE here to trigger the injection errors. if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s' WHERE s = '%s'", tsVal1, name)).Scan(&i); err != nil { t.Fatal(err) } else if i != val1 { t.Fatalf("unexpected val: %v", i) } cleanupFilter() // Verify that the retry errors were injected. checkRestarts(t, magicVals) // Query with tsVal2 to ensure val2 is indeed present. if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s'", tsVal2)).Scan(&i); err != nil { t.Fatal(err) } else if i != val2 { t.Fatalf("unexpected val: %v", i) } }
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
//
// Parameters:
//   - ctx: used to obtain the tracing span for this call.
//   - ba: the batch that was sent; its Txn seeds the updated transaction and
//     its intent spans are what gets recorded in the txnMeta.
//   - br: the response; it is read (and its Txn overwritten) only when pErr
//     is nil.
//   - pErr: the error returned for ba, or nil on success.
//
// Returns the error to hand to the client: nil on success, a copy of pErr
// carrying the updated transaction when pErr has a transaction attached, a
// freshly created retry error when a successful SERIALIZABLE request is found
// to need a restart, or a NodeUnavailableError when the heartbeat task cannot
// be started.
//
// The method panics on OpRequiresTxnError, on a retryable error of a type not
// handled explicitly below that carries a transaction, and when the
// "Writing" invariants checked further down are violated.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	sp, cleanupSp := tracing.SpanFromContext(opTxnCoordSender, tc.tracer, ctx)
	defer cleanupSp()
	// Build the updated transaction from the request's transaction plus
	// whatever the response or the error carries.
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else {
		newTxn.Update(pErr.GetTxn())
	}

	// If the request was successful but we're in a transaction which needs to
	// restart but doesn't know it yet, let it restart now (as opposed to
	// waiting until EndTransaction).
	if pErr == nil && newTxn.Isolation == roachpb.SERIALIZABLE &&
		!newTxn.OrigTimestamp.Equal(newTxn.Timestamp) {
		pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn)
		// From here on this call is handled as an error; the response is
		// dropped.
		br = nil
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		// The argument is dereferenced when this defer statement executes.
		// NOTE(review): this assumes pErr.GetTxn() is non-nil here — confirm.
		defer tc.cleanupTxn(sp, *pErr.GetTxn())
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			// Without an observed timestamp the original error is replaced
			// by a newly created one.
			pErr = roachpb.NewError(util.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		// The copy of newTxn passed to cleanupTxn is taken when this defer
		// statement executes, i.e. after the two updates above.
		defer tc.cleanupTxn(sp, *newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		if pErr.GetTxn() != nil {
			if pErr.CanRetry() {
				panic("Retryable internal error must not happen at this level")
			} else {
				// Do not clean up the transaction here since the client might still
				// want to continue the transaction. For example, a client might
				// continue its transaction after receiving ConditionFailedError, which
				// can come from a unique index violation.
			}
		}
	}

	if pErr != nil && pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and then there are races. Fairly sure there isn't one
		// here, but better safe than sorry.
		pErrShallow := *pErr
		pErrShallow.SetTxn(newTxn)
		pErr = &pErrShallow
	}

	// Without a transaction ID there is no entry in tc.txns to look up or
	// create, so return before taking the lock.
	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	// The coordinator stays locked for the remainder of the call.
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	var intents []roachpb.Span
	// TODO(nvanbenschoten): Iterating here to put the intents in a slice for
	// the sole purpose of later iterating again and calling addKeyRange is a
	// little wasteful and can likely be avoided.
	ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
		intents = append(intents, roachpb.Span{Key: key, EndKey: endKey})
	})

	if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
		if txnMeta == nil {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				sp.LogEvent("coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             interval.NewRangeTree(),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				// Register the metadata before starting the heartbeat task.
				tc.txns[txnID] = txnMeta
				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		for _, intent := range intents {
			addKeyRange(txnMeta.keys, intent.Key, intent.EndKey)
		}
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
//
// Parameters:
//   - ctx: used to obtain the tracing span for this call.
//   - ba: the batch that was sent; its Txn seeds the updated transaction and
//     its intent spans are what gets recorded in the txnMeta.
//   - br: the response; it is read (and its Txn overwritten) on the success
//     path.
//   - pErr: the error returned for ba, or nil on success. For the error types
//     handled explicitly below (other than TransactionStatusError), the
//     updated transaction is written into pErr in place via SetTxn.
//
// Returns the error to hand to the client: nil on success, pErr otherwise, a
// freshly created retry error when a successful SERIALIZABLE response shows
// that the transaction needs a restart, or a NodeUnavailableError when the
// heartbeat task cannot be started.
//
// The method panics on OpRequiresTxnError, on an uncertainty error without a
// NodeID, and when the "Writing" invariants checked further down are
// violated.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	sp := tracing.SpanFromContext(ctx)
	// Start from the request's transaction; the switch below folds in the
	// transaction carried by the response or by the error.
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	// If the request was successful but we're in a transaction which needs to
	// restart but doesn't know it yet, let it restart now (as opposed to
	// waiting until EndTransaction).
	if pErr == nil && br.Txn != nil &&
		br.Txn.Isolation == roachpb.SERIALIZABLE &&
		!br.Txn.OrigTimestamp.Equal(br.Txn.Timestamp) {
		pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn)
		// From here on this call is handled as an error; the response is
		// dropped.
		br = nil
	}

	// TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex);
	// since there's no concept of ownership copy-on-write is always preferable.
	switch t := pErr.GetDetail().(type) {
	case nil:
		// NOTE(review): this branch dereferences br, so it relies on
		// GetDetail() yielding nil only when pErr itself is nil — confirm
		// that an error without a detail cannot reach this case.
		newTxn.Update(br.Txn)
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		// The clone is taken now; the cleanup itself runs when updateState
		// returns.
		pErrTxn := pErr.GetTxn().Clone()
		defer tc.cleanupTxn(sp, pErrTxn)
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.Update(pErr.GetTxn())
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionAbortedError:
		newTxn.Update(pErr.GetTxn())
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		pErr.SetTxn(newTxn)
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		// The argument is dereferenced when this defer statement executes,
		// i.e. after SetTxn above.
		defer tc.cleanupTxn(sp, *pErr.GetTxn())
	case *roachpb.TransactionPushError:
		newTxn.Update(pErr.GetTxn())
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionRetryError:
		newTxn.Update(pErr.GetTxn())
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	}

	// Without a transaction ID there is no entry in tc.txns to look up or
	// create, so return before taking the lock.
	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	// The coordinator stays locked for the remainder of the call.
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	intents := ba.GetIntentSpans()
	if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
		if txnMeta == nil {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				sp.LogEvent("coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				// Register the metadata before starting the heartbeat task.
				tc.txns[txnID] = txnMeta
				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		for _, intent := range intents {
			txnMeta.addKeyRange(intent.Key, intent.EndKey)
		}
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}