// maybeRejectClientLocked checks whether the (transactional) request is in a // state that prevents it from continuing, such as the coordinator having // considered the client abandoned, or a heartbeat having reported an error. func (tc *TxnCoordSender) maybeRejectClientLocked( ctx context.Context, txn roachpb.Transaction, ) *roachpb.Error { if !txn.Writing { return nil } txnMeta, ok := tc.txns[*txn.ID] // Check whether the transaction is still tracked and has a chance of // completing. It's possible that the coordinator learns about the // transaction having terminated from a heartbeat, and GC queue correctness // (along with common sense) mandates that we don't let the client // continue. switch { case !ok: // TODO(spencerkimball): Could add coordinator node ID to the // transaction session so that we can definitively return the right // error between these possible errors. Or update the code to make an // educated guess based on the incoming transaction timestamp. return roachpb.NewError(errNoState) case txnMeta.txn.Status == roachpb.ABORTED: txn := txnMeta.txn.Clone() tc.cleanupTxnLocked(ctx, txn) return roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn) case txnMeta.txn.Status == roachpb.COMMITTED: txn := txnMeta.txn.Clone() tc.cleanupTxnLocked(ctx, txn) return roachpb.NewErrorWithTxn(roachpb.NewTransactionStatusError( "transaction is already committed"), &txn) default: return nil } }
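// The sketch below illustrates how a coordinator might consult
// maybeRejectClientLocked before forwarding a transactional batch, so that
// clients of abandoned or already-terminated transactions fail fast. It is a
// hedged sketch only: apart from maybeRejectClientLocked and the Lock/Unlock
// mutex (both visible elsewhere in this section), the method name and the
// wrapped-sender field are assumptions, not the actual TxnCoordSender.Send.
func (tc *TxnCoordSender) sendTransactional(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	if ba.Txn != nil {
		tc.Lock()
		pErr := tc.maybeRejectClientLocked(ctx, *ba.Txn)
		tc.Unlock()
		if pErr != nil {
			return nil, pErr
		}
	}
	return tc.wrapped.Send(ctx, ba) // wrapped sender name is an assumption
}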
// TestEndWriteRestartReadOnlyTransaction verifies that if // a transaction writes, then restarts and turns read-only, // an explicit EndTransaction call is still sent if retry- // able didn't, regardless of whether there is an error // or not. func TestEndWriteRestartReadOnlyTransaction(t *testing.T) { defer leaktest.AfterTest(t)() for _, success := range []bool{true, false} { expCalls := []roachpb.Method{roachpb.BeginTransaction, roachpb.Put, roachpb.EndTransaction} var calls []roachpb.Method db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { calls = append(calls, ba.Methods()...) return ba.CreateReply(), nil }, nil)) ok := false if err := db.Txn(context.TODO(), func(txn *Txn) error { if !ok { if err := txn.Put("consider", "phlebas"); err != nil { t.Fatal(err) } ok = true // Return an immediate txn retry error. We need to go through the pErr // and back to get a RetryableTxnError. return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError() } if !success { return errors.New("aborting on purpose") } return nil }); err == nil != success { t.Errorf("expected error: %t, got error: %v", !success, err) } if !reflect.DeepEqual(expCalls, calls) { t.Fatalf("expected %v, got %v", expCalls, calls) } } }
// Tests that a retryable error for an inner txn doesn't cause the outer txn to // be retried. func TestWrongTxnRetry(t *testing.T) { defer leaktest.AfterTest(t)() db := NewDB(newTestSender(nil, nil)) var retries int txnClosure := func(outerTxn *Txn) error { log.Infof(context.Background(), "outer retry") retries++ // Ensure the KV transaction is created. if err := outerTxn.Put("a", "b"); err != nil { t.Fatal(err) } var execOpt TxnExecOptions execOpt.AutoRetry = false err := outerTxn.Exec( execOpt, func(innerTxn *Txn, opt *TxnExecOptions) error { // Ensure the KV transaction is created. if err := innerTxn.Put("x", "y"); err != nil { t.Fatal(err) } return roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{ PusheeTxn: outerTxn.Proto}, &innerTxn.Proto).GoError() }) return err } if err := db.Txn(context.TODO(), txnClosure); !testutils.IsError(err, "failed to push") { t.Fatal(err) } if retries != 1 { t.Fatalf("unexpected retries: %d", retries) } }
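// TestWrongTxnRetry above hinges on the fact that a retryable error carries the
// transaction it applies to (via NewErrorWithTxn), and the outer retry loop
// must not restart when that transaction isn't its own. The helper below is a
// hypothetical illustration of that ID comparison, not part of the client
// package; it relies on txn IDs being comparable, as their use as map keys
// elsewhere in this section suggests.
func errorBelongsToTxn(pErr *roachpb.Error, txn *Txn) bool {
	errTxn := pErr.GetTxn()
	if errTxn == nil || errTxn.ID == nil || txn.Proto.ID == nil {
		return false
	}
	return *errTxn.ID == *txn.Proto.ID
}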
// TestNonRetryableErrorFromCommit verifies that a non-retryable error from the // execution of EndTransactionRequests is propagated to the client. func TestNonRetryableErrorFromCommit(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() hitError := false cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if req, ok := args.Req.(*roachpb.EndTransactionRequest); ok { if bytes.Contains(req.Key, []byte(keys.DescIDGenerator)) { hitError = true return roachpb.NewErrorWithTxn(fmt.Errorf("testError"), args.Hdr.Txn) } } return nil }, false) defer cleanupFilter() if _, err := sqlDB.Exec("CREATE DATABASE t;"); !testutils.IsError(err, "pq: testError") { t.Errorf("unexpected error %v", err) } if !hitError { t.Errorf("expected to hit error, but it didn't happen") } }
// Verifies that an inner transaction in a nested transaction strips the transaction // information in its error when propagating it to another transaction. func TestNestedTransaction(t *testing.T) { defer leaktest.AfterTest(t)() s, db := setup() defer s.Stop() pErr := db.Txn(func(txn1 *client.Txn) *roachpb.Error { if pErr := txn1.Put("a", "1"); pErr != nil { t.Fatalf("unexpected put error: %s", pErr) } iPErr := db.Txn(func(txn2 *client.Txn) *roachpb.Error { txnProto := roachpb.NewTransaction("test", roachpb.Key("a"), 1, roachpb.SERIALIZABLE, roachpb.Timestamp{}, 0) return roachpb.NewErrorWithTxn(util.Errorf("inner txn error"), txnProto) }) if iPErr.GetTxn() != nil { t.Errorf("error txn must be stripped: %s", iPErr) } return iPErr }) if pErr == nil { t.Fatal("unexpected success of txn") } if !testutils.IsPError(pErr, "inner txn error") { t.Errorf("unexpected failure: %s", pErr) } }
// TestNonRetryableError verifies that a non-retryable error is propagated to the client. func TestNonRetryableError(t *testing.T) { defer leaktest.AfterTest(t)() ctx, cmdFilters := createTestServerContext() server, sqlDB, _ := setupWithContext(t, &ctx) defer cleanup(server, sqlDB) testKey := []byte("test_key") hitError := false cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if req, ok := args.Req.(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { hitError = true return roachpb.NewErrorWithTxn(fmt.Errorf("testError"), args.Hdr.Txn) } } return nil }, false) defer cleanupFilter() sqlDB.SetMaxOpenConns(1) if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT); INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val'); SELECT * from t.test WHERE k = 'test_key'; `); !testutils.IsError(err, "pq: testError") { t.Errorf("unexpected error %s", err) } if !hitError { t.Errorf("expected to hit error, but it didn't happen") } }
// TestRunTransactionRetryOnErrors verifies that the transaction // is retried on the correct errors. func TestRunTransactionRetryOnErrors(t *testing.T) { defer leaktest.AfterTest(t)() testCases := []struct { err error retry bool // Expect retry? }{ {roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true}, {&roachpb.TransactionAbortedError{}, true}, {&roachpb.TransactionPushError{}, true}, {&roachpb.TransactionRetryError{}, true}, {&roachpb.WriteTooOldError{}, true}, {&roachpb.RangeNotFoundError{}, false}, {&roachpb.RangeKeyMismatchError{}, false}, {&roachpb.TransactionStatusError{}, false}, } for i, test := range testCases { count := 0 dbCtx := DefaultDBContext() dbCtx.TxnRetryOptions.InitialBackoff = 1 * time.Millisecond db := NewDBWithContext(newTestSender( func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { if _, ok := ba.GetArg(roachpb.Put); ok { count++ if count == 1 { return nil, roachpb.NewErrorWithTxn(test.err, ba.Txn) } } return ba.CreateReply(), nil }, nil), dbCtx) err := db.Txn(context.TODO(), func(txn *Txn) error { return txn.Put("a", "b") }) if test.retry { if count != 2 { t.Errorf("%d: expected one retry; got %d", i, count-1) } if err != nil { t.Errorf("%d: expected success on retry; got %s", i, err) } } else { if count != 1 { t.Errorf("%d: expected no retries; got %d", i, count) } if reflect.TypeOf(err) != reflect.TypeOf(test.err) { t.Errorf("%d: expected error of type %T; got %T", i, test.err, err) } } } }
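// For reference, the retry decision the table above encodes can be summarized
// as a type switch over the error detail. This helper is illustrative only (an
// assumption, not how the client package actually dispatches on errors); it
// simply mirrors the retry column of the test cases.
func isRetryableDetail(err error) bool {
	switch err.(type) {
	case *roachpb.ReadWithinUncertaintyIntervalError,
		*roachpb.TransactionAbortedError,
		*roachpb.TransactionPushError,
		*roachpb.TransactionRetryError,
		*roachpb.WriteTooOldError:
		return true
	default:
		return false
	}
}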
func TestAbortCountConflictingWrites(t *testing.T) { defer leaktest.AfterTest(t)() ctx, cmdFilters := createTestServerContext() s, sqlDB, _ := setupWithContext(t, ctx) defer cleanup(s, sqlDB) if _, err := sqlDB.Exec("CREATE DATABASE db"); err != nil { t.Fatal(err) } if _, err := sqlDB.Exec("CREATE TABLE db.t (k TEXT PRIMARY KEY, v TEXT)"); err != nil { t.Fatal(err) } // Inject errors on the INSERT below. restarted := false cmdFilters.AppendFilter(func(args storageutils.FilterArgs) *roachpb.Error { switch req := args.Req.(type) { // SQL INSERT generates ConditionalPuts for unique indexes (such as the PK). case *roachpb.ConditionalPutRequest: if bytes.Contains(req.Value.RawBytes, []byte("marker")) && !restarted { restarted = true return roachpb.NewErrorWithTxn( roachpb.NewTransactionAbortedError(), args.Hdr.Txn) } } return nil }, false) txn, err := sqlDB.Begin() if err != nil { t.Fatal(err) } _, err = txn.Exec("INSERT INTO db.t VALUES ('key', 'marker')") if !testutils.IsError(err, "aborted") { t.Fatal(err) } if err = txn.Rollback(); err != nil { t.Fatal(err) } checkCounterEQ(t, s, "txn.abort.count", 1) checkCounterEQ(t, s, "txn.begin.count", 1) checkCounterEQ(t, s, "txn.rollback.count", 0) checkCounterEQ(t, s, "txn.commit.count", 0) checkCounterEQ(t, s, "insert.count", 1) }
// TestTxnResetTxnOnAbort verifies transaction is reset on abort. func TestTxnResetTxnOnAbort(t *testing.T) { defer leaktest.AfterTest(t) db := newDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { return nil, roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{}, ba.Txn) }, nil)) txn := NewTxn(*db) _, pErr := txn.db.sender.Send(context.Background(), testPut()) if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); !ok { t.Fatalf("expected TransactionAbortedError, got %v", pErr) } if txn.Proto.ID != nil { t.Errorf("expected txn to be cleared") } }
// TestTransactionKeyNotChangedInRestart verifies that if the transaction already has a key (we're // in a restart), the key in the begin transaction request is not changed. func TestTransactionKeyNotChangedInRestart(t *testing.T) { defer leaktest.AfterTest(t)() tries := 0 db := NewDB(newTestSender(nil, func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { var bt *roachpb.BeginTransactionRequest if args, ok := ba.GetArg(roachpb.BeginTransaction); ok { bt = args.(*roachpb.BeginTransactionRequest) } else { t.Fatal("failed to find a begin transaction request") } // In the first try, the transaction key is the key of the first write command. Before the // second try, the transaction key is set to txnKey by the test sender. In the second try, the // transaction key is txnKey. var expectedKey roachpb.Key if tries == 1 { expectedKey = testKey } else { expectedKey = txnKey } if !bt.Key.Equal(expectedKey) { t.Fatalf("expected transaction key %v, got %v", expectedKey, bt.Key) } return ba.CreateReply(), nil })) if err := db.Txn(context.TODO(), func(txn *Txn) error { tries++ b := txn.NewBatch() b.Put("a", "b") if err := txn.Run(b); err != nil { t.Fatal(err) } if tries == 1 { return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError() } return nil }); err != nil { t.Errorf("unexpected error on commit: %s", err) } minimumTries := 2 if tries < minimumTries { t.Errorf("expected try count >= %d, got %d", minimumTries, tries) } }
// TestRollbackInRestartWait ensures that a ROLLBACK while the txn is in the // RetryWait state works. func TestRollbackInRestartWait(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT); `); err != nil { t.Fatal(err) } // Set up error injection that causes retries. magicVals := createFilterVals(nil, nil) magicVals.endTxnRestartCounts = map[string]int{ "boulanger": 1, } defer cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil { return roachpb.NewErrorWithTxn(err, args.Hdr.Txn) } return nil }, false)() tx, err := sqlDB.Begin() if err != nil { t.Fatal(err) } if _, err := tx.Exec("SAVEPOINT cockroach_restart"); err != nil { t.Fatal(err) } if _, err := tx.Exec( "INSERT INTO t.test (k, v) VALUES ('g', 'boulanger')"); err != nil { t.Fatal(err) } if _, err := tx.Exec("RELEASE SAVEPOINT cockroach_restart"); err == nil { t.Fatal("expected RELEASE to fail") } if err := tx.Rollback(); err != nil { t.Fatal(err) } }
// Verifies that a nested transaction returns an error if an inner txn // propagates an error to an outer txn. func TestNestedTransaction(t *testing.T) { defer leaktest.AfterTest(t)() s, db := setup() defer s.Stop() txnProto := roachpb.NewTransaction("test", roachpb.Key("a"), 1, roachpb.SERIALIZABLE, roachpb.Timestamp{}, 0) pErr := db.Txn(func(txn1 *client.Txn) *roachpb.Error { if pErr := txn1.Put("a", "1"); pErr != nil { t.Fatalf("unexpected put error: %s", pErr) } return db.Txn(func(txn2 *client.Txn) *roachpb.Error { return roachpb.NewErrorWithTxn(util.Errorf("err"), txnProto) }) }) if pErr == nil { t.Fatal("unexpected success of txn") } if !testutils.IsPError(pErr, "mismatching transaction record in the error") { t.Errorf("unexpected failure: %s", pErr) } }
// TestNonRetryableError verifies that a non-retryable error is propagated to the client. func TestNonRetryableError(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() testKey := []byte("test_key") hitError := false cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if req, ok := args.Req.(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { hitError = true return roachpb.NewErrorWithTxn(fmt.Errorf("testError"), args.Hdr.Txn) } } return nil }, false) defer cleanupFilter() // We need to do everything on one connection as we'll want to observe the // connection state after a COMMIT. sqlDB.SetMaxOpenConns(1) if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT); INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val'); SELECT * from t.test WHERE k = 'test_key'; `); !testutils.IsError(err, "pq: testError") { t.Errorf("unexpected error %v", err) } if !hitError { t.Errorf("expected to hit error, but it didn't happen") } }
func TestGCQueueTransactionTable(t *testing.T) { defer leaktest.AfterTest(t)() const now time.Duration = 3 * 24 * time.Hour const gcTxnAndAC = now - txnCleanupThreshold const gcACOnly = now - abortCacheAgeThreshold if gcTxnAndAC >= gcACOnly { t.Fatalf("test assumption violated due to changing constants; needs adjustment") } type spec struct { status roachpb.TransactionStatus orig time.Duration hb time.Duration // last heartbeat (none if ZeroTimestamp) newStatus roachpb.TransactionStatus // -1 for GCed failResolve bool // do we want to fail resolves in this trial? expResolve bool // expect attempt at removing txn-persisted intents? expAbortGC bool // expect abort cache entries removed? } // Describes the state of the Txn table before the test. // Many of the abort cache entries deleted wouldn't even be there, so don't // be confused by that. testCases := map[string]spec{ // Too young, should not touch. "aa": { status: roachpb.PENDING, orig: gcACOnly + 1, newStatus: roachpb.PENDING, }, // A little older, so the AbortCache gets cleaned up. "ab": { status: roachpb.PENDING, orig: gcTxnAndAC + 1, newStatus: roachpb.PENDING, expAbortGC: true, }, // Old and pending, but still heartbeat (so no Push attempted; it would succeed). // It's old enough to delete the abort cache entry though. "ba": { status: roachpb.PENDING, hb: gcTxnAndAC + 1, newStatus: roachpb.PENDING, expAbortGC: true, }, // Not old enough for Txn GC, but old enough to remove the abort cache entry. "bb": { status: roachpb.ABORTED, orig: gcACOnly - 1, newStatus: roachpb.ABORTED, expAbortGC: true, }, // Old, pending and abandoned. Should push and abort it successfully, // but not GC it just yet (this is an artifact of the implementation). // The abort cache gets cleaned up though. "c": { status: roachpb.PENDING, orig: gcTxnAndAC - 1, newStatus: roachpb.ABORTED, expAbortGC: true, }, // Old and aborted, should delete. "d": { status: roachpb.ABORTED, orig: gcTxnAndAC - 1, newStatus: -1, expResolve: true, expAbortGC: true, }, // Committed and fresh, so no action. But the abort cache entry is old // enough to be discarded. "e": { status: roachpb.COMMITTED, orig: gcTxnAndAC + 1, newStatus: roachpb.COMMITTED, expAbortGC: true, }, // Committed and old. It has an intent (like all tests here), which is // resolvable and hence we can GC. "f": { status: roachpb.COMMITTED, orig: gcTxnAndAC - 1, newStatus: -1, expResolve: true, expAbortGC: true, }, // Same as the previous one, but we've rigged things so that the intent // resolution here will fail and consequently no GC is expected. "g": { status: roachpb.COMMITTED, orig: gcTxnAndAC - 1, newStatus: roachpb.COMMITTED, failResolve: true, expResolve: true, expAbortGC: true, }, } resolved := map[string][]roachpb.Span{} tc := testContext{} tsc := TestStoreContext() tsc.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if resArgs, ok := filterArgs.Req.(*roachpb.ResolveIntentRequest); ok { id := string(resArgs.IntentTxn.Key) resolved[id] = append(resolved[id], roachpb.Span{ Key: resArgs.Key, EndKey: resArgs.EndKey, }) // We've special cased one test case. Note that the intent is still // counted in `resolved`. 
if testCases[id].failResolve { return roachpb.NewErrorWithTxn(util.Errorf("boom"), filterArgs.Hdr.Txn) } } return nil } tc.StartWithStoreContext(t, tsc) defer tc.Stop() tc.manualClock.Set(int64(now)) outsideKey := tc.rng.Desc().EndKey.Next().AsRawKey() testIntents := []roachpb.Span{{Key: roachpb.Key("intent")}} txns := map[string]roachpb.Transaction{} for strKey, test := range testCases { baseKey := roachpb.Key(strKey) txnClock := hlc.NewClock(hlc.NewManualClock(int64(test.orig)).UnixNano) txn := newTransaction("txn1", baseKey, 1, enginepb.SERIALIZABLE, txnClock) txn.Status = test.status txn.Intents = testIntents if test.hb > 0 { txn.LastHeartbeat = &hlc.Timestamp{WallTime: int64(test.hb)} } // Set a high Timestamp to make sure it does not matter. Only // OrigTimestamp (and heartbeat) are used for GC decisions. txn.Timestamp.Forward(hlc.MaxTimestamp) txns[strKey] = *txn for _, addrKey := range []roachpb.Key{baseKey, outsideKey} { key := keys.TransactionKey(addrKey, txn.ID) if err := engine.MVCCPutProto(context.Background(), tc.engine, nil, key, hlc.ZeroTimestamp, nil, txn); err != nil { t.Fatal(err) } } entry := roachpb.AbortCacheEntry{Key: txn.Key, Timestamp: txn.LastActive()} if err := tc.rng.abortCache.Put(context.Background(), tc.engine, nil, txn.ID, &entry); err != nil { t.Fatal(err) } } // Run GC. gcQ := newGCQueue(tc.gossip) cfg, ok := tc.gossip.GetSystemConfig() if !ok { t.Fatal("config not set") } if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil { t.Fatal(err) } util.SucceedsSoon(t, func() error { for strKey, sp := range testCases { txn := &roachpb.Transaction{} key := keys.TransactionKey(roachpb.Key(strKey), txns[strKey].ID) ok, err := engine.MVCCGetProto(context.Background(), tc.engine, key, hlc.ZeroTimestamp, true, nil, txn) if err != nil { return err } if expGC := (sp.newStatus == -1); expGC { if expGC != !ok { return fmt.Errorf("%s: expected gc: %t, but found %s\n%s", strKey, expGC, txn, roachpb.Key(strKey)) } } else if sp.newStatus != txn.Status { return fmt.Errorf("%s: expected status %s, but found %s", strKey, sp.newStatus, txn.Status) } var expIntents []roachpb.Span if sp.expResolve { expIntents = testIntents } if !reflect.DeepEqual(resolved[strKey], expIntents) { return fmt.Errorf("%s: unexpected intent resolutions:\nexpected: %s\nobserved: %s", strKey, expIntents, resolved[strKey]) } entry := &roachpb.AbortCacheEntry{} abortExists, err := tc.rng.abortCache.Get(context.Background(), tc.store.Engine(), txns[strKey].ID, entry) if err != nil { t.Fatal(err) } if (abortExists == false) != sp.expAbortGC { return fmt.Errorf("%s: expected abort cache gc: %t, found %+v", strKey, sp.expAbortGC, entry) } } return nil }) outsideTxnPrefix := keys.TransactionKey(outsideKey, uuid.EmptyUUID) outsideTxnPrefixEnd := keys.TransactionKey(outsideKey.Next(), uuid.EmptyUUID) var count int if _, err := engine.MVCCIterate(context.Background(), tc.store.Engine(), outsideTxnPrefix, outsideTxnPrefixEnd, hlc.ZeroTimestamp, true, nil, false, func(roachpb.KeyValue) (bool, error) { count++ return false, nil }); err != nil { t.Fatal(err) } if exp := len(testCases); exp != count { t.Fatalf("expected the %d external transaction entries to remain untouched, "+ "but only %d are left", exp, count) } }
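// The GC decisions exercised above boil down to comparing a record's original
// timestamp and last heartbeat against now - txnCleanupThreshold (and, for
// abort cache entries, the shorter abortCacheAgeThreshold). The helper below is
// a hypothetical restatement of that rule for txn records, not code from the GC
// queue itself.
func txnRecordIsGCCandidate(now, orig, lastHeartbeat time.Duration) bool {
	cutoff := now - txnCleanupThreshold
	if lastHeartbeat > 0 {
		// A recent heartbeat keeps the record alive even if it was created long ago.
		return lastHeartbeat < cutoff
	}
	return orig < cutoff
}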
// send runs the specified calls synchronously in a single batch and // returns any errors. If the transaction is read-only or has already // been successfully committed or aborted, a potential trailing // EndTransaction call is silently dropped, allowing the caller to // always commit or clean-up explicitly even when that may not be // required (or even erroneous). Returns (nil, nil) for an empty batch. func (txn *Txn) send(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { if txn.Proto.Status != roachpb.PENDING || txn.IsFinalized() { return nil, roachpb.NewErrorf( "attempting to use transaction with wrong status or finalized: %s", txn.Proto.Status) } // It doesn't make sense to use inconsistent reads in a transaction. However, // we still need to accept it as a parameter for this to compile. if ba.ReadConsistency != roachpb.CONSISTENT { return nil, roachpb.NewErrorf("cannot use %s ReadConsistency in txn", ba.ReadConsistency) } lastIndex := len(ba.Requests) - 1 if lastIndex < 0 { return nil, nil } // firstWriteIndex is set to the index of the first command which is // a transactional write. If != -1, this indicates an intention to // write. This is in contrast to txn.Proto.Writing, which is set by // the coordinator when the first intent has been created, and which // lives for the life of the transaction. firstWriteIndex := -1 var firstWriteKey roachpb.Key for i, ru := range ba.Requests { args := ru.GetInner() if i < lastIndex { if _, ok := args.(*roachpb.EndTransactionRequest); ok { return nil, roachpb.NewErrorf("%s sent as non-terminal call", args.Method()) } } if roachpb.IsTransactionWrite(args) && firstWriteIndex == -1 { firstWriteKey = args.Header().Key firstWriteIndex = i } } haveTxnWrite := firstWriteIndex != -1 endTxnRequest, haveEndTxn := ba.Requests[lastIndex].GetInner().(*roachpb.EndTransactionRequest) needBeginTxn := !txn.Proto.Writing && haveTxnWrite needEndTxn := txn.Proto.Writing || haveTxnWrite elideEndTxn := haveEndTxn && !needEndTxn // If we're not yet writing in this txn, but intend to, insert a // begin transaction request before the first write command. if needBeginTxn { // If the transaction already has a key (we're in a restart), make // sure we set the key in the begin transaction request to the original. bt := &roachpb.BeginTransactionRequest{ Span: roachpb.Span{ Key: firstWriteKey, }, } if txn.Proto.Key != nil { bt.Key = txn.Proto.Key } // Inject the new request before position firstWriteIndex, taking // care to avoid unnecessary allocations. oldRequests := ba.Requests ba.Requests = make([]roachpb.RequestUnion, len(ba.Requests)+1) copy(ba.Requests, oldRequests[:firstWriteIndex]) ba.Requests[firstWriteIndex].MustSetInner(bt) copy(ba.Requests[firstWriteIndex+1:], oldRequests[firstWriteIndex:]) } if elideEndTxn { ba.Requests = ba.Requests[:lastIndex] } br, pErr := txn.db.send(ba) if elideEndTxn && pErr == nil { // Check that read only transactions do not violate their deadline. This can NOT // happen since the txn deadline is normally updated when it is about to expire // or expired. We will just keep the code for safety (see TestReacquireLeaseOnRestart). if endTxnRequest.Deadline != nil { if endTxnRequest.Deadline.Less(txn.Proto.Timestamp) { return nil, roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn.Proto) } } // This normally happens on the server and sent back in response // headers, but this transaction was optimized away. 
The caller may // still inspect the transaction struct, so we manually update it // here to emulate a true transaction. if endTxnRequest.Commit { txn.Proto.Status = roachpb.COMMITTED } else { txn.Proto.Status = roachpb.ABORTED } txn.finalized = true } // If we inserted a begin transaction request, remove it here. if needBeginTxn { if br != nil && br.Responses != nil { br.Responses = append(br.Responses[:firstWriteIndex], br.Responses[firstWriteIndex+1:]...) } // Handle case where inserted begin txn confused an indexed error. if pErr != nil && pErr.Index != nil { idx := pErr.Index.Index if idx == int32(firstWriteIndex) { // An error was encountered on begin txn; disallow the indexing. pErr.Index = nil } else if idx > int32(firstWriteIndex) { // An error was encountered after begin txn; decrement index. pErr.SetErrorIndex(idx - 1) } } } return br, pErr }
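// The BeginTransaction handling above is, at its core, two pieces of slice
// surgery: splice the request in at firstWriteIndex on the way out, and drop
// the matching response (shifting any error index down by one) on the way back.
// The standalone sketch below shows the same idiom with plain strings standing
// in for requests and responses.
func insertAt(reqs []string, i int, elem string) []string {
	out := make([]string, len(reqs)+1)
	copy(out, reqs[:i])
	out[i] = elem
	copy(out[i+1:], reqs[i:])
	return out
}

func removeAt(resps []string, i int) []string {
	// Mirrors the append-based removal used for br.Responses; note it reuses
	// the backing array of resps.
	return append(resps[:i], resps[i+1:]...)
}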
// Verifies that an expired lease is released and a new lease is acquired on transaction // restart. // // This test triggers the above scenario by making ReadWithinUncertaintyIntervalError advance // the clock, so that the transaction timestamp exceeds the deadline of the EndTransactionRequest. func TestReacquireLeaseOnRestart(t *testing.T) { defer leaktest.AfterTest(t)() var cmdFilters CommandFilters cmdFilters.AppendFilter(checkEndTransactionTrigger, true) var clockUpdate int32 testKey := []byte("test_key") testingKnobs := &storage.StoreTestingKnobs{ TestingCommandFilter: cmdFilters.runFilters, ClockBeforeSend: func(c *hlc.Clock, ba roachpb.BatchRequest) { if atomic.LoadInt32(&clockUpdate) > 0 { return } // Hack to advance the transaction timestamp on a transaction restart. for _, union := range ba.Requests { if req, ok := union.GetInner().(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { atomic.AddInt32(&clockUpdate, 1) now := c.Now() now.WallTime += int64(5 * sql.LeaseDuration) c.Update(now) break } } } }, } params, _ := createTestServerParams() params.Knobs.Store = testingKnobs s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() var restartDone int32 cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if atomic.LoadInt32(&restartDone) > 0 { return nil } if req, ok := args.Req.(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { atomic.AddInt32(&restartDone, 1) // Return ReadWithinUncertaintyIntervalError to update the transaction timestamp on retry. txn := args.Hdr.Txn txn.ResetObservedTimestamps() now := s.Clock().Now() txn.UpdateObservedTimestamp( s.(*server.TestServer).Gossip().GetNodeID(), now) return roachpb.NewErrorWithTxn(roachpb.NewReadWithinUncertaintyIntervalError(now, now), txn) } } return nil }, false) defer cleanupFilter() // Use a large max offset to avoid rejecting a transaction whose timestamp is in the // future (as we will advance the transaction timestamp with ReadWithinUncertaintyIntervalError). s.Clock().SetMaxOffset(sql.LeaseDuration * 10) sqlDB.SetMaxOpenConns(1) if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT); INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val'); `); err != nil { t.Fatal(err) } // Acquire the lease and enable the auto-retry. The first read attempt will trigger ReadWithinUncertaintyIntervalError // and advance the transaction timestamp. The transaction timestamp will exceed the lease expiration // time, and the second read attempt will re-acquire the lease. if _, err := sqlDB.Exec(` SELECT * from t.test WHERE k = 'test_key'; `); err != nil { t.Fatal(err) } if u := atomic.LoadInt32(&clockUpdate); u != 1 { t.Errorf("expected exactly one clock update, but got %d", u) } if u := atomic.LoadInt32(&restartDone); u != 1 { t.Errorf("expected exactly one restart, but got %d", u) } }
// updateState updates the transaction state in both the success and // error cases, applying those updates to the corresponding txnMeta // object when adequate. It also updates certain errors with the // updated transaction for use by client restarts. func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error { sp, cleanupSp := tracing.SpanFromContext(opTxnCoordSender, tc.tracer, ctx) defer cleanupSp() newTxn := &roachpb.Transaction{} newTxn.Update(ba.Txn) if pErr == nil { newTxn.Update(br.Txn) } else { newTxn.Update(pErr.GetTxn()) } // If the request was successful but we're in a transaction which needs to // restart but doesn't know it yet, let it restart now (as opposed to // waiting until EndTransaction). if pErr == nil && newTxn.Isolation == roachpb.SERIALIZABLE && !newTxn.OrigTimestamp.Equal(newTxn.Timestamp) { pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn) br = nil } switch t := pErr.GetDetail().(type) { case *roachpb.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider txn dead. defer tc.cleanupTxn(sp, *pErr.GetTxn()) case *roachpb.OpRequiresTxnError: panic("OpRequiresTxnError must not happen at this level") case *roachpb.ReadWithinUncertaintyIntervalError: // If the reader encountered a newer write within the uncertainty // interval, we advance the txn's timestamp just past the last observed // timestamp from the node. restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode) if !ok { pErr = roachpb.NewError(util.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode)) } else { newTxn.Timestamp.Forward(restartTS) newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp) } case *roachpb.TransactionAbortedError: // Increase timestamp if applicable. newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp) newTxn.Priority = pErr.GetTxn().Priority // Clean up the freshly aborted transaction in defer(), avoiding a // race with the state update below. defer tc.cleanupTxn(sp, *newTxn) case *roachpb.TransactionPushError: // Increase timestamp if applicable, ensuring that we're // just ahead of the pushee. newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1)) newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp) case *roachpb.TransactionRetryError: newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp) case nil: // Nothing to do here, avoid the default case. default: if pErr.GetTxn() != nil { if pErr.CanRetry() { panic("Retryable internal error must not happen at this level") } else { // Do not clean up the transaction here since the client might still // want to continue the transaction. For example, a client might // continue its transaction after receiving ConditionFailedError, which // can come from a unique index violation. } } } if pErr != nil && pErr.GetTxn() != nil { // Avoid changing existing errors because sometimes they escape into // goroutines and then there are races. Fairly sure there isn't one // here, but better safe than sorry. pErrShallow := *pErr pErrShallow.SetTxn(newTxn) pErr = &pErrShallow } if newTxn.ID == nil { return pErr } txnID := *newTxn.ID tc.Lock() defer tc.Unlock() txnMeta := tc.txns[txnID] // For successful transactional requests, keep the written intents and // the updated transaction record to be sent along with the reply. 
// The transaction metadata is created with the first writing operation. // A tricky edge case is that of a transaction which "fails" on the // first writing request, but actually manages to write some intents // (for example, due to being multi-range). In this case, there will // be an error, but the transaction will be marked as Writing and the // coordinator must track the state, for the client's retry will be // performed with a Writing transaction which the coordinator rejects // unless it is tracking it (on top of it making sense to track it; // after all, it **has** laid down intents and only the coordinator // can augment a potential EndTransaction call). See #3303. var intents []roachpb.Span // TODO(nvanbenschoten): Iterating here to put the intents in a slice for // the sole purpose of later iterating again and calling addKeyRange is a // little wasteful and can likely be avoided. ba.IntentSpanIterate(func(key, endKey roachpb.Key) { intents = append(intents, roachpb.Span{Key: key, EndKey: endKey}) }) if len(intents) > 0 && (pErr == nil || newTxn.Writing) { if txnMeta == nil { if !newTxn.Writing { panic("txn with intents marked as non-writing") } // If the transaction is already over, there's no point in // launching a one-off coordinator which will shut down right // away. If we ended up here with an error, we'll always start // the coordinator - the transaction has laid down intents, so // we expect it to be committed/aborted at some point in the // future. if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding { sp.LogEvent("coordinator spawns") txnMeta = &txnMetadata{ txn: *newTxn, keys: interval.NewRangeTree(), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } tc.txns[txnID] = txnMeta if !tc.stopper.RunAsyncTask(func() { tc.heartbeatLoop(txnID) }) { // The system is already draining and we can't start the // heartbeat. We refuse new transactions for now because // they're likely not going to have all intents committed. // In principle, we can relax this as needed though. tc.unregisterTxnLocked(txnID) return roachpb.NewError(&roachpb.NodeUnavailableError{}) } } } } // Update our record of this transaction, even on error. if txnMeta != nil { txnMeta.txn = *newTxn if !txnMeta.txn.Writing { panic("tracking a non-writing txn") } txnMeta.setLastUpdate(tc.clock.PhysicalNow()) // Adding the intents even on error reduces the likelihood of dangling // intents blocking concurrent writers for extended periods of time. // See #3346. for _, intent := range intents { addKeyRange(txnMeta.keys, intent.Key, intent.EndKey) } } if pErr == nil { // For successful transactional requests, always send the updated txn // record back. br.Txn = newTxn } return pErr }
// Test that a COMMIT getting an error, retryable or not, leaves the txn // finalized and not in Aborted/RestartWait (i.e. COMMIT, like ROLLBACK, is // always final). func TestCommitFinalizesTxnOnError(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT); `); err != nil { t.Fatal(err) } // We need to do everything on one connection as we'll want to observe the // connection state after a COMMIT. sqlDB.SetMaxOpenConns(1) // Set up error injection that causes retries. magicVals := createFilterVals(nil, nil) magicVals.endTxnRestartCounts = map[string]int{ "boulanger": 1000, // restart many times, for all the tests below } defer cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil { return roachpb.NewErrorWithTxn(err, args.Hdr.Txn) } return nil }, false)() // We're going to test both errors that would leave the transaction in the // RestartWait state and errors that would leave the transaction in Aborted, // if they were to happen on any other statement than COMMIT. // We do that by always injecting a retryable error at COMMIT, but once in a // txn that had a "retry intent" (SAVEPOINT cockroach_restart), and once in a // txn without it. testCases := []struct { retryIntent bool }{ {false}, {true}, } for _, tc := range testCases { if _, err := sqlDB.Exec("BEGIN;"); err != nil { t.Fatal(err) } if tc.retryIntent { if _, err := sqlDB.Exec("SAVEPOINT cockroach_restart;"); err != nil { t.Fatal(err) } } if _, err := sqlDB.Exec("INSERT INTO t.test (k, v) VALUES (0, 'boulanger');"); err != nil { t.Fatal(err) } if _, err := sqlDB.Exec("COMMIT;"); !testutils.IsError(err, "pq: restart transaction") { t.Fatalf("unexpected error: %v", err) } // Check that we can start another txn on the (one and only) connection. if _, err := sqlDB.Exec("BEGIN;END;"); err != nil { t.Fatal(err) } } }
// TestPropagateTxnOnError verifies that DistSender.sendChunk properly // propagates the txn data to the next iteration. It uses the txn.Writing field to // verify that. func TestPropagateTxnOnError(t *testing.T) { defer leaktest.AfterTest(t)() // Set up a filter so that the first CPut operation will // get a ReadWithinUncertaintyIntervalError. targetKey := roachpb.Key("b") var numGets int32 ctx := server.NewTestContext() ctx.TestingKnobs.StoreTestingKnobs.TestingCommandFilter = func(fArgs storageutils.FilterArgs) *roachpb.Error { _, ok := fArgs.Req.(*roachpb.ConditionalPutRequest) if ok && fArgs.Req.Header().Key.Equal(targetKey) { if atomic.AddInt32(&numGets, 1) == 1 { z := roachpb.ZeroTimestamp pErr := roachpb.NewReadWithinUncertaintyIntervalError(z, z) return roachpb.NewErrorWithTxn(pErr, fArgs.Hdr.Txn) } } return nil } s := server.StartTestServerWithContext(t, ctx) defer s.Stop() db := setupMultipleRanges(t, s, "b") // Set the initial value on the target key "b". origVal := "val" if pErr := db.Put(targetKey, origVal); pErr != nil { t.Fatal(pErr) } // The following txn creates a batch request that is split // into two requests: Put and CPut. The CPut operation will // get a ReadWithinUncertaintyIntervalError and the txn will be // retried. epoch := 0 if pErr := db.Txn(func(txn *client.Txn) *roachpb.Error { epoch++ if epoch >= 2 { // Writing must be true since we ran the BeginTransaction command. if !txn.Proto.Writing { t.Errorf("unexpected non-writing txn") } } else { // Writing must be false since we haven't run any write command. if txn.Proto.Writing { t.Errorf("unexpected writing txn") } } b := txn.NewBatch() b.Put("a", "val") b.CPut(targetKey, "new_val", origVal) pErr := txn.CommitInBatch(b) if epoch == 1 { if _, ok := pErr.GetDetail().(*roachpb.ReadWithinUncertaintyIntervalError); ok { if !pErr.GetTxn().Writing { t.Errorf("unexpected non-writing txn on error") } } else { t.Errorf("expected ReadWithinUncertaintyIntervalError, but got: %s", pErr) } } return pErr }); pErr != nil { t.Errorf("unexpected error on transactional Puts: %s", pErr) } if epoch != 2 { t.Errorf("unexpected epoch; the txn must be retried exactly once, but got %d", epoch) } }
// TestTxnUserRestart tests user-directed txn restarts. // The test injects and otherwise creates retriable errors of various kinds // and checks that we still manage to run a txn despite them. func TestTxnUserRestart(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() params.Knobs.SQLExecutor = &sql.ExecutorTestingKnobs{FixTxnPriority: true} s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT); `); err != nil { t.Fatal(err) } // Set up error injection that causes retries. testCases := []struct { magicVals *filterVals expectedErr string }{ { magicVals: createFilterVals( map[string]int{"boulanger": 2}, // restartCounts nil), expectedErr: ".*encountered previous write with future timestamp.*", }, { magicVals: createFilterVals( nil, map[string]int{"boulanger": 2}), // abortCounts expectedErr: ".*txn aborted.*", }, } for _, tc := range testCases { for _, rs := range []rollbackStrategy{rollbackToSavepoint, declareSavepoint} { cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if err := injectErrors(args.Req, args.Hdr, tc.magicVals); err != nil { return roachpb.NewErrorWithTxn(err, args.Hdr.Txn) } return nil }, false) // Also inject an error at RELEASE time, besides the error injected by magicVals. injectReleaseError := true commitCount := s.MustGetSQLCounter(sql.MetaTxnCommit.Name) // This is the magic. Run the txn closure until all the retries are exhausted. exec(t, sqlDB, rs, func(tx *gosql.Tx) bool { return runTestTxn(t, tc.magicVals, tc.expectedErr, &injectReleaseError, sqlDB, tx) }) checkRestarts(t, tc.magicVals) // Check that we only wrote the sentinel row. rows, err := sqlDB.Query("SELECT * FROM t.test") if err != nil { t.Fatal(err) } for rows.Next() { var k int var v string err = rows.Scan(&k, &v) if err != nil { t.Fatal(err) } if k != 0 || v != "sentinel" { t.Fatalf("didn't find expected row: %d %s", k, v) } } // Check that the commit counter was incremented. It could have been // incremented by more than 1 because of the transactions we use to force // aborts, plus who knows what else the server is doing in the background. checkCounterGE(t, s, sql.MetaTxnCommit, commitCount+1) // Clean up the table for the next test iteration. _, err = sqlDB.Exec("DELETE FROM t.test WHERE true") if err != nil { t.Fatal(err) } rows.Close() cleanupFilter() } } }
// TestTxnRestart tests the logic in the sql executor for automatically retrying // txns in case of retriable errors. func TestTxnRestart(t *testing.T) { defer leaktest.AfterTest(t)() params, cmdFilters := createTestServerParams() // Disable one phase commits because they cannot be restarted. params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() // Make sure all the commands we send in this test are sent over the same connection. // This is a bit of a hack; in Go you're not supposed to have connection state // outside of using a db.Tx. But we can't use a db.Tx here, because we want // to control the batching of BEGIN/COMMIT statements. // This SetMaxOpenConns is pretty shady, it doesn't guarantee that you'll be using // the *same* one connection across calls. A proper solution would be to use a // lib/pq connection directly. As of Feb 2016, there's code in cli/sql_util.go to // do that. sqlDB.SetMaxOpenConns(1) if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT, t DECIMAL); `); err != nil { t.Fatal(err) } // Set up error injection that causes retries. magicVals := createFilterVals(nil, nil) magicVals.restartCounts = map[string]int{ "boulanger": 2, "dromedary": 2, "fajita": 2, "hooly": 2, "josephine": 2, "laureal": 2, } magicVals.abortCounts = map[string]int{ "boulanger": 2, } magicVals.endTxnRestartCounts = map[string]int{ "boulanger": 2, "dromedary": 2, "fajita": 2, "hooly": 2, } cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil { return roachpb.NewErrorWithTxn(err, args.Hdr.Txn) } return nil }, false) // Test that implicit txns - txns for which we see all the statements and prefixes // of txns (statements batched together with the BEGIN stmt) - are retried. // We also exercise the SQL cluster logical timestamp in here, because // this must be properly propagated across retries. if _, err := sqlDB.Exec(` INSERT INTO t.test (k, v, t) VALUES ('a', 'boulanger', cluster_logical_timestamp()); BEGIN; INSERT INTO t.test (k, v, t) VALUES ('c', 'dromedary', cluster_logical_timestamp()); INSERT INTO t.test (k, v, t) VALUES ('e', 'fajita', cluster_logical_timestamp()); END; INSERT INTO t.test (k, v, t) VALUES ('g', 'hooly', cluster_logical_timestamp()); BEGIN; INSERT INTO t.test (k, v, t) VALUES ('i', 'josephine', cluster_logical_timestamp()); INSERT INTO t.test (k, v, t) VALUES ('k', 'laureal', cluster_logical_timestamp()); `); err != nil { t.Fatal(err) } cleanupFilter() checkRestarts(t, magicVals) if _, err := sqlDB.Exec("END;"); err != nil { t.Fatal(err) } // Check that the txns succeeded by reading the rows. var count int if err := sqlDB.QueryRow("SELECT COUNT(*) FROM t.test").Scan(&count); err != nil { t.Fatal(err) } if count != 6 { t.Fatalf("Expected 6 rows, got %d", count) } // Now test that we don't retry what we shouldn't: insert an error into a txn // we can't automatically retry (because it spans requests). magicVals = createFilterVals(nil, nil) magicVals.restartCounts = map[string]int{ "hooly": 2, } cleanupFilter = cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil { return roachpb.NewErrorWithTxn(err, args.Hdr.Txn) } return nil }, false) defer cleanupFilter() // Start a txn. 
if _, err := sqlDB.Exec(` DELETE FROM t.test WHERE true; BEGIN; `); err != nil { t.Fatal(err) } // Continue the txn in a new request, which is not retriable. _, err := sqlDB.Exec("INSERT INTO t.test (k, v, t) VALUES ('g', 'hooly', cluster_logical_timestamp())") if !testutils.IsError( err, "encountered previous write with future timestamp") { t.Errorf("didn't get expected injected error. Got: %v", err) } }
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the // response transaction's timestamp and priority as appropriate. func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) { defer leaktest.AfterTest(t) origTS := makeTS(123, 0) testCases := []struct { pErr *roachpb.Error expEpoch uint32 expPri int32 expTS, expOrigTS roachpb.Timestamp nodeSeen bool }{ { // No error, so nothing interesting either. pErr: nil, expEpoch: 0, expPri: 1, expTS: origTS, expOrigTS: origTS, }, { // On uncertainty error, new epoch begins and node is seen. // Timestamp moves ahead of the existing write. pErr: roachpb.NewError(&roachpb.ReadWithinUncertaintyIntervalError{ NodeID: 1, ExistingTimestamp: origTS.Add(10, 10), }), expEpoch: 1, expPri: 1, expTS: origTS.Add(10, 11), expOrigTS: origTS.Add(10, 11), nodeSeen: true, }, { // On abort, nothing changes but we get a new priority to use for // the next attempt. pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{}, &roachpb.Transaction{ Timestamp: origTS.Add(20, 10), Priority: 10, }), expPri: 10, }, { // On failed push, new epoch begins just past the pushed timestamp. // Additionally, priority ratchets up to just below the pusher's. pErr: roachpb.NewError(&roachpb.TransactionPushError{ PusheeTxn: roachpb.Transaction{ Timestamp: origTS.Add(10, 10), Priority: int32(10)}, }), expEpoch: 1, expPri: 9, expTS: origTS.Add(10, 11), expOrigTS: origTS.Add(10, 11), }, { // On retry, restart with new epoch, timestamp and priority. pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{}, &roachpb.Transaction{ Timestamp: origTS.Add(10, 10), Priority: int32(10)}), expEpoch: 1, expPri: 10, expTS: origTS.Add(10, 10), expOrigTS: origTS.Add(10, 10), }, } for i, test := range testCases { stopper := stop.NewStopper() manual := hlc.NewManualClock(origTS.WallTime) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { var reply *roachpb.BatchResponse if test.pErr == nil { reply = ba.CreateReply() } return reply, test.pErr }), clock, false, nil, stopper) db := client.NewDB(ts) txn := client.NewTxn(*db) txn.InternalSetPriority(1) txn.Proto.Name = "test txn" key := roachpb.Key("test-key") _, pErr := txn.Get(key) teardownHeartbeats(ts) stopper.Stop() if reflect.TypeOf(test.pErr) != reflect.TypeOf(pErr) { t.Fatalf("%d: expected %T; got %T: %v", i, test.pErr, pErr, pErr) } if txn.Proto.Epoch != test.expEpoch { t.Errorf("%d: expected epoch = %d; got %d", i, test.expEpoch, txn.Proto.Epoch) } if txn.Proto.Priority != test.expPri { t.Errorf("%d: expected priority = %d; got %d", i, test.expPri, txn.Proto.Priority) } if !txn.Proto.Timestamp.Equal(test.expTS) { t.Errorf("%d: expected timestamp to be %s; got %s", i, test.expTS, txn.Proto.Timestamp) } if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) { t.Errorf("%d: expected orig timestamp to be %s + 1; got %s", i, test.expOrigTS, txn.Proto.OrigTimestamp) } if nodes := txn.Proto.CertainNodes.Nodes; (len(nodes) != 0) != test.nodeSeen { t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v", i, test.nodeSeen, nodes) } } }
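// Worked restatement of the "failed push" expectations above: after a
// TransactionPushError the txn restarts just past the pushee's timestamp with a
// priority just below the pushee's, so origTS.Add(10, 10) and priority 10
// become origTS.Add(10, 11) and priority 9. The helper is illustrative only,
// not code from the coordinator.
func expectedPushRestart(pushee roachpb.Transaction) (roachpb.Timestamp, int32) {
	return pushee.Timestamp.Add(0, 1), pushee.Priority - 1
}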
// updateState updates the transaction state in both the success and // error cases, applying those updates to the corresponding txnMeta // object when adequate. It also updates certain errors with the // updated transaction for use by client restarts. func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error { sp := tracing.SpanFromContext(ctx) newTxn := &roachpb.Transaction{} newTxn.Update(ba.Txn) // If the request was successful but we're in a transaction which needs to // restart but doesn't know it yet, let it restart now (as opposed to // waiting until EndTransaction). if pErr == nil && br.Txn != nil && br.Txn.Isolation == roachpb.SERIALIZABLE && !br.Txn.OrigTimestamp.Equal(br.Txn.Timestamp) { pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn) br = nil } // TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex); // since there's no concept of ownership copy-on-write is always preferable. switch t := pErr.GetDetail().(type) { case nil: newTxn.Update(br.Txn) // Move txn timestamp forward to response timestamp if applicable. // TODO(tschottdorf): see (*Replica).executeBatch and comments within. // Looks like this isn't necessary any more, nor did it prevent a bug // referenced in a TODO there. newTxn.Timestamp.Forward(br.Timestamp) case *roachpb.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider txn dead. pErrTxn := pErr.GetTxn().Clone() defer tc.cleanupTxn(sp, pErrTxn) case *roachpb.OpRequiresTxnError: panic("OpRequiresTxnError must not happen at this level") case *roachpb.ReadWithinUncertaintyIntervalError: // Mark the host as certain. See the protobuf comment for // Transaction.CertainNodes for details. if t.NodeID == 0 { panic("no replica set in header on uncertainty restart") } newTxn.Update(pErr.GetTxn()) newTxn.CertainNodes.Add(t.NodeID) // If the reader encountered a newer write within the uncertainty // interval, move the timestamp forward, just past that write or // up to MaxTimestamp, whichever comes first. candidateTS := newTxn.MaxTimestamp candidateTS.Backward(t.ExistingTimestamp.Add(0, 1)) newTxn.Timestamp.Forward(candidateTS) newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp) pErr.SetTxn(newTxn) case *roachpb.TransactionAbortedError: newTxn.Update(pErr.GetTxn()) // Increase timestamp if applicable. newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp) newTxn.Priority = pErr.GetTxn().Priority pErr.SetTxn(newTxn) // Clean up the freshly aborted transaction in defer(), avoiding a // race with the state update below. defer tc.cleanupTxn(sp, *pErr.GetTxn()) case *roachpb.TransactionPushError: newTxn.Update(pErr.GetTxn()) // Increase timestamp if applicable, ensuring that we're // just ahead of the pushee. newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1)) newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp) pErr.SetTxn(newTxn) case *roachpb.TransactionRetryError: newTxn.Update(pErr.GetTxn()) newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp) pErr.SetTxn(newTxn) } if newTxn.ID == nil { return pErr } txnID := *newTxn.ID tc.Lock() defer tc.Unlock() txnMeta := tc.txns[txnID] // For successful transactional requests, keep the written intents and // the updated transaction record to be sent along with the reply. // The transaction metadata is created with the first writing operation. 
// A tricky edge case is that of a transaction which "fails" on the // first writing request, but actually manages to write some intents // (for example, due to being multi-range). In this case, there will // be an error, but the transaction will be marked as Writing and the // coordinator must track the state, for the client's retry will be // performed with a Writing transaction which the coordinator rejects // unless it is tracking it (on top of it making sense to track it; // after all, it **has** laid down intents and only the coordinator // can augment a potential EndTransaction call). See #3303. intents := ba.GetIntentSpans() if len(intents) > 0 && (pErr == nil || newTxn.Writing) { if txnMeta == nil { if !newTxn.Writing { panic("txn with intents marked as non-writing") } // If the transaction is already over, there's no point in // launching a one-off coordinator which will shut down right // away. If we ended up here with an error, we'll always start // the coordinator - the transaction has laid down intents, so // we expect it to be committed/aborted at some point in the // future. if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding { sp.LogEvent("coordinator spawns") txnMeta = &txnMetadata{ txn: *newTxn, keys: cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } tc.txns[txnID] = txnMeta if !tc.stopper.RunAsyncTask(func() { tc.heartbeatLoop(txnID) }) { // The system is already draining and we can't start the // heartbeat. We refuse new transactions for now because // they're likely not going to have all intents committed. // In principle, we can relax this as needed though. tc.unregisterTxnLocked(txnID) return roachpb.NewError(&roachpb.NodeUnavailableError{}) } } } } // Update our record of this transaction, even on error. if txnMeta != nil { txnMeta.txn = *newTxn if !txnMeta.txn.Writing { panic("tracking a non-writing txn") } txnMeta.setLastUpdate(tc.clock.PhysicalNow()) // Adding the intents even on error reduces the likelihood of dangling // intents blocking concurrent writers for extended periods of time. // See #3346. for _, intent := range intents { txnMeta.addKeyRange(intent.Key, intent.EndKey) } } if pErr == nil { // For successful transactional requests, always send the updated txn // record back. br.Txn = newTxn } return pErr }
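// Worked sketch of the uncertainty-interval restart computed in updateState
// above: the txn's new timestamp is the existing write's timestamp plus one
// logical tick, capped at the txn's MaxTimestamp, and it never moves backwards.
// Illustrative only; the real logic is the candidateTS block above.
func uncertaintyRestartTS(cur, maxTS, existing roachpb.Timestamp) roachpb.Timestamp {
	candidate := maxTS
	candidate.Backward(existing.Add(0, 1)) // candidate = min(maxTS, existing + 1 logical)
	cur.Forward(candidate)                 // only ever move the txn forward
	return cur
}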
// TestTxnPutOutOfOrder tests a case where a put operation of an older // timestamp comes after a put operation of a newer timestamp in a // txn. The test ensures such an out-of-order put succeeds and // overwrites an old value. The test uses a "Writer" and a "Reader" // to reproduce an out-of-order put. // // 1) The Writer executes a put operation and writes a write intent with // time T in a txn. // 2) Before the Writer's txn is committed, the Reader sends a high priority // get operation with time T+100. This pushes the Writer txn timestamp to // T+100 and triggers the restart of the Writer's txn. The original // write intent timestamp is also updated to T+100. // 3) The Writer starts a new epoch of the txn, but before it writes, the // Reader sends another high priority get operation with time T+200. This // pushes the Writer txn timestamp to T+200 to trigger a restart of the // Writer txn. The Writer will not actually restart until it tries to commit // the current epoch of the transaction. The Reader updates the timestamp of // the write intent to T+200. The test deliberately fails the Reader get // operation, and cockroach doesn't update its read timestamp cache. // 4) The Writer executes the put operation again. This put operation comes // out-of-order since its timestamp is T+100, while the intent timestamp // updated at Step 3 is T+200. // 5) The put operation overwrites the old value using timestamp T+100. // 6) When the Writer attempts to commit its txn, the txn will be restarted // again at a new epoch timestamp T+200, which will finally succeed. func TestTxnPutOutOfOrder(t *testing.T) { defer leaktest.AfterTest(t)() key := "key" // Set up a filter so that the get operation at Step 3 will return an error. var numGets int32 manualClock := hlc.NewManualClock(0) clock := hlc.NewClock(manualClock.UnixNano) stopper := stop.NewStopper() defer stopper.Stop() ctx := storage.TestStoreContext() ctx.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok && filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) && filterArgs.Hdr.Txn == nil { // The Reader executes two get operations, each of which triggers two get requests // (the first request fails and triggers txn push, and then the second request // succeeds). Returns an error for the fourth get request to avoid timestamp cache // update after the third get operation pushes the txn timestamp. if atomic.AddInt32(&numGets, 1) == 4 { return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn) } } return nil } store := createTestStoreWithEngine(t, engine.NewInMem(roachpb.Attributes{}, 10<<20, stopper), clock, true, ctx, stopper) // Put an initial value. initVal := []byte("initVal") err := store.DB().Put(key, initVal) if err != nil { t.Fatalf("failed to put: %s", err) } waitPut := make(chan struct{}) waitFirstGet := make(chan struct{}) waitTxnRestart := make(chan struct{}) waitSecondGet := make(chan struct{}) waitTxnComplete := make(chan struct{}) // Start the Writer. go func() { epoch := -1 // Start a txn that does read-after-write. // The txn will be restarted twice, and the out-of-order put // will happen in the second epoch. if err := store.DB().Txn(func(txn *client.Txn) error { epoch++ if epoch == 1 { // Wait until the second get operation is issued. 
close(waitTxnRestart) <-waitSecondGet } updatedVal := []byte("updatedVal") if err := txn.Put(key, updatedVal); err != nil { return err } // Make sure a get will return the value that was just written. actual, err := txn.Get(key) if err != nil { return err } if !bytes.Equal(actual.ValueBytes(), updatedVal) { t.Fatalf("unexpected get result: %s", actual) } if epoch == 0 { // Wait until the first get operation will push the txn timestamp. close(waitPut) <-waitFirstGet } b := txn.NewBatch() return txn.CommitInBatch(b) }); err != nil { t.Fatal(err) } if epoch != 2 { t.Fatalf("unexpected number of txn retries: %d", epoch) } close(waitTxnComplete) }() <-waitPut // Start the Reader. // Advance the clock and send a get operation with higher // priority to trigger the txn restart. manualClock.Increment(100) priority := roachpb.UserPriority(-math.MaxInt32) requestHeader := roachpb.Span{ Key: roachpb.Key(key), } ts := clock.Now() if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Timestamp: ts, UserPriority: priority, }, &roachpb.GetRequest{Span: requestHeader}); err != nil { t.Fatalf("failed to get: %s", err) } // Wait until the writer restarts the txn. close(waitFirstGet) <-waitTxnRestart // Advance the clock and send a get operation again. This time // we use TestingCommandFilter so that a get operation is not // processed after the write intent is resolved (to prevent the // timestamp cache from being updated). manualClock.Increment(100) ts = clock.Now() if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Timestamp: ts, UserPriority: priority, }, &roachpb.GetRequest{Span: requestHeader}); err == nil { t.Fatal("unexpected success of get") } close(waitSecondGet) <-waitTxnComplete }
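
// The push mechanics in the test above boil down to simple timestamp
// arithmetic: a higher-priority reader at time T+100 forces the writer's
// provisional commit timestamp forward to just past the read, and the writer
// only notices the bump (and restarts) when it tries to commit the epoch.
// Below is a minimal sketch of that forward-only ratchet, using an
// illustrative timestamp type and forward function rather than cockroach's
// hlc package.
package main

import "fmt"

// timestamp is a simplified wall/logical pair, ordered lexicographically.
type timestamp struct{ wall, logical int64 }

func (t timestamp) less(o timestamp) bool {
	return t.wall < o.wall || (t.wall == o.wall && t.logical < o.logical)
}

// forward ratchets a txn's timestamp to just past a conflicting read; it
// never moves the timestamp backwards.
func forward(txnTS, readTS timestamp) timestamp {
	pushed := timestamp{readTS.wall, readTS.logical + 1}
	if txnTS.less(pushed) {
		return pushed
	}
	return txnTS
}

func main() {
	writer := timestamp{wall: 0, logical: 1}        // intent written at T
	writer = forward(writer, timestamp{wall: 100})  // first reader at T+100
	writer = forward(writer, timestamp{wall: 200})  // second reader at T+200
	fmt.Println(writer)                             // {200 1}: the commit finally restarts at T+200
}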
// Test that a read which encounters a TransactionRetryError is retried until
// it succeeds. The test is designed so that if the proto timestamps are bumped
// during retry a failure will occur.
func TestAsOfRetry(t *testing.T) {
	defer leaktest.AfterTest(t)()

	params, cmdFilters := createTestServerParams()
	// Disable one phase commits because they cannot be restarted.
	params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	const val1 = 1
	const val2 = 2
	const name = "boulanger"

	if _, err := sqlDB.Exec(fmt.Sprintf(`
			CREATE DATABASE d;
			CREATE TABLE d.t (s STRING PRIMARY KEY, a INT);
			INSERT INTO d.t (s, a) VALUES ('%v', %v);
		`, name, val1)); err != nil {
		t.Fatal(err)
	}
	var walltime int64
	if err := sqlDB.QueryRow("UPDATE d.t SET a = $1 RETURNING cluster_logical_timestamp()::int", val2).Scan(&walltime); err != nil {
		t.Fatal(err)
	}
	tsVal2 := time.Unix(0, walltime).Format(time.RFC3339Nano)
	tsVal1 := time.Unix(0, walltime-1).Format(time.RFC3339Nano)

	// Set up error injection that causes retries.
	magicVals := createFilterVals(nil, nil)
	magicVals.restartCounts = map[string]int{
		name: 5,
	}
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			magicVals.Lock()
			defer magicVals.Unlock()

			switch req := args.Req.(type) {
			case *roachpb.ScanRequest:
				for key, count := range magicVals.restartCounts {
					checkCorrectTxn(string(req.Key), magicVals, args.Hdr.Txn)
					if count > 0 && bytes.Contains(req.Key, []byte(key)) {
						magicVals.restartCounts[key]--
						err := roachpb.NewTransactionRetryError()
						magicVals.failedValues[string(req.Key)] = failureRecord{err, args.Hdr.Txn}
						txn := args.Hdr.Txn.Clone()
						txn.Timestamp = txn.Timestamp.Add(0, 1)
						return roachpb.NewErrorWithTxn(err, &txn)
					}
				}
			}
			return nil
		}, false)

	var i int
	// Query with tsVal1 which should return the first value. Since tsVal1 is just
	// one nanosecond before tsVal2, any proto timestamp bumping will return val2
	// and error.
	// Must specify the WHERE here to trigger the injection errors.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s' WHERE s = '%s'", tsVal1, name)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val1 {
		t.Fatalf("unexpected val: %v", i)
	}
	cleanupFilter()
	// Verify that the retry errors were injected.
	checkRestarts(t, magicVals)

	// Query with tsVal2 to ensure val2 is indeed present.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s'", tsVal2)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val2 {
		t.Fatalf("unexpected val: %v", i)
	}
}
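
// The one-nanosecond gap between tsVal1 and tsVal2 is what makes the test
// sensitive to timestamp bumps: reading as of walltime-1 must see val1, while
// reading as of walltime sees val2, so any retry that silently advances the
// read timestamp by even a single tick flips the result. Below is a small
// sketch of the conversion the test performs from a cluster wall time into
// the strings fed to AS OF SYSTEM TIME; the walltime value is hypothetical
// and not tied to a running cluster.
package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical result of cluster_logical_timestamp()::int, in nanoseconds.
	walltime := int64(1600000000000000042)
	tsVal2 := time.Unix(0, walltime).Format(time.RFC3339Nano)
	tsVal1 := time.Unix(0, walltime-1).Format(time.RFC3339Nano)
	fmt.Printf("SELECT a FROM d.t AS OF SYSTEM TIME '%s' -- sees the old value\n", tsVal1)
	fmt.Printf("SELECT a FROM d.t AS OF SYSTEM TIME '%s' -- sees the new value\n", tsVal2)
}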
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
// response transaction's timestamp and priority as appropriate.
func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	origTS := makeTS(123, 0)
	plus10 := origTS.Add(10, 10)
	plus20 := plus10.Add(10, 0)
	testCases := []struct {
		pErr             *roachpb.Error
		expEpoch         uint32
		expPri           int32
		expTS, expOrigTS roachpb.Timestamp
		nodeSeen         bool
	}{
		{
			// No error, so nothing interesting either.
			pErr:      nil,
			expEpoch:  0,
			expPri:    1,
			expTS:     origTS,
			expOrigTS: origTS,
		},
		{
			// On uncertainty error, new epoch begins and node is seen.
			// Timestamp moves ahead of the existing write.
			pErr: func() *roachpb.Error {
				pErr := roachpb.NewErrorWithTxn(
					roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp),
					&roachpb.Transaction{})
				const nodeID = 1
				pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10)
				pErr.OriginNode = nodeID
				return pErr
			}(),
			expEpoch:  1,
			expPri:    1,
			expTS:     plus10,
			expOrigTS: plus10,
			nodeSeen:  true,
		},
		{
			// On abort, nothing changes but we get a new priority to use for
			// the next attempt.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus20, Priority: 10},
				}),
			expPri: 10,
		},
		{
			// On failed push, new epoch begins just past the pushed timestamp.
			// Additionally, priority ratchets up to just below the pusher's.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
				PusheeTxn: roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			}, &roachpb.Transaction{}),
			expEpoch:  1,
			expPri:    9,
			expTS:     plus10,
			expOrigTS: plus10,
		},
		{
			// On retry, restart with new epoch, timestamp and priority.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			),
			expEpoch:  1,
			expPri:    10,
			expTS:     plus10,
			expOrigTS: plus10,
		},
	}

	for i, test := range testCases {
		stopper := stop.NewStopper()
		manual := hlc.NewManualClock(origTS.WallTime)
		clock := hlc.NewClock(manual.UnixNano)
		clock.SetMaxOffset(20)

		ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			var reply *roachpb.BatchResponse
			if test.pErr == nil {
				reply = ba.CreateReply()
			}
			return reply, test.pErr
		}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))
		db := client.NewDB(ts)
		txn := client.NewTxn(context.Background(), *db)
		txn.InternalSetPriority(1)
		txn.Proto.Name = "test txn"
		key := roachpb.Key("test-key")
		_, err := txn.Get(key)
		teardownHeartbeats(ts)
		stopper.Stop()

		if test.pErr != nil && err == nil {
			t.Fatalf("expected an error")
		}
		if txn.Proto.Epoch != test.expEpoch {
			t.Errorf("%d: expected epoch = %d; got %d",
				i, test.expEpoch, txn.Proto.Epoch)
		}
		if txn.Proto.Priority != test.expPri {
			t.Errorf("%d: expected priority = %d; got %d",
				i, test.expPri, txn.Proto.Priority)
		}
		if !txn.Proto.Timestamp.Equal(test.expTS) {
			t.Errorf("%d: expected timestamp to be %s; got %s",
				i, test.expTS, txn.Proto.Timestamp)
		}
		if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) {
			t.Errorf("%d: expected orig timestamp to be %s; got %s",
				i, test.expOrigTS, txn.Proto.OrigTimestamp)
		}
		if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
			t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v",
				i, test.nodeSeen, ns)
		}
	}
}
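
// The expected priorities in the push-error case above encode a simple
// ratchet: the pushed transaction restarts with enough priority to avoid
// losing the same conflict again, but never more than one below the winner's.
// Below is a minimal sketch of the rule behind that row's expPri of 9; the
// restartPriority function is an illustration of the expectation the test
// checks, not the actual roachpb restart logic, and it does not cover the
// abort case, where the restarted txn simply adopts the error's priority.
package main

import "fmt"

// restartPriority returns the priority a transaction could carry into its
// next attempt after losing to a conflicting transaction with winnerPriority.
func restartPriority(current, winnerPriority int32) int32 {
	// Ratchet up to just below the winner, but never ratchet down.
	if p := winnerPriority - 1; p > current {
		return p
	}
	return current
}

func main() {
	fmt.Println(restartPriority(1, 10)) // 9: pushed by a priority-10 txn, matching the test's expPri
	fmt.Println(restartPriority(7, 3))  // 7: never lowered by a weaker conflict
}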