// maybeRejectClientLocked checks whether the (transactional) request is in a
// state that prevents it from continuing, such as the coordinator having
// considered the client abandoned, or a heartbeat having reported an error.
func (tc *TxnCoordSender) maybeRejectClientLocked(
	ctx context.Context, txn roachpb.Transaction,
) *roachpb.Error {
	if !txn.Writing {
		return nil
	}
	txnMeta, ok := tc.txns[*txn.ID]
	// Check whether the transaction is still tracked and has a chance of
	// completing. It's possible that the coordinator learns about the
	// transaction having terminated from a heartbeat, and GC queue correctness
	// (along with common sense) mandates that we don't let the client
	// continue.
	switch {
	case !ok:
		log.VEventf(ctx, 2, "rejecting unknown txn: %s", txn.ID)
		// TODO(spencerkimball): Could add coordinator node ID to the
		// transaction session so that we can definitively return the right
		// error between these possible errors. Or update the code to make an
		// educated guess based on the incoming transaction timestamp.
		return roachpb.NewError(errNoState)
	case txnMeta.txn.Status == roachpb.ABORTED:
		txn := txnMeta.txn.Clone()
		tc.cleanupTxnLocked(ctx, txn)
		return roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn)
	case txnMeta.txn.Status == roachpb.COMMITTED:
		txn := txnMeta.txn.Clone()
		tc.cleanupTxnLocked(ctx, txn)
		return roachpb.NewErrorWithTxn(roachpb.NewTransactionStatusError(
			"transaction is already committed"), &txn)
	default:
		return nil
	}
}
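// Illustrative sketch (not taken from the library): how a coordinator's send
// path might consult maybeRejectClientLocked before forwarding a transactional
// batch. The method name below and the wrapped-sender field are assumptions
// made for illustration only.
func (tc *TxnCoordSender) sendLockedSketch(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	if ba.Txn != nil {
		if pErr := tc.maybeRejectClientLocked(ctx, *ba.Txn); pErr != nil {
			// The txn is aborted, committed, or no longer tracked; fail fast
			// instead of sending the batch downstream.
			return nil, pErr
		}
	}
	return tc.wrapped.Send(ctx, ba) // hypothetical wrapped sender
}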
// TestEndWriteRestartReadOnlyTransaction verifies that if a transaction
// writes, then restarts and turns read-only, an explicit EndTransaction call
// is still sent if the retryable closure didn't send one, regardless of
// whether there is an error or not.
func TestEndWriteRestartReadOnlyTransaction(t *testing.T) {
	defer leaktest.AfterTest(t)()
	for _, success := range []bool{true, false} {
		expCalls := []roachpb.Method{roachpb.BeginTransaction, roachpb.Put, roachpb.EndTransaction}
		var calls []roachpb.Method
		db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			calls = append(calls, ba.Methods()...)
			return ba.CreateReply(), nil
		}, nil))
		ok := false
		if err := db.Txn(context.TODO(), func(txn *Txn) error {
			if !ok {
				if err := txn.Put("consider", "phlebas"); err != nil {
					t.Fatal(err)
				}
				ok = true
				// Return an immediate txn retry error. We need to go through the pErr
				// and back to get a RetryableTxnError.
				return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError()
			}
			if !success {
				return errors.New("aborting on purpose")
			}
			return nil
		}); err == nil != success {
			t.Errorf("expected error: %t, got error: %v", !success, err)
		}
		if !reflect.DeepEqual(expCalls, calls) {
			t.Fatalf("expected %v, got %v", expCalls, calls)
		}
	}
}
// TestNonRetryableErrorOnCommit verifies that a non-retryable error from the
// execution of EndTransactionRequests is propagated to the client.
func TestNonRetryableErrorOnCommit(t *testing.T) {
	defer leaktest.AfterTest(t)()
	params, cmdFilters := createTestServerParams()
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	hitError := false
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if req, ok := args.Req.(*roachpb.EndTransactionRequest); ok {
				if bytes.Contains(req.Key, []byte(keys.DescIDGenerator)) {
					hitError = true
					return roachpb.NewErrorWithTxn(fmt.Errorf("testError"), args.Hdr.Txn)
				}
			}
			return nil
		}, false)
	defer cleanupFilter()

	if _, err := sqlDB.Exec("CREATE DATABASE t"); !testutils.IsError(err, "pq: testError") {
		t.Errorf("unexpected error %v", err)
	}
	if !hitError {
		t.Errorf("expected to hit error, but it didn't happen")
	}
}
// Tests that a retryable error for an inner txn doesn't cause the outer txn to
// be retried.
func TestWrongTxnRetry(t *testing.T) {
	defer leaktest.AfterTest(t)()
	db := NewDB(newTestSender(nil, nil))

	var retries int
	txnClosure := func(outerTxn *Txn) error {
		log.Infof(context.Background(), "outer retry")
		retries++
		// Ensure the KV transaction is created.
		if err := outerTxn.Put("a", "b"); err != nil {
			t.Fatal(err)
		}
		var execOpt TxnExecOptions
		execOpt.AutoRetry = false
		err := outerTxn.Exec(
			execOpt,
			func(innerTxn *Txn, opt *TxnExecOptions) error {
				// Ensure the KV transaction is created.
				if err := innerTxn.Put("x", "y"); err != nil {
					t.Fatal(err)
				}
				return roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
					PusheeTxn: outerTxn.Proto}, &innerTxn.Proto).GoError()
			})
		return err
	}

	if err := db.Txn(context.TODO(), txnClosure); !testutils.IsError(err, "failed to push") {
		t.Fatal(err)
	}
	if retries != 1 {
		t.Fatalf("unexpected retries: %d", retries)
	}
}
func TestAbortCountConflictingWrites(t *testing.T) {
	defer leaktest.AfterTest(t)()

	params, cmdFilters := createTestServerParams()
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	if _, err := sqlDB.Exec("CREATE DATABASE db"); err != nil {
		t.Fatal(err)
	}
	if _, err := sqlDB.Exec("CREATE TABLE db.t (k TEXT PRIMARY KEY, v TEXT)"); err != nil {
		t.Fatal(err)
	}

	// Inject errors on the INSERT below.
	restarted := false
	cmdFilters.AppendFilter(func(args storagebase.FilterArgs) *roachpb.Error {
		switch req := args.Req.(type) {
		// SQL INSERT generates ConditionalPuts for unique indexes (such as the PK).
		case *roachpb.ConditionalPutRequest:
			if bytes.Contains(req.Value.RawBytes, []byte("marker")) && !restarted {
				restarted = true
				return roachpb.NewErrorWithTxn(
					roachpb.NewTransactionAbortedError(), args.Hdr.Txn)
			}
		}
		return nil
	}, false)

	txn, err := sqlDB.Begin()
	if err != nil {
		t.Fatal(err)
	}
	_, err = txn.Exec("INSERT INTO db.t VALUES ('key', 'marker')")
	if !testutils.IsError(err, "aborted") {
		t.Fatal(err)
	}

	if err = txn.Rollback(); err != nil {
		t.Fatal(err)
	}

	if err := checkCounterEQ(s, sql.MetaTxnAbort, 1); err != nil {
		t.Error(err)
	}
	if err := checkCounterEQ(s, sql.MetaTxnBegin, 1); err != nil {
		t.Error(err)
	}
	if err := checkCounterEQ(s, sql.MetaTxnRollback, 0); err != nil {
		t.Error(err)
	}
	if err := checkCounterEQ(s, sql.MetaTxnCommit, 0); err != nil {
		t.Error(err)
	}
	if err := checkCounterEQ(s, sql.MetaInsert, 1); err != nil {
		t.Error(err)
	}
}
// TestRunTransactionRetryOnErrors verifies that the transaction
// is retried on the correct errors.
func TestRunTransactionRetryOnErrors(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		err   error
		retry bool // Expect retry?
	}{
		{roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true},
		{&roachpb.TransactionAbortedError{}, true},
		{&roachpb.TransactionPushError{}, true},
		{&roachpb.TransactionRetryError{}, true},
		{&roachpb.WriteTooOldError{}, true},
		{&roachpb.RangeNotFoundError{}, false},
		{&roachpb.RangeKeyMismatchError{}, false},
		{&roachpb.TransactionStatusError{}, false},
	}

	for i, test := range testCases {
		count := 0
		dbCtx := DefaultDBContext()
		dbCtx.TxnRetryOptions.InitialBackoff = 1 * time.Millisecond
		db := NewDBWithContext(newTestSender(
			func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
				if _, ok := ba.GetArg(roachpb.Put); ok {
					count++
					if count == 1 {
						return nil, roachpb.NewErrorWithTxn(test.err, ba.Txn)
					}
				}
				return ba.CreateReply(), nil
			}, nil), dbCtx)
		err := db.Txn(context.TODO(), func(txn *Txn) error {
			return txn.Put("a", "b")
		})
		if test.retry {
			if count != 2 {
				t.Errorf("%d: expected one retry; got %d", i, count-1)
			}
			if err != nil {
				t.Errorf("%d: expected success on retry; got %s", i, err)
			}
		} else {
			if count != 1 {
				t.Errorf("%d: expected no retries; got %d", i, count)
			}
			if reflect.TypeOf(err) != reflect.TypeOf(test.err) {
				t.Errorf("%d: expected error of type %T; got %T", i, test.err, err)
			}
		}
	}
}
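// Illustrative sketch (not the library's implementation): the classification
// encoded by the table above amounts to a type switch on the error detail.
// Retryable errors cause the txn closure to be re-run; the rest surface to the
// caller unchanged.
func isRetryableForTxnSketch(err error) bool {
	switch err.(type) {
	case *roachpb.ReadWithinUncertaintyIntervalError,
		*roachpb.TransactionAbortedError,
		*roachpb.TransactionPushError,
		*roachpb.TransactionRetryError,
		*roachpb.WriteTooOldError:
		return true // the txn restarts and the Put is re-issued
	default:
		return false // e.g. RangeNotFound, RangeKeyMismatch, TransactionStatusError
	}
}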
// TestTxnResetTxnOnAbort verifies transaction is reset on abort.
func TestTxnResetTxnOnAbort(t *testing.T) {
	defer leaktest.AfterTest(t)()
	db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		return nil, roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{}, ba.Txn)
	}, nil))

	txn := NewTxn(context.Background(), *db)
	_, pErr := txn.sendInternal(testPut())
	if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); !ok {
		t.Fatalf("expected TransactionAbortedError, got %v", pErr)
	}
	if txn.Proto.ID != nil {
		t.Errorf("expected txn to be cleared")
	}
}
// TestTransactionKeyNotChangedInRestart verifies that if the transaction
// already has a key (we're in a restart), the key in the begin transaction
// request is not changed.
func TestTransactionKeyNotChangedInRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tries := 0
	db := NewDB(newTestSender(nil, func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		var bt *roachpb.BeginTransactionRequest
		if args, ok := ba.GetArg(roachpb.BeginTransaction); ok {
			bt = args.(*roachpb.BeginTransactionRequest)
		} else {
			t.Fatal("failed to find a begin transaction request")
		}

		// In the first try, the transaction key is the key of the first write
		// command. Before the second try, the transaction key is set to txnKey
		// by the test sender. In the second try, the transaction key is txnKey.
		var expectedKey roachpb.Key
		if tries == 1 {
			expectedKey = testKey
		} else {
			expectedKey = txnKey
		}
		if !bt.Key.Equal(expectedKey) {
			t.Fatalf("expected transaction key %v, got %v", expectedKey, bt.Key)
		}
		return ba.CreateReply(), nil
	}))

	if err := db.Txn(context.TODO(), func(txn *Txn) error {
		tries++
		b := txn.NewBatch()
		b.Put("a", "b")
		if err := txn.Run(b); err != nil {
			t.Fatal(err)
		}
		if tries == 1 {
			return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError()
		}
		return nil
	}); err != nil {
		t.Errorf("unexpected error on commit: %s", err)
	}
	minimumTries := 2
	if tries < minimumTries {
		t.Errorf("expected try count >= %d, got %d", minimumTries, tries)
	}
}
// TestNonRetryableError verifies that a non-retryable error is propagated to the client.
func TestNonRetryableError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	params, cmdFilters := createTestServerParams()
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	testKey := []byte("test_key")
	hitError := false
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if req, ok := args.Req.(*roachpb.ScanRequest); ok {
				if bytes.Contains(req.Key, testKey) {
					hitError = true
					return roachpb.NewErrorWithTxn(fmt.Errorf("testError"), args.Hdr.Txn)
				}
			}
			return nil
		}, false)
	defer cleanupFilter()

	// We need to do everything on one connection as we'll want to observe the
	// connection state after a COMMIT.
	sqlDB.SetMaxOpenConns(1)
	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT);
INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val');
SELECT * from t.test WHERE k = 'test_key';
`); !testutils.IsError(err, "pq: testError") {
		t.Errorf("unexpected error %v", err)
	}
	if !hitError {
		t.Errorf("expected to hit error, but it didn't happen")
	}
}
// TestPropagateTxnOnError verifies that DistSender.sendBatch properly
// propagates the txn data to a next iteration. Use txn.Writing field to
// verify that.
func TestPropagateTxnOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var storeKnobs storage.StoreTestingKnobs
	// Set up a filter so that the first CPut operation will
	// get a ReadWithinUncertaintyIntervalError.
	targetKey := roachpb.Key("b")
	var numGets int32
	storeKnobs.TestingCommandFilter = func(fArgs storagebase.FilterArgs) *roachpb.Error {
		_, ok := fArgs.Req.(*roachpb.ConditionalPutRequest)
		if ok && fArgs.Req.Header().Key.Equal(targetKey) {
			if atomic.AddInt32(&numGets, 1) == 1 {
				z := hlc.ZeroTimestamp
				pErr := roachpb.NewReadWithinUncertaintyIntervalError(z, z)
				return roachpb.NewErrorWithTxn(pErr, fArgs.Hdr.Txn)
			}
		}
		return nil
	}
	s, _, _ := serverutils.StartServer(t,
		base.TestServerArgs{Knobs: base.TestingKnobs{Store: &storeKnobs}})
	defer s.Stopper().Stop()

	db := setupMultipleRanges(t, s, "b")

	// Set the initial value on the target key "b".
	origVal := "val"
	if err := db.Put(context.TODO(), targetKey, origVal); err != nil {
		t.Fatal(err)
	}

	// The following txn creates a batch request that is split
	// into two requests: Put and CPut. The CPut operation will
	// get a ReadWithinUncertaintyIntervalError and the txn will be
	// retried.
	epoch := 0
	if err := db.Txn(context.TODO(), func(txn *client.Txn) error {
		epoch++

		if epoch >= 2 {
			// Writing must be true since we ran the BeginTransaction command.
			if !txn.Proto.Writing {
				t.Errorf("unexpected non-writing txn")
			}
		} else {
			// Writing must be false since we haven't run any write command.
			if txn.Proto.Writing {
				t.Errorf("unexpected writing txn")
			}
		}

		b := txn.NewBatch()
		b.Put("a", "val")
		b.CPut(targetKey, "new_val", origVal)
		err := txn.CommitInBatch(b)
		if epoch == 1 {
			if retErr, ok := err.(*roachpb.RetryableTxnError); ok {
				if _, ok := retErr.Cause.(*roachpb.ReadWithinUncertaintyIntervalError); ok {
					if !retErr.Transaction.Writing {
						t.Errorf("unexpected non-writing txn on error")
					}
				} else {
					t.Errorf("expected ReadWithinUncertaintyIntervalError, but got: %s", retErr.Cause)
				}
			} else {
				t.Errorf("expected a retryable error, but got: %s", err)
			}
		}
		return err
	}); err != nil {
		t.Errorf("unexpected error on transactional Puts: %s", err)
	}

	if epoch != 2 {
		t.Errorf("unexpected epoch; the txn must be retried exactly once, but got %d", epoch)
	}
}
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
// response transaction's timestamp and priority as appropriate.
func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	origTS := makeTS(123, 0)
	plus10 := origTS.Add(10, 10)
	plus20 := plus10.Add(10, 0)
	testCases := []struct {
		pErr             *roachpb.Error
		expEpoch         uint32
		expPri           int32
		expTS, expOrigTS hlc.Timestamp
		nodeSeen         bool
	}{
		{
			// No error, so nothing interesting either.
			pErr:      nil,
			expEpoch:  0,
			expPri:    1,
			expTS:     origTS,
			expOrigTS: origTS,
		},
		{
			// On uncertainty error, new epoch begins and node is seen.
			// Timestamp moves ahead of the existing write.
			pErr: func() *roachpb.Error {
				pErr := roachpb.NewErrorWithTxn(
					roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp),
					&roachpb.Transaction{})
				const nodeID = 1
				pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10)
				pErr.OriginNode = nodeID
				return pErr
			}(),
			expEpoch:  1,
			expPri:    1,
			expTS:     plus10,
			expOrigTS: plus10,
			nodeSeen:  true,
		},
		{
			// On abort, nothing changes but we get a new priority to use for
			// the next attempt.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{},
				&roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus20, Priority: 10},
				}),
			expPri: 10,
		},
		{
			// On failed push, new epoch begins just past the pushed timestamp.
			// Additionally, priority ratchets up to just below the pusher's.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
				PusheeTxn: roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			}, &roachpb.Transaction{}),
			expEpoch:  1,
			expPri:    9,
			expTS:     plus10,
			expOrigTS: plus10,
		},
		{
			// On retry, restart with new epoch, timestamp and priority.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{},
				&roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			),
			expEpoch:  1,
			expPri:    10,
			expTS:     plus10,
			expOrigTS: plus10,
		},
	}

	for i, test := range testCases {
		stopper := stop.NewStopper()
		manual := hlc.NewManualClock(origTS.WallTime)
		clock := hlc.NewClock(manual.UnixNano, 20*time.Nanosecond)
		senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			var reply *roachpb.BatchResponse
			if test.pErr == nil {
				reply = ba.CreateReply()
			}
			return reply, test.pErr
		}
		ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
		ts := NewTxnCoordSender(
			ambient,
			senderFn(senderFunc),
			clock,
			false,
			stopper,
			MakeTxnMetrics(metric.TestSampleInterval),
		)
		db := client.NewDB(ts)
		txn := client.NewTxn(context.Background(), *db)
		txn.InternalSetPriority(1)
		txn.Proto.Name = "test txn"
		key := roachpb.Key("test-key")
		_, err := txn.Get(key)
		teardownHeartbeats(ts)
		stopper.Stop()

		if test.pErr != nil && err == nil {
			t.Fatalf("expected an error")
		}
		if txn.Proto.Epoch != test.expEpoch {
			t.Errorf("%d: expected epoch = %d; got %d",
				i, test.expEpoch, txn.Proto.Epoch)
		}
		if txn.Proto.Priority != test.expPri {
			t.Errorf("%d: expected priority = %d; got %d",
				i, test.expPri, txn.Proto.Priority)
		}
		if !txn.Proto.Timestamp.Equal(test.expTS) {
			t.Errorf("%d: expected timestamp to be %s; got %s",
				i, test.expTS, txn.Proto.Timestamp)
		}
		if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) {
			t.Errorf("%d: expected orig timestamp to be %s; got %s",
				i, test.expOrigTS, txn.Proto.OrigTimestamp)
		}
		if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
			t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v",
				i, test.nodeSeen, ns)
		}
	}
}
// TestTxnPutOutOfOrder tests a case where a put operation of an older
// timestamp comes after a put operation of a newer timestamp in a
// txn. The test ensures such an out-of-order put succeeds and
// overrides an old value. The test uses a "Writer" and a "Reader"
// to reproduce an out-of-order put.
//
// 1) The Writer executes a put operation and writes a write intent with
//    time T in a txn.
// 2) Before the Writer's txn is committed, the Reader sends a high priority
//    get operation with time T+100. This pushes the Writer txn timestamp to
//    T+100 and triggers the restart of the Writer's txn. The original
//    write intent timestamp is also updated to T+100.
// 3) The Writer starts a new epoch of the txn, but before it writes, the
//    Reader sends another high priority get operation with time T+200. This
//    pushes the Writer txn timestamp to T+200 to trigger a restart of the
//    Writer txn. The Writer will not actually restart until it tries to commit
//    the current epoch of the transaction. The Reader updates the timestamp of
//    the write intent to T+200. The test deliberately fails the Reader get
//    operation, and cockroach doesn't update its read timestamp cache.
// 4) The Writer executes the put operation again. This put operation comes
//    out-of-order since its timestamp is T+100, while the intent timestamp
//    updated at Step 3 is T+200.
// 5) The put operation overrides the old value using timestamp T+100.
// 6) When the Writer attempts to commit its txn, the txn will be restarted
//    again at a new epoch timestamp T+200, which will finally succeed.
func TestTxnPutOutOfOrder(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const key = "key"
	// Set up a filter so that the get operation at Step 3 will return an error.
	var numGets int32

	stopper := stop.NewStopper()
	defer stopper.Stop()
	manual := hlc.NewManualClock(123)
	cfg := storage.TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok &&
				filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) &&
				filterArgs.Hdr.Txn == nil {
				// The Reader executes two get operations, each of which triggers two get requests
				// (the first request fails and triggers txn push, and then the second request
				// succeeds). Returns an error for the fourth get request to avoid timestamp cache
				// update after the third get operation pushes the txn timestamp.
				if atomic.AddInt32(&numGets, 1) == 4 {
					return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn)
				}
			}
			return nil
		}
	eng := engine.NewInMem(roachpb.Attributes{}, 10<<20)
	stopper.AddCloser(eng)
	store := createTestStoreWithEngine(t,
		eng,
		true,
		cfg,
		stopper,
	)

	// Put an initial value.
	initVal := []byte("initVal")
	err := store.DB().Put(context.TODO(), key, initVal)
	if err != nil {
		t.Fatalf("failed to put: %s", err)
	}

	waitPut := make(chan struct{})
	waitFirstGet := make(chan struct{})
	waitTxnRestart := make(chan struct{})
	waitSecondGet := make(chan struct{})
	waitTxnComplete := make(chan struct{})

	// Start the Writer.
	go func() {
		epoch := -1
		// Start a txn that does read-after-write.
		// The txn will be restarted twice, and the out-of-order put
		// will happen in the second epoch.
		if err := store.DB().Txn(context.TODO(), func(txn *client.Txn) error {
			epoch++

			if epoch == 1 {
				// Wait until the second get operation is issued.
				close(waitTxnRestart)
				<-waitSecondGet
			}

			updatedVal := []byte("updatedVal")
			if err := txn.Put(key, updatedVal); err != nil {
				return err
			}

			// Make sure a get will return the value that was just written.
			actual, err := txn.Get(key)
			if err != nil {
				return err
			}
			if !bytes.Equal(actual.ValueBytes(), updatedVal) {
				t.Fatalf("unexpected get result: %s", actual)
			}

			if epoch == 0 {
				// Wait until the first get operation will push the txn timestamp.
				close(waitPut)
				<-waitFirstGet
			}

			b := txn.NewBatch()
			return txn.CommitInBatch(b)
		}); err != nil {
			t.Fatal(err)
		}

		if epoch != 2 {
			t.Fatalf("unexpected number of txn retries: %d", epoch)
		}

		close(waitTxnComplete)
	}()

	<-waitPut

	// Start the Reader.
	// Advance the clock and send a get operation with higher
	// priority to trigger the txn restart.
	manual.Increment(100)

	priority := roachpb.UserPriority(-math.MaxInt32)
	requestHeader := roachpb.Span{
		Key: roachpb.Key(key),
	}
	if _, err := client.SendWrappedWith(context.Background(), rg1(store), roachpb.Header{
		Timestamp:    cfg.Clock.Now(),
		UserPriority: priority,
	}, &roachpb.GetRequest{Span: requestHeader}); err != nil {
		t.Fatalf("failed to get: %s", err)
	}

	// Wait until the writer restarts the txn.
	close(waitFirstGet)
	<-waitTxnRestart

	// Advance the clock and send a get operation again. This time
	// we use TestingCommandFilter so that a get operation is not
	// processed after the write intent is resolved (to prevent the
	// timestamp cache from being updated).
	manual.Increment(100)

	if _, err := client.SendWrappedWith(context.Background(), rg1(store), roachpb.Header{
		Timestamp:    cfg.Clock.Now(),
		UserPriority: priority,
	}, &roachpb.GetRequest{Span: requestHeader}); err == nil {
		t.Fatal("unexpected success of get")
	}

	close(waitSecondGet)
	<-waitTxnComplete
}
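// Minimal sketch (illustrative names only) of the close/receive channel
// handshake the Writer goroutine and the Reader above use to interleave their
// steps deterministically.
func channelHandshakeSketch() {
	writerReady := make(chan struct{})
	readerDone := make(chan struct{})
	writerDone := make(chan struct{})

	go func() {
		close(writerReady) // Writer: signal that the Reader may proceed.
		<-readerDone       // Writer: block until the Reader has finished its step.
		close(writerDone)
	}()

	<-writerReady
	// Reader performs its high-priority read here.
	close(readerDone)
	<-writerDone // Wait for the Writer to observe the signal before returning.
}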
// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean-up explicitly even when that may not be
// required (or even erroneous). Returns (nil, nil) for an empty batch.
func (txn *Txn) send(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if txn.Proto.Status != roachpb.PENDING || txn.IsFinalized() {
		return nil, roachpb.NewErrorf(
			"attempting to use transaction with wrong status or finalized: %s", txn.Proto.Status)
	}

	// It doesn't make sense to use inconsistent reads in a transaction. However,
	// we still need to accept it as a parameter for this to compile.
	if ba.ReadConsistency != roachpb.CONSISTENT {
		return nil, roachpb.NewErrorf("cannot use %s ReadConsistency in txn",
			ba.ReadConsistency)
	}

	lastIndex := len(ba.Requests) - 1
	if lastIndex < 0 {
		return nil, nil
	}

	// firstWriteIndex is set to the index of the first command which is
	// a transactional write. If != -1, this indicates an intention to
	// write. This is in contrast to txn.Proto.Writing, which is set by
	// the coordinator when the first intent has been created, and which
	// lives for the life of the transaction.
	firstWriteIndex := -1
	var firstWriteKey roachpb.Key

	for i, ru := range ba.Requests {
		args := ru.GetInner()
		if i < lastIndex {
			if _, ok := args.(*roachpb.EndTransactionRequest); ok {
				return nil, roachpb.NewErrorf("%s sent as non-terminal call", args.Method())
			}
		}
		if roachpb.IsTransactionWrite(args) && firstWriteIndex == -1 {
			firstWriteKey = args.Header().Key
			firstWriteIndex = i
		}
	}

	haveTxnWrite := firstWriteIndex != -1
	endTxnRequest, haveEndTxn := ba.Requests[lastIndex].GetInner().(*roachpb.EndTransactionRequest)
	needBeginTxn := !txn.Proto.Writing && haveTxnWrite
	needEndTxn := txn.Proto.Writing || haveTxnWrite
	elideEndTxn := haveEndTxn && !needEndTxn

	// If we're not yet writing in this txn, but intend to, insert a
	// begin transaction request before the first write command.
	if needBeginTxn {
		// If the transaction already has a key (we're in a restart), make
		// sure we set the key in the begin transaction request to the original.
		bt := &roachpb.BeginTransactionRequest{
			Span: roachpb.Span{
				Key: firstWriteKey,
			},
		}
		if txn.Proto.Key != nil {
			bt.Key = txn.Proto.Key
		}
		// Inject the new request before position firstWriteIndex, taking
		// care to avoid unnecessary allocations.
		oldRequests := ba.Requests
		ba.Requests = make([]roachpb.RequestUnion, len(ba.Requests)+1)
		copy(ba.Requests, oldRequests[:firstWriteIndex])
		ba.Requests[firstWriteIndex].MustSetInner(bt)
		copy(ba.Requests[firstWriteIndex+1:], oldRequests[firstWriteIndex:])
	}

	if elideEndTxn {
		ba.Requests = ba.Requests[:lastIndex]
	}

	br, pErr := txn.sendInternal(ba)
	if elideEndTxn && pErr == nil {
		// Check that read-only transactions do not violate their deadline. This can NOT
		// happen since the txn deadline is normally updated when it is about to expire
		// or expired. We will just keep the code for safety (see TestReacquireLeaseOnRestart).
		if endTxnRequest.Deadline != nil {
			if endTxnRequest.Deadline.Less(txn.Proto.Timestamp) {
				return nil, roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn.Proto)
			}
		}
		// This normally happens on the server and is sent back in response
		// headers, but this transaction was optimized away. The caller may
		// still inspect the transaction struct, so we manually update it
		// here to emulate a true transaction.
		if endTxnRequest.Commit {
			txn.Proto.Status = roachpb.COMMITTED
		} else {
			txn.Proto.Status = roachpb.ABORTED
		}
		txn.finalized = true
	}

	// If we inserted a begin transaction request, remove it here.
	if needBeginTxn {
		if br != nil && br.Responses != nil {
			br.Responses = append(br.Responses[:firstWriteIndex], br.Responses[firstWriteIndex+1:]...)
		}
		// Handle case where inserted begin txn confused an indexed error.
		if pErr != nil && pErr.Index != nil {
			idx := pErr.Index.Index
			if idx == int32(firstWriteIndex) {
				// An error was encountered on begin txn; disallow the indexing.
				pErr.Index = nil
			} else if idx > int32(firstWriteIndex) {
				// An error was encountered after begin txn; decrement index.
				pErr.SetErrorIndex(idx - 1)
			}
		}
	}
	return br, pErr
}
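// Generic, illustrative sketch (placeholder types and names, not the library's
// API) of the two bookkeeping steps send performs around the injected
// BeginTransaction: inserting an element before an index with a single
// allocation, and mapping an error index reported against the expanded batch
// back to the caller's original indexing.
func insertAtSketch(reqs []string, idx int, elem string) []string {
	old := reqs
	reqs = make([]string, len(old)+1)
	copy(reqs, old[:idx])
	reqs[idx] = elem
	copy(reqs[idx+1:], old[idx:])
	return reqs
}

// adjustErrIndexSketch returns the caller-relative index for an error reported
// at errIdx in the expanded batch; -1 means the error hit the injected entry.
func adjustErrIndexSketch(errIdx, insertedAt int) int {
	switch {
	case errIdx == insertedAt:
		return -1
	case errIdx > insertedAt:
		return errIdx - 1
	default:
		return errIdx
	}
}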
// TestTxnUserRestart tests user-directed txn restarts.
// The test injects and otherwise creates retriable errors of various kinds
// and checks that we still manage to run a txn despite them.
func TestTxnUserRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	aborter := NewTxnAborter()
	defer aborter.Close(t)
	params, cmdFilters := createTestServerParams()
	params.Knobs.SQLExecutor = aborter.executorKnobs()
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()
	{
		pgURL, cleanup := sqlutils.PGUrl(t, s.ServingAddr(), "TestTxnUserRestart", url.User(security.RootUser))
		defer cleanup()
		if err := aborter.Init(pgURL); err != nil {
			t.Fatal(err)
		}
	}

	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT);
`); err != nil {
		t.Fatal(err)
	}

	// Set up error injection that causes retries.
	testCases := []struct {
		magicVals   *filterVals
		expectedErr string
	}{
		{
			magicVals: createFilterVals(
				map[string]int{"boulanger": 2}, // restartCounts
				nil),
			expectedErr: ".*encountered previous write with future timestamp.*",
		},
		{
			magicVals: createFilterVals(
				nil,
				map[string]int{"boulanger": 2}), // abortCounts
			expectedErr: ".*txn aborted.*",
		},
	}

	for _, tc := range testCases {
		for _, rs := range []rollbackStrategy{rollbackToSavepoint, declareSavepoint} {
			cleanupFilter := cmdFilters.AppendFilter(
				func(args storagebase.FilterArgs) *roachpb.Error {
					if err := injectErrors(args.Req, args.Hdr, tc.magicVals); err != nil {
						return roachpb.NewErrorWithTxn(err, args.Hdr.Txn)
					}
					return nil
				}, false)

			// Also inject an error at RELEASE time, besides the error injected by magicVals.
			const sentinelInsert = "INSERT INTO t.test(k, v) VALUES (0, 'sentinel')"
			if err := aborter.QueueStmtForAbortion(
				sentinelInsert, 1 /* abortCount */, true, /* willBeRetriedIbid */
			); err != nil {
				t.Fatal(err)
			}

			commitCount := s.MustGetSQLCounter(sql.MetaTxnCommit.Name)
			// This is the magic. Run the txn closure until all the retries are exhausted.
			retryExec(t, sqlDB, rs, func(tx *gosql.Tx) bool {
				return runTestTxn(t, tc.magicVals, tc.expectedErr, sqlDB, tx, sentinelInsert)
			})
			checkRestarts(t, tc.magicVals)

			// Check that we only wrote the sentinel row.
			rows, err := sqlDB.Query("SELECT * FROM t.test")
			if err != nil {
				t.Fatal(err)
			}
			for rows.Next() {
				var k int
				var v string
				err = rows.Scan(&k, &v)
				if err != nil {
					t.Fatal(err)
				}
				if k != 0 || v != "sentinel" {
					t.Fatalf("didn't find expected row: %d %s", k, v)
				}
			}
			// Check that the commit counter was incremented. It could have been
			// incremented by more than 1 because of the transactions we use to force
			// aborts, plus who knows what else the server is doing in the background.
			checkCounterGE(t, s, sql.MetaTxnCommit, commitCount+1)
			// Clean up the table for the next test iteration.
			_, err = sqlDB.Exec("DELETE FROM t.test WHERE true")
			if err != nil {
				t.Fatal(err)
			}
			rows.Close()
			cleanupFilter()
		}
	}
}
// Test the logic in the sql executor for automatically retrying txns in case of
// retriable errors.
func TestTxnAutoRetry(t *testing.T) {
	defer leaktest.AfterTest(t)()
	aborter := NewTxnAborter()
	defer aborter.Close(t)
	params, cmdFilters := createTestServerParams()
	params.Knobs.SQLExecutor = aborter.executorKnobs()
	// Disable one phase commits because they cannot be restarted.
	params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()
	{
		pgURL, cleanup := sqlutils.PGUrl(t, s.ServingAddr(), "TestTxnAutoRetry", url.User(security.RootUser))
		defer cleanup()
		if err := aborter.Init(pgURL); err != nil {
			t.Fatal(err)
		}
	}

	// Make sure all the commands we send in this test are sent over the same connection.
	// This is a bit of a hack; in Go you're not supposed to have connection state
	// outside of using a db.Tx. But we can't use a db.Tx here, because we want
	// to control the batching of BEGIN/COMMIT statements.
	// This SetMaxOpenConns is pretty shady, it doesn't guarantee that you'll be using
	// the *same* one connection across calls. A proper solution would be to use a
	// lib/pq connection directly. As of Feb 2016, there's code in cli/sql_util.go to
	// do that.
	sqlDB.SetMaxOpenConns(1)

	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT, t DECIMAL);
`); err != nil {
		t.Fatal(err)
	}

	// Set up error injection that causes retries.
	magicVals := createFilterVals(nil, nil)
	magicVals.restartCounts = map[string]int{
		"boulanger": 2,
		"dromedary": 2,
		"fajita":    2,
		"hooly":     2,
		"josephine": 2,
		"laureal":   2,
	}
	magicVals.abortCounts = map[string]int{
		"boulanger": 2,
	}
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil {
				return roachpb.NewErrorWithTxn(err, args.Hdr.Txn)
			}
			return nil
		}, false)

	if err := aborter.QueueStmtForAbortion(
		"INSERT INTO t.test(k, v, t) VALUES (1, 'boulanger', cluster_logical_timestamp())",
		2 /* abortCount */, true, /* willBeRetriedIbid */
	); err != nil {
		t.Fatal(err)
	}
	if err := aborter.QueueStmtForAbortion(
		"INSERT INTO t.test(k, v, t) VALUES (2, 'dromedary', cluster_logical_timestamp())",
		2 /* abortCount */, true, /* willBeRetriedIbid */
	); err != nil {
		t.Fatal(err)
	}
	if err := aborter.QueueStmtForAbortion(
		"INSERT INTO t.test(k, v, t) VALUES (3, 'fajita', cluster_logical_timestamp())",
		2 /* abortCount */, true, /* willBeRetriedIbid */
	); err != nil {
		t.Fatal(err)
	}
	if err := aborter.QueueStmtForAbortion(
		"INSERT INTO t.test(k, v, t) VALUES (4, 'hooly', cluster_logical_timestamp())",
		2 /* abortCount */, true, /* willBeRetriedIbid */
	); err != nil {
		t.Fatal(err)
	}

	// Test that implicit txns - txns for which we see all the statements and prefixes
	// of txns (statements batched together with the BEGIN stmt) - are retried.
	// We also exercise the SQL cluster logical timestamp in here, because
	// this must be properly propagated across retries.
	//
	// The SELECT within the transaction also checks that discarded
	// intermediate result sets are properly released: the result set it
	// produces is accounted for by the session monitor, and if it is
	// not properly released upon a retry the monitor will cause the
	// server to panic (and thus the test to fail) when the connection
	// is closed.
	//
	// TODO(knz) This test can be made more robust by exposing the
	// current allocation count in monitor and checking that it has the
	// same value at the beginning of each retry.
	if _, err := sqlDB.Exec(`
INSERT INTO t.test(k, v, t) VALUES (1, 'boulanger', cluster_logical_timestamp());
BEGIN;
SELECT * FROM t.test;
INSERT INTO t.test(k, v, t) VALUES (2, 'dromedary', cluster_logical_timestamp());
INSERT INTO t.test(k, v, t) VALUES (3, 'fajita', cluster_logical_timestamp());
END;
INSERT INTO t.test(k, v, t) VALUES (4, 'hooly', cluster_logical_timestamp());
BEGIN;
INSERT INTO t.test(k, v, t) VALUES (5, 'josephine', cluster_logical_timestamp());
INSERT INTO t.test(k, v, t) VALUES (6, 'laureal', cluster_logical_timestamp());
`); err != nil {
		t.Fatal(err)
	}
	cleanupFilter()

	checkRestarts(t, magicVals)

	if _, err := sqlDB.Exec("END"); err != nil {
		t.Fatal(err)
	}

	// Check that the txns succeeded by reading the rows.
	var count int
	if err := sqlDB.QueryRow("SELECT count(*) FROM t.test").Scan(&count); err != nil {
		t.Fatal(err)
	}
	if count != 6 {
		t.Fatalf("Expected 6 rows, got %d", count)
	}

	// Now test that we don't retry what we shouldn't: insert an error into a txn
	// we can't automatically retry (because it spans requests).
	magicVals = createFilterVals(nil, nil)
	magicVals.restartCounts = map[string]int{
		"hooly": 2,
	}
	cleanupFilter = cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if err := injectErrors(args.Req, args.Hdr, magicVals); err != nil {
				return roachpb.NewErrorWithTxn(err, args.Hdr.Txn)
			}
			return nil
		}, false)
	defer cleanupFilter()

	// Start a txn.
	if _, err := sqlDB.Exec(`
DELETE FROM t.test WHERE true;
BEGIN;
`); err != nil {
		t.Fatal(err)
	}

	// Continue the txn in a new request, which is not retriable.
	_, err := sqlDB.Exec("INSERT INTO t.test(k, v, t) VALUES (4, 'hooly', cluster_logical_timestamp())")
	if !testutils.IsError(
		err, "encountered previous write with future timestamp") {
		t.Errorf("didn't get expected injected error. Got: %v", err)
	}
}
// Test that a TransactionRetryError will retry the read until it succeeds. The
// test is designed so that if the proto timestamps are bumped during retry
// a failure will occur.
func TestAsOfRetry(t *testing.T) {
	defer leaktest.AfterTest(t)()

	params, cmdFilters := createTestServerParams()
	// Disable one phase commits because they cannot be restarted.
	params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	const val1 = 1
	const val2 = 2
	const name = "boulanger"

	if _, err := sqlDB.Exec(`
CREATE DATABASE d;
CREATE TABLE d.t (s STRING PRIMARY KEY, a INT);
`); err != nil {
		t.Fatal(err)
	}
	var tsStart string
	if err := sqlDB.QueryRow(`
INSERT INTO d.t (s, a) VALUES ($1, $2)
RETURNING cluster_logical_timestamp();
`, name, val1).Scan(&tsStart); err != nil {
		t.Fatal(err)
	}

	var tsVal2 string
	if err := sqlDB.QueryRow("UPDATE d.t SET a = $1 RETURNING cluster_logical_timestamp()", val2).Scan(&tsVal2); err != nil {
		t.Fatal(err)
	}
	walltime := new(inf.Dec)
	if _, ok := walltime.SetString(tsVal2); !ok {
		t.Fatalf("couldn't set decimal: %s", tsVal2)
	}
	oneTick := inf.NewDec(1, 0)
	// Set tsVal1 to 1ns before tsVal2.
	tsVal1 := walltime.Sub(walltime, oneTick).String()

	// Set up error injection that causes retries.
	magicVals := createFilterVals(nil, nil)
	magicVals.restartCounts = map[string]int{
		name: 5,
	}
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			magicVals.Lock()
			defer magicVals.Unlock()

			switch req := args.Req.(type) {
			case *roachpb.ScanRequest:
				for key, count := range magicVals.restartCounts {
					if err := checkCorrectTxn(string(req.Key), magicVals, args.Hdr.Txn); err != nil {
						return roachpb.NewError(err)
					}
					if count > 0 && bytes.Contains(req.Key, []byte(key)) {
						magicVals.restartCounts[key]--
						err := roachpb.NewTransactionRetryError()
						magicVals.failedValues[string(req.Key)] = failureRecord{err, args.Hdr.Txn}
						txn := args.Hdr.Txn.Clone()
						txn.Timestamp = txn.Timestamp.Add(0, 1)
						return roachpb.NewErrorWithTxn(err, &txn)
					}
				}
			}
			return nil
		}, false)

	var i int
	// Query with tsVal1 which should return the first value. Since tsVal1 is just
	// one nanosecond before tsVal2, any proto timestamp bumping will return val2
	// and error.
	// Must specify the WHERE here to trigger the injection errors.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME %s WHERE s = '%s'", tsVal1, name)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val1 {
		t.Fatalf("unexpected val: %v", i)
	}
	cleanupFilter()
	// Verify that the retry errors were injected.
	checkRestarts(t, magicVals)

	// Query with tsVal2 to ensure val2 is indeed present.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME %s", tsVal2)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val2 {
		t.Fatalf("unexpected val: %v", i)
	}
}
func TestGCQueueTransactionTable(t *testing.T) {
	defer leaktest.AfterTest(t)()

	const now time.Duration = 3 * 24 * time.Hour

	const gcTxnAndAC = now - txnCleanupThreshold
	const gcACOnly = now - abortCacheAgeThreshold
	if gcTxnAndAC >= gcACOnly {
		t.Fatalf("test assumption violated due to changing constants; needs adjustment")
	}

	type spec struct {
		status      roachpb.TransactionStatus
		orig        time.Duration
		hb          time.Duration             // last heartbeat (none if ZeroTimestamp)
		newStatus   roachpb.TransactionStatus // -1 for GCed
		failResolve bool                      // do we want to fail resolves in this trial?
		expResolve  bool                      // expect attempt at removing txn-persisted intents?
		expAbortGC  bool                      // expect abort cache entries removed?
	}
	// Describes the state of the Txn table before the test.
	// Many of the abort cache entries deleted wouldn't even be there, so don't
	// be confused by that.
	testCases := map[string]spec{
		// Too young, should not touch.
		"aa": {
			status:    roachpb.PENDING,
			orig:      gcACOnly + 1,
			newStatus: roachpb.PENDING,
		},
		// A little older, so the AbortCache gets cleaned up.
		"ab": {
			status:     roachpb.PENDING,
			orig:       gcTxnAndAC + 1,
			newStatus:  roachpb.PENDING,
			expAbortGC: true,
		},
		// Old and pending, but still heartbeat (so no Push attempted; it would succeed).
		// It's old enough to delete the abort cache entry though.
		"ba": {
			status:     roachpb.PENDING,
			hb:         gcTxnAndAC + 1,
			newStatus:  roachpb.PENDING,
			expAbortGC: true,
		},
		// Not old enough for Txn GC, but old enough to remove the abort cache entry.
		"bb": {
			status:     roachpb.ABORTED,
			orig:       gcACOnly - 1,
			newStatus:  roachpb.ABORTED,
			expAbortGC: true,
		},
		// Old, pending and abandoned. Should push and abort it successfully,
		// but not GC it just yet (this is an artifact of the implementation).
		// The abort cache gets cleaned up though.
		"c": {
			status:     roachpb.PENDING,
			orig:       gcTxnAndAC - 1,
			newStatus:  roachpb.ABORTED,
			expAbortGC: true,
		},
		// Old and aborted, should delete.
		"d": {
			status:     roachpb.ABORTED,
			orig:       gcTxnAndAC - 1,
			newStatus:  -1,
			expResolve: true,
			expAbortGC: true,
		},
		// Committed and fresh, so no action. But the abort cache entry is old
		// enough to be discarded.
		"e": {
			status:     roachpb.COMMITTED,
			orig:       gcTxnAndAC + 1,
			newStatus:  roachpb.COMMITTED,
			expAbortGC: true,
		},
		// Committed and old. It has an intent (like all tests here), which is
		// resolvable and hence we can GC.
		"f": {
			status:     roachpb.COMMITTED,
			orig:       gcTxnAndAC - 1,
			newStatus:  -1,
			expResolve: true,
			expAbortGC: true,
		},
		// Same as the previous one, but we've rigged things so that the intent
		// resolution here will fail and consequently no GC is expected.
		"g": {
			status:      roachpb.COMMITTED,
			orig:        gcTxnAndAC - 1,
			newStatus:   roachpb.COMMITTED,
			failResolve: true,
			expResolve:  true,
			expAbortGC:  true,
		},
	}

	resolved := map[string][]roachpb.Span{}

	tc := testContext{}
	tsc := TestStoreConfig()
	tsc.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			if resArgs, ok := filterArgs.Req.(*roachpb.ResolveIntentRequest); ok {
				id := string(resArgs.IntentTxn.Key)
				resolved[id] = append(resolved[id], roachpb.Span{
					Key:    resArgs.Key,
					EndKey: resArgs.EndKey,
				})
				// We've special cased one test case. Note that the intent is still
				// counted in `resolved`.
				if testCases[id].failResolve {
					return roachpb.NewErrorWithTxn(errors.Errorf("boom"), filterArgs.Hdr.Txn)
				}
			}
			return nil
		}
	tc.StartWithStoreConfig(t, tsc)
	defer tc.Stop()
	tc.manualClock.Set(int64(now))

	outsideKey := tc.rng.Desc().EndKey.Next().AsRawKey()
	testIntents := []roachpb.Span{{Key: roachpb.Key("intent")}}

	txns := map[string]roachpb.Transaction{}
	for strKey, test := range testCases {
		baseKey := roachpb.Key(strKey)
		txnClock := hlc.NewClock(hlc.NewManualClock(int64(test.orig)).UnixNano)
		txn := newTransaction("txn1", baseKey, 1, enginepb.SERIALIZABLE, txnClock)
		txn.Status = test.status
		txn.Intents = testIntents
		if test.hb > 0 {
			txn.LastHeartbeat = &hlc.Timestamp{WallTime: int64(test.hb)}
		}
		// Set a high Timestamp to make sure it does not matter. Only
		// OrigTimestamp (and heartbeat) are used for GC decisions.
		txn.Timestamp.Forward(hlc.MaxTimestamp)
		txns[strKey] = *txn
		for _, addrKey := range []roachpb.Key{baseKey, outsideKey} {
			key := keys.TransactionKey(addrKey, txn.ID)
			if err := engine.MVCCPutProto(context.Background(), tc.engine, nil, key, hlc.ZeroTimestamp, nil, txn); err != nil {
				t.Fatal(err)
			}
		}
		entry := roachpb.AbortCacheEntry{Key: txn.Key, Timestamp: txn.LastActive()}
		if err := tc.rng.abortCache.Put(context.Background(), tc.engine, nil, txn.ID, &entry); err != nil {
			t.Fatal(err)
		}
	}

	// Run GC.
	gcQ := newGCQueue(tc.store, tc.gossip)
	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	if err := gcQ.process(context.Background(), tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		for strKey, sp := range testCases {
			txn := &roachpb.Transaction{}
			key := keys.TransactionKey(roachpb.Key(strKey), txns[strKey].ID)
			ok, err := engine.MVCCGetProto(context.Background(), tc.engine, key, hlc.ZeroTimestamp, true, nil, txn)
			if err != nil {
				return err
			}
			if expGC := (sp.newStatus == -1); expGC {
				if expGC != !ok {
					return fmt.Errorf("%s: expected gc: %t, but found %s\n%s", strKey, expGC, txn, roachpb.Key(strKey))
				}
			} else if sp.newStatus != txn.Status {
				return fmt.Errorf("%s: expected status %s, but found %s", strKey, sp.newStatus, txn.Status)
			}
			var expIntents []roachpb.Span
			if sp.expResolve {
				expIntents = testIntents
			}
			if !reflect.DeepEqual(resolved[strKey], expIntents) {
				return fmt.Errorf("%s: unexpected intent resolutions:\nexpected: %s\nobserved: %s",
					strKey, expIntents, resolved[strKey])
			}
			entry := &roachpb.AbortCacheEntry{}
			abortExists, err := tc.rng.abortCache.Get(context.Background(), tc.store.Engine(), txns[strKey].ID, entry)
			if err != nil {
				t.Fatal(err)
			}
			if abortExists == sp.expAbortGC {
				return fmt.Errorf("%s: expected abort cache gc: %t, found %+v", strKey, sp.expAbortGC, entry)
			}
		}
		return nil
	})

	outsideTxnPrefix := keys.TransactionKey(outsideKey, uuid.EmptyUUID)
	outsideTxnPrefixEnd := keys.TransactionKey(outsideKey.Next(), uuid.EmptyUUID)
	var count int
	if _, err := engine.MVCCIterate(context.Background(), tc.store.Engine(), outsideTxnPrefix, outsideTxnPrefixEnd,
		hlc.ZeroTimestamp, true, nil, false, func(roachpb.KeyValue) (bool, error) {
			count++
			return false, nil
		}); err != nil {
		t.Fatal(err)
	}
	if exp := len(testCases); exp != count {
		t.Fatalf("expected the %d external transaction entries to remain untouched, "+
			"but only %d are left", exp, count)
	}

	batch := tc.engine.NewSnapshot()
	defer batch.Close()
	tc.rng.assertState(batch) // check that in-mem and on-disk state were updated

	tc.rng.mu.Lock()
	txnSpanThreshold := tc.rng.mu.state.TxnSpanGCThreshold
	tc.rng.mu.Unlock()

	// Verify that the new TxnSpanGCThreshold has reached the Replica.
	if expWT := int64(gcTxnAndAC); txnSpanThreshold.WallTime != expWT {
		t.Fatalf("expected TxnSpanGCThreshold.Walltime %d, got timestamp %s",
			expWT, txnSpanThreshold)
	}
}
// Verifies that an expired lease is released and a new lease is acquired on transaction
// restart.
//
// This test triggers the above scenario by making ReadWithinUncertaintyIntervalError advance
// the clock, so that the transaction timestamp exceeds the deadline of the EndTransactionRequest.
func TestReacquireLeaseOnRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()

	advancement := 2 * sql.LeaseDuration

	var cmdFilters CommandFilters
	cmdFilters.AppendFilter(checkEndTransactionTrigger, true)

	var clockUpdate int32
	testKey := []byte("test_key")
	testingKnobs := &storage.StoreTestingKnobs{
		TestingCommandFilter:  cmdFilters.runFilters,
		DisableMaxOffsetCheck: true,
		ClockBeforeSend: func(c *hlc.Clock, ba roachpb.BatchRequest) {
			if atomic.LoadInt32(&clockUpdate) > 0 {
				return
			}

			// Hack to advance the transaction timestamp on a transaction restart.
			for _, union := range ba.Requests {
				if req, ok := union.GetInner().(*roachpb.ScanRequest); ok {
					if bytes.Contains(req.Key, testKey) {
						atomic.AddInt32(&clockUpdate, 1)
						now := c.Now()
						now.WallTime += advancement.Nanoseconds()
						c.Update(now)
						break
					}
				}
			}
		},
	}

	params, _ := createTestServerParams()
	params.Knobs.Store = testingKnobs
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	var restartDone int32
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if atomic.LoadInt32(&restartDone) > 0 {
				return nil
			}

			if req, ok := args.Req.(*roachpb.ScanRequest); ok {
				if bytes.Contains(req.Key, testKey) {
					atomic.AddInt32(&restartDone, 1)
					// Return ReadWithinUncertaintyIntervalError to update the transaction timestamp on retry.
					txn := args.Hdr.Txn
					txn.ResetObservedTimestamps()
					now := s.Clock().Now()
					txn.UpdateObservedTimestamp(s.(*server.TestServer).Gossip().NodeID.Get(), now)
					return roachpb.NewErrorWithTxn(roachpb.NewReadWithinUncertaintyIntervalError(now, now), txn)
				}
			}
			return nil
		}, false)
	defer cleanupFilter()

	sqlDB.SetMaxOpenConns(1)
	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT);
INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val');
`); err != nil {
		t.Fatal(err)
	}

	// Acquire the lease and enable the auto-retry. The first read attempt will
	// trigger ReadWithinUncertaintyIntervalError and advance the transaction
	// timestamp. The transaction timestamp will exceed the lease expiration
	// time, and the second read attempt will re-acquire the lease.
	if _, err := sqlDB.Exec(`
SELECT * from t.test WHERE k = 'test_key';
`); err != nil {
		t.Fatal(err)
	}

	if u := atomic.LoadInt32(&clockUpdate); u != 1 {
		t.Errorf("expected exactly one clock update, but got %d", u)
	}
	if u := atomic.LoadInt32(&restartDone); u != 1 {
		t.Errorf("expected exactly one restart, but got %d", u)
	}
}