// TestRunTransactionRetryOnErrors verifies that the transaction // is retried on the correct errors. func TestRunTransactionRetryOnErrors(t *testing.T) { defer leaktest.AfterTest(t)() testCases := []struct { err error retry bool // Expect retry? }{ {roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true}, {&roachpb.TransactionAbortedError{}, true}, {&roachpb.TransactionPushError{}, true}, {&roachpb.TransactionRetryError{}, true}, {&roachpb.WriteTooOldError{}, true}, {&roachpb.RangeNotFoundError{}, false}, {&roachpb.RangeKeyMismatchError{}, false}, {&roachpb.TransactionStatusError{}, false}, } for i, test := range testCases { count := 0 dbCtx := DefaultDBContext() dbCtx.TxnRetryOptions.InitialBackoff = 1 * time.Millisecond db := NewDBWithContext(newTestSender( func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { if _, ok := ba.GetArg(roachpb.Put); ok { count++ if count == 1 { return nil, roachpb.NewErrorWithTxn(test.err, ba.Txn) } } return ba.CreateReply(), nil }, nil), dbCtx) err := db.Txn(context.TODO(), func(txn *Txn) error { return txn.Put("a", "b") }) if test.retry { if count != 2 { t.Errorf("%d: expected one retry; got %d", i, count-1) } if err != nil { t.Errorf("%d: expected success on retry; got %s", i, err) } } else { if count != 1 { t.Errorf("%d: expected no retries; got %d", i, count) } if reflect.TypeOf(err) != reflect.TypeOf(test.err) { t.Errorf("%d: expected error of type %T; got %T", i, test.err, err) } } } }
// TestAbortTransactionOnCommitErrors verifies that transactions are // aborted on the correct errors. func TestAbortTransactionOnCommitErrors(t *testing.T) { defer leaktest.AfterTest(t)() testCases := []struct { err error abort bool }{ {roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true}, {&roachpb.TransactionAbortedError{}, false}, {&roachpb.TransactionPushError{}, true}, {&roachpb.TransactionRetryError{}, true}, {&roachpb.RangeNotFoundError{}, true}, {&roachpb.RangeKeyMismatchError{}, true}, {&roachpb.TransactionStatusError{}, true}, } for _, test := range testCases { var commit, abort bool db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { switch t := ba.Requests[0].GetInner().(type) { case *roachpb.EndTransactionRequest: if t.Commit { commit = true return nil, roachpb.NewError(test.err) } abort = true } return ba.CreateReply(), nil }, nil)) txn := NewTxn(context.Background(), *db) if pErr := txn.Put("a", "b"); pErr != nil { t.Fatalf("put failed: %s", pErr) } if pErr := txn.CommitOrCleanup(); pErr == nil { t.Fatalf("unexpected commit success") } if !commit { t.Errorf("%T: failed to find commit", test.err) } if test.abort && !abort { t.Errorf("%T: failed to find abort", test.err) } else if !test.abort && abort { t.Errorf("%T: found unexpected abort", test.err) } } }
// injectErrors inspects an incoming request and, for ConditionalPuts whose
// value matches a key registered in magicVals, injects a synthetic retry or
// abort error. Each registered key carries a countdown; an error is injected
// only while its count is positive, and every injection is recorded in
// magicVals.failedValues keyed by the request's raw value. Returns nil for
// non-ConditionalPut requests and for values that match nothing.
func injectErrors(req roachpb.Request, hdr roachpb.Header, magicVals *filterVals) error {
	// magicVals is shared with concurrent filters; guard all reads/writes.
	magicVals.Lock()
	defer magicVals.Unlock()
	switch req := req.(type) {
	case *roachpb.ConditionalPutRequest:
		// First pass: keys that should trigger a transaction restart via
		// ReadWithinUncertaintyIntervalError.
		for key, count := range magicVals.restartCounts {
			// checkCorrectTxn is invoked per registered key (it does not
			// depend on the key itself); any mismatch aborts immediately.
			if err := checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn); err != nil {
				return err
			}
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				// Decrement the remaining injections for this key before
				// fabricating the uncertainty error.
				magicVals.restartCounts[key]--
				err := roachpb.NewReadWithinUncertaintyIntervalError(
					hlc.ZeroTimestamp, hlc.ZeroTimestamp)
				// Remember which txn saw this failure so later attempts can
				// be validated against it.
				magicVals.failedValues[string(req.Value.RawBytes)] = failureRecord{err, hdr.Txn}
				return err
			}
		}
		// Second pass: keys that should abort the transaction outright.
		for key, count := range magicVals.abortCounts {
			if err := checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn); err != nil {
				return err
			}
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				magicVals.abortCounts[key]--
				err := roachpb.NewTransactionAbortedError()
				magicVals.failedValues[string(req.Value.RawBytes)] = failureRecord{err, hdr.Txn}
				return err
			}
		}
		return nil
	default:
		// Only ConditionalPuts are of interest; pass everything else through.
		return nil
	}
}
// TestPropagateTxnOnError verifies that DistSender.sendBatch properly
// propagates the txn data to a next iteration. Use txn.Writing field to
// verify that.
func TestPropagateTxnOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var storeKnobs storage.StoreTestingKnobs
	// Set up a filter so that the first CPut operation on the target key will
	// get a ReadWithinUncertaintyIntervalError.
	targetKey := roachpb.Key("b")
	var numGets int32
	storeKnobs.TestingCommandFilter = func(fArgs storagebase.FilterArgs) *roachpb.Error {
		_, ok := fArgs.Req.(*roachpb.ConditionalPutRequest)
		if ok && fArgs.Req.Header().Key.Equal(targetKey) {
			// Only the first matching CPut fails; the counter is atomic
			// because the filter may run concurrently.
			if atomic.AddInt32(&numGets, 1) == 1 {
				z := hlc.ZeroTimestamp
				pErr := roachpb.NewReadWithinUncertaintyIntervalError(z, z)
				// Attach the request's txn so it is carried on the error.
				return roachpb.NewErrorWithTxn(pErr, fArgs.Hdr.Txn)
			}
		}
		return nil
	}
	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{Knobs: base.TestingKnobs{Store: &storeKnobs}})
	defer s.Stopper().Stop()
	// Split at "b" so the batch below straddles two ranges.
	db := setupMultipleRanges(t, s, "b")

	// Set the initial value on the target key "b".
	origVal := "val"
	if err := db.Put(context.TODO(), targetKey, origVal); err != nil {
		t.Fatal(err)
	}

	// The following txn creates a batch request that is split
	// into two requests: Put and CPut. The CPut operation will
	// get a ReadWithinUncertaintyIntervalError and the txn will be
	// retried.
	epoch := 0
	if err := db.Txn(context.TODO(), func(txn *client.Txn) error {
		epoch++
		if epoch >= 2 {
			// Writing must be true since we ran the BeginTransaction command.
			if !txn.Proto.Writing {
				t.Errorf("unexpected non-writing txn")
			}
		} else {
			// Writing must be false since we haven't run any write command.
			if txn.Proto.Writing {
				t.Errorf("unexpected writing txn")
			}
		}
		b := txn.NewBatch()
		b.Put("a", "val")
		b.CPut(targetKey, "new_val", origVal)
		err := txn.CommitInBatch(b)
		if epoch == 1 {
			// The first attempt must fail retryably with the uncertainty
			// error injected above, and the txn carried on the error must
			// already be marked Writing.
			if retErr, ok := err.(*roachpb.RetryableTxnError); ok {
				if _, ok := retErr.Cause.(*roachpb.ReadWithinUncertaintyIntervalError); ok {
					if !retErr.Transaction.Writing {
						t.Errorf("unexpected non-writing txn on error")
					}
				} else {
					t.Errorf("expected ReadWithinUncertaintyIntervalError, but got: %s", retErr.Cause)
				}
			} else {
				t.Errorf("expected a retryable error, but got: %s", err)
			}
		}
		return err
	}); err != nil {
		t.Errorf("unexpected error on transactional Puts: %s", err)
	}

	if epoch != 2 {
		t.Errorf("unexpected epoch; the txn must be retried exactly once, but got %d", epoch)
	}
}
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
// response transaction's timestamp and priority as appropriate.
func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	origTS := makeTS(123, 0)
	plus10 := origTS.Add(10, 10)
	plus20 := plus10.Add(10, 0)
	testCases := []struct {
		pErr             *roachpb.Error       // error returned by the sender; nil means success
		expEpoch         uint32               // expected txn epoch after the attempt
		expPri           int32                // expected txn priority after the attempt
		expTS, expOrigTS hlc.Timestamp        // expected (orig) timestamps after the attempt
		nodeSeen         bool                 // expect a non-empty ObservedTimestamps list?
	}{
		{
			// No error, so nothing interesting either.
			pErr:      nil,
			expEpoch:  0,
			expPri:    1,
			expTS:     origTS,
			expOrigTS: origTS,
		},
		{
			// On uncertainty error, new epoch begins and node is seen.
			// Timestamp moves ahead of the existing write.
			pErr: func() *roachpb.Error {
				pErr := roachpb.NewErrorWithTxn(
					roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp),
					&roachpb.Transaction{})
				const nodeID = 1
				pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10)
				pErr.OriginNode = nodeID
				return pErr
			}(),
			expEpoch:  1,
			expPri:    1,
			expTS:     plus10,
			expOrigTS: plus10,
			nodeSeen:  true,
		},
		{
			// On abort, nothing changes but we get a new priority to use for
			// the next attempt.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{},
				&roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus20, Priority: 10},
				}),
			expPri: 10,
		},
		{
			// On failed push, new epoch begins just past the pushed timestamp.
			// Additionally, priority ratchets up to just below the pusher's.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
				PusheeTxn: roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			}, &roachpb.Transaction{}),
			expEpoch:  1,
			expPri:    9,
			expTS:     plus10,
			expOrigTS: plus10,
		},
		{
			// On retry, restart with new epoch, timestamp and priority.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{},
				&roachpb.Transaction{
					TxnMeta: enginepb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			),
			expEpoch:  1,
			expPri:    10,
			expTS:     plus10,
			expOrigTS: plus10,
		},
	}

	for i, test := range testCases {
		stopper := stop.NewStopper()
		manual := hlc.NewManualClock(origTS.WallTime)
		clock := hlc.NewClock(manual.UnixNano, 20*time.Nanosecond)
		// The sender unconditionally returns the test case's error (or a
		// plain reply when pErr is nil).
		senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			var reply *roachpb.BatchResponse
			if test.pErr == nil {
				reply = ba.CreateReply()
			}
			return reply, test.pErr
		}
		ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
		ts := NewTxnCoordSender(
			ambient,
			senderFn(senderFunc),
			clock,
			false,
			stopper,
			MakeTxnMetrics(metric.TestSampleInterval),
		)
		db := client.NewDB(ts)
		txn := client.NewTxn(context.Background(), *db)
		txn.InternalSetPriority(1)
		txn.Proto.Name = "test txn"
		key := roachpb.Key("test-key")
		// Run a single Get so the coordinator processes exactly one error.
		_, err := txn.Get(key)
		teardownHeartbeats(ts)
		stopper.Stop()

		if test.pErr != nil && err == nil {
			t.Fatalf("expected an error")
		}
		// The txn proto must have been updated in place by the coordinator.
		if txn.Proto.Epoch != test.expEpoch {
			t.Errorf("%d: expected epoch = %d; got %d",
				i, test.expEpoch, txn.Proto.Epoch)
		}
		if txn.Proto.Priority != test.expPri {
			t.Errorf("%d: expected priority = %d; got %d",
				i, test.expPri, txn.Proto.Priority)
		}
		if !txn.Proto.Timestamp.Equal(test.expTS) {
			t.Errorf("%d: expected timestamp to be %s; got %s",
				i, test.expTS, txn.Proto.Timestamp)
		}
		if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) {
			t.Errorf("%d: expected orig timestamp to be %s; got %s",
				i, test.expOrigTS, txn.Proto.OrigTimestamp)
		}
		if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
			t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v",
				i, test.nodeSeen, ns)
		}
	}
}
// Verifies that an expired lease is released and a new lease is acquired on transaction // restart. // // This test triggers the above scenario by making ReadWithinUncertaintyIntervalError advance // the clock, so that the transaction timestamp exceeds the deadline of the EndTransactionRequest. func TestReacquireLeaseOnRestart(t *testing.T) { defer leaktest.AfterTest(t)() advancement := 2 * sql.LeaseDuration var cmdFilters CommandFilters cmdFilters.AppendFilter(checkEndTransactionTrigger, true) var clockUpdate int32 testKey := []byte("test_key") testingKnobs := &storage.StoreTestingKnobs{ TestingCommandFilter: cmdFilters.runFilters, DisableMaxOffsetCheck: true, ClockBeforeSend: func(c *hlc.Clock, ba roachpb.BatchRequest) { if atomic.LoadInt32(&clockUpdate) > 0 { return } // Hack to advance the transaction timestamp on a transaction restart. for _, union := range ba.Requests { if req, ok := union.GetInner().(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { atomic.AddInt32(&clockUpdate, 1) now := c.Now() now.WallTime += advancement.Nanoseconds() c.Update(now) break } } } }, } params, _ := createTestServerParams() params.Knobs.Store = testingKnobs s, sqlDB, _ := serverutils.StartServer(t, params) defer s.Stopper().Stop() var restartDone int32 cleanupFilter := cmdFilters.AppendFilter( func(args storagebase.FilterArgs) *roachpb.Error { if atomic.LoadInt32(&restartDone) > 0 { return nil } if req, ok := args.Req.(*roachpb.ScanRequest); ok { if bytes.Contains(req.Key, testKey) { atomic.AddInt32(&restartDone, 1) // Return ReadWithinUncertaintyIntervalError to update the transaction timestamp on retry. 
txn := args.Hdr.Txn txn.ResetObservedTimestamps() now := s.Clock().Now() txn.UpdateObservedTimestamp(s.(*server.TestServer).Gossip().NodeID.Get(), now) return roachpb.NewErrorWithTxn(roachpb.NewReadWithinUncertaintyIntervalError(now, now), txn) } } return nil }, false) defer cleanupFilter() sqlDB.SetMaxOpenConns(1) if _, err := sqlDB.Exec(` CREATE DATABASE t; CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT); INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val'); `); err != nil { t.Fatal(err) } // Acquire the lease and enable the auto-retry. The first read attempt will trigger ReadWithinUncertaintyIntervalError // and advance the transaction timestamp. The transaction timestamp will exceed the lease expiration // time, and the second read attempt will re-acquire the lease. if _, err := sqlDB.Exec(` SELECT * from t.test WHERE k = 'test_key'; `); err != nil { t.Fatal(err) } if u := atomic.LoadInt32(&clockUpdate); u != 1 { t.Errorf("expected exacltly one clock update, but got %d", u) } if u := atomic.LoadInt32(&restartDone); u != 1 { t.Errorf("expected exactly one restart, but got %d", u) } }