Example #1
func injectErrors(
	req roachpb.Request,
	hdr roachpb.Header,
	magicVals *filterVals,
) error {
	magicVals.Lock()
	defer magicVals.Unlock()

	switch req := req.(type) {
	case *roachpb.ConditionalPutRequest:
		for key, count := range magicVals.restartCounts {
			checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn)
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				magicVals.restartCounts[key]--
				err := roachpb.NewReadWithinUncertaintyIntervalError(
					hlc.ZeroTimestamp, hlc.ZeroTimestamp)
				magicVals.failedValues[string(req.Value.RawBytes)] =
					failureRecord{err, hdr.Txn}
				return err
			}
		}
		for key, count := range magicVals.abortCounts {
			checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn)
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				magicVals.abortCounts[key]--
				err := roachpb.NewTransactionAbortedError()
				magicVals.failedValues[string(req.Value.RawBytes)] =
					failureRecord{err, hdr.Txn}
				return err
			}
		}
		// If we're writing a value that's marked for an EndTransaction failure,
		// keep track of the txn id so we can fail it later on.
		for key, count := range magicVals.endTxnRestartCounts {
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				txnID := *hdr.Txn.TxnMeta.ID
				if _, found := magicVals.txnsToFail[txnID]; found {
					continue
				}
				magicVals.endTxnRestartCounts[key]--
				magicVals.txnsToFail[txnID] = true
			}
		}
		return nil
	case *roachpb.EndTransactionRequest:
		txnID := *hdr.Txn.TxnMeta.ID
		if !magicVals.txnsToFail[txnID] {
			return nil
		}
		delete(magicVals.txnsToFail, txnID)
		// Note that we can't return TransactionAborted errors, although those are
		// more representative for the errors that EndTransaction might encounter,
		// because returning those would result in the txn's intents being left
		// around.
		return roachpb.NewTransactionRetryError()
	default:
		return nil
	}
}
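The injectErrors helper above relies on a filterVals struct that isn't shown in this example. Below is a minimal sketch with the field names and map types inferred from the usage above; the embedded sync.Mutex and the uuid.UUID key type are assumptions, not the original definitions.

// Sketch only: inferred from how injectErrors uses magicVals; not the
// original definition.
type failureRecord struct {
	err error
	txn *roachpb.Transaction
}

type filterVals struct {
	sync.Mutex // assumed; injectErrors calls Lock/Unlock on magicVals

	// Magic value -> remaining number of retriable errors to inject.
	restartCounts map[string]int
	// Magic value -> remaining number of TransactionAbortedErrors to inject.
	abortCounts map[string]int
	// Magic value -> remaining number of txns whose EndTransaction should fail.
	endTxnRestartCounts map[string]int
	// Written value -> the error injected for it and the txn that saw it.
	failedValues map[string]failureRecord
	// Txn IDs marked to fail at EndTransaction; the key type must match what
	// *hdr.Txn.TxnMeta.ID dereferences to (assumed to be uuid.UUID here).
	txnsToFail map[uuid.UUID]bool
}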
Example #2
// TestRunTransactionRetryOnErrors verifies that the transaction
// is retried on the correct errors.
func TestRunTransactionRetryOnErrors(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		err   error
		retry bool // Expect retry?
	}{
		{roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true},
		{&roachpb.TransactionAbortedError{}, true},
		{&roachpb.TransactionPushError{}, true},
		{&roachpb.TransactionRetryError{}, true},
		{&roachpb.WriteTooOldError{}, true},
		{&roachpb.RangeNotFoundError{}, false},
		{&roachpb.RangeKeyMismatchError{}, false},
		{&roachpb.TransactionStatusError{}, false},
	}

	for i, test := range testCases {
		count := 0
		dbCtx := DefaultDBContext()
		dbCtx.TxnRetryOptions.InitialBackoff = 1 * time.Millisecond
		db := NewDBWithContext(newTestSender(
			func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {

				if _, ok := ba.GetArg(roachpb.Put); ok {
					count++
					if count == 1 {
						return nil, roachpb.NewErrorWithTxn(test.err, ba.Txn)
					}
				}
				return ba.CreateReply(), nil
			}, nil), dbCtx)
		err := db.Txn(context.TODO(), func(txn *Txn) error {
			return txn.Put("a", "b")
		})
		if test.retry {
			if count != 2 {
				t.Errorf("%d: expected one retry; got %d", i, count-1)
			}
			if err != nil {
				t.Errorf("%d: expected success on retry; got %s", i, err)
			}
		} else {
			if count != 1 {
				t.Errorf("%d: expected no retries; got %d", i, count)
			}
			if reflect.TypeOf(err) != reflect.TypeOf(test.err) {
				t.Errorf("%d: expected error of type %T; got %T", i, test.err, err)
			}
		}
	}
}
Example #3
// TestAbortTransactionOnCommitErrors verifies that transactions are
// aborted on the correct errors.
func TestAbortTransactionOnCommitErrors(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		err   error
		abort bool
	}{
		{roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp), true},
		{&roachpb.TransactionAbortedError{}, false},
		{&roachpb.TransactionPushError{}, true},
		{&roachpb.TransactionRetryError{}, true},
		{&roachpb.RangeNotFoundError{}, true},
		{&roachpb.RangeKeyMismatchError{}, true},
		{&roachpb.TransactionStatusError{}, true},
	}

	for _, test := range testCases {
		var commit, abort bool
		db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {

			switch t := ba.Requests[0].GetInner().(type) {
			case *roachpb.EndTransactionRequest:
				if t.Commit {
					commit = true
					return nil, roachpb.NewError(test.err)
				}
				abort = true
			}
			return ba.CreateReply(), nil
		}, nil))

		txn := NewTxn(context.Background(), *db)
		if pErr := txn.Put("a", "b"); pErr != nil {
			t.Fatalf("put failed: %s", pErr)
		}
		if pErr := txn.CommitOrCleanup(); pErr == nil {
			t.Fatalf("unexpected commit success")
		}

		if !commit {
			t.Errorf("%T: failed to find commit", test.err)
		}
		if test.abort && !abort {
			t.Errorf("%T: failed to find abort", test.err)
		} else if !test.abort && abort {
			t.Errorf("%T: found unexpected abort", test.err)
		}
	}
}
Example #4
func injectRetriableErrors(
	_ roachpb.StoreID, req roachpb.Request, hdr roachpb.Header,
	magicVals []string, restarts map[string]int) error {
	cput, ok := req.(*roachpb.ConditionalPutRequest)
	if !ok {
		return nil
	}
	for _, val := range magicVals {
		if restarts[val] < 2 && bytes.Contains(cput.Value.RawBytes, []byte(val)) {
			restarts[val]++
			return roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp)
		}
	}
	return nil
}
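As a hedged sketch (not part of the original source), this standalone variant could be wired into the TestingCommandFilter hook that Examples #6 and #9 use. The FilterArgs fields read below (Sid, Req, Hdr) all appear in those examples, but newRetriableErrorFilter itself is a hypothetical helper.

// newRetriableErrorFilter is a hypothetical adapter that closes over the
// restart counts and forwards each filtered command to injectRetriableErrors.
func newRetriableErrorFilter(
	magicVals []string,
) func(storagebase.FilterArgs) *roachpb.Error {
	restarts := make(map[string]int)
	return func(args storagebase.FilterArgs) *roachpb.Error {
		if err := injectRetriableErrors(
			args.Sid, args.Req, args.Hdr, magicVals, restarts,
		); err != nil {
			return roachpb.NewErrorWithTxn(err, args.Hdr.Txn)
		}
		return nil
	}
}

Note that the restarts map is not synchronized here, mirroring this example; Example #5 is the variant that takes a lock around the shared state.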
Example #5
func injectRetriableErrors(
	_ roachpb.StoreID, req roachpb.Request, hdr roachpb.Header,
	magicVals *filterVals) error {
	magicVals.Lock()
	defer magicVals.Unlock()
	cput, ok := req.(*roachpb.ConditionalPutRequest)
	if !ok {
		return nil
	}
	for _, val := range magicVals.vals {
		if magicVals.restartCounts[val] < 2 && bytes.Contains(cput.Value.RawBytes, []byte(val)) {
			magicVals.restartCounts[val]++
			return roachpb.NewReadWithinUncertaintyIntervalError(
				roachpb.ZeroTimestamp, roachpb.ZeroTimestamp)
		}
	}
	return nil
}
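This version assumes a smaller filterVals than the one sketched after Example #1; the fields below are again inferred from usage, and the embedded sync.Mutex is an assumption.

// Sketch only: just the fields this version of injectRetriableErrors touches.
type filterVals struct {
	sync.Mutex // assumed; the filter locks around the shared state

	// Magic values to look for in ConditionalPut requests.
	vals []string
	// Magic value -> how many restarts it has already triggered (the filter
	// above stops injecting errors after two per value).
	restartCounts map[string]int
}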
Example #6
// TestReacquireLeaseOnRestart verifies that an expired lease is released and
// a new lease is acquired on a transaction restart.
//
// The test triggers that scenario by making ReadWithinUncertaintyIntervalError
// advance the clock, so that the transaction timestamp exceeds the deadline of
// the EndTransactionRequest.
func TestReacquireLeaseOnRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var cmdFilters CommandFilters
	cmdFilters.AppendFilter(checkEndTransactionTrigger, true)

	var clockUpdate int32
	testKey := []byte("test_key")
	testingKnobs := &storage.StoreTestingKnobs{
		TestingCommandFilter: cmdFilters.runFilters,
		ClockBeforeSend: func(c *hlc.Clock, ba roachpb.BatchRequest) {
			if atomic.LoadInt32(&clockUpdate) > 0 {
				return
			}

			// Hack to advance the transaction timestamp on a transaction restart.
			for _, union := range ba.Requests {
				if req, ok := union.GetInner().(*roachpb.ScanRequest); ok {
					if bytes.Contains(req.Key, testKey) {
						atomic.AddInt32(&clockUpdate, 1)
						now := c.Now()
						now.WallTime += int64(5 * sql.LeaseDuration)
						c.Update(now)
						break
					}
				}
			}
		},
	}

	params, _ := createTestServerParams()
	params.Knobs.Store = testingKnobs
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	var restartDone int32
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			if atomic.LoadInt32(&restartDone) > 0 {
				return nil
			}

			if req, ok := args.Req.(*roachpb.ScanRequest); ok {
				if bytes.Contains(req.Key, testKey) {
					atomic.AddInt32(&restartDone, 1)
					// Return ReadWithinUncertaintyIntervalError to update the transaction timestamp on retry.
					txn := args.Hdr.Txn
					txn.ResetObservedTimestamps()
					now := s.Clock().Now()
					txn.UpdateObservedTimestamp(
						s.(*server.TestServer).Gossip().GetNodeID(), now)
					return roachpb.NewErrorWithTxn(roachpb.NewReadWithinUncertaintyIntervalError(now, now), txn)
				}
			}
			return nil
		}, false)
	defer cleanupFilter()

	// Use a large max offset to avoid rejecting a transaction whose timestamp
	// is in the future (as we will advance the transaction timestamp with
	// ReadWithinUncertaintyIntervalError).
	s.Clock().SetMaxOffset(sql.LeaseDuration * 10)

	sqlDB.SetMaxOpenConns(1)
	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k TEXT PRIMARY KEY, v TEXT);
INSERT INTO t.test (k, v) VALUES ('test_key', 'test_val');
`); err != nil {
		t.Fatal(err)
	}
	// Acquire the lease and enable auto-retry. The first read attempt will
	// trigger a ReadWithinUncertaintyIntervalError and advance the transaction
	// timestamp. The new timestamp will exceed the lease expiration time, so
	// the second read attempt will re-acquire the lease.
	if _, err := sqlDB.Exec(`
SELECT * from t.test WHERE k = 'test_key';
`); err != nil {
		t.Fatal(err)
	}

	if u := atomic.LoadInt32(&clockUpdate); u != 1 {
		t.Errorf("expected exacltly one clock update, but got %d", u)
	}
	if u := atomic.LoadInt32(&restartDone); u != 1 {
		t.Errorf("expected exactly one restart, but got %d", u)
	}
}
Example #7
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
// response transaction's timestamp and priority as appropriate.
func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()
	origTS := makeTS(123, 0)
	plus10 := origTS.Add(10, 10)
	plus20 := plus10.Add(10, 0)
	testCases := []struct {
		pErr             *roachpb.Error
		expEpoch         uint32
		expPri           int32
		expTS, expOrigTS roachpb.Timestamp
		nodeSeen         bool
	}{
		{
			// No error, so nothing interesting either.
			pErr:      nil,
			expEpoch:  0,
			expPri:    1,
			expTS:     origTS,
			expOrigTS: origTS,
		},
		{
			// On uncertainty error, new epoch begins and node is seen.
			// Timestamp moves ahead of the existing write.
			pErr: func() *roachpb.Error {
				pErr := roachpb.NewErrorWithTxn(
					roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp),
					&roachpb.Transaction{})
				const nodeID = 1
				pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10)
				pErr.OriginNode = nodeID
				return pErr
			}(),
			expEpoch:  1,
			expPri:    1,
			expTS:     plus10,
			expOrigTS: plus10,
			nodeSeen:  true,
		},
		{
			// On abort, nothing changes but we get a new priority to use for
			// the next attempt.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus20, Priority: 10},
				}),
			expPri: 10,
		},
		{
			// On failed push, new epoch begins just past the pushed timestamp.
			// Additionally, priority ratchets up to just below the pusher's.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
				PusheeTxn: roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			},
				&roachpb.Transaction{}),
			expEpoch:  1,
			expPri:    9,
			expTS:     plus10,
			expOrigTS: plus10,
		},
		{
			// On retry, restart with new epoch, timestamp and priority.
			pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{},
				&roachpb.Transaction{
					TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)},
				},
			),
			expEpoch:  1,
			expPri:    10,
			expTS:     plus10,
			expOrigTS: plus10,
		},
	}

	for i, test := range testCases {
		stopper := stop.NewStopper()

		manual := hlc.NewManualClock(origTS.WallTime)
		clock := hlc.NewClock(manual.UnixNano)
		clock.SetMaxOffset(20)

		ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			var reply *roachpb.BatchResponse
			if test.pErr == nil {
				reply = ba.CreateReply()
			}
			return reply, test.pErr
		}), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry()))
		db := client.NewDB(ts)
		txn := client.NewTxn(context.Background(), *db)
		txn.InternalSetPriority(1)
		txn.Proto.Name = "test txn"
		key := roachpb.Key("test-key")
		_, err := txn.Get(key)
		teardownHeartbeats(ts)
		stopper.Stop()

		if test.pErr != nil && err == nil {
			t.Fatalf("expected an error")
		}
		if txn.Proto.Epoch != test.expEpoch {
			t.Errorf("%d: expected epoch = %d; got %d",
				i, test.expEpoch, txn.Proto.Epoch)
		}
		if txn.Proto.Priority != test.expPri {
			t.Errorf("%d: expected priority = %d; got %d",
				i, test.expPri, txn.Proto.Priority)
		}
		if !txn.Proto.Timestamp.Equal(test.expTS) {
			t.Errorf("%d: expected timestamp to be %s; got %s",
				i, test.expTS, txn.Proto.Timestamp)
		}
		if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) {
			t.Errorf("%d: expected orig timestamp to be %s; got %s",
				i, test.expOrigTS, txn.Proto.OrigTimestamp)
		}
		if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
			t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v",
				i, test.nodeSeen, ns)
		}
	}
}
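The test above adapts its inline batch handler with senderFn, whose definition isn't shown. A plausible sketch is a function type that satisfies the sender interface by calling itself; the exact original definition is an assumption.

// senderFn sketch: adapts a plain function to the Send method expected by
// NewTxnCoordSender. Not the original definition.
type senderFn func(context.Context, roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error)

func (f senderFn) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	return f(ctx, ba)
}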
Example #8
// TestPropagateTxnOnError verifies that DistSender.sendChunk properly
// propagates the txn data to the next iteration. The txn.Writing field is
// used to verify that.
func TestPropagateTxnOnError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Set up a filter so that the first CPut operation on the target key
	// gets a ReadWithinUncertaintyIntervalError.
	targetKey := roachpb.Key("b")
	var numCPuts int32

	ctx := server.NewTestContext()
	ctx.TestingKnobs.StoreTestingKnobs.TestingCommandFilter =
		func(fArgs storageutils.FilterArgs) *roachpb.Error {
			_, ok := fArgs.Req.(*roachpb.ConditionalPutRequest)
			if ok && fArgs.Req.Header().Key.Equal(targetKey) {
				if atomic.AddInt32(&numCPuts, 1) == 1 {
					z := roachpb.ZeroTimestamp
					pErr := roachpb.NewReadWithinUncertaintyIntervalError(z, z)
					return roachpb.NewErrorWithTxn(pErr, fArgs.Hdr.Txn)
				}
			}
			return nil
		}
	s := server.StartTestServerWithContext(t, ctx)
	defer s.Stop()
	db := setupMultipleRanges(t, s, "b")

	// Set the initial value on the target key "b".
	origVal := "val"
	if pErr := db.Put(targetKey, origVal); pErr != nil {
		t.Fatal(pErr)
	}

	// The following txn creates a batch request that is split
	// into two requests: Put and CPut. The CPut operation will
	// get a ReadWithinUncertaintyIntervalError and the txn will be
	// retried.
	epoch := 0
	if pErr := db.Txn(func(txn *client.Txn) *roachpb.Error {
		epoch++
		if epoch >= 2 {
			// Writing must be true since we ran the BeginTransaction command.
			if !txn.Proto.Writing {
				t.Errorf("unexpected non-writing txn")
			}
		} else {
			// Writing must be false since we haven't run any write command.
			if txn.Proto.Writing {
				t.Errorf("unexpected writing txn")
			}
		}

		b := txn.NewBatch()
		b.Put("a", "val")
		b.CPut(targetKey, "new_val", origVal)
		pErr := txn.CommitInBatch(b)
		if epoch == 1 {
			if _, ok := pErr.GetDetail().(*roachpb.ReadWithinUncertaintyIntervalError); ok {
				if !pErr.GetTxn().Writing {
					t.Errorf("unexpected non-writing txn on error")
				}
			} else {
				t.Errorf("expected ReadWithinUncertaintyIntervalError, but got: %s", pErr)
			}
		}
		return pErr
	}); pErr != nil {
		t.Errorf("unexpected error on transactional Puts: %s", pErr)
	}

	if epoch != 2 {
		t.Errorf("unexpected epoch; the txn must be retried exactly once, but got %d", epoch)
	}
}
Example #9
// TestStoreRangeSplitRaceUninitializedRHS reproduces #7600 (before it was
// fixed). While splits are happening, we simulate incoming messages for the
// right-hand side to trigger a race between the creation of the proper replica
// and the uninitialized replica reacting to messages.
func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	storeCtx := storage.TestStoreContext()
	// An aggressive tick interval lets groups communicate more and thus
	// triggers test failures much more reliably. We can't go too aggressive
	// or race tests never make any progress.
	storeCtx.RaftTickInterval = 50 * time.Millisecond
	storeCtx.RaftElectionTimeoutTicks = 2
	currentTrigger := make(chan *roachpb.SplitTrigger)
	seen := make(map[storagebase.CmdIDKey]struct{})
	storeCtx.TestingKnobs.TestingCommandFilter = func(args storagebase.FilterArgs) *roachpb.Error {
		et, ok := args.Req.(*roachpb.EndTransactionRequest)
		if !ok || et.InternalCommitTrigger == nil {
			return nil
		}
		trigger := protoutil.Clone(et.InternalCommitTrigger.GetSplitTrigger()).(*roachpb.SplitTrigger)
		if trigger != nil && len(trigger.NewDesc.Replicas) == 2 && args.Hdr.Txn.Epoch == 0 && args.Sid == mtc.stores[0].StoreID() {
			if _, ok := seen[args.CmdID]; ok {
				return nil
			}
			// Without replay protection, a single reproposal locks up the
			// test.
			seen[args.CmdID] = struct{}{}
			currentTrigger <- trigger
			return roachpb.NewError(roachpb.NewReadWithinUncertaintyIntervalError(args.Hdr.Timestamp, args.Hdr.Timestamp))
		}
		return nil
	}

	mtc.storeContext = &storeCtx
	mtc.Start(t, 2)
	defer mtc.Stop()

	leftRange := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil)

	// We'll fake messages from term 1, ..., magicIters-1. The exact number
	// doesn't matter for anything but for its likelihood of triggering the
	// race.
	const magicIters = 5

	// Replicate the left range onto the second node. We don't wait since we
	// don't actually care what the second node does. All we want is that the
	// first node isn't surprised by messages from that node.
	mtc.replicateRange(leftRange.RangeID, 1)

	for i := 0; i < 10; i++ {
		var wg sync.WaitGroup
		wg.Add(2)

		go func() {
			defer wg.Done()
			// Split the data range. The split keys are chosen so that they move
			// towards "a" (so that the range being split is always the first
			// range).
			splitKey := roachpb.Key(encoding.EncodeVarintDescending([]byte("a"), int64(i)))
			splitArgs := adminSplitArgs(keys.SystemMax, splitKey)
			if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &splitArgs); pErr != nil {
				t.Fatal(pErr)
			}
		}()
		go func() {
			defer wg.Done()

			trigger := <-currentTrigger // our own copy
			// Make sure the first node is first for convenience.
			replicas := trigger.NewDesc.Replicas
			if replicas[0].NodeID > replicas[1].NodeID {
				replicas[0], replicas[1] = replicas[1], replicas[0]
			}

			// Send a few vote requests which look like they're from the other
			// node's right hand side of the split. This triggers a race which
			// is discussed in #7600 (briefly, the creation of the right hand
			// side in the split trigger was racing with the uninitialized
			// version for the same group, resulting in clobbered HardState).
			for term := uint64(1); term < magicIters; term++ {
				if err := mtc.stores[0].HandleRaftMessage(&storage.RaftMessageRequest{
					RangeID:     trigger.NewDesc.RangeID,
					ToReplica:   replicas[0],
					FromReplica: replicas[1],
					Message: raftpb.Message{
						Type: raftpb.MsgVote,
						To:   uint64(replicas[0].ReplicaID),
						From: uint64(replicas[1].ReplicaID),
						Term: term,
					},
				}); err != nil {
					t.Error(err)
				}
			}
		}()
		wg.Wait()
	}
}