Example #1
0
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		txn := ba.Txn.Clone()
		txn.Writing = true
		pErr := roachpb.NewError(roachpb.NewTransactionRetryError())
		pErr.SetTxn(txn)
		return nil, pErr
	}), clock, false, nil, stopper)
	defer stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	if _, pErr := ts.Send(context.Background(), ba); !testutils.IsPError(pErr, "retry txn") {
		t.Fatalf("unexpected error: %v", pErr)
	}

	defer teardownHeartbeats(ts)
	ts.Lock()
	defer ts.Unlock()
	if len(ts.txns) != 1 {
		t.Fatalf("expected transaction to be tracked")
	}
}
Example #2
0
// TestEndWriteRestartReadOnlyTransaction verifies that if
// a transaction writes, then restarts and turns read-only,
// an explicit EndTransaction call is still sent if retry-
// able didn't, regardless of whether there is an error
// or not.
func TestEndWriteRestartReadOnlyTransaction(t *testing.T) {
	defer leaktest.AfterTest(t)()
	for _, success := range []bool{true, false} {
		expCalls := []roachpb.Method{roachpb.BeginTransaction, roachpb.Put, roachpb.EndTransaction}
		var calls []roachpb.Method
		db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			calls = append(calls, ba.Methods()...)
			return ba.CreateReply(), nil
		}, nil))
		ok := false
		if err := db.Txn(context.TODO(), func(txn *Txn) error {
			if !ok {
				if err := txn.Put("consider", "phlebas"); err != nil {
					t.Fatal(err)
				}
				ok = true
				// Return an immediate txn retry error. We need to go through the pErr
				// and back to get a RetryableTxnError.
				return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError()
			}
			if !success {
				return errors.New("aborting on purpose")
			}
			return nil
		}); err == nil != success {
			t.Errorf("expected error: %t, got error: %v", !success, err)
		}
		if !reflect.DeepEqual(expCalls, calls) {
			t.Fatalf("expected %v, got %v", expCalls, calls)
		}
	}
}
Example #3
0
func injectErrors(
	req roachpb.Request,
	hdr roachpb.Header,
	magicVals *filterVals,
) error {
	magicVals.Lock()
	defer magicVals.Unlock()

	switch req := req.(type) {
	case *roachpb.ConditionalPutRequest:
		for key, count := range magicVals.restartCounts {
			checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn)
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				magicVals.restartCounts[key]--
				err := roachpb.NewReadWithinUncertaintyIntervalError(
					hlc.ZeroTimestamp, hlc.ZeroTimestamp)
				magicVals.failedValues[string(req.Value.RawBytes)] =
					failureRecord{err, hdr.Txn}
				return err
			}
		}
		for key, count := range magicVals.abortCounts {
			checkCorrectTxn(string(req.Value.RawBytes), magicVals, hdr.Txn)
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				magicVals.abortCounts[key]--
				err := roachpb.NewTransactionAbortedError()
				magicVals.failedValues[string(req.Value.RawBytes)] =
					failureRecord{err, hdr.Txn}
				return err
			}
		}
		// If we're writing a value that's marked for an EndTransaction failure,
		// keep track of the txn id so we can fail it later on.
		for key, count := range magicVals.endTxnRestartCounts {
			if count > 0 && bytes.Contains(req.Value.RawBytes, []byte(key)) {
				txnID := *hdr.Txn.TxnMeta.ID
				if _, found := magicVals.txnsToFail[txnID]; found {
					continue
				}
				magicVals.endTxnRestartCounts[key]--
				magicVals.txnsToFail[txnID] = true
			}
		}
		return nil
	case *roachpb.EndTransactionRequest:
		txnID := *hdr.Txn.TxnMeta.ID
		if !magicVals.txnsToFail[txnID] {
			return nil
		}
		delete(magicVals.txnsToFail, txnID)
		// Note that we can't return TransactionAborted errors, although those are
		// more representative for the errors that EndTransaction might encounter,
		// because returning those would result in the txn's intents being left
		// around.
		return roachpb.NewTransactionRetryError()
	default:
		return nil
	}
}
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	testCases := []struct {
		roachpb.Error
		errMsg string
	}{
		{*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"},
		{*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				ID: uuid.NewV4(),
			}})), "failed to push"},
		{*roachpb.NewErrorf("testError"), "testError"},
	}
	for i, test := range testCases {
		func() {
			senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
				txn := ba.Txn.Clone()
				txn.Writing = true
				pErr := &roachpb.Error{}
				*pErr = test.Error
				pErr.SetTxn(&txn)
				return nil, pErr
			}
			ctx := tracing.WithTracer(context.Background(), tracing.NewTracer())
			ts := NewTxnCoordSender(ctx, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics())

			var ba roachpb.BatchRequest
			key := roachpb.Key("test")
			ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.EndTransactionRequest{})
			ba.Txn = &roachpb.Transaction{Name: "test"}
			_, pErr := ts.Send(context.Background(), ba)
			if !testutils.IsPError(pErr, test.errMsg) {
				t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr)
			}

			defer teardownHeartbeats(ts)
			ts.Lock()
			defer ts.Unlock()
			if len(ts.txns) != 1 {
				t.Errorf("%d: expected transaction to be tracked", i)
			}
		}()
	}
}
Example #5
0
// TestTransactionKeyNotChangedInRestart verifies that if the transaction already has a key (we're
// in a restart), the key in the begin transaction request is not changed.
func TestTransactionKeyNotChangedInRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tries := 0
	db := NewDB(newTestSender(nil, func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		var bt *roachpb.BeginTransactionRequest
		if args, ok := ba.GetArg(roachpb.BeginTransaction); ok {
			bt = args.(*roachpb.BeginTransactionRequest)
		} else {
			t.Fatal("failed to find a begin transaction request")
		}

		// In the first try, the transaction key is the key of the first write command. Before the
		// second try, the transaction key is set to txnKey by the test sender. In the second try, the
		// transaction key is txnKey.
		var expectedKey roachpb.Key
		if tries == 1 {
			expectedKey = testKey
		} else {
			expectedKey = txnKey
		}
		if !bt.Key.Equal(expectedKey) {
			t.Fatalf("expected transaction key %v, got %v", expectedKey, bt.Key)
		}

		return ba.CreateReply(), nil
	}))

	if err := db.Txn(context.TODO(), func(txn *Txn) error {
		tries++
		b := txn.NewBatch()
		b.Put("a", "b")
		if err := txn.Run(b); err != nil {
			t.Fatal(err)
		}
		if tries == 1 {
			return roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), &txn.Proto).GoError()
		}
		return nil
	}); err != nil {
		t.Errorf("unexpected error on commit: %s", err)
	}
	minimumTries := 2
	if tries < minimumTries {
		t.Errorf("expected try count >= %d, got %d", minimumTries, tries)
	}
}
Example #6
0
// Test that a TransactionRetryError will retry the read until it succeeds. The
// test is designed so that if the proto timestamps are bumped during retry
// a failure will occur.
func TestAsOfRetry(t *testing.T) {
	defer leaktest.AfterTest(t)()

	params, cmdFilters := createTestServerParams()
	// Disable one phase commits because they cannot be restarted.
	params.Knobs.Store.(*storage.StoreTestingKnobs).DisableOnePhaseCommits = true
	s, sqlDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	const val1 = 1
	const val2 = 2
	const name = "boulanger"

	if _, err := sqlDB.Exec(fmt.Sprintf(`
			CREATE DATABASE d;
			CREATE TABLE d.t (s STRING PRIMARY KEY, a INT);
			INSERT INTO d.t (s, a) VALUES ('%v', %v);
		`, name, val1)); err != nil {
		t.Fatal(err)
	}

	var walltime int64
	if err := sqlDB.QueryRow("UPDATE d.t SET a = $1 RETURNING cluster_logical_timestamp()::int", val2).Scan(&walltime); err != nil {
		t.Fatal(err)
	}
	tsVal2 := time.Unix(0, walltime).Format(time.RFC3339Nano)
	tsVal1 := time.Unix(0, walltime-1).Format(time.RFC3339Nano)

	// Set up error injection that causes retries.
	magicVals := createFilterVals(nil, nil)
	magicVals.restartCounts = map[string]int{
		name: 5,
	}
	cleanupFilter := cmdFilters.AppendFilter(
		func(args storagebase.FilterArgs) *roachpb.Error {
			magicVals.Lock()
			defer magicVals.Unlock()

			switch req := args.Req.(type) {
			case *roachpb.ScanRequest:
				for key, count := range magicVals.restartCounts {
					checkCorrectTxn(string(req.Key), magicVals, args.Hdr.Txn)
					if count > 0 && bytes.Contains(req.Key, []byte(key)) {
						magicVals.restartCounts[key]--
						err := roachpb.NewTransactionRetryError()
						magicVals.failedValues[string(req.Key)] =
							failureRecord{err, args.Hdr.Txn}
						txn := args.Hdr.Txn.Clone()
						txn.Timestamp = txn.Timestamp.Add(0, 1)
						return roachpb.NewErrorWithTxn(err, &txn)
					}
				}
			}
			return nil
		}, false)

	var i int
	// Query with tsVal1 which should return the first value. Since tsVal1 is just
	// one nanosecond before tsVal2, any proto timestamp bumping will return val2
	// and error.
	// Must specify the WHERE here to trigger the injection errors.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s' WHERE s = '%s'", tsVal1, name)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val1 {
		t.Fatalf("unexpected val: %v", i)
	}

	cleanupFilter()
	// Verify that the retry errors were injected.
	checkRestarts(t, magicVals)

	// Query with tsVal2 to ensure val2 is indeed present.
	if err := sqlDB.QueryRow(fmt.Sprintf("SELECT a FROM d.t AS OF SYSTEM TIME '%s'", tsVal2)).Scan(&i); err != nil {
		t.Fatal(err)
	} else if i != val2 {
		t.Fatalf("unexpected val: %v", i)
	}
}
Example #7
0
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	sp, cleanupSp := tracing.SpanFromContext(opTxnCoordSender, tc.tracer, ctx)
	defer cleanupSp()

	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else {
		newTxn.Update(pErr.GetTxn())
	}

	// If the request was successful but we're in a transaction which needs to
	// restart but doesn't know it yet, let it restart now (as opposed to
	// waiting until EndTransaction).
	if pErr == nil && newTxn.Isolation == roachpb.SERIALIZABLE &&
		!newTxn.OrigTimestamp.Equal(newTxn.Timestamp) {
		pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn)
		br = nil
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(sp, *pErr.GetTxn())
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(util.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(sp, *newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		if pErr.GetTxn() != nil {
			if pErr.CanRetry() {
				panic("Retryable internal error must not happen at this level")
			} else {
				// Do not clean up the transaction here since the client might still
				// want to continue the transaction. For example, a client might
				// continue its transaction after receiving ConditionFailedError, which
				// can come from a unique index violation.
			}
		}
	}

	if pErr != nil && pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and then there are races. Fairly sure there isn't one
		// here, but better safe than sorry.
		pErrShallow := *pErr
		pErrShallow.SetTxn(newTxn)
		pErr = &pErrShallow
	}

	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	var intents []roachpb.Span
	// TODO(nvanbenschoten): Iterating here to put the intents in a slice for
	// the sole purpose of later iterating again and calling addKeyRange is a
	// little wasteful and can likely be avoided.
	ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
		intents = append(intents, roachpb.Span{Key: key, EndKey: endKey})
	})

	if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
		if txnMeta == nil {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				sp.LogEvent("coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             interval.NewRangeTree(),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			}
		}
	}
	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		for _, intent := range intents {
			addKeyRange(txnMeta.keys, intent.Key, intent.EndKey)
		}
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}
Example #8
0
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	sp := tracing.SpanFromContext(ctx)
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.Txn)
	// If the request was successful but we're in a transaction which needs to
	// restart but doesn't know it yet, let it restart now (as opposed to
	// waiting until EndTransaction).
	if pErr == nil && br.Txn != nil && br.Txn.Isolation == roachpb.SERIALIZABLE &&
		!br.Txn.OrigTimestamp.Equal(br.Txn.Timestamp) {
		pErr = roachpb.NewErrorWithTxn(roachpb.NewTransactionRetryError(), br.Txn)
		br = nil
	}

	// TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex);
	// since there's no concept of ownership copy-on-write is always preferable.
	switch t := pErr.GetDetail().(type) {
	case nil:
		newTxn.Update(br.Txn)
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		pErrTxn := pErr.GetTxn().Clone()
		defer tc.cleanupTxn(sp, pErrTxn)
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.Update(pErr.GetTxn())
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionAbortedError:
		newTxn.Update(pErr.GetTxn())
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		pErr.SetTxn(newTxn)
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(sp, *pErr.GetTxn())
	case *roachpb.TransactionPushError:
		newTxn.Update(pErr.GetTxn())
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	case *roachpb.TransactionRetryError:
		newTxn.Update(pErr.GetTxn())
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
		pErr.SetTxn(newTxn)
	}

	if newTxn.ID == nil {
		return pErr
	}
	txnID := *newTxn.ID
	tc.Lock()
	defer tc.Unlock()
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	intents := ba.GetIntentSpans()
	if len(intents) > 0 && (pErr == nil || newTxn.Writing) {
		if txnMeta == nil {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				sp.LogEvent("coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if !tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(txnID)
				}) {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(&roachpb.NodeUnavailableError{})
				}
			}
		}
	}
	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn = *newTxn
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		for _, intent := range intents {
			txnMeta.addKeyRange(intent.Key, intent.EndKey)
		}
	}
	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back.
		br.Txn = newTxn
	}
	return pErr
}