Exemple #1
0
// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean-up explicitly even when that may not be
// required (or even erroneous).
func (txn *Txn) send(calls ...proto.Call) *proto.Error {
	if txn.Proto.Status != proto.PENDING {
		return proto.NewError(util.Errorf("attempting to use %s transaction", txn.Proto.Status))
	}

	lastIndex := len(calls) - 1
	if lastIndex < 0 {
		return nil
	}

	lastReq := calls[lastIndex].Args
	// haveTxnWrite tracks intention to write. This is in contrast to
	// txn.Proto.Writing, which is set by the coordinator when the first
	// intent has been created, and which lives for the life of the
	// transaction.
	haveTxnWrite := proto.IsTransactionWrite(lastReq)

	for _, call := range calls[:lastIndex] {
		request := call.Args

		if req, ok := request.(*proto.EndTransactionRequest); ok {
			return proto.NewError(util.Errorf("%s sent as non-terminal call", req.Method()))
		}

		if !haveTxnWrite {
			haveTxnWrite = proto.IsTransactionWrite(request)
		}
	}

	endTxnRequest, haveEndTxn := lastReq.(*proto.EndTransactionRequest)
	needEndTxn := txn.Proto.Writing || haveTxnWrite
	elideEndTxn := haveEndTxn && !needEndTxn

	if elideEndTxn {
		calls = calls[:lastIndex]
	}

	pErr := txn.db.send(calls...)
	if elideEndTxn && pErr == nil {
		// This normally happens on the server and sent back in response
		// headers, but this transaction was optimized away. The caller may
		// still inspect the transaction struct, so we manually update it
		// here to emulate a true transaction.
		if endTxnRequest.Commit {
			txn.Proto.Status = proto.COMMITTED
		} else {
			txn.Proto.Status = proto.ABORTED
		}
	}
	return pErr
}
// updateForBatch updates the first argument (the header of a request contained
// in a batch) from the second one (the batch header), returning an error when
// inconsistencies are found.
// It is checked that the individual call does not have a User, UserPriority
// or Txn set that differs from the batch's.
func updateForBatch(args proto.Request, bHeader proto.RequestHeader) error {
	// Disallow transaction, user and priority on individual calls, unless
	// equal.
	aHeader := args.Header()
	if aHeader.User != "" && aHeader.User != bHeader.User {
		return util.Error("conflicting user on call in batch")
	}
	if aPrio := aHeader.GetUserPriority(); aPrio != proto.Default_RequestHeader_UserPriority && aPrio != bHeader.GetUserPriority() {
		return util.Error("conflicting user priority on call in batch")
	}
	aHeader.User = bHeader.User
	aHeader.UserPriority = bHeader.UserPriority
	// Only allow individual transactions on the requests of a batch if
	// - the batch is non-transactional,
	// - the individual transaction does not write intents, and
	// - the individual transaction is initialized.
	// The main usage of this is to allow mass-resolution of intents, which
	// entails sending a non-txn batch of transactional InternalResolveIntent.
	if aHeader.Txn != nil && !aHeader.Txn.Equal(bHeader.Txn) {
		if len(aHeader.Txn.ID) == 0 || proto.IsTransactionWrite(args) || bHeader.Txn != nil {
			return util.Error("conflicting transaction in transactional batch")
		}
	} else {
		aHeader.Txn = bHeader.Txn
	}
	return nil
}
Exemple #3
0
func (txn *Txn) updateStateForRequest(r proto.Request) {
	if !txn.haveTxnWrite {
		txn.haveTxnWrite = proto.IsTransactionWrite(r)
	} else if _, ok := r.(*proto.EndTransactionRequest); ok {
		txn.haveEndTxn = true
	}
}
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		call    proto.Call
		writing bool
		ok      bool
	}{
		{proto.GetCall(proto.Key("a")), true, true},
		{proto.GetCall(proto.Key("a")), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.call.Args.Header().Txn = txn
		}
		err := sendCall(s.Sender, tc.call)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.call.Args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}

		txn := tc.call.Reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := proto.IsTransactionWrite(tc.call.Args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for clean shutdown.
		etReply := &proto.EndTransactionResponse{}
		if err := sendCall(s.Sender, proto.Call{
			Args: &proto.EndTransactionRequest{
				RequestHeader: proto.RequestHeader{
					Key:       txn.Key,
					Timestamp: txn.Timestamp,
					Txn:       txn,
				},
				Commit: false,
			},
			Reply: etReply,
		}); err != nil {
			log.Warning(err)
			t.Fatal(err)
		}
	}
}
Exemple #5
0
func (txn *Txn) updateStateForRequest(r proto.Request) error {
	if !txn.haveTxnWrite {
		txn.haveTxnWrite = proto.IsTransactionWrite(r)
	}
	if _, ok := r.(*proto.EndTransactionRequest); ok {
		if txn.haveEndTxn {
			return errMultipleEndTxn
		}
		txn.haveEndTxn = true
	}
	return nil
}
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		args    proto.Request
		writing bool
		ok      bool
	}{
		{proto.NewGet(proto.Key("a")), true, true},
		{proto.NewGet(proto.Key("a")), false, true},
		{proto.NewPut(proto.Key("a"), proto.Value{}), false, true},
		{proto.NewPut(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.args.Header().Txn = txn
		}
		reply, err := batchutil.SendWrapped(s.Sender, tc.args)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}

		txn := reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := proto.IsTransactionWrite(tc.args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for clean shutdown.
		if _, err := batchutil.SendWrapped(s.Sender, &proto.EndTransactionRequest{
			RequestHeader: proto.RequestHeader{
				Timestamp: txn.Timestamp,
				Txn:       txn,
			},
			Commit: false,
		}); err != nil {
			t.Fatal(err)
		}
	}
}
Exemple #7
0
// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean-up explicitly even when that may not be
// required (or even erroneous).
func (txn *Txn) send(calls ...proto.Call) error {
	if txn.Proto.Status != proto.PENDING {
		return util.Errorf("attempting to use %s transaction", txn.Proto.Status)
	}

	lastIndex := len(calls) - 1
	if lastIndex < 0 {
		return nil
	}

	lastReq := calls[lastIndex].Args
	// haveTxnWrite tracks intention to write. This is in contrast to
	// txn.Proto.Writing, which is set by the coordinator when the first
	// intent has been created, and which lives for the life of the
	// transaction.
	haveTxnWrite := proto.IsTransactionWrite(lastReq)

	for _, call := range calls[:lastIndex] {
		request := call.Args

		if req, ok := request.(*proto.EndTransactionRequest); ok {
			return util.Errorf("%s sent as non-terminal call", req.Method())
		}

		if !haveTxnWrite {
			haveTxnWrite = proto.IsTransactionWrite(request)
		}
	}

	_, haveEndTxn := lastReq.(*proto.EndTransactionRequest)
	needEndTxn := txn.Proto.Writing || haveTxnWrite

	if haveEndTxn && !needEndTxn {
		calls = calls[:lastIndex]
	}

	return txn.db.send(calls...)
}
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(ctx context.Context, call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	trace := tracer.FromCtx(ctx)
	var id string // optional transaction ID
	if header.Txn != nil {
		// If this call is part of a transaction...
		id = string(header.Txn.ID)
		// Verify that if this Transaction is not read-only, we have it on
		// file. If not, refuse writes - the client must have issued a write on
		// another coordinator previously.
		if header.Txn.Writing && proto.IsTransactionWrite(call.Args) {
			tc.Lock()
			_, ok := tc.txns[id]
			tc.Unlock()
			if !ok {
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction must not write on multiple coordinators"))
				return
			}
		}

		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}

		if args, ok := call.Args.(*proto.EndTransactionRequest); ok {
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
			// EndTransaction must have its key set to that of the txn.
			header.Key = header.Txn.Key
			if len(args.Intents) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				call.Reply.Header().SetGoError(util.Errorf(
					"client must not pass intents to EndTransaction"))
				return
			}
			tc.Lock()
			txnMeta, metaOK := tc.txns[id]
			if id != "" && metaOK {
				args.Intents = txnMeta.intents()
			}
			tc.Unlock()

			if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could lookup the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction is already committed or aborted"))
				return
			} else if len(args.Intents) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state in
				// the client.
				call.Reply.Header().SetGoError(util.Errorf(
					"cannot commit a read-only transaction"))
				return
			}
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(ctx, call)

	// For transactional calls, need to track & update the transaction.
	if header.Txn != nil {
		respHeader := call.Reply.Header()
		if respHeader.Txn == nil {
			// When empty, simply use the request's transaction.
			// This is expected: the Range doesn't bother copying unless the
			// object changes.
			respHeader.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, respHeader)
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		if !header.Txn.Equal(txn) {
			panic("transaction ID changed")
		}
		tc.Lock()
		txnMeta := tc.txns[id]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txn.Writing = true
					trace.Event("coordinator spawns")
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					tc.txns[id] = txnMeta
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						call.Reply.Header().SetGoError(&proto.NodeUnavailableError{})
						tc.Unlock()
						tc.unregisterTxn(id)
						return
					}
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		// TODO(tschottdorf): this part is awkward. Consider resending here
		// without starting a new call, which is hard to trace. Plus, the
		// below depends on default configuration.
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap", 0)
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.CommitInBatch(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(trace, *txn)
				}

			}
		}
	}
}
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	// If this call is part of a transaction...
	if header.Txn != nil {
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}
		// EndTransaction must have its key set to that of the txn.
		if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
			header.Key = header.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(context.TODO(), call)

	if header.Txn != nil {
		// If not already set, copy the request txn.
		if call.Reply.Header().Txn == nil {
			call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, call.Reply.Header())
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		tc.Lock()
		txnMeta := tc.txns[string(txn.ID)]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					id := string(txn.ID)
					tc.txns[id] = txnMeta
					tc.heartbeat(id)
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap")
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.Commit(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		var resolved []proto.Key
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				resolved = call.Reply.(*proto.EndTransactionResponse).Resolved
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(*txn, resolved)
				}

			}
		}
	}
}