// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean up explicitly, even when that may not be
// required (or would even be erroneous).
func (txn *Txn) send(calls ...proto.Call) *proto.Error {
	if txn.Proto.Status != proto.PENDING {
		return proto.NewError(util.Errorf(
			"attempting to use %s transaction", txn.Proto.Status))
	}
	lastIndex := len(calls) - 1
	if lastIndex < 0 {
		return nil
	}
	lastReq := calls[lastIndex].Args
	// haveTxnWrite tracks intention to write. This is in contrast to
	// txn.Proto.Writing, which is set by the coordinator when the first
	// intent has been created, and which lives for the life of the
	// transaction.
	haveTxnWrite := proto.IsTransactionWrite(lastReq)
	for _, call := range calls[:lastIndex] {
		request := call.Args
		if req, ok := request.(*proto.EndTransactionRequest); ok {
			return proto.NewError(util.Errorf(
				"%s sent as non-terminal call", req.Method()))
		}
		if !haveTxnWrite {
			haveTxnWrite = proto.IsTransactionWrite(request)
		}
	}
	endTxnRequest, haveEndTxn := lastReq.(*proto.EndTransactionRequest)
	needEndTxn := txn.Proto.Writing || haveTxnWrite
	elideEndTxn := haveEndTxn && !needEndTxn

	if elideEndTxn {
		calls = calls[:lastIndex]
	}

	pErr := txn.db.send(calls...)
	if elideEndTxn && pErr == nil {
		// This update normally happens on the server and is sent back in
		// the response headers, but this transaction was optimized away.
		// The caller may still inspect the transaction struct, so we
		// manually update it here to emulate a true transaction.
		if endTxnRequest.Commit {
			txn.Proto.Status = proto.COMMITTED
		} else {
			txn.Proto.Status = proto.ABORTED
		}
	}
	return pErr
}
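// What follows is a minimal, self-contained sketch of the EndTransaction
// elision rule used by send above. The request type and its flags are
// stand-ins, not CockroachDB's proto package; only the decision logic is
// mirrored.
package main

import "fmt"

type request struct {
	isTxnWrite bool // would lay down a write intent
	isEndTxn   bool // an EndTransaction request
}

// elideEndTxn reports whether a trailing EndTransaction can be dropped:
// only when the coordinator has not recorded a write (txnWriting) and no
// call in the batch intends to write.
func elideEndTxn(txnWriting bool, calls []request) bool {
	last := len(calls) - 1
	if last < 0 || !calls[last].isEndTxn {
		return false
	}
	haveTxnWrite := txnWriting
	for _, c := range calls {
		haveTxnWrite = haveTxnWrite || c.isTxnWrite
	}
	return !haveTxnWrite
}

func main() {
	// A read-only batch ending in EndTransaction: the commit round-trip is
	// elided and the client emulates COMMITTED locally.
	fmt.Println(elideEndTxn(false, []request{{false, false}, {false, true}})) // true
	// Any write in the batch forces a real EndTransaction.
	fmt.Println(elideEndTxn(false, []request{{true, false}, {false, true}})) // false
}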
// updateForBatch updates the first argument (the header of a request
// contained in a batch) from the second one (the batch header), returning
// an error when inconsistencies are found. It verifies that the individual
// call does not have a User, UserPriority, or Txn set that differs from
// the batch's.
func updateForBatch(args proto.Request, bHeader proto.RequestHeader) error {
	// Disallow transaction, user and priority on individual calls, unless
	// equal.
	aHeader := args.Header()
	if aHeader.User != "" && aHeader.User != bHeader.User {
		return util.Error("conflicting user on call in batch")
	}
	if aPrio := aHeader.GetUserPriority(); aPrio != proto.Default_RequestHeader_UserPriority &&
		aPrio != bHeader.GetUserPriority() {
		return util.Error("conflicting user priority on call in batch")
	}
	aHeader.User = bHeader.User
	aHeader.UserPriority = bHeader.UserPriority
	// Only allow individual transactions on the requests of a batch if:
	// - the batch is non-transactional,
	// - the individual transaction does not write intents, and
	// - the individual transaction is initialized.
	// The main usage of this is to allow mass-resolution of intents, which
	// entails sending a non-txn batch of transactional InternalResolveIntent
	// calls.
	if aHeader.Txn != nil && !aHeader.Txn.Equal(bHeader.Txn) {
		if len(aHeader.Txn.ID) == 0 || proto.IsTransactionWrite(args) || bHeader.Txn != nil {
			return util.Error("conflicting transaction in transactional batch")
		}
	} else {
		aHeader.Txn = bHeader.Txn
	}
	return nil
}
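// Below is a minimal sketch of the precedence rule updateForBatch enforces,
// with hypothetical stand-in types (header, mergeHeader); the real batch
// and request headers carry priority and transaction state as well.
package main

import (
	"errors"
	"fmt"
)

type header struct {
	user string
	txn  *string // nil means the call carries no transaction of its own
}

// mergeHeader folds the batch header into an individual call's header,
// rejecting a conflicting user the same way updateForBatch does.
func mergeHeader(call *header, batch header) error {
	if call.user != "" && call.user != batch.user {
		return errors.New("conflicting user on call in batch")
	}
	call.user = batch.user
	if call.txn == nil {
		call.txn = batch.txn
	}
	return nil
}

func main() {
	batch := header{user: "root"}
	call := header{} // empty fields inherit from the batch header
	fmt.Println(mergeHeader(&call, batch), call.user) // <nil> root

	conflicting := header{user: "node"}
	fmt.Println(mergeHeader(&conflicting, batch)) // conflicting user on call in batch
}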
func (txn *Txn) updateStateForRequest(r proto.Request) {
	if !txn.haveTxnWrite {
		txn.haveTxnWrite = proto.IsTransactionWrite(r)
	} else if _, ok := r.(*proto.EndTransactionRequest); ok {
		txn.haveEndTxn = true
	}
}
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		call    proto.Call
		writing bool
		ok      bool
	}{
		{proto.GetCall(proto.Key("a")), true, true},
		{proto.GetCall(proto.Key("a")), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.call.Args.Header().Txn = txn
		}
		err := sendCall(s.Sender, tc.call)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.call.Args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}
		txn := tc.call.Reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := proto.IsTransactionWrite(tc.call.Args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for clean shutdown.
		etReply := &proto.EndTransactionResponse{}
		if err := sendCall(s.Sender, proto.Call{
			Args: &proto.EndTransactionRequest{
				RequestHeader: proto.RequestHeader{
					Key:       txn.Key,
					Timestamp: txn.Timestamp,
					Txn:       txn,
				},
				Commit: false,
			},
			Reply: etReply,
		}); err != nil {
			t.Fatal(err)
		}
	}
}
func (txn *Txn) updateStateForRequest(r proto.Request) error {
	if !txn.haveTxnWrite {
		txn.haveTxnWrite = proto.IsTransactionWrite(r)
	}
	if _, ok := r.(*proto.EndTransactionRequest); ok {
		if txn.haveEndTxn {
			return errMultipleEndTxn
		}
		txn.haveEndTxn = true
	}
	return nil
}
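// A minimal, self-contained sketch of the per-batch state tracking that
// updateStateForRequest performs, using stand-in booleans instead of the
// real proto.Request type; errMultipleEndTxn mirrors the sentinel error
// the method is assumed to return.
package main

import (
	"errors"
	"fmt"
)

var errMultipleEndTxn = errors.New("cannot send multiple EndTransaction calls")

type batchState struct {
	haveTxnWrite bool // some request in the batch lays down an intent
	haveEndTxn   bool // an EndTransaction has already been seen
}

// update records a request's effect on the batch state, rejecting a
// second EndTransaction in the same batch.
func (s *batchState) update(isTxnWrite, isEndTxn bool) error {
	if !s.haveTxnWrite {
		s.haveTxnWrite = isTxnWrite
	}
	if isEndTxn {
		if s.haveEndTxn {
			return errMultipleEndTxn
		}
		s.haveEndTxn = true
	}
	return nil
}

func main() {
	var s batchState
	fmt.Println(s.update(true, false)) // <nil>: a write is recorded
	fmt.Println(s.update(false, true)) // <nil>: first EndTransaction
	fmt.Println(s.update(false, true)) // cannot send multiple EndTransaction calls
}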
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		args    proto.Request
		writing bool
		ok      bool
	}{
		{proto.NewGet(proto.Key("a")), true, true},
		{proto.NewGet(proto.Key("a")), false, true},
		{proto.NewPut(proto.Key("a"), proto.Value{}), false, true},
		{proto.NewPut(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.args.Header().Txn = txn
		}
		reply, err := batchutil.SendWrapped(s.Sender, tc.args)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}
		txn := reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := proto.IsTransactionWrite(tc.args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for clean shutdown.
		if _, err := batchutil.SendWrapped(s.Sender, &proto.EndTransactionRequest{
			RequestHeader: proto.RequestHeader{
				Timestamp: txn.Timestamp,
				Txn:       txn,
			},
			Commit: false,
		}); err != nil {
			t.Fatal(err)
		}
	}
}
// send runs the specified calls synchronously in a single batch and
// returns any errors. If the transaction is read-only or has already
// been successfully committed or aborted, a potential trailing
// EndTransaction call is silently dropped, allowing the caller to
// always commit or clean up explicitly, even when that may not be
// required (or would even be erroneous).
func (txn *Txn) send(calls ...proto.Call) error {
	if txn.Proto.Status != proto.PENDING {
		return util.Errorf("attempting to use %s transaction", txn.Proto.Status)
	}
	lastIndex := len(calls) - 1
	if lastIndex < 0 {
		return nil
	}
	lastReq := calls[lastIndex].Args
	// haveTxnWrite tracks intention to write. This is in contrast to
	// txn.Proto.Writing, which is set by the coordinator when the first
	// intent has been created, and which lives for the life of the
	// transaction.
	haveTxnWrite := proto.IsTransactionWrite(lastReq)
	for _, call := range calls[:lastIndex] {
		request := call.Args
		if req, ok := request.(*proto.EndTransactionRequest); ok {
			return util.Errorf("%s sent as non-terminal call", req.Method())
		}
		if !haveTxnWrite {
			haveTxnWrite = proto.IsTransactionWrite(request)
		}
	}
	_, haveEndTxn := lastReq.(*proto.EndTransactionRequest)
	needEndTxn := txn.Proto.Writing || haveTxnWrite

	if haveEndTxn && !needEndTxn {
		calls = calls[:lastIndex]
	}
	return txn.db.send(calls...)
}
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(ctx context.Context, call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	trace := tracer.FromCtx(ctx)
	var id string // optional transaction ID
	if header.Txn != nil {
		// If this call is part of a transaction...
		id = string(header.Txn.ID)
		// Verify that if this Transaction is not read-only, we have it on
		// file. If not, refuse writes - the client must have issued a write
		// on another coordinator previously.
		if header.Txn.Writing && proto.IsTransactionWrite(call.Args) {
			tc.Lock()
			_, ok := tc.txns[id]
			tc.Unlock()
			if !ok {
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction must not write on multiple coordinators"))
				return
			}
		}
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}

		if args, ok := call.Args.(*proto.EndTransactionRequest); ok {
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
			// EndTransaction must have its key set to that of the txn.
			header.Key = header.Txn.Key
			if len(args.Intents) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				call.Reply.Header().SetGoError(util.Errorf(
					"client must not pass intents to EndTransaction"))
				return
			}
			tc.Lock()
			txnMeta, metaOK := tc.txns[id]
			if id != "" && metaOK {
				args.Intents = txnMeta.intents()
			}
			tc.Unlock()

			if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request, so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could look up the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction is already committed or aborted"))
				return
			} else if len(args.Intents) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state
				// in the client.
				call.Reply.Header().SetGoError(util.Errorf(
					"cannot commit a read-only transaction"))
				return
			}
		}
	}

	// Send the command through the wrapped sender.
	tc.wrapped.Send(ctx, call)

	// For transactional calls, we need to track & update the transaction.
	if header.Txn != nil {
		respHeader := call.Reply.Header()
		if respHeader.Txn == nil {
			// When empty, simply use the request's transaction.
			// This is expected: the Range doesn't bother copying unless the
			// object changes.
			respHeader.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, respHeader)
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		if !header.Txn.Equal(txn) {
			panic("transaction ID changed")
		}
		tc.Lock()
		txnMeta := tc.txns[id]
		// If this transactional command leaves transactional intents, add
		// the key or key range to the intents map. If the transaction
		// metadata doesn't yet exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txn.Writing = true
					trace.Event("coordinator spawns")
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					tc.txns[id] = txnMeta
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start
						// the heartbeat. We refuse new transactions for now
						// because they're likely not going to have all intents
						// committed. In principle, we can relax this as needed
						// though.
						call.Reply.Header().SetGoError(&proto.NodeUnavailableError{})
						tc.Unlock()
						tc.unregisterTxn(id)
						return
					}
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Clean up intents and the transaction map at the end of a transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.TransactionAbortedError:
		// If already aborted, clean up the txn on this TxnCoordSender.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		// TODO(tschottdorf): this part is awkward. Consider resending here
		// without starting a new call, which is hard to trace. Plus, the
		// below depends on default configuration.
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap", 0)
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.CommitInBatch(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure
				// that all the clocks in the system are past the commit
				// timestamp of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS, or
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability",
								txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(trace, *txn)
				}
			}
		}
	}
}
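// Below is a minimal sketch of the linearizability wait computed at the end
// of sendOne, using hypothetical inputs in place of the coordinator's
// clock; only the arithmetic is mirrored.
package main

import (
	"fmt"
	"time"
)

// linearizableWait returns how long the coordinator should sleep before
// acknowledging a commit, so that every clock in the cluster has passed
// the commit timestamp. startNS is the earlier of the EndTransaction start
// time and the commit wall time; maxOffset is the maximum clock skew.
func linearizableWait(maxOffset time.Duration, startNS, nowNS int64) time.Duration {
	if sleep := maxOffset - time.Duration(nowNS-startNS); sleep > 0 {
		return sleep
	}
	return 0
}

func main() {
	// A 250ms max offset with 100ms already elapsed leaves 150ms to wait.
	elapsed := int64(100 * time.Millisecond)
	fmt.Println(linearizableWait(250*time.Millisecond, 0, elapsed)) // 150ms
}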
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	// If this call is part of a transaction...
	if header.Txn != nil {
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}
		// EndTransaction must have its key set to that of the txn.
		if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
			header.Key = header.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
		}
	}

	// Send the command through the wrapped sender.
	tc.wrapped.Send(context.TODO(), call)

	if header.Txn != nil {
		// If not already set, copy the request txn.
		if call.Reply.Header().Txn == nil {
			call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, call.Reply.Header())
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		tc.Lock()
		txnMeta := tc.txns[string(txn.ID)]
		// If this transactional command leaves transactional intents, add
		// the key or key range to the intents map. If the transaction
		// metadata doesn't yet exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					id := string(txn.ID)
					tc.txns[id] = txnMeta
					tc.heartbeat(id)
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Clean up intents and the transaction map at the end of a transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.TransactionAbortedError:
		// If already aborted, clean up the txn on this TxnCoordSender.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap")
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.Commit(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		var resolved []proto.Key
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure
				// that all the clocks in the system are past the commit
				// timestamp of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS, or
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability",
								txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				resolved = call.Reply.(*proto.EndTransactionResponse).Resolved
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(*txn, resolved)
				}
			}
		}
	}
}
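// Finally, a minimal sketch of the abandonment tracking behind
// lastUpdateNanos and setLastUpdate, with a hypothetical timeout and
// method name (hasClientAbandoned); the real txnMetadata also holds the
// intent interval cache and the heartbeat channel.
package main

import (
	"fmt"
	"time"
)

type txnMetadata struct {
	lastUpdateNanos int64
	timeoutDuration time.Duration
}

func (m *txnMetadata) setLastUpdate(nowNanos int64) {
	m.lastUpdateNanos = nowNanos
}

// hasClientAbandoned reports whether the client has gone silent for long
// enough that the coordinator may consider the transaction abandoned and
// garbage-collect it.
func (m *txnMetadata) hasClientAbandoned(nowNanos int64) bool {
	return nowNanos-m.lastUpdateNanos >= m.timeoutDuration.Nanoseconds()
}

func main() {
	m := &txnMetadata{timeoutDuration: 5 * time.Second}
	now := time.Now().UnixNano()
	m.setLastUpdate(now)
	fmt.Println(m.hasClientAbandoned(now))                                    // false
	fmt.Println(m.hasClientAbandoned(now + (6 * time.Second).Nanoseconds())) // true
}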