// AddCmd adds a command for execution on this range. The command's // affected keys are verified to be contained within the range and the // range's leadership is confirmed. The command is then dispatched // either along the read-only execution path or the read-write Raft // command queue. func (r *Range) AddCmd(ctx context.Context, call proto.Call) error { args := call.Args // TODO(tschottdorf) Some (internal) requests go here directly, so they // won't be traced. trace := tracer.FromCtx(ctx) // Differentiate between admin, read-only and read-write. var reply proto.Response var err error if proto.IsAdmin(args) { defer trace.Epoch("admin path")() reply, err = r.addAdminCmd(ctx, args) } else if proto.IsReadOnly(args) { defer trace.Epoch("read-only path")() reply, err = r.addReadOnlyCmd(ctx, args) } else if proto.IsWrite(args) { defer trace.Epoch("read-write path")() reply, err = r.addWriteCmd(ctx, args, nil) } else { panic(fmt.Sprintf("don't know how to handle command %T", args)) } if reply != nil { gogoproto.Merge(call.Reply, reply) } if err != nil { replyHeader := call.Reply.Header() if replyHeader.Error != nil { panic("the world is on fire") } replyHeader.SetGoError(err) } return err }
// sendAttempt gathers and rearranges the replicas, and makes an RPC call. func (ds *DistSender) sendAttempt(trace *tracer.Trace, ba proto.BatchRequest, desc *proto.RangeDescriptor) (*proto.BatchResponse, error) { defer trace.Epoch("sending RPC")() leader := ds.leaderCache.Lookup(proto.RangeID(desc.RangeID)) // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) // If this request needs to go to a leader and we know who that is, move // it to the front. if !(proto.IsReadOnly(&ba) && ba.ReadConsistency == proto.INCONSISTENT) && leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = rpc.OrderStable } } // TODO(tschottdorf) &ba -> ba resp, err := ds.sendRPC(trace, desc.RangeID, replicas, order, &ba) if err != nil { return nil, err } // Untangle the error from the received response. br := resp.(*proto.BatchResponse) err = br.GoError() br.Error = nil return br, err }
// AddCmd adds a command for execution on this range. The command's // affected keys are verified to be contained within the range and the // range's leadership is confirmed. The command is then dispatched // either along the read-only execution path or the read-write Raft // command queue. func (r *Range) AddCmd(ctx context.Context, call proto.Call) error { args, reply := call.Args, call.Reply // Differentiate between admin, read-only and read-write. if proto.IsAdmin(args) { return r.addAdminCmd(ctx, args, reply) } else if proto.IsReadOnly(args) { return r.addReadOnlyCmd(ctx, args, reply) } return r.addWriteCmd(ctx, args, reply, nil) }
// AddCmd adds a command for execution on this range. The command's // affected keys are verified to be contained within the range and the // range's leadership is confirmed. The command is then dispatched // either along the read-only execution path or the read-write Raft // command queue. func (r *Range) AddCmd(ctx context.Context, call proto.Call) error { args, reply := call.Args, call.Reply // TODO(tschottdorf) Some (internal) requests go here directly, so they // won't be traced. trace := tracer.FromCtx(ctx) // Differentiate between admin, read-only and read-write. if proto.IsAdmin(args) { defer trace.Epoch("admin path")() return r.addAdminCmd(ctx, args, reply) } else if proto.IsReadOnly(args) { defer trace.Epoch("read path")() return r.addReadOnlyCmd(ctx, args, reply) } return r.addWriteCmd(ctx, args, reply, nil) }
// AddCmd adds a command for execution on this range. The command's // affected keys are verified to be contained within the range and the // range's leadership is confirmed. The command is then dispatched // either along the read-only execution path or the read-write Raft // command queue. If wait is false, read-write commands are added to // Raft without waiting for their completion. func (r *Range) AddCmd(ctx context.Context, call proto.Call, wait bool) error { args, reply := call.Args, call.Reply header := args.Header() if !r.ContainsKeyRange(header.Key, header.EndKey) { err := proto.NewRangeKeyMismatchError(header.Key, header.EndKey, r.Desc()) reply.Header().SetGoError(err) return err } // Differentiate between admin, read-only and read-write. if proto.IsAdmin(args) { return r.addAdminCmd(ctx, args, reply) } else if proto.IsReadOnly(args) { return r.addReadOnlyCmd(ctx, args, reply) } return r.addWriteCmd(ctx, args, reply, wait) }
// AddCmd adds a command for execution on this range. The command's // affected keys are verified to be contained within the range and the // range's leadership is confirmed. The command is then dispatched // either along the read-only execution path or the read-write Raft // command queue. func (r *Replica) AddCmd(ctx context.Context, args proto.Request) (proto.Response, error) { // TODO(tschottdorf) Some (internal) requests go here directly, so they // won't be traced. trace := tracer.FromCtx(ctx) // Differentiate between admin, read-only and read-write. var reply proto.Response var err error if proto.IsAdmin(args) { defer trace.Epoch("admin path")() reply, err = r.addAdminCmd(ctx, args) } else if proto.IsReadOnly(args) { defer trace.Epoch("read-only path")() reply, err = r.addReadOnlyCmd(ctx, args) } else if proto.IsWrite(args) { defer trace.Epoch("read-write path")() reply, err = r.addWriteCmd(ctx, args, nil) } else { panic(fmt.Sprintf("don't know how to handle command %T", args)) } return reply, err }
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
//
// The function has no return value: every outcome, including the
// validation failures detected before sending, is reported by setting
// an error on call.Reply's header.
func (tc *TxnCoordSender) sendOne(ctx context.Context, call proto.Call) {
	// startNS is only set for EndTransaction requests; it feeds the
	// linearizability wait computed at the bottom of the function.
	var startNS int64
	header := call.Args.Header()
	trace := tracer.FromCtx(ctx)
	var id string // optional transaction ID
	if header.Txn != nil {
		// If this call is part of a transaction...
		id = string(header.Txn.ID)
		// Verify that if this Transaction is not read-only, we have it on
		// file. If not, refuse writes - the client must have issued a write on
		// another coordinator previously.
		if header.Txn.Writing && proto.IsTransactionWrite(call.Args) {
			// tc.txns is only accessed while holding tc's lock.
			tc.Lock()
			_, ok := tc.txns[id]
			tc.Unlock()
			if !ok {
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction must not write on multiple coordinators"))
				return
			}
		}

		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}

		if args, ok := call.Args.(*proto.EndTransactionRequest); ok {
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
			// EndTransaction must have its key set to that of the txn.
			header.Key = header.Txn.Key
			if len(args.Intents) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				call.Reply.Header().SetGoError(util.Errorf(
					"client must not pass intents to EndTransaction"))
				return
			}
			// Attach the intents tracked by this coordinator to the
			// request.
			tc.Lock()
			txnMeta, metaOK := tc.txns[id]
			if id != "" && metaOK {
				args.Intents = txnMeta.intents()
			}
			tc.Unlock()

			if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could lookup the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction is already committed or aborted"))
				return
			} else if len(args.Intents) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state in
				// the client.
				call.Reply.Header().SetGoError(util.Errorf(
					"cannot commit a read-only transaction"))
				return
			}
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(ctx, call)

	// For transactional calls, need to track & update the transaction.
	if header.Txn != nil {
		respHeader := call.Reply.Header()
		if respHeader.Txn == nil {
			// When empty, simply use the request's transaction.
			// This is expected: the Range doesn't bother copying unless the
			// object changes.
			respHeader.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, respHeader)
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		// NOTE(review): header.Txn may be nil here if only the reply
		// carries a transaction; this relies on Equal coping with a nil
		// receiver - confirm.
		if !header.Txn.Equal(txn) {
			panic("transaction ID changed")
		}
		tc.Lock()
		txnMeta := tc.txns[id]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					// First successful write seen for this transaction on
					// this coordinator: mark it as writing and start
					// tracking it.
					txn.Writing = true
					trace.Event("coordinator spawns")
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					tc.txns[id] = txnMeta
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						call.Reply.Header().SetGoError(&proto.NodeUnavailableError{})
						// The lock is released before unregisterTxn is
						// called; presumably unregisterTxn acquires it
						// itself - confirm.
						tc.Unlock()
						tc.unregisterTxn(id)
						return
					}
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		// TODO(tschottdorf): this part is awkward. Consider resending here
		// without starting a new call, which is hard to trace. Plus, the
		// below depends on default configuration.
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User,
				call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		// Clear the reply so the retried call starts from a clean slate.
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap", 0)
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.CommitInBatch(b)
		}); err != nil {
			// Failures are only logged here, not returned.
			log.Warning(err)
		}
	case nil:
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					// The sleep is deferred so that the cleanup below runs
					// before the wait.
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				// A transaction that is no longer pending is finished from
				// this coordinator's point of view.
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(trace, *txn)
				}
			}
		}
	}
}
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
//
// The function has no return value: outcomes are reported through
// call.Reply.
func (tc *TxnCoordSender) sendOne(call proto.Call) {
	// startNS is only set for EndTransaction requests; it feeds the
	// linearizability wait computed at the bottom of the function.
	var startNS int64
	header := call.Args.Header()
	// If this call is part of a transaction...
	if header.Txn != nil {
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}
		// EndTransaction must have its key set to that of the txn.
		if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
			header.Key = header.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(context.TODO(), call)

	if header.Txn != nil {
		// If not already set, copy the request txn.
		if call.Reply.Header().Txn == nil {
			call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, call.Reply.Header())
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		// tc.txns is only accessed while holding tc's lock.
		tc.Lock()
		txnMeta := tc.txns[string(txn.ID)]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					// First successful write seen for this transaction on
					// this coordinator: start tracking it.
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					id := string(txn.ID)
					tc.txns[id] = txnMeta
					// NOTE(review): heartbeat is invoked while the lock is
					// held; presumably it returns promptly (e.g. by
					// spawning its own goroutine) - confirm.
					tc.heartbeat(id)
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User,
				call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		// Clear the reply so the retried call starts from a clean slate.
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap")
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.Commit(b)
		}); err != nil {
			// Failures are only logged here, not returned.
			log.Warning(err)
		}
	case nil:
		var resolved []proto.Key
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					// The sleep is deferred so that the cleanup below runs
					// before the wait.
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				// Keys reported by the EndTransaction response are handed
				// to cleanupTxn alongside the transaction.
				resolved = call.Reply.(*proto.EndTransactionResponse).Resolved
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(*txn, resolved)
				}
			}
		}
	}
}
// TestVerifyPermissions verifies permissions are checked for single // zones and across multiple zones. It also verifies that permissions // are checked hierarchically. func TestVerifyPermissions(t *testing.T) { n := simulation.NewNetwork(1, "unix", gossip.TestInterval, gossip.TestBootstrap) ds := NewDistSender(n.Nodes[0].Gossip) config1 := &proto.PermConfig{ Read: []string{"read1", "readAll", "rw1", "rwAll"}, Write: []string{"write1", "writeAll", "rw1", "rwAll"}} config2 := &proto.PermConfig{ Read: []string{"read2", "readAll", "rw2", "rwAll"}, Write: []string{"write2", "writeAll", "rw2", "rwAll"}} configs := []*storage.PrefixConfig{ {engine.KeyMin, nil, config1}, {proto.Key("a"), nil, config2}, } configMap, err := storage.NewPrefixConfigMap(configs) if err != nil { t.Fatalf("failed to make prefix config map, err: %s", err.Error()) } ds.gossip.AddInfo(gossip.KeyConfigPermission, configMap, time.Hour) readOnlyMethods := make([]string, 0, len(proto.ReadMethods)) writeOnlyMethods := make([]string, 0, len(proto.WriteMethods)) readWriteMethods := make([]string, 0, len(proto.ReadMethods)+len(proto.WriteMethods)) for readM := range proto.ReadMethods { if proto.IsReadOnly(readM) { readOnlyMethods = append(readOnlyMethods, readM) } else { readWriteMethods = append(readWriteMethods, readM) } } for writeM := range proto.WriteMethods { if !proto.NeedReadPerm(writeM) { writeOnlyMethods = append(writeOnlyMethods, writeM) } } testData := []struct { // Permission-based db methods from the storage package. 
methods []string user string startKey, endKey proto.Key hasPermission bool }{ // Test permissions within a single range {readOnlyMethods, "read1", engine.KeyMin, engine.KeyMin, true}, {readOnlyMethods, "rw1", engine.KeyMin, engine.KeyMin, true}, {readOnlyMethods, "write1", engine.KeyMin, engine.KeyMin, false}, {readOnlyMethods, "random", engine.KeyMin, engine.KeyMin, false}, {readWriteMethods, "rw1", engine.KeyMin, engine.KeyMin, true}, {readWriteMethods, "read1", engine.KeyMin, engine.KeyMin, false}, {readWriteMethods, "write1", engine.KeyMin, engine.KeyMin, false}, {writeOnlyMethods, "write1", engine.KeyMin, engine.KeyMin, true}, {writeOnlyMethods, "rw1", engine.KeyMin, engine.KeyMin, true}, {writeOnlyMethods, "read1", engine.KeyMin, engine.KeyMin, false}, {writeOnlyMethods, "random", engine.KeyMin, engine.KeyMin, false}, // Test permissions hierarchically. {readOnlyMethods, "read1", proto.Key("a"), proto.Key("a1"), true}, {readWriteMethods, "rw1", proto.Key("a"), proto.Key("a1"), true}, {writeOnlyMethods, "write1", proto.Key("a"), proto.Key("a1"), true}, // Test permissions across both ranges. {readOnlyMethods, "readAll", engine.KeyMin, proto.Key("b"), true}, {readOnlyMethods, "read1", engine.KeyMin, proto.Key("b"), true}, {readOnlyMethods, "read2", engine.KeyMin, proto.Key("b"), false}, {readOnlyMethods, "random", engine.KeyMin, proto.Key("b"), false}, {readWriteMethods, "rwAll", engine.KeyMin, proto.Key("b"), true}, {readWriteMethods, "rw1", engine.KeyMin, proto.Key("b"), true}, {readWriteMethods, "random", engine.KeyMin, proto.Key("b"), false}, {writeOnlyMethods, "writeAll", engine.KeyMin, proto.Key("b"), true}, {writeOnlyMethods, "write1", engine.KeyMin, proto.Key("b"), true}, {writeOnlyMethods, "write2", engine.KeyMin, proto.Key("b"), false}, {writeOnlyMethods, "random", engine.KeyMin, proto.Key("b"), false}, // Test permissions within and around the boundaries of a range, // representatively using rw methods. 
{readWriteMethods, "rw2", proto.Key("a"), proto.Key("b"), true}, {readWriteMethods, "rwAll", proto.Key("a"), proto.Key("b"), true}, {readWriteMethods, "rw2", proto.Key("a"), proto.Key("a"), true}, {readWriteMethods, "rw2", proto.Key("a"), proto.Key("a1"), true}, {readWriteMethods, "rw2", proto.Key("a"), proto.Key("b1"), false}, {readWriteMethods, "rw2", proto.Key("a3"), proto.Key("a4"), true}, {readWriteMethods, "rw2", proto.Key("a3"), proto.Key("b1"), false}, } for i, test := range testData { for _, method := range test.methods { err := ds.verifyPermissions( method, &proto.RequestHeader{ User: test.user, Key: test.startKey, EndKey: test.endKey}) if err != nil && test.hasPermission { t.Errorf("test %d: user %s should have had permission to %s, err: %s", i, test.user, method, err.Error()) break } else if err == nil && !test.hasPermission { t.Errorf("test %d: user %s should not have had permission to %s", i, test.user, method) break } } } n.Stop() }
// sendOne sends a single call via the wrapped sender. If the call is // part of a transaction, the TxnCoordSender adds the transaction to a // map of active transactions and begins heartbeating it. Every // subsequent call for the same transaction updates the lastUpdateTS // to prevent live transactions from being considered abandoned and // garbage collected. Read/write mutating requests have their key or // key range added to the transaction's interval tree of key ranges // for eventual cleanup via resolved write intents. // // On success, and if the call is part of a transaction, the affected // key range is recorded as live intents for eventual cleanup upon // transaction commit. Upon successful txn commit, initiates cleanup // of intents. func (tc *TxnCoordSender) sendOne(call *client.Call) { var startNS int64 header := call.Args.Header() // If this call is part of a transaction... if header.Txn != nil { // Set the timestamp to the original timestamp for read-only // commands and to the transaction timestamp for read/write // commands. if proto.IsReadOnly(call.Method) { header.Timestamp = header.Txn.OrigTimestamp } else { header.Timestamp = header.Txn.Timestamp } // End transaction must have its key set to the txn ID. if call.Method == proto.EndTransaction { header.Key = header.Txn.Key // Remember when EndTransaction started in case we want to // be linearizable. startNS = tc.clock.PhysicalNow() } } // Send the command through wrapped sender. tc.wrapped.Send(call) if header.Txn != nil { // If not already set, copy the request txn. if call.Reply.Header().Txn == nil { call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction) } tc.updateResponseTxn(header, call.Reply.Header()) } // If successful, we're in a transaction, and the command leaves // transactional intents, add the key or key range to the intents map. // If the transaction metadata doesn't yet exist, create it. 
if call.Reply.Header().GoError() == nil && header.Txn != nil && proto.IsTransactional(call.Method) { tc.Lock() var ok bool var txnMeta *txnMetadata if txnMeta, ok = tc.txns[string(header.Txn.ID)]; !ok { txnMeta = &txnMetadata{ txn: *header.Txn, keys: util.NewIntervalCache(util.CacheConfig{Policy: util.CacheNone}), lastUpdateTS: tc.clock.Now(), timeoutDuration: tc.clientTimeout, closer: make(chan struct{}), } tc.txns[string(header.Txn.ID)] = txnMeta // TODO(jiajia): Reevaluate this logic of creating a goroutine // for each active transaction. Spencer suggests a heap // containing next heartbeat timeouts which is processed by a // single goroutine. go tc.heartbeat(header.Txn, txnMeta.closer) } txnMeta.lastUpdateTS = tc.clock.Now() txnMeta.addKeyRange(header.Key, header.EndKey) tc.Unlock() } // Cleanup intents and transaction map if end of transaction. switch t := call.Reply.Header().GoError().(type) { case *proto.TransactionAbortedError: // If already aborted, cleanup the txn on this TxnCoordSender. tc.cleanupTxn(&t.Txn) case *proto.OpRequiresTxnError: // Run a one-off transaction with that single command. log.Infof("%s: auto-wrapping in txn and re-executing", call.Method) txnOpts := &client.TransactionOptions{ Name: "auto-wrap", } // Must not call Close() on this KV - that would call // tc.Close(). tmpKV := client.NewKV(tc, nil) tmpKV.User = call.Args.Header().User tmpKV.UserPriority = call.Args.Header().GetUserPriority() call.Reply.Reset() tmpKV.RunTransaction(txnOpts, func(txn *client.KV) error { return txn.Call(call.Method, call.Args, call.Reply) }) case nil: var txn *proto.Transaction if call.Method == proto.EndTransaction { txn = call.Reply.Header().Txn // If the -linearizable flag is set, we want to make sure that // all the clocks in the system are past the commit timestamp // of the transaction. This is guaranteed if either // - the commit timestamp is MaxOffset behind startNS // - MaxOffset ns were spent in this function // when returning to the client. 
Below we choose the option // that involves less waiting, which is likely the first one // unless a transaction commits with an odd timestamp. if tsNS := txn.Timestamp.WallTime; startNS > tsNS { startNS = tsNS } sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS) if tc.linearizable && sleepNS > 0 { defer func() { log.V(1).Infof("%v: waiting %dms on EndTransaction for linearizability", txn.ID, sleepNS/1000000) time.Sleep(sleepNS) }() } } if txn != nil && txn.Status != proto.PENDING { tc.cleanupTxn(txn) } } }