// Send implements the client.Sender interface.
func (rls *retryableLocalSender) Send(_ context.Context, call proto.Call) {
	// Instant retry to handle the case of a range split, which is
	// exposed here as a RangeKeyMismatchError.
	retryOpts := retry.Options{
		Tag: fmt.Sprintf("routing %s locally", call.Method()),
	}
	// In local tests, the RPCs are not actually sent over the wire. We
	// need to clone the Txn in order to avoid unexpected sharing
	// between TxnCoordSender and client.Txn.
	if header := call.Args.Header(); header.Txn != nil {
		header.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
	}
	err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		call.Reply.Header().Error = nil
		rls.LocalSender.Send(context.TODO(), call)
		// Check for range key mismatch error (this could happen if
		// range was split between lookup and execution). In this case,
		// reset header.Replica and engage retry loop.
		if err := call.Reply.Header().GoError(); err != nil {
			if _, ok := err.(*proto.RangeKeyMismatchError); ok {
				// Clear request replica.
				call.Args.Header().Replica = proto.Replica{}
				return retry.Continue, err
			}
		}
		return retry.Break, nil
	})
	if err != nil {
		panic(fmt.Sprintf("local sender did not succeed: %s", err))
	}
}
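// All of the senders in this section implement the client.Sender interface
// seen above: a single Send(ctx, call) method that writes its outcome into
// call.Reply rather than returning an error. As a point of reference, here is
// a minimal, hypothetical pass-through wrapper (not part of the original
// code) that logs each call before delegating to a wrapped sender:
type loggingSender struct {
	wrapped client.Sender
}

func (ls *loggingSender) Send(ctx context.Context, call proto.Call) {
	log.Infof("sending %s", call.Method())
	ls.wrapped.Send(ctx, call)
	// Errors surface on the reply header, not as a return value.
	if err := call.Reply.Header().GoError(); err != nil {
		log.Warningf("%s failed: %s", call.Method(), err)
	}
}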
func (db *testSender) sendOne(call proto.Call) {
	switch call.Args.(type) {
	case *proto.EndTransactionRequest:
		safeSetGoError(call.Reply, util.Errorf("%s method not supported", call.Method()))
		return
	}
	// Lookup range and direct request.
	header := call.Args.Header()
	if rng := db.store.LookupRange(header.Key, header.EndKey); rng != nil {
		header.RangeID = rng.Desc().RangeID
		replica := rng.GetReplica()
		if replica == nil {
			safeSetGoError(call.Reply, util.Errorf("own replica missing in range"))
			// Return early: dereferencing a nil replica below would panic.
			return
		}
		header.Replica = *replica
		reply, err := db.store.ExecuteCmd(context.Background(), call.Args)
		if reply != nil {
			gogoproto.Merge(call.Reply, reply)
		}
		if call.Reply.Header().Error != nil {
			panic(proto.ErrorUnexpectedlySet)
		}
		if err != nil {
			call.Reply.Header().SetGoError(err)
		}
	} else {
		safeSetGoError(call.Reply, proto.NewRangeKeyMismatchError(header.Key, header.EndKey, nil))
	}
}
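// safeSetGoError itself is not shown in these snippets. A plausible sketch
// (an assumption, not the actual helper) is that it guards against
// overwriting an error that is already set on the reply:
func safeSetGoError(reply proto.Response, err error) {
	if reply.Header().Error != nil {
		return
	}
	reply.Header().SetGoError(err)
}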
// Send implements the client.Sender interface. If the call is part
// of a transaction, the coordinator will initialize the transaction
// if it's not nil but has an empty ID.
func (tc *TxnCoordSender) Send(ctx context.Context, call proto.Call) {
	header := call.Args.Header()
	tc.maybeBeginTxn(header)
	header.CmdID = header.GetOrCreateCmdID(tc.clock.PhysicalNow())

	// This is the earliest point at which the request has a ClientCmdID and/or
	// TxnID (if applicable). Begin a Trace which follows this request.
	trace := tc.tracer.NewTrace(call.Args.Header())
	defer trace.Finalize()
	defer trace.Epoch(fmt.Sprintf("sending %s", call.Method()))()
	defer func() {
		if err := call.Reply.Header().GoError(); err != nil {
			trace.Event(fmt.Sprintf("reply error: %T", err))
		}
	}()
	ctx = tracer.ToCtx(ctx, trace)

	// Process batch specially; otherwise, send via wrapped sender.
	switch args := call.Args.(type) {
	case *proto.BatchRequest:
		trace.Event("batch processing")
		tc.sendBatch(ctx, args, call.Reply.(*proto.BatchResponse))
	default:
		// TODO(tschottdorf): should treat all calls as Batch. After all, that
		// will be almost all calls.
		tc.sendOne(ctx, call)
	}
}
// sendBatch unrolls a batched command and sends each constituent
// command in turn; see the TODOs below about sending them in parallel.
// TODO(tschottdorf): modify sendBatch so that it sends truly parallel requests
// when outside of a Transaction. This can then be used to address the TODO in
// (*TxnCoordSender).resolve().
func (tc *TxnCoordSender) sendBatch(ctx context.Context, batchArgs *proto.BatchRequest, batchReply *proto.BatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		if err := updateForBatch(args, batchArgs.RequestHeader); err != nil {
			batchReply.Header().SetGoError(err)
			return
		}
		call := proto.Call{Args: args}
		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			call.Reply = args.CreateReply()
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(ctx, call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
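// updateForBatch is not shown here. Judging from the inline variant of
// sendBatch further down in this section (which validates User, UserPriority
// and Txn by hand), it presumably copies those batch-level header fields onto
// each unrolled request and rejects conflicting per-call values. A hedged
// sketch only:
func updateForBatch(args proto.Request, batchHeader proto.RequestHeader) error {
	h := args.Header()
	if h.User != "" && h.User != batchHeader.User {
		return util.Errorf("cannot have individual user on call in batch")
	}
	h.User = batchHeader.User
	if h.UserPriority != nil && h.GetUserPriority() != batchHeader.GetUserPriority() {
		return util.Errorf("cannot have individual user priority on call in batch")
	}
	h.UserPriority = batchHeader.UserPriority
	if h.Txn != nil && !h.Txn.Equal(batchHeader.Txn) {
		return util.Errorf("cannot have individual transactional call in batch")
	}
	// Propagate the batch transaction to the unrolled call.
	h.Txn = batchHeader.Txn
	return nil
}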
// post posts the call using the HTTP client. The call's method is
// appended to KVDBEndpoint and set as the URL path. The call's arguments
// are protobuf-serialized and written as the POST body. The content
// type is set to application/x-protobuf.
//
// On success, the response body is unmarshalled into call.Reply.
func (s *httpSender) post(call proto.Call) error {
	retryOpts := s.retryOpts
	retryOpts.Tag = fmt.Sprintf("%s %s", s.context.RequestScheme(), call.Method())

	// Marshal the args into a request body.
	body, err := gogoproto.Marshal(call.Args)
	if err != nil {
		return err
	}

	url := s.context.RequestScheme() + "://" + s.server + KVDBEndpoint + call.Method().String()

	return retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		req, err := http.NewRequest("POST", url, bytes.NewReader(body))
		if err != nil {
			return retry.Break, err
		}
		req.Header.Add(util.ContentTypeHeader, util.ProtoContentType)
		req.Header.Add(util.AcceptHeader, util.ProtoContentType)
		req.Header.Add(util.AcceptEncodingHeader, util.SnappyEncoding)

		resp, err := s.client.Do(req)
		if err != nil {
			return retry.Continue, err
		}
		defer resp.Body.Close()

		switch resp.StatusCode {
		case http.StatusOK:
			// We're cool.
		case http.StatusServiceUnavailable, http.StatusGatewayTimeout, StatusTooManyRequests:
			// Retry on service unavailable and request timeout.
			// TODO(spencer): consider respecting the Retry-After header for
			// backoff / retry duration.
			return retry.Continue, errors.New(resp.Status)
		default:
			// Can't recover from all other errors.
			return retry.Break, errors.New(resp.Status)
		}

		if resp.Header.Get(util.ContentEncodingHeader) == util.SnappyEncoding {
			resp.Body = &snappyReader{body: resp.Body}
		}
		b, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return retry.Continue, err
		}
		if err := gogoproto.Unmarshal(b, call.Reply); err != nil {
			return retry.Continue, err
		}
		return retry.Break, nil
	})
}
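// The status handling above amounts to a three-way split: 200 succeeds, a
// small set of codes is retried, and everything else is fatal. A stdlib-only
// sketch of the retryable set (429 is written out because the snippet's
// StatusTooManyRequests constant is presumably defined elsewhere in that
// package):
func isRetryableStatus(code int) bool {
	switch code {
	case http.StatusServiceUnavailable, http.StatusGatewayTimeout, 429:
		// 503, 504 and 429 (Too Many Requests) are worth retrying.
		return true
	default:
		return false
	}
}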
// Send implements the client.Sender interface. The store is looked
// up from the store map if specified by header.Replica; otherwise,
// the command is being executed locally, and the replica is
// determined via lookup through each store's LookupRange method.
func (ls *LocalSender) Send(ctx context.Context, call proto.Call) {
	var err error
	var store *storage.Store

	trace := tracer.FromCtx(ctx)

	// If we aren't given a Replica, then a little bending over
	// backwards here. This case applies exclusively to unittests.
	header := call.Args.Header()
	if header.RaftID == 0 || header.Replica.StoreID == 0 {
		var repl *proto.Replica
		var raftID proto.RaftID
		raftID, repl, err = ls.lookupReplica(header.Key, header.EndKey)
		if err == nil {
			header.RaftID = raftID
			header.Replica = *repl
		}
	}
	ctx = log.Add(ctx,
		log.Method, call.Method(),
		log.Key, header.Key,
		log.RaftID, header.RaftID)

	if err == nil {
		store, err = ls.GetStore(header.Replica.StoreID)
	}
	var reply proto.Response
	if err == nil {
		// For calls that read data within a txn, we can avoid uncertainty
		// related retries in certain situations. If the node is in
		// "CertainNodes", we need not worry about uncertain reads any
		// more. Setting MaxTimestamp=Timestamp for the operation
		// accomplishes that. See proto.Transaction.CertainNodes for details.
		if header.Txn != nil && header.Txn.CertainNodes.Contains(header.Replica.NodeID) {
			// MaxTimestamp = Timestamp corresponds to no clock uncertainty.
			trace.Event("read has no clock uncertainty")
			header.Txn.MaxTimestamp = header.Txn.Timestamp
		}
		reply, err = store.ExecuteCmd(ctx, call.Args)
	}
	if reply != nil {
		gogoproto.Merge(call.Reply, reply)
	}
	if call.Reply.Header().Error != nil {
		panic(proto.ErrorUnexpectedlySet)
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
// sendBatch unrolls a batched command and sends each constituent
// command in turn; see the TODO below about sending them in parallel.
func (tc *TxnCoordSender) sendBatch(batchArgs *proto.InternalBatchRequest, batchReply *proto.InternalBatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		call := proto.Call{Args: args}

		// Disallow transaction, user and priority on individual calls, unless
		// equal.
		if args.Header().User != "" && args.Header().User != batchArgs.User {
			batchReply.Header().SetGoError(util.Error("cannot have individual user on call in batch"))
			return
		}
		args.Header().User = batchArgs.User
		if args.Header().UserPriority != nil && args.Header().GetUserPriority() != batchArgs.GetUserPriority() {
			batchReply.Header().SetGoError(util.Error("cannot have individual user priority on call in batch"))
			return
		}
		args.Header().UserPriority = batchArgs.UserPriority
		if txn := args.Header().Txn; txn != nil && !txn.Equal(batchArgs.Txn) {
			batchReply.Header().SetGoError(util.Error("cannot have individual transactional call in batch"))
			return
		}
		// Propagate batch Txn to each call.
		args.Header().Txn = batchArgs.Txn

		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			call.Reply = args.CreateReply()
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
// Send implements the client.Sender interface. If the call is part
// of a transaction, the coordinator will initialize the transaction
// if it's not nil but has an empty ID.
func (tc *TxnCoordSender) Send(ctx context.Context, call proto.Call) {
	header := call.Args.Header()
	tc.maybeBeginTxn(header)
	header.CmdID = header.GetOrCreateCmdID(tc.clock.PhysicalNow())

	// This is the earliest point at which the request has a ClientCmdID and/or
	// TxnID (if applicable). Begin a Trace which follows this request.
	trace := tc.tracer.NewTrace(call.Args.Header())
	defer trace.Finalize()
	defer trace.Epoch(fmt.Sprintf("sending %s", call.Method()))()
	defer func() {
		if err := call.Reply.Header().GoError(); err != nil {
			trace.Event(fmt.Sprintf("reply error: %T", err))
		}
	}()
	ctx = tracer.ToCtx(ctx, trace)

	// Process batch specially; otherwise, send via wrapped sender.
	switch args := call.Args.(type) {
	case *proto.InternalBatchRequest:
		trace.Event("batch processing")
		tc.sendBatch(ctx, args, call.Reply.(*proto.InternalBatchResponse))
	case *proto.BatchRequest:
		// Convert the batch request to internal-batch request.
		internalArgs := &proto.InternalBatchRequest{RequestHeader: args.RequestHeader}
		internalReply := &proto.InternalBatchResponse{}
		for i := range args.Requests {
			internalArgs.Add(args.Requests[i].GetValue().(proto.Request))
		}
		tc.sendBatch(ctx, internalArgs, internalReply)
		reply := call.Reply.(*proto.BatchResponse)
		reply.ResponseHeader = internalReply.ResponseHeader
		// Convert from internal-batch response to batch response.
		for i := range internalReply.Responses {
			reply.Add(internalReply.Responses[i].GetValue().(proto.Response))
		}
	default:
		tc.sendOne(ctx, call)
	}
}
// MaybeWrapCall returns a new call which wraps the original Args and Reply
// in a batch, if necessary.
// TODO(tschottdorf): will go when proto.Call does.
func MaybeWrapCall(call proto.Call) (proto.Call, func(proto.Call) proto.Call) {
	var unwrap func(proto.Response) proto.Response
	call.Args, unwrap = MaybeWrap(call.Args)
	newUnwrap := func(origReply proto.Response) func(proto.Call) proto.Call {
		return func(newCall proto.Call) proto.Call {
			origReply.Reset()
			gogoproto.Merge(origReply, unwrap(newCall.Reply))
			*origReply.Header() = *newCall.Reply.Header()
			newCall.Reply = origReply
			return newCall
		}
	}(call.Reply)
	call.Reply = call.Args.CreateReply()
	return call, newUnwrap
}
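// A usage sketch for MaybeWrapCall (the sendWrapped helper is hypothetical):
// wrap the call before handing it to a sender that only understands batches,
// then unwrap to restore the caller's original reply object.
func sendWrapped(ctx context.Context, sender client.Sender, call proto.Call) proto.Call {
	wrapped, unwrap := MaybeWrapCall(call)
	sender.Send(ctx, wrapped)
	// unwrap merges the (possibly batched) response back into the original
	// reply and returns the restored call.
	return unwrap(wrapped)
}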
// Send implements the client.Sender interface. It verifies // permissions and looks up the appropriate range based on the // supplied key and sends the RPC according to the specified options. // // If the request spans multiple ranges (which is possible for Scan or // DeleteRange requests), Send sends requests to the individual ranges // sequentially and combines the results transparently. // // This may temporarily adjust the request headers, so the proto.Call // must not be used concurrently until Send has returned. func (ds *DistSender) Send(_ context.Context, call proto.Call) { args := call.Args finalReply := call.Reply // Verify permissions. if err := ds.verifyPermissions(call.Args); err != nil { call.Reply.Header().SetGoError(err) return } // In the event that timestamp isn't set and read consistency isn't // required, set the timestamp using the local clock. if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) { // Make sure that after the call, args hasn't changed. defer func(timestamp proto.Timestamp) { args.Header().Timestamp = timestamp }(args.Header().Timestamp) args.Header().Timestamp = ds.clock.Now() } // If this is a bounded request, we will change its bound as we receive // replies. This undoes that when we return. boundedArgs, argsBounded := args.(proto.Bounded) if argsBounded { defer func(bound int64) { boundedArgs.SetBound(bound) }(boundedArgs.GetBound()) } defer func(key proto.Key) { args.Header().Key = key }(args.Header().Key) // Retry logic for lookup of range by key and RPCs to range replicas. curReply := finalReply for { call.Reply = curReply curReply.Header().Reset() var desc, descNext *proto.RangeDescriptor var err error for r := retry.Start(ds.rpcRetryOptions); r.Next(); { // Get range descriptor (or, when spanning range, descriptors). // sendAttempt below may clear them on certain errors, so we // refresh (likely from the cache) on every retry. desc, descNext, err = ds.getDescriptors(call) // getDescriptors may fail retryably if the first range isn't // available via Gossip. if err != nil { if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() { if log.V(1) { log.Warning(err) } continue } break } err = func() error { // Truncate the request to our current range, making sure not to // touch it unless we have to (it is illegal to send EndKey on // commands which do not operate on ranges). if descNext != nil { defer func(endKey proto.Key) { args.Header().EndKey = endKey }(args.Header().EndKey) args.Header().EndKey = desc.EndKey } leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID)) // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) // If this request needs to go to a leader and we know who that is, move // it to the front. if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) && leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = rpc.OrderStable } } return ds.sendRPC(desc.RaftID, replicas, order, args, curReply) }() if err != nil { // For an RPC error to occur, we must've been unable to contact any // replicas. In this case, likely all nodes are down (or not getting back // to us within a reasonable amount of time). 
// We may simply not be trying to talk to the up-to-date replicas, so // clearing the descriptor here should be a good idea. // TODO(tschottdorf): If a replica group goes dead, this will cause clients // to put high read pressure on the first range, so there should be some // rate limiting here. ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) } else { err = curReply.Header().GoError() } if err != nil { if log.V(1) { log.Warningf("failed to invoke %s: %s", call.Method(), err) } // If retryable, allow retry. For range not found or range // key mismatch errors, we don't backoff on the retry, // but reset the backoff loop so we can retry immediately. switch tErr := err.(type) { case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError: // Range descriptor might be out of date - evict it. ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) // On addressing errors, don't backoff; retry immediately. r.Reset() if log.V(1) { log.Warning(err) } continue case *proto.NotLeaderError: newLeader := tErr.GetLeader() // Verify that leader is a known replica according to the // descriptor. If not, we've got a stale replica; evict cache. // Next, cache the new leader. if newLeader != nil { if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 { if log.V(1) { log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc) } ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) } } else { newLeader = &proto.Replica{} } ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader) if log.V(1) { log.Warning(err) } r.Reset() continue case util.Retryable: if tErr.CanRetry() { if log.V(1) { log.Warning(err) } continue } } } break } // Immediately return if querying a range failed non-retryably. // For multi-range requests, we return the failing range's reply. if err != nil { call.Reply.Header().SetGoError(err) return } if finalReply != curReply { // This was the second or later call in a multi-range request. // Combine the new response with the existing one. if cFinalReply, ok := finalReply.(proto.Combinable); ok { cFinalReply.Combine(curReply) } else { // This should never apply in practice, as we'll only end up here // for range-spanning requests. call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type")) return } } // If this request has a bound, such as MaxResults in // ScanRequest, check whether enough rows have been retrieved. if argsBounded { if prevBound := boundedArgs.GetBound(); prevBound > 0 { if cReply, ok := curReply.(proto.Countable); ok { if nextBound := prevBound - cReply.Count(); nextBound > 0 { // Update bound for the next round. // We've deferred restoring the original bound earlier. boundedArgs.SetBound(nextBound) } else { // Set flag to break the loop. descNext = nil } } } } // If this was the last range accessed by this call, exit loop. if descNext == nil { break } // In next iteration, query next range. // It's important that we use the EndKey of the current descriptor // as opposed to the StartKey of the next one: if the former is stale, // it's possible that the next range has since merged the subsequent // one, and unless both descriptors are stale, the next descriptor's // StartKey would move us to the beginning of the current range, // resulting in a duplicate scan. args.Header().Key = desc.EndKey // This is a multi-range request, make a new reply object for // subsequent iterations of the loop. curReply = args.CreateReply() } call.Reply = finalReply }
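// The error handling above reduces to a small decision table; the real code
// additionally evicts stale range descriptors and updates the leader cache as
// shown. A distilled, illustrative sketch of just the classification:
//
//   RangeNotFoundError / RangeKeyMismatchError / NotLeaderError -> retry now (reset backoff)
//   util.Retryable with CanRetry()                              -> retry with backoff
//   anything else                                               -> give up
func retryDecision(err error) (retryNow, retryWithBackoff bool) {
	switch t := err.(type) {
	case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError, *proto.NotLeaderError:
		return true, false
	case util.Retryable:
		return false, t.CanRetry()
	}
	return false, false
}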
// sendOne sends a single call via the wrapped sender. If the call is // part of a transaction, the TxnCoordSender adds the transaction to a // map of active transactions and begins heartbeating it. Every // subsequent call for the same transaction updates the lastUpdate // timestamp to prevent live transactions from being considered // abandoned and garbage collected. Read/write mutating requests have // their key or key range added to the transaction's interval tree of // key ranges for eventual cleanup via resolved write intents. // // On success, and if the call is part of a transaction, the affected // key range is recorded as live intents for eventual cleanup upon // transaction commit. Upon successful txn commit, initiates cleanup // of intents. func (tc *TxnCoordSender) sendOne(ctx context.Context, call proto.Call) { var startNS int64 header := call.Args.Header() trace := tracer.FromCtx(ctx) var id string // optional transaction ID if header.Txn != nil { // If this call is part of a transaction... id = string(header.Txn.ID) // Verify that if this Transaction is not read-only, we have it on // file. If not, refuse writes - the client must have issued a write on // another coordinator previously. if header.Txn.Writing && proto.IsTransactionWrite(call.Args) { tc.Lock() _, ok := tc.txns[id] tc.Unlock() if !ok { call.Reply.Header().SetGoError(util.Errorf( "transaction must not write on multiple coordinators")) return } } // Set the timestamp to the original timestamp for read-only // commands and to the transaction timestamp for read/write // commands. if proto.IsReadOnly(call.Args) { header.Timestamp = header.Txn.OrigTimestamp } else { header.Timestamp = header.Txn.Timestamp } if args, ok := call.Args.(*proto.EndTransactionRequest); ok { // Remember when EndTransaction started in case we want to // be linearizable. startNS = tc.clock.PhysicalNow() // EndTransaction must have its key set to that of the txn. header.Key = header.Txn.Key if len(args.Intents) > 0 { // TODO(tschottdorf): it may be useful to allow this later. // That would be part of a possible plan to allow txns which // write on multiple coordinators. call.Reply.Header().SetGoError(util.Errorf( "client must not pass intents to EndTransaction")) return } tc.Lock() txnMeta, metaOK := tc.txns[id] if id != "" && metaOK { args.Intents = txnMeta.intents() } tc.Unlock() if !metaOK { // If we don't have the transaction, then this must be a retry // by the client. We can no longer reconstruct a correct // request so we must fail. // // TODO(bdarnell): if we had a GetTransactionStatus API then // we could lookup the transaction and return either nil or // TransactionAbortedError instead of this ambivalent error. call.Reply.Header().SetGoError(util.Errorf( "transaction is already committed or aborted")) return } else if len(args.Intents) == 0 { // If there aren't any intents, then there's factually no // transaction to end. Read-only txns have all of their state in // the client. call.Reply.Header().SetGoError(util.Errorf( "cannot commit a read-only transaction")) return } } } // Send the command through wrapped sender. tc.wrapped.Send(ctx, call) // For transactional calls, need to track & update the transaction. if header.Txn != nil { respHeader := call.Reply.Header() if respHeader.Txn == nil { // When empty, simply use the request's transaction. // This is expected: the Range doesn't bother copying unless the // object changes. 
respHeader.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction) } tc.updateResponseTxn(header, respHeader) } if txn := call.Reply.Header().Txn; txn != nil { if !header.Txn.Equal(txn) { panic("transaction ID changed") } tc.Lock() txnMeta := tc.txns[id] // If this transactional command leaves transactional intents, add the key // or key range to the intents map. If the transaction metadata doesn't yet // exist, create it. if call.Reply.Header().GoError() == nil { if proto.IsTransactionWrite(call.Args) { if txnMeta == nil { txn.Writing = true trace.Event("coordinator spawns") txnMeta = &txnMetadata{ txn: *txn, keys: cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } tc.txns[id] = txnMeta if !tc.stopper.RunAsyncTask(func() { tc.heartbeatLoop(id) }) { // The system is already draining and we can't start the // heartbeat. We refuse new transactions for now because // they're likely not going to have all intents committed. // In principle, we can relax this as needed though. call.Reply.Header().SetGoError(&proto.NodeUnavailableError{}) tc.Unlock() tc.unregisterTxn(id) return } } txnMeta.addKeyRange(header.Key, header.EndKey) } // Update our record of this transaction. if txnMeta != nil { txnMeta.txn = *txn txnMeta.setLastUpdate(tc.clock.PhysicalNow()) } } tc.Unlock() } // Cleanup intents and transaction map if end of transaction. switch t := call.Reply.Header().GoError().(type) { case *proto.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider it dead. tc.cleanupTxn(trace, t.Txn) case *proto.TransactionAbortedError: // If already aborted, cleanup the txn on this TxnCoordSender. tc.cleanupTxn(trace, t.Txn) case *proto.OpRequiresTxnError: // Run a one-off transaction with that single command. if log.V(1) { log.Infof("%s: auto-wrapping in txn and re-executing", call.Method()) } // TODO(tschottdorf): this part is awkward. Consider resending here // without starting a new call, which is hard to trace. Plus, the // below depends on default configuration. tmpDB, err := client.Open( fmt.Sprintf("//%s?priority=%d", call.Args.Header().User, call.Args.Header().GetUserPriority()), client.SenderOpt(tc)) if err != nil { log.Warning(err) return } call.Reply.Reset() if err := tmpDB.Txn(func(txn *client.Txn) error { txn.SetDebugName("auto-wrap", 0) b := &client.Batch{} b.InternalAddCall(call) return txn.CommitInBatch(b) }); err != nil { log.Warning(err) } case nil: if txn := call.Reply.Header().Txn; txn != nil { if _, ok := call.Args.(*proto.EndTransactionRequest); ok { // If the --linearizable flag is set, we want to make sure that // all the clocks in the system are past the commit timestamp // of the transaction. This is guaranteed if either // - the commit timestamp is MaxOffset behind startNS // - MaxOffset ns were spent in this function // when returning to the client. Below we choose the option // that involves less waiting, which is likely the first one // unless a transaction commits with an odd timestamp. 
if tsNS := txn.Timestamp.WallTime; startNS > tsNS { startNS = tsNS } sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS) if tc.linearizable && sleepNS > 0 { defer func() { if log.V(1) { log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond)) } time.Sleep(sleepNS) }() } if txn.Status != proto.PENDING { tc.cleanupTxn(trace, *txn) } } } } }
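// The linearizability wait above picks the cheaper of two sufficient
// conditions: MaxOffset elapsed since the commit timestamp, or MaxOffset
// elapsed since EndTransaction started. A standalone sketch of the same
// arithmetic (nanosecond wall times; names are illustrative):
func linearizabilityWait(maxOffset time.Duration, nowNS, startNS, commitNS int64) time.Duration {
	// Measure from the earlier reference point; more time has already
	// elapsed relative to it, so the remaining sleep is shorter.
	if commitNS < startNS {
		startNS = commitNS
	}
	sleep := maxOffset - time.Duration(nowNS-startNS)
	if sleep < 0 {
		return 0
	}
	return sleep
}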
// resolve sends resolve intent commands for all key ranges this transaction // has covered. Any keys listed in the resolved slice have already been // resolved and are skipped. func (tm *txnMetadata) resolve(trace *tracer.Trace, resolved []proto.Key, sender client.Sender) { txn := &tm.txn if tm.keys.Len() > 0 { if log.V(2) { log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn) } } // TODO(tschottdorf): Should create a Batch here. However, we're resolving // intents and if those are on meta records, there may be a certain order // in which they need to be resolved so that they can get routed to the // correct range. Since a batch runs its commands one by one and we don't // know the correct order, we prefer to fire them off in parallel. var wg sync.WaitGroup for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) { // If the op was range based, end key != start key: resolve a range. var call proto.Call key := o.Key.Start().(proto.Key) endKey := o.Key.End().(proto.Key) if !key.Next().Equal(endKey) { call.Args = &proto.InternalResolveIntentRangeRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, EndKey: endKey, User: security.RootUser, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentRangeResponse{} } else { // Check if the key has already been resolved; skip if yes. found := false for _, k := range resolved { if key.Equal(k) { if log.V(2) { log.Warningf("skipping previously resolved intent at %q", k) } found = true } } if found { continue } call.Args = &proto.InternalResolveIntentRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, User: security.RootUser, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentResponse{} } ctx := tracer.ToCtx(context.Background(), trace.Fork()) if log.V(2) { log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn) } // Each operation gets their own goroutine. We only want to return to // the caller after the operations have finished. wg.Add(1) go func() { sender.Send(ctx, call) wg.Done() if call.Reply.Header().Error != nil { log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError()) } }() } defer trace.Epoch("waiting for intent resolution")() wg.Wait() tm.keys.Clear() }
// Send implements the client.Sender interface. It verifies // permissions and looks up the appropriate range based on the // supplied key and sends the RPC according to the specified options. // // If the request spans multiple ranges (which is possible for Scan or // DeleteRange requests), Send sends requests to the individual ranges // sequentially and combines the results transparently. // // This may temporarily adjust the request headers, so the proto.Call // must not be used concurrently until Send has returned. func (ds *DistSender) Send(_ context.Context, call proto.Call) { args := call.Args finalReply := call.Reply endKey := args.Header().EndKey // Verify permissions. if err := ds.verifyPermissions(call.Args); err != nil { call.Reply.Header().SetGoError(err) return } // In the event that timestamp isn't set and read consistency isn't // required, set the timestamp using the local clock. if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) { // Make sure that after the call, args hasn't changed. defer func(timestamp proto.Timestamp) { args.Header().Timestamp = timestamp }(args.Header().Timestamp) args.Header().Timestamp = ds.clock.Now() } // If this is a bounded request, we will change its bound as we receive // replies. This undoes that when we return. boundedArgs, _ := args.(proto.Bounded) if boundedArgs != nil { defer func(n int64) { boundedArgs.SetBound(n) }(boundedArgs.GetBound()) } // Retry logic for lookup of range by key and RPCs to range replicas. retryOpts := ds.rpcRetryOptions retryOpts.Tag = "routing " + call.Method().String() + " rpc" curReply := finalReply for { call.Reply = curReply curReply.Header().Reset() var desc, descNext *proto.RangeDescriptor err := retry.WithBackoff(retryOpts, func() (retry.Status, error) { var err error // Get range descriptor (or, when spanning range, descriptors). // sendAttempt below may clear them on certain errors, so we // refresh (likely from the cache) on every retry. desc, descNext, err = ds.getDescriptors(call) // getDescriptors may fail retryably if the first range isn't // available via Gossip. if err != nil { if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() { return retry.Continue, err } return retry.Break, err } // Truncate the request to our current range, making sure not to // touch it unless we have to (it is illegal to send EndKey on // commands which do not operate on ranges). if descNext != nil { args.Header().EndKey = desc.EndKey defer func() { // "Untruncate" EndKey to original. args.Header().EndKey = endKey }() } return ds.sendAttempt(desc, call) }) // Immediately return if querying a range failed non-retryably. // For multi-range requests, we return the failing range's reply. if err != nil { call.Reply.Header().SetGoError(err) return } if finalReply != curReply { // This was the second or later call in a multi-range request. // Combine the new response with the existing one. if cFinalReply, ok := finalReply.(proto.Combinable); ok { cFinalReply.Combine(curReply) } else { // This should never apply in practice, as we'll only end up here // for range-spanning requests. call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type")) return } } // If this request has a bound, such as MaxResults in // ScanRequest, check whether enough rows have been retrieved. 
if boundedArgs != nil { if prevBound := boundedArgs.GetBound(); prevBound > 0 { if cReply, ok := curReply.(proto.Countable); ok { if nextBound := prevBound - cReply.Count(); nextBound > 0 { // Update bound for the next round. // We've deferred restoring the original bound earlier. boundedArgs.SetBound(nextBound) } else { // Set flag to break the loop. descNext = nil } } } } // If this was the last range accessed by this call, exit loop. if descNext == nil { break } if curReply == finalReply { // This is the end of the first iteration in a multi-range query, // so it's a convenient place to clean up changes to the args in // the case of multi-range requests. // Reset original start key (the EndKey is taken care of without // defer above). defer func(k proto.Key) { args.Header().Key = k }(args.Header().Key) } // In next iteration, query next range. args.Header().Key = descNext.StartKey // This is a multi-range request, make a new reply object for // subsequent iterations of the loop. curReply = args.CreateReply() } call.Reply = finalReply }
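// The bound bookkeeping above is a running subtraction: each range's reply
// consumes part of the caller's MaxResults-style bound, and the scan stops
// early once the bound is exhausted. A standalone sketch (illustrative names;
// a bound of zero means unbounded, as in the code above):
func nextBound(prevBound, repliedCount int64) (newBound int64, done bool) {
	if prevBound <= 0 {
		// Unbounded: keep going.
		return prevBound, false
	}
	remaining := prevBound - repliedCount
	if remaining <= 0 {
		// Bound exhausted: stop iterating over ranges.
		return 0, true
	}
	return remaining, false
}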
// Send sends the call to Cockroach via an HTTP POST. Requests that fail with
// a retryable HTTP response code are retried with backoff in a loop using the
// default retry options.
func (s *httpSender) Send(_ context.Context, call proto.Call) {
	if err := HTTPPost(s.ctx, call.Args, call.Reply, call.Method()); err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
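// Several senders in this section loop via retry.WithBackoff, whose closure
// returns a retry.Status (Continue, Reset, Break appear in these snippets).
// For intuition, a minimal stdlib-only sketch of that contract — not the
// actual util/retry package; names, signature and backoff policy here are
// illustrative:
type retryStatus int

const (
	retryContinue retryStatus = iota // back off, then try again
	retryReset                       // try again immediately, restarting the backoff
	retryBreak                       // stop: success or permanent failure
)

func withBackoff(maxAttempts int, base time.Duration, fn func() (retryStatus, error)) error {
	var err error
	backoff := base
	for i := 0; i < maxAttempts; i++ {
		var status retryStatus
		status, err = fn()
		switch status {
		case retryBreak:
			return err
		case retryReset:
			// Retry immediately with a fresh backoff budget.
			backoff = base
			continue
		}
		time.Sleep(backoff)
		backoff *= 2
	}
	return err
}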
// sendAttempt is invoked by Send and handles retry logic and cache eviction // for a call sent to a single range. It returns a retry status, which is Break // on success and either Break, Continue or Reset depending on error condition. // This method is expected to be invoked from within a backoff / retry loop to // retry the send repeatedly (e.g. to continue processing after a critical node // becomes available after downtime or the range descriptor is refreshed via // lookup). func (ds *DistSender) sendAttempt(desc *proto.RangeDescriptor, call proto.Call) (retry.Status, error) { leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID)) // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) args := call.Args reply := call.Reply // If this request needs to go to a leader and we know who that is, move // it to the front. if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) && leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = rpc.OrderStable } } err := ds.sendRPC(desc.RaftID, replicas, order, args, reply) if err != nil { // For an RPC error to occur, we must've been unable to contact any // replicas. In this case, likely all nodes are down (or not getting back // to us within a reasonable amount of time). // We may simply not be trying to talk to the up-to-date replicas, so // clearing the descriptor here should be a good idea. // TODO(tschottdorf): If a replica group goes dead, this will cause clients // to put high read pressure on the first range, so there should be some // rate limiting here. ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) } else { err = reply.Header().GoError() } if err != nil { if log.V(1) { log.Warningf("failed to invoke %s: %s", call.Method(), err) } // If retryable, allow retry. For range not found or range // key mismatch errors, we don't backoff on the retry, // but reset the backoff loop so we can retry immediately. switch tErr := err.(type) { case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError: // Range descriptor might be out of date - evict it. ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) // On addressing errors, don't backoff; retry immediately. return retry.Reset, err case *proto.NotLeaderError: newLeader := tErr.GetLeader() // Verify that leader is a known replica according to the // descriptor. If not, we've got a stale replica; evict cache. // Next, cache the new leader. if newLeader != nil { if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 { if log.V(1) { log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc) } ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc) } } else { newLeader = &proto.Replica{} } ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader) return retry.Reset, err case util.Retryable: if tErr.CanRetry() { return retry.Continue, err } } return retry.Break, err } return retry.Break, nil }
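// The leader preference in sendAttempt is a simple reordering: if the cached
// leader is among the candidate replicas, it is moved to the front and the
// RPC layer is told to keep that order. The real code uses a replicaSlice
// with FindReplica/MoveToFront; this generic sketch uses a plain slice:
func moveLeaderToFront(replicas []proto.Replica, leader proto.Replica) bool {
	for i := range replicas {
		if replicas[i].StoreID == leader.StoreID {
			// Swap the leader into position 0; the relative order of the
			// remaining replicas doesn't matter for this sketch.
			replicas[0], replicas[i] = replicas[i], replicas[0]
			return true
		}
	}
	return false
}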
// Send implements the client.Sender interface. It verifies // permissions and looks up the appropriate range based on the // supplied key and sends the RPC according to the specified options. // // If the request spans multiple ranges (which is possible for Scan or // DeleteRange requests), Send sends requests to the individual ranges // sequentially and combines the results transparently. // // This may temporarily adjust the request headers, so the proto.Call // must not be used concurrently until Send has returned. func (ds *DistSender) Send(ctx context.Context, call proto.Call) { args := call.Args // Verify permissions. if err := ds.verifyPermissions(call.Args); err != nil { call.Reply.Header().SetGoError(err) return } trace := tracer.FromCtx(ctx) // In the event that timestamp isn't set and read consistency isn't // required, set the timestamp using the local clock. if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) { // Make sure that after the call, args hasn't changed. defer func(timestamp proto.Timestamp) { args.Header().Timestamp = timestamp }(args.Header().Timestamp) args.Header().Timestamp = ds.clock.Now() } // If this is a bounded request, we will change its bound as we receive // replies. This undoes that when we return. boundedArgs, argsBounded := args.(proto.Bounded) if argsBounded { defer func(bound int64) { boundedArgs.SetBound(bound) }(boundedArgs.GetBound()) } _, isReverseScan := call.Args.(*proto.ReverseScanRequest) // Restore to the original range if the scan/reverse_scan crosses range boundaries. if isReverseScan { defer func(key proto.Key) { args.Header().EndKey = key }(args.Header().EndKey) } else { defer func(key proto.Key) { args.Header().Key = key }(args.Header().Key) } first := true // Retry logic for lookup of range by key and RPCs to range replicas. for { var curReply proto.Response var desc, descNext *proto.RangeDescriptor var err error for r := retry.Start(ds.rpcRetryOptions); r.Next(); { // Get range descriptor (or, when spanning range, descriptors). Our // error handling below may clear them on certain errors, so we // refresh (likely from the cache) on every retry. descDone := trace.Epoch("meta descriptor lookup") // It is safe to pass call here (with its embedded reply) because // the reply is only used to check that it implements // proto.Combinable if the request spans multiple ranges. desc, descNext, err = ds.getDescriptors(call) descDone() // getDescriptors may fail retryably if the first range isn't // available via Gossip. if err != nil { if rErr, ok := err.(retry.Retryable); ok && rErr.CanRetry() { if log.V(1) { log.Warning(err) } continue } break } // At this point reply.Header().Error may be non-nil! curReply, err = ds.sendAttempt(trace, args, desc) descKey := args.Header().Key if isReverseScan { descKey = args.Header().EndKey } if err != nil { trace.Event(fmt.Sprintf("send error: %T", err)) // For an RPC error to occur, we must've been unable to contact any // replicas. In this case, likely all nodes are down (or not getting back // to us within a reasonable amount of time). // We may simply not be trying to talk to the up-to-date replicas, so // clearing the descriptor here should be a good idea. // TODO(tschottdorf): If a replica group goes dead, this will cause clients // to put high read pressure on the first range, so there should be some // rate limiting here. 
ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan) } else { err = curReply.Header().GoError() } if err == nil { break } if log.V(1) { log.Warningf("failed to invoke %s: %s", call.Method(), err) } // If retryable, allow retry. For range not found or range // key mismatch errors, we don't backoff on the retry, // but reset the backoff loop so we can retry immediately. switch tErr := err.(type) { case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError: trace.Event(fmt.Sprintf("reply error: %T", err)) // Range descriptor might be out of date - evict it. ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan) // On addressing errors, don't backoff; retry immediately. r.Reset() if log.V(1) { log.Warning(err) } continue case *proto.NotLeaderError: trace.Event(fmt.Sprintf("reply error: %T", err)) newLeader := tErr.GetLeader() // Verify that leader is a known replica according to the // descriptor. If not, we've got a stale replica; evict cache. // Next, cache the new leader. if newLeader != nil { if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 { if log.V(1) { log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc) } ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, isReverseScan) } } else { newLeader = &proto.Replica{} } ds.updateLeaderCache(proto.RangeID(desc.RangeID), *newLeader) if log.V(1) { log.Warning(err) } r.Reset() continue case retry.Retryable: if tErr.CanRetry() { if log.V(1) { log.Warning(err) } trace.Event(fmt.Sprintf("reply error: %T", err)) continue } } break } // Immediately return if querying a range failed non-retryably. // For multi-range requests, we return the failing range's reply. if err != nil { call.Reply.Header().SetGoError(err) return } if first { // Equivalent of `*call.Reply = curReply`. Generics! dst := reflect.ValueOf(call.Reply).Elem() dst.Set(reflect.ValueOf(curReply).Elem()) } else { // This was the second or later call in a multi-range request. // Combine the new response with the existing one. if cReply, ok := call.Reply.(proto.Combinable); ok { cReply.Combine(curReply) } else { // This should never apply in practice, as we'll only end up here // for range-spanning requests. call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type")) return } } first = false // If this request has a bound, such as MaxResults in // ScanRequest, check whether enough rows have been retrieved. if argsBounded { if prevBound := boundedArgs.GetBound(); prevBound > 0 { if cReply, ok := curReply.(proto.Countable); ok { if nextBound := prevBound - cReply.Count(); nextBound > 0 { // Update bound for the next round. // We've deferred restoring the original bound earlier. boundedArgs.SetBound(nextBound) } else { // Set flag to break the loop. descNext = nil } } } } // If this was the last range accessed by this call, exit loop. if descNext == nil { break } if isReverseScan { // In next iteration, query previous range. // We use the StartKey of the current descriptor as opposed to the // EndKey of the previous one. args.Header().EndKey = desc.StartKey } else { // In next iteration, query next range. // It's important that we use the EndKey of the current descriptor // as opposed to the StartKey of the next one: if the former is stale, // it's possible that the next range has since merged the subsequent // one, and unless both descriptors are stale, the next descriptor's // StartKey would move us to the beginning of the current range, // resulting in a duplicate scan. 
args.Header().Key = desc.EndKey } trace.Event("querying next range") } }
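// Multi-range requests only work for reply types that know how to merge
// partial results (proto.Combinable above). A hedged sketch of what such an
// implementation might look like for a scan-style response — the types and
// fields here are illustrative, not the actual protos:
type combinable interface {
	combine(other combinable)
}

type scanReply struct {
	rows []string
}

// combine appends the other reply's rows, mirroring the role
// proto.Combinable.Combine plays in DistSender.Send.
func (s *scanReply) combine(other combinable) {
	if o, ok := other.(*scanReply); ok {
		s.rows = append(s.rows, o.rows...)
	}
}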
// sendOne sends a single call via the wrapped sender. If the call is // part of a transaction, the TxnCoordSender adds the transaction to a // map of active transactions and begins heartbeating it. Every // subsequent call for the same transaction updates the lastUpdate // timestamp to prevent live transactions from being considered // abandoned and garbage collected. Read/write mutating requests have // their key or key range added to the transaction's interval tree of // key ranges for eventual cleanup via resolved write intents. // // On success, and if the call is part of a transaction, the affected // key range is recorded as live intents for eventual cleanup upon // transaction commit. Upon successful txn commit, initiates cleanup // of intents. func (tc *TxnCoordSender) sendOne(call proto.Call) { var startNS int64 header := call.Args.Header() // If this call is part of a transaction... if header.Txn != nil { // Set the timestamp to the original timestamp for read-only // commands and to the transaction timestamp for read/write // commands. if proto.IsReadOnly(call.Args) { header.Timestamp = header.Txn.OrigTimestamp } else { header.Timestamp = header.Txn.Timestamp } // EndTransaction must have its key set to that of the txn. if _, ok := call.Args.(*proto.EndTransactionRequest); ok { header.Key = header.Txn.Key // Remember when EndTransaction started in case we want to // be linearizable. startNS = tc.clock.PhysicalNow() } } // Send the command through wrapped sender. tc.wrapped.Send(context.TODO(), call) if header.Txn != nil { // If not already set, copy the request txn. if call.Reply.Header().Txn == nil { call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction) } tc.updateResponseTxn(header, call.Reply.Header()) } if txn := call.Reply.Header().Txn; txn != nil { tc.Lock() txnMeta := tc.txns[string(txn.ID)] // If this transactional command leaves transactional intents, add the key // or key range to the intents map. If the transaction metadata doesn't yet // exist, create it. if call.Reply.Header().GoError() == nil { if proto.IsTransactionWrite(call.Args) { if txnMeta == nil { txnMeta = &txnMetadata{ txn: *txn, keys: cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}), firstUpdateNanos: tc.clock.PhysicalNow(), lastUpdateNanos: tc.clock.PhysicalNow(), timeoutDuration: tc.clientTimeout, txnEnd: make(chan struct{}), } id := string(txn.ID) tc.txns[id] = txnMeta tc.heartbeat(id) } txnMeta.addKeyRange(header.Key, header.EndKey) } // Update our record of this transaction. if txnMeta != nil { txnMeta.txn = *txn txnMeta.setLastUpdate(tc.clock.PhysicalNow()) } } tc.Unlock() } // Cleanup intents and transaction map if end of transaction. switch t := call.Reply.Header().GoError().(type) { case *proto.TransactionStatusError: // Likely already committed or more obscure errors such as epoch or // timestamp regressions; consider it dead. tc.cleanupTxn(t.Txn, nil) case *proto.TransactionAbortedError: // If already aborted, cleanup the txn on this TxnCoordSender. tc.cleanupTxn(t.Txn, nil) case *proto.OpRequiresTxnError: // Run a one-off transaction with that single command. 
if log.V(1) { log.Infof("%s: auto-wrapping in txn and re-executing", call.Method()) } tmpDB, err := client.Open( fmt.Sprintf("//%s?priority=%d", call.Args.Header().User, call.Args.Header().GetUserPriority()), client.SenderOpt(tc)) if err != nil { log.Warning(err) return } call.Reply.Reset() if err := tmpDB.Txn(func(txn *client.Txn) error { txn.SetDebugName("auto-wrap") b := &client.Batch{} b.InternalAddCall(call) return txn.Commit(b) }); err != nil { log.Warning(err) } case nil: var resolved []proto.Key if txn := call.Reply.Header().Txn; txn != nil { if _, ok := call.Args.(*proto.EndTransactionRequest); ok { // If the --linearizable flag is set, we want to make sure that // all the clocks in the system are past the commit timestamp // of the transaction. This is guaranteed if either // - the commit timestamp is MaxOffset behind startNS // - MaxOffset ns were spent in this function // when returning to the client. Below we choose the option // that involves less waiting, which is likely the first one // unless a transaction commits with an odd timestamp. if tsNS := txn.Timestamp.WallTime; startNS > tsNS { startNS = tsNS } sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS) if tc.linearizable && sleepNS > 0 { defer func() { if log.V(1) { log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond)) } time.Sleep(sleepNS) }() } resolved = call.Reply.(*proto.EndTransactionResponse).Resolved if txn.Status != proto.PENDING { tc.cleanupTxn(*txn, resolved) } } } } }
// close sends resolve intent commands for all key ranges this // transaction has covered, clears the keys cache and closes the // metadata heartbeat. Any keys listed in the resolved slice have // already been resolved and do not receive resolve intent commands. func (tm *txnMetadata) close(txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *util.Stopper) { close(tm.txnEnd) // stop heartbeat if tm.keys.Len() > 0 { if log.V(2) { log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn) } } for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) { // If the op was range based, end key != start key: resolve a range. var call proto.Call key := o.Key.Start().(proto.Key) endKey := o.Key.End().(proto.Key) if !key.Next().Equal(endKey) { call.Args = &proto.InternalResolveIntentRangeRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, EndKey: endKey, User: storage.UserRoot, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentRangeResponse{} } else { // Check if the key has already been resolved; skip if yes. found := false for _, k := range resolved { if key.Equal(k) { found = true } } if found { continue } call.Args = &proto.InternalResolveIntentRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, User: storage.UserRoot, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentResponse{} } // We don't care about the reply channel; these are best // effort. We simply fire and forget, each in its own goroutine. if stopper.StartTask() { go func() { if log.V(2) { log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn) } sender.Send(context.TODO(), call) if call.Reply.Header().Error != nil { log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError()) } stopper.FinishTask() }() } } tm.keys.Clear() }
// close sends resolve intent commands for all key ranges this // transaction has covered, clears the keys cache and closes the // metadata heartbeat. Any keys listed in the resolved slice have // already been resolved and do not receive resolve intent commands. func (tm *txnMetadata) close(trace *tracer.Trace, txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *stop.Stopper) { close(tm.txnEnd) // stop heartbeat trace.Event("coordinator stops") if tm.keys.Len() > 0 { if log.V(2) { log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn) } } // TODO(tschottdorf): Should create a Batch here. for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) { // If the op was range based, end key != start key: resolve a range. var call proto.Call key := o.Key.Start().(proto.Key) endKey := o.Key.End().(proto.Key) if !key.Next().Equal(endKey) { call.Args = &proto.InternalResolveIntentRangeRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, EndKey: endKey, User: security.RootUser, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentRangeResponse{} } else { // Check if the key has already been resolved; skip if yes. found := false for _, k := range resolved { if key.Equal(k) { found = true } } if found { continue } call.Args = &proto.InternalResolveIntentRequest{ RequestHeader: proto.RequestHeader{ Timestamp: txn.Timestamp, Key: key, User: security.RootUser, Txn: txn, }, } call.Reply = &proto.InternalResolveIntentResponse{} } // We don't care about the reply channel; these are best // effort. We simply fire and forget, each in its own goroutine. ctx := tracer.ToCtx(context.Background(), trace.Fork()) stopper.RunAsyncTask(func() { if log.V(2) { log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn) } sender.Send(ctx, call) if call.Reply.Header().Error != nil { log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError()) } }) } tm.keys.Clear() }