// sendOne invokes the specified RPC on the supplied client when the // client is ready. On success, the reply is sent on the channel; // otherwise an error is sent. // // Do not call directly, but instead use sendOneFn. Tests mock out this method // via sendOneFn in order to test various error cases. func sendOne(client *batchClient, timeout time.Duration, context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) { const method = "Node.Batch" addr := client.RemoteAddr() args := &client.args args.Replica = client.replica.ReplicaDescriptor if log.V(2) { log.Infof("sending request to %s: %+v", addr, args) } trace.LogEvent(fmt.Sprintf("sending to %s", addr)) if enableLocalCalls && context.LocalServer != nil && addr.String() == context.LocalAddr { if context.LocalServer.LocalCall(method, args, done) { return } } reply := &roachpb.BatchResponse{} // Don't bother firing off a goroutine in the common case where a client // is already healthy. select { case <-client.Healthy(): client.Go(method, args, reply, done) return default: } go func() { var timeoutChan <-chan time.Time if timeout != 0 { timeoutChan = time.After(timeout) } select { case <-client.Healthy(): client.Go(method, args, reply, done) case <-client.Closed: done <- &netrpc.Call{Error: newRPCError( util.Errorf("rpc to %s failed as client connection was closed", method))} case <-timeoutChan: done <- &netrpc.Call{Error: newRPCError( util.Errorf("rpc to %s: client not ready after %s", method, timeout))} } }() }
// cleanupTxn is called when a transaction ends. The transaction record is // updated and the heartbeat goroutine signaled to clean up the transaction // gracefully. func (tc *TxnCoordSender) cleanupTxn(trace opentracing.Span, txn roachpb.Transaction) { trace.LogEvent("coordinator stops") tc.Lock() defer tc.Unlock() txnMeta, ok := tc.txns[*txn.ID] // The heartbeat might've already removed the record. if !ok { return } // The supplied txn may be newer than the one in txnMeta, which is relevant // for stats. txnMeta.txn = txn // Trigger heartbeat shutdown. close(txnMeta.txnEnd) txnMeta.txnEnd = nil }
// sendSingleRange gathers and rearranges the replicas, and makes an RPC call. func (ds *DistSender) sendSingleRange(trace opentracing.Span, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor) (*roachpb.BatchResponse, *roachpb.Error) { trace.LogEvent(fmt.Sprintf("sending RPC to [%s, %s)", desc.StartKey, desc.EndKey)) leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID)) // Try to send the call. replicas := newReplicaSlice(ds.gossip, desc) // Rearrange the replicas so that those replicas with long common // prefix of attributes end up first. If there's no prefix, this is a // no-op. order := ds.optimizeReplicaOrder(replicas) // If this request needs to go to a leader and we know who that is, move // it to the front. if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) && leader.StoreID > 0 { if i := replicas.FindReplica(leader.StoreID); i >= 0 { replicas.MoveToFront(i) order = orderStable } } // Increase the sequence counter in the per-range loop (not // outside) since we might hit the same range twice by // accident. For example, we might send multiple requests to // the same Replica if (1) the descriptor cache has post-split // descriptors that are still write intents and (2) the split // has not yet been completed. ba.SetNewRequest() // TODO(tschottdorf): should serialize the trace here, not higher up. br, pErr := ds.sendRPC(trace, desc.RangeID, replicas, order, ba) if pErr != nil { return nil, pErr } // Untangle the error from the received response. pErr = br.Error br.Error = nil // scrub the response error return br, pErr }
func (tc *TxnCoordSender) heartbeat(txnID uuid.UUID, trace opentracing.Span, ctx context.Context) bool { tc.Lock() proceed := true txnMeta := tc.txns[txnID] var intentSpans []roachpb.Span // Before we send a heartbeat, determine whether this transaction // should be considered abandoned. If so, exit heartbeat. if txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow()) { // TODO(tschottdorf): should we be more proactive here? // The client might be continuing the transaction // through another coordinator, but in the most likely // case it's just gone and the open transaction record // could block concurrent operations. if log.V(1) { log.Infof("transaction %s abandoned; stopping heartbeat", txnMeta.txn) } proceed = false // Grab the intents here to avoid potential race. intentSpans = collectIntentSpans(txnMeta.keys) txnMeta.keys.Clear() } // txnMeta.txn is possibly replaced concurrently, // so grab a copy before unlocking. txn := txnMeta.txn.Clone() tc.Unlock() ba := roachpb.BatchRequest{} ba.Txn = &txn if !proceed { // Actively abort the transaction and its intents since we assume it's abandoned. et := &roachpb.EndTransactionRequest{ Span: roachpb.Span{ Key: txn.Key, }, Commit: false, IntentSpans: intentSpans, } ba.Add(et) tc.stopper.RunAsyncTask(func() { // Use the wrapped sender since the normal Sender // does not allow clients to specify intents. // TODO(tschottdorf): not using the existing context here since that // leads to use-after-finish of the contained trace. Should fork off // before the goroutine. if _, pErr := tc.wrapped.Send(context.Background(), ba); pErr != nil { if log.V(1) { log.Warningf("abort due to inactivity failed for %s: %s ", txn, pErr) } } }) return false } hb := &roachpb.HeartbeatTxnRequest{ Now: tc.clock.Now(), } hb.Key = txn.Key ba.Add(hb) trace.LogEvent("heartbeat") _, err := tc.wrapped.Send(ctx, ba) // If the transaction is not in pending state, then we can stop // the heartbeat. It's either aborted or committed, and we resolve // write intents accordingly. if err != nil { log.Warningf("heartbeat to %s failed: %s", txn, err) } // TODO(bdarnell): once we have gotten a heartbeat response with // Status != PENDING, future heartbeats are useless. However, we // need to continue the heartbeatLoop until the client either // commits or abandons the transaction. We could save a little // pointless work by restructuring this loop to stop sending // heartbeats between the time that the transaction is aborted and // the client finds out. Furthermore, we could use this information // to send TransactionAbortedErrors to the client so it can restart // immediately instead of running until its EndTransaction. return true }