// requestLeaseLocked executes a request to obtain or extend a lease
// asynchronously and returns a channel on which the result will be posted. If
// there's already a request in progress, we join in waiting for the results of
// that request. Unless an error is returned, the obtained lease will be valid
// for a time interval containing the requested timestamp.
// If a transfer is in progress, a NotLeaseHolderError directing the request to
// the recipient is sent on the returned channel.
func (r *Replica) requestLeaseLocked(timestamp hlc.Timestamp) <-chan *roachpb.Error {
	if r.store.TestingKnobs().LeaseRequestEvent != nil {
		r.store.TestingKnobs().LeaseRequestEvent(timestamp)
	}
	// Propose a Raft command to get a lease for this replica.
	repDesc, err := r.getReplicaDescriptorLocked()
	if err != nil {
		llChan := make(chan *roachpb.Error, 1)
		llChan <- roachpb.NewError(err)
		return llChan
	}
	if transferLease, ok := r.mu.pendingLeaseRequest.TransferInProgress(
		repDesc.ReplicaID); ok {
		llChan := make(chan *roachpb.Error, 1)
		llChan <- roachpb.NewError(
			newNotLeaseHolderError(&transferLease, r.store.StoreID(), r.mu.state.Desc))
		return llChan
	}
	if r.store.IsDrainingLeases() {
		// We've retired from active duty.
		llChan := make(chan *roachpb.Error, 1)
		llChan <- roachpb.NewError(newNotLeaseHolderError(nil, r.store.StoreID(), r.mu.state.Desc))
		return llChan
	}
	return r.mu.pendingLeaseRequest.InitOrJoinRequest(
		r, repDesc, timestamp, r.mu.state.Desc.StartKey.AsRawKey(), false /* transfer */)
}
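A minimal sketch of how a caller might consume the returned channel (hypothetical caller; the real call sites also coordinate with shutdown signals):

	llChan := r.requestLeaseLocked(timestamp)
	// Wait for the result of the lease request, or give up if the
	// caller's context is cancelled first.
	select {
	case pErr := <-llChan:
		if pErr != nil {
			return pErr // acquisition failed or we were redirected
		}
	case <-ctx.Done():
		return roachpb.NewError(ctx.Err())
	}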
func (ds *DistSender) deduceRetryEarlyExitError(ctx context.Context) *roachpb.Error {
	select {
	case <-ds.rpcRetryOptions.Closer:
		// Typically happens during shutdown.
		return roachpb.NewError(&roachpb.NodeUnavailableError{})
	case <-ctx.Done():
		// Happens when the client request is cancelled.
		return roachpb.NewError(ctx.Err())
	default:
	}
	return nil
}
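This helper is meant to be consulted after a retry loop exits without producing a result; a condensed sketch of the pattern, mirroring the call sites in RangeIterator.Seek and sendPartialBatch below:

	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// ... attempt the operation; return on success ...
	}
	// The loop ended without a result: either the retry closer was
	// closed (shutdown) or the client cancelled the context.
	if pErr := ds.deduceRetryEarlyExitError(ctx); pErr != nil {
		return pErr
	}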
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// If we aren't given a Replica, we have to bend over backwards a little
	// here. This case applies exclusively to unit tests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		rs, err := keys.Range(ba)
		if err != nil {
			return nil, roachpb.NewError(err)
		}
		rangeID, repDesc, err := ls.LookupReplica(rs.Key, rs.EndKey)
		if err != nil {
			return nil, roachpb.NewError(err)
		}
		ba.RangeID = rangeID
		ba.Replica = repDesc
	}
	store, err := ls.GetStore(ba.Replica.StoreID)
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	if ba.Txn != nil {
		// For calls that read data within a txn, we keep track of timestamps
		// observed from the various participating nodes' HLC clocks. If we have
		// a timestamp on file for this Node which is smaller than MaxTimestamp,
		// we can lower MaxTimestamp accordingly. If MaxTimestamp drops below
		// OrigTimestamp, we effectively can't see uncertainty restarts any
		// more.
		// Note that it's not an issue if MaxTimestamp propagates back out to
		// the client via a returned Transaction update - when updating a Txn
		// from another, the larger MaxTimestamp wins.
		if maxTS, ok := ba.Txn.GetObservedTimestamp(ba.Replica.NodeID); ok && maxTS.Less(ba.Txn.MaxTimestamp) {
			// Copy-on-write to protect others we might be sharing the Txn with.
			shallowTxn := *ba.Txn
			// The uncertainty window is [OrigTimestamp, maxTS), so if that window
			// is empty, there won't be any uncertainty restarts.
			if !ba.Txn.OrigTimestamp.Less(maxTS) {
				log.Event(ctx, "read has no clock uncertainty")
			}
			shallowTxn.MaxTimestamp.Backward(maxTS)
			ba.Txn = &shallowTxn
		}
	}
	br, pErr := store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// TODO(tschottdorf): this method is somewhat awkward but unless we want to
// give this error back to the client, our options are limited. We'll have to
// run the whole thing for them, or any restart will still end up at the client
// which will not be prepared to be handed a Txn.
func (tc *TxnCoordSender) resendWithTxn(
	ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	ctx := tc.AnnotateCtx(context.TODO())
	// Run a one-off transaction with that single command.
	if log.V(1) {
		log.Infof(ctx, "%s: auto-wrapping in txn and re-executing: ", ba)
	}
	// TODO(bdarnell): need to be able to pass other parts of DBContext
	// through here.
	dbCtx := client.DefaultDBContext()
	dbCtx.UserPriority = ba.UserPriority
	tmpDB := client.NewDBWithContext(tc, dbCtx)
	var br *roachpb.BatchResponse
	err := tmpDB.Txn(ctx, func(txn *client.Txn) error {
		txn.SetDebugName("auto-wrap", 0)
		b := txn.NewBatch()
		b.Header = ba.Header
		for _, arg := range ba.Requests {
			req := arg.GetInner()
			b.AddRawRequest(req)
		}
		err := txn.CommitInBatch(b)
		br = b.RawResponse()
		return err
	})
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	br.Txn = nil // hide the evidence
	return br, nil
}
// maybeRejectClientLocked checks whether the (transactional) request is in a
// state that prevents it from continuing, such as the coordinator having
// considered the client abandoned, or a heartbeat having reported an error.
func (tc *TxnCoordSender) maybeRejectClientLocked(
	ctx context.Context, txn roachpb.Transaction,
) *roachpb.Error {
	if !txn.Writing {
		return nil
	}
	txnMeta, ok := tc.txns[*txn.ID]
	// Check whether the transaction is still tracked and has a chance of
	// completing. It's possible that the coordinator learns about the
	// transaction having terminated from a heartbeat, and GC queue correctness
	// (along with common sense) mandates that we don't let the client
	// continue.
	switch {
	case !ok:
		log.VEventf(ctx, 2, "rejecting unknown txn: %s", txn.ID)
		// TODO(spencerkimball): Could add coordinator node ID to the
		// transaction session so that we can definitively return the right
		// error between these possible errors. Or update the code to make an
		// educated guess based on the incoming transaction timestamp.
		return roachpb.NewError(errNoState)
	case txnMeta.txn.Status == roachpb.ABORTED:
		txn := txnMeta.txn.Clone()
		tc.cleanupTxnLocked(ctx, txn)
		return roachpb.NewErrorWithTxn(roachpb.NewTransactionAbortedError(), &txn)
	case txnMeta.txn.Status == roachpb.COMMITTED:
		txn := txnMeta.txn.Clone()
		tc.cleanupTxnLocked(ctx, txn)
		return roachpb.NewErrorWithTxn(roachpb.NewTransactionStatusError(
			"transaction is already committed"), &txn)
	default:
		return nil
	}
}
// Batch implements the roachpb.InternalServer interface.
func (n *Node) Batch(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	growStack()

	ctx = n.AnnotateCtx(ctx)
	br, err := n.batchInternal(ctx, args)

	// We always return errors via BatchResponse.Error so structure is
	// preserved; plain errors are presumed to be from the RPC
	// framework and not from cockroach.
	if err != nil {
		if br == nil {
			br = &roachpb.BatchResponse{}
		}
		if br.Error != nil {
			log.Fatalf(
				ctx, "attempting to return both a plain error (%s) and roachpb.Error (%s)",
				err, br.Error,
			)
		}
		br.Error = roachpb.NewError(err)
	}
	return br, nil
}
// RangeLookup implements the RangeDescriptorDB interface.
// RangeLookup dispatches a RangeLookup request for the given metadata
// key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(
	ctx context.Context, key roachpb.RKey, desc *roachpb.RangeDescriptor, useReverseScan bool,
) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges: ds.rangeLookupMaxRanges,
		Reverse:   useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	replicas.Shuffle()
	br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba)
	if err != nil {
		return nil, nil, roachpb.NewError(err)
	}
	if br.Error != nil {
		return nil, nil, br.Error
	}
	resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse)
	return resp.Ranges, resp.PrefetchedRanges, nil
}
func TestSendNext_NonRetryableApplicationError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	doneChans, sendChan, stopper := setupSendNextTest(t)
	defer stopper.Stop()

	// One replica finishes with a non-retryable error.
	doneChans[1] <- BatchCall{
		Reply: &roachpb.BatchResponse{
			BatchResponse_Header: roachpb.BatchResponse_Header{
				Error: roachpb.NewError(roachpb.NewTransactionReplayError()),
			},
		},
	}

	// The client completes with that error, without waiting for the
	// others to finish.
	bc := <-sendChan
	if bc.Err != nil {
		t.Fatalf("expected error in payload, not rpc error %s", bc.Err)
	}
	if _, ok := bc.Reply.Error.GetDetail().(*roachpb.TransactionReplayError); !ok {
		t.Errorf("expected TransactionReplayError, got %v", bc.Reply.Error)
	}
}
func TestSendNext_AllRetryableApplicationErrors(t *testing.T) {
	defer leaktest.AfterTest(t)()

	doneChans, sendChan, stopper := setupSendNextTest(t)
	defer stopper.Stop()

	// All replicas finish with a retryable error.
	for _, ch := range doneChans {
		ch <- BatchCall{
			Reply: &roachpb.BatchResponse{
				BatchResponse_Header: roachpb.BatchResponse_Header{
					Error: roachpb.NewError(roachpb.NewRangeNotFoundError(1)),
				},
			},
		}
	}

	// The client send finishes with one of the errors, wrapped in a SendError.
	bc := <-sendChan
	if bc.Err == nil {
		t.Fatalf("expected SendError, got err=nil and reply=%s", bc.Reply)
	} else if _, ok := bc.Err.(*roachpb.SendError); !ok {
		t.Fatalf("expected SendError, got err=%s", bc.Err)
	} else if exp := "range 1 was not found"; !testutils.IsError(bc.Err, exp) {
		t.Errorf("expected SendError to contain %q, but got %v", exp, bc.Err)
	}
}
func TestSendNext_RetryableApplicationErrorThenSuccess(t *testing.T) {
	defer leaktest.AfterTest(t)()

	doneChans, sendChan, stopper := setupSendNextTest(t)
	defer stopper.Stop()

	// One replica finishes with a retryable error.
	doneChans[1] <- BatchCall{
		Reply: &roachpb.BatchResponse{
			BatchResponse_Header: roachpb.BatchResponse_Header{
				Error: roachpb.NewError(roachpb.NewRangeNotFoundError(1)),
			},
		},
	}

	// A second replica finishes successfully.
	doneChans[2] <- BatchCall{
		Reply: &roachpb.BatchResponse{},
	}

	// The client send finishes with the second response.
	bc := <-sendChan
	if bc.Err != nil {
		t.Fatalf("unexpected RPC error: %s", bc.Err)
	}
	if bc.Reply.Error != nil {
		t.Errorf("expected successful reply, got %s", bc.Reply.Error)
	}
}
// sendAndFill is a helper which sends the given batch and fills its results,
// returning the appropriate error which is either from the first failing call,
// or an "internal" error.
func sendAndFill(
	send func(roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error), b *Batch,
) error {
	// Errors here will be attached to the results, so we will get them from
	// the call to fillResults in the regular case in which an individual call
	// fails. But send() also returns its own errors, so there's some dancing
	// here to do because we want to run fillResults() so that the individual
	// result gets initialized with an error from the corresponding call.
	var ba roachpb.BatchRequest
	// TODO(tschottdorf): this nonsensical copy is required since (at least at
	// the time of writing) the chunking and masking in DistSender operates on
	// the original data (as attested to by a whole bunch of test failures).
	ba.Requests = append([]roachpb.RequestUnion(nil), b.reqs...)
	ba.Header = b.Header
	b.response, b.pErr = send(ba)
	if b.pErr != nil {
		// Discard errors from fillResults.
		_ = b.fillResults()
		return b.pErr.GoError()
	}
	if err := b.fillResults(); err != nil {
		b.pErr = roachpb.NewError(err)
		return err
	}
	return nil
}
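A sketch of the expected caller, assuming a DB.Run-style wrapper (the actual wrapper in the client package may differ in detail):

	// Run executes the operations queued up within the batch, filling in
	// the per-call results via sendAndFill.
	func (db *DB) Run(b *Batch) error {
		// db.send is assumed to match the signature sendAndFill expects.
		return sendAndFill(db.send, b)
	}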
// Batch implements the roachpb.KVServer interface.
func (s *DBServer) Batch(
	ctx context.Context, args *roachpb.BatchRequest,
) (br *roachpb.BatchResponse, err error) {
	// TODO(marc,bdarnell): this code is duplicated in server/node.go,
	// which should be fixed.
	defer func() {
		// We always return errors via BatchResponse.Error so structure is
		// preserved; plain errors are presumed to be from the RPC
		// framework and not from cockroach.
		if err != nil {
			if br == nil {
				br = &roachpb.BatchResponse{}
			}
			if br.Error != nil {
				panic(fmt.Sprintf(
					"attempting to return both a plain error (%s) and roachpb.Error (%s)",
					err, br.Error))
			}
			br.Error = roachpb.NewError(err)
			err = nil
		}
	}()

	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if peer, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := peer.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	if err = verifyRequest(args); err != nil {
		return br, err
	}

	err = s.stopper.RunTask(func() {
		var pErr *roachpb.Error
		// TODO(wiz): This is required to be a different context from the one
		// provided by grpc since it has to last for the entire transaction and not
		// just this one RPC call. See comment for (*TxnCoordSender).heartbeatLoop.
		br, pErr = s.sender.Send(context.TODO(), *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(s.sender, br))
		}
		br.Error = pErr
	})
	return br, err
}
// checkEndTransactionTrigger verifies that an EndTransactionRequest
// that includes intents for the SystemDB keys sets the proper trigger.
func checkEndTransactionTrigger(args storagebase.FilterArgs) *roachpb.Error {
	req, ok := args.Req.(*roachpb.EndTransactionRequest)
	if !ok {
		return nil
	}

	if !req.Commit {
		// This is a rollback: skip trigger verification.
		return nil
	}

	modifiedSpanTrigger := req.InternalCommitTrigger.GetModifiedSpanTrigger()
	modifiedSystemConfigSpan := modifiedSpanTrigger != nil && modifiedSpanTrigger.SystemConfigSpan

	var hasSystemKey bool
	for _, span := range req.IntentSpans {
		keyAddr, err := keys.Addr(span.Key)
		if err != nil {
			return roachpb.NewError(err)
		}
		if bytes.Compare(keyAddr, keys.SystemConfigSpan.Key) >= 0 &&
			bytes.Compare(keyAddr, keys.SystemConfigSpan.EndKey) < 0 {
			hasSystemKey = true
			break
		}
	}
	// If the transaction in question has intents in the system span, then
	// modifiedSystemConfigSpan should always be true. However, it is possible
	// for modifiedSystemConfigSpan to be set, even though no system keys are
	// present. This can occur with certain conditional DDL statements (e.g.
	// "CREATE TABLE IF NOT EXISTS"), which set the SystemConfigTrigger
	// aggressively but may not actually end up changing the system DB depending
	// on the current state.
	// For more information, see the related comment at the beginning of
	// planner.makePlan().
	if hasSystemKey && !modifiedSystemConfigSpan {
		return roachpb.NewError(errors.Errorf("EndTransaction hasSystemKey=%t, but hasSystemConfigTrigger=%t",
			hasSystemKey, modifiedSystemConfigSpan))
	}

	return nil
}
// Send implements the Sender interface.
func (s sender) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	br, err := s.Batch(ctx, &ba)
	if err != nil {
		return nil, roachpb.NewError(roachpb.NewSendError(err.Error()))
	}
	pErr := br.Error
	br.Error = nil
	return br, pErr
}
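This is the client-side inverse of the wrapping done in Node.Batch above: structured errors travel in br.Error, while plain errors are treated as RPC-framework failures and converted to SendErrors. An illustrative sketch of the resulting contract for callers (the handling shown is an assumption, not taken from the source):

	br, pErr := s.Send(ctx, ba)
	if pErr != nil {
		if _, ok := pErr.GetDetail().(*roachpb.SendError); ok {
			// The RPC itself failed; another replica might be tried.
		}
		// Otherwise, a structured storage-level error from the server.
		return pErr
	}
	// Success: br carries the responses.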
// Seek positions the iterator at the specified key.
func (ri *RangeIterator) Seek(ctx context.Context, key roachpb.RKey, scanDir ScanDirection) {
	log.Eventf(ctx, "querying next range at %s", key)
	ri.scanDir = scanDir
	ri.init = true // the iterator is now initialized
	ri.pErr = nil  // clear any prior error
	ri.key = key   // set the key

	// Retry loop for looking up next range in the span. The retry loop
	// deals with retryable range descriptor lookups.
	for r := retry.StartWithCtx(ctx, ri.ds.rpcRetryOptions); r.Next(); {
		log.Event(ctx, "meta descriptor lookup")
		var err error
		ri.desc, ri.token, err = ri.ds.getDescriptor(
			ctx, ri.key, ri.token, ri.scanDir == Descending)

		// getDescriptor may fail retryably if, for example, the first
		// range isn't available via Gossip. Assume that all errors at
		// this level are retryable. Non-retryable errors would be for
		// things like malformed requests which we should have checked
		// for before reaching this point.
		if err != nil {
			log.VEventf(ctx, 1, "range descriptor lookup failed: %s", err)
			continue
		}

		// It's possible that the returned descriptor misses parts of the
		// keys it's supposed to include after it's truncated to match the
		// descriptor. Example: for a reverse scan of [a,g), the first
		// descriptor lookup for "g" may return the descriptor [c,d), in
		// which case [d,g) would never be scanned.
		// We evict and retry in such a case.
		// TODO: this code is subject to removal. See
		// https://groups.google.com/d/msg/cockroach-db/DebjQEgU9r4/_OhMe7atFQAJ
		reverse := ri.scanDir == Descending
		if (reverse && !ri.desc.ContainsExclusiveEndKey(ri.key)) ||
			(!reverse && !ri.desc.ContainsKey(ri.key)) {
			log.Eventf(ctx, "addressing error: %s does not include key %s", ri.desc, ri.key)
			if err := ri.token.Evict(ctx); err != nil {
				ri.pErr = roachpb.NewError(err)
				return
			}
			// On addressing errors, don't backoff; retry immediately.
			r.Reset()
			continue
		}
		return
	}

	// Check for an early exit from the retry loop.
	if pErr := ri.ds.deduceRetryEarlyExitError(ctx); pErr != nil {
		ri.pErr = pErr
	} else {
		ri.pErr = roachpb.NewErrorf("RangeIterator failed to seek to %s", key)
	}
}
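Seek is driven in a re-seeking loop rather than through a Next method; compare the loop in divideAndSendBatchToRanges below. A condensed sketch against the signature above (the per-range work and seek-key computation are simplified assumptions):

	ri := NewRangeIterator(ds, false /* isReverse */)
	for ri.Seek(ctx, seekKey, Ascending); ri.Valid(); ri.Seek(ctx, seekKey, Ascending) {
		// ... operate on the range described by ri.Desc() ...
		if !ri.NeedAnother(rs) {
			break
		}
		seekKey = ri.Desc().EndKey // simplified; the real code uses next()/prev()
	}
	if pErr := ri.Error(); pErr != nil {
		// The iteration stopped early; pErr explains why.
	}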
// TestAbortTransactionOnCommitErrors verifies that transactions are
// aborted on the correct errors.
func TestAbortTransactionOnCommitErrors(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		err   error
		abort bool
	}{
		{roachpb.NewReadWithinUncertaintyIntervalError(hlc.ZeroTimestamp, hlc.ZeroTimestamp), true},
		{&roachpb.TransactionAbortedError{}, false},
		{&roachpb.TransactionPushError{}, true},
		{&roachpb.TransactionRetryError{}, true},
		{&roachpb.RangeNotFoundError{}, true},
		{&roachpb.RangeKeyMismatchError{}, true},
		{&roachpb.TransactionStatusError{}, true},
	}

	for _, test := range testCases {
		var commit, abort bool
		db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
			switch t := ba.Requests[0].GetInner().(type) {
			case *roachpb.EndTransactionRequest:
				if t.Commit {
					commit = true
					return nil, roachpb.NewError(test.err)
				}
				abort = true
			}
			return ba.CreateReply(), nil
		}, nil))

		txn := NewTxn(context.Background(), *db)
		if pErr := txn.Put("a", "b"); pErr != nil {
			t.Fatalf("put failed: %s", pErr)
		}
		if pErr := txn.CommitOrCleanup(); pErr == nil {
			t.Fatalf("unexpected commit success")
		}

		if !commit {
			t.Errorf("%T: failed to find commit", test.err)
		}
		if test.abort && !abort {
			t.Errorf("%T: failed to find abort", test.err)
		} else if !test.abort && abort {
			t.Errorf("%T: found unexpected abort", test.err)
		}
	}
}
// handleRaftRequest proxies a request to the listening server interface.
func (t *RaftTransport) handleRaftRequest(
	ctx context.Context, req *RaftMessageRequest, respStream RaftMessageResponseStream,
) *roachpb.Error {
	t.recvMu.Lock()
	handler, ok := t.recvMu.handlers[req.ToReplica.StoreID]
	t.recvMu.Unlock()
	if !ok {
		log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
			req.FromReplica, req.ToReplica)
		return roachpb.NewError(roachpb.NewStoreNotFoundError(req.ToReplica.StoreID))
	}

	return handler.HandleRaftRequest(ctx, req, respStream)
}
// Test that a txn gets a fresh OrigTimestamp with every retry.
func TestAbortedRetryRenewsTimestamp(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Create a TestSender that aborts a transaction 2 times before succeeding.
	mc := hlc.NewManualClock(123)
	clock := hlc.NewClock(mc.UnixNano, time.Nanosecond)
	count := 0
	db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		if _, ok := ba.GetArg(roachpb.Put); ok {
			mc.Increment(1)
			count++
			if count < 3 {
				return nil, roachpb.NewError(&roachpb.TransactionAbortedError{})
			}
		}
		return ba.CreateReply(), nil
	}, nil))

	txnClosure := func(txn *Txn, opt *TxnExecOptions) error {
		// Ensure the KV transaction is created.
		return txn.Put("a", "b")
	}

	txn := NewTxn(context.Background(), *db)

	// Request a client-defined timestamp.
	refTimestamp := clock.Now()
	execOpt := TxnExecOptions{
		AutoRetry:  true,
		AutoCommit: true,
		Clock:      clock,
	}

	// Perform the transaction.
	if err := txn.Exec(execOpt, txnClosure); err != nil {
		t.Fatal(err)
	}

	// Check that the timestamp was renewed.
	if txn.Proto.OrigTimestamp.WallTime == refTimestamp.WallTime {
		t.Errorf("expected txn orig ts to be different than %s", refTimestamp)
	}
}
// sendSingleRange gathers and rearranges the replicas, and makes an RPC call.
func (ds *DistSender) sendSingleRange(
	ctx context.Context, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those with a long common prefix of
	// attributes end up first. If there's no prefix, this is a no-op.
	ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a lease holder and we know who that is, move
	// it to the front.
	if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) {
		if leaseHolder, ok := ds.leaseHolderCache.Lookup(ctx, desc.RangeID); ok {
			if i := replicas.FindReplica(leaseHolder.StoreID); i >= 0 {
				replicas.MoveToFront(i)
			}
		}
	}

	// TODO(tschottdorf): should serialize the trace here, not higher up.
	br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba)
	if err != nil {
		return nil, roachpb.NewError(err)
	}

	// If the reply contains a timestamp, update the local HLC with it.
	if br.Error != nil && br.Error.Now != hlc.ZeroTimestamp {
		ds.clock.Update(br.Error.Now)
	} else if br.Now != hlc.ZeroTimestamp {
		ds.clock.Update(br.Now)
	}

	// Untangle the error from the received response.
	pErr := br.Error
	br.Error = nil // scrub the response error
	return br, pErr
}
// TestBeginTransactionErrorIndex verifies that the error index is cleared
// when a BeginTransaction command causes an error.
func TestBeginTransactionErrorIndex(t *testing.T) {
	defer leaktest.AfterTest(t)()

	db := NewDB(newTestSender(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		pErr := roachpb.NewError(&roachpb.WriteIntentError{})
		pErr.SetErrorIndex(0)
		return nil, pErr
	}, nil))

	_ = db.Txn(context.TODO(), func(txn *Txn) error {
		b := txn.NewBatch()
		b.Put("a", "b")
		err := getOneErr(txn.Run(b), b)
		pErr := b.MustPErr()

		// Verify that the original error type is preserved, but the error index is unset.
		if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok {
			t.Fatalf("unexpected error %s", pErr)
		}
		if pErr.Index != nil {
			t.Errorf("error index must not be set, but got %s", pErr.Index)
		}
		return err
	})
}
// FormMessage populates a message containing the rows added since the last call
// to FormMessage. The returned StreamMessage should be treated as immutable. If
// final is true, a message trailer is populated with the given error.
func (se *StreamEncoder) FormMessage(final bool, trailerErr error) *StreamMessage {
	msg := &se.msg
	msg.Header = nil
	msg.Data.RawBytes = se.rowBuf
	msg.Trailer = nil
	if !se.firstMessageDone {
		msg.Header = &se.msgHdr
		if se.infos != nil {
			msg.Header.Info = se.infos
		} else {
			if !final {
				panic("trying to form non-final message with no rows")
			}
		}
	}
	if final {
		msg.Trailer = &se.msgTrl
		msg.Trailer.Error = roachpb.NewError(trailerErr)
	}
	se.rowBuf = se.rowBuf[:0]
	se.firstMessageDone = true
	return msg
}
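A usage sketch for the encoder (hypothetical producer loop; AddRow, flushThreshold, and the stream variable are assumptions about the surrounding API, not shown here):

	var se StreamEncoder
	for _, row := range rows {
		se.AddRow(row) // assumed row-buffering method
		if len(se.rowBuf) >= flushThreshold {
			// Intermediate messages carry no trailer.
			if err := stream.Send(se.FormMessage(false /* final */, nil)); err != nil {
				return err
			}
		}
	}
	// The final message carries the trailer, including any terminal error.
	return stream.Send(se.FormMessage(true /* final */, producerErr))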
// InitOrJoinRequest executes a RequestLease command asynchronously and returns a
// channel on which the result will be posted. If there's already a request in
// progress, we join in waiting for the results of that request.
// It is an error to call InitOrJoinRequest() while a request is in progress
// naming another replica as lease holder.
//
// replica is used to schedule and execute async work (proposing a RequestLease
// command). replica.mu is locked when delivering results, so calls from the
// replica happen either before or after a result for a pending request has
// happened.
//
// transfer needs to be set if the request represents a lease transfer (as
// opposed to an extension, or acquiring the lease when none is held).
//
// Note: Once this function gets a context to be used for cancellation, instead
// of replica.store.Stopper().ShouldQuiesce(), care will be needed for cancelling
// the Raft command, similar to replica.addWriteCmd.
func (p *pendingLeaseRequest) InitOrJoinRequest(
	replica *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	timestamp hlc.Timestamp,
	startKey roachpb.Key,
	transfer bool,
) <-chan *roachpb.Error {
	if nextLease, ok := p.RequestPending(); ok {
		if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID {
			// Join a pending request asking for the same replica to become lease
			// holder.
			return p.JoinRequest()
		}
		llChan := make(chan *roachpb.Error, 1)
		// We can't join the request in progress.
		llChan <- roachpb.NewErrorf("request for different replica in progress "+
			"(requesting: %+v, in progress: %+v)",
			nextLeaseHolder.ReplicaID, nextLease.Replica.ReplicaID)
		return llChan
	}
	llChan := make(chan *roachpb.Error, 1)
	// No request in progress. Let's propose a Lease command asynchronously.
	// TODO(tschottdorf): get duration from configuration, either as a
	// config flag or, later, dynamically adjusted.
	startStasis := timestamp.Add(int64(replica.store.cfg.RangeLeaseActiveDuration), 0)
	expiration := startStasis.Add(int64(replica.store.Clock().MaxOffset()), 0)
	reqSpan := roachpb.Span{
		Key: startKey,
	}
	var leaseReq roachpb.Request
	reqLease := roachpb.Lease{
		Start:       timestamp,
		StartStasis: startStasis,
		Expiration:  expiration,
		Replica:     nextLeaseHolder,
	}
	if transfer {
		leaseReq = &roachpb.TransferLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	} else {
		leaseReq = &roachpb.RequestLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	}
	if replica.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = replica.AnnotateCtx(ctx)
		// Propose a RequestLease command and wait for it to apply.
		ba := roachpb.BatchRequest{}
		ba.Timestamp = replica.store.Clock().Now()
		ba.RangeID = replica.RangeID
		ba.Add(leaseReq)
		if log.V(2) {
			log.Infof(ctx, "sending lease request %v", leaseReq)
		}
		_, pErr := replica.Send(ctx, ba)
		// Send result of lease to all waiter channels.
		replica.mu.Lock()
		defer replica.mu.Unlock()
		for i, llChan := range p.llChans {
			// Don't send the same pErr object twice; this can lead to races. We could
			// clone every time but it's more efficient to send pErr itself to one of
			// the channels (the last one; if we send it earlier the race can still
			// happen).
			if i == len(p.llChans)-1 {
				llChan <- pErr
			} else {
				llChan <- protoutil.Clone(pErr).(*roachpb.Error) // works with `nil`
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	}) != nil {
		// We failed to start the asynchronous task. Send a blank NotLeaseHolderError
		// back to indicate that we have no idea who the range lease holder might
		// be; we've withdrawn from active duty.
		llChan <- roachpb.NewError(
			newNotLeaseHolderError(nil, replica.store.StoreID(), replica.mu.state.Desc))
		return llChan
	}
	p.llChans = append(p.llChans, llChan)
	p.nextLease = reqLease
	return llChan
}
func TestIsSQLRetryableError(t *testing.T) {
	errAmbiguous := &roachpb.AmbiguousResultError{}
	if !IsSQLRetryableError(roachpb.NewError(errAmbiguous).GoError()) {
		t.Fatalf("%s should be a SQLRetryableError", errAmbiguous)
	}
}
// TestRequestToUninitializedRange tests the behavior when a request
// is sent to a node which should be a replica of the correct range
// but has not yet received its initial snapshot. This would
// previously panic due to a malformed error response from the server,
// as seen in https://github.com/cockroachdb/cockroach/issues/6027.
//
// Prior to the other changes in the commit that introduced it, this
// test would reliably trigger the panic from #6027. However, it
// relies on some hacky tricks to both trigger the panic and shut down
// cleanly. If this test needs a lot of maintenance in the future we
// should be willing to get rid of it.
func TestRequestToUninitializedRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	srv, _, _ := serverutils.StartServer(t, base.TestServerArgs{
		StoreSpecs: []base.StoreSpec{
			base.DefaultTestStoreSpec,
			base.DefaultTestStoreSpec,
		},
	})
	defer srv.Stopper().Stop()
	s := srv.(*server.TestServer)

	// Choose a range ID that is much larger than any that would be
	// created by initial splits.
	const rangeID = roachpb.RangeID(1000)

	// Set up a range with replicas on two stores of the same node. This
	// ensures that the DistSender will consider both replicas healthy
	// and will try to talk to both (so we can get a non-retryable error
	// from the second store).
	replica1 := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   1,
		ReplicaID: 1,
	}
	replica2 := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   2,
		ReplicaID: 2,
	}

	// HACK: remove the second store from the node to generate a
	// non-retryable error when we try to talk to it.
	store2, err := s.Stores().GetStore(2)
	if err != nil {
		t.Fatal(err)
	}
	s.Stores().RemoveStore(store2)

	// Create the uninitialized range by sending an isolated raft
	// message to the first store.
	conn, err := s.RPCContext().GRPCDial(s.ServingAddr())
	if err != nil {
		t.Fatal(err)
	}
	raftClient := storage.NewMultiRaftClient(conn)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	stream, err := raftClient.RaftMessageBatch(ctx)
	if err != nil {
		t.Fatal(err)
	}
	msg := storage.RaftMessageRequestBatch{
		Requests: []storage.RaftMessageRequest{
			{
				RangeID:     rangeID,
				ToReplica:   replica1,
				FromReplica: replica2,
				Message: raftpb.Message{
					Type: raftpb.MsgApp,
					To:   1,
				},
			},
		},
	}
	if err := stream.Send(&msg); err != nil {
		t.Fatal(err)
	}

	// Make sure the replica was created.
	store1, err := s.Stores().GetStore(1)
	if err != nil {
		t.Fatal(err)
	}
	util.SucceedsSoon(t, func() error {
		if replica, err := store1.GetReplica(rangeID); err != nil {
			return errors.Errorf("failed to look up replica: %s", err)
		} else if replica.IsInitialized() {
			return errors.Errorf("expected replica to be uninitialized")
		}
		return nil
	})

	// Create our own DistSender so we can force some requests to the
	// bogus range. The DistSender needs to be in scope for its own
	// MockRangeDescriptorDB closure.
	var sender *kv.DistSender
	sender = kv.NewDistSender(kv.DistSenderConfig{
		Clock:      s.Clock(),
		RPCContext: s.RPCContext(),
		RangeDescriptorDB: kv.MockRangeDescriptorDB(
			func(key roachpb.RKey, useReverseScan bool,
			) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
				if key.Equal(roachpb.RKeyMin) {
					// Pass through requests for the first range to the real sender.
					desc, err := sender.FirstRange()
					if err != nil {
						return nil, nil, roachpb.NewError(err)
					}
					return []roachpb.RangeDescriptor{*desc}, nil, nil
				}
				return []roachpb.RangeDescriptor{{
					RangeID:  rangeID,
					StartKey: roachpb.RKey(keys.Meta2Prefix),
					EndKey:   roachpb.RKeyMax,
					Replicas: []roachpb.ReplicaDescriptor{replica1, replica2},
				}}, nil, nil
			}),
	}, s.Gossip())

	// Only inconsistent reads triggered the panic in #6027.
	hdr := roachpb.Header{
		ReadConsistency: roachpb.INCONSISTENT,
	}
	req := roachpb.NewGet(roachpb.Key("asdf"))

	// Repeat the test a few times: due to the randomization between the
	// two replicas, each attempt only had a 50% chance of triggering
	// the panic.
	for i := 0; i < 5; i++ {
		_, pErr := client.SendWrappedWith(context.Background(), sender, hdr, req)
		// Each attempt fails with "store 2 not found" because that is the
		// non-retryable error.
		if !testutils.IsPError(pErr, "store 2 not found") {
			t.Fatal(pErr)
		}
	}
}
// sendPartialBatch sends the supplied batch to the range specified by
// desc. The batch request is first truncated so that it contains only
// requests which intersect the range descriptor and keys for each
// request are limited to the range's key span. The send occurs in a
// retry loop to handle send failures. On failure to send to any
// replicas, we backoff and retry by refetching the range
// descriptor. If the underlying range seems to have split, we
// recursively invoke divideAndSendBatchToRanges to re-enumerate the
// ranges in the span and resend to each.
func (ds *DistSender) sendPartialBatch(
	ctx context.Context,
	ba roachpb.BatchRequest,
	rs roachpb.RSpan,
	desc *roachpb.RangeDescriptor,
	evictToken *EvictionToken,
	isFirst bool,
) response {
	var reply *roachpb.BatchResponse
	var pErr *roachpb.Error
	isReverse := ba.IsReverse()

	// Truncate the request to range descriptor.
	intersected, err := rs.Intersect(desc)
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}
	truncBA, numActive, err := truncate(ba, intersected)
	if numActive == 0 && err == nil {
		// This shouldn't happen in the wild, but some tests exercise it.
		return response{
			pErr: roachpb.NewErrorf("truncation resulted in empty batch on %s: %s", intersected, ba),
		}
	}
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}

	// Start a retry loop for sending the batch to the range.
	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// If we've cleared the descriptor on a send failure, re-lookup.
		if desc == nil {
			var descKey roachpb.RKey
			if isReverse {
				descKey = intersected.EndKey
			} else {
				descKey = intersected.Key
			}
			desc, evictToken, err = ds.getDescriptor(ctx, descKey, nil, isReverse)
			if err != nil {
				log.ErrEventf(ctx, "range descriptor re-lookup failed: %s", err)
				continue
			}
		}

		reply, pErr = ds.sendSingleRange(ctx, truncBA, desc)

		// If sending succeeded, return immediately.
		if pErr == nil {
			return response{reply: reply}
		}

		log.ErrEventf(ctx, "reply error %s: %s", ba, pErr)

		// Error handling: If the error indicates that our range
		// descriptor is out of date, evict it from the cache and try
		// again. Errors that apply only to a single replica were
		// handled in send().
		//
		// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
		// row and the range descriptor hasn't changed, return the error
		// to our caller.
		switch tErr := pErr.GetDetail().(type) {
		case *roachpb.SendError:
			// We've tried all the replicas without success. Either
			// they're all down, or we're using an out-of-date range
			// descriptor. Invalidate the cache and try again with the new
			// metadata.
			log.Event(ctx, "evicting range descriptor on send error and backoff for re-lookup")
			if err := evictToken.Evict(ctx); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// Clear the descriptor to reload on the next attempt.
			desc = nil
			continue
		case *roachpb.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it. This is
			// likely the result of a range split. If we have new range
			// descriptors, insert them instead as long as they are different
			// from the last descriptor to avoid endless loops.
			var replacements []roachpb.RangeDescriptor
			different := func(rd *roachpb.RangeDescriptor) bool {
				return !desc.RSpan().Equal(rd.RSpan())
			}
			if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
				replacements = append(replacements, *tErr.MismatchedRange)
			}
			if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
				if includesFrontOfCurSpan(isReverse, tErr.SuggestedRange, rs) {
					replacements = append(replacements, *tErr.SuggestedRange)
				}
			}
			// Same as Evict() if replacements is empty.
			if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// On addressing errors (likely a split), we need to re-invoke
			// the range descriptor lookup machinery, so we recurse by
			// sending batch to just the partial span this descriptor was
			// supposed to cover.
			log.VEventf(ctx, 1, "likely split; resending batch to span: %s", tErr)
			reply, pErr = ds.divideAndSendBatchToRanges(ctx, ba, intersected, isFirst)
			return response{reply: reply, pErr: pErr}
		}
		break
	}

	// Propagate error if either the retry closer or context done
	// channels were closed.
	if pErr == nil {
		if pErr = ds.deduceRetryEarlyExitError(ctx); pErr == nil {
			log.Fatal(ctx, "exited retry loop without an error")
		}
	}

	return response{pErr: pErr}
}
// divideAndSendBatchToRanges sends the supplied batch to all of the
// ranges which comprise the span specified by rs. The batch request
// is trimmed against each range which is part of the span and sent
// either serially or in parallel, if possible. isFirst indicates
// whether this is the first time this method has been called on the
// batch. It's specified false where this method is invoked recursively.
func (ds *DistSender) divideAndSendBatchToRanges(
	ctx context.Context, ba roachpb.BatchRequest, rs roachpb.RSpan, isFirst bool,
) (br *roachpb.BatchResponse, pErr *roachpb.Error) {
	// This function builds a channel of responses for each range
	// implicated in the span (rs) and combines them into a single
	// BatchResponse when finished.
	var responseChs []chan response
	defer func() {
		for _, responseCh := range responseChs {
			resp := <-responseCh
			if resp.pErr != nil {
				if pErr == nil {
					pErr = resp.pErr
				}
				continue
			}
			if br == nil {
				// First response from a Range.
				br = resp.reply
			} else {
				// This was the second or later call in a cross-Range request.
				// Combine the new response with the existing one.
				if err := br.Combine(resp.reply); err != nil {
					pErr = roachpb.NewError(err)
					return
				}
				br.Txn.Update(resp.reply.Txn)
			}
		}
		// If we experienced an error, don't neglect to update the error's
		// attached transaction with any responses which were received.
		if pErr != nil {
			if br != nil {
				pErr.UpdateTxn(br.Txn)
			}
		}
	}()

	// Get initial seek key depending on direction of iteration.
	var seekKey roachpb.RKey
	isReverse := ba.IsReverse()
	if isReverse {
		seekKey = rs.EndKey
	} else {
		seekKey = rs.Key
	}

	// Send the request to one range per iteration.
	ri := NewRangeIterator(ds, isReverse)
	for ri.Seek(ctx, seekKey); ri.Valid(); ri.Seek(ctx, seekKey) {
		// Increase the sequence counter only once before sending RPCs to
		// the ranges involved in this chunk of the batch (as opposed to
		// for each RPC individually). On RPC errors, there's no guarantee
		// that the request hasn't made its way to the target regardless
		// of the error; we'd like the second execution to be caught by
		// the sequence cache if that happens. There is a small chance
		// that we address a range twice in this chunk (stale/suboptimal
		// descriptors due to splits/merges) which leads to a transaction
		// retry.
		//
		// TODO(tschottdorf): it's possible that if we don't evict from
		// the cache we could be in for a busy loop.
		ba.SetNewRequest()

		responseCh := make(chan response, 1)
		responseChs = append(responseChs, responseCh)

		if isFirst && ri.NeedAnother(rs) {
			// TODO(tschottdorf): we should have a mechanism for discovering
			// range merges (descriptor staleness will mostly go unnoticed),
			// or we'll be turning single-range queries into multi-range
			// queries for no good reason.
			//
			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if ba.Txn == nil && ba.IsPossibleTransaction() && ba.ReadConsistency != roachpb.INCONSISTENT {
				responseCh <- response{pErr: roachpb.NewError(&roachpb.OpRequiresTxnError{})}
				return
			}
			// If the request is more than but ends with EndTransaction, we
			// want the caller to come again with the EndTransaction in an
			// extra call.
			if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
				responseCh <- response{pErr: errNo1PCTxn}
				return
			}
		}

		// Determine next seek key, taking a potentially sparse batch into
		// consideration.
		var err error
		nextRS := rs
		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			seekKey, err = prev(ba, ri.Desc().StartKey)
			nextRS.EndKey = seekKey
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			seekKey, err = next(ba, ri.Desc().EndKey)
			nextRS.Key = seekKey
		}
		if err != nil {
			responseCh <- response{pErr: roachpb.NewError(err)}
			return
		}

		// Send the next partial batch to the first range in the "rs" span.
		// If we're not handling a request which limits responses and we
		// can reserve one of the limited goroutines available for parallel
		// batch RPCs, send asynchronously.
		if ba.MaxSpanRequestKeys == 0 && ri.NeedAnother(rs) && ds.rpcContext != nil &&
			ds.sendPartialBatchAsync(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst, responseCh) {
			// Note that we pass the batch request by value to the parallel
			// goroutine to avoid using the cloned txn.

			// Clone the txn to preserve the current txn sequence for the async call.
			if ba.Txn != nil {
				txnClone := ba.Txn.Clone()
				ba.Txn = &txnClone
			}
		} else {
			// Send synchronously if there is no parallel capacity left, there's a
			// max results limit, or this is the final request in the span.
			resp := ds.sendPartialBatch(ctx, ba, rs, ri.Desc(), ri.Token(), isFirst)
			responseCh <- resp
			if resp.pErr != nil {
				return
			}
			ba.UpdateTxn(resp.reply.Txn)

			// Check whether we've received enough responses to exit query loop.
			if ba.MaxSpanRequestKeys > 0 {
				var numResults int64
				for _, r := range resp.reply.Responses {
					numResults += r.GetInner().Header().NumKeys
				}
				if numResults > ba.MaxSpanRequestKeys {
					panic(fmt.Sprintf("received %d results, limit was %d", numResults, ba.MaxSpanRequestKeys))
				}
				ba.MaxSpanRequestKeys -= numResults
				// Exiting; fill in missing responses.
				if ba.MaxSpanRequestKeys == 0 {
					fillSkippedResponses(ba, resp.reply, seekKey)
					return
				}
			}
		}

		// Check for completion.
		if !ri.NeedAnother(rs) {
			return
		}
		isFirst = false // next range will not be first!
		rs = nextRS
	}

	// We've exited early. Return the range iterator error.
	responseCh := make(chan response, 1)
	responseCh <- response{pErr: ri.Error()}
	responseChs = append(responseChs, responseCh)
	return
}
// Send implements the batch.Sender interface. It subdivides the Batch
// into batches admissible for sending (preventing certain illegal
// mixtures of requests), executes each individual part (which may
// span multiple ranges), and recombines the response.
//
// When the request spans ranges, it is split by range and a partial
// subset of the batch request is sent to affected ranges in parallel.
//
// The first write in a transaction may not arrive before writes to
// other ranges. This is relevant in the case of a BeginTransaction
// request. Intents written to other ranges before the transaction
// record is created will cause the transaction to abort early.
func (ds *DistSender) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	tracing.AnnotateTrace()

	if pErr := ds.initAndVerifyBatch(ctx, &ba); pErr != nil {
		return nil, pErr
	}

	ctx = ds.AnnotateCtx(ctx)
	ctx, cleanup := tracing.EnsureContext(ctx, ds.AmbientContext.Tracer)
	defer cleanup()

	var rplChunks []*roachpb.BatchResponse
	parts := ba.Split(false /* don't split ET */)
	if len(parts) > 1 && ba.MaxSpanRequestKeys != 0 {
		// We already verified above that the batch contains only scan requests of the same type.
		// Such a batch should never need splitting.
		panic("batch with MaxSpanRequestKeys needs splitting")
	}
	for len(parts) > 0 {
		part := parts[0]
		ba.Requests = part
		// The minimal key range encompassing all requests contained within.
		// Local addressing has already been resolved.
		// TODO(tschottdorf): consider rudimentary validation of the batch here
		// (for example, non-range requests with EndKey, or empty key ranges).
		rs, err := keys.Range(ba)
		if err != nil {
			return nil, roachpb.NewError(err)
		}
		rpl, pErr := ds.divideAndSendBatchToRanges(ctx, ba, rs, true /* isFirst */)

		if pErr == errNo1PCTxn {
			// If we tried to send a single round-trip EndTransaction but
			// it looks like it's going to hit multiple ranges, split it
			// here and try again.
			if len(parts) != 1 {
				panic("EndTransaction not in last chunk of batch")
			}
			parts = ba.Split(true /* split ET */)
			if len(parts) != 2 {
				panic("split of final EndTransaction chunk resulted in != 2 parts")
			}
			continue
		}
		if pErr != nil {
			return nil, pErr
		}
		// Propagate transaction from last reply to next request. The final
		// update is taken and put into the response's main header.
		ba.UpdateTxn(rpl.Txn)
		rplChunks = append(rplChunks, rpl)
		parts = parts[1:]
	}

	reply := rplChunks[0]
	for _, rpl := range rplChunks[1:] {
		reply.Responses = append(reply.Responses, rpl.Responses...)
		reply.CollectedSpans = append(reply.CollectedSpans, rpl.CollectedSpans...)
	}
	reply.BatchResponse_Header = rplChunks[len(rplChunks)-1].BatchResponse_Header
	return reply, nil
}
// maybePushTransactions tries to push the conflicting transaction(s)
// responsible for the given intents: either move its
// timestamp forward on a read/write conflict, abort it on a
// write/write conflict, or do nothing if the transaction is no longer
// pending.
//
// Returns a slice of intents which can now be resolved, and an error.
// The returned intents should be resolved via intentResolver.resolveIntents.
//
// If skipIfInFlight is true, then no PushTxns will be sent and no
// intents will be returned for any transaction for which there is
// another push in progress. This should only be used by callers who
// are not relying on the side effect of a push (i.e. only
// pushType==PUSH_TOUCH), and who also don't need to synchronize with
// the resolution of those intents (e.g. asynchronous resolutions of
// intents skipped on inconsistent reads).
//
// Callers are involved with
// a) conflict resolution for commands being executed at the Store with the
//    client waiting,
// b) resolving intents encountered during inconsistent operations, and
// c) resolving intents upon EndTransaction which are not local to the given
//    range. This is the only path in which the transaction is going to be
//    in non-pending state and doesn't require a push.
func (ir *intentResolver) maybePushTransactions(
	ctx context.Context,
	intents []roachpb.Intent,
	h roachpb.Header,
	pushType roachpb.PushTxnType,
	skipIfInFlight bool,
) ([]roachpb.Intent, *roachpb.Error) {
	now := ir.store.Clock().Now()

	partialPusherTxn := h.Txn
	// If there's no pusher, we communicate a priority by sending an empty
	// txn with only the priority set. This is official usage of PushTxn.
	if partialPusherTxn == nil {
		partialPusherTxn = &roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				Priority: roachpb.MakePriority(h.UserPriority),
			},
		}
	}

	log.Event(ctx, "pushing transaction")

	// Split intents into those we need to push and those which are good to
	// resolve.
	ir.mu.Lock()
	// TODO(tschottdorf): can optimize this and use same underlying slice.
	var pushIntents, nonPendingIntents []roachpb.Intent
	for _, intent := range intents {
		if intent.Status != roachpb.PENDING {
			// The current intent does not need conflict resolution
			// because the transaction is already finalized.
			// This shouldn't happen as all intents created are in
			// the PENDING status.
			nonPendingIntents = append(nonPendingIntents, intent)
		} else if _, ok := ir.mu.inFlight[*intent.Txn.ID]; ok && skipIfInFlight {
			// Another goroutine is working on this transaction so we can
			// skip it.
			if log.V(1) {
				log.Infof(ctx, "skipping PushTxn for %s; attempt already in flight", intent.Txn.ID)
			}
			continue
		} else {
			pushIntents = append(pushIntents, intent)
			ir.mu.inFlight[*intent.Txn.ID]++
		}
	}
	ir.mu.Unlock()
	if len(nonPendingIntents) > 0 {
		return nil, roachpb.NewError(errors.Errorf("unexpected aborted/resolved intents: %+v",
			nonPendingIntents))
	}

	// Attempt to push the transaction(s) which created the conflicting intent(s).
	var pushReqs []roachpb.Request
	for _, intent := range pushIntents {
		pushReqs = append(pushReqs, &roachpb.PushTxnRequest{
			Span: roachpb.Span{
				Key: intent.Txn.Key,
			},
			PusherTxn: *partialPusherTxn,
			PusheeTxn: intent.Txn,
			PushTo:    h.Timestamp,
			// The timestamp is used by PushTxn for figuring out whether the
			// transaction is abandoned. If we used the argument's timestamp
			// here, we would run into busy loops because that timestamp
			// usually stays fixed among retries, so it will never realize
			// that a transaction has timed out. See #877.
			Now:      now,
			PushType: pushType,
		})
	}
	b := &client.Batch{}
	b.AddRawRequest(pushReqs...)
	var pErr *roachpb.Error
	if err := ir.store.db.Run(ctx, b); err != nil {
		pErr = b.MustPErr()
	}
	ir.mu.Lock()
	for _, intent := range pushIntents {
		ir.mu.inFlight[*intent.Txn.ID]--
		if ir.mu.inFlight[*intent.Txn.ID] == 0 {
			delete(ir.mu.inFlight, *intent.Txn.ID)
		}
	}
	ir.mu.Unlock()
	if pErr != nil {
		return nil, pErr
	}
	br := b.RawResponse()

	var resolveIntents []roachpb.Intent
	for i, intent := range pushIntents {
		pushee := br.Responses[i].GetInner().(*roachpb.PushTxnResponse).PusheeTxn
		intent.Txn = pushee.TxnMeta
		intent.Status = pushee.Status
		resolveIntents = append(resolveIntents, intent)
	}
	return resolveIntents, nil
}
// InitOrJoinRequest executes a RequestLease command asynchronously and returns a
// channel on which the result will be posted. If there's already a request in
// progress, we join in waiting for the results of that request.
// It is an error to call InitOrJoinRequest() while a request is in progress
// naming another replica as lease holder.
//
// replica is used to schedule and execute async work (proposing a RequestLease
// command). replica.mu is locked when delivering results, so calls from the
// replica happen either before or after a result for a pending request has
// happened.
//
// transfer needs to be set if the request represents a lease transfer (as
// opposed to an extension, or acquiring the lease when none is held).
//
// Note: Once this function gets a context to be used for cancellation, instead
// of replica.store.Stopper().ShouldQuiesce(), care will be needed for cancelling
// the Raft command, similar to replica.addWriteCmd.
func (p *pendingLeaseRequest) InitOrJoinRequest(
	replica *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	timestamp hlc.Timestamp,
	startKey roachpb.Key,
	transfer bool,
) <-chan *roachpb.Error {
	if nextLease, ok := p.RequestPending(); ok {
		if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID {
			// Join a pending request asking for the same replica to become lease
			// holder.
			return p.JoinRequest()
		}
		llChan := make(chan *roachpb.Error, 1)
		// We can't join the request in progress.
		llChan <- roachpb.NewErrorf("request for different replica in progress "+
			"(requesting: %+v, in progress: %+v)",
			nextLeaseHolder.ReplicaID, nextLease.Replica.ReplicaID)
		return llChan
	}
	llChan := make(chan *roachpb.Error, 1)
	// No request in progress. Let's propose a Lease command asynchronously.
	// TODO(tschottdorf): get duration from configuration, either as a
	// config flag or, later, dynamically adjusted.
	startStasis := timestamp.Add(int64(replica.store.cfg.RangeLeaseActiveDuration), 0)
	expiration := startStasis.Add(int64(replica.store.Clock().MaxOffset()), 0)
	reqSpan := roachpb.Span{
		Key: startKey,
	}
	var leaseReq roachpb.Request
	now := replica.store.Clock().Now()
	reqLease := roachpb.Lease{
		Start:       timestamp,
		StartStasis: startStasis,
		Expiration:  expiration,
		Replica:     nextLeaseHolder,
		ProposedTS:  &now,
	}
	if transfer {
		leaseReq = &roachpb.TransferLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	} else {
		leaseReq = &roachpb.RequestLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	}
	if replica.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = replica.AnnotateCtx(ctx)
		// Propose a RequestLease command and wait for it to apply.
		ba := roachpb.BatchRequest{}
		ba.Timestamp = replica.store.Clock().Now()
		ba.RangeID = replica.RangeID
		ba.Add(leaseReq)
		if log.V(2) {
			log.Infof(ctx, "sending lease request %v", leaseReq)
		}
		_, pErr := replica.Send(ctx, ba)
		// We reset our state below regardless of whether we've gotten an error or
		// not, but note that an error is ambiguous - there's no guarantee that the
		// transfer will not still apply. That's OK, however, as the "in transfer"
		// state maintained by the pendingLeaseRequest is not relied on for
		// correctness (see replica.mu.minLeaseProposedTS), and resetting the state
		// is beneficial as it'll allow the replica to attempt to transfer again or
		// extend the existing lease in the future.

		// Send result of lease to all waiter channels.
		replica.mu.Lock()
		defer replica.mu.Unlock()
		for _, llChan := range p.llChans {
			// Don't send the same transaction object twice; this can lead to races.
			if pErr != nil {
				pErrClone := *pErr
				pErrClone.SetTxn(pErr.GetTxn())
				llChan <- &pErrClone
			} else {
				llChan <- nil
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	}) != nil {
		// We failed to start the asynchronous task. Send a blank NotLeaseHolderError
		// back to indicate that we have no idea who the range lease holder might
		// be; we've withdrawn from active duty.
		llChan <- roachpb.NewError(
			newNotLeaseHolderError(nil, replica.store.StoreID(), replica.mu.state.Desc))
		return llChan
	}
	p.llChans = append(p.llChans, llChan)
	p.nextLease = reqLease
	return llChan
}
// sendInternal sends the batch and updates the transaction on error. Depending
// on the error type, the transaction might be replaced by a new one.
func (txn *Txn) sendInternal(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if len(ba.Requests) == 0 {
		return nil, nil
	}
	if pErr := txn.db.prepareToSend(&ba); pErr != nil {
		return nil, pErr
	}

	// Send call through the DB's sender.
	ba.Txn = &txn.Proto
	// For testing purposes, txn.UserPriority can be a negative value (see
	// MakePriority).
	if txn.UserPriority != 0 {
		ba.UserPriority = txn.UserPriority
	}

	// TODO(radu): when db.send supports a context, we can just use that (and
	// remove the prepareToSend call above).
	br, pErr := txn.db.sender.Send(txn.Context, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(txn.db.sender, br))
	}

	if br != nil {
		for _, encSp := range br.CollectedSpans {
			var newSp basictracer.RawSpan
			if err := tracing.DecodeRawSpan(encSp, &newSp); err != nil {
				return nil, roachpb.NewError(err)
			}
			txn.CollectedSpans = append(txn.CollectedSpans, newSp)
		}
	}

	// Only successful requests can carry an updated Txn in their response
	// header. Any error (e.g. a restart) can have a Txn attached to it as
	// well; those update our local state in the same way for the next attempt.
	// The exception is if our transaction was aborted and needs to restart
	// from scratch, in which case we do just that.
	if pErr == nil {
		txn.Proto.Update(br.Txn)
		return br, nil
	}

	if log.V(1) {
		log.Infof(txn.Context, "failed batch: %s", pErr)
	}

	if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); ok {
		// On Abort, reset the transaction so we start anew on restart.
		txn.Proto = roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				Isolation: txn.Proto.Isolation,
			},
			Name: txn.Proto.Name,
		}
		// Acts as a minimum priority on restart.
		if pErr.GetTxn() != nil {
			txn.Proto.Priority = pErr.GetTxn().Priority
		}
	} else if pErr.TransactionRestart != roachpb.TransactionRestart_NONE {
		txn.Proto.Update(pErr.GetTxn())
	}

	return nil, pErr
}