// RangeLookup implements the RangeDescriptorDB interface.
// RangeLookup dispatches a RangeLookup request for the given metadata
// key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(
	ctx context.Context, key roachpb.RKey, desc *roachpb.RangeDescriptor, useReverseScan bool,
) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges: ds.rangeLookupMaxRanges,
		Reverse:   useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	replicas.Shuffle()
	br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba)
	if err != nil {
		return nil, nil, roachpb.NewError(err)
	}
	if br.Error != nil {
		return nil, nil, br.Error
	}
	resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse)
	return resp.Ranges, resp.PrefetchedRanges, nil
}
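A hypothetical caller-side sketch, not taken from the source: it only relies on the RangeLookup signature shown above, and assumes the caller already holds the metadata key for the user key of interest as well as the descriptor of the meta range. The function name and its error handling are illustrative.

func exampleRangeLookup(
	ctx context.Context, ds *DistSender, metaKey roachpb.RKey, metaDesc *roachpb.RangeDescriptor,
) (*roachpb.RangeDescriptor, *roachpb.Error) {
	// Ask the replicas of the meta range for the descriptor(s) covering metaKey.
	descs, prefetched, pErr := ds.RangeLookup(ctx, metaKey, metaDesc, false /* useReverseScan */)
	if pErr != nil {
		return nil, pErr
	}
	_ = prefetched // a caller may cache these to avoid future lookups
	if len(descs) == 0 {
		return nil, roachpb.NewErrorf("no descriptor found for %s", metaKey)
	}
	return &descs[0], nil
}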
// TestBatchError verifies that Range returns an error if a request has an invalid range.
func TestBatchError(t *testing.T) {
	testCases := []struct {
		req    [2]string
		errMsg string
	}{
		{
			req:    [2]string{"\xff\xff\xff\xff", "a"},
			errMsg: "must be less than KeyMax",
		},
		{
			req:    [2]string{"a", "\xff\xff\xff\xff"},
			errMsg: "must be less than or equal to KeyMax",
		},
	}

	for i, c := range testCases {
		var ba roachpb.BatchRequest
		ba.Add(&roachpb.ScanRequest{Span: roachpb.Span{Key: roachpb.Key(c.req[0]), EndKey: roachpb.Key(c.req[1])}})
		if _, err := Range(ba); !testutils.IsError(err, c.errMsg) {
			t.Errorf("%d: unexpected error %v", i, err)
		}
	}

	// Test a case where a non-range request has an end key.
	var ba roachpb.BatchRequest
	ba.Add(&roachpb.GetRequest{Span: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}})
	if _, err := Range(ba); !testutils.IsError(err, "end key specified for non-range operation") {
		t.Errorf("unexpected error %v", err)
	}
}
func testPut() roachpb.BatchRequest {
	var ba roachpb.BatchRequest
	ba.Timestamp = testTS
	put := &roachpb.PutRequest{}
	put.Key = testKey
	ba.Add(put)
	return ba
}
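A hypothetical sketch of how a test might consume this fixture, mirroring the `ts.Send(context.Background(), ba)` pattern used by the TxnCoordSender tests below. The function name and the `ts` parameter are assumptions for illustration; `testTS` and `testKey` are the package's test fixtures.

func exampleSendTestPut(t *testing.T, ts *TxnCoordSender) {
	// Build the fixture batch and push it through the sender under test.
	ba := testPut()
	if _, pErr := ts.Send(context.Background(), ba); pErr != nil {
		t.Fatal(pErr)
	}
}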
// TestBatchPrevNext tests batch.{Prev,Next}.
func TestBatchPrevNext(t *testing.T) {
	defer leaktest.AfterTest(t)()
	loc := func(s string) string {
		return string(keys.RangeDescriptorKey(roachpb.RKey(s)))
	}
	span := func(strs ...string) []roachpb.Span {
		var r []roachpb.Span
		for i, str := range strs {
			if i%2 == 0 {
				r = append(r, roachpb.Span{Key: roachpb.Key(str)})
			} else {
				r[len(r)-1].EndKey = roachpb.Key(str)
			}
		}
		return r
	}
	max, min := string(roachpb.RKeyMax), string(roachpb.RKeyMin)
	abc := span("a", "", "b", "", "c", "")

	testCases := []struct {
		spans             []roachpb.Span
		key, expFW, expBW string
	}{
		{spans: span("a", "c", "b", ""), key: "b", expFW: "b", expBW: "b"},
		{spans: span("a", "c", "b", ""), key: "a", expFW: "a", expBW: "a"},
		{spans: span("a", "c", "d", ""), key: "c", expFW: "d", expBW: "c"},
		{spans: span("a", "c\x00", "d", ""), key: "c", expFW: "c", expBW: "c"},
		{spans: abc, key: "b", expFW: "b", expBW: "b"},
		{spans: abc, key: "b\x00", expFW: "c", expBW: "b\x00"},
		{spans: abc, key: "bb", expFW: "c", expBW: "b"},
		{spans: span(), key: "whatevs", expFW: max, expBW: min},
		{spans: span(loc("a"), loc("c")), key: "c", expFW: "c", expBW: "c"},
		{spans: span(loc("a"), loc("c")), key: "c\x00", expFW: max, expBW: "c\x00"},
	}

	for i, test := range testCases {
		var ba roachpb.BatchRequest
		for _, span := range test.spans {
			args := &roachpb.ScanRequest{}
			args.Key, args.EndKey = span.Key, span.EndKey
			ba.Add(args)
		}
		if next, err := next(ba, roachpb.RKey(test.key)); err != nil {
			t.Errorf("%d: %v", i, err)
		} else if !bytes.Equal(next, roachpb.Key(test.expFW)) {
			t.Errorf("%d: next: expected %q, got %q", i, test.expFW, next)
		}
		if prev, err := prev(ba, roachpb.RKey(test.key)); err != nil {
			t.Errorf("%d: %v", i, err)
		} else if !bytes.Equal(prev, roachpb.Key(test.expBW)) {
			t.Errorf("%d: prev: expected %q, got %q", i, test.expBW, prev)
		}
	}
}
// process iterates through all keys in a replica's range, calling the garbage
// collector for each key and associated set of values. GC'd keys are batched
// into GC calls. Extant intents are resolved if intents are older than
// intentAgeThreshold. The transaction and abort cache records are also
// scanned and old entries evicted. During normal operation, both of these
// records are cleaned up when their respective transaction finishes, so the
// amount of work done here is expected to be small.
//
// Some care needs to be taken to avoid cyclic recreation of entries during GC:
// * a Push initiated due to an intent may recreate a transaction entry
// * resolving an intent may write a new abort cache entry
// * obtaining the transaction for an abort cache entry requires a Push
//
// The following order is taken below:
// 1) collect all intents with sufficiently old txn record
// 2) collect these intents' transactions
// 3) scan the transaction table, collecting abandoned or completed txns
// 4) push all of these transactions (possibly recreating entries)
// 5) resolve all intents (unless the txn is still PENDING), which will recreate
//    abort cache entries (but with the txn timestamp; i.e. likely gc'able)
// 6) scan the abort cache table for old entries
// 7) push these transactions (again, recreating txn entries)
// 8) send a GCRequest
func (gcq *gcQueue) process(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) error {
	snap := repl.store.Engine().NewSnapshot()
	desc := repl.Desc()
	defer snap.Close()

	// Lookup the GC policy for the zone containing this key range.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("could not find zone config for range %s: %s", repl, err)
	}

	gcKeys, info, err := RunGC(ctx, desc, snap, now, zone.GC,
		func(now hlc.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
			pushTxn(ctx, gcq.store.DB(), now, txn, typ)
		},
		func(intents []roachpb.Intent, poison bool, wait bool) error {
			return repl.store.intentResolver.resolveIntents(ctx, intents, poison, wait)
		})
	if err != nil {
		return err
	}

	log.VEventf(ctx, 1, "completed with stats %+v", info)

	info.updateMetrics(gcq.store.metrics)

	var ba roachpb.BatchRequest
	var gcArgs roachpb.GCRequest
	// TODO(tschottdorf): This is one of these instances in which we want
	// to be more careful that the request ends up on the correct Replica,
	// and we might have to worry about mixing range-local and global keys
	// in a batch which might end up spanning Ranges by the time it executes.
	gcArgs.Key = desc.StartKey.AsRawKey()
	gcArgs.EndKey = desc.EndKey.AsRawKey()
	gcArgs.Keys = gcKeys
	gcArgs.Threshold = info.Threshold
	gcArgs.TxnSpanGCThreshold = info.TxnSpanGCThreshold

	// Technically not needed since we're talking directly to the Range.
	ba.RangeID = desc.RangeID
	ba.Timestamp = now
	ba.Add(&gcArgs)
	if _, pErr := repl.Send(ctx, ba); pErr != nil {
		log.ErrEvent(ctx, pErr.String())
		return pErr.GoError()
	}
	return nil
}
// SendWrappedWith is a convenience function which wraps the request in a batch
// and sends it via the provided Sender and headers. It returns the unwrapped
// response or an error. It's valid to pass a `nil` context; an empty one is
// used in that case.
func SendWrappedWith(
	ctx context.Context, sender Sender, h roachpb.Header, args roachpb.Request,
) (roachpb.Response, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.Header = h
	ba.Add(args)

	br, pErr := sender.Send(ctx, ba)
	if pErr != nil {
		return nil, pErr
	}
	unwrappedReply := br.Responses[0].GetInner()
	header := unwrappedReply.Header()
	header.Txn = br.Txn
	unwrappedReply.SetHeader(header)
	return unwrappedReply, nil
}
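A minimal usage sketch for SendWrappedWith, relying only on the signature above. The wrapper function name, the choice of GetRequest, and the header fields set here are illustrative assumptions, not taken from the source.

func exampleSendWrappedGet(ctx context.Context, sender Sender, key roachpb.Key) (*roachpb.GetResponse, error) {
	// Wrap a single GetRequest; SendWrappedWith batches it and unwraps the reply.
	reply, pErr := SendWrappedWith(ctx, sender, roachpb.Header{
		ReadConsistency: roachpb.INCONSISTENT, // illustrative header setting
	}, &roachpb.GetRequest{Span: roachpb.Span{Key: key}})
	if pErr != nil {
		return nil, pErr.GoError()
	}
	return reply.(*roachpb.GetResponse), nil
}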
// TestTxnCoordSenderSingleRoundtripTxn checks that a batch which completely
// holds the writing portion of a Txn (including EndTransaction) does not
// launch a heartbeat goroutine at all.
func TestTxnCoordSenderSingleRoundtripTxn(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(123)
	clock := hlc.NewClock(manual.UnixNano, 20*time.Nanosecond)

	senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		br := ba.CreateReply()
		txnClone := ba.Txn.Clone()
		br.Txn = &txnClone
		br.Txn.Writing = true
		return br, nil
	}
	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
	ts := NewTxnCoordSender(
		ambient, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics(metric.TestSampleInterval),
	)

	// Stop the stopper manually, prior to trying the transaction. This has the
	// effect of returning a NodeUnavailableError for any attempts at launching
	// a heartbeat goroutine.
	stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	_, pErr := ts.Send(context.Background(), ba)
	if pErr != nil {
		t.Fatal(pErr)
	}
}
func TestBatchPrevNextWithNoop(t *testing.T) {
	defer leaktest.AfterTest(t)()

	leftKey := roachpb.Key("a")
	middleKey := roachpb.RKey("b")
	rightKey := roachpb.Key("c")

	var ba roachpb.BatchRequest
	ba.Add(&roachpb.GetRequest{Span: roachpb.Span{Key: leftKey}})
	ba.Add(&roachpb.NoopRequest{})
	ba.Add(&roachpb.GetRequest{Span: roachpb.Span{Key: rightKey}})

	t.Run("prev", func(t *testing.T) {
		rk, err := prev(ba, middleKey)
		if err != nil {
			t.Fatal(err)
		}
		if !rk.Equal(leftKey) {
			t.Errorf("got %s, expected %s", rk, leftKey)
		}
	})
	t.Run("next", func(t *testing.T) {
		rk, err := next(ba, middleKey)
		if err != nil {
			t.Fatal(err)
		}
		if !rk.Equal(rightKey) {
			t.Errorf("got %s, expected %s", rk, rightKey)
		}
	})
}
// tryAsyncAbort (synchronously) grabs a copy of the txn proto and the intents
// (which it then clears from txnMeta), and asynchronously tries to abort the
// transaction.
func (tc *TxnCoordSender) tryAsyncAbort(txnID uuid.UUID) {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	// Clone the intents and the txn to avoid data races.
	intentSpans, _ := roachpb.MergeSpans(append([]roachpb.Span(nil), txnMeta.keys...))
	txnMeta.keys = nil
	txn := txnMeta.txn.Clone()
	tc.Unlock()

	// Since we don't hold the lock continuously, it's possible that two aborts
	// raced here. That's fine (and probably better than the alternative, which
	// is missing new intents sometimes).
	if txn.Status != roachpb.PENDING {
		return
	}

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	et := &roachpb.EndTransactionRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Commit:      false,
		IntentSpans: intentSpans,
	}
	ba.Add(et)
	ctx := tc.AnnotateCtx(context.TODO())
	if err := tc.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		// Use the wrapped sender since the normal Sender does not allow
		// clients to specify intents.
		if _, pErr := tc.wrapped.Send(ctx, ba); pErr != nil {
			if log.V(1) {
				log.Warningf(ctx, "abort due to inactivity failed for %s: %s ", txn, pErr)
			}
		}
	}); err != nil {
		log.Warning(ctx, err)
	}
}
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	testCases := []struct {
		roachpb.Error
		errMsg string
	}{
		{*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"},
		{*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{
			TxnMeta: enginepb.TxnMeta{
				ID: uuid.NewV4(),
			}})), "failed to push"},
		{*roachpb.NewErrorf("testError"), "testError"},
	}
	for i, test := range testCases {
		func() {
			senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
				txn := ba.Txn.Clone()
				txn.Writing = true
				pErr := &roachpb.Error{}
				*pErr = test.Error
				pErr.SetTxn(&txn)
				return nil, pErr
			}
			ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
			ts := NewTxnCoordSender(
				ambient, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics(metric.TestSampleInterval),
			)

			var ba roachpb.BatchRequest
			key := roachpb.Key("test")
			ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
			ba.Add(&roachpb.EndTransactionRequest{})
			ba.Txn = &roachpb.Transaction{Name: "test"}
			_, pErr := ts.Send(context.Background(), ba)
			if !testutils.IsPError(pErr, test.errMsg) {
				t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr)
			}

			defer teardownHeartbeats(ts)
			ts.Lock()
			defer ts.Unlock()
			if len(ts.txns) != 1 {
				t.Errorf("%d: expected transaction to be tracked", i)
			}
		}()
	}
}
func (tc *TxnCoordSender) heartbeat(ctx context.Context, txnID uuid.UUID) bool {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	txn := txnMeta.txn.Clone()
	hasAbandoned := txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow())
	tc.Unlock()

	if txn.Status != roachpb.PENDING {
		// A previous iteration has already determined that the transaction is
		// finalized, so we wait for the client to realize that and keep our
		// state for the time being (to dish out the right error once it
		// returns).
		return true
	}

	// Before we send a heartbeat, determine whether this transaction should be
	// considered abandoned. If so, exit heartbeat. If ctx.Done() is not nil, then
	// it is a cancelable Context and we skip this check and use the ctx lifetime
	// instead of a timeout.
	if ctx.Done() == nil && hasAbandoned {
		if log.V(1) {
			log.Infof(ctx, "transaction %s abandoned; stopping heartbeat", txnMeta.txn)
		}
		tc.tryAsyncAbort(txnID)
		return false
	}

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	hb := &roachpb.HeartbeatTxnRequest{
		Now: tc.clock.Now(),
	}
	hb.Key = txn.Key
	ba.Add(hb)

	log.Event(ctx, "heartbeat")
	br, pErr := tc.wrapped.Send(ctx, ba)

	// Correctness mandates that when we can't heartbeat the transaction, we
	// make sure the client doesn't keep going. This is particularly relevant
	// in the case of an ABORTED transaction, but if we can't reach the
	// transaction record at all, we're going to have to assume we're aborted
	// as well.
	if pErr != nil {
		log.Warningf(ctx, "heartbeat to %s failed: %s", txn, pErr)
		// We're not going to let the client carry out additional requests, so
		// try to clean up.
		tc.tryAsyncAbort(*txn.ID)
		txn.Status = roachpb.ABORTED
	} else {
		txn.Update(br.Responses[0].GetInner().(*roachpb.HeartbeatTxnResponse).Txn)
	}

	// Give the news to the txn in the txns map. This will update long-running
	// transactions (which may find out that they have to restart in that way),
	// but in particular makes sure that they notice when they've been aborted
	// (in which case we'll give them an error on their next request).
	tc.Lock()
	tc.txns[txnID].txn.Update(&txn)
	tc.Unlock()

	return true
}
func TestBatchRange(t *testing.T) {
	testCases := []struct {
		req [][2]string
		exp [2]string
	}{
		{
			// Boring single request.
			req: [][2]string{{"a", "b"}},
			exp: [2]string{"a", "b"},
		},
		{
			// Request with invalid range. It's important that this still
			// results in a valid range.
			req: [][2]string{{"b", "a"}},
			exp: [2]string{"b", "b\x00"},
		},
		{
			// Two overlapping ranges.
			req: [][2]string{{"a", "c"}, {"b", "d"}},
			exp: [2]string{"a", "d"},
		},
		{
			// Two disjoint ranges.
			req: [][2]string{{"a", "b"}, {"c", "d"}},
			exp: [2]string{"a", "d"},
		},
		{
			// Range and disjoint point request.
			req: [][2]string{{"a", "b"}, {"c", ""}},
			exp: [2]string{"a", "c\x00"},
		},
		{
			// Three disjoint point requests.
			req: [][2]string{{"a", ""}, {"b", ""}, {"c", ""}},
			exp: [2]string{"a", "c\x00"},
		},
		{
			// Disjoint range request and point request.
			req: [][2]string{{"a", "b"}, {"b", ""}},
			exp: [2]string{"a", "b\x00"},
		},
		{
			// Range-local point request.
			req: [][2]string{{string(RangeDescriptorKey(roachpb.RKeyMax)), ""}},
			exp: [2]string{"\xff\xff", "\xff\xff\x00"},
		},
		{
			// Range-local to global such that the key ordering flips.
			// Important that we get a valid range back.
			req: [][2]string{{string(RangeDescriptorKey(roachpb.RKeyMax)), "x"}},
			exp: [2]string{"\xff\xff", "\xff\xff\x00"},
		},
		{
			// Range-local to global without order messed up.
			req: [][2]string{{string(RangeDescriptorKey(roachpb.RKey("a"))), "x"}},
			exp: [2]string{"a", "x"},
		},
	}

	for i, c := range testCases {
		var ba roachpb.BatchRequest
		for _, pair := range c.req {
			ba.Add(&roachpb.ScanRequest{Span: roachpb.Span{Key: roachpb.Key(pair[0]), EndKey: roachpb.Key(pair[1])}})
		}
		if rs, err := Range(ba); err != nil {
			t.Errorf("%d: %v", i, err)
		} else if actPair := [2]string{string(rs.Key), string(rs.EndKey)}; !reflect.DeepEqual(actPair, c.exp) {
			t.Errorf("%d: expected [%q,%q), got [%q,%q)", i, c.exp[0], c.exp[1], actPair[0], actPair[1])
		}
	}
}
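A small illustrative sketch of Range as exercised by the tests above: given a batch mixing a Scan and a disjoint point Get, the computed span is the union of the requests, with the point request extended by "\x00" (per the "Range and disjoint point request" case). The function name is hypothetical; only Range and the request types from this file are used.

func exampleBatchRange() (string, string, error) {
	var ba roachpb.BatchRequest
	ba.Add(&roachpb.ScanRequest{Span: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")}})
	ba.Add(&roachpb.GetRequest{Span: roachpb.Span{Key: roachpb.Key("d")}})
	rs, err := Range(ba)
	if err != nil {
		return "", "", err
	}
	// Per the test cases above, this is expected to cover ["a", "d\x00").
	return string(rs.Key), string(rs.EndKey), nil
}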
// InitOrJoinRequest executes a RequestLease command asynchronously and returns a
// channel on which the result will be posted. If there's already a request in
// progress, we join in waiting for the results of that request.
// It is an error to call InitOrJoinRequest() while a request is in progress
// naming another replica as lease holder.
//
// replica is used to schedule and execute async work (proposing a RequestLease
// command). replica.mu is locked when delivering results, so calls from the
// replica happen either before or after a result for a pending request has
// happened.
//
// transfer needs to be set if the request represents a lease transfer (as
// opposed to an extension, or acquiring the lease when none is held).
//
// Note: Once this function gets a context to be used for cancellation, instead
// of replica.store.Stopper().ShouldQuiesce(), care will be needed for cancelling
// the Raft command, similar to replica.addWriteCmd.
func (p *pendingLeaseRequest) InitOrJoinRequest(
	replica *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	timestamp hlc.Timestamp,
	startKey roachpb.Key,
	transfer bool,
) <-chan *roachpb.Error {
	if nextLease, ok := p.RequestPending(); ok {
		if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID {
			// Join a pending request asking for the same replica to become lease
			// holder.
			return p.JoinRequest()
		}
		llChan := make(chan *roachpb.Error, 1)
		// We can't join the request in progress.
		llChan <- roachpb.NewErrorf("request for different replica in progress "+
			"(requesting: %+v, in progress: %+v)",
			nextLeaseHolder.ReplicaID, nextLease.Replica.ReplicaID)
		return llChan
	}
	llChan := make(chan *roachpb.Error, 1)
	// No request in progress. Let's propose a Lease command asynchronously.
	// TODO(tschottdorf): get duration from configuration, either as a
	// config flag or, later, dynamically adjusted.
	startStasis := timestamp.Add(int64(replica.store.cfg.RangeLeaseActiveDuration), 0)
	expiration := startStasis.Add(int64(replica.store.Clock().MaxOffset()), 0)
	reqSpan := roachpb.Span{
		Key: startKey,
	}
	var leaseReq roachpb.Request
	now := replica.store.Clock().Now()
	reqLease := roachpb.Lease{
		Start:       timestamp,
		StartStasis: startStasis,
		Expiration:  expiration,
		Replica:     nextLeaseHolder,
		ProposedTS:  &now,
	}
	if transfer {
		leaseReq = &roachpb.TransferLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	} else {
		leaseReq = &roachpb.RequestLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	}
	if replica.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = replica.AnnotateCtx(ctx)
		// Propose a RequestLease command and wait for it to apply.
		ba := roachpb.BatchRequest{}
		ba.Timestamp = replica.store.Clock().Now()
		ba.RangeID = replica.RangeID
		ba.Add(leaseReq)
		if log.V(2) {
			log.Infof(ctx, "sending lease request %v", leaseReq)
		}
		_, pErr := replica.Send(ctx, ba)
		// We reset our state below regardless of whether we've gotten an error or
		// not, but note that an error is ambiguous - there's no guarantee that the
		// transfer will not still apply. That's OK, however, as the "in transfer"
		// state maintained by the pendingLeaseRequest is not relied on for
		// correctness (see replica.mu.minLeaseProposedTS), and resetting the state
		// is beneficial as it'll allow the replica to attempt to transfer again or
		// extend the existing lease in the future.

		// Send result of lease to all waiter channels.
		replica.mu.Lock()
		defer replica.mu.Unlock()
		for _, llChan := range p.llChans {
			// Don't send the same transaction object twice; this can lead to races.
			if pErr != nil {
				pErrClone := *pErr
				pErrClone.SetTxn(pErr.GetTxn())
				llChan <- &pErrClone
			} else {
				llChan <- nil
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	}) != nil {
		// We failed to start the asynchronous task. Send a blank NotLeaseHolderError
		// back to indicate that we have no idea who the range lease holder might
		// be; we've withdrawn from active duty.
		llChan <- roachpb.NewError(
			newNotLeaseHolderError(nil, replica.store.StoreID(), replica.mu.state.Desc))
		return llChan
	}
	p.llChans = append(p.llChans, llChan)
	p.nextLease = reqLease
	return llChan
}
// InitOrJoinRequest executes a RequestLease command asynchronously and returns a
// channel on which the result will be posted. If there's already a request in
// progress, we join in waiting for the results of that request.
// It is an error to call InitOrJoinRequest() while a request is in progress
// naming another replica as lease holder.
//
// replica is used to schedule and execute async work (proposing a RequestLease
// command). replica.mu is locked when delivering results, so calls from the
// replica happen either before or after a result for a pending request has
// happened.
//
// transfer needs to be set if the request represents a lease transfer (as
// opposed to an extension, or acquiring the lease when none is held).
//
// Note: Once this function gets a context to be used for cancellation, instead
// of replica.store.Stopper().ShouldQuiesce(), care will be needed for cancelling
// the Raft command, similar to replica.addWriteCmd.
func (p *pendingLeaseRequest) InitOrJoinRequest(
	replica *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	timestamp hlc.Timestamp,
	startKey roachpb.Key,
	transfer bool,
) <-chan *roachpb.Error {
	if nextLease, ok := p.RequestPending(); ok {
		if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID {
			// Join a pending request asking for the same replica to become lease
			// holder.
			return p.JoinRequest()
		}
		llChan := make(chan *roachpb.Error, 1)
		// We can't join the request in progress.
		llChan <- roachpb.NewErrorf("request for different replica in progress "+
			"(requesting: %+v, in progress: %+v)",
			nextLeaseHolder.ReplicaID, nextLease.Replica.ReplicaID)
		return llChan
	}
	llChan := make(chan *roachpb.Error, 1)
	// No request in progress. Let's propose a Lease command asynchronously.
	// TODO(tschottdorf): get duration from configuration, either as a
	// config flag or, later, dynamically adjusted.
	startStasis := timestamp.Add(int64(replica.store.cfg.RangeLeaseActiveDuration), 0)
	expiration := startStasis.Add(int64(replica.store.Clock().MaxOffset()), 0)
	reqSpan := roachpb.Span{
		Key: startKey,
	}
	var leaseReq roachpb.Request
	reqLease := roachpb.Lease{
		Start:       timestamp,
		StartStasis: startStasis,
		Expiration:  expiration,
		Replica:     nextLeaseHolder,
	}
	if transfer {
		leaseReq = &roachpb.TransferLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	} else {
		leaseReq = &roachpb.RequestLeaseRequest{
			Span:  reqSpan,
			Lease: reqLease,
		}
	}
	if replica.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = replica.AnnotateCtx(ctx)
		// Propose a RequestLease command and wait for it to apply.
		ba := roachpb.BatchRequest{}
		ba.Timestamp = replica.store.Clock().Now()
		ba.RangeID = replica.RangeID
		ba.Add(leaseReq)
		if log.V(2) {
			log.Infof(ctx, "sending lease request %v", leaseReq)
		}
		_, pErr := replica.Send(ctx, ba)
		// Send result of lease to all waiter channels.
		replica.mu.Lock()
		defer replica.mu.Unlock()
		for i, llChan := range p.llChans {
			// Don't send the same pErr object twice; this can lead to races. We could
			// clone every time but it's more efficient to send pErr itself to one of
			// the channels (the last one; if we send it earlier the race can still
			// happen).
			if i == len(p.llChans)-1 {
				llChan <- pErr
			} else {
				llChan <- protoutil.Clone(pErr).(*roachpb.Error) // works with `nil`
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	}) != nil {
		// We failed to start the asynchronous task. Send a blank NotLeaseHolderError
		// back to indicate that we have no idea who the range lease holder might
		// be; we've withdrawn from active duty.
		llChan <- roachpb.NewError(
			newNotLeaseHolderError(nil, replica.store.StoreID(), replica.mu.state.Desc))
		return llChan
	}
	p.llChans = append(p.llChans, llChan)
	p.nextLease = reqLease
	return llChan
}
func TestTruncate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	loc := func(s string) string {
		return string(keys.RangeDescriptorKey(roachpb.RKey(s)))
	}
	locPrefix := func(s string) string {
		return string(keys.MakeRangeKeyPrefix(roachpb.RKey(s)))
	}
	testCases := []struct {
		keys     [][2]string
		expKeys  [][2]string
		from, to string
		desc     [2]string // optional, defaults to {from,to}
		err      string
	}{
		{
			// Keys inside of active range.
			keys:    [][2]string{{"a", "q"}, {"c"}, {"b, e"}, {"q"}},
			expKeys: [][2]string{{"a", "q"}, {"c"}, {"b, e"}, {"q"}},
			from:    "a", to: "q\x00",
		},
		{
			// Keys outside of active range.
			keys:    [][2]string{{"a"}, {"a", "b"}, {"q"}, {"q", "z"}},
			expKeys: [][2]string{{}, {}, {}, {}},
			from:    "b", to: "q",
		},
		{
			// Range-local keys inside of active range.
			keys:    [][2]string{{loc("b")}, {loc("c")}},
			expKeys: [][2]string{{loc("b")}, {loc("c")}},
			from:    "b", to: "e",
		},
		{
			// Range-local key outside of active range.
			keys:    [][2]string{{loc("a")}},
			expKeys: [][2]string{{}},
			from:    "b", to: "e",
		},
		{
			// Range-local range contained in active range.
			keys:    [][2]string{{loc("b"), loc("e") + "\x00"}},
			expKeys: [][2]string{{loc("b"), loc("e") + "\x00"}},
			from:    "b", to: "e\x00",
		},
		{
			// Range-local range not contained in active range.
			keys:    [][2]string{{loc("a"), loc("b")}},
			expKeys: [][2]string{{}},
			from:    "c", to: "e",
		},
		{
			// Range-local range not contained in active range.
			keys:    [][2]string{{loc("a"), locPrefix("b")}, {loc("e"), loc("f")}},
			expKeys: [][2]string{{}, {}},
			from:    "b", to: "e",
		},
		{
			// Range-local range partially contained in active range.
			keys:    [][2]string{{loc("a"), loc("b")}},
			expKeys: [][2]string{{loc("a"), locPrefix("b")}},
			from:    "a", to: "b",
		},
		{
			// Range-local range partially contained in active range.
			keys:    [][2]string{{loc("a"), loc("b")}},
			expKeys: [][2]string{{locPrefix("b"), loc("b")}},
			from:    "b", to: "e",
		},
		{
			// Range-local range contained in active range.
			keys:    [][2]string{{locPrefix("b"), loc("b")}},
			expKeys: [][2]string{{locPrefix("b"), loc("b")}},
			from:    "b", to: "c",
		},
		{
			// Mixed range-local vs global key range.
			keys: [][2]string{{loc("c"), "d\x00"}},
			from: "b", to: "e",
			err: "local key mixed with global key",
		},
		{
			// Key range touching and intersecting active range.
			keys:    [][2]string{{"a", "b"}, {"a", "c"}, {"p", "q"}, {"p", "r"}, {"a", "z"}},
			expKeys: [][2]string{{}, {"b", "c"}, {"p", "q"}, {"p", "q"}, {"b", "q"}},
			from:    "b", to: "q",
		},
		// Active key range is intersection of descriptor and [from,to).
		{
			keys:    [][2]string{{"c", "q"}},
			expKeys: [][2]string{{"d", "p"}},
			from:    "a", to: "z",
			desc:    [2]string{"d", "p"},
		},
		{
			keys:    [][2]string{{"c", "q"}},
			expKeys: [][2]string{{"d", "p"}},
			from:    "d", to: "p",
			desc:    [2]string{"a", "z"},
		},
	}

	for i, test := range testCases {
		goldenOriginal := roachpb.BatchRequest{}
		for _, ks := range test.keys {
			if len(ks[1]) > 0 {
				u := uuid.MakeV4()
				goldenOriginal.Add(&roachpb.ResolveIntentRangeRequest{
					Span:      roachpb.Span{Key: roachpb.Key(ks[0]), EndKey: roachpb.Key(ks[1])},
					IntentTxn: enginepb.TxnMeta{ID: &u},
				})
			} else {
				goldenOriginal.Add(&roachpb.GetRequest{
					Span: roachpb.Span{Key: roachpb.Key(ks[0])},
				})
			}
		}

		original := roachpb.BatchRequest{Requests: make([]roachpb.RequestUnion, len(goldenOriginal.Requests))}
		for i, request := range goldenOriginal.Requests {
			original.Requests[i].SetValue(request.GetInner().ShallowCopy())
		}

		desc := &roachpb.RangeDescriptor{
			StartKey: roachpb.RKey(test.desc[0]), EndKey: roachpb.RKey(test.desc[1]),
		}
		if len(desc.StartKey) == 0 {
			desc.StartKey = roachpb.RKey(test.from)
		}
		if len(desc.EndKey) == 0 {
			desc.EndKey = roachpb.RKey(test.to)
		}
		rs := roachpb.RSpan{Key: roachpb.RKey(test.from), EndKey: roachpb.RKey(test.to)}
		rs, err := rs.Intersect(desc)
		if err != nil {
			t.Errorf("%d: intersection failure: %v", i, err)
			continue
		}
		ba, num, err := truncate(original, rs)
		if err != nil || test.err != "" {
			if !testutils.IsError(err, test.err) {
				t.Errorf("%d: %v (expected: %q)", i, err, test.err)
			}
			continue
		}
		var reqs int
		for j, arg := range ba.Requests {
			req := arg.GetInner()
			if _, ok := req.(*roachpb.NoopRequest); ok {
				continue
			}
			if h := req.Header(); !bytes.Equal(h.Key, roachpb.Key(test.expKeys[j][0])) ||
				!bytes.Equal(h.EndKey, roachpb.Key(test.expKeys[j][1])) {
				t.Errorf("%d.%d: range mismatch: actual [%q,%q), wanted [%q,%q)", i, j,
					h.Key, h.EndKey, test.expKeys[j][0], test.expKeys[j][1])
			} else if _, ok := req.(*roachpb.NoopRequest); ok != (len(h.Key) == 0) {
				t.Errorf("%d.%d: expected NoopRequest, got %T", i, j, req)
			} else if len(h.Key) != 0 {
				reqs++
			}
		}
		if reqs != num {
			t.Errorf("%d: counted %d requests, but truncation indicated %d", i, reqs, num)
		}
		if !reflect.DeepEqual(original, goldenOriginal) {
			t.Errorf("%d: truncation mutated original:\nexpected: %s\nactual: %s", i, goldenOriginal, original)
		}
	}
}
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the
// transaction record.
func TestTxnCoordSenderHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()
	defer teardownHeartbeats(sender)

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	initialTxn := client.NewTxn(context.Background(), *s.DB)
	if err := initialTxn.Put(roachpb.Key("a"), []byte("value")); err != nil {
		t.Fatal(err)
	}

	// Verify 3 heartbeats.
	var heartbeatTS hlc.Timestamp
	for i := 0; i < 3; i++ {
		util.SucceedsSoon(t, func() error {
			txn, pErr := getTxn(sender, &initialTxn.Proto)
			if pErr != nil {
				t.Fatal(pErr)
			}
			// Advance clock by 1ns.
			// Locking the TxnCoordSender to prevent a data race.
			sender.Lock()
			s.Manual.Increment(1)
			sender.Unlock()
			if txn.LastHeartbeat != nil && heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return nil
			}
			return errors.Errorf("expected heartbeat")
		})
	}

	// Sneakily send an ABORT right to DistSender (bypassing TxnCoordSender).
	{
		var ba roachpb.BatchRequest
		ba.Add(&roachpb.EndTransactionRequest{
			Commit: false,
			Span:   roachpb.Span{Key: initialTxn.Proto.Key},
		})
		ba.Txn = &initialTxn.Proto
		if _, pErr := sender.wrapped.Send(context.Background(), ba); pErr != nil {
			t.Fatal(pErr)
		}
	}

	util.SucceedsSoon(t, func() error {
		sender.Lock()
		defer sender.Unlock()
		if txnMeta, ok := sender.txns[*initialTxn.Proto.ID]; !ok {
			t.Fatal("transaction unregistered prematurely")
		} else if txnMeta.txn.Status != roachpb.ABORTED {
			return fmt.Errorf("transaction is not aborted")
		}
		return nil
	})

	// Trying to do something else should give us a TransactionAbortedError.
	_, err := initialTxn.Get("a")
	assertTransactionAbortedError(t, err)
}
func (txn *Txn) sendEndTxnReq(commit bool, deadline *hlc.Timestamp) error {
	var ba roachpb.BatchRequest
	ba.Add(endTxnReq(commit, deadline, txn.SystemConfigTrigger()))
	_, err := txn.send(ba)
	return err.GoError()
}
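A hypothetical sketch of what a helper like endTxnReq plausibly assembles; the actual endTxnReq body is not shown here, so this is not the source's implementation. The Commit field matches its use elsewhere in this file, while the Deadline and InternalCommitTrigger fields are assumptions about the request proto.

// endTxnReqSketch is an illustrative stand-in for endTxnReq (hypothetical).
func endTxnReqSketch(commit bool, deadline *hlc.Timestamp, hasTrigger bool) roachpb.Request {
	req := &roachpb.EndTransactionRequest{
		Commit:   commit,
		Deadline: deadline, // assumed field: optional commit deadline
	}
	if hasTrigger {
		// Assumed field: mark the txn as touching the system config span so
		// that a gossip trigger runs on commit.
		req.InternalCommitTrigger = &roachpb.InternalCommitTrigger{
			ModifiedSpanTrigger: &roachpb.ModifiedSpanTrigger{
				SystemConfigSpan: true,
			},
		}
	}
	return req
}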
// requestLeaseAsync sends a transfer lease or lease request to the
// specified replica. The request is sent in an async task.
func (p *pendingLeaseRequest) requestLeaseAsync(
	repl *Replica,
	nextLeaseHolder roachpb.ReplicaDescriptor,
	reqLease roachpb.Lease,
	status LeaseStatus,
	leaseReq roachpb.Request,
) error {
	return repl.store.Stopper().RunAsyncTask(context.TODO(), func(ctx context.Context) {
		ctx = repl.AnnotateCtx(ctx)
		var pErr *roachpb.Error

		// If requesting an epoch-based lease & current state is expired,
		// potentially heartbeat our own liveness or increment epoch of
		// prior owner. Note we only do this if the previous lease was
		// epoch-based.
		if reqLease.Type() == roachpb.LeaseEpoch && status.state == leaseExpired &&
			status.lease.Type() == roachpb.LeaseEpoch {
			var err error
			// If this replica is previous & next lease holder, manually heartbeat to become live.
			if status.lease.OwnedBy(nextLeaseHolder.StoreID) &&
				repl.store.StoreID() == nextLeaseHolder.StoreID {
				if err = repl.store.cfg.NodeLiveness.Heartbeat(ctx, status.liveness); err != nil {
					log.Error(ctx, err)
				}
			} else if status.liveness.Epoch == *status.lease.Epoch {
				// If not owner, increment epoch if necessary to invalidate lease.
				if err = repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.liveness); err != nil {
					log.Error(ctx, err)
				}
			}
			// Set error for propagation to all waiters below.
			if err != nil {
				pErr = roachpb.NewError(newNotLeaseHolderError(status.lease, repl.store.StoreID(), repl.Desc()))
			}
		}

		// Propose a RequestLease command and wait for it to apply.
		if pErr == nil {
			ba := roachpb.BatchRequest{}
			ba.Timestamp = repl.store.Clock().Now()
			ba.RangeID = repl.RangeID
			ba.Add(leaseReq)
			_, pErr = repl.Send(ctx, ba)
		}
		// We reset our state below regardless of whether we've gotten an error or
		// not, but note that an error is ambiguous - there's no guarantee that the
		// transfer will not still apply. That's OK, however, as the "in transfer"
		// state maintained by the pendingLeaseRequest is not relied on for
		// correctness (see repl.mu.minLeaseProposedTS), and resetting the state
		// is beneficial as it'll allow the replica to attempt to transfer again or
		// extend the existing lease in the future.

		// Send result of lease to all waiter channels.
		repl.mu.Lock()
		defer repl.mu.Unlock()
		for _, llChan := range p.llChans {
			// Don't send the same transaction object twice; this can lead to races.
			if pErr != nil {
				pErrClone := *pErr
				pErrClone.SetTxn(pErr.GetTxn())
				llChan <- &pErrClone
			} else {
				llChan <- nil
			}
		}
		p.llChans = p.llChans[:0]
		p.nextLease = roachpb.Lease{}
	})
}
func (tc *TxnCoordSender) heartbeat(ctx context.Context, txnID uuid.UUID) bool {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	txn := txnMeta.txn.Clone()
	hasAbandoned := txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow())
	tc.Unlock()

	if txn.Status != roachpb.PENDING {
		// A previous iteration has already determined that the transaction is
		// finalized, so we wait for the client to realize that and keep our
		// state for the time being (to dish out the right error once it
		// returns).
		return true
	}

	// Before we send a heartbeat, determine whether this transaction should be
	// considered abandoned. If so, exit heartbeat. If ctx.Done() is not nil, then
	// it is a cancellable Context and we skip this check and use the ctx lifetime
	// instead of a timeout.
	//
	// TODO(andrei): We should disallow non-cancellable contexts in the heartbeat
	// goroutine and enforce that our kv client cancels the context when it's
	// done. We get non-cancellable contexts from remote clients
	// (roachpb.ExternalClient) because we override the gRPC context to make it
	// non-cancellable in DBServer.Batch (as that context is not tied to a txn
	// lifetime).
	// Further note that, unfortunately, the Sender interface generally makes it
	// difficult for the TxnCoordSender to get a context with the same lifetime as
	// the transaction (the TxnCoordSender associates the context of the txn's
	// first write with the txn). We should move to using only local clients
	// (i.e. merge, or at least co-locate client.Txn and the TxnCoordSender). At
	// that point, we probably don't even need to deal with context cancellation
	// any more; the client will be trusted to always send an EndRequest when it's
	// done with a transaction.
	if ctx.Done() == nil && hasAbandoned {
		if log.V(1) {
			log.Infof(ctx, "transaction %s abandoned; stopping heartbeat", txnMeta.txn)
		}
		tc.tryAsyncAbort(txnID)
		return false
	}

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	hb := &roachpb.HeartbeatTxnRequest{
		Now: tc.clock.Now(),
	}
	hb.Key = txn.Key
	ba.Add(hb)

	log.Event(ctx, "heartbeat")
	br, pErr := tc.wrapped.Send(ctx, ba)

	// Correctness mandates that when we can't heartbeat the transaction, we
	// make sure the client doesn't keep going. This is particularly relevant
	// in the case of an ABORTED transaction, but if we can't reach the
	// transaction record at all, we're going to have to assume we're aborted
	// as well.
	if pErr != nil {
		log.Warningf(ctx, "heartbeat to %s failed: %s", txn, pErr)
		// We're not going to let the client carry out additional requests, so
		// try to clean up.
		tc.tryAsyncAbort(*txn.ID)
		txn.Status = roachpb.ABORTED
	} else {
		txn.Update(br.Responses[0].GetInner().(*roachpb.HeartbeatTxnResponse).Txn)
	}

	// Give the news to the txn in the txns map. This will update long-running
	// transactions (which may find out that they have to restart in that way),
	// but in particular makes sure that they notice when they've been aborted
	// (in which case we'll give them an error on their next request).
	tc.Lock()
	tc.txns[txnID].txn.Update(&txn)
	tc.Unlock()

	return true
}