// addInternal adds the replica the queue with specified priority. If // the replica is already queued, updates the existing // priority. Expects the queue lock to be held by caller. func (bq *baseQueue) addInternal( ctx context.Context, desc *roachpb.RangeDescriptor, should bool, priority float64, ) (bool, error) { if bq.mu.stopped { return false, errQueueStopped } if bq.mu.disabled { log.Event(ctx, "queue disabled") return false, errQueueDisabled } if !desc.IsInitialized() { // We checked this above in MaybeAdd(), but we need to check it // again for Add(). return false, errors.New("replica not initialized") } // If the replica is currently in purgatory, don't re-add it. if _, ok := bq.mu.purgatory[desc.RangeID]; ok { return false, nil } item, ok := bq.mu.replicas[desc.RangeID] if !should { if ok { log.Eventf(ctx, "%s: removing from queue", item.value) bq.remove(item) } return false, errReplicaNotAddable } else if ok { if item.priority != priority { log.Eventf(ctx, "%s: updating priority: %0.3f -> %0.3f", desc, item.priority, priority) } // Replica has already been added; update priority. bq.mu.priorityQ.update(item, priority) return false, nil } log.VEventf(ctx, 3, "%s: adding: priority=%0.3f", desc, priority) item = &replicaItem{value: desc.RangeID, priority: priority} bq.add(item) // If adding this replica has pushed the queue past its maximum size, // remove the lowest priority element. if pqLen := bq.mu.priorityQ.Len(); pqLen > bq.maxSize { bq.remove(bq.mu.priorityQ[pqLen-1]) } // Signal the processLoop that a replica has been added. select { case bq.incoming <- struct{}{}: default: // No need to signal again. } return true, nil }
// FindRangeLease is similar to FindRangeLeaseHolder but returns a Lease proto // without verifying if the lease is still active. Instead, it returns a time- // stamp taken off the queried node's clock. func (tc *TestCluster) FindRangeLease( rangeDesc *roachpb.RangeDescriptor, hint *ReplicationTarget, ) (_ *roachpb.Lease, now hlc.Timestamp, _ error) { if hint != nil { var ok bool if _, ok = rangeDesc.GetReplicaDescriptor(hint.StoreID); !ok { return nil, hlc.ZeroTimestamp, errors.Errorf( "bad hint: %+v; store doesn't have a replica of the range", hint) } } else { hint = &ReplicationTarget{ NodeID: rangeDesc.Replicas[0].NodeID, StoreID: rangeDesc.Replicas[0].StoreID} } // Find the server indicated by the hint and send a LeaseInfoRequest through // it. var hintServer *server.TestServer for _, s := range tc.Servers { if s.GetNode().Descriptor.NodeID == hint.NodeID { hintServer = s break } } if hintServer == nil { return nil, hlc.ZeroTimestamp, errors.Errorf("bad hint: %+v; no such node", hint) } leaseReq := roachpb.LeaseInfoRequest{ Span: roachpb.Span{ Key: rangeDesc.StartKey.AsRawKey(), }, } leaseResp, pErr := client.SendWrappedWith( context.TODO(), hintServer.DB().GetSender(), roachpb.Header{ // INCONSISTENT read, since we want to make sure that the node used to // send this is the one that processes the command, for the hint to // matter. ReadConsistency: roachpb.INCONSISTENT, }, &leaseReq) if pErr != nil { return nil, hlc.ZeroTimestamp, pErr.GoError() } return leaseResp.(*roachpb.LeaseInfoResponse).Lease, hintServer.Clock().Now(), nil }
func includesFrontOfCurSpan(isReverse bool, rd *roachpb.RangeDescriptor, rs roachpb.RSpan) bool { if isReverse { return rd.ContainsExclusiveEndKey(rs.EndKey) } return rd.ContainsKey(rs.Key) }
// sendPartialBatch sends the supplied batch to the range specified by
// desc. The batch request is first truncated so that it contains only
// requests which intersect the range descriptor and keys for each
// request are limited to the range's key span. The send occurs in a
// retry loop to handle send failures. On failure to send to any
// replicas, we backoff and retry by refetching the range
// descriptor. If the underlying range seems to have split, we
// recursively invoke divideAndSendBatchToRanges to re-enumerate the
// ranges in the span and resend to each.
//
// Parameters:
//   - ba is the full, untruncated batch; it is truncated to the
//     intersection of rs and desc before sending.
//   - rs is the key span the batch addresses.
//   - desc and evictToken are the descriptor to send to and the token
//     used to evict it from the cache; both are replaced inside the loop
//     when the descriptor is looked up again.
//   - isFirst is passed through unchanged to divideAndSendBatchToRanges.
//
// The returned response carries either a reply or a non-nil pErr (or both,
// when they come from the recursive divideAndSendBatchToRanges call).
func (ds *DistSender) sendPartialBatch(
	ctx context.Context,
	ba roachpb.BatchRequest,
	rs roachpb.RSpan,
	desc *roachpb.RangeDescriptor,
	evictToken *EvictionToken,
	isFirst bool,
) response {
	var reply *roachpb.BatchResponse
	var pErr *roachpb.Error
	// The scan direction decides which end of the span is used for
	// descriptor re-lookups and for validating a suggested range below.
	isReverse := ba.IsReverse()

	// Truncate the request to range descriptor.
	intersected, err := rs.Intersect(desc)
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}
	truncBA, numActive, err := truncate(ba, intersected)
	if numActive == 0 && err == nil {
		// This shouldn't happen in the wild, but some tests exercise it.
		return response{
			pErr: roachpb.NewErrorf("truncation resulted in empty batch on %s: %s", intersected, ba),
		}
	}
	if err != nil {
		return response{pErr: roachpb.NewError(err)}
	}

	// Start a retry loop for sending the batch to the range.
	for r := retry.StartWithCtx(ctx, ds.rpcRetryOptions); r.Next(); {
		// If we've cleared the descriptor on a send failure, re-lookup.
		if desc == nil {
			// Look up by the end key for reverse batches and by the start
			// key otherwise.
			var descKey roachpb.RKey
			if isReverse {
				descKey = intersected.EndKey
			} else {
				descKey = intersected.Key
			}
			desc, evictToken, err = ds.getDescriptor(ctx, descKey, nil, isReverse)
			if err != nil {
				// A failed lookup is only logged; the next loop iteration
				// tries again.
				log.ErrEventf(ctx, "range descriptor re-lookup failed: %s", err)
				continue
			}
		}

		reply, pErr = ds.sendSingleRange(ctx, truncBA, desc)

		// If sending succeeded, return immediately.
		if pErr == nil {
			return response{reply: reply}
		}

		log.ErrEventf(ctx, "reply error %s: %s", ba, pErr)

		// Error handling: If the error indicates that our range
		// descriptor is out of date, evict it from the cache and try
		// again. Errors that apply only to a single replica were
		// handled in send().
		//
		// TODO(bdarnell): Don't retry endlessly. If we fail twice in a
		// row and the range descriptor hasn't changed, return the error
		// to our caller.
		switch tErr := pErr.GetDetail().(type) {
		case *roachpb.SendError:
			// We've tried all the replicas without success. Either
			// they're all down, or we're using an out-of-date range
			// descriptor. Invalidate the cache and try again with the new
			// metadata.
			log.Event(ctx, "evicting range descriptor on send error and backoff for re-lookup")
			if err := evictToken.Evict(ctx); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// Clear the descriptor to reload on the next attempt.
			desc = nil
			continue
		case *roachpb.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it. This is
			// likely the result of a range split. If we have new range
			// descriptors, insert them instead as long as they are different
			// from the last descriptor to avoid endless loops.
			var replacements []roachpb.RangeDescriptor
			// different reports whether rd spans other keys than the
			// descriptor that was just used.
			different := func(rd *roachpb.RangeDescriptor) bool {
				return !desc.RSpan().Equal(rd.RSpan())
			}
			if tErr.MismatchedRange != nil && different(tErr.MismatchedRange) {
				replacements = append(replacements, *tErr.MismatchedRange)
			}
			// The suggested range is only used when it also passes the
			// includesFrontOfCurSpan check against rs.
			if tErr.SuggestedRange != nil && different(tErr.SuggestedRange) {
				if includesFrontOfCurSpan(isReverse, tErr.SuggestedRange, rs) {
					replacements = append(replacements, *tErr.SuggestedRange)
				}
			}
			// Same as Evict() if replacements is empty.
			if err := evictToken.EvictAndReplace(ctx, replacements...); err != nil {
				return response{pErr: roachpb.NewError(err)}
			}
			// On addressing errors (likely a split), we need to re-invoke
			// the range descriptor lookup machinery, so we recurse by
			// sending batch to just the partial span this descriptor was
			// supposed to cover.
			log.VEventf(ctx, 1, "likely split; resending batch to span: %s", tErr)
			reply, pErr = ds.divideAndSendBatchToRanges(ctx, ba, intersected, isFirst)
			return response{reply: reply, pErr: pErr}
		}
		// Any other error type is not retried: leave the loop and hand pErr
		// back to the caller.
		break
	}

	// Propagate error if either the retry closer or context done
	// channels were closed. pErr is still nil at this point only when the
	// loop ended without a send attempt having produced an error, since a
	// nil result from sendSingleRange returns immediately.
	if pErr == nil {
		if pErr = ds.deduceRetryEarlyExitError(ctx); pErr == nil {
			log.Fatal(ctx, "exited retry loop without an error")
		}
	}

	return response{pErr: pErr}
}
func TestLeaseInfoRequest(t *testing.T) { defer leaktest.AfterTest(t)() tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ ReplicationMode: base.ReplicationManual, }) defer tc.Stopper().Stop() kvDB0 := tc.Servers[0].DB() kvDB1 := tc.Servers[1].DB() key := []byte("a") rangeDesc := new(roachpb.RangeDescriptor) var err error *rangeDesc, err = tc.LookupRange(key) if err != nil { t.Fatal(err) } rangeDesc, err = tc.AddReplicas( rangeDesc.StartKey.AsRawKey(), tc.Target(1), tc.Target(2), ) if err != nil { t.Fatal(err) } if len(rangeDesc.Replicas) != 3 { t.Fatalf("expected 3 replicas, got %+v", rangeDesc.Replicas) } replicas := make([]roachpb.ReplicaDescriptor, 3) for i := 0; i < 3; i++ { var ok bool replicas[i], ok = rangeDesc.GetReplicaDescriptor(tc.Servers[i].GetFirstStoreID()) if !ok { t.Fatalf("expected to find replica in server %d", i) } } // Lease should start on Server 0, since nobody told it to move. leaseHolderReplica := LeaseInfo(t, kvDB0, *rangeDesc, roachpb.INCONSISTENT).Lease.Replica if leaseHolderReplica != replicas[0] { t.Fatalf("lease holder should be replica %+v, but is: %+v", replicas[0], leaseHolderReplica) } // Transfer the lease to Server 1 and check that LeaseInfoRequest gets the // right answer. err = tc.TransferRangeLease(rangeDesc, tc.Target(1)) if err != nil { t.Fatal(err) } // An inconsistent LeaseInfoReqeust on the old lease holder should give us the // right answer immediately, since the old holder has definitely applied the // transfer before TransferRangeLease returned. leaseHolderReplica = LeaseInfo(t, kvDB0, *rangeDesc, roachpb.INCONSISTENT).Lease.Replica if leaseHolderReplica != replicas[1] { t.Fatalf("lease holder should be replica %+v, but is: %+v", replicas[1], leaseHolderReplica) } // A read on the new lease holder does not necessarily succeed immediately, // since it might take a while for it to apply the transfer. 
util.SucceedsSoon(t, func() error { // We can't reliably do a CONSISTENT read here, even though we're reading // from the supposed lease holder, because this node might initially be // unaware of the new lease and so the request might bounce around for a // while (see #8816). leaseHolderReplica = LeaseInfo(t, kvDB1, *rangeDesc, roachpb.INCONSISTENT).Lease.Replica if leaseHolderReplica != replicas[1] { return errors.Errorf("lease holder should be replica %+v, but is: %+v", replicas[1], leaseHolderReplica) } return nil }) // Transfer the lease to Server 2 and check that LeaseInfoRequest gets the // right answer. err = tc.TransferRangeLease(rangeDesc, tc.Target(2)) if err != nil { t.Fatal(err) } leaseHolderReplica = LeaseInfo(t, kvDB1, *rangeDesc, roachpb.INCONSISTENT).Lease.Replica if leaseHolderReplica != replicas[2] { t.Fatalf("lease holder should be replica %+v, but is: %+v", replicas[2], leaseHolderReplica) } // TODO(andrei): test the side-effect of LeaseInfoRequest when there's no // active lease - the node getting the request is supposed to acquire the // lease. This requires a way to expire leases; the TestCluster probably needs // to use a mock clock. }