// RangeLookup implements the RangeDescriptorDB interface. // RangeLookup dispatches a RangeLookup request for the given metadata // key to the replicas of the given range. Note that we allow // inconsistent reads when doing range lookups for efficiency. Getting // stale data is not a correctness problem but instead may // infrequently result in additional latency as additional range // lookups may be required. Note also that rangeLookup bypasses the // DistSender's Send() method, so there is no error inspection and // retry logic here; this is not an issue since the lookup performs a // single inconsistent read only. func (ds *DistSender) RangeLookup( ctx context.Context, key roachpb.RKey, desc *roachpb.RangeDescriptor, useReverseScan bool, ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // We can interpret the RKey as a Key here since it's a metadata // lookup; those are never local. Key: key.AsRawKey(), }, MaxRanges: ds.rangeLookupMaxRanges, Reverse: useReverseScan, }) replicas := newReplicaSlice(ds.gossip, desc) replicas.Shuffle() br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba) if err != nil { return nil, nil, roachpb.NewError(err) } if br.Error != nil { return nil, nil, br.Error } resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse) return resp.Ranges, resp.PrefetchedRanges, nil }
// LookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupReplica(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // If end is nil, a replica containing start is looked up. // This is only for testing usage; performance doesn't matter. func (ls *Stores) LookupReplica( start, end roachpb.RKey, ) (roachpb.RangeID, roachpb.ReplicaDescriptor, error) { ls.mu.RLock() defer ls.mu.RUnlock() var rangeID roachpb.RangeID var repDesc roachpb.ReplicaDescriptor var repDescFound bool for _, store := range ls.storeMap { replica := store.LookupReplica(start, nil) if replica == nil { continue } // Verify that the descriptor contains the entire range. if desc := replica.Desc(); !desc.ContainsKeyRange(start, end) { ctx := ls.AnnotateCtx(context.TODO()) log.Warningf(ctx, "range not contained in one range: [%s,%s), but have [%s,%s)", start, end, desc.StartKey, desc.EndKey) err := roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), desc) return 0, roachpb.ReplicaDescriptor{}, err } rangeID = replica.RangeID var err error repDesc, err = replica.GetReplicaDescriptor() if err != nil { if _, ok := err.(*roachpb.RangeNotFoundError); ok { // We are not holding a lock across this block; the replica could have // been removed from the range (via down-replication) between the // LookupReplica and the GetReplicaDescriptor calls. In this case just // ignore this replica. continue } return 0, roachpb.ReplicaDescriptor{}, err } if repDescFound { // We already found the range; this should never happen outside of tests. err := errors.Errorf("range %+v exists on additional store: %+v", replica, store) return 0, roachpb.ReplicaDescriptor{}, err } repDescFound = true } if !repDescFound { return 0, roachpb.ReplicaDescriptor{}, roachpb.NewRangeNotFoundError(0) } return rangeID, repDesc, nil }
// findTimeSeries searches the supplied engine over the supplied key range, // identifying time series which have stored data in the range, along with the // resolutions at which time series data is stored. A unique name/resolution // pair will only be identified once, even if the range contains keys for that // name/resolution pair at multiple timestamps or from multiple sources. // // An engine snapshot is used, rather than a client, because this function is // intended to be called by a storage queue which can inspect the local data for // a single range without the need for expensive network calls. func findTimeSeries( snapshot engine.Reader, startKey, endKey roachpb.RKey, now hlc.Timestamp, ) ([]timeSeriesResolutionInfo, error) { var results []timeSeriesResolutionInfo iter := snapshot.NewIterator(false) defer iter.Close() // Set start boundary for the search, which is the lesser of the range start // key and the beginning of time series data. start := engine.MakeMVCCMetadataKey(startKey.AsRawKey()) next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix) if next.Less(start) { next = start } // Set end boundary for the search, which is the lesser of the range end key // and the end of time series data. end := engine.MakeMVCCMetadataKey(endKey.AsRawKey()) lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd()) if lastTS.Less(end) { end = lastTS } thresholds := computeThresholds(now.WallTime) for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) { foundKey := iter.Key().Key // Extract the name and resolution from the discovered key. name, _, res, tsNanos, err := DecodeDataKey(foundKey) if err != nil { return nil, err } // Skip this time series if there's nothing to prune. We check the // oldest (first) time series record's timestamp against the // pruning threshold. if threshold, ok := thresholds[res]; !ok || threshold > tsNanos { results = append(results, timeSeriesResolutionInfo{ Name: name, Resolution: res, }) } // Set 'next' is initialized to the next possible time series key // which could belong to a previously undiscovered time series. next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd()) } return results, nil }
// findTimeSeries searches the supplied engine over the supplied key range, // identifying time series which have stored data in the range, along with the // resolutions at which time series data is stored. A unique name/resolution // pair will only be identified once, even if the range contains keys for that // name/resolution pair at multiple timestamps or from multiple sources. // // An engine snapshot is used, rather than a client, because this function is // intended to be called by a storage queue which can inspect the local data for // a single range without the need for expensive network calls. func findTimeSeries( snapshot engine.Reader, startKey, endKey roachpb.RKey, ) ([]timeSeriesResolutionInfo, error) { var results []timeSeriesResolutionInfo iter := snapshot.NewIterator(false) defer iter.Close() // Set start boundary for the search, which is the lesser of the range start // key and the beginning of time series data. start := engine.MakeMVCCMetadataKey(startKey.AsRawKey()) next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix) if next.Less(start) { next = start } // Set end boundary for the search, which is the lesser of the range end key // and the end of time series data. end := engine.MakeMVCCMetadataKey(endKey.AsRawKey()) lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd()) if lastTS.Less(end) { end = lastTS } for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) { foundKey := iter.Key().Key // Extract the name and resolution from the discovered key. name, _, res, _, err := DecodeDataKey(foundKey) if err != nil { return nil, err } results = append(results, timeSeriesResolutionInfo{ Name: name, Resolution: res, }) // Set 'next' is initialized to the next possible time series key // which could belong to a previously undiscovered time series. next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd()) } return results, nil }
// fillSkippedResponses after meeting the batch key max limit for range // requests. func fillSkippedResponses(ba roachpb.BatchRequest, br *roachpb.BatchResponse, nextKey roachpb.RKey) { // Some requests might have NoopResponses; we must replace them with empty // responses of the proper type. for i, req := range ba.Requests { if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok { continue } var reply roachpb.Response switch t := req.GetInner().(type) { case *roachpb.ScanRequest: reply = &roachpb.ScanResponse{} case *roachpb.ReverseScanRequest: reply = &roachpb.ReverseScanResponse{} case *roachpb.DeleteRangeRequest: reply = &roachpb.DeleteRangeResponse{} case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest: continue default: panic(fmt.Sprintf("bad type %T", t)) } union := roachpb.ResponseUnion{} union.MustSetInner(reply) br.Responses[i] = union } // Set the ResumeSpan for future batch requests. isReverse := ba.IsReverse() for i, resp := range br.Responses { req := ba.Requests[i].GetInner() if !roachpb.IsRange(req) { continue } hdr := resp.GetInner().Header() origSpan := req.Header() if isReverse { if hdr.ResumeSpan != nil { // The ResumeSpan.Key might be set to the StartKey of a range; // correctly set it to the Key of the original request span. hdr.ResumeSpan.Key = origSpan.Key } else if roachpb.RKey(origSpan.Key).Less(nextKey) { // Some keys have yet to be processed. hdr.ResumeSpan = &origSpan if nextKey.Less(roachpb.RKey(origSpan.EndKey)) { // The original span has been partially processed. hdr.ResumeSpan.EndKey = nextKey.AsRawKey() } } } else { if hdr.ResumeSpan != nil { // The ResumeSpan.EndKey might be set to the EndKey of a // range; correctly set it to the EndKey of the original // request span. hdr.ResumeSpan.EndKey = origSpan.EndKey } else if nextKey.Less(roachpb.RKey(origSpan.EndKey)) { // Some keys have yet to be processed. hdr.ResumeSpan = &origSpan if roachpb.RKey(origSpan.Key).Less(nextKey) { // The original span has been partially processed. hdr.ResumeSpan.Key = nextKey.AsRawKey() } } } br.Responses[i].GetInner().SetHeader(hdr) } }