// prev gives the right boundary of the union of all requests which don't // affect keys larger than the given key. // TODO(tschottdorf): again, better on BatchRequest itself, but can't pull // 'keys' into 'roachpb'. func prev(ba roachpb.BatchRequest, k roachpb.RKey) (roachpb.RKey, error) { candidate := roachpb.RKeyMin for _, union := range ba.Requests { inner := union.GetInner() if _, ok := inner.(*roachpb.NoopRequest); ok { continue } h := inner.Header() addr, err := keys.Addr(h.Key) if err != nil { return nil, err } eAddr, err := keys.AddrUpperBound(h.EndKey) if err != nil { return nil, err } if len(eAddr) == 0 { eAddr = addr.Next() } if !eAddr.Less(k) { if !k.Less(addr) { // Range contains k, so won't be able to go lower. return k, nil } // Range is disjoint from [KeyMin,k). continue } // We want the largest surviving candidate. if candidate.Less(addr) { candidate = addr } } return candidate, nil }
// next gives the left boundary of the union of all requests which don't // affect keys less than the given key. // TODO(tschottdorf): again, better on BatchRequest itself, but can't pull // 'keys' into 'proto'. func next(ba roachpb.BatchRequest, k roachpb.RKey) (roachpb.RKey, error) { candidate := roachpb.RKeyMax for _, union := range ba.Requests { inner := union.GetInner() if _, ok := inner.(*roachpb.NoopRequest); ok { continue } h := inner.Header() addr, err := keys.Addr(h.Key) if err != nil { return nil, err } if addr.Less(k) { eAddr, err := keys.AddrUpperBound(h.EndKey) if err != nil { return nil, err } if k.Less(eAddr) { // Starts below k, but continues beyond. Need to stay at k. return k, nil } // Affects only [KeyMin,k). continue } // We want the smallest of the surviving candidates. if addr.Less(candidate) { candidate = addr } } return candidate, nil }
// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCache. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked( key roachpb.RKey, inclusive bool, ) (rangeCacheKey, *roachpb.RangeDescriptor, error) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive by default. var metaKey roachpb.RKey var err error if !inclusive { metaKey, err = meta(key.Next()) } else { metaKey, err = meta(key) } if err != nil { return nil, nil, err } k, v, ok := rdc.rangeCache.cache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*roachpb.RangeDescriptor) containsFn := (*roachpb.RangeDescriptor).ContainsKey if inclusive { containsFn = (*roachpb.RangeDescriptor).ContainsExclusiveEndKey } // Return nil if the key does not belong to the range. if !containsFn(rd, key) { return nil, nil, nil } return metaEndKey, rd, nil }
// RangeLookup implements the RangeDescriptorDB interface. // RangeLookup dispatches a RangeLookup request for the given metadata // key to the replicas of the given range. Note that we allow // inconsistent reads when doing range lookups for efficiency. Getting // stale data is not a correctness problem but instead may // infrequently result in additional latency as additional range // lookups may be required. Note also that rangeLookup bypasses the // DistSender's Send() method, so there is no error inspection and // retry logic here; this is not an issue since the lookup performs a // single inconsistent read only. func (ds *DistSender) RangeLookup( ctx context.Context, key roachpb.RKey, desc *roachpb.RangeDescriptor, useReverseScan bool, ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // We can interpret the RKey as a Key here since it's a metadata // lookup; those are never local. Key: key.AsRawKey(), }, MaxRanges: ds.rangeLookupMaxRanges, Reverse: useReverseScan, }) replicas := newReplicaSlice(ds.gossip, desc) replicas.Shuffle() br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba) if err != nil { return nil, nil, roachpb.NewError(err) } if br.Error != nil { return nil, nil, br.Error } resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse) return resp.Ranges, resp.PrefetchedRanges, nil }
func (m *modelTimeSeriesDataStore) ContainsTimeSeries(start, end roachpb.RKey) bool { if !start.Less(end) { m.t.Fatalf("ContainsTimeSeries passed start key %v which is not less than end key %v", start, end) } m.Lock() defer m.Unlock() m.containsCalled++ return true }
// ObjectIDForKey returns the object ID (table or database) for 'key', // or (_, false) if not within the structured key space. func ObjectIDForKey(key roachpb.RKey) (uint32, bool) { if key.Equal(roachpb.RKeyMax) { return 0, false } if encoding.PeekType(key) != encoding.Int { // TODO(marc): this should eventually return SystemDatabaseID. return 0, false } // Consume first encoded int. _, id64, err := encoding.DecodeUvarintAscending(key) return uint32(id64), err == nil }
// LookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupReplica(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // If end is nil, a replica containing start is looked up. // This is only for testing usage; performance doesn't matter. func (ls *Stores) LookupReplica( start, end roachpb.RKey, ) (roachpb.RangeID, roachpb.ReplicaDescriptor, error) { ls.mu.RLock() defer ls.mu.RUnlock() var rangeID roachpb.RangeID var repDesc roachpb.ReplicaDescriptor var repDescFound bool for _, store := range ls.storeMap { replica := store.LookupReplica(start, nil) if replica == nil { continue } // Verify that the descriptor contains the entire range. if desc := replica.Desc(); !desc.ContainsKeyRange(start, end) { ctx := ls.AnnotateCtx(context.TODO()) log.Warningf(ctx, "range not contained in one range: [%s,%s), but have [%s,%s)", start, end, desc.StartKey, desc.EndKey) err := roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), desc) return 0, roachpb.ReplicaDescriptor{}, err } rangeID = replica.RangeID var err error repDesc, err = replica.GetReplicaDescriptor() if err != nil { if _, ok := err.(*roachpb.RangeNotFoundError); ok { // We are not holding a lock across this block; the replica could have // been removed from the range (via down-replication) between the // LookupReplica and the GetReplicaDescriptor calls. In this case just // ignore this replica. continue } return 0, roachpb.ReplicaDescriptor{}, err } if repDescFound { // We already found the range; this should never happen outside of tests. err := errors.Errorf("range %+v exists on additional store: %+v", replica, store) return 0, roachpb.ReplicaDescriptor{}, err } repDescFound = true } if !repDescFound { return 0, roachpb.ReplicaDescriptor{}, roachpb.NewRangeNotFoundError(0) } return rangeID, repDesc, nil }
// findTimeSeries searches the supplied engine over the supplied key range, // identifying time series which have stored data in the range, along with the // resolutions at which time series data is stored. A unique name/resolution // pair will only be identified once, even if the range contains keys for that // name/resolution pair at multiple timestamps or from multiple sources. // // An engine snapshot is used, rather than a client, because this function is // intended to be called by a storage queue which can inspect the local data for // a single range without the need for expensive network calls. func findTimeSeries( snapshot engine.Reader, startKey, endKey roachpb.RKey, now hlc.Timestamp, ) ([]timeSeriesResolutionInfo, error) { var results []timeSeriesResolutionInfo iter := snapshot.NewIterator(false) defer iter.Close() // Set start boundary for the search, which is the lesser of the range start // key and the beginning of time series data. start := engine.MakeMVCCMetadataKey(startKey.AsRawKey()) next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix) if next.Less(start) { next = start } // Set end boundary for the search, which is the lesser of the range end key // and the end of time series data. end := engine.MakeMVCCMetadataKey(endKey.AsRawKey()) lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd()) if lastTS.Less(end) { end = lastTS } thresholds := computeThresholds(now.WallTime) for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) { foundKey := iter.Key().Key // Extract the name and resolution from the discovered key. name, _, res, tsNanos, err := DecodeDataKey(foundKey) if err != nil { return nil, err } // Skip this time series if there's nothing to prune. We check the // oldest (first) time series record's timestamp against the // pruning threshold. 
if threshold, ok := thresholds[res]; !ok || threshold > tsNanos { results = append(results, timeSeriesResolutionInfo{ Name: name, Resolution: res, }) } // Set 'next' is initialized to the next possible time series key // which could belong to a previously undiscovered time series. next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd()) } return results, nil }
// findTimeSeries searches the supplied engine over the supplied key range, // identifying time series which have stored data in the range, along with the // resolutions at which time series data is stored. A unique name/resolution // pair will only be identified once, even if the range contains keys for that // name/resolution pair at multiple timestamps or from multiple sources. // // An engine snapshot is used, rather than a client, because this function is // intended to be called by a storage queue which can inspect the local data for // a single range without the need for expensive network calls. func findTimeSeries( snapshot engine.Reader, startKey, endKey roachpb.RKey, ) ([]timeSeriesResolutionInfo, error) { var results []timeSeriesResolutionInfo iter := snapshot.NewIterator(false) defer iter.Close() // Set start boundary for the search, which is the lesser of the range start // key and the beginning of time series data. start := engine.MakeMVCCMetadataKey(startKey.AsRawKey()) next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix) if next.Less(start) { next = start } // Set end boundary for the search, which is the lesser of the range end key // and the end of time series data. end := engine.MakeMVCCMetadataKey(endKey.AsRawKey()) lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd()) if lastTS.Less(end) { end = lastTS } for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) { foundKey := iter.Key().Key // Extract the name and resolution from the discovered key. name, _, res, _, err := DecodeDataKey(foundKey) if err != nil { return nil, err } results = append(results, timeSeriesResolutionInfo{ Name: name, Resolution: res, }) // Set 'next' is initialized to the next possible time series key // which could belong to a previously undiscovered time series. next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd()) } return results, nil }
// validateRangeMetaKey validates that the given key is a valid Range Metadata // key. This checks only the constraints common to forward and backwards scans: // correct prefix and not exceeding KeyMax. func validateRangeMetaKey(key roachpb.RKey) error { // KeyMin is a valid key. if key.Equal(roachpb.RKeyMin) { return nil } // Key must be at least as long as Meta1Prefix. if len(key) < len(Meta1Prefix) { return NewInvalidRangeMetaKeyError("too short", key) } prefix, body := key[:len(Meta1Prefix)], key[len(Meta1Prefix):] if !prefix.Equal(Meta2Prefix) && !prefix.Equal(Meta1Prefix) { return NewInvalidRangeMetaKeyError("not a meta key", key) } if roachpb.RKeyMax.Less(body) { return NewInvalidRangeMetaKeyError("body of meta key range lookup is > KeyMax", key) } return nil }
// MetaReverseScanBounds returns the range [start,end) within which the desired // meta record can be found by means of a reverse engine scan. The given key // must be a valid RangeMetaKey as defined by validateRangeMetaKey. func MetaReverseScanBounds(key roachpb.RKey) (roachpb.Key, roachpb.Key, error) { if err := validateRangeMetaKey(key); err != nil { return nil, nil, err } if key.Equal(roachpb.RKeyMin) || key.Equal(Meta1Prefix) { return nil, nil, NewInvalidRangeMetaKeyError("KeyMin and Meta1Prefix can't be used as the key of reverse scan", key) } if key.Equal(Meta2Prefix) { // Special case Meta2Prefix: this is the first key in Meta2, and the scan // interval covers all of Meta1. return Meta1Prefix, key.Next().AsRawKey(), nil } // Otherwise find the first entry greater than the given key and find the last entry // in the same prefix. For MVCCReverseScan the endKey is exclusive, if we want to find // the range descriptor the given key specified,we need to set the key.Next() as the // MVCCReverseScan`s endKey. For example: // If we have ranges [a,f) and [f,z), then we'll have corresponding meta records // at f and z. If you're looking for the meta record for key f, then you want the // second record (exclusive in MVCCReverseScan), hence key.Next() below. return key[:len(Meta1Prefix)].AsRawKey(), key.Next().AsRawKey(), nil }
// MetaScanBounds returns the range [start,end) within which the desired meta // record can be found by means of an engine scan. The given key must be a // valid RangeMetaKey as defined by validateRangeMetaKey. // TODO(tschottdorf): a lot of casting going on inside. func MetaScanBounds(key roachpb.RKey) (roachpb.Key, roachpb.Key, error) { if err := validateRangeMetaKey(key); err != nil { return nil, nil, err } if key.Equal(Meta2KeyMax) { return nil, nil, NewInvalidRangeMetaKeyError("Meta2KeyMax can't be used as the key of scan", key) } if key.Equal(roachpb.RKeyMin) { // Special case KeyMin: find the first entry in meta1. return Meta1Prefix, Meta1Prefix.PrefixEnd(), nil } if key.Equal(Meta1KeyMax) { // Special case Meta1KeyMax: this is the last key in Meta1, we don't want // to start at Next(). return Meta1KeyMax, Meta1Prefix.PrefixEnd(), nil } // Otherwise find the first entry greater than the given key in the same meta prefix. return key.Next().AsRawKey(), key[:len(Meta1Prefix)].PrefixEnd().AsRawKey(), nil }
func (m *modelTimeSeriesDataStore) PruneTimeSeries( ctx context.Context, snapshot engine.Reader, start, end roachpb.RKey, db *client.DB, now hlc.Timestamp, ) error { if snapshot == nil { m.t.Fatal("PruneTimeSeries was passed a nil snapshot") } if db == nil { m.t.Fatal("PruneTimeSeries was passed a nil client.DB") } if !start.Less(end) { m.t.Fatalf("PruneTimeSeries passed start key %v which is not less than end key %v", start, end) } m.Lock() defer m.Unlock() m.pruneCalled++ m.pruneSeenStartKeys[start.String()] = struct{}{} m.pruneSeenEndKeys[end.String()] = struct{}{} return nil }
// fillSkippedResponses is used after the batch key max limit for range
// requests has been met. It rewrites br in place in two passes:
//
//  1. Every NoopResponse belonging to a Scan, ReverseScan or DeleteRange
//     request is replaced with an empty response of the matching type.
//     NoopResponses for BeginTransaction and EndTransaction requests are left
//     untouched; any other request type paired with a NoopResponse panics.
//  2. For every range request, the response header's ResumeSpan is adjusted
//     so that it is expressed in terms of the original request span, using
//     nextKey as the boundary between processed and unprocessed keys.
//
// ba and br are indexed in parallel: br.Responses[i] is treated as the
// response to ba.Requests[i].
func fillSkippedResponses(ba roachpb.BatchRequest, br *roachpb.BatchResponse, nextKey roachpb.RKey) {
	// Some requests might have NoopResponses; we must replace them with empty
	// responses of the proper type.
	for i, req := range ba.Requests {
		if _, ok := br.Responses[i].GetInner().(*roachpb.NoopResponse); !ok {
			continue
		}
		var reply roachpb.Response
		switch t := req.GetInner().(type) {
		case *roachpb.ScanRequest:
			reply = &roachpb.ScanResponse{}
		case *roachpb.ReverseScanRequest:
			reply = &roachpb.ReverseScanResponse{}
		case *roachpb.DeleteRangeRequest:
			reply = &roachpb.DeleteRangeResponse{}
		case *roachpb.BeginTransactionRequest, *roachpb.EndTransactionRequest:
			// These keep their NoopResponse.
			continue
		default:
			panic(fmt.Sprintf("bad type %T", t))
		}
		union := roachpb.ResponseUnion{}
		union.MustSetInner(reply)
		br.Responses[i] = union
	}

	// Set the ResumeSpan for future batch requests.
	isReverse := ba.IsReverse()
	for i, resp := range br.Responses {
		req := ba.Requests[i].GetInner()
		if !roachpb.IsRange(req) {
			continue
		}
		// hdr is modified below and written back with SetHeader at the end
		// of the iteration.
		hdr := resp.GetInner().Header()
		// origSpan is declared per iteration, so taking its address below
		// gives each response its own span.
		origSpan := req.Header()
		if isReverse {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.Key might be set to the StartKey of a range;
				// correctly set it to the Key of the original request span.
				hdr.ResumeSpan.Key = origSpan.Key
			} else if roachpb.RKey(origSpan.Key).Less(nextKey) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if nextKey.Less(roachpb.RKey(origSpan.EndKey)) {
					// The original span has been partially processed.
					hdr.ResumeSpan.EndKey = nextKey.AsRawKey()
				}
			}
		} else {
			if hdr.ResumeSpan != nil {
				// The ResumeSpan.EndKey might be set to the EndKey of a
				// range; correctly set it to the EndKey of the original
				// request span.
				hdr.ResumeSpan.EndKey = origSpan.EndKey
			} else if nextKey.Less(roachpb.RKey(origSpan.EndKey)) {
				// Some keys have yet to be processed.
				hdr.ResumeSpan = &origSpan
				if roachpb.RKey(origSpan.Key).Less(nextKey) {
					// The original span has been partially processed.
					hdr.ResumeSpan.Key = nextKey.AsRawKey()
				}
			}
		}
		br.Responses[i].GetInner().SetHeader(hdr)
	}
}
// ContainsTimeSeries returns true if the given key range overlaps the // range of possible time series keys. func (tsdb *DB) ContainsTimeSeries(start, end roachpb.RKey) bool { return !lastTSRKey.Less(start) && !end.Less(firstTSRKey) }
// ComputeSplitKeys takes a start and end key and returns an array of keys // at which to split the span [start, end). // The only required splits are at each user table prefix. func (s SystemConfig) ComputeSplitKeys(startKey, endKey roachpb.RKey) []roachpb.RKey { tableStart := roachpb.RKey(keys.SystemConfigTableDataMax) if !tableStart.Less(endKey) { // This range is before the user tables span: no required splits. return nil } startID, ok := ObjectIDForKey(startKey) if !ok || startID <= keys.MaxSystemConfigDescID { // The start key is either: // - not part of the structured data span // - part of the system span // In either case, start looking for splits at the first ID usable // by the user data span. startID = keys.MaxSystemConfigDescID + 1 } else { // The start key is either already a split key, or after the split // key for its ID. We can skip straight to the next one. startID++ } // Build key prefixes for sequential table IDs until we reach endKey. Note // that there are two disjoint sets of sequential keys: non-system reserved // tables have sequential IDs, as do user tables, but the two ranges contain a // gap. var splitKeys []roachpb.RKey var key roachpb.RKey // appendSplitKeys generates all possible split keys between the given range // of IDs and adds them to splitKeys. appendSplitKeys := func(startID, endID uint32) { // endID could be smaller than startID if we don't have user tables. for id := startID; id <= endID; id++ { key = keys.MakeRowSentinelKey(keys.MakeTablePrefix(id)) // Skip if this ID matches the startKey passed to ComputeSplitKeys. if !startKey.Less(key) { continue } // Handle the case where EndKey is already a table prefix. if !key.Less(endKey) { break } splitKeys = append(splitKeys, key) } } // If the startKey falls within the non-system reserved range, compute those // keys first. 
if startID <= keys.MaxReservedDescID { endID, err := s.GetLargestObjectID(keys.MaxReservedDescID) if err != nil { log.Errorf(context.TODO(), "unable to determine largest reserved object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) startID = keys.MaxReservedDescID + 1 } // Append keys in the user space. endID, err := s.GetLargestObjectID(0) if err != nil { log.Errorf(context.TODO(), "unable to determine largest object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) return splitKeys }