// prev gives the right boundary of the union of all requests which don't // affect keys larger than the given key. // TODO(tschottdorf): again, better on BatchRequest itself, but can't pull // 'keys' into 'roachpb'. func prev(ba roachpb.BatchRequest, k roachpb.RKey) roachpb.RKey { candidate := roachpb.RKeyMin for _, union := range ba.Requests { h := union.GetInner().Header() addr := keys.Addr(h.Key) eAddr := keys.Addr(h.EndKey) if len(eAddr) == 0 { // Can probably avoid having to compute Next() here if // we're in the mood for some more complexity. eAddr = addr.Next() } if !eAddr.Less(k) { if !k.Less(addr) { // Range contains k, so won't be able to go lower. return k } // Range is disjoint from [KeyMin,k). continue } // We want the largest surviving candidate. if candidate.Less(addr) { candidate = addr } } return candidate }
// next gives the left boundary of the union of all requests which don't // affect keys less than the given key. // TODO(tschottdorf): again, better on BatchRequest itself, but can't pull // 'keys' into 'proto'. func next(ba roachpb.BatchRequest, k roachpb.RKey) (roachpb.RKey, error) { candidate := roachpb.RKeyMax for _, union := range ba.Requests { h := union.GetInner().Header() addr, err := keys.Addr(h.Key) if err != nil { return nil, err } if addr.Less(k) { eAddr, err := keys.AddrUpperBound(h.EndKey) if err != nil { return nil, err } if k.Less(eAddr) { // Starts below k, but continues beyond. Need to stay at k. return k, nil } // Affects only [KeyMin,k). continue } // We want the smallest of the surviving candidates. if addr.Less(candidate) { candidate = addr } } return candidate, nil }
// RangeLookup dispatches an RangeLookup request for the given // metadata key to the replicas of the given range. Note that we allow // inconsistent reads when doing range lookups for efficiency. Getting // stale data is not a correctness problem but instead may // infrequently result in additional latency as additional range // lookups may be required. Note also that rangeLookup bypasses the // DistSender's Send() method, so there is no error inspection and // retry logic here; this is not an issue since the lookup performs a // single inconsistent read only. func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // We can interpret the RKey as a Key here since it's a metadata // lookup; those are never local. Key: key.AsRawKey(), }, MaxRanges: ds.rangeLookupMaxRanges, ConsiderIntents: considerIntents, Reverse: useReverseScan, }) replicas := newReplicaSlice(ds.gossip, desc) // TODO(tschottdorf) consider a Trace here, potentially that of the request // that had the cache miss and waits for the result. br, err := ds.sendRPC(nil /* Trace */, desc.RangeID, replicas, rpc.OrderRandom, ba) if err != nil { return nil, err } if br.Error != nil { return nil, br.Error } return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil }
// RangeLookup implements the RangeDescriptorDB interface. // RangeLookup dispatches a RangeLookup request for the given metadata // key to the replicas of the given range. Note that we allow // inconsistent reads when doing range lookups for efficiency. Getting // stale data is not a correctness problem but instead may // infrequently result in additional latency as additional range // lookups may be required. Note also that rangeLookup bypasses the // DistSender's Send() method, so there is no error inspection and // retry logic here; this is not an issue since the lookup performs a // single inconsistent read only. func (ds *DistSender) RangeLookup( key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool, ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // We can interpret the RKey as a Key here since it's a metadata // lookup; those are never local. Key: key.AsRawKey(), }, MaxRanges: ds.rangeLookupMaxRanges, ConsiderIntents: considerIntents, Reverse: useReverseScan, }) replicas := newReplicaSlice(ds.gossip, desc) replicas.Shuffle() // TODO(tschottdorf): Ideally we would use the trace of the request which // caused this lookup. _ = context.TODO() br, err := ds.sendRPC(ds.Ctx, desc.RangeID, replicas, ba) if err != nil { return nil, nil, roachpb.NewError(err) } if br.Error != nil { return nil, nil, br.Error } resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse) return resp.Ranges, resp.PrefetchedRanges, nil }
// prev gives the right boundary of the union of all requests which don't // affect keys larger than the given key. // TODO(tschottdorf): again, better on BatchRequest itself, but can't pull // 'keys' into 'roachpb'. func prev(ba roachpb.BatchRequest, k roachpb.RKey) (roachpb.RKey, error) { candidate := roachpb.RKeyMin for _, union := range ba.Requests { h := union.GetInner().Header() addr, err := keys.Addr(h.Key) if err != nil { return nil, err } eAddr, err := keys.AddrUpperBound(h.EndKey) if err != nil { return nil, err } if len(eAddr) == 0 { eAddr = addr.Next() } if !eAddr.Less(k) { if !k.Less(addr) { // Range contains k, so won't be able to go lower. return k, nil } // Range is disjoint from [KeyMin,k). continue } // We want the largest surviving candidate. if candidate.Less(addr) { candidate = addr } } return candidate, nil }
// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCache. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key roachpb.RKey, inclusive bool) (rangeCacheKey, *roachpb.RangeDescriptor, error) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive by default. var metaKey roachpb.RKey var err error if !inclusive { metaKey, err = meta(key.Next()) } else { metaKey, err = meta(key) } if err != nil { return nil, nil, err } k, v, ok := rdc.rangeCache.cache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*roachpb.RangeDescriptor) // Return nil if the key does not belong to the range. if (!inclusive && !rd.ContainsKey(key)) || (inclusive && !rd.ContainsExclusiveEndKey(key)) { return nil, nil, nil } return metaEndKey, rd, nil }
// getDescriptors looks up the range descriptor to use for a query over the // key range [from,to), with the given lookupOptions. The range descriptor // which contains the range in which the request should start its query is // returned first; the returned bool is true in case the given range reaches // outside the first descriptor. // In case either of the descriptors is discovered stale, the returned closure // should be called; it evicts the cache appropriately. // Note that `from` and `to` are not necessarily Key and EndKey from a // RequestHeader; it's assumed that they've been translated to key addresses // already (via KeyAddress). func (ds *DistSender) getDescriptors(from, to roachpb.RKey, options lookupOptions) (*roachpb.RangeDescriptor, bool, func(), *roachpb.Error) { var desc *roachpb.RangeDescriptor var err error var descKey roachpb.RKey if !options.useReverseScan { descKey = from } else { descKey = to } desc, err = ds.rangeCache.LookupRangeDescriptor(descKey, options) if err != nil { return nil, false, nil, roachpb.NewError(err) } // Checks whether need to get next range descriptor. If so, returns true. needAnother := func(desc *roachpb.RangeDescriptor, isReverse bool) bool { if isReverse { return from.Less(desc.StartKey) } return desc.EndKey.Less(to) } evict := func() { ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, options.useReverseScan) } return desc, needAnother(desc, options.useReverseScan), evict, nil }
// RangeLookup dispatches an RangeLookup request for the given // metadata key to the replicas of the given range. Note that we allow // inconsistent reads when doing range lookups for efficiency. Getting // stale data is not a correctness problem but instead may // infrequently result in additional latency as additional range // lookups may be required. Note also that rangeLookup bypasses the // DistSender's Send() method, so there is no error inspection and // retry logic here; this is not an issue since the lookup performs a // single inconsistent read only. func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // We can interpret the RKey as a Key here since it's a metadata // lookup; those are never local. Key: key.AsRawKey(), }, MaxRanges: ds.rangeLookupMaxRanges, ConsiderIntents: considerIntents, Reverse: useReverseScan, }) replicas := newReplicaSlice(ds.gossip, desc) trace := ds.Tracer.StartSpan("range lookup") defer trace.Finish() // TODO(tschottdorf): Ideally we would use the trace of the request which // caused this lookup instead of a new one. br, err := ds.sendRPC(trace, desc.RangeID, replicas, orderRandom, ba) if err != nil { return nil, err } if br.Error != nil { return nil, br.Error } return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil }
// lookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupRange(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // This is only for testing usage; performance doesn't matter. func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, pErr *roachpb.Error) { ls.mu.RLock() defer ls.mu.RUnlock() var rng *Replica for _, store := range ls.storeMap { rng = store.LookupReplica(start, end) if rng == nil { if tmpRng := store.LookupReplica(start, nil); tmpRng != nil { log.Warningf(fmt.Sprintf("range not contained in one range: [%s,%s), but have [%s,%s)", start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey)) } continue } if replica == nil { rangeID = rng.RangeID replica = rng.GetReplica() continue } // Should never happen outside of tests. return 0, nil, roachpb.NewErrorf( "range %+v exists on additional store: %+v", rng, store) } if replica == nil { pErr = roachpb.NewError(roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), nil)) } return rangeID, replica, pErr }
// clearOverlappingCachedRangeDescriptors looks up and clears any // cache entries which overlap the specified key or descriptor. func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(key, metaKey roachpb.RKey, desc *roachpb.RangeDescriptor) { if desc.StartKey.Equal(desc.EndKey) { // True for some unittests. return } // Clear out any descriptors which subsume the key which we're going // to cache. For example, if an existing KeyMin->KeyMax descriptor // should be cleared out in favor of a KeyMin->"m" descriptor. k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey)) if ok { descriptor := v.(*roachpb.RangeDescriptor) if !key.Less(descriptor.StartKey) && !descriptor.EndKey.Less(key) { if log.V(1) { log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor) } rdc.rangeCache.Del(k.(rangeCacheKey)) } } // Also clear any descriptors which are subsumed by the one we're // going to cache. This could happen on a merge (and also happens // when there's a lot of concurrency). Iterate from the range meta key // after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey. rdc.rangeCache.DoRange(func(k, v interface{}) { if log.V(1) { log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*roachpb.RangeDescriptor)) } rdc.rangeCache.Del(k.(rangeCacheKey)) }, rangeCacheKey(meta(desc.StartKey).Next()), rangeCacheKey(meta(desc.EndKey))) }
// lookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupRange(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // This is only for testing usage; performance doesn't matter. func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, err error) { ls.mu.RLock() defer ls.mu.RUnlock() var rng *Replica var partialDesc *roachpb.RangeDescriptor for _, store := range ls.storeMap { rng = store.LookupReplica(start, end) if rng == nil { if tmpRng := store.LookupReplica(start, nil); tmpRng != nil { log.Warningf("range not contained in one range: [%s,%s), but have [%s,%s)", start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey) partialDesc = tmpRng.Desc() break } continue } if replica == nil { rangeID = rng.RangeID replica, err = rng.GetReplica() if err != nil { if _, ok := err.(*errReplicaNotInRange); !ok { return 0, nil, err } } continue } // Should never happen outside of tests. return 0, nil, util.Errorf( "range %+v exists on additional store: %+v", rng, store) } if replica == nil { err = roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), partialDesc) } return rangeID, replica, err }
func getNode(t *testing.T, nodes map[string]storage.RangeTreeNode, testName string, key roachpb.RKey) (storage.RangeTreeNode, bool) { if key != nil { if node, ok := nodes[key.String()]; !ok { t.Errorf("%s: could not locate node with key %s", testName, key) } else { return node, ok } } return storage.RangeTreeNode{}, false }
// ComputeSplitKeys takes a start and end key and returns an array of keys // at which to split the span [start, end). // The only required splits are at each user table prefix. func (s SystemConfig) ComputeSplitKeys(startKey, endKey roachpb.RKey) []roachpb.RKey { testingLock.Lock() tableSplitsDisabled := testingDisableTableSplits testingLock.Unlock() if tableSplitsDisabled { return nil } tableStart := roachpb.RKey(keys.UserTableDataMin) if !tableStart.Less(endKey) { // This range is before the user tables span: no required splits. return nil } startID, ok := ObjectIDForKey(startKey) if !ok || startID <= keys.MaxReservedDescID { // The start key is either: // - not part of the structured data span // - part of the system span // In either case, start looking for splits at the first ID usable // by the user data span. startID = keys.MaxReservedDescID + 1 } else { // The start key is either already a split key, or after the split // key for its ID. We can skip straight to the next one. startID++ } // Find the largest object ID. // We can't keep splitting until we reach endKey as it could be roachpb.KeyMax. endID, err := s.GetLargestObjectID() if err != nil { log.Errorf("unable to determine largest object ID from system config: %s", err) return nil } // Build key prefixes for sequential table IDs until we reach endKey. var splitKeys []roachpb.RKey var key roachpb.RKey // endID could be smaller than startID if we don't have user tables. for id := startID; id <= endID; id++ { key = keys.MakeTablePrefix(id) // Skip if the range starts on a split key. if !startKey.Less(key) { continue } // Handle the case where EndKey is already a table prefix. if !key.Less(endKey) { break } splitKeys = append(splitKeys, key) } return splitKeys }
// ObjectIDForKey returns the object ID (table or database) for 'key', // or (_, false) if not within the structured key space. func ObjectIDForKey(key roachpb.RKey) (uint32, bool) { if key.Equal(roachpb.RKeyMax) { return 0, false } if encoding.PeekType(key) != encoding.Int { // TODO(marc): this should eventually return SystemDatabaseID. return 0, false } // Consume first encoded int. _, id64, err := encoding.DecodeUvarint(key) return uint32(id64), err == nil }
// LookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupReplica(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // If end is nil, a replica containing start is looked up. // This is only for testing usage; performance doesn't matter. func (ls *Stores) LookupReplica( start, end roachpb.RKey, ) (roachpb.RangeID, roachpb.ReplicaDescriptor, error) { ls.mu.RLock() defer ls.mu.RUnlock() var rangeID roachpb.RangeID var repDesc roachpb.ReplicaDescriptor var repDescFound bool for _, store := range ls.storeMap { replica := store.LookupReplica(start, nil) if replica == nil { continue } // Verify that the descriptor contains the entire range. if desc := replica.Desc(); !desc.ContainsKeyRange(start, end) { log.Warningf(context.TODO(), "range not contained in one range: [%s,%s), but have [%s,%s)", start, end, desc.StartKey, desc.EndKey) err := roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), desc) return 0, roachpb.ReplicaDescriptor{}, err } rangeID = replica.RangeID var err error repDesc, err = replica.GetReplicaDescriptor() if err != nil { if _, ok := err.(*roachpb.RangeNotFoundError); ok { // We are not holding a lock across this block; the replica could have // been removed from the range (via down-replication) between the // LookupReplica and the GetReplicaDescriptor calls. In this case just // ignore this replica. continue } return 0, roachpb.ReplicaDescriptor{}, err } if repDescFound { // We already found the range; this should never happen outside of tests. err := errors.Errorf("range %+v exists on additional store: %+v", replica, store) return 0, roachpb.ReplicaDescriptor{}, err } repDescFound = true } if !repDescFound { return 0, roachpb.ReplicaDescriptor{}, roachpb.NewRangeNotFoundError(0) } return rangeID, repDesc, nil }
// verifyBinarySearchTree checks to ensure that all keys to the left of the root // node are less than it, and all nodes to the right of the root node are // greater than it. It recursively walks the tree to perform this same check. func verifyBinarySearchTree(t *testing.T, nodes map[string]storage.RangeTreeNode, testName string, node storage.RangeTreeNode, keyMin, keyMax roachpb.RKey) { if !node.Key.Less(keyMax) { t.Errorf("%s: Failed Property BST - The key %s is not less than %s.", testName, node.Key, keyMax) } // We need the extra check since roachpb.KeyMin is actually a range start key. if !keyMin.Less(node.Key) && !node.Key.Equal(roachpb.RKeyMin) { t.Errorf("%s: Failed Property BST - The key %s is not greater than %s.", testName, node.Key, keyMin) } if left, ok := getNode(t, nodes, testName, node.LeftKey); ok { verifyBinarySearchTree(t, nodes, testName, left, keyMin, node.Key) } if right, ok := getNode(t, nodes, testName, node.RightKey); ok { verifyBinarySearchTree(t, nodes, testName, right, node.Key, keyMax) } }
// verifyBinarySearchTree checks to ensure that all keys to the left of the root // node are less than it, and all nodes to the right of the root node are // greater than it. It recursively walks the tree to perform this same check. func verifyBinarySearchTree(t *testing.T, nodes map[string]roachpb.RangeTreeNode, testName string, node *roachpb.RangeTreeNode, keyMin, keyMax roachpb.RKey) { if node == nil { return } if !node.Key.Less(keyMax) { t.Errorf("%s: Failed Property BST - The key %s is not less than %s.", testName, node.Key, keyMax) } // We need the extra check since roachpb.KeyMin is actually a range start key. if !keyMin.Less(node.Key) && !node.Key.Equal(roachpb.RKeyMin) { t.Errorf("%s: Failed Property BST - The key %s is not greater than %s.", testName, node.Key, keyMin) } left, right := getLeftAndRight(t, nodes, testName, node) verifyBinarySearchTree(t, nodes, testName, left, keyMin, node.Key) verifyBinarySearchTree(t, nodes, testName, right, node.Key, keyMax) }
func (db *testDescriptorDB) getDescriptors(key roachpb.RKey, considerIntents bool, useReverseScan bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) { rs := make([]roachpb.RangeDescriptor, 0, 1) preRs := make([]roachpb.RangeDescriptor, 0, 2) for i := 0; i < 3; i++ { var endKey roachpb.RKey if useReverseScan { endKey = key } else { endKey = key.Next() } v := db.data.Ceil(testDescriptorNode{ &roachpb.RangeDescriptor{ EndKey: endKey, }, }) if v == nil { break } desc := *(v.(testDescriptorNode).RangeDescriptor) if i == 0 { rs = append(rs, desc) if considerIntents { desc.RangeID++ rs = append(rs, desc) break } else if db.disablePrefetch { break } } else { preRs = append(preRs, desc) } // Break to keep from skidding off the end of the available ranges. if desc.EndKey.Equal(roachpb.RKeyMax) { break } if useReverseScan { key = desc.StartKey } else { key = desc.EndKey } } return rs, preRs, nil }
// RangeLookup implements the RangeDescriptorDB interface. It looks up // the descriptors for the given (meta) key. func (ls *Stores) RangeLookup(key roachpb.RKey, _ *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { ba := roachpb.BatchRequest{} ba.ReadConsistency = roachpb.INCONSISTENT ba.Add(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ // key is a meta key, so it's guaranteed not local-prefixed. Key: key.AsRawKey(), }, MaxRanges: 1, ConsiderIntents: considerIntents, Reverse: useReverseScan, }) br, pErr := ls.Send(context.Background(), ba) if pErr != nil { return nil, pErr } return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil }
// ObjectIDForKey returns the object ID (table or database) for 'key', // or (_, false) if not within the structured key space. func ObjectIDForKey(key roachpb.RKey) (uint32, bool) { if key.Equal(roachpb.RKeyMax) { return 0, false } if key.Equal(keys.TableDataPrefix) { // TODO(marc): this should eventually return SystemDatabaseID. return 0, false } remaining := bytes.TrimPrefix(key, keys.TableDataPrefix) if len(remaining) == len(key) { // TrimPrefix returns the input untouched if the prefix doesn't match. return 0, false } // Consume first encoded int. _, id64, err := encoding.DecodeUvarint(remaining) return uint32(id64), err == nil }
// validateRangeMetaKey validates that the given key is a valid Range Metadata // key. This checks only the constraints common to forward and backwards scans: // correct prefix and not exceeding KeyMax. func validateRangeMetaKey(key roachpb.RKey) error { // KeyMin is a valid key. if key.Equal(roachpb.RKeyMin) { return nil } // Key must be at least as long as Meta1Prefix. if len(key) < len(Meta1Prefix) { return NewInvalidRangeMetaKeyError("too short", key) } prefix, body := key[:len(Meta1Prefix)], key[len(Meta1Prefix):] if !prefix.Equal(Meta2Prefix) && !prefix.Equal(Meta1Prefix) { return NewInvalidRangeMetaKeyError("not a meta key", key) } if roachpb.RKeyMax.Less(body) { return NewInvalidRangeMetaKeyError("body of meta key range lookup is > KeyMax", key) } return nil }
// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key roachpb.RKey, inclusive bool) ( rangeCacheKey, *roachpb.RangeDescriptor) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive by default. var metaKey roachpb.RKey if !inclusive { metaKey = meta(key.Next()) } else { metaKey = meta(key) } k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*roachpb.RangeDescriptor) // Check that key actually belongs to the range. if !rd.ContainsKey(key) { // The key is the EndKey and we're inclusive, so just return the range descriptor. if inclusive && key.Equal(rd.EndKey) { return metaEndKey, rd } return nil, nil } // The key is the StartKey, but we're inclusive and thus need to return the // previous range descriptor, but it is not in the cache yet. if inclusive && key.Equal(rd.StartKey) { return nil, nil } return metaEndKey, rd }
// MetaReverseScanBounds returns the range [start,end) within which the desired // meta record can be found by means of a reverse engine scan. The given key // must be a valid RangeMetaKey as defined by validateRangeMetaKey. func MetaReverseScanBounds(key roachpb.RKey) (roachpb.Key, roachpb.Key, error) { if err := validateRangeMetaKey(key); err != nil { return nil, nil, err } if key.Equal(roachpb.RKeyMin) || key.Equal(Meta1Prefix) { return nil, nil, NewInvalidRangeMetaKeyError("KeyMin and Meta1Prefix can't be used as the key of reverse scan", key) } if key.Equal(Meta2Prefix) { // Special case Meta2Prefix: this is the first key in Meta2, and the scan // interval covers all of Meta1. return Meta1Prefix, key.Next().AsRawKey(), nil } // Otherwise find the first entry greater than the given key and find the last entry // in the same prefix. For MVCCReverseScan the endKey is exclusive, if we want to find // the range descriptor the given key specified,we need to set the key.Next() as the // MVCCReverseScan`s endKey. For example: // If we have ranges [a,f) and [f,z), then we'll have corresponding meta records // at f and z. If you're looking for the meta record for key f, then you want the // second record (exclusive in MVCCReverseScan), hence key.Next() below. return key[:len(Meta1Prefix)].AsRawKey(), key.Next().AsRawKey(), nil }
// LookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupReplica(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // If end is nil, a replica containing start is looked up. // This is only for testing usage; performance doesn't matter. func (ls *Stores) LookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, repDesc roachpb.ReplicaDescriptor, err error) { ls.mu.RLock() defer ls.mu.RUnlock() var rng *Replica var partialDesc *roachpb.RangeDescriptor var repDescFound bool for _, store := range ls.storeMap { rng = store.LookupReplica(start, end) if rng == nil { if tmpRng := store.LookupReplica(start, nil); tmpRng != nil { partialDesc = tmpRng.Desc() log.Warningf(context.TODO(), "range not contained in one range: [%s,%s), but have [%s,%s)", start, end, partialDesc.StartKey, partialDesc.EndKey) break } continue } if !repDescFound { rangeID = rng.RangeID repDesc, err = rng.GetReplicaDescriptor() if err != nil { if _, ok := err.(*roachpb.RangeNotFoundError); !ok { return 0, roachpb.ReplicaDescriptor{}, err } } else { repDescFound = true } continue } // Should never happen outside of tests. return 0, roachpb.ReplicaDescriptor{}, errors.Errorf( "range %+v exists on additional store: %+v", rng, store, ) } if !repDescFound { err = roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), partialDesc) } return rangeID, repDesc, err }
// verifyBinarySearchTree checks to ensure that all keys to the left of the root // node are less than it, and all nodes to the right of the root node are // greater than it. It recursively walks the tree to perform this same check. func verifyBinarySearchTree(t *testing.T, tc *treeContext, testName string, node *roachpb.RangeTreeNode, keyMin, keyMax roachpb.RKey) { if !node.Key.Less(keyMax) { t.Errorf("%s: Failed Property BST - The key %s is not less than %s.", testName, node.Key, keyMax) } if !keyMin.Less(node.Key) { t.Errorf("%s: Failed Property BST - The key %s is not greater than %s.", testName, node.Key, keyMin) } if node.LeftKey != nil { left, err := tc.getNode(node.LeftKey) if err != nil { t.Fatal(err) } verifyBinarySearchTree(t, tc, testName, left, keyMin, node.Key) } if node.RightKey != nil { right, err := tc.getNode(node.RightKey) if err != nil { t.Fatal(err) } verifyBinarySearchTree(t, tc, testName, right, node.Key, keyMax) } }
// MetaScanBounds returns the range [start,end) within which the desired meta // record can be found by means of an engine scan. The given key must be a // valid RangeMetaKey as defined by validateRangeMetaKey. // TODO(tschottdorf): a lot of casting going on inside. func MetaScanBounds(key roachpb.RKey) (roachpb.Key, roachpb.Key, error) { if err := validateRangeMetaKey(key); err != nil { return nil, nil, err } if key.Equal(Meta2KeyMax) { return nil, nil, NewInvalidRangeMetaKeyError("Meta2KeyMax can't be used as the key of scan", key) } if key.Equal(roachpb.RKeyMin) { // Special case KeyMin: find the first entry in meta1. return Meta1Prefix, Meta1Prefix.PrefixEnd(), nil } if key.Equal(Meta1KeyMax) { // Special case Meta1KeyMax: this is the last key in Meta1, we don't want // to start at Next(). return Meta1KeyMax, Meta1Prefix.PrefixEnd(), nil } // Otherwise find the first entry greater than the given key in the same meta prefix. return key.Next().AsRawKey(), key[:len(Meta1Prefix)].PrefixEnd().AsRawKey(), nil }
// truncate restricts all contained requests to the given key range.
// Even on error, the returned closure must be executed; it undoes any
// truncations performed.
// First, the boundaries of the truncation are obtained: This is the
// intersection between [from,to) and the descriptor's range.
// Secondly, all requests contained in the batch are "truncated" to
// the resulting range, inserting NoopRequest appropriately to
// replace requests which are left without a key range to operate on.
// The results are, in order: the closure which must be executed to undo the
// truncation (even in case of an error), the number of non-noop requests
// after truncation (0 when an error is returned), and the error, if any.
// An error is returned for a point request which has EndKey set, for a
// range request mixing a local with a global key, and for a range request
// with local keys whose addressed span is not contained in the descriptor.
// TODO(tschottdorf): Consider returning a new BatchRequest, which has more
// overhead in the common case of a batch which never needs truncation but is
// less magical.
func truncate(br *roachpb.BatchRequest, desc *roachpb.RangeDescriptor, from, to roachpb.RKey) (func(), int, error) {
	// Clamp [from,to) to the descriptor: a bound outside of the descriptor
	// (or a nil `to`) is replaced by the descriptor's own bound.
	if !desc.ContainsKey(from) {
		from = desc.StartKey
	}
	if !desc.ContainsKeyRange(desc.StartKey, to) || to == nil {
		to = desc.EndKey
	}
	// truncateOne handles a single request. It reports whether the request
	// must be replaced by a noop, the closures undoing the header changes it
	// made, and an error for malformed requests.
	truncateOne := func(args roachpb.Request) (bool, []func(), error) {
		// A request that already is a noop is counted as omitted.
		if _, ok := args.(*roachpb.NoopRequest); ok {
			return true, nil, nil
		}
		// NOTE(review): the assignments to header.Key/header.EndKey below
		// presumably write through to the request itself, i.e. Header()
		// returns a pointer — confirm against roachpb.Request.
		header := args.Header()
		if !roachpb.IsRange(args) {
			// This is a point request.
			if len(header.EndKey) > 0 {
				return false, nil, util.Errorf("%T is not a range command, but EndKey is set", args)
			}
			// A point request is omitted if its address is outside of the
			// descriptor, and left untouched otherwise.
			if !desc.ContainsKey(keys.Addr(header.Key)) {
				return true, nil, nil
			}
			return false, nil, nil
		}
		// We're dealing with a range-spanning request.
		var undo []func()
		keyAddr, endKeyAddr := keys.Addr(header.Key), keys.Addr(header.EndKey)
		// l and r record whether the start and end key, respectively, differ
		// from their address (the error messages below call such keys local).
		if l, r := !keyAddr.Equal(header.Key), !endKeyAddr.Equal(header.EndKey); l || r {
			if !desc.ContainsKeyRange(keyAddr, endKeyAddr) {
				return false, nil, util.Errorf("local key range must not span ranges")
			}
			if !l || !r {
				return false, nil, util.Errorf("local key mixed with global key in range")
			}
		}
		// Below, {end,}keyAddr equals header.{End,}Key, so nothing is local.
		if keyAddr.Less(from) {
			{
				// Remember the original start key so that it can be restored.
				origKey := header.Key
				undo = append(undo, func() { header.Key = origKey })
			}
			header.Key = from.AsRawKey() // "from" can't be local
			keyAddr = from
		}
		if !endKeyAddr.Less(to) {
			{
				// Remember the original end key so that it can be restored.
				origEndKey := header.EndKey
				undo = append(undo, func() { header.EndKey = origEndKey })
			}
			header.EndKey = to.AsRawKey() // "to" can't be local
			endKeyAddr = to
		}
		// Check whether the truncation has left any keys in the range. If not,
		// we need to cut it out of the request.
		return !keyAddr.Less(endKeyAddr), undo, nil
	}

	// fns collects the undo closures of all requests; gUndo runs them in the
	// order in which they were recorded.
	var fns []func()
	gUndo := func() {
		for _, f := range fns {
			f()
		}
	}

	var numNoop int
	for pos, arg := range br.Requests {
		omit, undo, err := truncateOne(arg.GetInner())
		if omit {
			numNoop++
			// Swap the request for a NoopRequest in place and record how to
			// put the original request back.
			nReq := &roachpb.RequestUnion{}
			if !nReq.SetValue(&roachpb.NoopRequest{}) {
				panic("RequestUnion excludes NoopRequest")
			}
			oReq := br.Requests[pos]
			br.Requests[pos] = *nReq
			posCpy := pos // for closure
			undo = append(undo, func() { br.Requests[posCpy] = oReq })
		}
		// The undo closures are recorded before the error check so that the
		// caller can still revert everything done so far.
		fns = append(fns, undo...)
		if err != nil {
			return gUndo, 0, err
		}
	}
	return gUndo, len(br.Requests) - numNoop, nil
}
// ComputeSplitKeys takes a start and end key and returns an array of keys // at which to split the span [start, end). // The only required splits are at each user table prefix. func (s SystemConfig) ComputeSplitKeys(startKey, endKey roachpb.RKey) []roachpb.RKey { if TestingTableSplitsDisabled() { return nil } tableStart := roachpb.RKey(keys.ReservedTableDataMin) if !tableStart.Less(endKey) { // This range is before the user tables span: no required splits. return nil } startID, ok := ObjectIDForKey(startKey) if !ok || startID <= keys.MaxSystemDescID { // The start key is either: // - not part of the structured data span // - part of the system span // In either case, start looking for splits at the first ID usable // by the user data span. startID = keys.MaxSystemDescID + 1 } else { // The start key is either already a split key, or after the split // key for its ID. We can skip straight to the next one. startID++ } // Build key prefixes for sequential table IDs until we reach endKey. Note // that there are two disjoint sets of sequential keys: non-system reserved // tables have sequential IDs, as do user tables, but the two ranges contain a // gap. var splitKeys []roachpb.RKey var key roachpb.RKey // appendSplitKeys generates all possible split keys between the given range // of IDs and adds them to splitKeys. appendSplitKeys := func(startID, endID uint32) { // endID could be smaller than startID if we don't have user tables. for id := startID; id <= endID; id++ { key = keys.MakeNonColumnKey(keys.MakeTablePrefix(id)) // Skip if this ID matches the startKey passed to ComputeSplitKeys. if !startKey.Less(key) { continue } // Handle the case where EndKey is already a table prefix. if !key.Less(endKey) { break } splitKeys = append(splitKeys, key) } } // If the startKey falls within the non-system reserved range, compute those // keys first. 
if startID <= keys.MaxReservedDescID { endID, err := s.GetLargestObjectID(keys.MaxReservedDescID) if err != nil { log.Errorf("unable to determine largest reserved object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) startID = keys.MaxReservedDescID + 1 } // Append keys in the user space. endID, err := s.GetLargestObjectID(0) if err != nil { log.Errorf("unable to determine largest object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) return splitKeys }