// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key, inclusive bool) ( rangeCacheKey, *proto.RangeDescriptor) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive by default. var metaKey proto.Key if !inclusive { metaKey = keys.RangeMetaKey(key.Next()) } else { metaKey = keys.RangeMetaKey(key) } k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*proto.RangeDescriptor) // Check that key actually belongs to the range. if !rd.ContainsKey(key) { // The key is the EndKey and we're inclusive, so just return the range descriptor. if inclusive && key.Equal(rd.EndKey) { return metaEndKey, rd } return nil, nil } // The key is the StartKey, but we're inclusive and thus need to return the // previous range descriptor, but it is not in the cache yet. if inclusive && key.Equal(rd.StartKey) { return nil, nil } return metaEndKey, rd }
// Add the specified timestamp to the cache as covering the range of // keys from start to end. If end is nil, the range covers the start // key only. txnID is nil for no transaction. readOnly specifies // whether the command adding this timestamp was read-only or not. func (tc *TimestampCache) Add(start, end proto.Key, timestamp proto.Timestamp, txnID []byte, readOnly bool) { // This gives us a memory-efficient end key if end is empty. if len(end) == 0 { end = start.Next() start = end[:len(start)] } if tc.latest.Less(timestamp) { tc.latest = timestamp } // Only add to the cache if the timestamp is more recent than the // low water mark. if tc.lowWater.Less(timestamp) { // Check existing, overlapping entries. Remove superseded // entries or return without adding this entry if necessary. key := tc.cache.NewKey(start, end) for _, o := range tc.cache.GetOverlaps(start, end) { ce := o.Value.(cacheEntry) if ce.readOnly != readOnly { continue } if o.Key.Contains(key) && !ce.timestamp.Less(timestamp) { return // don't add this key; there's already a cache entry with >= timestamp. } else if key.Contains(o.Key) && !timestamp.Less(ce.timestamp) { tc.cache.Del(o.Key) // delete existing key; this cache entry supersedes. } } ce := cacheEntry{timestamp: timestamp, txnID: txnID, readOnly: readOnly} tc.cache.Add(key, ce) } }
// Add adds a command to the queue which affects the specified key // range. If end is empty, it is set to start.Next(), meaning the // command affects a single key. The returned interface is the key for // the command queue and must be re-supplied on subsequent invocation // of Remove(). // // Add should be invoked after waiting on already-executing, // overlapping commands via the WaitGroup initialized through // GetWait(). func (cq *CommandQueue) Add(start, end proto.Key, readOnly bool) interface{} { if len(end) == 0 { end = start.Next() } key := cq.cache.NewKey(start, end) cq.cache.Add(key, &cmd{readOnly: readOnly}) return key }
// MetaScanBounds returns the start and end keys of the range within which the // desired meta record can be found by means of an engine scan. The given key // must be a valid RangeMetaKey as defined by ValidateRangeMetaKey. func MetaScanBounds(key proto.Key) (proto.Key, proto.Key) { if key.Equal(proto.KeyMin) { // Special case KeyMin: find the first entry in meta1. return Meta1Prefix, Meta1Prefix.PrefixEnd() } if key.Equal(Meta1KeyMax) { // Special case Meta1KeyMax: this is the last key in Meta1, we don't want // to start at Next(). return key, Meta1Prefix.PrefixEnd() } // Otherwise find the first entry greater than the given key in the same meta prefix. return key.Next(), proto.Key(key[:len(Meta1Prefix)]).PrefixEnd() }
// GetWait initializes the supplied wait group with the number of // executing commands which overlap the specified key range. If end is // empty, end is set to start.Next(), meaning the command affects a // single key. The caller should call wg.Wait() to wait for // confirmation that all gating commands have completed or // failed. readOnly is true if the requester is a read-only command; // false for read-write. func (cq *CommandQueue) GetWait(start, end proto.Key, readOnly bool, wg *sync.WaitGroup) { // This gives us a memory-efficient end key if end is empty. if len(end) == 0 { end = start.Next() start = end[:len(start)] } for _, c := range cq.cache.GetOverlaps(start, end) { c := c.Value.(*cmd) // Only add to the wait group if one of the commands isn't read-only. if !readOnly || !c.readOnly { c.pending = append(c.pending, wg) wg.Add(1) } } }
// MetaReverseScanBounds returns the range [start,end) within which the desired // meta record can be found by means of a reverse engine scan. The given key // must be a valid RangeMetaKey as defined by ValidateRangeMetaKey. func MetaReverseScanBounds(key proto.Key) (proto.Key, proto.Key, error) { if key.Equal(proto.KeyMin) || key.Equal(Meta1Prefix) { return nil, nil, NewInvalidRangeMetaKeyError("KeyMin and Meta1Prefix can't be used as the key of reverse scan", key) } if key.Equal(Meta2Prefix) { // Special case Meta2Prefix: this is the first key in Meta2, and the scan // interval covers all of Meta1. return Meta1Prefix, key.Next(), nil } // Otherwise find the first entry greater than the given key and find the last entry // in the same prefix. For MVCCReverseScan the endKey is exclusive, if we want to find // the range descriptor the given key specified,we need to set the key.Next() as the // MVCCReverseScan`s endKey. For example: // If we have ranges ["", "f") and ["f", "z"), then we'll have corresponding meta records // at "f" and "z". If you're looking for the meta record for key "f", then you want the // second record (exclusive in MVCCReverseScan), hence key.Next() below. return key[:len(Meta1Prefix)], key.Next(), nil }
// GetMax returns the maximum read and write timestamps which overlap // the interval spanning from start to end. Cached timestamps matching // the specified txnID are not considered. If no part of the specified // range is overlapped by timestamps in the cache, the low water // timestamp is returned for both read and write timestamps. // // The txn ID prevents restarts with a pattern like: read("a"), // write("a"). The read adds a timestamp for "a". Then the write (for // the same transaction) would get that as the max timestamp and be // forced to increment it. This allows timestamps from the same txn // to be ignored. func (tc *TimestampCache) GetMax(start, end proto.Key, txnID []byte) (proto.Timestamp, proto.Timestamp) { if len(end) == 0 { end = start.Next() } maxR := tc.lowWater maxW := tc.lowWater for _, o := range tc.cache.GetOverlaps(start, end) { ce := o.Value.(cacheEntry) if ce.txnID == nil || txnID == nil || !proto.TxnIDEqual(txnID, ce.txnID) { if ce.readOnly && maxR.Less(ce.timestamp) { maxR = ce.timestamp } else if !ce.readOnly && maxW.Less(ce.timestamp) { maxW = ce.timestamp } } } return maxR, maxW }
// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key) ( rangeCacheKey, *proto.RangeDescriptor) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive. If inclusive is false, we access the // cache using key.Next(). metaKey := keys.RangeMetaKey(key.Next()) k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*proto.RangeDescriptor) // Check that key actually belongs to range if !rd.ContainsKey(keys.KeyAddress(key)) { return nil, nil } return metaEndKey, rd }
func (db *testDescriptorDB) getDescriptor(key proto.Key) []proto.RangeDescriptor { response := make([]proto.RangeDescriptor, 0, 3) for i := 0; i < 3; i++ { v := db.data.Ceil(testDescriptorNode{ &proto.RangeDescriptor{ EndKey: key.Next(), }, }) if v == nil { break } response = append(response, *(v.(testDescriptorNode).RangeDescriptor)) // Break to keep from skidding off the end of the available ranges. if response[i].EndKey.Equal(proto.KeyMax) { break } key = proto.Key(response[i].EndKey).Next() } return response }
// MetaScanBounds returns the range [start,end) within which the desired meta // record can be found by means of an engine scan. The given key must be a // valid RangeMetaKey as defined by validateRangeMetaKey. func MetaScanBounds(key proto.Key) (proto.Key, proto.Key, error) { if err := validateRangeMetaKey(key); err != nil { return nil, nil, err } if key.Equal(Meta2KeyMax) { return nil, nil, NewInvalidRangeMetaKeyError("Meta2KeyMax can't be used as the key of scan", key) } if key.Equal(proto.KeyMin) { // Special case KeyMin: find the first entry in meta1. return Meta1Prefix, Meta1Prefix.PrefixEnd(), nil } if key.Equal(Meta1KeyMax) { // Special case Meta1KeyMax: this is the last key in Meta1, we don't want // to start at Next(). return key, Meta1Prefix.PrefixEnd(), nil } // Otherwise find the first entry greater than the given key in the same meta prefix. return key.Next(), proto.Key(key[:len(Meta1Prefix)]).PrefixEnd(), nil }
// addKeyRange adds the specified key range to the interval cache, // taking care not to add this range if existing entries already // completely cover the range. func (tm *txnMetadata) addKeyRange(start, end proto.Key) { // This gives us a memory-efficient end key if end is empty. // The most common case for keys in the intents interval map // is for single keys. However, the interval cache requires // a non-empty interval, so we create two key slices which // share the same underlying byte array. if len(end) == 0 { end = start.Next() start = end[:len(start)] } key := tm.keys.NewKey(start, end) for _, o := range tm.keys.GetOverlaps(start, end) { if o.Key.Contains(key) { return } else if key.Contains(o.Key) { tm.keys.Del(o.Key) } } // Since no existing key range fully covered this range, add it now. tm.keys.Add(key, nil) }
// getCachedRangeDescriptorLocked is a helper function to retrieve the // descriptor of the range which contains the given key, if present in the // cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu. func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key, isReverse bool) ( rangeCacheKey, *proto.RangeDescriptor) { // The cache is indexed using the end-key of the range, but the // end-key is non-inclusive. var metaKey proto.Key if !isReverse { // If it is not reverse scan, we access the cache using key.Next(). metaKey = keys.RangeMetaKey(key.Next()) } else { // Because reverse scan request is begining at end key(exclusive),so we // access the cache using key directly. metaKey = keys.RangeMetaKey(key) } k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*proto.RangeDescriptor) // Check that key actually belongs to the range. if !rd.ContainsKey(keys.KeyAddress(key)) { // The key is the EndKey of the range in reverse scan, just return the range descriptor. if isReverse && key.Equal(rd.EndKey) { return metaEndKey, rd } return nil, nil } // The key is the StartKey of the range in reverse scan. We need to return the previous range // descriptor, but it is not in the cache yet. if isReverse && key.Equal(rd.StartKey) { return nil, nil } return metaEndKey, rd }