// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readOnly specifies
// whether the command adding this timestamp was read-only or not.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID []byte, readOnly bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		// Check existing, overlapping entries. Remove superseded
		// entries or return without adding this entry if necessary.
		key := tc.cache.NewKey(start, end)
		for _, o := range tc.cache.GetOverlaps(start, end) {
			ce := o.Value.(cacheEntry)
			if ce.readOnly != readOnly {
				continue
			}
			if o.Key.Contains(key) && !ce.timestamp.Less(timestamp) {
				return // don't add this key; there's already a cache entry with >= timestamp.
			} else if key.Contains(o.Key) && !timestamp.Less(ce.timestamp) {
				tc.cache.Del(o.Key) // delete existing key; this cache entry supersedes.
			}
		}
		ce := cacheEntry{timestamp: timestamp, txnID: txnID, readOnly: readOnly}
		tc.cache.Add(key, ce)
	}
}
示例#2
0
// getCachedRangeDescriptorLocked is a helper function to retrieve the
// descriptor of the range which contains the given key, if present in the
// cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu.
func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key roachpb.Key, inclusive bool) (
	rangeCacheKey, *roachpb.RangeDescriptor) {
	// The cache is indexed using the end-key of the range, but the
	// end-key is non-inclusive by default.
	var metaKey roachpb.Key
	if !inclusive {
		metaKey = keys.RangeMetaKey(key.Next())
	} else {
		metaKey = keys.RangeMetaKey(key)
	}

	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if !ok {
		return nil, nil
	}
	metaEndKey := k.(rangeCacheKey)
	rd := v.(*roachpb.RangeDescriptor)

	// Check that key actually belongs to the range.
	if !rd.ContainsKey(key) {
		// The key is the EndKey and we're inclusive, so just return the range descriptor.
		if inclusive && key.Equal(rd.EndKey) {
			return metaEndKey, rd
		}
		return nil, nil
	}

	// The key is the StartKey, but we're inclusive and thus need to return the
	// previous range descriptor, but it is not in the cache yet.
	if inclusive && key.Equal(rd.StartKey) {
		return nil, nil
	}
	return metaEndKey, rd
}
示例#3
0
// addKeyRange adds the specified key range to the interval cache,
// taking care not to add this range if existing entries already
// completely cover the range.
func (tm *txnMetadata) addKeyRange(start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the interval cache requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	key := tm.keys.MakeKey(start, end)
	for _, o := range tm.keys.GetOverlaps(key.Start, key.End) {
		if o.Key.Contains(key) {
			return
		} else if key.Contains(*o.Key) {
			tm.keys.Del(o.Key)
		}
	}

	// Since no existing key range fully covered this range, add it now. The
	// strange assignment to pkey makes sure we delay the heap allocation until
	// we know it is necessary.
	alloc := struct {
		key   cache.IntervalKey
		entry cache.Entry
	}{key: key}
	alloc.entry.Key = &alloc.key
	tm.keys.AddEntry(&alloc.entry)
}
示例#4
0
// addKeyRange adds the specified key range to the range group,
// taking care not to add this range if existing entries already
// completely cover the range.
func addKeyRange(keys interval.RangeGroup, start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the range group requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	keyR := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	keys.Add(keyR)
}
// GetMax returns the maximum read and write timestamps which overlap
// the interval spanning from start to end. Cached timestamps matching
// the specified txnID are not considered. If no part of the specified
// range is overlapped by timestamps in the cache, the low water
// timestamp is returned for both read and write timestamps.
//
// The txn ID prevents restarts with a pattern like: read("a"),
// write("a"). The read adds a timestamp for "a". Then the write (for
// the same transaction) would get that as the max timestamp and be
// forced to increment it. This allows timestamps from the same txn
// to be ignored.
func (tc *TimestampCache) GetMax(start, end roachpb.Key, txnID []byte) (roachpb.Timestamp, roachpb.Timestamp) {
	if len(end) == 0 {
		end = start.Next()
	}
	maxR := tc.lowWater
	maxW := tc.lowWater
	for _, o := range tc.cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if ce.readOnly && maxR.Less(ce.timestamp) {
				maxR = ce.timestamp
			} else if !ce.readOnly && maxW.Less(ce.timestamp) {
				maxW = ce.timestamp
			}
		}
	}
	return maxR, maxW
}
示例#6
0
func (tc *TimestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readOnly bool) roachpb.Timestamp {
	if len(end) == 0 {
		end = start.Next()
	}
	max := tc.lowWater
	cache := tc.wCache
	if readOnly {
		cache = tc.rCache
	}
	for _, o := range cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if max.Less(ce.timestamp) {
				max = ce.timestamp
			}
		}
	}
	return max
}
示例#7
0
// MetaScanBounds returns the range [start,end) within which the desired meta
// record can be found by means of an engine scan. The given key must be a
// valid RangeMetaKey as defined by validateRangeMetaKey.
func MetaScanBounds(key roachpb.Key) (roachpb.Key, roachpb.Key, error) {
	if err := validateRangeMetaKey(key); err != nil {
		return nil, nil, err
	}

	if key.Equal(Meta2KeyMax) {
		return nil, nil, NewInvalidRangeMetaKeyError("Meta2KeyMax can't be used as the key of scan", key)
	}

	if key.Equal(roachpb.KeyMin) {
		// Special case KeyMin: find the first entry in meta1.
		return Meta1Prefix, Meta1Prefix.PrefixEnd(), nil
	}
	if key.Equal(Meta1KeyMax) {
		// Special case Meta1KeyMax: this is the last key in Meta1, we don't want
		// to start at Next().
		return key, Meta1Prefix.PrefixEnd(), nil
	}
	// Otherwise find the first entry greater than the given key in the same meta prefix.
	return key.Next(), roachpb.Key(key[:len(Meta1Prefix)]).PrefixEnd(), nil
}
示例#8
0
func (tc *timestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readTSCache bool) (hlc.Timestamp, bool) {
	if len(end) == 0 {
		end = start.Next()
	}
	var ok bool
	max := tc.lowWater
	cache := tc.wCache
	if readTSCache {
		cache = tc.rCache
	}
	for _, o := range cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if max.Less(ce.timestamp) {
				ok = true
				max = ce.timestamp
			}
		}
	}
	return max, ok
}
示例#9
0
// addKeyRange adds the specified key range to the interval cache,
// taking care not to add this range if existing entries already
// completely cover the range.
func (tm *txnMetadata) addKeyRange(start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the interval cache requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	key := tm.keys.NewKey(start, end)
	for _, o := range tm.keys.GetOverlaps(start, end) {
		if o.Key.Contains(key) {
			return
		} else if key.Contains(o.Key) {
			tm.keys.Del(o.Key)
		}
	}

	// Since no existing key range fully covered this range, add it now.
	tm.keys.Add(key, nil)
}
示例#10
0
// MetaReverseScanBounds returns the range [start,end) within which the desired
// meta record can be found by means of a reverse engine scan. The given key
// must be a valid RangeMetaKey as defined by validateRangeMetaKey.
func MetaReverseScanBounds(key roachpb.Key) (roachpb.Key, roachpb.Key, error) {
	if err := validateRangeMetaKey(key); err != nil {
		return nil, nil, err
	}

	if key.Equal(roachpb.KeyMin) || key.Equal(Meta1Prefix) {
		return nil, nil, NewInvalidRangeMetaKeyError("KeyMin and Meta1Prefix can't be used as the key of reverse scan", key)
	}
	if key.Equal(Meta2Prefix) {
		// Special case Meta2Prefix: this is the first key in Meta2, and the scan
		// interval covers all of Meta1.
		return Meta1Prefix, key.Next(), nil
	}
	// Otherwise find the first entry greater than the given key and find the last entry
	// in the same prefix. For MVCCReverseScan the endKey is exclusive, if we want to find
	// the range descriptor the given key specified,we need to set the key.Next() as the
	// MVCCReverseScan`s endKey. For example:
	// If we have ranges [a,f) and [f,z), then we'll have corresponding meta records
	// at f and z. If you're looking for the meta record for key f, then you want the
	// second record (exclusive in MVCCReverseScan), hence key.Next() below.
	return key[:len(Meta1Prefix)], key.Next(), nil
}
示例#11
0
// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp; false to update the write timestamp cache.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID *uuid.UUID, readTSCache bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		cache := tc.wCache
		if readTSCache {
			cache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := cache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			cache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, o := range cache.GetOverlaps(r.Start, r.End) {
			cv := o.Value.(*cacheValue)
			sCmp := r.Start.Compare(o.Key.Start)
			eCmp := r.End.Compare(o.Key.End)
			if !timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp less than or equal to the new interval.
				// Compare interval ranges to determine how to modify existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					cache.MoveToEnd(o.Entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// Old:
					cache.DelEntry(o.Entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// Old: ----    ----
					oldEnd := o.Key.End
					o.Key.End = r.Start

					key := cache.MakeKey(r.End, oldEnd)
					entry := makeCacheEntry(key, *cv)
					cache.AddEntryAfter(entry, o.Entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// Old: ----              ----
					o.Key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// Old:         ----              ----
					o.Key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			} else {
				// The existing interval has a timestamp greater than the new interval.
				// Compare interval ranges to determine how to modify new interval before
				// adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up old into two. We can add the left piece
					// immediately because it is guaranteed to be before the rest of the
					// overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: o.Key.Start}
					addRange(lr)

					r.Start = o.Key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// New:         ----              ----
					r.Start = o.Key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// New: ----              ----
					r.End = o.Key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
示例#12
0
// fetch retrieves spans from the kv
func (f *kvFetcher) fetch() error {
	batchSize := f.getBatchSize()

	b := &client.Batch{}
	b.Header.MaxScanResults = batchSize

	var resumeKey roachpb.Key
	if len(f.kvs) > 0 {
		resumeKey = f.kvs[len(f.kvs)-1].Key
		// To resume forward scans we will set the (inclusive) scan start to the Next of the last
		// received key. To resume reverse scans we will set the (exclusive) scan end to the last
		// received key.
		if !f.reverse {
			resumeKey = resumeKey.Next()
		}
	}

	atEnd := true
	if !f.reverse {
		for i := 0; i < len(f.spans); i++ {
			start := f.spans[i].Start
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].End) >= 0 {
					// We are resuming from a key after this span.
					continue
				}
				if resumeKey.Compare(start) > 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					start = resumeKey
				}
			}
			atEnd = false
			b.Scan(start, f.spans[i].End, f.spans[i].Count)
		}
	} else {
		for i := len(f.spans) - 1; i >= 0; i-- {
			end := f.spans[i].End
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].Start) <= 0 {
					// We are resuming from a key before this span.
					continue
				}
				if resumeKey.Compare(end) < 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					end = resumeKey
				}
			}
			atEnd = false
			b.ReverseScan(f.spans[i].Start, end, f.spans[i].Count)
		}
	}

	if atEnd {
		// The last scan happened to finish just at the end of the last span.
		f.kvs = nil
		f.fetchEnd = true
		return nil
	}

	if err := f.txn.Run(b); err != nil {
		return err
	}

	if f.kvs == nil {
		numResults := 0
		for _, result := range b.Results {
			numResults += len(result.Rows)
		}
		f.kvs = make([]client.KeyValue, 0, numResults)
	} else {
		f.kvs = f.kvs[:0]
	}

	for _, result := range b.Results {
		f.kvs = append(f.kvs, result.Rows...)
	}

	f.batchIdx++
	f.totalFetched += int64(len(f.kvs))
	f.kvIndex = 0

	if int64(len(f.kvs)) < batchSize {
		f.fetchEnd = true
	}

	// TODO(radu): We should fetch the next chunk in the background instead of waiting for the next
	// call to fetch(). We can use a pool of workers to issue the KV ops which will also limit the
	// total number of fetches that happen in parallel (and thus the amount of resources we use).
	return nil
}