Пример #1
0
// getDescriptors looks up the range descriptor to use for a query over the
// key range [from,to), with the given lookupOptions. The range descriptor
// which contains the range in which the request should start its query is
// returned first; the returned bool is true in case the given range reaches
// outside the first descriptor.
// In case either of the descriptors is discovered stale, the returned closure
// should be called; it evicts the cache appropriately.
// Note that `from` and `to` are not necessarily Key and EndKey from a
// RequestHeader; it's assumed that they've been translated to key addresses
// already (via KeyAddress).
func (ds *DistSender) getDescriptors(from, to roachpb.Key, options lookupOptions) (*roachpb.RangeDescriptor, bool, func(), *roachpb.Error) {
	var desc *roachpb.RangeDescriptor
	var err error
	var descKey roachpb.Key
	if !options.useReverseScan {
		descKey = from
	} else {
		descKey = to
	}
	desc, err = ds.rangeCache.LookupRangeDescriptor(descKey, options)

	if err != nil {
		return nil, false, nil, roachpb.NewError(err)
	}

	// Checks whether need to get next range descriptor. If so, returns true.
	needAnother := func(desc *roachpb.RangeDescriptor, isReverse bool) bool {
		if isReverse {
			return from.Less(desc.StartKey)
		}
		return desc.EndKey.Less(to)
	}

	evict := func() {
		ds.rangeCache.EvictCachedRangeDescriptor(descKey, desc, options.useReverseScan)
	}

	return desc, needAnother(desc, options.useReverseScan), evict, nil
}
Пример #2
0
// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readOnly specifies
// whether the command adding this timestamp was read-only or not.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID []byte, readOnly bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		// Check existing, overlapping entries. Remove superseded
		// entries or return without adding this entry if necessary.
		key := tc.cache.NewKey(start, end)
		for _, o := range tc.cache.GetOverlaps(start, end) {
			ce := o.Value.(cacheEntry)
			if ce.readOnly != readOnly {
				continue
			}
			if o.Key.Contains(key) && !ce.timestamp.Less(timestamp) {
				return // don't add this key; there's already a cache entry with >= timestamp.
			} else if key.Contains(o.Key) && !timestamp.Less(ce.timestamp) {
				tc.cache.Del(o.Key) // delete existing key; this cache entry supersedes.
			}
		}
		ce := cacheEntry{timestamp: timestamp, txnID: txnID, readOnly: readOnly}
		tc.cache.Add(key, ce)
	}
}
Пример #3
0
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified key or descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(key, metaKey roachpb.Key, desc *roachpb.RangeDescriptor) {
	if desc.StartKey.Equal(desc.EndKey) { // True for some unittests.
		return
	}
	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, if an existing KeyMin->KeyMax descriptor
	// should be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*roachpb.RangeDescriptor)
		if !key.Less(descriptor.StartKey) && !descriptor.EndKey.Less(key) {
			if log.V(1) {
				log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.Del(k.(rangeCacheKey))
		}
	}
	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from the range meta key
	// after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey.
	rdc.rangeCache.DoRange(func(k, v interface{}) {
		if log.V(1) {
			log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*roachpb.RangeDescriptor))
		}
		rdc.rangeCache.Del(k.(rangeCacheKey))
	}, rangeCacheKey(keys.RangeMetaKey(desc.StartKey).Next()),
		rangeCacheKey(keys.RangeMetaKey(desc.EndKey)))
}
Пример #4
0
// prev gives the right boundary of the union of all requests which don't
// affect keys larger than the given key.
// TODO(tschottdorf): again, better on BatchRequest itself, but can't pull
// 'keys' into 'proto'.
func prev(ba roachpb.BatchRequest, k roachpb.Key) roachpb.Key {
	candidate := roachpb.KeyMin
	for _, union := range ba.Requests {
		h := union.GetInner().Header()
		addr := keys.KeyAddress(h.Key)
		eAddr := keys.KeyAddress(h.EndKey)
		if len(eAddr) == 0 {
			// Can probably avoid having to compute Next() here if
			// we're in the mood for some more complexity.
			eAddr = addr.Next()
		}
		if !eAddr.Less(k) {
			if !k.Less(addr) {
				// Range contains k, so won't be able to go lower.
				return k
			}
			// Range is disjoint from [KeyMin,k).
			continue
		}
		// We want the largest surviving candidate.
		if candidate.Less(addr) {
			candidate = addr
		}
	}
	return candidate
}
Пример #5
0
// prettyPrintInternal parse key with prefix in keyDict,
// if the key don't march any prefix in keyDict, return its byte value with quotation and false,
// or else return its human readable value and true.
func prettyPrintInternal(key roachpb.Key) (string, bool) {
	var buf bytes.Buffer
	for _, k := range keyDict {
		if key.Compare(k.start) >= 0 && (k.end == nil || key.Compare(k.end) <= 0) {
			buf.WriteString(k.name)
			if k.end != nil && k.end.Compare(key) == 0 {
				buf.WriteString("/Max")
				return buf.String(), true
			}

			hasPrefix := false
			for _, e := range k.entries {
				if bytes.HasPrefix(key, e.prefix) {
					hasPrefix = true
					key = key[len(e.prefix):]
					fmt.Fprintf(&buf, "%s%s", e.name, e.ppFunc(key))
					break
				}
			}
			if !hasPrefix {
				key = key[len(k.start):]
				fmt.Fprintf(&buf, "/%q", []byte(key))
			}

			return buf.String(), true
		}
	}

	return fmt.Sprintf("%q", []byte(key)), false
}
Пример #6
0
// addKeyRange adds the specified key range to the interval cache,
// taking care not to add this range if existing entries already
// completely cover the range.
func (tm *txnMetadata) addKeyRange(start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the interval cache requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	key := tm.keys.MakeKey(start, end)
	for _, o := range tm.keys.GetOverlaps(key.Start, key.End) {
		if o.Key.Contains(key) {
			return
		} else if key.Contains(*o.Key) {
			tm.keys.Del(o.Key)
		}
	}

	// Since no existing key range fully covered this range, add it now. The
	// strange assignment to pkey makes sure we delay the heap allocation until
	// we know it is necessary.
	alloc := struct {
		key   cache.IntervalKey
		entry cache.Entry
	}{key: key}
	alloc.entry.Key = &alloc.key
	tm.keys.AddEntry(&alloc.entry)
}
Пример #7
0
// Encodes datum at the end of key, using direction `dir` for the encoding.
// The key is a span end key, which is exclusive, but `val` needs to
// be inclusive. So if datum is the last end constraint, we transform it accordingly.
func encodeInclusiveEndValue(
	key roachpb.Key, datum parser.Datum, dir encoding.Direction,
	isLastEndConstraint bool) roachpb.Key {
	// Since the end of a span is exclusive, if the last constraint is an
	// inclusive one, we might need to make the key exclusive by applying a
	// PrefixEnd().  We normally avoid doing this by transforming "a = x" to
	// "a = x±1" for the last end constraint, depending on the encoding direction
	// (since this keeps the key nice and pretty-printable).
	// However, we might not be able to do the ±1.
	needExclusiveKey := false
	if isLastEndConstraint {
		if dir == encoding.Ascending {
			if datum.IsMax() {
				needExclusiveKey = true
			} else {
				datum = datum.Next()
			}
		} else {
			if datum.IsMin() || !datum.HasPrev() {
				needExclusiveKey = true
			} else {
				datum = datum.Prev()
			}
		}
	}
	key, pErr := encodeTableKey(key, datum, dir)
	if pErr != nil {
		panic(pErr)
	}
	if needExclusiveKey {
		key = key.PrefixEnd()
	}
	return key
}
Пример #8
0
// GetIndex searches the kv list for 'key' and returns its index if found.
func (s SystemConfig) GetIndex(key roachpb.Key) (int, bool) {
	l := len(s.Values)
	index := sort.Search(l, func(i int) bool {
		return bytes.Compare(s.Values[i].Key, key) >= 0
	})
	if index == l || !key.Equal(s.Values[index].Key) {
		return 0, false
	}
	return index, true
}
Пример #9
0
// prettyKey pretty-prints the specified key, skipping over the first `skip`
// fields. The pretty printed key looks like:
//
//   /Table/<tableID>/<indexID>/...
//
// We always strip off the /Table prefix and then `skip` more fields. Note that
// this assumes that the fields themselves do not contain '/', but that is
// currently true for the fields we care about stripping (the table and index
// ID).
func prettyKey(key roachpb.Key, skip int) string {
	p := key.String()
	for i := 0; i <= skip; i++ {
		n := strings.IndexByte(p[1:], '/')
		if n == -1 {
			return ""
		}
		p = p[n+1:]
	}
	return p
}
Пример #10
0
// ComputeSplitKeys takes a start and end key and returns an array of keys
// at which to split the span [start, end).
// The only required splits are at each user table prefix.
func (s *SystemConfig) ComputeSplitKeys(startKey, endKey roachpb.Key) []roachpb.Key {
	if TestingDisableTableSplits {
		return nil
	}

	tableStart := roachpb.Key(keys.UserTableDataMin)
	if !tableStart.Less(endKey) {
		// This range is before the user tables span: no required splits.
		return nil
	}

	startID, ok := ObjectIDForKey(startKey)
	if !ok || startID <= keys.MaxReservedDescID {
		// The start key is either:
		// - not part of the structured data span
		// - part of the system span
		// In either case, start looking for splits at the first ID usable
		// by the user data span.
		startID = keys.MaxReservedDescID + 1
	} else {
		// The start key is either already a split key, or after the split
		// key for its ID. We can skip straight to the next one.
		startID++
	}

	// Find the largest object ID.
	// We can't keep splitting until we reach endKey as it could be roachpb.KeyMax.
	endID, err := s.GetLargestObjectID()
	if err != nil {
		log.Errorf("unable to determine largest object ID from system config: %s", err)
		return nil
	}

	// Build key prefixes for sequential table IDs until we reach endKey.
	var splitKeys roachpb.KeySlice
	var key roachpb.Key
	// endID could be smaller than startID if we don't have user tables.
	for id := startID; id <= endID; id++ {
		key = keys.MakeTablePrefix(id)
		// Skip if the range starts on a split key.
		if !startKey.Less(key) {
			continue
		}
		// Handle the case where EndKey is already a table prefix.
		if !key.Less(endKey) {
			break
		}
		splitKeys = append(splitKeys, key)
	}

	return splitKeys
}
Пример #11
0
// GetIndex searches the kv list for 'key' and returns its index if found.
func (s *SystemConfig) GetIndex(key roachpb.Key) (int, bool) {
	if s == nil {
		return 0, false
	}

	l := len(s.Values)
	index := sort.Search(l, func(i int) bool {
		return !s.Values[i].Key.Less(key)
	})
	if index == l || !key.Equal(s.Values[index].Key) {
		return 0, false
	}
	return index, true
}
// verifyBinarySearchTree checks to ensure that all keys to the left of the root
// node are less than it, and all nodes to the right of the root node are
// greater than it. It recursively walks the tree to perform this same check.
func verifyBinarySearchTree(t *testing.T, nodes map[string]roachpb.RangeTreeNode, testName string, node *roachpb.RangeTreeNode, keyMin, keyMax roachpb.Key) {
	if node == nil {
		return
	}
	if !node.Key.Less(keyMax) {
		t.Errorf("%s: Failed Property BST - The key %s is not less than %s.", testName, node.Key, keyMax)
	}
	// We need the extra check since roachpb.KeyMin is actually a range start key.
	if !keyMin.Less(node.Key) && !node.Key.Equal(roachpb.KeyMin) {
		t.Errorf("%s: Failed Property BST - The key %s is not greater than %s.", testName, node.Key, keyMin)
	}
	left, right := getLeftAndRight(t, nodes, testName, node)
	verifyBinarySearchTree(t, nodes, testName, left, keyMin, node.Key)
	verifyBinarySearchTree(t, nodes, testName, right, node.Key, keyMax)
}
Пример #13
0
// addKeyRange adds the specified key range to the range group,
// taking care not to add this range if existing entries already
// completely cover the range.
func addKeyRange(keys interval.RangeGroup, start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the range group requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	keyR := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	keys.Add(keyR)
}
Пример #14
0
// TODO(dt): Batch checks of many rows.
func (f baseFKHelper) check(values parser.DTuple) (parser.DTuple, error) {
	var key roachpb.Key
	if values != nil {
		keyBytes, _, err := sqlbase.EncodeIndexKey(f.searchIdx, f.ids, values, f.searchPrefix)
		if err != nil {
			return nil, err
		}
		key = roachpb.Key(keyBytes)
	} else {
		key = roachpb.Key(f.searchPrefix)
	}
	spans := sqlbase.Spans{sqlbase.Span{Start: key, End: key.PrefixEnd()}}
	if err := f.rf.StartScan(f.txn, spans, 1); err != nil {
		return nil, err
	}
	return f.rf.NextRow()
}
Пример #15
0
// GetMax returns the maximum read and write timestamps which overlap
// the interval spanning from start to end. Cached timestamps matching
// the specified txnID are not considered. If no part of the specified
// range is overlapped by timestamps in the cache, the low water
// timestamp is returned for both read and write timestamps.
//
// The txn ID prevents restarts with a pattern like: read("a"),
// write("a"). The read adds a timestamp for "a". Then the write (for
// the same transaction) would get that as the max timestamp and be
// forced to increment it. This allows timestamps from the same txn
// to be ignored.
func (tc *TimestampCache) GetMax(start, end roachpb.Key, txnID []byte) (roachpb.Timestamp, roachpb.Timestamp) {
	if len(end) == 0 {
		end = start.Next()
	}
	maxR := tc.lowWater
	maxW := tc.lowWater
	for _, o := range tc.cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if ce.readOnly && maxR.Less(ce.timestamp) {
				maxR = ce.timestamp
			} else if !ce.readOnly && maxW.Less(ce.timestamp) {
				maxW = ce.timestamp
			}
		}
	}
	return maxR, maxW
}
Пример #16
0
// ObjectIDForKey returns the object ID (table or database) for 'key',
// or (_, false) if not within the structured key space.
func ObjectIDForKey(key roachpb.Key) (uint32, bool) {
	if key.Equal(roachpb.KeyMax) {
		return 0, false
	}
	if key.Equal(keys.TableDataPrefix) {
		// TODO(marc): this should eventually return SystemDatabaseID.
		return 0, false
	}
	remaining := bytes.TrimPrefix(key, keys.TableDataPrefix)
	if len(remaining) == len(key) {
		// TrimPrefix returns the input untouched if the prefix doesn't match.
		return 0, false
	}

	// Consume first encoded int.
	_, id64, err := encoding.DecodeUvarint(remaining)
	return uint32(id64), err == nil
}
Пример #17
0
func (tc *TimestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readTSCache bool) roachpb.Timestamp {
	if len(end) == 0 {
		end = start.ShallowNext()
	}
	max := tc.lowWater
	cache := tc.wCache
	if readTSCache {
		cache = tc.rCache
	}
	for _, o := range cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if max.Less(ce.timestamp) {
				max = ce.timestamp
			}
		}
	}
	return max
}
Пример #18
0
// next gives the left boundary of the union of all requests which don't
// affect keys less than the given key.
// TODO(tschottdorf): again, better on BatchRequest itself, but can't pull
// 'keys' into 'proto'.
func next(ba roachpb.BatchRequest, k roachpb.Key) roachpb.Key {
	candidate := roachpb.KeyMax
	for _, union := range ba.Requests {
		h := union.GetInner().Header()
		addr := keys.KeyAddress(h.Key)
		if addr.Less(k) {
			if eAddr := keys.KeyAddress(h.EndKey); k.Less(eAddr) {
				// Starts below k, but continues beyond. Need to stay at k.
				return k
			}
			// Affects only [KeyMin,k).
			continue
		}
		// We want the smallest of the surviving candidates.
		if addr.Less(candidate) {
			candidate = addr
		}
	}
	return candidate
}
Пример #19
0
// PrettyPrint prints the key in a human readable format:
//
// Key's Format                                   Key's Value
// /Local/...                                     "\x01"+...
// 		/Store/...                                  "\x01s"+...
//		/RangeID/...                                "\x01s"+[rangeid]
//			/[rangeid]/AbortCache/[id]                "\x01s"+[rangeid]+"abc-"+[id]
//			/[rangeid]/RaftLeaderLease                "\x01s"+[rangeid]+"rfll"
//			/[rangeid]/RaftTombstone                  "\x01s"+[rangeid]+"rftb"
//			/[rangeid]/RaftHardState						      "\x01s"+[rangeid]+"rfth"
//			/[rangeid]/RaftAppliedIndex						    "\x01s"+[rangeid]+"rfta"
//			/[rangeid]/RaftLog/logIndex:[logIndex]    "\x01s"+[rangeid]+"rftl"+[logIndex]
//			/[rangeid]/RaftTruncatedState             "\x01s"+[rangeid]+"rftt"
//			/[rangeid]/RaftLastIndex                  "\x01s"+[rangeid]+"rfti"
//			/[rangeid]/RangeLastReplicaGCTimestamp    "\x01s"+[rangeid]+"rlrt"
//			/[rangeid]/RangeLastVerificationTimestamp "\x01s"+[rangeid]+"rlvt"
//			/[rangeid]/RangeStats                     "\x01s"+[rangeid]+"stat"
//		/Range/...                                  "\x01k"+...
//			/RangeDescriptor/[key]                    "\x01k"+[key]+"rdsc"
//			/RangeTreeNode/[key]                      "\x01k"+[key]+"rtn-"
//			/Transaction/addrKey:[key]/id:[id]				"\x01k"+[key]+"txn-"+[id]
// /Local/Max                                     "\x02"
//
// /Meta1/[key]                                   "\x02"+[key]
// /Meta2/[key]                                   "\x03"+[key]
// /System/...                                    "\x04"
//		/StatusNode/[key]                           "\x04status-node-"+[key]
// /System/Max                                    "\x05"
//
// /Table/[key]                                   [key]
//
// /Min                                           ""
// /Max                                           "\xff\xff"
func PrettyPrint(key roachpb.Key) string {
	for _, k := range constKeyDict {
		if key.Equal(k.value) {
			return k.name
		}
	}

	for _, k := range keyOfKeyDict {
		if bytes.HasPrefix(key, k.prefix) {
			key = key[len(k.prefix):]
			str, formatted := prettyPrintInternal(key)
			if formatted {
				return k.name + str
			}
			return k.name + "/" + str
		}
	}
	str, _ := prettyPrintInternal(key)
	return str
}
Пример #20
0
// validateRangeMetaKey validates that the given key is a valid Range Metadata
// key. This checks only the constraints common to forward and backwards scans:
// correct prefix and not exceeding KeyMax.
func validateRangeMetaKey(key roachpb.Key) error {
	// KeyMin is a valid key.
	if key.Equal(roachpb.KeyMin) {
		return nil
	}
	// Key must be at least as long as Meta1Prefix.
	if len(key) < len(Meta1Prefix) {
		return NewInvalidRangeMetaKeyError("too short", key)
	}

	prefix, body := roachpb.Key(key[:len(Meta1Prefix)]), roachpb.Key(key[len(Meta1Prefix):])
	if !prefix.Equal(Meta2Prefix) && !prefix.Equal(Meta1Prefix) {
		return NewInvalidRangeMetaKeyError("not a meta key", key)
	}

	if roachpb.KeyMax.Less(body) {
		return NewInvalidRangeMetaKeyError("body of meta key range lookup is > KeyMax", key)
	}
	return nil
}
Пример #21
0
// getCachedRangeDescriptorLocked is a helper function to retrieve the
// descriptor of the range which contains the given key, if present in the
// cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu.
func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key roachpb.Key, inclusive bool) (
	rangeCacheKey, *roachpb.RangeDescriptor) {
	// The cache is indexed using the end-key of the range, but the
	// end-key is non-inclusive by default.
	var metaKey roachpb.Key
	if !inclusive {
		metaKey = keys.RangeMetaKey(key.Next())
	} else {
		metaKey = keys.RangeMetaKey(key)
	}

	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if !ok {
		return nil, nil
	}
	metaEndKey := k.(rangeCacheKey)
	rd := v.(*roachpb.RangeDescriptor)

	// Check that key actually belongs to the range.
	if !rd.ContainsKey(key) {
		// The key is the EndKey and we're inclusive, so just return the range descriptor.
		if inclusive && key.Equal(rd.EndKey) {
			return metaEndKey, rd
		}
		return nil, nil
	}

	// The key is the StartKey, but we're inclusive and thus need to return the
	// previous range descriptor, but it is not in the cache yet.
	if inclusive && key.Equal(rd.StartKey) {
		return nil, nil
	}
	return metaEndKey, rd
}
Пример #22
0
// MetaReverseScanBounds returns the range [start,end) within which the desired
// meta record can be found by means of a reverse engine scan. The given key
// must be a valid RangeMetaKey as defined by validateRangeMetaKey.
func MetaReverseScanBounds(key roachpb.Key) (roachpb.Key, roachpb.Key, error) {
	if err := validateRangeMetaKey(key); err != nil {
		return nil, nil, err
	}

	if key.Equal(roachpb.KeyMin) || key.Equal(Meta1Prefix) {
		return nil, nil, NewInvalidRangeMetaKeyError("KeyMin and Meta1Prefix can't be used as the key of reverse scan", key)
	}
	if key.Equal(Meta2Prefix) {
		// Special case Meta2Prefix: this is the first key in Meta2, and the scan
		// interval covers all of Meta1.
		return Meta1Prefix, key.Next(), nil
	}
	// Otherwise find the first entry greater than the given key and find the last entry
	// in the same prefix. For MVCCReverseScan the endKey is exclusive, if we want to find
	// the range descriptor the given key specified,we need to set the key.Next() as the
	// MVCCReverseScan`s endKey. For example:
	// If we have ranges [a,f) and [f,z), then we'll have corresponding meta records
	// at f and z. If you're looking for the meta record for key f, then you want the
	// second record (exclusive in MVCCReverseScan), hence key.Next() below.
	return key[:len(Meta1Prefix)], key.Next(), nil
}
Пример #23
0
// addKeyRange adds the specified key range to the interval cache,
// taking care not to add this range if existing entries already
// completely cover the range.
func (tm *txnMetadata) addKeyRange(start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the interval cache requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	key := tm.keys.NewKey(start, end)
	for _, o := range tm.keys.GetOverlaps(start, end) {
		if o.Key.Contains(key) {
			return
		} else if key.Contains(o.Key) {
			tm.keys.Del(o.Key)
		}
	}

	// Since no existing key range fully covered this range, add it now.
	tm.keys.Add(key, nil)
}
Пример #24
0
// verifyBinarySearchTree checks to ensure that all keys to the left of the root
// node are less than it, and all nodes to the right of the root node are
// greater than it. It recursively walks the tree to perform this same check.
func verifyBinarySearchTree(t *testing.T, tc *treeContext, testName string, node *roachpb.RangeTreeNode, keyMin, keyMax roachpb.Key) {
	if !node.Key.Less(keyMax) {
		t.Errorf("%s: Failed Property BST - The key %s is not less than %s.", testName, node.Key, keyMax)
	}
	if !keyMin.Less(node.Key) {
		t.Errorf("%s: Failed Property BST - The key %s is not greater than %s.", testName, node.Key, keyMin)
	}

	if node.LeftKey != nil {
		left, err := tc.getNode(node.LeftKey)
		if err != nil {
			t.Fatal(err)
		}
		verifyBinarySearchTree(t, tc, testName, left, keyMin, node.Key)
	}
	if node.RightKey != nil {
		right, err := tc.getNode(node.RightKey)
		if err != nil {
			t.Fatal(err)
		}
		verifyBinarySearchTree(t, tc, testName, right, node.Key, keyMax)
	}
}
Пример #25
0
// PrettyPrint prints the key in a human readable format:
//
// Key's Format                                   Key's Value
// /Local/...                                     "\x01"+...
// 		/Store/...                                  "\x01s"+...
//		/RangeID/...                                "\x01s"+[rangeid]
//			/[rangeid]/SequenceCache/[id]/seq:[seq]   "\x01s"+[rangeid]+"res-"+[id]+[seq]
//			/[rangeid]/RaftLeaderLease                "\x01s"+[rangeid]+"rfll"
//			/[rangeid]/RaftTombstone                  "\x01s"+[rangeid]+"rftb"
//			/[rangeid]/RaftHardState						      "\x01s"+[rangeid]+"rfth"
//			/[rangeid]/RaftAppliedIndex						    "\x01s"+[rangeid]+"rfta"
//			/[rangeid]/RaftLog/logIndex:[logIndex]    "\x01s"+[rangeid]+"rftl"+[logIndex]
//			/[rangeid]/RaftTruncatedState             "\x01s"+[rangeid]+"rftt"
//			/[rangeid]/RaftLastIndex                  "\x01s"+[rangeid]+"rfti"
//			/[rangeid]/RangeLastVerificationTimestamp "\x01s"+[rangeid]+"rlvt"
//			/[rangeid]/RangeStats                     "\x01s"+[rangeid]+"stat"
//		/Range/...                                  "\x01k"+...
//			/RangeDescriptor/[key]                    "\x01k"+[key]+"rdsc"
//			/RangeTreeNode/[key]                      "\x01k"+[key]+"rtn-"
//			/Transaction/addrKey:[key]/id:[id]				"\x01k"+[key]+"txn-"+[id]
// /Local/Max                                     "\x02"
//
// /Meta1/[key]                                   "\x02"+[key]
// /Meta2/[key]                                   "\x03"+[key]
// /System/...                                    "\x04"
//		/StatusStore/[key]                          "\x04status-store-"+[key]
//		/StatusNode/[key]                           "\x04status-node-"+[key]
// /System/Max                                    "\x05"
//
// /Table/[key]                                   [key]
//
// /Min                                           ""
// /Max                                           "\xff\xff"
func PrettyPrint(key roachpb.Key) string {
	if bytes.Equal(key, MaxKey) {
		return "/Max"
	} else if bytes.Equal(key, MinKey) {
		return "/Min"
	}

	var buf bytes.Buffer
	for _, k := range keyDict {
		if key.Compare(k.start) >= 0 && (k.end == nil || key.Compare(k.end) <= 0) {
			fmt.Fprintf(&buf, "%s", k.name)
			if k.end != nil && k.end.Compare(key) == 0 {
				fmt.Fprintf(&buf, "/Max")
				return buf.String()
			}

			hasPrefix := false
			for _, e := range k.entries {
				if bytes.HasPrefix(key, e.prefix) {
					hasPrefix = true
					key = key[len(e.prefix):]

					fmt.Fprintf(&buf, "%s%s", e.name, e.ppFunc(key))
					break
				}
			}
			if !hasPrefix {
				key = key[len(k.start):]
				fmt.Fprintf(&buf, "/%q", []byte(key))
			}

			return buf.String()
		}
	}

	return fmt.Sprintf("%q", []byte(key))
}
Пример #26
0
// MetaScanBounds returns the range [start,end) within which the desired meta
// record can be found by means of an engine scan. The given key must be a
// valid RangeMetaKey as defined by validateRangeMetaKey.
func MetaScanBounds(key roachpb.Key) (roachpb.Key, roachpb.Key, error) {
	if err := validateRangeMetaKey(key); err != nil {
		return nil, nil, err
	}

	if key.Equal(Meta2KeyMax) {
		return nil, nil, NewInvalidRangeMetaKeyError("Meta2KeyMax can't be used as the key of scan", key)
	}

	if key.Equal(roachpb.KeyMin) {
		// Special case KeyMin: find the first entry in meta1.
		return Meta1Prefix, Meta1Prefix.PrefixEnd(), nil
	}
	if key.Equal(Meta1KeyMax) {
		// Special case Meta1KeyMax: this is the last key in Meta1, we don't want
		// to start at Next().
		return key, Meta1Prefix.PrefixEnd(), nil
	}
	// Otherwise find the first entry greater than the given key in the same meta prefix.
	return key.Next(), roachpb.Key(key[:len(Meta1Prefix)]).PrefixEnd(), nil
}
Пример #27
0
func (sc *SchemaChanger) truncateAndBackfillColumnsChunk(
	added []sqlbase.ColumnDescriptor,
	dropped []sqlbase.ColumnDescriptor,
	defaultExprs []parser.TypedExpr,
	evalCtx *parser.EvalContext,
	sp sqlbase.Span,
) (roachpb.Key, bool, error) {
	var curIndexKey roachpb.Key
	done := false
	err := sc.db.Txn(func(txn *client.Txn) error {
		tableDesc, err := getTableDescFromID(txn, sc.tableID)
		if err != nil {
			return err
		}
		// Short circuit the backfill if the table has been deleted.
		if tableDesc.Deleted() {
			done = true
			return nil
		}

		updateCols := append(added, dropped...)
		fkTables := TablesNeededForFKs(*tableDesc, CheckUpdates)
		for k := range fkTables {
			if fkTables[k], err = getTableDescFromID(txn, k); err != nil {
				return err
			}
		}
		// TODO(dan): Tighten up the bound on the requestedCols parameter to
		// makeRowUpdater.
		requestedCols := make([]sqlbase.ColumnDescriptor, 0, len(tableDesc.Columns)+len(added))
		requestedCols = append(requestedCols, tableDesc.Columns...)
		requestedCols = append(requestedCols, added...)
		ru, err := makeRowUpdater(
			txn, tableDesc, fkTables, updateCols, requestedCols, rowUpdaterOnlyColumns,
		)
		if err != nil {
			return err
		}

		// TODO(dan): This check is an unfortunate bleeding of the internals of
		// rowUpdater. Extract the sql row to k/v mapping logic out into something
		// usable here.
		if !ru.isColumnOnlyUpdate() {
			panic("only column data should be modified, but the rowUpdater is configured otherwise")
		}

		// Run a scan across the table using the primary key. Running
		// the scan and applying the changes in many transactions is
		// fine because the schema change is in the correct state to
		// handle intermediate OLTP commands which delete and add
		// values during the scan.
		var rf sqlbase.RowFetcher
		colIDtoRowIndex := colIDtoRowIndexFromCols(tableDesc.Columns)
		valNeededForCol := make([]bool, len(tableDesc.Columns))
		for i := range valNeededForCol {
			_, valNeededForCol[i] = ru.fetchColIDtoRowIndex[tableDesc.Columns[i].ID]
		}
		err = rf.Init(tableDesc, colIDtoRowIndex, &tableDesc.PrimaryIndex, false, false,
			tableDesc.Columns, valNeededForCol)
		if err != nil {
			return err
		}
		// StartScan uses 0 as a sentinal for the default limit of entries scanned.
		if err := rf.StartScan(txn, sqlbase.Spans{sp}, 0); err != nil {
			return err
		}

		indexKeyPrefix := sqlbase.MakeIndexKeyPrefix(tableDesc, tableDesc.PrimaryIndex.ID)
		oldValues := make(parser.DTuple, len(ru.fetchCols))
		updateValues := make(parser.DTuple, len(updateCols))

		writeBatch := &client.Batch{}
		var i int
		for ; i < ColumnTruncateAndBackfillChunkSize; i++ {
			row, err := rf.NextRow()
			if err != nil {
				return err
			}
			if row == nil {
				break // Done
			}

			curIndexKey, _, err = sqlbase.EncodeIndexKey(
				tableDesc, &tableDesc.PrimaryIndex, colIDtoRowIndex, row, indexKeyPrefix)

			for j, col := range added {
				if defaultExprs == nil || defaultExprs[j] == nil {
					updateValues[j] = parser.DNull
				} else {
					updateValues[j], err = defaultExprs[j].Eval(evalCtx)
					if err != nil {
						return err
					}
				}
				if !col.Nullable && updateValues[j].Compare(parser.DNull) == 0 {
					return sqlbase.NewNonNullViolationError(col.Name)
				}
			}
			for j := range dropped {
				updateValues[j+len(added)] = parser.DNull
			}

			copy(oldValues, row)
			for j := len(row); j < len(oldValues); j++ {
				oldValues[j] = parser.DNull
			}
			if _, err := ru.updateRow(writeBatch, oldValues, updateValues); err != nil {
				return err
			}
		}
		if i < ColumnTruncateAndBackfillChunkSize {
			done = true
		}

		if err := txn.Run(writeBatch); err != nil {
			return convertBackfillError(tableDesc, writeBatch)
		}
		return nil
	})
	return curIndexKey.PrefixEnd(), done, err
}
Пример #28
0
// fetch retrieves spans from the kv
func (f *kvFetcher) fetch() error {
	batchSize := f.getBatchSize()

	b := &client.Batch{}
	b.Header.MaxScanResults = batchSize

	var resumeKey roachpb.Key
	if len(f.kvs) > 0 {
		resumeKey = f.kvs[len(f.kvs)-1].Key
		// To resume forward scans we will set the (inclusive) scan start to the Next of the last
		// received key. To resume reverse scans we will set the (exclusive) scan end to the last
		// received key.
		if !f.reverse {
			resumeKey = resumeKey.Next()
		}
	}

	atEnd := true
	if !f.reverse {
		for i := 0; i < len(f.spans); i++ {
			start := f.spans[i].Start
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].End) >= 0 {
					// We are resuming from a key after this span.
					continue
				}
				if resumeKey.Compare(start) > 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					start = resumeKey
				}
			}
			atEnd = false
			b.Scan(start, f.spans[i].End, f.spans[i].Count)
		}
	} else {
		for i := len(f.spans) - 1; i >= 0; i-- {
			end := f.spans[i].End
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].Start) <= 0 {
					// We are resuming from a key before this span.
					continue
				}
				if resumeKey.Compare(end) < 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					end = resumeKey
				}
			}
			atEnd = false
			b.ReverseScan(f.spans[i].Start, end, f.spans[i].Count)
		}
	}

	if atEnd {
		// The last scan happened to finish just at the end of the last span.
		f.kvs = nil
		f.fetchEnd = true
		return nil
	}

	if err := f.txn.Run(b); err != nil {
		return err
	}

	if f.kvs == nil {
		numResults := 0
		for _, result := range b.Results {
			numResults += len(result.Rows)
		}
		f.kvs = make([]client.KeyValue, 0, numResults)
	} else {
		f.kvs = f.kvs[:0]
	}

	for _, result := range b.Results {
		f.kvs = append(f.kvs, result.Rows...)
	}

	f.batchIdx++
	f.totalFetched += int64(len(f.kvs))
	f.kvIndex = 0

	if int64(len(f.kvs)) < batchSize {
		f.fetchEnd = true
	}

	// TODO(radu): We should fetch the next chunk in the background instead of waiting for the next
	// call to fetch(). We can use a pool of workers to issue the KV ops which will also limit the
	// total number of fetches that happen in parallel (and thus the amount of resources we use).
	return nil
}
Пример #29
0
func (sc *SchemaChanger) truncateAndBackfillColumnsChunk(
	added []sqlbase.ColumnDescriptor,
	dropped []sqlbase.ColumnDescriptor,
	nonNullableColumn string,
	defaultExprs []parser.TypedExpr,
	evalCtx parser.EvalContext,
	sp sqlbase.Span,
) (roachpb.Key, bool, error) {
	var curSentinel roachpb.Key
	done := false
	err := sc.db.Txn(func(txn *client.Txn) error {
		tableDesc, err := getTableDescFromID(txn, sc.tableID)
		if err != nil {
			return err
		}
		// Short circuit the backfill if the table has been deleted.
		if tableDesc.Deleted {
			done = true
			return nil
		}

		// Run a scan across the table using the primary key. Running
		// the scan and applying the changes in many transactions is
		// fine because the schema change is in the correct state to
		// handle intermediate OLTP commands which delete and add
		// values during the scan.
		b := &client.Batch{}
		b.Scan(sp.Start, sp.End, ColumnTruncateAndBackfillChunkSize)
		if err := txn.Run(b); err != nil {
			return err
		}

		// Use a different batch to truncate/backfill columns.
		writeBatch := &client.Batch{}
		marshalled := make([]roachpb.Value, len(defaultExprs))
		done = true
		for _, result := range b.Results {
			var sentinelKey roachpb.Key
			for _, kv := range result.Rows {
				// Still processing table.
				done = false
				if nonNullableColumn != "" {
					return fmt.Errorf("column %s contains null values", nonNullableColumn)
				}

				if sentinelKey == nil || !bytes.HasPrefix(kv.Key, sentinelKey) {
					// Sentinel keys have a 0 suffix indicating 0 bytes of
					// column ID. Strip off that suffix to determine the
					// prefix shared with the other keys for the row.
					sentinelKey = sqlbase.StripColumnIDLength(kv.Key)
					// Store away key for the next table row as the point from
					// which to start from.
					curSentinel = sentinelKey

					// Delete the entire dropped columns. This used to use SQL
					// UPDATE in the past to update the dropped column to
					// NULL; but a column in the process of being dropped is
					// placed in the table descriptor mutations, and a SQL
					// UPDATE of a column in mutations will fail.
					for _, columnDesc := range dropped {
						// Delete the dropped column.
						colKey := keys.MakeColumnKey(sentinelKey, uint32(columnDesc.ID))
						if log.V(2) {
							log.Infof("Del %s", colKey)
						}
						writeBatch.Del(colKey)
					}

					// Add the new columns and backfill the values.
					for i, expr := range defaultExprs {
						if expr == nil {
							continue
						}
						col := added[i]
						colKey := keys.MakeColumnKey(sentinelKey, uint32(col.ID))
						d, err := expr.Eval(evalCtx)
						if err != nil {
							return err
						}
						marshalled[i], err = sqlbase.MarshalColumnValue(col, d)
						if err != nil {
							return err
						}

						if log.V(2) {
							log.Infof("Put %s -> %v", colKey, d)
						}
						// Insert default value into the column. If this row
						// was recently added the default value might have
						// already been populated, because the
						// ColumnDescriptor is in the WRITE_ONLY state.
						// Reinserting the default value is not a big deal.
						//
						// Note: a column in the WRITE_ONLY state cannot be
						// populated directly through SQL. A SQL INSERT cannot
						// directly reference the column, and the INSERT
						// populates the column with the default value.
						writeBatch.Put(colKey, &marshalled[i])
					}
				}
			}
		}
		if err := txn.Run(writeBatch); err != nil {
			for _, r := range writeBatch.Results {
				if r.PErr != nil {
					return convertBackfillError(tableDesc, writeBatch, r.PErr)
				}
			}
			return err
		}
		return nil
	})
	return curSentinel.PrefixEnd(), done, err
}
Пример #30
0
// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp; false to update the write timestamp cache.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID *uuid.UUID, readTSCache bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		cache := tc.wCache
		if readTSCache {
			cache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := cache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			cache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, o := range cache.GetOverlaps(r.Start, r.End) {
			cv := o.Value.(*cacheValue)
			sCmp := r.Start.Compare(o.Key.Start)
			eCmp := r.End.Compare(o.Key.End)
			if !timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp less than or equal to the new interval.
				// Compare interval ranges to determine how to modify existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					cache.MoveToEnd(o.Entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// Old:
					cache.DelEntry(o.Entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// Old: ----    ----
					oldEnd := o.Key.End
					o.Key.End = r.Start

					key := cache.MakeKey(r.End, oldEnd)
					entry := makeCacheEntry(key, *cv)
					cache.AddEntryAfter(entry, o.Entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// Old: ----              ----
					o.Key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// Old:         ----              ----
					o.Key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			} else {
				// The existing interval has a timestamp greater than the new interval.
				// Compare interval ranges to determine how to modify new interval before
				// adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up old into two. We can add the left piece
					// immediately because it is guaranteed to be before the rest of the
					// overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: o.Key.Start}
					addRange(lr)

					r.Start = o.Key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// New:         ----              ----
					r.Start = o.Key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// New: ----              ----
					r.End = o.Key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			}
		}
		addRange(r)
	}
}