Exemplo n.º 1
0
// getMetadata collects up to three range descriptors from db.data,
// starting from the first node returned by Ceil for a probe whose EndKey
// is engine.NextKey(key). After each hit the search key is advanced to
// engine.NextKey of the found descriptor's EndKey. Collection stops early
// when Ceil returns nil. The descriptors are returned by value.
//
// NOTE(review): Ceil presumably returns the smallest stored node that is
// not less than the probe; confirm against the tree implementation.
func (db *testMetadataDB) getMetadata(key engine.Key) []proto.RangeDescriptor {
	const maxDescriptors = 3
	descs := make([]proto.RangeDescriptor, 0, maxDescriptors)
	for len(descs) < maxDescriptors {
		probe := testMetadataNode{
			&proto.RangeDescriptor{
				EndKey: engine.NextKey(key),
			},
		}
		found := db.data.Ceil(probe)
		if found == nil {
			return descs
		}
		desc := *(found.(testMetadataNode).RangeDescriptor)
		descs = append(descs, desc)
		// Continue the search just past the descriptor that was found.
		key = engine.NextKey(desc.EndKey)
	}
	return descs
}
Exemplo n.º 2
0
// AddWrite registers a pending write covering the key range from start
// to end. If end is nil, it defaults to engine.NextKey(start), meaning
// the write affects a single key. The returned value is the cache key
// under which the write was stored; callers must hand it back on the
// subsequent invocation of RemoveWrite().
//
// AddWrite is invoked as a mutating command is added to the queue of
// Raft proposals. As multiple commands may be in the proposal state,
// writes may overlap.
func (rq *ReadQueue) AddWrite(start, end engine.Key) interface{} {
	if end == nil {
		end = engine.NextKey(start)
	}
	lo, hi := rangeKey(start), rangeKey(end)
	writeKey := rq.cache.NewKey(lo, hi)
	// A fresh write starts with no readers waiting on it.
	rq.cache.Add(writeKey, &write{})
	return writeKey
}
Exemplo n.º 3
0
// AddRead queues a read over the key range from start to end. If end is
// nil, it defaults to engine.NextKey(start), meaning the read affects a
// single key. For every pending write in the cache that overlaps the
// range, wg is appended to that write's pending list and incremented by
// one. The caller should call wg.Wait() to wait for confirmation that
// all overlapping pending writes have completed or failed.
func (rq *ReadQueue) AddRead(start, end engine.Key, wg *sync.WaitGroup) {
	if end == nil {
		end = engine.NextKey(start)
	}
	overlaps := rq.cache.GetOverlaps(rangeKey(start), rangeKey(end))
	for _, entry := range overlaps {
		pendingWrite := entry.(*write)
		// Register the reader with the write, then account for it on
		// the caller's WaitGroup.
		pendingWrite.pending = append(pendingWrite.pending, wg)
		wg.Add(1)
	}
}
Exemplo n.º 4
0
// GetMax reports the largest read timestamp recorded for any cache entry
// overlapping the interval from start to end. If end is nil, it defaults
// to engine.NextKey(start). The result is never lower than the cache's
// high water timestamp, which is also what is returned when no entry
// overlaps the interval.
func (rtc *ReadTimestampCache) GetMax(start, end engine.Key) hlc.HLTimestamp {
	if end == nil {
		end = engine.NextKey(start)
	}
	overlaps := rtc.cache.GetOverlaps(rangeKey(start), rangeKey(end))
	// Start from the high water mark and raise it for each later entry.
	latest := rtc.highWater
	for _, entry := range overlaps {
		if ts := entry.(hlc.HLTimestamp); latest.Less(ts) {
			latest = ts
		}
	}
	return latest
}
Exemplo n.º 5
0
// Add records timestamp as covering the range of keys from start to end.
// If end is nil, it defaults to engine.NextKey(start), so the range
// covers the start key only. The cache's latest timestamp is advanced
// whenever timestamp is newer, regardless of whether an entry is stored.
func (tc *TimestampCache) Add(start, end engine.Key, timestamp proto.Timestamp) {
	if end == nil {
		end = engine.NextKey(start)
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Timestamps at or below the high water mark are not stored as
	// individual entries.
	if !tc.highWater.Less(timestamp) {
		return
	}
	entryKey := tc.cache.NewKey(rangeKey(start), rangeKey(end))
	tc.cache.Add(entryKey, timestamp)
}
Exemplo n.º 6
0
// InternalRangeLookup is used to look up RangeDescriptors - a RangeDescriptor
// is a metadata structure which describes the key range and replica locations
// of a distinct range in the cluster.
//
// RangeDescriptors are stored as values in the cockroach cluster's key-value
// store. However, they are always stored using special "Range Metadata keys",
// which are "ordinary" keys carrying a special prefix. The Range Metadata Key
// for an ordinary key can be generated with the `engine.RangeMetaKey(key)`
// function. The RangeDescriptor for the range which contains a given key can
// be retrieved by generating its Range Metadata Key and dispatching it to
// InternalRangeLookup.
//
// Note that the Range Metadata Key sent to InternalRangeLookup is NOT the key
// at which the desired RangeDescriptor is stored. Instead, this method returns
// the RangeDescriptor stored at the _lowest_ existing key which is _greater_
// than the given key. The returned RangeDescriptor will thus contain the
// ordinary key which was originally used to generate the Range Metadata Key
// sent to InternalRangeLookup.
//
// This method has an important optimization: instead of just returning the
// requested RangeDescriptor, it also returns a slice of additional range
// descriptors immediately consecutive to the desired RangeDescriptor, up to
// args.MaxRanges in total. This is intended to serve as a sort of caching
// pre-fetch, so that the requesting nodes can aggressively cache
// RangeDescriptors which are likely to be desired by their current workload.
//
// Results are delivered through reply: on success reply.Ranges holds the
// decoded descriptors; on any failure reply.Error is set and reply.Ranges is
// left untouched.
func (r *Range) InternalRangeLookup(args *InternalRangeLookupRequest, reply *InternalRangeLookupResponse) {
	if err := engine.ValidateRangeMetaKey(args.Key); err != nil {
		reply.Error = err
		return
	}

	maxRanges := int64(args.MaxRanges)
	if maxRanges < 1 {
		reply.Error = util.Errorf(
			"Range lookup specified invalid maximum range count %d: must be > 0", maxRanges)
		return
	}

	// Scan from the key just greater than args.Key up to the end of the
	// key space sharing its metadata-level prefix, fetching at most
	// maxRanges entries.
	metaPrefix := args.Key[:len(engine.KeyMeta1Prefix)]
	scanStart := engine.NextKey(args.Key)
	scanEnd := engine.PrefixEndKey(metaPrefix)
	kvs, scanErr := r.engine.Scan(scanStart, scanEnd, maxRanges)
	if scanErr != nil {
		reply.Error = scanErr
		return
	}

	if len(kvs) == 0 {
		// At this point the range has been verified to contain the requested
		// key, but no matching results were returned from the scan. This could
		// indicate a very bad system error, but for now we will just treat it
		// as a retryable Key Mismatch error.
		reply.Error = NewRangeKeyMismatchError(args.Key, args.Key, r.Meta)
		log.Errorf("InternalRangeLookup dispatched to correct range, but no matching RangeDescriptor was found. %s",
			reply.Error.Error())
		return
	}

	// Gob-decode every scanned value into its own RangeDescriptor. The scan
	// was bounded by the prefix end key, so no per-key prefix check is
	// performed here.
	descs := make([]*RangeDescriptor, 0, len(kvs))
	for _, kv := range kvs {
		desc := &RangeDescriptor{}
		if err := gob.NewDecoder(bytes.NewBuffer(kv.Value)).Decode(desc); err != nil {
			reply.Error = err
			return
		}
		descs = append(descs, desc)
	}

	reply.Ranges = descs
}
Exemplo n.º 7
0
// InternalRangeLookup looks up the metadata info for the given args.Key.
// args.Key should be a metadata key, which are of the form
// "\0\0meta[12]<encoded_key>".
//
// The lookup scans for the first stored key strictly greater than args.Key
// and gob-decodes its value into reply.Range; reply.EndKey is set to the key
// at which that value was found. Failures are reported through reply.Error:
//   - args.Key lacks engine.KeyMetaPrefix or is too short to carry a
//     metadata-level prefix of len(engine.KeyMeta1Prefix) bytes;
//   - args.Key is not less than r.Meta.EndKey;
//   - the scan or the decode fails;
//   - the scan finds nothing, or finds a key with a different
//     metadata-level prefix;
//   - args.Key sorts before the decoded range's StartKey.
func (r *Range) InternalRangeLookup(args *InternalRangeLookupRequest, reply *InternalRangeLookupResponse) {
	// The metadata-level prefix is sliced out of args.Key below, so the key
	// must be at least that long. Checking only KeyMetaPrefix is not enough:
	// a shorter key would make the slice expression panic or expose bytes
	// beyond the key's length.
	if !bytes.HasPrefix(args.Key, engine.KeyMetaPrefix) || len(args.Key) < len(engine.KeyMeta1Prefix) {
		reply.Error = util.Errorf("invalid metadata key: %q", args.Key)
		return
	}

	// Validate that key is not outside the range. A range ends just
	// before its Meta.EndKey.
	if !args.Key.Less(r.Meta.EndKey) {
		reply.Error = util.Errorf("key outside the range %v with end key %q", r.Meta.RangeID, r.Meta.EndKey)
		return
	}

	// We want to search for the metadata key just greater than args.Key.
	nextKey := engine.NextKey(args.Key)
	kvs, err := r.engine.Scan(nextKey, engine.KeyMax, 1)
	if err != nil {
		reply.Error = err
		return
	}
	// We should have gotten the key with the same metadata level prefix as we queried.
	metaPrefix := args.Key[:len(engine.KeyMeta1Prefix)]
	if len(kvs) != 1 || !bytes.HasPrefix(kvs[0].Key, metaPrefix) {
		reply.Error = util.Errorf("key not found in range %v", r.Meta.RangeID)
		return
	}

	if err = gob.NewDecoder(bytes.NewBuffer(kvs[0].Value)).Decode(&reply.Range); err != nil {
		reply.Error = err
		return
	}
	if args.Key.Less(reply.Range.StartKey) {
		// args.Key doesn't belong to this range. We are perhaps searching the wrong node?
		reply.Error = util.Errorf("no range found for key %q in range: %+v", args.Key, r.Meta)
		return
	}
	reply.EndKey = kvs[0].Key
}
Exemplo n.º 8
0
// Add stores the given read timestamp in the cache for the range of keys
// from start to end. If end is nil, it defaults to engine.NextKey(start),
// so the range covers the start key only.
func (rtc *ReadTimestampCache) Add(start, end engine.Key, timestamp hlc.HLTimestamp) {
	if end == nil {
		end = engine.NextKey(start)
	}
	interval := rtc.cache.NewKey(rangeKey(start), rangeKey(end))
	rtc.cache.Add(interval, timestamp)
}
Exemplo n.º 9
0
// Init prepares the store for use. In order, it:
//
//  1. closes the store, so that Init may be called repeatedly;
//  2. starts the underlying engine;
//  3. creates the Raft ID and range ID allocators;
//  4. installs the engine's GC timeout callback;
//  5. reads the store ident, returning a *NotBootstrappedError when none
//     has been written yet;
//  6. scans the local range metadata in batches, creating and starting a
//     Range for each entry and registering it in s.ranges and
//     s.rangesByKey, which is sorted before returning.
//
// Any error from the engine or from unmarshalling range metadata is
// returned as is.
func (s *Store) Init() error {
	// Close store for idempotency.
	s.Close()

	if err := s.engine.Start(); err != nil {
		return err
	}

	s.raftIDAlloc = NewIDAllocator(engine.KeyRaftIDGenerator, s.db, 2, raftIDAllocCount)
	s.rangeIDAlloc = NewIDAllocator(engine.KeyRangeIDGenerator, s.db, 2, rangeIDAllocCount)

	// The GC timeouts callback is invoked each time an engine compaction
	// is underway. It supplies minimum timestamps for transaction records
	// and response cache entries.
	s.engine.SetGCTimeouts(func() (minTxnTS, minRCacheTS int64) {
		wallNanos := s.clock.Now().WallTime
		// A zero transaction timestamp disables GC of transactions until
		// the minimum write intent age is known.
		return 0, wallNanos - GCResponseCacheExpiration.Nanoseconds()
	})

	// A missing store ident means the store was never bootstrapped.
	found, err := engine.GetProto(s.engine, engine.KeyLocalIdent, &s.Ident)
	if err != nil {
		return err
	}
	if !found {
		return &NotBootstrappedError{}
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Walk the local range metadata in fixed-size batches; a short batch
	// signals the end of the key space under the prefix.
	const batchSize = 64
	scanStart := engine.KeyLocalRangeMetadataPrefix
	scanEnd := engine.PrefixEndKey(scanStart)
	for {
		batch, err := s.engine.Scan(scanStart, scanEnd, batchSize)
		if err != nil {
			return err
		}
		for i := range batch {
			var meta proto.RangeMetadata
			if err := gogoproto.Unmarshal(batch[i].Value, &meta); err != nil {
				return err
			}
			rng := NewRange(&meta, s.clock, s.engine, s.allocator, s.gossip, s)
			rng.Start()
			s.ranges[meta.RangeID] = rng
			s.rangesByKey = append(s.rangesByKey, rng)
		}
		if len(batch) < batchSize {
			break
		}
		// Resume immediately after the last key of the full batch.
		scanStart = engine.NextKey(batch[batchSize-1].Key)
	}

	// Ensure that ranges are sorted.
	sort.Sort(s.rangesByKey)

	return nil
}