func (db *testMetadataDB) splitRange(t *testing.T, key engine.Key) { metadataKey := engine.RangeMetaKey(key) v := db.data.Ceil(&testMetadataNode{endKey: metadataKey}) if v == nil { t.Fatalf("Error splitting range at key %s, range to split not found", string(key)) } val := v.(*testMetadataNode) if bytes.Compare(val.desc.EndKey, key) == 0 { t.Fatalf("Attempt to split existing range at Endkey: %s", string(key)) } db.data.Insert(&testMetadataNode{ endKey: metadataKey, desc: &storage.RangeDescriptor{ StartKey: val.desc.StartKey, EndKey: key, }, }) db.data.Insert(&testMetadataNode{ endKey: val.endKey, desc: &storage.RangeDescriptor{ StartKey: key, EndKey: val.desc.EndKey, }, }) }
// TestRangeCache is a simple test which verifies that metadata ranges are being // cached and retrieved properly. It sets up a fake backing store for the // cache, and measures how often that backing store is accessed when looking up // metadata keys through the cache. func TestRangeCache(t *testing.T) { db := newTestMetadataDB() db.splitRange(t, engine.Key("a")) db.splitRange(t, engine.Key("b")) db.splitRange(t, engine.Key("c")) db.splitRange(t, engine.Key("d")) db.splitRange(t, engine.Key("e")) db.splitRange(t, engine.Key("f")) db.splitRange(t, engine.RangeMetaKey(engine.Key("d"))) db.hitCount = 0 rangeCache := NewRangeMetadataCache(db) db.cache = rangeCache doLookup(t, rangeCache, "ba") db.assertHitCount(t, 2) doLookup(t, rangeCache, "bb") db.assertHitCount(t, 0) doLookup(t, rangeCache, "ca") db.assertHitCount(t, 1) // Different metadata one range doLookup(t, rangeCache, "da") db.assertHitCount(t, 2) doLookup(t, rangeCache, "fa") db.assertHitCount(t, 1) // Evict clears both level 1 and level 2 cache for a key rangeCache.EvictCachedRangeMetadata(engine.Key("da")) doLookup(t, rangeCache, "fa") db.assertHitCount(t, 0) doLookup(t, rangeCache, "da") db.assertHitCount(t, 2) }
func (db *testMetadataDB) getMetadata(key engine.Key) (engine.Key, *storage.RangeDescriptor, error) { metadataKey := engine.RangeMetaKey(key) v := db.data.Ceil(&testMetadataNode{endKey: metadataKey}) if v == nil { return nil, nil, util.Errorf("Range for key %s not found", key) } val := v.(*testMetadataNode) db.hitCount++ return val.endKey, val.desc, nil }
func (db *testMetadataDB) getRangeMetadata(key engine.Key) ([]proto.RangeDescriptor, error) { db.hitCount++ metadataKey := engine.RangeMetaKey(key) // Recursively call into cache as the real DB would, terminating recursion // when a meta1key is encountered. if len(metadataKey) > 0 && !bytes.HasPrefix(metadataKey, engine.KeyMeta1Prefix) { db.cache.LookupRangeMetadata(metadataKey) } return db.getMetadata(key), nil }
func (db *testMetadataDB) LookupRangeMetadata(key engine.Key) (engine.Key, *storage.RangeDescriptor, error) { metadataKey := engine.RangeMetaKey(key) // Recursively call into cache as the real DB would, terminating when the // initial key is encountered. if len(metadataKey) == 0 { return nil, nil, nil } db.cache.LookupRangeMetadata(metadataKey) return db.getMetadata(key) }
// TestRangeCache is a simple test which verifies that metadata ranges // are being cached and retrieved properly. It sets up a fake backing // store for the cache, and measures how often that backing store is // accessed when looking up metadata keys through the cache. func TestRangeCache(t *testing.T) { db := newTestMetadataDB() for i, char := range "abcdefghijklmnopqrstuvwx" { db.splitRange(t, engine.Key(string(char))) if i > 0 && i%6 == 0 { db.splitRange(t, engine.RangeMetaKey(engine.Key(string(char)))) } } rangeCache := NewRangeMetadataCache(db) db.cache = rangeCache doLookup(t, rangeCache, "aa") db.assertHitCount(t, 2) // Metadata for the following ranges should be cached doLookup(t, rangeCache, "ab") db.assertHitCount(t, 0) doLookup(t, rangeCache, "ba") db.assertHitCount(t, 0) doLookup(t, rangeCache, "cz") db.assertHitCount(t, 0) // Metadata two ranges weren't cached, same metadata 1 range doLookup(t, rangeCache, "d") db.assertHitCount(t, 1) doLookup(t, rangeCache, "fa") db.assertHitCount(t, 0) // Metadata two ranges weren't cached, metadata 1 was aggressively cached doLookup(t, rangeCache, "ij") db.assertHitCount(t, 1) doLookup(t, rangeCache, "jk") db.assertHitCount(t, 0) doLookup(t, rangeCache, "pn") db.assertHitCount(t, 1) // Totally uncached ranges doLookup(t, rangeCache, "vu") db.assertHitCount(t, 2) doLookup(t, rangeCache, "xx") db.assertHitCount(t, 0) // Evict clears one level 1 and one level 2 cache rangeCache.EvictCachedRangeMetadata(engine.Key("da")) doLookup(t, rangeCache, "fa") db.assertHitCount(t, 0) doLookup(t, rangeCache, "da") db.assertHitCount(t, 2) }
// EvictCachedRangeDescriptor will evict any cached range descriptors // for the given key. It is intended that this method be called from a // consumer of RangeDescriptorCache if the returned range descriptor is // discovered to be stale. func (rmc *RangeDescriptorCache) EvictCachedRangeDescriptor(key proto.Key) { for { k, _ := rmc.getCachedRangeDescriptor(key) if k != nil { rmc.rangeCacheMu.Lock() rmc.rangeCache.Del(k) rmc.rangeCacheMu.Unlock() } // Retrieve the metadata range key for the next level of metadata, and // evict that key as well. This loop ends after the meta1 range, which // returns KeyMin as its metadata key. key = engine.RangeMetaKey(key) if len(key) == 0 { break } } }
// getCachedRangeDescriptor is a helper function to retrieve the // descriptor of the range which contains the given key, if present in // the cache. func (rmc *RangeDescriptorCache) getCachedRangeDescriptor(key proto.Key) ( rangeCacheKey, *proto.RangeDescriptor) { metaKey := engine.RangeMetaKey(key) rmc.rangeCacheMu.RLock() defer rmc.rangeCacheMu.RUnlock() k, v, ok := rmc.rangeCache.Ceil(rangeCacheKey(metaKey)) if !ok { return nil, nil } metaEndKey := k.(rangeCacheKey) rd := v.(*proto.RangeDescriptor) // Check that key actually belongs to range if !rd.ContainsKey(engine.KeyAddress(key)) { return nil, nil } return metaEndKey, rd }
// getRangeMetadata retrieves metadata for the range containing the given key // from storage. This function returns a sorted slice of RangeDescriptors for a // set of consecutive ranges, the first which must contain the requested key. // The additional RangeDescriptors are returned with the intent of pre-caching // subsequent ranges which are likely to be requested soon by the current // workload. func (kv *DistKV) getRangeMetadata(key engine.Key) ([]proto.RangeDescriptor, error) { var ( // metadataKey is sent to InternalRangeLookup to find the // RangeDescriptor which contains key. metadataKey = engine.RangeMetaKey(key) // metadataRange is the RangeDescriptor for the range which contains // metadataKey. metadataRange *proto.RangeDescriptor err error ) if len(metadataKey) == 0 { // In this case, the requested key is stored in the cluster's first // range. Return the first range, which is always gossiped and not // queried from the datastore. rd, err := kv.getFirstRangeDescriptor() if err != nil { return nil, err } return []proto.RangeDescriptor{*rd}, nil } if bytes.HasPrefix(metadataKey, engine.KeyMeta1Prefix) { // In this case, metadataRange is the cluster's first range. if metadataRange, err = kv.getFirstRangeDescriptor(); err != nil { return nil, err } } else { // Look up metadataRange from the cache, which will recursively call // into kv.getRangeMetadata if it is not cached. metadataRange, err = kv.rangeCache.LookupRangeMetadata(metadataKey) if err != nil { return nil, err } } return kv.internalRangeLookup(metadataKey, metadataRange) }
// TestUpdateRangeAddressing verifies range addressing records are
// correctly updated on creation of new range descriptors.
func TestUpdateRangeAddressing(t *testing.T) {
	store := createTestStore(t)
	// When split is false, merging treats the right range as the merged
	// range. With merging, expNewLeft indicates the addressing keys we
	// expect to be removed.
	//
	// Each case applies one split or merge on top of the state left by all
	// prior cases; the expectations are therefore cumulative.
	testCases := []struct {
		split                   bool        // true = split, false = merge
		leftStart, leftEnd      proto.Key   // left range bounds
		rightStart, rightEnd    proto.Key   // right range bounds
		leftExpNew, rightExpNew []proto.Key // addressing keys expected added (split) / removed (merge)
	}{
		// Start out with whole range.
		{false, engine.KeyMin, engine.KeyMax, engine.KeyMin, engine.KeyMax,
			[]proto.Key{}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(engine.KeyMax)}},
		// Split KeyMin-KeyMax at key "a".
		{true, engine.KeyMin, proto.Key("a"), proto.Key("a"), engine.KeyMax,
			[]proto.Key{meta1Key(engine.KeyMax), meta2Key(proto.Key("a"))}, []proto.Key{meta2Key(engine.KeyMax)}},
		// Split "a"-KeyMax at key "z".
		{true, proto.Key("a"), proto.Key("z"), proto.Key("z"), engine.KeyMax,
			[]proto.Key{meta2Key(proto.Key("z"))}, []proto.Key{meta2Key(engine.KeyMax)}},
		// Split "a"-"z" at key "m".
		{true, proto.Key("a"), proto.Key("m"), proto.Key("m"), proto.Key("z"),
			[]proto.Key{meta2Key(proto.Key("m"))}, []proto.Key{meta2Key(proto.Key("z"))}},
		// Split KeyMin-"a" at meta2(m).
		{true, engine.KeyMin, engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("m")), proto.Key("a"),
			[]proto.Key{meta1Key(proto.Key("m"))}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(proto.Key("a"))}},
		// Split meta2(m)-"a" at meta2(z).
		{true, engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("z")), engine.RangeMetaKey(proto.Key("z")), proto.Key("a"),
			[]proto.Key{meta1Key(proto.Key("z"))}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(proto.Key("a"))}},
		// Split meta2(m)-meta2(z) at meta2(r).
		{true, engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("r")), engine.RangeMetaKey(proto.Key("r")), engine.RangeMetaKey(proto.Key("z")),
			[]proto.Key{meta1Key(proto.Key("r"))}, []proto.Key{meta1Key(proto.Key("z"))}},
		// Now, merge all of our splits backwards...
		// Merge meta2(m)-meta2(z).
		{false, engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("r")), engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("z")),
			[]proto.Key{meta1Key(proto.Key("r"))}, []proto.Key{meta1Key(proto.Key("z"))}},
		// Merge meta2(m)-"a".
		{false, engine.RangeMetaKey(proto.Key("m")), engine.RangeMetaKey(proto.Key("z")), engine.RangeMetaKey(proto.Key("m")), proto.Key("a"),
			[]proto.Key{meta1Key(proto.Key("z"))}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(proto.Key("a"))}},
		// Merge KeyMin-"a".
		{false, engine.KeyMin, engine.RangeMetaKey(proto.Key("m")), engine.KeyMin, proto.Key("a"),
			[]proto.Key{meta1Key(proto.Key("m"))}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(proto.Key("a"))}},
		// Merge "a"-"z".
		{false, proto.Key("a"), proto.Key("m"), proto.Key("a"), proto.Key("z"),
			[]proto.Key{meta2Key(proto.Key("m"))}, []proto.Key{meta2Key(proto.Key("z"))}},
		// Merge "a"-KeyMax.
		{false, proto.Key("a"), proto.Key("z"), proto.Key("a"), engine.KeyMax,
			[]proto.Key{meta2Key(proto.Key("z"))}, []proto.Key{meta2Key(engine.KeyMax)}},
		// Merge KeyMin-KeyMax.
		{false, engine.KeyMin, proto.Key("a"), engine.KeyMin, engine.KeyMax,
			[]proto.Key{meta2Key(proto.Key("a"))}, []proto.Key{meta1Key(engine.KeyMax), meta2Key(engine.KeyMax)}},
	}
	// expMetas accumulates the addressing records expected in the engine
	// after each case; it carries over between iterations.
	expMetas := metaSlice{}
	for i, test := range testCases {
		// Distinct RaftIDs per case make overwritten records detectable when
		// comparing descriptors below.
		left := &proto.RangeDescriptor{RaftID: int64(i * 2), StartKey: test.leftStart, EndKey: test.leftEnd}
		right := &proto.RangeDescriptor{RaftID: int64(i*2 + 1), StartKey: test.rightStart, EndKey: test.rightEnd}
		if test.split {
			if err := storage.SplitRangeAddressing(store.DB(), left, right); err != nil {
				t.Fatal(err)
			}
		} else {
			if err := storage.MergeRangeAddressing(store.DB(), left, right); err != nil {
				t.Fatal(err)
			}
		}
		// Flush so the subsequent engine scan observes the writes.
		store.DB().Flush()
		// Scan meta keys directly from engine.
		kvs, err := engine.MVCCScan(store.Engine(), engine.KeyMetaPrefix, engine.KeyMetaMax, 0, proto.MaxTimestamp, nil)
		if err != nil {
			t.Fatal(err)
		}
		// Decode every scanned addressing record into (key, descriptor) pairs.
		metas := metaSlice{}
		for _, kv := range kvs {
			scannedDesc := &proto.RangeDescriptor{}
			if err := gogoproto.Unmarshal(kv.Value.Bytes, scannedDesc); err != nil {
				t.Fatal(err)
			}
			metas = append(metas, metaRecord{key: kv.Key, desc: scannedDesc})
		}
		// Continue to build up the expected metas slice, replacing any earlier
		// version of same key.
		//
		// add=true upserts each key with desc; add=false removes each key if
		// present (used for the keys a merge is expected to delete).
		addOrRemoveNew := func(keys []proto.Key, desc *proto.RangeDescriptor, add bool) {
			for _, n := range keys {
				found := -1
				for i := range expMetas {
					if expMetas[i].key.Equal(n) {
						found = i
						expMetas[i].desc = desc
						break
					}
				}
				if found == -1 && add {
					expMetas = append(expMetas, metaRecord{key: n, desc: desc})
				} else if found != -1 && !add {
					expMetas = append(expMetas[:found], expMetas[found+1:]...)
				}
			}
		}
		addOrRemoveNew(test.leftExpNew, left, test.split /* on split, add; on merge, remove */)
		addOrRemoveNew(test.rightExpNew, right, true)
		// Keep expectations sorted so they can be compared to the (sorted)
		// scan output with DeepEqual.
		sort.Sort(expMetas)

		if test.split {
			log.V(1).Infof("test case %d: split %q-%q at %q", i, left.StartKey, right.EndKey, left.EndKey)
		} else {
			log.V(1).Infof("test case %d: merge %q-%q + %q-%q", i, left.StartKey, left.EndKey, left.EndKey, right.EndKey)
		}
		for _, meta := range metas {
			log.V(1).Infof("%q", meta.key)
		}
		log.V(1).Infof("")

		if !reflect.DeepEqual(expMetas, metas) {
			t.Errorf("expected metas don't match")
			if len(expMetas) != len(metas) {
				t.Errorf("len(expMetas) != len(metas); %d != %d", len(expMetas), len(metas))
			} else {
				// Same length: report per-index key and descriptor diffs to
				// make the failure actionable.
				for i, meta := range expMetas {
					if !meta.key.Equal(metas[i].key) {
						fmt.Printf("%d: expected %q vs %q\n", i, meta.key, metas[i].key)
					}
					if !reflect.DeepEqual(meta.desc, metas[i].desc) {
						fmt.Printf("%d: expected %q vs %q and %s vs %s\n", i, meta.key, metas[i].key, meta.desc, metas[i].desc)
					}
				}
			}
		}
	}
}