// TestReplicateRange verifies basic replication functionality by creating two stores // and a range, replicating the range to the second store, and reading its data there. func TestReplicateRange(t *testing.T) { defer leaktest.AfterTest(t) mtc := multiTestContext{} mtc.Start(t, 2) defer mtc.Stop() // Issue a command on the first node before replicating. incArgs, incResp := incrementArgs([]byte("a"), 5, 1, mtc.stores[0].StoreID()) if err := mtc.stores[0].ExecuteCmd(context.Background(), proto.Call{Args: incArgs, Reply: incResp}); err != nil { t.Fatal(err) } rng, err := mtc.stores[0].GetRange(1) if err != nil { t.Fatal(err) } if err := rng.ChangeReplicas(proto.ADD_REPLICA, proto.Replica{ NodeID: mtc.stores[1].Ident.NodeID, StoreID: mtc.stores[1].Ident.StoreID, }); err != nil { t.Fatal(err) } // Verify no intent remains on range descriptor key. key := keys.RangeDescriptorKey(rng.Desc().StartKey) desc := proto.RangeDescriptor{} if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &desc); !ok || err != nil { t.Fatalf("fetching range descriptor yielded %t, %s", ok, err) } // Verify that in time, no intents remain on meta addressing // keys, and that range descriptor on the meta records is correct. util.SucceedsWithin(t, 1*time.Second, func() error { meta2 := keys.RangeMetaKey(proto.KeyMax) meta1 := keys.RangeMetaKey(meta2) for _, key := range []proto.Key{meta2, meta1} { metaDesc := proto.RangeDescriptor{} if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &metaDesc); !ok || err != nil { return util.Errorf("failed to resolve %s", key) } if !reflect.DeepEqual(metaDesc, desc) { return util.Errorf("descs not equal: %+v != %+v", metaDesc, desc) } } return nil }) // Verify that the same data is available on the replica. util.SucceedsWithin(t, 1*time.Second, func() error { getArgs, getResp := getArgs([]byte("a"), 1, mtc.stores[1].StoreID()) getArgs.ReadConsistency = proto.INCONSISTENT if err := mtc.stores[1].ExecuteCmd(context.Background(), proto.Call{Args: getArgs, Reply: getResp}); err != nil { return util.Errorf("failed to read data") } if v := mustGetInteger(getResp.Value); v != 5 { return util.Errorf("failed to read correct data: %d", v) } return nil }) }
func TestRangeCacheAssumptions(t *testing.T) { defer leaktest.AfterTest(t) expKeyMin := keys.RangeMetaKey(keys.RangeMetaKey(keys.RangeMetaKey(proto.Key("test")))) if !bytes.Equal(expKeyMin, proto.KeyMin) { t.Fatalf("RangeCache relies on RangeMetaKey returning KeyMin after two levels, but got %s", expKeyMin) } }
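// What follows is a minimal, self-contained sketch (not CockroachDB code) of
// the level-stepping invariant the test above relies on: RangeMetaKey maps an
// ordinary key to its meta2 addressing key, a meta2 key to a meta1 key, and a
// meta1 key (or KeyMin) to KeyMin. The prefixes and the rangeMetaKey helper
// below are simplified stand-ins for the real keys package.
package main

import (
	"bytes"
	"fmt"
)

var (
	meta1Prefix = []byte("\x00\x00meta1") // stand-in for keys.Meta1Prefix
	meta2Prefix = []byte("\x00\x00meta2") // stand-in for keys.Meta2Prefix
)

func rangeMetaKey(key []byte) []byte {
	switch {
	case len(key) == 0 || bytes.HasPrefix(key, meta1Prefix):
		return nil // KeyMin
	case bytes.HasPrefix(key, meta2Prefix):
		// Swap the meta2 prefix for the meta1 prefix.
		return append(append([]byte(nil), meta1Prefix...), key[len(meta2Prefix):]...)
	default:
		return append(append([]byte(nil), meta2Prefix...), key...)
	}
}

func main() {
	k := []byte("test")
	for level := 1; level <= 3; level++ {
		k = rangeMetaKey(k)
		fmt.Printf("after %d application(s): %q\n", level, k)
	}
	// The third application yields "" (KeyMin), which is exactly the
	// assumption TestRangeCacheAssumptions pins down.
}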
// TestRangeCacheClearOverlappingMeta prevents regression of a bug which caused
// a panic when clearing overlapping descriptors for [KeyMin, Meta2Key). The
// issue was that when attempting to clear out descriptors which were subsumed
// by the above range, an iteration over the corresponding meta keys was
// performed, with the left endpoint excluded. This exclusion was incorrect: it
// first incremented the start key (KeyMin) and then formed the meta key; for
// KeyMin this leads to Meta2Prefix\x00. For the above EndKey, the meta key is
// a meta1 key which sorts before Meta2Prefix\x00, causing a panic. The fix was
// simply to increment the meta key for StartKey, not StartKey itself.
func TestRangeCacheClearOverlappingMeta(t *testing.T) {
	defer leaktest.AfterTest(t)

	firstDesc := &proto.RangeDescriptor{
		StartKey: proto.KeyMin,
		EndKey:   proto.Key("zzz"),
	}
	restDesc := &proto.RangeDescriptor{
		StartKey: firstDesc.StartKey,
		EndKey:   proto.KeyMax,
	}

	cache := newRangeDescriptorCache(nil, 2<<10)
	cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(firstDesc.EndKey)), firstDesc)
	cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(restDesc.EndKey)), restDesc)

	// Add a new range, corresponding to splitting the first range at a meta key.
	metaSplitDesc := &proto.RangeDescriptor{
		StartKey: proto.KeyMin,
		EndKey:   proto.Key(keys.RangeMetaKey(proto.Key("foo"))),
	}
	func() {
		defer func() {
			if r := recover(); r != nil {
				t.Fatalf("invocation of clearOverlappingCachedRangeDescriptors panicked: %v", r)
			}
		}()
		cache.clearOverlappingCachedRangeDescriptors(metaSplitDesc.EndKey, keys.RangeMetaKey(metaSplitDesc.EndKey), metaSplitDesc)
	}()
}
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified key or descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(key, metaKey proto.Key, desc *proto.RangeDescriptor) {
	if desc.StartKey.Equal(desc.EndKey) { // True for some unittests.
		return
	}
	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, an existing KeyMin->KeyMax descriptor
	// should be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*proto.RangeDescriptor)
		addrKey := keys.KeyAddress(key)
		if !addrKey.Less(descriptor.StartKey) && !descriptor.EndKey.Less(addrKey) {
			if log.V(1) {
				log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.Del(k.(rangeCacheKey))
		}
	}
	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from the meta key for
	// StartKey, exclusive; incrementing the meta key (rather than
	// StartKey itself) keeps the iteration bounds ordered when StartKey
	// is KeyMin (see TestRangeCacheClearOverlappingMeta).
	rdc.rangeCache.DoRange(func(k, v interface{}) {
		if log.V(1) {
			log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*proto.RangeDescriptor))
		}
		rdc.rangeCache.Del(k.(rangeCacheKey))
	}, rangeCacheKey(keys.RangeMetaKey(desc.StartKey).Next()),
		rangeCacheKey(keys.RangeMetaKey(desc.EndKey)))
}
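// Below is a hedged, self-contained sketch of the two clearing phases above,
// using a sorted slice and sort.Search in place of the llrb-backed
// OrderedCache's Ceil and DoRange. The meta helper and the descriptor/entry
// types are simplified stand-ins, not the real keys/proto types.
package main

import (
	"fmt"
	"sort"
)

type descriptor struct{ start, end string }

// entry is keyed by meta(desc.end); the slice is kept sorted by that key.
type entry struct {
	metaKey string
	desc    descriptor
}

// meta is a toy stand-in for keys.RangeMetaKey; it preserves key ordering.
func meta(k string) string { return "\x00meta2/" + k }

func clearOverlapping(cache []entry, newDesc descriptor) []entry {
	// Phase 1: Ceil(meta(newDesc.end)) finds the first entry at or after the
	// new range's meta key; if that entry's range contains newDesc.end, it
	// overlaps the new range (e.g. a stale [KeyMin, KeyMax) entry) and is
	// dropped.
	i := sort.Search(len(cache), func(i int) bool { return cache[i].metaKey >= meta(newDesc.end) })
	if i < len(cache) {
		if d := cache[i].desc; d.start <= newDesc.end && newDesc.end <= d.end {
			cache = append(cache[:i], cache[i+1:]...)
		}
	}
	// Phase 2: drop entries subsumed by the new range, i.e. meta keys in
	// (meta(newDesc.start), meta(newDesc.end)). Note the lower bound
	// increments the *meta key* (meta(start) + "\x00"), not start itself,
	// mirroring the fix described in TestRangeCacheClearOverlappingMeta.
	lo := sort.Search(len(cache), func(i int) bool { return cache[i].metaKey > meta(newDesc.start) })
	hi := sort.Search(len(cache), func(i int) bool { return cache[i].metaKey >= meta(newDesc.end) })
	return append(cache[:lo], cache[hi:]...)
}

func main() {
	cache := []entry{
		{meta("c"), descriptor{"a", "c"}},
		{meta("e"), descriptor{"c", "e"}},
		{meta("\xff"), descriptor{"e", "\xff"}},
	}
	// Caching ["a", "e") should clear both ["a", "c") and ["c", "e").
	for _, e := range clearOverlapping(cache, descriptor{"a", "e"}) {
		fmt.Printf("%+v\n", e.desc) // only {e \xff} remains
	}
}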
// getCachedRangeDescriptorLocked is a helper function to retrieve the
// descriptor of the range which contains the given key, if present in the
// cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu.
func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key, inclusive bool) (
	rangeCacheKey, *proto.RangeDescriptor) {
	// The cache is indexed using the end-key of the range, and the end-key
	// is non-inclusive by default, so a plain lookup uses key.Next() while
	// an inclusive lookup uses the key directly.
	var metaKey proto.Key
	if !inclusive {
		metaKey = keys.RangeMetaKey(key.Next())
	} else {
		metaKey = keys.RangeMetaKey(key)
	}

	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if !ok {
		return nil, nil
	}
	metaEndKey := k.(rangeCacheKey)
	rd := v.(*proto.RangeDescriptor)

	// Check that the key actually belongs to the range.
	if !rd.ContainsKey(key) {
		// The key is the EndKey and we're inclusive, so just return the
		// range descriptor.
		if inclusive && key.Equal(rd.EndKey) {
			return metaEndKey, rd
		}
		return nil, nil
	}

	// The key is the StartKey and we're inclusive, so we need the previous
	// range descriptor; it is not in the cache, so report a miss.
	if inclusive && key.Equal(rd.StartKey) {
		return nil, nil
	}
	return metaEndKey, rd
}
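// A hedged sketch of the Ceil-based lookup above: the cache is keyed by the
// meta key of each range's (exclusive) EndKey, so a forward lookup must search
// with key.Next(), while an inclusive lookup (as used for reverse scans)
// searches with the key itself and additionally accepts a hit on the end key.
// Toy types and a sorted slice again; not the real implementation.
package main

import (
	"fmt"
	"sort"
)

type descriptor struct{ start, end string }

// meta is a toy stand-in for keys.RangeMetaKey; it preserves key ordering.
func meta(k string) string { return "\x00meta2/" + k }

var cache = []struct {
	metaKey string
	desc    descriptor
}{
	{meta("c"), descriptor{"a", "c"}},
	{meta("e"), descriptor{"c", "e"}},
}

func lookup(key string, inclusive bool) *descriptor {
	searchKey := meta(key + "\x00") // forward lookups use key.Next()
	if inclusive {
		searchKey = meta(key) // inclusive lookups use the key directly
	}
	i := sort.Search(len(cache), func(i int) bool { return cache[i].metaKey >= searchKey })
	if i == len(cache) {
		return nil
	}
	d := cache[i].desc
	switch {
	case d.start <= key && key < d.end:
		if inclusive && key == d.start {
			// A reverse scan positioned at d.start needs the previous
			// range, which this entry does not describe.
			return nil
		}
		return &d
	case inclusive && key == d.end:
		// For inclusive lookups the end key itself is a hit.
		return &d
	}
	return nil
}

func main() {
	fmt.Println(*lookup("c", false)) // {c e}: forward reads start at "c"
	fmt.Println(*lookup("c", true))  // {a c}: a reverse scan at "c" reads the range ending at "c"
}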
// TestRangeSplitMeta executes various splits (including at meta addressing)
// and checks that all created intents are resolved. This includes both intents
// resolved synchronously with EndTransaction and those resolved via RPC.
func TestRangeSplitMeta(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	splitKeys := []roachpb.Key{roachpb.Key("G"), keys.RangeMetaKey(roachpb.Key("F")),
		keys.RangeMetaKey(roachpb.Key("K")), keys.RangeMetaKey(roachpb.Key("H"))}

	// Execute the consecutive splits.
	for _, splitKey := range splitKeys {
		log.Infof("starting split at key %q...", splitKey)
		if err := s.DB.AdminSplit(splitKey); err != nil {
			t.Fatal(err)
		}
		log.Infof("split at key %q complete", splitKey)
	}

	if err := util.IsTrueWithin(func() bool {
		if _, _, err := engine.MVCCScan(s.Eng, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil); err != nil {
			log.Infof("mvcc scan should be clean: %s", err)
			return false
		}
		return true
	}, 500*time.Millisecond); err != nil {
		t.Error("failed to verify no dangling intents within 500ms")
	}
}
// TestReplicateRange verifies basic replication functionality by creating two stores // and a range, replicating the range to the second store, and reading its data there. func TestReplicateRange(t *testing.T) { defer leaktest.AfterTest(t) mtc := startMultiTestContext(t, 2) defer mtc.Stop() // Issue a command on the first node before replicating. incArgs := incrementArgs([]byte("a"), 5) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &incArgs); err != nil { t.Fatal(err) } rng, err := mtc.stores[0].GetReplica(1) if err != nil { t.Fatal(err) } if err := rng.ChangeReplicas(roachpb.ADD_REPLICA, roachpb.ReplicaDescriptor{ NodeID: mtc.stores[1].Ident.NodeID, StoreID: mtc.stores[1].Ident.StoreID, }, rng.Desc()); err != nil { t.Fatal(err) } // Verify no intent remains on range descriptor key. key := keys.RangeDescriptorKey(rng.Desc().StartKey) desc := roachpb.RangeDescriptor{} if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &desc); !ok || err != nil { t.Fatalf("fetching range descriptor yielded %t, %s", ok, err) } // Verify that in time, no intents remain on meta addressing // keys, and that range descriptor on the meta records is correct. util.SucceedsWithin(t, 1*time.Second, func() error { meta2 := keys.Addr(keys.RangeMetaKey(roachpb.RKeyMax)) meta1 := keys.Addr(keys.RangeMetaKey(meta2)) for _, key := range []roachpb.RKey{meta2, meta1} { metaDesc := roachpb.RangeDescriptor{} if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key.AsRawKey(), mtc.stores[0].Clock().Now(), true, nil, &metaDesc); !ok || err != nil { return util.Errorf("failed to resolve %s", key.AsRawKey()) } if !reflect.DeepEqual(metaDesc, desc) { return util.Errorf("descs not equal: %+v != %+v", metaDesc, desc) } } return nil }) // Verify that the same data is available on the replica. util.SucceedsWithin(t, replicaReadTimeout, func() error { getArgs := getArgs([]byte("a")) if reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{ ReadConsistency: roachpb.INCONSISTENT, }, &getArgs); err != nil { return util.Errorf("failed to read data: %s", err) } else if e, v := int64(5), mustGetInt(reply.(*roachpb.GetResponse).Value); v != e { return util.Errorf("failed to read correct data: expected %d, got %d", e, v) } return nil }) }
// EvictCachedRangeDescriptor will evict any cached range descriptors // for the given key. It is intended that this method be called from a // consumer of rangeDescriptorCache if the returned range descriptor is // discovered to be stale. // seenDesc should always be passed in and is used as the basis of a // compare-and-evict (as pointers); if it is nil, eviction is unconditional // but a warning will be logged. func (rdc *rangeDescriptorCache) EvictCachedRangeDescriptor(descKey proto.Key, seenDesc *proto.RangeDescriptor) { if seenDesc == nil { log.Warningf("compare-and-evict for key %s with nil descriptor; clearing unconditionally", descKey) } rdc.rangeCacheMu.Lock() defer rdc.rangeCacheMu.Unlock() rngKey, cachedDesc := rdc.getCachedRangeDescriptorLocked(descKey) // Note that we're doing a "compare-and-erase": If seenDesc is not nil, // we want to clean the cache only if it equals the cached range // descriptor as a pointer. If not, then likely some other caller // already evicted previously, and we can save work by not doing it // again (which would prompt another expensive lookup). if seenDesc != nil && seenDesc != cachedDesc { return } for !bytes.Equal(descKey, proto.KeyMin) { if log.V(2) { log.Infof("evict cached descriptor: key=%s desc=%s\n%s", descKey, cachedDesc, rdc.stringLocked()) } else if log.V(1) { log.Infof("evict cached descriptor: key=%s desc=%s", descKey, cachedDesc) } rdc.rangeCache.Del(rngKey) // Retrieve the metadata range key for the next level of metadata, and // evict that key as well. This loop ends after the meta1 range, which // returns KeyMin as its metadata key. descKey = keys.RangeMetaKey(descKey) rngKey, cachedDesc = rdc.getCachedRangeDescriptorLocked(descKey) } }
func metaKey(key roachpb.RKey) []byte { rk, err := keys.Addr(keys.RangeMetaKey(key)) if err != nil { panic(err) } return rk }
func runLsRanges(cmd *cobra.Command, args []string) { if len(args) > 1 { mustUsage(cmd) return } var startKey proto.Key if len(args) >= 1 { startKey = keys.RangeMetaKey(proto.Key(args[0])) } else { startKey = keys.Meta2Prefix } kvDB, stopper := makeDBClient() defer stopper.Stop() rows, err := kvDB.Scan(startKey, keys.Meta2Prefix.PrefixEnd(), maxResults) if err != nil { fmt.Fprintf(os.Stderr, "scan failed: %s\n", err) osExit(1) return } for _, row := range rows { desc := &proto.RangeDescriptor{} if err := row.ValueProto(desc); err != nil { fmt.Fprintf(os.Stderr, "%s: unable to unmarshal range descriptor\n", row.Key) continue } fmt.Printf("%s-%s [%d]\n", desc.StartKey, desc.EndKey, desc.RangeID) for i, replica := range desc.Replicas { fmt.Printf("\t%d: node-id=%d store-id=%d\n", i, replica.NodeID, replica.StoreID) } } fmt.Printf("%d result(s)\n", len(rows)) }
// rangeAddressing updates or deletes the range addressing metadata // for the range specified by desc. The action to take is specified by // the supplied metaAction function. // // The rules for meta1 and meta2 records are as follows: // // 1. If desc.StartKey or desc.EndKey is meta1: // - ERROR // 2. If desc.EndKey is meta2: // - meta1(desc.EndKey) // 3. If desc.EndKey is normal user key: // - meta2(desc.EndKey) // 3a. If desc.StartKey is KeyMin or meta2: // - meta1(KeyMax) func rangeAddressing(b *client.Batch, desc *roachpb.RangeDescriptor, action metaAction) error { // 1. handle illegal case of start or end key being meta1. if bytes.HasPrefix(desc.EndKey, keys.Meta1Prefix) || bytes.HasPrefix(desc.StartKey, keys.Meta1Prefix) { return errors.Errorf("meta1 addressing records cannot be split: %+v", desc) } // Note that both cases 2 and 3 are handled by keys.RangeMetaKey. // // 2. the case of the range ending with a meta2 prefix. This means // the range is full of meta2. We must update the relevant meta1 // entry pointing to the end of this range. // // 3. the range ends with a normal user key, so we must update the // relevant meta2 entry pointing to the end of this range. action(b, keys.RangeMetaKey(desc.EndKey), desc) if !bytes.HasPrefix(desc.EndKey, keys.Meta2Prefix) { // 3a. the range starts with KeyMin or a meta2 addressing record, // update the meta1 entry for KeyMax. if bytes.Equal(desc.StartKey, roachpb.RKeyMin) || bytes.HasPrefix(desc.StartKey, keys.Meta2Prefix) { action(b, keys.Meta1KeyMax, desc) } } return nil }
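// The rule table above condenses to a small function; here is a hedged,
// self-contained illustration (with toy prefixes in place of
// keys.Meta1Prefix/Meta2Prefix and a simplified descriptor), not the real
// rangeAddressing implementation.
package main

import (
	"fmt"
	"strings"
)

const (
	meta1 = "\x00\x00meta1" // stand-in for keys.Meta1Prefix
	meta2 = "\x00\x00meta2" // stand-in for keys.Meta2Prefix
)

type descriptor struct{ start, end string }

// metaRecords returns the addressing record keys that would be updated.
func metaRecords(d descriptor) ([]string, error) {
	// Case 1: meta1 records cannot be split.
	if strings.HasPrefix(d.start, meta1) || strings.HasPrefix(d.end, meta1) {
		return nil, fmt.Errorf("meta1 addressing records cannot be split: %+v", d)
	}
	// Cases 2 and 3: one record keyed by RangeMetaKey(end), which is a meta1
	// key when end is a meta2 key and a meta2 key otherwise.
	var recs []string
	if strings.HasPrefix(d.end, meta2) {
		recs = append(recs, meta1+strings.TrimPrefix(d.end, meta2))
	} else {
		recs = append(recs, meta2+d.end)
		// Case 3a: a range starting at KeyMin or at a meta2 key also owns
		// the meta1 entry for KeyMax.
		if d.start == "" || strings.HasPrefix(d.start, meta2) {
			recs = append(recs, meta1+"\xff\xff")
		}
	}
	return recs, nil
}

func main() {
	for _, d := range []descriptor{
		{"a", "c"},           // ordinary range: meta2("c")
		{"", meta2 + "q"},    // ends in meta2: meta1("q")
		{meta2 + "q", "zzz"}, // starts in meta2: meta2("zzz") and meta1(KeyMax)
	} {
		recs, err := metaRecords(d)
		fmt.Println(recs, err)
	}
}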
func (db *testDescriptorDB) getRangeDescriptors(key proto.Key, options lookupOptions) ([]proto.RangeDescriptor, error) {
	db.lookupCount++
	metadataKey := keys.RangeMetaKey(key)

	var err error
	// Recursively call into the cache as the real DB would, terminating
	// recursion when a meta1 key is encountered.
	if len(metadataKey) > 0 && !bytes.HasPrefix(metadataKey, keys.Meta1Prefix) {
		_, err = db.cache.LookupRangeDescriptor(metadataKey, options)
	}
	return db.getDescriptor(key), err
}
// getCachedRangeDescriptorLocked is a helper function to retrieve the
// descriptor of the range which contains the given key, if present in the
// cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu.
func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key, isReverse bool) (
	rangeCacheKey, *proto.RangeDescriptor) {
	// The cache is indexed using the end-key of the range, but the
	// end-key is non-inclusive.
	var metaKey proto.Key
	if !isReverse {
		// For a forward scan, access the cache using key.Next().
		metaKey = keys.RangeMetaKey(key.Next())
	} else {
		// A reverse scan begins at its end key (exclusive), so access the
		// cache using the key directly.
		metaKey = keys.RangeMetaKey(key)
	}

	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if !ok {
		return nil, nil
	}
	metaEndKey := k.(rangeCacheKey)
	rd := v.(*proto.RangeDescriptor)

	// Check that the key actually belongs to the range.
	if !rd.ContainsKey(keys.KeyAddress(key)) {
		// In a reverse scan, the key is the EndKey of the range; just
		// return the range descriptor.
		if isReverse && key.Equal(rd.EndKey) {
			return metaEndKey, rd
		}
		return nil, nil
	}

	// In a reverse scan, the key is the StartKey of the range. We need to
	// return the previous range descriptor, but it is not in the cache yet.
	if isReverse && key.Equal(rd.StartKey) {
		return nil, nil
	}
	return metaEndKey, rd
}
// LookupRange returns the descriptor of the range containing key. func (tc *TestCluster) LookupRange(key roachpb.Key) (roachpb.RangeDescriptor, error) { rangeLookupReq := roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(keys.MustAddr(key)), }, MaxRanges: 1, ConsiderIntents: false, } resp, pErr := client.SendWrapped(tc.Servers[0].GetDistSender(), nil, &rangeLookupReq) if pErr != nil { return roachpb.RangeDescriptor{}, errors.Errorf( "%q: lookup range unexpected error: %s", key, pErr) } return resp.(*roachpb.RangeLookupResponse).Ranges[0], nil }
func (c *cluster) lookupRange(nodeIdx int, key roachpb.Key) (*roachpb.RangeDescriptor, error) { req := &roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(keys.MustAddr(key)), }, MaxRanges: 1, ConsiderIntents: false, } sender := c.clients[nodeIdx].GetSender() resp, pErr := client.SendWrapped(sender, nil, req) if pErr != nil { return nil, errors.Errorf("%s: lookup range: %s", key, pErr) } return &resp.(*roachpb.RangeLookupResponse).Ranges[0], nil }
// LookupRangeDescriptor attempts to locate a descriptor for the range
// containing the given Key. This is done by querying the two-level
// lookup table of range descriptors which cockroach maintains.
//
// This method first looks up the specified key in the first level of
// range metadata, which returns the location of the key within the
// second level of range metadata. This second level location is then
// queried to retrieve a descriptor for the range where the key's
// value resides. Range descriptors retrieved during each search are
// cached for subsequent lookups.
//
// This method returns the RangeDescriptor for the range containing
// the key's data, or an error if any occurred.
func (rdc *rangeDescriptorCache) LookupRangeDescriptor(key proto.Key, options lookupOptions) (*proto.RangeDescriptor, error) {
	if _, r := rdc.getCachedRangeDescriptor(key); r != nil {
		return r, nil
	}

	if log.V(2) {
		log.Infof("lookup range descriptor: key=%s\n%s", key, rdc)
	} else if log.V(1) {
		log.Infof("lookup range descriptor: key=%s", key)
	}

	rs, err := rdc.db.getRangeDescriptors(key, options)
	if err != nil {
		return nil, err
	}
	// TODO(tamird): there is a race here; multiple readers may experience cache
	// misses and concurrently attempt to refresh the cache, duplicating work.
	// Locking over the getRangeDescriptors call is even worse though, because
	// that blocks the cache completely for the duration of a slow query to the
	// cluster.
	rdc.rangeCacheMu.Lock()
	for i := range rs {
		// Note: we append the end key of each range to meta[12] records
		// so that calls to rdc.rangeCache.Ceil() for a key will return
		// the correct range. Using the start key would require using
		// Floor() which is a possibility for our llrb-based OrderedCache
		// but not possible for RocksDB.

		// Before adding a new descriptor, make sure we clear out any
		// pre-existing, overlapping descriptor which might have been
		// re-inserted due to concurrent range lookups.
		rangeKey := keys.RangeMetaKey(rs[i].EndKey)
		if log.V(1) {
			log.Infof("adding descriptor: key=%s desc=%s", rangeKey, &rs[i])
		}
		rdc.clearOverlappingCachedRangeDescriptors(rs[i].EndKey, rangeKey, &rs[i])
		rdc.rangeCache.Add(rangeCacheKey(rangeKey), &rs[i])
	}
	if len(rs) == 0 {
		log.Fatalf("no range descriptors returned for %s", key)
	}
	rdc.rangeCacheMu.Unlock()
	return &rs[0], nil
}
// getCachedRangeDescriptorLocked is a helper function to retrieve the
// descriptor of the range which contains the given key, if present in the
// cache. It is assumed that the caller holds a read lock on rdc.rangeCacheMu.
func (rdc *rangeDescriptorCache) getCachedRangeDescriptorLocked(key proto.Key) (
	rangeCacheKey, *proto.RangeDescriptor) {
	// The cache is indexed using the end-key of the range, but the
	// end-key is non-inclusive, so we access the cache using key.Next().
	metaKey := keys.RangeMetaKey(key.Next())

	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if !ok {
		return nil, nil
	}
	metaEndKey := k.(rangeCacheKey)
	rd := v.(*proto.RangeDescriptor)

	// Check that the key actually belongs to the range.
	if !rd.ContainsKey(keys.KeyAddress(key)) {
		return nil, nil
	}
	return metaEndKey, rd
}
// EvictCachedRangeDescriptor will evict any cached range descriptors // for the given key. It is intended that this method be called from a // consumer of rangeDescriptorCache if the returned range descriptor is // discovered to be stale. // seenDesc should always be passed in and is used as the basis of a // compare-and-evict (as pointers); if it is nil, eviction is unconditional // but a warning will be logged. func (rdc *rangeDescriptorCache) EvictCachedRangeDescriptor(descKey proto.Key, seenDesc *proto.RangeDescriptor, inclusive bool) { if seenDesc == nil { log.Warningf("compare-and-evict for key %s with nil descriptor; clearing unconditionally", descKey) } rdc.rangeCacheMu.Lock() defer rdc.rangeCacheMu.Unlock() rngKey, cachedDesc := rdc.getCachedRangeDescriptorLocked(descKey, inclusive) // Note that we're doing a "compare-and-erase": If seenDesc is not nil, // we want to clean the cache only if it equals the cached range // descriptor as a pointer. If not, then likely some other caller // already evicted previously, and we can save work by not doing it // again (which would prompt another expensive lookup). if seenDesc != nil && seenDesc != cachedDesc { return } for { if log.V(2) { log.Infof("evict cached descriptor: key=%s desc=%s\n%s", descKey, cachedDesc, rdc.stringLocked()) } else if log.V(1) { log.Infof("evict cached descriptor: key=%s desc=%s", descKey, cachedDesc) } rdc.rangeCache.Del(rngKey) // Retrieve the metadata range key for the next level of metadata, and // evict that key as well. This loop ends after the meta1 range, which // returns KeyMin as its metadata key. descKey = keys.RangeMetaKey(descKey) rngKey, cachedDesc = rdc.getCachedRangeDescriptorLocked(descKey, inclusive) // TODO(tschottdorf): write a test that verifies that the first descriptor // can also be evicted. This is necessary since the initial range // [KeyMin,KeyMax) may turn into [KeyMin, "something"), after which // larger ranges don't fit into it any more. if bytes.Equal(descKey, proto.KeyMin) { break } } }
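// A hedged, self-contained sketch of the multi-level eviction walk above:
// evict the entry for the key, then climb to its meta key and evict that too,
// stopping once RangeMetaKey returns KeyMin. The check sits at the bottom of
// the loop so that even a descriptor cached for KeyMin is evicted once. The
// map-backed cache and the rangeMetaKey helper are simplified stand-ins.
package main

import (
	"bytes"
	"fmt"
)

var (
	meta1Prefix = []byte("\x00\x00meta1") // stand-in for keys.Meta1Prefix
	meta2Prefix = []byte("\x00\x00meta2") // stand-in for keys.Meta2Prefix
)

func rangeMetaKey(key []byte) []byte {
	switch {
	case len(key) == 0 || bytes.HasPrefix(key, meta1Prefix):
		return nil // KeyMin
	case bytes.HasPrefix(key, meta2Prefix):
		return append(append([]byte(nil), meta1Prefix...), key[len(meta2Prefix):]...)
	default:
		return append(append([]byte(nil), meta2Prefix...), key...)
	}
}

func evict(cache map[string]string, key []byte) {
	for {
		fmt.Printf("evicting descriptor cached for %q\n", key)
		delete(cache, string(key))
		// Climb one metadata level; the walk ends after the meta1 level,
		// for which rangeMetaKey returns KeyMin.
		key = rangeMetaKey(key)
		if len(key) == 0 {
			break
		}
	}
}

func main() {
	cache := map[string]string{} // lookup key -> descriptor, heavily simplified
	evict(cache, []byte("da"))   // evicts "da", then meta2("da"), then meta1("da")
}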
func runLsRanges(cmd *cobra.Command, args []string) {
	if len(args) > 1 {
		mustUsage(cmd)
		return
	}

	var startKey roachpb.Key
	{
		k := roachpb.KeyMin.Next()
		if len(args) > 0 {
			k = roachpb.Key(args[0])
		}
		rk, err := keys.Addr(k)
		if err != nil {
			panic(err)
		}
		startKey = keys.RangeMetaKey(rk)
	}
	endKey := keys.Meta2Prefix.PrefixEnd()

	kvDB, stopper := makeDBClient()
	defer stopper.Stop()
	rows, err := kvDB.Scan(startKey, endKey, maxResults)
	if err != nil {
		panicf("scan failed: %s\n", err)
	}

	for _, row := range rows {
		desc := &roachpb.RangeDescriptor{}
		if err := row.ValueProto(desc); err != nil {
			// panicf does not return, so no continue is needed here.
			panicf("%s: unable to unmarshal range descriptor\n", row.Key)
		}
		fmt.Printf("%s-%s [%d]\n", desc.StartKey, desc.EndKey, desc.RangeID)
		for i, replica := range desc.Replicas {
			fmt.Printf("\t%d: node-id=%d store-id=%d\n", i, replica.NodeID, replica.StoreID)
		}
	}
	fmt.Printf("%d result(s)\n", len(rows))
}
// getRangeMetadata retrieves the current range descriptor for the target // range. func getRangeMetadata(key roachpb.RKey, mtc *multiTestContext, t *testing.T) roachpb.RangeDescriptor { // Calls to RangeLookup typically use inconsistent reads, but we // want to do a consistent read here. This is important when we are // considering one of the metadata ranges: we must not do an // inconsistent lookup in our own copy of the range. b := &client.Batch{} b.InternalAddRequest(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(key), }, MaxRanges: 1, }) var reply *roachpb.RangeLookupResponse if br, err := mtc.db.RunWithResponse(b); err != nil { t.Fatalf("error getting range metadata: %s", err) } else { reply = br.Responses[0].GetInner().(*roachpb.RangeLookupResponse) } if a, e := len(reply.Ranges), 1; a != e { t.Fatalf("expected %d range descriptor, got %d", e, a) } return reply.Ranges[0] }
// TestRangeCacheClearOverlapping verifies that existing, overlapping // cached entries are cleared when adding a new entry. func TestRangeCacheClearOverlapping(t *testing.T) { defer leaktest.AfterTest(t) defDesc := &proto.RangeDescriptor{ StartKey: proto.KeyMin, EndKey: proto.KeyMax, } cache := newRangeDescriptorCache(nil, 2<<10) cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(proto.KeyMax)), defDesc) // Now, add a new, overlapping set of descriptors. minToBDesc := &proto.RangeDescriptor{ StartKey: proto.KeyMin, EndKey: proto.Key("b"), } bToMaxDesc := &proto.RangeDescriptor{ StartKey: proto.Key("b"), EndKey: proto.KeyMax, } cache.clearOverlappingCachedRangeDescriptors(proto.Key("b"), keys.RangeMetaKey(proto.Key("b")), minToBDesc) cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(proto.Key("b"))), minToBDesc) if _, desc := cache.getCachedRangeDescriptor(proto.Key("b"), false); desc != nil { t.Errorf("descriptor unexpectedly non-nil: %s", desc) } cache.clearOverlappingCachedRangeDescriptors(proto.KeyMax, keys.RangeMetaKey(proto.KeyMax), bToMaxDesc) cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(proto.KeyMax)), bToMaxDesc) if _, desc := cache.getCachedRangeDescriptor(proto.Key("b"), false); desc != bToMaxDesc { t.Errorf("expected descriptor %s; got %s", bToMaxDesc, desc) } // Add default descriptor back which should remove two split descriptors. cache.clearOverlappingCachedRangeDescriptors(proto.KeyMax, keys.RangeMetaKey(proto.KeyMax), defDesc) cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(proto.KeyMax)), defDesc) for _, key := range []proto.Key{proto.Key("a"), proto.Key("b")} { if _, desc := cache.getCachedRangeDescriptor(key, false); desc != defDesc { t.Errorf("expected descriptor %s for key %s; got %s", defDesc, key, desc) } } }
// getRangeDescriptors returns a sorted slice of RangeDescriptors for a set of // consecutive ranges, the first of which must contain the requested key. The // additional RangeDescriptors are returned with the intent of pre-caching // subsequent ranges which are likely to be requested soon by the current // workload. func (ds *DistSender) getRangeDescriptors(key proto.Key, options lookupOptions) ([]proto.RangeDescriptor, error) { var ( // metadataKey is sent to internalRangeLookup to find the // RangeDescriptor which contains key. metadataKey = keys.RangeMetaKey(key) // desc is the RangeDescriptor for the range which contains // metadataKey. desc *proto.RangeDescriptor err error ) if bytes.Equal(metadataKey, proto.KeyMin) { // In this case, the requested key is stored in the cluster's first // range. Return the first range, which is always gossiped and not // queried from the datastore. rd, err := ds.getFirstRangeDescriptor() if err != nil { return nil, err } return []proto.RangeDescriptor{*rd}, nil } if bytes.HasPrefix(metadataKey, keys.Meta1Prefix) { // In this case, desc is the cluster's first range. if desc, err = ds.getFirstRangeDescriptor(); err != nil { return nil, err } } else { // Look up desc from the cache, which will recursively call into // ds.getRangeDescriptors if it is not cached. desc, err = ds.rangeCache.LookupRangeDescriptor(metadataKey, options) if err != nil { return nil, err } } return ds.internalRangeLookup(metadataKey, options, desc) }
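// A hedged sketch of why the recursion above terminates in at most two steps:
// an ordinary key's meta key is a meta2 key, whose own meta key is a meta1
// key, and meta1 is served by the gossiped first range. rangeMetaKey is the
// same toy stand-in used earlier, not the real keys package.
package main

import (
	"bytes"
	"fmt"
)

var (
	meta1Prefix = []byte("\x00\x00meta1") // stand-in for keys.Meta1Prefix
	meta2Prefix = []byte("\x00\x00meta2") // stand-in for keys.Meta2Prefix
)

func rangeMetaKey(key []byte) []byte {
	switch {
	case len(key) == 0 || bytes.HasPrefix(key, meta1Prefix):
		return nil // KeyMin
	case bytes.HasPrefix(key, meta2Prefix):
		return append(append([]byte(nil), meta1Prefix...), key[len(meta2Prefix):]...)
	default:
		return append(append([]byte(nil), meta2Prefix...), key...)
	}
}

func lookup(key []byte, depth int) {
	switch metaKey := rangeMetaKey(key); {
	case len(metaKey) == 0:
		fmt.Printf("%*skey %q lives in the first range (gossiped, not looked up)\n", depth*2, "", key)
	case bytes.HasPrefix(metaKey, meta1Prefix):
		fmt.Printf("%*skey %q: meta record %q lives in the first range\n", depth*2, "", key, metaKey)
	default:
		fmt.Printf("%*skey %q: meta record %q; locating its range first\n", depth*2, "", key, metaKey)
		lookup(metaKey, depth+1) // recurses exactly once for ordinary keys
	}
}

func main() {
	lookup([]byte("user/17"), 0)
}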
// getRangeMetadata retrieves the current range descriptor for the target // range. func getRangeMetadata(key proto.Key, mtc *multiTestContext, t *testing.T) proto.RangeDescriptor { // Calls to RangeLookup typically use inconsistent reads, but we // want to do a consistent read here. This is important when we are // considering one of the metadata ranges: we must not do an // inconsistent lookup in our own copy of the range. reply := proto.RangeLookupResponse{} b := &client.Batch{} b.InternalAddCall(proto.Call{ Args: &proto.RangeLookupRequest{ RequestHeader: proto.RequestHeader{ Key: keys.RangeMetaKey(key), }, MaxRanges: 1, }, Reply: &reply, }) if err := mtc.db.Run(b).GoError(); err != nil { t.Fatalf("error getting range metadata: %s", err) } if a, e := len(reply.Ranges), 1; a != e { t.Fatalf("expected %d range descriptor, got %d", e, a) } return reply.Ranges[0] }
// process performs a consistent lookup on the range descriptor to see if we are // still a member of the range. func (q *rangeGCQueue) process(now proto.Timestamp, rng *Range) error { // Calls to InternalRangeLookup typically use inconsistent reads, but we // want to do a consistent read here. This is important when we are // considering one of the metadata ranges: we must not do an inconsistent // lookup in our own copy of the range. reply := proto.InternalRangeLookupResponse{} b := &client.Batch{} b.InternalAddCall(proto.Call{ Args: &proto.InternalRangeLookupRequest{ RequestHeader: proto.RequestHeader{ Key: keys.RangeMetaKey(rng.Desc().StartKey), }, MaxRanges: 1, }, Reply: &reply, }) if err := q.db.Run(b); err != nil { return err } if len(reply.Ranges) != 1 { return util.Errorf("expected 1 range descriptor, got %d", len(reply.Ranges)) } desc := reply.Ranges[0] currentMember := false if me := rng.GetReplica(); me != nil { for _, rep := range desc.Replicas { if rep.StoreID == me.StoreID { currentMember = true break } } } if !currentMember { // We are no longer a member of this range; clean up our local data. if log.V(1) { log.Infof("destroying local data from range %d", rng.Desc().RaftID) } if err := rng.rm.RemoveRange(rng); err != nil { return err } // TODO(bdarnell): update Destroy to leave tombstones for removed ranges (#768) // TODO(bdarnell): add some sort of locking to prevent the range // from being recreated while the underlying data is being destroyed. if err := rng.Destroy(); err != nil { return err } } else if desc.RaftID != rng.Desc().RaftID { // If we get a different raft ID back, then the range has been merged // away. But currentMember is true, so we are still a member of the // subsuming range. Shut down raft processing for the former range // and delete any remaining metadata, but do not delete the data. if log.V(1) { log.Infof("removing merged range %d", rng.Desc().RaftID) } if err := rng.rm.RemoveRange(rng); err != nil { return err } // TODO(bdarnell): remove raft logs and other metadata (while leaving a // tombstone). Add tests for GC of merged ranges. } return nil }
// TestRangeLookupUseReverse tests whether the results and the results count // are correct when scanning in reverse order. func TestRangeLookupUseReverse(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Init test ranges: // ["","a"), ["a","c"), ["c","e"), ["e","g") and ["g","\xff\xff"). splits := []roachpb.AdminSplitRequest{ adminSplitArgs(roachpb.Key("g"), roachpb.Key("g")), adminSplitArgs(roachpb.Key("e"), roachpb.Key("e")), adminSplitArgs(roachpb.Key("c"), roachpb.Key("c")), adminSplitArgs(roachpb.Key("a"), roachpb.Key("a")), } for _, split := range splits { _, pErr := client.SendWrapped(rg1(store), nil, &split) if pErr != nil { t.Fatalf("%q: split unexpected error: %s", split.SplitKey, pErr) } } // Resolve the intents. scanArgs := roachpb.ScanRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(roachpb.RKeyMin.Next()), EndKey: keys.RangeMetaKey(roachpb.RKeyMax), }, } util.SucceedsSoon(t, func() error { _, pErr := client.SendWrapped(rg1(store), nil, &scanArgs) return pErr.GoError() }) revScanArgs := func(key []byte, maxResults int32) *roachpb.RangeLookupRequest { return &roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: key, }, MaxRanges: maxResults, Reverse: true, } } // Test cases. testCases := []struct { request *roachpb.RangeLookupRequest expected []roachpb.RangeDescriptor expectedPre []roachpb.RangeDescriptor }{ // Test key in the middle of the range. { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("f")), 2), // ["e","g") and ["c","e"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, }, }, // Test key in the end key of the range. { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("g")), 3), // ["e","g"), ["c","e") and ["a","c"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, {StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}, }, }, { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("e")), 2), // ["c","e") and ["a","c"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}, }, }, // Test Meta2KeyMax. { request: revScanArgs(keys.Meta2KeyMax, 2), // ["e","g") and ["g","\xff\xff") expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("g"), EndKey: roachpb.RKey("\xff\xff")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, }, // Test Meta1KeyMax. { request: revScanArgs(keys.Meta1KeyMax, 1), // ["","a") expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("a")}, }, }, } for testIdx, test := range testCases { resp, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ ReadConsistency: roachpb.INCONSISTENT, }, test.request) if pErr != nil { t.Fatalf("%d: RangeLookup error: %s", testIdx, pErr) } rlReply := resp.(*roachpb.RangeLookupResponse) // Checks the results count. 
if int32(len(rlReply.Ranges))+int32(len(rlReply.PrefetchedRanges)) != test.request.MaxRanges {
			t.Fatalf("%d: returned results count: expected %d, but got %d", testIdx,
				test.request.MaxRanges, len(rlReply.Ranges)+len(rlReply.PrefetchedRanges))
		}
		// Checks the range descriptors.
		for _, rngSlice := range []struct {
			expect, reply []roachpb.RangeDescriptor
		}{
			{test.expected, rlReply.Ranges},
			{test.expectedPre, rlReply.PrefetchedRanges},
		} {
			for i, rng := range rngSlice.expect {
				if !(rng.StartKey.Equal(rngSlice.reply[i].StartKey) && rng.EndKey.Equal(rngSlice.reply[i].EndKey)) {
					t.Fatalf("%d: returned range is not correct: expected %v, but got %v", testIdx, rng, rngSlice.reply[i])
				}
			}
		}
	}
}
func meta(k roachpb.RKey) roachpb.RKey { return keys.Addr(keys.RangeMetaKey(k)) }
// TestGetCachedRangeDescriptorInclusive verifies the correctness of the result
// that is returned by getCachedRangeDescriptor with inclusive=true.
func TestGetCachedRangeDescriptorInclusive(t *testing.T) {
	defer leaktest.AfterTest(t)

	testData := []*proto.RangeDescriptor{
		{StartKey: proto.Key("a"), EndKey: proto.Key("c")},
		{StartKey: proto.Key("c"), EndKey: proto.Key("e")},
		{StartKey: proto.Key("g"), EndKey: proto.Key("z")},
	}

	cache := newRangeDescriptorCache(nil, 2<<10)
	for _, rd := range testData {
		cache.rangeCache.Add(rangeCacheKey(keys.RangeMetaKey(rd.EndKey)), rd)
	}

	testCases := []struct {
		queryKey proto.Key
		cacheKey rangeCacheKey
		rng      *proto.RangeDescriptor
	}{
		{
			// Check a range start key.
			queryKey: proto.Key("a"),
			cacheKey: nil,
			rng:      nil,
		},
		{
			// Check a range end key.
			queryKey: proto.Key("c"),
			cacheKey: rangeCacheKey(keys.RangeMetaKey(proto.Key("c"))),
			rng:      &proto.RangeDescriptor{StartKey: proto.Key("a"), EndKey: proto.Key("c")},
		},
		{
			// Check a key in the middle of a range.
			queryKey: proto.Key("d"),
			cacheKey: rangeCacheKey(keys.RangeMetaKey(proto.Key("e"))),
			rng:      &proto.RangeDescriptor{StartKey: proto.Key("c"), EndKey: proto.Key("e")},
		},
		{
			// Check a key which misses all cached ranges.
			queryKey: proto.Key("f"),
			cacheKey: nil,
			rng:      nil,
		},
		{
			// Check a range start key whose previous range is not cached.
			queryKey: proto.Key("g"),
			cacheKey: nil,
			rng:      nil,
		},
	}

	for _, test := range testCases {
		cacheKey, targetRange := cache.getCachedRangeDescriptor(test.queryKey, true /* inclusive */)
		if !reflect.DeepEqual(targetRange, test.rng) {
			t.Fatalf("expected range %v, got %v", test.rng, targetRange)
		}
		if !reflect.DeepEqual(cacheKey, test.cacheKey) {
			t.Fatalf("expected cache key %v, got %v", test.cacheKey, cacheKey)
		}
	}
}
// TestRangeCache is a simple test which verifies that metadata ranges
// are being cached and retrieved properly. It sets up a fake backing
// store for the cache, and measures how often that backing store is
// consulted when looking up metadata keys through the cache.
func TestRangeCache(t *testing.T) {
	defer leaktest.AfterTest(t)

	db := newTestDescriptorDB()
	for i, char := range "abcdefghijklmnopqrstuvwx" {
		db.splitRange(t, proto.Key(string(char)))
		if i > 0 && i%6 == 0 {
			db.splitRange(t, keys.RangeMetaKey(proto.Key(string(char))))
		}
	}

	db.cache = newRangeDescriptorCache(db, 2<<10)

	doLookup(t, db.cache, "aa")
	db.assertLookupCount(t, 2, "aa")

	// Descriptors for the following ranges should be cached.
	doLookup(t, db.cache, "ab")
	db.assertLookupCount(t, 0, "ab")
	doLookup(t, db.cache, "ba")
	db.assertLookupCount(t, 0, "ba")
	doLookup(t, db.cache, "cz")
	db.assertLookupCount(t, 0, "cz")

	// The relevant meta2 range wasn't cached; the same goes for the meta1 range.
	doLookup(t, db.cache, "d")
	db.assertLookupCount(t, 1, "d")
	doLookup(t, db.cache, "fa")
	db.assertLookupCount(t, 0, "fa")

	// The relevant meta2 range wasn't cached; the meta1 range was aggressively cached.
	doLookup(t, db.cache, "ij")
	db.assertLookupCount(t, 1, "ij")
	doLookup(t, db.cache, "jk")
	db.assertLookupCount(t, 0, "jk")
	doLookup(t, db.cache, "pn")
	db.assertLookupCount(t, 1, "pn")

	// Totally uncached ranges.
	doLookup(t, db.cache, "vu")
	db.assertLookupCount(t, 2, "vu")
	doLookup(t, db.cache, "xx")
	db.assertLookupCount(t, 0, "xx")

	// Evicting clears one meta1 and one meta2 cache entry.
	db.cache.EvictCachedRangeDescriptor(proto.Key("da"), nil, false)
	doLookup(t, db.cache, "fa")
	db.assertLookupCount(t, 0, "fa")
	doLookup(t, db.cache, "da")
	db.assertLookupCount(t, 2, "da")

	// Looking up a descriptor that lands on an end-key should work
	// without a cache miss.
	doLookup(t, db.cache, "a")
	db.assertLookupCount(t, 0, "a")

	// Attempt to compare-and-evict with a descriptor that is not equal to the
	// cached one; it should not alter the cache.
	db.cache.EvictCachedRangeDescriptor(proto.Key("cz"), &proto.RangeDescriptor{}, false)
	doLookup(t, db.cache, "cz")
	db.assertLookupCount(t, 0, "cz")
	// Now evict with the actual descriptor. The cache should clear the
	// descriptor and the cached meta key.
	db.cache.EvictCachedRangeDescriptor(proto.Key("cz"), doLookup(t, db.cache, "cz"), false)
	doLookup(t, db.cache, "cz")
	db.assertLookupCount(t, 2, "cz")
}
// TestRangeCacheClearOverlapping verifies that existing, overlapping
// cached entries are cleared when adding a new entry.
func TestRangeCacheClearOverlapping(t *testing.T) {
	defer leaktest.AfterTest(t)()

	defDesc := &roachpb.RangeDescriptor{
		StartKey: roachpb.RKeyMin,
		EndKey:   roachpb.RKeyMax,
	}

	cache := newRangeDescriptorCache(nil, 2<<10)
	cache.rangeCache.cache.Add(rangeCacheKey(keys.RangeMetaKey(roachpb.RKeyMax)), defDesc)

	// Now, add a new, overlapping set of descriptors.
	minToBDesc := &roachpb.RangeDescriptor{
		StartKey: roachpb.RKeyMin,
		EndKey:   roachpb.RKey("b"),
	}
	bToMaxDesc := &roachpb.RangeDescriptor{
		StartKey: roachpb.RKey("b"),
		EndKey:   roachpb.RKeyMax,
	}
	if err := cache.clearOverlappingCachedRangeDescriptors(minToBDesc); err != nil {
		t.Fatal(err)
	}
	cache.rangeCache.cache.Add(rangeCacheKey(mustMeta(roachpb.RKey("b"))), minToBDesc)
	if _, desc, err := cache.getCachedRangeDescriptor(roachpb.RKey("b"), false); err != nil {
		t.Fatal(err)
	} else if desc != nil {
		t.Errorf("descriptor unexpectedly non-nil: %s", desc)
	}

	if err := cache.clearOverlappingCachedRangeDescriptors(bToMaxDesc); err != nil {
		t.Fatal(err)
	}
	cache.rangeCache.cache.Add(rangeCacheKey(mustMeta(roachpb.RKeyMax)), bToMaxDesc)
	if _, desc, err := cache.getCachedRangeDescriptor(roachpb.RKey("b"), false); err != nil {
		t.Fatal(err)
	} else if desc != bToMaxDesc {
		t.Errorf("expected descriptor %s; got %s", bToMaxDesc, desc)
	}

	// Add the default descriptor back, which should remove the two split descriptors.
	if err := cache.clearOverlappingCachedRangeDescriptors(defDesc); err != nil {
		t.Fatal(err)
	}
	cache.rangeCache.cache.Add(rangeCacheKey(keys.RangeMetaKey(roachpb.RKeyMax)), defDesc)
	for _, key := range []roachpb.RKey{roachpb.RKey("a"), roachpb.RKey("b")} {
		if _, desc, err := cache.getCachedRangeDescriptor(key, false); err != nil {
			t.Fatal(err)
		} else if desc != defDesc {
			t.Errorf("expected descriptor %s for key %s; got %s", defDesc, key, desc)
		}
	}

	// Insert ["b", "c") and then insert ["a", "b"). Verify that the former is
	// not evicted by the latter.
	bToCDesc := &roachpb.RangeDescriptor{
		StartKey: roachpb.RKey("b"),
		EndKey:   roachpb.RKey("c"),
	}
	if err := cache.clearOverlappingCachedRangeDescriptors(bToCDesc); err != nil {
		t.Fatal(err)
	}
	cache.rangeCache.cache.Add(rangeCacheKey(mustMeta(roachpb.RKey("c"))), bToCDesc)
	if _, desc, err := cache.getCachedRangeDescriptor(roachpb.RKey("c"), true); err != nil {
		t.Fatal(err)
	} else if desc != bToCDesc {
		t.Errorf("expected descriptor %s; got %s", bToCDesc, desc)
	}

	aToBDesc := &roachpb.RangeDescriptor{
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("b"),
	}
	if err := cache.clearOverlappingCachedRangeDescriptors(aToBDesc); err != nil {
		t.Fatal(err)
	}
	cache.rangeCache.cache.Add(rangeCacheKey(mustMeta(roachpb.RKey("b"))), aToBDesc)
	if _, desc, err := cache.getCachedRangeDescriptor(roachpb.RKey("c"), true); err != nil {
		t.Fatal(err)
	} else if desc != bToCDesc {
		t.Errorf("expected descriptor %s; got %s", bToCDesc, desc)
	}
}
// process performs a consistent lookup on the range descriptor to see if we are // still a member of the range. func (q *replicaGCQueue) process(now roachpb.Timestamp, rng *Replica, _ config.SystemConfig) error { // Note that the Replicas field of desc is probably out of date, so // we should only use `desc` for its static fields like RangeID and // StartKey (and avoid rng.GetReplica() for the same reason). desc := rng.Desc() // Calls to RangeLookup typically use inconsistent reads, but we // want to do a consistent read here. This is important when we are // considering one of the metadata ranges: we must not do an // inconsistent lookup in our own copy of the range. b := &client.Batch{} b.InternalAddRequest(&roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(desc.StartKey), }, MaxRanges: 1, }) br, err := q.db.RunWithResponse(b) if err != nil { return err } reply := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse) if len(reply.Ranges) != 1 { return util.Errorf("expected 1 range descriptor, got %d", len(reply.Ranges)) } replyDesc := reply.Ranges[0] currentMember := false storeID := rng.store.StoreID() for _, rep := range replyDesc.Replicas { if rep.StoreID == storeID { currentMember = true break } } if !currentMember { // We are no longer a member of this range; clean up our local data. if log.V(1) { log.Infof("destroying local data from range %d", desc.RangeID) } if err := rng.store.RemoveReplica(rng, replyDesc, true); err != nil { return err } } else if desc.RangeID != replyDesc.RangeID { // If we get a different range ID back, then the range has been merged // away. But currentMember is true, so we are still a member of the // subsuming range. Shut down raft processing for the former range // and delete any remaining metadata, but do not delete the data. if log.V(1) { log.Infof("removing merged range %d", desc.RangeID) } if err := rng.store.RemoveReplica(rng, replyDesc, false); err != nil { return err } // TODO(bdarnell): remove raft logs and other metadata (while leaving a // tombstone). Add tests for GC of merged ranges. } else { // This range is a current member of the raft group. Set the last replica // GC check time to avoid re-processing for another check interval. if err := rng.setLastReplicaGCTimestamp(now); err != nil { return err } } return nil }