// metaKey returns the range metadata (meta1/meta2) addressing key for key,
// resolved to its address form. It panics on failure.
func metaKey(key roachpb.RKey) []byte {
	rk, err := keys.Addr(keys.RangeMetaKey(key))
	if err != nil {
		panic(err)
	}
	return rk
}
// rangeAddressing updates or deletes the range addressing metadata
// for the range specified by desc. The action to take is specified by
// the supplied metaAction function.
//
// The rules for meta1 and meta2 records are as follows:
//
// 1. If desc.StartKey or desc.EndKey is meta1:
//    - ERROR
// 2. If desc.EndKey is meta2:
//    - meta1(desc.EndKey)
// 3. If desc.EndKey is normal user key:
//    - meta2(desc.EndKey)
//    3a. If desc.StartKey is KeyMin or meta2:
//        - meta1(KeyMax)
func rangeAddressing(b *client.Batch, desc *roachpb.RangeDescriptor, action metaAction) error {
	// 1. handle illegal case of start or end key being meta1.
	if bytes.HasPrefix(desc.EndKey, keys.Meta1Prefix) ||
		bytes.HasPrefix(desc.StartKey, keys.Meta1Prefix) {
		return errors.Errorf("meta1 addressing records cannot be split: %+v", desc)
	}

	// Note that both cases 2 and 3 are handled by keys.RangeMetaKey.
	//
	// 2. the case of the range ending with a meta2 prefix. This means
	// the range is full of meta2. We must update the relevant meta1
	// entry pointing to the end of this range.
	//
	// 3. the range ends with a normal user key, so we must update the
	// relevant meta2 entry pointing to the end of this range.
	action(b, keys.RangeMetaKey(desc.EndKey), desc)

	if !bytes.HasPrefix(desc.EndKey, keys.Meta2Prefix) {
		// 3a. the range starts with KeyMin or a meta2 addressing record,
		// update the meta1 entry for KeyMax.
		if bytes.Equal(desc.StartKey, roachpb.RKeyMin) ||
			bytes.HasPrefix(desc.StartKey, keys.Meta2Prefix) {
			action(b, keys.Meta1KeyMax, desc)
		}
	}
	return nil
}
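// The sketch below is not part of the source; it is a hypothetical,
// illustrative example (assuming the same "bytes", "keys", and "roachpb"
// imports as the surrounding code) of the two-level addressing that
// rangeAddressing relies on: keys.RangeMetaKey of an ordinary user key is a
// meta2 key (case 3), and keys.RangeMetaKey of a meta2 key is a meta1 key
// (case 2). The key "user-key" and the function name are illustrative only.
func exampleRangeMetaKeyLevels() {
	userKey := roachpb.RKey("user-key") // illustrative key, not from the source

	// Case 3: the addressing record for a user key lives in meta2.
	meta2Key := keys.RangeMetaKey(userKey)
	if !bytes.HasPrefix(meta2Key, keys.Meta2Prefix) {
		panic("expected a meta2 addressing key")
	}

	// Case 2: the addressing record for a meta2 key lives in meta1.
	meta1Key := keys.RangeMetaKey(keys.MustAddr(meta2Key))
	if !bytes.HasPrefix(meta1Key, keys.Meta1Prefix) {
		panic("expected a meta1 addressing key")
	}
}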
// LookupRange returns the descriptor of the range containing key.
func (ts *TestServer) LookupRange(key roachpb.Key) (roachpb.RangeDescriptor, error) {
	rangeLookupReq := roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			Key: keys.RangeMetaKey(keys.MustAddr(key)),
		},
		MaxRanges: 1,
	}
	resp, pErr := client.SendWrapped(context.Background(), ts.DistSender(), &rangeLookupReq)
	if pErr != nil {
		return roachpb.RangeDescriptor{}, errors.Errorf(
			"%q: lookup range unexpected error: %s", key, pErr)
	}
	return resp.(*roachpb.RangeLookupResponse).Ranges[0], nil
}
// lookupRange returns the descriptor of the range containing key, as seen
// through the client for node nodeIdx.
func (c *Cluster) lookupRange(nodeIdx int, key roachpb.Key) (*roachpb.RangeDescriptor, error) {
	req := &roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			Key: keys.RangeMetaKey(keys.MustAddr(key)),
		},
		MaxRanges: 1,
	}
	sender := c.Clients[nodeIdx].GetSender()
	resp, pErr := client.SendWrapped(context.Background(), sender, req)
	if pErr != nil {
		return nil, errors.Errorf("%s: lookup range: %s", key, pErr)
	}
	return &resp.(*roachpb.RangeLookupResponse).Ranges[0], nil
}
// runLsRanges scans the meta2 addressing records starting at the optional
// start key argument and prints the range descriptors it finds.
func runLsRanges(cmd *cobra.Command, args []string) error {
	if len(args) > 1 {
		return usageAndError(cmd)
	}

	var startKey roachpb.Key
	{
		k := roachpb.KeyMin.Next()
		if len(args) > 0 {
			k = roachpb.Key(args[0])
		}
		rk, err := keys.Addr(k)
		if err != nil {
			panic(err)
		}
		startKey = keys.RangeMetaKey(rk)
	}
	endKey := keys.Meta2Prefix.PrefixEnd()

	kvDB, stopper, err := MakeDBClient()
	if err != nil {
		return err
	}
	defer stopper.Stop()

	rows, err := kvDB.Scan(context.Background(), startKey, endKey, maxResults)
	if err != nil {
		return err
	}

	for _, row := range rows {
		desc := &roachpb.RangeDescriptor{}
		if err := row.ValueProto(desc); err != nil {
			return errors.Wrapf(err, "unable to unmarshal range descriptor at %s", row.Key)
		}
		fmt.Printf("%s-%s [%d]\n", desc.StartKey, desc.EndKey, desc.RangeID)
		for i, replica := range desc.Replicas {
			fmt.Printf("\t%d: node-id=%d store-id=%d\n", i, replica.NodeID, replica.StoreID)
		}
	}
	fmt.Printf("%d result(s)\n", len(rows))
	return nil
}
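// The following is a minimal, hypothetical sketch (not from the source) of the
// same meta2 descriptor scan performed by runLsRanges, decoupled from the CLI
// plumbing. It assumes a *client.DB and the imports used above; the helper
// name listRangeDescriptors is illustrative only.
func listRangeDescriptors(
	ctx context.Context, db *client.DB, from roachpb.Key, maxResults int64,
) ([]roachpb.RangeDescriptor, error) {
	rk, err := keys.Addr(from)
	if err != nil {
		return nil, err
	}
	// Scan the meta2 records from the addressing key for `from` up to the end
	// of the meta2 keyspace, mirroring the bounds used by runLsRanges.
	rows, err := db.Scan(ctx, keys.RangeMetaKey(rk), keys.Meta2Prefix.PrefixEnd(), maxResults)
	if err != nil {
		return nil, err
	}
	descs := make([]roachpb.RangeDescriptor, len(rows))
	for i, row := range rows {
		if err := row.ValueProto(&descs[i]); err != nil {
			return nil, errors.Wrapf(err, "unable to unmarshal range descriptor at %s", row.Key)
		}
	}
	return descs, nil
}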
// TableStats is an endpoint that returns disk usage and replication
// statistics for the specified table.
func (s *adminServer) TableStats(
	ctx context.Context, req *serverpb.TableStatsRequest,
) (*serverpb.TableStatsResponse, error) {
	escDBName := parser.Name(req.Database).String()
	if err := s.assertNotVirtualSchema(escDBName); err != nil {
		return nil, err
	}

	// Get table span.
	var tableSpan roachpb.Span
	iexecutor := sql.InternalExecutor{LeaseManager: s.server.leaseMgr}
	if err := s.server.db.Txn(ctx, func(txn *client.Txn) error {
		var err error
		tableSpan, err = iexecutor.GetTableSpan(s.getUser(req), txn, req.Database, req.Table)
		return err
	}); err != nil {
		return nil, s.serverError(err)
	}

	startKey, err := keys.Addr(tableSpan.Key)
	if err != nil {
		return nil, s.serverError(err)
	}
	endKey, err := keys.Addr(tableSpan.EndKey)
	if err != nil {
		return nil, s.serverError(err)
	}

	// Get current range descriptors for table. This is done by scanning over
	// meta2 keys for the range.
	rangeDescKVs, err := s.server.db.Scan(ctx, keys.RangeMetaKey(startKey), keys.RangeMetaKey(endKey), 0)
	if err != nil {
		return nil, s.serverError(err)
	}

	// Extract a list of node IDs from the response.
	nodeIDs := make(map[roachpb.NodeID]struct{})
	for _, kv := range rangeDescKVs {
		var rng roachpb.RangeDescriptor
		if err := kv.Value.GetProto(&rng); err != nil {
			return nil, s.serverError(err)
		}
		for _, repl := range rng.Replicas {
			nodeIDs[repl.NodeID] = struct{}{}
		}
	}

	// Construct TableStatsResponse by sending an RPC to every node involved.
	tableStatResponse := serverpb.TableStatsResponse{
		NodeCount: int64(len(nodeIDs)),
		// TODO(mrtracy): The "RangeCount" returned by TableStats is more
		// accurate than the "RangeCount" returned by TableDetails, because this
		// method always consistently queries the meta2 key range for the table;
		// in contrast, TableDetails uses a method on the DistSender, which
		// queries using a range metadata cache and thus may return stale data
		// for tables that are rapidly splitting. However, one potential
		// *advantage* of using the DistSender is that it will populate the
		// DistSender's range metadata cache in the case where meta2 information
		// for this table is not already present; the query used by TableStats
		// does not populate the DistSender cache. We should consider plumbing
		// TableStats' meta2 query through the DistSender so that it will share
		// the advantage of populating the cache (without the disadvantage of
		// potentially returning stale data).
		// See Github #5435 for some discussion.
		RangeCount: int64(len(rangeDescKVs)),
	}
	type nodeResponse struct {
		nodeID roachpb.NodeID
		resp   *serverpb.SpanStatsResponse
		err    error
	}

	// Send a SpanStats query to each node. Set a timeout on the context for
	// these queries.
	responses := make(chan nodeResponse)
	nodeCtx, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
	defer cancel()
	for nodeID := range nodeIDs {
		nodeID := nodeID
		if err := s.server.stopper.RunAsyncTask(nodeCtx, func(ctx context.Context) {
			var spanResponse *serverpb.SpanStatsResponse
			client, err := s.server.status.dialNode(nodeID)
			if err == nil {
				req := serverpb.SpanStatsRequest{
					StartKey: startKey,
					EndKey:   endKey,
					NodeID:   nodeID.String(),
				}
				spanResponse, err = client.SpanStats(ctx, &req)
			}

			response := nodeResponse{
				nodeID: nodeID,
				resp:   spanResponse,
				err:    err,
			}
			select {
			case responses <- response:
				// Response processed.
			case <-ctx.Done():
				// Context completed, response no longer needed.
			}
		}); err != nil {
			return nil, err
		}
	}

	for remainingResponses := len(nodeIDs); remainingResponses > 0; remainingResponses-- {
		select {
		case resp := <-responses:
			// For nodes which returned an error, note that the node's data
			// is missing. For successful calls, aggregate statistics.
			if resp.err != nil {
				tableStatResponse.MissingNodes = append(
					tableStatResponse.MissingNodes,
					serverpb.TableStatsResponse_MissingNode{
						NodeID:       resp.nodeID.String(),
						ErrorMessage: resp.err.Error(),
					},
				)
			} else {
				tableStatResponse.Stats.Add(resp.resp.TotalStats)
				tableStatResponse.ReplicaCount += int64(resp.resp.RangeCount)
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	return &tableStatResponse, nil
}
// process performs a consistent lookup on the range descriptor to see if we are
// still a member of the range.
func (rgcq *replicaGCQueue) process(
	ctx context.Context, repl *Replica, _ config.SystemConfig,
) error {
	// Note that the Replicas field of desc is probably out of date, so
	// we should only use `desc` for its static fields like RangeID and
	// StartKey (and avoid rng.GetReplica() for the same reason).
	desc := repl.Desc()

	// Calls to RangeLookup typically use inconsistent reads, but we
	// want to do a consistent read here. This is important when we are
	// considering one of the metadata ranges: we must not do an
	// inconsistent lookup in our own copy of the range.
	b := &client.Batch{}
	b.AddRawRequest(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			Key: keys.RangeMetaKey(desc.StartKey),
		},
		MaxRanges: 1,
	})
	if err := rgcq.db.Run(ctx, b); err != nil {
		return err
	}
	br := b.RawResponse()
	reply := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse)

	if len(reply.Ranges) != 1 {
		return errors.Errorf("expected 1 range descriptor, got %d", len(reply.Ranges))
	}

	replyDesc := reply.Ranges[0]
	if _, currentMember := replyDesc.GetReplicaDescriptor(repl.store.StoreID()); !currentMember {
		// We are no longer a member of this range; clean up our local data.
		rgcq.metrics.RemoveReplicaCount.Inc(1)
		log.VEventf(ctx, 1, "destroying local data")
		if err := repl.store.RemoveReplica(ctx, repl, replyDesc, true); err != nil {
			return err
		}
	} else if desc.RangeID != replyDesc.RangeID {
		// If we get a different range ID back, then the range has been merged
		// away. But currentMember is true, so we are still a member of the
		// subsuming range. Shut down raft processing for the former range
		// and delete any remaining metadata, but do not delete the data.
		rgcq.metrics.RemoveReplicaCount.Inc(1)
		log.VEventf(ctx, 1, "removing merged range")
		if err := repl.store.RemoveReplica(ctx, repl, replyDesc, false); err != nil {
			return err
		}

		// TODO(bdarnell): remove raft logs and other metadata (while leaving a
		// tombstone). Add tests for GC of merged ranges.
	} else {
		// This replica is a current member of the raft group. Set the last replica
		// GC check time to avoid re-processing for another check interval.
		//
		// TODO(tschottdorf): should keep stats in particular on this outcome
		// but also on how good a job the queue does at inspecting every
		// Replica (see #8111) when inactive ones can be starved by
		// event-driven additions.
		log.Event(ctx, "not gc'able")
		if err := repl.setLastReplicaGCTimestamp(ctx, repl.store.Clock().Now()); err != nil {
			return err
		}
	}
	return nil
}
// TestRangeLookupUseReverse tests whether the results and the results count
// are correct when scanning in reverse order.
func TestRangeLookupUseReverse(t *testing.T) {
	defer leaktest.AfterTest(t)()
	storeCfg := storage.TestStoreConfig(nil)
	storeCfg.TestingKnobs.DisableSplitQueue = true
	store, stopper := createTestStoreWithConfig(t, storeCfg)
	defer stopper.Stop()

	// Init test ranges:
	// ["","a"), ["a","c"), ["c","e"), ["e","g") and ["g","\xff\xff").
	splits := []roachpb.AdminSplitRequest{
		adminSplitArgs(roachpb.Key("g"), roachpb.Key("g")),
		adminSplitArgs(roachpb.Key("e"), roachpb.Key("e")),
		adminSplitArgs(roachpb.Key("c"), roachpb.Key("c")),
		adminSplitArgs(roachpb.Key("a"), roachpb.Key("a")),
	}

	for _, split := range splits {
		_, pErr := client.SendWrapped(context.Background(), rg1(store), &split)
		if pErr != nil {
			t.Fatalf("%q: split unexpected error: %s", split.SplitKey, pErr)
		}
	}

	// Resolve the intents.
	scanArgs := roachpb.ScanRequest{
		Span: roachpb.Span{
			Key:    keys.RangeMetaKey(roachpb.RKeyMin.Next()),
			EndKey: keys.RangeMetaKey(roachpb.RKeyMax),
		},
	}
	util.SucceedsSoon(t, func() error {
		_, pErr := client.SendWrapped(context.Background(), rg1(store), &scanArgs)
		return pErr.GoError()
	})

	revScanArgs := func(key []byte, maxResults int32) *roachpb.RangeLookupRequest {
		return &roachpb.RangeLookupRequest{
			Span: roachpb.Span{
				Key: key,
			},
			MaxRanges: maxResults,
			Reverse:   true,
		}
	}

	// Test cases.
	testCases := []struct {
		request     *roachpb.RangeLookupRequest
		expected    []roachpb.RangeDescriptor
		expectedPre []roachpb.RangeDescriptor
	}{
		// Test key in the middle of the range.
		{
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("f")), 2),
			// ["e","g") and ["c","e").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
			},
		},
		// Test key in the end key of the range.
		{
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("g")), 3),
			// ["e","g"), ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
			},
		},
		{
			request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("e")), 2),
			// ["c","e") and ["a","c").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")},
			},
		},
		// Test Meta2KeyMax.
		{
			request: revScanArgs(keys.Meta2KeyMax, 2),
			// ["e","g") and ["g","\xff\xff").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("g"), EndKey: roachpb.RKey("\xff\xff")},
			},
			expectedPre: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")},
			},
		},
		// Test Meta1KeyMax.
		{
			request: revScanArgs(keys.Meta1KeyMax, 1),
			// ["","a").
			expected: []roachpb.RangeDescriptor{
				{StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("a")},
			},
		},
	}

	for testIdx, test := range testCases {
		resp, pErr := client.SendWrappedWith(context.Background(), rg1(store), roachpb.Header{
			ReadConsistency: roachpb.INCONSISTENT,
		}, test.request)
		if pErr != nil {
			t.Fatalf("%d: RangeLookup error: %s", testIdx, pErr)
		}

		rlReply := resp.(*roachpb.RangeLookupResponse)
		// Checks the results count.
		if rsCount, preRSCount := len(rlReply.Ranges), len(rlReply.PrefetchedRanges); int32(rsCount+preRSCount) != test.request.MaxRanges {
			t.Fatalf("%d: returned results count, expected %d, but got %d+%d", testIdx, test.request.MaxRanges, rsCount, preRSCount)
		}
		// Checks the range descriptors.
		for _, rngSlice := range []struct {
			expect, reply []roachpb.RangeDescriptor
		}{
			{test.expected, rlReply.Ranges},
			{test.expectedPre, rlReply.PrefetchedRanges},
		} {
			for i, rng := range rngSlice.expect {
				if !(rng.StartKey.Equal(rngSlice.reply[i].StartKey) && rng.EndKey.Equal(rngSlice.reply[i].EndKey)) {
					t.Fatalf("%d: returned range is not correct, expected %v, but got %v", testIdx, rng, rngSlice.reply[i])
				}
			}
		}
	}
}
// meta returns the range metadata (meta1/meta2) addressing key for k,
// resolved to its address form.
func meta(k roachpb.RKey) (roachpb.RKey, error) {
	return keys.Addr(keys.RangeMetaKey(k))
}