// TestStoreRangeMergeTwoEmptyRanges tries to merge two empty ranges together. func TestStoreRangeMergeTwoEmptyRanges(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() if _, _, err := createSplitRanges(store); err != nil { t.Fatal(err) } // Merge the b range back into the a range. args := adminMergeArgs(roachpb.KeyMin) _, err := client.SendWrapped(rg1(store), nil, &args) if err != nil { t.Fatal(err) } // Verify the merge by looking up keys from both ranges. replicaA := store.LookupReplica([]byte("a"), nil) replicaB := store.LookupReplica([]byte("c"), nil) if !reflect.DeepEqual(replicaA, replicaB) { t.Fatalf("ranges were not merged %s!=%s", replicaA, replicaB) } }
// TestLeaderAfterSplit verifies that a raft group created by a split // elects a leader without waiting for an election timeout. func TestLeaderAfterSplit(t *testing.T) { defer leaktest.AfterTest(t)() storeContext := storage.TestStoreContext() storeContext.RaftElectionTimeoutTicks = 1000000 mtc := &multiTestContext{ storeContext: &storeContext, } mtc.Start(t, 3) defer mtc.Stop() mtc.replicateRange(1, 1, 2) leftKey := roachpb.Key("a") splitKey := roachpb.Key("m") rightKey := roachpb.Key("z") splitArgs := adminSplitArgs(roachpb.KeyMin, splitKey) if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &splitArgs); pErr != nil { t.Fatal(pErr) } incArgs := incrementArgs(leftKey, 1) if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &incArgs); pErr != nil { t.Fatal(pErr) } incArgs = incrementArgs(rightKey, 2) if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &incArgs); pErr != nil { t.Fatal(pErr) } }
// TestStoreRangeMergeStats starts by splitting a range, then writing random data // to both sides of the split. It then merges the ranges and verifies the merged // range has stats consistent with recomputations. func TestStoreRangeMergeStats(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, manual := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Split the range. aDesc, bDesc, err := createSplitRanges(store) if err != nil { t.Fatal(err) } // Write some values left and right of the proposed split key. writeRandomDataToRange(t, store, aDesc.RangeID, []byte("aaa")) writeRandomDataToRange(t, store, bDesc.RangeID, []byte("ccc")) // Get the range stats for both ranges now that we have data. var msA, msB enginepb.MVCCStats snap := store.Engine().NewSnapshot() defer snap.Close() if err := engine.MVCCGetRangeStats(context.Background(), snap, aDesc.RangeID, &msA); err != nil { t.Fatal(err) } if err := engine.MVCCGetRangeStats(context.Background(), snap, bDesc.RangeID, &msB); err != nil { t.Fatal(err) } // Stats should agree with recomputation. if err := verifyRecomputedStats(snap, aDesc, msA, manual.UnixNano()); err != nil { t.Fatalf("failed to verify range A's stats before split: %v", err) } if err := verifyRecomputedStats(snap, bDesc, msB, manual.UnixNano()); err != nil { t.Fatalf("failed to verify range B's stats before split: %v", err) } manual.Increment(100) // Merge the b range back into the a range. args := adminMergeArgs(roachpb.KeyMin) if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil { t.Fatal(err) } rngMerged := store.LookupReplica(aDesc.StartKey, nil) // Get the range stats for the merged range and verify. snap = store.Engine().NewSnapshot() defer snap.Close() var msMerged enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rngMerged.RangeID, &msMerged); err != nil { t.Fatal(err) } // Merged stats should agree with recomputation. if err := verifyRecomputedStats(snap, rngMerged.Desc(), msMerged, manual.UnixNano()); err != nil { t.Errorf("failed to verify range's stats after merge: %v", err) } }
// TestReplicaGCQueueDropReplica verifies that a removed replica is // immediately cleaned up. func TestReplicaGCQueueDropReplicaDirect(t *testing.T) { defer leaktest.AfterTest(t)() mtc := &multiTestContext{} const numStores = 3 rangeID := roachpb.RangeID(1) // In this test, the Replica on the second Node is removed, and the test // verifies that that Node adds this Replica to its RangeGCQueue. However, // the queue does a consistent lookup which will usually be read from // Node 1. Hence, if Node 1 hasn't processed the removal when Node 2 has, // no GC will take place since the consistent RangeLookup hits the first // Node. We use the TestingCommandFilter to make sure that the second Node // waits for the first. ctx := storage.TestStoreContext() mtc.storeContext = &ctx mtc.storeContext.TestingKnobs.TestingCommandFilter = func(filterArgs storageutils.FilterArgs) error { et, ok := filterArgs.Req.(*roachpb.EndTransactionRequest) if !ok || filterArgs.Sid != 2 { return nil } rct := et.InternalCommitTrigger.GetChangeReplicasTrigger() if rct == nil || rct.ChangeType != roachpb.REMOVE_REPLICA { return nil } util.SucceedsSoon(t, func() error { r, err := mtc.stores[0].GetReplica(rangeID) if err != nil { return err } if i, _ := r.Desc().FindReplica(2); i >= 0 { return errors.New("expected second node gone from first node's known replicas") } return nil }) return nil } mtc.Start(t, numStores) defer mtc.Stop() mtc.replicateRange(rangeID, 1, 2) mtc.unreplicateRange(rangeID, 1) // Make sure the range is removed from the store. util.SucceedsSoon(t, func() error { if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "range .* was not found") { return util.Errorf("expected range removal") } return nil }) }
// TestStoreRangeMergeLastRange verifies that merging the last range // fails. func TestStoreRangeMergeLastRange(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Merge last range. args := adminMergeArgs(roachpb.KeyMin) if _, pErr := client.SendWrapped(rg1(store), nil, &args); !testutils.IsPError(pErr, "cannot merge final range") { t.Fatalf("expected 'cannot merge final range' error; got %s", pErr) } }
// TestStoreRangeSplitInsideRow verifies an attempt to split a range inside of // a table row will cause a split at a boundary between rows. func TestStoreRangeSplitInsideRow(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Manually create some the column keys corresponding to the table: // // CREATE TABLE t (id STRING PRIMARY KEY, col1 INT, col2 INT) tableKey := keys.MakeTablePrefix(keys.MaxReservedDescID + 1) rowKey := roachpb.Key(encoding.EncodeVarintAscending(append([]byte(nil), tableKey...), 1)) rowKey = encoding.EncodeStringAscending(encoding.EncodeVarintAscending(rowKey, 1), "a") col1Key := keys.MakeFamilyKey(append([]byte(nil), rowKey...), 1) col2Key := keys.MakeFamilyKey(append([]byte(nil), rowKey...), 2) // We don't care about the value, so just store any old thing. if err := store.DB().Put(col1Key, "column 1"); err != nil { t.Fatal(err) } if err := store.DB().Put(col2Key, "column 2"); err != nil { t.Fatal(err) } // Split between col1Key and col2Key by splitting before col2Key. args := adminSplitArgs(col2Key, col2Key) _, err := client.SendWrapped(rg1(store), nil, &args) if err != nil { t.Fatalf("%s: split unexpected error: %s", col1Key, err) } rng1 := store.LookupReplica(col1Key, nil) rng2 := store.LookupReplica(col2Key, nil) // Verify the two columns are still on the same range. if !reflect.DeepEqual(rng1, rng2) { t.Fatalf("%s: ranges differ: %+v vs %+v", roachpb.Key(col1Key), rng1, rng2) } // Verify we split on a row key. if startKey := rng1.Desc().StartKey; !startKey.Equal(rowKey) { t.Fatalf("%s: expected split on %s, but found %s", roachpb.Key(col1Key), roachpb.Key(rowKey), startKey) } // Verify the previous range was split on a row key. rng3 := store.LookupReplica(tableKey, nil) if endKey := rng3.Desc().EndKey; !endKey.Equal(rowKey) { t.Fatalf("%s: expected split on %s, but found %s", roachpb.Key(col1Key), roachpb.Key(rowKey), endKey) } }
// TestStoreRangeSplitAtTablePrefix verifies a range can be split at // UserTableDataMin and still gossip the SystemConfig properly. func TestStoreRangeSplitAtTablePrefix(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() key := keys.MakeRowSentinelKey(append([]byte(nil), keys.UserTableDataMin...)) args := adminSplitArgs(key, key) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatalf("%q: split unexpected error: %s", key, pErr) } var desc sqlbase.TableDescriptor descBytes, err := protoutil.Marshal(&desc) if err != nil { t.Fatal(err) } // Update SystemConfig to trigger gossip. if err := store.DB().Txn(func(txn *client.Txn) error { txn.SetSystemConfigTrigger() // We don't care about the values, just the keys. k := sqlbase.MakeDescMetadataKey(sqlbase.ID(keys.MaxReservedDescID + 1)) return txn.Put(k, &desc) }); err != nil { t.Fatal(err) } successChan := make(chan struct{}, 1) store.Gossip().RegisterCallback(gossip.KeySystemConfig, func(_ string, content roachpb.Value) { contentBytes, err := content.GetBytes() if err != nil { t.Fatal(err) } if bytes.Contains(contentBytes, descBytes) { select { case successChan <- struct{}{}: default: } } }) select { case <-time.After(time.Second): t.Errorf("expected a schema gossip containing %q, but did not see one", descBytes) case <-successChan: } }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to maximum of 50M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of // TestServer.Addr after Start() for client connections. Use Stop() // to shutdown the server after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} tracer := tracing.NewTracer() ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper) server := rpc.NewServer(rpcContext) // never started ltc.Gossip = gossip.New( context.Background(), rpcContext, server, nil, ltc.Stopper, metric.NewRegistry()) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.Stores = storage.NewStores(ltc.Clock) ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper, ltc.Gossip) if ltc.DBContext == nil { dbCtx := client.DefaultDBContext() ltc.DBContext = &dbCtx } ltc.DB = client.NewDBWithContext(ltc.Sender, *ltc.DBContext) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() if ltc.RangeRetryOptions != nil { ctx.RangeRetryOptions = *ltc.RangeRetryOptions } ctx.Ctx = tracing.WithTracer(context.Background(), tracer) ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(context.Background(), ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
// TestStoreRangeSplitAtRangeBounds verifies a range cannot be split // at its start or end keys (would create zero-length range!). This // sort of thing might happen in the wild if two split requests // arrived for same key. The first one succeeds and second would try // to split at the start of the newly split range. func TestStoreRangeSplitAtRangeBounds(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() args := adminSplitArgs(roachpb.KeyMin, []byte("a")) if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil { t.Fatal(err) } // This second split will try to split at end of first split range. if _, err := client.SendWrapped(rg1(store), nil, &args); err == nil { t.Fatalf("split succeeded unexpectedly") } // Now try to split at start of new range. args = adminSplitArgs(roachpb.KeyMin, []byte("a")) if _, err := client.SendWrapped(rg1(store), nil, &args); err == nil { t.Fatalf("split succeeded unexpectedly") } }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to maximum of 50M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of // TestServer.Addr after Start() for client connections. Use Stop() // to shutdown the server after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} tracer := tracing.NewTracer() ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper) ltc.Gossip = gossip.New(rpcContext, nil, ltc.Stopper) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.Stores = storage.NewStores(ltc.Clock) ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper, ltc.Gossip) ltc.DB = client.NewDB(ltc.Sender) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ctx.Tracer = tracer ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
func BenchmarkStoreRangeSplit(b *testing.B) { defer tracing.Disable()() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(b, sCtx) defer stopper.Stop() // Perform initial split of ranges. sArgs := adminSplitArgs(roachpb.KeyMin, []byte("b")) if _, err := client.SendWrapped(rg1(store), nil, &sArgs); err != nil { b.Fatal(err) } // Write some values left and right of the split key. aDesc := store.LookupReplica([]byte("a"), nil).Desc() bDesc := store.LookupReplica([]byte("c"), nil).Desc() writeRandomDataToRange(b, store, aDesc.RangeID, []byte("aaa")) writeRandomDataToRange(b, store, bDesc.RangeID, []byte("ccc")) // Merge the b range back into the a range. mArgs := adminMergeArgs(roachpb.KeyMin) if _, err := client.SendWrapped(rg1(store), nil, &mArgs); err != nil { b.Fatal(err) } b.ResetTimer() for i := 0; i < b.N; i++ { // Split the range. b.StartTimer() if _, err := client.SendWrapped(rg1(store), nil, &sArgs); err != nil { b.Fatal(err) } // Merge the ranges. b.StopTimer() if _, err := client.SendWrapped(rg1(store), nil, &mArgs); err != nil { b.Fatal(err) } } }
// TestStoreRangeSplitConcurrent verifies that concurrent range splits // of the same range are executed serially, and all but the first fail // because the split key is invalid after the first split succeeds. func TestStoreRangeSplitConcurrent(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() splitKey := roachpb.Key("a") concurrentCount := int32(10) wg := sync.WaitGroup{} wg.Add(int(concurrentCount)) failureCount := int32(0) for i := int32(0); i < concurrentCount; i++ { go func() { args := adminSplitArgs(roachpb.KeyMin, splitKey) _, pErr := client.SendWrapped(rg1(store), nil, &args) if pErr != nil { atomic.AddInt32(&failureCount, 1) } wg.Done() }() } wg.Wait() if failureCount != concurrentCount-1 { t.Fatalf("concurrent splits succeeded unexpectedly; failureCount=%d", failureCount) } // Verify everything ended up as expected. if a, e := store.ReplicaCount(), 2; a != e { t.Fatalf("expected %d stores after concurrent splits; actual count=%d", e, a) } rng := store.LookupReplica(roachpb.RKeyMin, nil) newRng := store.LookupReplica(roachpb.RKey(splitKey), nil) if !bytes.Equal(newRng.Desc().StartKey, splitKey) || !bytes.Equal(splitKey, rng.Desc().EndKey) { t.Errorf("ranges mismatched, wanted %q=%q=%q", newRng.Desc().StartKey, splitKey, rng.Desc().EndKey) } if !bytes.Equal(newRng.Desc().EndKey, roachpb.RKeyMax) || !bytes.Equal(rng.Desc().StartKey, roachpb.RKeyMin) { t.Errorf("new ranges do not cover KeyMin-KeyMax, but only %q-%q", rng.Desc().StartKey, newRng.Desc().EndKey) } }
// TestStoreRangeMergeWithData attempts to merge two collocate ranges // each containing data. func TestStoreRangeMergeWithData(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() content := roachpb.Key("testing!") aDesc, bDesc, err := createSplitRanges(store) if err != nil { t.Fatal(err) } // Write some values left and right of the proposed split key. pArgs := putArgs([]byte("aaa"), content) if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil { t.Fatal(err) } pArgs = putArgs([]byte("ccc"), content) if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: bDesc.RangeID, }, &pArgs); err != nil { t.Fatal(err) } // Confirm the values are there. gArgs := getArgs([]byte("aaa")) if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } gArgs = getArgs([]byte("ccc")) if reply, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: bDesc.RangeID, }, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } // Merge the b range back into the a range. args := adminMergeArgs(roachpb.KeyMin) if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil { t.Fatal(err) } // Verify no intents remains on range descriptor keys. for _, key := range []roachpb.Key{keys.RangeDescriptorKey(aDesc.StartKey), keys.RangeDescriptorKey(bDesc.StartKey)} { if _, _, err := engine.MVCCGet(context.Background(), store.Engine(), key, store.Clock().Now(), true, nil); err != nil { t.Fatal(err) } } // Verify the merge by looking up keys from both ranges. rangeA := store.LookupReplica([]byte("a"), nil) rangeB := store.LookupReplica([]byte("c"), nil) rangeADesc := rangeA.Desc() rangeBDesc := rangeB.Desc() if !reflect.DeepEqual(rangeA, rangeB) { t.Fatalf("ranges were not merged %+v=%+v", rangeADesc, rangeBDesc) } if !bytes.Equal(rangeADesc.StartKey, roachpb.RKeyMin) { t.Fatalf("The start key is not equal to KeyMin %q=%q", rangeADesc.StartKey, roachpb.RKeyMin) } if !bytes.Equal(rangeADesc.EndKey, roachpb.RKeyMax) { t.Fatalf("The end key is not equal to KeyMax %q=%q", rangeADesc.EndKey, roachpb.RKeyMax) } // Try to get values from after the merge. gArgs = getArgs([]byte("aaa")) if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } gArgs = getArgs([]byte("ccc")) if reply, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rangeB.RangeID, }, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } // Put new values after the merge on both sides. 
pArgs = putArgs([]byte("aaaa"), content) if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil { t.Fatal(err) } pArgs = putArgs([]byte("cccc"), content) if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rangeB.RangeID, }, &pArgs); err != nil { t.Fatal(err) } // Try to get the newly placed values. gArgs = getArgs([]byte("aaaa")) if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } gArgs = getArgs([]byte("cccc")) if reply, err := client.SendWrapped(rg1(store), nil, &gArgs); err != nil { t.Fatal(err) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } }
// TestRangeLookupUseReverse tests whether the results and the results count // are correct when scanning in reverse order. func TestRangeLookupUseReverse(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Init test ranges: // ["","a"), ["a","c"), ["c","e"), ["e","g") and ["g","\xff\xff"). splits := []roachpb.AdminSplitRequest{ adminSplitArgs(roachpb.Key("g"), roachpb.Key("g")), adminSplitArgs(roachpb.Key("e"), roachpb.Key("e")), adminSplitArgs(roachpb.Key("c"), roachpb.Key("c")), adminSplitArgs(roachpb.Key("a"), roachpb.Key("a")), } for _, split := range splits { _, pErr := client.SendWrapped(rg1(store), nil, &split) if pErr != nil { t.Fatalf("%q: split unexpected error: %s", split.SplitKey, pErr) } } // Resolve the intents. scanArgs := roachpb.ScanRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(roachpb.RKeyMin.Next()), EndKey: keys.RangeMetaKey(roachpb.RKeyMax), }, } util.SucceedsSoon(t, func() error { _, pErr := client.SendWrapped(rg1(store), nil, &scanArgs) return pErr.GoError() }) revScanArgs := func(key []byte, maxResults int32) *roachpb.RangeLookupRequest { return &roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: key, }, MaxRanges: maxResults, Reverse: true, } } // Test cases. testCases := []struct { request *roachpb.RangeLookupRequest expected []roachpb.RangeDescriptor expectedPre []roachpb.RangeDescriptor }{ // Test key in the middle of the range. { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("f")), 2), // ["e","g") and ["c","e"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, }, }, // Test key in the end key of the range. { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("g")), 3), // ["e","g"), ["c","e") and ["a","c"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, {StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}, }, }, { request: revScanArgs(keys.RangeMetaKey(roachpb.RKey("e")), 2), // ["c","e") and ["a","c"). expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("e")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}, }, }, // Test Meta2KeyMax. { request: revScanArgs(keys.Meta2KeyMax, 2), // ["e","g") and ["g","\xff\xff") expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("g"), EndKey: roachpb.RKey("\xff\xff")}, }, expectedPre: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKey("e"), EndKey: roachpb.RKey("g")}, }, }, // Test Meta1KeyMax. { request: revScanArgs(keys.Meta1KeyMax, 1), // ["","a") expected: []roachpb.RangeDescriptor{ {StartKey: roachpb.RKeyMin, EndKey: roachpb.RKey("a")}, }, }, } for testIdx, test := range testCases { resp, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ ReadConsistency: roachpb.INCONSISTENT, }, test.request) if pErr != nil { t.Fatalf("%d: RangeLookup error: %s", testIdx, pErr) } rlReply := resp.(*roachpb.RangeLookupResponse) // Checks the results count. 
if int32(len(rlReply.Ranges))+int32(len(rlReply.PrefetchedRanges)) != test.request.MaxRanges { t.Fatalf("%d: returned results count, expected %d,but got %d", testIdx, test.request.MaxRanges, len(rlReply.Ranges)) } // Checks the range descriptors. for _, rngSlice := range []struct { expect, reply []roachpb.RangeDescriptor }{ {test.expected, rlReply.Ranges}, {test.expectedPre, rlReply.PrefetchedRanges}, } { for i, rng := range rngSlice.expect { if !(rng.StartKey.Equal(rngSlice.reply[i].StartKey) && rng.EndKey.Equal(rngSlice.reply[i].EndKey)) { t.Fatalf("%d: returned range is not correct, expected %v ,but got %v", testIdx, rng, rngSlice.reply[i]) } } } } }
// TestRaftLogQueue verifies that the raft log queue correctly truncates the // raft log. func TestRaftLogQueue(t *testing.T) { defer leaktest.AfterTest(t)() var mtc multiTestContext // Turn off raft elections so the raft leader won't change out from under // us in this test. sc := storage.TestStoreContext() sc.RaftTickInterval = time.Hour * 24 sc.RaftElectionTimeoutTicks = 1000000 mtc.storeContext = &sc mtc.Start(t, 3) defer mtc.Stop() // Write a single value to ensure we have a leader. pArgs := putArgs([]byte("key"), []byte("value")) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &pArgs); err != nil { t.Fatal(err) } // Get the raft leader (and ensure one exists). rangeID := mtc.stores[0].LookupReplica([]byte("a"), nil).RangeID raftLeaderRepl := mtc.getRaftLeader(rangeID) if raftLeaderRepl == nil { t.Fatalf("could not find raft leader replica for range %d", rangeID) } originalIndex, err := raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } // Write a collection of values to increase the raft log. for i := 0; i < storage.RaftLogQueueStaleThreshold+1; i++ { pArgs = putArgs([]byte(fmt.Sprintf("key-%d", i)), []byte("value")) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &pArgs); err != nil { t.Fatal(err) } } // Sadly, occasionally the queue has a race with the force processing so // this succeeds within will captures those rare cases. var afterTruncationIndex uint64 util.SucceedsSoon(t, func() error { // Force a truncation check. for _, store := range mtc.stores { store.ForceRaftLogScanAndProcess() } // Ensure that firstIndex has increased indicating that the log // truncation has occurred. var err error afterTruncationIndex, err = raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } if afterTruncationIndex <= originalIndex { return util.Errorf("raft log has not been truncated yet, afterTruncationIndex:%d originalIndex:%d", afterTruncationIndex, originalIndex) } return nil }) // Force a truncation check again to ensure that attempting to truncate an // already truncated log has no effect. for _, store := range mtc.stores { store.ForceRaftLogScanAndProcess() } after2ndTruncationIndex, err := raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } if afterTruncationIndex > after2ndTruncationIndex { t.Fatalf("second truncation destroyed state: afterTruncationIndex:%d after2ndTruncationIndex:%d", afterTruncationIndex, after2ndTruncationIndex) } }
// TestStoreRangeSplitStatsWithMerges starts by splitting the system keys from // user-space keys and verifying that the user space side of the split (which is empty), // has all zeros for stats. It then issues a number of Merge requests to the user // space side, simulating TimeSeries data. Finally, the test splits the user space // side halfway and verifies the stats on either side of the split are equal to a // recomputation. // // Note that unlike TestStoreRangeSplitStats, we do not check if the two halves of the // split's stats are equal to the pre-split stats when added, because this will not be // true of ranges populated with Merge requests. The reason for this is that Merge // requests' impact on MVCCStats are only estimated. See updateStatsOnMerge. func TestStoreRangeSplitStatsWithMerges(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, manual := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Split the range after the last table data key. keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1) keyPrefix = keys.MakeRowSentinelKey(keyPrefix) args := adminSplitArgs(roachpb.KeyMin, keyPrefix) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatal(pErr) } // Verify empty range has empty stats. rng := store.LookupReplica(keyPrefix, nil) // NOTE that this value is expected to change over time, depending on what // we store in the sys-local keyspace. Update it accordingly for this test. empty := enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()} if err := verifyRangeStats(store.Engine(), rng.RangeID, empty); err != nil { t.Fatal(err) } // Write random TimeSeries data. midKey := writeRandomTimeSeriesDataToRange(t, store, rng.RangeID, keyPrefix) manual.Increment(100) // Split the range at approximate halfway point. args = adminSplitArgs(keyPrefix, midKey) if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rng.RangeID, }, &args); pErr != nil { t.Fatal(pErr) } snap := store.Engine().NewSnapshot() defer snap.Close() var msLeft, msRight enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &msLeft); err != nil { t.Fatal(err) } rngRight := store.LookupReplica(midKey, nil) if err := engine.MVCCGetRangeStats(context.Background(), snap, rngRight.RangeID, &msRight); err != nil { t.Fatal(err) } // Stats should both have the new timestamp. now := manual.UnixNano() if lTs := msLeft.LastUpdateNanos; lTs != now { t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs) } if rTs := msRight.LastUpdateNanos; rTs != now { t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs) } // Stats should agree with recomputation. if err := verifyRecomputedStats(snap, rng.Desc(), msLeft, now); err != nil { t.Fatalf("failed to verify left range's stats after split: %v", err) } if err := verifyRecomputedStats(snap, rngRight.Desc(), msRight, now); err != nil { t.Fatalf("failed to verify right range's stats after split: %v", err) } }
// TestStoreRangeSplitStats starts by splitting the system keys from user-space // keys and verifying that the user space side of the split (which is empty), // has all zeros for stats. It then writes random data to the user space side, // splits it halfway and verifies the two splits have stats exactly equaling // the pre-split. func TestStoreRangeSplitStats(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, manual := createTestStoreWithContext(t, sCtx) defer stopper.Stop() // Split the range after the last table data key. keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1) keyPrefix = keys.MakeRowSentinelKey(keyPrefix) args := adminSplitArgs(roachpb.KeyMin, keyPrefix) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatal(pErr) } // Verify empty range has empty stats. rng := store.LookupReplica(keyPrefix, nil) // NOTE that this value is expected to change over time, depending on what // we store in the sys-local keyspace. Update it accordingly for this test. empty := enginepb.MVCCStats{LastUpdateNanos: manual.UnixNano()} if err := verifyRangeStats(store.Engine(), rng.RangeID, empty); err != nil { t.Fatal(err) } // Write random data. midKey := writeRandomDataToRange(t, store, rng.RangeID, keyPrefix) // Get the range stats now that we have data. snap := store.Engine().NewSnapshot() defer snap.Close() var ms enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &ms); err != nil { t.Fatal(err) } if err := verifyRecomputedStats(snap, rng.Desc(), ms, manual.UnixNano()); err != nil { t.Fatalf("failed to verify range's stats before split: %v", err) } if inMemMS := rng.GetMVCCStats(); inMemMS != ms { t.Fatalf("in-memory and on-disk diverged:\n%+v\n!=\n%+v", inMemMS, ms) } manual.Increment(100) // Split the range at approximate halfway point. args = adminSplitArgs(keyPrefix, midKey) if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rng.RangeID, }, &args); pErr != nil { t.Fatal(pErr) } snap = store.Engine().NewSnapshot() defer snap.Close() var msLeft, msRight enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &msLeft); err != nil { t.Fatal(err) } rngRight := store.LookupReplica(midKey, nil) if err := engine.MVCCGetRangeStats(context.Background(), snap, rngRight.RangeID, &msRight); err != nil { t.Fatal(err) } // The stats should be exactly equal when added. expMS := enginepb.MVCCStats{ LiveBytes: msLeft.LiveBytes + msRight.LiveBytes, KeyBytes: msLeft.KeyBytes + msRight.KeyBytes, ValBytes: msLeft.ValBytes + msRight.ValBytes, IntentBytes: msLeft.IntentBytes + msRight.IntentBytes, LiveCount: msLeft.LiveCount + msRight.LiveCount, KeyCount: msLeft.KeyCount + msRight.KeyCount, ValCount: msLeft.ValCount + msRight.ValCount, IntentCount: msLeft.IntentCount + msRight.IntentCount, } ms.SysBytes, ms.SysCount = 0, 0 ms.LastUpdateNanos = 0 if expMS != ms { t.Errorf("expected left plus right ranges to equal original, but\n %+v\n+\n %+v\n!=\n %+v", msLeft, msRight, ms) } // Stats should both have the new timestamp. now := manual.UnixNano() if lTs := msLeft.LastUpdateNanos; lTs != now { t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs) } if rTs := msRight.LastUpdateNanos; rTs != now { t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs) } // Stats should agree with recomputation. 
if err := verifyRecomputedStats(snap, rng.Desc(), msLeft, now); err != nil { t.Fatalf("failed to verify left range's stats after split: %v", err) } if err := verifyRecomputedStats(snap, rngRight.Desc(), msRight, now); err != nil { t.Fatalf("failed to verify right range's stats after split: %v", err) } }
// TestStoreRangeSplit executes a split of a range and verifies that the // resulting ranges respond to the right key ranges and that their stats // have been properly accounted for and requests can't be replayed. func TestStoreRangeSplitIdempotency(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() rangeID := roachpb.RangeID(1) splitKey := roachpb.Key("m") content := roachpb.Key("asdvb") // First, write some values left and right of the proposed split key. pArgs := putArgs([]byte("c"), content) if _, pErr := client.SendWrapped(rg1(store), nil, &pArgs); pErr != nil { t.Fatal(pErr) } pArgs = putArgs([]byte("x"), content) if _, pErr := client.SendWrapped(rg1(store), nil, &pArgs); pErr != nil { t.Fatal(pErr) } // Increments are a good way of testing idempotency. Up here, we // address them to the original range, then later to the one that // contains the key. txn := roachpb.NewTransaction("test", []byte("c"), 10, enginepb.SERIALIZABLE, store.Clock().Now(), 0) lIncArgs := incrementArgs([]byte("apoptosis"), 100) lTxn := *txn lTxn.Sequence++ if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Txn: &lTxn, }, &lIncArgs); pErr != nil { t.Fatal(pErr) } rIncArgs := incrementArgs([]byte("wobble"), 10) rTxn := *txn rTxn.Sequence++ if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Txn: &rTxn, }, &rIncArgs); pErr != nil { t.Fatal(pErr) } // Get the original stats for key and value bytes. var ms enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), rangeID, &ms); err != nil { t.Fatal(err) } keyBytes, valBytes := ms.KeyBytes, ms.ValBytes // Split the range. args := adminSplitArgs(roachpb.KeyMin, splitKey) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatal(pErr) } // Verify no intents remains on range descriptor keys. splitKeyAddr, err := keys.Addr(splitKey) if err != nil { t.Fatal(err) } for _, key := range []roachpb.Key{keys.RangeDescriptorKey(roachpb.RKeyMin), keys.RangeDescriptorKey(splitKeyAddr)} { if _, _, err := engine.MVCCGet(context.Background(), store.Engine(), key, store.Clock().Now(), true, nil); err != nil { t.Fatal(err) } } rng := store.LookupReplica(roachpb.RKeyMin, nil) rngDesc := rng.Desc() newRng := store.LookupReplica([]byte("m"), nil) newRngDesc := newRng.Desc() if !bytes.Equal(newRngDesc.StartKey, splitKey) || !bytes.Equal(splitKey, rngDesc.EndKey) { t.Errorf("ranges mismatched, wanted %q=%q=%q", newRngDesc.StartKey, splitKey, rngDesc.EndKey) } if !bytes.Equal(newRngDesc.EndKey, roachpb.RKeyMax) || !bytes.Equal(rngDesc.StartKey, roachpb.RKeyMin) { t.Errorf("new ranges do not cover KeyMin-KeyMax, but only %q-%q", rngDesc.StartKey, newRngDesc.EndKey) } // Try to get values from both left and right of where the split happened. 
gArgs := getArgs([]byte("c")) if reply, pErr := client.SendWrapped(rg1(store), nil, &gArgs); pErr != nil { t.Fatal(pErr) } else if replyBytes, pErr := reply.(*roachpb.GetResponse).Value.GetBytes(); pErr != nil { t.Fatal(pErr) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } gArgs = getArgs([]byte("x")) if reply, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: newRng.RangeID, }, &gArgs); pErr != nil { t.Fatal(pErr) } else if replyBytes, err := reply.(*roachpb.GetResponse).Value.GetBytes(); err != nil { t.Fatal(err) } else if !bytes.Equal(replyBytes, content) { t.Fatalf("actual value %q did not match expected value %q", replyBytes, content) } // Send out an increment request copied from above (same txn/sequence) // which remains in the old range. _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Txn: &lTxn, }, &lIncArgs) if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { t.Fatalf("unexpected idempotency failure: %v", pErr) } // Send out the same increment copied from above (same txn/sequence), but // now to the newly created range (which should hold that key). _, pErr = client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: newRng.RangeID, Txn: &rTxn, }, &rIncArgs) if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { t.Fatalf("unexpected idempotency failure: %v", pErr) } // Compare stats of split ranges to ensure they are non zero and // exceed the original range when summed. var left, right enginepb.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), rangeID, &left); err != nil { t.Fatal(err) } lKeyBytes, lValBytes := left.KeyBytes, left.ValBytes if err := engine.MVCCGetRangeStats(context.Background(), store.Engine(), newRng.RangeID, &right); err != nil { t.Fatal(err) } rKeyBytes, rValBytes := right.KeyBytes, right.ValBytes if lKeyBytes == 0 || rKeyBytes == 0 { t.Errorf("expected non-zero key bytes; got %d, %d", lKeyBytes, rKeyBytes) } if lValBytes == 0 || rValBytes == 0 { t.Errorf("expected non-zero val bytes; got %d, %d", lValBytes, rValBytes) } if lKeyBytes+rKeyBytes <= keyBytes { t.Errorf("left + right key bytes don't match; %d + %d <= %d", lKeyBytes, rKeyBytes, keyBytes) } if lValBytes+rValBytes <= valBytes { t.Errorf("left + right val bytes don't match; %d + %d <= %d", lValBytes, rValBytes, valBytes) } }
// TestStoreRangeMergeMetadataCleanup tests that all metadata of a // subsumed range is cleaned up on merge. func TestStoreRangeMergeMetadataCleanup(t *testing.T) { defer leaktest.AfterTest(t)() sCtx := storage.TestStoreContext() sCtx.TestingKnobs.DisableSplitQueue = true store, stopper, _ := createTestStoreWithContext(t, sCtx) defer stopper.Stop() scan := func(f func(roachpb.KeyValue) (bool, error)) { if _, err := engine.MVCCIterate(context.Background(), store.Engine(), roachpb.KeyMin, roachpb.KeyMax, hlc.ZeroTimestamp, true, nil, false, f); err != nil { t.Fatal(err) } } content := roachpb.Key("testing!") // Write some values left of the proposed split key. pArgs := putArgs([]byte("aaa"), content) if _, err := client.SendWrapped(rg1(store), nil, &pArgs); err != nil { t.Fatal(err) } // Collect all the keys. preKeys := make(map[string]struct{}) scan(func(kv roachpb.KeyValue) (bool, error) { preKeys[string(kv.Key)] = struct{}{} return false, nil }) // Split the range. _, bDesc, err := createSplitRanges(store) if err != nil { t.Fatal(err) } // Write some values right of the split key. pArgs = putArgs([]byte("ccc"), content) if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: bDesc.RangeID, }, &pArgs); err != nil { t.Fatal(err) } // Merge the b range back into the a range. args := adminMergeArgs(roachpb.KeyMin) if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil { t.Fatal(err) } // Collect all the keys again. postKeys := make(map[string]struct{}) scan(func(kv roachpb.KeyValue) (bool, error) { postKeys[string(kv.Key)] = struct{}{} return false, nil }) // Compute the new keys. for k := range preKeys { delete(postKeys, k) } // Keep only the subsumed range's local keys. localRangeKeyPrefix := string(keys.MakeRangeIDPrefix(bDesc.RangeID)) for k := range postKeys { if !strings.HasPrefix(k, localRangeKeyPrefix) { delete(postKeys, k) } } if numKeys := len(postKeys); numKeys > 0 { var buf bytes.Buffer fmt.Fprintf(&buf, "%d keys were not cleaned up:\n", numKeys) for k := range postKeys { fmt.Fprintf(&buf, "%q\n", k) } t.Fatal(buf.String()) } }
// TestStoreRangeSplitRaceUninitializedRHS reproduces #7600 (before it was // fixed). While splits are happening, we simulate incoming messages for the // right-hand side to trigger a race between the creation of the proper replica // and the uninitialized replica reacting to messages. func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) { defer leaktest.AfterTest(t)() mtc := &multiTestContext{} storeCtx := storage.TestStoreContext() // An aggressive tick interval lets groups communicate more and thus // triggers test failures much more reliably. We can't go too aggressive // or race tests never make any progress. storeCtx.RaftTickInterval = 50 * time.Millisecond storeCtx.RaftElectionTimeoutTicks = 2 currentTrigger := make(chan *roachpb.SplitTrigger) seen := make(map[storagebase.CmdIDKey]struct{}) storeCtx.TestingKnobs.TestingCommandFilter = func(args storagebase.FilterArgs) *roachpb.Error { et, ok := args.Req.(*roachpb.EndTransactionRequest) if !ok || et.InternalCommitTrigger == nil { return nil } trigger := protoutil.Clone(et.InternalCommitTrigger.GetSplitTrigger()).(*roachpb.SplitTrigger) if trigger != nil && len(trigger.NewDesc.Replicas) == 2 && args.Hdr.Txn.Epoch == 0 && args.Sid == mtc.stores[0].StoreID() { if _, ok := seen[args.CmdID]; ok { return nil } // Without replay protection, a single reproposal locks up the // test. seen[args.CmdID] = struct{}{} currentTrigger <- trigger return roachpb.NewError(roachpb.NewReadWithinUncertaintyIntervalError(args.Hdr.Timestamp, args.Hdr.Timestamp)) } return nil } mtc.storeContext = &storeCtx mtc.Start(t, 2) defer mtc.Stop() leftRange := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil) // We'll fake messages from term 1, ..., .magicIters-1. The exact number // doesn't matter for anything but for its likelihood of triggering the // race. const magicIters = 5 // Replicate the left range onto the second node. We don't wait since we // don't actually care what the second node does. All we want is that the // first node isn't surprised by messages from that node. mtc.replicateRange(leftRange.RangeID, 1) for i := 0; i < 10; i++ { var wg sync.WaitGroup wg.Add(2) go func() { defer wg.Done() // Split the data range. The split keys are chosen so that they move // towards "a" (so that the range being split is always the first // range). splitKey := roachpb.Key(encoding.EncodeVarintDescending([]byte("a"), int64(i))) splitArgs := adminSplitArgs(keys.SystemMax, splitKey) if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &splitArgs); pErr != nil { t.Fatal(pErr) } }() go func() { defer wg.Done() trigger := <-currentTrigger // our own copy // Make sure the first node is first for convenience. replicas := trigger.NewDesc.Replicas if replicas[0].NodeID > replicas[1].NodeID { tmp := replicas[1] replicas[1] = replicas[0] replicas[0] = tmp } // Send a few vote requests which look like they're from the other // node's right hand side of the split. This triggers a race which // is discussed in #7600 (briefly, the creation of the right hand // side in the split trigger was racing with the uninitialized // version for the same group, resulting in clobbered HardState). for term := uint64(1); term < magicIters; term++ { if err := mtc.stores[0].HandleRaftMessage(&storage.RaftMessageRequest{ RangeID: trigger.NewDesc.RangeID, ToReplica: replicas[0], FromReplica: replicas[1], Message: raftpb.Message{ Type: raftpb.MsgVote, To: uint64(replicas[0].ReplicaID), From: uint64(replicas[1].ReplicaID), Term: term, }, }); err != nil { t.Error(err) } } }() wg.Wait() } }
// TestStoreSplitReadRace prevents regression of #3148. It begins a couple of // read requests and lets them complete while a split is happening; the reads // hit the second half of the split. If the split happens non-atomically with // respect to the reads (and in particular their update of the timestamp // cache), then some of them may not be reflected in the timestamp cache of the // new range, in which case this test would fail. func TestStoreSplitReadRace(t *testing.T) { defer leaktest.AfterTest(t)() defer config.TestingDisableTableSplits()() splitKey := roachpb.Key("a") key := func(i int) roachpb.Key { splitCopy := append([]byte(nil), splitKey.Next()...) return append(splitCopy, []byte(fmt.Sprintf("%03d", i))...) } getContinues := make(chan struct{}) var getStarted sync.WaitGroup sCtx := storage.TestStoreContext() sCtx.TestingKnobs.TestingCommandFilter = func(filterArgs storageutils.FilterArgs) *roachpb.Error { if et, ok := filterArgs.Req.(*roachpb.EndTransactionRequest); ok { st := et.InternalCommitTrigger.GetSplitTrigger() if st == nil || !st.UpdatedDesc.EndKey.Equal(splitKey) { return nil } close(getContinues) } else if filterArgs.Req.Method() == roachpb.Get && bytes.HasPrefix(filterArgs.Req.Header().Key, splitKey.Next()) { getStarted.Done() <-getContinues } return nil } store, stopper, _ := createTestStoreWithContext(t, &sCtx) defer stopper.Stop() now := store.Clock().Now() var wg sync.WaitGroup ts := func(i int) roachpb.Timestamp { return now.Add(0, int32(1000+i)) } const num = 10 for i := 0; i < num; i++ { wg.Add(1) getStarted.Add(1) go func(i int) { defer wg.Done() args := getArgs(key(i)) var h roachpb.Header h.Timestamp = ts(i) if _, pErr := client.SendWrappedWith(rg1(store), nil, h, &args); pErr != nil { t.Fatal(pErr) } }(i) } getStarted.Wait() wg.Add(1) func() { defer wg.Done() args := adminSplitArgs(roachpb.KeyMin, splitKey) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatal(pErr) } }() wg.Wait() for i := 0; i < num; i++ { var h roachpb.Header h.Timestamp = now args := putArgs(key(i), []byte("foo")) keyAddr, err := keys.Addr(args.Key) if err != nil { t.Fatal(err) } h.RangeID = store.LookupReplica(keyAddr, nil).RangeID _, respH, pErr := storage.SendWrapped(store, context.Background(), h, &args) if pErr != nil { t.Fatal(pErr) } if respH.Timestamp.Less(ts(i)) { t.Fatalf("%d: expected Put to be forced higher than %s by timestamp caches, but wrote at %s", i, ts(i), respH.Timestamp) } } }
// TestRaftLogQueue verifies that the raft log queue correctly truncates the // raft log. func TestRaftLogQueue(t *testing.T) { defer leaktest.AfterTest(t)() var mtc multiTestContext // Set maxBytes to something small so we can trigger the raft log truncation // without adding 64MB of logs. const maxBytes = 1 << 16 defer config.TestingSetDefaultZoneConfig(config.ZoneConfig{ RangeMaxBytes: maxBytes, })() // Turn off raft elections so the raft leader won't change out from under // us in this test. sc := storage.TestStoreContext() sc.RaftTickInterval = time.Hour * 24 sc.RaftElectionTimeoutTicks = 1000000 mtc.storeContext = &sc mtc.Start(t, 3) defer mtc.Stop() // Write a single value to ensure we have a leader. pArgs := putArgs([]byte("key"), []byte("value")) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &pArgs); err != nil { t.Fatal(err) } // Get the raft leader (and ensure one exists). rangeID := mtc.stores[0].LookupReplica([]byte("a"), nil).RangeID raftLeaderRepl := mtc.getRaftLeader(rangeID) if raftLeaderRepl == nil { t.Fatalf("could not find raft leader replica for range %d", rangeID) } originalIndex, err := raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } // Disable splits since we're increasing the raft log with puts. for _, store := range mtc.stores { store.DisableSplitQueue(true) } // Write a collection of values to increase the raft log. value := bytes.Repeat([]byte("a"), 1000) // 1KB for size := int64(0); size < 2*maxBytes; size += int64(len(value)) { pArgs = putArgs([]byte(fmt.Sprintf("key-%d", size)), value) if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &pArgs); err != nil { t.Fatal(err) } } // Sadly, occasionally the queue has a race with the force processing so // this succeeds within will captures those rare cases. var afterTruncationIndex uint64 util.SucceedsSoon(t, func() error { // Force a truncation check. for _, store := range mtc.stores { store.ForceRaftLogScanAndProcess() } // Ensure that firstIndex has increased indicating that the log // truncation has occurred. var err error afterTruncationIndex, err = raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } if afterTruncationIndex <= originalIndex { return errors.Errorf("raft log has not been truncated yet, afterTruncationIndex:%d originalIndex:%d", afterTruncationIndex, originalIndex) } return nil }) // Force a truncation check again to ensure that attempting to truncate an // already truncated log has no effect. for _, store := range mtc.stores { store.ForceRaftLogScanAndProcess() } after2ndTruncationIndex, err := raftLeaderRepl.GetFirstIndex() if err != nil { t.Fatal(err) } if afterTruncationIndex > after2ndTruncationIndex { t.Fatalf("second truncation destroyed state: afterTruncationIndex:%d after2ndTruncationIndex:%d", afterTruncationIndex, after2ndTruncationIndex) } }
// TestTxnPutOutOfOrder tests a case where a put operation of an older // timestamp comes after a put operation of a newer timestamp in a // txn. The test ensures such an out-of-order put succeeds and // overrides an old value. The test uses a "Writer" and a "Reader" // to reproduce an out-of-order put. // // 1) The Writer executes a put operation and writes a write intent with // time T in a txn. // 2) Before the Writer's txn is committed, the Reader sends a high priority // get operation with time T+100. This pushes the Writer txn timestamp to // T+100 and triggers the restart of the Writer's txn. The original // write intent timestamp is also updated to T+100. // 3) The Writer starts a new epoch of the txn, but before it writes, the // Reader sends another high priority get operation with time T+200. This // pushes the Writer txn timestamp to T+200 to trigger a restart of the // Writer txn. The Writer will not actually restart until it tries to commit // the current epoch of the transaction. The Reader updates the timestamp of // the write intent to T+200. The test deliberately fails the Reader get // operation, and cockroach doesn't update its read timestamp cache. // 4) The Writer executes the put operation again. This put operation comes // out-of-order since its timestamp is T+100, while the intent timestamp // updated at Step 3 is T+200. // 5) The put operation overrides the old value using timestamp T+100. // 6) When the Writer attempts to commit its txn, the txn will be restarted // again at a new epoch timestamp T+200, which will finally succeed. func TestTxnPutOutOfOrder(t *testing.T) { defer leaktest.AfterTest(t)() key := "key" // Set up a filter to so that the get operation at Step 3 will return an error. var numGets int32 manualClock := hlc.NewManualClock(0) clock := hlc.NewClock(manualClock.UnixNano) stopper := stop.NewStopper() defer stopper.Stop() ctx := storage.TestStoreContext() ctx.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if _, ok := filterArgs.Req.(*roachpb.GetRequest); ok && filterArgs.Req.Header().Key.Equal(roachpb.Key(key)) && filterArgs.Hdr.Txn == nil { // The Reader executes two get operations, each of which triggers two get requests // (the first request fails and triggers txn push, and then the second request // succeeds). Returns an error for the fourth get request to avoid timestamp cache // update after the third get operation pushes the txn timestamp. if atomic.AddInt32(&numGets, 1) == 4 { return roachpb.NewErrorWithTxn(errors.Errorf("Test"), filterArgs.Hdr.Txn) } } return nil } store := createTestStoreWithEngine(t, engine.NewInMem(roachpb.Attributes{}, 10<<20, stopper), clock, true, ctx, stopper) // Put an initial value. initVal := []byte("initVal") err := store.DB().Put(key, initVal) if err != nil { t.Fatalf("failed to put: %s", err) } waitPut := make(chan struct{}) waitFirstGet := make(chan struct{}) waitTxnRestart := make(chan struct{}) waitSecondGet := make(chan struct{}) waitTxnComplete := make(chan struct{}) // Start the Writer. go func() { epoch := -1 // Start a txn that does read-after-write. // The txn will be restarted twice, and the out-of-order put // will happen in the second epoch. if err := store.DB().Txn(func(txn *client.Txn) error { epoch++ if epoch == 1 { // Wait until the second get operation is issued. 
close(waitTxnRestart) <-waitSecondGet } updatedVal := []byte("updatedVal") if err := txn.Put(key, updatedVal); err != nil { return err } // Make sure a get will return the value that was just written. actual, err := txn.Get(key) if err != nil { return err } if !bytes.Equal(actual.ValueBytes(), updatedVal) { t.Fatalf("unexpected get result: %s", actual) } if epoch == 0 { // Wait until the first get operation will push the txn timestamp. close(waitPut) <-waitFirstGet } b := txn.NewBatch() return txn.CommitInBatch(b) }); err != nil { t.Fatal(err) } if epoch != 2 { t.Fatalf("unexpected number of txn retries: %d", epoch) } close(waitTxnComplete) }() <-waitPut // Start the Reader. // Advance the clock and send a get operation with higher // priority to trigger the txn restart. manualClock.Increment(100) priority := roachpb.UserPriority(-math.MaxInt32) requestHeader := roachpb.Span{ Key: roachpb.Key(key), } ts := clock.Now() if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Timestamp: ts, UserPriority: priority, }, &roachpb.GetRequest{Span: requestHeader}); err != nil { t.Fatalf("failed to get: %s", err) } // Wait until the writer restarts the txn. close(waitFirstGet) <-waitTxnRestart // Advance the clock and send a get operation again. This time // we use TestingCommandFilter so that a get operation is not // processed after the write intent is resolved (to prevent the // timestamp cache from being updated). manualClock.Increment(100) ts = clock.Now() if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ Timestamp: ts, UserPriority: priority, }, &roachpb.GetRequest{Span: requestHeader}); err == nil { t.Fatal("unexpected success of get") } close(waitSecondGet) <-waitTxnComplete }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to a maximum of 50 MB). It wires up a manual clock, gossip, an // in-memory engine, a DistSender whose send function routes batches directly // to the local store, a TxnCoordSender, and a bootstrapped Store; no // standalone RPC or HTTP server is launched. Use Stop() to shut down the // cluster after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), ltc.Clock, ltc.Stopper) ltc.Gossip = gossip.New(rpcContext, gossip.TestBootstrap, ltc.Stopper) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.stores = storage.NewStores(ltc.Clock) tracer := tracing.NewTracer() var rpcSend rpcSendFn = func(_ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, _ *rpc.Context) (proto.Message, error) { if ltc.Latency > 0 { time.Sleep(ltc.Latency) } sp := tracer.StartSpan("node") defer sp.Finish() ctx := opentracing.ContextWithSpan(context.Background(), sp) sp.LogEvent(args.String()) br, pErr := ltc.stores.Send(ctx, args) if br == nil { br = &roachpb.BatchResponse{} } if br.Error != nil { panic(roachpb.ErrorUnexpectedlySet(ltc.stores, br)) } br.Error = pErr if pErr != nil { sp.LogEvent("error: " + pErr.String()) } return br, nil } retryOpts := GetDefaultDistSenderRetryOptions() retryOpts.Closer = ltc.Stopper.ShouldDrain() ltc.distSender = NewDistSender(&DistSenderContext{ Clock: ltc.Clock, RangeDescriptorCacheSize: defaultRangeDescriptorCacheSize, RangeLookupMaxRanges: defaultRangeLookupMaxRanges, LeaderCacheSize: defaultLeaderCacheSize, RPCRetryOptions: &retryOpts, nodeDescriptor: nodeDesc, RPCSend: rpcSend, // defined above RangeDescriptorDB: ltc.stores, // for descriptor lookup }, ltc.Gossip) ltc.Sender = NewTxnCoordSender(ltc.distSender, ltc.Clock, false /* !linearizable */, tracer, ltc.Stopper, NewTxnMetrics(metric.NewRegistry())) ltc.DB = client.NewDB(ltc.Sender) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ctx.Tracer = tracer ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
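// TestLocalTestClusterUsageSketch is a minimal usage sketch for the cluster
// built by Start above. It is a hedged example, not an existing test: it
// assumes LocalTestCluster exposes the Stop method referenced in Start's doc
// comment and that ltc.DB follows the same Put/Get/ValueBytes pattern used by
// the tests in this document; the key and value are illustrative.
func TestLocalTestClusterUsageSketch(t *testing.T) {
	ltc := &LocalTestCluster{}
	ltc.Start(t)
	defer ltc.Stop()

	// Reads and writes go through the in-process TxnCoordSender/DistSender
	// stack wired up by Start, so they exercise the full local request path.
	if err := ltc.DB.Put("example-key", []byte("example-value")); err != nil {
		t.Fatal(err)
	}
	gr, err := ltc.DB.Get("example-key")
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(gr.ValueBytes(), []byte("example-value")) {
		t.Fatalf("unexpected value: %q", gr.ValueBytes())
	}
}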
func TestRangeTransferLease(t *testing.T) { defer leaktest.AfterTest(t)() ctx := storage.TestStoreContext() var filterMu syncutil.Mutex var filter func(filterArgs storagebase.FilterArgs) *roachpb.Error ctx.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { filterMu.Lock() filterCopy := filter filterMu.Unlock() if filterCopy != nil { return filterCopy(filterArgs) } return nil } var waitForTransferBlocked atomic.Value waitForTransferBlocked.Store(false) transferBlocked := make(chan struct{}) ctx.TestingKnobs.LeaseTransferBlockedOnExtensionEvent = func( _ roachpb.ReplicaDescriptor) { if waitForTransferBlocked.Load().(bool) { transferBlocked <- struct{}{} waitForTransferBlocked.Store(false) } } mtc := &multiTestContext{} mtc.storeContext = &ctx mtc.Start(t, 2) defer mtc.Stop() // First, do a write; we'll use it to determine when the dust has settled. leftKey := roachpb.Key("a") incArgs := incrementArgs(leftKey, 1) if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &incArgs); pErr != nil { t.Fatal(pErr) } // Get the left range's ID. rangeID := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil).RangeID // Replicate the left range onto node 1. mtc.replicateRange(rangeID, 1) replica0 := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil) replica1 := mtc.stores[1].LookupReplica(roachpb.RKey("a"), nil) gArgs := getArgs(leftKey) replica0Desc, err := replica0.GetReplicaDescriptor() if err != nil { t.Fatal(err) } // Check that replica0 can serve reads OK. if _, pErr := client.SendWrappedWith( mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil { t.Fatal(pErr) } { // Transferring the lease to ourself should be a no-op. origLeasePtr, _ := replica0.GetLease() origLease := *origLeasePtr if err := replica0.AdminTransferLease(replica0Desc.StoreID); err != nil { t.Fatal(err) } newLeasePtr, _ := replica0.GetLease() if origLeasePtr != newLeasePtr || origLease != *newLeasePtr { t.Fatalf("expected %+v, but found %+v", origLeasePtr, newLeasePtr) } } { // An invalid target should result in an error. const expected = "unable to find store .* in range" if err := replica0.AdminTransferLease(1000); !testutils.IsError(err, expected) { t.Fatalf("expected %s, but found %v", expected, err) } } // Move the lease to store 1. var newHolderDesc roachpb.ReplicaDescriptor util.SucceedsSoon(t, func() error { var err error newHolderDesc, err = replica1.GetReplicaDescriptor() return err }) if err := replica0.AdminTransferLease(newHolderDesc.StoreID); err != nil { t.Fatal(err) } // Check that replica0 doesn't serve reads any more. replica0Desc, err = replica0.GetReplicaDescriptor() if err != nil { t.Fatal(err) } _, pErr := client.SendWrappedWith( mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs) nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError) if !ok { t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr) } if *(nlhe.LeaseHolder) != newHolderDesc { t.Fatalf("expected lease holder %+v, got %+v", newHolderDesc, nlhe.LeaseHolder) } // Check that replica1 now has the lease (or gets it soon). util.SucceedsSoon(t, func() error { if _, pErr := client.SendWrappedWith( mtc.senders[1], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil { return pErr.GoError() } return nil }) replica1Lease, _ := replica1.GetLease() // Verify the timestamp cache low water. 
Because we executed a transfer lease // request, the low water should be set to the new lease start time, which is // less than the previous lease's expiration time. if lowWater := replica1.GetTimestampCacheLowWater(); lowWater != replica1Lease.Start { t.Fatalf("expected timestamp cache low water %s, but found %s", replica1Lease.Start, lowWater) } // Make replica1 extend its lease and transfer the lease immediately after // that. Test that the transfer still happens (it'll wait until the extension // is done). extensionSem := make(chan struct{}) filterMu.Lock() filter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if filterArgs.Sid != mtc.stores[1].Ident.StoreID { return nil } llReq, ok := filterArgs.Req.(*roachpb.RequestLeaseRequest) if !ok { return nil } if llReq.Lease.Replica == newHolderDesc { // Notify the main thread that the extension is in progress and wait for // the signal to proceed. filterMu.Lock() filter = nil filterMu.Unlock() extensionSem <- struct{}{} <-extensionSem } return nil } filterMu.Unlock() // Initiate an extension. var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() shouldRenewTS := replica1Lease.StartStasis.Add(-1, 0) mtc.manualClock.Set(shouldRenewTS.WallTime + 1) if _, pErr := client.SendWrappedWith( mtc.senders[1], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil { panic(pErr) } }() <-extensionSem waitForTransferBlocked.Store(true) // Initiate a transfer. wg.Add(1) go func() { defer wg.Done() // Transfer back from replica1 to replica0. if err := replica1.AdminTransferLease(replica0Desc.StoreID); err != nil { panic(err) } }() // Wait for the transfer to be blocked by the extension. <-transferBlocked // Now unblock the extension. extensionSem <- struct{}{} // Check that the transfer back to replica0 eventually happens. util.SucceedsSoon(t, func() error { if _, pErr := client.SendWrappedWith( mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil { return pErr.GoError() } return nil }) filterMu.Lock() filter = nil filterMu.Unlock() wg.Wait() }
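// transferLeaseAndWaitForReads is a hedged sketch that factors out the
// transfer-and-verify step used repeatedly in TestRangeTransferLease:
// AdminTransferLease on the current holder, then SucceedsSoon polling until
// the target replica serves reads. It reuses only calls that appear in the
// test above; the helper name, the *storage.Replica parameter type, and the
// toSender index are assumptions made for illustration.
func transferLeaseAndWaitForReads(
	t *testing.T, mtc *multiTestContext, from, to *storage.Replica, toSender int, key roachpb.Key,
) {
	toDesc, err := to.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	// Ask the current holder to hand the lease to the target replica's store.
	if err := from.AdminTransferLease(toDesc.StoreID); err != nil {
		t.Fatal(err)
	}
	// The new holder may not serve reads immediately, so poll until it does.
	gArgs := getArgs(key)
	util.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			mtc.senders[toSender], nil, roachpb.Header{Replica: toDesc}, &gArgs); pErr != nil {
			return pErr.GoError()
		}
		return nil
	})
}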
// TestStoreRangeRebalance verifies that the replication queue will take // rebalancing opportunities and add a new replica on another store. func TestStoreRangeRebalance(t *testing.T) { defer leaktest.AfterTest(t)() // Start multiTestContext with replica rebalancing enabled. mtc := &multiTestContext{ storeContext: &storage.StoreContext{}, } *mtc.storeContext = storage.TestStoreContext() mtc.storeContext.AllocatorOptions = storage.AllocatorOptions{ AllowRebalance: true, Deterministic: true, } // Four stores. mtc.Start(t, 4) defer mtc.Stop() // Replicate the first range to the first three stores. store0 := mtc.stores[0] replica := store0.LookupReplica(roachpb.RKeyMin, nil) desc := replica.Desc() mtc.replicateRange(desc.RangeID, 1, 2) // Initialize the gossip network with fake capacity data. storeDescs := make([]*roachpb.StoreDescriptor, 0, len(mtc.stores)) for _, s := range mtc.stores { desc, err := s.Descriptor() if err != nil { t.Fatal(err) } desc.Capacity.Capacity = 1024 * 1024 desc.Capacity.Available = 1024 * 1024 // Make sure store[1] is chosen as the removal target. if desc.StoreID == mtc.stores[1].StoreID() { desc.Capacity.Available = 0 } storeDescs = append(storeDescs, desc) } sg := gossiputil.NewStoreGossiper(mtc.gossip) sg.GossipStores(storeDescs, t) // This can't use SucceedsSoon because its exponential backoff // doesn't work well with the forced replication scans. maxTimeout := time.After(5 * time.Second) succeeded := false for !succeeded { select { case <-maxTimeout: t.Fatal("Failed to rebalance replica within 5 seconds") case <-time.After(10 * time.Millisecond): // Look up the official range descriptor and make sure the fourth store is on it. rangeDesc := getRangeMetadata(roachpb.RKeyMin, mtc, t) // Check whether we have already succeeded. for _, repl := range rangeDesc.Replicas { if repl.StoreID == mtc.stores[3].StoreID() { succeeded = true } } if succeeded { break } mtc.expireLeaderLeases() mtc.stores[1].ForceReplicationScanAndProcess() } } }
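// pollUntil is a hedged, standard-library-only sketch of the bounded polling
// loop used by TestStoreRangeRebalance. It keeps a fixed retry interval
// instead of SucceedsSoon's exponential backoff so that work such as forced
// replication scans keeps firing at a steady rate; the helper name and
// signature are illustrative. In the test above, the descriptor check plus
// expireLeaderLeases/ForceReplicationScanAndProcess would become the step
// callback.
func pollUntil(t *testing.T, timeout, interval time.Duration, step func() bool) {
	deadline := time.After(timeout)
	for {
		select {
		case <-deadline:
			t.Fatalf("condition not met within %s", timeout)
		case <-time.After(interval):
			// step performs one attempt and reports whether the condition holds.
			if step() {
				return
			}
		}
	}
}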