// TestRangeSplitsWithWritePressure sets the zone config max bytes for // a range to 256K and writes data until there are five ranges. func TestRangeSplitsWithWritePressure(t *testing.T) { defer leaktest.AfterTest(t)() // Override default zone config. cfg := config.DefaultZoneConfig() cfg.RangeMaxBytes = 1 << 18 defer config.TestingSetDefaultZoneConfig(cfg)() dbCtx := client.DefaultDBContext() dbCtx.TxnRetryOptions = retry.Options{ InitialBackoff: 1 * time.Millisecond, MaxBackoff: 10 * time.Millisecond, Multiplier: 2, } s, _ := createTestDBWithContext(t, dbCtx) // This is purely to silence log spam. config.TestingSetupZoneConfigHook(s.Stopper) defer s.Stop() // Start test writer write about a 32K/key so there aren't too many writes necessary to split 64K range. done := make(chan struct{}) var wg sync.WaitGroup wg.Add(1) go startTestWriter(s.DB, int64(0), 1<<15, &wg, nil, nil, done, t) // Check that we split 5 times in allotted time. testutils.SucceedsSoon(t, func() error { // Scan the txn records. rows, err := s.DB.Scan(context.TODO(), keys.Meta2Prefix, keys.MetaMax, 0) if err != nil { return errors.Errorf("failed to scan meta2 keys: %s", err) } if lr := len(rows); lr < 5 { return errors.Errorf("expected >= 5 scans; got %d", lr) } return nil }) close(done) wg.Wait() // This write pressure test often causes splits while resolve // intents are in flight, causing them to fail with range key // mismatch errors. However, LocalSender should retry in these // cases. Check here via MVCC scan that there are no dangling write // intents. We do this using a SucceedsSoon construct to account // for timing of finishing the test writer and a possibly-ongoing // asynchronous split. testutils.SucceedsSoon(t, func() error { if _, _, _, err := engine.MVCCScan(context.Background(), s.Eng, keys.LocalMax, roachpb.KeyMax, math.MaxInt64, hlc.MaxTimestamp, true, nil); err != nil { return errors.Errorf("failed to verify no dangling intents: %s", err) } return nil }) }
func TestSkipLargeReplicaSnapshot(t *testing.T) { defer leaktest.AfterTest(t)() storeCfg := TestStoreConfig(nil) storeCfg.TestingKnobs.DisableSplitQueue = true const snapSize = 5 * (keySize + valSize) cfg := config.DefaultZoneConfig() cfg.RangeMaxBytes = snapSize defer config.TestingSetDefaultZoneConfig(cfg)() stopper := stop.NewStopper() defer stopper.Stop() store := createTestStoreWithConfig(t, stopper, &storeCfg) rep, err := store.GetReplica(rangeID) if err != nil { t.Fatal(err) } rep.SetMaxBytes(snapSize) if pErr := rep.redirectOnOrAcquireLease(context.Background()); pErr != nil { t.Fatal(pErr) } if err := fillTestRange(rep, snapSize); err != nil { t.Fatal(err) } if _, err := rep.GetSnapshot(context.Background(), "test"); err != nil { t.Fatal(err) } rep.CloseOutSnap() if err := fillTestRange(rep, snapSize*2); err != nil { t.Fatal(err) } rep.mu.Lock() _, err = rep.Snapshot() rep.mu.Unlock() if err != raft.ErrSnapshotTemporarilyUnavailable { rep.mu.Lock() after := rep.mu.state.Stats.Total() rep.mu.Unlock() t.Fatalf( "snapshot of a very large range (%d / %d, needsSplit: %v, exceeds snap limit: %v) should fail but got %v", after, rep.GetMaxBytes(), rep.needsSplitBySize(), rep.exceedsDoubleSplitSizeLocked(), err, ) } }
// Start starts the TestServer by bootstrapping an in-memory store
// (defaults to maximum of 100M). The server is started, launching the
// node RPC server and all HTTP endpoints. Use the value of
// TestServer.ServingAddr() after Start() for client connections.
// Use TestServer.Stopper().Stop() to shutdown the server after the test
// completes.
//
// Returns an error (and stops the server) if initial splits fail to
// complete; on success the server is left running and owned by the
// caller's stopper.
func (ts *TestServer) Start(params base.TestServerArgs) error {
	if ts.Cfg == nil {
		panic("Cfg not set")
	}

	if params.Stopper == nil {
		params.Stopper = stop.NewStopper()
	}

	if !params.PartOfCluster {
		// Change the replication requirements so we don't get log spam about ranges
		// not being replicated enough.
		cfg := config.DefaultZoneConfig()
		cfg.NumReplicas = 1
		fn := config.TestingSetDefaultZoneConfig(cfg)
		// Restore the original zone config when the stopper shuts down so the
		// override doesn't leak into other tests.
		params.Stopper.AddCloser(stop.CloserFn(fn))
	}

	// Needs to be called before NewServer to ensure resolvers are initialized.
	if err := ts.Cfg.InitNode(); err != nil {
		return err
	}

	var err error
	ts.Server, err = NewServer(*ts.Cfg, params.Stopper)
	if err != nil {
		return err
	}

	// Our context must be shared with our server. NewServer took Cfg by
	// value, so repoint ts.Cfg at the server's own copy to keep the two in
	// sync from here on.
	ts.Cfg = &ts.Server.cfg

	if err := ts.Server.Start(context.Background()); err != nil {
		return err
	}

	// If enabled, wait for initial splits to complete before returning control.
	// If initial splits do not complete, the server is stopped before
	// returning.
	// NOTE(review): lowercase ts.cfg here resolves through the embedded
	// Server, while ts.Cfg is the TestServer field aliased to it above —
	// the two should refer to the same config at this point.
	if stk, ok := ts.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs); ok &&
		stk.DisableSplitQueue {
		// Splits are disabled entirely; nothing to wait for.
		return nil
	}
	if err := ts.WaitForInitialSplits(); err != nil {
		ts.Stop()
		return err
	}

	return nil
}
// TestRaftLogQueue verifies that the raft log queue correctly truncates the
// raft log: after writing enough data, the first index of the leader's log
// advances, and re-running truncation on an already-truncated log is a
// no-op.
func TestRaftLogQueue(t *testing.T) {
	defer leaktest.AfterTest(t)()

	mtc := &multiTestContext{}

	// Set maxBytes to something small so we can trigger the raft log truncation
	// without adding 64MB of logs.
	const maxBytes = 1 << 16
	defer config.TestingSetDefaultZoneConfig(config.ZoneConfig{
		RangeMaxBytes: maxBytes,
	})()

	// Turn off raft elections so the raft leader won't change out from under
	// us in this test.
	sc := storage.TestStoreConfig(nil)
	sc.RaftTickInterval = math.MaxInt32
	sc.RaftElectionTimeoutTicks = 1000000
	mtc.storeConfig = &sc
	defer mtc.Stop()
	mtc.Start(t, 3)

	// Write a single value to ensure we have a leader.
	pArgs := putArgs([]byte("key"), []byte("value"))
	if _, err := client.SendWrapped(context.Background(), rg1(mtc.stores[0]), pArgs); err != nil {
		t.Fatal(err)
	}

	// Get the raft leader (and ensure one exists).
	rangeID := mtc.stores[0].LookupReplica([]byte("a"), nil).RangeID
	raftLeaderRepl := mtc.getRaftLeader(rangeID)
	if raftLeaderRepl == nil {
		t.Fatalf("could not find raft leader replica for range %d", rangeID)
	}
	// Baseline first index, against which truncation progress is measured.
	originalIndex, err := raftLeaderRepl.GetFirstIndex()
	if err != nil {
		t.Fatal(err)
	}

	// Disable splits since we're increasing the raft log with puts.
	for _, store := range mtc.stores {
		store.SetSplitQueueActive(false)
	}

	// Write a collection of values to increase the raft log. Writing twice
	// maxBytes guarantees the log exceeds the truncation threshold.
	value := bytes.Repeat([]byte("a"), 1000) // 1KB
	for size := int64(0); size < 2*maxBytes; size += int64(len(value)) {
		pArgs = putArgs([]byte(fmt.Sprintf("key-%d", size)), value)
		if _, err := client.SendWrapped(context.Background(), rg1(mtc.stores[0]), pArgs); err != nil {
			t.Fatal(err)
		}
	}

	// Sadly, the queue occasionally races with the forced processing below,
	// so retry within a SucceedsSoon loop to capture those rare cases.
	var afterTruncationIndex uint64
	testutils.SucceedsSoon(t, func() error {
		// Force a truncation check.
		for _, store := range mtc.stores {
			store.ForceRaftLogScanAndProcess()
		}
		// Ensure that firstIndex has increased indicating that the log
		// truncation has occurred.
		var err error
		afterTruncationIndex, err = raftLeaderRepl.GetFirstIndex()
		if err != nil {
			// NOTE(review): t.Fatal inside the retry closure aborts instead of
			// retrying; returning the error would let SucceedsSoon retry —
			// confirm whether a GetFirstIndex error here is ever transient.
			t.Fatal(err)
		}
		if afterTruncationIndex <= originalIndex {
			return errors.Errorf("raft log has not been truncated yet, afterTruncationIndex:%d originalIndex:%d", afterTruncationIndex, originalIndex)
		}
		return nil
	})

	// Force a truncation check again to ensure that attempting to truncate an
	// already truncated log has no effect. This check, unlike in the last
	// iteration, cannot use a succeedsSoon. This check is fragile in that the
	// truncation triggered here may lose the race against the call to
	// GetFirstIndex, giving a false negative. Fixing this requires additional
	// instrumentation of the queues, which was deemed to require too much work
	// at the time of this writing.
	for _, store := range mtc.stores {
		store.ForceRaftLogScanAndProcess()
	}
	after2ndTruncationIndex, err := raftLeaderRepl.GetFirstIndex()
	if err != nil {
		t.Fatal(err)
	}
	// The first index may only move forward; a regression would indicate the
	// second truncation destroyed log state.
	if afterTruncationIndex > after2ndTruncationIndex {
		t.Fatalf("second truncation destroyed state: afterTruncationIndex:%d after2ndTruncationIndex:%d", afterTruncationIndex, after2ndTruncationIndex)
	}
}