// TestScannerAddToQueues verifies that ranges are added to and
// removed from multiple queues.
func TestScannerAddToQueues(t *testing.T) {
	const count = 3
	iter := newTestIterator(count)
	q1, q2 := &testQueue{}, &testQueue{}
	s := newRangeScanner(1*time.Millisecond, iter)
	s.AddQueues(q1, q2)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	stopper := util.NewStopper(0)

	// Start the scanner and verify that all ranges are added to both queues.
	s.Start(clock, stopper)
	if err := util.IsTrueWithin(func() bool {
		return q1.count() == count && q2.count() == count
	}, 50*time.Millisecond); err != nil {
		t.Error(err)
	}

	// Remove the first range and verify it is removed from both queues.
	rng := iter.remove(0)
	s.RemoveRange(rng)
	if err := util.IsTrueWithin(func() bool {
		return q1.count() == count-1 && q2.count() == count-1
	}, 10*time.Millisecond); err != nil {
		t.Error(err)
	}

	// Stop scanner and verify both queues are stopped.
	stopper.Stop()
	if !q1.isDone() || !q2.isDone() {
		t.Errorf("expected all queues to stop; got %t, %t", q1.isDone(), q2.isDone())
	}
}
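// The tests collected here all wait on asynchronous conditions via
// util.IsTrueWithin. As a rough illustration of the semantics they rely on,
// the sketch below polls a closure until it returns true or the duration
// elapses. This is a hypothetical stand-in (using only the fmt and time
// packages), not the actual util implementation, whose polling interval and
// error text may differ.
func isTrueWithinSketch(fn func() bool, d time.Duration) error {
	deadline := time.Now().Add(d)
	for {
		if fn() {
			// The condition became true before the deadline.
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition failed to evaluate true within %s", d)
		}
		// Sleep briefly between evaluations to avoid busy-waiting.
		time.Sleep(time.Millisecond)
	}
}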
// TestRangeSplitsWithWritePressure sets the zone config max bytes for
// a range to 256K and writes data until there are five ranges.
func TestRangeSplitsWithWritePressure(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()
	setTestRetryOptions()

	// Rewrite a zone config with low max bytes.
	zoneConfig := &proto.ZoneConfig{
		ReplicaAttrs: []proto.Attributes{
			{},
			{},
			{},
		},
		RangeMinBytes: 1 << 8,
		RangeMaxBytes: 1 << 18,
	}
	if err := s.DB.Put(keys.MakeKey(keys.ConfigZonePrefix, proto.KeyMin), zoneConfig); err != nil {
		t.Fatal(err)
	}

	// Start a test writer that writes ~32K per key so that only a few
	// writes are necessary to exceed the 256K range max bytes.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go startTestWriter(s.DB, int64(0), 1<<15, &wg, nil, nil, done, t)

	// Check that we split 5 times in the allotted time.
	if err := util.IsTrueWithin(func() bool {
		// Scan the meta2 records.
		rows, err := s.DB.Scan(keys.Meta2Prefix, keys.MetaMax, 0)
		if err != nil {
			t.Fatalf("failed to scan meta2 keys: %s", err)
		}
		return len(rows) >= 5
	}, 6*time.Second); err != nil {
		t.Errorf("failed to split 5 times: %s", err)
	}
	close(done)
	wg.Wait()

	// This write pressure test often causes splits while resolve
	// intents are in flight, causing them to fail with range key
	// mismatch errors. However, LocalSender should retry in these
	// cases. Check here via MVCC scan that there are no dangling write
	// intents. We do this using an IsTrueWithin construct to account
	// for timing of finishing the test writer and a possibly-ongoing
	// asynchronous split.
	if err := util.IsTrueWithin(func() bool {
		if _, _, err := engine.MVCCScan(s.Eng, keys.LocalMax, proto.KeyMax, 0, proto.MaxTimestamp, true, nil); err != nil {
			log.Infof("mvcc scan should be clean: %s", err)
			return false
		}
		return true
	}, 500*time.Millisecond); err != nil {
		t.Error("failed to verify no dangling intents within 500ms")
	}
}
// TestChangeReplicasDescriptorInvariant tests that a replica change aborts if
// another change has been made to the RangeDescriptor since it was initiated.
func TestChangeReplicasDescriptorInvariant(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	repl, err := mtc.stores[0].GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	addReplica := func(storeNum int, desc *roachpb.RangeDescriptor) error {
		return repl.ChangeReplicas(roachpb.ADD_REPLICA, roachpb.ReplicaDescriptor{
			NodeID:  mtc.stores[storeNum].Ident.NodeID,
			StoreID: mtc.stores[storeNum].Ident.StoreID,
		}, desc)
	}

	// Retain the descriptor for the range at this point.
	origDesc := repl.Desc()

	// Add replica to the second store, which should succeed.
	if err := addReplica(1, origDesc); err != nil {
		t.Fatal(err)
	}
	if err := util.IsTrueWithin(func() bool {
		r := mtc.stores[1].LookupReplica(roachpb.RKey("a"), roachpb.RKey("b"))
		return r != nil
	}, time.Second); err != nil {
		t.Fatal(err)
	}

	// Attempt to add replica to the third store with the original descriptor.
	// This should fail because the descriptor is stale.
	if err := addReplica(2, origDesc); err == nil {
		t.Fatal("expected error calling ChangeReplicas with stale RangeDescriptor")
	}

	// Add to third store with fresh descriptor.
	if err := addReplica(2, repl.Desc()); err != nil {
		t.Fatal(err)
	}
	if err := util.IsTrueWithin(func() bool {
		r := mtc.stores[2].LookupReplica(roachpb.RKey("a"), roachpb.RKey("b"))
		return r != nil
	}, time.Second); err != nil {
		t.Fatal(err)
	}
}
// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split on
// exceeding zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	store, stopper := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	maxBytes := int64(1 << 16)
	// Set max bytes.
	config.TestingSetZoneConfig(1000, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Wait for the range to be split along table boundaries.
	originalRange := store.LookupReplica(roachpb.RKeyMin, nil)
	var rng *storage.Replica
	if err := util.IsTrueWithin(func() bool {
		rng = store.LookupReplica(keys.MakeTablePrefix(1000), nil)
		return rng.Desc().RangeID != originalRange.Desc().RangeID
	}, time.Second); err != nil {
		t.Fatalf("failed to notice range max bytes update: %s", err)
	}

	// Check range's max bytes settings.
	if rng.GetMaxBytes() != maxBytes {
		t.Fatalf("range max bytes mismatch, got: %d, expected: %d", rng.GetMaxBytes(), maxBytes)
	}

	// Make sure the second range goes to the end.
	if !roachpb.RKeyMax.Equal(rng.Desc().EndKey) {
		t.Fatalf("second range has split: %+v", rng.Desc())
	}

	// Fill the range under the table prefix we're writing to.
	fillRange(store, rng.Desc().RangeID, keys.MakeTablePrefix(1000), maxBytes, t)

	// Verify that the range is in fact split (give it a few seconds for very
	// slow test machines).
	var newRng *storage.Replica
	if err := util.IsTrueWithin(func() bool {
		newRng = store.LookupReplica(keys.MakeTablePrefix(2000), nil)
		return newRng.Desc().RangeID != rng.Desc().RangeID
	}, 5*time.Second); err != nil {
		t.Errorf("expected range to split within 5s")
	}

	// Make sure the new range goes to the end.
	if !roachpb.RKeyMax.Equal(newRng.Desc().EndKey) {
		t.Fatalf("new range has split: %+v", newRng.Desc())
	}
}
// TestNodeJoin verifies a new node is able to join a bootstrapped
// cluster consisting of one node.
func TestNodeJoin(t *testing.T) {
	defer leaktest.AfterTest(t)
	engineStopper := stop.NewStopper()
	defer engineStopper.Stop()
	e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
	stopper := stop.NewStopper()
	_, err := BootstrapCluster("cluster-1", []engine.Engine{e}, stopper)
	if err != nil {
		t.Fatal(err)
	}
	stopper.Stop()

	// Set an aggressive gossip interval to make sure information is exchanged tout de suite.
	testContext.GossipInterval = gossip.TestInterval

	// Start the bootstrap node.
	engines1 := []engine.Engine{e}
	addr1 := util.CreateTestAddr("tcp")
	server1, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t)
	defer stopper1.Stop()

	// Create a new node.
	engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)}
	server2, node2, stopper2 := createAndStartTestNode(util.CreateTestAddr("tcp"), engines2, server1.Addr(), t)
	defer stopper2.Stop()

	// Verify new node is able to bootstrap its store.
	if err := util.IsTrueWithin(func() bool {
		return node2.lSender.GetStoreCount() == 1
	}, 50*time.Millisecond); err != nil {
		t.Fatal(err)
	}

	// Verify node1 sees node2 via gossip and vice versa.
	node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID)
	node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID)
	if err := util.IsTrueWithin(func() bool {
		nodeDesc1 := &roachpb.NodeDescriptor{}
		if err := node1.ctx.Gossip.GetInfoProto(node2Key, nodeDesc1); err != nil {
			return false
		}
		if addr2 := nodeDesc1.Address.AddressField; addr2 != server2.Addr().String() {
			t.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2, server2.Addr().String())
		}
		nodeDesc2 := &roachpb.NodeDescriptor{}
		if err := node2.ctx.Gossip.GetInfoProto(node1Key, nodeDesc2); err != nil {
			return false
		}
		if addr1 := nodeDesc2.Address.AddressField; addr1 != server1.Addr().String() {
			t.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1, server1.Addr().String())
		}
		return true
	}, 50*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestRangeSplitsWithWritePressure sets the zone config max bytes for
// a range to 256K and writes data until there are five ranges.
func TestRangeSplitsWithWritePressure(t *testing.T) {
	defer leaktest.AfterTest(t)
	// Override default zone config.
	previousMaxBytes := config.DefaultZoneConfig.RangeMaxBytes
	config.DefaultZoneConfig.RangeMaxBytes = 1 << 18
	defer func() { config.DefaultZoneConfig.RangeMaxBytes = previousMaxBytes }()

	s := createTestDB(t)
	// This is purely to silence log spam.
	config.TestingSetupZoneConfigHook(s.Stopper)
	defer s.Stop()
	setTestRetryOptions()

	// Start a test writer that writes ~32K per key so that only a few
	// writes are necessary to exceed the 256K range max bytes.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go startTestWriter(s.DB, int64(0), 1<<15, &wg, nil, nil, done, t)

	// Check that we split 5 times in the allotted time.
	if err := util.IsTrueWithin(func() bool {
		// Scan the meta2 records.
		rows, err := s.DB.Scan(keys.Meta2Prefix, keys.MetaMax, 0)
		if err != nil {
			t.Fatalf("failed to scan meta2 keys: %s", err)
		}
		return len(rows) >= 5
	}, 6*time.Second); err != nil {
		t.Errorf("failed to split 5 times: %s", err)
	}
	close(done)
	wg.Wait()

	// This write pressure test often causes splits while resolve
	// intents are in flight, causing them to fail with range key
	// mismatch errors. However, LocalSender should retry in these
	// cases. Check here via MVCC scan that there are no dangling write
	// intents. We do this using an IsTrueWithin construct to account
	// for timing of finishing the test writer and a possibly-ongoing
	// asynchronous split.
	if err := util.IsTrueWithin(func() bool {
		if _, _, err := engine.MVCCScan(s.Eng, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil); err != nil {
			log.Infof("mvcc scan should be clean: %s", err)
			return false
		}
		return true
	}, cleanMVCCScanTimeout); err != nil {
		t.Error("failed to verify no dangling intents within the allotted time")
	}
}
// TestCoordinatorHeartbeat verifies periodic heartbeat of the
// transaction record.
func TestCoordinatorHeartbeat(t *testing.T) {
	db, _, manual := createTestDB(t)
	defer db.Close()

	// Set heartbeat interval to 1ms for testing.
	db.coordinator.heartbeatInterval = 1 * time.Millisecond

	txnID := engine.Key("txn")
	<-db.Put(createPutRequest(engine.Key("a"), []byte("value"), txnID))

	// Verify 3 heartbeats.
	var heartbeatTS proto.Timestamp
	for i := 0; i < 3; i++ {
		if err := util.IsTrueWithin(func() bool {
			ok, txn, err := getTxn(db, engine.MakeKey(engine.KeyLocalTransactionPrefix, txnID))
			if !ok || err != nil {
				return false
			}
			// Advance clock by 1ns.
			// Locking the coordinator to prevent a data race.
			db.coordinator.Lock()
			*manual = hlc.ManualClock(*manual + 1)
			db.coordinator.Unlock()
			if heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return true
			}
			return false
		}, 50*time.Millisecond); err != nil {
			t.Error("expected initial heartbeat within 50ms")
		}
	}
}
func getLatestConfig(s *server.TestServer, expected int) (cfg *config.SystemConfig, err error) {
	err = util.IsTrueWithin(func() bool {
		cfg = s.Gossip().GetSystemConfig()
		return cfg != nil && len(cfg.Values) == expected
	}, 500*time.Millisecond)
	return
}
// TestClientGossip verifies a client can gossip a delta to the server.
func TestClientGossip(t *testing.T) {
	local, remote, lserver, rserver := startGossip(t)
	local.AddInfo("local-key", "local value", time.Second)
	remote.AddInfo("remote-key", "remote value", time.Second)
	disconnected := make(chan *client, 1)

	client := newClient(remote.is.NodeAddr)
	go client.start(local, disconnected)

	if err := util.IsTrueWithin(func() bool {
		_, lerr := remote.GetInfo("local-key")
		_, rerr := local.GetInfo("remote-key")
		return lerr == nil && rerr == nil
	}, 500*time.Millisecond); err != nil {
		t.Errorf("gossip exchange failed or took too long")
	}

	remote.stop()
	local.stop()
	lserver.Close()
	rserver.Close()
	log.Info("done serving")
	if client != <-disconnected {
		t.Errorf("expected client disconnect after remote close")
	}
}
// TestCoordinatorGC verifies that the coordinator cleans up extant
// transactions after the lastUpdateTS exceeds the timeout.
func TestCoordinatorGC(t *testing.T) {
	db, _, manual := createTestDB(t)
	defer db.Close()

	// Set heartbeat interval to 1ms for testing.
	db.coordinator.heartbeatInterval = 1 * time.Millisecond

	txnID := engine.Key("txn")
	<-db.Put(createPutRequest(engine.Key("a"), []byte("value"), txnID))

	// Now, advance clock past the default client timeout.
	// Locking the coordinator to prevent a data race.
	db.coordinator.Lock()
	*manual = hlc.ManualClock(defaultClientTimeout.Nanoseconds() + 1)
	db.coordinator.Unlock()

	if err := util.IsTrueWithin(func() bool {
		// Locking the coordinator to prevent a data race.
		db.coordinator.Lock()
		_, ok := db.coordinator.txns[string(txnID)]
		db.coordinator.Unlock()
		return !ok
	}, 50*time.Millisecond); err != nil {
		t.Error("expected garbage collection")
	}
}
// TestStoreRangeReplicate verifies that the replication queue will notice
// under-replicated ranges and replicate them.
func TestStoreRangeReplicate(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := multiTestContext{}
	mtc.Start(t, 3)
	defer mtc.Stop()

	// Initialize the gossip network.
	var wg sync.WaitGroup
	wg.Add(len(mtc.stores))
	key := gossip.MakePrefixPattern(gossip.KeyCapacityPrefix)
	mtc.stores[0].Gossip().RegisterCallback(key, func(_ string, _ bool) { wg.Done() })
	for _, s := range mtc.stores {
		s.GossipCapacity()
	}
	wg.Wait()

	// Once we know our peers, trigger a scan.
	mtc.stores[0].ForceReplicationScan(t)

	// The range should become available on every node.
	if err := util.IsTrueWithin(func() bool {
		for _, s := range mtc.stores {
			r := s.LookupRange(proto.Key("a"), proto.Key("b"))
			if r == nil {
				return false
			}
		}
		return true
	}, 1*time.Second); err != nil {
		t.Fatal(err)
	}
}
// TestTxnCoordSenderGC verifies that the coordinator cleans up extant
// transactions after the lastUpdateNanos exceeds the timeout.
func TestTxnCoordSenderGC(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	// Set heartbeat interval to 1ms for testing.
	s.Sender.heartbeatInterval = 1 * time.Millisecond

	txn := newTxn(s.Clock, proto.Key("a"))
	call := proto.Call{
		Args:  createPutRequest(proto.Key("a"), []byte("value"), txn),
		Reply: &proto.PutResponse{},
	}
	if err := sendCall(s.Sender, call); err != nil {
		t.Fatal(err)
	}

	// Now, advance clock past the default client timeout.
	// Locking the TxnCoordSender to prevent a data race.
	s.Sender.Lock()
	s.Manual.Set(defaultClientTimeout.Nanoseconds() + 1)
	s.Sender.Unlock()

	if err := util.IsTrueWithin(func() bool {
		// Locking the TxnCoordSender to prevent a data race.
		s.Sender.Lock()
		_, ok := s.Sender.txns[string(txn.ID)]
		s.Sender.Unlock()
		return !ok
	}, 50*time.Millisecond); err != nil {
		t.Error("expected garbage collection")
	}
}
// TestTxnCoordSenderGC verifies that the coordinator cleans up extant
// transactions after the lastUpdateNanos exceeds the timeout.
func TestTxnCoordSenderGC(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	// Set heartbeat interval to 1ms for testing.
	s.Sender.heartbeatInterval = 1 * time.Millisecond

	txn := client.NewTxn(*s.DB)
	if pErr := txn.Put(roachpb.Key("a"), []byte("value")); pErr != nil {
		t.Fatal(pErr)
	}

	// Now, advance clock past the default client timeout.
	// Locking the TxnCoordSender to prevent a data race.
	s.Sender.Lock()
	s.Manual.Set(defaultClientTimeout.Nanoseconds() + 1)
	s.Sender.Unlock()

	if err := util.IsTrueWithin(func() bool {
		// Locking the TxnCoordSender to prevent a data race.
		s.Sender.Lock()
		_, ok := s.Sender.txns[string(txn.Proto.ID)]
		s.Sender.Unlock()
		return !ok
	}, 50*time.Millisecond); err != nil {
		t.Error("expected garbage collection")
	}
}
// TestStoreRangeUpReplicate verifies that the replication queue will notice
// under-replicated ranges and replicate them.
func TestStoreRangeUpReplicate(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	// Initialize the gossip network.
	var wg sync.WaitGroup
	wg.Add(len(mtc.stores))
	key := gossip.MakePrefixPattern(gossip.KeyStorePrefix)
	mtc.stores[0].Gossip().RegisterCallback(key, func(_ string, _ roachpb.Value) { wg.Done() })
	for _, s := range mtc.stores {
		s.GossipStore()
	}
	wg.Wait()

	// Once we know our peers, trigger a scan.
	mtc.stores[0].ForceReplicationScanAndProcess()

	// The range should become available on every node.
	if err := util.IsTrueWithin(func() bool {
		for _, s := range mtc.stores {
			r := s.LookupReplica(roachpb.RKey("a"), roachpb.RKey("b"))
			if r == nil {
				return false
			}
		}
		return true
	}, replicationTimeout); err != nil {
		t.Fatal(err)
	}
}
func gossipForTest(t *testing.T) (*gossip.Gossip, *stop.Stopper) {
	stopper := stop.NewStopper()

	// Set up a fake zone config handler.
	config.TestingSetupZoneConfigHook(stopper)

	rpcContext := rpc.NewContext(&base.Context{}, hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(rpcContext, gossip.TestBootstrap)
	// g.SetNodeID must be called before g.AddInfo.
	g.SetNodeID(roachpb.NodeID(1))

	// Put an empty system config into gossip.
	if err := g.AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Wait for the SystemConfig.
	if err := util.IsTrueWithin(func() bool {
		return g.GetSystemConfig() != nil
	}, 100*time.Millisecond); err != nil {
		t.Fatal(err)
	}

	return g, stopper
}
// TestBootstrapNewStore starts a cluster with two unbootstrapped
// stores and verifies both stores are added.
func TestBootstrapNewStore(t *testing.T) {
	e := engine.NewInMem(engine.Attributes{}, 1<<20)
	localDB, err := BootstrapCluster("cluster-1", e)
	if err != nil {
		t.Fatal(err)
	}
	localDB.Close()

	// Start a new node with two new stores which will require bootstrapping.
	engines := []engine.Engine{
		e,
		engine.NewInMem(engine.Attributes{}, 1<<20),
		engine.NewInMem(engine.Attributes{}, 1<<20),
	}
	server, node := createTestNode(util.CreateTestAddr("tcp"), engines, nil, t)
	defer server.Close()

	// Non-initialized stores (in this case the new in-memory-based
	// stores) will be bootstrapped by the node upon start. This happens
	// in a goroutine, so we'll have to wait a bit (up to 50ms) until
	// all three stores are visible.
	if err := util.IsTrueWithin(func() bool {
		return node.localDB.GetStoreCount() == 3
	}, 50*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestClientGossip verifies a client can gossip a delta to the server.
func TestClientGossip(t *testing.T) {
	defer leaktest.AfterTest(t)
	local, remote, stopper := startGossip(t)
	if err := local.AddInfo("local-key", "local value", time.Second); err != nil {
		t.Fatal(err)
	}
	if err := remote.AddInfo("remote-key", "remote value", time.Second); err != nil {
		t.Fatal(err)
	}
	disconnected := make(chan *client, 1)

	client := newClient(remote.is.NodeAddr)
	// Use an insecure context. We're talking to a unix socket, which is not in the certs.
	lclock := hlc.NewClock(hlc.UnixNano)
	rpcContext := rpc.NewContext(insecureTestBaseContext, lclock, stopper)
	client.start(local, disconnected, rpcContext, stopper)

	if err := util.IsTrueWithin(func() bool {
		_, lerr := remote.GetInfo("local-key")
		_, rerr := local.GetInfo("remote-key")
		return lerr == nil && rerr == nil
	}, 500*time.Millisecond); err != nil {
		t.Errorf("gossip exchange failed or took too long")
	}

	stopper.Stop()
	log.Info("done serving")
	if client != <-disconnected {
		t.Errorf("expected client disconnect after remote close")
	}
}
// TestRangeSplitMeta executes various splits (including at meta addressing)
// and checks that all created intents are resolved. This includes both intents
// which are resolved synchronously with EndTransaction and via RPC.
func TestRangeSplitMeta(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	splitKeys := []roachpb.Key{roachpb.Key("G"), keys.RangeMetaKey(roachpb.Key("F")),
		keys.RangeMetaKey(roachpb.Key("K")), keys.RangeMetaKey(roachpb.Key("H"))}

	// Execute the consecutive splits.
	for _, splitKey := range splitKeys {
		log.Infof("starting split at key %q...", splitKey)
		if err := s.DB.AdminSplit(splitKey); err != nil {
			t.Fatal(err)
		}
		log.Infof("split at key %q complete", splitKey)
	}

	if err := util.IsTrueWithin(func() bool {
		if _, _, err := engine.MVCCScan(s.Eng, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil); err != nil {
			log.Infof("mvcc scan should be clean: %s", err)
			return false
		}
		return true
	}, 500*time.Millisecond); err != nil {
		t.Error("failed to verify no dangling intents within 500ms")
	}
}
// TestScannerStats verifies that stats accumulate from all ranges.
func TestScannerStats(t *testing.T) {
	defer leaktest.AfterTest(t)
	const count = 3
	ranges := newTestRangeSet(count, t)
	q := &testQueue{}
	stopper := util.NewStopper()
	defer stopper.Stop()
	s := newRangeScanner(1*time.Millisecond, 0, ranges, nil)
	s.AddQueues(q)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)

	// At start, scanner stats should be blank for MVCC, but have accurate number of ranges.
	if rc := s.Stats().RangeCount; rc != count {
		t.Errorf("range count expected %d; got %d", count, rc)
	}
	if vb := s.Stats().MVCC.ValBytes; vb != 0 {
		t.Errorf("value bytes expected %d; got %d", 0, vb)
	}
	s.Start(clock, stopper)

	// We expect a full run to accumulate stats from all ranges.
	if err := util.IsTrueWithin(func() bool {
		if rc := s.Stats().RangeCount; rc != count {
			return false
		}
		if vb := s.Stats().MVCC.ValBytes; vb != count*2 {
			return false
		}
		return true
	}, 100*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestBaseQueueProcess verifies that items from the queue are
// processed according to the timer function.
func TestBaseQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, stopper := gossipForTest(t)
	defer stopper.Stop()

	r1 := &Replica{}
	if err := r1.setDesc(&roachpb.RangeDescriptor{RangeID: 1}); err != nil {
		t.Fatal(err)
	}
	r2 := &Replica{}
	if err := r2.setDesc(&roachpb.RangeDescriptor{RangeID: 2}); err != nil {
		t.Fatal(err)
	}
	testQueue := &testQueueImpl{
		blocker: make(chan struct{}, 1),
		shouldQueueFn: func(now roachpb.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
			shouldQueue = true
			priority = float64(r.Desc().RangeID)
			return
		},
	}
	bq := makeBaseQueue("test", testQueue, g, 2)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	bq.Start(clock, stopper)

	bq.MaybeAdd(r1, roachpb.ZeroTimestamp)
	bq.MaybeAdd(r2, roachpb.ZeroTimestamp)
	if pc := atomic.LoadInt32(&testQueue.processed); pc != 0 {
		t.Errorf("expected no processed ranges; got %d", pc)
	}

	testQueue.blocker <- struct{}{}
	if err := util.IsTrueWithin(func() bool {
		return atomic.LoadInt32(&testQueue.processed) == 1
	}, 250*time.Millisecond); err != nil {
		t.Error(err)
	}

	testQueue.blocker <- struct{}{}
	if err := util.IsTrueWithin(func() bool {
		return atomic.LoadInt32(&testQueue.processed) >= 2
	}, 250*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestBaseQueueProcess verifies that items from the queue are
// processed according to the timer function.
func TestBaseQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)
	r1 := &Range{}
	if err := r1.setDesc(&proto.RangeDescriptor{RaftID: 1}); err != nil {
		t.Fatal(err)
	}
	r2 := &Range{}
	if err := r2.setDesc(&proto.RangeDescriptor{RaftID: 2}); err != nil {
		t.Fatal(err)
	}
	testQueue := &testQueueImpl{
		shouldQueueFn: func(now proto.Timestamp, r *Range) (shouldQueue bool, priority float64) {
			shouldQueue = true
			priority = float64(r.Desc().RaftID)
			return
		},
		duration: 5 * time.Millisecond,
	}
	bq := newBaseQueue("test", testQueue, 2)
	stopper := util.NewStopper()
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	bq.Start(clock, stopper)
	defer stopper.Stop()

	bq.MaybeAdd(r1, proto.ZeroTimestamp)
	bq.MaybeAdd(r2, proto.ZeroTimestamp)
	if pc := atomic.LoadInt32(&testQueue.processed); pc != 0 {
		t.Errorf("expected no processed ranges; got %d", pc)
	}

	time.Sleep(5 * time.Millisecond)
	if err := util.IsTrueWithin(func() bool {
		return atomic.LoadInt32(&testQueue.processed) == 1
	}, 5*time.Millisecond); err != nil {
		t.Error(err)
	}

	time.Sleep(5 * time.Millisecond)
	if err := util.IsTrueWithin(func() bool {
		return atomic.LoadInt32(&testQueue.processed) == 2
	}, 5*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestNodeJoin verifies a new node is able to join a bootstrapped
// cluster consisting of one node.
func TestNodeJoin(t *testing.T) {
	e := engine.NewInMem(engine.Attributes{}, 1<<20)
	localDB, err := BootstrapCluster("cluster-1", e)
	if err != nil {
		t.Fatal(err)
	}
	localDB.Close()

	// Set an aggressive gossip interval to make sure information is exchanged tout de suite.
	*gossip.GossipInterval = 10 * time.Millisecond

	// Start the bootstrap node.
	engines1 := []engine.Engine{e}
	addr1 := util.CreateTestAddr("tcp")
	server1, node1 := createTestNode(addr1, engines1, addr1, t)
	defer server1.Close()

	// Create a new node.
	engines2 := []engine.Engine{engine.NewInMem(engine.Attributes{}, 1<<20)}
	server2, node2 := createTestNode(util.CreateTestAddr("tcp"), engines2, server1.Addr(), t)
	defer server2.Close()

	// Verify new node is able to bootstrap its store.
	if err := util.IsTrueWithin(func() bool {
		return node2.localDB.GetStoreCount() == 1
	}, 50*time.Millisecond); err != nil {
		t.Fatal(err)
	}

	// Verify node1 sees node2 via gossip and vice versa.
	node1Key := gossip.MakeNodeIDGossipKey(node1.Descriptor.NodeID)
	node2Key := gossip.MakeNodeIDGossipKey(node2.Descriptor.NodeID)
	if err := util.IsTrueWithin(func() bool {
		if val, err := node1.gossip.GetInfo(node2Key); err != nil {
			return false
		} else if val.(net.Addr).String() != server2.Addr().String() {
			t.Errorf("addr2 gossip %s doesn't match addr2 address %s", val.(net.Addr).String(), server2.Addr().String())
		}
		if val, err := node2.gossip.GetInfo(node1Key); err != nil {
			return false
		} else if val.(net.Addr).String() != server1.Addr().String() {
			t.Errorf("addr1 gossip %s doesn't match addr1 address %s", val.(net.Addr).String(), server1.Addr().String())
		}
		return true
	}, 50*time.Millisecond); err != nil {
		t.Error(err)
	}
}
// TestScannerAddToQueues verifies that ranges are added to and
// removed from multiple queues.
func TestScannerAddToQueues(t *testing.T) {
	defer leaktest.AfterTest(t)
	const count = 3
	ranges := newTestRangeSet(count, t)
	q1, q2 := &testQueue{}, &testQueue{}
	// We don't want to actually consume entries from the queues during this test.
	q1.setDisabled(true)
	q2.setDisabled(true)
	s := newReplicaScanner(1*time.Millisecond, 0, ranges)
	s.AddQueues(q1, q2)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	stopper := stop.NewStopper()

	// Start the scanner and verify that all ranges are added to both queues.
	s.Start(clock, stopper)
	if err := util.IsTrueWithin(func() bool {
		return q1.count() == count && q2.count() == count
	}, 50*time.Millisecond); err != nil {
		t.Error(err)
	}

	// Remove the first range and verify it is removed from both queues.
	rng := ranges.remove(0, t)
	if err := util.IsTrueWithin(func() bool {
		// This is intentionally inside the loop, otherwise this test races as
		// our removal of the range may be processed before a stray re-queue.
		// Removing on each attempt makes sure we clean this up as we retry.
		s.RemoveReplica(rng)
		c1 := q1.count()
		c2 := q2.count()
		log.Infof("q1: %d, q2: %d, wanted: %d", c1, c2, count-1)
		return c1 == count-1 && c2 == count-1
	}, time.Second); err != nil {
		t.Error(err)
	}

	// Stop scanner and verify both queues are stopped.
	stopper.Stop()
	if !q1.isDone() || !q2.isDone() {
		t.Errorf("expected all queues to stop; got %t, %t", q1.isDone(), q2.isDone())
	}
}
// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split on
// exceeding zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	store, stopper := createTestStore(t)
	defer stopper.Stop()

	maxBytes := int64(1 << 16)
	rng := store.LookupReplica(proto.KeyMin, nil)
	fillRange(store, rng.Desc().RangeID, proto.Key("test"), maxBytes, t)

	// Rewrite zone config with range max bytes set to 64K.
	// This will cause the split queue to split the range in the background.
	// This must happen after fillRange() because that function is not using
	// a full-fledged client and cannot handle running concurrently with splits.
	zoneConfig := &config.ZoneConfig{
		ReplicaAttrs: []proto.Attributes{
			{},
			{},
			{},
		},
		RangeMinBytes: 1 << 8,
		RangeMaxBytes: maxBytes,
	}
	key := keys.MakeKey(keys.ConfigZonePrefix, proto.KeyMin)
	if err := store.DB().Put(key, zoneConfig); err != nil {
		t.Fatal(err)
	}

	// See if the range's max bytes is modified via gossip callback within 50ms.
	if err := util.IsTrueWithin(func() bool {
		return rng.GetMaxBytes() == zoneConfig.RangeMaxBytes
	}, 50*time.Millisecond); err != nil {
		t.Fatalf("failed to notice range max bytes update: %s", err)
	}

	// Verify that the range is in fact split (give it a second for very slow test machines).
	if err := util.IsTrueWithin(func() bool {
		newRng := store.LookupReplica(proto.Key("\xff\x00"), nil)
		return newRng != rng
	}, time.Second); err != nil {
		t.Errorf("expected range to split within 1s")
	}
}
func getLatestConfig(s *server.TestServer, expected int) (cfg *config.SystemConfig, err error) {
	err = util.IsTrueWithin(func() bool {
		var err2 error
		cfg, err2 = s.Gossip().GetSystemConfig()
		if err2 != nil {
			return false
		}
		return len(cfg.Values) == expected
	}, 500*time.Millisecond)
	return
}
// TestReplicateAfterSplit verifies that a new replica whose start key
// is not KeyMin replicating to a fresh store can apply snapshots correctly.
func TestReplicateAfterSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	rangeID := roachpb.RangeID(1)
	splitKey := roachpb.Key("m")
	key := roachpb.Key("z")

	store0 := mtc.stores[0]
	// Make the split.
	splitArgs := adminSplitArgs(roachpb.KeyMin, splitKey)
	if _, err := client.SendWrapped(rg1(store0), nil, &splitArgs); err != nil {
		t.Fatal(err)
	}

	rangeID2 := store0.LookupReplica(roachpb.RKey(key), nil).RangeID
	if rangeID2 == rangeID {
		t.Errorf("got same range id after split")
	}
	// Issue an increment for later check.
	incArgs := incrementArgs(key, 11)
	if _, err := client.SendWrappedWith(rg1(store0), nil, roachpb.Header{
		RangeID: rangeID2,
	}, &incArgs); err != nil {
		t.Fatal(err)
	}
	// Now add the second replica.
	mtc.replicateRange(rangeID2, 0, 1)

	if mtc.stores[1].LookupReplica(roachpb.RKey(key), nil).GetMaxBytes() == 0 {
		t.Error("Range MaxBytes is not set after snapshot applied")
	}
	// Once it catches up, the effects of increment commands can be seen.
	if err := util.IsTrueWithin(func() bool {
		getArgs := getArgs(key)
		// Reading on non-leader replica should use inconsistent read.
		reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{
			RangeID:         rangeID2,
			ReadConsistency: roachpb.INCONSISTENT,
		}, &getArgs)
		if err != nil {
			return false
		}
		getResp := reply.(*roachpb.GetResponse)
		if log.V(1) {
			log.Infof("read value %d", mustGetInt(getResp.Value))
		}
		return mustGetInt(getResp.Value) == 11
	}, replicaReadTimeout); err != nil {
		t.Fatal(err)
	}
}
// TestReplicateAfterSplit verifies that a new replica whose start key
// is not KeyMin replicating to a fresh store can apply snapshots correctly.
func TestReplicateAfterSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	rangeID := proto.RangeID(1)
	splitKey := proto.Key("m")
	key := proto.Key("z")

	store0 := mtc.stores[0]
	// Make the split.
	splitArgs := adminSplitArgs(proto.KeyMin, splitKey, rangeID, store0.StoreID())
	if _, err := store0.ExecuteCmd(context.Background(), &splitArgs); err != nil {
		t.Fatal(err)
	}

	rangeID2 := store0.LookupReplica(key, nil).Desc().RangeID
	if rangeID2 == rangeID {
		t.Errorf("got same range id after split")
	}
	// Issue an increment for later check.
	incArgs := incrementArgs(key, 11, rangeID2, store0.StoreID())
	if _, err := store0.ExecuteCmd(context.Background(), &incArgs); err != nil {
		t.Fatal(err)
	}
	// Now add the second replica.
	mtc.replicateRange(rangeID2, 0, 1)

	if mtc.stores[1].LookupReplica(key, nil).GetMaxBytes() == 0 {
		t.Error("Range MaxBytes is not set after snapshot applied")
	}
	// Once it catches up, the effects of increment commands can be seen.
	if err := util.IsTrueWithin(func() bool {
		getArgs := getArgs(key, rangeID2, mtc.stores[1].StoreID())
		// Reading on non-leader replica should use inconsistent read.
		getArgs.ReadConsistency = proto.INCONSISTENT
		reply, err := mtc.stores[1].ExecuteCmd(context.Background(), &getArgs)
		if err != nil {
			return false
		}
		getResp := reply.(*proto.GetResponse)
		if log.V(1) {
			log.Infof("read value %d", mustGetInt(getResp.Value))
		}
		return mustGetInt(getResp.Value) == 11
	}, 1*time.Second); err != nil {
		t.Fatal(err)
	}
}
func TestStopperNumTasks(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := stop.NewStopper()
	var tasks []chan bool
	for i := 0; i < 3; i++ {
		c := make(chan bool)
		tasks = append(tasks, c)
		s.RunAsyncTask(func() {
			// Wait for channel to close.
			<-c
		})
		tm := s.RunningTasks()
		if numTypes, numTasks := len(tm), s.NumTasks(); numTypes != 1 || numTasks != i+1 {
			t.Errorf("stopper should have %d running tasks, got %d / %+v", i+1, numTasks, tm)
		}
		m := s.RunningTasks()
		if len(m) != 1 {
			t.Fatalf("expected exactly one task map entry: %+v", m)
		}
		for _, v := range m {
			if expNum := len(tasks); v != expNum {
				t.Fatalf("%d: expected %d tasks, got %d", i, expNum, v)
			}
		}
	}
	for i, c := range tasks {
		m := s.RunningTasks()
		if len(m) != 1 {
			t.Fatalf("%d: expected exactly one task map entry: %+v", i, m)
		}
		for _, v := range m {
			if expNum := len(tasks[i:]); v != expNum {
				t.Fatalf("%d: expected %d tasks, got %d:\n%s", i, expNum, v, m)
			}
		}
		// Close the channel to let the task proceed.
		close(c)
		expNum := len(tasks[i+1:])
		err := util.IsTrueWithin(func() bool { return s.NumTasks() == expNum }, 20*time.Millisecond)
		if err != nil {
			t.Errorf("%d: stopper should have %d running tasks, got %d", i, expNum, s.NumTasks())
		}
	}
	// The task map should've been cleared out.
	if m := s.RunningTasks(); len(m) != 0 {
		t.Fatalf("task map not empty: %+v", m)
	}
	s.Stop()
}
// TestDelayedOffsetMeasurement tests that the client will record a
// zero offset if the heartbeat reply exceeds the
// maximumClockReadingDelay, but not the heartbeat timeout.
func TestDelayedOffsetMeasurement(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := util.NewStopper()
	defer stopper.Stop()

	serverManual := hlc.NewManualClock(10)
	serverClock := hlc.NewClock(serverManual.UnixNano)
	s := createTestServer(serverClock, stopper, t)

	heartbeat := &HeartbeatService{
		clock:              serverClock,
		remoteClockMonitor: newRemoteClockMonitor(serverClock),
	}
	if err := s.RegisterName("Heartbeat", heartbeat); err != nil {
		t.Fatalf("Unable to register heartbeat service: %s", err)
	}

	// Create a client that receives a heartbeat right after the
	// maximumClockReadingDelay.
	advancing := AdvancingClock{
		time:                0,
		advancementInterval: maximumClockReadingDelay.Nanoseconds() + 1,
	}
	clientClock := hlc.NewClock(advancing.UnixNano)
	context := NewServerTestContext(clientClock, stopper)
	c := NewClient(s.Addr(), nil, context)
	<-c.Ready

	// Ensure we get a good heartbeat before continuing.
	if err := util.IsTrueWithin(c.IsHealthy, heartbeatInterval*10); err != nil {
		t.Fatal(err)
	}

	// Since the reply took too long, we should have a zero offset, even
	// though the client is still healthy because it received a heartbeat
	// reply.
	if o := c.RemoteOffset(); !o.Equal(proto.RemoteOffset{}) {
		t.Errorf("expected offset %v, actual %v", proto.RemoteOffset{}, o)
	}

	// Ensure the general offsets map was updated properly too.
	context.RemoteClocks.mu.Lock()
	if o, ok := context.RemoteClocks.offsets[c.addr.String()]; ok {
		t.Errorf("expected offset to not exist, but found %v", o)
	}
	context.RemoteClocks.mu.Unlock()
}
func verifyCleanup(key proto.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	if len(coord.txns) != 0 {
		t.Errorf("expected empty transactions map; got %d", len(coord.txns))
	}

	if err := util.IsTrueWithin(func() bool {
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MVCCEncodeKey(key), meta)
		if err != nil {
			t.Errorf("error getting MVCC metadata: %s", err)
		}
		return !ok || meta.Txn == nil
	}, 500*time.Millisecond); err != nil {
		t.Errorf("expected intents to be cleaned up within 500ms")
	}
}