func TestRebalanceInterval(t *testing.T) { defer leaktest.AfterTest(t)() if defaultMinRebalanceInterval <= gossip.DefaultGossipStoresInterval { t.Fatalf("defaultMinRebalanceInterval (%s) cannot be shorter than "+ "DefaultGossipStoresInterval (%s)", defaultMaxRebalanceInterval, gossip.DefaultGossipStoresInterval) } if defaultMaxRebalanceInterval < defaultMinRebalanceInterval { t.Fatalf("defaultMaxRebalanceInterval (%s) < defaultMinRebalanceInterval (%s)", defaultMinRebalanceInterval, defaultMaxRebalanceInterval) } stopper, g, _, a, manualClock := createTestAllocator() defer stopper.Stop() stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 100}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 1}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) a.options.Deterministic = true // Store currrent time before the first rebalance decision, to avoid flakiness // for the nextRebalance checks below. now := time.Unix(0, manualClock.UnixNano()) // The first ShouldRebalance call should always return true when there is an // imbalance. if a, e := a.ShouldRebalance(1), true; a != e { t.Errorf("ShouldRebalance returned %t != expected %t", a, e) } a.UpdateNextRebalance() backoff := a.nextRebalance.Sub(now) if backoff < defaultMinRebalanceInterval { t.Fatalf("nextRebalance interval (%s) < min (%s)", backoff, defaultMinRebalanceInterval) } if backoff > defaultMaxRebalanceInterval { t.Fatalf("nextRebalance interval (%s) > max (%s)", backoff, defaultMaxRebalanceInterval) } // We just rebalanced, so another rebalance should not be allowed. if a, e := a.ShouldRebalance(1), false; a != e { t.Errorf("ShouldRebalance returned %t != expected %t", a, e) } // Simulate the rebalance interval passing. manualClock.Increment(backoff.Nanoseconds()) if a, e := a.ShouldRebalance(1), true; a != e { t.Errorf("ShouldRebalance returned %t != expected %t", a, e) } }
// TestStoreRangeRemoveDead verifies that if a store becomes dead, the // ReplicateQueue will notice and remove any replicas on it. func TestStoreRangeRemoveDead(t *testing.T) { defer leaktest.AfterTest(t) mtc := &multiTestContext{} mtc.timeUntilStoreDead = storage.TestTimeUntilStoreDead mtc.Start(t, 3) defer mtc.Stop() sg := gossiputil.NewStoreGossiper(mtc.gossip) // Replicate the range to all stores. replica := mtc.stores[0].LookupReplica(roachpb.RKeyMin, nil) mtc.replicateRange(replica.Desc().RangeID, 0, 1, 2) // Initialize the gossip network. var storeIDs []roachpb.StoreID for _, s := range mtc.stores { storeIDs = append(storeIDs, s.StoreID()) } sg.GossipWithFunction(storeIDs, func() { for _, s := range mtc.stores { s.GossipStore() } }) aliveStoreIDs := []roachpb.StoreID{ mtc.stores[0].StoreID(), mtc.stores[1].StoreID(), } rangeDesc := getRangeMetadata(roachpb.RKeyMin, mtc, t) if e, a := 3, len(rangeDesc.Replicas); e != a { t.Fatalf("expected %d replicas, only found %d, rangeDesc: %+v", e, a, rangeDesc) } // This can't use SucceedsWithin as using the backoff mechanic won't work // as it requires a specific cadence of re-gossiping the alive stores to // maintain their alive status. ticker := time.NewTicker(storage.TestTimeUntilStoreDead / 2) defer ticker.Stop() maxTime := 5 * time.Second maxTimeout := time.After(maxTime) for len(getRangeMetadata(roachpb.RKeyMin, mtc, t).Replicas) > 2 { select { case <-maxTimeout: t.Fatalf("Failed to remove the dead replica within %s", maxTime) case <-ticker.C: // Keep gossiping the alive stores. sg.GossipWithFunction(aliveStoreIDs, func() { mtc.stores[0].GossipStore() mtc.stores[1].GossipStore() }) // Force the repair queues on all alive stores to run. mtc.stores[0].ForceReplicationScan(t) mtc.stores[1].ForceReplicationScan(t) } } ticker.Stop() }
func TestAllocatorTwoDatacenters(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(multiDCStores, t) result1, err := a.AllocateTarget(multiDCConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } result2, err := a.AllocateTarget(multiDCConfig.ReplicaAttrs[1], []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } if result1.Node.NodeID != 1 || result2.Node.NodeID != 2 { t.Errorf("Expected nodes 1 & 2: %+v vs %+v", result1.Node, result2.Node) } // Verify that no result is forthcoming if we already have a replica. _, err = a.AllocateTarget(multiDCConfig.ReplicaAttrs[1], []roachpb.ReplicaDescriptor{ { NodeID: result2.Node.NodeID, StoreID: result2.StoreID, }, }, false, nil) if err == nil { t.Errorf("expected error on allocation without available stores") } }
// createCluster generates a new cluster using the provided stopper and the // number of nodes supplied. Each node will have one store to start. func createCluster(stopper *stop.Stopper, nodeCount int) *Cluster { rand, seed := randutil.NewPseudoRand() clock := hlc.NewClock(hlc.UnixNano) rpcContext := rpc.NewContext(&base.Context{}, clock, stopper) g := gossip.New(rpcContext, gossip.TestInterval, gossip.TestBootstrap) storePool := storage.NewStorePool(g, storage.TestTimeUntilStoreDeadOff, stopper) c := &Cluster{ stopper: stopper, clock: clock, rpc: rpcContext, gossip: g, storePool: storePool, allocator: storage.MakeAllocator(storePool, storage.RebalancingOptions{}), storeGossiper: gossiputil.NewStoreGossiper(g), nodes: make(map[proto.NodeID]*Node), stores: make(map[proto.StoreID]*Store), ranges: make(map[proto.RangeID]*Range), rand: rand, seed: seed, } // Add the nodes. for i := 0; i < nodeCount; i++ { c.addNewNodeWithStore() } // Add a single range and add to this first node's first store. firstRange := c.addRange() firstRange.attachRangeToStore(c.stores[proto.StoreID(0)]) return c }
// createCluster generates a new cluster using the provided stopper and the // number of nodes supplied. Each node will have one store to start. func createCluster( stopper *stop.Stopper, nodeCount int, epochWriter, actionWriter io.Writer, script Script, rand *rand.Rand, ) *Cluster { clock := hlc.NewClock(hlc.UnixNano) rpcContext := rpc.NewContext(nil, clock, stopper) g := gossip.New(rpcContext, nil, stopper) // NodeID is required for Gossip, so set it to -1 for the cluster Gossip // instance to prevent conflicts with real NodeIDs. g.SetNodeID(-1) storePool := storage.NewStorePool(g, clock, storage.TestTimeUntilStoreDeadOff, stopper) c := &Cluster{ stopper: stopper, clock: clock, rpc: rpcContext, gossip: g, storePool: storePool, allocator: storage.MakeAllocator(storePool, storage.AllocatorOptions{ AllowRebalance: true, Deterministic: true, }), storeGossiper: gossiputil.NewStoreGossiper(g), nodes: make(map[roachpb.NodeID]*Node), stores: make(map[roachpb.StoreID]*Store), ranges: make(map[roachpb.RangeID]*Range), rangeIDsByStore: make(map[roachpb.StoreID]roachpb.RangeIDSlice), rand: rand, epochWriter: tabwriter.NewWriter(epochWriter, 8, 1, 2, ' ', 0), actionWriter: tabwriter.NewWriter(actionWriter, 8, 1, 2, ' ', 0), script: script, epoch: -1, } // Add the nodes. for i := 0; i < nodeCount; i++ { c.addNewNodeWithStore() } // Add a single range and add to this first node's first store. firstRange := c.addRange() firstRange.addReplica(c.stores[0]) c.calculateRangeIDsByStore() // Output the first epoch header. c.epoch = 0 c.OutputEpochHeader() c.OutputEpoch() c.flush() return c }
// TestAllocatorRebalanceByCount verifies that rebalance targets are // chosen by range counts in the event that available capacities // exceed the maxAvailCapacityThreshold. func TestAllocatorRebalanceByCount(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() // Setup the stores so that only one is below the standard deviation threshold. stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 99, RangeCount: 10}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 98, RangeCount: 10}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 98, RangeCount: 5}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every rebalance target must be store 4 (or nil for case of missing the only option). for i := 0; i < 10; i++ { result := a.RebalanceTarget(roachpb.Attributes{}, []roachpb.ReplicaDescriptor{{StoreID: 1}}, 0) if result != nil && result.StoreID != 4 { t.Errorf("expected store 4; got %d", result.StoreID) } } // Verify shouldRebalance results. a.options.Deterministic = true for i, store := range stores { desc, ok := a.storePool.getStoreDescriptor(store.StoreID) if !ok { t.Fatalf("%d: unable to get store %d descriptor", i, store.StoreID) } sl, _, _ := a.storePool.getStoreList(roachpb.Attributes{}, true) result := a.shouldRebalance(desc, sl) if expResult := (i < 3); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) } } }
func TestAllocatorSimpleRetrieval(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(singleStore, t) result, err := a.AllocateTarget(simpleZoneConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } if result.Node.NodeID != 1 || result.StoreID != 1 { t.Errorf("expected NodeID 1 and StoreID 1: %+v", result) } }
// TestAllocatorThrottled ensures that when a store is throttled, the replica // will not be sent to purgatory. func TestAllocatorThrottled(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() // First test to make sure we would send the replica to purgatory. _, err := a.AllocateTarget( simpleZoneConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil, ) if _, ok := err.(purgatoryError); !ok { t.Fatalf("expected a purgatory error, got: %v", err) } // Second, test the normal case in which we can allocate to the store. gossiputil.NewStoreGossiper(g).GossipStores(singleStore, t) result, err := a.AllocateTarget( simpleZoneConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil, ) if err != nil { t.Fatalf("unable to perform allocation: %v", err) } if result.Node.NodeID != 1 || result.StoreID != 1 { t.Errorf("expected NodeID 1 and StoreID 1: %+v", result) } // Finally, set that store to be throttled and ensure we don't send the // replica to purgatory. a.storePool.mu.Lock() storeDetail, ok := a.storePool.mu.stores[singleStore[0].StoreID] if !ok { t.Fatalf("store:%d was not found in the store pool", singleStore[0].StoreID) } storeDetail.throttledUntil = timeutil.Now().Add(24 * time.Hour) a.storePool.mu.Unlock() _, err = a.AllocateTarget( simpleZoneConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil, ) if _, ok := err.(purgatoryError); ok { t.Fatalf("expected a non purgatory error, got: %v", err) } }
// TestAllocatorRebalance verifies that rebalance targets are chosen // randomly from amongst stores over the minAvailCapacityThreshold. func TestAllocatorRebalance(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a := createTestAllocator() defer stopper.Stop() stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 50}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100 - int64(100*maxFractionUsedThreshold)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: (100 - int64(100*maxFractionUsedThreshold)) / 2}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every rebalance target must be either stores 1 or 2. for i := 0; i < 10; i++ { result := a.RebalanceTarget(3, roachpb.Attributes{}, []roachpb.ReplicaDescriptor{}) if result == nil { t.Fatal("nil result") } if result.StoreID != 1 && result.StoreID != 2 { t.Errorf("%d: expected store 1 or 2; got %d", i, result.StoreID) } } // Verify ShouldRebalance results. a.options.Deterministic = true for i, store := range stores { result := a.ShouldRebalance(store.StoreID) if expResult := (i >= 2); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) } } }
func TestStorePoolGetStoreDetails(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) sg.GossipStores(uniqueStore, t) if detail := sp.getStoreDetail(roachpb.StoreID(1)); detail.dead { t.Errorf("Present storeDetail came back as dead, expected it to be alive. %+v", detail) } if detail := sp.getStoreDetail(roachpb.StoreID(2)); detail.dead { t.Errorf("Absent storeDetail came back as dead, expected it to be alive. %+v", detail) } }
// createCluster generates a new cluster using the provided stopper and the // number of nodes supplied. Each node will have one store to start. func createCluster(stopper *stop.Stopper, nodeCount int, epochWriter, actionWriter io.Writer, script Script) *Cluster { rand, seed := randutil.NewPseudoRand() clock := hlc.NewClock(hlc.UnixNano) rpcContext := rpc.NewContext(&base.Context{}, clock, stopper) g := gossip.New(rpcContext, gossip.TestInterval, gossip.TestBootstrap) storePool := storage.NewStorePool(g, storage.TestTimeUntilStoreDeadOff, stopper) c := &Cluster{ stopper: stopper, clock: clock, rpc: rpcContext, gossip: g, storePool: storePool, allocator: storage.MakeAllocator(storePool, storage.RebalancingOptions{ AllowRebalance: true, Deterministic: true, }), storeGossiper: gossiputil.NewStoreGossiper(g), nodes: make(map[roachpb.NodeID]*Node), stores: make(map[roachpb.StoreID]*Store), ranges: make(map[roachpb.RangeID]*Range), rangeIDsByStore: make(map[roachpb.StoreID]roachpb.RangeIDSlice), rand: rand, seed: seed, epochWriter: tabwriter.NewWriter(epochWriter, 8, 1, 2, ' ', 0), actionWriter: tabwriter.NewWriter(actionWriter, 8, 1, 2, ' ', 0), script: script, epoch: -1, } // Add the nodes. for i := 0; i < nodeCount; i++ { c.addNewNodeWithStore() } // Add a single range and add to this first node's first store. firstRange := c.addRange() firstRange.addReplica(c.stores[0]) c.calculateRangeIDsByStore() // Output the first epoch header. c.epoch = 0 c.OutputEpochHeader() c.OutputEpoch() c.flush() return c }
// TestAllocatorRebalance verifies that only rebalance targets within // a standard deviation of the mean are chosen. func TestAllocatorRebalanceByCapacity(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a := createTestAllocator() defer stopper.Stop() stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 50}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 50}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 50}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 80}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every rebalance target must be store 4 (if not nil). for i := 0; i < 10; i++ { result := a.RebalanceTarget(1, roachpb.Attributes{}, []roachpb.ReplicaDescriptor{}) if result != nil && result.StoreID != 4 { t.Errorf("expected store 4; got %d", result.StoreID) } } // Verify ShouldRebalance results. a.options.Deterministic = true for i, store := range stores { result := a.ShouldRebalance(store.StoreID) if expResult := (i < 3); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) } } }
// TestAllocatorRebalanceByCount verifies that rebalance targets are // chosen by range counts in the event that available capacities // exceed the maxAvailCapacityThreshold. func TestAllocatorRebalanceByCount(t *testing.T) { defer leaktest.AfterTest(t) stopper, g, _, a := createTestAllocator() defer stopper.Stop() // Setup the stores so that only one is below the standard deviation threshold. stores := []*proto.StoreDescriptor{ { StoreID: 1, Node: proto.NodeDescriptor{NodeID: 1}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 2, Node: proto.NodeDescriptor{NodeID: 2}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 99, RangeCount: 10}, }, { StoreID: 3, Node: proto.NodeDescriptor{NodeID: 3}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 98, RangeCount: 10}, }, { StoreID: 4, Node: proto.NodeDescriptor{NodeID: 4}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 97, RangeCount: 5}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every rebalance target must be store 4 (or nil for case of missing the only option). for i := 0; i < 10; i++ { result := a.RebalanceTarget(proto.Attributes{}, []proto.Replica{}) if result != nil && result.StoreID != 4 { t.Errorf("expected store 4; got %d", result.StoreID) } } // Verify shouldRebalance results. for i, store := range stores { result := a.shouldRebalance(store) if expResult := (i < 3); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) } } }
// TestAllocatorRelaxConstraints verifies that attribute constraints // will be relaxed in order to match nodes lacking required attributes, // if necessary to find an allocation target. func TestAllocatorRelaxConstraints(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(multiDCStores, t) testCases := []struct { required []string // attribute strings existing []int // existing store/node ID relaxConstraints bool // allow constraints to be relaxed? expID int // expected store/node ID on allocate expErr bool }{ // The two stores in the system have attributes: // storeID=1 {"a", "ssd"} // storeID=2 {"b", "ssd"} {[]string{"a", "ssd"}, []int{}, true, 1, false}, {[]string{"a", "ssd"}, []int{1}, true, 2, false}, {[]string{"a", "ssd"}, []int{1}, false, 0, true}, {[]string{"a", "ssd"}, []int{1, 2}, true, 0, true}, {[]string{"b", "ssd"}, []int{}, true, 2, false}, {[]string{"b", "ssd"}, []int{1}, true, 2, false}, {[]string{"b", "ssd"}, []int{2}, false, 0, true}, {[]string{"b", "ssd"}, []int{2}, true, 1, false}, {[]string{"b", "ssd"}, []int{1, 2}, true, 0, true}, {[]string{"b", "hdd"}, []int{}, true, 2, false}, {[]string{"b", "hdd"}, []int{2}, true, 1, false}, {[]string{"b", "hdd"}, []int{2}, false, 0, true}, {[]string{"b", "hdd"}, []int{1, 2}, true, 0, true}, {[]string{"b", "ssd", "gpu"}, []int{}, true, 2, false}, {[]string{"b", "hdd", "gpu"}, []int{}, true, 2, false}, } for i, test := range testCases { var existing []roachpb.ReplicaDescriptor for _, id := range test.existing { existing = append(existing, roachpb.ReplicaDescriptor{NodeID: roachpb.NodeID(id), StoreID: roachpb.StoreID(id)}) } result, err := a.AllocateTarget(roachpb.Attributes{Attrs: test.required}, existing, test.relaxConstraints, nil) if haveErr := (err != nil); haveErr != test.expErr { t.Errorf("%d: expected error %t; got %t: %s", i, test.expErr, haveErr, err) } else if err == nil && roachpb.StoreID(test.expID) != result.StoreID { t.Errorf("%d: expected result to have store %d; got %+v", i, test.expID, result) } } }
func TestAllocatorExistingReplica(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(sameDCStores, t) result, err := a.AllocateTarget(multiDisksConfig.ReplicaAttrs[1], []roachpb.ReplicaDescriptor{ { NodeID: 2, StoreID: 2, }, }, false, nil) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } if result.Node.NodeID != 3 || result.StoreID != 4 { t.Errorf("expected result to have node 3 and store 4: %+v", result) } }
// TestAllocatorRandomAllocation verifies that allocations bias // towards least loaded stores. func TestAllocatorRandomAllocation(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a := createTestAllocator() defer stopper.Stop() stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 150}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 50}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 0}, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every allocation will randomly choose 3 of the 4, meaning either // store 1 or store 2 will be chosen, as the least loaded of the // three random choices is returned. for i := 0; i < 10; i++ { result, err := a.AllocateTarget(roachpb.Attributes{}, []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Fatal(err) } if result.StoreID != 1 && result.StoreID != 2 { t.Errorf("expected store 1 or 2; got %d", result.StoreID) } } }
// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool // correctly updates a store's details. func TestStorePoolGossipUpdate(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDead) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) sp.mu.RLock() if _, ok := sp.stores[2]; ok { t.Fatalf("store 2 is already in the pool's store list") } sp.mu.RUnlock() sg.GossipStores(uniqueStore, t) sp.mu.RLock() if _, ok := sp.stores[2]; !ok { t.Fatalf("store 2 isn't in the pool's store list") } if e, a := 1, sp.queue.Len(); e > a { t.Fatalf("wrong number of stores in the queue expected at least:%d actual:%d", e, a) } sp.mu.RUnlock() }
func TestAllocatorThreeDisksSameDC(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(sameDCStores, t) result1, err := a.AllocateTarget(multiDisksConfig.ReplicaAttrs[0], []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } if result1.StoreID != 1 && result1.StoreID != 2 { t.Errorf("Expected store 1 or 2; got %+v", result1) } exReplicas := []roachpb.ReplicaDescriptor{ { NodeID: result1.Node.NodeID, StoreID: result1.StoreID, }, } result2, err := a.AllocateTarget(multiDisksConfig.ReplicaAttrs[1], exReplicas, false, nil) if err != nil { t.Errorf("Unable to perform allocation: %v", err) } if result2.StoreID != 3 && result2.StoreID != 4 { t.Errorf("Expected store 3 or 4; got %+v", result2) } if result1.Node.NodeID == result2.Node.NodeID { t.Errorf("Expected node ids to be different %+v vs %+v", result1, result2) } result3, err := a.AllocateTarget(multiDisksConfig.ReplicaAttrs[2], []roachpb.ReplicaDescriptor{}, false, nil) if err != nil { t.Errorf("Unable to perform allocation: %v", err) } if result3.Node.NodeID != 4 || result3.StoreID != 5 { t.Errorf("Expected node 4, store 5; got %+v", result3) } }
// TestStoreRangeRebalance verifies that the replication queue will take // rebalancing opportunities and add a new replica on another store. func TestStoreRangeRebalance(t *testing.T) { defer leaktest.AfterTest(t) // Start multiTestContext with replica rebalancing enabled. mtc := &multiTestContext{ storeContext: &storage.StoreContext{}, } *mtc.storeContext = storage.TestStoreContext mtc.storeContext.AllocatorOptions = storage.AllocatorOptions{ AllowRebalance: true, Deterministic: true, } // Four stores. mtc.Start(t, 4) defer mtc.Stop() // Replicate the first range to the first three stores. store0 := mtc.stores[0] replica := store0.LookupReplica(roachpb.RKeyMin, nil) desc := replica.Desc() mtc.replicateRange(desc.RangeID, 1, 2) // Initialize the gossip network with fake capacity data. storeDescs := make([]*roachpb.StoreDescriptor, 0, len(mtc.stores)) for _, s := range mtc.stores { desc, err := s.Descriptor() if err != nil { t.Fatal(err) } desc.Capacity.Capacity = 1024 * 1024 desc.Capacity.Available = 1024 * 1024 // Make sure store[1] is chosen as removal target. if desc.StoreID == mtc.stores[1].StoreID() { desc.Capacity.Available = 0 } storeDescs = append(storeDescs, desc) } sg := gossiputil.NewStoreGossiper(mtc.gossip) sg.GossipStores(storeDescs, t) // This can't use SucceedsWithin as using the exponential backoff mechanic // won't work well with the forced replication scans. maxTimeout := time.After(5 * time.Second) succeeded := false for !succeeded { select { case <-maxTimeout: t.Fatal("Failed to rebalance replica within 5 seconds") case <-time.After(10 * time.Millisecond): // Look up the official range descriptor, make sure fourth store is on it. rangeDesc := getRangeMetadata(roachpb.RKeyMin, mtc, t) // Test if we have already succeeded. for _, repl := range rangeDesc.Replicas { if repl.StoreID == mtc.stores[3].StoreID() { succeeded = true } } if succeeded { break } mtc.expireLeaderLeases() mtc.stores[1].ForceReplicationScanAndProcess() } } }
func TestStorePoolFindDeadReplicas(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, }, { StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 5}, }, } replicas := []roachpb.ReplicaDescriptor{ { NodeID: 1, StoreID: 1, ReplicaID: 1, }, { NodeID: 2, StoreID: 2, ReplicaID: 2, }, { NodeID: 3, StoreID: 3, ReplicaID: 4, }, { NodeID: 4, StoreID: 5, ReplicaID: 4, }, { NodeID: 5, StoreID: 5, ReplicaID: 5, }, } sg.GossipStores(stores, t) deadReplicas := sp.deadReplicas(replicas) if len(deadReplicas) > 0 { t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas) } // Timeout all stores, but specifically store 5. waitUntilDead(t, mc, sp, 5) // Resurrect all stores except for 4 and 5. sg.GossipStores(stores[:3], t) deadReplicas = sp.deadReplicas(replicas) if a, e := deadReplicas, replicas[3:]; !reflect.DeepEqual(a, e) { t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", a, e) } }
// TestStorePoolGetStoreList ensures that the store list returns only stores // that are alive and match the attribute criteria. func TestStorePoolGetStoreList(t *testing.T) { defer leaktest.AfterTest(t)() // We're going to manually mark stores dead in this test. stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) required := []string{"ssd", "dc"} // Nothing yet. if sl := sp.getStoreList(roachpb.Attributes{Attrs: required}, false); len(sl.stores) != 0 { t.Errorf("expected no stores, instead %+v", sl.stores) } matchingStore := roachpb.StoreDescriptor{ StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: required}, } supersetStore := roachpb.StoreDescriptor{ StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: append(required, "db")}, } unmatchingStore := roachpb.StoreDescriptor{ StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}}, } emptyStore := roachpb.StoreDescriptor{ StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{}, } deadStore := roachpb.StoreDescriptor{ StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: required}, } // Mark all alive initially. sg.GossipStores([]*roachpb.StoreDescriptor{ &matchingStore, &supersetStore, &unmatchingStore, &emptyStore, &deadStore, }, t) if err := verifyStoreList(sp, required, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), int(deadStore.StoreID), }); err != nil { t.Error(err) } // Mark one store dead. sp.mu.Lock() sp.stores[deadStore.StoreID].markDead(sp.clock.Now()) sp.mu.Unlock() if err := verifyStoreList(sp, required, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), }); err != nil { t.Error(err) } }
// TestStorePoolDies ensures that a store is marked as dead after it // times out and that it will be revived after a new update is received. func TestStorePoolDies(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) sg.GossipStores(uniqueStore, t) { sp.mu.RLock() store2, ok := sp.stores[2] if !ok { t.Fatalf("store 2 isn't in the pool's store list") } if store2.dead { t.Errorf("store 2 is dead before it times out") } if e, a := 0, store2.timesDied; e != a { t.Errorf("store 2 has been counted dead %d times, expected %d", a, e) } if store2.index == -1 { t.Errorf("store 2 is mot the queue, it should be") } if e, a := 1, sp.queue.Len(); e > a { t.Errorf("wrong number of stores in the queue expected to be at least:%d actual:%d", e, a) } sp.mu.RUnlock() } // Timeout store 2. waitUntilDead(t, mc, sp, 2) { sp.mu.RLock() store2, ok := sp.stores[2] if !ok { t.Fatalf("store 2 isn't in the pool's store list") } if e, a := 1, store2.timesDied; e != a { t.Errorf("store 2 has been counted dead %d times, expected %d", a, e) } if store2.index != -1 { t.Errorf("store 2 is in the queue, it shouldn't be") } sp.mu.RUnlock() } sg.GossipStores(uniqueStore, t) { sp.mu.RLock() store2, ok := sp.stores[2] if !ok { t.Fatalf("store 2 isn't in the pool's store list") } if store2.dead { t.Errorf("store 2 is dead still, it should be alive") } if e, a := 1, store2.timesDied; e != a { t.Errorf("store 2 has been counted dead %d times, expected %d", a, e) } if store2.index == -1 { t.Errorf("store 2 is mot the queue, it should be") } sp.mu.RUnlock() } // Timeout store 2 again. waitUntilDead(t, mc, sp, 2) { sp.mu.RLock() store2, ok := sp.stores[2] if !ok { t.Fatalf("store 2 isn't in the pool's store list") } if e, a := 2, store2.timesDied; e != a { t.Errorf("store 2 has been counted dead %d times, expected %d", a, e) } if store2.index != -1 { t.Errorf("store 2 is in the queue, it shouldn't be") } sp.mu.RUnlock() } }
// TestAllocatorRemoveTarget verifies that the replica chosen by RemoveTarget is // the one with the lowest capacity. func TestAllocatorRemoveTarget(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() // List of replicas that will be passed to RemoveTarget replicas := []roachpb.ReplicaDescriptor{ { StoreID: 1, NodeID: 1, ReplicaID: 1, }, { StoreID: 2, NodeID: 2, ReplicaID: 2, }, { StoreID: 3, NodeID: 3, ReplicaID: 3, }, { StoreID: 4, NodeID: 4, ReplicaID: 4, }, } // Setup the stores so that store 3 is the worst candidate. stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 80, RangeCount: 10}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 60, RangeCount: 15}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 65, RangeCount: 10}, }, } sg := gossiputil.NewStoreGossiper(g) sg.GossipStores(stores, t) targetRepl, err := a.RemoveTarget(replicas) if err != nil { t.Fatal(err) } if a, e := targetRepl, replicas[2]; a != e { t.Fatalf("RemoveTarget did not select expected replica; expected %v, got %v", e, a) } }
// TestStorePoolGetStoreList ensures that the store list returns only stores // that are alive and match the attribute criteria. func TestStorePoolGetStoreList(t *testing.T) { defer leaktest.AfterTest(t) stopper, g, sp := createTestStorePool(TestTimeUntilStoreDead) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) required := []string{"ssd", "dc"} // Nothing yet. if sl := sp.getStoreList(roachpb.Attributes{Attrs: required}, false); len(sl.stores) != 0 { t.Errorf("expected no stores, instead %+v", sl.stores) } matchingStore := roachpb.StoreDescriptor{ StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: required}, } supersetStore := roachpb.StoreDescriptor{ StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: append(required, "db")}, } unmatchingStore := roachpb.StoreDescriptor{ StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}}, } emptyStore := roachpb.StoreDescriptor{ StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{}, } deadStore := roachpb.StoreDescriptor{ StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 1}, Attrs: roachpb.Attributes{Attrs: required}, } sg.GossipStores([]*roachpb.StoreDescriptor{ &matchingStore, &supersetStore, &unmatchingStore, &emptyStore, &deadStore, }, t) if err := verifyStoreList(sp, required, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), int(deadStore.StoreID), }); err != nil { t.Error(err) } // Timeout all stores, but specifically store 5. waitUntilDead(t, sp, 5) // Resurrect all stores except for 5. sg.GossipStores([]*roachpb.StoreDescriptor{ &matchingStore, &supersetStore, &unmatchingStore, &emptyStore, }, t) if err := verifyStoreList(sp, required, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), }); err != nil { t.Error(err) } }
// TestAllocatorRebalance verifies that rebalance targets are chosen // randomly from amongst stores over the minAvailCapacityThreshold. func TestAllocatorRebalance(t *testing.T) { defer leaktest.AfterTest(t)() stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() stores := []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 1}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, Capacity: roachpb.StoreCapacity{Capacity: 100, Available: 50, RangeCount: 1}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, Capacity: roachpb.StoreCapacity{ Capacity: 100, Available: 100 - int64(100*maxFractionUsedThreshold), RangeCount: 5, }, }, { // This store must not be rebalanced to, because it's too full. StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, Capacity: roachpb.StoreCapacity{ Capacity: 100, Available: (100 - int64(100*maxFractionUsedThreshold)) / 2, RangeCount: 10, }, }, { // This store will not be rebalanced to, because it already has more // replicas than the mean range count. StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 5}, Capacity: roachpb.StoreCapacity{ Capacity: 100, Available: 100, RangeCount: 10, }, }, } gossiputil.NewStoreGossiper(g).GossipStores(stores, t) // Every rebalance target must be either stores 1 or 2. for i := 0; i < 10; i++ { result := a.RebalanceTarget(roachpb.Attributes{}, []roachpb.ReplicaDescriptor{{StoreID: 3}}, 0) if result == nil { t.Fatal("nil result") } if result.StoreID != 1 && result.StoreID != 2 { t.Errorf("%d: expected store 1 or 2; got %d", i, result.StoreID) } } // Verify shouldRebalance results. a.options.Deterministic = true for i, store := range stores { desc, ok := a.storePool.getStoreDescriptor(store.StoreID) if !ok { t.Fatalf("%d: unable to get store %d descriptor", i, store.StoreID) } sl, _, _ := a.storePool.getStoreList(roachpb.Attributes{}, true) result := a.shouldRebalance(desc, sl) if expResult := (i >= 2); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) } } }
// TestAllocatorRemoveTarget verifies that the replica chosen by RemoveTarget is // the one with the lowest capacity. func TestAllocatorRemoveTarget(t *testing.T) { defer leaktest.AfterTest(t) stopper, g, _, a := createTestAllocator() defer stopper.Stop() // List of replicas that will be passed to RemoveTarget replicas := []proto.Replica{ { StoreID: 1, NodeID: 1, ReplicaID: 1, }, { StoreID: 2, NodeID: 2, ReplicaID: 2, }, { StoreID: 3, NodeID: 3, ReplicaID: 3, }, { StoreID: 4, NodeID: 4, ReplicaID: 4, }, } // Setup the stores so that store 3 is the worst candidate. stores := []*proto.StoreDescriptor{ { StoreID: 1, Node: proto.NodeDescriptor{NodeID: 1}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 2, Node: proto.NodeDescriptor{NodeID: 2}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 80, RangeCount: 10}, }, { StoreID: 3, Node: proto.NodeDescriptor{NodeID: 3}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 60, RangeCount: 10}, }, { StoreID: 4, Node: proto.NodeDescriptor{NodeID: 4}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 65, RangeCount: 5}, }, } sg := gossiputil.NewStoreGossiper(g) sg.GossipStores(stores, t) targetRepl, err := a.RemoveTarget(replicas) if err != nil { t.Fatal(err) } if a, e := targetRepl, replicas[2]; a != e { t.Fatalf("RemoveTarget did not select expected replica; expected %v, got %v", e, a) } // Setup the stores again so that store 2 is the worst, but with very low // used capacity to force the range count criteria to be used. stores = []*proto.StoreDescriptor{ { StoreID: 1, Node: proto.NodeDescriptor{NodeID: 1}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 2, Node: proto.NodeDescriptor{NodeID: 2}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 18}, }, { StoreID: 3, Node: proto.NodeDescriptor{NodeID: 3}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 10}, }, { StoreID: 4, Node: proto.NodeDescriptor{NodeID: 4}, Capacity: proto.StoreCapacity{Capacity: 100, Available: 100, RangeCount: 5}, }, } sg.GossipStores(stores, t) targetRepl, err = a.RemoveTarget(replicas) if err != nil { t.Fatal(err) } if a, e := targetRepl, replicas[1]; a != e { t.Fatalf("RemoveTarget did not select expected replica; expected %v, got %v", e, a) } }