func TestStorePoolThrottle(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	{
		expected := sp.clock.Now().GoTime().Add(sp.declinedReservationsTimeout)
		sp.throttle(throttleDeclined, 1)

		sp.mu.Lock()
		detail := sp.getStoreDetailLocked(1)
		sp.mu.Unlock()
		if !detail.throttledUntil.Equal(expected) {
			t.Errorf("expected store to have been throttled to %v, found %v",
				expected, detail.throttledUntil)
		}
	}

	{
		expected := sp.clock.Now().GoTime().Add(sp.failedReservationsTimeout)
		sp.throttle(throttleFailed, 1)

		sp.mu.Lock()
		detail := sp.getStoreDetailLocked(1)
		sp.mu.Unlock()
		if !detail.throttledUntil.Equal(expected) {
			t.Errorf("expected store to have been throttled to %v, found %v",
				expected, detail.throttledUntil)
		}
	}
}
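// uniqueStoreSketch is a hypothetical illustration (not part of the original
// file) of the shape of the uniqueStore fixture gossiped by the tests above
// and below: a single descriptor for store 2 on node 2. The real fixture
// lives elsewhere in the package and may carry additional fields such as
// attributes and capacity.
var uniqueStoreSketch = []*roachpb.StoreDescriptor{
	{
		StoreID: 2,
		Node:    roachpb.NodeDescriptor{NodeID: 2},
	},
}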
func TestGetNodeLocalities(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// Creates a node with a locality with the number of tiers passed in. The
	// NodeID is the same as the tier count.
	createDescWithLocality := func(tierCount int) roachpb.NodeDescriptor {
		nodeDescriptor := roachpb.NodeDescriptor{NodeID: roachpb.NodeID(tierCount)}
		for i := 1; i <= tierCount; i++ {
			value := fmt.Sprintf("%d", i)
			nodeDescriptor.Locality.Tiers = append(nodeDescriptor.Locality.Tiers, roachpb.Tier{
				Key:   value,
				Value: value,
			})
		}
		return nodeDescriptor
	}

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    createDescWithLocality(1),
		},
		{
			StoreID: 2,
			Node:    createDescWithLocality(2),
		},
		{
			StoreID: 3,
			Node:    createDescWithLocality(3),
		},
		{
			StoreID: 4,
			Node:    createDescWithLocality(2),
		},
	}

	sg.GossipStores(stores, t)

	var existingReplicas []roachpb.ReplicaDescriptor
	for _, store := range stores {
		existingReplicas = append(existingReplicas,
			roachpb.ReplicaDescriptor{NodeID: store.Node.NodeID})
	}

	localities := sp.getNodeLocalities(existingReplicas)
	for _, store := range stores {
		locality, ok := localities[store.Node.NodeID]
		if !ok {
			t.Fatalf("could not find locality for node %d", store.Node.NodeID)
		}
		if e, a := int(store.Node.NodeID), len(locality.Tiers); e != a {
			t.Fatalf("for node %d, expected %d tiers, only got %d", store.Node.NodeID, e, a)
		}
	}
}
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(1)); detail.desc != nil {
		t.Errorf("unexpected fetched store ID 1: %+v", detail.desc)
	}
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(2)); detail.desc == nil {
		t.Errorf("failed to fetch store ID 2")
	}
}
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(1)); detail.dead {
		t.Errorf("absent storeDetail came back as dead, expected it to be alive. %+v", detail)
	}
	if detail := sp.getStoreDetailLocked(roachpb.StoreID(2)); detail.dead {
		t.Errorf("present storeDetail came back as dead, expected it to be alive. %+v", detail)
	}
}
// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool
// correctly updates a store's details.
func TestStorePoolGossipUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	sp.mu.RLock()
	if _, ok := sp.mu.storeDetails[2]; ok {
		t.Fatalf("store 2 is already in the pool's store list")
	}
	sp.mu.RUnlock()

	sg.GossipStores(uniqueStore, t)

	sp.mu.RLock()
	if _, ok := sp.mu.storeDetails[2]; !ok {
		t.Fatalf("store 2 isn't in the pool's store list")
	}
	sp.mu.RUnlock()
}
// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool
// correctly updates a store's details.
func TestStorePoolGossipUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	sp.mu.RLock()
	if _, ok := sp.mu.storeDetails[2]; ok {
		t.Fatalf("store 2 is already in the pool's store list")
	}
	sp.mu.RUnlock()

	sg.GossipStores(uniqueStore, t)

	sp.mu.RLock()
	if _, ok := sp.mu.storeDetails[2]; !ok {
		t.Fatalf("store 2 isn't in the pool's store list")
	}
	if e, a := 1, sp.mu.queue.Len(); e > a {
		t.Fatalf("wrong number of stores in the queue; expected at least %d, actual %d", e, a)
	}
	sp.mu.RUnlock()
}
func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
		},
		{
			StoreID: 3,
			Node:    roachpb.NodeDescriptor{NodeID: 3},
		},
		{
			StoreID: 4,
			Node:    roachpb.NodeDescriptor{NodeID: 4},
		},
		{
			StoreID: 5,
			Node:    roachpb.NodeDescriptor{NodeID: 5},
		},
	}

	replicas := []roachpb.ReplicaDescriptor{
		{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		},
		{
			NodeID:    2,
			StoreID:   2,
			ReplicaID: 2,
		},
		{
			NodeID:    3,
			StoreID:   3,
			ReplicaID: 4,
		},
		{
			NodeID:    4,
			StoreID:   5,
			ReplicaID: 4,
		},
		{
			NodeID:    5,
			StoreID:   5,
			ReplicaID: 5,
		},
	}

	sg.GossipStores(stores, t)
	for i := 1; i <= 5; i++ {
		mnl.setLive(roachpb.NodeID(i), true)
	}

	deadReplicas := sp.deadReplicas(0, replicas)
	if len(deadReplicas) > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas)
	}

	// Mark nodes 4 & 5 as dead.
	mnl.setLive(4, false)
	mnl.setLive(5, false)

	deadReplicas = sp.deadReplicas(0, replicas)
	if a, e := deadReplicas, replicas[3:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", a, e)
	}
}
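// mockNodeLivenessSketch is a hypothetical illustration (not part of the
// original file) of the kind of mock node-liveness helper that the mnl value
// returned by createTestStorePool could be: a map from NodeID to a liveness
// flag, toggled via setLive and consulted when deciding whether a store is
// dead. The real helper in the package may differ in both name and shape.
type mockNodeLivenessSketch struct {
	mu   sync.Mutex
	live map[roachpb.NodeID]bool
}

// setLive records whether the given node should be treated as live.
func (m *mockNodeLivenessSketch) setLive(nodeID roachpb.NodeID, live bool) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.live == nil {
		m.live = make(map[roachpb.NodeID]bool)
	}
	m.live[nodeID] = live
}

// nodeLive reports the last liveness value recorded for the node.
func (m *mockNodeLivenessSketch) nodeLive(nodeID roachpb.NodeID) bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.live[nodeID]
}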
// TestStorePoolGetStoreList ensures that the store list returns only stores
// that are live and match the attribute criteria.
func TestStorePoolGetStoreList(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// We're going to manually mark stores dead in this test.
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	constraints := config.Constraints{Constraints: []config.Constraint{{Value: "ssd"}, {Value: "dc"}}}
	required := []string{"ssd", "dc"}
	// Nothing yet.
	sl, _, _ := sp.getStoreList(roachpb.RangeID(0))
	sl = sl.filter(constraints)
	if len(sl.stores) != 0 {
		t.Errorf("expected no stores, instead %+v", sl.stores)
	}

	matchingStore := roachpb.StoreDescriptor{
		StoreID: 1,
		Node:    roachpb.NodeDescriptor{NodeID: 1},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	supersetStore := roachpb.StoreDescriptor{
		StoreID: 2,
		Node:    roachpb.NodeDescriptor{NodeID: 2},
		Attrs:   roachpb.Attributes{Attrs: append(required, "db")},
	}
	unmatchingStore := roachpb.StoreDescriptor{
		StoreID: 3,
		Node:    roachpb.NodeDescriptor{NodeID: 3},
		Attrs:   roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}},
	}
	emptyStore := roachpb.StoreDescriptor{
		StoreID: 4,
		Node:    roachpb.NodeDescriptor{NodeID: 4},
		Attrs:   roachpb.Attributes{},
	}
	deadStore := roachpb.StoreDescriptor{
		StoreID: 5,
		Node:    roachpb.NodeDescriptor{NodeID: 5},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	declinedStore := roachpb.StoreDescriptor{
		StoreID: 6,
		Node:    roachpb.NodeDescriptor{NodeID: 6},
		Attrs:   roachpb.Attributes{Attrs: required},
	}
	corruptReplicaStore := roachpb.StoreDescriptor{
		StoreID: 7,
		Node:    roachpb.NodeDescriptor{NodeID: 7},
		Attrs:   roachpb.Attributes{Attrs: required},
	}

	corruptedRangeID := roachpb.RangeID(1)

	// Gossip and mark all alive initially.
	sg.GossipStores([]*roachpb.StoreDescriptor{
		&matchingStore,
		&supersetStore,
		&unmatchingStore,
		&emptyStore,
		&deadStore,
		&declinedStore,
		&corruptReplicaStore,
	}, t)
	for i := 1; i <= 7; i++ {
		mnl.setLive(roachpb.NodeID(i), true)
	}

	// Add some corrupt replicas that should not affect getStoreList().
	sp.mu.Lock()
	sp.mu.storeDetails[matchingStore.StoreID].deadReplicas[roachpb.RangeID(10)] = []roachpb.ReplicaDescriptor{{
		StoreID: matchingStore.StoreID,
		NodeID:  matchingStore.Node.NodeID,
	}}
	sp.mu.storeDetails[matchingStore.StoreID].deadReplicas[roachpb.RangeID(11)] = []roachpb.ReplicaDescriptor{{
		StoreID: matchingStore.StoreID,
		NodeID:  matchingStore.Node.NodeID,
	}}
	sp.mu.storeDetails[corruptReplicaStore.StoreID].deadReplicas[roachpb.RangeID(10)] = []roachpb.ReplicaDescriptor{{
		StoreID: corruptReplicaStore.StoreID,
		NodeID:  corruptReplicaStore.Node.NodeID,
	}}
	sp.mu.Unlock()

	if err := verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
			int(deadStore.StoreID),
			int(declinedStore.StoreID),
			int(corruptReplicaStore.StoreID),
		},
		/* expectedAliveStoreCount */ 7,
		/* expectedThrottledStoreCount */ 0,
	); err != nil {
		t.Error(err)
	}

	// Set deadStore as dead.
	mnl.setLive(deadStore.Node.NodeID, false)
	sp.mu.Lock()
	// Set declinedStore as throttled.
	sp.mu.storeDetails[declinedStore.StoreID].throttledUntil = sp.clock.Now().GoTime().Add(time.Hour)
	// Add a corrupt replica to corruptReplicaStore.
	sp.mu.storeDetails[corruptReplicaStore.StoreID].deadReplicas[roachpb.RangeID(1)] = []roachpb.ReplicaDescriptor{{
		StoreID: corruptReplicaStore.StoreID,
		NodeID:  corruptReplicaStore.Node.NodeID,
	}}
	sp.mu.Unlock()

	if err := verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		[]int{
			int(matchingStore.StoreID),
			int(supersetStore.StoreID),
		},
		/* expectedAliveStoreCount */ 6,
		/* expectedThrottledStoreCount */ 1,
	); err != nil {
		t.Error(err)
	}
}
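// verifyStoreListSketch is a hypothetical illustration (not part of the
// original file) of what the verifyStoreList helper used above could look
// like; the real helper may differ. Its signature is inferred from the call
// sites in TestStorePoolGetStoreList, and it assumes getStoreList returns
// (storeList, aliveStoreCount, throttledStoreCount).
func verifyStoreListSketch(
	sp *StorePool,
	constraints config.Constraints,
	rangeID roachpb.RangeID,
	expected []int,
	expectedAliveStoreCount int,
	expectedThrottledStoreCount int,
) error {
	sl, aliveStoreCount, throttledStoreCount := sp.getStoreList(rangeID)
	sl = sl.filter(constraints)
	if aliveStoreCount != expectedAliveStoreCount {
		return fmt.Errorf("expected %d alive stores, found %d", expectedAliveStoreCount, aliveStoreCount)
	}
	if throttledStoreCount != expectedThrottledStoreCount {
		return fmt.Errorf("expected %d throttled stores, found %d", expectedThrottledStoreCount, throttledStoreCount)
	}
	// Compare the filtered store IDs against the expected set, ignoring order.
	var actual []int
	for _, store := range sl.stores {
		actual = append(actual, int(store.StoreID))
	}
	sort.Ints(actual)
	if !reflect.DeepEqual(expected, actual) {
		return fmt.Errorf("expected store IDs %v, found %v", expected, actual)
	}
	return nil
}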
// createCluster generates a new cluster using the provided stopper and the
// number of nodes supplied. Each node will have one store to start.
func createCluster(
	stopper *stop.Stopper,
	nodeCount int,
	epochWriter, actionWriter io.Writer,
	script Script,
	rand *rand.Rand,
) *Cluster {
	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, clock, stopper)
	server := rpc.NewServer(rpcContext)
	// We set the node ID to MaxInt32 for the cluster Gossip instance to prevent
	// conflicts with real node IDs.
	g := gossip.NewTest(math.MaxInt32, rpcContext, server, nil, stopper, metric.NewRegistry())
	// Set the store pool to deterministic so that a run with the exact same
	// input will always produce the same output.
	storePool := storage.NewStorePool(
		log.AmbientContext{},
		g,
		clock,
		rpcContext,
		storage.TestTimeUntilStoreDeadOff,
		stopper,
		/* deterministic */ true,
	)
	c := &Cluster{
		stopper:   stopper,
		clock:     clock,
		rpc:       rpcContext,
		gossip:    g,
		storePool: storePool,
		allocator: storage.MakeAllocator(storePool, storage.AllocatorOptions{
			AllowRebalance: true,
		}),
		storeGossiper:   gossiputil.NewStoreGossiper(g),
		nodes:           make(map[roachpb.NodeID]*Node),
		stores:          make(map[roachpb.StoreID]*Store),
		ranges:          make(map[roachpb.RangeID]*Range),
		rangeIDsByStore: make(map[roachpb.StoreID]roachpb.RangeIDSlice),
		rand:            rand,
		epochWriter:     tabwriter.NewWriter(epochWriter, 8, 1, 2, ' ', 0),
		actionWriter:    tabwriter.NewWriter(actionWriter, 8, 1, 2, ' ', 0),
		script:          script,
		epoch:           -1,
	}

	// Add the nodes.
	for i := 0; i < nodeCount; i++ {
		c.addNewNodeWithStore()
	}

	// Add a single range and add it to the first node's first store.
	firstRange := c.addRange()
	firstRange.addReplica(c.stores[0])

	c.calculateRangeIDsByStore()

	// Output the first epoch header.
	c.epoch = 0
	c.OutputEpochHeader()
	c.OutputEpoch()
	c.flush()

	return c
}
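// exampleCreateCluster is a hypothetical usage sketch (not part of the
// original file) showing how createCluster's parameters might be supplied
// from a test or simulation driver. It assumes that a zero-value Script is a
// valid empty script and that writing epoch and action output to os.Stdout
// is acceptable.
func exampleCreateCluster() *Cluster {
	stopper := stop.NewStopper()
	// A fixed seed keeps the run reproducible, matching the deterministic
	// StorePool configured inside createCluster.
	source := rand.New(rand.NewSource(42))
	var script Script // assumption: the zero value behaves as an empty script
	return createCluster(stopper, 3 /* nodeCount */, os.Stdout, os.Stdout, script, source)
}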
func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	stores := []*roachpb.StoreDescriptor{
		{
			StoreID: 1,
			Node:    roachpb.NodeDescriptor{NodeID: 1},
		},
		{
			StoreID: 2,
			Node:    roachpb.NodeDescriptor{NodeID: 2},
		},
		{
			StoreID: 3,
			Node:    roachpb.NodeDescriptor{NodeID: 3},
		},
		{
			StoreID: 4,
			Node:    roachpb.NodeDescriptor{NodeID: 4},
		},
		{
			StoreID: 5,
			Node:    roachpb.NodeDescriptor{NodeID: 5},
		},
	}

	replicas := []roachpb.ReplicaDescriptor{
		{
			NodeID:    1,
			StoreID:   1,
			ReplicaID: 1,
		},
		{
			NodeID:    2,
			StoreID:   2,
			ReplicaID: 2,
		},
		{
			NodeID:    3,
			StoreID:   3,
			ReplicaID: 4,
		},
		{
			NodeID:    4,
			StoreID:   5,
			ReplicaID: 4,
		},
		{
			NodeID:    5,
			StoreID:   5,
			ReplicaID: 5,
		},
	}

	sg.GossipStores(stores, t)

	deadReplicas := sp.deadReplicas(0, replicas)
	if len(deadReplicas) > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", len(deadReplicas), deadReplicas)
	}

	// Timeout all stores, but specifically store 5.
	waitUntilDead(t, mc, sp, 5)

	// Resurrect all stores except for 4 and 5.
	sg.GossipStores(stores[:3], t)

	deadReplicas = sp.deadReplicas(0, replicas)
	if a, e := deadReplicas, replicas[3:]; !reflect.DeepEqual(a, e) {
		t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", a, e)
	}
}
// TestStorePoolDies ensures that a store is marked as dead after it
// times out and that it will be revived after a new update is received.
func TestStorePoolDies(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	{
		sp.mu.RLock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if store2.dead {
			t.Errorf("store 2 is dead before it times out")
		}
		if e, a := 0, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index == -1 {
			t.Errorf("store 2 is not in the queue, it should be")
		}
		if e, a := 1, sp.mu.queue.Len(); e > a {
			t.Errorf("wrong number of stores in the queue; expected at least %d, actual %d", e, a)
		}
		sp.mu.RUnlock()
	}

	// Timeout store 2.
	waitUntilDead(t, mc, sp, 2)
	{
		sp.mu.RLock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if e, a := 1, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index != -1 {
			t.Errorf("store 2 is in the queue, it shouldn't be")
		}
		sp.mu.RUnlock()
	}

	sg.GossipStores(uniqueStore, t)
	{
		sp.mu.RLock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if store2.dead {
			t.Errorf("store 2 is still dead, it should be alive")
		}
		if e, a := 1, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index == -1 {
			t.Errorf("store 2 is not in the queue, it should be")
		}
		sp.mu.RUnlock()
	}

	// Timeout store 2 again.
	waitUntilDead(t, mc, sp, 2)
	{
		sp.mu.RLock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if e, a := 2, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index != -1 {
			t.Errorf("store 2 is in the queue, it shouldn't be")
		}
		sp.mu.RUnlock()
	}
}