Ejemplo n.º 1
0
// TestStorePoolThrottle checks that throttling store 1, first for a declined
// and then for a failed reservation, records a throttledUntil equal to the
// clock reading taken just before the call plus the matching timeout.
func TestStorePoolThrottle(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()

	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	// The declined case runs first, followed by the failed case.
	for _, failed := range []bool{false, true} {
		timeout := sp.declinedReservationsTimeout
		if failed {
			timeout = sp.failedReservationsTimeout
		}

		// Compute the expected deadline before throttling the store.
		want := sp.clock.Now().GoTime().Add(timeout)
		if failed {
			sp.throttle(throttleFailed, 1)
		} else {
			sp.throttle(throttleDeclined, 1)
		}

		// Only the lookup itself is done under the pool mutex.
		sp.mu.Lock()
		detail := sp.getStoreDetailLocked(1)
		sp.mu.Unlock()

		if got := detail.throttledUntil; !got.Equal(want) {
			t.Errorf("expected store to have been throttled to %v, found %v",
				want, got)
		}
	}
}
Ejemplo n.º 2
0
// TestGetNodeLocalities gossips four stores whose nodes carry localities of
// varying depth and checks that getNodeLocalities reports, for every node, a
// locality with as many tiers as the node's ID.
func TestGetNodeLocalities(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// createDescWithLocality builds a node descriptor whose NodeID equals
	// tierCount and whose locality has tierCount tiers; tier i uses the
	// decimal string of i as both key and value.
	createDescWithLocality := func(tierCount int) roachpb.NodeDescriptor {
		var tiers []roachpb.Tier
		for n := 1; n <= tierCount; n++ {
			label := fmt.Sprintf("%d", n)
			tiers = append(tiers, roachpb.Tier{Key: label, Value: label})
		}
		desc := roachpb.NodeDescriptor{NodeID: roachpb.NodeID(tierCount)}
		desc.Locality.Tiers = tiers
		return desc
	}

	// Store IDs are 1..4; stores 2 and 4 both sit on node 2.
	tierCounts := []int{1, 2, 3, 2}
	stores := make([]*roachpb.StoreDescriptor, 0, len(tierCounts))
	for idx, tierCount := range tierCounts {
		stores = append(stores, &roachpb.StoreDescriptor{
			StoreID: roachpb.StoreID(idx + 1),
			Node:    createDescWithLocality(tierCount),
		})
	}

	sg.GossipStores(stores, t)

	// One replica descriptor per store, identified only by its node.
	var existingReplicas []roachpb.ReplicaDescriptor
	for _, s := range stores {
		existingReplicas = append(existingReplicas, roachpb.ReplicaDescriptor{NodeID: s.Node.NodeID})
	}

	localities := sp.getNodeLocalities(existingReplicas)
	for _, s := range stores {
		nodeID := s.Node.NodeID
		locality, found := localities[nodeID]
		if !found {
			t.Fatalf("could not find locality for node %d", nodeID)
		}
		want, got := int(nodeID), len(locality.Tiers)
		if want != got {
			t.Fatalf("for node %d, expected %d tiers, only got %d", nodeID, want, got)
		}
	}
}
Ejemplo n.º 3
0
// TestStorePoolGetStoreDetails gossips uniqueStore and then checks, under the
// pool mutex, that the detail for store 1 has no descriptor while the detail
// for store 2 has one.
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()

	// Store 1 is expected to come back without a descriptor.
	withoutDesc := sp.getStoreDetailLocked(roachpb.StoreID(1))
	if withoutDesc.desc != nil {
		t.Errorf("unexpected fetched store ID 1: %+v", withoutDesc.desc)
	}

	// Store 2 is expected to come back with a descriptor.
	withDesc := sp.getStoreDetailLocked(roachpb.StoreID(2))
	if withDesc.desc == nil {
		t.Errorf("failed to fetch store ID 2")
	}
}
Ejemplo n.º 4
0
// TestStorePoolGetStoreDetails gossips uniqueStore into a pool created with
// TestTimeUntilStoreDeadOff and checks that neither store 1 nor store 2 is
// reported as dead by getStoreDetailLocked.
func TestStorePoolGetStoreDetails(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	sp.mu.Lock()
	defer sp.mu.Unlock()

	first := sp.getStoreDetailLocked(roachpb.StoreID(1))
	if first.dead {
		t.Errorf("Present storeDetail came back as dead, expected it to be alive. %+v", first)
	}

	second := sp.getStoreDetailLocked(roachpb.StoreID(2))
	if second.dead {
		t.Errorf("Absent storeDetail came back as dead, expected it to be alive. %+v", second)
	}
}
Ejemplo n.º 5
0
// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool
// correctly updates a store's details: store 2 must be absent from the pool's
// storeDetails map before uniqueStore is gossiped and present afterwards.
func TestStorePoolGossipUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, _ := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// hasStore2 reports whether store 2 has an entry in the pool. The read
	// lock is released before returning, so a t.Fatalf below never exits the
	// test (and runs the deferred stopper.Stop) with the lock still held.
	hasStore2 := func() bool {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		_, ok := sp.mu.storeDetails[2]
		return ok
	}

	if hasStore2() {
		t.Fatalf("store 2 is already in the pool's store list")
	}

	sg.GossipStores(uniqueStore, t)

	if !hasStore2() {
		t.Fatalf("store 2 isn't in the pool's store list")
	}
}
Ejemplo n.º 6
0
// TestStorePoolGossipUpdate ensures that the gossip callback in StorePool
// correctly updates a store's details: store 2 must be absent from the pool's
// storeDetails map before uniqueStore is gossiped, and afterwards it must be
// present with at least one entry in the pool's queue.
func TestStorePoolGossipUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// snapshot reads, under the read lock, whether store 2 is known and how
	// long the queue is. The lock is released before returning, so a
	// t.Fatalf below never exits the test (and runs the deferred
	// stopper.Stop) with the lock still held.
	snapshot := func() (known bool, queueLen int) {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		_, known = sp.mu.storeDetails[2]
		return known, sp.mu.queue.Len()
	}

	if known, _ := snapshot(); known {
		t.Fatalf("store 2 is already in the pool's store list")
	}

	sg.GossipStores(uniqueStore, t)

	known, queueLen := snapshot()
	if !known {
		t.Fatalf("store 2 isn't in the pool's store list")
	}
	if e, a := 1, queueLen; e > a {
		t.Fatalf("wrong number of stores in the queue expected at least:%d actual:%d", e, a)
	}
}
Ejemplo n.º 7
0
// TestStorePoolFindDeadReplicas gossips five stores, marks all five nodes
// live, and checks that deadReplicas reports nothing. It then marks nodes 4
// and 5 as not live and expects deadReplicas to return the last two replica
// descriptors.
func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// Stores 1..5, each on the node with the same ID.
	var stores []*roachpb.StoreDescriptor
	for id := 1; id <= 5; id++ {
		stores = append(stores, &roachpb.StoreDescriptor{
			StoreID: roachpb.StoreID(id),
			Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(id)},
		})
	}

	// NOTE(review): the third entry has ReplicaID 4 and the fourth has
	// StoreID 5, which breaks the node==store==replica pattern of the other
	// entries — confirm whether this is intentional.
	replicas := []roachpb.ReplicaDescriptor{
		{NodeID: 1, StoreID: 1, ReplicaID: 1},
		{NodeID: 2, StoreID: 2, ReplicaID: 2},
		{NodeID: 3, StoreID: 3, ReplicaID: 4},
		{NodeID: 4, StoreID: 5, ReplicaID: 4},
		{NodeID: 5, StoreID: 5, ReplicaID: 5},
	}

	sg.GossipStores(stores, t)
	for _, s := range stores {
		mnl.setLive(s.Node.NodeID, true)
	}

	initiallyDead := sp.deadReplicas(0, replicas)
	if n := len(initiallyDead); n > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", n, initiallyDead)
	}

	// Mark nodes 4 & 5 as dead.
	for _, nodeID := range []roachpb.NodeID{4, 5} {
		mnl.setLive(nodeID, false)
	}

	got, want := sp.deadReplicas(0, replicas), replicas[3:]
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", got, want)
	}
}
Ejemplo n.º 8
0
// TestStorePoolGetStoreList ensures that the store list returns only stores
// that are live and match the attribute criteria. It gossips seven stores,
// verifies the list while all are live, then marks one node dead, throttles
// one store and records a dead replica for the inspected range on a third,
// and verifies the list again.
func TestStorePoolGetStoreList(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// Store liveness is driven by hand through mnl in this test.
	stopper, g, _, sp, mnl := createTestStorePool(
		TestTimeUntilStoreDead, false /* deterministic */, false /* defaultNodeLiveness */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	required := []string{"ssd", "dc"}
	constraints := config.Constraints{Constraints: []config.Constraint{{Value: "ssd"}, {Value: "dc"}}}

	// Nothing has been gossiped yet, so the filtered list must be empty.
	initial, _, _ := sp.getStoreList(roachpb.RangeID(0))
	if filtered := initial.filter(constraints); len(filtered.stores) != 0 {
		t.Errorf("expected no stores, instead %+v", filtered.stores)
	}

	// newStore builds a descriptor for store id on node id with the given
	// attributes.
	newStore := func(id int, attrs []string) roachpb.StoreDescriptor {
		return roachpb.StoreDescriptor{
			StoreID: roachpb.StoreID(id),
			Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(id)},
			Attrs:   roachpb.Attributes{Attrs: attrs},
		}
	}
	matchingStore := newStore(1, required)
	supersetStore := newStore(2, append(required, "db"))
	unmatchingStore := newStore(3, []string{"ssd", "otherdc"})
	emptyStore := newStore(4, nil)
	deadStore := newStore(5, required)
	declinedStore := newStore(6, required)
	corruptReplicaStore := newStore(7, required)

	corruptedRangeID := roachpb.RangeID(1)

	// Gossip every store and mark every node live to start with.
	allStores := []*roachpb.StoreDescriptor{
		&matchingStore,
		&supersetStore,
		&unmatchingStore,
		&emptyStore,
		&deadStore,
		&declinedStore,
		&corruptReplicaStore,
	}
	sg.GossipStores(allStores, t)
	for _, s := range allStores {
		mnl.setLive(s.Node.NodeID, true)
	}

	// addDeadReplicaLocked records, for the given store and range, a single
	// dead replica located on that store. The caller must hold sp.mu.
	addDeadReplicaLocked := func(store *roachpb.StoreDescriptor, rangeID roachpb.RangeID) {
		sp.mu.storeDetails[store.StoreID].deadReplicas[rangeID] = []roachpb.ReplicaDescriptor{{
			StoreID: store.StoreID,
			NodeID:  store.Node.NodeID,
		}}
	}

	// Dead replicas for ranges other than corruptedRangeID; these should not
	// affect getStoreList().
	sp.mu.Lock()
	addDeadReplicaLocked(&matchingStore, roachpb.RangeID(10))
	addDeadReplicaLocked(&matchingStore, roachpb.RangeID(11))
	addDeadReplicaLocked(&corruptReplicaStore, roachpb.RangeID(10))
	sp.mu.Unlock()

	expectedWhileAllLive := []int{
		int(matchingStore.StoreID),
		int(supersetStore.StoreID),
		int(deadStore.StoreID),
		int(declinedStore.StoreID),
		int(corruptReplicaStore.StoreID),
	}
	err := verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		expectedWhileAllLive,
		/* expectedAliveStoreCount */ 7,
		/* expectedThrottledStoreCount */ 0,
	)
	if err != nil {
		t.Error(err)
	}

	// Take deadStore's node down.
	mnl.setLive(deadStore.Node.NodeID, false)
	sp.mu.Lock()
	// Throttle declinedStore for the next hour.
	sp.mu.storeDetails[declinedStore.StoreID].throttledUntil = sp.clock.Now().GoTime().Add(time.Hour)
	// Give corruptReplicaStore a dead replica for the range under inspection.
	addDeadReplicaLocked(&corruptReplicaStore, corruptedRangeID)
	sp.mu.Unlock()

	expectedAfterChanges := []int{
		int(matchingStore.StoreID),
		int(supersetStore.StoreID),
	}
	err = verifyStoreList(
		sp,
		constraints,
		corruptedRangeID,
		expectedAfterChanges,
		/* expectedAliveStoreCount */ 6,
		/* expectedThrottledStoreCount */ 1,
	)
	if err != nil {
		t.Error(err)
	}
}
Ejemplo n.º 9
0
// createCluster generates a new cluster using the provided stopper and the
// number of nodes supplied. Each node will have one store to start.
//
// epochWriter and actionWriter are each wrapped in a tabwriter. After the
// nodes are added, a single range is created and given a replica on the
// store found under key 0 of the cluster's store map, and the first epoch
// header and epoch are written out before the cluster is returned.
func createCluster(
	stopper *stop.Stopper,
	nodeCount int,
	epochWriter, actionWriter io.Writer,
	script Script,
	rand *rand.Rand,
) *Cluster {
	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewContext(log.AmbientContext{}, &base.Config{Insecure: true}, clock, stopper)
	// The cluster's own Gossip instance uses MaxInt32 as its node ID so that
	// it cannot collide with a real node ID.
	g := gossip.NewTest(
		math.MaxInt32, rpcContext, rpc.NewServer(rpcContext), nil, stopper, metric.NewRegistry(),
	)
	// The store pool is deterministic so that identical input always yields
	// identical output.
	storePool := storage.NewStorePool(
		log.AmbientContext{},
		g,
		clock,
		rpcContext,
		storage.TestTimeUntilStoreDeadOff,
		stopper,
		/* deterministic */ true,
	)

	// Both output streams share the same tabwriter settings.
	newTabWriter := func(w io.Writer) *tabwriter.Writer {
		return tabwriter.NewWriter(w, 8, 1, 2, ' ', 0)
	}

	allocator := storage.MakeAllocator(storePool, storage.AllocatorOptions{
		AllowRebalance: true,
	})
	storeGossiper := gossiputil.NewStoreGossiper(g)

	c := &Cluster{
		stopper:         stopper,
		clock:           clock,
		rpc:             rpcContext,
		gossip:          g,
		storePool:       storePool,
		allocator:       allocator,
		storeGossiper:   storeGossiper,
		nodes:           make(map[roachpb.NodeID]*Node),
		stores:          make(map[roachpb.StoreID]*Store),
		ranges:          make(map[roachpb.RangeID]*Range),
		rangeIDsByStore: make(map[roachpb.StoreID]roachpb.RangeIDSlice),
		rand:            rand,
		epochWriter:     newTabWriter(epochWriter),
		actionWriter:    newTabWriter(actionWriter),
		script:          script,
		epoch:           -1,
	}

	// Add the requested number of nodes, one store each.
	for remaining := nodeCount; remaining > 0; remaining-- {
		c.addNewNodeWithStore()
	}

	// Create the first range and place a replica on the store keyed by 0.
	c.addRange().addReplica(c.stores[0])

	c.calculateRangeIDsByStore()

	// Emit the header and contents of epoch 0.
	c.epoch = 0
	c.OutputEpochHeader()
	c.OutputEpoch()
	c.flush()

	return c
}
Ejemplo n.º 10
0
// TestStorePoolFindDeadReplicas gossips five stores and checks that
// deadReplicas reports nothing. It then waits for store 5 to be considered
// dead, re-gossips only the first three stores, and expects deadReplicas to
// return the last two replica descriptors.
func TestStorePoolFindDeadReplicas(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)

	// Stores 1..5, each on the node with the same ID.
	var stores []*roachpb.StoreDescriptor
	for id := 1; id <= 5; id++ {
		stores = append(stores, &roachpb.StoreDescriptor{
			StoreID: roachpb.StoreID(id),
			Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(id)},
		})
	}

	// NOTE(review): the third entry has ReplicaID 4 and the fourth has
	// StoreID 5, which breaks the node==store==replica pattern of the other
	// entries — confirm whether this is intentional.
	replicas := []roachpb.ReplicaDescriptor{
		{NodeID: 1, StoreID: 1, ReplicaID: 1},
		{NodeID: 2, StoreID: 2, ReplicaID: 2},
		{NodeID: 3, StoreID: 3, ReplicaID: 4},
		{NodeID: 4, StoreID: 5, ReplicaID: 4},
		{NodeID: 5, StoreID: 5, ReplicaID: 5},
	}

	sg.GossipStores(stores, t)

	initiallyDead := sp.deadReplicas(0, replicas)
	if n := len(initiallyDead); n > 0 {
		t.Fatalf("expected no dead replicas initially, found %d (%v)", n, initiallyDead)
	}

	// Timeout all stores, but specifically store 5.
	waitUntilDead(t, mc, sp, 5)

	// Resurrect all stores except for 4 and 5.
	sg.GossipStores(stores[:3], t)

	got, want := sp.deadReplicas(0, replicas), replicas[3:]
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("findDeadReplicas did not return expected values; got \n%v, expected \n%v", got, want)
	}
}
Ejemplo n.º 11
0
// TestStorePoolDies ensures that a store is marked as dead after it
// times out and that it will be revived after a new update is received.
//
// Store 2 is inspected four times: after the initial gossip, after a first
// timeout, after being gossiped again, and after a second timeout. Each
// inspection runs in a helper that releases the read lock via defer, so a
// t.Fatalf inside it never exits the test (and runs the deferred
// stopper.Stop) with the lock still held.
func TestStorePoolDies(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper, g, mc, sp := createTestStorePool(TestTimeUntilStoreDead, false /* deterministic */)
	defer stopper.Stop()
	sg := gossiputil.NewStoreGossiper(g)
	sg.GossipStores(uniqueStore, t)

	// assertAlive checks that store 2 is known, not dead, has died exactly
	// wantDeaths times and has a queue index other than -1. deadMsg is the
	// failure reported when the store is unexpectedly dead.
	assertAlive := func(wantDeaths int, deadMsg string) {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if store2.dead {
			t.Error(deadMsg)
		}
		if e, a := wantDeaths, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index == -1 {
			t.Errorf("store 2 is not in the queue, it should be")
		}
	}

	// assertTimedOut checks that store 2 is known, has died exactly
	// wantDeaths times and has a queue index of -1.
	assertTimedOut := func(wantDeaths int) {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		store2, ok := sp.mu.storeDetails[2]
		if !ok {
			t.Fatalf("store 2 isn't in the pool's store list")
		}
		if e, a := wantDeaths, store2.timesDied; e != a {
			t.Errorf("store 2 has been counted dead %d times, expected %d", a, e)
		}
		if store2.index != -1 {
			t.Errorf("store 2 is in the queue, it shouldn't be")
		}
	}

	// Freshly gossiped: alive, never died, and the queue is non-empty.
	assertAlive(0, "store 2 is dead before it times out")
	func() {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		if e, a := 1, sp.mu.queue.Len(); e > a {
			t.Errorf("wrong number of stores in the queue expected to be at least:%d actual:%d", e, a)
		}
	}()

	// Timeout store 2.
	waitUntilDead(t, mc, sp, 2)
	assertTimedOut(1)

	// A new gossip update revives the store; the death count is kept.
	sg.GossipStores(uniqueStore, t)
	assertAlive(1, "store 2 is dead still, it should be alive")

	// Timeout store 2 again.
	waitUntilDead(t, mc, sp, 2)
	assertTimedOut(2)
}