Exemple #1
0
// TestMetricsRecorder verifies that the metrics recorder properly formats the
// statistics from various registries, both for Time Series and for Status
// Summaries.
func TestMetricsRecorder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// ========================================
	// Construct a series of fake descriptors for use in test.
	// ========================================
	nodeDesc := roachpb.NodeDescriptor{
		NodeID: roachpb.NodeID(1),
	}
	storeDesc1 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(1),
		Capacity: roachpb.StoreCapacity{
			Capacity:  100,
			Available: 50,
		},
	}
	storeDesc2 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(2),
		Capacity: roachpb.StoreCapacity{
			Capacity:  200,
			Available: 75,
		},
	}

	// ========================================
	// Create registries and add them to the recorder (two node-level, two
	// store-level).
	// ========================================
	reg1 := metric.NewRegistry()
	reg2 := metric.NewRegistry()
	store1 := fakeStore{
		storeID:  roachpb.StoreID(1),
		desc:     storeDesc1,
		registry: metric.NewRegistry(),
	}
	store2 := fakeStore{
		storeID:  roachpb.StoreID(2),
		desc:     storeDesc2,
		registry: metric.NewRegistry(),
	}
	manual := hlc.NewManualClock(100)
	recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano))
	recorder.AddNodeRegistry("one.%s", reg1)
	recorder.AddNodeRegistry("two.%s", reg1)
	recorder.AddStore(store1)
	recorder.AddStore(store2)
	recorder.NodeStarted(nodeDesc, 50)

	// Ensure the metric system's view of time does not advance during this test
	// as the test expects time to not advance too far which would age the actual
	// data (e.g. in histogram's) unexpectedly.
	defer metric.TestingSetNow(func() time.Time {
		return time.Unix(0, manual.UnixNano()).UTC()
	})()

	// ========================================
	// Generate Metrics Data & Expected Results
	// ========================================

	// Flatten the four registries into an array for ease of use.
	regList := []struct {
		reg    *metric.Registry
		prefix string
		source int64
		isNode bool
	}{
		{
			reg:    reg1,
			prefix: "one.",
			source: 1,
			isNode: true,
		},
		{
			reg:    reg2,
			prefix: "two.",
			source: 1,
			isNode: true,
		},
		{
			reg:    store1.registry,
			prefix: "",
			source: int64(store1.storeID),
			isNode: false,
		},
		{
			reg:    store2.registry,
			prefix: "",
			source: int64(store2.storeID),
			isNode: false,
		},
	}

	// Every registry will have a copy of the following metrics.
	metricNames := []struct {
		name string
		typ  string
		val  int64
	}{
		{"testGauge", "gauge", 20},
		{"testGaugeFloat64", "floatgauge", 20},
		{"testCounter", "counter", 5},
		{"testRate", "rate", 2},
		{"testHistogram", "histogram", 10},
		{"testLatency", "latency", 10},

		// Stats needed for store summaries.
		{"ranges", "counter", 1},
		{"ranges.leader", "gauge", 1},
		{"ranges.replicated", "gauge", 1},
		{"ranges.available", "gauge", 1},
	}

	// Add the metrics to each registry and set their values. At the same time,
	// generate expected time series results and status summary metric values.
	var expected []tspb.TimeSeriesData
	expectedNodeSummaryMetrics := make(map[string]float64)
	expectedStoreSummaryMetrics := make(map[string]float64)

	// addExpected generates expected data for a single metric data point.
	addExpected := func(prefix, name string, source, time, val int64, isNode bool) {
		// Generate time series data.
		tsPrefix := "cr.node."
		if !isNode {
			tsPrefix = "cr.store."
		}
		expect := tspb.TimeSeriesData{
			Name:   tsPrefix + prefix + name,
			Source: strconv.FormatInt(source, 10),
			Datapoints: []tspb.TimeSeriesDatapoint{
				{
					TimestampNanos: time,
					Value:          float64(val),
				},
			},
		}
		expected = append(expected, expect)

		// Generate status summary data.
		if isNode {
			expectedNodeSummaryMetrics[prefix+name] = float64(val)
		} else {
			// This can overwrite the previous value, but this is expected as
			// all stores in our tests have identical values; when comparing
			// status summaries, the same map is used as expected data for all
			// stores.
			expectedStoreSummaryMetrics[prefix+name] = float64(val)
		}
	}

	for _, reg := range regList {
		for _, data := range metricNames {
			switch data.typ {
			case "gauge":
				reg.reg.Gauge(data.name).Update(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "floatgauge":
				reg.reg.GaugeFloat64(data.name).Update(float64(data.val))
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "counter":
				reg.reg.Counter(data.name).Inc(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "rate":
				reg.reg.Rates(data.name).Add(data.val)
				addExpected(reg.prefix, data.name+"-count", reg.source, 100, data.val, reg.isNode)
				for _, scale := range metric.DefaultTimeScales {
					// Rate data is subject to timing errors in tests. Zero out
					// these values.
					addExpected(reg.prefix, data.name+sep+scale.Name(), reg.source, 100, 0, reg.isNode)
				}
			case "histogram":
				reg.reg.Histogram(data.name, time.Second, 1000, 2).RecordValue(data.val)
				for _, q := range recordHistogramQuantiles {
					addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
				}
			case "latency":
				reg.reg.Latency(data.name).RecordValue(data.val)
				// Latency is simply three histograms (at different resolution
				// time scales).
				for _, scale := range metric.DefaultTimeScales {
					for _, q := range recordHistogramQuantiles {
						addExpected(reg.prefix, data.name+sep+scale.Name()+q.suffix, reg.source, 100, data.val, reg.isNode)
					}
				}
			}
		}
	}

	// ========================================
	// Verify time series data
	// ========================================
	actual := recorder.GetTimeSeriesData()

	// Zero-out timing-sensitive rate values from actual data.
	for _, act := range actual {
		match, err := regexp.MatchString(`testRate-\d+m`, act.Name)
		if err != nil {
			t.Fatal(err)
		}
		if match {
			act.Datapoints[0].Value = 0.0
		}
	}

	// Actual comparison is simple: sort the resulting arrays by time and name,
	// and use reflect.DeepEqual.
	sort.Sort(byTimeAndName(actual))
	sort.Sort(byTimeAndName(expected))
	if a, e := actual, expected; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a))
	}

	// ========================================
	// Verify node summary generation
	// ========================================
	expectedNodeSummary := &NodeStatus{
		Desc:      nodeDesc,
		BuildInfo: build.GetInfo(),
		StartedAt: 50,
		UpdatedAt: 100,
		Metrics:   expectedNodeSummaryMetrics,
		StoreStatuses: []StoreStatus{
			{
				Desc:    storeDesc1,
				Metrics: expectedStoreSummaryMetrics,
			},
			{
				Desc:    storeDesc2,
				Metrics: expectedStoreSummaryMetrics,
			},
		},
	}

	nodeSummary := recorder.GetStatusSummary()
	if nodeSummary == nil {
		t.Fatalf("recorder did not return nodeSummary.")
	}

	sort.Sort(byStoreDescID(nodeSummary.StoreStatuses))
	if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a))
	}
}
Exemple #2
0
// TestMetricsRecorder verifies that the metrics recorder properly formats the
// statistics from various registries, both for Time Series and for Status
// Summaries.
func TestMetricsRecorder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Fake descriptors and stats for status summaries.
	nodeDesc := roachpb.NodeDescriptor{
		NodeID: roachpb.NodeID(1),
	}
	storeDesc1 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(1),
		Capacity: roachpb.StoreCapacity{
			Capacity:  100,
			Available: 50,
		},
	}
	storeDesc2 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(2),
		Capacity: roachpb.StoreCapacity{
			Capacity:  200,
			Available: 75,
		},
	}
	stats := engine.MVCCStats{
		LiveBytes:       1,
		KeyBytes:        2,
		ValBytes:        3,
		IntentBytes:     4,
		LiveCount:       5,
		KeyCount:        6,
		ValCount:        7,
		IntentCount:     8,
		IntentAge:       9,
		GCBytesAge:      10,
		LastUpdateNanos: 1 * 1E9,
	}

	// Create some registries and add them to the recorder (two at node-level,
	// two at store-level).
	reg1 := metric.NewRegistry()
	reg2 := metric.NewRegistry()
	store1 := fakeStore{
		storeID:  roachpb.StoreID(1),
		stats:    stats,
		desc:     storeDesc1,
		registry: metric.NewRegistry(),
	}
	store2 := fakeStore{
		storeID:  roachpb.StoreID(2),
		stats:    stats,
		desc:     storeDesc2,
		registry: metric.NewRegistry(),
	}
	manual := hlc.NewManualClock(100)
	recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano))
	recorder.AddNodeRegistry("one.%s", reg1)
	recorder.AddNodeRegistry("two.%s", reg1)
	recorder.AddStore(store1)
	recorder.AddStore(store2)
	recorder.NodeStarted(nodeDesc, 50)

	// Ensure the metric system's view of time does not advance during this test
	// as the test expects time to not advance too far which would age the actual
	// data (e.g. in histogram's) unexpectedly.
	defer metric.TestingSetNow(func() time.Time {
		return time.Unix(0, manual.UnixNano()).UTC()
	})()

	// Create a flat array of registries, along with metadata for each, to help
	// generate expected results.
	regList := []struct {
		reg    *metric.Registry
		prefix string
		source int64
	}{
		{
			reg:    reg1,
			prefix: "cr.node.one.",
			source: 1,
		},
		{
			reg:    reg2,
			prefix: "cr.node.two.",
			source: 1,
		},
		{
			reg:    store1.registry,
			prefix: "cr.store.",
			source: int64(store1.storeID),
		},
		{
			reg:    store2.registry,
			prefix: "cr.store.",
			source: int64(store2.storeID),
		},
	}

	// Every registry will have the following metrics.
	metricNames := []struct {
		name string
		typ  string
		val  int64
	}{
		{"testGauge", "gauge", 20},
		{"testCounter", "counter", 5},
		{"testRate", "rate", 2},
		{"testHistogram", "histogram", 10},
		{"testLatency", "latency", 10},

		// Stats needed for store summaries.
		{"ranges", "counter", 1},
		{"ranges.leader", "gauge", 1},
		{"ranges.replicated", "gauge", 1},
		{"ranges.available", "gauge", 1},
	}

	// Add the above metrics to each registry. At the same time, generate
	// expected time series results.
	var expected []ts.TimeSeriesData
	addExpected := func(prefix, name string, source, time, val int64) {
		expect := ts.TimeSeriesData{
			Name:   prefix + name,
			Source: strconv.FormatInt(source, 10),
			Datapoints: []*ts.TimeSeriesDatapoint{
				{
					TimestampNanos: time,
					Value:          float64(val),
				},
			},
		}
		expected = append(expected, expect)
	}

	for _, data := range metricNames {
		for _, reg := range regList {
			switch data.typ {
			case "gauge":
				reg.reg.Gauge(data.name).Update(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val)
			case "counter":
				reg.reg.Counter(data.name).Inc(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val)
			case "rate":
				reg.reg.Rates(data.name).Add(data.val)
				addExpected(reg.prefix, data.name+"-count", reg.source, 100, data.val)
				for _, scale := range metric.DefaultTimeScales {
					// Rate data is subject to timing errors in tests. Zero out
					// these values.
					addExpected(reg.prefix, data.name+sep+scale.Name(), reg.source, 100, 0)
				}
			case "histogram":
				reg.reg.Histogram(data.name, time.Second, 1000, 2).RecordValue(data.val)
				for _, q := range recordHistogramQuantiles {
					addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val)
				}
			case "latency":
				reg.reg.Latency(data.name).RecordValue(data.val)
				// Latency is simply three histograms (at different resolution
				// time scales).
				for _, scale := range metric.DefaultTimeScales {
					for _, q := range recordHistogramQuantiles {
						addExpected(reg.prefix, data.name+sep+scale.Name()+q.suffix, reg.source, 100, data.val)
					}
				}
			}
		}
	}

	actual := recorder.GetTimeSeriesData()

	// Zero-out timing-sensitive rate values from actual data.
	for _, act := range actual {
		match, err := regexp.MatchString(`testRate-\d+m`, act.Name)
		if err != nil {
			t.Fatal(err)
		}
		if match {
			act.Datapoints[0].Value = 0.0
		}
	}

	// Actual comparison is simple: sort the resulting arrays by time and name,
	// and use reflect.DeepEqual.
	sort.Sort(byTimeAndName(actual))
	sort.Sort(byTimeAndName(expected))
	if a, e := actual, expected; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a))
	}

	// **** STATUS SUMMARY TESTING
	// Generate an expected node summary and two store summaries. The
	// information here is relatively simple in our test.
	expectedNodeSummary := &NodeStatus{
		Desc:      nodeDesc,
		StartedAt: 50,
		UpdatedAt: 100,
		StoreIDs: []roachpb.StoreID{
			roachpb.StoreID(1),
			roachpb.StoreID(2),
		},
		RangeCount:           2,
		LeaderRangeCount:     2,
		AvailableRangeCount:  2,
		ReplicatedRangeCount: 2,
	}
	expectedStoreSummaries := []storage.StoreStatus{
		{
			Desc:                 storeDesc1,
			NodeID:               roachpb.NodeID(1),
			StartedAt:            50,
			UpdatedAt:            100,
			RangeCount:           1,
			LeaderRangeCount:     1,
			AvailableRangeCount:  1,
			ReplicatedRangeCount: 1,
			Stats:                stats,
		},
		{
			Desc:                 storeDesc2,
			NodeID:               roachpb.NodeID(1),
			StartedAt:            50,
			UpdatedAt:            100,
			RangeCount:           1,
			LeaderRangeCount:     1,
			AvailableRangeCount:  1,
			ReplicatedRangeCount: 1,
			Stats:                stats,
		},
	}
	for _, ss := range expectedStoreSummaries {
		expectedNodeSummary.Stats.Add(ss.Stats)
	}

	nodeSummary, storeSummaries := recorder.GetStatusSummaries()
	if nodeSummary == nil {
		t.Fatalf("recorder did not return nodeSummary.")
	}
	if storeSummaries == nil {
		t.Fatalf("recorder did not return storeSummaries.")
	}

	sort.Sort(byStoreDescID(storeSummaries))
	sort.Sort(byStoreID(nodeSummary.StoreIDs))
	if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a))
	}
	if a, e := storeSummaries, expectedStoreSummaries; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not produce expected StoreSummaries; diff:\n %v", pretty.Diff(e, a))
	}
}