// makeNodeMetrics initializes the node's execution metrics and registers
// them with the provided registry.
func makeNodeMetrics(reg *metric.Registry, sampleInterval time.Duration) nodeMetrics {
	nm := nodeMetrics{
		Latency: metric.NewLatency(metaExecLatency, sampleInterval),
		Success: metric.NewCounter(metaExecSuccess),
		Err:     metric.NewCounter(metaExecError),
	}
	reg.AddMetricStruct(nm)
	return nm
}
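// The function below is an editorial usage sketch, not part of the source: it
// shows how code in the same package might construct nodeMetrics and record
// into its fields. The 10-second sample interval, the elapsed/err parameters,
// and the surrounding function are hypothetical.
func exampleRecordNodeMetrics(reg *metric.Registry, elapsed time.Duration, err error) {
	// Construct the metrics struct; this also registers it with reg.
	nm := makeNodeMetrics(reg, 10*time.Second)

	// Record the outcome and latency of a single executed batch.
	nm.Latency.RecordValue(elapsed.Nanoseconds())
	if err != nil {
		nm.Err.Inc(1)
	} else {
		nm.Success.Inc(1)
	}
}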
// MakeTxnMetrics returns a TxnMetrics struct that contains metrics whose
// windowed portions retain data for approximately sampleInterval.
func MakeTxnMetrics(sampleInterval time.Duration) TxnMetrics {
	return TxnMetrics{
		Aborts:     metric.NewCounterWithRates(metaAbortsRates),
		Commits:    metric.NewCounterWithRates(metaCommitsRates),
		Commits1PC: metric.NewCounterWithRates(metaCommits1PCRates),
		Abandons:   metric.NewCounterWithRates(metaAbandonsRates),
		Durations:  metric.NewLatency(metaDurationsHistograms, sampleInterval),
		Restarts:   metric.NewHistogram(metaRestartsHistogram, sampleInterval, 100, 3),
	}
}
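// Editorial usage sketch, not from the source: constructing TxnMetrics and
// registering it with a registry the same way makeNodeMetrics registers
// nodeMetrics above. The 10-second interval and the recorded values are
// illustrative assumptions.
func exampleTxnMetrics(reg *metric.Registry) TxnMetrics {
	txnMetrics := MakeTxnMetrics(10 * time.Second)
	// Register every metric field of the struct with the registry.
	reg.AddMetricStruct(txnMetrics)

	// A transaction coordinator would later record outcomes, e.g.:
	txnMetrics.Commits.Inc(1)
	txnMetrics.Durations.RecordValue(int64(42 * time.Millisecond))
	txnMetrics.Restarts.RecordValue(0)
	return txnMetrics
}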
// NewExecutor creates an Executor and registers a callback on the
// system config.
func NewExecutor(
	cfg ExecutorConfig, stopper *stop.Stopper, startupMemMetrics *MemoryMetrics,
) *Executor {
	exec := &Executor{
		cfg:     cfg,
		reCache: parser.NewRegexpCache(512),

		Latency:          metric.NewLatency(MetaLatency, cfg.MetricsSampleInterval),
		TxnBeginCount:    metric.NewCounter(MetaTxnBegin),
		TxnCommitCount:   metric.NewCounter(MetaTxnCommit),
		TxnAbortCount:    metric.NewCounter(MetaTxnAbort),
		TxnRollbackCount: metric.NewCounter(MetaTxnRollback),
		SelectCount:      metric.NewCounter(MetaSelect),
		UpdateCount:      metric.NewCounter(MetaUpdate),
		InsertCount:      metric.NewCounter(MetaInsert),
		DeleteCount:      metric.NewCounter(MetaDelete),
		DdlCount:         metric.NewCounter(MetaDdl),
		MiscCount:        metric.NewCounter(MetaMisc),
		QueryCount:       metric.NewCounter(MetaQuery),
	}

	exec.systemConfigCond = sync.NewCond(exec.systemConfigMu.RLocker())

	// Start a worker that applies gossiped system config updates until the
	// stopper signals shutdown.
	gossipUpdateC := cfg.Gossip.RegisterSystemConfigChannel()
	stopper.RunWorker(func() {
		for {
			select {
			case <-gossipUpdateC:
				sysCfg, _ := cfg.Gossip.GetSystemConfig()
				exec.updateSystemConfig(sysCfg)
			case <-stopper.ShouldStop():
				return
			}
		}
	})

	ctx := log.WithLogTag(context.Background(), "startup", nil)
	startupSession := NewSession(ctx, SessionArgs{}, exec, nil, startupMemMetrics)
	if err := exec.virtualSchemas.init(&startupSession.planner); err != nil {
		log.Fatal(ctx, err)
	}
	startupSession.Finish(exec)

	return exec
}
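// The helper below is an editorial sketch of the worker pattern used inside
// NewExecutor: block on a notification channel and exit cleanly once the
// stopper begins shutting down. The channel and callback parameters are
// hypothetical; only stopper.RunWorker and stopper.ShouldStop come from the
// code above.
func runUntilStopped(stopper *stop.Stopper, updates <-chan struct{}, onUpdate func()) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-updates:
				onUpdate()
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}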
// TestMetricsRecorder verifies that the metrics recorder properly formats the
// statistics from various registries, both for Time Series and for Status
// Summaries.
func TestMetricsRecorder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// ========================================
	// Construct a series of fake descriptors for use in test.
	// ========================================
	nodeDesc := roachpb.NodeDescriptor{
		NodeID: roachpb.NodeID(1),
	}
	storeDesc1 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(1),
		Capacity: roachpb.StoreCapacity{
			Capacity:  100,
			Available: 50,
		},
	}
	storeDesc2 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(2),
		Capacity: roachpb.StoreCapacity{
			Capacity:  200,
			Available: 75,
		},
	}

	// ========================================
	// Create registries and add them to the recorder (one node-level, two
	// store-level).
	// ========================================
	reg1 := metric.NewRegistry()
	store1 := fakeStore{
		storeID:  roachpb.StoreID(1),
		desc:     storeDesc1,
		registry: metric.NewRegistry(),
	}
	store2 := fakeStore{
		storeID:  roachpb.StoreID(2),
		desc:     storeDesc2,
		registry: metric.NewRegistry(),
	}
	manual := hlc.NewManualClock(100)
	recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano, time.Nanosecond))
	recorder.AddStore(store1)
	recorder.AddStore(store2)
	recorder.AddNode(reg1, nodeDesc, 50)

	// Ensure the metric system's view of time does not advance during this
	// test; advancing time would age the actual data (e.g. in histograms)
	// unexpectedly.
	defer metric.TestingSetNow(func() time.Time {
		return time.Unix(0, manual.UnixNano()).UTC()
	})()

	// ========================================
	// Generate Metrics Data & Expected Results
	// ========================================

	// Flatten the registries into an array for ease of use. The node-level
	// registry appears twice so that its metrics are written under two
	// different prefixes.
	regList := []struct {
		reg    *metric.Registry
		prefix string
		source int64
		isNode bool
	}{
		{
			reg:    reg1,
			prefix: "one.",
			source: 1,
			isNode: true,
		},
		{
			reg:    reg1,
			prefix: "two.",
			source: 1,
			isNode: true,
		},
		{
			reg:    store1.registry,
			prefix: "",
			source: int64(store1.storeID),
			isNode: false,
		},
		{
			reg:    store2.registry,
			prefix: "",
			source: int64(store2.storeID),
			isNode: false,
		},
	}

	// Every registry will have a copy of the following metrics.
	metricNames := []struct {
		name string
		typ  string
		val  int64
	}{
		{"testGauge", "gauge", 20},
		{"testGaugeFloat64", "floatgauge", 20},
		{"testCounter", "counter", 5},
		{"testCounterWithRates", "counterwithrates", 2},
		{"testHistogram", "histogram", 10},
		{"testLatency", "latency", 10},

		// Stats needed for store summaries.
		{"ranges", "counter", 1},
		{"replicas.leaders", "gauge", 1},
		{"replicas.leaseholders", "gauge", 1},
		{"ranges", "gauge", 1},
		{"ranges.available", "gauge", 1},
	}

	// Add the metrics to each registry and set their values. At the same time,
	// generate expected time series results and status summary metric values.
	var expected []tspb.TimeSeriesData
	expectedNodeSummaryMetrics := make(map[string]float64)
	expectedStoreSummaryMetrics := make(map[string]float64)

	// addExpected generates expected data for a single metric data point.
	addExpected := func(prefix, name string, source, time, val int64, isNode bool) {
		// Generate time series data.
		tsPrefix := "cr.node."
		if !isNode {
			tsPrefix = "cr.store."
		}
		expect := tspb.TimeSeriesData{
			Name:   tsPrefix + prefix + name,
			Source: strconv.FormatInt(source, 10),
			Datapoints: []tspb.TimeSeriesDatapoint{
				{
					TimestampNanos: time,
					Value:          float64(val),
				},
			},
		}
		expected = append(expected, expect)

		// Generate status summary data.
		if isNode {
			expectedNodeSummaryMetrics[prefix+name] = float64(val)
		} else {
			// This can overwrite the previous value, but this is expected as
			// all stores in our tests have identical values; when comparing
			// status summaries, the same map is used as expected data for all
			// stores.
			expectedStoreSummaryMetrics[prefix+name] = float64(val)
		}
	}

	for _, reg := range regList {
		for _, data := range metricNames {
			switch data.typ {
			case "gauge":
				g := metric.NewGauge(metric.Metadata{Name: reg.prefix + data.name})
				reg.reg.AddMetric(g)
				g.Update(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "floatgauge":
				g := metric.NewGaugeFloat64(metric.Metadata{Name: reg.prefix + data.name})
				reg.reg.AddMetric(g)
				g.Update(float64(data.val))
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "counter":
				c := metric.NewCounter(metric.Metadata{Name: reg.prefix + data.name})
				reg.reg.AddMetric(c)
				c.Inc(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "counterwithrates":
				r := metric.NewCounterWithRates(metric.Metadata{Name: reg.prefix + data.name})
				reg.reg.AddMetric(r)
				r.Inc(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "histogram":
				h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, 1000, 2)
				reg.reg.AddMetric(h)
				h.RecordValue(data.val)
				for _, q := range recordHistogramQuantiles {
					addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
				}
			case "latency":
				l := metric.NewLatency(metric.Metadata{Name: reg.prefix + data.name}, time.Hour)
				reg.reg.AddMetric(l)
				l.RecordValue(data.val)
				// Latency is simply three histograms (at different resolution
				// time scales).
				for _, q := range recordHistogramQuantiles {
					addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
				}
			default:
				t.Fatalf("unexpected: %+v", data)
			}
		}
	}

	// ========================================
	// Verify time series data
	// ========================================
	actual := recorder.GetTimeSeriesData()

	// Actual comparison is simple: sort the resulting arrays by time and name,
	// and use reflect.DeepEqual.
	sort.Sort(byTimeAndName(actual))
	sort.Sort(byTimeAndName(expected))
	if a, e := actual, expected; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a))
	}

	// ========================================
	// Verify node summary generation
	// ========================================
	expectedNodeSummary := &NodeStatus{
		Desc:      nodeDesc,
		BuildInfo: build.GetInfo(),
		StartedAt: 50,
		UpdatedAt: 100,
		Metrics:   expectedNodeSummaryMetrics,
		StoreStatuses: []StoreStatus{
			{
				Desc:    storeDesc1,
				Metrics: expectedStoreSummaryMetrics,
			},
			{
				Desc:    storeDesc2,
				Metrics: expectedStoreSummaryMetrics,
			},
		},
	}

	nodeSummary := recorder.GetStatusSummary()
	if nodeSummary == nil {
		t.Fatalf("recorder did not return nodeSummary")
	}

	sort.Sort(byStoreDescID(nodeSummary.StoreStatuses))
	if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a))
	}
}
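// Editorial sketch, not from the source: the test above relies on two sort
// adapters, byTimeAndName and byStoreDescID, that are defined elsewhere in
// the package. The types below approximate what they plausibly look like
// (names suffixed with "Sketch" to avoid clashing with the real definitions);
// the exact tie-breaking order in the real helpers may differ.
type byTimeAndNameSketch []tspb.TimeSeriesData

func (a byTimeAndNameSketch) Len() int      { return len(a) }
func (a byTimeAndNameSketch) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTimeAndNameSketch) Less(i, j int) bool {
	// Order by the timestamp of the first datapoint, then by series name.
	if a[i].Datapoints[0].TimestampNanos != a[j].Datapoints[0].TimestampNanos {
		return a[i].Datapoints[0].TimestampNanos < a[j].Datapoints[0].TimestampNanos
	}
	return a[i].Name < a[j].Name
}

type byStoreDescIDSketch []StoreStatus

func (a byStoreDescIDSketch) Len() int      { return len(a) }
func (a byStoreDescIDSketch) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byStoreDescIDSketch) Less(i, j int) bool {
	// Order store statuses by their descriptor's StoreID.
	return a[i].Desc.StoreID < a[j].Desc.StoreID
}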