// newRemoteClockMonitor returns a monitor with the given server clock.
func newRemoteClockMonitor(clock *hlc.Clock, offsetTTL time.Duration) *RemoteClockMonitor {
    r := RemoteClockMonitor{
        clock:     clock,
        offsetTTL: offsetTTL,
    }
    r.mu.offsets = make(map[string]RemoteOffset)
    r.metrics = RemoteClockMetrics{
        ClockOffsetMeanNanos:   metric.NewGauge(metaClockOffsetMeanNanos),
        ClockOffsetStdDevNanos: metric.NewGauge(metaClockOffsetStdDevNanos),
    }
    return &r
}
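// The following is a hypothetical usage sketch, not part of the original
// source: it builds a RemoteClockMonitor against a manually-driven HLC clock,
// using the same hlc.NewManualClock / hlc.NewClock pattern the tests in this
// section use. The function name and the 10-second offset TTL are arbitrary
// illustrative choices.
func exampleNewRemoteClockMonitor() *RemoteClockMonitor {
    manual := hlc.NewManualClock(123)
    clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
    // Offsets recorded by the monitor expire after the given TTL.
    return newRemoteClockMonitor(clock, 10*time.Second)
}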
// newServer creates and returns a server struct.
func newServer(
    ambient log.AmbientContext,
    nodeID *base.NodeIDContainer,
    stopper *stop.Stopper,
    registry *metric.Registry,
) *server {
    s := &server{
        AmbientContext: ambient,
        NodeID:         nodeID,
        stopper:        stopper,
        tighten:        make(chan struct{}, 1),
        nodeMetrics:    makeMetrics(),
        serverMetrics:  makeMetrics(),
    }

    s.mu.is = newInfoStore(s.AmbientContext, nodeID, util.UnresolvedAddr{}, stopper)
    s.mu.incoming = makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsIncomingGauge))
    s.mu.nodeMap = make(map[util.UnresolvedAddr]serverInfo)
    s.mu.ready = make(chan struct{})

    registry.AddMetric(s.mu.incoming.gauge)
    registry.AddMetricStruct(s.nodeMetrics)

    return s
}
func TestNodeSetFilter(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes1 := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
    node0 := roachpb.NodeID(1)
    node1 := roachpb.NodeID(2)
    nodes1.addNode(node0)
    nodes1.addNode(node1)

    nodes2 := makeNodeSet(1, metric.NewGauge(metric.Metadata{Name: ""}))
    nodes2.addNode(node1)

    filtered := nodes1.filter(func(a roachpb.NodeID) bool {
        return !nodes2.hasNode(a)
    })
    if filtered.len() != 1 || filtered.hasNode(node1) || !filtered.hasNode(node0) {
        t.Errorf("expected filter to leave node0: %+v", filtered)
    }
}
// filter returns a nodeSet containing the nodes for which the supplied
// filter function filterFn returns true. filterFn should return true to keep
// a node and false to remove it. The new nodeSet has a separate gauge object
// from the parent.
func (as nodeSet) filter(filterFn func(node roachpb.NodeID) bool) nodeSet {
    avail := makeNodeSet(as.maxSize,
        metric.NewGauge(metric.Metadata{Name: "TODO(marc)", Help: "TODO(marc)"}))
    for node := range as.nodes {
        if filterFn(node) {
            avail.addNode(node)
        }
    }
    return avail
}
// TestLeastUseful verifies that the least-contributing peer node
// can be determined.
func TestLeastUseful(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes := []roachpb.NodeID{
        roachpb.NodeID(1),
        roachpb.NodeID(2),
    }
    is, stopper := newTestInfoStore()
    defer stopper.Stop()

    set := makeNodeSet(3, metric.NewGauge(metric.Metadata{Name: ""}))
    if is.leastUseful(set) != 0 {
        t.Error("not expecting a node from an empty set")
    }

    inf1 := is.newInfo(nil, time.Second)
    inf1.NodeID = 1
    inf1.PeerID = 1
    if err := is.addInfo("a1", inf1); err != nil {
        t.Fatal(err)
    }
    if is.leastUseful(set) != 0 {
        t.Error("not expecting a node from an empty set")
    }

    set.addNode(nodes[0])
    if is.leastUseful(set) != nodes[0] {
        t.Error("expecting nodes[0] as least useful")
    }

    inf2 := is.newInfo(nil, time.Second)
    inf2.NodeID = 2
    inf2.PeerID = 1
    if err := is.addInfo("a2", inf2); err != nil {
        t.Fatal(err)
    }
    if is.leastUseful(set) != nodes[0] {
        t.Error("expecting nodes[0] as least useful")
    }

    set.addNode(nodes[1])
    if is.leastUseful(set) != nodes[1] {
        t.Error("expecting nodes[1] as least useful")
    }

    inf3 := is.newInfo(nil, time.Second)
    inf3.NodeID = 2
    inf3.PeerID = 2
    if err := is.addInfo("a3", inf3); err != nil {
        t.Fatal(err)
    }
    if is.leastUseful(set) != nodes[1] {
        t.Error("expecting nodes[1] as least useful")
    }
}
func TestNodeSetMaxSize(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes := makeNodeSet(1, metric.NewGauge(metric.Metadata{Name: ""}))
    if !nodes.hasSpace() {
        t.Error("set should have space")
    }
    nodes.addNode(roachpb.NodeID(1))
    if nodes.hasSpace() {
        t.Error("set should have no space")
    }
}
func makeDistSenderMetrics() DistSenderMetrics {
    return DistSenderMetrics{
        BatchCount:             metric.NewCounter(metaDistSenderBatchCount),
        PartialBatchCount:      metric.NewCounter(metaDistSenderPartialBatchCount),
        SentCount:              metric.NewCounter(metaTransportSentCount),
        LocalSentCount:         metric.NewCounter(metaTransportLocalSentCount),
        SendNextTimeoutCount:   metric.NewCounter(metaDistSenderSendNextTimeoutCount),
        NextReplicaErrCount:    metric.NewCounter(metaDistSenderNextReplicaErrCount),
        NotLeaseHolderErrCount: metric.NewCounter(metaDistSenderNotLeaseHolderErrCount),
        SlowRequestsCount:      metric.NewGauge(metaSlowDistSenderRequests),
    }
}
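// Hypothetical illustration, not part of the original source: the method name
// exampleCountBatch and the assumption that the DistSender stores these
// metrics in a field named metrics exist only to show how the counters built
// above would typically be bumped on the send path.
func (ds *DistSender) exampleCountBatch(local bool) {
    // Every batch that enters the DistSender is counted once.
    ds.metrics.BatchCount.Inc(1)
    // Each RPC attempt to a replica is counted as sent.
    ds.metrics.SentCount.Inc(1)
    if local {
        // Sends answered by a replica on the local node are also counted
        // separately.
        ds.metrics.LocalSentCount.Inc(1)
    }
}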
func TestNodeSetHasNode(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
    node := roachpb.NodeID(1)
    if nodes.hasNode(node) {
        t.Error("node wasn't added and should not be valid")
    }

    // Add node and verify it's valid.
    nodes.addNode(node)
    if !nodes.hasNode(node) {
        t.Error("node was added and should be valid")
    }
}
// New creates an instance of a gossip node.
// The higher level manages the NodeIDContainer instance (which can be shared by
// various server components). The ambient context is expected to already
// contain the node ID.
func New(
    ambient log.AmbientContext,
    nodeID *base.NodeIDContainer,
    rpcContext *rpc.Context,
    grpcServer *grpc.Server,
    resolvers []resolver.Resolver,
    stopper *stop.Stopper,
    registry *metric.Registry,
) *Gossip {
    ambient.SetEventLog("gossip", "gossip")
    g := &Gossip{
        server:            newServer(ambient, nodeID, stopper, registry),
        Connected:         make(chan struct{}),
        rpcContext:        rpcContext,
        outgoing:          makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsOutgoingGauge)),
        bootstrapping:     map[string]struct{}{},
        disconnected:      make(chan *client, 10),
        stalledCh:         make(chan struct{}, 1),
        stallInterval:     defaultStallInterval,
        bootstrapInterval: defaultBootstrapInterval,
        cullInterval:      defaultCullInterval,
        nodeDescs:         map[roachpb.NodeID]*roachpb.NodeDescriptor{},
        resolverAddrs:     map[util.UnresolvedAddr]resolver.Resolver{},
        bootstrapAddrs:    map[util.UnresolvedAddr]roachpb.NodeID{},
    }
    stopper.AddCloser(stop.CloserFn(g.server.AmbientContext.FinishEventLog))

    registry.AddMetric(g.outgoing.gauge)
    g.clientsMu.breakers = map[string]*circuit.Breaker{}

    resolverAddrs := make([]string, len(resolvers))
    for i, resolver := range resolvers {
        resolverAddrs[i] = resolver.Addr()
    }
    ctx := g.AnnotateCtx(context.Background())
    if log.V(1) {
        log.Infof(ctx, "initial resolvers: %v", resolverAddrs)
    }
    g.SetResolvers(resolvers)

    g.mu.Lock()
    // Add ourselves as a SystemConfig watcher.
    g.mu.is.registerCallback(KeySystemConfig, g.updateSystemConfig)
    // Add ourselves as a node descriptor watcher.
    g.mu.is.registerCallback(MakePrefixPattern(KeyNodeIDPrefix), g.updateNodeAddress)
    g.mu.Unlock()

    RegisterGossipServer(grpcServer, g.server)
    return g
}
func TestNodeSetAsSlice(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
    node0 := roachpb.NodeID(1)
    node1 := roachpb.NodeID(2)
    nodes.addNode(node0)
    nodes.addNode(node1)

    nodeArr := nodes.asSlice()
    if len(nodeArr) != 2 {
        t.Error("expected slice of length 2:", nodeArr)
    }
    if (nodeArr[0] != node0 && nodeArr[0] != node1) ||
        (nodeArr[1] != node1 && nodeArr[1] != node0) {
        t.Error("expected slice to contain both node0 and node1:", nodeArr)
    }
}
func TestNodeSetAddAndRemoveNode(t *testing.T) {
    defer leaktest.AfterTest(t)()
    nodes := makeNodeSet(2, metric.NewGauge(metric.Metadata{Name: ""}))
    node0 := roachpb.NodeID(1)
    node1 := roachpb.NodeID(2)
    nodes.addNode(node0)
    nodes.addNode(node1)
    if !nodes.hasNode(node0) || !nodes.hasNode(node1) {
        t.Error("failed to locate added nodes")
    }
    nodes.removeNode(node0)
    if nodes.hasNode(node0) || !nodes.hasNode(node1) {
        t.Error("failed to remove node0", nodes)
    }
    nodes.removeNode(node1)
    if nodes.hasNode(node0) || nodes.hasNode(node1) {
        t.Error("failed to remove node1", nodes)
    }
}
// TestMetricsRecorder verifies that the metrics recorder properly formats the
// statistics from various registries, both for Time Series and for Status
// Summaries.
func TestMetricsRecorder(t *testing.T) {
    defer leaktest.AfterTest(t)()

    // ========================================
    // Construct a series of fake descriptors for use in test.
    // ========================================
    nodeDesc := roachpb.NodeDescriptor{
        NodeID: roachpb.NodeID(1),
    }
    storeDesc1 := roachpb.StoreDescriptor{
        StoreID: roachpb.StoreID(1),
        Capacity: roachpb.StoreCapacity{
            Capacity:  100,
            Available: 50,
        },
    }
    storeDesc2 := roachpb.StoreDescriptor{
        StoreID: roachpb.StoreID(2),
        Capacity: roachpb.StoreCapacity{
            Capacity:  200,
            Available: 75,
        },
    }

    // ========================================
    // Create registries and add them to the recorder (two node-level, two
    // store-level).
    // ========================================
    reg1 := metric.NewRegistry()
    store1 := fakeStore{
        storeID:  roachpb.StoreID(1),
        desc:     storeDesc1,
        registry: metric.NewRegistry(),
    }
    store2 := fakeStore{
        storeID:  roachpb.StoreID(2),
        desc:     storeDesc2,
        registry: metric.NewRegistry(),
    }
    manual := hlc.NewManualClock(100)
    recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano, time.Nanosecond))
    recorder.AddStore(store1)
    recorder.AddStore(store2)
    recorder.AddNode(reg1, nodeDesc, 50)

    // Ensure the metric system's view of time does not advance during this test
    // as the test expects time to not advance too far which would age the actual
    // data (e.g. in histograms) unexpectedly.
    defer metric.TestingSetNow(func() time.Time {
        return time.Unix(0, manual.UnixNano()).UTC()
    })()

    // ========================================
    // Generate Metrics Data & Expected Results
    // ========================================

    // Flatten the four registries into an array for ease of use.
    regList := []struct {
        reg    *metric.Registry
        prefix string
        source int64
        isNode bool
    }{
        {
            reg:    reg1,
            prefix: "one.",
            source: 1,
            isNode: true,
        },
        {
            reg:    reg1,
            prefix: "two.",
            source: 1,
            isNode: true,
        },
        {
            reg:    store1.registry,
            prefix: "",
            source: int64(store1.storeID),
            isNode: false,
        },
        {
            reg:    store2.registry,
            prefix: "",
            source: int64(store2.storeID),
            isNode: false,
        },
    }

    // Every registry will have a copy of the following metrics.
    metricNames := []struct {
        name string
        typ  string
        val  int64
    }{
        {"testGauge", "gauge", 20},
        {"testGaugeFloat64", "floatgauge", 20},
        {"testCounter", "counter", 5},
        {"testCounterWithRates", "counterwithrates", 2},
        {"testHistogram", "histogram", 10},
        {"testLatency", "latency", 10},

        // Stats needed for store summaries.
        {"ranges", "counter", 1},
        {"replicas.leaders", "gauge", 1},
        {"replicas.leaseholders", "gauge", 1},
        {"ranges", "gauge", 1},
        {"ranges.available", "gauge", 1},
    }

    // Add the metrics to each registry and set their values. At the same time,
    // generate expected time series results and status summary metric values.
    var expected []tspb.TimeSeriesData
    expectedNodeSummaryMetrics := make(map[string]float64)
    expectedStoreSummaryMetrics := make(map[string]float64)

    // addExpected generates expected data for a single metric data point.
    addExpected := func(prefix, name string, source, time, val int64, isNode bool) {
        // Generate time series data.
        tsPrefix := "cr.node."
        if !isNode {
            tsPrefix = "cr.store."
        }
        expect := tspb.TimeSeriesData{
            Name:   tsPrefix + prefix + name,
            Source: strconv.FormatInt(source, 10),
            Datapoints: []tspb.TimeSeriesDatapoint{
                {
                    TimestampNanos: time,
                    Value:          float64(val),
                },
            },
        }
        expected = append(expected, expect)

        // Generate status summary data.
        if isNode {
            expectedNodeSummaryMetrics[prefix+name] = float64(val)
        } else {
            // This can overwrite the previous value, but this is expected as
            // all stores in our tests have identical values; when comparing
            // status summaries, the same map is used as expected data for all
            // stores.
            expectedStoreSummaryMetrics[prefix+name] = float64(val)
        }
    }

    for _, reg := range regList {
        for _, data := range metricNames {
            switch data.typ {
            case "gauge":
                g := metric.NewGauge(metric.Metadata{Name: reg.prefix + data.name})
                reg.reg.AddMetric(g)
                g.Update(data.val)
                addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
            case "floatgauge":
                g := metric.NewGaugeFloat64(metric.Metadata{Name: reg.prefix + data.name})
                reg.reg.AddMetric(g)
                g.Update(float64(data.val))
                addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
            case "counter":
                c := metric.NewCounter(metric.Metadata{Name: reg.prefix + data.name})
                reg.reg.AddMetric(c)
                c.Inc(data.val)
                addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
            case "counterwithrates":
                r := metric.NewCounterWithRates(metric.Metadata{Name: reg.prefix + data.name})
                reg.reg.AddMetric(r)
                r.Inc(data.val)
                addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
            case "histogram":
                h := metric.NewHistogram(metric.Metadata{Name: reg.prefix + data.name}, time.Second, 1000, 2)
                reg.reg.AddMetric(h)
                h.RecordValue(data.val)
                for _, q := range recordHistogramQuantiles {
                    addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
                }
            case "latency":
                l := metric.NewLatency(metric.Metadata{Name: reg.prefix + data.name}, time.Hour)
                reg.reg.AddMetric(l)
                l.RecordValue(data.val)
                // Latency is simply three histograms (at different resolution
                // time scales).
                for _, q := range recordHistogramQuantiles {
                    addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
                }
            default:
                t.Fatalf("unexpected: %+v", data)
            }
        }
    }

    // ========================================
    // Verify time series data
    // ========================================
    actual := recorder.GetTimeSeriesData()

    // Actual comparison is simple: sort the resulting arrays by time and name,
    // and use reflect.DeepEqual.
    sort.Sort(byTimeAndName(actual))
    sort.Sort(byTimeAndName(expected))
    if a, e := actual, expected; !reflect.DeepEqual(a, e) {
        t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a))
    }

    // ========================================
    // Verify node summary generation
    // ========================================
    expectedNodeSummary := &NodeStatus{
        Desc:      nodeDesc,
        BuildInfo: build.GetInfo(),
        StartedAt: 50,
        UpdatedAt: 100,
        Metrics:   expectedNodeSummaryMetrics,
        StoreStatuses: []StoreStatus{
            {
                Desc:    storeDesc1,
                Metrics: expectedStoreSummaryMetrics,
            },
            {
                Desc:    storeDesc2,
                Metrics: expectedStoreSummaryMetrics,
            },
        },
    }

    nodeSummary := recorder.GetStatusSummary()
    if nodeSummary == nil {
        t.Fatalf("recorder did not return nodeSummary")
    }
    sort.Sort(byStoreDescID(nodeSummary.StoreStatuses))
    if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) {
        t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a))
    }
}
func newStoreMetrics(sampleInterval time.Duration) *StoreMetrics {
    storeRegistry := metric.NewRegistry()
    sm := &StoreMetrics{
        registry: storeRegistry,

        // Replica metrics.
        ReplicaCount:                  metric.NewCounter(metaReplicaCount),
        ReservedReplicaCount:          metric.NewCounter(metaReservedReplicaCount),
        RaftLeaderCount:               metric.NewGauge(metaRaftLeaderCount),
        RaftLeaderNotLeaseHolderCount: metric.NewGauge(metaRaftLeaderNotLeaseHolderCount),
        LeaseHolderCount:              metric.NewGauge(metaLeaseHolderCount),
        QuiescentCount:                metric.NewGauge(metaQuiescentCount),

        // Replica CommandQueue metrics.
        MaxCommandQueueSize:       metric.NewGauge(metaMaxCommandQueueSize),
        MaxCommandQueueWriteCount: metric.NewGauge(metaMaxCommandQueueWriteCount),
        MaxCommandQueueReadCount:  metric.NewGauge(metaMaxCommandQueueReadCount),
        MaxCommandQueueTreeSize:   metric.NewGauge(metaMaxCommandQueueTreeSize),
        MaxCommandQueueOverlaps:   metric.NewGauge(metaMaxCommandQueueOverlaps),
        CombinedCommandQueueSize:  metric.NewGauge(metaCombinedCommandQueueSize),
        CombinedCommandWriteCount: metric.NewGauge(metaCombinedCommandWriteCount),
        CombinedCommandReadCount:  metric.NewGauge(metaCombinedCommandReadCount),

        // Range metrics.
        RangeCount:                metric.NewGauge(metaRangeCount),
        UnavailableRangeCount:     metric.NewGauge(metaUnavailableRangeCount),
        UnderReplicatedRangeCount: metric.NewGauge(metaUnderReplicatedRangeCount),

        // Lease request metrics.
        LeaseRequestSuccessCount: metric.NewCounter(metaLeaseRequestSuccessCount),
        LeaseRequestErrorCount:   metric.NewCounter(metaLeaseRequestErrorCount),

        // Storage metrics.
        LiveBytes:       metric.NewGauge(metaLiveBytes),
        KeyBytes:        metric.NewGauge(metaKeyBytes),
        ValBytes:        metric.NewGauge(metaValBytes),
        IntentBytes:     metric.NewGauge(metaIntentBytes),
        LiveCount:       metric.NewGauge(metaLiveCount),
        KeyCount:        metric.NewGauge(metaKeyCount),
        ValCount:        metric.NewGauge(metaValCount),
        IntentCount:     metric.NewGauge(metaIntentCount),
        IntentAge:       metric.NewGauge(metaIntentAge),
        GcBytesAge:      metric.NewGauge(metaGcBytesAge),
        LastUpdateNanos: metric.NewGauge(metaLastUpdateNanos),
        Capacity:        metric.NewGauge(metaCapacity),
        Available:       metric.NewGauge(metaAvailable),
        Reserved:        metric.NewCounter(metaReserved),
        SysBytes:        metric.NewGauge(metaSysBytes),
        SysCount:        metric.NewGauge(metaSysCount),

        // RocksDB metrics.
        RdbBlockCacheHits:           metric.NewGauge(metaRdbBlockCacheHits),
        RdbBlockCacheMisses:         metric.NewGauge(metaRdbBlockCacheMisses),
        RdbBlockCacheUsage:          metric.NewGauge(metaRdbBlockCacheUsage),
        RdbBlockCachePinnedUsage:    metric.NewGauge(metaRdbBlockCachePinnedUsage),
        RdbBloomFilterPrefixChecked: metric.NewGauge(metaRdbBloomFilterPrefixChecked),
        RdbBloomFilterPrefixUseful:  metric.NewGauge(metaRdbBloomFilterPrefixUseful),
        RdbMemtableHits:             metric.NewGauge(metaRdbMemtableHits),
        RdbMemtableMisses:           metric.NewGauge(metaRdbMemtableMisses),
        RdbMemtableTotalSize:        metric.NewGauge(metaRdbMemtableTotalSize),
        RdbFlushes:                  metric.NewGauge(metaRdbFlushes),
        RdbCompactions:              metric.NewGauge(metaRdbCompactions),
        RdbTableReadersMemEstimate:  metric.NewGauge(metaRdbTableReadersMemEstimate),
        RdbReadAmplification:        metric.NewGauge(metaRdbReadAmplification),
        RdbNumSSTables:              metric.NewGauge(metaRdbNumSSTables),

        // Range event metrics.
        RangeSplits:                     metric.NewCounter(metaRangeSplits),
        RangeAdds:                       metric.NewCounter(metaRangeAdds),
        RangeRemoves:                    metric.NewCounter(metaRangeRemoves),
        RangeSnapshotsGenerated:         metric.NewCounter(metaRangeSnapshotsGenerated),
        RangeSnapshotsNormalApplied:     metric.NewCounter(metaRangeSnapshotsNormalApplied),
        RangeSnapshotsPreemptiveApplied: metric.NewCounter(metaRangeSnapshotsPreemptiveApplied),

        // Raft processing metrics.
        RaftTicks:                metric.NewCounter(metaRaftTicks),
        RaftWorkingDurationNanos: metric.NewCounter(metaRaftWorkingDurationNanos),
        RaftTickingDurationNanos: metric.NewCounter(metaRaftTickingDurationNanos),

        // Raft message metrics.
        RaftRcvdMsgProp:           metric.NewCounter(metaRaftRcvdProp),
        RaftRcvdMsgApp:            metric.NewCounter(metaRaftRcvdApp),
        RaftRcvdMsgAppResp:        metric.NewCounter(metaRaftRcvdAppResp),
        RaftRcvdMsgVote:           metric.NewCounter(metaRaftRcvdVote),
        RaftRcvdMsgVoteResp:       metric.NewCounter(metaRaftRcvdVoteResp),
        RaftRcvdMsgPreVote:        metric.NewCounter(metaRaftRcvdPreVote),
        RaftRcvdMsgPreVoteResp:    metric.NewCounter(metaRaftRcvdPreVoteResp),
        RaftRcvdMsgSnap:           metric.NewCounter(metaRaftRcvdSnap),
        RaftRcvdMsgHeartbeat:      metric.NewCounter(metaRaftRcvdHeartbeat),
        RaftRcvdMsgHeartbeatResp:  metric.NewCounter(metaRaftRcvdHeartbeatResp),
        RaftRcvdMsgTransferLeader: metric.NewCounter(metaRaftRcvdTransferLeader),
        RaftRcvdMsgTimeoutNow:     metric.NewCounter(metaRaftRcvdTimeoutNow),
        RaftRcvdMsgDropped:        metric.NewCounter(metaRaftRcvdDropped),
        raftRcvdMessages:          make(map[raftpb.MessageType]*metric.Counter, len(raftpb.MessageType_name)),

        RaftEnqueuedPending: metric.NewGauge(metaRaftEnqueuedPending),

        // This Gauge measures the number of heartbeats queued up just before
        // the queue is cleared, to avoid flapping wildly.
        RaftCoalescedHeartbeatsPending: metric.NewGauge(metaRaftCoalescedHeartbeatsPending),

        // Replica queue metrics.
        GCQueueSuccesses:                          metric.NewCounter(metaGCQueueSuccesses),
        GCQueueFailures:                           metric.NewCounter(metaGCQueueFailures),
        GCQueuePending:                            metric.NewGauge(metaGCQueuePending),
        GCQueueProcessingNanos:                    metric.NewCounter(metaGCQueueProcessingNanos),
        RaftLogQueueSuccesses:                     metric.NewCounter(metaRaftLogQueueSuccesses),
        RaftLogQueueFailures:                      metric.NewCounter(metaRaftLogQueueFailures),
        RaftLogQueuePending:                       metric.NewGauge(metaRaftLogQueuePending),
        RaftLogQueueProcessingNanos:               metric.NewCounter(metaRaftLogQueueProcessingNanos),
        ConsistencyQueueSuccesses:                 metric.NewCounter(metaConsistencyQueueSuccesses),
        ConsistencyQueueFailures:                  metric.NewCounter(metaConsistencyQueueFailures),
        ConsistencyQueuePending:                   metric.NewGauge(metaConsistencyQueuePending),
        ConsistencyQueueProcessingNanos:           metric.NewCounter(metaConsistencyQueueProcessingNanos),
        ReplicaGCQueueSuccesses:                   metric.NewCounter(metaReplicaGCQueueSuccesses),
        ReplicaGCQueueFailures:                    metric.NewCounter(metaReplicaGCQueueFailures),
        ReplicaGCQueuePending:                     metric.NewGauge(metaReplicaGCQueuePending),
        ReplicaGCQueueProcessingNanos:             metric.NewCounter(metaReplicaGCQueueProcessingNanos),
        ReplicateQueueSuccesses:                   metric.NewCounter(metaReplicateQueueSuccesses),
        ReplicateQueueFailures:                    metric.NewCounter(metaReplicateQueueFailures),
        ReplicateQueuePending:                     metric.NewGauge(metaReplicateQueuePending),
        ReplicateQueueProcessingNanos:             metric.NewCounter(metaReplicateQueueProcessingNanos),
        ReplicateQueuePurgatory:                   metric.NewGauge(metaReplicateQueuePurgatory),
        SplitQueueSuccesses:                       metric.NewCounter(metaSplitQueueSuccesses),
        SplitQueueFailures:                        metric.NewCounter(metaSplitQueueFailures),
        SplitQueuePending:                         metric.NewGauge(metaSplitQueuePending),
        SplitQueueProcessingNanos:                 metric.NewCounter(metaSplitQueueProcessingNanos),
        // Note: the success/failure metadata were previously swapped here;
        // each field now receives its matching metadata.
        TimeSeriesMaintenanceQueueSuccesses:       metric.NewCounter(metaTimeSeriesMaintenanceQueueSuccesses),
        TimeSeriesMaintenanceQueueFailures:        metric.NewCounter(metaTimeSeriesMaintenanceQueueFailures),
        TimeSeriesMaintenanceQueuePending:         metric.NewGauge(metaTimeSeriesMaintenanceQueuePending),
        TimeSeriesMaintenanceQueueProcessingNanos: metric.NewCounter(metaTimeSeriesMaintenanceQueueProcessingNanos),

        // GCInfo cumulative totals.
        GCNumKeysAffected:            metric.NewCounter(metaGCNumKeysAffected),
        GCIntentsConsidered:          metric.NewCounter(metaGCIntentsConsidered),
        GCIntentTxns:                 metric.NewCounter(metaGCIntentTxns),
        GCTransactionSpanScanned:     metric.NewCounter(metaGCTransactionSpanScanned),
        GCTransactionSpanGCAborted:   metric.NewCounter(metaGCTransactionSpanGCAborted),
        GCTransactionSpanGCCommitted: metric.NewCounter(metaGCTransactionSpanGCCommitted),
        GCTransactionSpanGCPending:   metric.NewCounter(metaGCTransactionSpanGCPending),
        GCAbortSpanScanned:           metric.NewCounter(metaGCAbortSpanScanned),
        GCAbortSpanConsidered:        metric.NewCounter(metaGCAbortSpanConsidered),
        GCAbortSpanGCNum:             metric.NewCounter(metaGCAbortSpanGCNum),
        GCPushTxn:                    metric.NewCounter(metaGCPushTxn),
        GCResolveTotal:               metric.NewCounter(metaGCResolveTotal),
        GCResolveSuccess:             metric.NewCounter(metaGCResolveSuccess),

        // Mutex timing.
        //
        // TODO(tschottdorf): Histograms don't work very well as they were
        // inherently built in a windowed (i.e. events-discarding) way, which
        // is not at all the correct way. Discard at one-minute interval which
        // gives sane (though mathematically nonsensical) results when exposed
        // at the moment.
        MuReplicaNanos: metric.NewHistogram(
            metaMuReplicaNanos, sampleInterval,
            time.Second.Nanoseconds(), 1,
        ),
        MuCommandQueueNanos: metric.NewHistogram(
            metaMuCommandQueueNanos, sampleInterval,
            time.Second.Nanoseconds(), 1,
        ),
        MuRaftNanos: metric.NewHistogram(
            metaMuRaftNanos, sampleInterval,
            time.Second.Nanoseconds(), 1,
        ),
        MuStoreNanos: metric.NewHistogram(
            metaMuStoreNanos, sampleInterval,
            time.Second.Nanoseconds(), 1,
        ),
        MuSchedulerNanos: metric.NewHistogram(
            metaMuSchedulerNanos, time.Minute,
            time.Second.Nanoseconds(), 1,
        ),
    }

    sm.raftRcvdMessages[raftpb.MsgProp] = sm.RaftRcvdMsgProp
    sm.raftRcvdMessages[raftpb.MsgApp] = sm.RaftRcvdMsgApp
    sm.raftRcvdMessages[raftpb.MsgAppResp] = sm.RaftRcvdMsgAppResp
    sm.raftRcvdMessages[raftpb.MsgVote] = sm.RaftRcvdMsgVote
    sm.raftRcvdMessages[raftpb.MsgVoteResp] = sm.RaftRcvdMsgVoteResp
    sm.raftRcvdMessages[raftpb.MsgPreVote] = sm.RaftRcvdMsgPreVote
    sm.raftRcvdMessages[raftpb.MsgPreVoteResp] = sm.RaftRcvdMsgPreVoteResp
    sm.raftRcvdMessages[raftpb.MsgSnap] = sm.RaftRcvdMsgSnap
    sm.raftRcvdMessages[raftpb.MsgHeartbeat] = sm.RaftRcvdMsgHeartbeat
    sm.raftRcvdMessages[raftpb.MsgHeartbeatResp] = sm.RaftRcvdMsgHeartbeatResp
    sm.raftRcvdMessages[raftpb.MsgTransferLeader] = sm.RaftRcvdMsgTransferLeader
    sm.raftRcvdMessages[raftpb.MsgTimeoutNow] = sm.RaftRcvdMsgTimeoutNow

    storeRegistry.AddMetricStruct(sm)

    return sm
}
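// Hypothetical sketch, not part of the original source: how the
// raftRcvdMessages map populated above might be used to attribute an incoming
// raft message to its per-type counter. The method name recordRaftMsg is an
// assumption made purely for illustration.
func (sm *StoreMetrics) recordRaftMsg(typ raftpb.MessageType) {
    // Message types without a registered counter are simply ignored here.
    if counter, ok := sm.raftRcvdMessages[typ]; ok {
        counter.Inc(1)
    }
}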
// MakeRuntimeStatSampler constructs a new RuntimeStatSampler object.
func MakeRuntimeStatSampler(clock *hlc.Clock) RuntimeStatSampler {
    // Construct the build info metric. It is constant.
    // We first set the labels on the metadata.
    info := build.GetInfo()
    timestamp, err := info.Timestamp()
    if err != nil {
        // We can't panic here, tests don't have a build timestamp.
        log.Warningf(context.TODO(), "Could not parse build timestamp: %v", err)
    }
    metaBuildTimestamp.AddLabel("tag", info.Tag)
    metaBuildTimestamp.AddLabel("go_version", info.GoVersion)
    buildTimestamp := metric.NewGauge(metaBuildTimestamp)
    buildTimestamp.Update(timestamp)

    return RuntimeStatSampler{
        clock:          clock,
        startTimeNanos: clock.PhysicalNow(),
        CgoCalls:       metric.NewGauge(metaCgoCalls),
        Goroutines:     metric.NewGauge(metaGoroutines),
        GoAllocBytes:   metric.NewGauge(metaGoAllocBytes),
        GoTotalBytes:   metric.NewGauge(metaGoTotalBytes),
        CgoAllocBytes:  metric.NewGauge(metaCgoAllocBytes),
        CgoTotalBytes:  metric.NewGauge(metaCgoTotalBytes),
        GcCount:        metric.NewGauge(metaGCCount),
        GcPauseNS:      metric.NewGauge(metaGCPauseNS),
        GcPausePercent: metric.NewGaugeFloat64(metaGCPausePercent),
        CPUUserNS:      metric.NewGauge(metaCPUUserNS),
        CPUUserPercent: metric.NewGaugeFloat64(metaCPUUserPercent),
        CPUSysNS:       metric.NewGauge(metaCPUSysNS),
        CPUSysPercent:  metric.NewGaugeFloat64(metaCPUSysPercent),
        Rss:            metric.NewGauge(metaRSS),
        FDOpen:         metric.NewGauge(metaFDOpen),
        FDSoftLimit:    metric.NewGauge(metaFDSoftLimit),
        Uptime:         metric.NewGauge(metaUptime),
        BuildTimestamp: buildTimestamp,
    }
}
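// Hypothetical usage sketch, not part of the original source: constructing the
// sampler and exposing its gauges through a registry. AddMetricStruct is the
// same helper used by newServer and newStoreMetrics above; the function name
// exampleRegisterRuntimeSampler is an assumption for illustration only.
func exampleRegisterRuntimeSampler(clock *hlc.Clock, reg *metric.Registry) RuntimeStatSampler {
    rss := MakeRuntimeStatSampler(clock)
    // Register every exported metric field on the sampler with the registry.
    reg.AddMetricStruct(rss)
    return rss
}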