func TestPrometheusCollector(t *testing.T) { c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(name string) map[string]string { return map[string]string{ "zone.name": "hello", } }) prometheus.MustRegister(c) defer prometheus.Unregister(c) rw := httptest.NewRecorder() prometheus.Handler().ServeHTTP(rw, &http.Request{}) metricsFile := "testdata/prometheus_metrics" wantMetrics, err := ioutil.ReadFile(metricsFile) if err != nil { t.Fatalf("unable to read input test file %s", metricsFile) } wantLines := strings.Split(string(wantMetrics), "\n") gotLines := strings.Split(string(rw.Body.String()), "\n") // Until the Prometheus Go client library offers better testability // (https://github.com/prometheus/client_golang/issues/58), we simply compare // verbatim text-format metrics outputs, but ignore certain metric lines // whose value depends on the current time or local circumstances. for i, want := range wantLines { if !includeRe.MatchString(want) || ignoreRe.MatchString(want) { continue } if want != gotLines[i] { t.Fatalf("want %s, got %s", want, gotLines[i]) } } }
func TestServer(t *testing.T) { ts := int(time.Now().Unix()) storageToTest := []string{ fmt.Sprintf("local:./test-%d.db", ts), fmt.Sprintf("rethinkdb:localhost:28015/annotst%d", ts), } for _, storage := range storageToTest { log.Printf("testing storage: %s", storage) s := NewSetup(t, storage) s.testAddAndQuery() s.testDefaultValues() s.testTagStats() s.testBrokenJSON() s.testMetrics() s.testAllTags() s.testAll() s.Server.Close() s.Ctx.storage.Cleanup() prometheus.Unregister(s.Ctx) } }
func (reg *MetricRegistry) Unregister(name string) { if metric := reg.metrics[name]; metric != nil { log.Infof("metric unregistered;name:<%s>", name) prometheus.Unregister(metric) delete(reg.metrics, name) } }
// NewSensors creates new sensors from a raw config func NewSensors(raw []interface{}) ([]*Sensor, error) { var sensors []*Sensor if err := utils.DecodeRaw(raw, &sensors); err != nil { return nil, fmt.Errorf("Sensor configuration error: %v", err) } for _, s := range sensors { check, err := commands.NewCommand(s.CheckExec, s.Timeout) if err != nil { return nil, fmt.Errorf("could not parse check in sensor %s: %s", s.Name, err) } check.Name = fmt.Sprintf("%s.sensor", s.Name) s.checkCmd = check // the prometheus client lib's API here is baffling... they don't expose // an interface or embed their Opts type in each of the Opts "subtypes", // so we can't share the initialization. switch { case s.Type == "counter": s.collector = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: s.Namespace, Subsystem: s.Subsystem, Name: s.Name, Help: s.Help, }) case s.Type == "gauge": s.collector = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: s.Namespace, Subsystem: s.Subsystem, Name: s.Name, Help: s.Help, }) case s.Type == "histogram": s.collector = prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: s.Namespace, Subsystem: s.Subsystem, Name: s.Name, Help: s.Help, }) case s.Type == "summary": s.collector = prometheus.NewSummary(prometheus.SummaryOpts{ Namespace: s.Namespace, Subsystem: s.Subsystem, Name: s.Name, Help: s.Help, }) default: return nil, fmt.Errorf("invalid sensor type: %s", s.Type) } // we're going to unregister before every attempt to register // so that we can reload config prometheus.Unregister(s.collector) if err := prometheus.Register(s.collector); err != nil { return nil, err } } return sensors, nil }
func TestPrometheusCollector(t *testing.T) { c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(container *info.ContainerInfo) map[string]string { s := DefaultContainerLabels(container) s["zone.name"] = "hello" return s }) prometheus.MustRegister(c) defer prometheus.Unregister(c) testPrometheusCollector(t, c, "testdata/prometheus_metrics") }
func TestPrometheusCollector(t *testing.T) { c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(name string) map[string]string { return map[string]string{ "zone.name": "hello", } }) prometheus.MustRegister(c) defer prometheus.Unregister(c) testPrometheusCollector(t, c, "testdata/prometheus_metrics") }
func (h *handler) Stop() { if h.advertiser != nil { h.advertiser.stop() } if listener := h.listener; listener != nil { h.listener = nil if err := listener.Close(); err != nil { h.errorSink.Post(err) } } for _, c := range h.collectors() { prom.Unregister(c) } }
func TestPrometheusCollector_scrapeFailure(t *testing.T) { provider := &erroringSubcontainersInfoProvider{ successfulProvider: testSubcontainersInfoProvider{}, shouldFail: true, } c := NewPrometheusCollector(provider, func(name string) map[string]string { return map[string]string{ "zone.name": "hello", } }) prometheus.MustRegister(c) defer prometheus.Unregister(c) testPrometheusCollector(t, c, "testdata/prometheus_metrics_failure") provider.shouldFail = false testPrometheusCollector(t, c, "testdata/prometheus_metrics") }
func TestPrometheusCollector_scrapeFailure(t *testing.T) { provider := &erroringSubcontainersInfoProvider{ successfulProvider: testSubcontainersInfoProvider{}, shouldFail: true, } c := NewPrometheusCollector(provider, func(container *info.ContainerInfo) map[string]string { s := DefaultContainerLabels(container) s["zone.name"] = "hello" return s }) prometheus.MustRegister(c) defer prometheus.Unregister(c) testPrometheusCollector(t, c, "testdata/prometheus_metrics_failure") provider.shouldFail = false testPrometheusCollector(t, c, "testdata/prometheus_metrics") }
func export(json string) ([]byte, error) { exporter := NewExporter(&testScraper{json}) prometheus.MustRegister(exporter) defer prometheus.Unregister(exporter) server := httptest.NewServer(prometheus.UninstrumentedHandler()) defer server.Close() response, err := http.Get(server.URL) if err != nil { return nil, err } defer response.Body.Close() body, err := ioutil.ReadAll(response.Body) if err != nil { return nil, err } return body, nil }
func (s *OortValueStore) start() { s.vs = nil runtime.GC() log.Println("LocalID appears to be:", s.oort.GetLocalID()) var err error s.msgRing, err = ring.NewTCPMsgRing(&s.TCPMsgRingConfig) if err != nil { panic(err) } s.ValueStoreConfig.MsgRing = s.msgRing s.msgRing.SetRing(s.oort.Ring()) var restartChan chan error s.vs, restartChan = store.NewValueStore(&s.ValueStoreConfig) // TODO: I'm guessing we'll want to do something more graceful here; but // this will work for now since Systemd (or another service manager) should // restart the service. go func(restartChan chan error) { if err := <-restartChan; err != nil { panic(err) } }(restartChan) if err := s.vs.Startup(context.Background()); err != nil { panic(err) } go func(t *ring.TCPMsgRing) { t.Listen() log.Println("TCPMsgRing Listen() returned, shutdown?") }(s.msgRing) go func(t *ring.TCPMsgRing) { mRingChanges := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "RingChanges", Help: "Number of received ring changes.", }) mRingChangeCloses := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "RingChangeCloses", Help: "Number of connections closed due to ring changes.", }) mMsgToNodes := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToNodes", Help: "Number of times MsgToNode function has been called; single message to single node.", }) mMsgToNodeNoRings := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToNodeNoRings", Help: "Number of times MsgToNode function has been called with no ring yet available.", }) mMsgToNodeNoNodes := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToNodeNoNodes", Help: "Number of times MsgToNode function has been called with no matching node.", }) mMsgToOtherReplicas := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToOtherReplicas", Help: "Number of times MsgToOtherReplicas function has been called; 
single message to all replicas, excluding the local replica if responsible.", }) mMsgToOtherReplicasNoRings := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToOtherReplicasNoRings", Help: "Number of times MsgToOtherReplicas function has been called with no ring yet available.", }) mListenErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "ListenErrors", Help: "Number of errors trying to establish a TCP listener.", }) mIncomingConnections := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "IncomingConnections", Help: "Number of incoming TCP connections made.", }) mDials := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "Dials", Help: "Number of attempts to establish outgoing TCP connections.", }) mDialErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "DialErrors", Help: "Number of errors trying to establish outgoing TCP connections.", }) mOutgoingConnections := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "OutgoingConnections", Help: "Number of outgoing TCP connections established.", }) mMsgChanCreations := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgChanCreations", Help: "Number of internal message channels created.", }) mMsgToAddrs := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToAddrs", Help: "Number times internal function msgToAddr has been called.", }) mMsgToAddrQueues := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToAddrQueues", Help: "Number of messages msgToAddr successfully queued.", }) mMsgToAddrTimeoutDrops := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgToAddrTimeoutDrops", Help: "Number of messages msgToAddr dropped after timeout.", }) mMsgToAddrShutdownDrops := prometheus.NewCounter(prometheus.CounterOpts{ 
Namespace: "TCPMsgRing", Name: "MsgToAddrShutdownDrops", Help: "Number of messages msgToAddr dropped due to a shutdown.", }) mMsgReads := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgReads", Help: "Number of incoming messages read.", }) mMsgReadErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgReadErrors", Help: "Number of errors reading incoming messages.", }) mMsgWrites := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgWrites", Help: "Number of outgoing messages written.", }) mMsgWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "TCPMsgRing", Name: "MsgWriteErrors", Help: "Number of errors writing outgoing messages.", }) mValues := prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "Store", Name: "Values", Help: "Current number of values stored.", }) mValueBytes := prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "Store", Name: "ValueBytes", Help: "Current number of bytes for the values stored.", }) mLookups := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "Lookups", Help: "Count of lookup requests executed.", }) mLookupErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "LookupErrors", Help: "Count of lookup requests executed resulting in errors.", }) mReads := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "Reads", Help: "Count of read requests executed.", }) mReadErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "ReadErrors", Help: "Count of read requests executed resulting in errors.", }) mWrites := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "Writes", Help: "Count of write requests executed.", }) mWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "WriteErrors", Help: "Count of write requests executed resulting in errors.", }) mWritesOverridden := 
prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "WritesOverridden", Help: "Count of write requests that were outdated or repeated.", }) mDeletes := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "Deletes", Help: "Count of delete requests executed.", }) mDeleteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "DeleteErrors", Help: "Count of delete requests executed resulting in errors.", }) mDeletesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "DeletesOverridden", Help: "Count of delete requests that were outdated or repeated.", }) mOutBulkSets := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutBulkSets", Help: "Count of outgoing bulk-set messages in response to incoming pull replication messages.", }) mOutBulkSetValues := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutBulkSetValues", Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those in response to incoming pull-replication messages.", }) mOutBulkSetPushes := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutBulkSetPushes", Help: "Count of outgoing bulk-set messages due to push replication.", }) mOutBulkSetPushValues := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutBulkSetPushValues", Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those due to push replication.", }) mInBulkSets := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSets", Help: "Count of incoming bulk-set messages.", }) mInBulkSetDrops := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetDrops", Help: "Count of incoming bulk-set messages dropped due to the local system being overworked at the time.", }) mInBulkSetInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", 
Name: "InBulkSetInvalids", Help: "Count of incoming bulk-set messages that couldn't be parsed.", }) mInBulkSetWrites := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetWrites", Help: "Count of writes due to incoming bulk-set messages.", }) mInBulkSetWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetWriteErrors", Help: "Count of errors returned from writes due to incoming bulk-set messages.", }) mInBulkSetWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetWritesOverridden", Help: "Count of writes from incoming bulk-set messages that result in no change.", }) mOutBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutBulkSetAcks", Help: "Count of outgoing bulk-set-ack messages.", }) mInBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetAcks", Help: "Count of incoming bulk-set-ack messages.", }) mInBulkSetAckDrops := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetAckDrops", Help: "Count of incoming bulk-set-ack messages dropped due to the local system being overworked at the time.", }) mInBulkSetAckInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetAckInvalids", Help: "Count of incoming bulk-set-ack messages that couldn't be parsed.", }) mInBulkSetAckWrites := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetAckWrites", Help: "Count of writes (for local removal) due to incoming bulk-set-ack messages.", }) mInBulkSetAckWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InBulkSetAckWriteErrors", Help: "Count of errors returned from writes due to incoming bulk-set-ack messages.", }) mInBulkSetAckWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: 
"InBulkSetAckWritesOverridden", Help: "Count of writes from incoming bulk-set-ack messages that result in no change.", }) mOutPullReplications := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "OutPullReplications", Help: "Count of outgoing pull-replication messages.", }) mOutPullReplicationSeconds := prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "Store", Name: "OutPullReplicationSeconds", Help: "How long the last out pull replication pass took.", }) mInPullReplications := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InPullReplications", Help: "Count of incoming pull-replication messages.", }) mInPullReplicationDrops := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InPullReplicationDrops", Help: "Count of incoming pull-replication messages droppped due to the local system being overworked at the time.", }) mInPullReplicationInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "InPullReplicationInvalids", Help: "Count of incoming pull-replication messages that couldn't be parsed.", }) mExpiredDeletions := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "ExpiredDeletions", Help: "Count of recent deletes that have become old enough to be completely discarded.", }) mCompactions := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "Compactions", Help: "Count of disk file sets compacted due to their contents exceeding a staleness threshold. For example, this happens when enough of the values have been overwritten or deleted in more recent operations.", }) mSmallFileCompactions := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "Store", Name: "SmallFileCompactions", Help: "Count of disk file sets compacted due to the entire file size being too small. 
For example, this may happen when the store is shutdown and restarted.", }) prometheus.Register(mRingChanges) prometheus.Register(mRingChangeCloses) prometheus.Register(mMsgToNodes) prometheus.Register(mMsgToNodeNoRings) prometheus.Register(mMsgToNodeNoNodes) prometheus.Register(mMsgToOtherReplicas) prometheus.Register(mMsgToOtherReplicasNoRings) prometheus.Register(mListenErrors) prometheus.Register(mIncomingConnections) prometheus.Register(mDials) prometheus.Register(mDialErrors) prometheus.Register(mOutgoingConnections) prometheus.Register(mMsgChanCreations) prometheus.Register(mMsgToAddrs) prometheus.Register(mMsgToAddrQueues) prometheus.Register(mMsgToAddrTimeoutDrops) prometheus.Register(mMsgToAddrShutdownDrops) prometheus.Register(mMsgReads) prometheus.Register(mMsgReadErrors) prometheus.Register(mMsgWrites) prometheus.Register(mMsgWriteErrors) prometheus.Register(mValues) prometheus.Register(mValueBytes) prometheus.Register(mLookups) prometheus.Register(mLookupErrors) prometheus.Register(mReads) prometheus.Register(mReadErrors) prometheus.Register(mWrites) prometheus.Register(mWriteErrors) prometheus.Register(mWritesOverridden) prometheus.Register(mDeletes) prometheus.Register(mDeleteErrors) prometheus.Register(mDeletesOverridden) prometheus.Register(mOutBulkSets) prometheus.Register(mOutBulkSetValues) prometheus.Register(mOutBulkSetPushes) prometheus.Register(mOutBulkSetPushValues) prometheus.Register(mInBulkSets) prometheus.Register(mInBulkSetDrops) prometheus.Register(mInBulkSetInvalids) prometheus.Register(mInBulkSetWrites) prometheus.Register(mInBulkSetWriteErrors) prometheus.Register(mInBulkSetWritesOverridden) prometheus.Register(mOutBulkSetAcks) prometheus.Register(mInBulkSetAcks) prometheus.Register(mInBulkSetAckDrops) prometheus.Register(mInBulkSetAckInvalids) prometheus.Register(mInBulkSetAckWrites) prometheus.Register(mInBulkSetAckWriteErrors) prometheus.Register(mInBulkSetAckWritesOverridden) prometheus.Register(mOutPullReplications) 
prometheus.Register(mOutPullReplicationSeconds) prometheus.Register(mInPullReplications) prometheus.Register(mInPullReplicationDrops) prometheus.Register(mInPullReplicationInvalids) prometheus.Register(mExpiredDeletions) prometheus.Register(mCompactions) prometheus.Register(mSmallFileCompactions) tcpMsgRingStats := t.Stats(false) for !tcpMsgRingStats.Shutdown { time.Sleep(time.Minute) tcpMsgRingStats = t.Stats(false) mRingChanges.Add(float64(tcpMsgRingStats.RingChanges)) mRingChangeCloses.Add(float64(tcpMsgRingStats.RingChangeCloses)) mMsgToNodes.Add(float64(tcpMsgRingStats.MsgToNodes)) mMsgToNodeNoRings.Add(float64(tcpMsgRingStats.MsgToNodeNoRings)) mMsgToNodeNoNodes.Add(float64(tcpMsgRingStats.MsgToNodeNoNodes)) mMsgToOtherReplicas.Add(float64(tcpMsgRingStats.MsgToOtherReplicas)) mMsgToOtherReplicasNoRings.Add(float64(tcpMsgRingStats.MsgToOtherReplicasNoRings)) mListenErrors.Add(float64(tcpMsgRingStats.ListenErrors)) mIncomingConnections.Add(float64(tcpMsgRingStats.IncomingConnections)) mDials.Add(float64(tcpMsgRingStats.Dials)) mDialErrors.Add(float64(tcpMsgRingStats.DialErrors)) mOutgoingConnections.Add(float64(tcpMsgRingStats.OutgoingConnections)) mMsgChanCreations.Add(float64(tcpMsgRingStats.MsgChanCreations)) mMsgToAddrs.Add(float64(tcpMsgRingStats.MsgToAddrs)) mMsgToAddrQueues.Add(float64(tcpMsgRingStats.MsgToAddrQueues)) mMsgToAddrTimeoutDrops.Add(float64(tcpMsgRingStats.MsgToAddrTimeoutDrops)) mMsgToAddrShutdownDrops.Add(float64(tcpMsgRingStats.MsgToAddrShutdownDrops)) mMsgReads.Add(float64(tcpMsgRingStats.MsgReads)) mMsgReadErrors.Add(float64(tcpMsgRingStats.MsgReadErrors)) mMsgWrites.Add(float64(tcpMsgRingStats.MsgWrites)) mMsgWriteErrors.Add(float64(tcpMsgRingStats.MsgWriteErrors)) stats, err := s.vs.Stats(context.Background(), false) if err != nil { log.Printf("stats error: %s\n", err) } else if s, ok := stats.(*store.ValueStoreStats); ok { mValues.Set(float64(s.Values)) mValueBytes.Set(float64(s.ValueBytes)) mLookups.Add(float64(s.Lookups)) 
mLookupErrors.Add(float64(s.LookupErrors)) mReads.Add(float64(s.Reads)) mReadErrors.Add(float64(s.ReadErrors)) mWrites.Add(float64(s.Writes)) mWriteErrors.Add(float64(s.WriteErrors)) mWritesOverridden.Add(float64(s.WritesOverridden)) mDeletes.Add(float64(s.Deletes)) mDeleteErrors.Add(float64(s.DeleteErrors)) mDeletesOverridden.Add(float64(s.DeletesOverridden)) mOutBulkSets.Add(float64(s.OutBulkSets)) mOutBulkSetValues.Add(float64(s.OutBulkSetValues)) mOutBulkSetPushes.Add(float64(s.OutBulkSetPushes)) mOutBulkSetPushValues.Add(float64(s.OutBulkSetPushValues)) mInBulkSets.Add(float64(s.InBulkSets)) mInBulkSetDrops.Add(float64(s.InBulkSetDrops)) mInBulkSetInvalids.Add(float64(s.InBulkSetInvalids)) mInBulkSetWrites.Add(float64(s.InBulkSetWrites)) mInBulkSetWriteErrors.Add(float64(s.InBulkSetWriteErrors)) mInBulkSetWritesOverridden.Add(float64(s.InBulkSetWritesOverridden)) mOutBulkSetAcks.Add(float64(s.OutBulkSetAcks)) mInBulkSetAcks.Add(float64(s.InBulkSetAcks)) mInBulkSetAckDrops.Add(float64(s.InBulkSetAckDrops)) mInBulkSetAckInvalids.Add(float64(s.InBulkSetAckInvalids)) mInBulkSetAckWrites.Add(float64(s.InBulkSetAckWrites)) mInBulkSetAckWriteErrors.Add(float64(s.InBulkSetAckWriteErrors)) mInBulkSetAckWritesOverridden.Add(float64(s.InBulkSetAckWritesOverridden)) mOutPullReplications.Add(float64(s.OutPullReplications)) mOutPullReplicationSeconds.Set(float64(s.OutPullReplicationNanoseconds) / 1000000000) mInPullReplications.Add(float64(s.InPullReplications)) mInPullReplicationDrops.Add(float64(s.InPullReplicationDrops)) mInPullReplicationInvalids.Add(float64(s.InPullReplicationInvalids)) mExpiredDeletions.Add(float64(s.ExpiredDeletions)) mCompactions.Add(float64(s.Compactions)) mSmallFileCompactions.Add(float64(s.SmallFileCompactions)) } else { log.Printf("%s\n", stats) } } prometheus.Unregister(mRingChanges) prometheus.Unregister(mRingChangeCloses) prometheus.Unregister(mMsgToNodes) prometheus.Unregister(mMsgToNodeNoRings) prometheus.Unregister(mMsgToNodeNoNodes) 
prometheus.Unregister(mMsgToOtherReplicas) prometheus.Unregister(mMsgToOtherReplicasNoRings) prometheus.Unregister(mListenErrors) prometheus.Unregister(mIncomingConnections) prometheus.Unregister(mDials) prometheus.Unregister(mDialErrors) prometheus.Unregister(mOutgoingConnections) prometheus.Unregister(mMsgChanCreations) prometheus.Unregister(mMsgToAddrs) prometheus.Unregister(mMsgToAddrQueues) prometheus.Unregister(mMsgToAddrTimeoutDrops) prometheus.Unregister(mMsgToAddrShutdownDrops) prometheus.Unregister(mMsgReads) prometheus.Unregister(mMsgReadErrors) prometheus.Unregister(mMsgWrites) prometheus.Unregister(mMsgWriteErrors) prometheus.Unregister(mValues) prometheus.Unregister(mValueBytes) prometheus.Unregister(mLookups) prometheus.Unregister(mLookupErrors) prometheus.Unregister(mReads) prometheus.Unregister(mReadErrors) prometheus.Unregister(mWrites) prometheus.Unregister(mWriteErrors) prometheus.Unregister(mWritesOverridden) prometheus.Unregister(mDeletes) prometheus.Unregister(mDeleteErrors) prometheus.Unregister(mDeletesOverridden) prometheus.Unregister(mOutBulkSets) prometheus.Unregister(mOutBulkSetValues) prometheus.Unregister(mOutBulkSetPushes) prometheus.Unregister(mOutBulkSetPushValues) prometheus.Unregister(mInBulkSets) prometheus.Unregister(mInBulkSetDrops) prometheus.Unregister(mInBulkSetInvalids) prometheus.Unregister(mInBulkSetWrites) prometheus.Unregister(mInBulkSetWriteErrors) prometheus.Unregister(mInBulkSetWritesOverridden) prometheus.Unregister(mOutBulkSetAcks) prometheus.Unregister(mInBulkSetAcks) prometheus.Unregister(mInBulkSetAckDrops) prometheus.Unregister(mInBulkSetAckInvalids) prometheus.Unregister(mInBulkSetAckWrites) prometheus.Unregister(mInBulkSetAckWriteErrors) prometheus.Unregister(mInBulkSetAckWritesOverridden) prometheus.Unregister(mOutPullReplications) prometheus.Unregister(mOutPullReplicationSeconds) prometheus.Unregister(mInPullReplications) prometheus.Unregister(mInPullReplicationDrops) 
prometheus.Unregister(mInPullReplicationInvalids) prometheus.Unregister(mExpiredDeletions) prometheus.Unregister(mCompactions) prometheus.Unregister(mSmallFileCompactions) }(s.msgRing) }
// TestMonitorCollector feeds a canned `ceph status`-style JSON document (five
// monitors, quorum of five) to a MonitorCollector through a no-op connection,
// scrapes the collector via the default Prometheus handler over an HTTP test
// server, and asserts that the text-format output matches every expected
// metric-line regexp (capacity/usage bytes, store stats, clock skew, latency,
// quorum count). The scrape runs inside an immediately-invoked func so the
// deferred Unregister/Close fire per table entry rather than at test end.
// NOTE(review): the fixture below is a raw string literal — it must stay
// byte-identical, including its embedded newlines.
func TestMonitorCollector(t *testing.T) { for _, tt := range []struct { input string regexes []*regexp.Regexp }{ { ` { "health": { "health": { "health_services": [ { "mons": [ { "name": "test-mon01", "kb_total": 412718256, "kb_used": 1812852, "kb_avail": 389917500, "avail_percent": 94, "last_updated": "2015-12-28 15:54:03.763348", "store_stats": { "bytes_total": 1781282079, "bytes_sst": 1, "bytes_log": 609694, "bytes_misc": 1780672385, "last_updated": "0.000000" }, "health": "HEALTH_OK" }, { "name": "test-mon02", "kb_total": 412718256, "kb_used": 1875304, "kb_avail": 389855048, "avail_percent": 94, "last_updated": "2015-12-28 15:53:53.808657", "store_stats": { "bytes_total": 1844348214, "bytes_sst": 2, "bytes_log": 871605, "bytes_misc": 1843476609, "last_updated": "0.000000" }, "health": "HEALTH_OK" }, { "name": "test-mon03", "kb_total": 412718256, "kb_used": 2095356, "kb_avail": 389634996, "avail_percent": 94, "last_updated": "2015-12-28 15:53:06.292749", "store_stats": { "bytes_total": 2069468587, "bytes_sst": 3, "bytes_log": 871605, "bytes_misc": 2068596982, "last_updated": "0.000000" }, "health": "HEALTH_OK" }, { "name": "test-mon04", "kb_total": 412718256, "kb_used": 1726276, "kb_avail": 390004076, "avail_percent": 94, "last_updated": "2015-12-28 15:53:10.770775", "store_stats": { "bytes_total": 1691972147, "bytes_sst": 4, "bytes_log": 871605, "bytes_misc": 1691100542, "last_updated": "0.000000" }, "health": "HEALTH_OK" }, { "name": "test-mon05", "kb_total": 412718256, "kb_used": 1883228, "kb_avail": 389847124, "avail_percent": 94, "last_updated": "2015-12-28 15:53:11.407033", "store_stats": { "bytes_total": 1852485942, "bytes_sst": 5, "bytes_log": 871605, "bytes_misc": 1851614337, "last_updated": "0.000000" }, "health": "HEALTH_OK" } ] } ] }, "timechecks": { "epoch": 70, "round": 3362, "round_status": "finished", "mons": [ { "name": "test-mon01", "skew": 0.000000, "latency": 0.000000, "health": "HEALTH_OK" }, { "name": "test-mon02", "skew": -0.000002, 
"latency": 0.000815, "health": "HEALTH_OK" }, { "name": "test-mon03", "skew": -0.000002, "latency": 0.000829, "health": "HEALTH_OK" }, { "name": "test-mon04", "skew": -0.000019, "latency": 0.000609, "health": "HEALTH_OK" }, { "name": "test-mon05", "skew": -0.000628, "latency": 0.000659, "health": "HEALTH_OK" } ] }, "summary": [], "overall_status": "HEALTH_OK", "detail": [] }, "fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB", "election_epoch": 70, "quorum": [ 0, 1, 2, 3, 4 ], "monmap": { "epoch": 12, "fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB", "modified": "2015-11-25 07:58:56.388352", "created": "0.000000", "mons": [ { "rank": 0, "name": "test-mon01", "addr": "10.123.1.25:6789\/0" }, { "rank": 1, "name": "test-mon02", "addr": "10.123.1.26:6789\/0" }, { "rank": 2, "name": "test-mon03", "addr": "10.123.2.25:6789\/0" }, { "rank": 3, "name": "test-mon04", "addr": "10.123.2.26:6789\/0" }, { "rank": 4, "name": "test-mon05", "addr": "10.123.2.27:6789\/0" } ] } } `, []*regexp.Regexp{ regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon01"} 3.899175e`), regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon02"} 3.89855048e`), regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon03"} 3.89634996e`), regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon04"} 3.90004076e`), regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon05"} 3.89847124e`), regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon01"} 94`), regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon02"} 94`), regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon03"} 94`), regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon04"} 94`), regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon05"} 94`), regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon01"} 0`), regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon02"} -2e-06`), 
regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon03"} -2e-06`), regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon04"} -1.9e-05`), regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon05"} -0.000628`), regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon01"} 0`), regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon02"} 0.000815`), regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon03"} 0.000829`), regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon04"} 0.000609`), regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon05"} 0.000659`), regexp.MustCompile(`ceph_monitor_quorum_count 5`), regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon01"} 609694`), regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon02"} 871605`), regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon03"} 871605`), regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon04"} 871605`), regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon05"} 871605`), regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon01"} 1.780672385e`), regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon02"} 1.843476609e`), regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon03"} 2.068596982e`), regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon04"} 1.691100542e`), regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon05"} 1.851614337e`), regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon01"} 1`), regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon02"} 2`), regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon03"} 3`), regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon04"} 4`), regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon05"} 5`), 
regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon01"} 1.781282079e`), regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon02"} 1.844348214e`), regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon03"} 2.069468587e`), regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon04"} 1.691972147e`), regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon05"} 1.852485942e`), regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon01"} 4.12718256e`), regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon02"} 4.12718256e`), regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon03"} 4.12718256e`), regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon04"} 4.12718256e`), regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon05"} 4.12718256e`), regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon01"} 1.812852e`), regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon02"} 1.875304e`), regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon03"} 2.095356e`), regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon04"} 1.726276e`), regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon05"} 1.883228e`), }, }, } { func() { collector := NewMonitorCollector(NewNoopConn(tt.input)) if err := prometheus.Register(collector); err != nil { t.Fatalf("collector failed to register: %s", err) } defer prometheus.Unregister(collector) server := httptest.NewServer(prometheus.Handler()) defer server.Close() resp, err := http.Get(server.URL) if err != nil { t.Fatalf("unexpected failed response from prometheus: %s", err) } defer resp.Body.Close() buf, err := ioutil.ReadAll(resp.Body) if err != nil { t.Fatalf("failed reading server response: %s", err) } for _, re := range tt.regexes { if !re.Match(buf) { t.Errorf("failed matching: %q", re) } } }() } }
// Deregister removes all the metrics in the provided namespace from the // global metrics registry func Deregister(n *Namespace) { prometheus.Unregister(n) }
// ExampleRegister walks through registering, colliding with, unregistering,
// and re-registering metrics in the default registry. The trailing Output
// block is verified by "go test" and must not be edited casually.
func ExampleRegister() {
	// Imagine you have a worker pool and want to count the tasks completed.
	taskCounter := prometheus.NewCounter(prometheus.CounterOpts{
		Subsystem: "worker_pool",
		Name:      "completed_tasks_total",
		Help:      "Total number of tasks completed.",
	})
	// This will register fine.
	if err := prometheus.Register(taskCounter); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println("taskCounter registered.")
	}
	// Don't forget to tell the HTTP server about the Prometheus handler.
	// (In a real program, you still need to start the HTTP server...)
	http.Handle("/metrics", prometheus.Handler())

	// Now you can start workers and give every one of them a pointer to
	// taskCounter and let it increment it whenever it completes a task.
	taskCounter.Inc() // This has to happen somewhere in the worker code.

	// But wait, you want to see how individual workers perform. So you need
	// a vector of counters, with one element for each worker.
	taskCounterVec := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "worker_pool",
			Name:      "completed_tasks_total",
			Help:      "Total number of tasks completed.",
		},
		[]string{"worker_id"},
	)

	// Registering will fail because we already have a metric of that name.
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}

	// To fix, first unregister the old taskCounter.
	if prometheus.Unregister(taskCounter) {
		fmt.Println("taskCounter unregistered.")
	}

	// Try registering taskCounterVec again.
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}
	// Bummer! Still doesn't work.

	// Prometheus will not allow you to ever export metrics with
	// inconsistent help strings or label names. After unregistering, the
	// unregistered metrics will cease to show up in the /metrics HTTP
	// response, but the registry still remembers that those metrics had
	// been exported before. For this example, we will now choose a
	// different name. (In a real program, you would obviously not export
	// the obsolete metric in the first place.)
	taskCounterVec = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "worker_pool",
			Name:      "completed_tasks_by_id",
			Help:      "Total number of tasks completed.",
		},
		[]string{"worker_id"},
	)
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}
	// Finally it worked!

	// The workers have to tell taskCounterVec their id to increment the
	// right element in the metric vector.
	taskCounterVec.WithLabelValues("42").Inc() // Code from worker 42.

	// Each worker could also keep a reference to their own counter element
	// around. Pick the counter at initialization time of the worker.
	myCounter := taskCounterVec.WithLabelValues("42") // From worker 42 initialization code.
	myCounter.Inc()                                   // Somewhere in the code of that worker.

	// Note that something like WithLabelValues("42", "spurious arg") would
	// panic (because you have provided too many label values). If you want
	// to get an error instead, use GetMetricWithLabelValues(...) instead.
	notMyCounter, err := taskCounterVec.GetMetricWithLabelValues("42", "spurious arg")
	if err != nil {
		fmt.Println("Worker initialization failed:", err)
	}
	if notMyCounter == nil {
		fmt.Println("notMyCounter is nil.")
	}

	// A different (and somewhat tricky) approach is to use
	// ConstLabels. ConstLabels are pairs of label names and label values
	// that never change. You might ask what those labels are good for (and
	// rightfully so - if they never change, they could as well be part of
	// the metric name). There are essentially two use-cases: The first is
	// if labels are constant throughout the lifetime of a binary execution,
	// but they vary over time or between different instances of a running
	// binary. The second is what we have here: Each worker creates and
	// registers an own Counter instance where the only difference is in the
	// value of the ConstLabels. Those Counters can all be registered
	// because the different ConstLabel values guarantee that each worker
	// will increment a different Counter metric.
	counterOpts := prometheus.CounterOpts{
		Subsystem:   "worker_pool",
		Name:        "completed_tasks",
		Help:        "Total number of tasks completed.",
		ConstLabels: prometheus.Labels{"worker_id": "42"},
	}
	taskCounterForWorker42 := prometheus.NewCounter(counterOpts)
	if err := prometheus.Register(taskCounterForWorker42); err != nil {
		fmt.Println("taskCounterVForWorker42 not registered:", err)
	} else {
		fmt.Println("taskCounterForWorker42 registered.")
	}
	// Obviously, in real code, taskCounterForWorker42 would be a member
	// variable of a worker struct, and the "42" would be retrieved with a
	// GetId() method or something. The Counter would be created and
	// registered in the initialization code of the worker.

	// For the creation of the next Counter, we can recycle
	// counterOpts. Just change the ConstLabels.
	counterOpts.ConstLabels = prometheus.Labels{"worker_id": "2001"}
	taskCounterForWorker2001 := prometheus.NewCounter(counterOpts)
	if err := prometheus.Register(taskCounterForWorker2001); err != nil {
		fmt.Println("taskCounterVForWorker2001 not registered:", err)
	} else {
		fmt.Println("taskCounterForWorker2001 registered.")
	}

	taskCounterForWorker2001.Inc()
	taskCounterForWorker42.Inc()
	taskCounterForWorker2001.Inc()

	// Yet another approach would be to turn the workers themselves into
	// Collectors and register them. See the Collector example for details.

	// Output:
	// taskCounter registered.
	// taskCounterVec not registered: a previously registered descriptor with the same fully-qualified name as Desc{fqName: "worker_pool_completed_tasks_total", help: "Total number of tasks completed.", constLabels: {}, variableLabels: [worker_id]} has different label names or a different help string
	// taskCounter unregistered.
	// taskCounterVec not registered: a previously registered descriptor with the same fully-qualified name as Desc{fqName: "worker_pool_completed_tasks_total", help: "Total number of tasks completed.", constLabels: {}, variableLabels: [worker_id]} has different label names or a different help string
	// taskCounterVec registered.
	// Worker initialization failed: inconsistent label cardinality
	// notMyCounter is nil.
	// taskCounterForWorker42 registered.
	// taskCounterForWorker2001 registered.
}
// Periodically queries a Mesos slave and updates statistics of each running task func slavePoller(c *http.Client, conf *Config, frameworkRegistry *frameworkRegistry, slave Slave, erroredSlaves *map[string]struct{}) { var knownTasks map[string]taskMetric var monitoredTasks []MonitoredTask knownTasks = make(map[string]taskMetric) slaveStatsUrl := fmt.Sprintf("http://%s/monitor/statistics.json", slave.address()) constLabels := prometheus.Labels{"slave_pid": slave.Pid} cpusLimitGauge := newGaugeVec( constLabels, "CPU limit of the task.", "cpus_limit", ) cpusSystemTimeCounter := newCounterVec( constLabels, "Absolute CPU sytem time.", "cpus_system_time_seconds", ) cpusUserTimeCounter := newCounterVec( constLabels, "Absolute CPU user time.", "cpus_user_time_seconds", ) memLimitGauge := newGaugeVec( constLabels, "Maximum memory available to the task.", "mem_limit_bytes", ) memRssGauge := newGaugeVec( constLabels, "Current Memory usage.", "mem_rss_bytes", ) t := time.Tick(conf.MesosSlaveQueryInterval) for _ = range t { log.Debugf("Scraping slave '%s'", slave.Pid) availableTasks := make(map[string]struct{}) err := retrieveStats(c, &monitoredTasks, slaveStatsUrl) if err != nil { prometheus.Unregister(cpusLimitGauge) prometheus.Unregister(cpusSystemTimeCounter) prometheus.Unregister(cpusUserTimeCounter) prometheus.Unregister(memLimitGauge) prometheus.Unregister(memRssGauge) log.Errorf("Error retrieving stats from slave '%s' - Stopping goroutine", slave.Pid) (*erroredSlaves)[slave.Pid] = struct{}{} return } for _, item := range monitoredTasks { var frameworkName string var taskName string availableTasks[item.ExecutorId] = struct{}{} cpusLimit := item.Statistics.CpusLimit cpusSystemTime := item.Statistics.CpusSystemTimeSecs cpusUserTime := item.Statistics.CpusUserTimeSecs memLimit := float64(item.Statistics.MemLimitBytes) memRss := float64(item.Statistics.MemRssBytes) metric, ok := knownTasks[item.ExecutorId] if ok { frameworkName = metric.frameworkName taskName = 
metric.taskName } else { framework, err := frameworkRegistry.Get(item.FrameworkId) if err != nil { log.Debugf("Framework '%s' of task '%s' not registered - not scraping", item.FrameworkId, item.ExecutorId) continue } frameworkName = framework.Name taskName = findTaskName(item.ExecutorId, framework) if taskName == "" { log.Debugf("Could not find name of task of executor '%s' - skipping", item.ExecutorId) continue } log.Debugf("Found new task '%s'", item.ExecutorId) knownTasks[item.ExecutorId] = taskMetric{ frameworkName: frameworkName, taskName: taskName, } } cpusLimitGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusLimit) cpusSystemTimeCounter.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusSystemTime) cpusUserTimeCounter.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusUserTime) memLimitGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(memLimit) memRssGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(memRss) } // Remove tasks that have finished since the last check and unregister the metrics associated with the task for executorId, metric := range knownTasks { _, ok := availableTasks[executorId] if ok == false { log.Debugf("Removing finished task '%s'", executorId) cpusLimitGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName) cpusSystemTimeCounter.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName) cpusUserTimeCounter.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName) memLimitGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName) memRssGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName) delete(knownTasks, executorId) } } } }
// TestPoolUsageCollector feeds canned "ceph df"-style JSON through a
// PoolUsageCollector (via NoopConn) and checks the Prometheus text-format
// output: reMatch patterns must appear, reUnmatch patterns must not.
func TestPoolUsageCollector(t *testing.T) {
	// Silence collector logging for the duration of the table run.
	log.SetOutput(ioutil.Discard)
	for _, tt := range []struct {
		input              string
		reMatch, reUnmatch []*regexp.Regexp
	}{
		{
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			// Missing bytes_used should surface as 0, not an error.
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"objects": 5, "rd": 4, "wr": 6}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 0`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			// Missing objects count defaults to 0.
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "rd": 4, "wr": 6}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 0`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			// Missing read count defaults to 0.
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "wr": 6}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 0`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			// Missing write count defaults to 0.
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 0`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			// Deliberately broken JSON: no pool metrics may be emitted.
			input: ` {"pools": [ {{{{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}} ]}`,
			reMatch: []*regexp.Regexp{},
			reUnmatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes`),
				regexp.MustCompile(`pool_objects_total`),
				regexp.MustCompile(`pool_read_total`),
				regexp.MustCompile(`pool_write_total`),
			},
		},
		{
			// Multiple pools are reported independently.
			input: ` {"pools": [ {"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}}, {"name": "rbd-new", "id": 12, "stats": {"bytes_used": 50, "objects": 20, "rd": 10, "wr": 30}} ]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
				regexp.MustCompile(`pool_used_bytes{pool="rbd-new"} 50`),
				regexp.MustCompile(`pool_objects_total{pool="rbd-new"} 20`),
				regexp.MustCompile(`pool_read_total{pool="rbd-new"} 10`),
				regexp.MustCompile(`pool_write_total{pool="rbd-new"} 30`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
	} {
		// Anonymous function so the defers (Unregister, server/body close)
		// run at the end of each table case rather than at test exit.
		func() {
			collector := NewPoolUsageCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)
			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()
			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()
			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}
			for _, re := range tt.reMatch {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
			for _, re := range tt.reUnmatch {
				if re.Match(buf) {
					t.Errorf("should not have matched: %q", re)
				}
			}
		}()
	}
}
// TestClusterHealthCollector feeds canned "ceph status" JSON through a
// ClusterHealthCollector (via NoopConn) and asserts that each expected
// metric line appears in the scraped Prometheus text output.
func TestClusterHealthCollector(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		{
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "5 pgs degraded"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`degraded_pgs 5`),
			},
		},
		{
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "6 pgs stuck unclean"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`unclean_pgs 6`),
			},
		},
		{
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`undersized_pgs 7`),
			},
		},
		{
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "8 pgs stale"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`stale_pgs 8`),
			},
		},
		{
			// "recovery X/Y objects degraded" parses the numerator.
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 10/20 objects degraded"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`degraded_objects 10`),
			},
		},
		{
			` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "3/20 in osds are down"}]} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`osds_down 3`),
			},
		},
		{
			// OSD map counters are exported even when health is clean.
			` { "osdmap": { "osdmap": { "num_osds": 1200, "num_up_osds": 1200, "num_in_osds": 1190, "num_remapped_pgs": 10 } }, "health": {"summary": []} }`,
			[]*regexp.Regexp{
				regexp.MustCompile(`osds 1200`),
				regexp.MustCompile(`osds_up 1200`),
				regexp.MustCompile(`osds_in 1190`),
				regexp.MustCompile(`pgs_remapped 10`),
			},
		},
	} {
		// Anonymous function so the defers (Unregister, server/body close)
		// run at the end of each table case rather than at test exit.
		func() {
			collector := NewClusterHealthCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)
			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()
			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()
			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}
			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}
func unregisterMetrics() { prometheus.Unregister(timeouts) prometheus.Unregister(errors) prometheus.Unregister(requestSum) prometheus.Unregister(requestSuccess) prometheus.Unregister(requestDuration) prometheus.Unregister(connOpen) prometheus.Unregister(connError) prometheus.Unregister(bytesWritten) prometheus.Unregister(bytesRead) prometheus.Unregister(writeError) prometheus.Unregister(readError) prometheus.Unregister(statusCodes) prometheus.Unregister(errorMessages) }
// TestClusterHealthCollector feeds canned ceph status output — both JSON
// documents and plain "ceph -s" console text — through a
// ClusterHealthCollector (via NoopConn) and asserts that each expected
// metric line appears in the scraped Prometheus text output.
func TestClusterHealthCollector(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "5 pgs degraded"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`degraded_pgs 5`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "15 pgs stuck degraded"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_degraded_pgs 15`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "6 pgs unclean"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`unclean_pgs 6`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "16 pgs stuck unclean"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_unclean_pgs 16`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`undersized_pgs 7`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "17 pgs stuck undersized"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_undersized_pgs 17`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "8 pgs stale"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stale_pgs 8`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "18 pgs stuck stale"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_stale_pgs 18`),
			},
		},
		{
			// "recovery X/Y objects degraded" parses the numerator.
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 10/20 objects degraded"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`degraded_objects 10`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 20/40 objects misplaced"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`misplaced_objects 20`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 0, "num_up_osds": 0, "num_in_osds": 0, "num_remapped_pgs": 0 } }, "health": {"summary": [{"severity": "HEALTH_WARN", "summary": "3/20 in osds are down"}]} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`osds_down 3`),
			},
		},
		{
			// OSD map counters are exported even when health is clean.
			input: ` { "osdmap": { "osdmap": { "num_osds": 1200, "num_up_osds": 1200, "num_in_osds": 1190, "num_remapped_pgs": 10 } }, "health": {"summary": []} }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`osds 1200`),
				regexp.MustCompile(`osds_up 1200`),
				regexp.MustCompile(`osds_in 1190`),
				regexp.MustCompile(`pgs_remapped 10`),
			},
		},
		{
			// overall_status maps HEALTH_OK/WARN/ERR onto 0/1/2.
			input: ` { "osdmap": { "osdmap": { "num_osds": 1200, "num_up_osds": 1200, "num_in_osds": 1190, "num_remapped_pgs": 10 } }, "health": { "overall_status": "HEALTH_OK" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 0`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 1200, "num_up_osds": 1200, "num_in_osds": 1190, "num_remapped_pgs": 10 } }, "health": { "overall_status": "HEALTH_WARN" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 1`),
			},
		},
		{
			input: ` { "osdmap": { "osdmap": { "num_osds": 1200, "num_up_osds": 1200, "num_in_osds": 1190, "num_remapped_pgs": 10 } }, "health": { "overall_status": "HEALTH_ERR" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 2`),
			},
		},
		{
			// Plain "ceph -s" console output: recovery/client IO rates are
			// parsed from text (MB/s etc. become base-unit floats).
			input: ` $ sudo ceph -s cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37 health HEALTH_OK monmap e3: 3 mons at {mon01,mon02,mon03} recovery io 5779 MB/s, 4 keys/s, 1522 objects/s client io 4273 kB/s rd, 2740 MB/s wr, 2863 op/s `,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`recovery_io_bytes 5.779e`),
				regexp.MustCompile(`recovery_io_keys 4`),
				regexp.MustCompile(`recovery_io_objects 1522`),
				regexp.MustCompile(`client_io_ops 2863`),
				regexp.MustCompile(`client_io_read_bytes 4.273e`),
				regexp.MustCompile(`client_io_write_bytes 2.74e`),
			},
		},
		{
			// Split read/write op rates: total ops is their sum.
			input: ` $ sudo ceph -s cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37 health HEALTH_OK monmap e3: 3 mons at {mon01,mon02,mon03} recovery io 5779 MB/s, 4 keys/s, 1522 objects/s client io 2863 op/s rd, 5847 op/s wr `,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`recovery_io_bytes 5.779e`),
				regexp.MustCompile(`recovery_io_keys 4`),
				regexp.MustCompile(`recovery_io_objects 1522`),
				regexp.MustCompile(`client_io_ops 8710`),
				regexp.MustCompile(`client_io_read_ops 2863`),
				regexp.MustCompile(`client_io_write_ops 5847`),
			},
		},
	} {
		// Anonymous function so the defers (Unregister, server/body close)
		// run at the end of each table case rather than at test exit.
		func() {
			collector := NewClusterHealthCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)
			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()
			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()
			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}
			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}
func (s *OortGroupStore) start() { s.gs = nil runtime.GC() log.Println("LocalID appears to be:", s.oort.GetLocalID()) var err error s.msgRing, err = ring.NewTCPMsgRing(&s.TCPMsgRingConfig) if err != nil { panic(err) } s.GroupStoreConfig.MsgRing = s.msgRing s.msgRing.SetRing(s.oort.Ring()) var restartChan chan error s.gs, restartChan = store.NewGroupStore(&s.GroupStoreConfig) // TODO: I'm guessing we'll want to do something more graceful here; but // this will work for now since Systemd (or another service manager) should // restart the service. go func(restartChan chan error) { if err := <-restartChan; err != nil { panic(err) } }(restartChan) if err := s.gs.Startup(context.Background()); err != nil { panic(err) } go func(t *ring.TCPMsgRing) { t.Listen() log.Println("TCPMsgRing Listen() returned, shutdown?") }(s.msgRing) go func(t *ring.TCPMsgRing) { mValues := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "Values", Help: "Current number of values stored.", }) mValueBytes := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "ValueBytes", Help: "Current number of bytes for the values stored.", }) mLookups := prometheus.NewCounter(prometheus.CounterOpts{ Name: "Lookups", Help: "Count of lookup requests executed.", }) mLookupErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "LookupErrors", Help: "Count of lookup requests executed resulting in errors.", }) mLookupGroups := prometheus.NewCounter(prometheus.CounterOpts{ Name: "LookupGroups", Help: "Count of lookup-group requests executed.", }) mLookupGroupItems := prometheus.NewCounter(prometheus.CounterOpts{ Name: "LookupGroupItems", Help: "Count of items lookup-group requests have returned.", }) mLookupGroupErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "LookupGroupErrors", Help: "Count of errors lookup-group requests have returned.", }) mReads := prometheus.NewCounter(prometheus.CounterOpts{ Name: "Reads", Help: "Count of read requests executed.", }) mReadErrors := 
prometheus.NewCounter(prometheus.CounterOpts{ Name: "ReadErrors", Help: "Count of read requests executed resulting in errors.", }) mReadGroups := prometheus.NewCounter(prometheus.CounterOpts{ Name: "ReadGroups", Help: "Count of read-group requests executed.", }) mReadGroupItems := prometheus.NewCounter(prometheus.CounterOpts{ Name: "ReadGroupItems", Help: "Count of items read-group requests have returned.", }) mReadGroupErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "ReadGroupErrors", Help: "Count of errors read-group requests have returned.", }) mWrites := prometheus.NewCounter(prometheus.CounterOpts{ Name: "Writes", Help: "Count of write requests executed.", }) mWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "WriteErrors", Help: "Count of write requests executed resulting in errors.", }) mWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Name: "WritesOverridden", Help: "Count of write requests that were outdated or repeated.", }) mDeletes := prometheus.NewCounter(prometheus.CounterOpts{ Name: "Deletes", Help: "Count of delete requests executed.", }) mDeleteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "DeleteErrors", Help: "Count of delete requests executed resulting in errors.", }) mDeletesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Name: "DeletesOverridden", Help: "Count of delete requests that were outdated or repeated.", }) mOutBulkSets := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutBulkSets", Help: "Count of outgoing bulk-set messages in response to incoming pull replication messages.", }) mOutBulkSetValues := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutBulkSetValues", Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those in response to incoming pull-replication messages.", }) mOutBulkSetPushes := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutBulkSetPushes", Help: "Count of outgoing bulk-set messages due to 
push replication.", }) mOutBulkSetPushValues := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutBulkSetPushValues", Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those due to push replication.", }) mInBulkSets := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSets", Help: "Count of incoming bulk-set messages.", }) mInBulkSetDrops := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetDrops", Help: "Count of incoming bulk-set messages dropped due to the local system being overworked at the time.", }) mInBulkSetInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetInvalids", Help: "Count of incoming bulk-set messages that couldn't be parsed.", }) mInBulkSetWrites := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetWrites", Help: "Count of writes due to incoming bulk-set messages.", }) mInBulkSetWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetWriteErrors", Help: "Count of errors returned from writes due to incoming bulk-set messages.", }) mInBulkSetWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetWritesOverridden", Help: "Count of writes from incoming bulk-set messages that result in no change.", }) mOutBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutBulkSetAcks", Help: "Count of outgoing bulk-set-ack messages.", }) mInBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAcks", Help: "Count of incoming bulk-set-ack messages.", }) mInBulkSetAckDrops := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAckDrops", Help: "Count of incoming bulk-set-ack messages dropped due to the local system being overworked at the time.", }) mInBulkSetAckInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAckInvalids", Help: "Count of incoming bulk-set-ack messages that couldn't be parsed.", }) mInBulkSetAckWrites := 
prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAckWrites", Help: "Count of writes (for local removal) due to incoming bulk-set-ack messages.", }) mInBulkSetAckWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAckWriteErrors", Help: "Count of errors returned from writes due to incoming bulk-set-ack messages.", }) mInBulkSetAckWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InBulkSetAckWritesOverridden", Help: "Count of writes from incoming bulk-set-ack messages that result in no change.", }) mOutPullReplications := prometheus.NewCounter(prometheus.CounterOpts{ Name: "OutPullReplications", Help: "Count of outgoing pull-replication messages.", }) mOutPullReplicationSeconds := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "OutPullReplicationSeconds", Help: "How long the last out pull replication pass took.", }) mInPullReplications := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InPullReplications", Help: "Count of incoming pull-replication messages.", }) mInPullReplicationDrops := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InPullReplicationDrops", Help: "Count of incoming pull-replication messages droppped due to the local system being overworked at the time.", }) mInPullReplicationInvalids := prometheus.NewCounter(prometheus.CounterOpts{ Name: "InPullReplicationInvalids", Help: "Count of incoming pull-replication messages that couldn't be parsed.", }) mExpiredDeletions := prometheus.NewCounter(prometheus.CounterOpts{ Name: "ExpiredDeletions", Help: "Count of recent deletes that have become old enough to be completely discarded.", }) mCompactions := prometheus.NewCounter(prometheus.CounterOpts{ Name: "Compactions", Help: "Count of disk file sets compacted due to their contents exceeding a staleness threshold. 
For example, this happens when enough of the values have been overwritten or deleted in more recent operations.", }) mSmallFileCompactions := prometheus.NewCounter(prometheus.CounterOpts{ Name: "SmallFileCompactions", Help: "Count of disk file sets compacted due to the entire file size being too small. For example, this may happen when the store is shutdown and restarted.", }) prometheus.Register(mValues) prometheus.Register(mValueBytes) prometheus.Register(mLookups) prometheus.Register(mLookupErrors) prometheus.Register(mLookupGroups) prometheus.Register(mLookupGroupItems) prometheus.Register(mLookupGroupErrors) prometheus.Register(mReads) prometheus.Register(mReadErrors) prometheus.Register(mReadGroups) prometheus.Register(mReadGroupItems) prometheus.Register(mReadGroupErrors) prometheus.Register(mWrites) prometheus.Register(mWriteErrors) prometheus.Register(mWritesOverridden) prometheus.Register(mDeletes) prometheus.Register(mDeleteErrors) prometheus.Register(mDeletesOverridden) prometheus.Register(mOutBulkSets) prometheus.Register(mOutBulkSetValues) prometheus.Register(mOutBulkSetPushes) prometheus.Register(mOutBulkSetPushValues) prometheus.Register(mInBulkSets) prometheus.Register(mInBulkSetDrops) prometheus.Register(mInBulkSetInvalids) prometheus.Register(mInBulkSetWrites) prometheus.Register(mInBulkSetWriteErrors) prometheus.Register(mInBulkSetWritesOverridden) prometheus.Register(mOutBulkSetAcks) prometheus.Register(mInBulkSetAcks) prometheus.Register(mInBulkSetAckDrops) prometheus.Register(mInBulkSetAckInvalids) prometheus.Register(mInBulkSetAckWrites) prometheus.Register(mInBulkSetAckWriteErrors) prometheus.Register(mInBulkSetAckWritesOverridden) prometheus.Register(mOutPullReplications) prometheus.Register(mOutPullReplicationSeconds) prometheus.Register(mInPullReplications) prometheus.Register(mInPullReplicationDrops) prometheus.Register(mInPullReplicationInvalids) prometheus.Register(mExpiredDeletions) prometheus.Register(mCompactions) 
prometheus.Register(mSmallFileCompactions) tcpMsgRingStats := t.Stats(false) for !tcpMsgRingStats.Shutdown { time.Sleep(time.Minute) tcpMsgRingStats = t.Stats(false) log.Printf("%v\n", tcpMsgRingStats) stats, err := s.gs.Stats(context.Background(), false) if err != nil { log.Printf("stats error: %s\n", err) } else if s, ok := stats.(*store.GroupStoreStats); ok { mValues.Set(float64(s.Values)) mValueBytes.Set(float64(s.ValueBytes)) mLookups.Add(float64(s.Lookups)) mLookupErrors.Add(float64(s.LookupErrors)) mLookupGroups.Add(float64(s.LookupGroups)) mLookupGroupItems.Add(float64(s.LookupGroupItems)) mLookupGroupErrors.Add(float64(s.LookupGroupErrors)) mReads.Add(float64(s.Reads)) mReadErrors.Add(float64(s.ReadErrors)) mReadGroups.Add(float64(s.ReadGroups)) mReadGroupItems.Add(float64(s.ReadGroupItems)) mReadGroupErrors.Add(float64(s.ReadGroupErrors)) mWrites.Add(float64(s.Writes)) mWriteErrors.Add(float64(s.WriteErrors)) mWritesOverridden.Add(float64(s.WritesOverridden)) mDeletes.Add(float64(s.Deletes)) mDeleteErrors.Add(float64(s.DeleteErrors)) mDeletesOverridden.Add(float64(s.DeletesOverridden)) mOutBulkSets.Add(float64(s.OutBulkSets)) mOutBulkSetValues.Add(float64(s.OutBulkSetValues)) mOutBulkSetPushes.Add(float64(s.OutBulkSetPushes)) mOutBulkSetPushValues.Add(float64(s.OutBulkSetPushValues)) mInBulkSets.Add(float64(s.InBulkSets)) mInBulkSetDrops.Add(float64(s.InBulkSetDrops)) mInBulkSetInvalids.Add(float64(s.InBulkSetInvalids)) mInBulkSetWrites.Add(float64(s.InBulkSetWrites)) mInBulkSetWriteErrors.Add(float64(s.InBulkSetWriteErrors)) mInBulkSetWritesOverridden.Add(float64(s.InBulkSetWritesOverridden)) mOutBulkSetAcks.Add(float64(s.OutBulkSetAcks)) mInBulkSetAcks.Add(float64(s.InBulkSetAcks)) mInBulkSetAckDrops.Add(float64(s.InBulkSetAckDrops)) mInBulkSetAckInvalids.Add(float64(s.InBulkSetAckInvalids)) mInBulkSetAckWrites.Add(float64(s.InBulkSetAckWrites)) mInBulkSetAckWriteErrors.Add(float64(s.InBulkSetAckWriteErrors)) 
mInBulkSetAckWritesOverridden.Add(float64(s.InBulkSetAckWritesOverridden)) mOutPullReplications.Add(float64(s.OutPullReplications)) mOutPullReplicationSeconds.Set(float64(s.OutPullReplicationNanoseconds) / 1000000000) mInPullReplications.Add(float64(s.InPullReplications)) mInPullReplicationDrops.Add(float64(s.InPullReplicationDrops)) mInPullReplicationInvalids.Add(float64(s.InPullReplicationInvalids)) mExpiredDeletions.Add(float64(s.ExpiredDeletions)) mCompactions.Add(float64(s.Compactions)) mSmallFileCompactions.Add(float64(s.SmallFileCompactions)) } else { log.Printf("%s\n", stats) } } prometheus.Unregister(mValues) prometheus.Unregister(mValueBytes) prometheus.Unregister(mLookups) prometheus.Unregister(mLookupErrors) prometheus.Unregister(mLookupGroups) prometheus.Unregister(mLookupGroupItems) prometheus.Unregister(mLookupGroupErrors) prometheus.Unregister(mReads) prometheus.Unregister(mReadErrors) prometheus.Unregister(mReadGroups) prometheus.Unregister(mReadGroupItems) prometheus.Unregister(mReadGroupErrors) prometheus.Unregister(mWrites) prometheus.Unregister(mWriteErrors) prometheus.Unregister(mWritesOverridden) prometheus.Unregister(mDeletes) prometheus.Unregister(mDeleteErrors) prometheus.Unregister(mDeletesOverridden) prometheus.Unregister(mOutBulkSets) prometheus.Unregister(mOutBulkSetValues) prometheus.Unregister(mOutBulkSetPushes) prometheus.Unregister(mOutBulkSetPushValues) prometheus.Unregister(mInBulkSets) prometheus.Unregister(mInBulkSetDrops) prometheus.Unregister(mInBulkSetInvalids) prometheus.Unregister(mInBulkSetWrites) prometheus.Unregister(mInBulkSetWriteErrors) prometheus.Unregister(mInBulkSetWritesOverridden) prometheus.Unregister(mOutBulkSetAcks) prometheus.Unregister(mInBulkSetAcks) prometheus.Unregister(mInBulkSetAckDrops) prometheus.Unregister(mInBulkSetAckInvalids) prometheus.Unregister(mInBulkSetAckWrites) prometheus.Unregister(mInBulkSetAckWriteErrors) prometheus.Unregister(mInBulkSetAckWritesOverridden) 
prometheus.Unregister(mOutPullReplications) prometheus.Unregister(mOutPullReplicationSeconds) prometheus.Unregister(mInPullReplications) prometheus.Unregister(mInPullReplicationDrops) prometheus.Unregister(mInPullReplicationInvalids) prometheus.Unregister(mExpiredDeletions) prometheus.Unregister(mCompactions) prometheus.Unregister(mSmallFileCompactions) }(s.msgRing) }
func TestClusterUsage(t *testing.T) { log.SetOutput(ioutil.Discard) for _, tt := range []struct { input string reMatch, reUnmatch []*regexp.Regexp }{ { input: ` { "stats": { "total_bytes": 10, "total_used_bytes": 6, "total_avail_bytes": 4, "total_objects": 1 } }`, reMatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes 10`), regexp.MustCompile(`ceph_cluster_used_bytes 6`), regexp.MustCompile(`ceph_cluster_available_bytes 4`), regexp.MustCompile(`ceph_cluster_objects 1`), }, reUnmatch: []*regexp.Regexp{}, }, { input: ` { "stats": { "total_used_bytes": 6, "total_avail_bytes": 4, "total_objects": 1 } }`, reMatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes 0`), regexp.MustCompile(`ceph_cluster_used_bytes 6`), regexp.MustCompile(`ceph_cluster_available_bytes 4`), regexp.MustCompile(`ceph_cluster_objects 1`), }, reUnmatch: []*regexp.Regexp{}, }, { input: ` { "stats": { "total_bytes": 10, "total_avail_bytes": 4, "total_objects": 1 } }`, reMatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes 10`), regexp.MustCompile(`ceph_cluster_used_bytes 0`), regexp.MustCompile(`ceph_cluster_available_bytes 4`), regexp.MustCompile(`ceph_cluster_objects 1`), }, reUnmatch: []*regexp.Regexp{}, }, { input: ` { "stats": { "total_bytes": 10, "total_used_bytes": 6, "total_objects": 1 } }`, reMatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes 10`), regexp.MustCompile(`ceph_cluster_used_bytes 6`), regexp.MustCompile(`ceph_cluster_available_bytes 0`), regexp.MustCompile(`ceph_cluster_objects 1`), }, reUnmatch: []*regexp.Regexp{}, }, { input: ` { "stats": { "total_bytes": 10, "total_used_bytes": 6, "total_avail_bytes": 4 } }`, reMatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes 10`), regexp.MustCompile(`ceph_cluster_used_bytes 6`), regexp.MustCompile(`ceph_cluster_available_bytes 4`), regexp.MustCompile(`ceph_cluster_objects 0`), }, reUnmatch: []*regexp.Regexp{}, }, { input: ` { "stats": {{{ 
"total_bytes": 10, "total_used_bytes": 6, "total_avail_bytes": 4, "total_objects": 1 } }`, reMatch: []*regexp.Regexp{}, reUnmatch: []*regexp.Regexp{ regexp.MustCompile(`ceph_cluster_capacity_bytes`), regexp.MustCompile(`ceph_cluster_used_bytes`), regexp.MustCompile(`ceph_cluster_available_bytes`), regexp.MustCompile(`ceph_cluster_objects`), }, }, } { func() { collector := NewClusterUsageCollector(NewNoopConn(tt.input)) if err := prometheus.Register(collector); err != nil { t.Fatalf("collector failed to register: %s", err) } defer prometheus.Unregister(collector) server := httptest.NewServer(prometheus.Handler()) defer server.Close() resp, err := http.Get(server.URL) if err != nil { t.Fatalf("unexpected failed response from prometheus: %s", err) } defer resp.Body.Close() buf, err := ioutil.ReadAll(resp.Body) if err != nil { t.Fatalf("failed reading server response: %s", err) } for _, re := range tt.reMatch { if !re.Match(buf) { t.Errorf("failed matching: %q", re) } } for _, re := range tt.reUnmatch { if re.Match(buf) { t.Errorf("should not have matched: %q", re) } } }() } }