func TestPrometheusCollector(t *testing.T) {
	c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(name string) map[string]string {
		return map[string]string{
			"zone.name": "hello",
		}
	})
	prometheus.MustRegister(c)
	defer prometheus.Unregister(c)

	rw := httptest.NewRecorder()
	prometheus.Handler().ServeHTTP(rw, &http.Request{})

	metricsFile := "testdata/prometheus_metrics"
	wantMetrics, err := ioutil.ReadFile(metricsFile)
	if err != nil {
		t.Fatalf("unable to read input test file %s", metricsFile)
	}

	wantLines := strings.Split(string(wantMetrics), "\n")
	gotLines := strings.Split(rw.Body.String(), "\n")

	// Until the Prometheus Go client library offers better testability
	// (https://github.com/prometheus/client_golang/issues/58), we simply compare
	// verbatim text-format metrics outputs, but ignore certain metric lines
	// whose value depends on the current time or local circumstances.
	for i, want := range wantLines {
		if !includeRe.MatchString(want) || ignoreRe.MatchString(want) {
			continue
		}
		if want != gotLines[i] {
			t.Fatalf("want %s, got %s", want, gotLines[i])
		}
	}
}
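// The includeRe and ignoreRe filters referenced above are package-level
// variables defined elsewhere in the test file. A minimal sketch of what
// such filters might look like (hypothetical patterns, not the actual ones):
var (
	// Only compare metric lines emitted by the collector under test.
	includeRe = regexp.MustCompile(`^container_`)
	// Skip lines whose values depend on the current time.
	ignoreRe = regexp.MustCompile(`^container_last_seen`)
)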
func TestServer(t *testing.T) {
	ts := int(time.Now().Unix())
	storageToTest := []string{
		fmt.Sprintf("local:./test-%d.db", ts),
		fmt.Sprintf("rethinkdb:localhost:28015/annotst%d", ts),
	}

	for _, storage := range storageToTest {
		log.Printf("testing storage: %s", storage)

		s := NewSetup(t, storage)

		s.testAddAndQuery()
		s.testDefaultValues()
		s.testTagStats()
		s.testBrokenJSON()
		s.testMetrics()
		s.testAllTags()
		s.testAll()

		s.Server.Close()
		s.Ctx.storage.Cleanup()
		prometheus.Unregister(s.Ctx)
	}
}
func (reg *MetricRegistry) Unregister(name string) {
	if metric := reg.metrics[name]; metric != nil {
		log.Infof("metric unregistered;name:<%s>", name)
		prometheus.Unregister(metric)
		delete(reg.metrics, name)
	}
}
Example #4
// NewSensors creates new sensors from a raw config
func NewSensors(raw []interface{}) ([]*Sensor, error) {
	var sensors []*Sensor
	if err := utils.DecodeRaw(raw, &sensors); err != nil {
		return nil, fmt.Errorf("Sensor configuration error: %v", err)
	}
	for _, s := range sensors {
		check, err := commands.NewCommand(s.CheckExec, s.Timeout)
		if err != nil {
			return nil, fmt.Errorf("could not parse check in sensor %s: %s", s.Name, err)
		}
		check.Name = fmt.Sprintf("%s.sensor", s.Name)
		s.checkCmd = check

		// the prometheus client lib's API here is baffling... they don't expose
		// an interface or embed their Opts type in each of the Opts "subtypes",
		// so we can't share the initialization.
		switch s.Type {
		case "counter":
			s.collector = prometheus.NewCounter(prometheus.CounterOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "gauge":
			s.collector = prometheus.NewGauge(prometheus.GaugeOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "histogram":
			s.collector = prometheus.NewHistogram(prometheus.HistogramOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "summary":
			s.collector = prometheus.NewSummary(prometheus.SummaryOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		default:
			return nil, fmt.Errorf("invalid sensor type: %s", s.Type)
		}
		// we're going to unregister before every attempt to register
		// so that we can reload config
		prometheus.Unregister(s.collector)
		if err := prometheus.Register(s.collector); err != nil {
			return nil, err
		}
	}
	return sensors, nil
}
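// As the comment in NewSensors notes, the prometheus Opts "subtypes" share
// no interface. CounterOpts and GaugeOpts are, however, defined in
// client_golang as `type CounterOpts Opts` and `type GaugeOpts Opts`, so
// those two cases could share one Opts value via a type conversion.
// HistogramOpts and SummaryOpts carry extra fields (Buckets, Objectives)
// and still need their own literals. A sketch (hypothetical helper, not
// part of the code above):
func newSensorCollector(s *Sensor) (prometheus.Collector, error) {
	opts := prometheus.Opts{
		Namespace: s.Namespace,
		Subsystem: s.Subsystem,
		Name:      s.Name,
		Help:      s.Help,
	}
	switch s.Type {
	case "counter":
		return prometheus.NewCounter(prometheus.CounterOpts(opts)), nil
	case "gauge":
		return prometheus.NewGauge(prometheus.GaugeOpts(opts)), nil
	default:
		return nil, fmt.Errorf("invalid sensor type: %s", s.Type)
	}
}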
Example #5
func TestPrometheusCollector(t *testing.T) {
	c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(container *info.ContainerInfo) map[string]string {
		s := DefaultContainerLabels(container)
		s["zone.name"] = "hello"
		return s
	})
	prometheus.MustRegister(c)
	defer prometheus.Unregister(c)

	testPrometheusCollector(t, c, "testdata/prometheus_metrics")
}
Example #6
func TestPrometheusCollector(t *testing.T) {
	c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(name string) map[string]string {
		return map[string]string{
			"zone.name": "hello",
		}
	})
	prometheus.MustRegister(c)
	defer prometheus.Unregister(c)

	testPrometheusCollector(t, c, "testdata/prometheus_metrics")
}
Example #7
func (h *handler) Stop() {
	if h.advertiser != nil {
		h.advertiser.stop()
	}

	if listener := h.listener; listener != nil {
		h.listener = nil
		if err := listener.Close(); err != nil {
			h.errorSink.Post(err)
		}
	}

	for _, c := range h.collectors() {
		prom.Unregister(c)
	}
}
Example #8
func TestPrometheusCollector_scrapeFailure(t *testing.T) {
	provider := &erroringSubcontainersInfoProvider{
		successfulProvider: testSubcontainersInfoProvider{},
		shouldFail:         true,
	}

	c := NewPrometheusCollector(provider, func(name string) map[string]string {
		return map[string]string{
			"zone.name": "hello",
		}
	})
	prometheus.MustRegister(c)
	defer prometheus.Unregister(c)

	testPrometheusCollector(t, c, "testdata/prometheus_metrics_failure")

	provider.shouldFail = false

	testPrometheusCollector(t, c, "testdata/prometheus_metrics")
}
Example #9
func TestPrometheusCollector_scrapeFailure(t *testing.T) {
	provider := &erroringSubcontainersInfoProvider{
		successfulProvider: testSubcontainersInfoProvider{},
		shouldFail:         true,
	}

	c := NewPrometheusCollector(provider, func(container *info.ContainerInfo) map[string]string {
		s := DefaultContainerLabels(container)
		s["zone.name"] = "hello"
		return s
	})
	prometheus.MustRegister(c)
	defer prometheus.Unregister(c)

	testPrometheusCollector(t, c, "testdata/prometheus_metrics_failure")

	provider.shouldFail = false

	testPrometheusCollector(t, c, "testdata/prometheus_metrics")
}
Example #10
func export(json string) ([]byte, error) {
	exporter := NewExporter(&testScraper{json})
	prometheus.MustRegister(exporter)
	defer prometheus.Unregister(exporter)

	server := httptest.NewServer(prometheus.UninstrumentedHandler())
	defer server.Close()

	response, err := http.Get(server.URL)
	if err != nil {
		return nil, err
	}

	defer response.Body.Close()
	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil, err
	}

	return body, nil
}
Example #11
func (s *OortValueStore) start() {
	s.vs = nil
	runtime.GC()
	log.Println("LocalID appears to be:", s.oort.GetLocalID())
	var err error
	s.msgRing, err = ring.NewTCPMsgRing(&s.TCPMsgRingConfig)
	if err != nil {
		panic(err)
	}
	s.ValueStoreConfig.MsgRing = s.msgRing
	s.msgRing.SetRing(s.oort.Ring())
	var restartChan chan error
	s.vs, restartChan = store.NewValueStore(&s.ValueStoreConfig)
	// TODO: I'm guessing we'll want to do something more graceful here; but
	// this will work for now since Systemd (or another service manager) should
	// restart the service.
	go func(restartChan chan error) {
		if err := <-restartChan; err != nil {
			panic(err)
		}
	}(restartChan)
	if err := s.vs.Startup(context.Background()); err != nil {
		panic(err)
	}
	go func(t *ring.TCPMsgRing) {
		t.Listen()
		log.Println("TCPMsgRing Listen() returned, shutdown?")
	}(s.msgRing)
	go func(t *ring.TCPMsgRing) {
		mRingChanges := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "RingChanges",
			Help:      "Number of received ring changes.",
		})
		mRingChangeCloses := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "RingChangeCloses",
			Help:      "Number of connections closed due to ring changes.",
		})
		mMsgToNodes := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToNodes",
			Help:      "Number of times MsgToNode function has been called; single message to single node.",
		})
		mMsgToNodeNoRings := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToNodeNoRings",
			Help:      "Number of times MsgToNode function has been called with no ring yet available.",
		})
		mMsgToNodeNoNodes := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToNodeNoNodes",
			Help:      "Number of times MsgToNode function has been called with no matching node.",
		})
		mMsgToOtherReplicas := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToOtherReplicas",
			Help:      "Number of times MsgToOtherReplicas function has been called; single message to all replicas, excluding the local replica if responsible.",
		})
		mMsgToOtherReplicasNoRings := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToOtherReplicasNoRings",
			Help:      "Number of times MsgToOtherReplicas function has been called with no ring yet available.",
		})
		mListenErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "ListenErrors",
			Help:      "Number of errors trying to establish a TCP listener.",
		})
		mIncomingConnections := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "IncomingConnections",
			Help:      "Number of incoming TCP connections made.",
		})
		mDials := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "Dials",
			Help:      "Number of attempts to establish outgoing TCP connections.",
		})
		mDialErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "DialErrors",
			Help:      "Number of errors trying to establish outgoing TCP connections.",
		})
		mOutgoingConnections := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "OutgoingConnections",
			Help:      "Number of outgoing TCP connections established.",
		})
		mMsgChanCreations := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgChanCreations",
			Help:      "Number of internal message channels created.",
		})
		mMsgToAddrs := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToAddrs",
			Help:      "Number times internal function msgToAddr has been called.",
		})
		mMsgToAddrQueues := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToAddrQueues",
			Help:      "Number of messages msgToAddr successfully queued.",
		})
		mMsgToAddrTimeoutDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToAddrTimeoutDrops",
			Help:      "Number of messages msgToAddr dropped after timeout.",
		})
		mMsgToAddrShutdownDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgToAddrShutdownDrops",
			Help:      "Number of messages msgToAddr dropped due to a shutdown.",
		})
		mMsgReads := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgReads",
			Help:      "Number of incoming messages read.",
		})
		mMsgReadErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgReadErrors",
			Help:      "Number of errors reading incoming messages.",
		})
		mMsgWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgWrites",
			Help:      "Number of outgoing messages written.",
		})
		mMsgWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "TCPMsgRing",
			Name:      "MsgWriteErrors",
			Help:      "Number of errors writing outgoing messages.",
		})
		mValues := prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "Store",
			Name:      "Values",
			Help:      "Current number of values stored.",
		})
		mValueBytes := prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "Store",
			Name:      "ValueBytes",
			Help:      "Current number of bytes for the values stored.",
		})
		mLookups := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "Lookups",
			Help:      "Count of lookup requests executed.",
		})
		mLookupErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "LookupErrors",
			Help:      "Count of lookup requests executed resulting in errors.",
		})
		mReads := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "Reads",
			Help:      "Count of read requests executed.",
		})
		mReadErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "ReadErrors",
			Help:      "Count of read requests executed resulting in errors.",
		})
		mWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "Writes",
			Help:      "Count of write requests executed.",
		})
		mWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "WriteErrors",
			Help:      "Count of write requests executed resulting in errors.",
		})
		mWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "WritesOverridden",
			Help:      "Count of write requests that were outdated or repeated.",
		})
		mDeletes := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "Deletes",
			Help:      "Count of delete requests executed.",
		})
		mDeleteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "DeleteErrors",
			Help:      "Count of delete requests executed resulting in errors.",
		})
		mDeletesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "DeletesOverridden",
			Help:      "Count of delete requests that were outdated or repeated.",
		})
		mOutBulkSets := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutBulkSets",
			Help:      "Count of outgoing bulk-set messages in response to incoming pull replication messages.",
		})
		mOutBulkSetValues := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutBulkSetValues",
			Help:      "Count of values in outgoing bulk-set messages; these bulk-set messages are those in response to incoming pull-replication messages.",
		})
		mOutBulkSetPushes := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutBulkSetPushes",
			Help:      "Count of outgoing bulk-set messages due to push replication.",
		})
		mOutBulkSetPushValues := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutBulkSetPushValues",
			Help:      "Count of values in outgoing bulk-set messages; these bulk-set messages are those due to push replication.",
		})
		mInBulkSets := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSets",
			Help:      "Count of incoming bulk-set messages.",
		})
		mInBulkSetDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetDrops",
			Help:      "Count of incoming bulk-set messages dropped due to the local system being overworked at the time.",
		})
		mInBulkSetInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetInvalids",
			Help:      "Count of incoming bulk-set messages that couldn't be parsed.",
		})
		mInBulkSetWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetWrites",
			Help:      "Count of writes due to incoming bulk-set messages.",
		})
		mInBulkSetWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetWriteErrors",
			Help:      "Count of errors returned from writes due to incoming bulk-set messages.",
		})
		mInBulkSetWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetWritesOverridden",
			Help:      "Count of writes from incoming bulk-set messages that result in no change.",
		})
		mOutBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutBulkSetAcks",
			Help:      "Count of outgoing bulk-set-ack messages.",
		})
		mInBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAcks",
			Help:      "Count of incoming bulk-set-ack messages.",
		})
		mInBulkSetAckDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAckDrops",
			Help:      "Count of incoming bulk-set-ack messages dropped due to the local system being overworked at the time.",
		})
		mInBulkSetAckInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAckInvalids",
			Help:      "Count of incoming bulk-set-ack messages that couldn't be parsed.",
		})
		mInBulkSetAckWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAckWrites",
			Help:      "Count of writes (for local removal) due to incoming bulk-set-ack messages.",
		})
		mInBulkSetAckWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAckWriteErrors",
			Help:      "Count of errors returned from writes due to incoming bulk-set-ack messages.",
		})
		mInBulkSetAckWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InBulkSetAckWritesOverridden",
			Help:      "Count of writes from incoming bulk-set-ack messages that result in no change.",
		})
		mOutPullReplications := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "OutPullReplications",
			Help:      "Count of outgoing pull-replication messages.",
		})
		mOutPullReplicationSeconds := prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "Store",
			Name:      "OutPullReplicationSeconds",
			Help:      "How long the last out pull replication pass took.",
		})
		mInPullReplications := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InPullReplications",
			Help:      "Count of incoming pull-replication messages.",
		})
		mInPullReplicationDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InPullReplicationDrops",
			Help:      "Count of incoming pull-replication messages droppped due to the local system being overworked at the time.",
		})
		mInPullReplicationInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "InPullReplicationInvalids",
			Help:      "Count of incoming pull-replication messages that couldn't be parsed.",
		})
		mExpiredDeletions := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "ExpiredDeletions",
			Help:      "Count of recent deletes that have become old enough to be completely discarded.",
		})
		mCompactions := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "Compactions",
			Help:      "Count of disk file sets compacted due to their contents exceeding a staleness threshold. For example, this happens when enough of the values have been overwritten or deleted in more recent operations.",
		})
		mSmallFileCompactions := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "Store",
			Name:      "SmallFileCompactions",
			Help:      "Count of disk file sets compacted due to the entire file size being too small. For example, this may happen when the store is shutdown and restarted.",
		})
		prometheus.Register(mRingChanges)
		prometheus.Register(mRingChangeCloses)
		prometheus.Register(mMsgToNodes)
		prometheus.Register(mMsgToNodeNoRings)
		prometheus.Register(mMsgToNodeNoNodes)
		prometheus.Register(mMsgToOtherReplicas)
		prometheus.Register(mMsgToOtherReplicasNoRings)
		prometheus.Register(mListenErrors)
		prometheus.Register(mIncomingConnections)
		prometheus.Register(mDials)
		prometheus.Register(mDialErrors)
		prometheus.Register(mOutgoingConnections)
		prometheus.Register(mMsgChanCreations)
		prometheus.Register(mMsgToAddrs)
		prometheus.Register(mMsgToAddrQueues)
		prometheus.Register(mMsgToAddrTimeoutDrops)
		prometheus.Register(mMsgToAddrShutdownDrops)
		prometheus.Register(mMsgReads)
		prometheus.Register(mMsgReadErrors)
		prometheus.Register(mMsgWrites)
		prometheus.Register(mMsgWriteErrors)
		prometheus.Register(mValues)
		prometheus.Register(mValueBytes)
		prometheus.Register(mLookups)
		prometheus.Register(mLookupErrors)
		prometheus.Register(mReads)
		prometheus.Register(mReadErrors)
		prometheus.Register(mWrites)
		prometheus.Register(mWriteErrors)
		prometheus.Register(mWritesOverridden)
		prometheus.Register(mDeletes)
		prometheus.Register(mDeleteErrors)
		prometheus.Register(mDeletesOverridden)
		prometheus.Register(mOutBulkSets)
		prometheus.Register(mOutBulkSetValues)
		prometheus.Register(mOutBulkSetPushes)
		prometheus.Register(mOutBulkSetPushValues)
		prometheus.Register(mInBulkSets)
		prometheus.Register(mInBulkSetDrops)
		prometheus.Register(mInBulkSetInvalids)
		prometheus.Register(mInBulkSetWrites)
		prometheus.Register(mInBulkSetWriteErrors)
		prometheus.Register(mInBulkSetWritesOverridden)
		prometheus.Register(mOutBulkSetAcks)
		prometheus.Register(mInBulkSetAcks)
		prometheus.Register(mInBulkSetAckDrops)
		prometheus.Register(mInBulkSetAckInvalids)
		prometheus.Register(mInBulkSetAckWrites)
		prometheus.Register(mInBulkSetAckWriteErrors)
		prometheus.Register(mInBulkSetAckWritesOverridden)
		prometheus.Register(mOutPullReplications)
		prometheus.Register(mOutPullReplicationSeconds)
		prometheus.Register(mInPullReplications)
		prometheus.Register(mInPullReplicationDrops)
		prometheus.Register(mInPullReplicationInvalids)
		prometheus.Register(mExpiredDeletions)
		prometheus.Register(mCompactions)
		prometheus.Register(mSmallFileCompactions)
		tcpMsgRingStats := t.Stats(false)
		for !tcpMsgRingStats.Shutdown {
			time.Sleep(time.Minute)
			tcpMsgRingStats = t.Stats(false)
			mRingChanges.Add(float64(tcpMsgRingStats.RingChanges))
			mRingChangeCloses.Add(float64(tcpMsgRingStats.RingChangeCloses))
			mMsgToNodes.Add(float64(tcpMsgRingStats.MsgToNodes))
			mMsgToNodeNoRings.Add(float64(tcpMsgRingStats.MsgToNodeNoRings))
			mMsgToNodeNoNodes.Add(float64(tcpMsgRingStats.MsgToNodeNoNodes))
			mMsgToOtherReplicas.Add(float64(tcpMsgRingStats.MsgToOtherReplicas))
			mMsgToOtherReplicasNoRings.Add(float64(tcpMsgRingStats.MsgToOtherReplicasNoRings))
			mListenErrors.Add(float64(tcpMsgRingStats.ListenErrors))
			mIncomingConnections.Add(float64(tcpMsgRingStats.IncomingConnections))
			mDials.Add(float64(tcpMsgRingStats.Dials))
			mDialErrors.Add(float64(tcpMsgRingStats.DialErrors))
			mOutgoingConnections.Add(float64(tcpMsgRingStats.OutgoingConnections))
			mMsgChanCreations.Add(float64(tcpMsgRingStats.MsgChanCreations))
			mMsgToAddrs.Add(float64(tcpMsgRingStats.MsgToAddrs))
			mMsgToAddrQueues.Add(float64(tcpMsgRingStats.MsgToAddrQueues))
			mMsgToAddrTimeoutDrops.Add(float64(tcpMsgRingStats.MsgToAddrTimeoutDrops))
			mMsgToAddrShutdownDrops.Add(float64(tcpMsgRingStats.MsgToAddrShutdownDrops))
			mMsgReads.Add(float64(tcpMsgRingStats.MsgReads))
			mMsgReadErrors.Add(float64(tcpMsgRingStats.MsgReadErrors))
			mMsgWrites.Add(float64(tcpMsgRingStats.MsgWrites))
			mMsgWriteErrors.Add(float64(tcpMsgRingStats.MsgWriteErrors))
			stats, err := s.vs.Stats(context.Background(), false)
			if err != nil {
				log.Printf("stats error: %s\n", err)
			} else if s, ok := stats.(*store.ValueStoreStats); ok { // note: s shadows the receiver in this block
				mValues.Set(float64(s.Values))
				mValueBytes.Set(float64(s.ValueBytes))
				mLookups.Add(float64(s.Lookups))
				mLookupErrors.Add(float64(s.LookupErrors))
				mReads.Add(float64(s.Reads))
				mReadErrors.Add(float64(s.ReadErrors))
				mWrites.Add(float64(s.Writes))
				mWriteErrors.Add(float64(s.WriteErrors))
				mWritesOverridden.Add(float64(s.WritesOverridden))
				mDeletes.Add(float64(s.Deletes))
				mDeleteErrors.Add(float64(s.DeleteErrors))
				mDeletesOverridden.Add(float64(s.DeletesOverridden))
				mOutBulkSets.Add(float64(s.OutBulkSets))
				mOutBulkSetValues.Add(float64(s.OutBulkSetValues))
				mOutBulkSetPushes.Add(float64(s.OutBulkSetPushes))
				mOutBulkSetPushValues.Add(float64(s.OutBulkSetPushValues))
				mInBulkSets.Add(float64(s.InBulkSets))
				mInBulkSetDrops.Add(float64(s.InBulkSetDrops))
				mInBulkSetInvalids.Add(float64(s.InBulkSetInvalids))
				mInBulkSetWrites.Add(float64(s.InBulkSetWrites))
				mInBulkSetWriteErrors.Add(float64(s.InBulkSetWriteErrors))
				mInBulkSetWritesOverridden.Add(float64(s.InBulkSetWritesOverridden))
				mOutBulkSetAcks.Add(float64(s.OutBulkSetAcks))
				mInBulkSetAcks.Add(float64(s.InBulkSetAcks))
				mInBulkSetAckDrops.Add(float64(s.InBulkSetAckDrops))
				mInBulkSetAckInvalids.Add(float64(s.InBulkSetAckInvalids))
				mInBulkSetAckWrites.Add(float64(s.InBulkSetAckWrites))
				mInBulkSetAckWriteErrors.Add(float64(s.InBulkSetAckWriteErrors))
				mInBulkSetAckWritesOverridden.Add(float64(s.InBulkSetAckWritesOverridden))
				mOutPullReplications.Add(float64(s.OutPullReplications))
				mOutPullReplicationSeconds.Set(float64(s.OutPullReplicationNanoseconds) / 1000000000)
				mInPullReplications.Add(float64(s.InPullReplications))
				mInPullReplicationDrops.Add(float64(s.InPullReplicationDrops))
				mInPullReplicationInvalids.Add(float64(s.InPullReplicationInvalids))
				mExpiredDeletions.Add(float64(s.ExpiredDeletions))
				mCompactions.Add(float64(s.Compactions))
				mSmallFileCompactions.Add(float64(s.SmallFileCompactions))
			} else {
				log.Printf("%s\n", stats)
			}
		}
		prometheus.Unregister(mRingChanges)
		prometheus.Unregister(mRingChangeCloses)
		prometheus.Unregister(mMsgToNodes)
		prometheus.Unregister(mMsgToNodeNoRings)
		prometheus.Unregister(mMsgToNodeNoNodes)
		prometheus.Unregister(mMsgToOtherReplicas)
		prometheus.Unregister(mMsgToOtherReplicasNoRings)
		prometheus.Unregister(mListenErrors)
		prometheus.Unregister(mIncomingConnections)
		prometheus.Unregister(mDials)
		prometheus.Unregister(mDialErrors)
		prometheus.Unregister(mOutgoingConnections)
		prometheus.Unregister(mMsgChanCreations)
		prometheus.Unregister(mMsgToAddrs)
		prometheus.Unregister(mMsgToAddrQueues)
		prometheus.Unregister(mMsgToAddrTimeoutDrops)
		prometheus.Unregister(mMsgToAddrShutdownDrops)
		prometheus.Unregister(mMsgReads)
		prometheus.Unregister(mMsgReadErrors)
		prometheus.Unregister(mMsgWrites)
		prometheus.Unregister(mMsgWriteErrors)
		prometheus.Unregister(mValues)
		prometheus.Unregister(mValueBytes)
		prometheus.Unregister(mLookups)
		prometheus.Unregister(mLookupErrors)
		prometheus.Unregister(mReads)
		prometheus.Unregister(mReadErrors)
		prometheus.Unregister(mWrites)
		prometheus.Unregister(mWriteErrors)
		prometheus.Unregister(mWritesOverridden)
		prometheus.Unregister(mDeletes)
		prometheus.Unregister(mDeleteErrors)
		prometheus.Unregister(mDeletesOverridden)
		prometheus.Unregister(mOutBulkSets)
		prometheus.Unregister(mOutBulkSetValues)
		prometheus.Unregister(mOutBulkSetPushes)
		prometheus.Unregister(mOutBulkSetPushValues)
		prometheus.Unregister(mInBulkSets)
		prometheus.Unregister(mInBulkSetDrops)
		prometheus.Unregister(mInBulkSetInvalids)
		prometheus.Unregister(mInBulkSetWrites)
		prometheus.Unregister(mInBulkSetWriteErrors)
		prometheus.Unregister(mInBulkSetWritesOverridden)
		prometheus.Unregister(mOutBulkSetAcks)
		prometheus.Unregister(mInBulkSetAcks)
		prometheus.Unregister(mInBulkSetAckDrops)
		prometheus.Unregister(mInBulkSetAckInvalids)
		prometheus.Unregister(mInBulkSetAckWrites)
		prometheus.Unregister(mInBulkSetAckWriteErrors)
		prometheus.Unregister(mInBulkSetAckWritesOverridden)
		prometheus.Unregister(mOutPullReplications)
		prometheus.Unregister(mOutPullReplicationSeconds)
		prometheus.Unregister(mInPullReplications)
		prometheus.Unregister(mInPullReplicationDrops)
		prometheus.Unregister(mInPullReplicationInvalids)
		prometheus.Unregister(mExpiredDeletions)
		prometheus.Unregister(mCompactions)
		prometheus.Unregister(mSmallFileCompactions)
	}(s.msgRing)
}
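// The Register/Unregister calls above are fully symmetric, so the same set
// of collectors could be driven from a single slice. A sketch of that
// pattern (a hypothetical refactoring, not what the code above does):
func registerAll(collectors []prometheus.Collector) (unregister func()) {
	for _, c := range collectors {
		prometheus.Register(c) // errors ignored, as in the code above
	}
	return func() {
		for _, c := range collectors {
			prometheus.Unregister(c)
		}
	}
}

// Usage inside the stats goroutine:
//
//	unregister := registerAll([]prometheus.Collector{mRingChanges, mRingChangeCloses /* ... */})
//	defer unregister()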
Example #12
func TestMonitorCollector(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		{
			`
{
    "health": {
        "health": {
            "health_services": [
                {
                    "mons": [
                        {
                            "name": "test-mon01",
                            "kb_total": 412718256,
                            "kb_used": 1812852,
                            "kb_avail": 389917500,
                            "avail_percent": 94,
                            "last_updated": "2015-12-28 15:54:03.763348",
                            "store_stats": {
                                "bytes_total": 1781282079,
                                "bytes_sst": 1,
                                "bytes_log": 609694,
                                "bytes_misc": 1780672385,
                                "last_updated": "0.000000"
                            },
                            "health": "HEALTH_OK"
                        },
                        {
                            "name": "test-mon02",
                            "kb_total": 412718256,
                            "kb_used": 1875304,
                            "kb_avail": 389855048,
                            "avail_percent": 94,
                            "last_updated": "2015-12-28 15:53:53.808657",
                            "store_stats": {
                                "bytes_total": 1844348214,
                                "bytes_sst": 2,
                                "bytes_log": 871605,
                                "bytes_misc": 1843476609,
                                "last_updated": "0.000000"
                            },
                            "health": "HEALTH_OK"
                        },
                        {
                            "name": "test-mon03",
                            "kb_total": 412718256,
                            "kb_used": 2095356,
                            "kb_avail": 389634996,
                            "avail_percent": 94,
                            "last_updated": "2015-12-28 15:53:06.292749",
                            "store_stats": {
                                "bytes_total": 2069468587,
                                "bytes_sst": 3,
                                "bytes_log": 871605,
                                "bytes_misc": 2068596982,
                                "last_updated": "0.000000"
                            },
                            "health": "HEALTH_OK"
                        },
                        {
                            "name": "test-mon04",
                            "kb_total": 412718256,
                            "kb_used": 1726276,
                            "kb_avail": 390004076,
                            "avail_percent": 94,
                            "last_updated": "2015-12-28 15:53:10.770775",
                            "store_stats": {
                                "bytes_total": 1691972147,
                                "bytes_sst": 4,
                                "bytes_log": 871605,
                                "bytes_misc": 1691100542,
                                "last_updated": "0.000000"
                            },
                            "health": "HEALTH_OK"
                        },
                        {
                            "name": "test-mon05",
                            "kb_total": 412718256,
                            "kb_used": 1883228,
                            "kb_avail": 389847124,
                            "avail_percent": 94,
                            "last_updated": "2015-12-28 15:53:11.407033",
                            "store_stats": {
                                "bytes_total": 1852485942,
                                "bytes_sst": 5,
                                "bytes_log": 871605,
                                "bytes_misc": 1851614337,
                                "last_updated": "0.000000"
                            },
                            "health": "HEALTH_OK"
                        }
                    ]
                }
            ]
        },
        "timechecks": {
            "epoch": 70,
            "round": 3362,
            "round_status": "finished",
            "mons": [
                {
                    "name": "test-mon01",
                    "skew": 0.000000,
                    "latency": 0.000000,
                    "health": "HEALTH_OK"
                },
                {
                    "name": "test-mon02",
                    "skew": -0.000002,
                    "latency": 0.000815,
                    "health": "HEALTH_OK"
                },
                {
                    "name": "test-mon03",
                    "skew": -0.000002,
                    "latency": 0.000829,
                    "health": "HEALTH_OK"
                },
                {
                    "name": "test-mon04",
                    "skew": -0.000019,
                    "latency": 0.000609,
                    "health": "HEALTH_OK"
                },
                {
                    "name": "test-mon05",
                    "skew": -0.000628,
                    "latency": 0.000659,
                    "health": "HEALTH_OK"
                }
            ]
        },
        "summary": [],
        "overall_status": "HEALTH_OK",
        "detail": []
    },
    "fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB",
    "election_epoch": 70,
    "quorum": [
        0,
        1,
        2,
        3,
        4
    ],
    "monmap": {
        "epoch": 12,
        "fsid": "6C9BF03E-044E-4EEB-9C5F-145A54ECF7DB",
        "modified": "2015-11-25 07:58:56.388352",
        "created": "0.000000",
        "mons": [
            {
                "rank": 0,
                "name": "test-mon01",
                "addr": "10.123.1.25:6789\/0"
            },
            {
                "rank": 1,
                "name": "test-mon02",
                "addr": "10.123.1.26:6789\/0"
            },
            {
                "rank": 2,
                "name": "test-mon03",
                "addr": "10.123.2.25:6789\/0"
            },
            {
                "rank": 3,
                "name": "test-mon04",
                "addr": "10.123.2.26:6789\/0"
            },
            {
                "rank": 4,
                "name": "test-mon05",
                "addr": "10.123.2.27:6789\/0"
            }
        ]
    }
}
`,
			[]*regexp.Regexp{
				regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon01"} 3.899175e`),
				regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon02"} 3.89855048e`),
				regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon03"} 3.89634996e`),
				regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon04"} 3.90004076e`),
				regexp.MustCompile(`ceph_monitor_avail_bytes{monitor="test-mon05"} 3.89847124e`),
				regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon01"} 94`),
				regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon02"} 94`),
				regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon03"} 94`),
				regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon04"} 94`),
				regexp.MustCompile(`ceph_monitor_avail_percent{monitor="test-mon05"} 94`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon01"} 0`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon02"} -2e-06`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon03"} -2e-06`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon04"} -1.9e-05`),
				regexp.MustCompile(`ceph_monitor_clock_skew_seconds{monitor="test-mon05"} -0.000628`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon01"} 0`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon02"} 0.000815`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon03"} 0.000829`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon04"} 0.000609`),
				regexp.MustCompile(`ceph_monitor_latency_seconds{monitor="test-mon05"} 0.000659`),
				regexp.MustCompile(`ceph_monitor_quorum_count 5`),
				regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon01"} 609694`),
				regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon02"} 871605`),
				regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon03"} 871605`),
				regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon04"} 871605`),
				regexp.MustCompile(`ceph_monitor_store_log_bytes{monitor="test-mon05"} 871605`),
				regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon01"} 1.780672385e`),
				regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon02"} 1.843476609e`),
				regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon03"} 2.068596982e`),
				regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon04"} 1.691100542e`),
				regexp.MustCompile(`ceph_monitor_store_misc_bytes{monitor="test-mon05"} 1.851614337e`),
				regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon01"} 1`),
				regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon02"} 2`),
				regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon03"} 3`),
				regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon04"} 4`),
				regexp.MustCompile(`ceph_monitor_store_sst_bytes{monitor="test-mon05"} 5`),
				regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon01"} 1.781282079e`),
				regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon02"} 1.844348214e`),
				regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon03"} 2.069468587e`),
				regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon04"} 1.691972147e`),
				regexp.MustCompile(`ceph_monitor_store_capacity_bytes{monitor="test-mon05"} 1.852485942e`),
				regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon01"} 4.12718256e`),
				regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon02"} 4.12718256e`),
				regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon03"} 4.12718256e`),
				regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon04"} 4.12718256e`),
				regexp.MustCompile(`ceph_monitor_capacity_bytes{monitor="test-mon05"} 4.12718256e`),
				regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon01"} 1.812852e`),
				regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon02"} 1.875304e`),
				regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon03"} 2.095356e`),
				regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon04"} 1.726276e`),
				regexp.MustCompile(`ceph_monitor_used_bytes{monitor="test-mon05"} 1.883228e`),
			},
		},
	} {
		func() {
			collector := NewMonitorCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}
Example #13
// Deregister removes all the metrics in the provided namespace from the
// global metrics registry.
func Deregister(n *Namespace) {
	prometheus.Unregister(n)
}
Example #14
func ExampleRegister() {
	// Imagine you have a worker pool and want to count the tasks completed.
	taskCounter := prometheus.NewCounter(prometheus.CounterOpts{
		Subsystem: "worker_pool",
		Name:      "completed_tasks_total",
		Help:      "Total number of tasks completed.",
	})
	// This will register fine.
	if err := prometheus.Register(taskCounter); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println("taskCounter registered.")
	}
	// Don't forget to tell the HTTP server about the Prometheus handler.
	// (In a real program, you still need to start the HTTP server...)
	http.Handle("/metrics", prometheus.Handler())

	// Now you can start workers and give every one of them a pointer to
	// taskCounter and let it increment it whenever it completes a task.
	taskCounter.Inc() // This has to happen somewhere in the worker code.

	// But wait, you want to see how individual workers perform. So you need
	// a vector of counters, with one element for each worker.
	taskCounterVec := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "worker_pool",
			Name:      "completed_tasks_total",
			Help:      "Total number of tasks completed.",
		},
		[]string{"worker_id"},
	)

	// Registering will fail because we already have a metric of that name.
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}

	// To fix, first unregister the old taskCounter.
	if prometheus.Unregister(taskCounter) {
		fmt.Println("taskCounter unregistered.")
	}

	// Try registering taskCounterVec again.
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}
	// Bummer! Still doesn't work.

	// Prometheus will not allow you to ever export metrics with
	// inconsistent help strings or label names. After unregistering, the
	// unregistered metrics will cease to show up in the /metrics HTTP
	// response, but the registry still remembers that those metrics had
	// been exported before. For this example, we will now choose a
	// different name. (In a real program, you would obviously not export
	// the obsolete metric in the first place.)
	taskCounterVec = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "worker_pool",
			Name:      "completed_tasks_by_id",
			Help:      "Total number of tasks completed.",
		},
		[]string{"worker_id"},
	)
	if err := prometheus.Register(taskCounterVec); err != nil {
		fmt.Println("taskCounterVec not registered:", err)
	} else {
		fmt.Println("taskCounterVec registered.")
	}
	// Finally it worked!

	// The workers have to tell taskCounterVec their id to increment the
	// right element in the metric vector.
	taskCounterVec.WithLabelValues("42").Inc() // Code from worker 42.

	// Each worker could also keep a reference to their own counter element
	// around. Pick the counter at initialization time of the worker.
	myCounter := taskCounterVec.WithLabelValues("42") // From worker 42 initialization code.
	myCounter.Inc()                                   // Somewhere in the code of that worker.

	// Note that something like WithLabelValues("42", "spurious arg") would
	// panic (because you have provided too many label values). If you want
	// to get an error, use GetMetricWithLabelValues(...) instead.
	notMyCounter, err := taskCounterVec.GetMetricWithLabelValues("42", "spurious arg")
	if err != nil {
		fmt.Println("Worker initialization failed:", err)
	}
	if notMyCounter == nil {
		fmt.Println("notMyCounter is nil.")
	}

	// A different (and somewhat tricky) approach is to use
	// ConstLabels. ConstLabels are pairs of label names and label values
	// that never change. You might ask what those labels are good for (and
	// rightfully so - if they never change, they could as well be part of
	// the metric name). There are essentially two use-cases: The first is
	// if labels are constant throughout the lifetime of a binary execution,
	// but they vary over time or between different instances of a running
	// binary. The second is what we have here: Each worker creates and
	// registers its own Counter instance where the only difference is in the
	// value of the ConstLabels. Those Counters can all be registered
	// because the different ConstLabel values guarantee that each worker
	// will increment a different Counter metric.
	counterOpts := prometheus.CounterOpts{
		Subsystem:   "worker_pool",
		Name:        "completed_tasks",
		Help:        "Total number of tasks completed.",
		ConstLabels: prometheus.Labels{"worker_id": "42"},
	}
	taskCounterForWorker42 := prometheus.NewCounter(counterOpts)
	if err := prometheus.Register(taskCounterForWorker42); err != nil {
		fmt.Println("taskCounterVForWorker42 not registered:", err)
	} else {
		fmt.Println("taskCounterForWorker42 registered.")
	}
	// Obviously, in real code, taskCounterForWorker42 would be a member
	// variable of a worker struct, and the "42" would be retrieved with a
	// GetId() method or something. The Counter would be created and
	// registered in the initialization code of the worker.

	// For the creation of the next Counter, we can recycle
	// counterOpts. Just change the ConstLabels.
	counterOpts.ConstLabels = prometheus.Labels{"worker_id": "2001"}
	taskCounterForWorker2001 := prometheus.NewCounter(counterOpts)
	if err := prometheus.Register(taskCounterForWorker2001); err != nil {
		fmt.Println("taskCounterVForWorker2001 not registered:", err)
	} else {
		fmt.Println("taskCounterForWorker2001 registered.")
	}

	taskCounterForWorker2001.Inc()
	taskCounterForWorker42.Inc()
	taskCounterForWorker2001.Inc()

	// Yet another approach would be to turn the workers themselves into
	// Collectors and register them. See the Collector example for details.

	// Output:
	// taskCounter registered.
	// taskCounterVec not registered: a previously registered descriptor with the same fully-qualified name as Desc{fqName: "worker_pool_completed_tasks_total", help: "Total number of tasks completed.", constLabels: {}, variableLabels: [worker_id]} has different label names or a different help string
	// taskCounter unregistered.
	// taskCounterVec not registered: a previously registered descriptor with the same fully-qualified name as Desc{fqName: "worker_pool_completed_tasks_total", help: "Total number of tasks completed.", constLabels: {}, variableLabels: [worker_id]} has different label names or a different help string
	// taskCounterVec registered.
	// Worker initialization failed: inconsistent label cardinality
	// notMyCounter is nil.
	// taskCounterForWorker42 registered.
	// taskCounterForWorker2001 registered.
}
Example #15
// slavePoller periodically queries a Mesos slave and updates the statistics of each running task.
func slavePoller(c *http.Client, conf *Config, frameworkRegistry *frameworkRegistry, slave Slave, erroredSlaves *map[string]struct{}) {
	var knownTasks map[string]taskMetric
	var monitoredTasks []MonitoredTask

	knownTasks = make(map[string]taskMetric)

	slaveStatsUrl := fmt.Sprintf("http://%s/monitor/statistics.json", slave.address())

	constLabels := prometheus.Labels{"slave_pid": slave.Pid}

	cpusLimitGauge := newGaugeVec(
		constLabels,
		"CPU limit of the task.",
		"cpus_limit",
	)

	cpusSystemTimeCounter := newCounterVec(
		constLabels,
		"Absolute CPU sytem time.",
		"cpus_system_time_seconds",
	)

	cpusUserTimeCounter := newCounterVec(
		constLabels,
		"Absolute CPU user time.",
		"cpus_user_time_seconds",
	)

	memLimitGauge := newGaugeVec(
		constLabels,
		"Maximum memory available to the task.",
		"mem_limit_bytes",
	)

	memRssGauge := newGaugeVec(
		constLabels,
		"Current Memory usage.",
		"mem_rss_bytes",
	)

	t := time.Tick(conf.MesosSlaveQueryInterval)

	for range t {
		log.Debugf("Scraping slave '%s'", slave.Pid)

		availableTasks := make(map[string]struct{})

		err := retrieveStats(c, &monitoredTasks, slaveStatsUrl)
		if err != nil {
			prometheus.Unregister(cpusLimitGauge)
			prometheus.Unregister(cpusSystemTimeCounter)
			prometheus.Unregister(cpusUserTimeCounter)
			prometheus.Unregister(memLimitGauge)
			prometheus.Unregister(memRssGauge)

			log.Errorf("Error retrieving stats from slave '%s' - Stopping goroutine", slave.Pid)

			(*erroredSlaves)[slave.Pid] = struct{}{}
			return
		}

		for _, item := range monitoredTasks {
			var frameworkName string
			var taskName string

			availableTasks[item.ExecutorId] = struct{}{}

			cpusLimit := item.Statistics.CpusLimit
			cpusSystemTime := item.Statistics.CpusSystemTimeSecs
			cpusUserTime := item.Statistics.CpusUserTimeSecs
			memLimit := float64(item.Statistics.MemLimitBytes)
			memRss := float64(item.Statistics.MemRssBytes)

			metric, ok := knownTasks[item.ExecutorId]
			if ok {
				frameworkName = metric.frameworkName
				taskName = metric.taskName
			} else {
				framework, err := frameworkRegistry.Get(item.FrameworkId)
				if err != nil {
					log.Debugf("Framework '%s' of task '%s' not registered - not scraping", item.FrameworkId, item.ExecutorId)
					continue
				}

				frameworkName = framework.Name
				taskName = findTaskName(item.ExecutorId, framework)

				if taskName == "" {
					log.Debugf("Could not find name of task of executor '%s' - skipping", item.ExecutorId)
					continue
				}

				log.Debugf("Found new task '%s'", item.ExecutorId)

				knownTasks[item.ExecutorId] = taskMetric{
					frameworkName: frameworkName,
					taskName:      taskName,
				}
			}

			cpusLimitGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusLimit)

			cpusSystemTimeCounter.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusSystemTime)

			cpusUserTimeCounter.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(cpusUserTime)

			memLimitGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(memLimit)

			memRssGauge.WithLabelValues(item.ExecutorId, frameworkName, taskName).Set(memRss)
		}

		// Remove tasks that have finished since the last check and unregister the metrics associated with them
		for executorId, metric := range knownTasks {
			_, ok := availableTasks[executorId]
			if !ok {
				log.Debugf("Removing finished task '%s'", executorId)

				cpusLimitGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName)
				cpusSystemTimeCounter.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName)
				cpusUserTimeCounter.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName)
				memLimitGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName)
				memRssGauge.DeleteLabelValues(executorId, metric.frameworkName, metric.taskName)

				delete(knownTasks, executorId)
			}
		}
	}
}
Example #16
func TestPoolUsageCollector(t *testing.T) {
	log.SetOutput(ioutil.Discard)

	for _, tt := range []struct {
		input              string
		reMatch, reUnmatch []*regexp.Regexp
	}{
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"objects": 5, "rd": 4, "wr": 6}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 0`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "rd": 4, "wr": 6}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 0`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "wr": 6}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 0`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 0`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{"pools": [
    {{{{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}}
]}`,
			reMatch: []*regexp.Regexp{},
			reUnmatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes`),
				regexp.MustCompile(`pool_objects_total`),
				regexp.MustCompile(`pool_read_total`),
				regexp.MustCompile(`pool_write_total`),
			},
		},
		{
			input: `
{"pools": [
	{"name": "rbd", "id": 11, "stats": {"bytes_used": 20, "objects": 5, "rd": 4, "wr": 6}},
	{"name": "rbd-new", "id": 12, "stats": {"bytes_used": 50, "objects": 20, "rd": 10, "wr": 30}}
]}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`pool_used_bytes{pool="rbd"} 20`),
				regexp.MustCompile(`pool_objects_total{pool="rbd"} 5`),
				regexp.MustCompile(`pool_read_total{pool="rbd"} 4`),
				regexp.MustCompile(`pool_write_total{pool="rbd"} 6`),
				regexp.MustCompile(`pool_used_bytes{pool="rbd-new"} 50`),
				regexp.MustCompile(`pool_objects_total{pool="rbd-new"} 20`),
				regexp.MustCompile(`pool_read_total{pool="rbd-new"} 10`),
				regexp.MustCompile(`pool_write_total{pool="rbd-new"} 30`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
	} {
		func() {
			collector := NewPoolUsageCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.reMatch {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}

			for _, re := range tt.reUnmatch {
				if re.Match(buf) {
					t.Errorf("should not have matched: %q", re)
				}
			}
		}()
	}
}
Example #17
func TestClusterHealthCollector(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "5 pgs degraded"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`degraded_pgs 5`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "6 pgs stuck unclean"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`unclean_pgs 6`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`undersized_pgs 7`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "8 pgs stale"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`stale_pgs 8`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 10/20 objects degraded"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`degraded_objects 10`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "3/20 in osds are down"}]}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`osds_down 3`),
			},
		},
		{
			`
{
	"osdmap": {
		"osdmap": {
			"num_osds": 1200,
			"num_up_osds": 1200,
			"num_in_osds": 1190,
			"num_remapped_pgs": 10
		}
	},
	"health": {"summary": []}
}`,
			[]*regexp.Regexp{
				regexp.MustCompile(`osds 1200`),
				regexp.MustCompile(`osds_up 1200`),
				regexp.MustCompile(`osds_in 1190`),
				regexp.MustCompile(`pgs_remapped 10`),
			},
		},
	} {
		func() {
			collector := NewClusterHealthCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}
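// The health collector behind these cases evidently pulls counts out of the
// free-form summary strings. A minimal sketch of that style of parsing, with
// a hypothetical regex and helper name (strconv assumed imported):
var degradedPGsRe = regexp.MustCompile(`([\d]+) pgs degraded`)

// parseDegradedPGs returns the degraded-PG count from a summary line, or
// false if the line does not mention degraded PGs.
func parseDegradedPGs(summary string) (float64, bool) {
	m := degradedPGsRe.FindStringSubmatch(summary)
	if m == nil {
		return 0, false
	}
	v, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, false
	}
	return v, true
}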
Example #18
func unregisterMetrics() {
	prometheus.Unregister(timeouts)
	prometheus.Unregister(errors)
	prometheus.Unregister(requestSum)
	prometheus.Unregister(requestSuccess)
	prometheus.Unregister(requestDuration)
	prometheus.Unregister(connOpen)
	prometheus.Unregister(connError)
	prometheus.Unregister(bytesWritten)
	prometheus.Unregister(bytesRead)
	prometheus.Unregister(writeError)
	prometheus.Unregister(readError)
	prometheus.Unregister(statusCodes)
	prometheus.Unregister(errorMessages)
}
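// A registration counterpart would normally mirror the list above. A minimal
// sketch, assuming the same package-level collectors are in scope:
// prometheus.MustRegister is variadic, so the whole set can go in one call,
// and a duplicate registration panics loudly during development rather than
// failing silently.
func registerMetrics() {
	prometheus.MustRegister(
		timeouts, errors, requestSum, requestSuccess, requestDuration,
		connOpen, connError, bytesWritten, bytesRead,
		writeError, readError, statusCodes, errorMessages,
	)
}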
Example #19
func TestClusterHealthCollector(t *testing.T) {
	for _, tt := range []struct {
		input   string
		regexes []*regexp.Regexp
	}{
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "5 pgs degraded"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`degraded_pgs 5`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "15 pgs stuck degraded"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_degraded_pgs 15`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "6 pgs unclean"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`unclean_pgs 6`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "16 pgs stuck unclean"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_unclean_pgs 16`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "7 pgs undersized"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`undersized_pgs 7`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "17 pgs stuck undersized"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_undersized_pgs 17`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "8 pgs stale"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stale_pgs 8`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "18 pgs stuck stale"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`stuck_stale_pgs 18`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 10/20 objects degraded"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`degraded_objects 10`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "recovery 20/40 objects misplaced"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`misplaced_objects 20`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 0,
			"num_up_osds": 0,
			"num_in_osds": 0,
			"num_remapped_pgs": 0
		}
	},
	"health": {"summary": [{"severity": "HEALTH_WARN", "summary": "3/20 in osds are down"}]}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`osds_down 3`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 1200,
			"num_up_osds": 1200,
			"num_in_osds": 1190,
			"num_remapped_pgs": 10
		}
	},
	"health": {"summary": []}
}`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`osds 1200`),
				regexp.MustCompile(`osds_up 1200`),
				regexp.MustCompile(`osds_in 1190`),
				regexp.MustCompile(`pgs_remapped 10`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 1200,
			"num_up_osds": 1200,
			"num_in_osds": 1190,
			"num_remapped_pgs": 10
		}
	},
	"health": { "overall_status": "HEALTH_OK" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 0`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 1200,
			"num_up_osds": 1200,
			"num_in_osds": 1190,
			"num_remapped_pgs": 10
		}
	},
	"health": { "overall_status": "HEALTH_WARN" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 1`),
			},
		},
		{
			input: `
{
	"osdmap": {
		"osdmap": {
			"num_osds": 1200,
			"num_up_osds": 1200,
			"num_in_osds": 1190,
			"num_remapped_pgs": 10
		}
	},
	"health": { "overall_status": "HEALTH_ERR" } }`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`health_status 2`),
			},
		},
		{
			input: `
$ sudo ceph -s
    cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37
     health HEALTH_OK
     monmap e3: 3 mons at {mon01,mon02,mon03}
  recovery io 5779 MB/s, 4 keys/s, 1522 objects/s
  client io 4273 kB/s rd, 2740 MB/s wr, 2863 op/s
`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`recovery_io_bytes 5.779e`),
				regexp.MustCompile(`recovery_io_keys 4`),
				regexp.MustCompile(`recovery_io_objects 1522`),
				regexp.MustCompile(`client_io_ops 2863`),
				regexp.MustCompile(`client_io_read_bytes 4.273e`),
				regexp.MustCompile(`client_io_write_bytes 2.74e`),
			},
		},
		{
			input: `
$ sudo ceph -s
    cluster eff51be8-938a-4afa-b0d1-7a580b4ceb37
     health HEALTH_OK
     monmap e3: 3 mons at {mon01,mon02,mon03}
  recovery io 5779 MB/s, 4 keys/s, 1522 objects/s
  client io 2863 op/s rd, 5847 op/s wr
`,
			regexes: []*regexp.Regexp{
				regexp.MustCompile(`recovery_io_bytes 5.779e`),
				regexp.MustCompile(`recovery_io_keys 4`),
				regexp.MustCompile(`recovery_io_objects 1522`),
				regexp.MustCompile(`client_io_ops 8710`),
				regexp.MustCompile(`client_io_read_ops 2863`),
				regexp.MustCompile(`client_io_write_ops 5847`),
			},
		},
	} {
		func() {
			collector := NewClusterHealthCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.regexes {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
		}()
	}
}
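// Two details of the last cases above are worth spelling out. First, the
// scientific-notation regexes (e.g. `recovery_io_bytes 5.779e`) match because
// the collector presumably converts the human-readable "MB/s" and "kB/s"
// figures to bytes before exporting, and the Prometheus text format renders
// large floats in e-notation (5779 MB/s -> 5.779e+09 with decimal megabytes;
// the exact multiplier is an assumption here). Second, the final case has no
// combined op/s figure, so the expected client_io_ops 8710 is simply the sum
// of the read and write rates: 2863 + 5847 = 8710.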
Example #20
func (s *OortGroupStore) start() {
	s.gs = nil
	runtime.GC()
	log.Println("LocalID appears to be:", s.oort.GetLocalID())
	var err error
	s.msgRing, err = ring.NewTCPMsgRing(&s.TCPMsgRingConfig)
	if err != nil {
		panic(err)
	}
	s.GroupStoreConfig.MsgRing = s.msgRing
	s.msgRing.SetRing(s.oort.Ring())
	var restartChan chan error
	s.gs, restartChan = store.NewGroupStore(&s.GroupStoreConfig)
	// TODO: We'll probably want something more graceful here, but this will
	// work for now since systemd (or another service manager) should restart
	// the service.
	go func(restartChan chan error) {
		if err := <-restartChan; err != nil {
			panic(err)
		}
	}(restartChan)
	if err := s.gs.Startup(context.Background()); err != nil {
		panic(err)
	}
	go func(t *ring.TCPMsgRing) {
		t.Listen()
		log.Println("TCPMsgRing Listen() returned, shutdown?")
	}(s.msgRing)
	go func(t *ring.TCPMsgRing) {
		mValues := prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "Values",
			Help: "Current number of values stored.",
		})
		mValueBytes := prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "ValueBytes",
			Help: "Current number of bytes for the values stored.",
		})
		mLookups := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "Lookups",
			Help: "Count of lookup requests executed.",
		})
		mLookupErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "LookupErrors",
			Help: "Count of lookup requests executed resulting in errors.",
		})
		mLookupGroups := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "LookupGroups",
			Help: "Count of lookup-group requests executed.",
		})
		mLookupGroupItems := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "LookupGroupItems",
			Help: "Count of items lookup-group requests have returned.",
		})
		mLookupGroupErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "LookupGroupErrors",
			Help: "Count of errors lookup-group requests have returned.",
		})
		mReads := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "Reads",
			Help: "Count of read requests executed.",
		})
		mReadErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "ReadErrors",
			Help: "Count of read requests executed resulting in errors.",
		})
		mReadGroups := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "ReadGroups",
			Help: "Count of read-group requests executed.",
		})
		mReadGroupItems := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "ReadGroupItems",
			Help: "Count of items read-group requests have returned.",
		})
		mReadGroupErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "ReadGroupErrors",
			Help: "Count of errors read-group requests have returned.",
		})
		mWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "Writes",
			Help: "Count of write requests executed.",
		})
		mWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "WriteErrors",
			Help: "Count of write requests executed resulting in errors.",
		})
		mWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "WritesOverridden",
			Help: "Count of write requests that were outdated or repeated.",
		})
		mDeletes := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "Deletes",
			Help: "Count of delete requests executed.",
		})
		mDeleteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "DeleteErrors",
			Help: "Count of delete requests executed resulting in errors.",
		})
		mDeletesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "DeletesOverridden",
			Help: "Count of delete requests that were outdated or repeated.",
		})
		mOutBulkSets := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutBulkSets",
			Help: "Count of outgoing bulk-set messages in response to incoming pull replication messages.",
		})
		mOutBulkSetValues := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutBulkSetValues",
			Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those in response to incoming pull-replication messages.",
		})
		mOutBulkSetPushes := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutBulkSetPushes",
			Help: "Count of outgoing bulk-set messages due to push replication.",
		})
		mOutBulkSetPushValues := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutBulkSetPushValues",
			Help: "Count of values in outgoing bulk-set messages; these bulk-set messages are those due to push replication.",
		})
		mInBulkSets := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSets",
			Help: "Count of incoming bulk-set messages.",
		})
		mInBulkSetDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetDrops",
			Help: "Count of incoming bulk-set messages dropped due to the local system being overworked at the time.",
		})
		mInBulkSetInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetInvalids",
			Help: "Count of incoming bulk-set messages that couldn't be parsed.",
		})
		mInBulkSetWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetWrites",
			Help: "Count of writes due to incoming bulk-set messages.",
		})
		mInBulkSetWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetWriteErrors",
			Help: "Count of errors returned from writes due to incoming bulk-set messages.",
		})
		mInBulkSetWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetWritesOverridden",
			Help: "Count of writes from incoming bulk-set messages that result in no change.",
		})
		mOutBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutBulkSetAcks",
			Help: "Count of outgoing bulk-set-ack messages.",
		})
		mInBulkSetAcks := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAcks",
			Help: "Count of incoming bulk-set-ack messages.",
		})
		mInBulkSetAckDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAckDrops",
			Help: "Count of incoming bulk-set-ack messages dropped due to the local system being overworked at the time.",
		})
		mInBulkSetAckInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAckInvalids",
			Help: "Count of incoming bulk-set-ack messages that couldn't be parsed.",
		})
		mInBulkSetAckWrites := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAckWrites",
			Help: "Count of writes (for local removal) due to incoming bulk-set-ack messages.",
		})
		mInBulkSetAckWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAckWriteErrors",
			Help: "Count of errors returned from writes due to incoming bulk-set-ack messages.",
		})
		mInBulkSetAckWritesOverridden := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InBulkSetAckWritesOverridden",
			Help: "Count of writes from incoming bulk-set-ack messages that result in no change.",
		})
		mOutPullReplications := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "OutPullReplications",
			Help: "Count of outgoing pull-replication messages.",
		})
		mOutPullReplicationSeconds := prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "OutPullReplicationSeconds",
			Help: "How long the last out pull replication pass took.",
		})
		mInPullReplications := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InPullReplications",
			Help: "Count of incoming pull-replication messages.",
		})
		mInPullReplicationDrops := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InPullReplicationDrops",
			Help: "Count of incoming pull-replication messages droppped due to the local system being overworked at the time.",
		})
		mInPullReplicationInvalids := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "InPullReplicationInvalids",
			Help: "Count of incoming pull-replication messages that couldn't be parsed.",
		})
		mExpiredDeletions := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "ExpiredDeletions",
			Help: "Count of recent deletes that have become old enough to be completely discarded.",
		})
		mCompactions := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "Compactions",
			Help: "Count of disk file sets compacted due to their contents exceeding a staleness threshold. For example, this happens when enough of the values have been overwritten or deleted in more recent operations.",
		})
		mSmallFileCompactions := prometheus.NewCounter(prometheus.CounterOpts{
			Name: "SmallFileCompactions",
			Help: "Count of disk file sets compacted due to the entire file size being too small. For example, this may happen when the store is shutdown and restarted.",
		})
		prometheus.Register(mValues)
		prometheus.Register(mValueBytes)
		prometheus.Register(mLookups)
		prometheus.Register(mLookupErrors)
		prometheus.Register(mLookupGroups)
		prometheus.Register(mLookupGroupItems)
		prometheus.Register(mLookupGroupErrors)
		prometheus.Register(mReads)
		prometheus.Register(mReadErrors)
		prometheus.Register(mReadGroups)
		prometheus.Register(mReadGroupItems)
		prometheus.Register(mReadGroupErrors)
		prometheus.Register(mWrites)
		prometheus.Register(mWriteErrors)
		prometheus.Register(mWritesOverridden)
		prometheus.Register(mDeletes)
		prometheus.Register(mDeleteErrors)
		prometheus.Register(mDeletesOverridden)
		prometheus.Register(mOutBulkSets)
		prometheus.Register(mOutBulkSetValues)
		prometheus.Register(mOutBulkSetPushes)
		prometheus.Register(mOutBulkSetPushValues)
		prometheus.Register(mInBulkSets)
		prometheus.Register(mInBulkSetDrops)
		prometheus.Register(mInBulkSetInvalids)
		prometheus.Register(mInBulkSetWrites)
		prometheus.Register(mInBulkSetWriteErrors)
		prometheus.Register(mInBulkSetWritesOverridden)
		prometheus.Register(mOutBulkSetAcks)
		prometheus.Register(mInBulkSetAcks)
		prometheus.Register(mInBulkSetAckDrops)
		prometheus.Register(mInBulkSetAckInvalids)
		prometheus.Register(mInBulkSetAckWrites)
		prometheus.Register(mInBulkSetAckWriteErrors)
		prometheus.Register(mInBulkSetAckWritesOverridden)
		prometheus.Register(mOutPullReplications)
		prometheus.Register(mOutPullReplicationSeconds)
		prometheus.Register(mInPullReplications)
		prometheus.Register(mInPullReplicationDrops)
		prometheus.Register(mInPullReplicationInvalids)
		prometheus.Register(mExpiredDeletions)
		prometheus.Register(mCompactions)
		prometheus.Register(mSmallFileCompactions)
		tcpMsgRingStats := t.Stats(false)
		for !tcpMsgRingStats.Shutdown {
			time.Sleep(time.Minute)
			tcpMsgRingStats = t.Stats(false)
			log.Printf("%v\n", tcpMsgRingStats)
			stats, err := s.gs.Stats(context.Background(), false)
			if err != nil {
				log.Printf("stats error: %s\n", err)
			} else if s, ok := stats.(*store.GroupStoreStats); ok {
				mValues.Set(float64(s.Values))
				mValueBytes.Set(float64(s.ValueBytes))
				mLookups.Add(float64(s.Lookups))
				mLookupErrors.Add(float64(s.LookupErrors))
				mLookupGroups.Add(float64(s.LookupGroups))
				mLookupGroupItems.Add(float64(s.LookupGroupItems))
				mLookupGroupErrors.Add(float64(s.LookupGroupErrors))
				mReads.Add(float64(s.Reads))
				mReadErrors.Add(float64(s.ReadErrors))
				mReadGroups.Add(float64(s.ReadGroups))
				mReadGroupItems.Add(float64(s.ReadGroupItems))
				mReadGroupErrors.Add(float64(s.ReadGroupErrors))
				mWrites.Add(float64(s.Writes))
				mWriteErrors.Add(float64(s.WriteErrors))
				mWritesOverridden.Add(float64(s.WritesOverridden))
				mDeletes.Add(float64(s.Deletes))
				mDeleteErrors.Add(float64(s.DeleteErrors))
				mDeletesOverridden.Add(float64(s.DeletesOverridden))
				mOutBulkSets.Add(float64(s.OutBulkSets))
				mOutBulkSetValues.Add(float64(s.OutBulkSetValues))
				mOutBulkSetPushes.Add(float64(s.OutBulkSetPushes))
				mOutBulkSetPushValues.Add(float64(s.OutBulkSetPushValues))
				mInBulkSets.Add(float64(s.InBulkSets))
				mInBulkSetDrops.Add(float64(s.InBulkSetDrops))
				mInBulkSetInvalids.Add(float64(s.InBulkSetInvalids))
				mInBulkSetWrites.Add(float64(s.InBulkSetWrites))
				mInBulkSetWriteErrors.Add(float64(s.InBulkSetWriteErrors))
				mInBulkSetWritesOverridden.Add(float64(s.InBulkSetWritesOverridden))
				mOutBulkSetAcks.Add(float64(s.OutBulkSetAcks))
				mInBulkSetAcks.Add(float64(s.InBulkSetAcks))
				mInBulkSetAckDrops.Add(float64(s.InBulkSetAckDrops))
				mInBulkSetAckInvalids.Add(float64(s.InBulkSetAckInvalids))
				mInBulkSetAckWrites.Add(float64(s.InBulkSetAckWrites))
				mInBulkSetAckWriteErrors.Add(float64(s.InBulkSetAckWriteErrors))
				mInBulkSetAckWritesOverridden.Add(float64(s.InBulkSetAckWritesOverridden))
				mOutPullReplications.Add(float64(s.OutPullReplications))
				mOutPullReplicationSeconds.Set(float64(s.OutPullReplicationNanoseconds) / 1000000000)
				mInPullReplications.Add(float64(s.InPullReplications))
				mInPullReplicationDrops.Add(float64(s.InPullReplicationDrops))
				mInPullReplicationInvalids.Add(float64(s.InPullReplicationInvalids))
				mExpiredDeletions.Add(float64(s.ExpiredDeletions))
				mCompactions.Add(float64(s.Compactions))
				mSmallFileCompactions.Add(float64(s.SmallFileCompactions))
			} else {
				log.Printf("%s\n", stats)
			}
		}
		prometheus.Unregister(mValues)
		prometheus.Unregister(mValueBytes)
		prometheus.Unregister(mLookups)
		prometheus.Unregister(mLookupErrors)
		prometheus.Unregister(mLookupGroups)
		prometheus.Unregister(mLookupGroupItems)
		prometheus.Unregister(mLookupGroupErrors)
		prometheus.Unregister(mReads)
		prometheus.Unregister(mReadErrors)
		prometheus.Unregister(mReadGroups)
		prometheus.Unregister(mReadGroupItems)
		prometheus.Unregister(mReadGroupErrors)
		prometheus.Unregister(mWrites)
		prometheus.Unregister(mWriteErrors)
		prometheus.Unregister(mWritesOverridden)
		prometheus.Unregister(mDeletes)
		prometheus.Unregister(mDeleteErrors)
		prometheus.Unregister(mDeletesOverridden)
		prometheus.Unregister(mOutBulkSets)
		prometheus.Unregister(mOutBulkSetValues)
		prometheus.Unregister(mOutBulkSetPushes)
		prometheus.Unregister(mOutBulkSetPushValues)
		prometheus.Unregister(mInBulkSets)
		prometheus.Unregister(mInBulkSetDrops)
		prometheus.Unregister(mInBulkSetInvalids)
		prometheus.Unregister(mInBulkSetWrites)
		prometheus.Unregister(mInBulkSetWriteErrors)
		prometheus.Unregister(mInBulkSetWritesOverridden)
		prometheus.Unregister(mOutBulkSetAcks)
		prometheus.Unregister(mInBulkSetAcks)
		prometheus.Unregister(mInBulkSetAckDrops)
		prometheus.Unregister(mInBulkSetAckInvalids)
		prometheus.Unregister(mInBulkSetAckWrites)
		prometheus.Unregister(mInBulkSetAckWriteErrors)
		prometheus.Unregister(mInBulkSetAckWritesOverridden)
		prometheus.Unregister(mOutPullReplications)
		prometheus.Unregister(mOutPullReplicationSeconds)
		prometheus.Unregister(mInPullReplications)
		prometheus.Unregister(mInPullReplicationDrops)
		prometheus.Unregister(mInPullReplicationInvalids)
		prometheus.Unregister(mExpiredDeletions)
		prometheus.Unregister(mCompactions)
		prometheus.Unregister(mSmallFileCompactions)
	}(s.msgRing)
}
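// The symmetric Register/Unregister runs in start() could be collapsed by
// treating the collectors as a slice. A sketch, assuming nothing depends on
// the individual m* variables at (un)registration time; prometheus.MustRegister
// accepts a variadic list, and the returned function is suitable for defer:
func registerAll(collectors ...prometheus.Collector) (unregister func()) {
	prometheus.MustRegister(collectors...)
	return func() {
		for _, c := range collectors {
			prometheus.Unregister(c)
		}
	}
}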
func TestClusterUsage(t *testing.T) {
	log.SetOutput(ioutil.Discard)

	for _, tt := range []struct {
		input              string
		reMatch, reUnmatch []*regexp.Regexp
	}{
		{
			input: `
{
	"stats": {
		"total_bytes": 10,
		"total_used_bytes": 6,
		"total_avail_bytes": 4,
		"total_objects": 1
	}
}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes 10`),
				regexp.MustCompile(`ceph_cluster_used_bytes 6`),
				regexp.MustCompile(`ceph_cluster_available_bytes 4`),
				regexp.MustCompile(`ceph_cluster_objects 1`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{
	"stats": {
		"total_used_bytes": 6,
		"total_avail_bytes": 4,
		"total_objects": 1
	}
}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes 0`),
				regexp.MustCompile(`ceph_cluster_used_bytes 6`),
				regexp.MustCompile(`ceph_cluster_available_bytes 4`),
				regexp.MustCompile(`ceph_cluster_objects 1`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{
	"stats": {
		"total_bytes": 10,
		"total_avail_bytes": 4,
		"total_objects": 1
	}
}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes 10`),
				regexp.MustCompile(`ceph_cluster_used_bytes 0`),
				regexp.MustCompile(`ceph_cluster_available_bytes 4`),
				regexp.MustCompile(`ceph_cluster_objects 1`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{
	"stats": {
		"total_bytes": 10,
		"total_used_bytes": 6,
		"total_objects": 1
	}
}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes 10`),
				regexp.MustCompile(`ceph_cluster_used_bytes 6`),
				regexp.MustCompile(`ceph_cluster_available_bytes 0`),
				regexp.MustCompile(`ceph_cluster_objects 1`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{
	"stats": {
		"total_bytes": 10,
		"total_used_bytes": 6,
		"total_avail_bytes": 4
	}
}`,
			reMatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes 10`),
				regexp.MustCompile(`ceph_cluster_used_bytes 6`),
				regexp.MustCompile(`ceph_cluster_available_bytes 4`),
				regexp.MustCompile(`ceph_cluster_objects 0`),
			},
			reUnmatch: []*regexp.Regexp{},
		},
		{
			input: `
{
	"stats": {{{
		"total_bytes": 10,
		"total_used_bytes": 6,
		"total_avail_bytes": 4,
		"total_objects": 1
	}
}`,
			reMatch: []*regexp.Regexp{},
			reUnmatch: []*regexp.Regexp{
				regexp.MustCompile(`ceph_cluster_capacity_bytes`),
				regexp.MustCompile(`ceph_cluster_used_bytes`),
				regexp.MustCompile(`ceph_cluster_available_bytes`),
				regexp.MustCompile(`ceph_cluster_objects`),
			},
		},
	} {
		func() {
			collector := NewClusterUsageCollector(NewNoopConn(tt.input))
			if err := prometheus.Register(collector); err != nil {
				t.Fatalf("collector failed to register: %s", err)
			}
			defer prometheus.Unregister(collector)

			server := httptest.NewServer(prometheus.Handler())
			defer server.Close()

			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatalf("unexpected failed response from prometheus: %s", err)
			}
			defer resp.Body.Close()

			buf, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed reading server response: %s", err)
			}

			for _, re := range tt.reMatch {
				if !re.Match(buf) {
					t.Errorf("failed matching: %q", re)
				}
			}
			for _, re := range tt.reUnmatch {
				if re.Match(buf) {
					t.Errorf("should not have matched: %q", re)
				}
			}
		}()
	}
}
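// The collector tests in this file repeat the same register-scrape-match
// sequence. A sketch of a shared helper (the name checkCollector is
// hypothetical) that the table-test bodies could call instead:
func checkCollector(t *testing.T, c prometheus.Collector, reMatch, reUnmatch []*regexp.Regexp) {
	if err := prometheus.Register(c); err != nil {
		t.Fatalf("collector failed to register: %s", err)
	}
	defer prometheus.Unregister(c)

	server := httptest.NewServer(prometheus.Handler())
	defer server.Close()

	resp, err := http.Get(server.URL)
	if err != nil {
		t.Fatalf("failed to get response from prometheus: %s", err)
	}
	defer resp.Body.Close()

	buf, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("failed reading server response: %s", err)
	}

	for _, re := range reMatch {
		if !re.Match(buf) {
			t.Errorf("failed matching: %q", re)
		}
	}
	for _, re := range reUnmatch {
		if re.Match(buf) {
			t.Errorf("should not have matched: %q", re)
		}
	}
}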