Ejemplo n.º 1
0
func newMetrics(r prometheus.Registerer) *metrics {
	m := &metrics{}

	m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "alertmanager_silences_gc_duration_seconds",
		Help: "Duration of the last silence garbage collection cycle.",
	})
	m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "alertmanager_silences_snapshot_duration_seconds",
		Help: "Duration of the last silence snapshot.",
	})
	m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "alertmanager_silences_queries_total",
		Help: "How many silence queries were received.",
	})
	m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "alertmanager_silences_query_errors_total",
		Help: "How many silence received queries did not succeed.",
	})
	m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name: "alertmanager_silences_query_duration_seconds",
		Help: "Duration of silence query evaluation.",
	})

	if r != nil {
		r.MustRegister(
			m.gcDuration,
			m.snapshotDuration,
			m.queriesTotal,
			m.queryErrorsTotal,
			m.queryDuration,
		)
	}
	return m
}
Ejemplo n.º 2
0
func newMetrics(r prometheus.Registerer) *metrics {
	m := &metrics{}

	m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "alertmanager_nflog_gc_duration_seconds",
		Help: "Duration of the last notification log garbage collection cycle.",
	})
	m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "alertmanager_nflog_snapshot_duration_seconds",
		Help: "Duration of the last notification log snapshot.",
	})
	m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "alertmanager_nflog_queries_total",
		Help: "Number of notification log queries were received.",
	})
	m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "alertmanager_nflog_query_errors_total",
		Help: "Number notification log received queries that failed.",
	})
	m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name: "alertmanager_nflog_query_duration_seconds",
		Help: "Duration of notification log query evaluation.",
	})

	if r != nil {
		r.MustRegister(
			m.gcDuration,
			m.snapshotDuration,
			m.queriesTotal,
			m.queryErrorsTotal,
			m.queryDuration,
		)
	}
	return m
}
// InstrumentHandlerFuncWithOpts works like InstrumentHandlerFunc but provides
// more flexibility (at the cost of a more complex call syntax).
//
// As InstrumentHandlerFunc, this function registers four metric collectors, but it
// uses the provided SummaryOpts to create them. However, the fields "Name" and
// "Help" in the SummaryOpts are ignored. "Name" is replaced by
// "requests_total", "request_duration_microseconds", "request_size_bytes", and
// "response_size_bytes", respectively. "Help" is replaced by an appropriate
// help string. The names of the variable labels of the http_requests_total
// CounterVec are "method" (get, post, etc.), and "code" (HTTP status code).
//
// If InstrumentHandlerWithOpts is called as follows, it mimics exactly the
// behavior of InstrumentHandler:
//
//     prometheus.InstrumentHandlerWithOpts(
//         prometheus.SummaryOpts{
//              Subsystem:   "http",
//              ConstLabels: prometheus.Labels{"handler": handlerName},
//         },
//         handler,
//     )
//
// Technical detail: "requests_total" is a CounterVec, not a SummaryVec, so it
// cannot use SummaryOpts. Instead, a CounterOpts struct is created internally,
// and all its fields are set to the equally named fields in the provided
// SummaryOpts.
func InstrumentHandlerFuncWithOpts(opts prometheus.SummaryOpts, handlerFunc gin.HandlerFunc) gin.HandlerFunc {
	reqCnt := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace:   opts.Namespace,
			Subsystem:   opts.Subsystem,
			Name:        "requests_total",
			Help:        "Total number of HTTP requests made.",
			ConstLabels: opts.ConstLabels,
		},
		instLabels,
	)

	opts.Name = "request_duration_microseconds"
	opts.Help = "The HTTP request latencies in microseconds."
	reqDur := prometheus.NewSummary(opts)

	opts.Name = "request_size_bytes"
	opts.Help = "The HTTP request sizes in bytes."
	reqSz := prometheus.NewSummary(opts)

	opts.Name = "response_size_bytes"
	opts.Help = "The HTTP response sizes in bytes."
	resSz := prometheus.NewSummary(opts)

	regReqCnt := prometheus.MustRegisterOrGet(reqCnt).(*prometheus.CounterVec)
	regReqDur := prometheus.MustRegisterOrGet(reqDur).(prometheus.Summary)
	regReqSz := prometheus.MustRegisterOrGet(reqSz).(prometheus.Summary)
	regResSz := prometheus.MustRegisterOrGet(resSz).(prometheus.Summary)

	return func(c *gin.Context) {
		now := time.Now()

		r := c.Request

		out := make(chan int)
		urlLen := 0
		if r.URL != nil {
			urlLen = len(r.URL.String())
		}
		go computeApproximateRequestSize(r, out, urlLen)

		handlerFunc(c)

		elapsed := float64(time.Since(now)) / float64(time.Microsecond)

		method := sanitizeMethod(r.Method)
		code := sanitizeCode(c.Writer.Status())
		regReqCnt.WithLabelValues(method, code).Inc()
		regReqDur.Observe(elapsed)
		regResSz.Observe(float64(c.Writer.Size()))
		regReqSz.Observe(float64(<-out))
	}
}
Ejemplo n.º 4
0
func TestSensorRecordSummary(t *testing.T) {
	testServer := httptest.NewServer(prometheus.UninstrumentedHandler())
	defer testServer.Close()

	sensor := &Sensor{
		Type: "summary",
		collector: prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace: "telemetry",
			Subsystem: "sensors",
			Name:      "TestSensorRecordSummary",
			Help:      "help",
		})}
	prometheus.MustRegister(sensor.collector)
	patt := `telemetry_sensors_TestSensorRecordSummary{quantile="([\.0-9]*)"} ([0-9\.]*)`

	// need a bunch of metrics to make quantiles make any sense
	for i := 1; i <= 10; i++ {
		sensor.record(fmt.Sprintf("%v", i))
	}
	resp := getFromTestServer(t, testServer)
	expected := [][]string{{"0.5", "5"}, {"0.9", "9"}, {"0.99", "9"}}
	if !checkBuckets(resp, patt, expected) {
		t.Fatalf("Failed to get match for sensor in response")
	}

	for i := 1; i <= 5; i++ {
		// add a new record for each one in the bottom half
		sensor.record(fmt.Sprintf("%v", i))
	}
	resp = getFromTestServer(t, testServer)
	expected = [][]string{{"0.5", "4"}, {"0.9", "8"}, {"0.99", "9"}}
	if !checkBuckets(resp, patt, expected) {
		t.Fatalf("Failed to get match for sensor in response")
	}
}
Ejemplo n.º 5
0
func (_ prometheusMetricsProvider) NewWorkDurationMetric(name string) workqueue.SummaryMetric {
	workDuration := prometheus.NewSummary(prometheus.SummaryOpts{
		Subsystem: name,
		Name:      "work_duration",
		Help:      "How long processing an item from workqueue" + name + " takes.",
	})
	prometheus.Register(workDuration)
	return workDuration
}
Ejemplo n.º 6
0
// NewSensors creates new sensors from a raw config
func NewSensors(raw []interface{}) ([]*Sensor, error) {
	var sensors []*Sensor
	if err := utils.DecodeRaw(raw, &sensors); err != nil {
		return nil, fmt.Errorf("Sensor configuration error: %v", err)
	}
	for _, s := range sensors {
		check, err := commands.NewCommand(s.CheckExec, s.Timeout)
		if err != nil {
			return nil, fmt.Errorf("could not parse check in sensor %s: %s", s.Name, err)
		}
		check.Name = fmt.Sprintf("%s.sensor", s.Name)
		s.checkCmd = check

		// the prometheus client lib's API here is baffling... they don't expose
		// an interface or embed their Opts type in each of the Opts "subtypes",
		// so we can't share the initialization.
		switch {
		case s.Type == "counter":
			s.collector = prometheus.NewCounter(prometheus.CounterOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "gauge":
			s.collector = prometheus.NewGauge(prometheus.GaugeOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "histogram":
			s.collector = prometheus.NewHistogram(prometheus.HistogramOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		case s.Type == "summary":
			s.collector = prometheus.NewSummary(prometheus.SummaryOpts{
				Namespace: s.Namespace,
				Subsystem: s.Subsystem,
				Name:      s.Name,
				Help:      s.Help,
			})
		default:
			return nil, fmt.Errorf("invalid sensor type: %s", s.Type)
		}
		// we're going to unregister before every attempt to register
		// so that we can reload config
		prometheus.Unregister(s.collector)
		if err := prometheus.Register(s.collector); err != nil {
			return nil, err
		}
	}
	return sensors, nil
}
Ejemplo n.º 7
0
func (_ prometheusMetricsProvider) NewLatencyMetric(name string) workqueue.SummaryMetric {
	latency := prometheus.NewSummary(prometheus.SummaryOpts{
		Subsystem: name,
		Name:      "queue_latency",
		Help:      "How long an item stays in workqueue" + name + " before being requested.",
	})
	prometheus.Register(latency)
	return latency
}
Ejemplo n.º 8
0
// NewStorageQueueManager builds a new StorageQueueManager.
func NewStorageQueueManager(tsdb StorageClient, queueCapacity int) *StorageQueueManager {
	constLabels := prometheus.Labels{
		"type": tsdb.Name(),
	}

	return &StorageQueueManager{
		tsdb:          tsdb,
		queue:         make(chan *clientmodel.Sample, queueCapacity),
		sendSemaphore: make(chan bool, maxConcurrentSends),
		drained:       make(chan bool),

		samplesCount: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace:   namespace,
				Subsystem:   subsystem,
				Name:        "sent_samples_total",
				Help:        "Total number of processed samples to be sent to remote storage.",
				ConstLabels: constLabels,
			},
			[]string{result},
		),
		sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace:   namespace,
			Subsystem:   subsystem,
			Name:        "sent_latency_milliseconds",
			Help:        "Latency quantiles for sending sample batches to the remote storage.",
			ConstLabels: constLabels,
		}),
		sendErrors: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace:   namespace,
			Subsystem:   subsystem,
			Name:        "sent_errors_total",
			Help:        "Total number of errors sending sample batches to the remote storage.",
			ConstLabels: constLabels,
		}),
		queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace:   namespace,
			Subsystem:   subsystem,
			Name:        "queue_length",
			Help:        "The number of processed samples queued to be sent to the remote storage.",
			ConstLabels: constLabels,
		}),
		queueCapacity: prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, subsystem, "queue_capacity"),
				"The capacity of the queue of samples to be sent to the remote storage.",
				nil,
				constLabels,
			),
			prometheus.GaugeValue,
			float64(queueCapacity),
		),
	}
}
Ejemplo n.º 9
0
func (p *Prometheus) registerMetrics(subsystem string) {

	p.reqCnt = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: subsystem,
			Name:      "requests_total",
			Help:      "How many HTTP requests processed, partitioned by status code and HTTP method.",
		},
		[]string{"code", "method", "handler"},
	)
	prometheus.MustRegister(p.reqCnt)

	p.reqDur = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: subsystem,
			Name:      "request_duration_seconds",
			Help:      "The HTTP request latencies in seconds.",
		},
	)
	prometheus.MustRegister(p.reqDur)

	p.reqSz = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: subsystem,
			Name:      "request_size_bytes",
			Help:      "The HTTP request sizes in bytes.",
		},
	)
	prometheus.MustRegister(p.reqSz)

	p.resSz = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: subsystem,
			Name:      "response_size_bytes",
			Help:      "The HTTP response sizes in bytes.",
		},
	)
	prometheus.MustRegister(p.resSz)

}
Ejemplo n.º 10
0
// NewHandler constructs a new Handler.
func New(o *HandlerOptions) *Handler {
	ctx, cancel := context.WithCancel(context.Background())

	return &Handler{
		queue:  make(model.Alerts, 0, o.QueueCapacity),
		ctx:    ctx,
		cancel: cancel,
		more:   make(chan struct{}, 1),
		opts:   o,

		latency: prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "latency_seconds",
			Help:      "Latency quantiles for sending alert notifications (not including dropped notifications).",
		}),
		errors: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "errors_total",
			Help:      "Total number of errors sending alert notifications.",
		}),
		sent: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "sent_total",
			Help:      "Total number of alerts successfully sent.",
		}),
		dropped: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "dropped_total",
			Help:      "Total number of alerts dropped due to alert manager missing in configuration.",
		}),
		queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "queue_length",
			Help:      "The number of alert notifications in the queue.",
		}),
		queueCapacity: prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, subsystem, "queue_capacity"),
				"The capacity of the alert notifications queue.",
				nil, nil,
			),
			prometheus.GaugeValue,
			float64(o.QueueCapacity),
		),
	}
}
Ejemplo n.º 11
0
func newQueueMetrics(name string) queueMetrics {
	var ret *defaultQueueMetrics
	if len(name) == 0 {
		return ret
	}

	ret = &defaultQueueMetrics{
		depth: prometheus.NewGauge(prometheus.GaugeOpts{
			Subsystem: name,
			Name:      "depth",
			Help:      "Current depth of workqueue: " + name,
		}),
		adds: prometheus.NewCounter(prometheus.CounterOpts{
			Subsystem: name,
			Name:      "adds",
			Help:      "Total number of adds handled by workqueue: " + name,
		}),
		latency: prometheus.NewSummary(prometheus.SummaryOpts{
			Subsystem: name,
			Name:      "queue_latency",
			Help:      "How long an item stays in workqueue" + name + " before being requested.",
		}),
		workDuration: prometheus.NewSummary(prometheus.SummaryOpts{
			Subsystem: name,
			Name:      "work_duration",
			Help:      "How long processing an item from workqueue" + name + " takes.",
		}),
		addTimes:             map[t]time.Time{},
		processingStartTimes: map[t]time.Time{},
	}

	prometheus.Register(ret.depth)
	prometheus.Register(ret.adds)
	prometheus.Register(ret.latency)
	prometheus.Register(ret.workDuration)

	return ret
}
Ejemplo n.º 12
0
func (group *Group) GetSummary(name string, description string) prometheus.Summary {
	summary := group.Summaries[name]

	if summary == nil {
		summary = prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace: "mongodb",
			Subsystem: group.Name,
			Name:      name,
			Help:      description,
		})
		group.Summaries[name] = summary
	}

	return summary
}
Ejemplo n.º 13
0
func (p *PrometheusSink) AddSample(parts []string, val float32) {
	p.mu.Lock()
	defer p.mu.Unlock()
	key := p.flattenKey(parts)
	g, ok := p.summaries[key]
	if !ok {
		g = prometheus.NewSummary(prometheus.SummaryOpts{
			Name:   key,
			Help:   key,
			MaxAge: 10 * time.Second,
		})
		prometheus.MustRegister(g)
		p.summaries[key] = g
	}
	g.Observe(float64(val))
}
Ejemplo n.º 14
0
func (c *SummaryContainer) Get(metricName string, labels prometheus.Labels) prometheus.Summary {
	hash := hashNameAndLabels(metricName, labels)
	summary, ok := c.Elements[hash]
	if !ok {
		summary = prometheus.NewSummary(
			prometheus.SummaryOpts{
				Name:        metricName,
				Help:        defaultHelp,
				ConstLabels: labels,
			})
		c.Elements[hash] = summary
		if err := prometheus.Register(summary); err != nil {
			log.Fatalf(regErrF, metricName, err)
		}
	}
	return summary
}
Ejemplo n.º 15
0
// NewNotificationHandler constructs a new NotificationHandler.
func NewNotificationHandler(o *NotificationHandlerOptions) *NotificationHandler {
	return &NotificationHandler{
		alertmanagerURL:      strings.TrimRight(o.AlertmanagerURL, "/"),
		pendingNotifications: make(chan NotificationReqs, o.QueueCapacity),

		httpClient: httputil.NewDeadlineClient(o.Deadline, nil),

		notificationLatency: prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "latency_milliseconds",
			Help:      "Latency quantiles for sending alert notifications (not including dropped notifications).",
		}),
		notificationErrors: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "errors_total",
			Help:      "Total number of errors sending alert notifications.",
		}),
		notificationDropped: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "dropped_total",
			Help:      "Total number of alert notifications dropped due to alert manager missing in configuration.",
		}),
		notificationsQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "queue_length",
			Help:      "The number of alert notifications in the queue.",
		}),
		notificationsQueueCapacity: prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, subsystem, "queue_capacity"),
				"The capacity of the alert notifications queue.",
				nil, nil,
			),
			prometheus.GaugeValue,
			float64(o.QueueCapacity),
		),
		stopped: make(chan struct{}),
	}
}
Ejemplo n.º 16
0
func ExampleSummary() {
	temps := prometheus.NewSummary(prometheus.SummaryOpts{
		Name:       "pond_temperature_celsius",
		Help:       "The temperature of the frog pond.",
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	})

	// Simulate some observations.
	for i := 0; i < 1000; i++ {
		temps.Observe(30 + math.Floor(120*math.Sin(float64(i)*0.1))/10)
	}

	// Just for demonstration, let's check the state of the summary by
	// (ab)using its Write method (which is usually only used by Prometheus
	// internally).
	metric := &dto.Metric{}
	temps.Write(metric)
	fmt.Println(proto.MarshalTextString(metric))

	// Output:
	// summary: <
	//   sample_count: 1000
	//   sample_sum: 29969.50000000001
	//   quantile: <
	//     quantile: 0.5
	//     value: 31.1
	//   >
	//   quantile: <
	//     quantile: 0.9
	//     value: 41.3
	//   >
	//   quantile: <
	//     quantile: 0.99
	//     value: 41.9
	//   >
	// >
}
Ejemplo n.º 17
0
var (
	gceSDScrapesCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "gce_sd_scrapes_total",
			Help:      "The number of GCE-SD scrapes.",
		})
	gceSDScrapeFailuresCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "gce_sd_scrape_failures_total",
			Help:      "The number of GCE-SD scrape failures.",
		})
	gceSDScrapeDuration = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Namespace: namespace,
			Name:      "gce_sd_scrape_duration",
			Help:      "The duration of a GCE-SD scrape in seconds.",
		})
)

func init() {
	prometheus.MustRegister(gceSDScrapesCount)
	prometheus.MustRegister(gceSDScrapeFailuresCount)
	prometheus.MustRegister(gceSDScrapeDuration)
}

// GCEDiscovery periodically performs GCE-SD requests. It implements
// the TargetProvider interface.
type GCEDiscovery struct {
	project      string
	zone         string
Ejemplo n.º 18
0
import (
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	schedulerSubsystem = "mesos_scheduler"
)

var (
	QueueWaitTime = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "queue_wait_time_microseconds",
			Help:      "Launch queue wait time in microseconds",
		},
	)
	BindLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "bind_latency_microseconds",
			Help:      "Latency in microseconds between pod-task launch and pod binding.",
		},
	)
	StatusUpdates = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: schedulerSubsystem,
			Name:      "status_updates",
			Help:      "Counter of TaskStatus updates, broken out by source, reason, state.",
Ejemplo n.º 19
0
// newPersistence returns a newly allocated persistence backed by local disk storage, ready to use.
func newPersistence(basePath string, dirty, pedanticChecks bool, shouldSync syncStrategy) (*persistence, error) {
	dirtyPath := filepath.Join(basePath, dirtyFileName)
	versionPath := filepath.Join(basePath, versionFileName)

	if versionData, err := ioutil.ReadFile(versionPath); err == nil {
		if persistedVersion, err := strconv.Atoi(strings.TrimSpace(string(versionData))); err != nil {
			return nil, fmt.Errorf("cannot parse content of %s: %s", versionPath, versionData)
		} else if persistedVersion != Version {
			return nil, fmt.Errorf("found storage version %d on disk, need version %d - please wipe storage or run a version of Prometheus compatible with storage version %d", persistedVersion, Version, persistedVersion)
		}
	} else if os.IsNotExist(err) {
		// No version file found. Let's create the directory (in case
		// it's not there yet) and then check if it is actually
		// empty. If not, we have found an old storage directory without
		// version file, so we have to bail out.
		if err := os.MkdirAll(basePath, 0700); err != nil {
			return nil, err
		}
		fis, err := ioutil.ReadDir(basePath)
		if err != nil {
			return nil, err
		}
		if len(fis) > 0 {
			return nil, fmt.Errorf("could not detect storage version on disk, assuming version 0, need version %d - please wipe storage or run a version of Prometheus compatible with storage version 0", Version)
		}
		// Finally we can write our own version into a new version file.
		file, err := os.Create(versionPath)
		if err != nil {
			return nil, err
		}
		defer file.Close()
		if _, err := fmt.Fprintf(file, "%d\n", Version); err != nil {
			return nil, err
		}
	} else {
		return nil, err
	}

	fLock, dirtyfileExisted, err := flock.New(dirtyPath)
	if err != nil {
		log.Errorf("Could not lock %s, Prometheus already running?", dirtyPath)
		return nil, err
	}
	if dirtyfileExisted {
		dirty = true
	}

	archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath)
	if err != nil {
		return nil, err
	}
	archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath)
	if err != nil {
		return nil, err
	}

	p := &persistence{
		basePath: basePath,

		archivedFingerprintToMetrics:   archivedFingerprintToMetrics,
		archivedFingerprintToTimeRange: archivedFingerprintToTimeRange,

		indexingQueue:   make(chan indexingOp, indexingQueueCapacity),
		indexingStopped: make(chan struct{}),
		indexingFlush:   make(chan chan int),

		indexingQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "indexing_queue_length",
			Help:      "The number of metrics waiting to be indexed.",
		}),
		indexingQueueCapacity: prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, subsystem, "indexing_queue_capacity"),
				"The capacity of the indexing queue.",
				nil, nil,
			),
			prometheus.GaugeValue,
			float64(indexingQueueCapacity),
		),
		indexingBatchSizes: prometheus.NewSummary(
			prometheus.SummaryOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "indexing_batch_sizes",
				Help:      "Quantiles for indexing batch sizes (number of metrics per batch).",
			},
		),
		indexingBatchDuration: prometheus.NewSummary(
			prometheus.SummaryOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "indexing_batch_duration_milliseconds",
				Help:      "Quantiles for batch indexing duration in milliseconds.",
			},
		),
		checkpointDuration: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "checkpoint_duration_milliseconds",
			Help:      "The duration (in milliseconds) it took to checkpoint in-memory metrics and head chunks.",
		}),
		dirtyCounter: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "inconsistencies_total",
			Help:      "A counter incremented each time an inconsistency in the local storage is detected. If this is greater zero, restart the server as soon as possible.",
		}),
		dirty:          dirty,
		pedanticChecks: pedanticChecks,
		dirtyFileName:  dirtyPath,
		fLock:          fLock,
		shouldSync:     shouldSync,
		// Create buffers of length 3*chunkLenWithHeader by default because that is still reasonably small
		// and at the same time enough for many uses. The contract is to never return buffer smaller than
		// that to the pool so that callers can rely on a minimum buffer size.
		bufPool: sync.Pool{New: func() interface{} { return make([]byte, 0, 3*chunkLenWithHeader) }},
	}

	if p.dirty {
		// Blow away the label indexes. We'll rebuild them later.
		if err := index.DeleteLabelPairFingerprintIndex(basePath); err != nil {
			return nil, err
		}
		if err := index.DeleteLabelNameLabelValuesIndex(basePath); err != nil {
			return nil, err
		}
	}
	labelPairToFingerprints, err := index.NewLabelPairFingerprintIndex(basePath)
	if err != nil {
		return nil, err
	}
	labelNameToLabelValues, err := index.NewLabelNameLabelValuesIndex(basePath)
	if err != nil {
		return nil, err
	}
	p.labelPairToFingerprints = labelPairToFingerprints
	p.labelNameToLabelValues = labelNameToLabelValues

	return p, nil
}
Ejemplo n.º 20
0
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/log"
	"github.com/prometheus/common/model"
	"golang.org/x/net/context"
	"gopkg.in/fsnotify.v1"
	"gopkg.in/yaml.v2"

	"github.com/prometheus/prometheus/config"
)

const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath"

var (
	fileSDScanDuration = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Namespace: namespace,
			Name:      "sd_file_scan_duration_seconds",
			Help:      "The duration of the File-SD scan in seconds.",
		})
	fileSDReadErrorsCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "sd_file_read_errors_total",
			Help:      "The number of File-SD read errors.",
		})
)

func init() {
	prometheus.MustRegister(fileSDScanDuration)
	prometheus.MustRegister(fileSDReadErrorsCount)
}
Ejemplo n.º 21
0
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package wal

import "github.com/prometheus/client_golang/prometheus"

var (
	syncDurations = prometheus.NewSummary(prometheus.SummaryOpts{
		Namespace: "etcd",
		Subsystem: "wal",
		Name:      "fsync_durations_microseconds",
		Help:      "The latency distributions of fsync called by wal.",
	})
	lastIndexSaved = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "etcd",
		Subsystem: "wal",
		Name:      "last_index_saved",
		Help:      "The index of the last entry saved by wal.",
	})
)

func init() {
	prometheus.MustRegister(syncDurations)
	prometheus.MustRegister(lastIndexSaved)
}
Ejemplo n.º 22
0
// InstrumentRouteFunc works like Prometheus' InstrumentHandlerFunc but wraps
// the go-restful RouteFunction instead of a HandlerFunc
func InstrumentRouteFunc(handlerName string, routeFunc restful.RouteFunction) restful.RouteFunction {
	opts := prometheus.SummaryOpts{
		Subsystem:   "http",
		ConstLabels: prometheus.Labels{"handler": handlerName},
	}

	reqCnt := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem:   opts.Subsystem,
			Name:        "requests_total",
			Help:        "Total number of HTTP requests made.",
			ConstLabels: opts.ConstLabels,
		},
		instLabels,
	)

	opts.Name = "request_duration_microseconds"
	opts.Help = "The HTTP request latencies in microseconds."
	reqDur := prometheus.NewSummary(opts)

	opts.Name = "request_size_bytes"
	opts.Help = "The HTTP request sizes in bytes."
	reqSz := prometheus.NewSummary(opts)

	opts.Name = "response_size_bytes"
	opts.Help = "The HTTP response sizes in bytes."
	resSz := prometheus.NewSummary(opts)

	regReqCnt := prometheus.MustRegisterOrGet(reqCnt).(*prometheus.CounterVec)
	regReqDur := prometheus.MustRegisterOrGet(reqDur).(prometheus.Summary)
	regReqSz := prometheus.MustRegisterOrGet(reqSz).(prometheus.Summary)
	regResSz := prometheus.MustRegisterOrGet(resSz).(prometheus.Summary)

	return restful.RouteFunction(func(request *restful.Request, response *restful.Response) {
		now := time.Now()

		delegate := &responseWriterDelegator{ResponseWriter: response.ResponseWriter}
		out := make(chan int)
		urlLen := 0
		if request.Request.URL != nil {
			urlLen = len(request.Request.URL.String())
		}
		go computeApproximateRequestSize(request.Request, out, urlLen)

		_, cn := response.ResponseWriter.(http.CloseNotifier)
		_, fl := response.ResponseWriter.(http.Flusher)
		_, hj := response.ResponseWriter.(http.Hijacker)
		_, rf := response.ResponseWriter.(io.ReaderFrom)
		var rw http.ResponseWriter
		if cn && fl && hj && rf {
			rw = &fancyResponseWriterDelegator{delegate}
		} else {
			rw = delegate
		}
		response.ResponseWriter = rw

		routeFunc(request, response)

		elapsed := float64(time.Since(now)) / float64(time.Microsecond)

		method := strings.ToLower(request.Request.Method)
		code := strconv.Itoa(delegate.status)
		regReqCnt.WithLabelValues(method, code).Inc()
		regReqDur.Observe(elapsed)
		regResSz.Observe(float64(delegate.written))
		regReqSz.Observe(float64(<-out))
	})
}
Ejemplo n.º 23
0
			Namespace: "eventer",
			Subsystem: "scraper",
			Name:      "last_time_seconds",
			Help:      "Last time of event since unix epoch in seconds.",
		})
	totalEventsNum = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "eventer",
			Subsystem: "scraper",
			Name:      "events_total_number",
			Help:      "The total number of events.",
		})
	scrapEventsDuration = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Namespace: "eventer",
			Subsystem: "scraper",
			Name:      "duration_microseconds",
			Help:      "Time spent scraping events in microseconds.",
		})
)

func init() {
	prometheus.MustRegister(lastEventTimestamp)
	prometheus.MustRegister(totalEventsNum)
	prometheus.MustRegister(scrapEventsDuration)
}

// Implements core.EventSource interface.
type KubernetesEventSource struct {
	// Large local buffer, periodically read.
	localEventsBuffer chan *kubeapi.Event
Ejemplo n.º 24
0
			Help:      "Number of runs that were retried",
			Name:      "runs_retry",
		}),
		"grader_runs_je": prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "quark",
			Subsystem: "grader",
			Help:      "Number of runs that were JE",
			Name:      "runs_je",
		}),
	}

	summaries = map[string]prometheus.Summary{
		"grader_queue_delay_seconds": prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace:  "quark",
			Subsystem:  "grader",
			Help:       "The duration of a run in the queue",
			Name:       "queue_delay_seconds",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		}),
	}
)

func setupMetrics(ctx *grader.Context) {
	for _, gauge := range gauges {
		prometheus.MustRegister(gauge)
	}
	for _, counter := range counters {
		prometheus.MustRegister(counter)
	}
	for _, summary := range summaries {
		prometheus.MustRegister(summary)
Ejemplo n.º 25
0
	cacheMissCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "etcd_helper_cache_miss_count",
			Help: "Counter of etcd helper cache miss.",
		},
	)
	cacheEntryCounter = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "etcd_helper_cache_entry_count",
			Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " +
				"because two concurrent threads can miss the cache and generate the same entry twice.",
		},
	)
	cacheGetLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Name: "etcd_request_cache_get_latencies_summary",
			Help: "Latency in microseconds of getting an object from etcd cache",
		},
	)
	cacheAddLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Name: "etcd_request_cache_add_latencies_summary",
			Help: "Latency in microseconds of adding an object to etcd cache",
		},
	)
	etcdRequestLatenciesSummary = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name: "etcd_request_latencies_summary",
			Help: "Etcd request latency summary in microseconds for each operation and object type.",
		},
		[]string{"operation", "type"},
	)
Ejemplo n.º 26
0
			Name:      "rule_evaluation_failures_total",
			Help:      "The total number of rule evaluation failures.",
		},
		[]string{"rule_type"},
	)
	evalTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "rule_evaluations_total",
			Help:      "The total number of rule evaluations.",
		},
		[]string{"rule_type"},
	)
	iterationDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Namespace:  namespace,
		Name:       "evaluator_duration_seconds",
		Help:       "The duration of rule group evaluations.",
		Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
	})
	iterationsSkipped = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: namespace,
		Name:      "evaluator_iterations_skipped_total",
		Help:      "The total number of rule group evaluations skipped due to throttled metric storage.",
	})
	iterationsScheduled = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: namespace,
		Name:      "evaluator_iterations_total",
		Help:      "The total number of scheduled rule group evaluations, whether skipped or executed.",
	})
)

func init() {
Ejemplo n.º 27
0
package metrics

import (
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

const schedulerSubsystem = "scheduler"

var (
	E2eSchedulingLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "e2e_scheduling_latency_microseconds",
			Help:      "E2e scheduling latency (scheduling algorith + binding)",
		},
	)
	SchedulingAlgorithmLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "scheduling_algorithm_latency_microseconds",
			Help:      "Scheduling algorithm latency",
		},
	)
	BindingLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: schedulerSubsystem,
			Name:      "binding_latency_microseconds",
			Help:      "Binding latency",
Ejemplo n.º 28
0
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snap

import "github.com/prometheus/client_golang/prometheus"

var (
	// TODO: save_fsync latency?
	// TODO: save_encoding latency?
	saveDurations = prometheus.NewSummary(prometheus.SummaryOpts{
		Namespace: "etcd",
		Subsystem: "snapshot",
		Name:      "save_total_durations_microseconds",
		Help:      "The total latency distributions of save called by snapshot.",
	})
)

func init() {
	prometheus.MustRegister(saveDurations)
}
Ejemplo n.º 29
0
			Namespace: namespace,
			Name:      "rule_evaluation_duration_milliseconds",
			Help:      "The duration for a rule to execute.",
		},
		[]string{ruleTypeLabel},
	)
	evalFailures = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "rule_evaluation_failures_total",
			Help:      "The total number of rule evaluation failures.",
		},
	)
	iterationDuration = prometheus.NewSummary(prometheus.SummaryOpts{
		Namespace:  namespace,
		Name:       "evaluator_duration_milliseconds",
		Help:       "The duration for all evaluations to execute.",
		Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
	})
)

func init() {
	prometheus.MustRegister(iterationDuration)
	prometheus.MustRegister(evalFailures)
	prometheus.MustRegister(evalDuration)
}

// A Rule encapsulates a vector expression which is evaluated at a specified
// interval and acted upon (currently either recorded or used for alerting).
type Rule interface {
	// Name returns the name of the rule.
	Name() string
Ejemplo n.º 30
0
const (
	KubeletSubsystem              = "kubelet"
	PodWorkerLatencyKey           = "pod_worker_latency_microseconds"
	SyncPodsLatencyKey            = "sync_pods_latency_microseconds"
	PodStartLatencyKey            = "pod_start_latency_microseconds"
	PodStatusLatencyKey           = "generate_pod_status_latency_microseconds"
	ContainerManagerOperationsKey = "container_manager_latency_microseconds"
	DockerOperationsKey           = "docker_operations_latency_microseconds"
	DockerErrorsKey               = "docker_errors"
)

var (
	ContainersPerPodCount = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: KubeletSubsystem,
			Name:      "containers_per_pod_count",
			Help:      "The number of containers per pod.",
		},
	)
	PodWorkerLatency = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Subsystem: KubeletSubsystem,
			Name:      PodWorkerLatencyKey,
			Help:      "Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
		},
		[]string{"operation_type"},
	)
	SyncPodsLatency = prometheus.NewSummary(
		prometheus.SummaryOpts{
			Subsystem: KubeletSubsystem,
			Name:      SyncPodsLatencyKey,