Exemplo n.º 1
0
// NewGaugeDiff builds a GaugeDiff for the metric called name.
//
// Only the Delta gauge is registered in r. The Absolute and Previous gauges
// are each registered in their own freshly created registry, so they do not
// appear in r.
func NewGaugeDiff(name string, r metrics.Registry) *GaugeDiff {
	gd := new(GaugeDiff)
	gd.Delta = metrics.NewRegisteredGauge(name, r)
	gd.Absolute = metrics.NewRegisteredGauge(name+"-absolute", metrics.NewRegistry())
	gd.Previous = metrics.NewRegisteredGauge(name+"-previous", metrics.NewRegistry())
	return gd
}
Exemplo n.º 2
0
// ExtraNewMetricRAM builds a MetricRAM that carries the supplied extra updater.
//
// The "memory.memory-free" gauge is registered in r. The
// "memory.memory-total" gauge goes into a separate, freshly created registry
// and therefore does not appear in r.
func ExtraNewMetricRAM(r metrics.Registry, extra RAMUpdater) *MetricRAM {
	ram := new(MetricRAM)
	ram.Free = metrics.NewRegisteredGauge("memory.memory-free", r)
	ram.Total = metrics.NewRegisteredGauge("memory.memory-total", metrics.NewRegistry())
	ram.Extra = extra
	return ram
}
Exemplo n.º 3
0
Arquivo: index.go Projeto: vadv/ostent
// GetOrRegisterPrivateDF returns the MetricDF kept for the filesystem fs,
// creating and registering a new one when none exists yet.
//
// The lookup key is "root" for the filesystem mounted at "/"; for any other
// filesystem it is the device name with a leading "/dev/" removed and every
// remaining "/" replaced by "-". The whole operation runs under PrivateMutex.
//
// Of the gauges created here only free, reserved and used are registered in
// ir.Registry; the others are registered in a scratch registry created for
// this call.
func (ir *IndexRegistry) GetOrRegisterPrivateDF(fs sigar.FileSystem) operating.MetricDF {
	ir.PrivateMutex.Lock()
	defer ir.PrivateMutex.Unlock()

	devName := "root"
	if fs.DirName != "/" {
		stripped := strings.TrimPrefix(fs.DevName, "/dev/")
		devName = strings.Replace(stripped, "/", "-", -1)
	}

	if found := ir.PrivateDFRegistry.Get(devName); found != nil {
		return found.(operating.MetricDF)
	}

	name := func(tail string) string {
		return fmt.Sprintf("df-%s.df_complex-%s", devName, tail)
	}
	shared := ir.Registry
	scratch := metrics.NewRegistry()

	df := &operating.DF{}
	df.DevName = &operating.StandardMetricString{} // unregistered
	df.DirName = &operating.StandardMetricString{} // unregistered
	df.Free = metrics.NewRegisteredGaugeFloat64(name("free"), shared)
	df.Reserved = metrics.NewRegisteredGaugeFloat64(name("reserved"), shared)
	df.Total = metrics.NewRegisteredGauge(name("total"), scratch)
	df.Used = metrics.NewRegisteredGaugeFloat64(name("used"), shared)
	df.Avail = metrics.NewRegisteredGauge(name("avail"), scratch)
	df.UsePercent = metrics.NewRegisteredGaugeFloat64(name("usepercent"), scratch)
	df.Inodes = metrics.NewRegisteredGauge(name("inodes"), scratch)
	df.Iused = metrics.NewRegisteredGauge(name("iused"), scratch)
	df.Ifree = metrics.NewRegisteredGauge(name("ifree"), scratch)
	df.IusePercent = metrics.NewRegisteredGaugeFloat64(name("iusepercent"), scratch)

	entry := operating.MetricDF{DF: df}
	// The error is deliberately ignored: Register errs when the type is not
	// derived from (go-)metrics types.
	_ = ir.PrivateDFRegistry.Register(devName, entry)
	return entry
}
Exemplo n.º 4
0
// NewMetricRAM builds an operating.MetricRAM whose extra updater holds the
// used, buffered and cached memory gauges, all registered in r.
func NewMetricRAM(r metrics.Registry) *operating.MetricRAM {
	extra := new(ExtraMetricRAM)
	extra.Used = metrics.NewRegisteredGauge("memory.memory-used", r)
	extra.Buffered = metrics.NewRegisteredGauge("memory.memory-buffered", r)
	extra.Cached = metrics.NewRegisteredGauge("memory.memory-cached", r)
	return operating.ExtraNewMetricRAM(r, extra)
}
Exemplo n.º 5
0
// Start calculating and reporting statistics on the repo and tiles.
//
// A background goroutine wakes every two minutes and updates three metrics in
// metrics.DefaultRegistry:
//
//   - stats.tests.bench_runs_per_changelist: the fraction of (trace, commit)
//     cells in the current tile that are not missing,
//   - stats.skp.update_latency: the time elapsed since the last SKP commit,
//   - stats.commits.total: the number of commits known to git.
//
// The goroutine runs for the lifetime of the process; Start itself returns
// immediately.
//
// We presume the git.Update(true) is called somewhere else, usually this is done
// in the trace/db.Builder, so the repo is always as good as the loaded tiles.
func Start(nanoTileStore *db.Builder, git *gitinfo.GitInfo) {
	coverage := metrics.NewRegisteredGaugeFloat64("stats.tests.bench_runs_per_changelist", metrics.DefaultRegistry)
	skpLatency := metrics.NewRegisteredTimer("stats.skp.update_latency", metrics.DefaultRegistry)
	commits := metrics.NewRegisteredGauge("stats.commits.total", metrics.DefaultRegistry)

	go func() {
		for range time.Tick(2 * time.Minute) {
			tile := nanoTileStore.GetTile()
			numCommits := tile.LastCommitIndex() + 1
			numTraces := len(tile.Traces)
			total := 0
			for _, tr := range tile.Traces {
				for i := 0; i < numCommits; i++ {
					if !tr.IsMissing(i) {
						total++
					}
				}
			}
			// A tile with no traces or no commits would make the ratio
			// 0/0 = NaN; report zero coverage instead of pushing NaN into
			// the gauge.
			cov := 0.0
			if cells := numCommits * numTraces; cells > 0 {
				cov = float64(total) / float64(cells)
			}
			glog.Info("Coverage: ", cov)
			coverage.Update(cov)

			last, err := git.LastSkpCommit()
			if err != nil {
				// Warningf, not Warning: the message carries a format verb.
				glog.Warningf("Failed to read last SKP commit: %s", err)
				continue
			}
			skpLatency.Update(time.Since(last))
			commits.Update(int64(git.NumCommits()))
		}
	}()
}
Exemplo n.º 6
0
// monitorIssueTracker reads the counts for all the types of issues in the skia
// issue tracker (code.google.com/p/skia) and stuffs the counts into Graphite.
//
// It creates a dedicated metrics registry that is reported to the Graphite
// server named by *graphiteServer under the "issues" prefix, registers one
// gauge per issue status, and then polls the project hosting API once per
// ISSUE_TRACKER_PERIOD, forever. A request or decoding failure for one status
// is logged and that status is skipped for the current round. The function
// never returns; failing to resolve the Graphite address is fatal.
func monitorIssueTracker() {
	c := &http.Client{
		Transport: &http.Transport{
			Dial: dialTimeout,
		},
	}

	if *useMetadata {
		*apikey = metadata.Must(metadata.ProjectGet(metadata.APIKEY))
	}

	// Create a new metrics registry for the issue tracker metrics.
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	issueRegistry := metrics.NewRegistry()
	go graphite.Graphite(issueRegistry, common.SAMPLE_PERIOD, "issues", addr)

	// IssueStatus has all the info we need to capture and record a single issue status. I.e. capture
	// the count of all issues with a status of "New".
	type IssueStatus struct {
		Name   string
		Metric metrics.Gauge
		URL    string
	}

	allIssueStatusLabels := []string{
		"New", "Accepted", "Unconfirmed", "Started", "Fixed", "Verified", "Invalid", "WontFix", "Done", "Available", "Assigned",
	}

	issueStatus := make([]*IssueStatus, 0, len(allIssueStatusLabels))
	for _, issueName := range allIssueStatusLabels {
		issueStatus = append(issueStatus, &IssueStatus{
			Name:   issueName,
			Metric: metrics.NewRegisteredGauge(strings.ToLower(issueName), issueRegistry),
			URL:    "https://www.googleapis.com/projecthosting/v2/projects/skia/issues?fields=totalResults&key=" + *apikey + "&status=" + issueName,
		})
	}

	liveness := imetrics.NewLiveness("issue-tracker")
	for range time.Tick(ISSUE_TRACKER_PERIOD) {
		for _, issue := range issueStatus {
			resp, err := c.Get(issue.URL)
			if err != nil {
				// resp is nil when Get fails, so it must not be touched.
				glog.Errorf("Failed to retrieve issue count for status %s: %s", issue.Name, err)
				continue
			}
			jsonResp := map[string]int64{}
			err = json.NewDecoder(resp.Body).Decode(&jsonResp)
			// The body is no longer needed once decoding has been attempted;
			// close it on both the success and the failure path.
			util.Close(resp.Body)
			if err != nil {
				glog.Warningf("Failed to decode JSON response: %s", err)
				continue
			}
			issue.Metric.Update(jsonResp["totalResults"])
			glog.Infof("Num Issues: %s - %d", issue.Name, jsonResp["totalResults"])
		}
		liveness.Update()
	}
}
Exemplo n.º 7
0
// newConsumerMetrics creates the metrics set for the consumer called
// consumerName, registering every timer, gauge and counter in
// metrics.DefaultRegistry.
//
// Metric names have the form "<prefix><Metric>-<consumerName>". Spaces are
// trimmed from both ends of prefix, and a non-empty prefix gets a trailing
// dot appended if it lacks one.
func newConsumerMetrics(consumerName, prefix string) *ConsumerMetrics {
	// Ensure prefix ends with a dot (.) so it plays nice with statsd/graphite
	prefix = strings.Trim(prefix, " ")
	if prefix != "" && !strings.HasSuffix(prefix, ".") {
		prefix += "."
	}

	reg := metrics.DefaultRegistry
	return &ConsumerMetrics{
		registry:     reg,
		consumerName: consumerName,
		prefix:       prefix,

		fetchersIdleTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("%sFetchersIdleTime-%s", prefix, consumerName), reg),
		fetchDurationTimer: metrics.NewRegisteredTimer(fmt.Sprintf("%sFetchDuration-%s", prefix, consumerName), reg),

		numWorkerManagersGauge: metrics.NewRegisteredGauge(fmt.Sprintf("%sNumWorkerManagers-%s", prefix, consumerName), reg),
		activeWorkersCounter:   metrics.NewRegisteredCounter(fmt.Sprintf("%sWMsActiveWorkers-%s", prefix, consumerName), reg),
		pendingWMsTasksCounter: metrics.NewRegisteredCounter(fmt.Sprintf("%sWMsPendingTasks-%s", prefix, consumerName), reg),
		taskTimeoutCounter:     metrics.NewRegisteredCounter(fmt.Sprintf("%sTaskTimeouts-%s", prefix, consumerName), reg),
		wmsBatchDurationTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("%sWMsBatchDuration-%s", prefix, consumerName), reg),
		wmsIdleTimer:           metrics.NewRegisteredTimer(fmt.Sprintf("%sWMsIdleTime-%s", prefix, consumerName), reg),

		numFetchedMessagesCounter:  metrics.NewRegisteredCounter(fmt.Sprintf("%sFetchedMessages-%s", prefix, consumerName), reg),
		numConsumedMessagesCounter: metrics.NewRegisteredCounter(fmt.Sprintf("%sConsumedMessages-%s", prefix, consumerName), reg),
		numAcksCounter:             metrics.NewRegisteredCounter(fmt.Sprintf("%sAcks-%s", prefix, consumerName), reg),
		topicPartitionLag:          make(map[TopicAndPartition]metrics.Gauge),
		reportingStopChannels:      make([]chan struct{}, 0),
	}
}
Exemplo n.º 8
0
// main starts the probe server: it initializes common logging/metrics under
// the "probeserver" name, launches the issue tracker monitor in a background
// goroutine, sets up a separate metrics registry for the probes that is
// reported to Graphite, reads the probe configuration, and then runs the
// probes once immediately and again on every *runEvery tick, forever.
// Failure to create the auth client, resolve the Graphite address, or read
// the config is fatal.
func main() {
	defer common.LogPanic()
	common.InitWithMetrics("probeserver", graphiteServer)

	client, err := auth.NewDefaultJWTServiceAccountClient("https://www.googleapis.com/auth/userinfo.email")
	if err != nil {
		glog.Fatalf("Failed to create client for talking to the issue tracker: %s", err)
	}
	// The issue tracker monitor runs concurrently with the probes below.
	go monitorIssueTracker(client)
	glog.Infoln("Looking for Graphite server.")
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	glog.Infoln("Found Graphite server.")

	liveness := imetrics.NewLiveness("probes")

	// We have two sets of metrics, one for the probes and one for the probe
	// server itself. The server's metrics are handled by the
	// common.InitWithMetrics() call above.
	probeRegistry := metrics.NewRegistry()
	go graphite.Graphite(probeRegistry, common.SAMPLE_PERIOD, *prefix, addr)

	// TODO(jcgregorio) Monitor config file and reload if it changes.
	cfg, err := readConfigFiles(*config)
	if err != nil {
		glog.Fatalln("Failed to read config file: ", err)
	}
	glog.Infoln("Successfully read config file.")
	// Register a failure gauge and a latency gauge for each probe.
	// NOTE(review): the assignments only stick if cfg's values are pointers
	// (or otherwise reference types) — confirm against readConfigFiles.
	for name, probe := range cfg {
		probe.failure = metrics.NewRegisteredGauge(name+".failure", probeRegistry)
		probe.latency = metrics.NewRegisteredGauge(name+".latency", probeRegistry)
	}

	// Create a client that uses our dialer with a timeout.
	c := &http.Client{
		Transport: &http.Transport{
			Dial: dialTimeout,
		},
	}
	// Run one round right away instead of waiting for the first tick; note
	// that liveness is only updated after the ticked rounds.
	probeOneRound(cfg, c)
	for _ = range time.Tick(*runEvery) {
		probeOneRound(cfg, c)
		liveness.Update()
	}
}
Exemplo n.º 9
0
// monitorIssueTracker polls the Skia issue tracker
// (bugs.chromium.org/p/skia) for the number of issues in each status and
// publishes those counts to Graphite.
//
// The counts live in a dedicated registry reported under the "issues" prefix.
// All requests go through c. One polling round runs per ISSUE_TRACKER_PERIOD
// and the function never returns; a failure for one status is logged and that
// status is skipped for the round.
func monitorIssueTracker(c *http.Client) {
	// The issue tracker metrics get a registry of their own.
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	issueRegistry := metrics.NewRegistry()
	go graphite.Graphite(issueRegistry, common.SAMPLE_PERIOD, "issues", addr)

	// IssueStatus bundles what is needed to fetch and record the count of
	// issues with one particular status, e.g. "New".
	type IssueStatus struct {
		Name   string
		Metric metrics.Gauge
		URL    string
	}

	statusLabels := []string{
		"New", "Accepted", "Unconfirmed", "Started", "Fixed", "Verified", "Invalid", "WontFix", "Done", "Available", "Assigned",
	}

	tracked := []*IssueStatus{}
	for _, label := range statusLabels {
		query := url.Values{}
		query.Set("fields", "totalResults")
		query.Set("status", label)

		entry := &IssueStatus{Name: label}
		entry.Metric = metrics.NewRegisteredGauge(strings.ToLower(label), issueRegistry)
		entry.URL = issues.MONORAIL_BASE_URL + "?" + query.Encode()
		tracked = append(tracked, entry)
	}

	// refresh fetches the current count for one status and stores it in the
	// status' gauge.
	refresh := func(st *IssueStatus) {
		resp, err := c.Get(st.URL)
		if err != nil {
			glog.Errorf("Failed to retrieve response from %s: %s", st.URL, err)
			return
		}
		defer util.Close(resp.Body)

		counts := map[string]int64{}
		if err := json.NewDecoder(resp.Body).Decode(&counts); err != nil {
			glog.Warningf("Failed to decode JSON response: %s", err)
			return
		}
		st.Metric.Update(counts["totalResults"])
		glog.Infof("Num Issues: %s - %d", st.Name, counts["totalResults"])
	}

	liveness := imetrics.NewLiveness("issue-tracker")
	for range time.Tick(ISSUE_TRACKER_PERIOD) {
		for _, st := range tracked {
			refresh(st)
		}
		liveness.Update()
	}
}
Exemplo n.º 10
0
// topicAndPartitionLag returns the lag gauge for the given topic and
// partition, creating and registering it in this.registry on first use. The
// gauge is named "<prefix>Lag-<consumerName>-<topicAndPartition>".
//
// Both the lookup and the insertion run under metricLock. The map must not
// be read outside the lock while another goroutine may be inserting into it:
// concurrent read and write of a Go map is a data race.
func (this *ConsumerMetrics) topicAndPartitionLag(topic string, partition int32) metrics.Gauge {
	topicAndPartition := TopicAndPartition{Topic: topic, Partition: partition}

	var lag metrics.Gauge
	inLock(&this.metricLock, func() {
		var ok bool
		lag, ok = this.topicPartitionLag[topicAndPartition]
		if !ok {
			lag = metrics.NewRegisteredGauge(fmt.Sprintf("%sLag-%s-%s", this.prefix, this.consumerName, &topicAndPartition), this.registry)
			this.topicPartitionLag[topicAndPartition] = lag
		}
	})
	return lag
}
Exemplo n.º 11
0
// newConsumerMetrics creates the metrics set for the consumer called
// consumerName. All metrics are registered in a registry created for this
// consumer, and each metric name is suffixed with "-<consumerName>".
func newConsumerMetrics(consumerName string) *consumerMetrics {
	reg := metrics.NewRegistry()
	return &consumerMetrics{
		registry: reg,

		fetchersIdleTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("FetchersIdleTime-%s", consumerName), reg),
		fetchDurationTimer: metrics.NewRegisteredTimer(fmt.Sprintf("FetchDuration-%s", consumerName), reg),

		numWorkerManagersGauge: metrics.NewRegisteredGauge(fmt.Sprintf("NumWorkerManagers-%s", consumerName), reg),
		activeWorkersCounter:   metrics.NewRegisteredCounter(fmt.Sprintf("WMsActiveWorkers-%s", consumerName), reg),
		pendingWMsTasksCounter: metrics.NewRegisteredCounter(fmt.Sprintf("WMsPendingTasks-%s", consumerName), reg),
		wmsBatchDurationTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("WMsBatchDuration-%s", consumerName), reg),
		wmsIdleTimer:           metrics.NewRegisteredTimer(fmt.Sprintf("WMsIdleTime-%s", consumerName), reg),
	}
}
Exemplo n.º 12
0
// newConsumerMetrics creates the metrics set for the consumer called
// consumerName, registering every timer, gauge and counter in
// metrics.DefaultRegistry.
//
// Metric names have the form "<prefix><Metric>-<consumerName>". Spaces are
// trimmed from both ends of prefix, and a non-empty prefix gets a trailing
// dot appended if it lacks one.
func newConsumerMetrics(consumerName, prefix string) *ConsumerMetrics {
	// Ensure prefix ends with a dot (.) so it plays nice with statsd/graphite
	prefix = strings.Trim(prefix, " ")
	if prefix != "" && !strings.HasSuffix(prefix, ".") {
		prefix += "."
	}

	reg := metrics.DefaultRegistry
	return &ConsumerMetrics{
		registry: reg,

		fetchersIdleTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("%sFetchersIdleTime-%s", prefix, consumerName), reg),
		fetchDurationTimer: metrics.NewRegisteredTimer(fmt.Sprintf("%sFetchDuration-%s", prefix, consumerName), reg),

		numWorkerManagersGauge: metrics.NewRegisteredGauge(fmt.Sprintf("%sNumWorkerManagers-%s", prefix, consumerName), reg),
		activeWorkersCounter:   metrics.NewRegisteredCounter(fmt.Sprintf("%sWMsActiveWorkers-%s", prefix, consumerName), reg),
		pendingWMsTasksCounter: metrics.NewRegisteredCounter(fmt.Sprintf("%sWMsPendingTasks-%s", prefix, consumerName), reg),
		taskTimeoutCounter:     metrics.NewRegisteredCounter(fmt.Sprintf("%sTaskTimeouts-%s", prefix, consumerName), reg),
		wmsBatchDurationTimer:  metrics.NewRegisteredTimer(fmt.Sprintf("%sWMsBatchDuration-%s", prefix, consumerName), reg),
		wmsIdleTimer:           metrics.NewRegisteredTimer(fmt.Sprintf("%sWMsIdleTime-%s", prefix, consumerName), reg),
	}
}
Exemplo n.º 13
0
// NewGaugePercent builds a GaugePercent for the metric called name.
//
// The Percent gauge is registered in r. The Previous gauge is registered in a
// freshly created registry and therefore does not appear in r.
func NewGaugePercent(name string, r metrics.Registry) *GaugePercent {
	gp := new(GaugePercent)
	gp.Percent = metrics.NewRegisteredGaugeFloat64(name, r)
	gp.Previous = metrics.NewRegisteredGauge(name+"-previous", metrics.NewRegistry())
	return gp
}
Exemplo n.º 14
0
// NewMetricSwap returns a MetricSwap whose "swap.swap-free" and
// "swap.swap-used" gauges are both registered in r.
func NewMetricSwap(r metrics.Registry) MetricSwap {
	var swap MetricSwap
	swap.Free = metrics.NewRegisteredGauge("swap.swap-free", r)
	swap.Used = metrics.NewRegisteredGauge("swap.swap-used", r)
	return swap
}
Exemplo n.º 15
0
// dirWatcher watches for changes in the specified dir. The frequency of polling
// is determined by the duration parameter. dirWatcher ensures:
//   - Each app's logs do not exceed the log limit threshold. If they do then the
//     oldest files are deleted (see cleanupAppLogs).
//   - Newly encountered log lines are counted per app and log level and
//     reported through the "logserver.<app>.<level>" gauges.
//
// State from a previous run is loaded once at startup and written back after
// every polling round that saw new or deleted files. The function never
// returns; errors loading state, walking dir, or writing state are fatal.
func dirWatcher(duration time.Duration, dir string) {
	filesToState, appLogLevelToSpace, appLogLevelToCount, lastCompletedRun, err := getPreviousState()
	if err != nil {
		glog.Fatalf("Could not access previous state: %s", err)
	}
	appLogLevelToMetric := make(map[string]metrics.Gauge)
	updatedFiles := false

	// markFn is the filepath.Walk callback: it accounts for every file that
	// is new or was modified since the last completed run.
	markFn := func(path string, fileInfo os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if fileInfo.IsDir() || fileInfo.Mode()&os.ModeSymlink != 0 {
			// We are only interested in watching non-symlink log files.
			// Returning nil for a directory lets the walk continue into it.
			return nil
		}

		prev, exists := filesToState[path]
		if exists && !fileInfo.ModTime().After(lastCompletedRun) {
			// Known file that has not changed since the last run.
			return nil
		}

		glog.Infof("Processing %s", path)
		// Count the lines exactly once and use that same number both for the
		// delta and for the stored state. Counting twice would silently drop
		// any lines appended between the two counts.
		totalLines := getLineCount(path)
		totalSize := fileInfo.Size()

		app, logLevel := getAppAndLogLevel(fileInfo)
		if app != "" && logLevel != "" {
			appLogLevel := fmt.Sprintf("%s.%s", app, logLevel)
			gauge, ok := appLogLevelToMetric[appLogLevel]
			if !ok {
				// First time encountered this app and log level combination.
				// Create a gauge metric for it.
				gauge = metrics.NewRegisteredGauge("logserver."+appLogLevel, metrics.DefaultRegistry)
				appLogLevelToMetric[appLogLevel] = gauge
			}

			// Calculate how many new lines and new disk space usage there
			// is. For a file seen for the first time prev is the zero value,
			// so the deltas equal the totals.
			newLines := totalLines - prev.LineCount
			newSpace := totalSize - prev.Size

			glog.Infof("Processed %d new lines", newLines)
			glog.Infof("Processed %d new bytes", newSpace)

			// Update the logs count metric.
			appLogLevelToCount[appLogLevel] += newLines
			gauge.Update(appLogLevelToCount[appLogLevel])

			// Add the file size to the current space count for this app and
			// log level combination.
			appLogLevelToSpace[appLogLevel] += newSpace

			updatedFiles = true
		}
		filesToState[path] = fileState{LineCount: totalLines, Size: totalSize}
		return nil
	}

	for range time.Tick(duration) {
		if err := filepath.Walk(dir, markFn); err != nil {
			glog.Fatal(err)
		}
		deletedFiles := cleanupAppLogs(dir, appLogLevelToSpace, filesToState)
		if updatedFiles || deletedFiles {
			if err := writeCurrentState(filesToState, appLogLevelToSpace, appLogLevelToCount, time.Now()); err != nil {
				glog.Fatalf("Could not write state: %s", err)
			}
			glog.Info(getPrettyMap(appLogLevelToCount, "AppLogLevels to their line counts"))
			glog.Info(getPrettyMap(appLogLevelToSpace, "AppLogLevels to their disk space"))
		}
		updatedFiles = false
		lastCompletedRun = time.Now()
	}
}
Exemplo n.º 16
0
// newGauge returns a gauge named "ingester.<name>.gauge.<suffix>" that is
// registered in metrics.DefaultRegistry.
func newGauge(name, suffix string) metrics.Gauge {
	fullName := "ingester." + name + ".gauge." + suffix
	return metrics.NewRegisteredGauge(fullName, metrics.DefaultRegistry)
}
Exemplo n.º 17
0
)

const (
	// Clustering parameters.
	// NOTE(review): their exact meaning is not visible here — presumably the
	// number of clusters to compute and a standard deviation threshold used
	// by the clustering run; confirm against the call sites.
	CLUSTER_SIZE   = 50
	CLUSTER_STDDEV = 0.001

	// TRACKED_ITEM_URL_TEMPLATE is used to generate the URL that is
	// embedded in an issue. It is also used to search for issues linked to a
	// specific item (cluster). The format verb is to be replaced with the ID
	// of the tracked item.
	TRACKED_ITEM_URL_TEMPLATE = "https://perf.skia.org/cl/%d"
)

// Package-level alerting metrics, all registered in metrics.DefaultRegistry
// when the package is initialized, plus the shared tile source.
var (
	// The number of clusters with a status of "New".
	newClustersGauge = metrics.NewRegisteredGauge("alerting.new", metrics.DefaultRegistry)

	// The number of times we've successfully done alert clustering.
	runsCounter = metrics.NewRegisteredCounter("alerting.runs", metrics.DefaultRegistry)

	// How long it takes to do a clustering run.
	alertingLatency = metrics.NewRegisteredTimer("alerting.latency", metrics.DefaultRegistry)

	// tileBuilder is the tracedb.Builder where we load Tiles from. It has no
	// initializer here, so it is nil until it is assigned.
	tileBuilder *tracedb.Builder
)

// CombineClusters combines freshly found clusters with existing clusters.
//
//  Algorithm:
//    Run clustering and pick out the "Interesting" clusters.