Example #1
func Init(metrics met.Backend) {
	sec := setting.Cfg.Section("metric_publisher")

	if !sec.Key("enabled").MustBool(false) {
		return
	}

	addr := sec.Key("nsqd_addr").MustString("localhost:4150")
	topic = sec.Key("topic").MustString("metrics")
	cfg := nsq.NewConfig()
	cfg.UserAgent = "probe-ctrl"
	var err error
	globalProducer, err = nsq.NewProducer(addr, cfg)
	if err != nil {
		log.Fatal(0, "failed to initialize nsq producer: %s", err)
	}
	err = globalProducer.Ping()
	if err != nil {
		log.Fatal(0, "can't connect to nsqd: %s", err)
	}
	metricsPublished = metrics.NewCount("metricpublisher.metrics-published")
	messagesPublished = metrics.NewCount("metricpublisher.messages-published")
	messagesSize = metrics.NewMeter("metricpublisher.message_size", 0)
	metricsPerMessage = metrics.NewMeter("metricpublisher.metrics_per_message", 0)
	publishDuration = metrics.NewTimer("metricpublisher.publish_duration", 0)
}
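
For context, a hypothetical sketch of the publish path that could drive the counters and timers created above. It assumes met.Count.Inc(int64), met.Meter.Value(int64) and met.Timer.Value(time.Duration), and reuses the package-level globalProducer and topic set up by Init; the function name and JSON encoding are illustrative only:

// Publish is a hypothetical sketch, not part of the original package: it
// serializes a batch of metric definitions, sends them via the producer
// configured in Init, and updates the metrics registered there.
func Publish(metricDefs []m.MetricDefinition) error {
	if globalProducer == nil {
		// Init returned early because publishing is disabled.
		return nil
	}
	start := time.Now()
	body, err := json.Marshal(metricDefs)
	if err != nil {
		return err
	}
	if err := globalProducer.Publish(topic, body); err != nil {
		return err
	}
	metricsPublished.Inc(int64(len(metricDefs)))
	messagesPublished.Inc(1)
	messagesSize.Value(int64(len(body)))
	metricsPerMessage.Value(int64(len(metricDefs)))
	publishDuration.Value(time.Since(start))
	return nil
}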
Example #2
func NewConsumer(topic, channel string, config *nsq.Config, metricsPatt string, metrics met.Backend) (*Consumer, error) {
	consumer, err := nsq.NewConsumer(topic, channel, config)
	if err != nil {
		// return early so the stats goroutine below never runs on a nil consumer
		return nil, err
	}
	c := Consumer{
		consumer,
		metrics.NewGauge(fmt.Sprintf(metricsPatt, "received"), 0),
		metrics.NewGauge(fmt.Sprintf(metricsPatt, "finished"), 0),
		metrics.NewGauge(fmt.Sprintf(metricsPatt, "requeued"), 0),
		metrics.NewGauge(fmt.Sprintf(metricsPatt, "connections"), 0),
		0,
		metrics.NewGauge(fmt.Sprintf(metricsPatt, "num_handlers"), 0),
	}
	go func() {
		t := time.Tick(time.Second)
		for range t {
			s := consumer.Stats()
			c.msgsReceived.Value(int64(s.MessagesReceived))
			c.msgsFinished.Value(int64(s.MessagesFinished))
			c.msgsRequeued.Value(int64(s.MessagesRequeued))
			c.msgsConnections.Value(int64(s.Connections))
			h := atomic.LoadInt32(&c.numHandlers)
			c.handlers.Value(int64(h))
		}
	}()
	return &c, nil
}
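
The numHandlers counter polled once per second by the stats goroutine presumably tracks handler registration. A hypothetical wrapper sketch, assuming the first struct field embeds *nsq.Consumer and using go-nsq's AddConcurrentHandlers:

// AddConcurrentHandlers is a hypothetical sketch: it keeps the numHandlers
// counter (read by the stats goroutine above) in sync with handler
// registration. The embedded *nsq.Consumer field is an assumption about the
// struct layout, which the positional literal above does not reveal.
func (c *Consumer) AddConcurrentHandlers(handler nsq.Handler, concurrency int) {
	atomic.AddInt32(&c.numHandlers, int32(concurrency))
	c.Consumer.AddConcurrentHandlers(handler, concurrency)
}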
Example #3
func InitCollectorController(metrics met.Backend) {
	sec := setting.Cfg.Section("event_publisher")
	cmd := &m.ClearCollectorSessionCommand{
		InstanceId: setting.InstanceId,
	}
	if err := bus.Dispatch(cmd); err != nil {
		log.Fatal(0, "failed to clear collectorSessions: %s", err)
	}

	if sec.Key("enabled").MustBool(false) {
		url := sec.Key("rabbitmq_url").String()
		exchange := sec.Key("exchange").String()
		exch := rabbitmq.Exchange{
			Name:         exchange,
			ExchangeType: "topic",
			Durable:      true,
		}
		q := rabbitmq.Queue{
			Name:       "",
			Durable:    false,
			AutoDelete: true,
			Exclusive:  true,
		}
		consumer := rabbitmq.Consumer{
			Url:        url,
			Exchange:   &exch,
			Queue:      &q,
			BindingKey: []string{"INFO.monitor.*", "INFO.collector.*"},
		}
		err := consumer.Connect()
		if err != nil {
			log.Fatal(0, "failed to start event.consumer: %s", err)
		}
		consumer.Consume(eventConsumer)
	} else {
		// tap into the update/add/delete events emitted when monitors are modified.
		bus.AddEventListener(EmitUpdateMonitor)
		bus.AddEventListener(EmitAddMonitor)
		bus.AddEventListener(EmitDeleteMonitor)
		bus.AddEventListener(HandleCollectorConnected)
		bus.AddEventListener(HandleCollectorDisconnected)
	}
	metricsRecvd = metrics.NewCount("collector-ctrl.metrics-recv")
	bufCh = make(chan m.MetricDefinition, runtime.NumCPU()*100)
	go metricpublisher.ProcessBuffer(bufCh)
}
Example #4
func Init(metrics met.Backend) {
	sec := setting.Cfg.Section("collector_event_publisher")

	if !sec.Key("enabled").MustBool(false) {
		enabled = false
		return
	}
	enabled = true

	addr := sec.Key("nsqd_addr").MustString("localhost:4150")
	topic = sec.Key("topic").MustString("metrics")
	cfg := nsq.NewConfig()
	cfg.UserAgent = "probe-ctrl"
	var err error
	globalProducer, err = nsq.NewProducer(addr, cfg)
	if err != nil {
		log.Fatal(0, "failed to initialize nsq producer: %s", err)
	}
	collectorEventPublisherMsgs = metrics.NewCount("collectoreventpublisher.events-published")
}
Example #5
func Init(metrics met.Backend) {
	sec := setting.Cfg.Section("event_publisher")

	if !sec.Key("enabled").MustBool(false) {
		return
	}

	url := sec.Key("rabbitmq_url").String()
	exchange := "metricResults"

	exch := rabbitmq.Exchange{
		Name:         exchange,
		ExchangeType: "x-consistent-hash",
		Durable:      true,
	}
	globalPublisher = &rabbitmq.Publisher{Url: url, Exchange: &exch}
	err := globalPublisher.Connect()
	if err != nil {
		log.Fatal(4, "Failed to connect to metricResults exchange: %v", err)
		return
	}
	metricPublisherMetrics = metrics.NewCount("metricpublisher.metrics-published")
	metricPublisherMsgs = metrics.NewCount("metricpublisher.messages-published")
}
Example #6
// Init initializes all metrics
// run this function when statsd is ready, so we can create the series
func Init(metrics met.Backend) {
	jobQueueInternalItems = metrics.NewGauge("alert-jobqueue-internal.items", 0)
	jobQueueInternalSize = metrics.NewGauge("alert-jobqueue-internal.size", int64(setting.InternalJobQueueSize))
	jobQueuePreAMQPItems = metrics.NewGauge("alert-jobqueue-preamqp.items", 0)
	jobQueuePreAMQPSize = metrics.NewGauge("alert-jobqueue-preamqp.size", int64(setting.PreAMQPJobQueueSize))
	tickQueueItems = metrics.NewMeter("alert-tickqueue.items", 0)
	tickQueueSize = metrics.NewGauge("alert-tickqueue.size", int64(setting.TickQueueSize))
	dispatcherJobsSkippedDueToSlowJobQueueInternal = metrics.NewCount("alert-dispatcher.jobs-skipped-due-to-slow-internal-jobqueue")
	dispatcherJobsSkippedDueToSlowJobQueuePreAMQP = metrics.NewCount("alert-dispatcher.jobs-skipped-due-to-slow-preamqp-jobqueue")
	dispatcherTicksSkippedDueToSlowTickQueue = metrics.NewCount("alert-dispatcher.ticks-skipped-due-to-slow-tickqueue")

	dispatcherGetSchedules = metrics.NewTimer("alert-dispatcher.get-schedules", 0)
	dispatcherNumGetSchedules = metrics.NewCount("alert-dispatcher.num-getschedules")
	dispatcherJobSchedulesSeen = metrics.NewCount("alert-dispatcher.job-schedules-seen")
	dispatcherJobsScheduled = metrics.NewCount("alert-dispatcher.jobs-scheduled")

	executorNum = metrics.NewGauge("alert-executor.num", 0)
	executorConsiderJobAlreadyDone = metrics.NewTimer("alert-executor.consider-job.already-done", 0)
	executorConsiderJobOriginalTodo = metrics.NewTimer("alert-executor.consider-job.original-todo", 0)

	executorNumAlreadyDone = metrics.NewCount("alert-executor.already-done")
	executorNumOriginalTodo = metrics.NewCount("alert-executor.original-todo")
	executorAlertOutcomesErr = metrics.NewCount("alert-executor.alert-outcomes.error")
	executorAlertOutcomesOk = metrics.NewCount("alert-executor.alert-outcomes.ok")
	executorAlertOutcomesWarn = metrics.NewCount("alert-executor.alert-outcomes.warning")
	executorAlertOutcomesCrit = metrics.NewCount("alert-executor.alert-outcomes.critical")
	executorAlertOutcomesUnkn = metrics.NewCount("alert-executor.alert-outcomes.unknown")
	executorGraphiteEmptyResponse = metrics.NewCount("alert-executor.graphite-emptyresponse")
	executorGraphiteIncompleteResponse = metrics.NewCount("alert-executor.graphite-incompleteresponse")
	executorGraphiteBadStart = metrics.NewCount("alert-executor.graphite-badstart")
	executorGraphiteBadStep = metrics.NewCount("alert-executor.graphite-badstep")
	executorGraphiteBadSteps = metrics.NewCount("alert-executor.graphite-badsteps")

	executorJobExecDelay = metrics.NewTimer("alert-executor.job_execution_delay", time.Duration(30)*time.Second)
	executorJobQueryGraphite = metrics.NewTimer("alert-executor.job_query_graphite", 0)
	executorJobParseAndEval = metrics.NewTimer("alert-executor.job_parse-and-evaluate", 0)
	executorGraphiteMissingVals = metrics.NewMeter("alert-executor.graphite-missingVals", 0)
}
Example #7
func initMetrics(stats met.Backend) {
	reqSpanMem = stats.NewMeter("requests_span.mem", 0)
	reqSpanBoth = stats.NewMeter("requests_span.mem_and_cassandra", 0)
	chunkSizeAtSave = stats.NewMeter("chunk_size.at_save", 0)
	chunkSizeAtLoad = stats.NewMeter("chunk_size.at_load", 0)
	chunkCreate = stats.NewCount("chunks.create")
	chunkClear = stats.NewCount("chunks.clear")
	chunkSaveOk = stats.NewCount("chunks.save_ok")
	chunkSaveFail = stats.NewCount("chunks.save_fail")
	metricsReceived = stats.NewCount("metrics_received")
	metricsToCassandraOK = stats.NewCount("metrics_to_cassandra.ok")
	metricsToCassandraFail = stats.NewCount("metrics_to_cassandra.fail")
	cassandraRowsPerResponse = stats.NewMeter("cassandra_rows_per_response", 0)
	cassandraChunksPerRow = stats.NewMeter("cassandra_chunks_per_row", 0)
	messagesSize = stats.NewMeter("message_size", 0)
	metricsPerMessage = stats.NewMeter("metrics_per_message", 0)
	msgsAge = stats.NewMeter("message_age", 0)
	reqHandleDuration = stats.NewTimer("request_handle_duration", 0)
	cassandraGetDuration = stats.NewTimer("cassandra_get_duration", 0)
	cassandraBlockDuration = stats.NewTimer("cassandra_block_duration", 0)
	cassandraPutDuration = stats.NewTimer("cassandra_put_duration", 0)
	inItems = stats.NewMeter("in.items", 0)
	pointsPerMetric = stats.NewMeter("points_per_metric", 0)
	msgsHandleOK = stats.NewCount("handle.ok")
	msgsHandleFail = stats.NewCount("handle.fail")
	alloc = stats.NewGauge("bytes_alloc.not_freed", 0)
	totalAlloc = stats.NewGauge("bytes_alloc.incl_freed", 0)
	sysBytes = stats.NewGauge("bytes_sys", 0)
	metricsActive = stats.NewGauge("metrics_active", 0)
}