Example #1
func reloadConfig(filename string, rls ...Reloadable) (success bool) {
	log.Infof("Loading configuration file %s", filename)
	defer func() {
		if success {
			configSuccess.Set(1)
			configSuccessTime.Set(float64(time.Now().Unix()))
		} else {
			configSuccess.Set(0)
		}
	}()

	conf, err := config.LoadFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		// TODO(julius): Remove this notice when releasing 0.17.0 or 0.18.0.
		if err.Error() == "unknown fields in global config: labels" {
			log.Errorf("NOTE: The 'labels' setting in the global configuration section has been renamed to 'external_labels' and now has changed semantics (see release notes at https://github.com/prometheus/prometheus/blob/master/CHANGELOG.md). Please update your configuration file accordingly.")
		}
		return false
	}
	success = true

	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
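A minimal, self-contained sketch of the pattern above: a named return value lets a single deferred closure record the outcome metric on every return path. The gauge type below is a stand-in for the Prometheus client's Gauge, not its real API.

package main

import "fmt"

// gauge is a stand-in for prometheus.Gauge; only Set is needed here.
type gauge struct{ v float64 }

func (g *gauge) Set(v float64) { g.v = v }

var configSuccess gauge

func reload(ok bool) (success bool) {
	// The deferred closure reads the named return value after it has been
	// assigned, regardless of which return statement ran.
	defer func() {
		if success {
			configSuccess.Set(1)
		} else {
			configSuccess.Set(0)
		}
	}()
	if !ok {
		return false
	}
	success = true
	return success
}

func main() {
	reload(true)
	fmt.Println(configSuccess.v) // prints 1
}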
Example #2
func matchRegularExpressions(reader io.Reader, config HTTPProbe) bool {
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Errorf("Error reading HTTP body: %s", err)
		return false
	}
	for _, expression := range config.FailIfMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if re.Match(body) {
			return false
		}
	}
	for _, expression := range config.FailIfNotMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if !re.Match(body) {
			return false
		}
	}
	return true
}
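A hedged usage sketch, assuming it compiles in the same package as the function above (plus "fmt" and "strings" imports). The HTTPProbe struct is trimmed to the two fields the matcher actually reads; its real definition may carry more.

// Assumed shape of HTTPProbe, reduced to the fields used by the matcher.
type HTTPProbe struct {
	FailIfMatchesRegexp    []string
	FailIfNotMatchesRegexp []string
}

func main() {
	probe := HTTPProbe{
		FailIfMatchesRegexp:    []string{`(?i)internal server error`},
		FailIfNotMatchesRegexp: []string{`status:\s*ok`},
	}
	// true: the body matches the required pattern and none of the fail patterns.
	fmt.Println(matchRegularExpressions(strings.NewReader("status: ok"), probe))
}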
Example #3
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules failed the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.Lock()
	defer m.Unlock()

	defer m.transferAlertState()()

	success := true
	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

	rulesSnapshot := make([]Rule, len(m.rules))
	copy(rulesSnapshot, m.rules)
	m.rules = m.rules[:0]

	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			success = false
		}
		files = append(files, fs...)
	}
	if err := m.loadRuleFiles(files...); err != nil {
		// If loading the new rules failed, restore the old rule set.
		m.rules = rulesSnapshot
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		success = false
	}

	return success
}
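The snapshot-and-restore idiom above is worth isolating: copy the slice before truncating it in place, and reassign the copy if loading fails. A minimal sketch:

package main

import "fmt"

func main() {
	rules := []string{"up == 0", "rate(errors[5m]) > 1"}

	// Snapshot the current rules, then truncate in place for the reload.
	snapshot := make([]string, len(rules))
	copy(snapshot, rules)
	rules = rules[:0]

	loadFailed := true // pretend the new rule files did not parse
	if loadFailed {
		rules = snapshot // restore the old rule set
	}
	fmt.Println(rules) // [up == 0 rate(errors[5m]) > 1]
}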
Example #4
func (e *Exporter) scrape(csvRows chan<- []string) {
	defer close(csvRows)

	e.totalScrapes.Inc()

	resp, err := e.client.Get(e.URI)
	if err != nil {
		e.up.Set(0)
		log.Errorf("Can't scrape HAProxy: %v", err)
		return
	}
	defer resp.Body.Close()
	e.up.Set(1)

	reader := csv.NewReader(resp.Body)
	reader.TrailingComma = true
	reader.Comment = '#'

	for {
		row, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Errorf("Can't read CSV: %v", err)
			e.csvParseFailures.Inc()
			break
		}
		if len(row) == 0 {
			continue
		}
		csvRows <- row
	}
}
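This scrape is the producing half of a pipeline: it owns csvRows and closes it via defer, so a consumer (like the setMetrics method shown in a later example) can simply range over the channel until it drains. A self-contained sketch of that ownership rule, with hard-coded rows standing in for parsed CSV records:

package main

import "fmt"

// produce owns the channel: the deferred close runs on every exit path,
// including the early returns a failed scrape would take.
func produce(rows chan<- []string) {
	defer close(rows)
	for _, r := range [][]string{{"frontend", "OPEN"}, {"backend", "UP"}} {
		rows <- r
	}
}

func main() {
	rows := make(chan []string)
	go produce(rows)
	for r := range rows { // terminates when produce closes the channel
		fmt.Println(r)
	}
}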
Example #5
// Mutes returns true iff the given label set is muted.
func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
	alerts := ih.alerts.GetPending()
	defer alerts.Close()

	// TODO(fabxc): improve erroring for iterators so errors do not
	// go silent here.

	for alert := range alerts.Next() {
		if err := alerts.Err(); err != nil {
			log.Errorf("Error iterating alerts: %s", err)
			continue
		}
		if alert.Resolved() {
			continue
		}
		for _, rule := range ih.rules {
			if rule.Mutes(alert.Labels, lset) {
				ih.marker.SetInhibited(lset.Fingerprint(), true)
				return true
			}
		}
	}
	if err := alerts.Err(); err != nil {
		log.Errorf("Error after iterating alerts: %s", err)
	}

	ih.marker.SetInhibited(lset.Fingerprint(), false)

	return false
}
Example #6
func (s *memorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Metric) *memorySeries {
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		var cds []*chunkDesc
		var modTime time.Time
		unarchived, err := s.persistence.unarchiveMetric(fp)
		if err != nil {
			log.Errorf("Error unarchiving fingerprint %v (metric %v): %v", fp, m, err)
		}
		if unarchived {
			s.seriesOps.WithLabelValues(unarchive).Inc()
			// We have to load chunkDescs anyway to do anything with
			// the series, so let's do it right now so that we don't
			// end up with a series without any chunkDescs for a
			// while (which is confusing as it makes the series
			// appear as archived or purged).
			cds, err = s.loadChunkDescs(fp, 0)
			if err != nil {
				log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
			}
			modTime = s.persistence.seriesFileModTime(fp)
		} else {
			// This was a genuinely new series, so index the metric.
			s.persistence.indexMetric(fp, m)
			s.seriesOps.WithLabelValues(create).Inc()
		}
		series = newMemorySeries(m, cds, modTime)
		s.fpToSeries.put(fp, series)
		s.numSeries.Inc()
	}
	return series
}
Example #7
// purgeArchivedMetric deletes an archived fingerprint and its corresponding
// metric entirely. It also queues the metric for un-indexing (no need to call
// unindexMetric for the deleted metric). It does not touch the series file,
// though. The caller must have locked the fingerprint.
func (p *persistence) purgeArchivedMetric(fp model.Fingerprint) (err error) {
	defer func() {
		if err != nil {
			p.setDirty(fmt.Errorf("error in method purgeArchivedMetric(%v): %s", fp, err))
		}
	}()

	metric, err := p.archivedMetric(fp)
	if err != nil || metric == nil {
		return err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	p.unindexMetric(fp, metric)
	return nil
}
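The same deferred-observation trick as in Example #1, applied to a named error return: the closure sees the final value of err, so every failing path marks the persistence layer dirty without repeating the call at each return. A minimal sketch, with a print standing in for p.setDirty:

package main

import (
	"errors"
	"fmt"
)

func purge(fail bool) (err error) {
	defer func() {
		if err != nil {
			fmt.Println("marking dirty:", err) // stand-in for p.setDirty
		}
	}()
	if fail {
		return errors.New("index delete failed")
	}
	return nil
}

func main() {
	purge(true)  // prints "marking dirty: index delete failed"
	purge(false) // prints nothing
}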
Example #8
// Sources implements the TargetProvider interface.
func (kd *Discovery) Sources() []string {
	sourceNames := make([]string, 0, len(kd.apiServers))
	for _, apiServer := range kd.apiServers {
		sourceNames = append(sourceNames, apiServersTargetGroupName+":"+apiServer.Host)
	}

	nodes, _, err := kd.getNodes()
	if err != nil {
		// If we can't list nodes then we can't watch them. Assume this is a
		// misconfiguration, log it, and return empty.
		log.Errorf("Unable to initialize Kubernetes nodes: %s", err)
		return []string{}
	}
	sourceNames = append(sourceNames, kd.nodeSources(nodes)...)

	services, _, err := kd.getServices()
	if err != nil {
		// If we can't list services then we can't watch them. Assume this is a
		// misconfiguration, log it, and return empty.
		log.Errorf("Unable to initialize Kubernetes services: %s", err)
		return []string{}
	}
	sourceNames = append(sourceNames, kd.serviceSources(services)...)

	return sourceNames
}
Example #9
func (d *serviceDiscovery) addService(service *Service) *config.TargetGroup {
	namespace, ok := d.services[service.ObjectMeta.Namespace]
	if !ok {
		namespace = map[string]*Service{}
		d.services[service.ObjectMeta.Namespace] = namespace
	}

	namespace[service.ObjectMeta.Name] = service
	endpointURL := fmt.Sprintf(serviceEndpointsURL, service.ObjectMeta.Namespace, service.ObjectMeta.Name)

	res, err := d.kd.queryAPIServerPath(endpointURL)
	if err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		log.Errorf("Failed to get service endpoints: %d", res.StatusCode)
		return nil
	}

	var eps Endpoints
	if err := json.NewDecoder(res.Body).Decode(&eps); err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}

	return d.updateServiceTargetGroup(service, &eps)
}
Example #10
func (e *Exporter) setMetrics(jsonStats <-chan []StatsEntry) (statsMap map[string]float64) {
	statsMap = make(map[string]float64)
	stats := <-jsonStats
	for _, s := range stats {
		statsMap[s.Name] = s.Value
	}
	if len(statsMap) == 0 {
		return
	}

	for _, def := range e.gaugeDefs {
		if value, ok := statsMap[def.key]; ok {
			// latency gauges need to be converted from microseconds to seconds
			if strings.HasSuffix(def.key, "latency") {
				value = value / 1000000
			}
			e.gaugeMetrics[def.id].Set(value)
		} else {
			log.Errorf("Expected PowerDNS stats key not found: %s", def.key)
			e.jsonParseFailures.Inc()
		}
	}

	for _, def := range e.counterVecDefs {
		for key, label := range def.labelMap {
			if value, ok := statsMap[key]; ok {
				e.counterVecMetrics[def.id].WithLabelValues(label).Set(value)
			} else {
				log.Errorf("Expected PowerDNS stats key not found: %s", key)
				e.jsonParseFailures.Inc()
			}
		}
	}
	return
}
Example #11
func (e *Exporter) scrape() {
	resp, err := e.client.Get(e.URL)
	if err != nil {
		e.up.Set(0)
		log.Errorf("Can't scrape Spring Actuator: %v", err)
		return
	}
	defer resp.Body.Close()

	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
		e.up.Set(0)
		log.Errorf("Can't scrape Spring Actuator: StatusCode: %d", resp.StatusCode)
		return
	}
	e.up.Set(1)
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Errorf("Reading response body failed %v", err)
		return
	}

	var metrics map[string]*json.RawMessage
	if err := json.Unmarshal(body, &metrics); err != nil {
		log.Fatalf("JSON unmarshaling failed: %s", err)
	}
	e.export(metrics)
}
Example #12
func (e *Exporter) scrapeMetrics(json *gabs.Container, ch chan<- prometheus.Metric) {
	elements, _ := json.ChildrenMap()
	for key, element := range elements {
		switch key {
		case "message":
			log.Errorf("Problem collecting metrics: %s\n", element.Data().(string))
			return
		case "version":
			data := element.Data()
			version, ok := data.(string)
			if !ok {
				log.Errorf("Bad conversion! Unexpected value \"%v\" for version", data)
			} else {
				gauge, _ := e.Gauges.Fetch("metrics_version", "Marathon metrics version", "version")
				gauge.WithLabelValues(version).Set(1)
				gauge.Collect(ch)
			}

		case "counters":
			e.scrapeCounters(element)
		case "gauges":
			e.scrapeGauges(element)
		case "histograms":
			e.scrapeHistograms(element)
		case "meters":
			e.scrapeMeters(element)
		case "timers":
			e.scrapeTimers(element)
		}
	}
}
Example #13
// providersFromConfig returns all TargetProviders configured in cfg.
func providersFromConfig(cfg *config.ScrapeConfig) map[string]TargetProvider {
	providers := map[string]TargetProvider{}

	app := func(mech string, i int, tp TargetProvider) {
		providers[fmt.Sprintf("%s/%d", mech, i)] = tp
	}

	for i, c := range cfg.DNSSDConfigs {
		app("dns", i, discovery.NewDNS(c))
	}
	for i, c := range cfg.FileSDConfigs {
		app("file", i, discovery.NewFileDiscovery(c))
	}
	for i, c := range cfg.ConsulSDConfigs {
		k, err := discovery.NewConsul(c)
		if err != nil {
			log.Errorf("Cannot create Consul discovery: %s", err)
			continue
		}
		app("consul", i, k)
	}
	for i, c := range cfg.MarathonSDConfigs {
		app("marathon", i, discovery.NewMarathon(c))
	}
	for i, c := range cfg.KubernetesSDConfigs {
		k, err := discovery.NewKubernetesDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Kubernetes discovery: %s", err)
			continue
		}
		app("kubernetes", i, k)
	}
	for i, c := range cfg.ServersetSDConfigs {
		app("serverset", i, discovery.NewServersetDiscovery(c))
	}
	for i, c := range cfg.NerveSDConfigs {
		app("nerve", i, discovery.NewNerveDiscovery(c))
	}
	for i, c := range cfg.EC2SDConfigs {
		app("ec2", i, discovery.NewEC2Discovery(c))
	}
	for i, c := range cfg.GCESDConfigs {
		gced, err := discovery.NewGCEDiscovery(c)
		if err != nil {
			log.Errorf("Cannot initialize GCE discovery: %s", err)
			continue
		}
		app("gce", i, gced)
	}
	for i, c := range cfg.AzureSDConfigs {
		app("azure", i, discovery.NewAzureDiscovery(c))
	}
	if len(cfg.StaticConfigs) > 0 {
		app("static", 0, NewStaticProvider(cfg.StaticConfigs))
	}

	return providers
}
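The small app closure is doing the organizational work here: it captures the providers map and derives "mechanism/index" keys, so each service-discovery block reduces to one registration line. Reduced to its essentials:

package main

import "fmt"

func main() {
	providers := map[string]string{}
	// app captures the map; keys like "file/1" stay stable as long as the
	// order of configs is stable.
	app := func(mech string, i int, tp string) {
		providers[fmt.Sprintf("%s/%d", mech, i)] = tp
	}

	for i, c := range []string{"targets-a.json", "targets-b.json"} {
		app("file", i, c)
	}
	app("static", 0, "static-targets")

	fmt.Println(providers) // map[file/0:targets-a.json file/1:targets-b.json static/0:static-targets]
}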
Example #14
// Run implements the TargetProvider interface.
func (fd *FileDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	defer close(ch)
	defer fd.stop()

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Errorf("Error creating file watcher: %s", err)
		return
	}
	fd.watcher = watcher

	fd.refresh(ch)

	ticker := time.NewTicker(fd.interval)
	defer ticker.Stop()

	for {
		// Stopping has priority over refreshing. Thus we wrap the actual select
		// clause to always catch done signals.
		select {
		case <-ctx.Done():
			return
		default:
			select {
			case <-ctx.Done():
				return

			case event := <-fd.watcher.Events:
				// fsnotify sometimes sends a bunch of events without name or operation.
				// It's unclear what they are and why they are sent - filter them out.
				if len(event.Name) == 0 {
					break
				}
				// Everything but a chmod requires rereading.
				if event.Op^fsnotify.Chmod == 0 {
					break
				}
				// Changes to a file can spawn various sequences of events with
				// different combinations of operations. For all practical purposes
				// this is inaccurate.
				// The most reliable solution is to reload everything if anything happens.
				fd.refresh(ch)

			case <-ticker.C:
				// Setting a new watch after an update might fail. Make sure we don't lose
				// those files forever.
				fd.refresh(ch)

			case err := <-fd.watcher.Errors:
				if err != nil {
					log.Errorf("Error on file watch: %s", err)
				}
			}
		}
	}
}
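The nested select is a standard way to give cancellation priority: the outer select with a default clause always checks ctx.Done() before the inner select blocks on events. A runnable sketch of just that shape:

package main

import (
	"context"
	"fmt"
	"time"
)

func run(ctx context.Context, events <-chan string) {
	for {
		// Stopping has priority: drain ctx.Done() before waiting on events.
		select {
		case <-ctx.Done():
			return
		default:
			select {
			case <-ctx.Done():
				return
			case ev := <-events:
				fmt.Println("handled:", ev)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	run(ctx, make(chan string)) // returns once the context times out
}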
Example #15
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules failed the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.mtx.Lock()
	defer m.mtx.Unlock()

	// Get all rule files and load the groups they define.
	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			return false
		}
		files = append(files, fs...)
	}

	groups, err := m.loadGroups(files...)
	if err != nil {
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		return false
	}

	var wg sync.WaitGroup

	for _, newg := range groups {
		// To be replaced with a configurable per-group interval.
		newg.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

		wg.Add(1)

		// If there is an old group with the same identifier, stop it and wait for
		// it to finish the current iteration. Then copy its state into the new group.
		oldg, ok := m.groups[newg.name]
		delete(m.groups, newg.name)

		go func(newg *Group) {
			if ok {
				oldg.stop()
				newg.copyState(oldg)
			}
			go newg.run()
			wg.Done()
		}(newg)
	}

	// Stop remaining old groups.
	for _, oldg := range m.groups {
		oldg.stop()
	}

	wg.Wait()
	m.groups = groups

	return true
}
Example #16
// Collect implements the prometheus.Collector interface.
func (e *PfExporter) Collect(ch chan<- prometheus.Metric) {
	stats, err := e.fw.Stats()
	if err != nil {
		log.Errorf("failed to get pf stats: %v", err)
		return
	}

	e.gauges["state_total"].Set(float64(stats.StateCount()))
	e.counters["state_searches"].Set(float64(stats.StateSearches()))
	e.counters["state_inserts"].Set(float64(stats.StateInserts()))
	e.counters["state_removals"].Set(float64(stats.StateRemovals()))

	ifstats := stats.IfStats()
	if ifstats != nil {
		e.counters["ipv4_bytes_in"].Set(float64(ifstats.IPv4.BytesIn))
		e.counters["ipv4_bytes_out"].Set(float64(ifstats.IPv4.BytesOut))
		e.counters["ipv4_packets_in_passed"].Set(float64(ifstats.IPv4.PacketsInPassed))
		e.counters["ipv4_packets_in_blocked"].Set(float64(ifstats.IPv4.PacketsInBlocked))
		e.counters["ipv4_packets_out_passed"].Set(float64(ifstats.IPv4.PacketsOutPassed))
		e.counters["ipv4_packets_out_blocked"].Set(float64(ifstats.IPv4.PacketsOutBlocked))

		e.counters["ipv6_bytes_in"].Set(float64(ifstats.IPv6.BytesIn))
		e.counters["ipv6_bytes_out"].Set(float64(ifstats.IPv6.BytesOut))
		e.counters["ipv6_packets_in_passed"].Set(float64(ifstats.IPv6.PacketsInPassed))
		e.counters["ipv6_packets_in_blocked"].Set(float64(ifstats.IPv6.PacketsInBlocked))
		e.counters["ipv6_packets_out_passed"].Set(float64(ifstats.IPv6.PacketsOutPassed))
		e.counters["ipv6_packets_out_blocked"].Set(float64(ifstats.IPv6.PacketsOutBlocked))
	}

	queues, err := e.fw.Queues()
	if err != nil {
		log.Errorf("failed to get queue stats: %v", err)
		return
	}

	for _, queue := range queues {
		e.countervecs["queue_xmit_packets"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.TransmitPackets))
		e.countervecs["queue_xmit_bytes"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.TransmitBytes))
		e.countervecs["queue_dropped_packets"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.DroppedPackets))
		e.countervecs["queue_dropped_bytes"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.DroppedBytes))
	}

	for _, m := range e.gauges {
		m.Collect(ch)
	}

	for _, m := range e.counters {
		m.Collect(ch)
	}

	for _, m := range e.countervecs {
		m.Collect(ch)
	}
}
Example #17
// watchNodes watches nodes as they come & go.
func (d *nodeDiscovery) watchNodes(events chan *nodeEvent, done <-chan struct{}, retryInterval time.Duration) {
	until(func() {
		nodes, resourceVersion, err := d.getNodes()
		if err != nil {
			log.Errorf("Cannot initialize nodes collection: %s", err)
			return
		}

		// Reset the known nodes.
		d.mtx.Lock()
		d.nodes = map[string]*Node{}
		d.mtx.Unlock()

		for _, node := range nodes {
			events <- &nodeEvent{Added, node}
		}

		req, err := http.NewRequest("GET", nodesURL, nil)
		if err != nil {
			log.Errorf("Cannot create nodes request: %s", err)
			return
		}
		values := req.URL.Query()
		values.Add("watch", "true")
		values.Add("resourceVersion", resourceVersion)
		req.URL.RawQuery = values.Encode()
		res, err := d.kd.queryAPIServerReq(req)
		if err != nil {
			log.Errorf("Failed to watch nodes: %s", err)
			return
		}
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			log.Errorf("Failed to watch nodes: %d", res.StatusCode)
			return
		}

		// Use a distinct name so the decoder does not shadow the receiver d.
		dec := json.NewDecoder(res.Body)

		for {
			var event nodeEvent
			if err := d.Decode(&event); err != nil {
				log.Errorf("Watch nodes unexpectedly closed: %s", err)
				return
			}

			select {
			case events <- &event:
			case <-done:
			}
		}
	}, retryInterval, done)
}
Example #18
// providersFromConfig returns all TargetProviders configured in cfg.
func providersFromConfig(cfg *config.ScrapeConfig) []TargetProvider {
	var providers []TargetProvider

	app := func(mech string, i int, tp TargetProvider) {
		providers = append(providers, &prefixedTargetProvider{
			job:            cfg.JobName,
			mechanism:      mech,
			idx:            i,
			TargetProvider: tp,
		})
	}

	for i, c := range cfg.DNSSDConfigs {
		app("dns", i, discovery.NewDNSDiscovery(c))
	}
	for i, c := range cfg.FileSDConfigs {
		app("file", i, discovery.NewFileDiscovery(c))
	}
	for i, c := range cfg.ConsulSDConfigs {
		k, err := discovery.NewConsulDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Consul discovery: %s", err)
			continue
		}
		app("consul", i, k)
	}
	for i, c := range cfg.MarathonSDConfigs {
		app("marathon", i, discovery.NewMarathonDiscovery(c))
	}
	for i, c := range cfg.KubernetesSDConfigs {
		k, err := discovery.NewKubernetesDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Kubernetes discovery: %s", err)
			continue
		}
		app("kubernetes", i, k)
	}
	for i, c := range cfg.ServersetSDConfigs {
		app("serverset", i, discovery.NewServersetDiscovery(c))
	}
	for i, c := range cfg.NerveSDConfigs {
		app("nerve", i, discovery.NewNerveDiscovery(c))
	}
	for i, c := range cfg.EC2SDConfigs {
		app("ec2", i, discovery.NewEC2Discovery(c))
	}
	if len(cfg.TargetGroups) > 0 {
		app("static", 0, NewStaticProvider(cfg.TargetGroups))
	}

	return providers
}
Example #19
func (d *Dispatcher) run(it provider.AlertIterator) {
	cleanup := time.NewTicker(30 * time.Second)
	defer cleanup.Stop()

	defer it.Close()

	for {
		select {
		case alert, ok := <-it.Next():
			if !ok {
				// Iterator exhausted for some reason.
				if err := it.Err(); err != nil {
					log.Errorf("Error on alert update: %s", err)
				}
				return
			}

			d.log.With("alert", alert).Debug("Received alert")

			// Log errors but keep trying.
			if err := it.Err(); err != nil {
				log.Errorf("Error on alert update: %s", err)
				continue
			}

			for _, r := range d.route.Match(alert.Labels) {
				d.processAlert(alert, r)
			}

		case <-cleanup.C:
			d.mtx.Lock()

			for _, groups := range d.aggrGroups {
				for _, ag := range groups {
					if ag.empty() {
						ag.stop()
						delete(groups, ag.fingerprint())
					}
				}
			}

			d.mtx.Unlock()

		case <-d.ctx.Done():
			return
		}
	}
}
Example #20
func (d *podDiscovery) watchPods(events chan *podEvent, done <-chan struct{}, retryInterval time.Duration) {
	until(func() {
		pods, resourceVersion, err := d.getPods()
		if err != nil {
			log.Errorf("Cannot initialize pods collection: %s", err)
			return
		}
		d.mtx.Lock()
		d.pods = pods
		d.mtx.Unlock()

		req, err := http.NewRequest("GET", podsURL, nil)
		if err != nil {
			log.Errorf("Cannot create pods request: %s", err)
			return
		}

		values := req.URL.Query()
		values.Add("watch", "true")
		values.Add("resourceVersion", resourceVersion)
		req.URL.RawQuery = values.Encode()
		res, err := d.kd.queryAPIServerReq(req)
		if err != nil {
			log.Errorf("Failed to watch pods: %s", err)
			return
		}
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			log.Errorf("Failed to watch pods: %d", res.StatusCode)
			return
		}

		// Use a distinct name so the decoder does not shadow the receiver d.
		dec := json.NewDecoder(res.Body)

		for {
			var event podEvent
			if err := d.Decode(&event); err != nil {
				log.Errorf("Watch pods unexpectedly closed: %s", err)
				return
			}

			select {
			case events <- &event:
			case <-done:
			}
		}
	}, retryInterval, done)
}
Example #21
// maintainArchivedSeries drops chunks older than beforeTime from an archived
// series. If the series contains no chunks after that, it is purged entirely.
func (s *memorySeriesStorage) maintainArchivedSeries(fp model.Fingerprint, beforeTime model.Time) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainArchived).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	has, firstTime, lastTime := s.persistence.hasArchivedMetric(fp)
	if !has || !firstTime.Before(beforeTime) {
		// Oldest sample not old enough, or metric purged or unarchived in the meantime.
		return
	}

	defer s.seriesOps.WithLabelValues(archiveMaintenance).Inc()

	newFirstTime, _, _, allDropped, err := s.persistence.dropAndPersistChunks(fp, beforeTime, nil)
	if err != nil {
		log.Error("Error dropping persisted chunks: ", err)
	}
	if allDropped {
		s.persistence.purgeArchivedMetric(fp) // Ignoring error. Nothing we can do.
		s.seriesOps.WithLabelValues(archivePurge).Inc()
		return
	}
	if err := s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime); err != nil {
		log.Errorf("Error updating archived time range for fingerprint %v: %s", fp, err)
	}
}
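The deferred duration measurement at the top of the function relies on evaluation order: time.Now() runs when the defer statement executes, while the closure body runs on return. Isolated:

package main

import (
	"fmt"
	"time"
)

func work() {
	// time.Now() is evaluated now, at defer time; time.Since(begin) is
	// evaluated when work returns.
	defer func(begin time.Time) {
		fmt.Println("work took", time.Since(begin))
	}(time.Now())

	time.Sleep(20 * time.Millisecond)
}

func main() { work() }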
Example #22
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	if err := parsePrometheusURL(); err != nil {
		return err
	}
	// Default -web.route-prefix to path of -web.external-url.
	if cfg.web.RoutePrefix == "" {
		cfg.web.RoutePrefix = cfg.web.ExternalURL.Path
	}
	// RoutePrefix must always be at least '/'.
	cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")

	if err := parseInfluxdbURL(); err != nil {
		return err
	}
	for u := range cfg.alertmanagerURLs {
		if err := validateAlertmanagerURL(u); err != nil {
			return err
		}
		cfg.notifier.AlertmanagerURLs = cfg.alertmanagerURLs.slice()
	}

	cfg.remote.InfluxdbPassword = os.Getenv("INFLUXDB_PW")

	return nil
}
Example #23
func (d *podDiscovery) run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	pods, _, err := d.getPods()
	if err != nil {
		log.Errorf("Cannot initialize pods collection: %s", err)
		return
	}
	d.pods = pods

	initial := []*config.TargetGroup{}
	switch d.kd.Conf.Role {
	case config.KubernetesRolePod:
		initial = append(initial, d.updatePodsTargetGroup())
	case config.KubernetesRoleContainer:
		for _, ns := range d.pods {
			for _, pod := range ns {
				initial = append(initial, d.updateContainerTargetGroup(pod))
			}
		}
	}

	select {
	case ch <- initial:
	case <-ctx.Done():
		return
	}

	update := make(chan *podEvent, 10)
	go d.watchPods(update, ctx.Done(), d.retryInterval)

	for {
		tgs := []*config.TargetGroup{}
		select {
		case <-ctx.Done():
			return
		case e := <-update:
			log.Debugf("k8s discovery received pod event (EventType=%s, Pod Name=%s)", e.EventType, e.Pod.ObjectMeta.Name)
			d.updatePod(e.Pod, e.EventType)

			switch d.kd.Conf.Role {
			case config.KubernetesRoleContainer:
				// Update the per-pod target group
				tgs = append(tgs, d.updateContainerTargetGroup(e.Pod))
			case config.KubernetesRolePod:
				// Update the all pods target group
				tgs = append(tgs, d.updatePodsTargetGroup())
			}
		}
		// tgs is initialized non-nil above, so check for emptiness instead.
		if len(tgs) == 0 {
			continue
		}

		for _, tg := range tgs {
			select {
			case ch <- []*config.TargetGroup{tg}:
			case <-ctx.Done():
				return
			}
		}
	}
}
Example #24
// Run implements the TargetProvider interface.
func (ad *AzureDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	ticker := time.NewTicker(ad.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		default:
		}

		tg, err := ad.refresh()
		if err != nil {
			log.Errorf("unable to refresh during Azure discovery: %s", err)
		} else {
			select {
			case <-ctx.Done():
			case ch <- []*config.TargetGroup{tg}:
			}
		}

		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}
	}
}
Example #25
// refresh reads all files matching the discovery's patterns and sends the respective
// updated target groups through the channel.
func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) {
	t0 := time.Now()
	defer func() {
		fileSDScanDuration.Observe(time.Since(t0).Seconds())
	}()

	ref := map[string]int{}
	for _, p := range fd.listFiles() {
		tgroups, err := readFile(p)
		if err != nil {
			fileSDReadErrorsCount.Inc()
			log.Errorf("Error reading file %q: %s", p, err)
			// Prevent deletion down below.
			ref[p] = fd.lastRefresh[p]
			continue
		}
		ch <- tgroups

		ref[p] = len(tgroups)
	}
	// Send empty updates for sources that disappeared.
	for f, n := range fd.lastRefresh {
		m, ok := ref[f]
		if !ok || n > m {
			for i := m; i < n; i++ {
				ch <- []*config.TargetGroup{
					{Source: fileSource(f, i)},
				}
			}
		}
	}
	fd.lastRefresh = ref

	fd.watchFiles()
}
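The lastRefresh bookkeeping deserves a closer look: refresh remembers how many groups each file produced last time and, when a file shrinks or vanishes, sends one empty update per stale group index so downstream targets are dropped. A self-contained sketch of that diff:

package main

import "fmt"

func main() {
	last := map[string]int{"a.json": 3, "b.json": 2} // group counts from the previous refresh
	ref := map[string]int{"a.json": 1}               // a.json shrank; b.json disappeared

	for f, n := range last {
		m := ref[f] // zero when the file is gone entirely
		for i := m; i < n; i++ {
			// An empty group update tells consumers to drop this stale source.
			fmt.Printf("send empty update for source %s/%d\n", f, i)
		}
	}
}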
Example #26
// Notify implements the Notifier interface.
func (w *Webhook) Notify(ctx context.Context, alerts ...*types.Alert) error {
	data := w.tmpl.Data(receiver(ctx), groupLabels(ctx), alerts...)

	groupKey, ok := GroupKey(ctx)
	if !ok {
		log.Errorf("group key missing")
	}

	msg := &WebhookMessage{
		Version:  "3",
		Data:     data,
		GroupKey: uint64(groupKey),
	}

	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(msg); err != nil {
		return err
	}

	resp, err := ctxhttp.Post(ctx, http.DefaultClient, w.URL, contentTypeJSON, &buf)
	if err != nil {
		return err
	}
	resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("unexpected status code %v from %s", resp.StatusCode, w.URL)
	}

	return nil
}
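The final status check uses integer division to test the class of the response code: any 2xx passes, everything else is an error. For instance:

package main

import "fmt"

func is2xx(code int) bool { return code/100 == 2 }

func main() {
	fmt.Println(is2xx(200), is2xx(204), is2xx(302), is2xx(500)) // true true false false
}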
Example #27
// Sources implements the TargetProvider interface.
func (cd *ConsulDiscovery) Sources() []string {
	clientConf := *cd.clientConf
	clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}

	client, err := consul.NewClient(&clientConf)
	if err != nil {
		// NewClient always returns a nil error.
		panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
	}

	srvs, _, err := client.Catalog().Services(nil)
	if err != nil {
		log.Errorf("Error refreshing service list: %s", err)
		return nil
	}
	cd.mu.Lock()
	defer cd.mu.Unlock()

	srcs := make([]string, 0, len(srvs))
	for name := range srvs {
		if _, ok := cd.scrapedServices[name]; len(cd.scrapedServices) == 0 || ok {
			srcs = append(srcs, name)
		}
	}
	return srcs
}
Example #28
func (e *Exporter) setMetrics(csvRows <-chan []string) {
	for csvRow := range csvRows {
		if len(csvRow) < expectedCsvFieldCount {
			log.Errorf("Wrong CSV field count: %d vs. %d", len(csvRow), expectedCsvFieldCount)
			e.csvParseFailures.Inc()
			continue
		}

		pxname, svname, type_ := csvRow[0], csvRow[1], csvRow[32]

		const (
			frontend = "0"
			backend  = "1"
			server   = "2"
			listener = "3"
		)

		switch type_ {
		case frontend:
			e.exportCsvFields(e.frontendMetrics, csvRow, pxname)
		case backend:
			e.exportCsvFields(e.backendMetrics, csvRow, pxname)
		case server:
			e.exportCsvFields(e.serverMetrics, csvRow, pxname, svname)
		}

	}
}
Example #29
// LabelValuesForLabelName implements Storage.
func (s *memorySeriesStorage) LabelValuesForLabelName(labelName model.LabelName) model.LabelValues {
	lvs, err := s.persistence.labelValuesForLabelName(labelName)
	if err != nil {
		log.Errorf("Error getting label values for label name %q: %v", labelName, err)
	}
	return lvs
}
Example #30
// Run the Inhibitor's background processing.
func (ih *Inhibitor) Run() {
	ih.mtx.Lock()
	ih.stopc = make(chan struct{})
	ih.mtx.Unlock()

	go ih.runGC()

	it := ih.alerts.Subscribe()
	defer it.Close()

	for {
		select {
		case <-ih.stopc:
			return
		case a := <-it.Next():
			if err := it.Err(); err != nil {
				log.Errorf("Error iterating alerts: %s", err)
				continue
			}
			if a.Resolved() {
				// As alerts can also time out without an update, we never
				// handle new resolved alerts but invalidate the cache on read.
				continue
			}
			// Populate the inhibition rules' cache.
			for _, r := range ih.rules {
				if r.SourceMatchers.Match(a.Labels) {
					r.set(a)
				}
			}
		}
	}
}