func reloadConfig(filename string, rls ...Reloadable) (success bool) {
	log.Infof("Loading configuration file %s", filename)
	defer func() {
		if success {
			configSuccess.Set(1)
			configSuccessTime.Set(float64(time.Now().Unix()))
		} else {
			configSuccess.Set(0)
		}
	}()

	conf, err := config.LoadFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		// TODO(julius): Remove this notice when releasing 0.17.0 or 0.18.0.
		if err.Error() == "unknown fields in global config: labels" {
			log.Errorf("NOTE: The 'labels' setting in the global configuration section has been renamed to 'external_labels' and now has changed semantics (see release notes at https://github.com/prometheus/prometheus/blob/master/CHANGELOG.md). Please update your configuration file accordingly.")
		}
		return false
	}
	success = true

	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
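// A minimal sketch of what a Reloadable looks like from reloadConfig's point of
// view, assuming the interface only requires ApplyConfig(*config.Config) bool
// (as used above and in Manager.ApplyConfig below). noopReloadable is a
// hypothetical example type, not part of the original code.
type noopReloadable struct{}

func (noopReloadable) ApplyConfig(conf *config.Config) bool {
	// Accept every configuration without doing anything.
	return true
}

// With such a no-op implementation, reloadConfig("prometheus.yml", noopReloadable{})
// would return true as long as the file loads and parses successfully.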
func matchRegularExpressions(reader io.Reader, config HTTPProbe) bool {
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Errorf("Error reading HTTP body: %s", err)
		return false
	}
	for _, expression := range config.FailIfMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if re.Match(body) {
			return false
		}
	}
	for _, expression := range config.FailIfNotMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if !re.Match(body) {
			return false
		}
	}
	return true
}
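// A small, self-contained sketch of how the two regexp lists above interact,
// using only the standard library. The expressions and body below are made-up
// example values: a body fails the probe if it matches any FailIfMatchesRegexp
// entry, or if it fails to match any FailIfNotMatchesRegexp entry.
func exampleRegexpSemantics() bool {
	failIfMatches := []string{`(?i)internal server error`}
	failIfNotMatches := []string{`(?i)status:\s*ok`}

	body := []byte("status: OK")

	for _, expr := range failIfMatches {
		if regexp.MustCompile(expr).Match(body) {
			return false // Body contains a forbidden pattern.
		}
	}
	for _, expr := range failIfNotMatches {
		if !regexp.MustCompile(expr).Match(body) {
			return false // Body is missing a required pattern.
		}
	}
	return true // This body passes both checks.
}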
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules fails, the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.Lock()
	defer m.Unlock()

	defer m.transferAlertState()()

	success := true
	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

	rulesSnapshot := make([]Rule, len(m.rules))
	copy(rulesSnapshot, m.rules)
	m.rules = m.rules[:0]

	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			success = false
		}
		files = append(files, fs...)
	}
	if err := m.loadRuleFiles(files...); err != nil {
		// If loading the new rules failed, restore the old rule set.
		m.rules = rulesSnapshot
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		success = false
	}

	return success
}
func (e *Exporter) scrape(csvRows chan<- []string) {
	defer close(csvRows)

	e.totalScrapes.Inc()
	resp, err := e.client.Get(e.URI)
	if err != nil {
		e.up.Set(0)
		log.Errorf("Can't scrape HAProxy: %v", err)
		return
	}
	defer resp.Body.Close()
	e.up.Set(1)

	reader := csv.NewReader(resp.Body)
	reader.TrailingComma = true
	reader.Comment = '#'

	for {
		row, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Errorf("Can't read CSV: %v", err)
			e.csvParseFailures.Inc()
			break
		}
		if len(row) == 0 {
			continue
		}
		csvRows <- row
	}
}
// Mutes returns true iff the given label set is muted.
func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
	alerts := ih.alerts.GetPending()
	defer alerts.Close()

	// TODO(fabxc): improve erroring for iterators so that errors
	// do not go unnoticed here.
	for alert := range alerts.Next() {
		if err := alerts.Err(); err != nil {
			log.Errorf("Error iterating alerts: %s", err)
			continue
		}
		if alert.Resolved() {
			continue
		}
		for _, rule := range ih.rules {
			if rule.Mutes(alert.Labels, lset) {
				ih.marker.SetInhibited(lset.Fingerprint(), true)
				return true
			}
		}
	}
	if err := alerts.Err(); err != nil {
		log.Errorf("Error after iterating alerts: %s", err)
	}

	ih.marker.SetInhibited(lset.Fingerprint(), false)
	return false
}
func (s *memorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Metric) *memorySeries {
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		var cds []*chunkDesc
		var modTime time.Time
		unarchived, err := s.persistence.unarchiveMetric(fp)
		if err != nil {
			log.Errorf("Error unarchiving fingerprint %v (metric %v): %v", fp, m, err)
		}
		if unarchived {
			s.seriesOps.WithLabelValues(unarchive).Inc()
			// We have to load chunkDescs anyway to do anything with
			// the series, so let's do it right now so that we don't
			// end up with a series without any chunkDescs for a
			// while (which is confusing as it makes the series
			// appear as archived or purged).
			cds, err = s.loadChunkDescs(fp, 0)
			if err != nil {
				log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
			}
			modTime = s.persistence.seriesFileModTime(fp)
		} else {
			// This was a genuinely new series, so index the metric.
			s.persistence.indexMetric(fp, m)
			s.seriesOps.WithLabelValues(create).Inc()
		}
		series = newMemorySeries(m, cds, modTime)
		s.fpToSeries.put(fp, series)
		s.numSeries.Inc()
	}
	return series
}
// purgeArchivedMetric deletes an archived fingerprint and its corresponding
// metric entirely. It also queues the metric for un-indexing (no need to call
// unindexMetric for the deleted metric). It does not touch the series file,
// though. The caller must have locked the fingerprint.
func (p *persistence) purgeArchivedMetric(fp model.Fingerprint) (err error) {
	defer func() {
		if err != nil {
			p.setDirty(fmt.Errorf("error in method purgeArchivedMetric(%v): %s", fp, err))
		}
	}()

	metric, err := p.archivedMetric(fp)
	if err != nil || metric == nil {
		return err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	p.unindexMetric(fp, metric)
	return nil
}
// Sources implements the TargetProvider interface.
func (kd *Discovery) Sources() []string {
	sourceNames := make([]string, 0, len(kd.apiServers))
	for _, apiServer := range kd.apiServers {
		sourceNames = append(sourceNames, apiServersTargetGroupName+":"+apiServer.Host)
	}

	nodes, _, err := kd.getNodes()
	if err != nil {
		// If we can't list nodes, we can't watch them. Assume this is a
		// misconfiguration, log it, and return an empty list.
		log.Errorf("Unable to initialize Kubernetes nodes: %s", err)
		return []string{}
	}
	sourceNames = append(sourceNames, kd.nodeSources(nodes)...)

	services, _, err := kd.getServices()
	if err != nil {
		// If we can't list services, we can't watch them. Assume this is a
		// misconfiguration, log it, and return an empty list.
		log.Errorf("Unable to initialize Kubernetes services: %s", err)
		return []string{}
	}
	sourceNames = append(sourceNames, kd.serviceSources(services)...)

	return sourceNames
}
func (d *serviceDiscovery) addService(service *Service) *config.TargetGroup {
	namespace, ok := d.services[service.ObjectMeta.Namespace]
	if !ok {
		namespace = map[string]*Service{}
		d.services[service.ObjectMeta.Namespace] = namespace
	}
	namespace[service.ObjectMeta.Name] = service

	endpointURL := fmt.Sprintf(serviceEndpointsURL, service.ObjectMeta.Namespace, service.ObjectMeta.Name)
	res, err := d.kd.queryAPIServerPath(endpointURL)
	if err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		log.Errorf("Failed to get service endpoints: %d", res.StatusCode)
		return nil
	}

	var eps Endpoints
	if err := json.NewDecoder(res.Body).Decode(&eps); err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}

	return d.updateServiceTargetGroup(service, &eps)
}
func (e *Exporter) setMetrics(jsonStats <-chan []StatsEntry) (statsMap map[string]float64) {
	statsMap = make(map[string]float64)
	stats := <-jsonStats
	for _, s := range stats {
		statsMap[s.Name] = s.Value
	}
	if len(statsMap) == 0 {
		return
	}

	for _, def := range e.gaugeDefs {
		if value, ok := statsMap[def.key]; ok {
			// Latency gauges need to be converted from microseconds to seconds.
			if strings.HasSuffix(def.key, "latency") {
				value = value / 1000000
			}
			e.gaugeMetrics[def.id].Set(value)
		} else {
			log.Errorf("Expected PowerDNS stats key not found: %s", def.key)
			e.jsonParseFailures.Inc()
		}
	}
	for _, def := range e.counterVecDefs {
		for key, label := range def.labelMap {
			if value, ok := statsMap[key]; ok {
				e.counterVecMetrics[def.id].WithLabelValues(label).Set(value)
			} else {
				log.Errorf("Expected PowerDNS stats key not found: %s", key)
				e.jsonParseFailures.Inc()
			}
		}
	}
	return
}
func (e *Exporter) scrape() {
	resp, err := e.client.Get(e.URL)
	if err != nil {
		e.up.Set(0)
		log.Errorf("Can't scrape Spring Actuator: %v", err)
		return
	}
	defer resp.Body.Close()
	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
		e.up.Set(0)
		log.Errorf("Can't scrape Spring Actuator: StatusCode: %d", resp.StatusCode)
		return
	}
	e.up.Set(1)

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Errorf("Reading response body failed: %v", err)
		return
	}

	var metrics map[string]*json.RawMessage
	if err := json.Unmarshal(body, &metrics); err != nil {
		// A malformed response should not take the whole exporter down.
		log.Errorf("JSON unmarshaling failed: %s", err)
		return
	}
	e.export(metrics)
}
func (e *Exporter) scrapeMetrics(json *gabs.Container, ch chan<- prometheus.Metric) {
	elements, _ := json.ChildrenMap()
	for key, element := range elements {
		switch key {
		case "message":
			log.Errorf("Problem collecting metrics: %s", element.Data().(string))
			return
		case "version":
			data := element.Data()
			version, ok := data.(string)
			if !ok {
				log.Errorf("Bad conversion! Unexpected value \"%v\" for version", data)
			} else {
				gauge, _ := e.Gauges.Fetch("metrics_version", "Marathon metrics version", "version")
				gauge.WithLabelValues(version).Set(1)
				gauge.Collect(ch)
			}
		case "counters":
			e.scrapeCounters(element)
		case "gauges":
			e.scrapeGauges(element)
		case "histograms":
			e.scrapeHistograms(element)
		case "meters":
			e.scrapeMeters(element)
		case "timers":
			e.scrapeTimers(element)
		}
	}
}
// providersFromConfig returns all TargetProviders configured in cfg.
func providersFromConfig(cfg *config.ScrapeConfig) map[string]TargetProvider {
	providers := map[string]TargetProvider{}

	app := func(mech string, i int, tp TargetProvider) {
		providers[fmt.Sprintf("%s/%d", mech, i)] = tp
	}

	for i, c := range cfg.DNSSDConfigs {
		app("dns", i, discovery.NewDNS(c))
	}
	for i, c := range cfg.FileSDConfigs {
		app("file", i, discovery.NewFileDiscovery(c))
	}
	for i, c := range cfg.ConsulSDConfigs {
		k, err := discovery.NewConsul(c)
		if err != nil {
			log.Errorf("Cannot create Consul discovery: %s", err)
			continue
		}
		app("consul", i, k)
	}
	for i, c := range cfg.MarathonSDConfigs {
		app("marathon", i, discovery.NewMarathon(c))
	}
	for i, c := range cfg.KubernetesSDConfigs {
		k, err := discovery.NewKubernetesDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Kubernetes discovery: %s", err)
			continue
		}
		app("kubernetes", i, k)
	}
	for i, c := range cfg.ServersetSDConfigs {
		app("serverset", i, discovery.NewServersetDiscovery(c))
	}
	for i, c := range cfg.NerveSDConfigs {
		app("nerve", i, discovery.NewNerveDiscovery(c))
	}
	for i, c := range cfg.EC2SDConfigs {
		app("ec2", i, discovery.NewEC2Discovery(c))
	}
	for i, c := range cfg.GCESDConfigs {
		gced, err := discovery.NewGCEDiscovery(c)
		if err != nil {
			log.Errorf("Cannot initialize GCE discovery: %s", err)
			continue
		}
		app("gce", i, gced)
	}
	for i, c := range cfg.AzureSDConfigs {
		app("azure", i, discovery.NewAzureDiscovery(c))
	}
	if len(cfg.StaticConfigs) > 0 {
		app("static", 0, NewStaticProvider(cfg.StaticConfigs))
	}

	return providers
}
// Run implements the TargetProvider interface.
func (fd *FileDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	defer close(ch)
	defer fd.stop()

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Errorf("Error creating file watcher: %s", err)
		return
	}
	fd.watcher = watcher

	fd.refresh(ch)

	ticker := time.NewTicker(fd.interval)
	defer ticker.Stop()

	for {
		// Stopping has priority over refreshing. Thus we wrap the actual select
		// clause to always catch done signals.
		select {
		case <-ctx.Done():
			return
		default:
			select {
			case <-ctx.Done():
				return

			case event := <-fd.watcher.Events:
				// fsnotify sometimes sends a bunch of events without name or operation.
				// It's unclear what they are and why they are sent - filter them out.
				if len(event.Name) == 0 {
					break
				}
				// Everything but a chmod requires rereading.
				if event.Op^fsnotify.Chmod == 0 {
					break
				}
				// Changes to a file can spawn various sequences of events with
				// different combinations of operations. For all practical purposes
				// this is inaccurate.
				// The most reliable solution is to reload everything if anything happens.
				fd.refresh(ch)

			case <-ticker.C:
				// Setting a new watch after an update might fail. Make sure we don't lose
				// those files forever.
				fd.refresh(ch)

			case err := <-fd.watcher.Errors:
				if err != nil {
					log.Errorf("Error on file watch: %s", err)
				}
			}
		}
	}
}
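// A small sketch illustrating the event.Op^fsnotify.Chmod == 0 check above,
// assuming the fsnotify import used by Run: the XOR is zero only when the
// operation is exactly Chmod, so pure chmod events are skipped while anything
// involving Create/Write/Remove/Rename still triggers a refresh. isChmodOnly
// is a hypothetical helper, shown here purely for illustration.
func isChmodOnly(op fsnotify.Op) bool {
	return op^fsnotify.Chmod == 0 // Equivalent to op == fsnotify.Chmod.
}

// isChmodOnly(fsnotify.Chmod)                  -> true  (no reread needed)
// isChmodOnly(fsnotify.Write)                  -> false (reread)
// isChmodOnly(fsnotify.Write | fsnotify.Chmod) -> false (reread)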
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules fails, the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.mtx.Lock()
	defer m.mtx.Unlock()

	// Get all rule files and load the groups they define.
	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			return false
		}
		files = append(files, fs...)
	}

	groups, err := m.loadGroups(files...)
	if err != nil {
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		return false
	}

	var wg sync.WaitGroup

	for _, newg := range groups {
		// To be replaced with a configurable per-group interval.
		newg.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

		wg.Add(1)

		// If there is an old group with the same identifier, stop it and wait for
		// it to finish the current iteration. Then copy its state into the new group.
		oldg, ok := m.groups[newg.name]
		delete(m.groups, newg.name)

		go func(newg *Group) {
			if ok {
				oldg.stop()
				newg.copyState(oldg)
			}
			go newg.run()
			wg.Done()
		}(newg)
	}

	// Stop remaining old groups.
	for _, oldg := range m.groups {
		oldg.stop()
	}

	wg.Wait()
	m.groups = groups

	return true
}
// Collect implements the prometheus.Collector interface.
func (e *PfExporter) Collect(ch chan<- prometheus.Metric) {
	stats, err := e.fw.Stats()
	if err != nil {
		log.Errorf("failed to get pf stats: %v", err)
		return
	}

	e.gauges["state_total"].Set(float64(stats.StateCount()))
	e.counters["state_searches"].Set(float64(stats.StateSearches()))
	e.counters["state_inserts"].Set(float64(stats.StateInserts()))
	e.counters["state_removals"].Set(float64(stats.StateRemovals()))

	ifstats := stats.IfStats()
	if ifstats != nil {
		e.counters["ipv4_bytes_in"].Set(float64(ifstats.IPv4.BytesIn))
		e.counters["ipv4_bytes_out"].Set(float64(ifstats.IPv4.BytesOut))
		e.counters["ipv4_packets_in_passed"].Set(float64(ifstats.IPv4.PacketsInPassed))
		e.counters["ipv4_packets_in_blocked"].Set(float64(ifstats.IPv4.PacketsInBlocked))
		e.counters["ipv4_packets_out_passed"].Set(float64(ifstats.IPv4.PacketsOutPassed))
		e.counters["ipv4_packets_out_blocked"].Set(float64(ifstats.IPv4.PacketsOutBlocked))

		e.counters["ipv6_bytes_in"].Set(float64(ifstats.IPv6.BytesIn))
		e.counters["ipv6_bytes_out"].Set(float64(ifstats.IPv6.BytesOut))
		e.counters["ipv6_packets_in_passed"].Set(float64(ifstats.IPv6.PacketsInPassed))
		e.counters["ipv6_packets_in_blocked"].Set(float64(ifstats.IPv6.PacketsInBlocked))
		e.counters["ipv6_packets_out_passed"].Set(float64(ifstats.IPv6.PacketsOutPassed))
		e.counters["ipv6_packets_out_blocked"].Set(float64(ifstats.IPv6.PacketsOutBlocked))
	}

	queues, err := e.fw.Queues()
	if err != nil {
		log.Errorf("failed to get queue stats: %v", err)
		return
	}

	for _, queue := range queues {
		e.countervecs["queue_xmit_packets"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.TransmitPackets))
		e.countervecs["queue_xmit_bytes"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.TransmitBytes))
		e.countervecs["queue_dropped_packets"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.DroppedPackets))
		e.countervecs["queue_dropped_bytes"].WithLabelValues(queue.Name, queue.IfName).Set(float64(queue.Stats.DroppedBytes))
	}

	for _, m := range e.gauges {
		m.Collect(ch)
	}
	for _, m := range e.counters {
		m.Collect(ch)
	}
	for _, m := range e.countervecs {
		m.Collect(ch)
	}
}
// watchNodes watches nodes as they come & go.
func (d *nodeDiscovery) watchNodes(events chan *nodeEvent, done <-chan struct{}, retryInterval time.Duration) {
	until(func() {
		nodes, resourceVersion, err := d.getNodes()
		if err != nil {
			log.Errorf("Cannot initialize nodes collection: %s", err)
			return
		}

		// Reset the known nodes.
		d.mtx.Lock()
		d.nodes = map[string]*Node{}
		d.mtx.Unlock()

		for _, node := range nodes {
			events <- &nodeEvent{Added, node}
		}

		req, err := http.NewRequest("GET", nodesURL, nil)
		if err != nil {
			log.Errorf("Cannot create nodes request: %s", err)
			return
		}
		values := req.URL.Query()
		values.Add("watch", "true")
		values.Add("resourceVersion", resourceVersion)
		req.URL.RawQuery = values.Encode()

		res, err := d.kd.queryAPIServerReq(req)
		if err != nil {
			log.Errorf("Failed to watch nodes: %s", err)
			return
		}
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			log.Errorf("Failed to watch nodes: %d", res.StatusCode)
			return
		}

		dec := json.NewDecoder(res.Body)

		for {
			var event nodeEvent
			if err := dec.Decode(&event); err != nil {
				log.Errorf("Watch nodes unexpectedly closed: %s", err)
				return
			}

			select {
			case events <- &event:
			case <-done:
			}
		}
	}, retryInterval, done)
}
// providersFromConfig returns all TargetProviders configured in cfg.
func providersFromConfig(cfg *config.ScrapeConfig) []TargetProvider {
	var providers []TargetProvider

	app := func(mech string, i int, tp TargetProvider) {
		providers = append(providers, &prefixedTargetProvider{
			job:            cfg.JobName,
			mechanism:      mech,
			idx:            i,
			TargetProvider: tp,
		})
	}

	for i, c := range cfg.DNSSDConfigs {
		app("dns", i, discovery.NewDNSDiscovery(c))
	}
	for i, c := range cfg.FileSDConfigs {
		app("file", i, discovery.NewFileDiscovery(c))
	}
	for i, c := range cfg.ConsulSDConfigs {
		k, err := discovery.NewConsulDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Consul discovery: %s", err)
			continue
		}
		app("consul", i, k)
	}
	for i, c := range cfg.MarathonSDConfigs {
		app("marathon", i, discovery.NewMarathonDiscovery(c))
	}
	for i, c := range cfg.KubernetesSDConfigs {
		k, err := discovery.NewKubernetesDiscovery(c)
		if err != nil {
			log.Errorf("Cannot create Kubernetes discovery: %s", err)
			continue
		}
		app("kubernetes", i, k)
	}
	for i, c := range cfg.ServersetSDConfigs {
		app("serverset", i, discovery.NewServersetDiscovery(c))
	}
	for i, c := range cfg.NerveSDConfigs {
		app("nerve", i, discovery.NewNerveDiscovery(c))
	}
	for i, c := range cfg.EC2SDConfigs {
		app("ec2", i, discovery.NewEC2Discovery(c))
	}
	if len(cfg.TargetGroups) > 0 {
		app("static", 0, NewStaticProvider(cfg.TargetGroups))
	}

	return providers
}
func (d *Dispatcher) run(it provider.AlertIterator) {
	cleanup := time.NewTicker(30 * time.Second)
	defer cleanup.Stop()

	defer it.Close()

	for {
		select {
		case alert, ok := <-it.Next():
			if !ok {
				// Iterator exhausted for some reason.
				if err := it.Err(); err != nil {
					log.Errorf("Error on alert update: %s", err)
				}
				return
			}

			d.log.With("alert", alert).Debug("Received alert")

			// Log errors but keep trying.
			if err := it.Err(); err != nil {
				log.Errorf("Error on alert update: %s", err)
				continue
			}

			for _, r := range d.route.Match(alert.Labels) {
				d.processAlert(alert, r)
			}

		case <-cleanup.C:
			d.mtx.Lock()

			for _, groups := range d.aggrGroups {
				for _, ag := range groups {
					if ag.empty() {
						ag.stop()
						delete(groups, ag.fingerprint())
					}
				}
			}

			d.mtx.Unlock()

		case <-d.ctx.Done():
			return
		}
	}
}
func (d *podDiscovery) watchPods(events chan *podEvent, done <-chan struct{}, retryInterval time.Duration) {
	until(func() {
		pods, resourceVersion, err := d.getPods()
		if err != nil {
			log.Errorf("Cannot initialize pods collection: %s", err)
			return
		}
		d.mtx.Lock()
		d.pods = pods
		d.mtx.Unlock()

		req, err := http.NewRequest("GET", podsURL, nil)
		if err != nil {
			log.Errorf("Cannot create pods request: %s", err)
			return
		}
		values := req.URL.Query()
		values.Add("watch", "true")
		values.Add("resourceVersion", resourceVersion)
		req.URL.RawQuery = values.Encode()

		res, err := d.kd.queryAPIServerReq(req)
		if err != nil {
			log.Errorf("Failed to watch pods: %s", err)
			return
		}
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			log.Errorf("Failed to watch pods: %d", res.StatusCode)
			return
		}

		dec := json.NewDecoder(res.Body)

		for {
			var event podEvent
			if err := dec.Decode(&event); err != nil {
				log.Errorf("Watch pods unexpectedly closed: %s", err)
				return
			}

			select {
			case events <- &event:
			case <-done:
			}
		}
	}, retryInterval, done)
}
// maintainArchivedSeries drops chunks older than beforeTime from an archived
// series. If the series contains no chunks after that, it is purged entirely.
func (s *memorySeriesStorage) maintainArchivedSeries(fp model.Fingerprint, beforeTime model.Time) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainArchived).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	has, firstTime, lastTime := s.persistence.hasArchivedMetric(fp)
	if !has || !firstTime.Before(beforeTime) {
		// Oldest sample not old enough, or metric purged or unarchived in the meantime.
		return
	}

	defer s.seriesOps.WithLabelValues(archiveMaintenance).Inc()

	newFirstTime, _, _, allDropped, err := s.persistence.dropAndPersistChunks(fp, beforeTime, nil)
	if err != nil {
		log.Error("Error dropping persisted chunks: ", err)
	}
	if allDropped {
		s.persistence.purgeArchivedMetric(fp) // Ignoring error. Nothing we can do.
		s.seriesOps.WithLabelValues(archivePurge).Inc()
		return
	}
	if err := s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime); err != nil {
		log.Errorf("Error updating archived time range for fingerprint %v: %s", fp, err)
	}
}
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	if err := parsePrometheusURL(); err != nil {
		return err
	}
	// Default -web.route-prefix to path of -web.external-url.
	if cfg.web.RoutePrefix == "" {
		cfg.web.RoutePrefix = cfg.web.ExternalURL.Path
	}
	// RoutePrefix must always be at least '/'.
	cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")

	if err := parseInfluxdbURL(); err != nil {
		return err
	}

	for u := range cfg.alertmanagerURLs {
		if err := validateAlertmanagerURL(u); err != nil {
			return err
		}
		cfg.notifier.AlertmanagerURLs = cfg.alertmanagerURLs.slice()
	}

	cfg.remote.InfluxdbPassword = os.Getenv("INFLUXDB_PW")

	return nil
}
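// A quick sketch of the route-prefix defaulting above, with hypothetical input
// values: the prefix falls back to the external URL's path and is then
// normalized to always start with "/" and carry no trailing slash.
// normalizeRoutePrefix is an illustrative helper, not part of the original code.
func normalizeRoutePrefix(externalURLPath, routePrefix string) string {
	if routePrefix == "" {
		routePrefix = externalURLPath
	}
	return "/" + strings.Trim(routePrefix, "/")
}

// normalizeRoutePrefix("/prometheus/", "")      -> "/prometheus"
// normalizeRoutePrefix("/prometheus", "/prom/") -> "/prom"
// normalizeRoutePrefix("", "")                  -> "/"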
func (d *podDiscovery) run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	pods, _, err := d.getPods()
	if err != nil {
		log.Errorf("Cannot initialize pods collection: %s", err)
		return
	}
	d.pods = pods

	initial := []*config.TargetGroup{}
	switch d.kd.Conf.Role {
	case config.KubernetesRolePod:
		initial = append(initial, d.updatePodsTargetGroup())
	case config.KubernetesRoleContainer:
		for _, ns := range d.pods {
			for _, pod := range ns {
				initial = append(initial, d.updateContainerTargetGroup(pod))
			}
		}
	}

	select {
	case ch <- initial:
	case <-ctx.Done():
		return
	}

	update := make(chan *podEvent, 10)
	go d.watchPods(update, ctx.Done(), d.retryInterval)

	for {
		tgs := []*config.TargetGroup{}
		select {
		case <-ctx.Done():
			return
		case e := <-update:
			log.Debugf("k8s discovery received pod event (EventType=%s, Pod Name=%s)", e.EventType, e.Pod.ObjectMeta.Name)
			d.updatePod(e.Pod, e.EventType)

			switch d.kd.Conf.Role {
			case config.KubernetesRoleContainer:
				// Update the per-pod target group.
				tgs = append(tgs, d.updateContainerTargetGroup(e.Pod))
			case config.KubernetesRolePod:
				// Update the all-pods target group.
				tgs = append(tgs, d.updatePodsTargetGroup())
			}
		}

		if tgs == nil {
			continue
		}

		for _, tg := range tgs {
			select {
			case ch <- []*config.TargetGroup{tg}:
			case <-ctx.Done():
				return
			}
		}
	}
}
// Run implements the TargetProvider interface.
func (ad *AzureDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	ticker := time.NewTicker(ad.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		default:
		}

		tg, err := ad.refresh()
		if err != nil {
			log.Errorf("unable to refresh during Azure discovery: %s", err)
		} else {
			select {
			case <-ctx.Done():
			case ch <- []*config.TargetGroup{tg}:
			}
		}

		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}
	}
}
// refresh reads all files matching the discovery's patterns and sends the respective
// updated target groups through the channel.
func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) {
	t0 := time.Now()
	defer func() {
		fileSDScanDuration.Observe(time.Since(t0).Seconds())
	}()

	ref := map[string]int{}
	for _, p := range fd.listFiles() {
		tgroups, err := readFile(p)
		if err != nil {
			fileSDReadErrorsCount.Inc()
			log.Errorf("Error reading file %q: %s", p, err)
			// Prevent deletion down below.
			ref[p] = fd.lastRefresh[p]
			continue
		}
		ch <- tgroups

		ref[p] = len(tgroups)
	}
	// Send empty updates for sources that disappeared.
	for f, n := range fd.lastRefresh {
		m, ok := ref[f]
		if !ok || n > m {
			for i := m; i < n; i++ {
				ch <- []*config.TargetGroup{
					{Source: fileSource(f, i)},
				}
			}
		}
	}
	fd.lastRefresh = ref

	fd.watchFiles()
}
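// A rough sketch of the cleanup step above, under the assumption that a file
// previously yielded three target groups and now yields only one: empty groups
// are sent for the indices that disappeared so that consumers drop the stale
// targets associated with those sources. staleGroupUpdates is a hypothetical
// helper that only mirrors the loop in refresh.
func staleGroupUpdates(path string, oldCount, newCount int) []*config.TargetGroup {
	var stale []*config.TargetGroup
	for i := newCount; i < oldCount; i++ {
		stale = append(stale, &config.TargetGroup{Source: fileSource(path, i)})
	}
	return stale
}

// staleGroupUpdates("targets.json", 3, 1) returns two groups with sources
// fileSource("targets.json", 1) and fileSource("targets.json", 2) and no targets.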
// Notify implements the Notifier interface.
func (w *Webhook) Notify(ctx context.Context, alerts ...*types.Alert) error {
	data := w.tmpl.Data(receiver(ctx), groupLabels(ctx), alerts...)

	groupKey, ok := GroupKey(ctx)
	if !ok {
		log.Errorf("group key missing")
	}

	msg := &WebhookMessage{
		Version:  "3",
		Data:     data,
		GroupKey: uint64(groupKey),
	}

	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(msg); err != nil {
		return err
	}

	resp, err := ctxhttp.Post(ctx, http.DefaultClient, w.URL, contentTypeJSON, &buf)
	if err != nil {
		return err
	}
	resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("unexpected status code %v from %s", resp.StatusCode, w.URL)
	}

	return nil
}
// Sources implements the TargetProvider interface.
func (cd *ConsulDiscovery) Sources() []string {
	clientConf := *cd.clientConf
	clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}

	client, err := consul.NewClient(&clientConf)
	if err != nil {
		// NewClient always returns a nil error.
		panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
	}

	srvs, _, err := client.Catalog().Services(nil)
	if err != nil {
		log.Errorf("Error refreshing service list: %s", err)
		return nil
	}
	cd.mu.Lock()
	defer cd.mu.Unlock()

	srcs := make([]string, 0, len(srvs))
	for name := range srvs {
		if _, ok := cd.scrapedServices[name]; len(cd.scrapedServices) == 0 || ok {
			srcs = append(srcs, name)
		}
	}
	return srcs
}
func (e *Exporter) setMetrics(csvRows <-chan []string) {
	for csvRow := range csvRows {
		if len(csvRow) < expectedCsvFieldCount {
			log.Errorf("Wrong CSV field count: %d vs. %d", len(csvRow), expectedCsvFieldCount)
			e.csvParseFailures.Inc()
			continue
		}

		pxname, svname, typ := csvRow[0], csvRow[1], csvRow[32]

		const (
			frontend = "0"
			backend  = "1"
			server   = "2"
			listener = "3"
		)

		switch typ {
		case frontend:
			e.exportCsvFields(e.frontendMetrics, csvRow, pxname)
		case backend:
			e.exportCsvFields(e.backendMetrics, csvRow, pxname)
		case server:
			e.exportCsvFields(e.serverMetrics, csvRow, pxname, svname)
		}
	}
}
// LabelValuesForLabelName implements Storage.
func (s *memorySeriesStorage) LabelValuesForLabelName(labelName model.LabelName) model.LabelValues {
	lvs, err := s.persistence.labelValuesForLabelName(labelName)
	if err != nil {
		log.Errorf("Error getting label values for label name %q: %v", labelName, err)
	}
	return lvs
}
// Run the Inhibitor's background processing.
func (ih *Inhibitor) Run() {
	ih.mtx.Lock()
	ih.stopc = make(chan struct{})
	ih.mtx.Unlock()

	go ih.runGC()

	it := ih.alerts.Subscribe()
	defer it.Close()

	for {
		select {
		case <-ih.stopc:
			return
		case a := <-it.Next():
			if err := it.Err(); err != nil {
				log.Errorf("Error iterating alerts: %s", err)
				continue
			}
			if a.Resolved() {
				// As alerts can also time out without an update, we never
				// handle new resolved alerts but invalidate the cache on read.
				continue
			}
			// Populate the inhibition rules' cache.
			for _, r := range ih.rules {
				if r.SourceMatchers.Match(a.Labels) {
					r.set(a)
				}
			}
		}
	}
}