func (tc *zookeeperTreeCache) loop(failureMode bool) {
	retryChan := make(chan struct{})

	failure := func() {
		failureMode = true
		time.AfterFunc(time.Second*10, func() {
			retryChan <- struct{}{}
		})
	}
	if failureMode {
		failure()
	}

	for {
		select {
		case ev := <-tc.head.events:
			log.Debugf("Received Zookeeper event: %s", ev)
			if failureMode {
				continue
			}
			if ev.Type == zk.EventNotWatching {
				log.Infof("Lost connection to Zookeeper.")
				failure()
			} else {
				path := strings.TrimPrefix(ev.Path, tc.prefix)
				parts := strings.Split(path, "/")
				node := tc.head
				for _, part := range parts[1:] {
					childNode := node.children[part]
					if childNode == nil {
						childNode = &zookeeperTreeCacheNode{
							events:   tc.head.events,
							children: map[string]*zookeeperTreeCacheNode{},
							done:     make(chan struct{}, 1),
						}
						node.children[part] = childNode
					}
					node = childNode
				}
				err := tc.recursiveNodeUpdate(ev.Path, node)
				if err != nil {
					log.Errorf("Error during processing of Zookeeper event: %s", err)
					failure()
				}
			}
		case <-retryChan:
			log.Infof("Attempting to resync state with Zookeeper")
			err := tc.recursiveNodeUpdate(tc.prefix, tc.head)
			if err == nil {
				failureMode = false
			} else {
				log.Errorf("Error during Zookeeper resync: %s", err)
				failure()
			}
		case <-tc.stop:
			close(tc.events)
			return
		}
	}
}
func matchRegularExpressions(reader io.Reader, config HTTPProbe) bool {
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Errorf("Error reading HTTP body: %s", err)
		return false
	}
	for _, expression := range config.FailIfMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if re.Match(body) {
			return false
		}
	}
	for _, expression := range config.FailIfNotMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if !re.Match(body) {
			return false
		}
	}
	return true
}
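// Hypothetical usage sketch for matchRegularExpressions above. Only the
// HTTPProbe field names (FailIfMatchesRegexp, FailIfNotMatchesRegexp) are
// taken from that function; the patterns, body, and this helper are invented
// for illustration and are not part of the original code.
func exampleRegexpProbe() bool {
	probe := HTTPProbe{
		FailIfMatchesRegexp:    []string{"Internal Server Error"},
		FailIfNotMatchesRegexp: []string{`(?i)status:\s*ok`},
	}
	// Returns true: the body matches the required pattern and none of the
	// forbidden ones.
	return matchRegularExpressions(strings.NewReader("status: OK"), probe)
}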
// purgeArchivedMetric deletes an archived fingerprint and its corresponding
// metric entirely. It also queues the metric for un-indexing (no need to call
// unindexMetric for the deleted metric). It does not touch the series file,
// though. The caller must have locked the fingerprint.
func (p *persistence) purgeArchivedMetric(fp clientmodel.Fingerprint) (err error) {
	defer func() {
		if err != nil {
			p.setDirty(true)
		}
	}()

	metric, err := p.archivedMetric(fp)
	if err != nil || metric == nil {
		return err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	p.unindexMetric(fp, metric)
	return nil
}
func (kd *Discovery) addService(service *Service) *config.TargetGroup {
	namespace, ok := kd.services[service.ObjectMeta.Namespace]
	if !ok {
		namespace = map[string]*Service{}
		kd.services[service.ObjectMeta.Namespace] = namespace
	}
	namespace[service.ObjectMeta.Name] = service
	endpointURL := fmt.Sprintf(serviceEndpointsURL, service.ObjectMeta.Namespace, service.ObjectMeta.Name)

	res, err := kd.queryMasterPath(endpointURL)
	if err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}
	// Close the body so the underlying connection can be reused.
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		log.Errorf("Failed to get service endpoints: %d", res.StatusCode)
		return nil
	}

	var eps Endpoints
	if err := json.NewDecoder(res.Body).Decode(&eps); err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}
	return kd.updateServiceTargetGroup(service, &eps)
}
// unarchiveMetric deletes an archived fingerprint and its metric, but (in
// contrast to purgeArchivedMetric) does not un-index the metric. If a metric
// was actually deleted, the method returns true and the first time of the
// deleted metric. The caller must have locked the fingerprint.
func (p *persistence) unarchiveMetric(fp clientmodel.Fingerprint) (
	deletedAnything bool,
	firstDeletedTime clientmodel.Timestamp,
	err error,
) {
	defer func() {
		if err != nil {
			p.setDirty(true)
		}
	}()

	firstTime, _, has, err := p.archivedFingerprintToTimeRange.Lookup(fp)
	if err != nil || !has {
		return false, firstTime, err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return false, firstTime, err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return false, firstTime, err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	return true, firstTime, nil
}
func reloadConfig(filename string, rls ...Reloadable) (success bool) {
	log.Infof("Loading configuration file %s", filename)

	defer func() {
		if success {
			configSuccess.Set(1)
			configSuccessTime.Set(float64(time.Now().Unix()))
		} else {
			configSuccess.Set(0)
		}
	}()

	conf, err := config.LoadFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		// TODO(julius): Remove this notice when releasing 0.17.0 or 0.18.0.
		if err.Error() == "unknown fields in global config: labels" {
			log.Errorf("NOTE: The 'labels' setting in the global configuration section has been renamed to 'external_labels' and now has changed semantics (see release notes at https://github.com/prometheus/prometheus/blob/master/CHANGELOG.md). Please update your configuration file accordingly.")
		}
		return false
	}
	success = true

	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
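// Hypothetical sketch of a Reloadable as consumed by reloadConfig above. The
// only shape inferred from that function is ApplyConfig(*config.Config) bool;
// the type name and behavior here are invented for illustration.
type noopReloadable struct{}

func (noopReloadable) ApplyConfig(conf *config.Config) bool {
	// A real component would swap in settings derived from conf here and
	// report whether applying them succeeded.
	return conf != nil
}

// Usage sketch: reloadConfig("prometheus.yml", noopReloadable{})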
func (s *memorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Metric) *memorySeries {
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		var cds []*chunkDesc
		var modTime time.Time
		unarchived, err := s.persistence.unarchiveMetric(fp)
		if err != nil {
			log.Errorf("Error unarchiving fingerprint %v (metric %v): %v", fp, m, err)
		}
		if unarchived {
			s.seriesOps.WithLabelValues(unarchive).Inc()
			// We have to load chunkDescs anyway to do anything with
			// the series, so let's do it right now so that we don't
			// end up with a series without any chunkDescs for a
			// while (which is confusing as it makes the series
			// appear as archived or purged).
			cds, err = s.loadChunkDescs(fp, 0)
			if err != nil {
				log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
			}
			modTime = s.persistence.seriesFileModTime(fp)
		} else {
			// This was a genuinely new series, so index the metric.
			s.persistence.indexMetric(fp, m)
			s.seriesOps.WithLabelValues(create).Inc()
		}
		series = newMemorySeries(m, cds, modTime)
		s.fpToSeries.put(fp, series)
		s.numSeries.Inc()
	}
	return series
}
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules failed, the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.Lock()
	defer m.Unlock()

	success := true
	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

	rulesSnapshot := make([]Rule, len(m.rules))
	copy(rulesSnapshot, m.rules)
	m.rules = m.rules[:0]

	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			success = false
		}
		files = append(files, fs...)
	}
	if err := m.loadRuleFiles(files...); err != nil {
		// If loading the new rules failed, restore the old rule set.
		m.rules = rulesSnapshot
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		success = false
	}
	return success
}
func (e *Exporter) queryClient(services chan<- []*consul_api.ServiceEntry, checks chan<- []*consul_api.HealthCheck) {
	defer close(services)
	defer close(checks)

	// How many peers are in the Consul cluster?
	peers, err := e.client.Status().Peers()
	if err != nil {
		e.up.Set(0)
		log.Errorf("Query error is %v", err)
		return
	}

	// We'll use peers to decide that we're up.
	e.up.Set(1)
	e.clusterServers.Set(float64(len(peers)))

	// How many nodes are registered?
	nodes, _, err := e.client.Catalog().Nodes(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
	} else {
		e.nodeCount.Set(float64(len(nodes)))
	}

	// Query for the full list of services. Only set the service count after
	// the error check; setting it beforehand (and twice) was redundant.
	serviceNames, _, err := e.client.Catalog().Services(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
		return
	}
	e.serviceCount.Set(float64(len(serviceNames)))

	for s := range serviceNames {
		s_entries, _, err := e.client.Health().Service(s, "", false, &consul_api.QueryOptions{})
		if err != nil {
			log.Errorf("Failed to query service health: %v", err)
			continue
		}
		services <- s_entries
	}

	c_entries, _, err := e.client.Health().State("any", &consul_api.QueryOptions{})
	if err != nil {
		log.Errorf("Failed to query service health: %v", err)
	} else {
		checks <- c_entries
	}
}
// Run implements the TargetProvider interface.
func (fd *FileDiscovery) Run(ch chan<- *config.TargetGroup, done <-chan struct{}) {
	defer close(ch)
	defer fd.stop()

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Errorf("Error creating file watcher: %s", err)
		return
	}
	fd.watcher = watcher

	fd.refresh(ch)

	ticker := time.NewTicker(fd.interval)
	defer ticker.Stop()

	for {
		// Stopping has priority over refreshing. Thus we wrap the actual select
		// clause to always catch done signals.
		select {
		case <-done:
			return
		default:
			select {
			case <-done:
				return

			case event := <-fd.watcher.Events:
				// fsnotify sometimes sends a bunch of events without name or operation.
				// It's unclear what they are and why they are sent - filter them out.
				if len(event.Name) == 0 {
					break
				}
				// Everything but a chmod requires rereading.
				if event.Op^fsnotify.Chmod == 0 {
					break
				}
				// Changes to a file can spawn various sequences of events with
				// different combinations of operations. For all practical purposes
				// this is inaccurate.
				// The most reliable solution is to reload everything if anything happens.
				fd.refresh(ch)

			case <-ticker.C:
				// Setting a new watch after an update might fail. Make sure we don't lose
				// those files forever.
				fd.refresh(ch)

			case err := <-fd.watcher.Errors:
				if err != nil {
					log.Errorf("Error on file watch: %s", err)
				}
			}
		}
	}
}
func (e *Exporter) collect() {
	// How many peers are in the Consul cluster?
	peers, err := e.client.Status().Peers()
	if err != nil {
		e.up.Set(0)
		log.Errorf("Query error is %v", err)
		return
	}

	// We'll use peers to decide that we're up.
	e.up.Set(1)
	e.clusterServers.Set(float64(len(peers)))

	// How many nodes are registered?
	nodes, _, err := e.client.Catalog().Nodes(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
	} else {
		e.nodeCount.Set(float64(len(nodes)))
	}

	// Query for the full list of services.
	serviceNames, _, err := e.client.Catalog().Services(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
		return
	}
	e.serviceCount.Set(float64(len(serviceNames)))

	if e.healthSummary {
		e.collectHealthSummary(serviceNames)
	}

	checks, _, err := e.client.Health().State("any", &consul_api.QueryOptions{})
	if err != nil {
		log.Errorf("Failed to query service health: %v", err)
		return
	}

	for _, hc := range checks {
		var passing float64
		if hc.Status == consul.HealthPassing {
			passing = 1
		}
		if hc.ServiceID == "" {
			e.nodeChecks.WithLabelValues(hc.CheckID, hc.Node).Set(passing)
		} else {
			e.serviceChecks.WithLabelValues(hc.CheckID, hc.Node, hc.ServiceID).Set(passing)
		}
	}
}
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	ppref := strings.TrimRight(cfg.web.PathPrefix, "/")
	if ppref != "" && !strings.HasPrefix(ppref, "/") {
		ppref = "/" + ppref
	}
	cfg.web.PathPrefix = ppref

	if cfg.web.Hostname == "" {
		cfg.web.Hostname, err = os.Hostname()
		if err != nil {
			return err
		}
	}
	_, port, err := net.SplitHostPort(cfg.web.ListenAddress)
	if err != nil {
		return err
	}
	cfg.prometheusURL = fmt.Sprintf("http://%s:%s%s/", cfg.web.Hostname, port, cfg.web.PathPrefix)

	return nil
}
// Append implements Storage.
func (s *memorySeriesStorage) Append(sample *clientmodel.Sample) {
	if s.getNumChunksToPersist() >= s.maxChunksToPersist {
		log.Warnf(
			"%d chunks waiting for persistence, sample ingestion suspended.",
			s.getNumChunksToPersist(),
		)
		for s.getNumChunksToPersist() >= s.maxChunksToPersist {
			time.Sleep(time.Second)
		}
		log.Warn("Sample ingestion resumed.")
	}
	rawFP := sample.Metric.FastFingerprint()
	s.fpLocker.Lock(rawFP)
	fp, err := s.mapper.mapFP(rawFP, sample.Metric)
	if err != nil {
		log.Errorf("Error while mapping fingerprint %v: %v", rawFP, err)
		s.persistence.setDirty(true)
	}
	if fp != rawFP {
		// Switch locks.
		s.fpLocker.Unlock(rawFP)
		s.fpLocker.Lock(fp)
	}
	series := s.getOrCreateSeries(fp, sample.Metric)
	completedChunksCount := series.add(&metric.SamplePair{
		Value:     sample.Value,
		Timestamp: sample.Timestamp,
	})
	s.fpLocker.Unlock(fp)
	s.ingestedSamplesCount.Inc()
	s.incNumChunksToPersist(completedChunksCount)
}
// refresh reads all files matching the discovery's patterns and sends the respective
// updated target groups through the channel.
func (fd *FileDiscovery) refresh(ch chan<- *config.TargetGroup) {
	ref := map[string]int{}
	for _, p := range fd.listFiles() {
		tgroups, err := readFile(p)
		if err != nil {
			log.Errorf("Error reading file %q: %s", p, err)
			// Prevent deletion down below.
			ref[p] = fd.lastRefresh[p]
			continue
		}
		for _, tg := range tgroups {
			ch <- tg
		}
		ref[p] = len(tgroups)
	}
	// Send empty updates for sources that disappeared.
	for f, n := range fd.lastRefresh {
		m, ok := ref[f]
		if !ok || n > m {
			for i := m; i < n; i++ {
				ch <- &config.TargetGroup{Source: fileSource(f, i)}
			}
		}
	}
	fd.lastRefresh = ref

	fd.watchFiles()
}
func (h *Handler) getTemplateFile(name string) (string, error) {
	if h.options.UseLocalAssets {
		file, err := ioutil.ReadFile(fmt.Sprintf("web/blob/templates/%s.html", name))
		if err != nil {
			log.Errorf("Could not read %s template: %s", name, err)
			return "", err
		}
		return string(file), nil
	}
	file, err := blob.GetFile(blob.TemplateFiles, name+".html")
	if err != nil {
		log.Errorf("Could not read %s template: %s", name, err)
		return "", err
	}
	return string(file), nil
}
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	if cfg.prometheusURL == "" {
		hostname, err := os.Hostname()
		if err != nil {
			return err
		}
		_, port, err := net.SplitHostPort(cfg.web.ListenAddress)
		if err != nil {
			return err
		}
		cfg.prometheusURL = fmt.Sprintf("http://%s:%s/", hostname, port)
	}

	promURL, err := url.Parse(cfg.prometheusURL)
	if err != nil {
		return err
	}
	cfg.web.ExternalURL = promURL

	ppref := strings.TrimRight(cfg.web.ExternalURL.Path, "/")
	if ppref != "" && !strings.HasPrefix(ppref, "/") {
		ppref = "/" + ppref
	}
	cfg.web.ExternalURL.Path = ppref

	return nil
}
func (t *Target) scrape(sampleAppender storage.SampleAppender) (err error) {
	start := time.Now()
	baseLabels := t.BaseLabels()

	defer func() {
		t.status.setLastError(err)
		recordScrapeHealth(sampleAppender, clientmodel.TimestampFromTime(start), baseLabels, t.status.Health(), time.Since(start))
	}()

	req, err := http.NewRequest("GET", t.URL(), nil)
	if err != nil {
		panic(err)
	}
	req.Header.Add("Accept", acceptHeader)

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned HTTP status %s", resp.Status)
	}

	processor, err := extraction.ProcessorForRequestHeader(resp.Header)
	if err != nil {
		return err
	}

	t.ingestedSamples = make(chan clientmodel.Samples, ingestedSamplesCap)

	processOptions := &extraction.ProcessOptions{
		Timestamp: clientmodel.TimestampFromTime(start),
	}
	go func() {
		err = processor.ProcessSingle(resp.Body, t, processOptions)
		close(t.ingestedSamples)
	}()

	for samples := range t.ingestedSamples {
		for _, s := range samples {
			s.Metric.MergeFromLabelSet(baseLabels, clientmodel.ExporterLabelPrefix)
			// Avoid the copy in Relabel if there are no configs.
			if len(t.metricRelabelConfigs) > 0 {
				labels, err := Relabel(clientmodel.LabelSet(s.Metric), t.metricRelabelConfigs...)
				if err != nil {
					log.Errorf("Error while relabeling metric %s of instance %s: %s", s.Metric, t.url, err)
					continue
				}
				// Check if the timeseries was dropped.
				if labels == nil {
					continue
				}
				s.Metric = clientmodel.Metric(labels)
			}
			sampleAppender.Append(s)
		}
	}
	return err
}
func reloadConfig(filename string, rls ...Reloadable) bool {
	log.Infof("Loading configuration file %s", filename)

	conf, err := config.LoadFromFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		log.Errorf("Note: The configuration format has changed with version 0.14. Please see the documentation (http://prometheus.io/docs/operating/configuration/) and the provided configuration migration tool (https://github.com/prometheus/migrate).")
		return false
	}

	success := true
	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
// LabelValuesForLabelName implements Storage.
func (s *memorySeriesStorage) LabelValuesForLabelName(labelName model.LabelName) model.LabelValues {
	lvs, err := s.persistence.labelValuesForLabelName(labelName)
	if err != nil {
		log.Errorf("Error getting label values for label name %q: %v", labelName, err)
	}
	return lvs
}
// watchServices retrieves updates from Consul's services endpoint and sends
// potential updates to the update channel.
func (cd *ConsulDiscovery) watchServices(update chan<- *consulService) {
	var lastIndex uint64
	for {
		catalog := cd.client.Catalog()
		srvs, meta, err := catalog.Services(&consul.QueryOptions{
			WaitIndex: lastIndex,
			WaitTime:  consulWatchTimeout,
		})
		if err != nil {
			log.Errorf("Error refreshing service list: %s", err)
			<-time.After(consulRetryInterval)
			continue
		}
		// If the index equals the previous one, the watch timed out with no update.
		if meta.LastIndex == lastIndex {
			continue
		}
		lastIndex = meta.LastIndex

		cd.mu.Lock()
		select {
		case <-cd.srvsDone:
			cd.mu.Unlock()
			return
		default:
			// Continue.
		}
		// Check for new services.
		for name := range srvs {
			if _, ok := cd.scrapedServices[name]; !ok {
				continue
			}
			srv, ok := cd.services[name]
			if !ok {
				srv = &consulService{
					name:   name,
					tgroup: &config.TargetGroup{},
					done:   make(chan struct{}, 1),
				}
				srv.tgroup.Source = consulSourcePrefix + ":" + name
				cd.services[name] = srv
			}
			srv.tgroup.Labels = clientmodel.LabelSet{
				ConsulServiceLabel: clientmodel.LabelValue(name),
				ConsulDCLabel:      clientmodel.LabelValue(cd.clientConf.Datacenter),
			}
			update <- srv
		}
		// Check for removed services.
		for name, srv := range cd.services {
			if _, ok := srvs[name]; !ok {
				srv.removed = true
				update <- srv
				srv.done <- struct{}{}
				delete(cd.services, name)
			}
		}
		cd.mu.Unlock()
	}
}
// Sources implements the TargetProvider interface.
func (cd *ConsulDiscovery) Sources() []string {
	clientConf := *cd.clientConf
	clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}

	client, err := consul.NewClient(&clientConf)
	if err != nil {
		// NewClient always returns a nil error.
		panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
	}

	srvs, _, err := client.Catalog().Services(nil)
	if err != nil {
		log.Errorf("Error refreshing service list: %s", err)
		return nil
	}
	cd.mu.Lock()
	defer cd.mu.Unlock()

	srcs := make([]string, 0, len(srvs))
	for name := range srvs {
		if _, ok := cd.scrapedServices[name]; ok {
			srcs = append(srcs, consulSourcePrefix+":"+name)
		}
	}
	return srcs
}
// update the services based on the given node.
func (srvs *services) update(node *etcd.Node) {
	if node.Dir {
		for _, n := range node.Nodes {
			srvs.update(n)
		}
		return
	}
	if pathPatInfo.MatchString(node.Key) {
		var info *ServiceInfo
		err := json.Unmarshal([]byte(node.Value), &info)
		if err != nil {
			log.Errorln(err)
			return
		}
		name := pathPatInfo.FindStringSubmatch(node.Key)[1]
		srv, ok := srvs.m[name]
		if !ok {
			srv = &service{instances: map[string]*Instance{}}
			srvs.m[name] = srv
		}
		if !info.Monitored {
			srvs.del = append(srvs.del, name)
		}
		srv.info = info
	} else if pathPatInstance.MatchString(node.Key) {
		match := pathPatInstance.FindStringSubmatch(node.Key)
		name := match[1]
		srv, ok := srvs.m[name]
		if !ok {
			log.Errorf("instance update for unknown service %q", name)
			return
		}
		var inst *Instance
		err := json.Unmarshal([]byte(node.Value), &inst)
		if err != nil {
			log.Errorln(err)
			return
		}
		srv.instances[match[2]] = inst
	} else {
		log.Errorf("cannot resolve key %q", node.Key)
	}
}
// maintainMemorySeries maintains a series that is in memory (i.e. not
// archived). It returns true if the series has changed from clean to dirty
// (i.e. it is now inconsistent with the latest checkpoint, so that in case of
// a crash a recovery operation requiring a disk seek would need to be applied).
//
// The method first closes the head chunk if it was not touched for the duration
// of headChunkTimeout.
//
// Then it determines the chunks that need to be purged and the chunks that need
// to be persisted. Depending on the result, it does the following:
//
// - If all chunks of a series need to be purged, the whole series is deleted
// for good and the method returns false. (Detecting non-existence of a series
// file does not require a disk seek.)
//
// - If any chunks need to be purged (but not all of them), it purges those
// chunks from memory and rewrites the series file on disk, leaving out the
// purged chunks and appending all chunks not yet persisted (with the exception
// of a still open head chunk).
//
// - If no chunks on disk need to be purged, but chunks need to be persisted,
// those chunks are simply appended to the existing series file (or the file is
// created if it does not exist yet).
//
// - If no chunks need to be purged and no chunks need to be persisted, nothing
// happens in this step.
//
// Next, the method checks if all chunks in the series are evicted. In that
// case, it archives the series and returns true.
//
// Finally, it evicts chunkDescs if there are too many.
func (s *memorySeriesStorage) maintainMemorySeries(
	fp model.Fingerprint, beforeTime model.Time,
) (becameDirty bool) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainInMemory).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		// Series is actually not in memory, perhaps archived or dropped in the meantime.
		return false
	}

	defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc()

	if series.maybeCloseHeadChunk() {
		s.incNumChunksToPersist(1)
	}

	seriesWasDirty := series.dirty

	if s.writeMemorySeries(fp, series, beforeTime) {
		// Series is gone now, we are done.
		return false
	}

	iOldestNotEvicted := -1
	for i, cd := range series.chunkDescs {
		if !cd.isEvicted() {
			iOldestNotEvicted = i
			break
		}
	}

	// Archive if all chunks are evicted.
	if iOldestNotEvicted == -1 {
		s.fpToSeries.del(fp)
		s.numSeries.Dec()
		if err := s.persistence.archiveMetric(
			fp, series.metric, series.firstTime(), series.lastTime,
		); err != nil {
			log.Errorf("Error archiving metric %v: %v", series.metric, err)
			return
		}
		s.seriesOps.WithLabelValues(archive).Inc()
		return
	}
	// If we are here, the series is not archived, so check for chunkDesc
	// eviction next.
	series.evictChunkDescs(iOldestNotEvicted)

	return series.dirty && !seriesWasDirty
}
// DropMetricsForFingerprints implements Storage.
func (s *memorySeriesStorage) DropMetricsForFingerprints(fps ...model.Fingerprint) {
	for _, fp := range fps {
		s.fpLocker.Lock(fp)

		if series, ok := s.fpToSeries.get(fp); ok {
			s.fpToSeries.del(fp)
			s.numSeries.Dec()
			s.persistence.unindexMetric(fp, series.metric)
			if _, err := s.persistence.deleteSeriesFile(fp); err != nil {
				log.Errorf("Error deleting series file for %v: %v", fp, err)
			}
		} else if err := s.persistence.purgeArchivedMetric(fp); err != nil {
			log.Errorf("Error purging metric with fingerprint %v: %v", fp, err)
		}

		s.fpLocker.Unlock(fp)
	}
}
// handleTargetUpdates receives target group updates and handles them in the
// context of the given job config.
func (tm *TargetManager) handleTargetUpdates(cfg *config.ScrapeConfig, ch <-chan *config.TargetGroup) {
	for tg := range ch {
		log.Debugf("Received potential update for target group %q", tg.Source)

		if err := tm.updateTargetGroup(tg, cfg); err != nil {
			log.Errorf("Error updating targets: %s", err)
		}
	}
}
// Update overwrites settings in the target that are derived from the job config
// it belongs to.
func (t *Target) Update(cfg *config.ScrapeConfig, baseLabels, metaLabels model.LabelSet) {
	t.Lock()
	defer t.Unlock()

	httpClient, err := newHTTPClient(cfg)
	if err != nil {
		log.Errorf("cannot create HTTP client: %v", err)
		return
	}
	t.httpClient = httpClient

	t.url.Scheme = string(baseLabels[model.SchemeLabel])
	t.url.Path = string(baseLabels[model.MetricsPathLabel])

	t.internalLabels = model.LabelSet{}
	t.internalLabels[model.SchemeLabel] = baseLabels[model.SchemeLabel]
	t.internalLabels[model.MetricsPathLabel] = baseLabels[model.MetricsPathLabel]
	t.internalLabels[model.AddressLabel] = model.LabelValue(t.url.Host)

	params := url.Values{}
	for k, v := range cfg.Params {
		params[k] = make([]string, len(v))
		copy(params[k], v)
	}
	for k, v := range baseLabels {
		if strings.HasPrefix(string(k), model.ParamLabelPrefix) {
			if len(params[string(k[len(model.ParamLabelPrefix):])]) > 0 {
				params[string(k[len(model.ParamLabelPrefix):])][0] = string(v)
			} else {
				params[string(k[len(model.ParamLabelPrefix):])] = []string{string(v)}
			}
			t.internalLabels[model.ParamLabelPrefix+k[len(model.ParamLabelPrefix):]] = v
		}
	}
	t.url.RawQuery = params.Encode()

	t.scrapeInterval = time.Duration(cfg.ScrapeInterval)
	t.deadline = time.Duration(cfg.ScrapeTimeout)
	t.honorLabels = cfg.HonorLabels

	t.metaLabels = metaLabels
	t.baseLabels = model.LabelSet{}
	// All remaining internal labels will not be part of the label set.
	for name, val := range baseLabels {
		if !strings.HasPrefix(string(name), model.ReservedLabelPrefix) {
			t.baseLabels[name] = val
		}
	}
	if _, ok := t.baseLabels[model.InstanceLabel]; !ok {
		t.baseLabels[model.InstanceLabel] = model.LabelValue(t.InstanceIdentifier())
	}
	t.metricRelabelConfigs = cfg.MetricRelabelConfigs
}
func (s *Silencer) setupExpiryTimer(sc *Silence) {
	if sc.expiryTimer != nil {
		sc.expiryTimer.Stop()
	}
	expDuration := sc.EndsAt.Sub(time.Now())
	sc.expiryTimer = time.AfterFunc(expDuration, func() {
		if err := s.DelSilence(sc.ID); err != nil {
			log.Errorf("Failed to delete silence %d: %s", sc.ID, err)
		}
	})
}
func (h *Handler) getTemplate(name string) (*template_std.Template, error) {
	t := template_std.New("_base")
	var err error

	t.Funcs(template_std.FuncMap{
		"since":       time.Since,
		"getConsoles": h.getConsoles,
		"pathPrefix":  func() string { return h.options.PathPrefix },
		"stripLabels": func(lset clientmodel.LabelSet, labels ...clientmodel.LabelName) clientmodel.LabelSet {
			for _, ln := range labels {
				delete(lset, ln)
			}
			return lset
		},
		"globalURL": func(url string) string {
			for _, localhostRepresentation := range localhostRepresentations {
				url = strings.Replace(url, "//"+localhostRepresentation, "//"+h.options.Hostname, 1)
			}
			return url
		},
		"healthToClass": func(th retrieval.TargetHealth) string {
			switch th {
			case retrieval.HealthUnknown:
				return "warning"
			case retrieval.HealthGood:
				return "success"
			default:
				return "danger"
			}
		},
	})

	file, err := h.getTemplateFile("_base")
	if err != nil {
		log.Errorln("Could not read base template:", err)
		return nil, err
	}
	t, err = t.Parse(file)
	if err != nil {
		log.Errorln("Could not parse base template:", err)
	}

	file, err = h.getTemplateFile(name)
	if err != nil {
		log.Errorf("Could not read template %s: %s", name, err)
		return nil, err
	}
	t, err = t.Parse(file)
	if err != nil {
		log.Errorf("Could not parse template %s: %s", name, err)
	}
	return t, err
}
// listFiles returns a list of all files that match the configured patterns.
func (fd *FileDiscovery) listFiles() []string {
	var paths []string
	for _, p := range fd.paths {
		files, err := filepath.Glob(p)
		if err != nil {
			log.Errorf("Error expanding glob %q: %s", p, err)
			continue
		}
		paths = append(paths, files...)
	}
	return paths
}
// Run serves the HTTP endpoints.
func (h *Handler) Run() {
	log.Infof("Listening on %s", h.options.ListenAddress)

	// If we cannot bind to a port, retry after 30 seconds.
	for {
		err := http.ListenAndServe(h.options.ListenAddress, h.router)
		if err != nil {
			log.Errorf("Could not listen on %s: %s", h.options.ListenAddress, err)
		}
		time.Sleep(30 * time.Second)
	}
}