Example #1
0
func (tc *zookeeperTreeCache) loop(failureMode bool) {
	retryChan := make(chan struct{})

	failure := func() {
		failureMode = true
		time.AfterFunc(time.Second*10, func() {
			retryChan <- struct{}{}
		})
	}
	if failureMode {
		failure()
	}

	for {
		select {
		case ev := <-tc.head.events:
			log.Debugf("Received Zookeeper event: %s", ev)
			if failureMode {
				continue
			}
			if ev.Type == zk.EventNotWatching {
				log.Infof("Lost connection to Zookeeper.")
				failure()
			} else {
				path := strings.TrimPrefix(ev.Path, tc.prefix)
				parts := strings.Split(path, "/")
				node := tc.head
				for _, part := range parts[1:] {
					childNode := node.children[part]
					if childNode == nil {
						childNode = &zookeeperTreeCacheNode{
							events:   tc.head.events,
							children: map[string]*zookeeperTreeCacheNode{},
							done:     make(chan struct{}, 1),
						}
						node.children[part] = childNode
					}
					node = childNode
				}
				err := tc.recursiveNodeUpdate(ev.Path, node)
				if err != nil {
					log.Errorf("Error during processing of Zookeeper event: %s", err)
					failure()
				}
			}
		case <-retryChan:
			log.Infof("Attempting to resync state with Zookeeper")
			err := tc.recursiveNodeUpdate(tc.prefix, tc.head)
			if err == nil {
				failureMode = false
			} else {
				log.Errorf("Error during Zookeeper resync: %s", err)
				failure()
			}
		case <-tc.stop:
			close(tc.events)
			return
		}
	}
}
Example #2
0
func matchRegularExpressions(reader io.Reader, config HTTPProbe) bool {
	body, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Errorf("Error reading HTTP body: %s", err)
		return false
	}
	for _, expression := range config.FailIfMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if re.Match(body) {
			return false
		}
	}
	for _, expression := range config.FailIfNotMatchesRegexp {
		re, err := regexp.Compile(expression)
		if err != nil {
			log.Errorf("Could not compile expression %q as regular expression: %s", expression, err)
			return false
		}
		if !re.Match(body) {
			return false
		}
	}
	return true
}
Example #3
0
// purgeArchivedMetric deletes an archived fingerprint and its corresponding
// metric entirely. It also queues the metric for un-indexing (no need to call
// unindexMetric for the deleted metric.) It does not touch the series file,
// though. The caller must have locked the fingerprint.
func (p *persistence) purgeArchivedMetric(fp clientmodel.Fingerprint) (err error) {
	defer func() {
		if err != nil {
			p.setDirty(true)
		}
	}()

	metric, err := p.archivedMetric(fp)
	if err != nil || metric == nil {
		return err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	p.unindexMetric(fp, metric)
	return nil
}
Example #4
0
func (kd *Discovery) addService(service *Service) *config.TargetGroup {
	namespace, ok := kd.services[service.ObjectMeta.Namespace]
	if !ok {
		namespace = map[string]*Service{}
		kd.services[service.ObjectMeta.Namespace] = namespace
	}

	namespace[service.ObjectMeta.Name] = service
	endpointURL := fmt.Sprintf(serviceEndpointsURL, service.ObjectMeta.Namespace, service.ObjectMeta.Name)

	res, err := kd.queryMasterPath(endpointURL)
	if err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}
	if res.StatusCode != http.StatusOK {
		log.Errorf("Failed to get service endpoints: %d", res.StatusCode)
		return nil
	}

	var eps Endpoints
	if err := json.NewDecoder(res.Body).Decode(&eps); err != nil {
		log.Errorf("Error getting service endpoints: %s", err)
		return nil
	}

	return kd.updateServiceTargetGroup(service, &eps)
}
Example #5
0
// unarchiveMetric deletes an archived fingerprint and its metric, but (in
// contrast to purgeArchivedMetric) does not un-index the metric.  If a metric
// was actually deleted, the method returns true and the first time of the
// deleted metric. The caller must have locked the fingerprint.
func (p *persistence) unarchiveMetric(fp clientmodel.Fingerprint) (
	deletedAnything bool,
	firstDeletedTime clientmodel.Timestamp,
	err error,
) {
	defer func() {
		if err != nil {
			p.setDirty(true)
		}
	}()

	firstTime, _, has, err := p.archivedFingerprintToTimeRange.Lookup(fp)
	if err != nil || !has {
		return false, firstTime, err
	}
	deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp))
	if err != nil {
		return false, firstTime, err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. This should never happen.", fp)
	}
	deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp))
	if err != nil {
		return false, firstTime, err
	}
	if !deleted {
		log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp)
	}
	return true, firstTime, nil
}
Example #6
0
func reloadConfig(filename string, rls ...Reloadable) (success bool) {
	log.Infof("Loading configuration file %s", filename)
	defer func() {
		if success {
			configSuccess.Set(1)
			configSuccessTime.Set(float64(time.Now().Unix()))
		} else {
			configSuccess.Set(0)
		}
	}()

	conf, err := config.LoadFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		// TODO(julius): Remove this notice when releasing 0.17.0 or 0.18.0.
		if err.Error() == "unknown fields in global config: labels" {
			log.Errorf("NOTE: The 'labels' setting in the global configuration section has been renamed to 'external_labels' and now has changed semantics (see release notes at https://github.com/prometheus/prometheus/blob/master/CHANGELOG.md). Please update your configuration file accordingly.")
		}
		return false
	}
	success = true

	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
Example #7
0
func (s *memorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Metric) *memorySeries {
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		var cds []*chunkDesc
		var modTime time.Time
		unarchived, err := s.persistence.unarchiveMetric(fp)
		if err != nil {
			log.Errorf("Error unarchiving fingerprint %v (metric %v): %v", fp, m, err)
		}
		if unarchived {
			s.seriesOps.WithLabelValues(unarchive).Inc()
			// We have to load chunkDescs anyway to do anything with
			// the series, so let's do it right now so that we don't
			// end up with a series without any chunkDescs for a
			// while (which is confusing as it makes the series
			// appear as archived or purged).
			cds, err = s.loadChunkDescs(fp, 0)
			if err != nil {
				log.Errorf("Error loading chunk descs for fingerprint %v (metric %v): %v", fp, m, err)
			}
			modTime = s.persistence.seriesFileModTime(fp)
		} else {
			// This was a genuinely new series, so index the metric.
			s.persistence.indexMetric(fp, m)
			s.seriesOps.WithLabelValues(create).Inc()
		}
		series = newMemorySeries(m, cds, modTime)
		s.fpToSeries.put(fp, series)
		s.numSeries.Inc()
	}
	return series
}
Example #8
0
// ApplyConfig updates the rule manager's state as the config requires. If
// loading the new rules failed the old rule set is restored. Returns true on success.
func (m *Manager) ApplyConfig(conf *config.Config) bool {
	m.Lock()
	defer m.Unlock()

	success := true
	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)

	rulesSnapshot := make([]Rule, len(m.rules))
	copy(rulesSnapshot, m.rules)
	m.rules = m.rules[:0]

	var files []string
	for _, pat := range conf.RuleFiles {
		fs, err := filepath.Glob(pat)
		if err != nil {
			// The only error can be a bad pattern.
			log.Errorf("Error retrieving rule files for %s: %s", pat, err)
			success = false
		}
		files = append(files, fs...)
	}
	if err := m.loadRuleFiles(files...); err != nil {
		// If loading the new rules failed, restore the old rule set.
		m.rules = rulesSnapshot
		log.Errorf("Error loading rules, previous rule set restored: %s", err)
		success = false
	}
	return success
}
func (e *Exporter) queryClient(services chan<- []*consul_api.ServiceEntry, checks chan<- []*consul_api.HealthCheck) {

	defer close(services)
	defer close(checks)

	// How many peers are in the Consul cluster?
	peers, err := e.client.Status().Peers()

	if err != nil {
		e.up.Set(0)
		log.Errorf("Query error is %v", err)
		return
	}

	// We'll use peers to decide that we're up.
	e.up.Set(1)
	e.clusterServers.Set(float64(len(peers)))

	// How many nodes are registered?
	nodes, _, err := e.client.Catalog().Nodes(&consul_api.QueryOptions{})

	if err != nil {
		// FIXME: How should we handle a partial failure like this?
	} else {
		e.nodeCount.Set(float64(len(nodes)))
	}

	// Query for the full list of services.
	serviceNames, _, err := e.client.Catalog().Services(&consul_api.QueryOptions{})
	e.serviceCount.Set(float64(len(serviceNames)))

	if err != nil {
		// FIXME: How should we handle a partial failure like this?
		return
	}

	e.serviceCount.Set(float64(len(serviceNames)))

	for s := range serviceNames {
		s_entries, _, err := e.client.Health().Service(s, "", false, &consul_api.QueryOptions{})

		if err != nil {
			log.Errorf("Failed to query service health: %v", err)
			continue
		}

		services <- s_entries
	}

	c_entries, _, err := e.client.Health().State("any", &consul_api.QueryOptions{})
	if err != nil {
		log.Errorf("Failed to query service health: %v", err)

	} else {
		checks <- c_entries
	}

}
Example #10
0
// Run implements the TargetProvider interface.
func (fd *FileDiscovery) Run(ch chan<- *config.TargetGroup, done <-chan struct{}) {
	defer close(ch)
	defer fd.stop()

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Errorf("Error creating file watcher: %s", err)
		return
	}
	fd.watcher = watcher

	fd.refresh(ch)

	ticker := time.NewTicker(fd.interval)
	defer ticker.Stop()

	for {
		// Stopping has priority over refreshing. Thus we wrap the actual select
		// clause to always catch done signals.
		select {
		case <-done:
			return
		default:
			select {
			case <-done:
				return

			case event := <-fd.watcher.Events:
				// fsnotify sometimes sends a bunch of events without name or operation.
				// It's unclear what they are and why they are sent - filter them out.
				if len(event.Name) == 0 {
					break
				}
				// Everything but a chmod requires rereading.
				if event.Op^fsnotify.Chmod == 0 {
					break
				}
				// Changes to a file can spawn various sequences of events with
				// different combinations of operations. For all practical purposes
				// this is inaccurate.
				// The most reliable solution is to reload everything if anything happens.
				fd.refresh(ch)

			case <-ticker.C:
				// Setting a new watch after an update might fail. Make sure we don't lose
				// those files forever.
				fd.refresh(ch)

			case err := <-fd.watcher.Errors:
				if err != nil {
					log.Errorf("Error on file watch: %s", err)
				}
			}
		}
	}
}
func (e *Exporter) collect() {
	// How many peers are in the Consul cluster?
	peers, err := e.client.Status().Peers()
	if err != nil {
		e.up.Set(0)
		log.Errorf("Query error is %v", err)
		return
	}

	// We'll use peers to decide that we're up.
	e.up.Set(1)
	e.clusterServers.Set(float64(len(peers)))

	// How many nodes are registered?
	nodes, _, err := e.client.Catalog().Nodes(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
	} else {
		e.nodeCount.Set(float64(len(nodes)))
	}

	// Query for the full list of services.
	serviceNames, _, err := e.client.Catalog().Services(&consul_api.QueryOptions{})
	if err != nil {
		// FIXME: How should we handle a partial failure like this?
		return
	}
	e.serviceCount.Set(float64(len(serviceNames)))

	if e.healthSummary {
		e.collectHealthSummary(serviceNames)
	}

	checks, _, err := e.client.Health().State("any", &consul_api.QueryOptions{})
	if err != nil {
		log.Errorf("Failed to query service health: %v", err)
		return
	}

	for _, hc := range checks {
		var passing float64
		if hc.Status == consul.HealthPassing {
			passing = 1
		}
		if hc.ServiceID == "" {
			e.nodeChecks.WithLabelValues(hc.CheckID, hc.Node).Set(passing)
		} else {
			e.serviceChecks.WithLabelValues(hc.CheckID, hc.Node, hc.ServiceID).Set(passing)
		}
	}
}
Example #12
0
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	ppref := strings.TrimRight(cfg.web.PathPrefix, "/")
	if ppref != "" && !strings.HasPrefix(ppref, "/") {
		ppref = "/" + ppref
	}
	cfg.web.PathPrefix = ppref

	if cfg.web.Hostname == "" {
		cfg.web.Hostname, err = os.Hostname()
		if err != nil {
			return err
		}
	}

	_, port, err := net.SplitHostPort(cfg.web.ListenAddress)
	if err != nil {
		return err
	}
	cfg.prometheusURL = fmt.Sprintf("http://%s:%s%s/", cfg.web.Hostname, port, cfg.web.PathPrefix)

	return nil
}
Example #13
0
// Append implements Storage.
func (s *memorySeriesStorage) Append(sample *clientmodel.Sample) {
	if s.getNumChunksToPersist() >= s.maxChunksToPersist {
		log.Warnf(
			"%d chunks waiting for persistence, sample ingestion suspended.",
			s.getNumChunksToPersist(),
		)
		for s.getNumChunksToPersist() >= s.maxChunksToPersist {
			time.Sleep(time.Second)
		}
		log.Warn("Sample ingestion resumed.")
	}
	rawFP := sample.Metric.FastFingerprint()
	s.fpLocker.Lock(rawFP)
	fp, err := s.mapper.mapFP(rawFP, sample.Metric)
	if err != nil {
		log.Errorf("Error while mapping fingerprint %v: %v", rawFP, err)
		s.persistence.setDirty(true)
	}
	if fp != rawFP {
		// Switch locks.
		s.fpLocker.Unlock(rawFP)
		s.fpLocker.Lock(fp)
	}
	series := s.getOrCreateSeries(fp, sample.Metric)
	completedChunksCount := series.add(&metric.SamplePair{
		Value:     sample.Value,
		Timestamp: sample.Timestamp,
	})
	s.fpLocker.Unlock(fp)
	s.ingestedSamplesCount.Inc()
	s.incNumChunksToPersist(completedChunksCount)
}
Example #14
0
// refresh reads all files matching the discoveries patterns and sends the respective
// updated target groups through the channel.
func (fd *FileDiscovery) refresh(ch chan<- *config.TargetGroup) {
	ref := map[string]int{}
	for _, p := range fd.listFiles() {
		tgroups, err := readFile(p)
		if err != nil {
			log.Errorf("Error reading file %q: %s", p, err)
			// Prevent deletion down below.
			ref[p] = fd.lastRefresh[p]
			continue
		}
		for _, tg := range tgroups {
			ch <- tg
		}
		ref[p] = len(tgroups)
	}
	// Send empty updates for sources that disappeared.
	for f, n := range fd.lastRefresh {
		m, ok := ref[f]
		if !ok || n > m {
			for i := m; i < n; i++ {
				ch <- &config.TargetGroup{Source: fileSource(f, i)}
			}
		}
	}
	fd.lastRefresh = ref

	fd.watchFiles()
}
Example #15
0
func (h *Handler) getTemplateFile(name string) (string, error) {
	if h.options.UseLocalAssets {
		file, err := ioutil.ReadFile(fmt.Sprintf("web/blob/templates/%s.html", name))
		if err != nil {
			log.Errorf("Could not read %s template: %s", name, err)
			return "", err
		}
		return string(file), nil
	}
	file, err := blob.GetFile(blob.TemplateFiles, name+".html")
	if err != nil {
		log.Errorf("Could not read %s template: %s", name, err)
		return "", err
	}
	return string(file), nil
}
Example #16
0
func parse(args []string) error {
	err := cfg.fs.Parse(args)
	if err != nil {
		if err != flag.ErrHelp {
			log.Errorf("Invalid command line arguments. Help: %s -h", os.Args[0])
		}
		return err
	}

	if cfg.prometheusURL == "" {
		hostname, err := os.Hostname()
		if err != nil {
			return err
		}
		_, port, err := net.SplitHostPort(cfg.web.ListenAddress)
		if err != nil {
			return err
		}
		cfg.prometheusURL = fmt.Sprintf("http://%s:%s/", hostname, port)
	}

	promURL, err := url.Parse(cfg.prometheusURL)
	if err != nil {
		return err
	}
	cfg.web.ExternalURL = promURL

	ppref := strings.TrimRight(cfg.web.ExternalURL.Path, "/")
	if ppref != "" && !strings.HasPrefix(ppref, "/") {
		ppref = "/" + ppref
	}
	cfg.web.ExternalURL.Path = ppref

	return nil
}
Example #17
0
func (t *Target) scrape(sampleAppender storage.SampleAppender) (err error) {
	start := time.Now()
	baseLabels := t.BaseLabels()

	defer func() {
		t.status.setLastError(err)
		recordScrapeHealth(sampleAppender, clientmodel.TimestampFromTime(start), baseLabels, t.status.Health(), time.Since(start))
	}()

	req, err := http.NewRequest("GET", t.URL(), nil)
	if err != nil {
		panic(err)
	}
	req.Header.Add("Accept", acceptHeader)

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned HTTP status %s", resp.Status)
	}

	processor, err := extraction.ProcessorForRequestHeader(resp.Header)
	if err != nil {
		return err
	}

	t.ingestedSamples = make(chan clientmodel.Samples, ingestedSamplesCap)

	processOptions := &extraction.ProcessOptions{
		Timestamp: clientmodel.TimestampFromTime(start),
	}
	go func() {
		err = processor.ProcessSingle(resp.Body, t, processOptions)
		close(t.ingestedSamples)
	}()

	for samples := range t.ingestedSamples {
		for _, s := range samples {
			s.Metric.MergeFromLabelSet(baseLabels, clientmodel.ExporterLabelPrefix)
			// Avoid the copy in Relabel if there are no configs.
			if len(t.metricRelabelConfigs) > 0 {
				labels, err := Relabel(clientmodel.LabelSet(s.Metric), t.metricRelabelConfigs...)
				if err != nil {
					log.Errorf("error while relabeling metric %s of instance %s: ", s.Metric, t.url, err)
					continue
				}
				// Check if the timeseries was dropped.
				if labels == nil {
					continue
				}
				s.Metric = clientmodel.Metric(labels)
			}
			sampleAppender.Append(s)
		}
	}
	return err
}
Example #18
0
func reloadConfig(filename string, rls ...Reloadable) bool {
	log.Infof("Loading configuration file %s", filename)

	conf, err := config.LoadFromFile(filename)
	if err != nil {
		log.Errorf("Couldn't load configuration (-config.file=%s): %v", filename, err)
		log.Errorf("Note: The configuration format has changed with version 0.14. Please see the documentation (http://prometheus.io/docs/operating/configuration/) and the provided configuration migration tool (https://github.com/prometheus/migrate).")
		return false
	}
	success := true

	for _, rl := range rls {
		success = success && rl.ApplyConfig(conf)
	}
	return success
}
Example #19
0
// LabelValuesForLabelName implements Storage.
func (s *memorySeriesStorage) LabelValuesForLabelName(labelName model.LabelName) model.LabelValues {
	lvs, err := s.persistence.labelValuesForLabelName(labelName)
	if err != nil {
		log.Errorf("Error getting label values for label name %q: %v", labelName, err)
	}
	return lvs
}
Example #20
0
// watchServices retrieves updates from Consul's services endpoint and sends
// potential updates to the update channel.
func (cd *ConsulDiscovery) watchServices(update chan<- *consulService) {
	var lastIndex uint64
	for {
		catalog := cd.client.Catalog()
		srvs, meta, err := catalog.Services(&consul.QueryOptions{
			WaitIndex: lastIndex,
			WaitTime:  consulWatchTimeout,
		})
		if err != nil {
			log.Errorf("Error refreshing service list: %s", err)
			<-time.After(consulRetryInterval)
			continue
		}
		// If the index equals the previous one, the watch timed out with no update.
		if meta.LastIndex == lastIndex {
			continue
		}
		lastIndex = meta.LastIndex

		cd.mu.Lock()
		select {
		case <-cd.srvsDone:
			cd.mu.Unlock()
			return
		default:
			// Continue.
		}
		// Check for new services.
		for name := range srvs {
			if _, ok := cd.scrapedServices[name]; !ok {
				continue
			}
			srv, ok := cd.services[name]
			if !ok {
				srv = &consulService{
					name:   name,
					tgroup: &config.TargetGroup{},
					done:   make(chan struct{}, 1),
				}
				srv.tgroup.Source = consulSourcePrefix + ":" + name
				cd.services[name] = srv
			}
			srv.tgroup.Labels = clientmodel.LabelSet{
				ConsulServiceLabel: clientmodel.LabelValue(name),
				ConsulDCLabel:      clientmodel.LabelValue(cd.clientConf.Datacenter),
			}
			update <- srv
		}
		// Check for removed services.
		for name, srv := range cd.services {
			if _, ok := srvs[name]; !ok {
				srv.removed = true
				update <- srv
				srv.done <- struct{}{}
				delete(cd.services, name)
			}
		}
		cd.mu.Unlock()
	}
}
Example #21
0
// Sources implements the TargetProvider interface.
func (cd *ConsulDiscovery) Sources() []string {
	clientConf := *cd.clientConf
	clientConf.HttpClient = &http.Client{Timeout: 5 * time.Second}

	client, err := consul.NewClient(&clientConf)
	if err != nil {
		// NewClient always returns a nil error.
		panic(fmt.Errorf("discovery.ConsulDiscovery.Sources: %s", err))
	}

	srvs, _, err := client.Catalog().Services(nil)
	if err != nil {
		log.Errorf("Error refreshing service list: %s", err)
		return nil
	}
	cd.mu.Lock()
	defer cd.mu.Unlock()

	srcs := make([]string, 0, len(srvs))
	for name := range srvs {
		if _, ok := cd.scrapedServices[name]; ok {
			srcs = append(srcs, consulSourcePrefix+":"+name)
		}
	}
	return srcs
}
Example #22
0
// update the services based on the given node.
func (srvs *services) update(node *etcd.Node) {
	if node.Dir {
		for _, n := range node.Nodes {
			srvs.update(n)
		}
		return
	}

	if pathPatInfo.MatchString(node.Key) {
		var info *ServiceInfo
		err := json.Unmarshal([]byte(node.Value), &info)
		if err != nil {
			log.Errorln(err)
			return
		}
		name := pathPatInfo.FindStringSubmatch(node.Key)[1]
		srv, ok := srvs.m[name]
		if !ok {
			srv = &service{instances: map[string]*Instance{}}
			srvs.m[name] = srv
		}
		if !info.Monitored {
			srvs.del = append(srvs.del, name)
		}
		srv.info = info

	} else if pathPatInstance.MatchString(node.Key) {
		match := pathPatInstance.FindStringSubmatch(node.Key)
		name := match[1]

		srv, ok := srvs.m[name]
		if !ok {
			log.Errorf("instance update for unknown service %q", name)
			return
		}
		var inst *Instance
		err := json.Unmarshal([]byte(node.Value), &inst)
		if err != nil {
			log.Errorln(err)
			return
		}
		srv.instances[match[2]] = inst
	} else {
		log.Errorf("cannot resolve key %q", node.Key)
	}
}
Example #23
0
// maintainMemorySeries maintains a series that is in memory (i.e. not
// archived). It returns true if the method has changed from clean to dirty
// (i.e. it is inconsistent with the latest checkpoint now so that in case of a
// crash a recovery operation that requires a disk seek needed to be applied).
//
// The method first closes the head chunk if it was not touched for the duration
// of headChunkTimeout.
//
// Then it determines the chunks that need to be purged and the chunks that need
// to be persisted. Depending on the result, it does the following:
//
// - If all chunks of a series need to be purged, the whole series is deleted
// for good and the method returns false. (Detecting non-existence of a series
// file does not require a disk seek.)
//
// - If any chunks need to be purged (but not all of them), it purges those
// chunks from memory and rewrites the series file on disk, leaving out the
// purged chunks and appending all chunks not yet persisted (with the exception
// of a still open head chunk).
//
// - If no chunks on disk need to be purged, but chunks need to be persisted,
// those chunks are simply appended to the existing series file (or the file is
// created if it does not exist yet).
//
// - If no chunks need to be purged and no chunks need to be persisted, nothing
// happens in this step.
//
// Next, the method checks if all chunks in the series are evicted. In that
// case, it archives the series and returns true.
//
// Finally, it evicts chunkDescs if there are too many.
func (s *memorySeriesStorage) maintainMemorySeries(
	fp model.Fingerprint, beforeTime model.Time,
) (becameDirty bool) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainInMemory).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		// Series is actually not in memory, perhaps archived or dropped in the meantime.
		return false
	}

	defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc()

	if series.maybeCloseHeadChunk() {
		s.incNumChunksToPersist(1)
	}

	seriesWasDirty := series.dirty

	if s.writeMemorySeries(fp, series, beforeTime) {
		// Series is gone now, we are done.
		return false
	}

	iOldestNotEvicted := -1
	for i, cd := range series.chunkDescs {
		if !cd.isEvicted() {
			iOldestNotEvicted = i
			break
		}
	}

	// Archive if all chunks are evicted.
	if iOldestNotEvicted == -1 {
		s.fpToSeries.del(fp)
		s.numSeries.Dec()
		if err := s.persistence.archiveMetric(
			fp, series.metric, series.firstTime(), series.lastTime,
		); err != nil {
			log.Errorf("Error archiving metric %v: %v", series.metric, err)
			return
		}
		s.seriesOps.WithLabelValues(archive).Inc()
		return
	}
	// If we are here, the series is not archived, so check for chunkDesc
	// eviction next.
	series.evictChunkDescs(iOldestNotEvicted)

	return series.dirty && !seriesWasDirty
}
Example #24
0
// DropMetric implements Storage.
func (s *memorySeriesStorage) DropMetricsForFingerprints(fps ...model.Fingerprint) {
	for _, fp := range fps {
		s.fpLocker.Lock(fp)

		if series, ok := s.fpToSeries.get(fp); ok {
			s.fpToSeries.del(fp)
			s.numSeries.Dec()
			s.persistence.unindexMetric(fp, series.metric)
			if _, err := s.persistence.deleteSeriesFile(fp); err != nil {
				log.Errorf("Error deleting series file for %v: %v", fp, err)
			}
		} else if err := s.persistence.purgeArchivedMetric(fp); err != nil {
			log.Errorf("Error purging metric with fingerprint %v: %v", fp, err)
		}

		s.fpLocker.Unlock(fp)
	}
}
Example #25
0
// handleTargetUpdates receives target group updates and handles them in the
// context of the given job config.
func (tm *TargetManager) handleTargetUpdates(cfg *config.ScrapeConfig, ch <-chan *config.TargetGroup) {
	for tg := range ch {
		log.Debugf("Received potential update for target group %q", tg.Source)

		if err := tm.updateTargetGroup(tg, cfg); err != nil {
			log.Errorf("Error updating targets: %s", err)
		}
	}
}
Example #26
0
// Update overwrites settings in the target that are derived from the job config
// it belongs to.
func (t *Target) Update(cfg *config.ScrapeConfig, baseLabels, metaLabels model.LabelSet) {
	t.Lock()
	defer t.Unlock()

	httpClient, err := newHTTPClient(cfg)
	if err != nil {
		log.Errorf("cannot create HTTP client: %v", err)
		return
	}
	t.httpClient = httpClient

	t.url.Scheme = string(baseLabels[model.SchemeLabel])
	t.url.Path = string(baseLabels[model.MetricsPathLabel])

	t.internalLabels = model.LabelSet{}
	t.internalLabels[model.SchemeLabel] = baseLabels[model.SchemeLabel]
	t.internalLabels[model.MetricsPathLabel] = baseLabels[model.MetricsPathLabel]
	t.internalLabels[model.AddressLabel] = model.LabelValue(t.url.Host)

	params := url.Values{}

	for k, v := range cfg.Params {
		params[k] = make([]string, len(v))
		copy(params[k], v)
	}
	for k, v := range baseLabels {
		if strings.HasPrefix(string(k), model.ParamLabelPrefix) {
			if len(params[string(k[len(model.ParamLabelPrefix):])]) > 0 {
				params[string(k[len(model.ParamLabelPrefix):])][0] = string(v)
			} else {
				params[string(k[len(model.ParamLabelPrefix):])] = []string{string(v)}
			}
			t.internalLabels[model.ParamLabelPrefix+k[len(model.ParamLabelPrefix):]] = v
		}
	}
	t.url.RawQuery = params.Encode()

	t.scrapeInterval = time.Duration(cfg.ScrapeInterval)
	t.deadline = time.Duration(cfg.ScrapeTimeout)

	t.honorLabels = cfg.HonorLabels
	t.metaLabels = metaLabels
	t.baseLabels = model.LabelSet{}
	// All remaining internal labels will not be part of the label set.
	for name, val := range baseLabels {
		if !strings.HasPrefix(string(name), model.ReservedLabelPrefix) {
			t.baseLabels[name] = val
		}
	}
	if _, ok := t.baseLabels[model.InstanceLabel]; !ok {
		t.baseLabels[model.InstanceLabel] = model.LabelValue(t.InstanceIdentifier())
	}
	t.metricRelabelConfigs = cfg.MetricRelabelConfigs
}
Example #27
0
func (s *Silencer) setupExpiryTimer(sc *Silence) {
	if sc.expiryTimer != nil {
		sc.expiryTimer.Stop()
	}
	expDuration := sc.EndsAt.Sub(time.Now())
	sc.expiryTimer = time.AfterFunc(expDuration, func() {
		if err := s.DelSilence(sc.ID); err != nil {
			log.Errorf("Failed to delete silence %d: %s", sc.ID, err)
		}
	})
}
Example #28
0
func (h *Handler) getTemplate(name string) (*template_std.Template, error) {
	t := template_std.New("_base")
	var err error

	t.Funcs(template_std.FuncMap{
		"since":       time.Since,
		"getConsoles": h.getConsoles,
		"pathPrefix":  func() string { return h.options.PathPrefix },
		"stripLabels": func(lset clientmodel.LabelSet, labels ...clientmodel.LabelName) clientmodel.LabelSet {
			for _, ln := range labels {
				delete(lset, ln)
			}
			return lset
		},
		"globalURL": func(url string) string {
			for _, localhostRepresentation := range localhostRepresentations {
				url = strings.Replace(url, "//"+localhostRepresentation, "//"+h.options.Hostname, 1)
			}
			return url
		},
		"healthToClass": func(th retrieval.TargetHealth) string {
			switch th {
			case retrieval.HealthUnknown:
				return "warning"
			case retrieval.HealthGood:
				return "success"
			default:
				return "danger"
			}
		},
	})

	file, err := h.getTemplateFile("_base")
	if err != nil {
		log.Errorln("Could not read base template:", err)
		return nil, err
	}
	t, err = t.Parse(file)
	if err != nil {
		log.Errorln("Could not parse base template:", err)
	}

	file, err = h.getTemplateFile(name)
	if err != nil {
		log.Error("Could not read template %s: %s", name, err)
		return nil, err
	}
	t, err = t.Parse(file)
	if err != nil {
		log.Errorf("Could not parse template %s: %s", name, err)
	}
	return t, err
}
Example #29
0
// listFiles returns a list of all files that match the configured patterns.
func (fd *FileDiscovery) listFiles() []string {
	var paths []string
	for _, p := range fd.paths {
		files, err := filepath.Glob(p)
		if err != nil {
			log.Errorf("Error expanding glob %q: %s", p, err)
			continue
		}
		paths = append(paths, files...)
	}
	return paths
}
Example #30
0
// Run serves the HTTP endpoints.
func (h *Handler) Run() {
	log.Infof("Listening on %s", h.options.ListenAddress)

	// If we cannot bind to a port, retry after 30 seconds.
	for {
		err := http.ListenAndServe(h.options.ListenAddress, h.router)
		if err != nil {
			log.Errorf("Could not listen on %s: %s", h.options.ListenAddress, err)
		}
		time.Sleep(30 * time.Second)
	}
}