// stop background processing of the target manager. If removeTargets is true,
// existing targets will be stopped and removed.
func (tm *TargetManager) stop(removeTargets bool) {
	log.Info("Stopping target manager...")
	defer log.Info("Target manager stopped.")

	tm.m.Lock()
	provs := []TargetProvider{}
	for _, ps := range tm.providers {
		provs = append(provs, ps...)
	}
	tm.m.Unlock()

	var wg sync.WaitGroup
	wg.Add(len(provs))
	for _, prov := range provs {
		go func(p TargetProvider) {
			p.Stop()
			wg.Done()
		}(prov)
	}
	wg.Wait()

	tm.m.Lock()
	defer tm.m.Unlock()

	if removeTargets {
		tm.removeTargets(nil)
	}

	tm.running = false
}
Example #2
func (p *persistence) rebuildLabelIndexes(
	fpToSeries map[clientmodel.Fingerprint]*memorySeries,
) error {
	count := 0
	log.Info("Rebuilding label indexes.")
	log.Info("Indexing metrics in memory.")
	for fp, s := range fpToSeries {
		p.indexMetric(fp, s.metric)
		count++
		if count%10000 == 0 {
			log.Infof("%d metrics queued for indexing.", count)
		}
	}
	log.Info("Indexing archived metrics.")
	var fp codable.Fingerprint
	var m codable.Metric
	if err := p.archivedFingerprintToMetrics.ForEach(func(kv index.KeyValueAccessor) error {
		if err := kv.Key(&fp); err != nil {
			return err
		}
		if err := kv.Value(&m); err != nil {
			return err
		}
		p.indexMetric(clientmodel.Fingerprint(fp), clientmodel.Metric(m))
		count++
		if count%10000 == 0 {
			log.Infof("%d metrics queued for indexing.", count)
		}
		return nil
	}); err != nil {
		return err
	}
	log.Info("All requests for rebuilding the label indexes queued. (Actual processing may lag behind.)")
	return nil
}
// stop background processing of the target manager. If removeTargets is true,
// existing targets will be stopped and removed.
func (tm *TargetManager) stop(removeTargets bool) {
	log.Info("Stopping target manager...")
	defer log.Info("Target manager stopped.")

	close(tm.done)

	tm.mtx.Lock()
	defer tm.mtx.Unlock()

	if removeTargets {
		tm.removeTargets(nil)
	}

	tm.running = false
}
Example #4
func (w *fileWatcher) Watch(cb ReloadCallback) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}

	err = watcher.WatchFlags(w.fileName, fsnotify.FSN_MODIFY)
	if err != nil {
		log.Fatal(err)
	}

	for {
		select {
		case ev := <-watcher.Event:
			log.Infof("Config file changed (%s), attempting reload", ev)
			conf, err := LoadFromFile(w.fileName)
			if err != nil {
				log.Error("Error loading new config: ", err)
				failedConfigReloads.Inc()
			} else {
				cb(&conf)
				log.Info("Config reloaded successfully")
				configReloads.Inc()
			}
			// Re-add the file watcher since it can get lost on some changes. E.g.
			// saving a file with vim results in a RENAME-MODIFY-DELETE event
			// sequence, after which the newly written file is no longer watched.
			err = watcher.WatchFlags(w.fileName, fsnotify.FSN_MODIFY)
			if err != nil {
				log.Error("Error re-adding config file watch: ", err)
			}
		case err := <-watcher.Error:
			log.Error("Error watching config: ", err)
		}
	}
}
Example #5
func newMesosExporter(opts *exporterOpts) *periodicExporter {
	e := &periodicExporter{
		errors: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: "mesos_exporter",
				Name:      "slave_scrape_errors_total",
				Help:      "Current total scrape errors",
			},
			[]string{"slave"},
		),
		opts: opts,
	}
	e.slaves.urls = []string{e.opts.localURL}

	if e.opts.autoDiscover {
		log.Info("auto discovery enabled from command line flag.")

		// Update the number of mesos slaves every 10 minutes.
		e.updateSlaves()
		go runEvery(e.updateSlaves, 10*time.Minute)
	}

	// Fetch slave metrics every interval
	go runEvery(e.scrapeSlaves, e.opts.interval)

	return e
}
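The exporter above relies on a runEvery helper that is not shown in this listing. A minimal sketch, assuming it simply invokes the callback on every tick of a fixed-interval ticker (the actual helper in the exporter may differ), could look like this:

// runEvery is an assumed sketch of the helper used above: it blocks forever,
// calling f once per tick of a fixed-interval ticker. Callers that also want
// an immediate first run invoke f once before starting it, as the example
// above does with e.updateSlaves().
func runEvery(f func(), interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for range ticker.C {
		f()
	}
}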
// GetJson fetches JSON from the given URL using basic auth and decodes it into target.
func GetJson(url string, accessKey string, secretKey string, target interface{}) error {

	start := time.Now()

	// Counter for internal exporter metrics
	measure.FunctionCountTotal.With(prometheus.Labels{"pkg": "utils", "fnc": "GetJson"}).Inc()

	log.Info("Scraping: ", url)

	client := &http.Client{}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Error("Error creating request: ", err)
		return err
	}

	req.SetBasicAuth(accessKey, secretKey)
	resp, err := client.Do(req)
	if err != nil {
		log.Error("Error Collecting JSON from API: ", err)
		return err
	}
	defer resp.Body.Close()

	// Timings recorded as part of internal metrics
	elapsed := float64((time.Since(start)) / time.Microsecond)
	measure.FunctionDurations.WithLabelValues("hosts", "getJSON").Observe(elapsed)

	return json.NewDecoder(resp.Body).Decode(target)
}
// Run starts background processing to handle target updates.
func (tm *TargetManager) Run() {
	log.Info("Starting target manager...")

	sources := map[string]struct{}{}

	for scfg, provs := range tm.providers {
		for _, prov := range provs {
			ch := make(chan *config.TargetGroup)
			go tm.handleTargetUpdates(scfg, ch)

			for _, src := range prov.Sources() {
				src = fullSource(scfg, src)
				sources[src] = struct{}{}
			}

			// Run the target provider after cleanup of the stale targets is done.
			defer func(p TargetProvider, c chan *config.TargetGroup) {
				go p.Run(c)
			}(prov, ch)
		}
	}

	tm.m.Lock()
	defer tm.m.Unlock()

	tm.removeTargets(func(src string) bool {
		if _, ok := sources[src]; ok {
			return false
		}
		return true
	})

	tm.running = true
}
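The fullSource helper used in the Run variant above is not shown in this listing; a plausible sketch, assuming it merely namespaces a provider's source by the scrape configuration it belongs to so that sources from different scrape configs cannot collide, would be:

// fullSource is an assumed sketch, not the original implementation: it
// prefixes a provider's source with the job name of its scrape configuration.
func fullSource(scfg *config.ScrapeConfig, src string) string {
	return scfg.JobName + ":" + src
}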
// Run starts background processing to handle target updates.
func (tm *TargetManager) Run() {
	log.Info("Starting target manager...")

	tm.done = make(chan struct{})

	sources := map[string]struct{}{}
	updates := []<-chan targetGroupUpdate{}

	for scfg, provs := range tm.providers {
		for _, prov := range provs {
			// Get an initial set of available sources so we don't remove
			// target groups from the last run that are still available.
			for _, src := range prov.Sources() {
				sources[src] = struct{}{}
			}

			tgc := make(chan *config.TargetGroup)
			// Run the target provider after cleanup of the stale targets is done.
			defer func(prov TargetProvider, tgc chan *config.TargetGroup) {
				go prov.Run(tgc, tm.done)
			}(prov, tgc)

			tgupc := make(chan targetGroupUpdate)
			updates = append(updates, tgupc)

			go func(scfg *config.ScrapeConfig) {
				defer close(tgupc)
				for {
					select {
					case tg := <-tgc:
						if tg == nil {
							break
						}
						tgupc <- targetGroupUpdate{tg: tg, scfg: scfg}
					case <-tm.done:
						return
					}
				}
			}(scfg)
		}
	}

	// Merge all channels of incoming target group updates into a single
	// one and keep applying the updates.
	go tm.handleUpdates(merge(tm.done, updates...), tm.done)

	tm.mtx.Lock()
	defer tm.mtx.Unlock()

	// Remove old target groups that are no longer in the set of sources.
	tm.removeTargets(func(src string) bool {
		if _, ok := sources[src]; ok {
			return false
		}
		return true
	})

	tm.running = true
}
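The merge helper referenced in the comment above is likewise not shown; a minimal fan-in sketch under that assumption (one forwarding goroutine per input channel, output closed once every input is drained or done is closed) might look like this:

// merge is an assumed sketch of a fan-in helper: it forwards every
// targetGroupUpdate from the given channels onto a single output channel
// until the inputs are exhausted or done is closed.
func merge(done <-chan struct{}, cs ...<-chan targetGroupUpdate) <-chan targetGroupUpdate {
	var wg sync.WaitGroup
	out := make(chan targetGroupUpdate)

	forward := func(c <-chan targetGroupUpdate) {
		defer wg.Done()
		for upd := range c {
			select {
			case out <- upd:
			case <-done:
				return
			}
		}
	}

	wg.Add(len(cs))
	for _, c := range cs {
		go forward(c)
	}

	// Close the output once all forwarding goroutines have finished.
	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}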
Example #9
func startPolling() {
	t := time.NewTicker(time.Duration(*pollRate) * time.Minute)
	log.Info("Starting polling of tanks.")
	for {
		readTanks()
		<-t.C
	}
}
Example #10
// checkpointFPMappings persists the fingerprint mappings. This method is not
// goroutine-safe.
//
// Description of the file format, v1:
//
// (1) Magic string (const mappingsMagicString).
//
// (2) Uvarint-encoded format version (const mappingsFormatVersion).
//
// (3) Uvarint-encoded number of mappings in fpMappings.
//
// (4) Repeated once per mapping:
//
// (4.1) The raw fingerprint as big-endian uint64.
//
// (4.2) The uvarint-encoded number of sub-mappings for the raw fingerprint.
//
// (4.3) Repeated once per sub-mapping:
//
// (4.3.1) The uvarint-encoded length of the unique metric string.
// (4.3.2) The unique metric string.
// (4.3.3) The mapped fingerprint as big-endian uint64.
func (p *persistence) checkpointFPMappings(fpm fpMappings) (err error) {
	log.Info("Checkpointing fingerprint mappings...")
	begin := time.Now()
	f, err := os.OpenFile(p.mappingsTempFileName(), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0640)
	if err != nil {
		return
	}

	defer func() {
		f.Sync()
		closeErr := f.Close()
		if err != nil {
			return
		}
		err = closeErr
		if err != nil {
			return
		}
		err = os.Rename(p.mappingsTempFileName(), p.mappingsFileName())
		duration := time.Since(begin)
		log.Infof("Done checkpointing fingerprint mappings in %v.", duration)
	}()

	w := bufio.NewWriterSize(f, fileBufSize)

	if _, err = w.WriteString(mappingsMagicString); err != nil {
		return
	}
	if _, err = codable.EncodeUvarint(w, mappingsFormatVersion); err != nil {
		return
	}
	if _, err = codable.EncodeUvarint(w, uint64(len(fpm))); err != nil {
		return
	}

	for fp, mappings := range fpm {
		if err = codable.EncodeUint64(w, uint64(fp)); err != nil {
			return
		}
		if _, err = codable.EncodeUvarint(w, uint64(len(mappings))); err != nil {
			return
		}
		for ms, mappedFP := range mappings {
			if _, err = codable.EncodeUvarint(w, uint64(len(ms))); err != nil {
				return
			}
			if _, err = w.WriteString(ms); err != nil {
				return
			}
			if err = codable.EncodeUint64(w, uint64(mappedFP)); err != nil {
				return
			}
		}
	}
	err = w.Flush()
	return
}
Example #11
// Stop stops sending samples to the remote storage and waits for pending
// sends to complete.
func (t *StorageQueueManager) Stop() {
	log.Infof("Stopping remote storage...")
	close(t.queue)
	<-t.drained
	for i := 0; i < maxConcurrentSends; i++ {
		t.sendSemaphore <- true
	}
	log.Info("Remote storage stopped.")
}
// StacksURLCheck checks the Rancher API version to determine the correct stacks endpoint URL.
func StacksURLCheck(rancherURL string) string {

	var stacksEndpoint string

	if strings.Contains(rancherURL, "v1") {
		log.Info("Version 1 API detected, using legacy API fields")
		stacksEndpoint = (rancherURL + "/environments/")

	} else if strings.Contains(rancherURL, "v2") {
		log.Info("Version 2 API detected, using updated API fields")
		stacksEndpoint = (rancherURL + "/stacks/")

	} else {
		log.Info("No known API version detected, defaulting to /stacks/")
		stacksEndpoint = (rancherURL + "/stacks/")
	}

	return stacksEndpoint

}
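A brief usage sketch (hypothetical: stacksData and scrapeStacks are illustrative, combining StacksURLCheck with the GetJson helper shown earlier) to show how the selected endpoint might then be scraped:

// stacksData is a hypothetical struct matching the stacks JSON payload.
type stacksData struct {
	Data []struct {
		Name  string `json:"name"`
		State string `json:"state"`
	} `json:"data"`
}

func scrapeStacks(rancherURL, accessKey, secretKey string) {
	// e.g. "https://rancher.local/v2-beta" yields ".../v2-beta/stacks/".
	endpoint := StacksURLCheck(rancherURL)

	var stacks stacksData
	if err := GetJson(endpoint, accessKey, secretKey, &stacks); err != nil {
		log.Error("Error getting stacks JSON: ", err)
		return
	}
	log.Info("Fetched ", len(stacks.Data), " stacks")
}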
Example #13
func newMesosExporter(opts *exporterOpts) *periodicExporter {
	e := &periodicExporter{
		errors: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: "mesos_exporter",
				Name:      "slave_scrape_errors_total",
				Help:      "Current total scrape errors",
			},
			[]string{"slave"},
		),
		opts: opts,
	}

	if opts.queryURL == "" {
		log.Fatal("Flag '-exporter.url' not set")
	}

	switch opts.mode {
	case "discover":
		log.Info("starting mesos_exporter in scrape mode 'discover'")

		e.queryURL = parseMasterURL(opts.queryURL)

		// Update the number of mesos slaves.
		e.updateSlaves()
		go runEvery(e.updateSlaves, e.opts.autoDiscoverInterval)

		// Fetch slave metrics every interval.
		go runEvery(e.scrapeSlaves, e.opts.interval)
	case "master":
		log.Info("starting mesos_exporter in scrape mode 'master'")
		e.queryURL = parseMasterURL(opts.queryURL)
	case "slave":
		log.Info("starting mesos_exporter in scrape mode 'slave'")
		e.slaves.urls = []string{opts.queryURL}
	default:
		log.Fatalf("Invalid value '%s' of flag '-exporter.mode' - must be one of 'discover', 'master' or 'slave'", opts.mode)
	}

	return e
}
Example #14
// Stop implements Storage.
func (s *memorySeriesStorage) Stop() error {
	log.Info("Stopping local storage...")

	log.Info("Stopping maintenance loop...")
	close(s.loopStopping)
	<-s.loopStopped

	log.Info("Stopping chunk eviction...")
	close(s.evictStopping)
	<-s.evictStopped

	// One final checkpoint of the series map and the head chunks.
	if err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker); err != nil {
		return err
	}

	if err := s.persistence.close(); err != nil {
		return err
	}
	log.Info("Local storage stopped.")
	return nil
}
Example #15
func (s *memorySeriesStorage) loop() {
	checkpointTimer := time.NewTimer(s.checkpointInterval)

	dirtySeriesCount := 0

	defer func() {
		checkpointTimer.Stop()
		log.Info("Maintenance loop stopped.")
		close(s.loopStopped)
	}()

	memoryFingerprints := s.cycleThroughMemoryFingerprints()
	archivedFingerprints := s.cycleThroughArchivedFingerprints()

loop:
	for {
		select {
		case <-s.loopStopping:
			break loop
		case <-checkpointTimer.C:
			err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
			if err != nil {
				log.Errorln("Error while checkpointing:", err)
			} else {
				dirtySeriesCount = 0
			}
			checkpointTimer.Reset(s.checkpointInterval)
		case fp := <-memoryFingerprints:
			if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) {
				dirtySeriesCount++
				// Check if we have enough "dirty" series so that we need an early checkpoint.
				// However, if we are already behind persisting chunks, creating a checkpoint
				// would be counterproductive, as it would slow down chunk persisting even more,
				// while in a situation like that, where we are clearly lacking speed of disk
				// maintenance, the best we can do for crash recovery is to persist chunks as
				// quickly as possible. So only checkpoint if the storage is not in "graceful
				// degradation mode".
				if dirtySeriesCount >= s.checkpointDirtySeriesLimit && !s.isDegraded() {
					checkpointTimer.Reset(0)
				}
			}
		case fp := <-archivedFingerprints:
			s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter))
		}
	}
	// Wait until both channels are closed.
	for range memoryFingerprints {
	}
	for range archivedFingerprints {
	}
}
func main() {
	flag.Parse()
	exporter := newZooKeeperExporter(flag.Args(), *useExhibitor)
	prometheus.MustRegister(exporter)

	http.Handle(*metricPath, prometheus.Handler())
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, *metricPath, http.StatusMovedPermanently)
	})

	log.Info("starting mesos_exporter on ", *addr)

	log.Fatal(http.ListenAndServe(*addr, nil))
}
func main() {
	flag.Parse()
	if rancherURL == "" {
		log.Fatal("CATTLE_URL must be set and non-empty")
	}

	log.Info("Starting Prometheus Exporter for Rancher. Listen Address: ", listenAddress, " metricsPath: ", metricsPath, " rancherURL: ", rancherURL, " AccessKey: ", accessKey)
	log.Info("System Services Reported on:", hideSys)

	// Register internal metrics
	measure.Init()

	// Pass URL & Credentials out to the Exporters
	servicesExporter := services.NewExporter(rancherURL, accessKey, secretKey, hideSys)
	stacksExporter := stacks.NewExporter(rancherURL, accessKey, secretKey, hideSys)
	hostsExporter := hosts.NewExporter(rancherURL, accessKey, secretKey)

	// Register Metrics from each of the endpoints
	prometheus.MustRegister(servicesExporter)
	prometheus.MustRegister(stacksExporter)
	prometheus.MustRegister(hostsExporter)

	http.Handle(metricsPath, prometheus.Handler())
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`<html>
		                <head><title>Rancher exporter</title></head>
		                <body>
		                   <h1>rancher exporter</h1>
		                   <p><a href='` + metricsPath + `'>Metrics</a></p>
		                   </body>
		                </html>
		              `))
	})

	log.Infof("Starting Server: %s", listenAddress)
	log.Fatal(http.ListenAndServe(listenAddress, nil))
}
Example #18
// Start implements Storage.
func (s *memorySeriesStorage) Start() (err error) {
	var syncStrategy syncStrategy
	switch s.options.SyncStrategy {
	case Never:
		syncStrategy = func() bool { return false }
	case Always:
		syncStrategy = func() bool { return true }
	case Adaptive:
		syncStrategy = func() bool { return !s.isDegraded() }
	default:
		panic("unknown sync strategy")
	}

	var p *persistence
	p, err = newPersistence(s.options.PersistenceStoragePath, s.options.Dirty, s.options.PedanticChecks, syncStrategy)
	if err != nil {
		return err
	}
	s.persistence = p
	// Persistence must start running before loadSeriesMapAndHeads() is called.
	go s.persistence.run()

	defer func() {
		if err != nil {
			if e := p.close(); e != nil {
				log.Errorln("Error closing persistence:", e)
			}
		}
	}()

	log.Info("Loading series map and head chunks...")
	s.fpToSeries, s.numChunksToPersist, err = p.loadSeriesMapAndHeads()
	if err != nil {
		return err
	}
	log.Infof("%d series loaded.", s.fpToSeries.length())
	s.numSeries.Set(float64(s.fpToSeries.length()))

	s.mapper, err = newFPMapper(s.fpToSeries, p)
	if err != nil {
		return err
	}

	go s.handleEvictList()
	go s.loop()

	return nil
}
Example #19
func (s *memorySeriesStorage) handleEvictList() {
	ticker := time.NewTicker(maxEvictInterval)
	count := 0

	for {
		// To batch up evictions a bit, this tries evictions at least
		// once per evict interval, but earlier if the number of evict
		// requests with evict==true that have happened since the last
		// evict run is more than maxMemoryChunks/1000.
		select {
		case req := <-s.evictRequests:
			if req.evict {
				req.cd.evictListElement = s.evictList.PushBack(req.cd)
				count++
				if count > s.maxMemoryChunks/1000 {
					s.maybeEvict()
					count = 0
				}
			} else {
				if req.cd.evictListElement != nil {
					s.evictList.Remove(req.cd.evictListElement)
					req.cd.evictListElement = nil
				}
			}
		case <-ticker.C:
			if s.evictList.Len() > 0 {
				s.maybeEvict()
			}
		case <-s.evictStopping:
			// Drain evictRequests forever in a goroutine to not let
			// requesters hang.
			go func() {
				for {
					<-s.evictRequests
				}
			}()
			ticker.Stop()
			log.Info("Chunk eviction stopped.")
			close(s.evictStopped)
			return
		}
	}
}
func testChunk(t *testing.T, encoding chunkEncoding) {
	samples := make(model.Samples, 500000)
	for i := range samples {
		samples[i] = &model.Sample{
			Timestamp: model.Time(i),
			Value:     model.SampleValue(float64(i) * 0.2),
		}
	}
	s, closer := NewTestStorage(t, encoding)
	defer closer.Close()

	for _, sample := range samples {
		s.Append(sample)
	}
	s.WaitForIndexing()

	for m := range s.fpToSeries.iter() {
		s.fpLocker.Lock(m.fp)

		var values []model.SamplePair
		for _, cd := range m.series.chunkDescs {
			if cd.isEvicted() {
				continue
			}
			for sample := range cd.c.newIterator().values() {
				values = append(values, *sample)
			}
		}

		for i, v := range values {
			if samples[i].Timestamp != v.Timestamp {
				t.Errorf("%d. Got %v; want %v", i, v.Timestamp, samples[i].Timestamp)
			}
			if samples[i].Value != v.Value {
				t.Errorf("%d. Got %v; want %v", i, v.Value, samples[i].Value)
			}
		}
		s.fpLocker.Unlock(m.fp)
	}
	log.Info("test done, closing")
}
func (e *Exporter) gatherMetrics(rancherURL string, accessKey string, secretKey string, ch chan<- prometheus.Metric) error {

	// Reset gaugeVecs back to 0
	for _, m := range e.gaugeVecs {
		m.Reset()
	}

	// Set the correct API endpoint for hosts
	endpoint := (rancherURL + "/hosts/")

	// Scrape EndPoint for JSON Data
	data := new(Data)
	err := utils.GetJson(endpoint, accessKey, secretKey, &data)

	if err != nil {
		log.Error("Error getting JSON from URL ", endpoint)
		return err
	}

	log.Info("JSON Fetched for hosts: ", data)

	// Host Metrics
	for _, x := range data.Data {

		// Pre-defines the known states from the Rancher API
		states := []string{"activating", "active", "deactivating", "error", "erroring", "inactive", "provisioned", "purged", "purging", "registering", "removed", "removing", "requested", "restoring", "updating_active", "updating_inactive"}

		// Set the state of the service to 1 when it matches one of the known states
		for _, y := range states {
			if x.State == y {
				e.gaugeVecs["HostState"].With(prometheus.Labels{"rancherURL": rancherURL, "name": x.Hostname, "state": y}).Set(1)
			} else {
				e.gaugeVecs["HostState"].With(prometheus.Labels{"rancherURL": rancherURL, "name": x.Hostname, "state": y}).Set(0)
			}
		}

	}

	return nil

}
Example #22
func (w WebService) ServeForever(pathPrefix string) error {

	http.Handle(pathPrefix+"favicon.ico", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Error(w, "", 404)
	}))

	http.HandleFunc("/", prometheus.InstrumentHandlerFunc("index", func(rw http.ResponseWriter, req *http.Request) {
		// The "/" pattern matches everything, so we need to check
		// that we're at the root here.
		if req.URL.Path == pathPrefix {
			w.AlertsHandler.ServeHTTP(rw, req)
		} else if req.URL.Path == strings.TrimRight(pathPrefix, "/") {
			http.Redirect(rw, req, pathPrefix, http.StatusFound)
		} else if !strings.HasPrefix(req.URL.Path, pathPrefix) {
			// We're running under a prefix but the user requested something
			// outside of it. Let's see if this page exists under the prefix.
			http.Redirect(rw, req, pathPrefix+strings.TrimLeft(req.URL.Path, "/"), http.StatusFound)
		} else {
			http.NotFound(rw, req)
		}
	}))

	http.Handle(pathPrefix+"alerts", prometheus.InstrumentHandler("alerts", w.AlertsHandler))
	http.Handle(pathPrefix+"silences", prometheus.InstrumentHandler("silences", w.SilencesHandler))
	http.Handle(pathPrefix+"status", prometheus.InstrumentHandler("status", w.StatusHandler))

	http.Handle(pathPrefix+"metrics", prometheus.Handler())
	if *useLocalAssets {
		http.Handle(pathPrefix+"static/", http.StripPrefix(pathPrefix+"static/", http.FileServer(http.Dir("web/static"))))
	} else {
		http.Handle(pathPrefix+"static/", http.StripPrefix(pathPrefix+"static/", new(blob.Handler)))
	}
	http.Handle(pathPrefix+"api/", w.AlertManagerService.Handler())

	log.Info("listening on ", *listenAddress)

	return http.ListenAndServe(*listenAddress, nil)
}
Example #23
// Run the rule manager's periodic rule evaluation.
func (m *Manager) Run() {
	defer log.Info("Rule manager stopped.")

	m.Lock()
	lastInterval := m.interval
	m.Unlock()

	ticker := time.NewTicker(lastInterval)
	defer ticker.Stop()

	for {
		// The outer select clause makes sure that m.done is looked at
		// first. Otherwise, if m.runIteration takes longer than
		// m.interval, there is only a 50% chance that m.done will be
		// looked at before the next m.runIteration call happens.
		select {
		case <-m.done:
			return
		default:
			select {
			case <-ticker.C:
				start := time.Now()
				m.runIteration()
				iterationDuration.Observe(float64(time.Since(start) / time.Millisecond))

				m.Lock()
				if lastInterval != m.interval {
					ticker.Stop()
					ticker = time.NewTicker(m.interval)
					lastInterval = m.interval
				}
				m.Unlock()
			case <-m.done:
				return
			}
		}
	}
}
Example #24
func main() {
	flag.Parse()

	opts := &exporterOpts{
		autoDiscoverInterval: *autoDiscoverInterval,
		interval:             *scrapeInterval,
		mode:                 *scrapeMode,
		queryURL:             strings.TrimRight(*queryURL, "/"),
	}
	exporter := newMesosExporter(opts)
	prometheus.MustRegister(exporter)

	http.Handle(*metricsPath, prometheus.Handler())
	http.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "OK")
	})
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
	})

	log.Info("starting mesos_exporter on ", *addr)

	log.Fatal(http.ListenAndServe(*addr, nil))
}
Example #25
// Stop the rule manager's rule evaluation cycles.
func (m *Manager) Stop() {
	log.Info("Stopping rule manager...")
	m.done <- true
}
Example #26
// recoverFromCrash is called by loadSeriesMapAndHeads if the persistence
// appears to be dirty after the loading (either because the loading resulted in
// an error or because the persistence was dirty from the start). Not goroutine
// safe. Only call before anything else is running (except index processing
// queue as started by newPersistence).
func (p *persistence) recoverFromCrash(fingerprintToSeries map[clientmodel.Fingerprint]*memorySeries) error {
	// TODO(beorn): We need proper tests for the crash recovery.
	log.Warn("Starting crash recovery. Prometheus is inoperational until complete.")
	log.Warn("To avoid crash recovery in future, shutdown Prometheus with SIGTERM or a HTTP POST to /-/quit.")

	fpsSeen := map[clientmodel.Fingerprint]struct{}{}
	count := 0
	seriesDirNameFmt := fmt.Sprintf("%%0%dx", seriesDirNameLen)

	// Delete the fingerprint mapping file as it might be stale or
	// corrupt. We'll rebuild the mappings as we go.
	os.Remove(p.mappingsFileName())
	// The mappings to rebuild.
	fpm := fpMappings{}

	log.Info("Scanning files.")
	for i := 0; i < 1<<(seriesDirNameLen*4); i++ {
		dirname := path.Join(p.basePath, fmt.Sprintf(seriesDirNameFmt, i))
		dir, err := os.Open(dirname)
		if os.IsNotExist(err) {
			continue
		}
		if err != nil {
			return err
		}
		defer dir.Close()
		for fis := []os.FileInfo{}; err != io.EOF; fis, err = dir.Readdir(1024) {
			if err != nil {
				return err
			}
			for _, fi := range fis {
				fp, ok := p.sanitizeSeries(dirname, fi, fingerprintToSeries, fpm)
				if ok {
					fpsSeen[fp] = struct{}{}
				}
				count++
				if count%10000 == 0 {
					log.Infof("%d files scanned.", count)
				}
			}
		}
	}
	log.Infof("File scan complete. %d series found.", len(fpsSeen))

	log.Info("Checking for series without series file.")
	for fp, s := range fingerprintToSeries {
		if _, seen := fpsSeen[fp]; !seen {
			// fp exists in fingerprintToSeries, but has no representation on disk.
			if s.persistWatermark == len(s.chunkDescs) {
				// Oops, everything including the head chunk was
				// already persisted, but nothing on disk.
				// Thus, we lost that series completely. Clean
				// up the remnants.
				delete(fingerprintToSeries, fp)
				if err := p.purgeArchivedMetric(fp); err != nil {
					// Purging the archived metric didn't work, so try
					// to unindex it, just in case it's in the indexes.
					p.unindexMetric(fp, s.metric)
				}
				log.Warnf("Lost series detected: fingerprint %v, metric %v.", fp, s.metric)
				continue
			}
			// If we are here, the only chunks we have are the chunks in the checkpoint.
			// Adjust things accordingly.
			if s.persistWatermark > 0 || s.chunkDescsOffset != 0 {
				minLostChunks := s.persistWatermark + s.chunkDescsOffset
				if minLostChunks <= 0 {
					log.Warnf(
						"Possible loss of chunks for fingerprint %v, metric %v.",
						fp, s.metric,
					)
				} else {
					log.Warnf(
						"Lost at least %d chunks for fingerprint %v, metric %v.",
						minLostChunks, fp, s.metric,
					)
				}
				s.chunkDescs = append(
					make([]*chunkDesc, 0, len(s.chunkDescs)-s.persistWatermark),
					s.chunkDescs[s.persistWatermark:]...,
				)
				numMemChunkDescs.Sub(float64(s.persistWatermark))
				s.persistWatermark = 0
				s.chunkDescsOffset = 0
			}
			maybeAddMapping(fp, s.metric, fpm)
			fpsSeen[fp] = struct{}{} // Add so that fpsSeen is complete.
		}
	}
	log.Info("Check for series without series file complete.")

	if err := p.cleanUpArchiveIndexes(fingerprintToSeries, fpsSeen, fpm); err != nil {
		return err
	}
	if err := p.rebuildLabelIndexes(fingerprintToSeries); err != nil {
		return err
	}
	// Finally rewrite the mappings file if there are any mappings.
	if len(fpm) > 0 {
		if err := p.checkpointFPMappings(fpm); err != nil {
			return err
		}
	}

	p.setDirty(false)
	log.Warn("Crash recovery complete.")
	return nil
}
Example #27
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		return 2
	}

	printVersion()
	if cfg.printVersion {
		return 0
	}

	var reloadables []Reloadable

	var (
		memStorage     = local.NewMemorySeriesStorage(&cfg.storage)
		remoteStorage  = remote.New(&cfg.remote)
		sampleAppender = storage.Fanout{memStorage}
	)
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	var (
		notificationHandler = notification.NewNotificationHandler(&cfg.notification)
		targetManager       = retrieval.NewTargetManager(sampleAppender)
		queryEngine         = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender:      sampleAppender,
		NotificationHandler: notificationHandler,
		QueryEngine:         queryEngine,
		ExternalURL:         cfg.web.ExternalURL,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	status := &web.PrometheusStatus{
		TargetPools: targetManager.Pools,
		Rules:       ruleManager.Rules,
		Flags:       flags,
		Birth:       time.Now(),
	}

	webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web)

	reloadables = append(reloadables, status, targetManager, ruleManager, webHandler, notificationHandler)

	if !reloadConfig(cfg.configFile, reloadables...) {
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webHandler.Reload():
			}
			reloadConfig(cfg.configFile, reloadables...)
		}
	}()

	// Start all components.
	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		prometheus.MustRegister(remoteStorage)

		go remoteStorage.Run()
		defer remoteStorage.Stop()
	}
	// The storage has to be fully initialized before registering.
	prometheus.MustRegister(memStorage)
	prometheus.MustRegister(notificationHandler)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	go ruleManager.Run()
	defer ruleManager.Stop()

	go notificationHandler.Run()
	defer notificationHandler.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}
Example #28
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		return 2
	}

	versionInfoTmpl.Execute(os.Stdout, BuildInfo)
	if cfg.printVersion {
		return 0
	}

	memStorage := local.NewMemorySeriesStorage(&cfg.storage)

	var (
		sampleAppender      storage.SampleAppender
		remoteStorageQueues []*remote.StorageQueueManager
	)
	if cfg.opentsdbURL == "" && cfg.influxdbURL == "" {
		log.Warnf("No remote storage URLs provided; not sending any samples to long-term storage")
		sampleAppender = memStorage
	} else {
		fanout := storage.Fanout{memStorage}

		addRemoteStorage := func(c remote.StorageClient) {
			qm := remote.NewStorageQueueManager(c, 100*1024)
			fanout = append(fanout, qm)
			remoteStorageQueues = append(remoteStorageQueues, qm)
		}

		if cfg.opentsdbURL != "" {
			addRemoteStorage(opentsdb.NewClient(cfg.opentsdbURL, cfg.remoteStorageTimeout))
		}
		if cfg.influxdbURL != "" {
			addRemoteStorage(influxdb.NewClient(cfg.influxdbURL, cfg.remoteStorageTimeout, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy))
		}

		sampleAppender = fanout
	}

	var (
		notificationHandler = notification.NewNotificationHandler(&cfg.notification)
		targetManager       = retrieval.NewTargetManager(sampleAppender)
		queryEngine         = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender:      sampleAppender,
		NotificationHandler: notificationHandler,
		QueryEngine:         queryEngine,
		PrometheusURL:       cfg.prometheusURL,
		PathPrefix:          cfg.web.PathPrefix,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	status := &web.PrometheusStatus{
		BuildInfo:   BuildInfo,
		TargetPools: targetManager.Pools,
		Rules:       ruleManager.Rules,
		Flags:       flags,
		Birth:       time.Now(),
	}

	webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web)

	if !reloadConfig(cfg.configFile, status, targetManager, ruleManager) {
		os.Exit(1)
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for range hup {
			reloadConfig(cfg.configFile, status, targetManager, ruleManager)
		}
	}()

	// Start all components.
	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	// The storage has to be fully initialized before registering.
	registry.MustRegister(memStorage)
	registry.MustRegister(notificationHandler)

	for _, q := range remoteStorageQueues {
		registry.MustRegister(q)

		go q.Run()
		defer q.Stop()
	}

	go ruleManager.Run()
	defer ruleManager.Stop()

	go notificationHandler.Run()
	defer notificationHandler.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	}

	close(hup)

	log.Info("See you next time!")
	return 0
}
Example #29
// Stop shuts down the notification handler.
func (n *NotificationHandler) Stop() {
	log.Info("Stopping notification handler...")
	close(n.pendingNotifications)
	<-n.stopped
	log.Info("Notification handler stopped.")
}
Example #30
// checkpointSeriesMapAndHeads persists the fingerprint to memory-series mapping
// and all non persisted chunks. Do not call concurrently with
// loadSeriesMapAndHeads. This method will only write heads format v2, but
// loadSeriesMapAndHeads can also understand v1.
//
// Description of the file format (for both, v1 and v2):
//
// (1) Magic string (const headsMagicString).
//
// (2) Varint-encoded format version (const headsFormatVersion).
//
// (3) Number of series in checkpoint as big-endian uint64.
//
// (4) Repeated once per series:
//
// (4.1) A flag byte, see flag constants above. (Present but unused in v2.)
//
// (4.2) The fingerprint as big-endian uint64.
//
// (4.3) The metric as defined by codable.Metric.
//
// (4.4) The varint-encoded persistWatermark. (Missing in v1.)
//
// (4.5) The modification time of the series file as nanoseconds elapsed since
// January 1, 1970 UTC. -1 if the modification time is unknown or no series file
// exists yet. (Missing in v1.)
//
// (4.6) The varint-encoded chunkDescsOffset.
//
// (4.7) The varint-encoded savedFirstTime.
//
// (4.8) The varint-encoded number of chunk descriptors.
//
// (4.9) Repeated once per chunk descriptor, oldest to most recent, either
// variant 4.9.1 (if index < persistWatermark) or variant 4.9.2 (if index >=
// persistWatermark). In v1, everything is variant 4.9.1 except for a
// non-persisted head-chunk (determined by the flags).
//
// (4.9.1.1) The varint-encoded first time.
// (4.9.1.2) The varint-encoded last time.
//
// (4.9.2.1) A byte defining the chunk type.
// (4.9.2.2) The chunk itself, marshaled with the marshal() method.
//
func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap, fpLocker *fingerprintLocker) (err error) {
	log.Info("Checkpointing in-memory metrics and chunks...")
	begin := time.Now()
	f, err := os.OpenFile(p.headsTempFileName(), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0640)
	if err != nil {
		return
	}

	defer func() {
		f.Sync()
		closeErr := f.Close()
		if err != nil {
			return
		}
		err = closeErr
		if err != nil {
			return
		}
		err = os.Rename(p.headsTempFileName(), p.headsFileName())
		duration := time.Since(begin)
		p.checkpointDuration.Set(float64(duration) / float64(time.Millisecond))
		log.Infof("Done checkpointing in-memory metrics and chunks in %v.", duration)
	}()

	w := bufio.NewWriterSize(f, fileBufSize)

	if _, err = w.WriteString(headsMagicString); err != nil {
		return
	}
	var numberOfSeriesOffset int
	if numberOfSeriesOffset, err = codable.EncodeVarint(w, headsFormatVersion); err != nil {
		return
	}
	numberOfSeriesOffset += len(headsMagicString)
	numberOfSeriesInHeader := uint64(fingerprintToSeries.length())
	// We have to write the number of series as uint64 because we might need
	// to overwrite it later, and a varint might change byte width then.
	if err = codable.EncodeUint64(w, numberOfSeriesInHeader); err != nil {
		return
	}

	iter := fingerprintToSeries.iter()
	defer func() {
		// Consume the iterator in any case to not leak goroutines.
		for range iter {
		}
	}()

	var realNumberOfSeries uint64
	for m := range iter {
		func() { // Wrapped in function to use defer for unlocking the fp.
			fpLocker.Lock(m.fp)
			defer fpLocker.Unlock(m.fp)

			if len(m.series.chunkDescs) == 0 {
				// This series was completely purged or archived in the meantime. Ignore.
				return
			}
			realNumberOfSeries++
			// seriesFlags left empty in v2.
			if err = w.WriteByte(0); err != nil {
				return
			}
			if err = codable.EncodeUint64(w, uint64(m.fp)); err != nil {
				return
			}
			var buf []byte
			buf, err = codable.Metric(m.series.metric).MarshalBinary()
			if err != nil {
				return
			}
			w.Write(buf)
			if _, err = codable.EncodeVarint(w, int64(m.series.persistWatermark)); err != nil {
				return
			}
			if m.series.modTime.IsZero() {
				if _, err = codable.EncodeVarint(w, -1); err != nil {
					return
				}
			} else {
				if _, err = codable.EncodeVarint(w, m.series.modTime.UnixNano()); err != nil {
					return
				}
			}
			if _, err = codable.EncodeVarint(w, int64(m.series.chunkDescsOffset)); err != nil {
				return
			}
			if _, err = codable.EncodeVarint(w, int64(m.series.savedFirstTime)); err != nil {
				return
			}
			if _, err = codable.EncodeVarint(w, int64(len(m.series.chunkDescs))); err != nil {
				return
			}
			for i, chunkDesc := range m.series.chunkDescs {
				if i < m.series.persistWatermark {
					if _, err = codable.EncodeVarint(w, int64(chunkDesc.firstTime())); err != nil {
						return
					}
					if _, err = codable.EncodeVarint(w, int64(chunkDesc.lastTime())); err != nil {
						return
					}
				} else {
					// This is the non-persisted head chunk. Fully marshal it.
					if err = w.WriteByte(byte(chunkDesc.c.encoding())); err != nil {
						return
					}
					if err = chunkDesc.c.marshal(w); err != nil {
						return
					}
				}
			}
			// Series is checkpointed now, so declare it clean.
			m.series.dirty = false
		}()
		if err != nil {
			return
		}
	}
	if err = w.Flush(); err != nil {
		return
	}
	if realNumberOfSeries != numberOfSeriesInHeader {
		// The number of series has changed in the meantime.
		// Rewrite it in the header.
		if _, err = f.Seek(int64(numberOfSeriesOffset), os.SEEK_SET); err != nil {
			return
		}
		if err = codable.EncodeUint64(f, realNumberOfSeries); err != nil {
			return
		}
	}
	return
}