// Run implements the TargetProvider interface.
func (gd *GCEDiscovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	// Get an initial set right away.
	tg, err := gd.refresh()
	if err != nil {
		log.Error(err)
	} else {
		select {
		case ch <- []*config.TargetGroup{tg}:
		case <-ctx.Done():
		}
	}

	ticker := time.NewTicker(gd.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			tg, err := gd.refresh()
			if err != nil {
				log.Error(err)
				continue
			}
			select {
			case ch <- []*config.TargetGroup{tg}:
			case <-ctx.Done():
			}
		case <-ctx.Done():
			return
		}
	}
}
// close flushes the indexing queue and other buffered data and releases any
// held resources. It also removes the dirty marker file if successful and if
// the persistence is currently not marked as dirty.
func (p *persistence) close() error {
	close(p.indexingQueue)
	<-p.indexingStopped

	var lastError, dirtyFileRemoveError error
	if err := p.archivedFingerprintToMetrics.Close(); err != nil {
		lastError = err
		log.Error("Error closing archivedFingerprintToMetric index DB: ", err)
	}
	if err := p.archivedFingerprintToTimeRange.Close(); err != nil {
		lastError = err
		log.Error("Error closing archivedFingerprintToTimeRange index DB: ", err)
	}
	if err := p.labelPairToFingerprints.Close(); err != nil {
		lastError = err
		log.Error("Error closing labelPairToFingerprints index DB: ", err)
	}
	if err := p.labelNameToLabelValues.Close(); err != nil {
		lastError = err
		log.Error("Error closing labelNameToLabelValues index DB: ", err)
	}
	if lastError == nil && !p.isDirty() {
		dirtyFileRemoveError = os.Remove(p.dirtyFileName)
	}
	if err := p.fLock.Release(); err != nil {
		lastError = err
		log.Error("Error releasing file lock: ", err)
	}
	if dirtyFileRemoveError != nil {
		// On Windows, removing the dirty file before unlocking is not
		// possible. So remove it here if it failed above.
		lastError = os.Remove(p.dirtyFileName)
	}
	return lastError
}
// Run implements the TargetProvider interface.
func (ed *EC2Discovery) Run(ch chan<- config.TargetGroup, done <-chan struct{}) {
	defer close(ch)

	ticker := time.NewTicker(ed.interval)
	defer ticker.Stop()

	// Get an initial set right away.
	tg, err := ed.refresh()
	if err != nil {
		log.Error(err)
	} else {
		ch <- *tg
	}

	for {
		select {
		case <-ticker.C:
			tg, err := ed.refresh()
			if err != nil {
				log.Error(err)
			} else {
				ch <- *tg
			}
		case <-done:
			return
		}
	}
}
// Run implements the TargetProvider interface.
func (ed *EC2Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	defer close(ch)

	ticker := time.NewTicker(ed.interval)
	defer ticker.Stop()

	// Get an initial set right away.
	tg, err := ed.refresh()
	if err != nil {
		log.Error(err)
	} else {
		ch <- []*config.TargetGroup{tg}
	}

	for {
		select {
		case <-ticker.C:
			tg, err := ed.refresh()
			if err != nil {
				log.Error(err)
			} else {
				ch <- []*config.TargetGroup{tg}
			}
		case <-ctx.Done():
			return
		}
	}
}
// closeChunkFile first syncs the provided file if the sync strategy mandates
// it. Then it closes the file. Errors are logged.
func (p *persistence) closeChunkFile(f *os.File) {
	if p.shouldSync() {
		if err := f.Sync(); err != nil {
			log.Error("Error syncing file:", err)
		}
	}
	if err := f.Close(); err != nil {
		log.Error("Error closing chunk file:", err)
	}
}
func reloadConfig(filename string, rls ...Reloadable) (err error) {
	log.Infof("Loading configuration file %s", filename)
	defer func() {
		if err == nil {
			configSuccess.Set(1)
			configSuccessTime.Set(float64(time.Now().Unix()))
		} else {
			configSuccess.Set(0)
		}
	}()

	conf, err := config.LoadFile(filename)
	if err != nil {
		return fmt.Errorf("couldn't load configuration (-config.file=%s): %v", filename, err)
	}

	failed := false
	for _, rl := range rls {
		if err := rl.ApplyConfig(conf); err != nil {
			log.Error("Failed to apply configuration: ", err)
			failed = true
		}
	}
	if failed {
		return fmt.Errorf("one or more errors occurred while applying the new configuration (-config.file=%s)", filename)
	}
	return nil
}
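// The deferred closure in reloadConfig works because err is a named return
// value: by the time the defer runs, err already holds whatever the function
// is about to return. Below is a minimal, self-contained sketch of that
// idiom; the names (loadSomething, the printed status) are illustrative only
// and not part of the Prometheus code base.
package main

import (
	"errors"
	"fmt"
)

// loadSomething reports success or failure via a deferred closure that reads
// the final value of the named return err.
func loadSomething(fail bool) (err error) {
	defer func() {
		if err == nil {
			fmt.Println("status: success")
		} else {
			fmt.Println("status: failure")
		}
	}()

	if fail {
		return errors.New("boom")
	}
	return nil
}

func main() {
	loadSomething(false) // status: success
	loadSomething(true)  // status: failure
}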
// MultiThreadedDevReady returns a DevReadyFunc that distributes incoming SCSI
// commands across `threads` worker goroutines, each calling h.HandleCommand
// with a reusable 32 KiB buffer and sending the responses to the output
// channel.
func MultiThreadedDevReady(h SCSICmdHandler, threads int) DevReadyFunc {
	return func(in chan *SCSICmd, out chan SCSIResponse) error {
		go func(h SCSICmdHandler, in chan *SCSICmd, out chan SCSIResponse, threads int) {
			w := sync.WaitGroup{}
			w.Add(threads)
			for i := 0; i < threads; i++ {
				go func(h SCSICmdHandler, in chan *SCSICmd, out chan SCSIResponse, w *sync.WaitGroup) {
					buf := make([]byte, 32*1024)
					for {
						v, ok := <-in
						if !ok {
							break
						}
						v.Buf = buf
						x, err := h.HandleCommand(v)
						buf = v.Buf
						if err != nil {
							log.Error(err)
							return
						}
						out <- x
					}
					w.Done()
				}(h, in, out, &w)
			}
			w.Wait()
			close(out)
		}(h, in, out, threads)
		return nil
	}
}
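// Stripped of the SCSI specifics, MultiThreadedDevReady is a fan-out worker
// pool: N goroutines drain one input channel, and the output channel is
// closed only after the input is closed and every worker has finished. The
// sketch below is a self-contained illustration with hypothetical work/result
// types; it is not part of the SCSI package.
package main

import (
	"fmt"
	"sync"
)

type work struct{ n int }
type result struct{ n int }

func handle(w work) result { return result{n: w.n * 2} }

// fanOut starts `threads` workers that drain in, send results to out, and
// close out once in is closed and all workers have returned.
func fanOut(in <-chan work, out chan<- result, threads int) {
	var wg sync.WaitGroup
	wg.Add(threads)
	for i := 0; i < threads; i++ {
		go func() {
			defer wg.Done()
			for w := range in {
				out <- handle(w)
			}
		}()
	}
	go func() {
		wg.Wait()
		close(out)
	}()
}

func main() {
	in := make(chan work)
	out := make(chan result)
	fanOut(in, out, 4)
	go func() {
		for i := 0; i < 8; i++ {
			in <- work{n: i}
		}
		close(in)
	}()
	for r := range out {
		fmt.Println(r.n)
	}
}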
func receiverName(ctx context.Context) string {
	recv, ok := ReceiverName(ctx)
	if !ok {
		log.Error("missing receiver")
	}
	return recv
}
// maintainArchivedSeries drops chunks older than beforeTime from an archived
// series. If the series contains no chunks after that, it is purged entirely.
func (s *memorySeriesStorage) maintainArchivedSeries(fp model.Fingerprint, beforeTime model.Time) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainArchived).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	has, firstTime, lastTime := s.persistence.hasArchivedMetric(fp)
	if !has || !firstTime.Before(beforeTime) {
		// Oldest sample not old enough, or metric purged or unarchived in the meantime.
		return
	}

	defer s.seriesOps.WithLabelValues(archiveMaintenance).Inc()

	newFirstTime, _, _, allDropped, err := s.persistence.dropAndPersistChunks(fp, beforeTime, nil)
	if err != nil {
		log.Error("Error dropping persisted chunks: ", err)
	}
	if allDropped {
		s.persistence.purgeArchivedMetric(fp) // Ignoring error. Nothing we can do.
		s.seriesOps.WithLabelValues(archivePurge).Inc()
		return
	}
	if err := s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime); err != nil {
		log.Errorf("Error updating archived time range for fingerprint %v: %s", fp, err)
	}
}
// persistChunks persists a number of consecutive chunks of a series. It is the
// caller's responsibility to not modify the chunks concurrently and to not
// persist or drop anything for the same fingerprint concurrently. It returns
// the (zero-based) index of the first persisted chunk within the series
// file. In case of an error, the returned index is -1 (to avoid the
// misconception that the chunk was written at position 0).
func (p *persistence) persistChunks(fp model.Fingerprint, chunks []chunk) (index int, err error) {
	defer func() {
		if err != nil {
			log.Error("Error persisting chunks: ", err)
			p.setDirty(true)
		}
	}()

	f, err := p.openChunkFileForWriting(fp)
	if err != nil {
		return -1, err
	}
	defer p.closeChunkFile(f)

	if err := writeChunks(f, chunks); err != nil {
		return -1, err
	}

	// Determine index within the file.
	offset, err := f.Seek(0, os.SEEK_CUR)
	if err != nil {
		return -1, err
	}
	index, err = chunkIndexForOffset(offset)
	if err != nil {
		return -1, err
	}

	return index - len(chunks), err
}
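// To make the final subtraction in persistChunks concrete: with fixed-size
// chunk slots, the file offset after the write sits just past the last
// appended chunk, so dividing by the slot size and subtracting len(chunks)
// recovers the index of the first newly written chunk. The slot size and the
// simplified helper below are illustrative stand-ins, not the real constants
// or the error-returning helper from the persistence package.
package main

import "fmt"

const slotSize = 1041 // assumed fixed per-chunk size (header + payload)

func indexForOffset(offset int64) int { return int(offset / slotSize) }

func main() {
	existing, appended := 5, 3
	// File position after appending 3 chunks to a file that already held 5.
	offset := int64(existing+appended) * slotSize
	first := indexForOffset(offset) - appended
	fmt.Println(first) // 5: zero-based index of the first newly persisted chunk
}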
func groupLabels(ctx context.Context) model.LabelSet {
	groupLabels, ok := GroupLabels(ctx)
	if !ok {
		log.Error("missing group labels")
	}
	return groupLabels
}
func dumpHeap(w http.ResponseWriter, r *http.Request) {
	target := fmt.Sprintf("/tmp/%d.heap", time.Now().Unix())
	f, err := os.Create(target)
	if err != nil {
		log.Error("Could not dump heap: ", err)
		return
	}
	fmt.Fprintf(w, "Writing to %s...", target)
	defer f.Close()
	pprof_runtime.WriteHeapProfile(f)
	fmt.Fprintf(w, "Done")
}
// Collect fetches the stats from the configured BIND location and delivers
// them as Prometheus metrics. It implements prometheus.Collector.
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
	status := 0.
	if stats, err := e.client.Stats(e.groups...); err == nil {
		for _, c := range e.collectors {
			c(&stats).Collect(ch)
		}
		status = 1
	} else {
		log.Error("Couldn't retrieve BIND stats: ", err)
	}
	ch <- prometheus.MustNewConstMetric(up, prometheus.GaugeValue, status)
}
// nextItem returns the next item from the input.
func (l *lexer) nextItem() item {
	item := <-l.items
	l.lastPos = item.pos

	// TODO(fabxc): remove for version 1.0.
	t := item.typ
	if t == itemSummary || t == itemDescription || t == itemRunbook {
		log.Errorf("Token %q is not valid anymore. Alerting rule syntax has changed with version 0.17.0. Please read https://prometheus.io/docs/alerting/rules/.", item)
	} else if t == itemKeepExtra {
		log.Error("Token 'keeping_extra' is not valid anymore. Use 'keep_common' instead.")
	}
	return item
}
// Metrics handles the /api/metrics endpoint.
func (api *API) Metrics(w http.ResponseWriter, r *http.Request) {
	setAccessControlHeaders(w)
	w.Header().Set("Content-Type", "application/json")

	metricNames := api.Storage.LabelValuesForLabelName(model.MetricNameLabel)
	sort.Sort(metricNames)

	resultBytes, err := json.Marshal(metricNames)
	if err != nil {
		log.Error("Error marshalling metric names: ", err)
		httpJSONError(w, fmt.Errorf("error marshalling metric names: %s", err), http.StatusInternalServerError)
		return
	}
	w.Write(resultBytes)
}
// Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string) (azureResource, error) {
	// Resource IDs have the following format.
	// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
	s := strings.Split(id, "/")
	if len(s) != 9 {
		err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
		log.Error(err)
		return azureResource{}, err
	}
	return azureResource{
		Name:          strings.ToLower(s[8]),
		ResourceGroup: strings.ToLower(s[4]),
	}, nil
}
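// For reference, splitting an ID of the documented shape makes the index
// choices above (s[4], s[8], len == 9) concrete. The ID below is a made-up
// example, not a real resource.
package main

import (
	"fmt"
	"strings"
)

func main() {
	id := "/subscriptions/1234/resourceGroups/my-group/providers/Microsoft.Compute/virtualMachines/my-vm"
	s := strings.Split(id, "/")
	// The leading "/" produces an empty first element, so a valid ID splits into 9 parts.
	fmt.Println(len(s)) // 9
	fmt.Println(s[4])   // my-group (RESOURCE_GROUP)
	fmt.Println(s[8])   // my-vm (NAME)
}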
// setDirty sets the dirty flag in a goroutine-safe way. Once the dirty flag was
// set to true with this method, it cannot be set to false again. (If we became
// dirty during our runtime, there is no way back. If we were dirty from the
// start, a clean-up might make us clean again.)
func (p *persistence) setDirty(dirty bool) {
	if dirty {
		p.dirtyCounter.Inc()
	}
	p.dirtyMtx.Lock()
	defer p.dirtyMtx.Unlock()
	if p.becameDirty {
		return
	}
	p.dirty = dirty
	if dirty {
		p.becameDirty = true
		log.Error("The storage is now inconsistent. Restart Prometheus ASAP to initiate recovery.")
	}
}
// Run dispatches notifications continuously.
func (n *NotificationHandler) Run() {
	for reqs := range n.pendingNotifications {
		if n.alertmanagerURL == "" {
			log.Warn("No alert manager configured, not dispatching notification")
			n.notificationDropped.Inc()
			continue
		}

		begin := time.Now()
		err := n.sendNotifications(reqs)
		if err != nil {
			log.Error("Error sending notification: ", err)
			n.notificationErrors.Inc()
		}

		n.notificationLatency.Observe(float64(time.Since(begin) / time.Millisecond))
	}
	close(n.stopped)
}
// cycleThroughArchivedFingerprints returns a channel that emits fingerprints
// for archived series in a throttled fashion. It continues to cycle through all
// archived fingerprints until s.loopStopping is closed.
func (s *memorySeriesStorage) cycleThroughArchivedFingerprints() chan model.Fingerprint {
	archivedFingerprints := make(chan model.Fingerprint)
	go func() {
		defer close(archivedFingerprints)

		for {
			archivedFPs, err := s.persistence.fingerprintsModifiedBefore(
				model.Now().Add(-s.dropAfter),
			)
			if err != nil {
				log.Error("Failed to lookup archived fingerprint ranges: ", err)
				s.waitForNextFP(0, 1)
				continue
			}
			// Initial wait, also important if there are no FPs yet.
			if !s.waitForNextFP(len(archivedFPs), 1) {
				return
			}
			begin := time.Now()
			for _, fp := range archivedFPs {
				select {
				case archivedFingerprints <- fp:
				case <-s.loopStopping:
					return
				}
				// Never speed up maintenance of archived FPs.
				s.waitForNextFP(len(archivedFPs), 1)
			}
			if len(archivedFPs) > 0 {
				log.Infof(
					"Completed maintenance sweep through %d archived fingerprints in %v.",
					len(archivedFPs), time.Since(begin),
				)
			}
		}
	}()
	return archivedFingerprints
}
// fingerprintsForLabelPairs returns the set of fingerprints that have the given labels.
// This does not work with empty label values.
func (s *memorySeriesStorage) fingerprintsForLabelPairs(pairs ...model.LabelPair) map[model.Fingerprint]struct{} {
	var result map[model.Fingerprint]struct{}
	for _, pair := range pairs {
		intersection := map[model.Fingerprint]struct{}{}
		fps, err := s.persistence.fingerprintsForLabelPair(pair)
		if err != nil {
			log.Error("Error getting fingerprints for label pair: ", err)
		}
		if len(fps) == 0 {
			return nil
		}
		for _, fp := range fps {
			if _, ok := result[fp]; ok || result == nil {
				intersection[fp] = struct{}{}
			}
		}
		if len(intersection) == 0 {
			return nil
		}
		result = intersection
	}
	return result
}
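// The loop above intersects fingerprint sets one label pair at a time and
// short-circuits to nil as soon as an intermediate result is empty. The same
// shape, isolated with plain integers (types and names here are illustrative
// only):
package main

import "fmt"

// intersectAll keeps only elements present in every slice and returns nil as
// soon as an intermediate intersection is empty.
func intersectAll(sets ...[]int) map[int]struct{} {
	var result map[int]struct{}
	for _, s := range sets {
		intersection := map[int]struct{}{}
		for _, v := range s {
			if _, ok := result[v]; ok || result == nil {
				intersection[v] = struct{}{}
			}
		}
		if len(intersection) == 0 {
			return nil
		}
		result = intersection
	}
	return result
}

func main() {
	fmt.Println(intersectAll([]int{1, 2, 3}, []int{2, 3, 4}, []int{3, 5})) // map[3:{}]
	fmt.Println(intersectAll([]int{1}, []int{2}))                         // map[] (nil)
}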
// SingleThreadedDevReady returns a DevReadyFunc that handles incoming SCSI
// commands sequentially in a single goroutine, calling h.HandleCommand with a
// reusable 32 KiB buffer and sending responses to the output channel.
func SingleThreadedDevReady(h SCSICmdHandler) DevReadyFunc {
	return func(in chan *SCSICmd, out chan SCSIResponse) error {
		go func(h SCSICmdHandler, in chan *SCSICmd, out chan SCSIResponse) {
			// Use io.Copy's trick
			buf := make([]byte, 32*1024)
			for {
				v, ok := <-in
				if !ok {
					close(out)
					return
				}
				v.Buf = buf
				x, err := h.HandleCommand(v)
				buf = v.Buf
				if err != nil {
					log.Error(err)
					return
				}
				out <- x
			}
		}(h, in, out)
		return nil
	}
}
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		log.Error(err)
		return 2
	}

	if cfg.printVersion {
		fmt.Fprintln(os.Stdout, version.Print("prometheus"))
		return 0
	}

	log.Infoln("Starting prometheus", version.Info())
	log.Infoln("Build context", version.BuildContext())

	var reloadables []Reloadable

	var (
		memStorage     = local.NewMemorySeriesStorage(&cfg.storage)
		remoteStorage  = remote.New(&cfg.remote)
		sampleAppender = storage.Fanout{memStorage}
	)
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	var (
		notifier      = notifier.New(&cfg.notifier)
		targetManager = retrieval.NewTargetManager(sampleAppender)
		queryEngine   = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender: sampleAppender,
		Notifier:       notifier,
		QueryEngine:    queryEngine,
		ExternalURL:    cfg.web.ExternalURL,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	version := &web.PrometheusVersion{
		Version:   version.Version,
		Revision:  version.Revision,
		Branch:    version.Branch,
		BuildUser: version.BuildUser,
		BuildDate: version.BuildDate,
		GoVersion: version.GoVersion,
	}

	webHandler := web.New(memStorage, queryEngine, targetManager, ruleManager, version, flags, &cfg.web)

	reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier)

	if !reloadConfig(cfg.configFile, reloadables...) {
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webHandler.Reload():
			}
			reloadConfig(cfg.configFile, reloadables...)
		}
	}()

	// Start all components. The order is NOT arbitrary.

	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		prometheus.MustRegister(remoteStorage)

		go remoteStorage.Run()
		defer remoteStorage.Stop()
	}
	// The storage has to be fully initialized before registering.
	prometheus.MustRegister(memStorage)
	prometheus.MustRegister(notifier)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	// The notifier is a dependency of the rule manager. It has to be
	// started before and torn down afterwards.
	go notifier.Run()
	defer notifier.Stop()

	go ruleManager.Run()
	defer ruleManager.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	// Shutting down the query engine before the rule manager will cause pending queries
	// to be canceled and ensures a quick shutdown of the rule manager.
	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}
// dropAndPersistChunks deletes all chunks from a series file whose last sample
// time is before beforeTime, and then appends the provided chunks, leaving out
// those whose last sample time is before beforeTime. It returns the timestamp
// of the first sample in the oldest chunk _not_ dropped, the offset within the
// series file of the first chunk persisted (out of the provided chunks), the
// number of deleted chunks, and true if all chunks of the series have been
// deleted (in which case the returned timestamp will be 0 and must be ignored).
// It is the caller's responsibility to make sure nothing is persisted or loaded
// for the same fingerprint concurrently.
func (p *persistence) dropAndPersistChunks(
	fp model.Fingerprint, beforeTime model.Time, chunks []chunk,
) (
	firstTimeNotDropped model.Time,
	offset int,
	numDropped int,
	allDropped bool,
	err error,
) {
	// Style note: With the many return values, it was decided to use naked
	// returns in this method. They make the method more readable, but
	// please handle with care!
	defer func() {
		if err != nil {
			log.Error("Error dropping and/or persisting chunks: ", err)
			p.setDirty(true)
		}
	}()

	if len(chunks) > 0 {
		// We have chunks to persist. First check if those are already
		// too old. If that's the case, the chunks in the series file
		// are all too old, too.
		i := 0
		for ; i < len(chunks) && chunks[i].newIterator().lastTimestamp().Before(beforeTime); i++ {
		}
		if i < len(chunks) {
			firstTimeNotDropped = chunks[i].firstTime()
		}
		if i > 0 || firstTimeNotDropped.Before(beforeTime) {
			// Series file has to go.
			if numDropped, err = p.deleteSeriesFile(fp); err != nil {
				return
			}
			numDropped += i
			if i == len(chunks) {
				allDropped = true
				return
			}
			// Now simply persist what has to be persisted to a new file.
			_, err = p.persistChunks(fp, chunks[i:])
			return
		}
	}

	// If we are here, we have to check the series file itself.
	f, err := p.openChunkFileForReading(fp)
	if os.IsNotExist(err) {
		// No series file. Only need to create new file with chunks to
		// persist, if there are any.
		if len(chunks) == 0 {
			allDropped = true
			err = nil // Do not report not-exist err.
			return
		}
		offset, err = p.persistChunks(fp, chunks)
		return
	}
	if err != nil {
		return
	}
	defer f.Close()

	headerBuf := make([]byte, chunkHeaderLen)
	var firstTimeInFile model.Time
	// Find the first chunk in the file that should be kept.
	for ; ; numDropped++ {
		_, err = f.Seek(offsetForChunkIndex(numDropped), os.SEEK_SET)
		if err != nil {
			return
		}
		_, err = io.ReadFull(f, headerBuf)
		if err == io.EOF {
			// We ran into the end of the file without finding any chunks that should
			// be kept. Remove the whole file.
			if numDropped, err = p.deleteSeriesFile(fp); err != nil {
				return
			}
			if len(chunks) == 0 {
				allDropped = true
				return
			}
			offset, err = p.persistChunks(fp, chunks)
			return
		}
		if err != nil {
			return
		}
		if numDropped == 0 {
			firstTimeInFile = model.Time(
				binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]),
			)
		}
		lastTime := model.Time(
			binary.LittleEndian.Uint64(headerBuf[chunkHeaderLastTimeOffset:]),
		)
		if !lastTime.Before(beforeTime) {
			break
		}
	}

	// We've found the first chunk that should be kept.
	// First check if the shrink ratio is good enough to perform the actual
	// drop or leave it for next time if it is not worth the effort.
	fi, err := f.Stat()
	if err != nil {
		return
	}
	totalChunks := int(fi.Size())/chunkLenWithHeader + len(chunks)
	if numDropped == 0 || float64(numDropped)/float64(totalChunks) < p.minShrinkRatio {
		// Nothing to drop. Just adjust the return values and append the chunks (if any).
		numDropped = 0
		firstTimeNotDropped = firstTimeInFile
		if len(chunks) > 0 {
			offset, err = p.persistChunks(fp, chunks)
		}
		return
	}
	// If we are here, we have to drop some chunks for real. So we need to
	// record firstTimeNotDropped from the last read header, seek backwards
	// to the beginning of its header, and start copying everything from
	// there into a new file. Then append the chunks to the new file.
	firstTimeNotDropped = model.Time(
		binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]),
	)
	chunkOps.WithLabelValues(drop).Add(float64(numDropped))
	_, err = f.Seek(-chunkHeaderLen, os.SEEK_CUR)
	if err != nil {
		return
	}

	temp, err := os.OpenFile(p.tempFileNameForFingerprint(fp), os.O_WRONLY|os.O_CREATE, 0640)
	if err != nil {
		return
	}
	defer func() {
		p.closeChunkFile(temp)
		if err == nil {
			err = os.Rename(p.tempFileNameForFingerprint(fp), p.fileNameForFingerprint(fp))
		}
	}()

	written, err := io.Copy(temp, f)
	if err != nil {
		return
	}
	offset = int(written / chunkLenWithHeader)

	if len(chunks) > 0 {
		if err = writeChunks(temp, chunks); err != nil {
			return
		}
	}
	return
}
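// A quick numeric check of the shrink-ratio guard above. The numbers are
// illustrative; in the real code minShrinkRatio comes from configuration.
// With 100 chunks in total and 5 candidates to drop, a minimum shrink ratio
// of 0.1 means the rewrite is skipped and left for a later maintenance cycle.
package main

import "fmt"

func main() {
	numDropped, totalChunks, minShrinkRatio := 5, 100, 0.1
	skip := numDropped == 0 || float64(numDropped)/float64(totalChunks) < minShrinkRatio
	fmt.Println(skip) // true: 0.05 < 0.1, not worth rewriting the series file yet
}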
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		log.Error(err)
		return 2
	}

	if cfg.printVersion {
		fmt.Fprintln(os.Stdout, version.Print("prometheus"))
		return 0
	}

	log.Infoln("Starting prometheus", version.Info())
	log.Infoln("Build context", version.BuildContext())

	var (
		sampleAppender = storage.Fanout{}
		reloadables    []Reloadable
	)

	var localStorage local.Storage
	switch cfg.localStorageEngine {
	case "persisted":
		localStorage = local.NewMemorySeriesStorage(&cfg.storage)
		sampleAppender = storage.Fanout{localStorage}
	case "none":
		localStorage = &local.NoopStorage{}
	default:
		log.Errorf("Invalid local storage engine %q", cfg.localStorageEngine)
		return 1
	}

	remoteStorage, err := remote.New(&cfg.remote)
	if err != nil {
		log.Errorf("Error initializing remote storage: %s", err)
		return 1
	}
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	reloadableRemoteStorage := remote.NewConfigurable()
	sampleAppender = append(sampleAppender, reloadableRemoteStorage)
	reloadables = append(reloadables, reloadableRemoteStorage)

	var (
		notifier       = notifier.New(&cfg.notifier)
		targetManager  = retrieval.NewTargetManager(sampleAppender)
		queryEngine    = promql.NewEngine(localStorage, &cfg.queryEngine)
		ctx, cancelCtx = context.WithCancel(context.Background())
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender: sampleAppender,
		Notifier:       notifier,
		QueryEngine:    queryEngine,
		Context:        ctx,
		ExternalURL:    cfg.web.ExternalURL,
	})

	cfg.web.Context = ctx
	cfg.web.Storage = localStorage
	cfg.web.QueryEngine = queryEngine
	cfg.web.TargetManager = targetManager
	cfg.web.RuleManager = ruleManager

	cfg.web.Version = &web.PrometheusVersion{
		Version:   version.Version,
		Revision:  version.Revision,
		Branch:    version.Branch,
		BuildUser: version.BuildUser,
		BuildDate: version.BuildDate,
		GoVersion: version.GoVersion,
	}

	cfg.web.Flags = map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		cfg.web.Flags[f.Name] = f.Value.String()
	})

	webHandler := web.New(&cfg.web)

	reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier)

	if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
		log.Errorf("Error loading config: %s", err)
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
				if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
					log.Errorf("Error reloading config: %s", err)
				}
			case rc := <-webHandler.Reload():
				if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
					log.Errorf("Error reloading config: %s", err)
					rc <- err
				} else {
					rc <- nil
				}
			}
		}
	}()

	// Start all components. The order is NOT arbitrary.

	if err := localStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := localStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		remoteStorage.Start()
		defer remoteStorage.Stop()
	}

	defer reloadableRemoteStorage.Stop()

	// The storage has to be fully initialized before registering.
	if instrumentedStorage, ok := localStorage.(prometheus.Collector); ok {
		prometheus.MustRegister(instrumentedStorage)
	}
	prometheus.MustRegister(notifier)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	// The notifier is a dependency of the rule manager. It has to be
	// started before and torn down afterwards.
	go notifier.Run()
	defer notifier.Stop()

	go ruleManager.Run()
	defer ruleManager.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	// Shutting down the query engine before the rule manager will cause pending queries
	// to be canceled and ensures a quick shutdown of the rule manager.
	defer cancelCtx()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}
// processIndexingQueue drains the indexing queue, batching up index operations
// and committing a batch when it is full, when a flush is requested, or when
// the batch timeout expires with nothing else pending. It closes
// p.indexingStopped on shutdown.
func (p *persistence) processIndexingQueue() {
	batchSize := 0
	nameToValues := index.LabelNameLabelValuesMapping{}
	pairToFPs := index.LabelPairFingerprintsMapping{}
	batchTimeout := time.NewTimer(indexingBatchTimeout)
	defer batchTimeout.Stop()

	commitBatch := func() {
		p.indexingBatchSizes.Observe(float64(batchSize))
		defer func(begin time.Time) {
			p.indexingBatchDuration.Observe(time.Since(begin).Seconds())
		}(time.Now())

		if err := p.labelPairToFingerprints.IndexBatch(pairToFPs); err != nil {
			log.Error("Error indexing label pair to fingerprints batch: ", err)
		}
		if err := p.labelNameToLabelValues.IndexBatch(nameToValues); err != nil {
			log.Error("Error indexing label name to label values batch: ", err)
		}
		batchSize = 0
		nameToValues = index.LabelNameLabelValuesMapping{}
		pairToFPs = index.LabelPairFingerprintsMapping{}
		batchTimeout.Reset(indexingBatchTimeout)
	}

	var flush chan chan int
loop:
	for {
		// Only process flush requests if the queue is currently empty.
		if len(p.indexingQueue) == 0 {
			flush = p.indexingFlush
		} else {
			flush = nil
		}
		select {
		case <-batchTimeout.C:
			// Only commit if we have something to commit _and_
			// nothing is waiting in the queue to be picked up. That
			// prevents a death spiral if the LookupSet calls below
			// are slow for some reason.
			if batchSize > 0 && len(p.indexingQueue) == 0 {
				commitBatch()
			} else {
				batchTimeout.Reset(indexingBatchTimeout)
			}
		case r := <-flush:
			if batchSize > 0 {
				commitBatch()
			}
			r <- len(p.indexingQueue)
		case op, ok := <-p.indexingQueue:
			if !ok {
				if batchSize > 0 {
					commitBatch()
				}
				break loop
			}

			batchSize++

			for ln, lv := range op.metric {
				lp := model.LabelPair{Name: ln, Value: lv}
				baseFPs, ok := pairToFPs[lp]
				if !ok {
					var err error
					baseFPs, _, err = p.labelPairToFingerprints.LookupSet(lp)
					if err != nil {
						log.Errorf("Error looking up label pair %v: %s", lp, err)
						continue
					}
					pairToFPs[lp] = baseFPs
				}
				baseValues, ok := nameToValues[ln]
				if !ok {
					var err error
					baseValues, _, err = p.labelNameToLabelValues.LookupSet(ln)
					if err != nil {
						log.Errorf("Error looking up label name %v: %s", ln, err)
						continue
					}
					nameToValues[ln] = baseValues
				}
				switch op.opType {
				case add:
					baseFPs[op.fingerprint] = struct{}{}
					baseValues[lv] = struct{}{}
				case remove:
					delete(baseFPs, op.fingerprint)
					if len(baseFPs) == 0 {
						delete(baseValues, lv)
					}
				default:
					panic("unknown op type")
				}
			}

			if batchSize >= indexingMaxBatchSize {
				commitBatch()
			}
		}
	}
	close(p.indexingStopped)
}
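// Stripped of the indexing specifics, processIndexingQueue is a
// batch-and-flush loop: accumulate work from a queue, commit when the batch
// is full, and commit on a timer so small batches do not linger. Below is a
// self-contained sketch of that pattern with hypothetical names and sizes.
package main

import (
	"fmt"
	"time"
)

const (
	maxBatchSize = 4
	batchTimeout = 50 * time.Millisecond
)

// processQueue batches items from queue and "commits" (prints) a batch when
// it is full, when the timeout fires with something pending, or on shutdown.
func processQueue(queue <-chan int) {
	batch := []int{}
	timeout := time.NewTimer(batchTimeout)
	defer timeout.Stop()

	commit := func() {
		fmt.Println("commit", batch)
		batch = batch[:0]
		timeout.Reset(batchTimeout)
	}

	for {
		select {
		case <-timeout.C:
			if len(batch) > 0 {
				commit()
			} else {
				timeout.Reset(batchTimeout)
			}
		case v, ok := <-queue:
			if !ok {
				if len(batch) > 0 {
					commit()
				}
				return
			}
			batch = append(batch, v)
			if len(batch) >= maxBatchSize {
				commit()
			}
		}
	}
}

func main() {
	q := make(chan int)
	go func() {
		for i := 0; i < 10; i++ {
			q <- i
		}
		close(q)
	}()
	processQueue(q)
}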