func main() { flag.Parse() conf, err := config.LoadFromFile(*configFile) if err != nil { log.Fatalf("Error loading configuration from %s: %v", *configFile, err) } ts, err := metric.NewTieredStorage(5000, 5000, 100, time.Second*30, time.Second*1, time.Second*20, *metricsStoragePath) if err != nil { log.Fatalf("Error opening storage: %s", err) } go ts.Serve() go func() { notifier := make(chan os.Signal) signal.Notify(notifier, os.Interrupt) <-notifier ts.Close() os.Exit(0) }() // Queue depth will need to be exposed scrapeResults := make(chan format.Result, *scrapeResultsQueueCapacity) targetManager := retrieval.NewTargetManager(scrapeResults, *concurrentRetrievalAllowance) targetManager.AddTargetsFromConfig(conf) ruleResults := make(chan *rules.Result, *ruleResultsQueueCapacity) ast.SetStorage(ts) ruleManager := rules.NewRuleManager(ruleResults, conf.Global.EvaluationInterval) err = ruleManager.AddRulesFromConfig(conf) if err != nil { log.Fatalf("Error loading rule files: %v", err) } appState := &appstate.ApplicationState{ Config: conf, RuleManager: ruleManager, Storage: ts, TargetManager: targetManager, } web.StartServing(appState) for { select { case scrapeResult := <-scrapeResults: if scrapeResult.Err == nil { ts.AppendSample(scrapeResult.Sample) } case ruleResult := <-ruleResults: for _, sample := range ruleResult.Samples { ts.AppendSample(sample) } } } }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { return 2 } printVersion() if cfg.printVersion { return 0 } var reloadables []Reloadable var ( memStorage = local.NewMemorySeriesStorage(&cfg.storage) remoteStorage = remote.New(&cfg.remote) sampleAppender = storage.Fanout{memStorage} ) if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } var ( notificationHandler = notification.New(&cfg.notification) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, NotificationHandler: notificationHandler, QueryEngine: queryEngine, ExternalURL: cfg.web.ExternalURL, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) status := &web.PrometheusStatus{ TargetPools: targetManager.Pools, Rules: ruleManager.Rules, Flags: flags, Birth: time.Now(), } webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web) reloadables = append(reloadables, status, targetManager, ruleManager, webHandler, notificationHandler) if !reloadConfig(cfg.configFile, reloadables...) { return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: case <-webHandler.Reload(): } reloadConfig(cfg.configFile, reloadables...) } }() // Start all components. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { prometheus.MustRegister(remoteStorage) go remoteStorage.Run() defer remoteStorage.Stop() } // The storage has to be fully initialized before registering. prometheus.MustRegister(memStorage) prometheus.MustRegister(notificationHandler) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) defer ruleManager.Stop() go notificationHandler.Run() defer notificationHandler.Stop() go targetManager.Run() defer targetManager.Stop() defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
func Main() int { if err := parse(os.Args[1:]); err != nil { return 2 } versionInfoTmpl.Execute(os.Stdout, BuildInfo) if cfg.printVersion { return 0 } memStorage := local.NewMemorySeriesStorage(&cfg.storage) var ( sampleAppender storage.SampleAppender remoteStorageQueues []*remote.StorageQueueManager ) if cfg.opentsdbURL == "" && cfg.influxdbURL == "" { log.Warnf("No remote storage URLs provided; not sending any samples to long-term storage") sampleAppender = memStorage } else { fanout := storage.Fanout{memStorage} addRemoteStorage := func(c remote.StorageClient) { qm := remote.NewStorageQueueManager(c, 100*1024) fanout = append(fanout, qm) remoteStorageQueues = append(remoteStorageQueues, qm) } if cfg.opentsdbURL != "" { addRemoteStorage(opentsdb.NewClient(cfg.opentsdbURL, cfg.remoteStorageTimeout)) } if cfg.influxdbURL != "" { addRemoteStorage(influxdb.NewClient(cfg.influxdbURL, cfg.remoteStorageTimeout, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy)) } sampleAppender = fanout } var ( notificationHandler = notification.NewNotificationHandler(&cfg.notification) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, NotificationHandler: notificationHandler, QueryEngine: queryEngine, PrometheusURL: cfg.prometheusURL, PathPrefix: cfg.web.PathPrefix, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) status := &web.PrometheusStatus{ BuildInfo: BuildInfo, TargetPools: targetManager.Pools, Rules: ruleManager.Rules, Flags: flags, Birth: time.Now(), } webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web) if !reloadConfig(cfg.configFile, status, targetManager, ruleManager) { os.Exit(1) } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for range hup { reloadConfig(cfg.configFile, status, targetManager, ruleManager) } }() // Start all components. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() // The storage has to be fully initialized before registering. registry.MustRegister(memStorage) registry.MustRegister(notificationHandler) for _, q := range remoteStorageQueues { registry.MustRegister(q) go q.Run() defer q.Stop() } go ruleManager.Run() defer ruleManager.Stop() go notificationHandler.Run() defer notificationHandler.Stop() go targetManager.Run() defer targetManager.Stop() defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") } close(hup) log.Info("See you next time!") return 0 }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { log.Error(err) return 2 } if cfg.printVersion { fmt.Fprintln(os.Stdout, version.Print("prometheus")) return 0 } log.Infoln("Starting prometheus", version.Info()) log.Infoln("Build context", version.BuildContext()) var reloadables []Reloadable var ( memStorage = local.NewMemorySeriesStorage(&cfg.storage) remoteStorage = remote.New(&cfg.remote) sampleAppender = storage.Fanout{memStorage} ) if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } var ( notifier = notifier.New(&cfg.notifier) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, Notifier: notifier, QueryEngine: queryEngine, ExternalURL: cfg.web.ExternalURL, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) version := &web.PrometheusVersion{ Version: version.Version, Revision: version.Revision, Branch: version.Branch, BuildUser: version.BuildUser, BuildDate: version.BuildDate, GoVersion: version.GoVersion, } webHandler := web.New(memStorage, queryEngine, targetManager, ruleManager, version, flags, &cfg.web) reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier) if !reloadConfig(cfg.configFile, reloadables...) { return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: case <-webHandler.Reload(): } reloadConfig(cfg.configFile, reloadables...) } }() // Start all components. The order is NOT arbitrary. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { prometheus.MustRegister(remoteStorage) go remoteStorage.Run() defer remoteStorage.Stop() } // The storage has to be fully initialized before registering. prometheus.MustRegister(memStorage) prometheus.MustRegister(notifier) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) // The notifieris a dependency of the rule manager. It has to be // started before and torn down afterwards. go notifier.Run() defer notifier.Stop() go ruleManager.Run() defer ruleManager.Stop() go targetManager.Run() defer targetManager.Stop() // Shutting down the query engine before the rule manager will cause pending queries // to be canceled and ensures a quick shutdown of the rule manager. defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { log.Error(err) return 2 } if cfg.printVersion { fmt.Fprintln(os.Stdout, version.Print("prometheus")) return 0 } log.Infoln("Starting prometheus", version.Info()) log.Infoln("Build context", version.BuildContext()) var ( sampleAppender = storage.Fanout{} reloadables []Reloadable ) var localStorage local.Storage switch cfg.localStorageEngine { case "persisted": localStorage = local.NewMemorySeriesStorage(&cfg.storage) sampleAppender = storage.Fanout{localStorage} case "none": localStorage = &local.NoopStorage{} default: log.Errorf("Invalid local storage engine %q", cfg.localStorageEngine) return 1 } remoteStorage, err := remote.New(&cfg.remote) if err != nil { log.Errorf("Error initializing remote storage: %s", err) return 1 } if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } reloadableRemoteStorage := remote.NewConfigurable() sampleAppender = append(sampleAppender, reloadableRemoteStorage) reloadables = append(reloadables, reloadableRemoteStorage) var ( notifier = notifier.New(&cfg.notifier) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(localStorage, &cfg.queryEngine) ctx, cancelCtx = context.WithCancel(context.Background()) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, Notifier: notifier, QueryEngine: queryEngine, Context: ctx, ExternalURL: cfg.web.ExternalURL, }) cfg.web.Context = ctx cfg.web.Storage = localStorage cfg.web.QueryEngine = queryEngine cfg.web.TargetManager = targetManager cfg.web.RuleManager = ruleManager cfg.web.Version = &web.PrometheusVersion{ Version: version.Version, Revision: version.Revision, Branch: version.Branch, BuildUser: version.BuildUser, BuildDate: version.BuildDate, GoVersion: version.GoVersion, } cfg.web.Flags = map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { cfg.web.Flags[f.Name] = f.Value.String() }) webHandler := web.New(&cfg.web) reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier) if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error loading config: %s", err) return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error reloading config: %s", err) } case rc := <-webHandler.Reload(): if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error reloading config: %s", err) rc <- err } else { rc <- nil } } } }() // Start all components. The order is NOT arbitrary. if err := localStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := localStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { remoteStorage.Start() defer remoteStorage.Stop() } defer reloadableRemoteStorage.Stop() // The storage has to be fully initialized before registering. if instrumentedStorage, ok := localStorage.(prometheus.Collector); ok { prometheus.MustRegister(instrumentedStorage) } prometheus.MustRegister(notifier) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) // The notifier is a dependency of the rule manager. It has to be // started before and torn down afterwards. go notifier.Run() defer notifier.Stop() go ruleManager.Run() defer ruleManager.Stop() go targetManager.Run() defer targetManager.Stop() // Shutting down the query engine before the rule manager will cause pending queries // to be canceled and ensures a quick shutdown of the rule manager. defer cancelCtx() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
// NewPrometheus creates a new prometheus object based on flag values. // Call Serve() to start serving and Close() for clean shutdown. func NewPrometheus() *prometheus { conf, err := config.LoadFromFile(*configFile) if err != nil { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } unwrittenSamples := make(chan clientmodel.Samples, *samplesQueueCapacity) ingester := &retrieval.MergeLabelsIngester{ Labels: conf.GlobalLabels(), CollisionPrefix: clientmodel.ExporterLabelPrefix, Ingester: retrieval.ChannelIngester(unwrittenSamples), } targetManager := retrieval.NewTargetManager(ingester) targetManager.AddTargetsFromConfig(conf) notificationHandler := notification.NewNotificationHandler(*alertmanagerURL, *notificationQueueCapacity) o := &local.MemorySeriesStorageOptions{ MemoryChunks: *numMemoryChunks, PersistenceStoragePath: *metricsStoragePath, PersistenceRetentionPeriod: *storageRetentionPeriod, CheckpointInterval: *checkpointInterval, CheckpointDirtySeriesLimit: *checkpointDirtySeriesLimit, Dirty: *storageDirty, } memStorage, err := local.NewMemorySeriesStorage(o) if err != nil { glog.Fatal("Error opening memory series storage: ", err) } ruleManager := manager.NewRuleManager(&manager.RuleManagerOptions{ Results: unwrittenSamples, NotificationHandler: notificationHandler, EvaluationInterval: conf.EvaluationInterval(), Storage: memStorage, PrometheusURL: web.MustBuildServerURL(), }) if err := ruleManager.AddRulesFromConfig(conf); err != nil { glog.Fatal("Error loading rule files: ", err) } var remoteTSDBQueue *remote.TSDBQueueManager if *remoteTSDBUrl == "" { glog.Warningf("No TSDB URL provided; not sending any samples to long-term storage") } else { openTSDB := opentsdb.NewClient(*remoteTSDBUrl, *remoteTSDBTimeout) remoteTSDBQueue = remote.NewTSDBQueueManager(openTSDB, 512) } flags := map[string]string{} flag.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) prometheusStatus := &web.PrometheusStatusHandler{ BuildInfo: BuildInfo, Config: conf.String(), RuleManager: ruleManager, TargetPools: targetManager.Pools(), Flags: flags, Birth: time.Now(), } alertsHandler := &web.AlertsHandler{ RuleManager: ruleManager, } consolesHandler := &web.ConsolesHandler{ Storage: memStorage, } metricsService := &api.MetricsService{ Config: &conf, TargetManager: targetManager, Storage: memStorage, } webService := &web.WebService{ StatusHandler: prometheusStatus, MetricsHandler: metricsService, ConsolesHandler: consolesHandler, AlertsHandler: alertsHandler, } p := &prometheus{ unwrittenSamples: unwrittenSamples, ruleManager: ruleManager, targetManager: targetManager, notificationHandler: notificationHandler, storage: memStorage, remoteTSDBQueue: remoteTSDBQueue, webService: webService, } webService.QuitDelegate = p.Close return p }
func main() { // TODO(all): Future additions to main should be, where applicable, glumped // into the prometheus struct above---at least where the scoping of the entire // server is concerned. flag.Parse() versionInfoTmpl.Execute(os.Stdout, BuildInfo) if *printVersion { os.Exit(0) } conf, err := config.LoadFromFile(*configFile) if err != nil { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } ts, err := tiered.NewTieredStorage(uint(*diskAppendQueueCapacity), 100, *arenaFlushInterval, *arenaTTL, *metricsStoragePath) if err != nil { glog.Fatal("Error opening storage: ", err) } var remoteTSDBQueue *remote.TSDBQueueManager = nil if *remoteTSDBUrl == "" { glog.Warningf("No TSDB URL provided; not sending any samples to long-term storage") } else { openTSDB := opentsdb.NewClient(*remoteTSDBUrl, *remoteTSDBTimeout) remoteTSDBQueue = remote.NewTSDBQueueManager(openTSDB, 512) go remoteTSDBQueue.Run() } unwrittenSamples := make(chan *extraction.Result, *samplesQueueCapacity) ingester := &retrieval.MergeLabelsIngester{ Labels: conf.GlobalLabels(), CollisionPrefix: clientmodel.ExporterLabelPrefix, Ingester: retrieval.ChannelIngester(unwrittenSamples), } compactionTimer := time.NewTicker(*compactInterval) deletionTimer := time.NewTicker(*deleteInterval) // Queue depth will need to be exposed targetManager := retrieval.NewTargetManager(ingester, *concurrentRetrievalAllowance) targetManager.AddTargetsFromConfig(conf) notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity) // Queue depth will need to be exposed ruleManager := rules.NewRuleManager(&rules.RuleManagerOptions{ Results: unwrittenSamples, Notifications: notifications, EvaluationInterval: conf.EvaluationInterval(), Storage: ts, PrometheusUrl: web.MustBuildServerUrl(), }) if err := ruleManager.AddRulesFromConfig(conf); err != nil { glog.Fatal("Error loading rule files: ", err) } go ruleManager.Run() notificationHandler := notification.NewNotificationHandler(*alertmanagerUrl, notifications) go notificationHandler.Run() flags := map[string]string{} flag.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) prometheusStatus := &web.PrometheusStatusHandler{ BuildInfo: BuildInfo, Config: conf.String(), RuleManager: ruleManager, TargetPools: targetManager.Pools(), Flags: flags, Birth: time.Now(), } alertsHandler := &web.AlertsHandler{ RuleManager: ruleManager, } databasesHandler := &web.DatabasesHandler{ Provider: ts.DiskStorage, RefreshInterval: 5 * time.Minute, } metricsService := &api.MetricsService{ Config: &conf, TargetManager: targetManager, Storage: ts, } prometheus := &prometheus{ compactionTimer: compactionTimer, deletionTimer: deletionTimer, curationState: prometheusStatus, curationSema: make(chan struct{}, 1), unwrittenSamples: unwrittenSamples, stopBackgroundOperations: make(chan struct{}), ruleManager: ruleManager, targetManager: targetManager, notifications: notifications, storage: ts, remoteTSDBQueue: remoteTSDBQueue, } defer prometheus.Close() webService := &web.WebService{ StatusHandler: prometheusStatus, MetricsHandler: metricsService, DatabasesHandler: databasesHandler, AlertsHandler: alertsHandler, QuitDelegate: prometheus.Close, } prometheus.curationSema <- struct{}{} storageStarted := make(chan bool) go ts.Serve(storageStarted) <-storageStarted go prometheus.interruptHandler() go func() { for _ = range prometheus.compactionTimer.C { glog.Info("Starting compaction...") err := prometheus.compact(*compactAgeInclusiveness, *compactGroupSize) if err != nil { glog.Error("could not compact: ", err) } glog.Info("Done") } }() go func() { for _ = range prometheus.deletionTimer.C { glog.Info("Starting deletion of stale values...") err := prometheus.delete(*deleteAge, deletionBatchSize) if err != nil { glog.Error("could not delete: ", err) } glog.Info("Done") } }() go func() { err := webService.ServeForever() if err != nil { glog.Fatal(err) } }() // TODO(all): Migrate this into prometheus.serve(). for block := range unwrittenSamples { if block.Err == nil && len(block.Samples) > 0 { ts.AppendSamples(block.Samples) if remoteTSDBQueue != nil { remoteTSDBQueue.Queue(block.Samples) } } } }