func Main() int { if err := parse(os.Args[1:]); err != nil { return 2 } versionInfoTmpl.Execute(os.Stdout, BuildInfo) if cfg.printVersion { return 0 } memStorage := local.NewMemorySeriesStorage(&cfg.storage) var ( sampleAppender storage.SampleAppender remoteStorageQueues []*remote.StorageQueueManager ) if cfg.opentsdbURL == "" && cfg.influxdbURL == "" { log.Warnf("No remote storage URLs provided; not sending any samples to long-term storage") sampleAppender = memStorage } else { fanout := storage.Fanout{memStorage} addRemoteStorage := func(c remote.StorageClient) { qm := remote.NewStorageQueueManager(c, 100*1024) fanout = append(fanout, qm) remoteStorageQueues = append(remoteStorageQueues, qm) } if cfg.opentsdbURL != "" { addRemoteStorage(opentsdb.NewClient(cfg.opentsdbURL, cfg.remoteStorageTimeout)) } if cfg.influxdbURL != "" { addRemoteStorage(influxdb.NewClient(cfg.influxdbURL, cfg.remoteStorageTimeout, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy)) } sampleAppender = fanout } var ( notificationHandler = notification.NewNotificationHandler(&cfg.notification) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, NotificationHandler: notificationHandler, QueryEngine: queryEngine, PrometheusURL: cfg.prometheusURL, PathPrefix: cfg.web.PathPrefix, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) status := &web.PrometheusStatus{ BuildInfo: BuildInfo, TargetPools: targetManager.Pools, Rules: ruleManager.Rules, Flags: flags, Birth: time.Now(), } webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web) if !reloadConfig(cfg.configFile, status, targetManager, ruleManager) { os.Exit(1) } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for range hup { reloadConfig(cfg.configFile, status, targetManager, ruleManager) } }() // Start all components. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() // The storage has to be fully initialized before registering. registry.MustRegister(memStorage) registry.MustRegister(notificationHandler) for _, q := range remoteStorageQueues { registry.MustRegister(q) go q.Run() defer q.Stop() } go ruleManager.Run() defer ruleManager.Stop() go notificationHandler.Run() defer notificationHandler.Stop() go targetManager.Run() defer targetManager.Stop() defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") } close(hup) log.Info("See you next time!") return 0 }
// NewPrometheus creates a new prometheus object based on flag values. // Call Serve() to start serving and Close() for clean shutdown. func NewPrometheus() *prometheus { conf, err := config.LoadFromFile(*configFile) if err != nil { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } unwrittenSamples := make(chan clientmodel.Samples, *samplesQueueCapacity) ingester := &retrieval.MergeLabelsIngester{ Labels: conf.GlobalLabels(), CollisionPrefix: clientmodel.ExporterLabelPrefix, Ingester: retrieval.ChannelIngester(unwrittenSamples), } targetManager := retrieval.NewTargetManager(ingester) targetManager.AddTargetsFromConfig(conf) notificationHandler := notification.NewNotificationHandler(*alertmanagerURL, *notificationQueueCapacity) o := &local.MemorySeriesStorageOptions{ MemoryChunks: *numMemoryChunks, PersistenceStoragePath: *metricsStoragePath, PersistenceRetentionPeriod: *storageRetentionPeriod, CheckpointInterval: *checkpointInterval, CheckpointDirtySeriesLimit: *checkpointDirtySeriesLimit, Dirty: *storageDirty, } memStorage, err := local.NewMemorySeriesStorage(o) if err != nil { glog.Fatal("Error opening memory series storage: ", err) } ruleManager := manager.NewRuleManager(&manager.RuleManagerOptions{ Results: unwrittenSamples, NotificationHandler: notificationHandler, EvaluationInterval: conf.EvaluationInterval(), Storage: memStorage, PrometheusURL: web.MustBuildServerURL(), }) if err := ruleManager.AddRulesFromConfig(conf); err != nil { glog.Fatal("Error loading rule files: ", err) } var remoteTSDBQueue *remote.TSDBQueueManager if *remoteTSDBUrl == "" { glog.Warningf("No TSDB URL provided; not sending any samples to long-term storage") } else { openTSDB := opentsdb.NewClient(*remoteTSDBUrl, *remoteTSDBTimeout) remoteTSDBQueue = remote.NewTSDBQueueManager(openTSDB, 512) } flags := map[string]string{} flag.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) prometheusStatus := &web.PrometheusStatusHandler{ BuildInfo: BuildInfo, Config: conf.String(), RuleManager: ruleManager, TargetPools: targetManager.Pools(), Flags: flags, Birth: time.Now(), } alertsHandler := &web.AlertsHandler{ RuleManager: ruleManager, } consolesHandler := &web.ConsolesHandler{ Storage: memStorage, } metricsService := &api.MetricsService{ Config: &conf, TargetManager: targetManager, Storage: memStorage, } webService := &web.WebService{ StatusHandler: prometheusStatus, MetricsHandler: metricsService, ConsolesHandler: consolesHandler, AlertsHandler: alertsHandler, } p := &prometheus{ unwrittenSamples: unwrittenSamples, ruleManager: ruleManager, targetManager: targetManager, notificationHandler: notificationHandler, storage: memStorage, remoteTSDBQueue: remoteTSDBQueue, webService: webService, } webService.QuitDelegate = p.Close return p }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { return 2 } printVersion() if cfg.printVersion { return 0 } var reloadables []Reloadable var ( memStorage = local.NewMemorySeriesStorage(&cfg.storage) remoteStorage = remote.New(&cfg.remote) sampleAppender = storage.Fanout{memStorage} ) if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } var ( notificationHandler = notification.NewNotificationHandler(&cfg.notification) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, NotificationHandler: notificationHandler, QueryEngine: queryEngine, ExternalURL: cfg.web.ExternalURL, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) status := &web.PrometheusStatus{ TargetPools: targetManager.Pools, Rules: ruleManager.Rules, Flags: flags, Birth: time.Now(), } webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web) reloadables = append(reloadables, status, targetManager, ruleManager, webHandler, notificationHandler) if !reloadConfig(cfg.configFile, reloadables...) { return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: case <-webHandler.Reload(): } reloadConfig(cfg.configFile, reloadables...) } }() // Start all components. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { prometheus.MustRegister(remoteStorage) go remoteStorage.Run() defer remoteStorage.Stop() } // The storage has to be fully initialized before registering. prometheus.MustRegister(memStorage) prometheus.MustRegister(notificationHandler) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) go ruleManager.Run() defer ruleManager.Stop() go notificationHandler.Run() defer notificationHandler.Stop() go targetManager.Run() defer targetManager.Stop() defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
func main() { // TODO(all): Future additions to main should be, where applicable, glumped // into the prometheus struct above---at least where the scoping of the entire // server is concerned. flag.Parse() versionInfoTmpl.Execute(os.Stdout, BuildInfo) if *printVersion { os.Exit(0) } conf, err := config.LoadFromFile(*configFile) if err != nil { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } ts, err := tiered.NewTieredStorage(uint(*diskAppendQueueCapacity), 100, *arenaFlushInterval, *arenaTTL, *metricsStoragePath) if err != nil { glog.Fatal("Error opening storage: ", err) } var remoteTSDBQueue *remote.TSDBQueueManager = nil if *remoteTSDBUrl == "" { glog.Warningf("No TSDB URL provided; not sending any samples to long-term storage") } else { openTSDB := opentsdb.NewClient(*remoteTSDBUrl, *remoteTSDBTimeout) remoteTSDBQueue = remote.NewTSDBQueueManager(openTSDB, 512) go remoteTSDBQueue.Run() } unwrittenSamples := make(chan *extraction.Result, *samplesQueueCapacity) ingester := &retrieval.MergeLabelsIngester{ Labels: conf.GlobalLabels(), CollisionPrefix: clientmodel.ExporterLabelPrefix, Ingester: retrieval.ChannelIngester(unwrittenSamples), } compactionTimer := time.NewTicker(*compactInterval) deletionTimer := time.NewTicker(*deleteInterval) // Queue depth will need to be exposed targetManager := retrieval.NewTargetManager(ingester, *concurrentRetrievalAllowance) targetManager.AddTargetsFromConfig(conf) notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity) // Queue depth will need to be exposed ruleManager := rules.NewRuleManager(&rules.RuleManagerOptions{ Results: unwrittenSamples, Notifications: notifications, EvaluationInterval: conf.EvaluationInterval(), Storage: ts, PrometheusUrl: web.MustBuildServerUrl(), }) if err := ruleManager.AddRulesFromConfig(conf); err != nil { glog.Fatal("Error loading rule files: ", err) } go ruleManager.Run() notificationHandler := notification.NewNotificationHandler(*alertmanagerUrl, notifications) go notificationHandler.Run() flags := map[string]string{} flag.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) prometheusStatus := &web.PrometheusStatusHandler{ BuildInfo: BuildInfo, Config: conf.String(), RuleManager: ruleManager, TargetPools: targetManager.Pools(), Flags: flags, Birth: time.Now(), } alertsHandler := &web.AlertsHandler{ RuleManager: ruleManager, } databasesHandler := &web.DatabasesHandler{ Provider: ts.DiskStorage, RefreshInterval: 5 * time.Minute, } metricsService := &api.MetricsService{ Config: &conf, TargetManager: targetManager, Storage: ts, } prometheus := &prometheus{ compactionTimer: compactionTimer, deletionTimer: deletionTimer, curationState: prometheusStatus, curationSema: make(chan struct{}, 1), unwrittenSamples: unwrittenSamples, stopBackgroundOperations: make(chan struct{}), ruleManager: ruleManager, targetManager: targetManager, notifications: notifications, storage: ts, remoteTSDBQueue: remoteTSDBQueue, } defer prometheus.Close() webService := &web.WebService{ StatusHandler: prometheusStatus, MetricsHandler: metricsService, DatabasesHandler: databasesHandler, AlertsHandler: alertsHandler, QuitDelegate: prometheus.Close, } prometheus.curationSema <- struct{}{} storageStarted := make(chan bool) go ts.Serve(storageStarted) <-storageStarted go prometheus.interruptHandler() go func() { for _ = range prometheus.compactionTimer.C { glog.Info("Starting compaction...") err := prometheus.compact(*compactAgeInclusiveness, *compactGroupSize) if err != nil { glog.Error("could not compact: ", err) } glog.Info("Done") } }() go func() { for _ = range prometheus.deletionTimer.C { glog.Info("Starting deletion of stale values...") err := prometheus.delete(*deleteAge, deletionBatchSize) if err != nil { glog.Error("could not delete: ", err) } glog.Info("Done") } }() go func() { err := webService.ServeForever() if err != nil { glog.Fatal(err) } }() // TODO(all): Migrate this into prometheus.serve(). for block := range unwrittenSamples { if block.Err == nil && len(block.Samples) > 0 { ts.AppendSamples(block.Samples) if remoteTSDBQueue != nil { remoteTSDBQueue.Queue(block.Samples) } } } }