Example #1
0
func TestRuleEval(t *testing.T) {
	storage, closer := local.NewTestStorage(t, 2)
	defer closer.Close()
	engine := promql.NewEngine(storage, nil)
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer cancelCtx()

	now := model.Now()

	suite := []struct {
		name   string
		expr   promql.Expr
		labels model.LabelSet
		result model.Vector
	}{
		{
			name:   "nolabels",
			expr:   &promql.NumberLiteral{Val: 1},
			labels: model.LabelSet{},
			result: model.Vector{&model.Sample{
				Value:     1,
				Timestamp: now,
				Metric:    model.Metric{"__name__": "nolabels"},
			}},
		},
		{
			name:   "labels",
			expr:   &promql.NumberLiteral{Val: 1},
			labels: model.LabelSet{"foo": "bar"},
			result: model.Vector{&model.Sample{
				Value:     1,
				Timestamp: now,
				Metric:    model.Metric{"__name__": "labels", "foo": "bar"},
			}},
		},
	}

	for _, test := range suite {
		rule := NewRecordingRule(test.name, test.expr, test.labels)
		result, err := rule.eval(ctx, now, engine, "")
		if err != nil {
			t.Fatalf("Error evaluating %s", test.name)
		}
		if !reflect.DeepEqual(result, test.result) {
			t.Fatalf("Error: expected %q, got %q", test.result, result)
		}
	}
}
Example #2
0
func TestAlertingRule(t *testing.T) {
	// Labels in expected output need to be alphabetically sorted.
	var evalOutputs = [][]string{
		{
			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
		},
		{
			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
		},
		{
			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
		},
		{
		/* empty */
		},
		{
		/* empty */
		},
	}

	storage, closer := local.NewTestStorage(t, 1)
	defer closer.Close()

	storeMatrix(storage, testMatrix)

	engine := promql.NewEngine(storage, nil)
	defer engine.Stop()

	expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`)
	if err != nil {
		t.Fatalf("Unable to parse alert expression: %s", err)
	}

	alertLabels := clientmodel.LabelSet{
		"severity": "critical",
	}
	rule := NewAlertingRule("HttpRequestRateLow", expr, time.Minute, alertLabels, "summary", "description")

	for i, expectedLines := range evalOutputs {
		evalTime := testStartTime.Add(testSampleInterval * time.Duration(i))

		res, err := rule.eval(evalTime, engine)
		if err != nil {
			t.Fatalf("Error during alerting rule evaluation: %s", err)
		}

		actualLines := strings.Split(res.String(), "\n")
		expectedLines := annotateWithTime(expectedLines, evalTime)
		if actualLines[0] == "" {
			actualLines = []string{}
		}

		failed := false
		if len(actualLines) != len(expectedLines) {
			t.Errorf("%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(expectedLines), len(actualLines))
			failed = true
		}

		for j, expectedSample := range expectedLines {
			found := false
			for _, actualSample := range actualLines {
				if actualSample == expectedSample {
					found = true
				}
			}
			if !found {
				t.Errorf("%d.%d. Couldn't find expected sample in output: '%v'", i, j, expectedSample)
				failed = true
			}
		}

		if failed {
			t.Fatalf("%d. Expected and actual outputs don't match:\n%v", i, vectorComparisonString(expectedLines, actualLines))
		}
	}
}
Example #3
0
func TestTemplateExpansion(t *testing.T) {
	scenarios := []testTemplatesScenario{
		{
			// No template.
			text:   "plain text",
			output: "plain text",
		},
		{
			// Simple value.
			text:   "{{ 1 }}",
			output: "1",
		},
		{
			// HTML escaping.
			text:   "{{ \"<b>\" }}",
			output: "&lt;b&gt;",
			html:   true,
		},
		{
			// Disabling HTML escaping.
			text:   "{{ \"<b>\" | safeHtml }}",
			output: "<b>",
			html:   true,
		},
		{
			// HTML escaping doesn't apply to non-html.
			text:   "{{ \"<b>\" }}",
			output: "<b>",
		},
		{
			// Pass multiple arguments to templates.
			text:   "{{define \"x\"}}{{.arg0}} {{.arg1}}{{end}}{{template \"x\" (args 1 \"2\")}}",
			output: "1 2",
		},
		{
			text:   "{{ query \"1.5\" | first | value }}",
			output: "1.5",
		},
		{
			// Get value from scalar query.
			text:   "{{ query \"scalar(count(metric))\" | first | value }}",
			output: "2",
		},
		{
			// Get value from query.
			text:   "{{ query \"metric{instance='a'}\" | first | value }}",
			output: "11",
		},
		{
			// Get label from query.
			text:   "{{ query \"metric{instance='a'}\" | first | label \"instance\" }}",
			output: "a",
		},
		{
			// Range over query and sort by label.
			text:   "{{ range query \"metric\" | sortByLabel \"instance\" }}{{.Labels.instance}}:{{.Value}}: {{end}}",
			output: "a:11: b:21: ",
		},
		{
			// Unparsable template.
			text:       "{{",
			shouldFail: true,
		},
		{
			// Error in function.
			text:       "{{ query \"missing\" | first }}",
			shouldFail: true,
		},
		{
			// Panic.
			text:       "{{ (query \"missing\").banana }}",
			shouldFail: true,
		},
		{
			// Regex replacement.
			text:   "{{ reReplaceAll \"(a)b\" \"x$1\" \"ab\" }}",
			output: "xa",
		},
		{
			// Humanize.
			text:   "{{ range . }}{{ humanize . }}:{{ end }}",
			input:  []float64{0.0, 1.0, 1234567.0, .12},
			output: "0:1:1.235M:120m:",
		},
		{
			// Humanize1024.
			text:   "{{ range . }}{{ humanize1024 . }}:{{ end }}",
			input:  []float64{0.0, 1.0, 1048576.0, .12},
			output: "0:1:1Mi:0.12:",
		},
		{
			// HumanizeDuration - seconds.
			text:   "{{ range . }}{{ humanizeDuration . }}:{{ end }}",
			input:  []float64{0, 1, 60, 3600, 86400, 86400 + 3600, -(86400*2 + 3600*3 + 60*4 + 5), 899.99},
			output: "0s:1s:1m 0s:1h 0m 0s:1d 0h 0m 0s:1d 1h 0m 0s:-2d 3h 4m 5s:14m 59s:",
		},
		{
			// HumanizeDuration - subsecond and fractional seconds.
			text:   "{{ range . }}{{ humanizeDuration . }}:{{ end }}",
			input:  []float64{.1, .0001, .12345, 60.1, 60.5, 1.2345, 12.345},
			output: "100ms:100us:123.5ms:1m 0s:1m 0s:1.234s:12.35s:",
		},
		{
			// Humanize* Inf and NaN.
			text:   "{{ range . }}{{ humanize . }}:{{ humanize1024 . }}:{{ humanizeDuration . }}:{{humanizeTimestamp .}}:{{ end }}",
			input:  []float64{math.Inf(1), math.Inf(-1), math.NaN()},
			output: "+Inf:+Inf:+Inf:+Inf:-Inf:-Inf:-Inf:-Inf:NaN:NaN:NaN:NaN:",
		},
		{
			// HumanizeTimestamp - clientmodel.SampleValue input.
			text:   "{{ 1435065584.128 | humanizeTimestamp }}",
			output: "2015-06-23 13:19:44.128 +0000 UTC",
		},
		{
			// Title.
			text:   "{{ \"aa bb CC\" | title }}",
			output: "Aa Bb CC",
		},
		{
			// Match.
			text:   "{{ match \"a+\" \"aa\" }} {{ match \"a+\" \"b\" }}",
			output: "true false",
		},
		{
			// graphLink.
			text:   "{{ graphLink \"up\" }}",
			output: "/graph#%5B%7B%22expr%22%3A%22up%22%2C%22tab%22%3A0%7D%5D",
		},
		{
			// tableLink.
			text:   "{{ tableLink \"up\" }}",
			output: "/graph#%5B%7B%22expr%22%3A%22up%22%2C%22tab%22%3A1%7D%5D",
		},
		{
			// tmpl.
			text:   "{{ define \"a\" }}x{{ end }}{{ $name := \"a\"}}{{ tmpl $name . }}",
			output: "x",
			html:   true,
		},
	}

	time := clientmodel.Timestamp(0)

	storage, closer := local.NewTestStorage(t, 1)
	defer closer.Close()
	storage.Append(&clientmodel.Sample{
		Metric: clientmodel.Metric{
			clientmodel.MetricNameLabel: "metric",
			"instance":                  "a"},
		Value: 11,
	})
	storage.Append(&clientmodel.Sample{
		Metric: clientmodel.Metric{
			clientmodel.MetricNameLabel: "metric",
			"instance":                  "b"},
		Value: 21,
	})
	storage.WaitForIndexing()

	engine := promql.NewEngine(storage, nil)

	for i, s := range scenarios {
		var result string
		var err error
		expander := NewTemplateExpander(s.text, "test", s.input, time, engine, "")
		if s.html {
			result, err = expander.ExpandHTML(nil)
		} else {
			result, err = expander.Expand()
		}
		if s.shouldFail {
			if err == nil {
				t.Fatalf("%d. Error not returned from %v", i, s.text)
			}
			continue
		}
		if err != nil {
			t.Fatalf("%d. Error returned from %v: %v", i, s.text, err)
			continue
		}
		if result != s.output {
			t.Fatalf("%d. Error in result from %v: Expected '%v' Got '%v'", i, s.text, s.output, result)
			continue
		}
	}
}
Example #4
0
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		return 2
	}

	printVersion()
	if cfg.printVersion {
		return 0
	}

	var reloadables []Reloadable

	var (
		memStorage     = local.NewMemorySeriesStorage(&cfg.storage)
		remoteStorage  = remote.New(&cfg.remote)
		sampleAppender = storage.Fanout{memStorage}
	)
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	var (
		notificationHandler = notification.New(&cfg.notification)
		targetManager       = retrieval.NewTargetManager(sampleAppender)
		queryEngine         = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender:      sampleAppender,
		NotificationHandler: notificationHandler,
		QueryEngine:         queryEngine,
		ExternalURL:         cfg.web.ExternalURL,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	status := &web.PrometheusStatus{
		TargetPools: targetManager.Pools,
		Rules:       ruleManager.Rules,
		Flags:       flags,
		Birth:       time.Now(),
	}

	webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web)

	reloadables = append(reloadables, status, targetManager, ruleManager, webHandler, notificationHandler)

	if !reloadConfig(cfg.configFile, reloadables...) {
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webHandler.Reload():
			}
			reloadConfig(cfg.configFile, reloadables...)
		}
	}()

	// Start all components.
	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		prometheus.MustRegister(remoteStorage)

		go remoteStorage.Run()
		defer remoteStorage.Stop()
	}
	// The storage has to be fully initialized before registering.
	prometheus.MustRegister(memStorage)
	prometheus.MustRegister(notificationHandler)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	defer ruleManager.Stop()

	go notificationHandler.Run()
	defer notificationHandler.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}
Example #5
0
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		return 2
	}

	versionInfoTmpl.Execute(os.Stdout, BuildInfo)
	if cfg.printVersion {
		return 0
	}

	memStorage := local.NewMemorySeriesStorage(&cfg.storage)

	var (
		sampleAppender      storage.SampleAppender
		remoteStorageQueues []*remote.StorageQueueManager
	)
	if cfg.opentsdbURL == "" && cfg.influxdbURL == "" {
		log.Warnf("No remote storage URLs provided; not sending any samples to long-term storage")
		sampleAppender = memStorage
	} else {
		fanout := storage.Fanout{memStorage}

		addRemoteStorage := func(c remote.StorageClient) {
			qm := remote.NewStorageQueueManager(c, 100*1024)
			fanout = append(fanout, qm)
			remoteStorageQueues = append(remoteStorageQueues, qm)
		}

		if cfg.opentsdbURL != "" {
			addRemoteStorage(opentsdb.NewClient(cfg.opentsdbURL, cfg.remoteStorageTimeout))
		}
		if cfg.influxdbURL != "" {
			addRemoteStorage(influxdb.NewClient(cfg.influxdbURL, cfg.remoteStorageTimeout, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy))
		}

		sampleAppender = fanout
	}

	var (
		notificationHandler = notification.NewNotificationHandler(&cfg.notification)
		targetManager       = retrieval.NewTargetManager(sampleAppender)
		queryEngine         = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender:      sampleAppender,
		NotificationHandler: notificationHandler,
		QueryEngine:         queryEngine,
		PrometheusURL:       cfg.prometheusURL,
		PathPrefix:          cfg.web.PathPrefix,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	status := &web.PrometheusStatus{
		BuildInfo:   BuildInfo,
		TargetPools: targetManager.Pools,
		Rules:       ruleManager.Rules,
		Flags:       flags,
		Birth:       time.Now(),
	}

	webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web)

	if !reloadConfig(cfg.configFile, status, targetManager, ruleManager) {
		os.Exit(1)
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for range hup {
			reloadConfig(cfg.configFile, status, targetManager, ruleManager)
		}
	}()

	// Start all components.
	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	// The storage has to be fully initialized before registering.
	registry.MustRegister(memStorage)
	registry.MustRegister(notificationHandler)

	for _, q := range remoteStorageQueues {
		registry.MustRegister(q)

		go q.Run()
		defer q.Stop()
	}

	go ruleManager.Run()
	defer ruleManager.Stop()

	go notificationHandler.Run()
	defer notificationHandler.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	}

	close(hup)

	log.Info("See you next time!")
	return 0
}
Example #6
0
func TestQuery(t *testing.T) {
	scenarios := []struct {
		// URL query string.
		queryStr string
		// Expected HTTP response status code.
		status int
		// Regex to match against response body.
		bodyRe string
	}{
		{
			queryStr: "",
			status:   http.StatusOK,
			bodyRe:   `{"type":"error","value":"Parse error at char 1: no expression found in input","version":1}`,
		},
		{
			queryStr: "expr=testmetric",
			status:   http.StatusOK,
			bodyRe:   `{"type":"vector","value":\[\{"metric":{"__name__":"testmetric"},"value":"0","timestamp":\d+\.\d+}\],"version":1\}`,
		},
		{
			queryStr: "expr=testmetric&timestamp=" + testTimestamp.String(),
			status:   http.StatusOK,
			bodyRe:   `{"type":"vector","value":\[\{"metric":{"__name__":"testmetric"},"value":"0","timestamp":` + testTimestamp.String() + `}\],"version":1\}`,
		},
		{
			queryStr: "expr=testmetric&timestamp=" + testTimestamp.Add(-time.Hour).String(),
			status:   http.StatusOK,
			bodyRe:   `{"type":"vector","value":\[\],"version":1\}`,
		},
		{
			queryStr: "timestamp=invalid",
			status:   http.StatusBadRequest,
			bodyRe:   "invalid query timestamp",
		},
		{
			queryStr: "expr=(badexpression",
			status:   http.StatusOK,
			bodyRe:   `{"type":"error","value":"Parse error at char 15: unclosed left parenthesis","version":1}`,
		},
	}

	storage, closer := local.NewTestStorage(t, 1)
	defer closer.Close()
	storage.Append(&clientmodel.Sample{
		Metric: clientmodel.Metric{
			clientmodel.MetricNameLabel: "testmetric",
		},
		Timestamp: testTimestamp,
		Value:     0,
	})
	storage.WaitForIndexing()

	api := &API{
		Now:         testNow,
		Storage:     storage,
		QueryEngine: promql.NewEngine(storage, nil),
	}
	rtr := route.New()
	api.Register(rtr.WithPrefix("/api"))

	server := httptest.NewServer(rtr)
	defer server.Close()

	for i, s := range scenarios {
		// Do query.
		resp, err := http.Get(server.URL + "/api/query?" + s.queryStr)
		if err != nil {
			t.Fatalf("%d. Error querying API: %s", i, err)
		}

		// Check status code.
		if resp.StatusCode != s.status {
			t.Fatalf("%d. Unexpected status code; got %d, want %d", i, resp.StatusCode, s.status)
		}

		// Check response headers.
		ct := resp.Header["Content-Type"]
		if len(ct) != 1 {
			t.Fatalf("%d. Unexpected number of 'Content-Type' headers; got %d, want 1", i, len(ct))
		}
		if ct[0] != "application/json" {
			t.Fatalf("%d. Unexpected 'Content-Type' header; got %s; want %s", i, ct[0], "application/json")
		}

		// Check body.
		b, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			t.Fatalf("%d. Error reading response body: %s", i, err)
		}
		re := regexp.MustCompile(s.bodyRe)
		if !re.Match(b) {
			t.Fatalf("%d. Body didn't match '%s'. Body: %s", i, s.bodyRe, string(b))
		}
	}
}
Example #7
0
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		log.Error(err)
		return 2
	}

	if cfg.printVersion {
		fmt.Fprintln(os.Stdout, version.Print("prometheus"))
		return 0
	}

	log.Infoln("Starting prometheus", version.Info())
	log.Infoln("Build context", version.BuildContext())

	var reloadables []Reloadable

	var (
		memStorage     = local.NewMemorySeriesStorage(&cfg.storage)
		remoteStorage  = remote.New(&cfg.remote)
		sampleAppender = storage.Fanout{memStorage}
	)
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	var (
		notifier      = notifier.New(&cfg.notifier)
		targetManager = retrieval.NewTargetManager(sampleAppender)
		queryEngine   = promql.NewEngine(memStorage, &cfg.queryEngine)
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender: sampleAppender,
		Notifier:       notifier,
		QueryEngine:    queryEngine,
		ExternalURL:    cfg.web.ExternalURL,
	})

	flags := map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		flags[f.Name] = f.Value.String()
	})

	version := &web.PrometheusVersion{
		Version:   version.Version,
		Revision:  version.Revision,
		Branch:    version.Branch,
		BuildUser: version.BuildUser,
		BuildDate: version.BuildDate,
		GoVersion: version.GoVersion,
	}

	webHandler := web.New(memStorage, queryEngine, targetManager, ruleManager, version, flags, &cfg.web)

	reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier)

	if !reloadConfig(cfg.configFile, reloadables...) {
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webHandler.Reload():
			}
			reloadConfig(cfg.configFile, reloadables...)
		}
	}()

	// Start all components. The order is NOT arbitrary.

	if err := memStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := memStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		prometheus.MustRegister(remoteStorage)

		go remoteStorage.Run()
		defer remoteStorage.Stop()
	}
	// The storage has to be fully initialized before registering.
	prometheus.MustRegister(memStorage)
	prometheus.MustRegister(notifier)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	// The notifieris a dependency of the rule manager. It has to be
	// started before and torn down afterwards.
	go notifier.Run()
	defer notifier.Stop()

	go ruleManager.Run()
	defer ruleManager.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	// Shutting down the query engine before the rule manager will cause pending queries
	// to be canceled and ensures a quick shutdown of the rule manager.
	defer queryEngine.Stop()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}
Example #8
0
// Main manages the startup and shutdown lifecycle of the entire Prometheus server.
func Main() int {
	if err := parse(os.Args[1:]); err != nil {
		log.Error(err)
		return 2
	}

	if cfg.printVersion {
		fmt.Fprintln(os.Stdout, version.Print("prometheus"))
		return 0
	}

	log.Infoln("Starting prometheus", version.Info())
	log.Infoln("Build context", version.BuildContext())

	var (
		sampleAppender = storage.Fanout{}
		reloadables    []Reloadable
	)

	var localStorage local.Storage
	switch cfg.localStorageEngine {
	case "persisted":
		localStorage = local.NewMemorySeriesStorage(&cfg.storage)
		sampleAppender = storage.Fanout{localStorage}
	case "none":
		localStorage = &local.NoopStorage{}
	default:
		log.Errorf("Invalid local storage engine %q", cfg.localStorageEngine)
		return 1
	}

	remoteStorage, err := remote.New(&cfg.remote)
	if err != nil {
		log.Errorf("Error initializing remote storage: %s", err)
		return 1
	}
	if remoteStorage != nil {
		sampleAppender = append(sampleAppender, remoteStorage)
		reloadables = append(reloadables, remoteStorage)
	}

	reloadableRemoteStorage := remote.NewConfigurable()
	sampleAppender = append(sampleAppender, reloadableRemoteStorage)
	reloadables = append(reloadables, reloadableRemoteStorage)

	var (
		notifier       = notifier.New(&cfg.notifier)
		targetManager  = retrieval.NewTargetManager(sampleAppender)
		queryEngine    = promql.NewEngine(localStorage, &cfg.queryEngine)
		ctx, cancelCtx = context.WithCancel(context.Background())
	)

	ruleManager := rules.NewManager(&rules.ManagerOptions{
		SampleAppender: sampleAppender,
		Notifier:       notifier,
		QueryEngine:    queryEngine,
		Context:        ctx,
		ExternalURL:    cfg.web.ExternalURL,
	})

	cfg.web.Context = ctx
	cfg.web.Storage = localStorage
	cfg.web.QueryEngine = queryEngine
	cfg.web.TargetManager = targetManager
	cfg.web.RuleManager = ruleManager

	cfg.web.Version = &web.PrometheusVersion{
		Version:   version.Version,
		Revision:  version.Revision,
		Branch:    version.Branch,
		BuildUser: version.BuildUser,
		BuildDate: version.BuildDate,
		GoVersion: version.GoVersion,
	}

	cfg.web.Flags = map[string]string{}
	cfg.fs.VisitAll(func(f *flag.Flag) {
		cfg.web.Flags[f.Name] = f.Value.String()
	})

	webHandler := web.New(&cfg.web)

	reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier)

	if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
		log.Errorf("Error loading config: %s", err)
		return 1
	}

	// Wait for reload or termination signals. Start the handler for SIGHUP as
	// early as possible, but ignore it until we are ready to handle reloading
	// our config.
	hup := make(chan os.Signal)
	hupReady := make(chan bool)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
				if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
					log.Errorf("Error reloading config: %s", err)
				}
			case rc := <-webHandler.Reload():
				if err := reloadConfig(cfg.configFile, reloadables...); err != nil {
					log.Errorf("Error reloading config: %s", err)
					rc <- err
				} else {
					rc <- nil
				}
			}
		}
	}()

	// Start all components. The order is NOT arbitrary.

	if err := localStorage.Start(); err != nil {
		log.Errorln("Error opening memory series storage:", err)
		return 1
	}
	defer func() {
		if err := localStorage.Stop(); err != nil {
			log.Errorln("Error stopping storage:", err)
		}
	}()

	if remoteStorage != nil {
		remoteStorage.Start()
		defer remoteStorage.Stop()
	}

	defer reloadableRemoteStorage.Stop()

	// The storage has to be fully initialized before registering.
	if instrumentedStorage, ok := localStorage.(prometheus.Collector); ok {
		prometheus.MustRegister(instrumentedStorage)
	}
	prometheus.MustRegister(notifier)
	prometheus.MustRegister(configSuccess)
	prometheus.MustRegister(configSuccessTime)

	// The notifier is a dependency of the rule manager. It has to be
	// started before and torn down afterwards.
	go notifier.Run()
	defer notifier.Stop()

	go ruleManager.Run()
	defer ruleManager.Stop()

	go targetManager.Run()
	defer targetManager.Stop()

	// Shutting down the query engine before the rule manager will cause pending queries
	// to be canceled and ensures a quick shutdown of the rule manager.
	defer cancelCtx()

	go webHandler.Run()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	term := make(chan os.Signal)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	select {
	case <-term:
		log.Warn("Received SIGTERM, exiting gracefully...")
	case <-webHandler.Quit():
		log.Warn("Received termination request via web service, exiting gracefully...")
	case err := <-webHandler.ListenError():
		log.Errorln("Error starting web server, exiting gracefully:", err)
	}

	log.Info("See you next time!")
	return 0
}