Example #1
func (s *memorySeriesStorage) loop() {
	checkpointTimer := time.NewTimer(s.checkpointInterval)

	dirtySeriesCount := 0

	defer func() {
		checkpointTimer.Stop()
		log.Info("Maintenance loop stopped.")
		close(s.loopStopped)
	}()

	memoryFingerprints := s.cycleThroughMemoryFingerprints()
	archivedFingerprints := s.cycleThroughArchivedFingerprints()

loop:
	for {
		select {
		case <-s.loopStopping:
			break loop
		case <-checkpointTimer.C:
			err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
			if err != nil {
				log.Errorln("Error while checkpointing:", err)
			} else {
				dirtySeriesCount = 0
			}
			// If a checkpoint takes longer than checkpointInterval, an unluckily
			// timed combination with the Reset(0) call below can lead to a case
			// where a stale tick is lurking in C, causing repeated checkpointing
			// without a break.
			select {
			case <-checkpointTimer.C: // Get rid of the lurking time.
			default:
			}
			checkpointTimer.Reset(s.checkpointInterval)
		case fp := <-memoryFingerprints:
			if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) {
				dirtySeriesCount++
				// Check if we have enough "dirty" series so that we need an early checkpoint.
				// However, if we are already behind persisting chunks, creating a checkpoint
				// would be counterproductive, as it would slow down chunk persisting even more,
				// while in a situation like that, where we are clearly lacking speed of disk
				// maintenance, the best we can do for crash recovery is to persist chunks as
				// quickly as possible. So only checkpoint if the urgency score is < 1.
				if dirtySeriesCount >= s.checkpointDirtySeriesLimit &&
					s.calculatePersistenceUrgencyScore() < 1 {
					checkpointTimer.Reset(0)
				}
			}
		case fp := <-archivedFingerprints:
			s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter))
		}
	}
	// Wait until both channels are closed.
	for range memoryFingerprints {
	}
	for range archivedFingerprints {
	}
}
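A note on the drain-before-Reset pattern above: the inner select exists because time.Timer.Reset does not clear a tick already delivered to the channel. Below is a minimal, self-contained sketch of the same idiom with made-up names; it is not part of the storage code.

package main

import (
	"fmt"
	"time"
)

// drainAndReset clears any tick already sitting in the timer's channel before
// rearming it, so a slow handler combined with Reset(0) cannot cause
// back-to-back firings.
func drainAndReset(t *time.Timer, d time.Duration) {
	select {
	case <-t.C: // Get rid of a lurking tick, if any.
	default:
	}
	t.Reset(d)
}

func main() {
	t := time.NewTimer(10 * time.Millisecond)
	defer t.Stop()

	// Let the timer fire while nobody reads t.C, so a stale tick is now
	// buffered in the channel.
	time.Sleep(20 * time.Millisecond)

	start := time.Now()
	drainAndReset(t, 50*time.Millisecond)
	<-t.C // Fires roughly 50ms after the reset, not immediately from the stale tick.
	fmt.Println("fired after", time.Since(start))
}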
Example #2
func (s *memorySeriesStorage) loop() {
	checkpointTimer := time.NewTimer(s.checkpointInterval)

	dirtySeriesCount := 0

	defer func() {
		checkpointTimer.Stop()
		log.Info("Maintenance loop stopped.")
		close(s.loopStopped)
	}()

	memoryFingerprints := s.cycleThroughMemoryFingerprints()
	archivedFingerprints := s.cycleThroughArchivedFingerprints()

loop:
	for {
		select {
		case <-s.loopStopping:
			break loop
		case <-checkpointTimer.C:
			err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
			if err != nil {
				log.Errorln("Error while checkpointing:", err)
			} else {
				dirtySeriesCount = 0
			}
			checkpointTimer.Reset(s.checkpointInterval)
		case fp := <-memoryFingerprints:
			if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) {
				dirtySeriesCount++
				// Check if we have enough "dirty" series so that we need an early checkpoint.
				// However, if we are already behind persisting chunks, creating a checkpoint
				// would be counterproductive, as it would slow down chunk persisting even more,
				// while in a situation like that, where we are clearly lacking speed of disk
				// maintenance, the best we can do for crash recovery is to persist chunks as
				// quickly as possible. So only checkpoint if the storage is not in "graceful
				// degradation mode".
				if dirtySeriesCount >= s.checkpointDirtySeriesLimit && !s.isDegraded() {
					checkpointTimer.Reset(0)
				}
			}
		case fp := <-archivedFingerprints:
			s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter))
		}
	}
	// Wait until both channels are closed.
	for range memoryFingerprints {
	}
	for range archivedFingerprints {
	}
}
Example #3
func (api *API) alerts(r *http.Request) (interface{}, *apiError) {
	// Generate snapshot of notifications for all current alerts
	var reqs notification.NotificationReqs
	now := model.Now()

	for _, rule := range api.RuleManager.AlertingRules() {
		reqs = append(reqs, api.RuleManager.GetRuleAlertNotifications(rule, now)...)
	}

	// Generate an alert map
	alerts := make([]map[string]interface{}, 0, len(reqs))
	for _, req := range reqs {
		alerts = append(alerts, map[string]interface{}{
			"summary":     req.Summary,
			"description": req.Description,
			"runbook":     req.Runbook,
			"labels":      req.Labels,
			"payload": map[string]interface{}{
				"value":        req.Value,
				"activeSince":  req.ActiveSince,
				"generatorURL": req.GeneratorURL,
				"alertingRule": req.RuleString,
			},
		})
	}

	return alerts, nil
}
Example #4
// preloadChunksForInstant preloads chunks for the latest value in the given
// range. If the last sample saved in the memorySeries itself is the latest
// value in the given range, it will in fact preload zero chunks and just take
// that value.
func (s *memorySeries) preloadChunksForInstant(
	fp model.Fingerprint,
	from model.Time, through model.Time,
	mss *MemorySeriesStorage,
) (SeriesIterator, error) {
	// If we have a lastSamplePair in the series, and that last samplePair
	// is in the interval, just take it in a singleSampleSeriesIterator. No
	// need to pin or load anything.
	lastSample := s.lastSamplePair()
	if !through.Before(lastSample.Timestamp) &&
		!from.After(lastSample.Timestamp) &&
		lastSample != model.ZeroSamplePair {
		iter := &boundedIterator{
			it: &singleSampleSeriesIterator{
				samplePair: lastSample,
				metric:     s.metric,
			},
			start: model.Now().Add(-mss.dropAfter),
		}
		return iter, nil
	}
	// If we are here, we are out of luck and have to delegate to the more
	// expensive method.
	return s.preloadChunksForRange(fp, from, through, mss)
}
Example #5
func TestRetentionCutoff(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 2)
	defer closer.Close()

	// Stop maintenance loop to prevent actual purging.
	close(s.loopStopping)
	<-s.loopStopped
	<-s.logThrottlingStopped
	// Recreate channel to avoid panic when we really shut down.
	s.loopStopping = make(chan struct{})

	s.dropAfter = 1 * time.Hour

	for i := 0; i < 120; i++ {
		smpl := &model.Sample{
			Metric:    model.Metric{"job": "test"},
			Timestamp: insertStart.Add(time.Duration(i) * time.Minute), // 1 minute intervals.
			Value:     1,
		}
		s.Append(smpl)
	}
	s.WaitForIndexing()

	var fp model.Fingerprint
	for f := range s.fingerprintsForLabelPairs(model.LabelPair{Name: "job", Value: "test"}) {
		fp = f
		break
	}

	pl := s.NewPreloader()
	defer pl.Close()

	// Preload everything.
	it := pl.PreloadRange(fp, insertStart, now)

	val := it.ValueAtOrBeforeTime(now.Add(-61 * time.Minute))
	if val.Timestamp != model.Earliest {
		t.Errorf("unexpected result for timestamp before retention period")
	}

	vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now})
	// We get 59 values here because the model.Now() is slightly later
	// than our now.
	if len(vals) != 59 {
		t.Errorf("expected 59 values but got %d", len(vals))
	}
	if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt {
		t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time())
	}
}
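The timestamp arithmetic in this test works because model.Now(), like every timestamp in these examples, is a model.Time in milliseconds since the Unix epoch rather than a time.Time. A small standalone sketch of the conversions involved; nothing here is specific to the storage tests.

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/common/model"
)

func main() {
	now := model.Now()                // model.Time: milliseconds since the Unix epoch.
	cutoff := now.Add(-1 * time.Hour) // Duration arithmetic stays in model.Time.

	fmt.Println("as time.Time:", now.Time()) // Convert back to the standard library type.
	fmt.Println("cutoff before now:", cutoff.Before(now))
	fmt.Println("duration since cutoff:", now.Sub(cutoff))
}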
Example #6
func (m *Manager) runIteration() {
	now := model.Now()
	wg := sync.WaitGroup{}

	m.Lock()
	rulesSnapshot := make([]Rule, len(m.rules))
	copy(rulesSnapshot, m.rules)
	m.Unlock()

	for _, rule := range rulesSnapshot {
		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			start := time.Now()
			vector, err := rule.eval(now, m.queryEngine)
			duration := time.Since(start)

			if err != nil {
				evalFailures.Inc()
				log.Warnf("Error while evaluating rule %q: %s", rule, err)
				return
			}

			switch r := rule.(type) {
			case *AlertingRule:
				m.queueAlertNotifications(r, now)
				evalDuration.WithLabelValues(ruleTypeAlerting).Observe(
					float64(duration / time.Millisecond),
				)
			case *RecordingRule:
				evalDuration.WithLabelValues(ruleTypeRecording).Observe(
					float64(duration / time.Millisecond),
				)
			default:
				panic(fmt.Errorf("Unknown rule type: %T", rule))
			}

			for _, s := range vector {
				m.sampleAppender.Append(&model.Sample{
					Metric:    s.Metric.Metric,
					Value:     s.Value,
					Timestamp: s.Timestamp,
				})
			}
		}(rule)
	}
	wg.Wait()
}
Example #7
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			start := time.Now()
			evalTotal.Inc()

			vector, err := rule.eval(now, g.opts.QueryEngine)
			if err != nil {
				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				if _, ok := err.(promql.ErrQueryCanceled); !ok {
					log.Warnf("Error while evaluating rule %q: %s", rule, err)
				}
				evalFailures.Inc()
			}
			var rtyp ruleType

			switch r := rule.(type) {
			case *AlertingRule:
				rtyp = ruleTypeAlert
				g.sendAlerts(r, now)

			case *RecordingRule:
				rtyp = ruleTypeRecording

			default:
				panic(fmt.Errorf("unknown rule type: %T", rule))
			}

			evalDuration.WithLabelValues(string(rtyp)).Observe(
				float64(time.Since(start)) / float64(time.Second),
			)

			for _, s := range vector {
				g.opts.SampleAppender.Append(s)
			}
		}(rule)
	}
	wg.Wait()
}
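One detail worth calling out in every variant of this loop: rule is handed to the goroutine as an argument instead of being captured by the closure, because with the loop-variable semantics Go had at the time (before Go 1.22) all goroutines would otherwise observe the same, final value of rule. A toy sketch of the pattern, unrelated to the rules package:

package main

import (
	"fmt"
	"sync"
)

func main() {
	rules := []string{"alert:HighErrorRate", "record:job:requests:rate5m"}

	var wg sync.WaitGroup
	for _, r := range rules {
		wg.Add(1)
		// Passing r as an argument gives each goroutine its own copy,
		// which is what Group.eval above relies on.
		go func(rule string) {
			defer wg.Done()
			fmt.Println("evaluating", rule)
		}(r)
	}
	wg.Wait()
}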
Example #8
func (h *Handler) consoles(w http.ResponseWriter, r *http.Request) {
	ctx := route.Context(r)
	name := route.Param(ctx, "filepath")

	file, err := http.Dir(h.options.ConsoleTemplatesPath).Open(name)
	if err != nil {
		http.Error(w, err.Error(), http.StatusNotFound)
		return
	}
	text, err := ioutil.ReadAll(file)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Provide URL parameters as a map for easy use. Advanced users may have need for
	// parameters beyond the first, so provide RawParams.
	rawParams, err := url.ParseQuery(r.URL.RawQuery)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	params := map[string]string{}
	for k, v := range rawParams {
		params[k] = v[0]
	}
	data := struct {
		RawParams url.Values
		Params    map[string]string
		Path      string
	}{
		RawParams: rawParams,
		Params:    params,
		Path:      strings.TrimLeft(name, "/"),
	}

	tmpl := template.NewTemplateExpander(string(text), "__console_"+name, data, model.Now(), h.queryEngine, h.options.ExternalURL.Path)
	filenames, err := filepath.Glob(h.options.ConsoleLibrariesPath + "/*.lib")
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	result, err := tmpl.ExpandHTML(filenames)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	io.WriteString(w, result)
}
Example #9
// preloadChunks is an internal helper method.
func (s *memorySeries) preloadChunks(
	indexes []int, fp model.Fingerprint, mss *MemorySeriesStorage,
) (SeriesIterator, error) {
	loadIndexes := []int{}
	pinnedChunkDescs := make([]*chunk.Desc, 0, len(indexes))
	for _, idx := range indexes {
		cd := s.chunkDescs[idx]
		pinnedChunkDescs = append(pinnedChunkDescs, cd)
		cd.Pin(mss.evictRequests) // Have to pin everything first to prevent immediate eviction on chunk loading.
		if cd.IsEvicted() {
			loadIndexes = append(loadIndexes, idx)
		}
	}
	chunk.Ops.WithLabelValues(chunk.Pin).Add(float64(len(pinnedChunkDescs)))

	if len(loadIndexes) > 0 {
		if s.chunkDescsOffset == -1 {
			panic("requested loading chunks from persistence in a situation where we must not have persisted data for chunk descriptors in memory")
		}
		chunks, err := mss.loadChunks(fp, loadIndexes, s.chunkDescsOffset)
		if err != nil {
			// Unpin the chunks since we won't return them as pinned chunks now.
			for _, cd := range pinnedChunkDescs {
				cd.Unpin(mss.evictRequests)
			}
			chunk.Ops.WithLabelValues(chunk.Unpin).Add(float64(len(pinnedChunkDescs)))
			return nopIter, err
		}
		for i, c := range chunks {
			s.chunkDescs[loadIndexes[i]].SetChunk(c)
		}
	}

	if !s.headChunkClosed && indexes[len(indexes)-1] == len(s.chunkDescs)-1 {
		s.headChunkUsedByIterator = true
	}

	curriedQuarantineSeries := func(err error) {
		mss.quarantineSeries(fp, s.metric, err)
	}

	iter := &boundedIterator{
		it:    s.newIterator(pinnedChunkDescs, curriedQuarantineSeries, mss.evictRequests),
		start: model.Now().Add(-mss.dropAfter),
	}

	return iter, nil
}
Example #10
func TestRuleEval(t *testing.T) {
	storage, closer := local.NewTestStorage(t, 2)
	defer closer.Close()
	engine := promql.NewEngine(storage, nil)
	ctx, cancelCtx := context.WithCancel(context.Background())
	defer cancelCtx()

	now := model.Now()

	suite := []struct {
		name   string
		expr   promql.Expr
		labels model.LabelSet
		result model.Vector
	}{
		{
			name:   "nolabels",
			expr:   &promql.NumberLiteral{Val: 1},
			labels: model.LabelSet{},
			result: model.Vector{&model.Sample{
				Value:     1,
				Timestamp: now,
				Metric:    model.Metric{"__name__": "nolabels"},
			}},
		},
		{
			name:   "labels",
			expr:   &promql.NumberLiteral{Val: 1},
			labels: model.LabelSet{"foo": "bar"},
			result: model.Vector{&model.Sample{
				Value:     1,
				Timestamp: now,
				Metric:    model.Metric{"__name__": "labels", "foo": "bar"},
			}},
		},
	}

	for _, test := range suite {
		rule := NewRecordingRule(test.name, test.expr, test.labels)
		result, err := rule.eval(ctx, now, engine, "")
		if err != nil {
			t.Fatalf("Error evaluating %s", test.name)
		}
		if !reflect.DeepEqual(result, test.result) {
			t.Fatalf("Error: expected %q, got %q", test.result, result)
		}
	}
}
Example #11
func (h *Handler) executeTemplate(w http.ResponseWriter, name string, data interface{}) {
	text, err := h.getTemplate(name)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	tmpl := template.NewTemplateExpander(text, name, data, model.Now(), h.queryEngine, h.options.ExternalURL.Path)
	tmpl.Funcs(tmplFuncs(h.consolesPath(), h.options))

	result, err := tmpl.ExpandHTML(nil)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	io.WriteString(w, result)
}
Example #12
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			start := time.Now()
			evalTotal.Inc()

			vector, err := rule.eval(now, g.opts.QueryEngine)
			if err != nil {
				evalFailures.Inc()
				log.Warnf("Error while evaluating rule %q: %s", rule, err)
			}
			var rtyp ruleType

			switch r := rule.(type) {
			case *AlertingRule:
				rtyp = ruleTypeAlert
				g.sendAlerts(r, now)

			case *RecordingRule:
				rtyp = ruleTypeRecording

			default:
				panic(fmt.Errorf("unknown rule type: %T", rule))
			}

			evalDuration.WithLabelValues(string(rtyp)).Observe(
				float64(time.Since(start)) / float64(time.Second),
			)

			for _, s := range vector {
				g.opts.SampleAppender.Append(s)
			}
		}(rule)
	}
	wg.Wait()
}
Example #13
func (g *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
	resp, err := client.Get(url)
	if err != nil {
		return fmt.Errorf("error making HTTP request to %s: %s", url, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s returned HTTP status %s", url, resp.Status)
	}
	format := expfmt.ResponseFormat(resp.Header)

	decoder := expfmt.NewDecoder(resp.Body, format)

	options := &expfmt.DecodeOptions{
		Timestamp: model.Now(),
	}
	sampleDecoder := &expfmt.SampleDecoder{
		Dec:  decoder,
		Opts: options,
	}

	for {
		var samples model.Vector
		err := sampleDecoder.Decode(&samples)
		if err == io.EOF {
			break
		} else if err != nil {
			return fmt.Errorf("error getting processing samples for %s: %s",
				url, err)
		}
		for _, sample := range samples {
			tags := make(map[string]string)
			for key, value := range sample.Metric {
				if key == model.MetricNameLabel {
					continue
				}
				tags[string(key)] = string(value)
			}
			acc.Add("prometheus_"+string(sample.Metric[model.MetricNameLabel]),
				float64(sample.Value), tags)
		}
	}

	return nil
}
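The expfmt.SampleDecoder flow used above works against any io.Reader, not just an HTTP response body. A minimal sketch decoding a hard-coded text exposition; the metric name is made up, but the API calls are the same ones the function above uses.

package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/prometheus/common/expfmt"
	"github.com/prometheus/common/model"
)

func main() {
	in := "# TYPE demo_requests_total counter\ndemo_requests_total{path=\"/\"} 42\n"

	sd := &expfmt.SampleDecoder{
		Dec:  expfmt.NewDecoder(strings.NewReader(in), expfmt.FmtText),
		Opts: &expfmt.DecodeOptions{Timestamp: model.Now()},
	}

	for {
		var samples model.Vector
		err := sd.Decode(&samples)
		if err == io.EOF {
			break
		} else if err != nil {
			fmt.Println("decode error:", err)
			return
		}
		for _, s := range samples {
			fmt.Printf("%s = %v @ %v\n", s.Metric, s.Value, s.Timestamp)
		}
	}
}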
Example #14
func TestTargetRecordScrapeHealth(t *testing.T) {
	var (
		testTarget = newTestTarget("example.url:80", 0, model.LabelSet{model.JobLabel: "testjob"})
		now        = model.Now()
		appender   = &collectResultAppender{}
	)

	testTarget.report(appender, now.Time(), 2*time.Second, nil)

	result := appender.result

	if len(result) != 2 {
		t.Fatalf("Expected two samples, got %d", len(result))
	}

	actual := result[0]
	expected := &model.Sample{
		Metric: model.Metric{
			model.MetricNameLabel: scrapeHealthMetricName,
			model.InstanceLabel:   "example.url:80",
			model.JobLabel:        "testjob",
		},
		Timestamp: now,
		Value:     1,
	}

	if !actual.Equal(expected) {
		t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
	}

	actual = result[1]
	expected = &model.Sample{
		Metric: model.Metric{
			model.MetricNameLabel: scrapeDurationMetricName,
			model.InstanceLabel:   "example.url:80",
			model.JobLabel:        "testjob",
		},
		Timestamp: now,
		Value:     2.0,
	}

	if !actual.Equal(expected) {
		t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
	}
}
Example #15
// NewIterator implements Storage.
func (s *memorySeriesStorage) NewIterator(fp model.Fingerprint) SeriesIterator {
	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		// Oops, no series for fp found. That happens if, after
		// preloading is done, the whole series is identified as old
		// enough for purging and hence purged for good. As there is no
		// data left to iterate over, return an iterator that will never
		// return any values.
		return nopSeriesIterator{}
	}
	return &boundedIterator{
		it:    series.newIterator(),
		start: model.Now().Add(-s.dropAfter),
	}
}
Example #16
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		rtyp := string(typeForRule(rule))

		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			defer func(t time.Time) {
				evalDuration.WithLabelValues(rtyp).Observe(float64(time.Since(t)) / float64(time.Second))
			}(time.Now())

			evalTotal.WithLabelValues(rtyp).Inc()

			vector, err := rule.eval(now, g.opts.QueryEngine)
			if err != nil {
				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				if _, ok := err.(promql.ErrQueryCanceled); !ok {
					log.Warnf("Error while evaluating rule %q: %s", rule, err)
				}
				evalFailures.WithLabelValues(rtyp).Inc()
				return
			}

			if ar, ok := rule.(*AlertingRule); ok {
				g.sendAlerts(ar, now)
			}
			for _, s := range vector {
				g.opts.SampleAppender.Append(s)
			}
		}(rule)
	}
	wg.Wait()
}
Example #17
// cycleThroughArchivedFingerprints returns a channel that emits fingerprints
// for archived series in a throttled fashion. It continues to cycle through all
// archived fingerprints until s.loopStopping is closed.
func (s *memorySeriesStorage) cycleThroughArchivedFingerprints() chan model.Fingerprint {
	archivedFingerprints := make(chan model.Fingerprint)
	go func() {
		defer close(archivedFingerprints)

		for {
			archivedFPs, err := s.persistence.fingerprintsModifiedBefore(
				model.Now().Add(-s.dropAfter),
			)
			if err != nil {
				log.Error("Failed to lookup archived fingerprint ranges: ", err)
				s.waitForNextFP(0, 1)
				continue
			}
			// Initial wait, also important if there are no FPs yet.
			if !s.waitForNextFP(len(archivedFPs), 1) {
				return
			}
			begin := time.Now()
			for _, fp := range archivedFPs {
				select {
				case archivedFingerprints <- fp:
				case <-s.loopStopping:
					return
				}
				// Never speed up maintenance of archived FPs.
				s.waitForNextFP(len(archivedFPs), 1)
			}
			if len(archivedFPs) > 0 {
				log.Infof(
					"Completed maintenance sweep through %d archived fingerprints in %v.",
					len(archivedFPs), time.Since(begin),
				)
			}
		}
	}()
	return archivedFingerprints
}
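The structure here, a goroutine feeding an unbuffered channel, checking a stop channel on every send, and closing the channel on exit so the consumer can simply range over it, is a reusable pattern on its own. A stripped-down sketch with made-up names that replaces the adaptive waitForNextFP pacing with a plain ticker:

package main

import (
	"fmt"
	"time"
)

// throttledFingerprints emits the given values at most once per interval
// until stop is closed, then closes the returned channel.
func throttledFingerprints(values []uint64, interval time.Duration, stop <-chan struct{}) <-chan uint64 {
	out := make(chan uint64)
	go func() {
		defer close(out)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			for _, v := range values {
				select {
				case out <- v:
				case <-stop:
					return
				}
				select {
				case <-ticker.C: // Throttle between sends.
				case <-stop:
					return
				}
			}
		}
	}()
	return out
}

func main() {
	stop := make(chan struct{})
	fps := throttledFingerprints([]uint64{1, 2, 3}, 10*time.Millisecond, stop)

	go func() {
		time.Sleep(100 * time.Millisecond)
		close(stop)
	}()
	for fp := range fps { // The range ends once the producer closes fps.
		fmt.Println("maintaining fingerprint", fp)
	}
}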
Example #18
// Push pushes Prometheus metrics to the configured Graphite server.
func (b *Bridge) Push() error {
	mfs, err := b.g.Gather()
	if err != nil || len(mfs) == 0 {
		switch b.errorHandling {
		case AbortOnError:
			return err
		case ContinueOnError:
			if b.logger != nil {
				b.logger.Println("continue on error:", err)
			}
		default:
			panic("unrecognized error handling value")
		}
	}

	conn, err := net.DialTimeout("tcp", b.url, b.timeout)
	if err != nil {
		return err
	}
	defer conn.Close()

	return writeMetrics(conn, mfs, b.prefix, model.Now())
}
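For context, this Push method looks like the push half of client_golang's Graphite bridge, where a caller builds the bridge from a Config and either calls Push once or lets Run push on an interval. The sketch below is how I recall that constructor looking; the Config field names and the NewBridge/Run signatures are assumptions to verify against the graphite package documentation.

package main

import (
	"log"
	"os"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/graphite"
)

func main() {
	// Assumed Config shape; check the client_golang graphite package for
	// the authoritative definition.
	b, err := graphite.NewBridge(&graphite.Config{
		URL:           "graphite.example.org:2003",
		Gatherer:      prometheus.DefaultGatherer,
		Prefix:        "myapp",
		Timeout:       10 * time.Second,
		ErrorHandling: graphite.AbortOnError,
		Logger:        log.New(os.Stdout, "graphite bridge: ", log.Lshortfile),
	})
	if err != nil {
		log.Fatal(err)
	}

	// One-off push; b.Run(ctx) would instead push on a configured interval.
	if err := b.Push(); err != nil {
		log.Println("push failed:", err)
	}
}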
Example #19
// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still
// has to be called to start the storage.
func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) Storage {
	s := &memorySeriesStorage{
		fpLocker: newFingerprintLocker(1024),

		options: o,

		loopStopping:               make(chan struct{}),
		loopStopped:                make(chan struct{}),
		logThrottlingStopped:       make(chan struct{}),
		throttled:                  make(chan struct{}, 1),
		maxMemoryChunks:            o.MemoryChunks,
		dropAfter:                  o.PersistenceRetentionPeriod,
		checkpointInterval:         o.CheckpointInterval,
		checkpointDirtySeriesLimit: o.CheckpointDirtySeriesLimit,
		archiveHighWatermark:       model.Now().Add(-headChunkTimeout),

		maxChunksToPersist: o.MaxChunksToPersist,

		evictList:     list.New(),
		evictRequests: make(chan evictRequest, evictRequestsCap),
		evictStopping: make(chan struct{}),
		evictStopped:  make(chan struct{}),

		quarantineRequests: make(chan quarantineRequest, quarantineRequestsCap),
		quarantineStopping: make(chan struct{}),
		quarantineStopped:  make(chan struct{}),

		persistErrors: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persist_errors_total",
			Help:      "The total number of errors while persisting chunks.",
		}),
		numSeries: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "memory_series",
			Help:      "The current number of series in memory.",
		}),
		seriesOps: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "series_ops_total",
				Help:      "The total number of series operations by their type.",
			},
			[]string{opTypeLabel},
		),
		ingestedSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "ingested_samples_total",
			Help:      "The total number of samples ingested.",
		}),
		outOfOrderSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "out_of_order_samples_total",
			Help:      "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.",
		}),
		nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "non_existent_series_matches_total",
			Help:      "How often a non-existent series was referred to during label matching or chunk preloading. This is an indication of outdated label indexes.",
		}),
		maintainSeriesDuration: prometheus.NewSummaryVec(
			prometheus.SummaryOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "maintain_series_duration_milliseconds",
				Help:      "The duration (in milliseconds) it took to perform maintenance on a series.",
			},
			[]string{seriesLocationLabel},
		),
		persistenceUrgencyScore: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persistence_urgency_score",
			Help:      "A score of urgency to persist chunks, 0 is least urgent, 1 most.",
		}),
		rushedMode: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "rushed_mode",
			Help:      "1 if the storage is in rushed mode, 0 otherwise. In rushed mode, the system behaves as if the persistence_urgency_score is 1.",
		}),
	}
	return s
}
Example #20
// maintainMemorySeries maintains a series that is in memory (i.e. not
// archived). It returns true if the series has changed from clean to dirty
// (i.e. it is now inconsistent with the latest checkpoint, so that in case of
// a crash a recovery operation requiring a disk seek would have to be applied).
//
// The method first closes the head chunk if it was not touched for the duration
// of headChunkTimeout.
//
// Then it determines the chunks that need to be purged and the chunks that need
// to be persisted. Depending on the result, it does the following:
//
// - If all chunks of a series need to be purged, the whole series is deleted
// for good and the method returns false. (Detecting non-existence of a series
// file does not require a disk seek.)
//
// - If any chunks need to be purged (but not all of them), it purges those
// chunks from memory and rewrites the series file on disk, leaving out the
// purged chunks and appending all chunks not yet persisted (with the exception
// of a still open head chunk).
//
// - If no chunks on disk need to be purged, but chunks need to be persisted,
// those chunks are simply appended to the existing series file (or the file is
// created if it does not exist yet).
//
// - If no chunks need to be purged and no chunks need to be persisted, nothing
// happens in this step.
//
// Next, the method checks if all chunks in the series are evicted. In that
// case, it archives the series and returns true.
//
// Finally, it evicts chunkDescs if there are too many.
func (s *memorySeriesStorage) maintainMemorySeries(
	fp model.Fingerprint, beforeTime model.Time,
) (becameDirty bool) {
	defer func(begin time.Time) {
		s.maintainSeriesDuration.WithLabelValues(maintainInMemory).Observe(
			float64(time.Since(begin)) / float64(time.Millisecond),
		)
	}(time.Now())

	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		// Series is actually not in memory, perhaps archived or dropped in the meantime.
		return false
	}

	defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc()

	if series.maybeCloseHeadChunk() {
		s.incNumChunksToPersist(1)
	}

	seriesWasDirty := series.dirty

	if s.writeMemorySeries(fp, series, beforeTime) {
		// Series is gone now, we are done.
		return false
	}

	iOldestNotEvicted := -1
	for i, cd := range series.chunkDescs {
		if !cd.isEvicted() {
			iOldestNotEvicted = i
			break
		}
	}

	// Archive if all chunks are evicted. Also make sure the last sample has
	// an age of at least headChunkTimeout (which is very likely anyway).
	if iOldestNotEvicted == -1 && model.Now().Sub(series.lastTime) > headChunkTimeout {
		s.fpToSeries.del(fp)
		s.numSeries.Dec()
		s.persistence.archiveMetric(fp, series.metric, series.firstTime(), series.lastTime)
		s.seriesOps.WithLabelValues(archive).Inc()
		oldWatermark := atomic.LoadInt64((*int64)(&s.archiveHighWatermark))
		if oldWatermark < int64(series.lastTime) {
			if !atomic.CompareAndSwapInt64(
				(*int64)(&s.archiveHighWatermark),
				oldWatermark, int64(series.lastTime),
			) {
				panic("s.archiveHighWatermark modified outside of maintainMemorySeries")
			}
		}
		return
	}
	// If we are here, the series is not archived, so check for chunkDesc
	// eviction next.
	series.evictChunkDescs(iOldestNotEvicted)

	return series.dirty && !seriesWasDirty
}
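The doc comment above fixes a strict order of decisions. Purely as an illustration of that ordering, with toy booleans standing in for state the real method derives from the series, a condensed sketch:

package main

import "fmt"

// maintenanceSteps mirrors, in simplified form, the decision order described
// in the maintainMemorySeries doc comment. Every parameter is a hypothetical
// stand-in, not a real storage field.
func maintenanceSteps(allChunksPurged, someChunksPurged, chunksToPersist, allChunksEvicted, tooManyChunkDescs bool) []string {
	if allChunksPurged {
		return []string{"delete the whole series and its file for good"}
	}
	var steps []string
	if someChunksPurged {
		steps = append(steps, "rewrite the series file without the purged chunks, appending chunks not yet persisted")
	} else if chunksToPersist {
		steps = append(steps, "append the unpersisted chunks to the existing series file")
	}
	if allChunksEvicted {
		return append(steps, "archive the series")
	}
	if tooManyChunkDescs {
		steps = append(steps, "evict surplus chunkDescs")
	}
	return steps
}

func main() {
	fmt.Println(maintenanceSteps(false, true, false, false, true))
}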
Example #21
// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still
// has to be called to start the storage.
func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage {
	s := &MemorySeriesStorage{
		fpLocker: newFingerprintLocker(o.NumMutexes),

		options: o,

		loopStopping:               make(chan struct{}),
		loopStopped:                make(chan struct{}),
		logThrottlingStopped:       make(chan struct{}),
		throttled:                  make(chan struct{}, 1),
		maxMemoryChunks:            o.MemoryChunks,
		dropAfter:                  o.PersistenceRetentionPeriod,
		checkpointInterval:         o.CheckpointInterval,
		checkpointDirtySeriesLimit: o.CheckpointDirtySeriesLimit,
		archiveHighWatermark:       model.Now().Add(-headChunkTimeout),

		maxChunksToPersist: o.MaxChunksToPersist,

		evictList:     list.New(),
		evictRequests: make(chan evictRequest, evictRequestsCap),
		evictStopping: make(chan struct{}),
		evictStopped:  make(chan struct{}),

		quarantineRequests: make(chan quarantineRequest, quarantineRequestsCap),
		quarantineStopping: make(chan struct{}),
		quarantineStopped:  make(chan struct{}),

		persistErrors: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persist_errors_total",
			Help:      "The total number of errors while persisting chunks.",
		}),
		numSeries: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "memory_series",
			Help:      "The current number of series in memory.",
		}),
		seriesOps: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "series_ops_total",
				Help:      "The total number of series operations by their type.",
			},
			[]string{opTypeLabel},
		),
		ingestedSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "ingested_samples_total",
			Help:      "The total number of samples ingested.",
		}),
		discardedSamplesCount: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "out_of_order_samples_total",
				Help:      "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.",
			},
			[]string{discardReasonLabel},
		),
		nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "non_existent_series_matches_total",
			Help:      "How often a non-existent series was referred to during label matching or chunk preloading. This is an indication of outdated label indexes.",
		}),
		maintainSeriesDuration: prometheus.NewSummaryVec(
			prometheus.SummaryOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "maintain_series_duration_seconds",
				Help:      "The duration in seconds it took to perform maintenance on a series.",
			},
			[]string{seriesLocationLabel},
		),
		persistenceUrgencyScore: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persistence_urgency_score",
			Help:      "A score of urgency to persist chunks, 0 is least urgent, 1 most.",
		}),
		rushedMode: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "rushed_mode",
			Help:      "1 if the storage is in rushed mode, 0 otherwise. In rushed mode, the system behaves as if the persistence_urgency_score is 1.",
		}),
	}

	// Initialize metric vectors.
	// TODO(beorn7): Rework once we have a utility function for it in client_golang.
	s.discardedSamplesCount.WithLabelValues(outOfOrderTimestamp)
	s.discardedSamplesCount.WithLabelValues(duplicateSample)
	s.maintainSeriesDuration.WithLabelValues(maintainInMemory)
	s.maintainSeriesDuration.WithLabelValues(maintainArchived)
	s.seriesOps.WithLabelValues(create)
	s.seriesOps.WithLabelValues(archive)
	s.seriesOps.WithLabelValues(unarchive)
	s.seriesOps.WithLabelValues(memoryPurge)
	s.seriesOps.WithLabelValues(archivePurge)
	s.seriesOps.WithLabelValues(requestedPurge)
	s.seriesOps.WithLabelValues(memoryMaintenance)
	s.seriesOps.WithLabelValues(archiveMaintenance)
	s.seriesOps.WithLabelValues(completedQurantine)
	s.seriesOps.WithLabelValues(droppedQuarantine)
	s.seriesOps.WithLabelValues(failedQuarantine)

	return s
}
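The block of WithLabelValues calls near the end is there so every known label combination is exported with an initial value of 0 from startup, rather than appearing only after its first increment. A minimal standalone sketch of the same pattern, with an illustrative metric name:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	discarded := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "demo_discarded_samples_total",
			Help: "Samples discarded, partitioned by reason.",
		},
		[]string{"reason"},
	)
	reg := prometheus.NewRegistry()
	reg.MustRegister(discarded)

	// Touch each known label value once so the series exist at 0.
	for _, reason := range []string{"out_of_order", "duplicate"} {
		discarded.WithLabelValues(reason)
	}

	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		fmt.Println(mf.GetName(), "exposes", len(mf.GetMetric()), "series")
	}
}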
Example #22
func TestQuarantineMetric(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 2)
	defer closer.Close()

	chunkFileExists := func(fp model.Fingerprint) (bool, error) {
		f, err := s.persistence.openChunkFileForReading(fp)
		if err == nil {
			f.Close()
			return true, nil
		}
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}

	m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"}
	m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"}
	m3 := model.Metric{model.MetricNameLabel: "test", "n1": "v3"}

	N := 120000

	for j, m := range []model.Metric{m1, m2, m3} {
		for i := 0; i < N; i++ {
			smpl := &model.Sample{
				Metric:    m,
				Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals.
				Value:     model.SampleValue(j),
			}
			s.Append(smpl)
		}
	}
	s.WaitForIndexing()

	// Archive m3, but first maintain it so that at least something is written to disk.
	fpToBeArchived := m3.FastFingerprint()
	s.maintainMemorySeries(fpToBeArchived, 0)
	s.fpLocker.Lock(fpToBeArchived)
	s.fpToSeries.del(fpToBeArchived)
	s.persistence.archiveMetric(fpToBeArchived, m3, 0, insertStart.Add(time.Duration(N-1)*time.Millisecond))
	s.fpLocker.Unlock(fpToBeArchived)

	// Corrupt the series file for m3.
	f, err := os.Create(s.persistence.fileNameForFingerprint(fpToBeArchived))
	if err != nil {
		t.Fatal(err)
	}
	if _, err := f.WriteString("This is clearly not the content of a series file."); err != nil {
		t.Fatal(err)
	}
	if err := f.Close(); err != nil {
		t.Fatal(err)
	}

	fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"})
	if len(fps) != 3 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps))
	}

	pl := s.NewPreloader()
	// This will access the corrupt file and lead to quarantining.
	pl.PreloadInstant(fpToBeArchived, now.Add(-2*time.Hour), time.Minute)
	pl.Close()
	time.Sleep(time.Second) // Give time to quarantine. TODO(beorn7): Find a better way to wait.
	s.WaitForIndexing()

	fps2 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps2) != 2 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps2))
	}

	exists, err := chunkFileExists(fpToBeArchived)
	if err != nil {
		t.Fatal(err)
	}
	if exists {
		t.Errorf("chunk file exists for fp=%v", fpToBeArchived)
	}
}
Example #23
func TestDropMetrics(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 1)
	defer closer.Close()

	chunkFileExists := func(fp model.Fingerprint) (bool, error) {
		f, err := s.persistence.openChunkFileForReading(fp)
		if err == nil {
			f.Close()
			return true, nil
		}
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}

	m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"}
	m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"}
	m3 := model.Metric{model.MetricNameLabel: "test", "n1": "v3"}

	N := 120000

	for j, m := range []model.Metric{m1, m2, m3} {
		for i := 0; i < N; i++ {
			smpl := &model.Sample{
				Metric:    m,
				Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals.
				Value:     model.SampleValue(j),
			}
			s.Append(smpl)
		}
	}
	s.WaitForIndexing()

	// Archive m3, but first maintain it so that at least something is written to disk.
	fpToBeArchived := m3.FastFingerprint()
	s.maintainMemorySeries(fpToBeArchived, 0)
	s.fpLocker.Lock(fpToBeArchived)
	s.fpToSeries.del(fpToBeArchived)
	if err := s.persistence.archiveMetric(
		fpToBeArchived, m3, 0, insertStart.Add(time.Duration(N-1)*time.Millisecond),
	); err != nil {
		t.Error(err)
	}
	s.fpLocker.Unlock(fpToBeArchived)

	fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"})
	if len(fps) != 3 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps))
	}

	fpList := model.Fingerprints{m1.FastFingerprint(), m2.FastFingerprint(), fpToBeArchived}

	s.DropMetricsForFingerprints(fpList[0])
	s.WaitForIndexing()

	fps2 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps2) != 2 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps2))
	}

	it := s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Errorf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N {
		t.Errorf("unexpected number of samples: %d", len(vals))
	}
	exists, err := chunkFileExists(fpList[2])
	if err != nil {
		t.Fatal(err)
	}
	if !exists {
		t.Errorf("chunk file does not exist for fp=%v", fpList[2])
	}

	s.DropMetricsForFingerprints(fpList...)
	s.WaitForIndexing()

	fps3 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps3) != 0 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps3))
	}

	it = s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Errorf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Errorf("unexpected number of samples: %d", len(vals))
	}
	exists, err = chunkFileExists(fpList[2])
	if err != nil {
		t.Fatal(err)
	}
	if exists {
		t.Errorf("chunk file still exists for fp=%v", fpList[2])
	}
}
Example #24
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		rtyp := string(typeForRule(rule))

		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			defer func(t time.Time) {
				evalDuration.WithLabelValues(rtyp).Observe(time.Since(t).Seconds())
			}(time.Now())

			evalTotal.WithLabelValues(rtyp).Inc()

			vector, err := rule.eval(g.opts.Context, now, g.opts.QueryEngine, g.opts.ExternalURL.Path)
			if err != nil {
				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				if _, ok := err.(promql.ErrQueryCanceled); !ok {
					log.Warnf("Error while evaluating rule %q: %s", rule, err)
				}
				evalFailures.WithLabelValues(rtyp).Inc()
				return
			}

			if ar, ok := rule.(*AlertingRule); ok {
				g.sendAlerts(ar, now)
			}
			var (
				numOutOfOrder = 0
				numDuplicates = 0
			)
			for _, s := range vector {
				if err := g.opts.SampleAppender.Append(s); err != nil {
					switch err {
					case local.ErrOutOfOrderSample:
						numOutOfOrder++
						log.With("sample", s).With("error", err).Debug("Rule evaluation result discarded")
					case local.ErrDuplicateSampleForTimestamp:
						numDuplicates++
						log.With("sample", s).With("error", err).Debug("Rule evaluation result discarded")
					default:
						log.With("sample", s).With("error", err).Warn("Rule evaluation result discarded")
					}
				}
			}
			if numOutOfOrder > 0 {
				log.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order result from rule evaluation")
			}
			if numDuplicates > 0 {
				log.With("numDropped", numDuplicates).Warn("Error on ingesting results from rule evaluation with different value but same timestamp")
			}
		}(rule)
	}
	wg.Wait()
}
Example #25
func testEvictAndLoadChunkDescs(t *testing.T, encoding chunkEncoding) {
	samples := make(model.Samples, 10000)
	for i := range samples {
		samples[i] = &model.Sample{
			Timestamp: model.Time(2 * i),
			Value:     model.SampleValue(float64(i * i)),
		}
	}
	// Give last sample a timestamp of now so that the head chunk will not
	// be closed (which would then archive the time series later as
	// everything will get evicted).
	samples[len(samples)-1] = &model.Sample{
		Timestamp: model.Now(),
		Value:     model.SampleValue(3.14),
	}

	s, closer := NewTestStorage(t, encoding)
	defer closer.Close()

	// Adjust memory chunks to lower value to see evictions.
	s.maxMemoryChunks = 1

	for _, sample := range samples {
		s.Append(sample)
	}
	s.WaitForIndexing()

	fp := model.Metric{}.FastFingerprint()

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		t.Fatal("could not find series")
	}

	oldLen := len(series.chunkDescs)
	// Maintain series without any dropped chunks.
	s.maintainMemorySeries(fp, 0)
	// Give the evict goroutine an opportunity to run.
	time.Sleep(50 * time.Millisecond)
	// Maintain series again to trigger chunkDesc eviction
	s.maintainMemorySeries(fp, 0)

	if oldLen <= len(series.chunkDescs) {
		t.Errorf("Expected number of chunkDescs to decrease, old number %d, current number %d.", oldLen, len(series.chunkDescs))
	}

	// Load everything back.
	p := s.NewPreloader()
	p.PreloadRange(fp, 0, 100000, time.Hour)

	if oldLen != len(series.chunkDescs) {
		t.Errorf("Expected number of chunkDescs to have reached old value again, old number %d, current number %d.", oldLen, len(series.chunkDescs))
	}

	p.Close()

	// Now maintain series with drops to make sure nothing crazy happens.
	s.maintainMemorySeries(fp, 100000)

	if len(series.chunkDescs) != 1 {
		t.Errorf("Expected exactly one chunkDesc left, got %d.", len(series.chunkDescs))
	}
}
Example #26
func TestDropMetrics(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 1)
	defer closer.Close()

	m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"}
	m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"}

	N := 120000

	for j, m := range []model.Metric{m1, m2} {
		for i := 0; i < N; i++ {
			smpl := &model.Sample{
				Metric:    m,
				Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals.
				Value:     model.SampleValue(j),
			}
			s.Append(smpl)
		}
	}
	s.WaitForIndexing()

	fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"})
	if len(fps) != 2 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps))
	}

	var fpList model.Fingerprints
	for fp := range fps {
		it := s.NewIterator(fp)
		if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N {
			t.Fatalf("unexpected number of samples: %d", len(vals))
		}
		fpList = append(fpList, fp)
	}

	s.DropMetricsForFingerprints(fpList[0])
	s.WaitForIndexing()

	fps2 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps2) != 1 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps2))
	}

	it := s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}

	s.DropMetricsForFingerprints(fpList...)
	s.WaitForIndexing()

	fps3 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps3) != 0 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps3))
	}

	it = s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
}
Example #27
func TestRetentionCutoff(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 1)
	defer closer.Close()

	// Stop maintenance loop to prevent actual purging.
	s.loopStopping <- struct{}{}

	s.dropAfter = 1 * time.Hour

	for i := 0; i < 120; i++ {
		smpl := &model.Sample{
			Metric:    model.Metric{"job": "test"},
			Timestamp: insertStart.Add(time.Duration(i) * time.Minute), // 1 minute intervals.
			Value:     1,
		}
		s.Append(smpl)
	}
	s.WaitForIndexing()

	var fp model.Fingerprint
	for f := range s.fingerprintsForLabelPairs(model.LabelPair{Name: "job", Value: "test"}) {
		fp = f
		break
	}

	pl := s.NewPreloader()
	defer pl.Close()

	// Preload everything.
	err := pl.PreloadRange(fp, insertStart, now, 5*time.Minute)
	if err != nil {
		t.Fatalf("Error preloading outdated chunks: %s", err)
	}

	it := s.NewIterator(fp)

	vals := it.ValueAtTime(now.Add(-61 * time.Minute))
	if len(vals) != 0 {
		t.Errorf("unexpected result for timestamp before retention period")
	}

	vals = it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now})
	// We get 59 values here because the model.Now() is slightly later
	// than our now.
	if len(vals) != 59 {
		t.Errorf("expected 59 values but got %d", len(vals))
	}
	if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt {
		t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time())
	}

	vals = it.BoundaryValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now})
	if len(vals) != 2 {
		t.Errorf("expected 2 values but got %d", len(vals))
	}
	if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt {
		t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time())
	}
}
Example #28
func TestTextDecoder(t *testing.T) {
	var (
		ts = model.Now()
		in = `
# Only a quite simple scenario with two metric families.
# More complicated tests of the parser itself can be found in the text package.
# TYPE mf2 counter
mf2 3
mf1{label="value1"} -3.14 123456
mf1{label="value2"} 42
mf2 4
`
		out = model.Vector{
			&model.Sample{
				Metric: model.NewMetric(model.LabelSet{
					model.MetricNameLabel: "mf1",
					"label":               "value1",
				}),
				Value:     -3.14,
				Timestamp: 123456,
			},
			&model.Sample{
				Metric: model.NewMetric(model.LabelSet{
					model.MetricNameLabel: "mf1",
					"label":               "value2",
				}),
				Value:     42,
				Timestamp: ts,
			},
			&model.Sample{
				Metric: model.NewMetric(model.LabelSet{
					model.MetricNameLabel: "mf2",
				}),
				Value:     3,
				Timestamp: ts,
			},
			&model.Sample{
				Metric: model.NewMetric(model.LabelSet{
					model.MetricNameLabel: "mf2",
				}),
				Value:     4,
				Timestamp: ts,
			},
		}
	)

	dec := &SampleDecoder{
		Dec: &textDecoder{r: strings.NewReader(in)},
		Opts: &DecodeOptions{
			Timestamp: ts,
		},
	}
	var all model.Vector
	for {
		var smpls model.Vector
		err := dec.Decode(&smpls)
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		all = append(all, smpls...)
	}
	sort.Sort(all)
	sort.Sort(out)
	if !reflect.DeepEqual(all, out) {
		t.Fatalf("output does not match")
	}
}
Example #29
func createRandomSamples(metricName string, minLen int) model.Samples {
	type valueCreator func() model.SampleValue
	type deltaApplier func(model.SampleValue) model.SampleValue

	var (
		maxMetrics         = 5
		maxStreakLength    = 500
		maxTimeDelta       = 10000
		maxTimeDeltaFactor = 10
		timestamp          = model.Now() - model.Time(maxTimeDelta*maxTimeDeltaFactor*minLen/4) // So that some timestamps are in the future.
		generators         = []struct {
			createValue valueCreator
			applyDelta  []deltaApplier
		}{
			{ // "Boolean".
				createValue: func() model.SampleValue {
					return model.SampleValue(rand.Intn(2))
				},
				applyDelta: []deltaApplier{
					func(_ model.SampleValue) model.SampleValue {
						return model.SampleValue(rand.Intn(2))
					},
				},
			},
			{ // Integer with int deltas of various byte length.
				createValue: func() model.SampleValue {
					return model.SampleValue(rand.Int63() - 1<<62)
				},
				applyDelta: []deltaApplier{
					func(v model.SampleValue) model.SampleValue {
						return model.SampleValue(rand.Intn(1<<8) - 1<<7 + int(v))
					},
					func(v model.SampleValue) model.SampleValue {
						return model.SampleValue(rand.Intn(1<<16) - 1<<15 + int(v))
					},
					func(v model.SampleValue) model.SampleValue {
						return model.SampleValue(rand.Int63n(1<<32) - 1<<31 + int64(v))
					},
				},
			},
			{ // Float with float32 and float64 deltas.
				createValue: func() model.SampleValue {
					return model.SampleValue(rand.NormFloat64())
				},
				applyDelta: []deltaApplier{
					func(v model.SampleValue) model.SampleValue {
						return v + model.SampleValue(float32(rand.NormFloat64()))
					},
					func(v model.SampleValue) model.SampleValue {
						return v + model.SampleValue(rand.NormFloat64())
					},
				},
			},
		}
	)

	// Prefill result with two samples with colliding metrics (to test fingerprint mapping).
	result := model.Samples{
		&model.Sample{
			Metric: model.Metric{
				"instance": "ip-10-33-84-73.l05.ams5.s-cloud.net:24483",
				"status":   "503",
			},
			Value:     42,
			Timestamp: timestamp,
		},
		&model.Sample{
			Metric: model.Metric{
				"instance": "ip-10-33-84-73.l05.ams5.s-cloud.net:24480",
				"status":   "500",
			},
			Value:     2010,
			Timestamp: timestamp + 1,
		},
	}

	metrics := []model.Metric{}
	for n := rand.Intn(maxMetrics); n >= 0; n-- {
		metrics = append(metrics, model.Metric{
			model.MetricNameLabel:                             model.LabelValue(metricName),
			model.LabelName(fmt.Sprintf("labelname_%d", n+1)): model.LabelValue(fmt.Sprintf("labelvalue_%d", rand.Int())),
		})
	}

	for len(result) < minLen {
		// Pick a metric for this cycle.
		metric := metrics[rand.Intn(len(metrics))]
		timeDelta := rand.Intn(maxTimeDelta) + 1
		generator := generators[rand.Intn(len(generators))]
		createValue := generator.createValue
		applyDelta := generator.applyDelta[rand.Intn(len(generator.applyDelta))]
		incTimestamp := func() { timestamp += model.Time(timeDelta * (rand.Intn(maxTimeDeltaFactor) + 1)) }
		switch rand.Intn(4) {
		case 0: // A single sample.
			result = append(result, &model.Sample{
				Metric:    metric,
				Value:     createValue(),
				Timestamp: timestamp,
			})
			incTimestamp()
		case 1: // A streak of random sample values.
			for n := rand.Intn(maxStreakLength); n >= 0; n-- {
				result = append(result, &model.Sample{
					Metric:    metric,
					Value:     createValue(),
					Timestamp: timestamp,
				})
				incTimestamp()
			}
		case 2: // A streak of sample values with incremental changes.
			value := createValue()
			for n := rand.Intn(maxStreakLength); n >= 0; n-- {
				result = append(result, &model.Sample{
					Metric:    metric,
					Value:     value,
					Timestamp: timestamp,
				})
				incTimestamp()
				value = applyDelta(value)
			}
		case 3: // A streak of constant sample values.
			value := createValue()
			for n := rand.Intn(maxStreakLength); n >= 0; n-- {
				result = append(result, &model.Sample{
					Metric:    metric,
					Value:     value,
					Timestamp: timestamp,
				})
				incTimestamp()
			}
		}
	}

	return result
}
Example #30
func TestProtoDecoder(t *testing.T) {

	var testTime = model.Now()

	scenarios := []struct {
		in       string
		expected model.Vector
	}{
		{
			in: "",
		},
		{
			in: "\x8f\x01\n\rrequest_count\x12\x12Number of requests\x18\x00\"0\n#\n\x0fsome_label_name\x12\x10some_label_value\x1a\t\t\x00\x00\x00\x00\x00\x00E\xc0\"6\n)\n\x12another_label_name\x12\x13another_label_value\x1a\t\t\x00\x00\x00\x00\x00\x00U@",
			expected: model.Vector{
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_count",
						"some_label_name":     "some_label_value",
					}),
					Value:     -42,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_count",
						"another_label_name":  "another_label_value",
					}),
					Value:     84,
					Timestamp: testTime,
				},
			},
		},
		{
			in: "\xb9\x01\n\rrequest_count\x12\x12Number of requests\x18\x02\"O\n#\n\x0fsome_label_name\x12\x10some_label_value\"(\x1a\x12\t\xaeG\xe1z\x14\xae\xef?\x11\x00\x00\x00\x00\x00\x00E\xc0\x1a\x12\t+\x87\x16\xd9\xce\xf7\xef?\x11\x00\x00\x00\x00\x00\x00U\xc0\"A\n)\n\x12another_label_name\x12\x13another_label_value\"\x14\x1a\x12\t\x00\x00\x00\x00\x00\x00\xe0?\x11\x00\x00\x00\x00\x00\x00$@",
			expected: model.Vector{
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_count",
						"some_label_name":     "some_label_value",
						"quantile":            "0.99",
					}),
					Value:     -42,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_count",
						"some_label_name":     "some_label_value",
						"quantile":            "0.999",
					}),
					Value:     -84,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_count",
						"another_label_name":  "another_label_value",
						"quantile":            "0.5",
					}),
					Value:     10,
					Timestamp: testTime,
				},
			},
		},
		{
			in: "\x8d\x01\n\x1drequest_duration_microseconds\x12\x15The response latency.\x18\x04\"S:Q\b\x85\x15\x11\xcd\xcc\xccL\x8f\xcb:A\x1a\v\b{\x11\x00\x00\x00\x00\x00\x00Y@\x1a\f\b\x9c\x03\x11\x00\x00\x00\x00\x00\x00^@\x1a\f\b\xd0\x04\x11\x00\x00\x00\x00\x00\x00b@\x1a\f\b\xf4\v\x11\x9a\x99\x99\x99\x99\x99e@\x1a\f\b\x85\x15\x11\x00\x00\x00\x00\x00\x00\xf0\u007f",
			expected: model.Vector{
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_bucket",
						"le": "100",
					}),
					Value:     123,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_bucket",
						"le": "120",
					}),
					Value:     412,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_bucket",
						"le": "144",
					}),
					Value:     592,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_bucket",
						"le": "172.8",
					}),
					Value:     1524,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_bucket",
						"le": "+Inf",
					}),
					Value:     2693,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_sum",
					}),
					Value:     1756047.3,
					Timestamp: testTime,
				},
				&model.Sample{
					Metric: model.NewMetric(model.LabelSet{
						model.MetricNameLabel: "request_duration_microseconds_count",
					}),
					Value:     2693,
					Timestamp: testTime,
				},
			},
		},
	}

	for _, scenario := range scenarios {
		dec := &SampleDecoder{
			Dec: &protoDecoder{r: strings.NewReader(scenario.in)},
			Opts: &DecodeOptions{
				Timestamp: testTime,
			},
		}

		var all model.Vector
		for {
			var smpls model.Vector
			err := dec.Decode(&smpls)
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatal(err)
			}
			all = append(all, smpls...)
		}
		sort.Sort(all)
		sort.Sort(scenario.expected)
		if !reflect.DeepEqual(all, scenario.expected) {
			t.Fatalf("output does not match")
		}
	}
}