func (s *memorySeriesStorage) loop() { checkpointTimer := time.NewTimer(s.checkpointInterval) dirtySeriesCount := 0 defer func() { checkpointTimer.Stop() log.Info("Maintenance loop stopped.") close(s.loopStopped) }() memoryFingerprints := s.cycleThroughMemoryFingerprints() archivedFingerprints := s.cycleThroughArchivedFingerprints() loop: for { select { case <-s.loopStopping: break loop case <-checkpointTimer.C: err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker) if err != nil { log.Errorln("Error while checkpointing:", err) } else { dirtySeriesCount = 0 } // If a checkpoint takes longer than checkpointInterval, unluckily timed // combination with the Reset(0) call below can lead to a case where a // time is lurking in C leading to repeated checkpointing without break. select { case <-checkpointTimer.C: // Get rid of the lurking time. default: } checkpointTimer.Reset(s.checkpointInterval) case fp := <-memoryFingerprints: if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) { dirtySeriesCount++ // Check if we have enough "dirty" series so that we need an early checkpoint. // However, if we are already behind persisting chunks, creating a checkpoint // would be counterproductive, as it would slow down chunk persisting even more, // while in a situation like that, where we are clearly lacking speed of disk // maintenance, the best we can do for crash recovery is to persist chunks as // quickly as possible. So only checkpoint if the urgency score is < 1. if dirtySeriesCount >= s.checkpointDirtySeriesLimit && s.calculatePersistenceUrgencyScore() < 1 { checkpointTimer.Reset(0) } } case fp := <-archivedFingerprints: s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter)) } } // Wait until both channels are closed. for range memoryFingerprints { } for range archivedFingerprints { } }
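The drain-before-Reset dance in the checkpoint case above is a general time.Timer idiom rather than anything specific to this storage code. A minimal, self-contained sketch of the same pattern using only the standard library (all names here are illustrative, not part of the original code):

```go
package main

import (
	"fmt"
	"time"
)

// drainAndReset clears any expiration already buffered in the timer's channel
// before resetting it, so that one slow round of work does not immediately
// trigger the next one via a stale tick.
func drainAndReset(t *time.Timer, d time.Duration) {
	select {
	case <-t.C: // Get rid of the lurking expiration, if any.
	default:
	}
	t.Reset(d)
}

func main() {
	t := time.NewTimer(time.Second)
	defer t.Stop()

	t.Reset(0) // Force an immediate expiration, as the dirty-series path above does.
	<-t.C      // Handle the "checkpoint".
	fmt.Println("checkpoint done")

	// Safe to call even if another expiration is already sitting in t.C.
	drainAndReset(t, time.Second)
}
```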
func (s *memorySeriesStorage) loop() { checkpointTimer := time.NewTimer(s.checkpointInterval) dirtySeriesCount := 0 defer func() { checkpointTimer.Stop() log.Info("Maintenance loop stopped.") close(s.loopStopped) }() memoryFingerprints := s.cycleThroughMemoryFingerprints() archivedFingerprints := s.cycleThroughArchivedFingerprints() loop: for { select { case <-s.loopStopping: break loop case <-checkpointTimer.C: err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker) if err != nil { log.Errorln("Error while checkpointing:", err) } else { dirtySeriesCount = 0 } checkpointTimer.Reset(s.checkpointInterval) case fp := <-memoryFingerprints: if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) { dirtySeriesCount++ // Check if we have enough "dirty" series so that we need an early checkpoint. // However, if we are already behind persisting chunks, creating a checkpoint // would be counterproductive, as it would slow down chunk persisting even more, // while in a situation like that, where we are clearly lacking speed of disk // maintenance, the best we can do for crash recovery is to persist chunks as // quickly as possible. So only checkpoint if the storage is not in "graceful // degradation mode". if dirtySeriesCount >= s.checkpointDirtySeriesLimit && !s.isDegraded() { checkpointTimer.Reset(0) } } case fp := <-archivedFingerprints: s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter)) } } // Wait until both channels are closed. for range memoryFingerprints { } for range archivedFingerprints { } }
func (api *API) alerts(r *http.Request) (interface{}, *apiError) { // Generate snapshot of notifications for all current alerts var reqs notification.NotificationReqs now := model.Now() for _, rule := range api.RuleManager.AlertingRules() { reqs = append(reqs, api.RuleManager.GetRuleAlertNotifications(rule, now)...) } // Generate an alert map alerts := make([]map[string]interface{}, 0, len(reqs)) for _, req := range reqs { alerts = append(alerts, map[string]interface{}{ "summary": req.Summary, "description": req.Description, "runbook": req.Runbook, "labels": req.Labels, "payload": map[string]interface{}{ "value": req.Value, "activeSince": req.ActiveSince, "generatorURL": req.GeneratorURL, "alertingRule": req.RuleString, }, }) } return alerts, nil }
// preloadChunksForInstant preloads chunks for the latest value in the given
// range. If the last sample saved in the memorySeries itself is the latest
// value in the given range, it will in fact preload zero chunks and just take
// that value.
func (s *memorySeries) preloadChunksForInstant(
	fp model.Fingerprint,
	from model.Time, through model.Time,
	mss *MemorySeriesStorage,
) (SeriesIterator, error) {
	// If we have a lastSamplePair in the series, and that last sample pair
	// is in the interval, just take it in a singleSampleSeriesIterator. No
	// need to pin or load anything.
	lastSample := s.lastSamplePair()
	if !through.Before(lastSample.Timestamp) &&
		!from.After(lastSample.Timestamp) &&
		lastSample != model.ZeroSamplePair {
		iter := &boundedIterator{
			it: &singleSampleSeriesIterator{
				samplePair: lastSample,
				metric:     s.metric,
			},
			start: model.Now().Add(-mss.dropAfter),
		}
		return iter, nil
	}
	// If we are here, we are out of luck and have to delegate to the more
	// expensive method.
	return s.preloadChunksForRange(fp, from, through, mss)
}
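The condition above expresses "from <= lastSample.Timestamp <= through" with model.Time's Before/After methods. For reference, a tiny stand-alone illustration of that inclusive-interval check (the helper name is made up for this sketch):

```go
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/common/model"
)

// inInterval reports whether t lies in the inclusive interval [from, through].
func inInterval(t, from, through model.Time) bool {
	return !t.Before(from) && !t.After(through)
}

func main() {
	now := model.Now()
	from := now.Add(-5 * time.Minute)

	fmt.Println(inInterval(now, from, now))                      // true: boundaries are inclusive
	fmt.Println(inInterval(now.Add(-10*time.Minute), from, now)) // false: before the interval
}
```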
func TestRetentionCutoff(t *testing.T) { now := model.Now() insertStart := now.Add(-2 * time.Hour) s, closer := NewTestStorage(t, 2) defer closer.Close() // Stop maintenance loop to prevent actual purging. close(s.loopStopping) <-s.loopStopped <-s.logThrottlingStopped // Recreate channel to avoid panic when we really shut down. s.loopStopping = make(chan struct{}) s.dropAfter = 1 * time.Hour for i := 0; i < 120; i++ { smpl := &model.Sample{ Metric: model.Metric{"job": "test"}, Timestamp: insertStart.Add(time.Duration(i) * time.Minute), // 1 minute intervals. Value: 1, } s.Append(smpl) } s.WaitForIndexing() var fp model.Fingerprint for f := range s.fingerprintsForLabelPairs(model.LabelPair{Name: "job", Value: "test"}) { fp = f break } pl := s.NewPreloader() defer pl.Close() // Preload everything. it := pl.PreloadRange(fp, insertStart, now) val := it.ValueAtOrBeforeTime(now.Add(-61 * time.Minute)) if val.Timestamp != model.Earliest { t.Errorf("unexpected result for timestamp before retention period") } vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}) // We get 59 values here because the model.Now() is slightly later // than our now. if len(vals) != 59 { t.Errorf("expected 59 values but got %d", len(vals)) } if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt { t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time()) } }
func (m *Manager) runIteration() { now := model.Now() wg := sync.WaitGroup{} m.Lock() rulesSnapshot := make([]Rule, len(m.rules)) copy(rulesSnapshot, m.rules) m.Unlock() for _, rule := range rulesSnapshot { wg.Add(1) // BUG(julius): Look at fixing thundering herd. go func(rule Rule) { defer wg.Done() start := time.Now() vector, err := rule.eval(now, m.queryEngine) duration := time.Since(start) if err != nil { evalFailures.Inc() log.Warnf("Error while evaluating rule %q: %s", rule, err) return } switch r := rule.(type) { case *AlertingRule: m.queueAlertNotifications(r, now) evalDuration.WithLabelValues(ruleTypeAlerting).Observe( float64(duration / time.Millisecond), ) case *RecordingRule: evalDuration.WithLabelValues(ruleTypeRecording).Observe( float64(duration / time.Millisecond), ) default: panic(fmt.Errorf("Unknown rule type: %T", rule)) } for _, s := range vector { m.sampleAppender.Append(&model.Sample{ Metric: s.Metric.Metric, Value: s.Value, Timestamp: s.Timestamp, }) } }(rule) } wg.Wait() }
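runIteration and the Group.eval variants below share the same concurrency shape: one goroutine per rule, with the loop variable handed to the goroutine as an argument so each evaluation sees its own rule rather than the shared range variable. A minimal sketch of just that pattern (the item names and the work done are illustrative):

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	rules := []string{"alert:HighErrorRate", "record:job:requests:rate5m"}

	var wg sync.WaitGroup
	for _, r := range rules {
		wg.Add(1)
		// Pass the loop variable as a parameter so every goroutine gets its
		// own copy instead of closing over the mutating range variable.
		go func(r string) {
			defer wg.Done()
			fmt.Println("evaluating", r)
		}(r)
	}
	wg.Wait()
}
```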
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			start := time.Now()
			evalTotal.Inc()

			vector, err := rule.eval(now, g.opts.QueryEngine)
			if err != nil {
				// Canceled queries are intentional termination of queries. This normally
				// happens on shutdown and thus we skip logging of any errors here.
				if _, ok := err.(promql.ErrQueryCanceled); !ok {
					log.Warnf("Error while evaluating rule %q: %s", rule, err)
				}
				evalFailures.Inc()
			}
			var rtyp ruleType

			switch r := rule.(type) {
			case *AlertingRule:
				rtyp = ruleTypeAlert
				g.sendAlerts(r, now)

			case *RecordingRule:
				rtyp = ruleTypeRecording

			default:
				panic(fmt.Errorf("unknown rule type: %T", rule))
			}

			evalDuration.WithLabelValues(string(rtyp)).Observe(
				float64(time.Since(start)) / float64(time.Second),
			)

			for _, s := range vector {
				g.opts.SampleAppender.Append(s)
			}
		}(rule)
	}
	wg.Wait()
}
func (h *Handler) consoles(w http.ResponseWriter, r *http.Request) { ctx := route.Context(r) name := route.Param(ctx, "filepath") file, err := http.Dir(h.options.ConsoleTemplatesPath).Open(name) if err != nil { http.Error(w, err.Error(), http.StatusNotFound) return } text, err := ioutil.ReadAll(file) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } // Provide URL parameters as a map for easy use. Advanced users may have need for // parameters beyond the first, so provide RawParams. rawParams, err := url.ParseQuery(r.URL.RawQuery) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return } params := map[string]string{} for k, v := range rawParams { params[k] = v[0] } data := struct { RawParams url.Values Params map[string]string Path string }{ RawParams: rawParams, Params: params, Path: strings.TrimLeft(name, "/"), } tmpl := template.NewTemplateExpander(string(text), "__console_"+name, data, model.Now(), h.queryEngine, h.options.ExternalURL.Path) filenames, err := filepath.Glob(h.options.ConsoleLibrariesPath + "/*.lib") if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } result, err := tmpl.ExpandHTML(filenames) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } io.WriteString(w, result) }
// preloadChunks is an internal helper method. func (s *memorySeries) preloadChunks( indexes []int, fp model.Fingerprint, mss *MemorySeriesStorage, ) (SeriesIterator, error) { loadIndexes := []int{} pinnedChunkDescs := make([]*chunk.Desc, 0, len(indexes)) for _, idx := range indexes { cd := s.chunkDescs[idx] pinnedChunkDescs = append(pinnedChunkDescs, cd) cd.Pin(mss.evictRequests) // Have to pin everything first to prevent immediate eviction on chunk loading. if cd.IsEvicted() { loadIndexes = append(loadIndexes, idx) } } chunk.Ops.WithLabelValues(chunk.Pin).Add(float64(len(pinnedChunkDescs))) if len(loadIndexes) > 0 { if s.chunkDescsOffset == -1 { panic("requested loading chunks from persistence in a situation where we must not have persisted data for chunk descriptors in memory") } chunks, err := mss.loadChunks(fp, loadIndexes, s.chunkDescsOffset) if err != nil { // Unpin the chunks since we won't return them as pinned chunks now. for _, cd := range pinnedChunkDescs { cd.Unpin(mss.evictRequests) } chunk.Ops.WithLabelValues(chunk.Unpin).Add(float64(len(pinnedChunkDescs))) return nopIter, err } for i, c := range chunks { s.chunkDescs[loadIndexes[i]].SetChunk(c) } } if !s.headChunkClosed && indexes[len(indexes)-1] == len(s.chunkDescs)-1 { s.headChunkUsedByIterator = true } curriedQuarantineSeries := func(err error) { mss.quarantineSeries(fp, s.metric, err) } iter := &boundedIterator{ it: s.newIterator(pinnedChunkDescs, curriedQuarantineSeries, mss.evictRequests), start: model.Now().Add(-mss.dropAfter), } return iter, nil }
func TestRuleEval(t *testing.T) { storage, closer := local.NewTestStorage(t, 2) defer closer.Close() engine := promql.NewEngine(storage, nil) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() now := model.Now() suite := []struct { name string expr promql.Expr labels model.LabelSet result model.Vector }{ { name: "nolabels", expr: &promql.NumberLiteral{Val: 1}, labels: model.LabelSet{}, result: model.Vector{&model.Sample{ Value: 1, Timestamp: now, Metric: model.Metric{"__name__": "nolabels"}, }}, }, { name: "labels", expr: &promql.NumberLiteral{Val: 1}, labels: model.LabelSet{"foo": "bar"}, result: model.Vector{&model.Sample{ Value: 1, Timestamp: now, Metric: model.Metric{"__name__": "labels", "foo": "bar"}, }}, }, } for _, test := range suite { rule := NewRecordingRule(test.name, test.expr, test.labels) result, err := rule.eval(ctx, now, engine, "") if err != nil { t.Fatalf("Error evaluating %s", test.name) } if !reflect.DeepEqual(result, test.result) { t.Fatalf("Error: expected %q, got %q", test.result, result) } } }
func (h *Handler) executeTemplate(w http.ResponseWriter, name string, data interface{}) {
	text, err := h.getTemplate(name)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	tmpl := template.NewTemplateExpander(text, name, data, model.Now(), h.queryEngine, h.options.ExternalURL.Path)
	tmpl.Funcs(tmplFuncs(h.consolesPath(), h.options))

	result, err := tmpl.ExpandHTML(nil)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	io.WriteString(w, result)
}
// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
// In the future a single group will be evaluated sequentially to properly handle
// rule dependency.
func (g *Group) eval() {
	var (
		now = model.Now()
		wg  sync.WaitGroup
	)

	for _, rule := range g.rules {
		wg.Add(1)
		// BUG(julius): Look at fixing thundering herd.
		go func(rule Rule) {
			defer wg.Done()

			start := time.Now()
			evalTotal.Inc()

			vector, err := rule.eval(now, g.opts.QueryEngine)
			if err != nil {
				evalFailures.Inc()
				log.Warnf("Error while evaluating rule %q: %s", rule, err)
			}
			var rtyp ruleType

			switch r := rule.(type) {
			case *AlertingRule:
				rtyp = ruleTypeAlert
				g.sendAlerts(r, now)

			case *RecordingRule:
				rtyp = ruleTypeRecording

			default:
				panic(fmt.Errorf("unknown rule type: %T", rule))
			}

			evalDuration.WithLabelValues(string(rtyp)).Observe(
				float64(time.Since(start)) / float64(time.Second),
			)

			for _, s := range vector {
				g.opts.SampleAppender.Append(s)
			}
		}(rule)
	}
	wg.Wait()
}
func (g *Prometheus) gatherURL(url string, acc telegraf.Accumulator) error {
	resp, err := client.Get(url)
	if err != nil {
		return fmt.Errorf("error making HTTP request to %s: %s", url, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s returned HTTP status %s", url, resp.Status)
	}

	format := expfmt.ResponseFormat(resp.Header)

	decoder := expfmt.NewDecoder(resp.Body, format)
	options := &expfmt.DecodeOptions{
		Timestamp: model.Now(),
	}
	sampleDecoder := &expfmt.SampleDecoder{
		Dec:  decoder,
		Opts: options,
	}

	for {
		var samples model.Vector
		err := sampleDecoder.Decode(&samples)
		if err == io.EOF {
			break
		} else if err != nil {
			return fmt.Errorf("error processing samples for %s: %s", url, err)
		}

		for _, sample := range samples {
			tags := make(map[string]string)
			for key, value := range sample.Metric {
				if key == model.MetricNameLabel {
					continue
				}
				tags[string(key)] = string(value)
			}
			acc.Add("prometheus_"+string(sample.Metric[model.MetricNameLabel]),
				float64(sample.Value), tags)
		}
	}

	return nil
}
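gatherURL couples the HTTP fetch with the decode loop; the decoding half can be exercised on its own with the public expfmt API. A sketch, assuming the text exposition format and an illustrative metric name:

```go
package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/prometheus/common/expfmt"
	"github.com/prometheus/common/model"
)

func main() {
	in := "some_metric{job=\"test\"} 42\n"

	sd := &expfmt.SampleDecoder{
		Dec: expfmt.NewDecoder(strings.NewReader(in), expfmt.FmtText),
		// Samples without an explicit timestamp get the decode time.
		Opts: &expfmt.DecodeOptions{Timestamp: model.Now()},
	}

	var all model.Vector
	for {
		var smpls model.Vector
		err := sd.Decode(&smpls)
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println("decode error:", err)
			return
		}
		all = append(all, smpls...)
	}
	fmt.Println(all)
}
```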
func TestTargetRecordScrapeHealth(t *testing.T) { var ( testTarget = newTestTarget("example.url:80", 0, model.LabelSet{model.JobLabel: "testjob"}) now = model.Now() appender = &collectResultAppender{} ) testTarget.report(appender, now.Time(), 2*time.Second, nil) result := appender.result if len(result) != 2 { t.Fatalf("Expected two samples, got %d", len(result)) } actual := result[0] expected := &model.Sample{ Metric: model.Metric{ model.MetricNameLabel: scrapeHealthMetricName, model.InstanceLabel: "example.url:80", model.JobLabel: "testjob", }, Timestamp: now, Value: 1, } if !actual.Equal(expected) { t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual) } actual = result[1] expected = &model.Sample{ Metric: model.Metric{ model.MetricNameLabel: scrapeDurationMetricName, model.InstanceLabel: "example.url:80", model.JobLabel: "testjob", }, Timestamp: now, Value: 2.0, } if !actual.Equal(expected) { t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual) } }
// NewIterator implements Storage. func (s *memorySeriesStorage) NewIterator(fp model.Fingerprint) SeriesIterator { s.fpLocker.Lock(fp) defer s.fpLocker.Unlock(fp) series, ok := s.fpToSeries.get(fp) if !ok { // Oops, no series for fp found. That happens if, after // preloading is done, the whole series is identified as old // enough for purging and hence purged for good. As there is no // data left to iterate over, return an iterator that will never // return any values. return nopSeriesIterator{} } return &boundedIterator{ it: series.newIterator(), start: model.Now().Add(-s.dropAfter), } }
// eval runs a single evaluation cycle in which all rules are evaluated in parallel. // In the future a single group will be evaluated sequentially to properly handle // rule dependency. func (g *Group) eval() { var ( now = model.Now() wg sync.WaitGroup ) for _, rule := range g.rules { rtyp := string(typeForRule(rule)) wg.Add(1) // BUG(julius): Look at fixing thundering herd. go func(rule Rule) { defer wg.Done() defer func(t time.Time) { evalDuration.WithLabelValues(rtyp).Observe(float64(time.Since(t)) / float64(time.Second)) }(time.Now()) evalTotal.WithLabelValues(rtyp).Inc() vector, err := rule.eval(now, g.opts.QueryEngine) if err != nil { // Canceled queries are intentional termination of queries. This normally // happens on shutdown and thus we skip logging of any errors here. if _, ok := err.(promql.ErrQueryCanceled); !ok { log.Warnf("Error while evaluating rule %q: %s", rule, err) } evalFailures.WithLabelValues(rtyp).Inc() return } if ar, ok := rule.(*AlertingRule); ok { g.sendAlerts(ar, now) } for _, s := range vector { g.opts.SampleAppender.Append(s) } }(rule) } wg.Wait() }
// cycleThroughArchivedFingerprints returns a channel that emits fingerprints // for archived series in a throttled fashion. It continues to cycle through all // archived fingerprints until s.loopStopping is closed. func (s *memorySeriesStorage) cycleThroughArchivedFingerprints() chan model.Fingerprint { archivedFingerprints := make(chan model.Fingerprint) go func() { defer close(archivedFingerprints) for { archivedFPs, err := s.persistence.fingerprintsModifiedBefore( model.Now().Add(-s.dropAfter), ) if err != nil { log.Error("Failed to lookup archived fingerprint ranges: ", err) s.waitForNextFP(0, 1) continue } // Initial wait, also important if there are no FPs yet. if !s.waitForNextFP(len(archivedFPs), 1) { return } begin := time.Now() for _, fp := range archivedFPs { select { case archivedFingerprints <- fp: case <-s.loopStopping: return } // Never speed up maintenance of archived FPs. s.waitForNextFP(len(archivedFPs), 1) } if len(archivedFPs) > 0 { log.Infof( "Completed maintenance sweep through %d archived fingerprints in %v.", len(archivedFPs), time.Since(begin), ) } } }() return archivedFingerprints }
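The select on loopStopping inside the send is what lets the producer goroutine above exit promptly once the storage shuts down, instead of blocking forever on a channel nobody reads anymore. A stripped-down sketch of that producer shape (names and the work loop are illustrative):

```go
package main

import "fmt"

// produce emits values on the returned channel until stop is closed. The
// select around the send keeps the goroutine from leaking if the consumer
// goes away.
func produce(stop <-chan struct{}) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out)
		for i := 0; ; i++ {
			select {
			case out <- i:
			case <-stop:
				return
			}
		}
	}()
	return out
}

func main() {
	stop := make(chan struct{})
	out := produce(stop)

	for i := 0; i < 3; i++ {
		fmt.Println(<-out)
	}
	close(stop)

	// Drain until the producer closes the channel, mirroring the
	// "wait until both channels are closed" step at the end of loop().
	for range out {
	}
}
```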
// Push pushes Prometheus metrics to the configured Graphite server. func (b *Bridge) Push() error { mfs, err := b.g.Gather() if err != nil || len(mfs) == 0 { switch b.errorHandling { case AbortOnError: return err case ContinueOnError: if b.logger != nil { b.logger.Println("continue on error:", err) } default: panic("unrecognized error handling value") } } conn, err := net.DialTimeout("tcp", b.url, b.timeout) if err != nil { return err } defer conn.Close() return writeMetrics(conn, mfs, b.prefix, model.Now()) }
// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still // has to be called to start the storage. func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) Storage { s := &memorySeriesStorage{ fpLocker: newFingerprintLocker(1024), options: o, loopStopping: make(chan struct{}), loopStopped: make(chan struct{}), logThrottlingStopped: make(chan struct{}), throttled: make(chan struct{}, 1), maxMemoryChunks: o.MemoryChunks, dropAfter: o.PersistenceRetentionPeriod, checkpointInterval: o.CheckpointInterval, checkpointDirtySeriesLimit: o.CheckpointDirtySeriesLimit, archiveHighWatermark: model.Now().Add(-headChunkTimeout), maxChunksToPersist: o.MaxChunksToPersist, evictList: list.New(), evictRequests: make(chan evictRequest, evictRequestsCap), evictStopping: make(chan struct{}), evictStopped: make(chan struct{}), quarantineRequests: make(chan quarantineRequest, quarantineRequestsCap), quarantineStopping: make(chan struct{}), quarantineStopped: make(chan struct{}), persistErrors: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "persist_errors_total", Help: "The total number of errors while persisting chunks.", }), numSeries: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "memory_series", Help: "The current number of series in memory.", }), seriesOps: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "series_ops_total", Help: "The total number of series operations by their type.", }, []string{opTypeLabel}, ), ingestedSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "ingested_samples_total", Help: "The total number of samples ingested.", }), outOfOrderSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "out_of_order_samples_total", Help: "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.", }), nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "non_existent_series_matches_total", Help: "How often a non-existent series was referred to during label matching or chunk preloading. This is an indication of outdated label indexes.", }), maintainSeriesDuration: prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: namespace, Subsystem: subsystem, Name: "maintain_series_duration_milliseconds", Help: "The duration (in milliseconds) it took to perform maintenance on a series.", }, []string{seriesLocationLabel}, ), persistenceUrgencyScore: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "persistence_urgency_score", Help: "A score of urgency to persist chunks, 0 is least urgent, 1 most.", }), rushedMode: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "rushed_mode", Help: "1 if the storage is in rushed mode, 0 otherwise. In rushed mode, the system behaves as if the persistence_urgency_score is 1.", }), } return s }
// maintainMemorySeries maintains a series that is in memory (i.e. not // archived). It returns true if the method has changed from clean to dirty // (i.e. it is inconsistent with the latest checkpoint now so that in case of a // crash a recovery operation that requires a disk seek needed to be applied). // // The method first closes the head chunk if it was not touched for the duration // of headChunkTimeout. // // Then it determines the chunks that need to be purged and the chunks that need // to be persisted. Depending on the result, it does the following: // // - If all chunks of a series need to be purged, the whole series is deleted // for good and the method returns false. (Detecting non-existence of a series // file does not require a disk seek.) // // - If any chunks need to be purged (but not all of them), it purges those // chunks from memory and rewrites the series file on disk, leaving out the // purged chunks and appending all chunks not yet persisted (with the exception // of a still open head chunk). // // - If no chunks on disk need to be purged, but chunks need to be persisted, // those chunks are simply appended to the existing series file (or the file is // created if it does not exist yet). // // - If no chunks need to be purged and no chunks need to be persisted, nothing // happens in this step. // // Next, the method checks if all chunks in the series are evicted. In that // case, it archives the series and returns true. // // Finally, it evicts chunkDescs if there are too many. func (s *memorySeriesStorage) maintainMemorySeries( fp model.Fingerprint, beforeTime model.Time, ) (becameDirty bool) { defer func(begin time.Time) { s.maintainSeriesDuration.WithLabelValues(maintainInMemory).Observe( float64(time.Since(begin)) / float64(time.Millisecond), ) }(time.Now()) s.fpLocker.Lock(fp) defer s.fpLocker.Unlock(fp) series, ok := s.fpToSeries.get(fp) if !ok { // Series is actually not in memory, perhaps archived or dropped in the meantime. return false } defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc() if series.maybeCloseHeadChunk() { s.incNumChunksToPersist(1) } seriesWasDirty := series.dirty if s.writeMemorySeries(fp, series, beforeTime) { // Series is gone now, we are done. return false } iOldestNotEvicted := -1 for i, cd := range series.chunkDescs { if !cd.isEvicted() { iOldestNotEvicted = i break } } // Archive if all chunks are evicted. Also make sure the last sample has // an age of at least headChunkTimeout (which is very likely anyway). if iOldestNotEvicted == -1 && model.Now().Sub(series.lastTime) > headChunkTimeout { s.fpToSeries.del(fp) s.numSeries.Dec() s.persistence.archiveMetric(fp, series.metric, series.firstTime(), series.lastTime) s.seriesOps.WithLabelValues(archive).Inc() oldWatermark := atomic.LoadInt64((*int64)(&s.archiveHighWatermark)) if oldWatermark < int64(series.lastTime) { if !atomic.CompareAndSwapInt64( (*int64)(&s.archiveHighWatermark), oldWatermark, int64(series.lastTime), ) { panic("s.archiveHighWatermark modified outside of maintainMemorySeries") } } return } // If we are here, the series is not archived, so check for chunkDesc // eviction next. series.evictChunkDescs(iOldestNotEvicted) return series.dirty && !seriesWasDirty }
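maintainMemorySeries advances archiveHighWatermark with a single compare-and-swap and panics if it fails, which is valid only because the fingerprint lock makes it the sole writer. Without that guarantee, the usual shape is a load/compare/swap retry loop. A small stand-alone sketch of such a monotonic watermark (names are illustrative):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// raiseWatermark atomically raises *w to v if v is larger, retrying when a
// concurrent writer slips in between the load and the swap.
func raiseWatermark(w *int64, v int64) {
	for {
		old := atomic.LoadInt64(w)
		if v <= old {
			return // The watermark only ever moves forward.
		}
		if atomic.CompareAndSwapInt64(w, old, v) {
			return
		}
	}
}

func main() {
	var watermark int64
	var wg sync.WaitGroup
	for i := int64(1); i <= 100; i++ {
		wg.Add(1)
		go func(v int64) {
			defer wg.Done()
			raiseWatermark(&watermark, v)
		}(i)
	}
	wg.Wait()
	fmt.Println(atomic.LoadInt64(&watermark)) // 100
}
```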
// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still // has to be called to start the storage. func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage { s := &MemorySeriesStorage{ fpLocker: newFingerprintLocker(o.NumMutexes), options: o, loopStopping: make(chan struct{}), loopStopped: make(chan struct{}), logThrottlingStopped: make(chan struct{}), throttled: make(chan struct{}, 1), maxMemoryChunks: o.MemoryChunks, dropAfter: o.PersistenceRetentionPeriod, checkpointInterval: o.CheckpointInterval, checkpointDirtySeriesLimit: o.CheckpointDirtySeriesLimit, archiveHighWatermark: model.Now().Add(-headChunkTimeout), maxChunksToPersist: o.MaxChunksToPersist, evictList: list.New(), evictRequests: make(chan evictRequest, evictRequestsCap), evictStopping: make(chan struct{}), evictStopped: make(chan struct{}), quarantineRequests: make(chan quarantineRequest, quarantineRequestsCap), quarantineStopping: make(chan struct{}), quarantineStopped: make(chan struct{}), persistErrors: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "persist_errors_total", Help: "The total number of errors while persisting chunks.", }), numSeries: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "memory_series", Help: "The current number of series in memory.", }), seriesOps: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "series_ops_total", Help: "The total number of series operations by their type.", }, []string{opTypeLabel}, ), ingestedSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "ingested_samples_total", Help: "The total number of samples ingested.", }), discardedSamplesCount: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "out_of_order_samples_total", Help: "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.", }, []string{discardReasonLabel}, ), nonExistentSeriesMatchesCount: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, Name: "non_existent_series_matches_total", Help: "How often a non-existent series was referred to during label matching or chunk preloading. This is an indication of outdated label indexes.", }), maintainSeriesDuration: prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: namespace, Subsystem: subsystem, Name: "maintain_series_duration_seconds", Help: "The duration in seconds it took to perform maintenance on a series.", }, []string{seriesLocationLabel}, ), persistenceUrgencyScore: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "persistence_urgency_score", Help: "A score of urgency to persist chunks, 0 is least urgent, 1 most.", }), rushedMode: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: subsystem, Name: "rushed_mode", Help: "1 if the storage is in rushed mode, 0 otherwise. In rushed mode, the system behaves as if the persistence_urgency_score is 1.", }), } // Initialize metric vectors. // TODO(beorn7): Rework once we have a utility function for it in client_golang. 
s.discardedSamplesCount.WithLabelValues(outOfOrderTimestamp) s.discardedSamplesCount.WithLabelValues(duplicateSample) s.maintainSeriesDuration.WithLabelValues(maintainInMemory) s.maintainSeriesDuration.WithLabelValues(maintainArchived) s.seriesOps.WithLabelValues(create) s.seriesOps.WithLabelValues(archive) s.seriesOps.WithLabelValues(unarchive) s.seriesOps.WithLabelValues(memoryPurge) s.seriesOps.WithLabelValues(archivePurge) s.seriesOps.WithLabelValues(requestedPurge) s.seriesOps.WithLabelValues(memoryMaintenance) s.seriesOps.WithLabelValues(archiveMaintenance) s.seriesOps.WithLabelValues(completedQurantine) s.seriesOps.WithLabelValues(droppedQuarantine) s.seriesOps.WithLabelValues(failedQuarantine) return s }
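The block of WithLabelValues calls above pre-creates every expected label combination so the counters and summaries are exported (as 0) before the first real event, which keeps rate() and dashboards from missing the initial increment. A minimal sketch of the same technique with the public client_golang API (metric and label names are illustrative):

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	seriesOps := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "example_series_ops_total",
			Help: "The total number of series operations by their type.",
		},
		[]string{"type"},
	)
	prometheus.MustRegister(seriesOps)

	// Touch every expected label value once so each counter is exported as 0
	// right away instead of appearing only after its first increment.
	for _, t := range []string{"create", "archive", "unarchive", "purge"} {
		seriesOps.WithLabelValues(t)
	}

	seriesOps.WithLabelValues("create").Inc()
	fmt.Println("metric vector registered and pre-initialized")
}
```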
func TestQuarantineMetric(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 2)
	defer closer.Close()

	chunkFileExists := func(fp model.Fingerprint) (bool, error) {
		f, err := s.persistence.openChunkFileForReading(fp)
		if err == nil {
			f.Close()
			return true, nil
		}
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}

	m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"}
	m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"}
	m3 := model.Metric{model.MetricNameLabel: "test", "n1": "v3"}

	N := 120000

	for j, m := range []model.Metric{m1, m2, m3} {
		for i := 0; i < N; i++ {
			smpl := &model.Sample{
				Metric:    m,
				Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals.
				Value:     model.SampleValue(j),
			}
			s.Append(smpl)
		}
	}
	s.WaitForIndexing()

	// Archive m3, but first maintain it so that at least something is written to disk.
	fpToBeArchived := m3.FastFingerprint()
	s.maintainMemorySeries(fpToBeArchived, 0)
	s.fpLocker.Lock(fpToBeArchived)
	s.fpToSeries.del(fpToBeArchived)
	s.persistence.archiveMetric(fpToBeArchived, m3, 0, insertStart.Add(time.Duration(N-1)*time.Millisecond))
	s.fpLocker.Unlock(fpToBeArchived)

	// Corrupt the series file for m3.
	f, err := os.Create(s.persistence.fileNameForFingerprint(fpToBeArchived))
	if err != nil {
		t.Fatal(err)
	}
	if _, err := f.WriteString("This is clearly not the content of a series file."); err != nil {
		t.Fatal(err)
	}
	if err := f.Close(); err != nil {
		t.Fatal(err)
	}

	fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"})
	if len(fps) != 3 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps))
	}

	pl := s.NewPreloader()
	// This will access the corrupt file and lead to quarantining.
	pl.PreloadInstant(fpToBeArchived, now.Add(-2*time.Hour), time.Minute)
	pl.Close()
	time.Sleep(time.Second) // Give time to quarantine. TODO(beorn7): Find a better way to wait.
	s.WaitForIndexing()

	fps2 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps2) != 2 {
		t.Errorf("unexpected number of fingerprints: %d", len(fps2))
	}

	exists, err := chunkFileExists(fpToBeArchived)
	if err != nil {
		t.Fatal(err)
	}
	if exists {
		t.Errorf("chunk file exists for fp=%v", fpToBeArchived)
	}
}
func TestDropMetrics(t *testing.T) { now := model.Now() insertStart := now.Add(-2 * time.Hour) s, closer := NewTestStorage(t, 1) defer closer.Close() chunkFileExists := func(fp model.Fingerprint) (bool, error) { f, err := s.persistence.openChunkFileForReading(fp) if err == nil { f.Close() return true, nil } if os.IsNotExist(err) { return false, nil } return false, err } m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"} m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"} m3 := model.Metric{model.MetricNameLabel: "test", "n1": "v3"} N := 120000 for j, m := range []model.Metric{m1, m2, m3} { for i := 0; i < N; i++ { smpl := &model.Sample{ Metric: m, Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals. Value: model.SampleValue(j), } s.Append(smpl) } } s.WaitForIndexing() // Archive m3, but first maintain it so that at least something is written to disk. fpToBeArchived := m3.FastFingerprint() s.maintainMemorySeries(fpToBeArchived, 0) s.fpLocker.Lock(fpToBeArchived) s.fpToSeries.del(fpToBeArchived) if err := s.persistence.archiveMetric( fpToBeArchived, m3, 0, insertStart.Add(time.Duration(N-1)*time.Millisecond), ); err != nil { t.Error(err) } s.fpLocker.Unlock(fpToBeArchived) fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"}) if len(fps) != 3 { t.Errorf("unexpected number of fingerprints: %d", len(fps)) } fpList := model.Fingerprints{m1.FastFingerprint(), m2.FastFingerprint(), fpToBeArchived} s.DropMetricsForFingerprints(fpList[0]) s.WaitForIndexing() fps2 := s.fingerprintsForLabelPairs(model.LabelPair{ Name: model.MetricNameLabel, Value: "test", }) if len(fps2) != 2 { t.Errorf("unexpected number of fingerprints: %d", len(fps2)) } it := s.NewIterator(fpList[0]) if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 { t.Errorf("unexpected number of samples: %d", len(vals)) } it = s.NewIterator(fpList[1]) if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N { t.Errorf("unexpected number of samples: %d", len(vals)) } exists, err := chunkFileExists(fpList[2]) if err != nil { t.Fatal(err) } if !exists { t.Errorf("chunk file does not exist for fp=%v", fpList[2]) } s.DropMetricsForFingerprints(fpList...) s.WaitForIndexing() fps3 := s.fingerprintsForLabelPairs(model.LabelPair{ Name: model.MetricNameLabel, Value: "test", }) if len(fps3) != 0 { t.Errorf("unexpected number of fingerprints: %d", len(fps3)) } it = s.NewIterator(fpList[0]) if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 { t.Errorf("unexpected number of samples: %d", len(vals)) } it = s.NewIterator(fpList[1]) if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 { t.Errorf("unexpected number of samples: %d", len(vals)) } exists, err = chunkFileExists(fpList[2]) if err != nil { t.Fatal(err) } if exists { t.Errorf("chunk file still exists for fp=%v", fpList[2]) } }
// eval runs a single evaluation cycle in which all rules are evaluated in parallel. // In the future a single group will be evaluated sequentially to properly handle // rule dependency. func (g *Group) eval() { var ( now = model.Now() wg sync.WaitGroup ) for _, rule := range g.rules { rtyp := string(typeForRule(rule)) wg.Add(1) // BUG(julius): Look at fixing thundering herd. go func(rule Rule) { defer wg.Done() defer func(t time.Time) { evalDuration.WithLabelValues(rtyp).Observe(time.Since(t).Seconds()) }(time.Now()) evalTotal.WithLabelValues(rtyp).Inc() vector, err := rule.eval(g.opts.Context, now, g.opts.QueryEngine, g.opts.ExternalURL.Path) if err != nil { // Canceled queries are intentional termination of queries. This normally // happens on shutdown and thus we skip logging of any errors here. if _, ok := err.(promql.ErrQueryCanceled); !ok { log.Warnf("Error while evaluating rule %q: %s", rule, err) } evalFailures.WithLabelValues(rtyp).Inc() return } if ar, ok := rule.(*AlertingRule); ok { g.sendAlerts(ar, now) } var ( numOutOfOrder = 0 numDuplicates = 0 ) for _, s := range vector { if err := g.opts.SampleAppender.Append(s); err != nil { switch err { case local.ErrOutOfOrderSample: numOutOfOrder++ log.With("sample", s).With("error", err).Debug("Rule evaluation result discarded") case local.ErrDuplicateSampleForTimestamp: numDuplicates++ log.With("sample", s).With("error", err).Debug("Rule evaluation result discarded") default: log.With("sample", s).With("error", err).Warn("Rule evaluation result discarded") } } } if numOutOfOrder > 0 { log.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order result from rule evaluation") } if numDuplicates > 0 { log.With("numDropped", numDuplicates).Warn("Error on ingesting results from rule evaluation with different value but same timestamp") } }(rule) } wg.Wait() }
func testEvictAndLoadChunkDescs(t *testing.T, encoding chunkEncoding) { samples := make(model.Samples, 10000) for i := range samples { samples[i] = &model.Sample{ Timestamp: model.Time(2 * i), Value: model.SampleValue(float64(i * i)), } } // Give last sample a timestamp of now so that the head chunk will not // be closed (which would then archive the time series later as // everything will get evicted). samples[len(samples)-1] = &model.Sample{ Timestamp: model.Now(), Value: model.SampleValue(3.14), } s, closer := NewTestStorage(t, encoding) defer closer.Close() // Adjust memory chunks to lower value to see evictions. s.maxMemoryChunks = 1 for _, sample := range samples { s.Append(sample) } s.WaitForIndexing() fp := model.Metric{}.FastFingerprint() series, ok := s.fpToSeries.get(fp) if !ok { t.Fatal("could not find series") } oldLen := len(series.chunkDescs) // Maintain series without any dropped chunks. s.maintainMemorySeries(fp, 0) // Give the evict goroutine an opportunity to run. time.Sleep(50 * time.Millisecond) // Maintain series again to trigger chunkDesc eviction s.maintainMemorySeries(fp, 0) if oldLen <= len(series.chunkDescs) { t.Errorf("Expected number of chunkDescs to decrease, old number %d, current number %d.", oldLen, len(series.chunkDescs)) } // Load everything back. p := s.NewPreloader() p.PreloadRange(fp, 0, 100000, time.Hour) if oldLen != len(series.chunkDescs) { t.Errorf("Expected number of chunkDescs to have reached old value again, old number %d, current number %d.", oldLen, len(series.chunkDescs)) } p.Close() // Now maintain series with drops to make sure nothing crazy happens. s.maintainMemorySeries(fp, 100000) if len(series.chunkDescs) != 1 { t.Errorf("Expected exactly one chunkDesc left, got %d.", len(series.chunkDescs)) } }
func TestDropMetrics(t *testing.T) {
	now := model.Now()
	insertStart := now.Add(-2 * time.Hour)

	s, closer := NewTestStorage(t, 1)
	defer closer.Close()

	m1 := model.Metric{model.MetricNameLabel: "test", "n1": "v1"}
	m2 := model.Metric{model.MetricNameLabel: "test", "n1": "v2"}

	N := 120000

	for j, m := range []model.Metric{m1, m2} {
		for i := 0; i < N; i++ {
			smpl := &model.Sample{
				Metric:    m,
				Timestamp: insertStart.Add(time.Duration(i) * time.Millisecond), // 1 millisecond intervals.
				Value:     model.SampleValue(j),
			}
			s.Append(smpl)
		}
	}
	s.WaitForIndexing()

	fps := s.fingerprintsForLabelPairs(model.LabelPair{Name: model.MetricNameLabel, Value: "test"})
	if len(fps) != 2 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps))
	}

	var fpList model.Fingerprints

	for fp := range fps {
		it := s.NewIterator(fp)
		if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N {
			t.Fatalf("unexpected number of samples: %d", len(vals))
		}
		fpList = append(fpList, fp)
	}

	s.DropMetricsForFingerprints(fpList[0])
	s.WaitForIndexing()

	fps2 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps2) != 1 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps2))
	}

	it := s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != N {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}

	s.DropMetricsForFingerprints(fpList...)
	s.WaitForIndexing()

	fps3 := s.fingerprintsForLabelPairs(model.LabelPair{
		Name: model.MetricNameLabel, Value: "test",
	})
	if len(fps3) != 0 {
		t.Fatalf("unexpected number of fingerprints: %d", len(fps3))
	}

	it = s.NewIterator(fpList[0])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
	it = s.NewIterator(fpList[1])
	if vals := it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}); len(vals) != 0 {
		t.Fatalf("unexpected number of samples: %d", len(vals))
	}
}
func TestRetentionCutoff(t *testing.T) { now := model.Now() insertStart := now.Add(-2 * time.Hour) s, closer := NewTestStorage(t, 1) defer closer.Close() // Stop maintenance loop to prevent actual purging. s.loopStopping <- struct{}{} s.dropAfter = 1 * time.Hour for i := 0; i < 120; i++ { smpl := &model.Sample{ Metric: model.Metric{"job": "test"}, Timestamp: insertStart.Add(time.Duration(i) * time.Minute), // 1 minute intervals. Value: 1, } s.Append(smpl) } s.WaitForIndexing() var fp model.Fingerprint for f := range s.fingerprintsForLabelPairs(model.LabelPair{Name: "job", Value: "test"}) { fp = f break } pl := s.NewPreloader() defer pl.Close() // Preload everything. err := pl.PreloadRange(fp, insertStart, now, 5*time.Minute) if err != nil { t.Fatalf("Error preloading outdated chunks: %s", err) } it := s.NewIterator(fp) vals := it.ValueAtTime(now.Add(-61 * time.Minute)) if len(vals) != 0 { t.Errorf("unexpected result for timestamp before retention period") } vals = it.RangeValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}) // We get 59 values here because the model.Now() is slightly later // than our now. if len(vals) != 59 { t.Errorf("expected 59 values but got %d", len(vals)) } if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt { t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time()) } vals = it.BoundaryValues(metric.Interval{OldestInclusive: insertStart, NewestInclusive: now}) if len(vals) != 2 { t.Errorf("expected 2 values but got %d", len(vals)) } if expt := now.Add(-1 * time.Hour).Add(time.Minute); vals[0].Timestamp != expt { t.Errorf("unexpected timestamp for first sample: %v, expected %v", vals[0].Timestamp.Time(), expt.Time()) } }
func TestTextDecoder(t *testing.T) { var ( ts = model.Now() in = ` # Only a quite simple scenario with two metric families. # More complicated tests of the parser itself can be found in the text package. # TYPE mf2 counter mf2 3 mf1{label="value1"} -3.14 123456 mf1{label="value2"} 42 mf2 4 ` out = model.Vector{ &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "mf1", "label": "value1", }), Value: -3.14, Timestamp: 123456, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "mf1", "label": "value2", }), Value: 42, Timestamp: ts, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "mf2", }), Value: 3, Timestamp: ts, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "mf2", }), Value: 4, Timestamp: ts, }, } ) dec := &SampleDecoder{ Dec: &textDecoder{r: strings.NewReader(in)}, Opts: &DecodeOptions{ Timestamp: ts, }, } var all model.Vector for { var smpls model.Vector err := dec.Decode(&smpls) if err == io.EOF { break } if err != nil { t.Fatal(err) } all = append(all, smpls...) } sort.Sort(all) sort.Sort(out) if !reflect.DeepEqual(all, out) { t.Fatalf("output does not match") } }
func createRandomSamples(metricName string, minLen int) model.Samples { type valueCreator func() model.SampleValue type deltaApplier func(model.SampleValue) model.SampleValue var ( maxMetrics = 5 maxStreakLength = 500 maxTimeDelta = 10000 maxTimeDeltaFactor = 10 timestamp = model.Now() - model.Time(maxTimeDelta*maxTimeDeltaFactor*minLen/4) // So that some timestamps are in the future. generators = []struct { createValue valueCreator applyDelta []deltaApplier }{ { // "Boolean". createValue: func() model.SampleValue { return model.SampleValue(rand.Intn(2)) }, applyDelta: []deltaApplier{ func(_ model.SampleValue) model.SampleValue { return model.SampleValue(rand.Intn(2)) }, }, }, { // Integer with int deltas of various byte length. createValue: func() model.SampleValue { return model.SampleValue(rand.Int63() - 1<<62) }, applyDelta: []deltaApplier{ func(v model.SampleValue) model.SampleValue { return model.SampleValue(rand.Intn(1<<8) - 1<<7 + int(v)) }, func(v model.SampleValue) model.SampleValue { return model.SampleValue(rand.Intn(1<<16) - 1<<15 + int(v)) }, func(v model.SampleValue) model.SampleValue { return model.SampleValue(rand.Int63n(1<<32) - 1<<31 + int64(v)) }, }, }, { // Float with float32 and float64 deltas. createValue: func() model.SampleValue { return model.SampleValue(rand.NormFloat64()) }, applyDelta: []deltaApplier{ func(v model.SampleValue) model.SampleValue { return v + model.SampleValue(float32(rand.NormFloat64())) }, func(v model.SampleValue) model.SampleValue { return v + model.SampleValue(rand.NormFloat64()) }, }, }, } ) // Prefill result with two samples with colliding metrics (to test fingerprint mapping). result := model.Samples{ &model.Sample{ Metric: model.Metric{ "instance": "ip-10-33-84-73.l05.ams5.s-cloud.net:24483", "status": "503", }, Value: 42, Timestamp: timestamp, }, &model.Sample{ Metric: model.Metric{ "instance": "ip-10-33-84-73.l05.ams5.s-cloud.net:24480", "status": "500", }, Value: 2010, Timestamp: timestamp + 1, }, } metrics := []model.Metric{} for n := rand.Intn(maxMetrics); n >= 0; n-- { metrics = append(metrics, model.Metric{ model.MetricNameLabel: model.LabelValue(metricName), model.LabelName(fmt.Sprintf("labelname_%d", n+1)): model.LabelValue(fmt.Sprintf("labelvalue_%d", rand.Int())), }) } for len(result) < minLen { // Pick a metric for this cycle. metric := metrics[rand.Intn(len(metrics))] timeDelta := rand.Intn(maxTimeDelta) + 1 generator := generators[rand.Intn(len(generators))] createValue := generator.createValue applyDelta := generator.applyDelta[rand.Intn(len(generator.applyDelta))] incTimestamp := func() { timestamp += model.Time(timeDelta * (rand.Intn(maxTimeDeltaFactor) + 1)) } switch rand.Intn(4) { case 0: // A single sample. result = append(result, &model.Sample{ Metric: metric, Value: createValue(), Timestamp: timestamp, }) incTimestamp() case 1: // A streak of random sample values. for n := rand.Intn(maxStreakLength); n >= 0; n-- { result = append(result, &model.Sample{ Metric: metric, Value: createValue(), Timestamp: timestamp, }) incTimestamp() } case 2: // A streak of sample values with incremental changes. value := createValue() for n := rand.Intn(maxStreakLength); n >= 0; n-- { result = append(result, &model.Sample{ Metric: metric, Value: value, Timestamp: timestamp, }) incTimestamp() value = applyDelta(value) } case 3: // A streak of constant sample values. 
value := createValue() for n := rand.Intn(maxStreakLength); n >= 0; n-- { result = append(result, &model.Sample{ Metric: metric, Value: value, Timestamp: timestamp, }) incTimestamp() } } } return result }
func TestProtoDecoder(t *testing.T) { var testTime = model.Now() scenarios := []struct { in string expected model.Vector }{ { in: "", }, { in: "\x8f\x01\n\rrequest_count\x12\x12Number of requests\x18\x00\"0\n#\n\x0fsome_label_name\x12\x10some_label_value\x1a\t\t\x00\x00\x00\x00\x00\x00E\xc0\"6\n)\n\x12another_label_name\x12\x13another_label_value\x1a\t\t\x00\x00\x00\x00\x00\x00U@", expected: model.Vector{ &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_count", "some_label_name": "some_label_value", }), Value: -42, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_count", "another_label_name": "another_label_value", }), Value: 84, Timestamp: testTime, }, }, }, { in: "\xb9\x01\n\rrequest_count\x12\x12Number of requests\x18\x02\"O\n#\n\x0fsome_label_name\x12\x10some_label_value\"(\x1a\x12\t\xaeG\xe1z\x14\xae\xef?\x11\x00\x00\x00\x00\x00\x00E\xc0\x1a\x12\t+\x87\x16\xd9\xce\xf7\xef?\x11\x00\x00\x00\x00\x00\x00U\xc0\"A\n)\n\x12another_label_name\x12\x13another_label_value\"\x14\x1a\x12\t\x00\x00\x00\x00\x00\x00\xe0?\x11\x00\x00\x00\x00\x00\x00$@", expected: model.Vector{ &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_count", "some_label_name": "some_label_value", "quantile": "0.99", }), Value: -42, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_count", "some_label_name": "some_label_value", "quantile": "0.999", }), Value: -84, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_count", "another_label_name": "another_label_value", "quantile": "0.5", }), Value: 10, Timestamp: testTime, }, }, }, { in: "\x8d\x01\n\x1drequest_duration_microseconds\x12\x15The response latency.\x18\x04\"S:Q\b\x85\x15\x11\xcd\xcc\xccL\x8f\xcb:A\x1a\v\b{\x11\x00\x00\x00\x00\x00\x00Y@\x1a\f\b\x9c\x03\x11\x00\x00\x00\x00\x00\x00^@\x1a\f\b\xd0\x04\x11\x00\x00\x00\x00\x00\x00b@\x1a\f\b\xf4\v\x11\x9a\x99\x99\x99\x99\x99e@\x1a\f\b\x85\x15\x11\x00\x00\x00\x00\x00\x00\xf0\u007f", expected: model.Vector{ &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_bucket", "le": "100", }), Value: 123, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_bucket", "le": "120", }), Value: 412, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_bucket", "le": "144", }), Value: 592, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_bucket", "le": "172.8", }), Value: 1524, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_bucket", "le": "+Inf", }), Value: 2693, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_sum", }), Value: 1756047.3, Timestamp: testTime, }, &model.Sample{ Metric: model.NewMetric(model.LabelSet{ model.MetricNameLabel: "request_duration_microseconds_count", }), Value: 2693, Timestamp: testTime, }, }, }, } for _, scenario := range scenarios { dec := &SampleDecoder{ Dec: &protoDecoder{r: strings.NewReader(scenario.in)}, Opts: &DecodeOptions{ Timestamp: testTime, }, } var all model.Vector for { var smpls 
model.Vector err := dec.Decode(&smpls) if err == io.EOF { break } if err != nil { t.Fatal(err) } all = append(all, smpls...) } sort.Sort(all) sort.Sort(scenario.expected) if !reflect.DeepEqual(all, scenario.expected) { t.Fatalf("output does not match") } } }