func (h Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { ctx := route.Context(r) name := strings.Trim(route.Param(ctx, "filepath"), "/") if name == "" { name = "index.html" } file, err := GetFile(StaticFiles, name) if err != nil { if err != io.EOF { log.Warn("Could not get file: ", err) } w.WriteHeader(http.StatusNotFound) return } contentType := http.DetectContentType(file) if strings.Contains(contentType, "text/plain") || strings.Contains(contentType, "application/octet-stream") { parts := strings.Split(name, ".") contentType = mimeMap[parts[len(parts)-1]] } w.Header().Set("Content-Type", contentType) w.Header().Set("Cache-Control", "public, max-age=259200") w.Write(file) }
// Collect implements prometheus.Collector. func (c *viewCollector) Collect(ch chan<- prometheus.Metric) { for _, v := range c.stats.Views { for _, s := range v.Cache { ch <- prometheus.MustNewConstMetric( resolverCache, prometheus.GaugeValue, float64(s.Gauge), v.Name, s.Name, ) } for _, s := range v.ResolverQueries { ch <- prometheus.MustNewConstMetric( resolverQueries, prometheus.CounterValue, float64(s.Counter), v.Name, s.Name, ) } for _, s := range v.ResolverStats { if desc, ok := resolverMetricStats[s.Name]; ok { ch <- prometheus.MustNewConstMetric( desc, prometheus.CounterValue, float64(s.Counter), v.Name, ) } if desc, ok := resolverLabelStats[s.Name]; ok { ch <- prometheus.MustNewConstMetric( desc, prometheus.CounterValue, float64(s.Counter), v.Name, s.Name, ) } } if buckets, count, err := histogram(v.ResolverStats); err == nil { ch <- prometheus.MustNewConstHistogram( resolverQueryDuration, count, math.NaN(), buckets, v.Name, ) } else { log.Warn("Error parsing RTT:", err) } } }
// loadSeriesMapAndHeads loads the fingerprint to memory-series mapping and all // the chunks contained in the checkpoint (and thus not yet persisted to series // files). The method is capable of loading the checkpoint format v1 and v2. If // recoverable corruption is detected, or if the dirty flag was set from the // beginning, crash recovery is run, which might take a while. If an // unrecoverable error is encountered, it is returned. Call this method during // start-up while nothing else is running in storage land. This method is // utterly goroutine-unsafe. func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, chunksToPersist int64, err error) { fingerprintToSeries := make(map[model.Fingerprint]*memorySeries) sm = &seriesMap{m: fingerprintToSeries} defer func() { if p.dirty { log.Warn("Persistence layer appears dirty.") p.startedDirty.Set(1) err = p.recoverFromCrash(fingerprintToSeries) if err != nil { sm = nil } } else { p.startedDirty.Set(0) } }() hs := newHeadsScanner(p.headsFileName()) defer hs.close() for hs.scan() { fingerprintToSeries[hs.fp] = hs.series } if os.IsNotExist(hs.err) { return sm, 0, nil } if hs.err != nil { p.dirty = true log. With("file", p.headsFileName()). With("error", hs.err). Error("Error reading heads file.") return sm, 0, hs.err } return sm, hs.chunksToPersistTotal, nil }
// Append queues a sample to be sent to the remote storage. It drops the // sample on the floor if the queue is full. It implements // storage.SampleAppender. func (t *StorageQueueManager) Append(s *model.Sample) { select { case t.queue <- s: default: t.samplesCount.WithLabelValues(dropped).Inc() log.Warn("Remote storage queue full, discarding sample.") } }
// Append queues a sample to be sent to the remote storage. It drops the // sample on the floor if the queue is full. // Always returns nil. func (t *StorageQueueManager) Append(s *model.Sample) error { fp := s.Metric.FastFingerprint() shard := uint64(fp) % uint64(t.cfg.Shards) select { case t.shards[shard] <- s: default: t.sentSamplesTotal.WithLabelValues(dropped).Inc() log.Warn("Remote storage queue full, discarding sample.") } return nil }
// Append implements Storage. func (s *memorySeriesStorage) Append(sample *model.Sample) { for ln, lv := range sample.Metric { if len(lv) == 0 { delete(sample.Metric, ln) } } if s.getNumChunksToPersist() >= s.maxChunksToPersist { log.Warnf( "%d chunks waiting for persistence, sample ingestion suspended.", s.getNumChunksToPersist(), ) for s.getNumChunksToPersist() >= s.maxChunksToPersist { time.Sleep(time.Second) } log.Warn("Sample ingestion resumed.") } rawFP := sample.Metric.FastFingerprint() s.fpLocker.Lock(rawFP) fp, err := s.mapper.mapFP(rawFP, sample.Metric) if err != nil { log.Errorf("Error while mapping fingerprint %v: %v", rawFP, err) s.persistence.setDirty(true) } if fp != rawFP { // Switch locks. s.fpLocker.Unlock(rawFP) s.fpLocker.Lock(fp) } series := s.getOrCreateSeries(fp, sample.Metric) if sample.Timestamp <= series.lastTime { // Don't log and track equal timestamps, as they are a common occurrence // when using client-side timestamps (e.g. Pushgateway or federation). // It would be even better to also compare the sample values here, but // we don't have efficient access to a series's last value. if sample.Timestamp != series.lastTime { log.Warnf("Ignoring sample with out-of-order timestamp for fingerprint %v (%v): %v is not after %v", fp, series.metric, sample.Timestamp, series.lastTime) s.outOfOrderSamplesCount.Inc() } s.fpLocker.Unlock(fp) return } completedChunksCount := series.add(&model.SamplePair{ Value: sample.Value, Timestamp: sample.Timestamp, }) s.fpLocker.Unlock(fp) s.ingestedSamplesCount.Inc() s.incNumChunksToPersist(completedChunksCount) }
// isDegraded returns whether the storage is in "graceful degradation mode", // which is the case if the number of chunks waiting for persistence has reached // a percentage of maxChunksToPersist that exceeds // percentChunksToPersistForDegradation. The method is not goroutine safe (but // only ever called from the goroutine dealing with series maintenance). // Changes of degradation mode are logged. func (s *memorySeriesStorage) isDegraded() bool { nowDegraded := s.getNumChunksToPersist() > s.maxChunksToPersist*percentChunksToPersistForDegradation/100 if s.degraded && !nowDegraded { log.Warn("Storage has left graceful degradation mode. Things are back to normal.") } else if !s.degraded && nowDegraded { log.Warnf( "%d chunks waiting for persistence (%d%% of the allowed maximum %d). Storage is now in graceful degradation mode. Series files are not synced anymore if following the adaptive strategy. Checkpoints are not performed more often than every %v. Series maintenance happens as frequently as possible.", s.getNumChunksToPersist(), s.getNumChunksToPersist()*100/s.maxChunksToPersist, s.maxChunksToPersist, s.checkpointInterval) } s.degraded = nowDegraded return s.degraded }
func serveAsset(w http.ResponseWriter, req *http.Request, fp string) { info, err := ui.AssetInfo(fp) if err != nil { log.Warn("Could not get file: ", err) w.WriteHeader(http.StatusNotFound) return } file, err := ui.Asset(fp) if err != nil { if err != io.EOF { log.With("file", fp).Warn("Could not get file: ", err) } w.WriteHeader(http.StatusNotFound) return } http.ServeContent(w, req, info.Name(), info.ModTime(), bytes.NewReader(file)) }
// Run dispatches notifications continuously. func (n *NotificationHandler) Run() { for reqs := range n.pendingNotifications { if n.alertmanagerURL == "" { log.Warn("No alert manager configured, not dispatching notification") n.notificationDropped.Inc() continue } begin := time.Now() err := n.sendNotifications(reqs) if err != nil { log.Error("Error sending notification: ", err) n.notificationErrors.Inc() } n.notificationLatency.Observe(float64(time.Since(begin) / time.Millisecond)) } close(n.stopped) }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { return 2 } printVersion() if cfg.printVersion { return 0 } var reloadables []Reloadable var ( memStorage = local.NewMemorySeriesStorage(&cfg.storage) remoteStorage = remote.New(&cfg.remote) sampleAppender = storage.Fanout{memStorage} ) if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } var ( notificationHandler = notification.New(&cfg.notification) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, NotificationHandler: notificationHandler, QueryEngine: queryEngine, ExternalURL: cfg.web.ExternalURL, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) status := &web.PrometheusStatus{ TargetPools: targetManager.Pools, Rules: ruleManager.Rules, Flags: flags, Birth: time.Now(), } webHandler := web.New(memStorage, queryEngine, ruleManager, status, &cfg.web) reloadables = append(reloadables, status, targetManager, ruleManager, webHandler, notificationHandler) if !reloadConfig(cfg.configFile, reloadables...) { return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: case <-webHandler.Reload(): } reloadConfig(cfg.configFile, reloadables...) } }() // Start all components. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { prometheus.MustRegister(remoteStorage) go remoteStorage.Run() defer remoteStorage.Stop() } // The storage has to be fully initialized before registering. prometheus.MustRegister(memStorage) prometheus.MustRegister(notificationHandler) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) defer ruleManager.Stop() go notificationHandler.Run() defer notificationHandler.Stop() go targetManager.Run() defer targetManager.Stop() defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { log.Error(err) return 2 } if cfg.printVersion { fmt.Fprintln(os.Stdout, version.Print("prometheus")) return 0 } log.Infoln("Starting prometheus", version.Info()) log.Infoln("Build context", version.BuildContext()) var reloadables []Reloadable var ( memStorage = local.NewMemorySeriesStorage(&cfg.storage) remoteStorage = remote.New(&cfg.remote) sampleAppender = storage.Fanout{memStorage} ) if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } var ( notifier = notifier.New(&cfg.notifier) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(memStorage, &cfg.queryEngine) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, Notifier: notifier, QueryEngine: queryEngine, ExternalURL: cfg.web.ExternalURL, }) flags := map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) version := &web.PrometheusVersion{ Version: version.Version, Revision: version.Revision, Branch: version.Branch, BuildUser: version.BuildUser, BuildDate: version.BuildDate, GoVersion: version.GoVersion, } webHandler := web.New(memStorage, queryEngine, targetManager, ruleManager, version, flags, &cfg.web) reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier) if !reloadConfig(cfg.configFile, reloadables...) { return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: case <-webHandler.Reload(): } reloadConfig(cfg.configFile, reloadables...) } }() // Start all components. The order is NOT arbitrary. if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := memStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { prometheus.MustRegister(remoteStorage) go remoteStorage.Run() defer remoteStorage.Stop() } // The storage has to be fully initialized before registering. prometheus.MustRegister(memStorage) prometheus.MustRegister(notifier) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) // The notifieris a dependency of the rule manager. It has to be // started before and torn down afterwards. go notifier.Run() defer notifier.Stop() go ruleManager.Run() defer ruleManager.Stop() go targetManager.Run() defer targetManager.Stop() // Shutting down the query engine before the rule manager will cause pending queries // to be canceled and ensures a quick shutdown of the rule manager. defer queryEngine.Stop() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
// recoverFromCrash is called by loadSeriesMapAndHeads if the persistence // appears to be dirty after the loading (either because the loading resulted in // an error or because the persistence was dirty from the start). Not goroutine // safe. Only call before anything else is running (except index processing // queue as started by newPersistence). func (p *persistence) recoverFromCrash(fingerprintToSeries map[model.Fingerprint]*memorySeries) error { // TODO(beorn): We need proper tests for the crash recovery. log.Warn("Starting crash recovery. Prometheus is inoperational until complete.") log.Warn("To avoid crash recovery in the future, shut down Prometheus with SIGTERM or a HTTP POST to /-/quit.") fpsSeen := map[model.Fingerprint]struct{}{} count := 0 seriesDirNameFmt := fmt.Sprintf("%%0%dx", seriesDirNameLen) // Delete the fingerprint mapping file as it might be stale or // corrupt. We'll rebuild the mappings as we go. if err := os.RemoveAll(p.mappingsFileName()); err != nil { return fmt.Errorf("couldn't remove old fingerprint mapping file %s: %s", p.mappingsFileName(), err) } // The mappings to rebuild. fpm := fpMappings{} log.Info("Scanning files.") for i := 0; i < 1<<(seriesDirNameLen*4); i++ { dirname := filepath.Join(p.basePath, fmt.Sprintf(seriesDirNameFmt, i)) dir, err := os.Open(dirname) if os.IsNotExist(err) { continue } if err != nil { return err } for fis := []os.FileInfo{}; err != io.EOF; fis, err = dir.Readdir(1024) { if err != nil { dir.Close() return err } for _, fi := range fis { fp, ok := p.sanitizeSeries(dirname, fi, fingerprintToSeries, fpm) if ok { fpsSeen[fp] = struct{}{} } count++ if count%10000 == 0 { log.Infof("%d files scanned.", count) } } } dir.Close() } log.Infof("File scan complete. %d series found.", len(fpsSeen)) log.Info("Checking for series without series file.") for fp, s := range fingerprintToSeries { if _, seen := fpsSeen[fp]; !seen { // fp exists in fingerprintToSeries, but has no representation on disk. if s.persistWatermark == len(s.chunkDescs) { // Oops, everything including the head chunk was // already persisted, but nothing on disk. // Thus, we lost that series completely. Clean // up the remnants. delete(fingerprintToSeries, fp) if err := p.purgeArchivedMetric(fp); err != nil { // Purging the archived metric didn't work, so try // to unindex it, just in case it's in the indexes. p.unindexMetric(fp, s.metric) } log.Warnf("Lost series detected: fingerprint %v, metric %v.", fp, s.metric) continue } // If we are here, the only chunks we have are the chunks in the checkpoint. // Adjust things accordingly. if s.persistWatermark > 0 || s.chunkDescsOffset != 0 { minLostChunks := s.persistWatermark + s.chunkDescsOffset if minLostChunks <= 0 { log.Warnf( "Possible loss of chunks for fingerprint %v, metric %v.", fp, s.metric, ) } else { log.Warnf( "Lost at least %d chunks for fingerprint %v, metric %v.", minLostChunks, fp, s.metric, ) } s.chunkDescs = append( make([]*chunk.Desc, 0, len(s.chunkDescs)-s.persistWatermark), s.chunkDescs[s.persistWatermark:]..., ) chunk.NumMemDescs.Sub(float64(s.persistWatermark)) s.persistWatermark = 0 s.chunkDescsOffset = 0 } maybeAddMapping(fp, s.metric, fpm) fpsSeen[fp] = struct{}{} // Add so that fpsSeen is complete. } } log.Info("Check for series without series file complete.") if err := p.cleanUpArchiveIndexes(fingerprintToSeries, fpsSeen, fpm); err != nil { return err } if err := p.rebuildLabelIndexes(fingerprintToSeries); err != nil { return err } // Finally rewrite the mappings file if there are any mappings. if len(fpm) > 0 { if err := p.checkpointFPMappings(fpm); err != nil { return err } } p.dirtyMtx.Lock() // Only declare storage clean if it didn't become dirty during crash recovery. if !p.becameDirty { p.dirty = false } p.dirtyMtx.Unlock() log.Warn("Crash recovery complete.") return nil }
// Main manages the startup and shutdown lifecycle of the entire Prometheus server. func Main() int { if err := parse(os.Args[1:]); err != nil { log.Error(err) return 2 } if cfg.printVersion { fmt.Fprintln(os.Stdout, version.Print("prometheus")) return 0 } log.Infoln("Starting prometheus", version.Info()) log.Infoln("Build context", version.BuildContext()) var ( sampleAppender = storage.Fanout{} reloadables []Reloadable ) var localStorage local.Storage switch cfg.localStorageEngine { case "persisted": localStorage = local.NewMemorySeriesStorage(&cfg.storage) sampleAppender = storage.Fanout{localStorage} case "none": localStorage = &local.NoopStorage{} default: log.Errorf("Invalid local storage engine %q", cfg.localStorageEngine) return 1 } remoteStorage, err := remote.New(&cfg.remote) if err != nil { log.Errorf("Error initializing remote storage: %s", err) return 1 } if remoteStorage != nil { sampleAppender = append(sampleAppender, remoteStorage) reloadables = append(reloadables, remoteStorage) } reloadableRemoteStorage := remote.NewConfigurable() sampleAppender = append(sampleAppender, reloadableRemoteStorage) reloadables = append(reloadables, reloadableRemoteStorage) var ( notifier = notifier.New(&cfg.notifier) targetManager = retrieval.NewTargetManager(sampleAppender) queryEngine = promql.NewEngine(localStorage, &cfg.queryEngine) ctx, cancelCtx = context.WithCancel(context.Background()) ) ruleManager := rules.NewManager(&rules.ManagerOptions{ SampleAppender: sampleAppender, Notifier: notifier, QueryEngine: queryEngine, Context: ctx, ExternalURL: cfg.web.ExternalURL, }) cfg.web.Context = ctx cfg.web.Storage = localStorage cfg.web.QueryEngine = queryEngine cfg.web.TargetManager = targetManager cfg.web.RuleManager = ruleManager cfg.web.Version = &web.PrometheusVersion{ Version: version.Version, Revision: version.Revision, Branch: version.Branch, BuildUser: version.BuildUser, BuildDate: version.BuildDate, GoVersion: version.GoVersion, } cfg.web.Flags = map[string]string{} cfg.fs.VisitAll(func(f *flag.Flag) { cfg.web.Flags[f.Name] = f.Value.String() }) webHandler := web.New(&cfg.web) reloadables = append(reloadables, targetManager, ruleManager, webHandler, notifier) if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error loading config: %s", err) return 1 } // Wait for reload or termination signals. Start the handler for SIGHUP as // early as possible, but ignore it until we are ready to handle reloading // our config. hup := make(chan os.Signal) hupReady := make(chan bool) signal.Notify(hup, syscall.SIGHUP) go func() { <-hupReady for { select { case <-hup: if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error reloading config: %s", err) } case rc := <-webHandler.Reload(): if err := reloadConfig(cfg.configFile, reloadables...); err != nil { log.Errorf("Error reloading config: %s", err) rc <- err } else { rc <- nil } } } }() // Start all components. The order is NOT arbitrary. if err := localStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 } defer func() { if err := localStorage.Stop(); err != nil { log.Errorln("Error stopping storage:", err) } }() if remoteStorage != nil { remoteStorage.Start() defer remoteStorage.Stop() } defer reloadableRemoteStorage.Stop() // The storage has to be fully initialized before registering. if instrumentedStorage, ok := localStorage.(prometheus.Collector); ok { prometheus.MustRegister(instrumentedStorage) } prometheus.MustRegister(notifier) prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) // The notifier is a dependency of the rule manager. It has to be // started before and torn down afterwards. go notifier.Run() defer notifier.Stop() go ruleManager.Run() defer ruleManager.Stop() go targetManager.Run() defer targetManager.Stop() // Shutting down the query engine before the rule manager will cause pending queries // to be canceled and ensures a quick shutdown of the rule manager. defer cancelCtx() go webHandler.Run() // Wait for reload or termination signals. close(hupReady) // Unblock SIGHUP handler. term := make(chan os.Signal) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: log.Warn("Received SIGTERM, exiting gracefully...") case <-webHandler.Quit(): log.Warn("Received termination request via web service, exiting gracefully...") case err := <-webHandler.ListenError(): log.Errorln("Error starting web server, exiting gracefully:", err) } log.Info("See you next time!") return 0 }
// loadSeriesMapAndHeads loads the fingerprint to memory-series mapping and all // the chunks contained in the checkpoint (and thus not yet persisted to series // files). The method is capable of loading the checkpoint format v1 and v2. If // recoverable corruption is detected, or if the dirty flag was set from the // beginning, crash recovery is run, which might take a while. If an // unrecoverable error is encountered, it is returned. Call this method during // start-up while nothing else is running in storage land. This method is // utterly goroutine-unsafe. func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, chunksToPersist int64, err error) { var chunkDescsTotal int64 fingerprintToSeries := make(map[model.Fingerprint]*memorySeries) sm = &seriesMap{m: fingerprintToSeries} defer func() { if sm != nil && p.dirty { log.Warn("Persistence layer appears dirty.") err = p.recoverFromCrash(fingerprintToSeries) if err != nil { sm = nil } } if err == nil { numMemChunkDescs.Add(float64(chunkDescsTotal)) } }() f, err := os.Open(p.headsFileName()) if os.IsNotExist(err) { return sm, 0, nil } if err != nil { log.Warn("Could not open heads file:", err) p.dirty = true return } defer f.Close() r := bufio.NewReaderSize(f, fileBufSize) buf := make([]byte, len(headsMagicString)) if _, err := io.ReadFull(r, buf); err != nil { log.Warn("Could not read from heads file:", err) p.dirty = true return sm, 0, nil } magic := string(buf) if magic != headsMagicString { log.Warnf( "unexpected magic string, want %q, got %q", headsMagicString, magic, ) p.dirty = true return } version, err := binary.ReadVarint(r) if (version != headsFormatVersion && version != headsFormatLegacyVersion) || err != nil { log.Warnf("unknown heads format version, want %d", headsFormatVersion) p.dirty = true return sm, 0, nil } numSeries, err := codable.DecodeUint64(r) if err != nil { log.Warn("Could not decode number of series:", err) p.dirty = true return sm, 0, nil } for ; numSeries > 0; numSeries-- { seriesFlags, err := r.ReadByte() if err != nil { log.Warn("Could not read series flags:", err) p.dirty = true return sm, chunksToPersist, nil } headChunkPersisted := seriesFlags&flagHeadChunkPersisted != 0 fp, err := codable.DecodeUint64(r) if err != nil { log.Warn("Could not decode fingerprint:", err) p.dirty = true return sm, chunksToPersist, nil } var metric codable.Metric if err := metric.UnmarshalFromReader(r); err != nil { log.Warn("Could not decode metric:", err) p.dirty = true return sm, chunksToPersist, nil } var persistWatermark int64 var modTime time.Time if version != headsFormatLegacyVersion { // persistWatermark only present in v2. persistWatermark, err = binary.ReadVarint(r) if err != nil { log.Warn("Could not decode persist watermark:", err) p.dirty = true return sm, chunksToPersist, nil } modTimeNano, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode modification time:", err) p.dirty = true return sm, chunksToPersist, nil } if modTimeNano != -1 { modTime = time.Unix(0, modTimeNano) } } chunkDescsOffset, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode chunk descriptor offset:", err) p.dirty = true return sm, chunksToPersist, nil } savedFirstTime, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode saved first time:", err) p.dirty = true return sm, chunksToPersist, nil } numChunkDescs, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode number of chunk descriptors:", err) p.dirty = true return sm, chunksToPersist, nil } chunkDescs := make([]*chunkDesc, numChunkDescs) if version == headsFormatLegacyVersion { if headChunkPersisted { persistWatermark = numChunkDescs } else { persistWatermark = numChunkDescs - 1 } } for i := int64(0); i < numChunkDescs; i++ { if i < persistWatermark { firstTime, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode first time:", err) p.dirty = true return sm, chunksToPersist, nil } lastTime, err := binary.ReadVarint(r) if err != nil { log.Warn("Could not decode last time:", err) p.dirty = true return sm, chunksToPersist, nil } chunkDescs[i] = &chunkDesc{ chunkFirstTime: model.Time(firstTime), chunkLastTime: model.Time(lastTime), } chunkDescsTotal++ } else { // Non-persisted chunk. encoding, err := r.ReadByte() if err != nil { log.Warn("Could not decode chunk type:", err) p.dirty = true return sm, chunksToPersist, nil } chunk := newChunkForEncoding(chunkEncoding(encoding)) if err := chunk.unmarshal(r); err != nil { log.Warn("Could not decode chunk:", err) p.dirty = true return sm, chunksToPersist, nil } chunkDescs[i] = newChunkDesc(chunk) chunksToPersist++ } } fingerprintToSeries[model.Fingerprint(fp)] = &memorySeries{ metric: model.Metric(metric), chunkDescs: chunkDescs, persistWatermark: int(persistWatermark), modTime: modTime, chunkDescsOffset: int(chunkDescsOffset), savedFirstTime: model.Time(savedFirstTime), lastTime: chunkDescs[len(chunkDescs)-1].lastTime(), headChunkClosed: persistWatermark >= numChunkDescs, } } return sm, chunksToPersist, nil }