func main() { cfg, err := New() if err != nil { log.Fatalf("Failed to parse config: %s", err) return } runs := prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "elasticsearch_backup_runs_total", Help: "Number of elasticsearch backup runs", }, []string{"status"}, ) runs = prometheus.MustRegisterOrGet(runs).(*prometheus.CounterVec) duration := prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "elasticsearch_backup_duration", Help: "Duration of elasticsearch backup runs", }, []string{"operation"}, ) duration = prometheus.MustRegisterOrGet(duration).(*prometheus.SummaryVec) go listen() interval := time.Hour * time.Duration(cfg.Interval) for { t0 := time.Now() opFunc := func() error { return backupAndRemove(cfg) } logFunc := func(err error, wait time.Duration) { log.Warnf("Failed to connect to ES: %s. Retry in %s", err, wait) } bo := backoff.NewExponentialBackOff() bo.InitialInterval = time.Second bo.MaxInterval = 60 * time.Second bo.MaxElapsedTime = 15 * time.Minute log.Infof("Attempting Snapshot ...") err := backoff.RetryNotify(opFunc, bo, logFunc) if err != nil { runs.WithLabelValues("failed").Inc() log.Warnf("Failed to delete snapshots: %s", err) continue } runs.WithLabelValues("ok").Inc() d0 := float64(time.Since(t0)) / float64(time.Microsecond) duration.WithLabelValues("backup").Observe(d0) if interval < time.Second { break } log.Infof("Waiting %s until next run", interval.String()) time.Sleep(interval) } os.Exit(0) }
// InstrumentHandlerFuncWithOpts works like InstrumentHandlerFunc but provides // more flexibility (at the cost of a more complex call syntax). // // As InstrumentHandlerFunc, this function registers four metric collectors, but it // uses the provided SummaryOpts to create them. However, the fields "Name" and // "Help" in the SummaryOpts are ignored. "Name" is replaced by // "requests_total", "request_duration_microseconds", "request_size_bytes", and // "response_size_bytes", respectively. "Help" is replaced by an appropriate // help string. The names of the variable labels of the http_requests_total // CounterVec are "method" (get, post, etc.), and "code" (HTTP status code). // // If InstrumentHandlerWithOpts is called as follows, it mimics exactly the // behavior of InstrumentHandler: // // prometheus.InstrumentHandlerWithOpts( // prometheus.SummaryOpts{ // Subsystem: "http", // ConstLabels: prometheus.Labels{"handler": handlerName}, // }, // handler, // ) // // Technical detail: "requests_total" is a CounterVec, not a SummaryVec, so it // cannot use SummaryOpts. Instead, a CounterOpts struct is created internally, // and all its fields are set to the equally named fields in the provided // SummaryOpts. func InstrumentHandlerFuncWithOpts(opts prometheus.SummaryOpts, handlerFunc gin.HandlerFunc) gin.HandlerFunc { reqCnt := prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: opts.Namespace, Subsystem: opts.Subsystem, Name: "requests_total", Help: "Total number of HTTP requests made.", ConstLabels: opts.ConstLabels, }, instLabels, ) opts.Name = "request_duration_microseconds" opts.Help = "The HTTP request latencies in microseconds." reqDur := prometheus.NewSummary(opts) opts.Name = "request_size_bytes" opts.Help = "The HTTP request sizes in bytes." reqSz := prometheus.NewSummary(opts) opts.Name = "response_size_bytes" opts.Help = "The HTTP response sizes in bytes." resSz := prometheus.NewSummary(opts) regReqCnt := prometheus.MustRegisterOrGet(reqCnt).(*prometheus.CounterVec) regReqDur := prometheus.MustRegisterOrGet(reqDur).(prometheus.Summary) regReqSz := prometheus.MustRegisterOrGet(reqSz).(prometheus.Summary) regResSz := prometheus.MustRegisterOrGet(resSz).(prometheus.Summary) return func(c *gin.Context) { now := time.Now() r := c.Request out := make(chan int) urlLen := 0 if r.URL != nil { urlLen = len(r.URL.String()) } go computeApproximateRequestSize(r, out, urlLen) handlerFunc(c) elapsed := float64(time.Since(now)) / float64(time.Microsecond) method := sanitizeMethod(r.Method) code := sanitizeCode(c.Writer.Status()) regReqCnt.WithLabelValues(method, code).Inc() regReqDur.Observe(elapsed) regResSz.Observe(float64(c.Writer.Size())) regReqSz.Observe(float64(<-out)) } }
// NewSwarm constructs a Swarm, with a Chan. func NewSwarm(ctx context.Context, listenAddrs []ma.Multiaddr, local peer.ID, peers peer.Peerstore, bwc metrics.Reporter) (*Swarm, error) { listenAddrs, err := filterAddrs(listenAddrs) if err != nil { return nil, err } wrap := func(c transport.Conn) transport.Conn { return mconn.WrapConn(bwc, c) } s := &Swarm{ swarm: ps.NewSwarm(PSTransport), local: local, peers: peers, ctx: ctx, dialT: DialTimeout, notifs: make(map[inet.Notifiee]ps.Notifiee), transports: []transport.Transport{transport.NewTCPTransport()}, bwc: bwc, fdRateLimit: make(chan struct{}, concurrentFdDials), Filters: filter.NewFilters(), dialer: conn.NewDialer(local, peers.PrivKey(local), wrap), } // configure Swarm s.proc = goprocessctx.WithContextAndTeardown(ctx, s.teardown) s.SetConnHandler(nil) // make sure to setup our own conn handler. // setup swarm metrics prom.MustRegisterOrGet(peersTotal) s.Notify((*metricsNotifiee)(s)) err = s.setupInterfaces(listenAddrs) if err != nil { return nil, err } return s, nil }
// InstrumentRouteFunc works like Prometheus' InstrumentHandlerFunc but wraps // the go-restful RouteFunction instead of a HandlerFunc func InstrumentRouteFunc(handlerName string, routeFunc restful.RouteFunction) restful.RouteFunction { opts := prometheus.SummaryOpts{ Subsystem: "http", ConstLabels: prometheus.Labels{"handler": handlerName}, } reqCnt := prometheus.NewCounterVec( prometheus.CounterOpts{ Subsystem: opts.Subsystem, Name: "requests_total", Help: "Total number of HTTP requests made.", ConstLabels: opts.ConstLabels, }, instLabels, ) opts.Name = "request_duration_microseconds" opts.Help = "The HTTP request latencies in microseconds." reqDur := prometheus.NewSummary(opts) opts.Name = "request_size_bytes" opts.Help = "The HTTP request sizes in bytes." reqSz := prometheus.NewSummary(opts) opts.Name = "response_size_bytes" opts.Help = "The HTTP response sizes in bytes." resSz := prometheus.NewSummary(opts) regReqCnt := prometheus.MustRegisterOrGet(reqCnt).(*prometheus.CounterVec) regReqDur := prometheus.MustRegisterOrGet(reqDur).(prometheus.Summary) regReqSz := prometheus.MustRegisterOrGet(reqSz).(prometheus.Summary) regResSz := prometheus.MustRegisterOrGet(resSz).(prometheus.Summary) return restful.RouteFunction(func(request *restful.Request, response *restful.Response) { now := time.Now() delegate := &responseWriterDelegator{ResponseWriter: response.ResponseWriter} out := make(chan int) urlLen := 0 if request.Request.URL != nil { urlLen = len(request.Request.URL.String()) } go computeApproximateRequestSize(request.Request, out, urlLen) _, cn := response.ResponseWriter.(http.CloseNotifier) _, fl := response.ResponseWriter.(http.Flusher) _, hj := response.ResponseWriter.(http.Hijacker) _, rf := response.ResponseWriter.(io.ReaderFrom) var rw http.ResponseWriter if cn && fl && hj && rf { rw = &fancyResponseWriterDelegator{delegate} } else { rw = delegate } response.ResponseWriter = rw routeFunc(request, response) elapsed := float64(time.Since(now)) / float64(time.Microsecond) method := strings.ToLower(request.Request.Method) code := strconv.Itoa(delegate.status) regReqCnt.WithLabelValues(method, code).Inc() regReqDur.Observe(elapsed) regResSz.Observe(float64(delegate.written)) regReqSz.Observe(float64(<-out)) }) }
func main() { logger := log.NewLogfmtLogger(os.Stderr) if os.Getenv("ENVIRONMENT") == "prod" || os.Getenv("ENVIRONMENT") == "stage" { logger = log.NewJSONLogger(os.Stdout) } logger = log.NewContext(logger).With( "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller, "name", Name, "version", Version, "build_time", BuildTime, "commit", Commit, ) logger.Log("level", "info", "msg", "Starting") cfg, err := New() if err != nil { logger.Log("level", "error", "msg", "Failed to parse config", "err", err) os.Exit(1) } runs := prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "elasticsearch_index_maint_runs_total", Help: "Number of elasticsearch index maintenance runs", }, []string{"status"}, ) runs = prometheus.MustRegisterOrGet(runs).(*prometheus.CounterVec) deleted = prometheus.NewCounter( prometheus.CounterOpts{ Name: "elasticsearch_indices_deleted_total", Help: "Size of elasticsearch indices deleted", }, ) deleted = prometheus.MustRegisterOrGet(deleted).(prometheus.Counter) duration := prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "elasticsearch_index_maint_duration", Help: "Duration of elasticsearch index maintenance runs", }, []string{"operation"}, ) duration = prometheus.MustRegisterOrGet(duration).(*prometheus.SummaryVec) go listen(logger) time.Sleep(time.Duration(rand.Int31n(30)) * time.Second) client, err := elastic.NewClient( elastic.SetURL(cfg.URL()), elastic.SetMaxRetries(10), ) if err != nil { logger.Log("level", "error", "msg", "Failed to create new ES client", "err", err) os.Exit(1) } interval := time.Hour * time.Duration(cfg.Interval) for { t0 := time.Now() err := remove(client, cfg, logger) if err != nil { runs.WithLabelValues("failed").Inc() logger.Log("level", "error", "msg", "Failed to delete indices", "err", err) continue } runs.WithLabelValues("ok").Inc() d0 := float64(time.Since(t0)) / float64(time.Microsecond) duration.WithLabelValues("delete").Observe(d0) if interval < time.Second { break } logger.Log("level", "info", "msg", "Waiting until next run", "interval", interval.String()) time.Sleep(interval) } client.Stop() os.Exit(0) }