// error is what is used to determine to ACK or NACK
// process decodes one queued metrics message and, unless in dry-run mode,
// forwards its metric pointers to kairosdb. Decode failures are logged and
// swallowed (return nil -> ACK, message dropped); send failures are
// returned (-> NACK, message retried later).
func (kg *KairosGateway) process(job Job) error {
	// NOTE(review): job.msg appears to be the raw queue message while job.Msg
	// is the decoded wrapper — confirm against the Job definition.
	msg := job.msg
	messagesSize.Value(int64(len(job.Msg.Msg)))
	log.Debug("processing metrics %s %d. timestamp: %s. format: %s. attempts: %d\n", job.qualifier, job.Msg.Id, time.Unix(0, msg.Timestamp), job.Msg.Format, msg.Attempts)
	err := job.Msg.DecodeMetricData()
	if err != nil {
		log.Info("%s: skipping message", err.Error())
		return nil
	}
	metricsPerMessage.Value(int64(len(job.Msg.Metrics)))
	if !kg.dryRun {
		pre := time.Now()
		err = kg.kairos.SendMetricPointers(job.Msg.Metrics)
		if err != nil {
			metricsToKairosFail.Inc(int64(len(job.Msg.Metrics)))
			log.Warn("can't send to kairosdb: %s. retrying later", err)
		} else {
			metricsToKairosOK.Inc(int64(len(job.Msg.Metrics)))
			kairosPutDuration.Value(time.Now().Sub(pre))
		}
	}
	log.Debug("finished metrics %s %d - %d metrics sent\n", job.qualifier, job.Msg.Id, len(job.Msg.Metrics))
	return err
}
func HandleRequest(c *middleware.Context, ds *m.DataSource) { var req sqlDataRequest req.Body, _ = ioutil.ReadAll(c.Req.Request.Body) json.Unmarshal(req.Body, &req) log.Debug("SQL request: query='%v'", req.Query) engine, err := getEngine(ds) if err != nil { c.JsonApiErr(500, "Unable to open SQL connection", err) return } defer engine.Close() session := engine.NewSession() defer session.Close() db := session.DB() result, err := getData(db, &req) if err != nil { c.JsonApiErr(500, fmt.Sprintf("Data error: %v, Query: %s", err.Error(), req.Query), err) return } c.JSON(200, result) }
func (this *thunderTask) fetch() error { this.Avatar.timestamp = time.Now() log.Debug("avatar.fetch(fetch new avatar): %s", this.Url) req, _ := http.NewRequest("GET", this.Url, nil) req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/jpeg,image/png,*/*;q=0.8") req.Header.Set("Accept-Encoding", "deflate,sdch") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.8") req.Header.Set("Cache-Control", "no-cache") req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36") resp, err := client.Do(req) if err != nil { this.Avatar.notFound = true return fmt.Errorf("gravatar unreachable, %v", err) } defer resp.Body.Close() if resp.StatusCode != 200 { this.Avatar.notFound = true return fmt.Errorf("status code: %d", resp.StatusCode) } this.Avatar.data = &bytes.Buffer{} writer := bufio.NewWriter(this.Avatar.data) if _, err = io.Copy(writer, resp.Body); err != nil { return err } return nil }
// HandleMessage routes an incoming NSQ message by age: messages older than
// 4 minutes are re-published to a low-priority queue (trying up to 3 times),
// fresh messages are processed via the high-priority gateway path.
// A nil return ACKs the message; an error NACKs it for redelivery.
func (k *KairosHandler) HandleMessage(m *nsq.Message) error {
	created := time.Unix(0, m.Timestamp)
	if time.Now().Add(-time.Duration(4) * time.Minute).After(created) {
		log.Debug("requeuing msg %s. timestamp: %s. attempts: %d\n ", m.ID, time.Unix(0, m.Timestamp), m.Attempts)
		attempts := 3 // try 3 different hosts before giving up and requeuing
		var err error
		for attempt := 1; attempt <= attempts; attempt++ {
			err = k.trySubmit(m.Body)
			if err == nil {
				msgsToLowPrioOK.Inc(1)
				return nil // we published the msg as lowprio and can mark it as processed
			}
		}
		// all submit attempts failed; NACK so NSQ redelivers later.
		msgsToLowPrioFail.Inc(1)
		log.Warn("failed to publish out of date message %s as low-prio. reprocessing later\n", m.ID)
		return err
	}
	err := k.gateway.ProcessHighPrio(m)
	if err != nil {
		msgsHandleHighPrioFail.Inc(1)
	} else {
		msgsHandleHighPrioOK.Inc(1)
	}
	return err
}
func NewApiPluginProxy(ctx *middleware.Context, proxyPath string, route *plugins.AppPluginRoute, appId string) *httputil.ReverseProxy { targetUrl, _ := url.Parse(route.Url) director := func(req *http.Request) { req.URL.Scheme = targetUrl.Scheme req.URL.Host = targetUrl.Host req.Host = targetUrl.Host req.URL.Path = util.JoinUrlFragments(targetUrl.Path, proxyPath) // clear cookie headers req.Header.Del("Cookie") req.Header.Del("Set-Cookie") //Create a HTTP header with the context in it. ctxJson, err := json.Marshal(ctx.SignedInUser) if err != nil { ctx.JsonApiErr(500, "failed to marshal context to json.", err) return } req.Header.Add("Grafana-Context", string(ctxJson)) // add custom headers defined in the plugin config. for _, header := range route.Headers { var contentBuf bytes.Buffer t, err := template.New("content").Parse(header.Content) if err != nil { ctx.JsonApiErr(500, fmt.Sprintf("could not parse header content template for header %s.", header.Name), err) return } //lookup appSettings query := m.GetAppSettingByAppIdQuery{OrgId: ctx.OrgId, AppId: appId} if err := bus.Dispatch(&query); err != nil { ctx.JsonApiErr(500, "failed to get AppSettings.", err) return } type templateData struct { JsonData map[string]interface{} SecureJsonData map[string]string } data := templateData{ JsonData: query.Result.JsonData, SecureJsonData: query.Result.SecureJsonData.Decrypt(), } err = t.Execute(&contentBuf, data) if err != nil { ctx.JsonApiErr(500, fmt.Sprintf("failed to execute header content template for header %s.", header.Name), err) return } log.Debug("Adding header to proxy request. %s: %s", header.Name, contentBuf.String()) req.Header.Add(header.Name, contentBuf.String()) } } return &httputil.ReverseProxy{Director: director} }
func (a *AggMetric) Persist(c *Chunk) { log.Debug("starting to save %v", c) data := c.Series.Bytes() chunkSizeAtSave.Value(int64(len(data))) err := InsertMetric(a.Key, c.T0, data, *metricTTL) if err == nil { a.Lock() c.Saved = true a.Unlock() log.Debug("save complete. %v", c) chunkSaveOk.Inc(1) } else { log.Error(1, "failed to save metric to cassandra. %v, %s", c, err) chunkSaveFail.Inc(1) // TODO } }
// Start runs every registered migration that is not already present in the
// migration log. Each execution is recorded in a MigrationLog row (with its
// SQL, and the error text on failure); the first failing migration aborts
// the run and its error is returned.
func (mg *Migrator) Start() error {
	if mg.LogLevel <= log.INFO {
		log.Info("Migrator: Starting DB migration")
	}

	logMap, err := mg.GetMigrationLog()
	if err != nil {
		return err
	}

	for _, m := range mg.migrations {
		// already-executed migrations are skipped, keyed by migration id.
		_, exists := logMap[m.Id()]
		if exists {
			if mg.LogLevel <= log.DEBUG {
				log.Debug("Migrator: Skipping migration: %v, Already executed", m.Id())
			}
			continue
		}

		sql := m.Sql(mg.dialect)
		record := MigrationLog{
			MigrationId: m.Id(),
			Sql:         sql,
			Timestamp:   time.Now(),
		}

		if mg.LogLevel <= log.DEBUG {
			log.Debug("Migrator: Executing SQL: \n %v \n", sql)
		}

		if err := mg.exec(m); err != nil {
			log.Error(3, "Migrator: error: \n%s:\n%s", err, sql)
			record.Error = err.Error()
			// best effort: the failed attempt is logged to the DB; the
			// Insert error itself is intentionally not checked here.
			mg.x.Insert(&record)
			return err
		} else {
			record.Success = true
			mg.x.Insert(&record)
		}
	}

	return nil
}
func indexMetric(m *schema.MetricDefinition) error { log.Debug("indexing %s in redis", m.Id) metricStr, err := json.Marshal(m) if err != nil { return err } if rerr := rs.SetEx(m.Id, time.Duration(300)*time.Second, string(metricStr)).Err(); err != nil { log.Error(3, "redis err. %s", rerr) } log.Debug("indexing %s in elasticsearch", m.Id) err = Indexer.Index("metric", "metric_index", m.Id, "", "", nil, m) if err != nil { log.Error(3, "failed to send payload to BulkApi indexer. %s", err) return err } return nil }
func authenticate(data *Auth_data, b []byte) error { auth_url := data.Server + "/v3/auth/tokens?nocatalog" log.Debug("Authentication request to URL: %s", auth_url) log.Debug("Authentication request body: \n%s", anonymisePasswordsTokens(data, b)) request, err := http.NewRequest("POST", auth_url, bytes.NewBuffer(b)) if err != nil { return err } resp, err := GetHttpClient().Do(request) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != 201 { return errors.New("Keystone authentication failed: " + resp.Status) } buf := new(bytes.Buffer) buf.ReadFrom(resp.Body) strBody := buf.Bytes() log.Debug("Authentication response: \n%s", strBody) bodyReader := bytes.NewBufferString(fmt.Sprintf("%s", strBody)) var decoder = json.NewDecoder(bodyReader) var auth_response auth_response_struct err = decoder.Decode(&auth_response) if err != nil { return err } data.Token = resp.Header.Get("X-Subject-Token") data.Expiration = auth_response.Token.Expires_at data.Roles = auth_response.Token.Roles return nil }
func Save(e *schema.ProbeEvent) error { if e.Id == "" { u := uuid.NewRandom() e.Id = u.String() } if e.Timestamp == 0 { // looks like this expects timestamps in milliseconds e.Timestamp = time.Now().UnixNano() / int64(time.Millisecond) } if err := e.Validate(); err != nil { return err } log.Debug("saving event to elasticsearch.") resp, err := es.Index("events", e.EventType, e.Id, nil, e) log.Debug("elasticsearch response: %v", resp) if err != nil { return err } return nil }
// don't ever call with a ts of 0, cause we use 0 to mean not initialized!
// Add stores the point <ts,val> into the metric's circular buffer of
// chunks: appending to the open chunk when the point falls in its span,
// dropping points older than the open chunk, and sealing/persisting the
// open chunk and rotating the ring when the point starts a new span.
// Every accepted point is also fed to the rollup aggregators.
func (a *AggMetric) Add(ts uint32, val float64) {
	a.Lock()
	defer a.Unlock()
	// t0 is the start of the chunk-span this point belongs to.
	t0 := ts - (ts % a.ChunkSpan)
	currentChunk := a.getChunk(a.CurrentChunkPos)
	if currentChunk == nil {
		chunkCreate.Inc(1)
		// no data has been added to this metric at all.
		log.Debug("instantiating new circular buffer.")
		a.Chunks = append(a.Chunks, NewChunk(t0))
		if err := a.Chunks[0].Push(ts, val); err != nil {
			panic(fmt.Sprintf("FATAL ERROR: this should never happen. Pushing initial value <%d,%f> to new chunk at pos 0 failed: %q", ts, val, err))
		}
		log.Debug("created new chunk. %s: %v", a.Key, a.Chunks[0])
	} else if t0 == currentChunk.T0 {
		if currentChunk.Saved {
			// a point arrived for a chunk that has already been persisted;
			// it is dropped rather than reopening the chunk.
			//TODO(awoods): allow the chunk to be re-opened.
			log.Error(3, "cant write to chunk that has already been saved. %s T0:%d", a.Key, currentChunk.T0)
			return
		}
		// last prior data was in same chunk as new point
		if err := a.Chunks[a.CurrentChunkPos].Push(ts, val); err != nil {
			log.Error(3, "failed to add metric to chunk for %s. %s", a.Key, err)
			return
		}
	} else if t0 < currentChunk.T0 {
		// out-of-order point that belongs to an earlier chunk: dropped.
		log.Error(3, "Point at %d has t0 %d, goes back into previous chunk. CurrentChunk t0: %d, LastTs: %d", ts, t0, currentChunk.T0, currentChunk.LastTs)
		return
	} else {
		// point starts a new chunk-span: seal the current chunk, persist it
		// asynchronously, and advance the ring position. The ring grows
		// until NumChunks chunks exist, after which slots are recycled.
		currentChunk.Finish()
		go a.Persist(currentChunk)

		a.CurrentChunkPos++
		if a.CurrentChunkPos >= int(a.NumChunks) {
			a.CurrentChunkPos = 0
		}

		chunkCreate.Inc(1)
		if len(a.Chunks) < int(a.NumChunks) {
			log.Debug("adding new chunk to cirular Buffer. now %d chunks", a.CurrentChunkPos+1)
			a.Chunks = append(a.Chunks, NewChunk(t0))
		} else {
			chunkClear.Inc(1)
			log.Debug("numChunks: %d currentPos: %d", len(a.Chunks), a.CurrentChunkPos)
			log.Debug("clearing chunk from circular buffer. %v", a.Chunks[a.CurrentChunkPos])
			a.Chunks[a.CurrentChunkPos] = NewChunk(t0)
		}
		log.Debug("created new chunk. %s: %v", a.Key, a.Chunks[a.CurrentChunkPos])
		if err := a.Chunks[a.CurrentChunkPos].Push(ts, val); err != nil {
			panic(fmt.Sprintf("FATAL ERROR: this should never happen. Pushing initial value <%d,%f> to new chunk at pos %d failed: %q", ts, val, a.CurrentChunkPos, err))
		}
	}
	a.addAggregators(ts, val)
}
func GetMetricDefinition(id string) (*schema.MetricDefinition, error) { // TODO: fetch from redis before checking elasticsearch if v, err := rs.Get(id).Result(); err != nil && err != redis.Nil { log.Error(3, "The redis client bombed: %s", err) return nil, err } else if err == nil { //fmt.Printf("json for %s found in redis\n", id) def, err := schema.MetricDefinitionFromJSON([]byte(v)) if err != nil { return nil, err } return def, nil } log.Debug("%s not in redis. checking elasticsearch.", id) res, err := es.Get("metric", "metric_index", id, nil) if err != nil { if err == elastigo.RecordNotFound { log.Debug("%s not in ES. %s", id, err) } else { log.Error(3, "elasticsearch query failed. %s", err) } return nil, err } //fmt.Printf("elasticsearch query returned %q\n", res.Source) //fmt.Printf("placing %s into redis\n", id) if rerr := rs.SetEx(id, time.Duration(300)*time.Second, string(*res.Source)).Err(); err != nil { log.Error(3, "redis err. %s", rerr) } def, err := schema.MetricDefinitionFromJSON(*res.Source) if err != nil { return nil, err } return def, nil }
func inspect(fn GraphiteReturner, job *Job, cache *lru.Cache) { key := fmt.Sprintf("%d-%d", job.MonitorId, job.LastPointTs.Unix()) if found, _ := cache.ContainsOrAdd(key, true); found { log.Debug("Job %s already done", job) return } gr, err := fn(job.OrgId) if err != nil { log.Debug("Job %s: FATAL: %q", job, err) return } evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition) if err != nil { log.Debug("Job %s: FATAL: invalid check definition: %q", job, err) return } res, err := evaluator.Eval(job.LastPointTs) if err != nil { log.Debug("Job %s: FATAL: eval failed: %q", job, err) return } log.Debug("Job %s results: %v", job, res) }
func LoadOrSetOffset() int { query := m.GetAlertSchedulerValueQuery{ Id: "offset", } err := bus.Dispatch(&query) if err != nil { panic(fmt.Sprintf("failure querying for current offset: %q", err)) } if query.Result == "" { log.Debug("initializing offset to default value of 30 seconds.") setOffset(30) return 30 } i, err := strconv.Atoi(query.Result) if err != nil { panic(fmt.Sprintf("failure reading in offset: %q. input value was: %q", err, query.Result)) } return i }
func (k *ESHandler) HandleMessage(m *nsq.Message) error { log.Debug("received message.") format := "unknown" if m.Body[0] == '\x00' { format = "msgFormatJson" } var id int64 buf := bytes.NewReader(m.Body[1:9]) binary.Read(buf, binary.BigEndian, &id) produced := time.Unix(0, id) msgsAge.Value(time.Now().Sub(produced).Nanoseconds() / 1000) messagesSize.Value(int64(len(m.Body))) event := new(schema.ProbeEvent) if err := json.Unmarshal(m.Body[9:], &event); err != nil { log.Error(3, "ERROR: failure to unmarshal message body via format %s: %s. skipping message", format, err) return nil } done := make(chan error, 1) go func() { pre := time.Now() if err := eventdef.Save(event); err != nil { log.Error(3, "ERROR: couldn't process %s: %s\n", event.Id, err) eventsToEsFail.Inc(1) done <- err return } esPutDuration.Value(time.Now().Sub(pre)) eventsToEsOK.Inc(1) done <- nil }() if err := <-done; err != nil { msgsHandleFail.Inc(1) return err } msgsHandleOK.Inc(1) return nil }
func (u *S3Uploader) Upload(imageDiskPath string) (string, error) { s3util.DefaultConfig.AccessKey = u.accessKey s3util.DefaultConfig.SecretKey = u.secretKey header := make(http.Header) header.Add("x-amz-acl", "public-read") header.Add("Content-Type", "image/png") var imageUrl *url.URL var err error if imageUrl, err = url.Parse(u.bucket); err != nil { return "", err } // add image to url imageUrl.Path = path.Join(imageUrl.Path, util.GetRandomString(20)+".png") imageUrlString := imageUrl.String() log.Debug("Uploading image to s3", "url", imageUrlString) writer, err := s3util.Create(imageUrlString, header, nil) if err != nil { return "", err } defer writer.Close() imgData, err := ioutil.ReadFile(imageDiskPath) if err != nil { return "", err } _, err = writer.Write(imgData) if err != nil { return "", err } return imageUrlString, nil }
func GetProjects(data *Projects_data) error { log.Info("Authentication request to URL: %s", data.Server+"/v3/auth/projects") request, err := http.NewRequest("GET", data.Server+"/v3/auth/projects", nil) if err != nil { return err } request.Header.Add("X-Auth-Token", data.Token) resp, err := GetHttpClient().Do(request) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != 200 { return errors.New("Keystone project-list failed: " + resp.Status) } buf := new(bytes.Buffer) buf.ReadFrom(resp.Body) strBody := buf.Bytes() log.Debug("Projects response: \n%s", strBody) bodyReader := bytes.NewBufferString(fmt.Sprintf("%s", strBody)) var decoder = json.NewDecoder(bodyReader) var project_response project_response_struct err = decoder.Decode(&project_response) if err != nil { return err } for _, project := range project_response.Projects { if project.Enabled { data.Projects = append(data.Projects, project.Name) } } return nil }
func (u *S3Uploader) Upload(imageDiskPath string) (string, error) { sess := session.New() creds := credentials.NewChainCredentials( []credentials.Provider{ &credentials.StaticProvider{Value: credentials.Value{ AccessKeyID: u.accessKey, SecretAccessKey: u.secretKey, }}, &credentials.EnvProvider{}, &ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess), ExpiryWindow: 5 * time.Minute}, }) cfg := &aws.Config{ Region: aws.String(u.region), Credentials: creds, } key := util.GetRandomString(20) + ".png" log.Debug("Uploading image to s3", "bucket = ", u.bucket, ", key = ", key) file, err := os.Open(imageDiskPath) if err != nil { return "", err } svc := s3.New(session.New(cfg), cfg) params := &s3.PutObjectInput{ Bucket: aws.String(u.bucket), Key: aws.String(key), ACL: aws.String(u.acl), Body: file, ContentType: aws.String("image/png"), } _, err = svc.PutObject(params) if err != nil { return "", err } return "https://" + u.bucket + ".s3.amazonaws.com/" + key, nil }
func InitAppPluginRoutes(r *macaron.Macaron) { for _, plugin := range plugins.Apps { for _, route := range plugin.Routes { url := util.JoinUrlFragments("/api/plugin-proxy/"+plugin.Id, route.Path) handlers := make([]macaron.Handler, 0) handlers = append(handlers, middleware.Auth(&middleware.AuthOptions{ ReqSignedIn: true, ReqGrafanaAdmin: route.ReqGrafanaAdmin, })) if route.ReqRole != "" { if route.ReqRole == m.ROLE_ADMIN { handlers = append(handlers, middleware.RoleAuth(m.ROLE_ADMIN)) } else if route.ReqRole == m.ROLE_EDITOR { handlers = append(handlers, middleware.RoleAuth(m.ROLE_EDITOR, m.ROLE_ADMIN)) } } handlers = append(handlers, AppPluginRoute(route, plugin.Id)) r.Route(url, route.Method, handlers...) log.Debug("Plugins: Adding proxy route %s", url) } } }
// QuotaReached reports whether the given quota target is exhausted in any
// scope that applies to it (global, org, user). Limit semantics per scope:
// a negative limit means unlimited (skip), zero means the feature is
// disabled (always reached). On bus dispatch errors it returns
// reached=true together with the error, i.e. it fails closed.
func QuotaReached(c *Context, target string) (bool, error) {
	if !setting.Quota.Enabled {
		return false, nil
	}

	// get the list of scopes that this target is valid for. Org, User, Global
	scopes, err := m.GetQuotaScopes(target)
	if err != nil {
		return false, err
	}

	log.Debug(fmt.Sprintf("checking quota for %s in scopes %v", target, scopes))

	for _, scope := range scopes {
		log.Debug(fmt.Sprintf("checking scope %s", scope.Name))

		switch scope.Name {
		case "global":
			if scope.DefaultLimit < 0 {
				continue
			}
			if scope.DefaultLimit == 0 {
				return true, nil
			}
			if target == "session" {
				// sessions are counted in-process, not via the bus.
				usedSessions := getSessionCount()
				if int64(usedSessions) > scope.DefaultLimit {
					log.Debug(fmt.Sprintf("%d sessions active, limit is %d", usedSessions, scope.DefaultLimit))
					return true, nil
				}
				continue
			}
			query := m.GetGlobalQuotaByTargetQuery{Target: scope.Target}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Used >= scope.DefaultLimit {
				return true, nil
			}
		case "org":
			if !c.IsSignedIn {
				continue
			}
			query := m.GetOrgQuotaByTargetQuery{OrgId: c.OrgId, Target: scope.Target, Default: scope.DefaultLimit}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Limit < 0 {
				continue
			}
			if query.Result.Limit == 0 {
				return true, nil
			}
			if query.Result.Used >= query.Result.Limit {
				return true, nil
			}
		case "user":
			if !c.IsSignedIn || c.UserId == 0 {
				continue
			}
			query := m.GetUserQuotaByTargetQuery{UserId: c.UserId, Target: scope.Target, Default: scope.DefaultLimit}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Limit < 0 {
				continue
			}
			if query.Result.Limit == 0 {
				return true, nil
			}
			if query.Result.Used >= query.Result.Limit {
				return true, nil
			}
		}
	}

	return false, nil
}
// execute executes an alerting job and returns any errors.
// errors are always prefixed with 'non-fatal' (i.e. error condition that imply retrying the job later might fix it)
// or 'fatal', when we're sure the job will never process successfully.
func execute(fn GraphiteReturner, job *Job, cache *lru.Cache) error {
	// deduplicate on monitor id + evaluation timestamp: a given job is only
	// ever executed once.
	key := fmt.Sprintf("%d-%d", job.MonitorId, job.LastPointTs.Unix())

	preConsider := time.Now()
	if found, _ := cache.ContainsOrAdd(key, true); found {
		log.Debug("T %s already done", key)
		executorNumAlreadyDone.Inc(1)
		executorConsiderJobAlreadyDone.Value(time.Since(preConsider))
		return nil
	}
	log.Debug("T %s doing", key)
	executorNumOriginalTodo.Inc(1)
	executorConsiderJobOriginalTodo.Value(time.Since(preConsider))

	gr, err := fn(job.OrgId)
	if err != nil {
		return fmt.Errorf("fatal: job %q: %q", job, err)
	}
	// propagate the job's series/step assertions into the graphite context.
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		gr.AssertMinSeries = job.AssertMinSeries
		gr.AssertStart = job.AssertStart
		gr.AssertStep = job.AssertStep
		gr.AssertSteps = job.AssertSteps
	}

	preExec := time.Now()
	executorJobExecDelay.Value(preExec.Sub(job.LastPointTs))
	evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition)
	if err != nil {
		// expressions should be validated before they are stored in the db!
		return fmt.Errorf("fatal: job %q: invalid check definition %q: %q", job, job.Definition, err)
	}

	res, err := evaluator.Eval(job.LastPointTs)
	log.Debug("job results - job:%v err:%v res:%v", job, err, res)
	if err != nil {
		return fmt.Errorf("Eval failed for job %q : %s", job, err.Error())
	}
	durationExec := time.Since(preExec)

	updateMonitorStateCmd := m.UpdateMonitorStateCommand{
		Id:      job.MonitorId,
		State:   res,
		Updated: job.LastPointTs,
		Checked: preExec,
	}
	if err := bus.Dispatch(&updateMonitorStateCmd); err != nil {
		//check if we failed due to deadlock.
		if err.Error() == "Error 1213: Deadlock found when trying to get lock; try restarting transaction" {
			err = bus.Dispatch(&updateMonitorStateCmd)
		}
	}
	// NOTE(review): the `err` tested below is the OUTER err (nil after the
	// Eval check above), not the shadowed dispatch error — so a dispatch
	// failure other than the retried deadlock appears to be silently
	// ignored. Confirm whether that is intended.
	if err != nil {
		return fmt.Errorf("non-fatal: failed to update monitor state: %q", err)
	}
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		requests := ""
		for _, trace := range gr.Traces {
			r := trace.Request
			requests += fmt.Sprintf("\ntargets: %s\nfrom:%s\nto:%s\nresponse:%s\n", r.Targets, r.Start, r.End, trace.Response)
		}
		log.Debug("Job %s state_change=%t request traces: %s", job, updateMonitorStateCmd.Affected > 0, requests)
	}
	// Affected > 0 means the stored state actually changed.
	if updateMonitorStateCmd.Affected > 0 {
		//emit a state change event.
		if job.Notifications.Enabled {
			emails := strings.Split(job.Notifications.Addresses, ",")
			if len(emails) < 1 {
				log.Debug("no email addresses provided. OrgId: %d monitorId: %d", job.OrgId, job.MonitorId)
			} else {
				for _, email := range emails {
					log.Info("sending email. addr=%s, orgId=%d, monitorId=%d, endpointSlug=%s, state=%s", email, job.OrgId, job.MonitorId, job.EndpointSlug, res.String())
				}
				sendCmd := m.SendEmailCommand{
					To:       emails,
					Template: "alerting_notification.html",
					Data: map[string]interface{}{
						"EndpointId":   job.EndpointId,
						"EndpointName": job.EndpointName,
						"EndpointSlug": job.EndpointSlug,
						"Settings":     job.Settings,
						"CheckType":    job.MonitorTypeName,
						"State":        res.String(),
						"TimeLastData": job.LastPointTs, // timestamp of the most recent data used
						"TimeExec":     preExec,         // when we executed the alerting rule and made the determination
					},
				}
				if err := bus.Dispatch(&sendCmd); err != nil {
					// NOTE(review): format string has 3 verbs but 4 args;
					// the trailing err is never rendered.
					log.Info("failed to send email to %s. OrgId: %d monitorId: %d", emails, job.OrgId, job.MonitorId, err)
				}
			}
		}
	}
	//store the result in graphite.
	job.StoreResult(res)

	// the bosun api abstracts parsing, execution and graphite querying for us via 1 call.
	// we want to have some individual times
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		executorJobQueryGraphite.Value(gr.Dur)
		executorJobParseAndEval.Value(durationExec - gr.Dur)
		if gr.MissingVals > 0 {
			executorGraphiteMissingVals.Value(int64(gr.MissingVals))
		}
		if gr.EmptyResp != 0 {
			executorGraphiteEmptyResponse.Inc(int64(gr.EmptyResp))
		}
		if gr.IncompleteResp != 0 {
			executorGraphiteIncompleteResponse.Inc(int64(gr.IncompleteResp))
		}
		if gr.BadStart != 0 {
			executorGraphiteBadStart.Inc(int64(gr.BadStart))
		}
		if gr.BadStep != 0 {
			executorGraphiteBadStep.Inc(int64(gr.BadStep))
		}
		if gr.BadSteps != 0 {
			executorGraphiteBadSteps.Inc(int64(gr.BadSteps))
		}
	}

	switch res {
	case m.EvalResultOK:
		executorAlertOutcomesOk.Inc(1)
	case m.EvalResultWarn:
		executorAlertOutcomesWarn.Inc(1)
	case m.EvalResultCrit:
		executorAlertOutcomesCrit.Inc(1)
	case m.EvalResultUnknown:
		executorAlertOutcomesUnkn.Inc(1)
	}
	return nil
}
// Executor consumes alerting jobs from jobQueue forever, evaluating each
// job at most once (LRU-deduplicated on job key + evaluation timestamp),
// dispatching monitor-state updates and notification emails on state
// changes, and emitting instrumentation throughout. Invalid check
// definitions and state-update failures panic.
func Executor(fn GraphiteReturner, jobQueue <-chan Job) {
	cache, err := lru.New(10000) // TODO configurable
	if err != nil {
		panic(fmt.Sprintf("Can't create LRU: %s", err.Error()))
	}
	// create series explicitly otherwise the grafana-influxdb graphs don't work if the series doesn't exist
	Stat.IncrementValue("alert-executor.alert-outcomes.ok", 0)
	Stat.IncrementValue("alert-executor.alert-outcomes.critical", 0)
	Stat.IncrementValue("alert-executor.alert-outcomes.unknown", 0)
	Stat.IncrementValue("alert-executor.graphite-emptyresponse", 0)
	Stat.TimeDuration("alert-executor.consider-job.already-done", 0)
	Stat.TimeDuration("alert-executor.consider-job.original-todo", 0)
	for job := range jobQueue {
		Stat.Gauge("alert-jobqueue-internal.items", int64(len(jobQueue)))
		Stat.Gauge("alert-jobqueue-internal.size", int64(jobQueueSize))
		key := fmt.Sprintf("%s-%d", job.Key, job.LastPointTs.Unix())
		preConsider := time.Now()
		if _, ok := cache.Get(key); ok {
			// NOTE(review): "alredy" is a typo in this log line.
			log.Debug("T %s alredy done", key)
			Stat.TimeDuration("alert-executor.consider-job.already-done", time.Since(preConsider))
			continue
		}
		log.Debug("T %s doing", key)
		Stat.TimeDuration("alert-executor.consider-job.original-todo", time.Since(preConsider))
		gr := fn(job.OrgId)
		preExec := time.Now()
		evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition)
		if err != nil {
			// expressions should be validated before they are stored in the db
			// if they fail now it's a critical error
			panic(fmt.Sprintf("received invalid check definition '%s': %s", job.Definition, err))
		}
		// NOTE(review): an Eval error is only logged below; res is still
		// compared/stored as if valid — confirm this is intended.
		res, err := evaluator.Eval(job.LastPointTs)
		log.Debug("job results - job:%v err:%v res:%v", job, err, res)
		durationExec := time.Since(preExec)
		if job.State != res {
			//monitor state has changed.
			updateMonitorStateCmd := m.UpdateMonitorStateCommand{
				Id:      job.MonitorId,
				State:   res,
				Updated: job.LastPointTs,
			}
			if err := bus.Dispatch(&updateMonitorStateCmd); err != nil {
				panic(fmt.Sprintf("failed to update monitor state. %s", err.Error()))
			}
			//emit a state change event.
			if job.Notifications.Enabled {
				emails := strings.Split(job.Notifications.Addresses, ",")
				if len(emails) < 1 {
					log.Debug("no email addresses provided. OrgId: %d monitorId: %d", job.OrgId, job.MonitorId)
					continue
				}
				sendCmd := m.SendEmailCommand{
					To:       emails,
					Template: "alerting_notification.html",
					Data: map[string]interface{}{
						"Endpoint":  job.EndpointSlug,
						"CheckType": job.MonitorTypeName,
						"State":     res.String(),
					},
				}
				if err := bus.Dispatch(&sendCmd); err != nil {
					// NOTE(review): format string has 3 verbs but 4 args;
					// the trailing err is never rendered.
					log.Info("failed to send email to %s. OrgId: %d monitorId: %d", emails, job.OrgId, job.MonitorId, err)
				}
			}
		}
		//store the result in graphite.
		job.StoreResult(res)
		// the bosun api abstracts parsing, execution and graphite querying for us via 1 call.
		// we want to have some individual times
		if gr, ok := gr.(*GraphiteContext); ok {
			Stat.TimeDuration("alert-executor.job_query_graphite", gr.dur)
			Stat.TimeDuration("alert-executor.job_parse-and-evaluate", durationExec-gr.dur)
			Stat.Timing("alert-executor.graphite-missingVals", int64(gr.missingVals))
			if gr.emptyResp {
				Stat.Increment("alert-executor.graphite-emptyresponse")
			}
		}
		Stat.Increment(strings.ToLower(fmt.Sprintf("alert-executor.alert-outcomes.%s", res)))
		// only mark done after full processing, so interrupted jobs could
		// in principle be retried.
		cache.Add(key, true)
	}
}
// note: we don't normalize/quantize/fill-unknowns
// we just serve what we know
// Get serves a graphite-style render request. For every requested target it
// gathers iterators from the in-memory ring buffer and, when the request
// reaches further back than memory holds, from cassandra, then emits the
// points inside [from, to) as JSON. Default window: the last 24 hours.
func Get(w http.ResponseWriter, req *http.Request) {
	pre := time.Now()
	values := req.URL.Query()
	keys, ok := values["target"]
	if !ok {
		http.Error(w, "missing render arg", http.StatusBadRequest)
		return
	}
	now := time.Now()
	// default: from = 24h ago, to = 1s in the future (so "now" is included).
	fromUnix := uint32(now.Add(-time.Duration(24) * time.Hour).Unix())
	toUnix := uint32(now.Add(time.Duration(1) * time.Second).Unix())
	from := values.Get("from")
	if from != "" {
		fromUnixInt, err := strconv.Atoi(from)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		fromUnix = uint32(fromUnixInt)
	}
	to := values.Get("to")
	if to != "" {
		toUnixInt, err := strconv.Atoi(to)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		toUnix = uint32(toUnixInt)
	}
	if fromUnix >= toUnix {
		http.Error(w, "to must be higher than from", http.StatusBadRequest)
		return
	}

	out := make([]Series, len(keys))
	for i, key := range keys {
		iters := make([]*tsz.Iter, 0)
		var memIters []*tsz.Iter
		oldest := toUnix
		if metric, ok := metrics.Get(key); ok {
			oldest, memIters = metric.Get(fromUnix, toUnix)
		} else {
			memIters = make([]*tsz.Iter, 0)
		}
		if oldest > fromUnix {
			// in-memory data doesn't reach back far enough: fetch the older
			// part of the range from cassandra.
			reqSpanBoth.Value(int64(toUnix - fromUnix))
			log.Debug("data load from cassandra: %s - %s from mem: %s - %s", TS(fromUnix), TS(oldest), TS(oldest), TS(toUnix))
			storeIters, err := searchCassandra(key, fromUnix, oldest)
			if err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			//for _, i := range storeIters {
			//	fmt.Println("c>", TS(i.T0()))
			//}
			iters = append(iters, storeIters...)
		} else {
			reqSpanMem.Value(int64(toUnix - fromUnix))
			log.Debug("data load from mem: %s-%s, oldest (%d)", TS(fromUnix), TS(toUnix), oldest)
		}
		iters = append(iters, memIters...)
		//for _, i := range memIters {
		//	fmt.Println("m>", TS(i.T0()))
		//}
		// flatten all iterators into points, keeping only [from, to).
		points := make([]Point, 0)
		for _, iter := range iters {
			for iter.Next() {
				ts, val := iter.Values()
				if ts >= fromUnix && ts < toUnix {
					points = append(points, Point{val, ts})
				}
			}
		}
		out[i] = Series{
			Target:     key,
			Datapoints: points,
		}
	}
	js, err := json.Marshal(out)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	reqHandleDuration.Value(time.Now().Sub(pre))
	w.Write(js)
}
// Get all data between the requested time ranges. From is inclusive, to is exclusive. from <= x < to
// more data then what's requested may be included
// also returns oldest point we have, so that if your query needs data before it, the caller knows when to query cassandra
func (a *AggMetric) Get(from, to uint32) (uint32, []*tsz.Iter) {
	log.Debug("GET: %s from: %d to:%d", a.Key, from, to)
	if from >= to {
		panic("invalid request. to must > from")
	}
	a.RLock()
	defer a.RUnlock()
	newestChunk := a.getChunk(a.CurrentChunkPos)
	if newestChunk == nil {
		// we dont have any data yet.
		log.Debug("no data for requested range.")
		// MaxUint32 as "oldest" signals the caller has nothing in memory.
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}
	if from >= newestChunk.T0+a.ChunkSpan {
		// we have no data in the requested range.
		log.Debug("no data for requested range.")
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}
	// get the oldest chunk we have.
	// eg if we have 5 chunks, N is the current chunk and n-4 is the oldest chunk.
	// -----------------------------
	// | n-4 | n-3 | n-2 | n-1 | n |  CurrentChunkPos = 4
	// -----------------------------
	// -----------------------------
	// | n | n-4 | n-3 | n-2 | n-1 |  CurrentChunkPos = 0
	// -----------------------------
	// -----------------------------
	// | n-2 | n-1 | n | n-4 | n-3 |  CurrentChunkPos = 2
	// -----------------------------
	oldestPos := a.CurrentChunkPos + 1
	if oldestPos >= len(a.Chunks) {
		oldestPos = 0
	}
	oldestChunk := a.getChunk(oldestPos)
	if oldestChunk == nil {
		log.Error(3, "unexpected nil chunk.")
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}
	if to <= oldestChunk.T0 {
		// the requested time range ends before any data we have.
		log.Debug("no data for requested range")
		return oldestChunk.T0, make([]*tsz.Iter, 0)
	}
	// Find the oldest Chunk that the "from" ts falls in.  If from extends before the oldest
	// chunk, then we just use the oldest chunk.
	for from >= oldestChunk.T0+a.ChunkSpan {
		oldestPos++
		if oldestPos >= len(a.Chunks) {
			oldestPos = 0
		}
		oldestChunk = a.getChunk(oldestPos)
		if oldestChunk == nil {
			log.Error(3, "unexpected nil chunk.")
			return to, make([]*tsz.Iter, 0)
		}
	}
	// find the newest Chunk that "to" falls in.  If "to" extends to after the newest data
	// then just return the newest chunk.
	// some examples to clarify this more. assume newestChunk.T0 is at 120, then
	// for a to of 121 -> data upto (incl) 120 -> stay at this chunk, it has a point we need
	// for a to of 120 -> data upto (incl) 119 -> use older chunk
	// for a to of 119 -> data upto (incl) 118 -> use older chunk
	newestPos := a.CurrentChunkPos
	for to <= newestChunk.T0 {
		newestPos--
		if newestPos < 0 {
			newestPos += len(a.Chunks)
		}
		newestChunk = a.getChunk(newestPos)
		if newestChunk == nil {
			log.Error(3, "unexpected nil chunk.")
			return to, make([]*tsz.Iter, 0)
		}
	}

	// now just start at oldestPos and move through the Chunks circular Buffer to newestPos
	iters := make([]*tsz.Iter, 0, a.NumChunks)
	for oldestPos != newestPos {
		iters = append(iters, a.getChunk(oldestPos).Iter())
		oldestPos++
		if oldestPos >= int(a.NumChunks) {
			oldestPos = 0
		}
	}
	// add the last chunk
	iters = append(iters, a.getChunk(oldestPos).Iter())
	return oldestChunk.T0, iters
}
// this function must only be called while holding the lock func (a *AggMetric) addAggregators(ts uint32, val float64) { for _, agg := range a.aggregators { log.Debug("pushing value to aggregator") agg.Add(ts, val) } }
// syncOrgRoles reconciles the user's Grafana org memberships and roles
// with their Keystone project roles: it updates/removes roles in orgs the
// user already belongs to, adds memberships for projects with matching
// orgs, recomputes the global admin flag from admin_roles, and finally
// ensures the user's active org is one they still have access to.
// Fails if the user ends up with no org permissions at all.
func (a *keystoneAuther) syncOrgRoles(username, password string, user *m.User) error {
	log.Trace("syncOrgRoles()")
	err := a.getProjectList(username, password)
	if err != nil {
		return err
	}
	log.Debug("OpenStack project_list[roles]: %v", a.project_list)

	orgsQuery := m.GetUserOrgListQuery{UserId: user.Id}
	if err := bus.Dispatch(&orgsQuery); err != nil {
		return err
	}

	handledOrgIds := map[int64]bool{}

	// update or remove org roles
	for _, org := range orgsQuery.Result {
		handledOrgIds[org.OrgId] = true
		log.Info(fmt.Sprintf("Checking Grafana org %v for roles", org.Name))

		if user_roles, ok := a.project_list[org.Name]; ok {
			// Update roles if user belongs to org
			role_name := a.getRole(user_roles)
			if role_name != "" {
				if err := a.updateGrafanaOrgUser(user.Id, org.OrgId, role_name); err != nil {
					return err
				}
			} else {
				// remove user if no permissions
				if err := a.removeGrafanaOrgUser(user.Id, org.OrgId); err != nil {
					return err
				}
			}
		} else {
			// remove role if no mappings match
			if err := a.removeGrafanaOrgUser(user.Id, org.OrgId); err != nil {
				return err
			}
		}
	}

	// add missing org roles
	for project, _ := range a.project_list {
		if grafanaOrg, err := a.getGrafanaOrgFor(project); err != nil {
			return err
		} else {
			if _, exists := handledOrgIds[grafanaOrg.Id]; exists {
				continue
			}

			// add role
			role_name := a.getRole(a.project_list[project])
			if role_name != "" {
				cmd := m.AddOrgUserCommand{UserId: user.Id, Role: role_name, OrgId: grafanaOrg.Id}
				if err := bus.Dispatch(&cmd); err != nil {
					return err
				}
			}

			// mark this tenant has handled so we do not process it again
			handledOrgIds[grafanaOrg.Id] = true
		}
	}

	// set or unset admin permissions
	// the user is admin if any role in any project matches admin_roles.
	isAdmin := false
	role_map := make(map[string]bool)
	for _, role := range a.admin_roles {
		role_map[role] = true
	}
	for project, _ := range a.project_list {
		if isAdmin == true {
			break
		}
		project_roles := a.project_list[project]
		for _, role := range project_roles {
			if _, ok := role_map[role]; ok {
				isAdmin = true
				break
			}
		}
	}
	if isAdmin != user.IsAdmin {
		if err := a.updateGrafanaUserPermissions(user.Id, isAdmin); err != nil {
			return err
		}
	}

	// re-fetch: org memberships may have changed above.
	orgsQuery = m.GetUserOrgListQuery{UserId: user.Id}
	if err := bus.Dispatch(&orgsQuery); err != nil {
		return err
	}
	if len(orgsQuery.Result) == 0 {
		return errors.New("Keystone authentication failed: No grafana permissions")
	}

	match := false
	var orgid int64
	for _, org := range orgsQuery.Result {
		orgid = org.OrgId
		if user.OrgId == orgid {
			match = true
			break
		}
	}

	// set org if none is set (for new users), or if user no longer has permissions for the current org
	if (user.OrgId == 1) || (match == false) {
		cmd := m.SetUsingOrgCommand{UserId: user.Id, OrgId: orgid}
		if err := bus.Dispatch(&cmd); err != nil {
			return err
		}
	}

	return nil
}