func (mg *Migrator) Start() error { if mg.LogLevel <= log.INFO { log.Info("Migrator: Starting DB migration") } logMap, err := mg.GetMigrationLog() if err != nil { return err } for _, m := range mg.migrations { _, exists := logMap[m.Id()] if exists { if mg.LogLevel <= log.DEBUG { log.Debug("Migrator: Skipping migration: %v, Already executed", m.Id()) } continue } sql := m.Sql(mg.dialect) record := MigrationLog{ MigrationId: m.Id(), Sql: sql, Timestamp: time.Now(), } if mg.LogLevel <= log.DEBUG { log.Debug("Migrator: Executing SQL: \n %v \n", sql) } if err := mg.exec(m); err != nil { log.Error(3, "Migrator: error: \n%s:\n%s", err, sql) record.Error = err.Error() mg.x.Insert(&record) return err } else { record.Success = true mg.x.Insert(&record) } } return nil }
func inspect(fn GraphiteReturner, job *m.AlertingJob, cache *lru.Cache) { key := fmt.Sprintf("%d-%d", job.CheckId, job.LastPointTs.Unix()) if found, _ := cache.ContainsOrAdd(key, true); found { //log.Debug("Job %s already done", job) return } gr, err := fn(job.OrgId) if err != nil { log.Debug("Job %s: FATAL: %q", job, err) return } evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition) if err != nil { log.Debug("Job %s: FATAL: invalid check definition: %q", job, err) return } res, err := evaluator.Eval(job.LastPointTs) if err != nil { log.Debug("Job %s: FATAL: eval failed: %q", job, err) return } log.Debug("Job %s results: %v", job, res) }
func handleMessages(c chan Message) { for m := range c { go func(msg Message) { e := RawEvent{} err := json.Unmarshal(msg.Payload, &e) if err != nil { log.Error(3, "unable to unmarshal event Message. %s", err) return } log.Debug("processing event of type %s", e.Type) //broadcast the event to listeners. for _, ch := range handlers.GetListeners(e.Type) { ch <- e } }(m) } }
func Publish(event *schema.ProbeEvent) error { if !enabled { return nil } id := time.Now().UnixNano() data, err := msg.CreateProbeEventMsg(event, id, msg.FormatProbeEventMsgp) if err != nil { log.Fatal(4, "Fatal error creating event message: %s", err) } collectorEventPublisherMsgs.Inc(1) err = globalProducer.Publish(topic, data) if err != nil { log.Fatal(4, "can't publish to nsqd: %s", err) } log.Debug("event published to NSQ %d", id) return nil }
func V1UpdateMonitor(c *middleware.Context, cmd m.UpdateMonitorCommand) { cmd.OrgId = c.OrgId if cmd.EndpointId == 0 { c.JSON(400, "EndpointId not set.") return } if cmd.MonitorTypeId == 0 { c.JSON(400, "MonitorTypeId not set.") return } if cmd.MonitorTypeId > 4 { c.JSON(400, "Invlaid MonitorTypeId.") return } if cmd.Frequency == 0 { c.JSON(400, "Frequency not set.") return } // get the endpoint that the check belongs too. endpoint, err := sqlstore.GetEndpointById(c.OrgId, cmd.EndpointId) if err != nil { handleError(c, err) return } if endpoint == nil { c.JSON(400, "endpoint does not exist.") return } route := &m.CheckRoute{} if len(cmd.CollectorTags) > 0 { route.Type = m.RouteByTags route.Config = map[string]interface{}{ "tags": cmd.CollectorTags, } } else { route.Type = m.RouteByIds route.Config = map[string]interface{}{ "ids": cmd.CollectorIds, } } checkPos := 0 found := false for pos, check := range endpoint.Checks { if check.Id == cmd.Id { checkPos = pos found = true log.Debug("updating check %d of endpoint %s", check.Id, endpoint.Slug) if check.Type != m.MonitorTypeToCheckTypeMap[cmd.MonitorTypeId-1] { c.JSON(400, "monitor Type cant be changed.") return } break } } if !found { c.JSON(400, "check does not exist in endpoint.") return } endpoint.Checks[checkPos].Frequency = cmd.Frequency endpoint.Checks[checkPos].Enabled = cmd.Enabled endpoint.Checks[checkPos].HealthSettings = cmd.HealthSettings endpoint.Checks[checkPos].Updated = time.Now() endpoint.Checks[checkPos].Route = route endpoint.Checks[checkPos].Settings = m.MonitorSettingsDTO(cmd.Settings).ToV2Setting(m.MonitorTypeToCheckTypeMap[cmd.MonitorTypeId-1]) err = sqlstore.ValidateCheckRoute(&endpoint.Checks[checkPos]) if err != nil { handleError(c, err) return } err = sqlstore.UpdateEndpoint(endpoint) if err != nil { handleError(c, err) return } c.JSON(200, "Monitor updated") }
// execute executes an alerting job and returns any errors. // errors are always prefixed with 'non-fatal' (i.e. error condition that imply retrying the job later might fix it) // or 'fatal', when we're sure the job will never process successfully. func execute(fn GraphiteReturner, job *m.AlertingJob, cache *lru.Cache) error { key := fmt.Sprintf("%d-%d", job.CheckId, job.LastPointTs.Unix()) preConsider := time.Now() if time.Now().Sub(job.GeneratedAt) > time.Minute*time.Duration(10) { executorNumTooOld.Inc(1) return nil } if found, _ := cache.ContainsOrAdd(key, true); found { //log.Debug("T %s already done", key) executorNumAlreadyDone.Inc(1) executorConsiderJobAlreadyDone.Value(time.Since(preConsider)) return nil } //log.Debug("T %s doing", key) executorNumOriginalTodo.Inc(1) executorConsiderJobOriginalTodo.Value(time.Since(preConsider)) gr, err := fn(job.OrgId) if err != nil { return fmt.Errorf("fatal: job %q: %q", job, err) } if gr, ok := gr.(*graphite.GraphiteContext); ok { gr.AssertMinSeries = job.AssertMinSeries gr.AssertStart = job.AssertStart gr.AssertStep = job.AssertStep gr.AssertSteps = job.AssertSteps } preExec := time.Now() executorJobExecDelay.Value(preExec.Sub(job.LastPointTs)) evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition) if err != nil { // expressions should be validated before they are stored in the db! return fmt.Errorf("fatal: job %q: invalid check definition %q: %q", job, job.Definition, err) } res, err := evaluator.Eval(job.LastPointTs) durationExec := time.Since(preExec) log.Debug("job results - job:%v err:%v res:%v", job, err, res) // the bosun api abstracts parsing, execution and graphite querying for us via 1 call. // we want to have some individual times if gr, ok := gr.(*graphite.GraphiteContext); ok { executorJobQueryGraphite.Value(gr.Dur) executorJobParseAndEval.Value(durationExec - gr.Dur) if gr.MissingVals > 0 { executorGraphiteMissingVals.Value(int64(gr.MissingVals)) } if gr.EmptyResp != 0 { executorGraphiteEmptyResponse.Inc(int64(gr.EmptyResp)) } if gr.IncompleteResp != 0 { executorGraphiteIncompleteResponse.Inc(int64(gr.IncompleteResp)) } if gr.BadStart != 0 { executorGraphiteBadStart.Inc(int64(gr.BadStart)) } if gr.BadStep != 0 { executorGraphiteBadStep.Inc(int64(gr.BadStep)) } if gr.BadSteps != 0 { executorGraphiteBadSteps.Inc(int64(gr.BadSteps)) } } if err != nil { executorAlertOutcomesErr.Inc(1) return fmt.Errorf("fatal: eval failed for job %q : %s", job, err.Error()) } job.NewState = res job.TimeExec = preExec // lets only update the stateCheck value every second check, which will half the load we place on the DB. if job.State != job.NewState || job.TimeExec.Sub(job.StateCheck) > (time.Second*time.Duration(job.Freq*2)) { ProcessResult(job) } //store the result in graphite. StoreResult(job) switch res { case m.EvalResultOK: executorAlertOutcomesOk.Inc(1) case m.EvalResultWarn: executorAlertOutcomesWarn.Inc(1) case m.EvalResultCrit: executorAlertOutcomesCrit.Inc(1) case m.EvalResultUnknown: executorAlertOutcomesUnkn.Inc(1) } return nil }