func Status(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (interface{}, error) { r.ParseForm() type ExtStatus struct { AlertName string *models.IncidentState } m := make(map[string]ExtStatus) for _, k := range r.Form["ak"] { ak, err := models.ParseAlertKey(k) if err != nil { return nil, err } var state *models.IncidentState if r.FormValue("all") != "" { allInc, err := schedule.DataAccess.State().GetAllIncidents(ak) if err != nil { return nil, err } if len(allInc) == 0 { return nil, fmt.Errorf("No incidents for alert key") } state = allInc[0] allEvents := models.EventsByTime{} for _, inc := range allInc { for _, e := range inc.Events { allEvents = append(allEvents, e) } } sort.Sort(allEvents) state.Events = allEvents } else { state, err = schedule.DataAccess.State().GetLatestIncident(ak) if err != nil { return nil, err } } st := ExtStatus{IncidentState: state} if st.IncidentState == nil { return nil, fmt.Errorf("unknown alert key: %v", k) } st.AlertName = ak.Name() m[k] = st } return m, nil }
// RunHistory for a single alert key. Returns true if notifications were altered. func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) { event.Time = r.Start a := s.Conf.Alerts[ak.Name()] if a.UnknownsNormal && event.Status == models.StUnknown { event.Status = models.StNormal } data := s.DataAccess.State() err = data.TouchAlertKey(ak, utcNow()) if err != nil { return } si := silenced(ak) // get existing open incident if exists var incident *models.IncidentState incident, err = data.GetOpenIncident(ak) if err != nil { return } defer func() { // save unless incident is new and closed (log alert) if incident != nil && (incident.Id != 0 || incident.Open) { _, err = data.UpdateIncidentState(incident) } else { err = data.SetUnevaluated(ak, event.Unevaluated) // if nothing to save, at least store the unevaluated state } }() // If nothing is out of the ordinary we are done if event.Status <= models.StNormal && incident == nil { return } // if event is unevaluated, we are done also. if incident != nil { incident.Unevaluated = event.Unevaluated } if event.Unevaluated { return } shouldNotify := false newIncident := false if incident == nil { incident = NewIncident(ak) newIncident = true shouldNotify = true } // VICTOROPS INTEGRATION: Enables notification of incidents which have returned to normal (Sends normNotification defined in config) if event.Status <= models.StNormal && (incident.CurrentStatus == models.StWarning || incident.CurrentStatus == models.StCritical) { slog.Infof("TRIGGER_RESOLVED: from %s to %s", incident.CurrentStatus, event.Status) shouldNotify = true } // VICTOROPS INTEGRATION: Enables notification of Incidents which have returned to normal but are now back to warning or critical. i.e. enable Flapping if incident.CurrentStatus == models.StNormal && (event.Status == models.StCritical || event.Status == models.StWarning) { slog.Infof("TRIGGER_REALERT: from %s to %s", incident.CurrentStatus, event.Status) shouldNotify = true } // set state.Result according to event result if event.Status == models.StCritical { incident.Result = event.Crit } else if event.Status == models.StWarning { incident.Result = event.Warn } if event.Status > models.StNormal { incident.LastAbnormalStatus = event.Status incident.LastAbnormalTime = event.Time.UTC().Unix() } if event.Status > incident.WorstStatus { incident.WorstStatus = event.Status shouldNotify = true } if event.Status != incident.CurrentStatus { incident.Events = append(incident.Events, *event) } incident.CurrentStatus = event.Status //run a preliminary save on new incidents to get an id if newIncident { if a.Log || silencedOrIgnored(a, event, si) { //a log or silenced/ignored alert will not need to be saved } else { incident.Id, err = s.DataAccess.State().UpdateIncidentState(incident) if err != nil { return } } } //render templates and open alert key if abnormal if event.Status > models.StNormal { s.executeTemplates(incident, event, a, r) incident.Open = true if a.Log { incident.Open = false } } // On state increase, clear old notifications and notify current. // Do nothing if state did not change. notify := func(ns *conf.Notifications) { if a.Log { lastLogTime := s.lastLogTimes[ak] now := utcNow() if now.Before(lastLogTime.Add(a.MaxLogFrequency)) { return } s.lastLogTimes[ak] = now } nots := ns.Get(s.Conf, incident.AlertKey.Group()) for _, n := range nots { s.Notify(incident, n) checkNotify = true } } notifyCurrent := func() { //Auto close ignoreUnknowns for new incident. if silencedOrIgnored(a, event, si) { incident.Open = false return } // VICTOROPS INTEGRATION incident.NeedAck = false switch event.Status { case models.StCritical, models.StUnknown: notify(a.CritNotification) case models.StWarning: notify(a.WarnNotification) case models.StNormal: // VICTOROPS INTEGRATION incident.NeedAck = false notify(a.NormNotification) } } // lock while we change notifications. s.Lock("RunHistory") if shouldNotify { incident.NeedAck = false if err = s.DataAccess.Notifications().ClearNotifications(ak); err != nil { return } notifyCurrent() } // finally close an open alert with silence once it goes back to normal. if si := silenced(ak); si != nil && event.Status == models.StNormal { go func(ak models.AlertKey) { slog.Infof("auto close %s because was silenced", ak) err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak) if err != nil { slog.Errorln(err) } }(ak) } s.Unlock() return checkNotify, nil }
func procRule(t miniprofiler.Timer, c *conf.Conf, a *conf.Alert, now time.Time, summary bool, email string, template_group string) (*ruleResult, error) { s := &sched.Schedule{} s.DataAccess = schedule.DataAccess s.Search = schedule.Search if err := s.Init(c); err != nil { return nil, err } rh := s.NewRunHistory(now, cacheObj) if _, err := s.CheckExpr(t, rh, a, a.Warn, models.StWarning, nil); err != nil { return nil, err } if _, err := s.CheckExpr(t, rh, a, a.Crit, models.StCritical, nil); err != nil { return nil, err } keys := make(models.AlertKeys, len(rh.Events)) criticals, warnings, normals := make([]models.AlertKey, 0), make([]models.AlertKey, 0), make([]models.AlertKey, 0) i := 0 for k, v := range rh.Events { v.Time = now keys[i] = k i++ switch v.Status { case models.StNormal: normals = append(normals, k) case models.StWarning: warnings = append(warnings, k) case models.StCritical: criticals = append(criticals, k) default: return nil, fmt.Errorf("unknown state type %v", v.Status) } } sort.Sort(keys) var subject, body []byte var data interface{} warning := make([]string, 0) if !summary && len(keys) > 0 { var primaryIncident *models.IncidentState if template_group != "" { ts, err := opentsdb.ParseTags(template_group) if err != nil { return nil, err } for _, ak := range keys { if ak.Group().Subset(ts) { primaryIncident = sched.NewIncident(ak) primaryIncident.Events = []models.Event{*rh.Events[ak]} break } } } if primaryIncident == nil { primaryIncident = sched.NewIncident(keys[0]) primaryIncident.Events = []models.Event{*rh.Events[keys[0]]} if template_group != "" { warning = append(warning, fmt.Sprintf("template group %s was not a subset of any result", template_group)) } } if e := primaryIncident.Events[0]; e.Crit != nil { primaryIncident.Result = e.Crit } else if e.Warn != nil { primaryIncident.Result = e.Warn } var b_err, s_err error func() { defer func() { if err := recover(); err != nil { s := fmt.Sprint(err) warning = append(warning, s) b_err = fmt.Errorf(s) } }() if body, _, b_err = s.ExecuteBody(rh, a, primaryIncident, false); b_err != nil { warning = append(warning, b_err.Error()) } }() func() { defer func() { if err := recover(); err != nil { s := fmt.Sprint(err) warning = append(warning, s) s_err = fmt.Errorf(s) } }() subject, s_err = s.ExecuteSubject(rh, a, primaryIncident, false) if s_err != nil { warning = append(warning, s_err.Error()) } }() if s_err != nil || b_err != nil { var err error subject, body, err = s.ExecuteBadTemplate([]error{s_err, b_err}, rh, a, primaryIncident) if err != nil { subject = []byte(fmt.Sprintf("unable to create tempalate error notification: %v", err)) } } else if email != "" { m, err := mail.ParseAddress(email) if err != nil { return nil, err } n := conf.Notification{ Email: []*mail.Address{m}, } email, attachments, b_err := s.ExecuteBody(rh, a, primaryIncident, true) email_subject, s_err := s.ExecuteSubject(rh, a, primaryIncident, true) if b_err != nil { warning = append(warning, b_err.Error()) } else if s_err != nil { warning = append(warning, s_err.Error()) } else { n.DoEmail(email_subject, email, schedule.Conf, string(primaryIncident.AlertKey), attachments...) } } data = s.Data(rh, primaryIncident, a, false) } return &ruleResult{ criticals, warnings, normals, now, string(body), string(subject), data, rh.Events, warning, }, nil }