// RunHistory for a single alert key. Returns true if notifications were altered. func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) { event.Time = r.Start a := s.Conf.Alerts[ak.Name()] if a.UnknownsNormal && event.Status == models.StUnknown { event.Status = models.StNormal } data := s.DataAccess.State() err = data.TouchAlertKey(ak, utcNow()) if err != nil { return } si := silenced(ak) // get existing open incident if exists var incident *models.IncidentState incident, err = data.GetOpenIncident(ak) if err != nil { return } defer func() { // save unless incident is new and closed (log alert) if incident != nil && (incident.Id != 0 || incident.Open) { _, err = data.UpdateIncidentState(incident) } else { err = data.SetUnevaluated(ak, event.Unevaluated) // if nothing to save, at least store the unevaluated state } }() // If nothing is out of the ordinary we are done if event.Status <= models.StNormal && incident == nil { return } // if event is unevaluated, we are done also. if incident != nil { incident.Unevaluated = event.Unevaluated } if event.Unevaluated { return } shouldNotify := false newIncident := false if incident == nil { incident = NewIncident(ak) newIncident = true shouldNotify = true } // VICTOROPS INTEGRATION: Enables notification of incidents which have returned to normal (Sends normNotification defined in config) if event.Status <= models.StNormal && (incident.CurrentStatus == models.StWarning || incident.CurrentStatus == models.StCritical) { slog.Infof("TRIGGER_RESOLVED: from %s to %s", incident.CurrentStatus, event.Status) shouldNotify = true } // VICTOROPS INTEGRATION: Enables notification of Incidents which have returned to normal but are now back to warning or critical. i.e. enable Flapping if incident.CurrentStatus == models.StNormal && (event.Status == models.StCritical || event.Status == models.StWarning) { slog.Infof("TRIGGER_REALERT: from %s to %s", incident.CurrentStatus, event.Status) shouldNotify = true } // set state.Result according to event result if event.Status == models.StCritical { incident.Result = event.Crit } else if event.Status == models.StWarning { incident.Result = event.Warn } if event.Status > models.StNormal { incident.LastAbnormalStatus = event.Status incident.LastAbnormalTime = event.Time.UTC().Unix() } if event.Status > incident.WorstStatus { incident.WorstStatus = event.Status shouldNotify = true } if event.Status != incident.CurrentStatus { incident.Events = append(incident.Events, *event) } incident.CurrentStatus = event.Status //run a preliminary save on new incidents to get an id if newIncident { if a.Log || silencedOrIgnored(a, event, si) { //a log or silenced/ignored alert will not need to be saved } else { incident.Id, err = s.DataAccess.State().UpdateIncidentState(incident) if err != nil { return } } } //render templates and open alert key if abnormal if event.Status > models.StNormal { s.executeTemplates(incident, event, a, r) incident.Open = true if a.Log { incident.Open = false } } // On state increase, clear old notifications and notify current. // Do nothing if state did not change. notify := func(ns *conf.Notifications) { if a.Log { lastLogTime := s.lastLogTimes[ak] now := utcNow() if now.Before(lastLogTime.Add(a.MaxLogFrequency)) { return } s.lastLogTimes[ak] = now } nots := ns.Get(s.Conf, incident.AlertKey.Group()) for _, n := range nots { s.Notify(incident, n) checkNotify = true } } notifyCurrent := func() { //Auto close ignoreUnknowns for new incident. if silencedOrIgnored(a, event, si) { incident.Open = false return } // VICTOROPS INTEGRATION incident.NeedAck = false switch event.Status { case models.StCritical, models.StUnknown: notify(a.CritNotification) case models.StWarning: notify(a.WarnNotification) case models.StNormal: // VICTOROPS INTEGRATION incident.NeedAck = false notify(a.NormNotification) } } // lock while we change notifications. s.Lock("RunHistory") if shouldNotify { incident.NeedAck = false if err = s.DataAccess.Notifications().ClearNotifications(ak); err != nil { return } notifyCurrent() } // finally close an open alert with silence once it goes back to normal. if si := silenced(ak); si != nil && event.Status == models.StNormal { go func(ak models.AlertKey) { slog.Infof("auto close %s because was silenced", ak) err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak) if err != nil { slog.Errorln(err) } }(ak) } s.Unlock() return checkNotify, nil }
// RunHistory for a single alert key. Returns true if notifications were altered. func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) { event.Time = r.Start data := s.DataAccess.State() err = data.TouchAlertKey(ak, time.Now()) if err != nil { return } // get existing open incident if exists incident, err := data.GetOpenIncident(ak) if err != nil { return } defer func() { // save unless incident is new and closed (log alert) if incident != nil && (incident.Id != 0 || incident.Open) { err = data.UpdateIncidentState(incident) } else { err = data.SetUnevaluated(ak, event.Unevaluated) // if nothing to save, at least store the unevaluated state } }() // If nothing is out of the ordinary we are done if event.Status <= models.StNormal && incident == nil { return } // if event is unevaluated, we are done also. if incident != nil { incident.Unevaluated = event.Unevaluated } if event.Unevaluated { return } shouldNotify := false if incident == nil { incident = NewIncident(ak) shouldNotify = true } // set state.Result according to event result if event.Status == models.StCritical { incident.Result = event.Crit } else if event.Status == models.StWarning { incident.Result = event.Warn } if event.Status > models.StNormal { incident.LastAbnormalStatus = event.Status incident.LastAbnormalTime = event.Time.UTC().Unix() } if event.Status > incident.WorstStatus { incident.WorstStatus = event.Status shouldNotify = true } if event.Status != incident.CurrentStatus { incident.Events = append(incident.Events, *event) } incident.CurrentStatus = event.Status a := s.Conf.Alerts[ak.Name()] //render templates and open alert key if abnormal if event.Status > models.StNormal { s.executeTemplates(incident, event, a, r) incident.Open = true if a.Log { incident.Open = false } } // On state increase, clear old notifications and notify current. // Do nothing if state did not change. notify := func(ns *conf.Notifications) { if a.Log { lastLogTime := s.lastLogTimes[ak] now := time.Now() if now.Before(lastLogTime.Add(a.MaxLogFrequency)) { return } s.lastLogTimes[ak] = now } nots := ns.Get(s.Conf, incident.AlertKey.Group()) for _, n := range nots { s.Notify(incident, n) checkNotify = true } } notifyCurrent := func() { si := silenced(ak) //Auto close ignoreUnknowns for new incident. if a.IgnoreUnknown && event.Status == models.StUnknown { incident.Open = false return } else if si != nil && si.Forget && event.Status == models.StUnknown { incident.Open = false return } incident.NeedAck = true switch event.Status { case models.StCritical, models.StUnknown: notify(a.CritNotification) case models.StWarning: notify(a.WarnNotification) } } clearOld := func() { incident.NeedAck = false delete(s.Notifications, ak) } // lock while we change notifications. s.Lock("RunHistory") if shouldNotify { clearOld() notifyCurrent() } // finally close an open alert with silence once it goes back to normal. if si := silenced(ak); si != nil && event.Status == models.StNormal { go func(ak models.AlertKey) { slog.Infof("auto close %s because was silenced", ak) err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak) if err != nil { slog.Errorln(err) } }(ak) } s.Unlock() return checkNotify, nil }