Exemplo n.º 1
0
// runHistory applies a single event to the incident state of one alert key
// and sends the notifications that the resulting state change calls for.
//
// Parameters:
//   - r: the run the event belongs to; r.Start becomes the event time and r is
//     passed on to template execution.
//   - ak: the alert key being processed.
//   - event: the event to apply. It is modified in place: Time is set, and an
//     unknown status is rewritten to normal when the alert sets UnknownsNormal.
//   - silenced: looked up with ak; a non-nil result is treated as "a silence
//     applies to this key".
//
// checkNotify is true when at least one notification was handed to s.Notify.
// err is the first error encountered. If the body succeeds, err is the result
// of the final state save performed on the way out.
func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) {
	event.Time = r.Start
	a := s.Conf.Alerts[ak.Name()]
	if a.UnknownsNormal && event.Status == models.StUnknown {
		event.Status = models.StNormal
	}

	data := s.DataAccess.State()
	if err = data.TouchAlertKey(ak, utcNow()); err != nil {
		return false, err
	}

	si := silenced(ak)

	// Get the existing open incident, if there is one.
	var incident *models.IncidentState
	incident, err = data.GetOpenIncident(ak)
	if err != nil {
		return false, err
	}
	defer func() {
		// Save unless the incident is new and closed (log alert). If there is
		// nothing to save, at least store the unevaluated state.
		var saveErr error
		if incident != nil && (incident.Id != 0 || incident.Open) {
			_, saveErr = data.UpdateIncidentState(incident)
		} else {
			saveErr = data.SetUnevaluated(ak, event.Unevaluated)
		}
		if err == nil {
			err = saveErr
			return
		}
		// An earlier error is already being returned; do not let the save
		// result (often nil) hide it. Log the secondary failure instead.
		if saveErr != nil {
			slog.Errorln(saveErr)
		}
	}()

	// If nothing is out of the ordinary we are done.
	if event.Status <= models.StNormal && incident == nil {
		return false, nil
	}

	// If the event is unevaluated we are done as well; the flag is recorded
	// on the open incident (when present) so the deferred save persists it.
	if incident != nil {
		incident.Unevaluated = event.Unevaluated
	}
	if event.Unevaluated {
		return false, nil
	}

	shouldNotify := false
	newIncident := false
	if incident == nil {
		incident = NewIncident(ak)
		newIncident = true
		shouldNotify = true
	}

	// VICTOROPS INTEGRATION: notify when an incident returns to normal from
	// warning or critical (sends the normNotification defined in the config).
	if event.Status <= models.StNormal && (incident.CurrentStatus == models.StWarning || incident.CurrentStatus == models.StCritical) {
		slog.Infof("TRIGGER_RESOLVED: from %s to %s", incident.CurrentStatus, event.Status)
		shouldNotify = true
	}

	// VICTOROPS INTEGRATION: notify when an incident that had returned to
	// normal goes back to warning or critical, i.e. allow flapping.
	if incident.CurrentStatus == models.StNormal && (event.Status == models.StCritical || event.Status == models.StWarning) {
		slog.Infof("TRIGGER_REALERT: from %s to %s", incident.CurrentStatus, event.Status)
		shouldNotify = true
	}

	// Set the incident result according to the event result.
	switch event.Status {
	case models.StCritical:
		incident.Result = event.Crit
	case models.StWarning:
		incident.Result = event.Warn
	}

	if event.Status > models.StNormal {
		incident.LastAbnormalStatus = event.Status
		incident.LastAbnormalTime = event.Time.UTC().Unix()
	}
	if event.Status > incident.WorstStatus {
		incident.WorstStatus = event.Status
		shouldNotify = true
	}
	// Record the event only on a status change. This comparison must happen
	// before CurrentStatus is overwritten below.
	if event.Status != incident.CurrentStatus {
		incident.Events = append(incident.Events, *event)
	}
	incident.CurrentStatus = event.Status

	// Run a preliminary save on new incidents to get an id. A log alert or a
	// silenced/ignored alert does not need to be saved.
	if newIncident && !a.Log && !silencedOrIgnored(a, event, si) {
		incident.Id, err = s.DataAccess.State().UpdateIncidentState(incident)
		if err != nil {
			return false, err
		}
	}

	// Render templates and open the alert key if abnormal. Log alerts are
	// never left open.
	if event.Status > models.StNormal {
		s.executeTemplates(incident, event, a, r)
		incident.Open = !a.Log
	}

	// notify sends every notification in ns that applies to the incident's
	// group. For log alerts it is rate limited by a.MaxLogFrequency.
	notify := func(ns *conf.Notifications) {
		if a.Log {
			lastLogTime := s.lastLogTimes[ak]
			now := utcNow()
			if now.Before(lastLogTime.Add(a.MaxLogFrequency)) {
				return
			}
			s.lastLogTimes[ak] = now
		}
		nots := ns.Get(s.Conf, incident.AlertKey.Group())
		for _, n := range nots {
			s.Notify(incident, n)
			checkNotify = true
		}
	}

	// notifyCurrent picks the notification set matching the event status.
	notifyCurrent := func() {
		// Auto close silenced/ignored incidents instead of notifying.
		if silencedOrIgnored(a, event, si) {
			incident.Open = false
			return
		}
		// VICTOROPS INTEGRATION: incidents never require an ack.
		incident.NeedAck = false
		switch event.Status {
		case models.StCritical, models.StUnknown:
			notify(a.CritNotification)
		case models.StWarning:
			notify(a.WarnNotification)
		case models.StNormal:
			notify(a.NormNotification)
		}
	}

	// Lock while we change notifications. The deferred unlock releases the
	// lock on every exit path, including the error return below, and runs
	// before the deferred save, matching the unlock-then-save order.
	s.Lock("RunHistory")
	defer s.Unlock()

	// On a state change that warrants it, clear old notifications and notify
	// for the current state.
	if shouldNotify {
		incident.NeedAck = false
		if err = s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
			return false, err
		}
		notifyCurrent()
	}

	// Finally, close an open alert with a silence once it goes back to
	// normal. The close runs in its own goroutine rather than inline.
	if silenced(ak) != nil && event.Status == models.StNormal {
		go func(ak models.AlertKey) {
			slog.Infof("auto close %s because was silenced", ak)
			err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak)
			if err != nil {
				slog.Errorln(err)
			}
		}(ak)
	}
	return checkNotify, nil
}
Exemplo n.º 2
0
// runHistory folds one event into the incident state of a single alert key
// and sends whatever notifications the new state requires.
//
// The event's Time is set to r.Start. checkNotify is true when at least one
// notification was passed to s.Notify. On every path that gets past the
// incident lookup, err ends up as the result of the deferred state save.
func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) {
	event.Time = r.Start
	store := s.DataAccess.State()
	if err = store.TouchAlertKey(ak, time.Now()); err != nil {
		return false, err
	}

	// Look up the currently open incident for this key, if any.
	incident, err := store.GetOpenIncident(ak)
	if err != nil {
		return false, err
	}

	defer func() {
		// Persist the incident unless it is new and closed (log alert);
		// otherwise only record the unevaluated flag.
		persist := incident != nil && (incident.Id != 0 || incident.Open)
		if !persist {
			err = store.SetUnevaluated(ak, event.Unevaluated)
			return
		}
		err = store.UpdateIncidentState(incident)
	}()

	// Nothing open and nothing abnormal: no work to do.
	if incident == nil && event.Status <= models.StNormal {
		return false, nil
	}

	// Carry the unevaluated flag onto an existing incident, then stop if the
	// event was not evaluated.
	if incident != nil {
		incident.Unevaluated = event.Unevaluated
	}
	if event.Unevaluated {
		return false, nil
	}

	// A brand-new incident always notifies.
	notifyNeeded := incident == nil
	if notifyNeeded {
		incident = NewIncident(ak)
	}

	// Copy the matching result from the event onto the incident.
	switch event.Status {
	case models.StCritical:
		incident.Result = event.Crit
	case models.StWarning:
		incident.Result = event.Warn
	}

	abnormal := event.Status > models.StNormal
	if abnormal {
		incident.LastAbnormalStatus = event.Status
		incident.LastAbnormalTime = event.Time.UTC().Unix()
	}
	if incident.WorstStatus < event.Status {
		incident.WorstStatus = event.Status
		notifyNeeded = true
	}
	// Append to the event log only on a status change; this has to run
	// before CurrentStatus is updated.
	if incident.CurrentStatus != event.Status {
		incident.Events = append(incident.Events, *event)
	}
	incident.CurrentStatus = event.Status

	alertCfg := s.Conf.Alerts[ak.Name()]
	// Abnormal events get their templates rendered and open the alert key,
	// except for log alerts, which stay closed.
	if abnormal {
		s.executeTemplates(incident, event, alertCfg, r)
		incident.Open = !alertCfg.Log
	}

	// dispatch sends each notification in ns for the incident's group. Log
	// alerts are throttled by MaxLogFrequency.
	dispatch := func(ns *conf.Notifications) {
		if alertCfg.Log {
			now := time.Now()
			if now.Before(s.lastLogTimes[ak].Add(alertCfg.MaxLogFrequency)) {
				return
			}
			s.lastLogTimes[ak] = now
		}
		for _, n := range ns.Get(s.Conf, incident.AlertKey.Group()) {
			s.Notify(incident, n)
			checkNotify = true
		}
	}

	// notifyCurrent notifies for the event's status, or auto-closes an
	// unknown incident that is ignored or covered by a forgetting silence.
	notifyCurrent := func() {
		sil := silenced(ak)
		if event.Status == models.StUnknown && (alertCfg.IgnoreUnknown || (sil != nil && sil.Forget)) {
			incident.Open = false
			return
		}
		incident.NeedAck = true
		if event.Status == models.StCritical || event.Status == models.StUnknown {
			dispatch(alertCfg.CritNotification)
		} else if event.Status == models.StWarning {
			dispatch(alertCfg.WarnNotification)
		}
	}

	// Hold the schedule lock while notifications are modified.
	s.Lock("RunHistory")
	if notifyNeeded {
		// Drop the old notification state before notifying for the new one.
		incident.NeedAck = false
		delete(s.Notifications, ak)
		notifyCurrent()
	}

	// A silenced alert that is back to normal is closed automatically, in a
	// separate goroutine.
	stillSilenced := silenced(ak) != nil
	if stillSilenced && event.Status == models.StNormal {
		go func(key models.AlertKey) {
			slog.Infof("auto close %s because was silenced", key)
			if closeErr := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, key); closeErr != nil {
				slog.Errorln(closeErr)
			}
		}(ak)
	}
	s.Unlock()
	return checkNotify, nil
}