示例#1
0
文件: sched.go 项目: nicollet/bosun
func (s *Schedule) action(user, message string, t models.ActionType, st *models.IncidentState) (ak models.AlertKey, e error) {
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
		slog.Errorln(err)
	}
	defer func() {
		if e == nil {
			if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
				slog.Errorln(err)
			}
			if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
				e = err
			}
		}
	}()
	isUnknown := st.LastAbnormalStatus == models.StUnknown
	timestamp := utcNow()
	switch t {
	case models.ActionAcknowledge:
		if !st.NeedAck {
			return "", fmt.Errorf("alert already acknowledged")
		}
		if !st.Open {
			return "", fmt.Errorf("cannot acknowledge closed alert")
		}
		st.NeedAck = false
	case models.ActionClose:
		if st.IsActive() {
			return "", fmt.Errorf("cannot close active alert")
		}
		fallthrough
	case models.ActionForceClose:
		st.Open = false
		st.End = &timestamp
	case models.ActionForget:
		if !isUnknown {
			return "", fmt.Errorf("can only forget unknowns")
		}
		fallthrough
	case models.ActionPurge:
		return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
	case models.ActionNote:
		// pass
	default:
		return "", fmt.Errorf("unknown action type: %v", t)
	}
	st.Actions = append(st.Actions, models.Action{
		Message: message,
		Time:    timestamp,
		Type:    t,
		User:    user,
	})
	_, err := s.DataAccess.State().UpdateIncidentState(st)
	return st.AlertKey, err
}
示例#2
0
文件: check.go 项目: Skyscanner/bosun
// RunHistory for a single alert key. Returns true if notifications were altered.
func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) {
	event.Time = r.Start
	a := s.Conf.Alerts[ak.Name()]
	if a.UnknownsNormal && event.Status == models.StUnknown {
		event.Status = models.StNormal
	}

	data := s.DataAccess.State()
	err = data.TouchAlertKey(ak, utcNow())
	if err != nil {
		return
	}

	si := silenced(ak)

	// get existing open incident if exists
	var incident *models.IncidentState
	incident, err = data.GetOpenIncident(ak)
	if err != nil {
		return
	}
	defer func() {
		// save unless incident is new and closed (log alert)
		if incident != nil && (incident.Id != 0 || incident.Open) {
			_, err = data.UpdateIncidentState(incident)
		} else {
			err = data.SetUnevaluated(ak, event.Unevaluated) // if nothing to save, at least store the unevaluated state
		}
	}()
	// If nothing is out of the ordinary we are done
	if event.Status <= models.StNormal && incident == nil {
		return
	}

	// if event is unevaluated, we are done also.
	if incident != nil {
		incident.Unevaluated = event.Unevaluated
	}
	if event.Unevaluated {
		return
	}

	shouldNotify := false
	newIncident := false
	if incident == nil {
		incident = NewIncident(ak)
		newIncident = true
		shouldNotify = true
	}

	// VICTOROPS INTEGRATION: Enables notification of incidents which have returned to normal (Sends normNotification defined in config)
	if event.Status <= models.StNormal && (incident.CurrentStatus == models.StWarning || incident.CurrentStatus == models.StCritical) {
		slog.Infof("TRIGGER_RESOLVED: from %s to %s", incident.CurrentStatus, event.Status)
		shouldNotify = true
	}

	// VICTOROPS INTEGRATION:  Enables notification of Incidents which have returned to normal but are now back to warning or critical. i.e. enable Flapping
	if incident.CurrentStatus == models.StNormal && (event.Status == models.StCritical || event.Status == models.StWarning) {
		slog.Infof("TRIGGER_REALERT: from %s to %s", incident.CurrentStatus, event.Status)
		shouldNotify = true
	}

	// set state.Result according to event result
	if event.Status == models.StCritical {
		incident.Result = event.Crit
	} else if event.Status == models.StWarning {
		incident.Result = event.Warn
	}

	if event.Status > models.StNormal {
		incident.LastAbnormalStatus = event.Status
		incident.LastAbnormalTime = event.Time.UTC().Unix()
	}
	if event.Status > incident.WorstStatus {
		incident.WorstStatus = event.Status
		shouldNotify = true
	}
	if event.Status != incident.CurrentStatus {
		incident.Events = append(incident.Events, *event)
	}
	incident.CurrentStatus = event.Status

	//run a preliminary save on new incidents to get an id
	if newIncident {
		if a.Log || silencedOrIgnored(a, event, si) {
			//a log or silenced/ignored alert will not need to be saved
		} else {
			incident.Id, err = s.DataAccess.State().UpdateIncidentState(incident)
			if err != nil {
				return
			}
		}
	}

	//render templates and open alert key if abnormal
	if event.Status > models.StNormal {
		s.executeTemplates(incident, event, a, r)
		incident.Open = true
		if a.Log {
			incident.Open = false
		}
	}

	// On state increase, clear old notifications and notify current.
	// Do nothing if state did not change.
	notify := func(ns *conf.Notifications) {
		if a.Log {
			lastLogTime := s.lastLogTimes[ak]
			now := utcNow()
			if now.Before(lastLogTime.Add(a.MaxLogFrequency)) {
				return
			}
			s.lastLogTimes[ak] = now
		}
		nots := ns.Get(s.Conf, incident.AlertKey.Group())
		for _, n := range nots {
			s.Notify(incident, n)
			checkNotify = true
		}
	}

	notifyCurrent := func() {
		//Auto close ignoreUnknowns for new incident.
		if silencedOrIgnored(a, event, si) {
			incident.Open = false
			return
		}
		// VICTOROPS INTEGRATION
		incident.NeedAck = false
		switch event.Status {
		case models.StCritical, models.StUnknown:
			notify(a.CritNotification)
		case models.StWarning:
			notify(a.WarnNotification)
		case models.StNormal:
			// VICTOROPS INTEGRATION
			incident.NeedAck = false
			notify(a.NormNotification)
		}
	}

	// lock while we change notifications.
	s.Lock("RunHistory")
	if shouldNotify {
		incident.NeedAck = false
		if err = s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
			return
		}
		notifyCurrent()
	}

	// finally close an open alert with silence once it goes back to normal.
	if si := silenced(ak); si != nil && event.Status == models.StNormal {
		go func(ak models.AlertKey) {
			slog.Infof("auto close %s because was silenced", ak)
			err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak)
			if err != nil {
				slog.Errorln(err)
			}
		}(ak)
	}
	s.Unlock()
	return checkNotify, nil
}