Exemple #1
0
// work is the alerter's worker loop. It blocks on al.In forever and, for each
// event received, runs the throttling checks, stores the event and then
// executes the configured alert command once per eligible user of the
// project that owns the event's rule.
func (al *Alerter) work() {
	for {
		event := <-al.In
		// Work on a wrapper of the event (the original author notes this
		// avoids locks).
		wrapped := models.NewWrapperOfEvent(event)

		// Skip the event when either the alert-interval check or the
		// one-day-limit check says so. The second check only runs when the
		// first one did not already skip.
		if al.checkAlertAt(wrapped.Metric) || al.checkOneDayAlerts(wrapped.Metric) {
			continue
		}

		// Avoid noise: an alert is only issued once the same alert has
		// occurred a predefined number of times. The occurrence is recorded
		// either way, after the count has been checked.
		notEnough := al.checkAlertCount(wrapped.Metric)
		al.setAlertRecord(wrapped.Metric)
		if notEnough {
			log.Warnf("Not enough alerts with in `AlertCheckInterval` time skipping..: %v", wrapped.Metric.Name)
			continue
		}
		al.incrAlertNum(wrapped.Metric)

		// Persist the event before notifying anyone.
		if storeErr := al.storeEvent(event); storeErr != nil {
			log.Warnf("failed to store event:%v, skipping..", storeErr)
			continue
		}

		// Resolve the project owning the rule; silenced projects get no alerts.
		proj, err := al.getProjByRule(wrapped.Rule)
		wrapped.Project = proj
		if err != nil {
			continue
		}
		if al.shoudProjBeSilent(wrapped.Project) {
			continue
		}

		// Resolve the recipients.
		users, err := al.getUsersByProj(wrapped.Project)
		if err != nil {
			continue
		}

		for _, recipient := range users {
			wrapped.User = &recipient
			switch {
			case wrapped.Rule.Level < recipient.RuleLevel:
				// The rule's level is below the level this user asked for.
				continue
			case len(al.cfg.Alerter.Command) == 0:
				log.Warnf("alert command not configured")
				continue
			}
			// Execute the alert command for this recipient.
			if execErr := al.execCommand(wrapped); execErr != nil {
				log.Errorf("exec %s: %v", al.cfg.Alerter.Command, execErr)
				continue
			}
			log.Infof("send to %s with %s ok", recipient.Name, wrapped.Metric.Name)
		}

		// With at least one recipient the round counts as an alert, whether
		// or not every command invocation succeeded.
		if len(users) == 0 {
			continue
		}
		al.setAlertAt(wrapped.Metric)
		health.IncrNumAlertingEvents(1)
	}
}
Exemple #2
0
// work is the alerter's worker loop. It blocks on al.In forever; every event
// received is throttled by alert interval and by a per-metric counter, and is
// then matched against each of its metric's tested rules. For every rule the
// configured command is executed once per recipient (the project's users plus
// all universal users).
//
// NOTE(review): log.Warn/log.Error/log.Info are called with format strings
// here; confirm that this logger formats its arguments.
func (al *Alerter) work() {
	for {
		ev := <-al.In

		// Interval throttle: al.m holds, per metric name, the stamp stored
		// after the last alerting round.
		if last, seen := al.m.Get(ev.Metric.Name); seen && ev.Metric.Stamp-last.(uint32) < al.cfg.Alerter.Interval {
			continue
		}

		// One-day throttle: al.c holds a *uint32 counter per metric name.
		counter, counted := al.c.Get(ev.Metric.Name)
		if counted && atomic.LoadUint32(counter.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", ev.Metric.Name)
			continue
		}
		if counted {
			atomic.AddUint32(counter.(*uint32), 1)
		} else {
			// First time this metric name is seen: start counting at one.
			first := uint32(1)
			al.c.Set(ev.Metric.Name, &first)
		}

		// Universal users are added to the recipients of every rule.
		var univs []models.User
		err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error
		if err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}

		for _, rule := range ev.Metric.TestedRules {
			ev.Rule = rule
			ev.TranslateRuleComment()

			// Load the project related to the rule.
			proj := new(models.Project)
			if dbErr := al.db.Admin.DB().Model(rule).Related(proj).Error; dbErr != nil {
				log.Error("project, %v, skiping..", dbErr)
				continue
			}
			ev.Project = proj

			// Silenced projects get no alerts.
			if al.shouldSilent(proj) {
				continue
			}

			// Recipients are the project's own users followed by the
			// universal users.
			var users []models.User
			if dbErr := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; dbErr != nil {
				log.Error("get users: %v, skiping..", dbErr)
				continue
			}
			users = append(users, univs...)

			for _, recipient := range users {
				ev.User = &recipient
				switch {
				case rule.Level < recipient.RuleLevel:
					// The rule's level is below the level this user asked for.
					continue
				case len(al.cfg.Alerter.Command) == 0:
					log.Warn("alert command not configured")
					continue
				}
				if execErr := al.execCommand(ev); execErr != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, execErr)
					continue
				}
				log.Info("send message to %s with %s ok", recipient.Name, ev.Metric.Name)
			}

			// With at least one recipient, remember the stamp for the
			// interval throttle and count the alert.
			if len(users) == 0 {
				continue
			}
			al.m.Set(ev.Metric.Name, ev.Metric.Stamp)
			health.IncrNumAlertingEvents(1)
		}
	}
}
Exemple #3
0
// work is the alerter's worker loop. It blocks on al.In forever; for every
// metric received it applies two throttles and then, for each rule in
// metric.TestedRules, runs the configured alert command once per recipient
// (the users of the rule's project plus all universal users). The command
// receives the JSON-encoded msg as its single argument.
//
// Throttling state used here:
//   - al.m maps a metric name to the stamp stored after its last alerting
//     round; a metric arriving less than cfg.Alerter.Interval after that
//     stamp is dropped.
//   - al.c maps a metric name to a *uint32 counter that is incremented for
//     every metric passing both throttles; once the counter exceeds
//     cfg.Alerter.OneDayLimit the metric is dropped. Where the counter is
//     reset is not visible in this function.
//
// NOTE(review): log.Warn/log.Error/log.Info are called with format strings
// here; confirm that this logger formats its arguments.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		// The subtraction is unsigned: a stamp older than the stored one
		// wraps around to a large value and therefore passes this check.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day.
		v, ok = al.c.Get(metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", metric.Name)
			continue
		}
		if !ok {
			// First time this metric name is seen: start counting at one.
			newCounter := uint32(1)
			al.c.Set(metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universal users are added to the recipients of every rule.
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}
		for _, rule := range metric.TestedRules {
			// Load the project related to the rule.
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skiping..", err)
				continue
			}
			// Recipients: the project's own users followed by the universal users.
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send.
			for _, user := range users {
				// Nothing to build or run without a configured command.
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &user,
					Rule:    rule,
				}
				// Encoding can fail (e.g. on NaN/Inf floats); never run the
				// command with an empty payload and report it as sent.
				b, err := json.Marshal(d)
				if err != nil {
					log.Error("marshal alert message: %v", err)
					continue
				}
				// Exec: the JSON payload is the command's only argument.
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err = cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, metric.Name)
			}
			// With at least one recipient, remember the stamp for the
			// interval throttle and count the alert, whether or not every
			// command invocation succeeded.
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}