Esempio n. 1
0
// handle serves a single client connection. It will:
//
//	1. Read input from the connection line by line.
//	2. Parse the lines into metrics.
//	3. Validate the metrics.
//	4. Pass every valid metric to d.process.
//
// Lines that fail to parse or validate are logged and skipped. When the
// input ends (EOF or read error) the connection is closed and the client
// counter is decremented.
func (d *Detector) handle(conn net.Conn) {
	// New conn established.
	addr := conn.RemoteAddr()
	health.IncrNumClients(1)
	log.Info("conn %s established", addr)
	// Cleanup is deferred so it runs on every exit path of this function.
	defer func() {
		conn.Close()
		log.Info("conn %s disconnected", addr)
		health.DecrNumClients(1)
	}()
	// Read line by line.
	scanner := bufio.NewScanner(conn)
	for scanner.Scan() {
		// Parse metric.
		m, err := parseMetric(scanner.Text())
		if err != nil {
			// Skip invalid input.
			log.Error("parse error: %v, skipping..", err)
			continue
		}
		// Validate metric.
		if err := validateMetric(m); err != nil {
			log.Error("invalid metric: %v, skipping..", err)
			continue
		}
		// Process
		d.process(m)
	}
	// Scan returns false both on EOF and on a read error, so the error can
	// only be observed once the loop has ended; Err reports nil for EOF.
	if err := scanner.Err(); err != nil {
		log.Error("read error: %v, closing conn..", err)
	}
}
Esempio n. 2
0
// work waits for detected metrics on al.In, then checks each metric against
// all the matched rules; the configured shell command is executed once per
// user whenever a rule is hit.
//
// A metric whose previous alert (recorded in al.m) is newer than the
// configured interval is skipped. The loop never returns.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			// Alerted too recently: skip this metric but keep the worker
			// alive (returning here would stop all further alerting).
			continue
		}
		// Test with rules.
		rules := al.filter.MatchedRules(metric)
		for _, rule := range rules {
			// Test
			if !rule.Test(metric) {
				continue
			}
			// Project: Related needs an allocated destination to fill in.
			proj := &models.Project{}
			// The query chain returns a non-nil handle even on success, so
			// the failure has to be read from its Error field.
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project not found, %v, skiping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			// Universals
			var univs []models.User
			if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
				log.Error("get universal users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for i := range users {
				// Point at the slice element rather than at the shared
				// range variable.
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &users[i],
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, err := json.Marshal(d)
				if err != nil {
					// Do not run the command with an empty payload.
					log.Error("marshal alert message: %v, skipping..", err)
					continue
				}
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
				}
			}
			if len(users) != 0 {
				// Remember when this metric last alerted for the interval check.
				al.m.Set(metric.Name, metric.Stamp)
			}
		}
	}
}
Esempio n. 3
0
// match looks up the rules that apply to metric m and checks the blacklist.
//
//	No rule matched            -> false, with the (empty) rule list.
//	A black pattern matched    -> false, with the matched rules.
//	Otherwise                  -> true, with the matched rules.
//
// The time spent in the rule lookup is reported to health.AddFilterCost.
// Malformed black patterns are logged and ignored.
func (d *Detector) match(m *models.Metric) (bool, []*models.Rule) {
	// Rule lookup, timed.
	t := util.NewTimer()
	matched := d.flt.MatchedRules(m)
	health.AddFilterCost(t.Elapsed())
	if len(matched) == 0 {
		return false, matched
	}
	// Blacklist.
	for _, pattern := range d.cfg.Detector.BlackList {
		hit, err := filepath.Match(pattern, m.Name)
		switch {
		case err != nil:
			log.Error("invalid black pattern: %s, %v", pattern, err)
		case hit:
			log.Debug("%s hit black pattern %s", m.Name, pattern)
			return false, matched
		}
	}
	return true, matched
}
Esempio n. 4
0
// process runs one incoming metric through the detection pipeline:
//
//	1. Match the metric with rules; unmatched metrics are dropped.
//	2. Detect the metric with the matched rules.
//	3. Output the metric if it ended up with any tested rules.
//
// The elapsed time is reported to health.AddDetectionCost only when the
// metric passes both matching and detection; a warning is logged when it
// exceeds timeout.
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	t := util.NewTimer()
	matched, rules := d.match(m)
	if !matched {
		return
	}
	if err := d.detect(m, rules); err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)
	if len(m.TestedRules) != 0 {
		d.output(m)
	}
	cost := t.Elapsed()
	if cost > timeout {
		log.Warn("detection is slow: %.2fms", cost)
	}
	health.AddDetectionCost(cost)
}
Esempio n. 5
0
// process runs one incoming metric through the detection pipeline:
//
//	1. Match the metric with rules; unmatched metrics are dropped.
//	2. Detect the metric with the matched rules.
//	3. Output the metric if it ended up with any tested rules.
//
// The wall-clock cost in milliseconds is reported to
// health.AddDetectionCost only when the metric passes both matching and
// detection; a warning is logged when it exceeds timeout.
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	begin := time.Now()
	matched, rules := d.match(m)
	if !matched {
		return
	}
	if err := d.detect(m, rules); err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)
	if len(m.TestedRules) != 0 {
		d.output(m)
	}
	// Nanoseconds -> milliseconds.
	costNs := time.Since(begin).Nanoseconds()
	costMs := float64(costNs) / float64(1000*1000)
	if costMs > timeout {
		log.Warn("detection is slow: %.2fms", costMs)
	}
	health.AddDetectionCost(costMs)
}
Esempio n. 6
0
// handle serves one connection: it reads metrics line by line, filters them
// by rules and detects whether the matched metrics are anomalies. Detected
// metrics are sent to the outputs and stored.
//
// Unparsable lines and metrics that fail detection are logged and skipped.
// The connection is closed when the input ends (EOF or read error).
func (d *Detector) handle(conn net.Conn) {
	// New conn
	addr := conn.RemoteAddr()
	defer func() {
		conn.Close()
		log.Info("conn %s disconnected", addr)
	}()
	log.Info("conn %s established", addr)
	// Scan line by line.
	scanner := bufio.NewScanner(conn)
	for scanner.Scan() {
		startAt := time.Now()
		// Parse
		line := scanner.Text()
		m, err := parseMetric(line)
		if err != nil {
			// Only log a short prefix of the offending line.
			if len(line) > 10 {
				line = line[:10]
			}
			log.Error("parse '%s': %v, skipping..", line, err)
			continue
		}
		// Filter
		if !d.match(m) {
			continue
		}
		// Detect
		if err = d.detect(m); err != nil {
			log.Error("failed to detect: %v, skipping..", err)
			continue
		}
		elapsed := time.Since(startAt)
		log.Debug("%dμs %s %.3f", elapsed.Nanoseconds()/1000, m.Name, m.Score)
		// Output
		d.output(m)
		// Store
		if err := d.store(m); err != nil {
			log.Error("store metric %s: %v, skiping..", m.Name, err)
		}
	}
	// Scan returns false both on EOF and on a read error, so the error can
	// only be observed once the loop has ended; Err reports nil for EOF.
	if err := scanner.Err(); err != nil {
		log.Info("read conn: %v, closing it..", err)
	}
}
Esempio n. 7
0
// pushDeled offers the deleted rule to every listener channel in c.lnsDel.
// The send never blocks: when a listener cannot accept the rule right away
// an error is logged and that listener is skipped.
func (c *rulesCache) pushDeled(rule *models.Rule) {
	for _, listener := range c.lnsDel {
		select {
		case listener <- rule:
			// Delivered.
		default:
			log.Error("buffered deleted rules chan is full, skipping..")
		}
	}
}
Esempio n. 8
0
// output offers the detected metric to every channel in d.outs. The send
// never blocks: a channel that cannot accept the metric right away is
// skipped and an error is logged.
func (d *Detector) output(m *models.Metric) {
	for _, out := range d.outs {
		select {
		case out <- m:
			// Delivered.
		default:
			log.Error("output channel is full, skipping..")
		}
	}
}
Esempio n. 9
0
// output offers the event to every channel in d.outs. The send never
// blocks: a channel that cannot accept the event right away is skipped and
// an error is logged.
func (d *Detector) output(ev *models.Event) {
	for _, out := range d.outs {
		select {
		case out <- ev:
			// Delivered.
		default:
			log.Error("output channel is full, skipping..")
		}
	}
}
Esempio n. 10
0
// shouldFz reports whether the name of metric m matches any of the
// configured FillBlankZeros patterns, i.e. whether blanks in its history
// values should be filled with zeros. Malformed patterns are logged and
// ignored.
func (d *Detector) shouldFz(m *models.Metric) bool {
	for _, pattern := range d.cfg.Detector.FillBlankZeros {
		matched, err := filepath.Match(pattern, m.Name)
		switch {
		case err != nil:
			log.Error("invalid fillBlankZeros pattern: %s, %v", pattern, err)
		case matched:
			return true
		}
	}
	return false
}
Esempio n. 11
0
// Start listens for tcp connections on all interfaces at the configured
// detector port and serves each accepted connection in its own goroutine
// via d.handle. A listen failure is reported through log.Fatal; accept
// failures are logged and the loop keeps going.
func (d *Detector) Start() {
	address := fmt.Sprintf("0.0.0.0:%d", d.cfg.Detector.Port)
	listener, err := net.Listen("tcp", address)
	if err != nil {
		log.Fatal("listen: %v", err)
	}
	log.Info("detector is listening on %s..", address)
	for {
		c, acceptErr := listener.Accept()
		if acceptErr != nil {
			log.Error("cannot accept conn: %v, skipping..", acceptErr)
			continue
		}
		go d.handle(c)
	}
}
Esempio n. 12
0
// work waits for detected events on al.In and, for every rule the event's
// metric was tested with, runs the configured alert command once per
// recipient (project users plus universal users).
//
// An event is dropped before any alerting when:
//   - the metric was last alerted (stamp kept in al.m) less than
//     cfg.Alerter.Interval ago, or
//   - the per-metric counter kept in al.c exceeds cfg.Alerter.OneDayLimit, or
//   - the universal users cannot be loaded.
//
// The loop never returns.
func (al *Alerter) work() {
	for {
		ev := <-al.In
		// Check interval: al.m maps metric name -> stamp of the last alert
		// (written at the bottom of the rule loop).
		v, ok := al.m.Get(ev.Metric.Name)
		if ok && ev.Metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day: al.c maps metric name -> *uint32
		// counter.
		// NOTE(review): how/when these counters are reset is not visible
		// here — presumably al.c expires entries daily; confirm.
		v, ok = al.c.Get(ev.Metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", ev.Metric.Name)
			continue
		}
		// Count this event. The counter is bumped before anything is sent,
		// so it counts events that reach this point, not delivered alerts.
		if !ok {
			var newCounter uint32
			newCounter = 1
			al.c.Set(ev.Metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universals: loaded once per event and appended to every project's
		// user list below.
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}
		for _, rule := range ev.Metric.TestedRules {
			// ev is reused for every rule/user: its Rule, Project and User
			// fields are overwritten in place before each command run.
			ev.Rule = rule
			ev.TranslateRuleComment()
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skiping..", err)
				continue
			}
			ev.Project = proj
			// Silent: skip this rule when al.shouldSilent says so for the
			// project.
			if al.shouldSilent(proj) {
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				// NOTE(review): this stores the address of the range
				// variable; safe only if execCommand does not retain
				// ev.User past the call — confirm.
				ev.User = &user
				// Skip users whose RuleLevel is above this rule's level.
				if rule.Level < user.RuleLevel {
					continue
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				if err := al.execCommand(ev); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, ev.Metric.Name)
			}
			// Record the alert stamp whenever there were recipients, even
			// if every user was skipped or every command run failed above.
			if len(users) != 0 {
				al.m.Set(ev.Metric.Name, ev.Metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}
Esempio n. 13
0
// work waits for detected metrics on al.In and, for every rule the metric
// was tested with, runs the configured alert command once per recipient
// (project users plus universal users) with a JSON-encoded message as its
// single argument.
//
// A metric is dropped before any alerting when:
//   - it was last alerted (stamp kept in al.m) less than
//     cfg.Alerter.Interval ago, or
//   - the per-metric counter kept in al.c exceeds cfg.Alerter.OneDayLimit, or
//   - the universal users cannot be loaded.
//
// The loop never returns.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day
		v, ok = al.c.Get(metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", metric.Name)
			continue
		}
		// Count this metric; the counter is bumped before anything is sent.
		if !ok {
			var newCounter uint32
			newCounter = 1
			al.c.Set(metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universals
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}
		for _, rule := range metric.TestedRules {
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skiping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for i := range users {
				// Point at the slice element rather than at the shared
				// range variable.
				user := &users[i]
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    user,
					Rule:    rule,
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, err := json.Marshal(d)
				if err != nil {
					// Do not run the command with an empty payload.
					log.Error("marshal alert message: %v, skipping..", err)
					continue
				}
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, metric.Name)
			}
			// Record the alert stamp whenever there were recipients, even
			// if every command run failed above.
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}