// handle a new connection: // Steps: // 1. Read input from connection line by line. // 2. Parse each line into a metric. // 3. Validate the metric // 4. Process the metric. func (d *Detector) handle(conn net.Conn) { addr := conn.RemoteAddr() health.IncrNumClients(1) defer health.DecrNumClients(1) log.Infof("conn %s established", addr) scanner := bufio.NewScanner(conn) for scanner.Scan() { // Read line by line. if err := scanner.Err(); err != nil { // Close on read error. log.Errorf("read error: %v, closing conn..", err) break } line := scanner.Text() m, err := parseMetric(line) // Parse if err != nil { log.Errorf("parse error: %v, skipping..", err) continue } if err = m.Validate(); err != nil { log.Errorf("invalid metric: %v, skipping..", err) return } d.process(m, true) } conn.Close() log.Infof("conn %s disconnected", addr) }
// match a metric with rules, and return matched rules. // Details: // 1. If no rules matched, return false. // 2. If any black patterns matched, return false. // 3. Else, return true and matched rules. func (d *Detector) match(m *models.Metric) (bool, []*models.Rule) { // Check rules. timer := util.NewTimer() // Filter timer rules := d.flt.MatchedRules(m) elapsed := timer.Elapsed() health.AddFilterCost(elapsed) if len(rules) == 0 { // Hit no rules. return false, rules } // Check blacklist. for _, p := range d.cfg.Detector.BlackList { ok, err := filepath.Match(p, m.Name) if err != nil { // Invalid black pattern. log.Errorf("invalid black pattern: %s, %v", p, err) continue } if ok { // Hit black pattern. log.Debugf("%s hit black pattern %s", m.Name, p) return false, rules } } return true, rules // OK }
// process the input metric. // Steps: // 1. Match metric with all rules. // 2. Detect the metric with matched rules. // 3. Output detection results to receivers. func (d *Detector) process(m *models.Metric, shouldAdjustIdle bool) { health.IncrNumMetricIncomed(1) timer := util.NewTimer() // Detection cost timer // Match ok, rules := d.match(m) if !ok { return } if shouldAdjustIdle { d.adjustIdleM(m, rules) } // Detect evs, err := d.detect(m, rules) if err != nil { log.Errorf("detect: %v, skipping..", err) return } health.IncrNumMetricDetected(1) // Output for _, ev := range evs { d.output(ev) } // Time end. elapsed := timer.Elapsed() if elapsed > float64(d.cfg.Detector.WarningTimeout) { log.Warnf("detection is slow: %.2fms", elapsed) } health.AddDetectionCost(elapsed) }
// work waits for events to alert. func (al *Alerter) work() { for { ev := <-al.In ew := models.NewWrapperOfEvent(ev) // Avoid locks if al.checkAlertAt(ew.Metric) { // Check alert interval continue } if al.checkOneDayAlerts(ew.Metric) { // Check one day limit continue } // Avoid noises by issuing alerts only when same alert has occurred // predefined times. if al.checkAlertCount(ew.Metric) { al.setAlertRecord(ew.Metric) log.Warnf("Not enough alerts with in `AlertCheckInterval` time skipping..: %v", ew.Metric.Name) continue } al.setAlertRecord(ew.Metric) al.incrAlertNum(ew.Metric) // Store event if err := al.storeEvent(ev); err != nil { log.Warnf("failed to store event:%v, skipping..", err) continue } // Do alert. var err error if ew.Project, err = al.getProjByRule(ew.Rule); err != nil { continue } if al.shoudProjBeSilent(ew.Project) { continue } var users []models.User if users, err = al.getUsersByProj(ew.Project); err != nil { continue } for _, user := range users { ew.User = &user if ew.Rule.Level < user.RuleLevel { continue } if len(al.cfg.Alerter.Command) == 0 { log.Warnf("alert command not configured") continue } if err = al.execCommand(ew); err != nil { // Execute command log.Errorf("exec %s: %v", al.cfg.Alerter.Command, err) continue } log.Infof("send to %s with %s ok", user.Name, ew.Metric.Name) } if len(users) != 0 { al.setAlertAt(ew.Metric) health.IncrNumAlertingEvents(1) } } }
// pushDeled pushes changed rule to listeners. func (c *rulesCache) pushDeled(rule *models.Rule) { for _, ch := range c.lnsDel { select { case ch <- rule: default: log.Errorf("buffered deleted rules chan is full, skipping..") } } }
// output detected metrics to all chans in outs. // Skip if the target chan is full. func (d *Detector) output(ev *models.Event) { for _, ch := range d.outs { select { case ch <- ev: default: log.Errorf("output channel is full, skipping..") continue } } }
// Start the tcp server. func (d *Detector) Start() { addr := fmt.Sprintf("0.0.0.0:%d", d.cfg.Detector.Port) ln, err := net.Listen("tcp", addr) if err != nil { log.Fatalf("listen: %v", err) } log.Infof("detector is listening on %s", addr) go d.startIdleTracking() for { conn, err := ln.Accept() if err != nil { log.Errorf("cannot accept conn: %v, skipping..", err) continue } go d.handle(conn) } }
// shouldFill0 returns true if given metric needs to fill blanks with zeros to // its hidtory values. // A metric should fill0 if it matches configured fill blank zero patterns and // the matching rules have no option NeverFillZero set. func (d *Detector) shouldFill0(m *models.Metric, rules []*models.Rule) bool { for _, p := range d.cfg.Detector.FillBlankZeros { ok, err := filepath.Match(p, m.Name) if err != nil { // Invalid pattern. log.Errorf("invalid fillBlankZeros pattern: %s, %v", p, err) continue } if ok { // Matched the fill zeros patterns, then check its rules. for _, rule := range rules { if rule.NeverFillZero { return false } } return true // OK } } return false }
// shouldTrackIdle returns true if given metric should be tracked for idle. // A metric should be tracked for idle states if it matches configured check // pattern list or its matched rules have an option TrackIdle set. func (d *Detector) shoudTrackIdle(m *models.Metric, rules []*models.Rule) bool { for _, rule := range rules { if rule.TrackIdle { return true } } isHighLevel := false for _, rule := range rules { // IdleMetricCheckList only works for high level rules. if rule.Level == models.RuleLevelHigh { isHighLevel = true } } for _, p := range d.cfg.Detector.IdleMetricCheckList { ok, err := filepath.Match(p, m.Name) if err != nil { log.Errorf("invalid idleMetricCheck pattern: %s, %v", p, err) continue } if ok && isHighLevel { return true } } return false }