// Migrate projects.
//
// 1. Fetch all projects from belldb.
// 2. Create the project into bansheedb.
// 3. Create the rules for each project.
//
func migrateProjects() {
	var projs []Project
	// Fetch all projects from belldb.
	if err := bellDB.Find(&projs).Error; err != nil {
		log.Fatal("fetch all projects from %s: %v", *bellDBFileName, err)
	}
	for _, proj := range projs {
		// Create banshee project.
		if err := models.ValidateProjectName(proj.Name); err != nil {
			log.Warn("project %s: %v, skipping..", proj.Name, err)
			continue
		}
		p := &models.Project{Name: proj.Name}
		if err := bansheeDB.Create(p).Error; err != nil {
			sqliteErr, ok := err.(sqlite3.Error)
			if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
				log.Warn("project %s already in %s, skipping..", p.Name, *bansheeDBFileName)
			} else {
				log.Fatal("cannot create project %s: %v", p.Name, err)
			}
		}
		// Fetch its rules from belldb.
		var rules []Rule
		if err := bellDB.Model(proj).Related(&rules).Error; err != nil {
			log.Fatal("cannot fetch rules for %s: %v", p.Name, err)
		}
		for _, rule := range rules {
			// Create banshee rule.
			if err := models.ValidateRulePattern(rule.Pattern); err != nil {
				log.Warn("rule %s: %v, belongs to %s, skipping..", rule.Pattern, err, proj.Name)
				continue
			}
			r := &models.Rule{
				Pattern:   rule.Pattern,
				ProjectID: p.ID,
				TrendUp:   rule.Up,
				TrendDown: rule.Down,
				// Important: bell's max and min are reversed relative to banshee's.
				ThresholdMax: rule.Min,
				ThresholdMin: rule.Max,
			}
			if err := bansheeDB.Create(r).Error; err != nil {
				sqliteErr, ok := err.(sqlite3.Error)
				if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
					log.Warn("rule %s already in %s, skipping..", r.Pattern, *bansheeDBFileName)
				} else {
					log.Fatal("cannot create rule %s: %v", r.Pattern, err)
				}
			}
		}
	}
}
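// NOTE: the belldb-side Project and Rule models referenced by migrateProjects
// above are not shown in these snippets. The sketch below is an assumption,
// with field names taken from how the migration code uses them and field
// types inferred; the real bell schema may differ.
type Project struct {
	ID   int
	Name string
}

type Rule struct {
	ID        int
	ProjectID int
	Pattern   string
	Up        bool
	Down      bool
	Min       float64
	Max       float64
}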
// Process the input metric.
//
// 1. Match metric with rules.
// 2. Detect the metric with matched rules.
//
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	timer := util.NewTimer()
	// Match
	ok, rules := d.match(m)
	if !ok {
		// Not matched.
		return
	}
	// Detect
	err := d.detect(m, rules)
	if err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)
	// Output
	if len(m.TestedRules) > 0 {
		// Test ok.
		d.output(m)
	}
	// Time end.
	elapsed := timer.Elapsed()
	if elapsed > timeout {
		log.Warn("detection is slow: %.2fms", elapsed)
	}
	health.AddDetectionCost(elapsed)
}
// Process the input metric.
//
// 1. Match metric with rules.
// 2. Detect the metric with matched rules.
//
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	// Time it.
	startAt := time.Now()
	// Match
	ok, rules := d.match(m)
	if !ok {
		// Not matched.
		return
	}
	// Detect
	err := d.detect(m, rules)
	if err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)
	// Output
	if len(m.TestedRules) > 0 {
		// Test ok.
		d.output(m)
	}
	// Time end.
	elapsed := float64(time.Since(startAt).Nanoseconds()) / float64(1000*1000)
	if elapsed > timeout {
		log.Warn("detection is slow: %.2fms", elapsed)
	}
	health.AddDetectionCost(elapsed)
}
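// NOTE: the first process variant above relies on util.NewTimer and
// Timer.Elapsed, which are not shown in these snippets. The sketch below is
// an assumption of what such a helper might look like, consistent with the
// manual millisecond computation in the second variant; the real util
// package may differ. Assumes the standard time package is imported.

// Timer measures elapsed wall-clock time.
type Timer struct {
	startAt time.Time
}

// NewTimer returns a Timer started at the current time.
func NewTimer() *Timer {
	return &Timer{startAt: time.Now()}
}

// Elapsed returns the milliseconds elapsed since the timer was created.
func (t *Timer) Elapsed() float64 {
	return float64(time.Since(t.startAt)) / float64(time.Millisecond)
}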
// work waits for detected metrics, then checks each metric against all the
// rules; the configured shell command will be executed once a rule is hit.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Test with rules.
		rules := al.filter.MatchedRules(metric)
		for _, rule := range rules {
			// Test
			if !rule.Test(metric) {
				continue
			}
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project not found, %v, skipping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skipping..", err)
				continue
			}
			// Universals
			var univs []models.User
			if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
				log.Error("get universal users: %v, skipping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &user,
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, _ := json.Marshal(d)
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
				}
			}
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
			}
		}
	}
}
func main() {
	// Arguments
	fileName := flag.String("c", "config.json", "config file")
	debug := flag.Bool("d", false, "debug mode")
	vers := flag.Bool("v", false, "version")
	flag.Parse()
	// Version
	if *vers {
		fmt.Fprintln(os.Stdout, version.Version)
		os.Exit(1)
	}
	// Logging
	log.SetName("banshee")
	if *debug {
		log.SetLevel(log.DEBUG)
	}
	log.Debug("using %s, max %d cpu", runtime.Version(), runtime.GOMAXPROCS(-1))
	// Config
	cfg := config.New()
	if flag.NFlag() == 0 || (flag.NFlag() == 1 && *debug) {
		log.Warn("no config file specified, using default..")
	} else {
		err := cfg.UpdateWithJSONFile(*fileName)
		if err != nil {
			log.Fatal("failed to load %s, %s", *fileName, err)
		}
	}
	// Storage
	options := &storage.Options{
		NumGrid: cfg.Period[0],
		GridLen: cfg.Period[1],
	}
	db, err := storage.Open(cfg.Storage.Path, options)
	if err != nil {
		log.Fatal("failed to open %s: %v", cfg.Storage.Path, err)
	}
	// Cleaner
	cleaner := cleaner.New(db, cfg.Period[0]*cfg.Period[1])
	go cleaner.Start()
	// Filter
	filter := filter.New()
	filter.Init(db)
	// Alerter
	alerter := alerter.New(cfg, db, filter)
	alerter.Start()
	// Webapp
	go webapp.Start(cfg, db)
	// Detector
	detector := detector.New(cfg, db, filter)
	detector.Out(alerter.In)
	detector.Start()
}
func initConfig() {
	// Config parsing.
	if flag.NFlag() == 0 || (flag.NFlag() == 1 && *debug) {
		// Case ./program [-d]
		log.Warn("no config specified, using default..")
	} else {
		// Update config.
		err := cfg.UpdateWithJSONFile(*fileName)
		if err != nil {
			log.Fatal("failed to load %s, %s", *fileName, err)
		}
	}
	// Config validation.
	err := cfg.Validate()
	if err != nil {
		if err == config.ErrAlerterCommandEmpty {
			// Ignore alerter command empty.
			log.Warn("config: %s", err)
		} else {
			log.Fatal("config: %s", err)
		}
	}
}
// matchedRs checks whether a metric hits the filter tree; l is the list of
// not-yet-checked words of the metric, in order.
func (f *Filter) matchedRs(c *childFilter, prefix string, l []string) []*models.Rule {
	// len(l) == 0 means all words have been checked and passed; return all matched rules.
	if len(l) == 0 {
		v, exist := f.hitCounters.Get(prefix)
		if exist {
			// Use atomic operations on the shared counter.
			atomic.AddInt32(v.(*int32), 1)
			if f.enableHitLimit && atomic.LoadInt32(v.(*int32)) > int32(f.intervalHitLimit) {
				log.Warn("hits over intervalHitLimit, metric: %s", prefix)
				return []*models.Rule{}
			}
		} else {
			var counter int32 = 1
			f.hitCounters.Set(prefix, &counter)
		}
		c.lock.RLock()
		defer c.lock.RUnlock()
		return c.matchedRules
	}
	rules := []*models.Rule{}
	// When there is no next level, return an empty rules slice.
	if c.children == nil {
		return rules
	}
	// Check if this level has a "*" node.
	v, exist := c.children.Get("*")
	if exist {
		// A "*" node matches the current word, so descend to the next level
		// and append its matched rules.
		ch := v.(*childFilter)
		rules = append(rules, f.matchedRs(ch, prefix+"."+l[0], l[1:])...)
	}
	// Check if this level has a node for the exact word.
	v, exist = c.children.Get(l[0])
	if exist {
		// The word matches, so descend to the next level and append its matched rules.
		ch := v.(*childFilter)
		rules = append(rules, f.matchedRs(ch, prefix+"."+l[0], l[1:])...)
	}
	// Return the collected rules (empty if no node matched).
	return rules
}
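// NOTE: the function below is not part of the filter; it is a minimal
// standalone illustration of the dotted-pattern matching idea that the
// trie-based matchedRs above implements: a pattern and a metric name are
// compared word by word, with "*" matching any single word. Assumes the
// standard strings package is imported.
func matchPattern(pattern, name string) bool {
	pw := strings.Split(pattern, ".")
	nw := strings.Split(name, ".")
	// A pattern only matches names with the same number of words.
	if len(pw) != len(nw) {
		return false
	}
	for i := range pw {
		if pw[i] != "*" && pw[i] != nw[i] {
			return false
		}
	}
	return true
}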
// work waits for detected metrics, then checks each metric against all the
// rules; the configured shell command will be executed once a rule is hit.
func (al *Alerter) work() {
	for {
		ev := <-al.In
		// Check interval.
		v, ok := al.m.Get(ev.Metric.Name)
		if ok && ev.Metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day.
		v, ok = al.c.Get(ev.Metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", ev.Metric.Name)
			continue
		}
		if !ok {
			newCounter := uint32(1)
			al.c.Set(ev.Metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universals
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skipping..", err)
			continue
		}
		for _, rule := range ev.Metric.TestedRules {
			ev.Rule = rule
			ev.TranslateRuleComment()
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skipping..", err)
				continue
			}
			ev.Project = proj
			// Silent
			if al.shouldSilent(proj) {
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skipping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				ev.User = &user
				if rule.Level < user.RuleLevel {
					continue
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				if err := al.execCommand(ev); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, ev.Metric.Name)
			}
			if len(users) != 0 {
				al.m.Set(ev.Metric.Name, ev.Metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}
// Migrate users.
//
// 1. Fetch all users from belldb.
// 2. Create the users into bansheedb.
// 3. Establish the relationships between project and user.
//
func migrateUsers() {
	var users []Receiver
	// Fetch all users from belldb.
	if err := bellDB.Find(&users).Error; err != nil {
		log.Fatal("fetch all users from %s: %v", *bellDBFileName, err)
	}
	for _, user := range users {
		// Create banshee user.
		err := models.ValidateUserName(user.Name)
		if err == nil {
			err = models.ValidateUserEmail(user.Email)
		}
		if err == nil {
			err = models.ValidateUserPhone(user.Phone)
		}
		if err != nil {
			log.Warn("user %s: %v, skipping..", user.Name, err)
			continue
		}
		u := &models.User{
			Name:        user.Name,
			Email:       user.Email,
			Phone:       user.Phone,
			EnableEmail: user.EnableEmail,
			EnablePhone: user.EnablePhone,
			Universal:   user.Universal,
		}
		if err := bansheeDB.Create(u).Error; err != nil {
			sqliteErr, ok := err.(sqlite3.Error)
			if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
				log.Warn("user %s already in %s, skipping..", u.Name, *bansheeDBFileName)
			} else {
				log.Fatal("cannot create user %s: %v", u.Name, err)
			}
		}
		// Establish relationship to project.
		if user.Universal {
			continue
		}
		// Get all relationships for this user.
		var relations []ReceiverProject
		if err := bellDB.Where("ReceiverId = ?", user.ID).Find(&relations).Error; err != nil {
			log.Fatal("cannot fetch user-project relations for user %s: %v", user.Name, err)
		}
		for _, relation := range relations {
			var proj Project
			if err := bellDB.First(&proj, relation.ProjectID).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("project %d not found for user %s, skipping..", relation.ProjectID, user.Name)
					continue
				}
				log.Fatal("cannot get project %d for user %s", relation.ProjectID, user.Name)
			}
			p := &models.Project{}
			if err := bansheeDB.Where("name = ?", proj.Name).First(p).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("project %s not found in %s, skipping..", proj.Name, *bansheeDBFileName)
					continue
				}
				log.Fatal("cannot get project %s in %s", proj.Name, *bansheeDBFileName)
			}
			if err := bansheeDB.Model(p).Association("Users").Append(u).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("record not found: %v", err)
					continue
				}
				log.Fatal("cannot append user %s to project %s: %v", u.Name, p.Name, err)
			}
		}
	}
}
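// NOTE: the belldb-side Receiver and ReceiverProject models referenced by
// migrateUsers above are not shown in these snippets. The sketch below is an
// assumption, with field names taken from how the migration code uses them
// and field types inferred; the real bell schema may differ.
type Receiver struct {
	ID          int
	Name        string
	Email       string
	Phone       string
	EnableEmail bool
	EnablePhone bool
	Universal   bool
}

type ReceiverProject struct {
	ReceiverID int
	ProjectID  int
}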
// work waits for detected metrics, then checks each metric against all the
// rules; the configured shell command will be executed once a rule is hit.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day.
		v, ok = al.c.Get(metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", metric.Name)
			continue
		}
		if !ok {
			newCounter := uint32(1)
			al.c.Set(metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universals
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skipping..", err)
			continue
		}
		for _, rule := range metric.TestedRules {
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skipping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skipping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &user,
					Rule:    rule,
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, _ := json.Marshal(d)
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, metric.Name)
			}
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}
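// NOTE: the work loops above pass a single argument to the configured alert
// command: a JSON-encoded message built from the project, metric, user and
// rule. The program below is a minimal sketch of such a command; it decodes
// into a generic map because the exact JSON field names depend on the msg /
// event struct tags, which are not shown in these snippets.
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: alert-command <json>")
		os.Exit(1)
	}
	var payload map[string]interface{}
	if err := json.Unmarshal([]byte(os.Args[1]), &payload); err != nil {
		fmt.Fprintln(os.Stderr, "bad json:", err)
		os.Exit(1)
	}
	// A real command would send mail/SMS here; this sketch just prints the alert.
	fmt.Printf("alert: %v\n", payload)
}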