Exemple #1
0
// migrateProjects copies every project, together with its rules, from belldb
// into bansheedb.
//
//	1. Fetch all projects from belldb.
//	2. Create each project in bansheedb.
//	3. Create the rules of each created project in bansheedb.
//
// Projects and rules that fail validation, or that already exist in bansheedb
// (sqlite unique constraint violation), are logged with log.Warn and skipped.
// Any other database error is reported through log.Fatal.
func migrateProjects() {
	var projs []Project
	// Fetch all projects from belldb.
	if err := bellDB.Find(&projs).Error; err != nil {
		log.Fatal("fetch all projects from %s: %v", *bellDBFileName, err)
	}
	for _, proj := range projs {
		// Create banshee project.
		if err := models.ValidateProjectName(proj.Name); err != nil {
			log.Warn("project %s: %v, skipping..", proj.Name, err)
			continue
		}
		p := &models.Project{Name: proj.Name}
		if err := bansheeDB.Create(p).Error; err != nil {
			sqliteErr, ok := err.(sqlite3.Error)
			if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
				log.Warn("project %s already in %s, skipping..", p.Name, *bansheeDBFileName)
				// p was not inserted, so p.ID does not identify a stored
				// project; really skip it instead of attaching rules to
				// that ID.
				continue
			} else {
				log.Fatal("cannot create project %s: %v", p.Name, err)
			}
		}
		// Fetch its rules from belldb.
		var rules []Rule
		if err := bellDB.Model(proj).Related(&rules).Error; err != nil {
			log.Fatal("cannot fetch rules for %s: %v", p.Name, err)
		}
		for _, rule := range rules {
			// Create banshee rule.
			if err := models.ValidateRulePattern(rule.Pattern); err != nil {
				log.Warn("rule %s: %v, belongs to %s, skippig..", rule.Pattern, err, proj.Name)
				continue
			}
			r := &models.Rule{
				Pattern:   rule.Pattern,
				ProjectID: p.ID,
				TrendUp:   rule.Up,
				TrendDown: rule.Down,
				// Important: max and min for bell is reversed with banshee's.
				ThresholdMax: rule.Min,
				ThresholdMin: rule.Max,
			}
			if err := bansheeDB.Create(r).Error; err != nil {
				sqliteErr, ok := err.(sqlite3.Error)
				if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
					// Duplicate rule: nothing more to do for it.
					log.Warn("rule %s already in %s, skipping..", r.Pattern, *bansheeDBFileName)
				} else {
					log.Fatal("cannot create rule %s: %v", r.Pattern, err)
				}
			}
		}
	}
}
Exemple #2
0
// process runs one incoming metric through the detector pipeline.
//
//	1. Match the metric against the rules; unmatched metrics are dropped.
//	2. Detect the metric with the matched rules; on error it is logged and
//	   the metric is dropped.
//	3. Output the metric if it carries at least one tested rule.
//
// The elapsed time is recorded through health.AddDetectionCost only for
// metrics that reach the end of the pipeline, and a warning is logged when it
// exceeds timeout.
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	stopwatch := util.NewTimer()

	// Match
	matched, matchedRules := d.match(m)
	if !matched {
		return
	}

	// Detect
	if err := d.detect(m, matchedRules); err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)

	// Output, only when some rule was tested ok.
	if len(m.TestedRules) != 0 {
		d.output(m)
	}

	// Time end.
	cost := stopwatch.Elapsed()
	if timeout < cost {
		log.Warn("detection is slow: %.2fms", cost)
	}
	health.AddDetectionCost(cost)
}
Exemple #3
0
// process runs one incoming metric through the detector pipeline.
//
//	1. Match the metric against the rules; unmatched metrics are dropped.
//	2. Detect the metric with the matched rules; on error it is logged and
//	   the metric is dropped.
//	3. Output the metric if it carries at least one tested rule.
//
// The elapsed wall time, in milliseconds, is recorded through
// health.AddDetectionCost only for metrics that reach the end of the
// pipeline, and a warning is logged when it exceeds timeout.
func (d *Detector) process(m *models.Metric) {
	health.IncrNumMetricIncomed(1)
	begin := time.Now()

	// Match
	matched, matchedRules := d.match(m)
	if !matched {
		return
	}

	// Detect
	if err := d.detect(m, matchedRules); err != nil {
		log.Error("detect: %v, skipping..", err)
		return
	}
	health.IncrNumMetricDetected(1)

	// Output, only when some rule was tested ok.
	if len(m.TestedRules) != 0 {
		d.output(m)
	}

	// Time end: a Duration counts nanoseconds, so dividing by
	// time.Millisecond yields fractional milliseconds.
	costMs := float64(time.Since(begin)) / float64(time.Millisecond)
	if timeout < costMs {
		log.Warn("detection is slow: %.2fms", costMs)
	}
	health.AddDetectionCost(costMs)
}
Exemple #4
0
// work waits for detected metrics on al.In, then checks each metric against
// the rules matched by the filter; the configured shell command is executed
// once per receiving user for every rule that is hit.
//
// For each hit rule the receivers are the users related to the rule's project
// plus all universal users. The command is invoked with a single argument:
// the JSON encoding of the message (project, metric and user). After a rule
// with at least one receiver has been handled, the metric's stamp is stored
// in al.m so that later metrics with the same name arriving within
// cfg.Alerter.Interval are ignored.
//
// The loop never returns; failures are logged and the offending metric, rule
// or user is skipped.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval. Skip with continue rather than return, so that
		// one throttled metric does not terminate the worker.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Test with rules.
		rules := al.filter.MatchedRules(metric)
		for _, rule := range rules {
			// Test
			if !rule.Test(metric) {
				continue
			}
			// Project: Related needs an allocated value to fill in, and the
			// query outcome is carried by the Error field of its result.
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project not found, %v, skiping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			// Universals
			var univs []models.User
			if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
				log.Error("get universal users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &user,
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, err := json.Marshal(d)
				if err != nil {
					// Do not run the command with a broken payload.
					log.Error("marshal alert message: %v, skiping..", err)
					continue
				}
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
				}
			}
			// Remember when this metric was last handled for the interval
			// check above.
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
			}
		}
	}
}
Exemple #5
0
// main is the program entry point. It parses the command line flags
// (-c config file, -d debug mode, -v version), sets up logging, loads the
// configuration, opens the storage and then wires up and starts the cleaner,
// filter, alerter, webapp and detector.
//
// With -v the version is printed to stdout and the process exits with
// status 0.
func main() {
	// Arguments
	fileName := flag.String("c", "config.json", "config file")
	debug := flag.Bool("d", false, "debug mode")
	vers := flag.Bool("v", false, "version")
	flag.Parse()
	// Version
	if *vers {
		fmt.Fprintln(os.Stdout, version.Version)
		// Printing the version is a successful run, not a failure.
		os.Exit(0)
	}
	// Logging
	log.SetName("banshee")
	if *debug {
		log.SetLevel(log.DEBUG)
	}
	log.Debug("using %s, max %d cpu", runtime.Version(), runtime.GOMAXPROCS(-1))
	// Config
	cfg := config.New()
	// No flag at all, or only -d, means no config file was given.
	if flag.NFlag() == 0 || (flag.NFlag() == 1 && *debug) {
		log.Warn("no config file specified, using default..")
	} else {
		err := cfg.UpdateWithJSONFile(*fileName)
		if err != nil {
			log.Fatal("failed to load %s, %s", *fileName, err)
		}
	}
	// Storage
	options := &storage.Options{
		NumGrid: cfg.Period[0],
		GridLen: cfg.Period[1],
	}
	db, err := storage.Open(cfg.Storage.Path, options)
	if err != nil {
		log.Fatal("failed to open %s: %v", cfg.Storage.Path, err)
	}
	// Cleaner, runs in its own goroutine.
	cleaner := cleaner.New(db, cfg.Period[0]*cfg.Period[1])
	go cleaner.Start()
	// Filter
	filter := filter.New()
	filter.Init(db)
	// Alerter
	alerter := alerter.New(cfg, db, filter)
	alerter.Start()
	// Webapp, runs in its own goroutine.
	go webapp.Start(cfg, db)
	// Detector, its output is fed into the alerter's input.
	// NOTE(review): detector.Start presumably blocks to keep the process
	// alive; confirm against the detector package.
	detector := detector.New(cfg, db, filter)
	detector.Out(alerter.In)
	detector.Start()
}
Exemple #6
0
// initConfig fills the global cfg and validates it.
//
// When the program was started without flags, or with the debug flag only,
// the default configuration is kept and a warning is logged; otherwise cfg is
// updated from the JSON file named by *fileName and a load failure is
// reported through log.Fatal.
//
// Validation errors are reported through log.Fatal, except
// config.ErrAlerterCommandEmpty which is only logged as a warning.
func initConfig() {
	// Case ./program [-d]: no config file was specified.
	useDefault := flag.NFlag() == 0 || (flag.NFlag() == 1 && *debug)
	if !useDefault {
		// Update config.
		if loadErr := cfg.UpdateWithJSONFile(*fileName); loadErr != nil {
			log.Fatal("failed to load %s, %s", *fileName, loadErr)
		}
	} else {
		log.Warn("no config specified, using default..")
	}

	// Config validation.
	switch validateErr := cfg.Validate(); validateErr {
	case nil:
		// Valid config, nothing to report.
	case config.ErrAlerterCommandEmpty:
		// Ignore alerter command empty.
		log.Warn("config: %s", validateErr)
	default:
		log.Fatal("config: %s", validateErr)
	}
}
Exemple #7
0
// matchedRs walks the filter tree from node c and collects the rules hit by a
// metric. prefix is built from the words consumed so far, joined with ".",
// and l is the list of the metric's words that are still unchecked, in order.
//
// When l is empty every word has been checked and passed: the hit counter
// stored for prefix is incremented (or created with value 1) and the rules
// held by c are returned, unless the hit limit is enabled and the counter
// exceeds intervalHitLimit, in which case a warning is logged and an empty
// slice is returned.
//
// Otherwise the search descends first into the "*" child and then into the
// child named after l[0], for those that exist, and returns the rules of both
// branches; the result is empty when c has no children or neither child
// exists.
func (f *Filter) matchedRs(c *childFilter, prefix string, l []string) []*models.Rule {
	if len(l) > 0 {
		found := []*models.Rule{}
		// No next level: nothing can match.
		if c.children == nil {
			return found
		}
		nextPrefix := prefix + "." + l[0]
		rest := l[1:]
		// A "*" node matches any word, a same word node matches exactly;
		// follow both, wildcard first.
		for _, word := range [...]string{"*", l[0]} {
			node, ok := c.children.Get(word)
			if !ok {
				continue
			}
			found = append(found, f.matchedRs(node.(*childFilter), nextPrefix, rest)...)
		}
		return found
	}

	// All words are checked and passed: account for the hit.
	counter, seen := f.hitCounters.Get(prefix)
	if !seen {
		first := int32(1)
		f.hitCounters.Set(prefix, &first)
	} else {
		hits := counter.(*int32)
		// use atomic
		atomic.AddInt32(hits, 1)
		if f.enableHitLimit && atomic.LoadInt32(hits) > int32(f.intervalHitLimit) {
			log.Warn("hits over intervalHitLimit, metric: %s", prefix)
			return []*models.Rule{}
		}
	}
	c.lock.RLock()
	matched := c.matchedRules
	c.lock.RUnlock()
	return matched
}
Exemple #8
0
// work consumes detection events from al.In forever and alerts the users
// concerned by each of the event metric's tested rules through the
// configured command.
//
// An event is dropped when the same metric name was handled less than
// cfg.Alerter.Interval ago, or when its counter in al.c is above
// cfg.Alerter.OneDayLimit. For every tested rule the receivers are the users
// of the rule's project plus all universal users; a user whose RuleLevel is
// above the rule's Level is not alerted. Failures are logged and the
// offending event, rule or user is skipped.
func (al *Alerter) work() {
	for {
		ev := <-al.In
		name := ev.Metric.Name

		// Check interval.
		if last, seen := al.m.Get(name); seen && ev.Metric.Stamp-last.(uint32) < al.cfg.Alerter.Interval {
			continue
		}

		// Check alert times in one day.
		counter, counted := al.c.Get(name)
		if counted && atomic.LoadUint32(counter.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", name)
			continue
		}
		if counted {
			atomic.AddUint32(counter.(*uint32), 1)
		} else {
			initial := uint32(1)
			al.c.Set(name, &initial)
		}

		// Universals
		var universals []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&universals).Error; err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}

		for _, rule := range ev.Metric.TestedRules {
			ev.Rule = rule
			ev.TranslateRuleComment()

			// Project
			project := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(project).Error; err != nil {
				log.Error("project, %v, skiping..", err)
				continue
			}
			ev.Project = project

			// Silent
			if al.shouldSilent(project) {
				continue
			}

			// Users: project users first, then the universal ones.
			var receivers []models.User
			if err := al.db.Admin.DB().Model(project).Related(&receivers, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			receivers = append(receivers, universals...)

			// Send
			for _, receiver := range receivers {
				ev.User = &receiver
				if rule.Level < receiver.RuleLevel {
					continue
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				err := al.execCommand(ev)
				if err == nil {
					log.Info("send message to %s with %s ok", receiver.Name, ev.Metric.Name)
					continue
				}
				log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
			}

			// Record the stamp used by the interval check above.
			if len(receivers) > 0 {
				al.m.Set(ev.Metric.Name, ev.Metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}
Exemple #9
0
// migrateUsers copies every user (bell "receiver") from belldb into
// bansheedb and re-creates the project memberships.
//
//	1. Fetch all users from belldb.
//	2. Create the users into bansheedb.
//	3. Establish the relationships between project and user.
//
// Users that fail validation, or that already exist in bansheedb (sqlite
// unique constraint violation), are logged with log.Warn and skipped.
// Universal users are created but not linked to any project. Relations whose
// project cannot be found in either database are logged and skipped. Any
// other database error is reported through log.Fatal.
func migrateUsers() {
	var users []Receiver
	// Fetch all users from belldb.
	if err := bellDB.Find(&users).Error; err != nil {
		log.Fatal("fetch all users from %s: %v", *bellDBFileName, err)
	}
	for _, user := range users {
		// Create banshee user: validate name, email and phone, keeping the
		// first error found.
		err := models.ValidateUserName(user.Name)
		if err == nil {
			err = models.ValidateUserEmail(user.Email)
		}
		if err == nil {
			err = models.ValidateUserPhone(user.Phone)
		}
		if err != nil {
			log.Warn("user %s: %v, skipping..", user.Name, err)
			// Really skip the invalid user instead of creating it anyway.
			continue
		}
		u := &models.User{
			Name:        user.Name,
			Email:       user.Email,
			Phone:       user.Phone,
			EnableEmail: user.EnableEmail,
			EnablePhone: user.EnablePhone,
			Universal:   user.Universal,
		}
		if err := bansheeDB.Create(u).Error; err != nil {
			sqliteErr, ok := err.(sqlite3.Error)
			if ok && sqliteErr.ExtendedCode == sqlite3.ErrConstraintUnique {
				log.Warn("user %s already in %s, skipping..", u.Name, *bansheeDBFileName)
				// u was not inserted, so it must not be appended to any
				// project association below.
				continue
			} else {
				log.Fatal("cannot create user %s: %v", u.Name, err)
			}
		}
		// Establish relationship to project.
		if user.Universal {
			continue
		}
		// Get all relationships for this user.
		var relations []ReceiverProject
		if err := bellDB.Where("ReceiverId = ?", user.ID).Find(&relations).Error; err != nil {
			log.Fatal("cannot fetch user-project relations for user %s: %v", user.Name, err)
		}
		for _, relation := range relations {
			// Resolve the bell project of this relation.
			var proj Project
			if err := bellDB.First(&proj, relation.ProjectID).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("project %d not found for user %s, skipping..", relation.ProjectID, user.Name)
					continue
				}
				log.Fatal("cannot get project %d for user %s: %v", relation.ProjectID, user.Name, err)
			}
			// Find the banshee project with the same name.
			p := &models.Project{}
			if err := bansheeDB.Where("name = ?", proj.Name).First(p).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("project %s not found in %s, skipping..", proj.Name, *bansheeDBFileName)
					continue
				}
				log.Fatal("cannot get project %s in %s: %v", proj.Name, *bansheeDBFileName, err)
			}
			if err := bansheeDB.Model(p).Association("Users").Append(u).Error; err != nil {
				if err == gorm.RecordNotFound {
					log.Warn("record not found: %v", err)
					continue
				}
				log.Fatal("cannot append user %s to project %s:%v", u.Name, p.Name, err)
			}
		}
	}
}
Exemple #10
0
// work waits for detected metrics on al.In, then, for each of the metric's
// tested rules, executes the configured shell command once per receiving
// user.
//
// A metric is dropped when the same metric name was handled less than
// cfg.Alerter.Interval ago, or when its counter in al.c is above
// cfg.Alerter.OneDayLimit. For every tested rule the receivers are the users
// of the rule's project plus all universal users. The command is invoked
// with a single argument: the JSON encoding of the message (project, metric,
// user and rule).
//
// The loop never returns; failures are logged and the offending metric, rule
// or user is skipped.
func (al *Alerter) work() {
	for {
		metric := <-al.In
		// Check interval.
		v, ok := al.m.Get(metric.Name)
		if ok && metric.Stamp-v.(uint32) < al.cfg.Alerter.Interval {
			continue
		}
		// Check alert times in one day
		v, ok = al.c.Get(metric.Name)
		if ok && atomic.LoadUint32(v.(*uint32)) > al.cfg.Alerter.OneDayLimit {
			log.Warn("%s hit alerting one day limit, skipping..", metric.Name)
			continue
		}
		if !ok {
			newCounter := uint32(1)
			al.c.Set(metric.Name, &newCounter)
		} else {
			atomic.AddUint32(v.(*uint32), 1)
		}
		// Universals
		var univs []models.User
		if err := al.db.Admin.DB().Where("universal = ?", true).Find(&univs).Error; err != nil {
			log.Error("get universal users: %v, skiping..", err)
			continue
		}
		for _, rule := range metric.TestedRules {
			// Project
			proj := &models.Project{}
			if err := al.db.Admin.DB().Model(rule).Related(proj).Error; err != nil {
				log.Error("project, %v, skiping..", err)
				continue
			}
			// Users
			var users []models.User
			if err := al.db.Admin.DB().Model(proj).Related(&users, "Users").Error; err != nil {
				log.Error("get users: %v, skiping..", err)
				continue
			}
			users = append(users, univs...)
			// Send
			for _, user := range users {
				d := &msg{
					Project: proj,
					Metric:  metric,
					User:    &user,
					Rule:    rule,
				}
				// Exec
				if len(al.cfg.Alerter.Command) == 0 {
					log.Warn("alert command not configured")
					continue
				}
				b, err := json.Marshal(d)
				if err != nil {
					// Do not run the command with a broken payload.
					log.Error("marshal alert message: %v, skiping..", err)
					continue
				}
				cmd := exec.Command(al.cfg.Alerter.Command, string(b))
				if err := cmd.Run(); err != nil {
					log.Error("exec %s: %v", al.cfg.Alerter.Command, err)
					continue
				}
				log.Info("send message to %s with %s ok", user.Name, metric.Name)
			}
			// Record the stamp used by the interval check above.
			if len(users) != 0 {
				al.m.Set(metric.Name, metric.Stamp)
				health.IncrNumAlertingEvents(1)
			}
		}
	}
}