Example #1
func (s *Schedule) sendNotifications(silenced map[models.AlertKey]models.Silence) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey()
			_, silenced := silenced[ak]
			if st.Last().Status == StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infoln("silencing", ak)
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.AddNotification(ak, n.Next, time.Now().UTC())
			}
		}
	}
}
Example #2
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey
			silenced := silenced(ak) != nil
			if st.CurrentStatus == models.StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infoln("silencing", ak)
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.QueueNotification(ak, n.Next, utcNow())
			}
		}
	}
}
Example #3
func (s *Schedule) save() {
	if s.db == nil {
		return
	}
	s.Lock("Save")
	store := map[string]interface{}{
		dbMetric:        s.Search.Read.Metric,
		dbTagk:          s.Search.Read.Tagk,
		dbTagv:          s.Search.Read.Tagv,
		dbMetricTags:    s.Search.Read.MetricTags,
		dbNotifications: s.Notifications,
		dbSilence:       s.Silence,
		dbStatus:        s.status,
		dbMetadata:      s.Metadata,
		dbIncidents:     s.Incidents,
	}
	tostore := make(map[string][]byte)
	for name, data := range store {
		f := new(bytes.Buffer)
		gz := gzip.NewWriter(f)
		cw := &counterWriter{w: gz}
		enc := gob.NewEncoder(cw)
		if err := enc.Encode(data); err != nil {
			slog.Errorf("error saving %s: %v", name, err)
			s.Unlock()
			return
		}
		if err := gz.Flush(); err != nil {
			slog.Errorf("gzip flush error saving %s: %v", name, err)
		}
		if err := gz.Close(); err != nil {
			slog.Errorf("gzip close error saving %s: %v", name, err)
		}
		tostore[name] = f.Bytes()
		slog.Infof("wrote %s: %v", name, conf.ByteSize(cw.written))
		collect.Put("statefile.size", opentsdb.TagSet{"object": name}, cw.written)
	}
	s.Unlock()
	err := s.db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte(dbBucket))
		if err != nil {
			return err
		}
		for name, data := range tostore {
			if err := b.Put([]byte(name), data); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		slog.Errorf("save db update error: %v", err)
		return
	}
	fi, err := os.Stat(s.Conf.StateFile)
	if err == nil {
		collect.Put("statefile.size", opentsdb.TagSet{"object": "total"}, fi.Size())
	}
	slog.Infoln("save to db complete")
}
Example #4
// CheckNotifications processes past notification events. It returns the next time a notification is needed.
func (s *Schedule) CheckNotifications() time.Time {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	latestTime := utcNow()
	notifications, err := s.DataAccess.Notifications().GetDueNotifications()
	if err != nil {
		slog.Error("Error getting notifications", err)
		return utcNow().Add(time.Minute)
	}
	for ak, ns := range notifications {
		if si := silenced(ak); si != nil {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n := s.RuleConf.GetNotification(name)
			if n == nil {
				continue
			}
			//If alert is currently unevaluated because of a dependency,
			//simply requeue it until the dependency resolves itself.
			_, uneval := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name())
			unevaluated := false
			for _, un := range uneval {
				if un == ak {
					unevaluated = true
					break
				}
			}
			if unevaluated {
				s.QueueNotification(ak, n, t.Add(time.Minute))
				continue
			}
			st, err := s.DataAccess.State().GetLatestIncident(ak)
			if err != nil {
				slog.Error(err)
				continue
			}
			if st == nil {
				continue
			}
			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	err = s.DataAccess.Notifications().ClearNotificationsBefore(latestTime)
	if err != nil {
		slog.Error("Error clearing notifications", err)
		return utcNow().Add(time.Minute)
	}
	timeout, err := s.DataAccess.Notifications().GetNextNotificationTime()
	if err != nil {
		slog.Error("Error getting next notification time", err)
		return utcNow().Add(time.Minute)
	}
	return timeout
}
Example #5
File: queue.go Project: pdf/bosun
func recordSent(num int) {
	if Debug {
		slog.Infoln("sent", num)
	}
	slock.Lock()
	sent += int64(num)
	slock.Unlock()
}
Example #6
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint) {
	if c.Interval == 0 {
		for {
			next := time.After(DefaultFreq)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			<-next
			slog.Infoln("restarting", c.Path)
		}
	} else {
		for {
			next := time.After(c.Interval)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			<-next
		}
	}
}
Example #7
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = utcNow()
	}()
	slog.Infoln("RestoreState")
	start := utcNow()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()

	if err := migrateOldDataToRedis(s.db, s.DataAccess, s); err != nil {
		return err
	}
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
Example #8
func InitPrograms(cpath string) {
	cdir, err := os.Open(cpath)
	if err != nil {
		slog.Infoln(err)
		return
	}
	idirs, err := cdir.Readdir(0)
	if err != nil {
		slog.Infoln(err)
		return
	}
	for _, idir := range idirs {
		idirname := idir.Name()
		i, err := strconv.Atoi(idirname)
		if err != nil || i < 0 {
			if idirname != "etc" && idirname != "lib" {
				slog.Infoln("invalid collector folder name:", idirname)
			}
			continue
		}
		interval := time.Second * time.Duration(i)
		dir, err := os.Open(filepath.Join(cdir.Name(), idirname))
		if err != nil {
			slog.Infoln(err)
			continue
		}
		files, err := dir.Readdir(0)
		if err != nil {
			slog.Infoln(err)
			continue
		}
		for _, file := range files {
			if !isExecutable(file) {
				continue
			}
			collectors = append(collectors, &ProgramCollector{
				Path:     filepath.Join(dir.Name(), file.Name()),
				Interval: interval,
			})
		}
	}
}
Example #9
// CheckNotifications processes past notification events. It returns the
// duration until the soonest notification triggers.
func (s *Schedule) CheckNotifications() time.Duration {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	notifications := s.Notifications
	s.Notifications = nil
	for ak, ns := range notifications {
		if _, present := silenced[ak]; present {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(time.Now())
			if remaining > 0 {
				s.AddNotification(ak, n, t)
				continue
			}
			st := s.status[ak]
			if st == nil {
				continue
			}
			// If alert is currently unevaluated because of a dependency,
			// simply requeue it until the dependency resolves itself.
			if st.Unevaluated {
				s.AddNotification(ak, n, t)
				continue
			}
			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	timeout := time.Hour
	now := time.Now()
	for _, ns := range s.Notifications {
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(now)
			if remaining < timeout {
				timeout = remaining
			}
		}
	}
	return timeout
}
Example #10
func (s *Schedule) unotify(name string, group models.AlertKeys, n *conf.Notification) {
	subject := new(bytes.Buffer)
	body := new(bytes.Buffer)
	now := utcNow()
	s.Group[now] = group
	t := s.RuleConf.GetUnknownTemplate()
	if t == nil {
		t = defaultUnknownTemplate
	}
	data := s.unknownData(now, name, group)
	if t.Body != nil {
		if err := t.Body.Execute(body, &data); err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	if t.Subject != nil {
		if err := t.Subject.Execute(subject, &data); err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	n.Notify(subject.String(), body.String(), subject.Bytes(), body.Bytes(), s.SystemConf, name)
}
Example #11
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint, quit <-chan struct{}) {
	if c.Interval == 0 {
		for {
			next := time.After(DefaultFreq)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			<-next
			slog.Infoln("restarting", c.Path)
		}
	} else {
		for {
			next := time.After(c.Interval)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			select {
			case <-next:
			case <-quit:
				return
			}

		}
	}
}
Example #12
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey
			alert := s.RuleConf.GetAlert(ak.Name())
			if alert == nil {
				continue
			}
			silenced := silenced(ak) != nil
			if st.CurrentStatus == models.StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infof("silencing %s", ak)
				continue
			} else if !alert.Log && (!st.Open || !st.NeedAck) {
				slog.Errorf("Cannot notify acked or closed alert %s. Clearing.", ak)
				if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
					slog.Error(err)
				}
				continue
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.QueueNotification(ak, n.Next, utcNow())
			}
		}
	}
}
Example #13
//If there is Route53 data for this row, then populate the Route 53 item
func (b *billLineItem) fetchR53(awsBilling *awsBillingConfig) {
	if b.ProductCode == "AmazonRoute53" { //Don't do anything if we don't have any R53 info to get
		zoneID := strings.Split(b.ResourceID, "/")[1]   //The billing ID has a huge resource ID, we only need the last part of it
		cachedR53Zone, ok := awsBillingR53zones[zoneID] //Check if we have a copy of this zone in our local cache
		if ok {                                         //If we have a copy of the zone, then use that
			b.Route53Zone = cachedR53Zone.HostedZone
			return
		}
		//Otherwise we need to fetch it from Route 53
		thisR53, fetchErr := awsBilling.r53svc.GetHostedZone(&route53.GetHostedZoneInput{
			Id: aws.String(zoneID),
		})
		if fetchErr != nil {
			slog.Infoln("Cannot fetch Route53 hosted zone", b.ResourceID, fetchErr)
			return // Bail out rather than dereferencing a nil zone below.
		}
		awsBillingR53zones[zoneID] = *thisR53 //Store the fetched zone in the cache
		b.Route53Zone = thisR53.HostedZone    //And assign
	}
}
Example #14
File: bacula.go Project: jmj/bosun
func c_bacula_status(user, pass, dbase string) (opentsdb.MultiDataPoint, error) {
	dsn := fmt.Sprintf("%s:%s@/%s", user, pass, dbase)
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		slog.Error("Failed to connect to database")
		return nil, err
	}
	defer db.Close()

	var md opentsdb.MultiDataPoint

	var name string
	var value int
	var tagSet opentsdb.TagSet
	var rate metadata.RateType
	var unit metadata.Unit

	tagSet = nil
	rate = metadata.Gauge
	unit = metadata.Item
	description := "Successful backup jobs in the last week"

	rows, err := db.Query("SELECT DISTINCT(Name) from Job")
	if err != nil {
		slog.Error("Query Error: " + err.Error())
		return nil, err
	}
	defer rows.Close()

	for rows.Next() {
		rows.Scan(&name)

		r := db.QueryRow("SELECT count(JobId) as value from Job where RealEndTime>SUBTIME(now(), '7 0:0:0') and JobStatus='T' and Name=?", name)

		r.Scan(&value)

		slog.Infoln(name, value)

		Add(&md, "bacula."+name+".last_week", value, tagSet, rate, unit, description)
	}

	return md, nil
}
Example #15
func watch(root, pattern string, f func()) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		slog.Fatal(err)
	}
	filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err // Skip unreadable paths instead of dereferencing a nil FileInfo.
		}
		if matched, err := filepath.Match(pattern, info.Name()); err != nil {
			slog.Fatal(err)
		} else if !matched {
			return nil
		}
		err = watcher.Add(path)
		if err != nil {
			slog.Fatal(err)
		}
		return nil
	})
	slog.Infoln("watching", pattern, "in", root)
	wait := time.Now()
	go func() {
		for {
			select {
			case event := <-watcher.Events:
				if wait.After(time.Now()) {
					continue
				}
				if event.Op&fsnotify.Write == fsnotify.Write {
					f()
					wait = time.Now().Add(time.Second * 2)
				}
			case err := <-watcher.Errors:
				slog.Errorln("error:", err)
			}
		}
	}()
}
Example #16
func main() {
	flag.Parse()
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("bosun"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	runtime.GOMAXPROCS(runtime.NumCPU())
	c, err := conf.ParseFile(*flagConf)
	if err != nil {
		slog.Fatal(err)
	}
	if *flagTest {
		os.Exit(0)
	}
	httpListen := &url.URL{
		Scheme: "http",
		Host:   c.HTTPListen,
	}
	if strings.HasPrefix(httpListen.Host, ":") {
		httpListen.Host = "localhost" + httpListen.Host
	}
	if err := metadata.Init(httpListen, false); err != nil {
		slog.Fatal(err)
	}
	if err := sched.Load(c); err != nil {
		slog.Fatal(err)
	}
	if c.RelayListen != "" {
		go func() {
			mux := http.NewServeMux()
			mux.Handle("/api/", httputil.NewSingleHostReverseProxy(httpListen))
			s := &http.Server{
				Addr:    c.RelayListen,
				Handler: mux,
			}
			slog.Fatal(s.ListenAndServe())
		}()
	}
	if c.TSDBHost != "" {
		if err := collect.Init(httpListen, "bosun"); err != nil {
			slog.Fatal(err)
		}
		tsdbHost := &url.URL{
			Scheme: "http",
			Host:   c.TSDBHost,
		}
		if *flagReadonly {
			rp := httputil.NewSingleHostReverseProxy(tsdbHost)
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				if r.URL.Path == "/api/put" {
					w.WriteHeader(204)
					return
				}
				rp.ServeHTTP(w, r)
			}))
			slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
			tsdbHost, _ = url.Parse(ts.URL)
			c.TSDBHost = tsdbHost.Host
		}
	}
	if *flagQuiet {
		c.Quiet = true
	}
	go func() { slog.Fatal(web.Listen(c.HTTPListen, *flagDev, c.TSDBHost)) }()
	go func() {
		if !*flagNoChecks {
			sched.Run()
		}
	}()
	go func() {
		sc := make(chan os.Signal, 1)
		signal.Notify(sc, os.Interrupt)
		killing := false
		for range sc {
			if killing {
				slog.Infoln("Second interrupt: exiting")
				os.Exit(1)
			}
			killing = true
			go func() {
				slog.Infoln("Interrupt: closing down...")
				sched.Close()
				slog.Infoln("done")
				os.Exit(1)
			}()
		}
	}()
	if *flagWatch {
		watch(".", "*.go", quit)
		watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
		base := filepath.Join("web", "static", "js")
		watch(base, "*.ts", web.RunTsc)
	}
	select {}
}
Example #17
File: main.go Project: eswdd/bosun
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	if conf.PProf != "" {
		go func() {
			slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf)
			slog.Fatal(http.ListenAndServe(conf.PProf, nil))
		}()
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
	}
	if err := collect.SetHostname(util.Hostname); err != nil {
		slog.Fatal(err)
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	collectors.Init(conf)
	for _, r := range conf.MetricFilters {
		check(collectors.AddMetricFilters(r))
	}
	for _, rmq := range conf.RabbitMQ {
		check(collectors.RabbitMQ(rmq.URL))
	}
	for _, cfg := range conf.SNMP {
		check(collectors.SNMP(cfg, conf.MIBS))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, p := range conf.ProcessDotNet {
		check(collectors.AddProcessDotNetConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera))
		}
	}
	for _, r := range conf.Riak {
		check(collectors.Riak(r.URL))
	}

	for _, x := range conf.ExtraHop {
		check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent))
	}

	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()

	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if *flagPrint {
		u = &url.URL{Scheme: "http", Host: "localhost:0"}
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	if conf.BatchSize < 0 {
		slog.Fatal("BatchSize must be > 0")
	}
	if conf.BatchSize != 0 {
		collect.BatchSize = conf.BatchSize
	}
	collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp, cquit := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}
	if version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}
	go func() {
		const maxMem = 500 * 1024 * 1024 // 500MB
		var m runtime.MemStats
		for range time.Tick(time.Minute) {
			runtime.ReadMemStats(&m)
			if m.Alloc > maxMem {
				panic("memory max reached")
			}
		}
	}()
	sChan := make(chan os.Signal)
	signal.Notify(sChan, os.Interrupt)
	<-sChan
	close(cquit)
	// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
	time.AfterFunc(5*time.Second, func() {
		os.Exit(0)
	})
	collect.Flush()
}
Example #18
0
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()
	s.Notifications = nil
	decode := func(name string, dst interface{}) error {
		var data []byte
		err := s.db.View(func(tx *bolt.Tx) error {
			b := tx.Bucket([]byte(dbBucket))
			if b == nil {
				return fmt.Errorf("unknown bucket: %v", dbBucket)
			}
			data = b.Get([]byte(name))
			return nil
		})
		if err != nil {
			return err
		}
		gr, err := gzip.NewReader(bytes.NewReader(data))
		if err != nil {
			return err
		}
		defer gr.Close()
		return gob.NewDecoder(gr).Decode(dst)
	}
	if err := decode(dbMetadata, &s.Metadata); err != nil {
		slog.Errorln(dbMetadata, err)
	}
	if err := decode(dbMetricMetadata, &s.metricMetadata); err != nil {
		slog.Errorln(dbMetricMetadata, err)
	}
	for k, v := range s.Metadata {
		if k.Name == "desc" || k.Name == "rate" || k.Name == "unit" {
			s.PutMetadata(k, v.Value)
			delete(s.Metadata, k)
		}
	}
	if err := decode(dbMetric, &s.Search.Metric); err != nil {
		slog.Errorln(dbMetric, err)
	}
	if err := decode(dbTagk, &s.Search.Tagk); err != nil {
		slog.Errorln(dbTagk, err)
	}
	if err := decode(dbTagv, &s.Search.Tagv); err != nil {
		slog.Errorln(dbTagv, err)
	}
	if err := decode(dbMetricTags, &s.Search.MetricTags); err != nil {
		slog.Errorln(dbMetricTags, err)
	}
	notifications := make(map[expr.AlertKey]map[string]time.Time)
	if err := decode(dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}
	if err := decode(dbSilence, &s.Silence); err != nil {
		slog.Errorln(dbSilence, err)
	}
	if err := decode(dbIncidents, &s.Incidents); err != nil {
		slog.Errorln(dbIncidents, err)
	}
	if err := decode(dbErrors, &s.AlertStatuses); err != nil {
		slog.Errorln(dbErrors, err)
	}

	// Calculate next incident id.
	for _, i := range s.Incidents {
		if i.Id > s.maxIncidentId {
			s.maxIncidentId = i.Id
		}
	}
	status := make(States)
	if err := decode(dbStatus, &status); err != nil {
		slog.Errorln(dbStatus, err)
	}
	clear := func(r *Result) {
		if r == nil {
			return
		}
		r.Computations = nil
	}
	for ak, st := range status {
		a, present := s.Conf.Alerts[ak.Name()]
		if !present {
			slog.Errorln("sched: alert no longer present, ignoring:", ak)
			continue
		} else if s.Conf.Squelched(a, st.Group) {
			slog.Infoln("sched: alert now squelched:", ak)
			continue
		} else {
			t := a.Unknown
			if t == 0 {
				t = s.Conf.CheckFrequency
			}
			if t == 0 && st.Last().Status == StUnknown {
				st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
			}
		}
		clear(st.Result)
		newHistory := []Event{}
		for _, e := range st.History {
			clear(e.Warn)
			clear(e.Crit)
			// Remove error events which no longer are a thing.
			if e.Status <= StUnknown {
				newHistory = append(newHistory, e)
			}
		}
		st.History = newHistory
		s.status[ak] = st
		if a.Log && st.Open {
			st.Open = false
			slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
		}
		for name, t := range notifications[ak] {
			n, present := s.Conf.Notifications[name]
			if !present {
				slog.Infoln("sched: notification not present during restore:", name)
				continue
			}
			if a.Log {
				slog.Infoln("sched: alert is now log, removing notification:", ak)
				continue
			}
			s.AddNotification(ak, n, t)
		}
	}
	if s.maxIncidentId == 0 {
		s.createHistoricIncidents()
	}

	s.Search.Copy()
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
Example #19
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	ua := "Scollector/" + version.ShortVersion()
	if conf.UserAgentMessage != "" {
		ua += fmt.Sprintf(" (%s)", conf.UserAgentMessage)
	}
	client := &http.Client{
		Transport: &scollectorHTTPTransport{
			ua,
			&httpcontrol.Transport{
				RequestTimeout: time.Minute,
			},
		},
	}
	http.DefaultClient = client
	collect.DefaultClient = client
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagNtlm {
		conf.UseNtlm = *flagNtlm
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	if conf.PProf != "" {
		go func() {
			slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf)
			slog.Fatal(http.ListenAndServe(conf.PProf, nil))
		}()
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
	}
	if err := collect.SetHostname(util.Hostname); err != nil {
		slog.Fatal(err)
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	if conf.SNMPTimeout > 0 {
		snmp.Timeout = conf.SNMPTimeout
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	collectors.Init(conf)
	for _, r := range conf.MetricFilters {
		slog.Infof("Adding MetricFilter: %v\n", r)
		check(collectors.AddMetricFilters(r))
	}
	for _, rmq := range conf.RabbitMQ {
		check(collectors.RabbitMQ(rmq.URL))
	}
	for _, cfg := range conf.SNMP {
		check(collectors.SNMP(cfg, conf.MIBS))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region, a.BillingProductCodesRegex, a.BillingBucketName, a.BillingBucketPath, a.BillingPurgeDays))
	}
	for _, ea := range conf.AzureEA {
		check(collectors.AzureEABilling(ea.EANumber, ea.APIKey, ea.LogBillingDetails))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, p := range conf.ProcessDotNet {
		check(collectors.AddProcessDotNetConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		var freq time.Duration
		var parseerr error
		if h.Freq == "" {
			freq = time.Minute * 5
		} else {
			freq, parseerr = time.ParseDuration(h.Freq)
			if parseerr != nil {
				slog.Fatal(parseerr)
			}
			if freq < time.Second {
				slog.Fatalf("Invalid HTTPUnit frequency %s, cannot be less than 1 second.", h.Freq)
			}
		}
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML, freq))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera, freq))
		}
	}
	for _, r := range conf.Riak {
		check(collectors.Riak(r.URL))
	}

	for _, x := range conf.ExtraHop {
		check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent, x.AdditionalMetrics, x.CertificateSubjectMatch, x.CertificateActivityGroup))
	}

	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()

	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	err = collectors.AddTagOverrides(c, conf.TagOverride)
	if err != nil {
		slog.Fatalf("Error adding tag overrides: %s", err)
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if *flagPrint {
		u = &url.URL{Scheme: "http", Host: "localhost:0"}
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	if conf.BatchSize < 0 {
		slog.Fatal("BatchSize must be > 0")
	}
	if conf.BatchSize != 0 {
		collect.BatchSize = conf.BatchSize
	}
	collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp, cquit := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	collect.UseNtlm = conf.UseNtlm
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}
	if collect.DisableDefaultCollectors == false && version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}

	if conf.MaxQueueLen != 0 {
		if conf.MaxQueueLen < collect.BatchSize {
			slog.Fatalf("MaxQueueLen must be >= %d (BatchSize)", collect.BatchSize)
		}
		collect.MaxQueueLen = conf.MaxQueueLen
	}
	maxMemMB := uint64(500)
	if conf.MaxMem != 0 {
		maxMemMB = conf.MaxMem
	}
	go func() {
		var m runtime.MemStats
		for range time.Tick(time.Second * 30) {
			runtime.ReadMemStats(&m)
			allocMB := m.Alloc / 1024 / 1024
			if allocMB > maxMemMB {
				slog.Fatalf("memory max runtime reached: (current alloc: %v megabytes, max: %v megabytes)", allocMB, maxMemMB)
			}
			// See process_windows.go and process_linux.go for total process memory usage.
			// Note that in Linux the rss metric includes shared pages, whereas in
			// Windows the private working set does not include shared memory.
			// Total memory used seems to scale linearly with m.Alloc.
			// But we want this to catch a memory leak outside the runtime (WMI/CGO).
			// So for now just add any runtime allocations to the allowed total limit.
			maxMemTotalMB := maxMemMB + allocMB
			if collectors.TotalScollectorMemoryMB > maxMemTotalMB {
				slog.Fatalf("memory max total reached: (current total: %v megabytes, current runtime alloc: %v megabytes, max: %v megabytes)", collectors.TotalScollectorMemoryMB, allocMB, maxMemTotalMB)
			}
		}
	}()
	sChan := make(chan os.Signal)
	signal.Notify(sChan, os.Interrupt)
	<-sChan
	close(cquit)
	// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
	time.AfterFunc(5*time.Second, func() {
		os.Exit(0)
	})
	collect.Flush()
}
Example #20
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()

	s.Notifications = nil
	db := s.db
	notifications := make(map[models.AlertKey]map[string]time.Time)
	if err := decode(db, dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}

	//status := make(States)
	//	if err := decode(db, dbStatus, &status); err != nil {
	//		slog.Errorln(dbStatus, err)
	//	}
	//	clear := func(r *models.Result) {
	//		if r == nil {
	//			return
	//		}
	//		r.Computations = nil
	//}
	//TODO: ???
	//	for ak, st := range status {
	//		a, present := s.Conf.Alerts[ak.Name()]
	//		if !present {
	//			slog.Errorln("sched: alert no longer present, ignoring:", ak)
	//			continue
	//		} else if s.Conf.Squelched(a, st.Group) {
	//			slog.Infoln("sched: alert now squelched:", ak)
	//			continue
	//		} else {
	//			t := a.Unknown
	//			if t == 0 {
	//				t = s.Conf.CheckFrequency
	//			}
	//			if t == 0 && st.Last().Status == StUnknown {
	//				st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
	//			}
	//		}
	//		clear(st.Result)
	//		newHistory := []Event{}
	//		for _, e := range st.History {
	//			clear(e.Warn)
	//			clear(e.Crit)
	//			// Remove error events which no longer are a thing.
	//			if e.Status <= StUnknown {
	//				newHistory = append(newHistory, e)
	//			}
	//		}
	//		st.History = newHistory
	//		s.status[ak] = st
	//		if a.Log && st.Open {
	//			st.Open = false
	//			slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
	//		}
	//	for name, t := range notifications[ak] {
	//		n, present := s.Conf.Notifications[name]
	//		if !present {
	//			slog.Infoln("sched: notification not present during restore:", name)
	//			continue
	//		}
	//		if a.Log {
	//			slog.Infoln("sched: alert is now log, removing notification:", ak)
	//			continue
	//		}
	//		s.AddNotification(ak, n, t)
	//	}
	//}
	if err := migrateOldDataToRedis(db, s.DataAccess); err != nil {
		return err
	}
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
Example #21
func c_snmp_bridge(community, host string) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	vlanRaw, err := snmp_subtree(host, community, vtpVlanState)
	if err != nil {
		return md, err
	}
	vlans := []string{}
	for vlan, state := range vlanRaw {
		Add(&md, "cisco.net.vlan_state", state, opentsdb.TagSet{"host": host, "vlan": vlan}, metadata.Gauge, metadata.StatusCode, "")
		vlans = append(vlans, vlan)
	}
	ifMacs := make(map[string][]string)
	for _, vlan := range vlans {
		// community string indexing: http://www.cisco.com/c/en/us/support/docs/ip/simple-network-management-protocol-snmp/40367-camsnmp40367.html
		macRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dTpFdbAddress)
		if err != nil {
			slog.Infoln(err)
			// continue since it might just be the one vlan
			continue
		}
		remoteMacAddresses := make(map[string]string)
		for k, v := range macRaw {
			if ba, ok := v.([]byte); ok {
				remoteMacAddresses[k] = strings.ToUpper(hex.EncodeToString(ba))
			}
		}
		toPort := make(map[string]string)
		toPortRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dTpFdbPort)
		if err != nil {
			slog.Infoln(err)
		}
		for k, v := range toPortRaw {
			toPort[k] = fmt.Sprintf("%v", v)
		}
		portToIfIndex := make(map[string]string)
		portToIfIndexRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dBasePortIfIndex)
		if err != nil {
			slog.Infoln(err)
		}
		for k, v := range portToIfIndexRaw {
			portToIfIndex[k] = fmt.Sprintf("%v", v)
		}
		for port, mac := range remoteMacAddresses {
			if port, ok := toPort[port]; ok {
				if ifIndex, ok := portToIfIndex[port]; ok {
					if _, ok := ifMacs[ifIndex]; ok {
						ifMacs[ifIndex] = append(ifMacs[ifIndex], mac)
					} else {
						ifMacs[ifIndex] = []string{mac}
					}
				}
			}
		}
	}
	for iface, macs := range ifMacs {
		j, err := json.Marshal(macs)
		if err != nil {
			return md, err
		}
		metadata.AddMeta("", opentsdb.TagSet{"host": host, "iface": iface}, "remoteMacs", string(j), false)
	}
	return md, nil
}
Example #22
// CheckNotifications processes past notification events. It returns the
// duration until the soonest notification triggers.
func (s *Schedule) CheckNotifications() time.Duration {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	notifications := s.Notifications
	s.Notifications = nil
	for ak, ns := range notifications {
		if si := silenced(ak); si != nil {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(time.Now())
			if remaining > 0 {
				s.AddNotification(ak, n, t)
				continue
			}

			//If alert is currently unevaluated because of a dependency,
			//simply requeue it until the dependency resolves itself.
			_, uneval := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name())
			unevaluated := false
			for _, un := range uneval {
				if un == ak {
					unevaluated = true
					break
				}
			}
			if unevaluated {
				s.AddNotification(ak, n, t)
				continue
			}
			st, err := s.DataAccess.State().GetLatestIncident(ak)
			if err != nil {
				slog.Error(err)
				continue
			}
			if st == nil {
				continue
			}

			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	timeout := time.Hour
	now := time.Now()
	for _, ns := range s.Notifications {
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(now)
			if remaining < timeout {
				timeout = remaining
			}
		}
	}
	return timeout
}
Example #23
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()

	s.Notifications = nil
	db := s.db
	notifications := make(map[expr.AlertKey]map[string]time.Time)
	if err := decode(db, dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}
	if err := decode(db, dbSilence, &s.Silence); err != nil {
		slog.Errorln(dbSilence, err)
	}
	if err := decode(db, dbIncidents, &s.Incidents); err != nil {
		slog.Errorln(dbIncidents, err)
	}

	// Calculate next incident id.
	for _, i := range s.Incidents {
		if i.Id > s.maxIncidentId {
			s.maxIncidentId = i.Id
		}
	}
	status := make(States)
	if err := decode(db, dbStatus, &status); err != nil {
		slog.Errorln(dbStatus, err)
	}
	clear := func(r *Result) {
		if r == nil {
			return
		}
		r.Computations = nil
	}
	for ak, st := range status {
		a, present := s.Conf.Alerts[ak.Name()]
		if !present {
			slog.Errorln("sched: alert no longer present, ignoring:", ak)
			continue
		} else if s.Conf.Squelched(a, st.Group) {
			slog.Infoln("sched: alert now squelched:", ak)
			continue
		} else {
			t := a.Unknown
			if t == 0 {
				t = s.Conf.CheckFrequency
			}
			if t == 0 && st.Last().Status == StUnknown {
				st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
			}
		}
		clear(st.Result)
		newHistory := []Event{}
		for _, e := range st.History {
			clear(e.Warn)
			clear(e.Crit)
			// Remove error events which no longer are a thing.
			if e.Status <= StUnknown {
				newHistory = append(newHistory, e)
			}
		}
		st.History = newHistory
		s.status[ak] = st
		if a.Log && st.Open {
			st.Open = false
			slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
		}
		for name, t := range notifications[ak] {
			n, present := s.Conf.Notifications[name]
			if !present {
				slog.Infoln("sched: notification not present during restore:", name)
				continue
			}
			if a.Log {
				slog.Infoln("sched: alert is now log, removing notification:", ak)
				continue
			}
			s.AddNotification(ak, n, t)
		}
	}
	if s.maxIncidentId == 0 {
		s.createHistoricIncidents()
	}
	migrateOldDataToRedis(db, s.DataAccess)
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
Example #24
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
		if err := collect.SetHostname(conf.Hostname); err != nil {
			slog.Fatal(err)
		}
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	for _, h := range conf.HAProxy {
		for _, i := range h.Instances {
			collectors.HAProxy(h.User, h.Password, i.Tier, i.URL)
		}
	}
	for _, s := range conf.SNMP {
		check(collectors.SNMP(s.Community, s.Host))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera))
		}
	}
	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()

	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	collect.Tags = opentsdb.TagSet{"os": runtime.GOOS}
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}

	if version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}
	go func() {
		const maxMem = 500 * 1024 * 1024 // 500MB
		var m runtime.MemStats
		for range time.Tick(time.Minute) {
			runtime.ReadMemStats(&m)
			if m.Alloc > maxMem {
				panic("memory max reached")
			}
		}
	}()
	select {}
}
Example #25
0
func Listen(listenAddr string, devMode bool, tsdbHost string, reloadFunc func() error) error {
	if devMode {
		slog.Infoln("using local web assets")
	}
	webFS := FS(devMode)

	indexTemplate = func() *template.Template {
		str := FSMustString(devMode, "/templates/index.html")
		templates, err := template.New("").Parse(str)
		if err != nil {
			slog.Fatal(err)
		}
		return templates
	}

	reload = reloadFunc

	if !devMode {
		tpl := indexTemplate()
		indexTemplate = func() *template.Template {
			return tpl
		}
	}

	if tsdbHost != "" {
		router.HandleFunc("/api/index", IndexTSDB)
		router.Handle("/api/put", Relay(tsdbHost))
	}

	router.HandleFunc("/api/", APIRedirect)
	router.Handle("/api/action", JSON(Action))
	router.Handle("/api/alerts", JSON(Alerts))
	router.Handle("/api/config", miniprofiler.NewHandler(Config))
	router.Handle("/api/config_test", miniprofiler.NewHandler(ConfigTest))
	router.Handle("/api/save_enabled", JSON(SaveEnabled))
	if schedule.SystemConf.ReloadEnabled() { // Is true if save is enabled
		router.Handle("/api/reload", JSON(Reload)).Methods(http.MethodPost)
	}
	if schedule.SystemConf.SaveEnabled() {
		router.Handle("/api/config/bulkedit", miniprofiler.NewHandler(BulkEdit)).Methods(http.MethodPost)
		router.Handle("/api/config/save", miniprofiler.NewHandler(SaveConfig)).Methods(http.MethodPost)
		router.Handle("/api/config/diff", miniprofiler.NewHandler(DiffConfig)).Methods(http.MethodPost)
		router.Handle("/api/config/running_hash", JSON(ConfigRunningHash))
	}
	router.Handle("/api/egraph/{bs}.{format:svg|png}", JSON(ExprGraph))
	router.Handle("/api/errors", JSON(ErrorHistory))
	router.Handle("/api/expr", JSON(Expr))
	router.Handle("/api/graph", JSON(Graph))
	router.Handle("/api/health", JSON(HealthCheck))
	router.Handle("/api/host", JSON(Host))
	router.Handle("/api/last", JSON(Last))
	router.Handle("/api/quiet", JSON(Quiet))
	router.Handle("/api/incidents", JSON(Incidents))
	router.Handle("/api/incidents/open", JSON(ListOpenIncidents))
	router.Handle("/api/incidents/events", JSON(IncidentEvents))
	router.Handle("/api/metadata/get", JSON(GetMetadata))
	router.Handle("/api/metadata/metrics", JSON(MetadataMetrics))
	router.Handle("/api/metadata/put", JSON(PutMetadata))
	router.Handle("/api/metadata/delete", JSON(DeleteMetadata)).Methods("DELETE")
	router.Handle("/api/metric", JSON(UniqueMetrics))
	router.Handle("/api/metric/{tagk}", JSON(MetricsByTagKey))
	router.Handle("/api/metric/{tagk}/{tagv}", JSON(MetricsByTagPair))
	router.Handle("/api/rule", JSON(Rule))
	router.HandleFunc("/api/shorten", Shorten)
	router.Handle("/api/silence/clear", JSON(SilenceClear))
	router.Handle("/api/silence/get", JSON(SilenceGet))
	router.Handle("/api/silence/set", JSON(SilenceSet))
	router.Handle("/api/status", JSON(Status))
	router.Handle("/api/tagk/{metric}", JSON(TagKeysByMetric))
	router.Handle("/api/tagv/{tagk}", JSON(TagValuesByTagKey))
	router.Handle("/api/tagv/{tagk}/{metric}", JSON(TagValuesByMetricTagKey))
	router.Handle("/api/tagsets/{metric}", JSON(FilteredTagsetsByMetric))
	router.Handle("/api/opentsdb/version", JSON(OpenTSDBVersion))
	router.Handle("/api/annotate", JSON(AnnotateEnabled))

	// Annotations
	if schedule.SystemConf.AnnotateEnabled() {
		index := schedule.SystemConf.GetAnnotateIndex()
		if index == "" {
			index = "annotate"
		}
		annotateBackend = backend.NewElastic(schedule.SystemConf.GetAnnotateElasticHosts(), index)

		go func() {
			for {
				err := annotateBackend.InitBackend()
				if err == nil {
					return
				}
				slog.Warningf("could not initalize annotate backend, will try again: %v", err)
				time.Sleep(time.Second * 30)
			}
		}()
		web.AddRoutes(router, "/api", []backend.Backend{annotateBackend}, false, false)

	}

	router.HandleFunc("/api/version", Version)
	router.Handle("/api/debug/schedlock", JSON(ScheduleLockStatus))
	http.Handle("/", miniprofiler.NewHandler(Index))
	http.Handle("/api/", router)
	fs := http.FileServer(webFS)
	http.Handle("/partials/", fs)
	http.Handle("/static/", http.StripPrefix("/static/", fs))
	http.Handle("/favicon.ico", fs)
	slog.Infoln("bosun web listening on:", listenAddr)
	slog.Infoln("tsdb host:", tsdbHost)
	return http.ListenAndServe(listenAddr, nil)
}
Example #26
File: web.go Project: eswdd/bosun
func Listen(listenAddr string, devMode bool, tsdbHost string) error {
	if devMode {
		slog.Infoln("using local web assets")
	}
	webFS := FS(devMode)

	indexTemplate = func() *template.Template {
		str := FSMustString(devMode, "/templates/index.html")
		templates, err := template.New("").Parse(str)
		if err != nil {
			slog.Fatal(err)
		}
		return templates
	}

	if !devMode {
		tpl := indexTemplate()
		indexTemplate = func() *template.Template {
			return tpl
		}
	}

	if tsdbHost != "" {
		router.HandleFunc("/api/index", IndexTSDB)
		router.Handle("/api/put", Relay(tsdbHost))
	}
	router.HandleFunc("/api/", APIRedirect)
	router.Handle("/api/action", JSON(Action))
	router.Handle("/api/alerts", JSON(Alerts))
	router.Handle("/api/backup", JSON(Backup))
	router.Handle("/api/config", miniprofiler.NewHandler(Config))
	router.Handle("/api/config_test", miniprofiler.NewHandler(ConfigTest))
	router.Handle("/api/egraph/{bs}.svg", JSON(ExprGraph))
	router.Handle("/api/errors", JSON(ErrorHistory))
	router.Handle("/api/expr", JSON(Expr))
	router.Handle("/api/graph", JSON(Graph))
	router.Handle("/api/health", JSON(HealthCheck))
	router.Handle("/api/host", JSON(Host))
	router.Handle("/api/last", JSON(Last))
	router.Handle("/api/incidents", JSON(Incidents))
	router.Handle("/api/incidents/events", JSON(IncidentEvents))
	router.Handle("/api/metadata/get", JSON(GetMetadata))
	router.Handle("/api/metadata/metrics", JSON(MetadataMetrics))
	router.Handle("/api/metadata/put", JSON(PutMetadata))
	router.Handle("/api/metadata/delete", JSON(DeleteMetadata)).Methods("DELETE")
	router.Handle("/api/metric", JSON(UniqueMetrics))
	router.Handle("/api/metric/{tagk}/{tagv}", JSON(MetricsByTagPair))
	router.Handle("/api/rule", JSON(Rule))
	router.HandleFunc("/api/shorten", Shorten)
	router.Handle("/api/silence/clear", JSON(SilenceClear))
	router.Handle("/api/silence/get", JSON(SilenceGet))
	router.Handle("/api/silence/set", JSON(SilenceSet))
	router.Handle("/api/status", JSON(Status))
	router.Handle("/api/tagk/{metric}", JSON(TagKeysByMetric))
	router.Handle("/api/tagv/{tagk}", JSON(TagValuesByTagKey))
	router.Handle("/api/tagv/{tagk}/{metric}", JSON(TagValuesByMetricTagKey))
	router.Handle("/api/tagsets/{metric}", JSON(FilteredTagsetsByMetric))
	router.Handle("/api/opentsdb/version", JSON(OpenTSDBVersion))
	router.HandleFunc("/api/version", Version)
	router.Handle("/api/debug/schedlock", JSON(ScheduleLockStatus))
	http.Handle("/", miniprofiler.NewHandler(Index))
	http.Handle("/api/", router)
	fs := http.FileServer(webFS)
	http.Handle("/partials/", fs)
	http.Handle("/static/", http.StripPrefix("/static/", fs))
	http.Handle("/favicon.ico", fs)
	slog.Infoln("bosun web listening on:", listenAddr)
	slog.Infoln("tsdb host:", tsdbHost)
	return http.ListenAndServe(listenAddr, nil)
}
Example #27
func (n *Notification) DoPrint(payload string) {
	slog.Infoln(payload)
}
Example #28
func main() {
	flag.Parse()
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("bosun"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	systemConf, err := conf.LoadSystemConfigFile(*flagConf)
	if err != nil {
		slog.Fatalf("couldn't read system configuration: %v", err)
	}
	sysProvider, err := systemConf.GetSystemConfProvider()
	if err != nil {
		slog.Fatal(err)
	}
	ruleConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), systemConf.EnabledBackends())
	if err != nil {
		slog.Fatalf("couldn't read rules: %v", err)
	}
	if *flagTest {
		os.Exit(0)
	}
	var ruleProvider conf.RuleConfProvider = ruleConf
	httpListen := &url.URL{
		Scheme: "http",
		Host:   sysProvider.GetHTTPListen(),
	}
	if strings.HasPrefix(httpListen.Host, ":") {
		httpListen.Host = "localhost" + httpListen.Host
	}
	if err := metadata.Init(httpListen, false); err != nil {
		slog.Fatal(err)
	}
	if err := sched.Load(sysProvider, ruleProvider, *flagSkipLast, *flagQuiet); err != nil {
		slog.Fatal(err)
	}
	if sysProvider.GetRelayListen() != "" {
		go func() {
			mux := http.NewServeMux()
			mux.Handle("/api/", util.NewSingleHostProxy(httpListen))
			s := &http.Server{
				Addr:    sysProvider.GetRelayListen(),
				Handler: mux,
			}
			slog.Fatal(s.ListenAndServe())
		}()
	}
	if sysProvider.GetTSDBHost() != "" {
		if err := collect.Init(httpListen, "bosun"); err != nil {
			slog.Fatal(err)
		}
		tsdbHost := &url.URL{
			Scheme: "http",
			Host:   sysProvider.GetTSDBHost(),
		}
		if *flagReadonly {
			rp := util.NewSingleHostProxy(tsdbHost)
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				if r.URL.Path == "/api/put" {
					w.WriteHeader(204)
					return
				}
				rp.ServeHTTP(w, r)
			}))
			slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
			tsdbHost, _ = url.Parse(ts.URL)
			sysProvider.SetTSDBHost(tsdbHost.Host)
		}
	}
	if systemConf.GetPing() {
		go ping.PingHosts(sched.DefaultSched.Search, systemConf.GetPingDuration())
	}
	if sysProvider.GetInternetProxy() != "" {
		web.InternetProxy, err = url.Parse(sysProvider.GetInternetProxy())
		if err != nil {
			slog.Fatalf("InternetProxy error: %s", err)
		}
	}
	var cmdHook conf.SaveHook
	if hookPath := sysProvider.GetCommandHookPath(); hookPath != "" {
		cmdHook, err = conf.MakeSaveCommandHook(hookPath)
		if err != nil {
			slog.Fatal(err)
		}
		ruleProvider.SetSaveHook(cmdHook)
	}
	var reload func() error
	reloading := make(chan bool, 1) // a lock that we can give up acquiring
	reload = func() error {
		select {
		case reloading <- true:
			// Got lock
		default:
			return fmt.Errorf("not reloading, reload in progress")
		}
		defer func() {
			<-reloading
		}()
		newConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), sysProvider.EnabledBackends())
		if err != nil {
			return err
		}
		newConf.SetSaveHook(cmdHook)
		newConf.SetReload(reload)
		oldSched := sched.DefaultSched
		oldDA := oldSched.DataAccess
		oldSearch := oldSched.Search
		sched.Close(true)
		sched.Reset()
		newSched := sched.DefaultSched
		newSched.Search = oldSearch
		newSched.DataAccess = oldDA
		slog.Infoln("schedule shutdown, loading new schedule")

		// Load does not set the DataAccess or Search if it is already set
		if err := sched.Load(sysProvider, newConf, *flagSkipLast, *flagQuiet); err != nil {
			slog.Fatal(err)
		}
		web.ResetSchedule() // Signal web to point to the new DefaultSchedule
		go func() {
			slog.Infoln("running new schedule")
			if !*flagNoChecks {
				sched.Run()
			}
		}()
		slog.Infoln("config reload complete")
		return nil
	}

	ruleProvider.SetReload(reload)

	go func() {
		slog.Fatal(web.Listen(sysProvider.GetHTTPListen(), *flagDev, sysProvider.GetTSDBHost(), reload))
	}()
	go func() {
		if !*flagNoChecks {
			sched.Run()
		}
	}()

	go func() {
		sc := make(chan os.Signal, 1)
		signal.Notify(sc, os.Interrupt, syscall.SIGTERM)
		killing := false
		for range sc {
			if killing {
				slog.Infoln("Second interrupt: exiting")
				os.Exit(1)
			}
			killing = true
			go func() {
				slog.Infoln("Interrupt: closing down...")
				sched.Close(false)
				slog.Infoln("done")
				os.Exit(1)
			}()
		}
	}()

	if *flagWatch {
		watch(".", "*.go", quit)
		watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
		base := filepath.Join("web", "static", "js")
		watch(base, "*.ts", web.RunTsc)
	}
	select {}
}
Example #29
func (n *Notification) DoPrint(subject string) {
	slog.Infoln(subject)
}
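
All of the examples above log through bosun's slog package (Infoln, Infof, Error, Fatal, and the StdLog wrapper seen in Examples #17 and #19). As a minimal, self-contained sketch of that usage, assuming the bosun.org/slog import path, something like the following exercises the same calls:

package main

import (
	"log"
	"os"

	"bosun.org/slog" // assumed import path for the slog package used throughout these examples
)

func main() {
	// Route slog output to stdout, mirroring the *flagPrint/*flagDebug branch in Examples #17 and #19.
	slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})

	slog.Infoln("collector started")     // Println-style info logging
	slog.Infof("sent %d datapoints", 42) // Printf-style info logging
	slog.Error("example error")          // error-level logging, as in Example #14
}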