func (s *Schedule) sendNotifications(silenced map[models.AlertKey]models.Silence) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey()
			_, silenced := silenced[ak]
			if st.Last().Status == StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infoln("silencing", ak)
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.AddNotification(ak, n.Next, time.Now().UTC())
			}
		}
	}
}
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.Conf.Quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey
			silenced := silenced(ak) != nil
			if st.CurrentStatus == models.StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infoln("silencing", ak)
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.QueueNotification(ak, n.Next, utcNow())
			}
		}
	}
}
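// Note: SilenceTester is used above but not defined in this section. A
// minimal sketch of the assumed type: a lookup that returns the silence
// covering an alert key, or nil when the alert is not silenced.
type SilenceTester func(models.AlertKey) *models.Silence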
func (s *Schedule) save() {
	if s.db == nil {
		return
	}
	s.Lock("Save")
	store := map[string]interface{}{
		dbMetric:        s.Search.Read.Metric,
		dbTagk:          s.Search.Read.Tagk,
		dbTagv:          s.Search.Read.Tagv,
		dbMetricTags:    s.Search.Read.MetricTags,
		dbNotifications: s.Notifications,
		dbSilence:       s.Silence,
		dbStatus:        s.status,
		dbMetadata:      s.Metadata,
		dbIncidents:     s.Incidents,
	}
	tostore := make(map[string][]byte)
	for name, data := range store {
		f := new(bytes.Buffer)
		gz := gzip.NewWriter(f)
		cw := &counterWriter{w: gz}
		enc := gob.NewEncoder(cw)
		if err := enc.Encode(data); err != nil {
			slog.Errorf("error saving %s: %v", name, err)
			s.Unlock()
			return
		}
		if err := gz.Flush(); err != nil {
			slog.Errorf("gzip flush error saving %s: %v", name, err)
		}
		if err := gz.Close(); err != nil {
			slog.Errorf("gzip close error saving %s: %v", name, err)
		}
		tostore[name] = f.Bytes()
		slog.Infof("wrote %s: %v", name, conf.ByteSize(cw.written))
		collect.Put("statefile.size", opentsdb.TagSet{"object": name}, cw.written)
	}
	s.Unlock()
	err := s.db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte(dbBucket))
		if err != nil {
			return err
		}
		for name, data := range tostore {
			if err := b.Put([]byte(name), data); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		slog.Errorf("save db update error: %v", err)
		return
	}
	fi, err := os.Stat(s.Conf.StateFile)
	if err == nil {
		collect.Put("statefile.size", opentsdb.TagSet{"object": "total"}, fi.Size())
	}
	slog.Infoln("save to db complete")
}
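// Note: counterWriter is used in save() above but not defined in this
// section. A minimal sketch, assuming it is a small io.Writer wrapper that
// forwards writes and tallies the bytes written so the state size can be
// reported:
type counterWriter struct {
	written int64
	w       io.Writer
}

func (c *counterWriter) Write(p []byte) (int, error) {
	n, err := c.w.Write(p)
	c.written += int64(n)
	return n, err
}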
// CheckNotifications processes past notification events. It returns the next
// time a notification is needed.
func (s *Schedule) CheckNotifications() time.Time {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	latestTime := utcNow()
	notifications, err := s.DataAccess.Notifications().GetDueNotifications()
	if err != nil {
		slog.Error("Error getting notifications", err)
		return utcNow().Add(time.Minute)
	}
	for ak, ns := range notifications {
		if si := silenced(ak); si != nil {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n := s.RuleConf.GetNotification(name)
			if n == nil {
				continue
			}
			// If alert is currently unevaluated because of a dependency,
			// simply requeue it until the dependency resolves itself.
			_, uneval := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name())
			unevaluated := false
			for _, un := range uneval {
				if un == ak {
					unevaluated = true
					break
				}
			}
			if unevaluated {
				s.QueueNotification(ak, n, t.Add(time.Minute))
				continue
			}
			st, err := s.DataAccess.State().GetLatestIncident(ak)
			if err != nil {
				slog.Error(err)
				continue
			}
			if st == nil {
				continue
			}
			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	err = s.DataAccess.Notifications().ClearNotificationsBefore(latestTime)
	if err != nil {
		slog.Error("Error clearing notifications", err)
		return utcNow().Add(time.Minute)
	}
	timeout, err := s.DataAccess.Notifications().GetNextNotificationTime()
	if err != nil {
		slog.Error("Error getting next notification time", err)
		return utcNow().Add(time.Minute)
	}
	return timeout
}
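// Note: utcNow is called throughout this section but not defined here. The
// assumed helper is a one-liner that pins times to UTC so queued notification
// timestamps compare consistently:
func utcNow() time.Time { return time.Now().UTC() }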
func recordSent(num int) {
	if Debug {
		slog.Infoln("sent", num)
	}
	slock.Lock()
	sent += int64(num)
	slock.Unlock()
}
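// Note: recordSent relies on package-level state not shown in this section.
// A minimal sketch of the assumed declarations; slock guards the shared
// counter because recordSent may be called from multiple goroutines:
var (
	slock sync.Mutex
	sent  int64
)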
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint) {
	if c.Interval == 0 {
		for {
			next := time.After(DefaultFreq)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			<-next
			slog.Infoln("restarting", c.Path)
		}
	} else {
		for {
			next := time.After(c.Interval)
			c.runProgram(dpchan)
			<-next
		}
	}
}
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = utcNow()
	}()
	slog.Infoln("RestoreState")
	start := utcNow()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()
	if err := migrateOldDataToRedis(s.db, s.DataAccess, s); err != nil {
		return err
	}
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
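// Note: deleteKey is not defined in this section. A minimal sketch, assuming
// it drops a single key from the legacy BoltDB state bucket (dbBucket) left
// over from before the Redis migration:
func deleteKey(db *bolt.DB, name string) error {
	return db.Update(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(dbBucket))
		if b == nil {
			return nil // nothing to clean up
		}
		return b.Delete([]byte(name))
	})
}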
func InitPrograms(cpath string) {
	cdir, err := os.Open(cpath)
	if err != nil {
		slog.Infoln(err)
		return
	}
	idirs, err := cdir.Readdir(0)
	if err != nil {
		slog.Infoln(err)
		return
	}
	for _, idir := range idirs {
		idirname := idir.Name()
		i, err := strconv.Atoi(idirname)
		if err != nil || i < 0 {
			if idirname != "etc" && idirname != "lib" {
				slog.Infoln("invalid collector folder name:", idirname)
			}
			continue
		}
		interval := time.Second * time.Duration(i)
		dir, err := os.Open(filepath.Join(cdir.Name(), idirname))
		if err != nil {
			slog.Infoln(err)
			continue
		}
		files, err := dir.Readdir(0)
		if err != nil {
			slog.Infoln(err)
			continue
		}
		for _, file := range files {
			if !isExecutable(file) {
				continue
			}
			collectors = append(collectors, &ProgramCollector{
				Path:     filepath.Join(dir.Name(), file.Name()),
				Interval: interval,
			})
		}
	}
}
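// Note: isExecutable is platform specific and not shown here. A plausible
// Unix-style sketch, assuming a regular file with any execute bit set
// qualifies as an external collector:
func isExecutable(f os.FileInfo) bool {
	return f.Mode().IsRegular() && f.Mode().Perm()&0111 != 0
}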
// CheckNotifications processes past notification events. It returns the
// duration until the soonest notification triggers.
func (s *Schedule) CheckNotifications() time.Duration {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	notifications := s.Notifications
	s.Notifications = nil
	for ak, ns := range notifications {
		if _, present := silenced[ak]; present {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(time.Now())
			if remaining > 0 {
				s.AddNotification(ak, n, t)
				continue
			}
			st := s.status[ak]
			if st == nil {
				continue
			}
			// If alert is currently unevaluated because of a dependency,
			// simply requeue it until the dependency resolves itself.
			if st.Unevaluated {
				s.AddNotification(ak, n, t)
				continue
			}
			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	timeout := time.Hour
	now := time.Now()
	for _, ns := range s.Notifications {
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(now)
			if remaining < timeout {
				timeout = remaining
			}
		}
	}
	return timeout
}
func (s *Schedule) unotify(name string, group models.AlertKeys, n *conf.Notification) {
	subject := new(bytes.Buffer)
	body := new(bytes.Buffer)
	now := utcNow()
	s.Group[now] = group
	t := s.RuleConf.GetUnknownTemplate()
	if t == nil {
		t = defaultUnknownTemplate
	}
	data := s.unknownData(now, name, group)
	if t.Body != nil {
		if err := t.Body.Execute(body, &data); err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	if t.Subject != nil {
		if err := t.Subject.Execute(subject, &data); err != nil {
			slog.Infoln("unknown template error:", err)
		}
	}
	n.Notify(subject.String(), body.String(), subject.Bytes(), body.Bytes(), s.SystemConf, name)
}
func (c *ProgramCollector) Run(dpchan chan<- *opentsdb.DataPoint, quit <-chan struct{}) {
	if c.Interval == 0 {
		for {
			next := time.After(DefaultFreq)
			if err := c.runProgram(dpchan); err != nil {
				slog.Infoln(err)
			}
			<-next
			slog.Infoln("restarting", c.Path)
		}
	} else {
		for {
			next := time.After(c.Interval)
			c.runProgram(dpchan)
			select {
			case <-next:
			case <-quit:
				return
			}
		}
	}
}
func (s *Schedule) sendNotifications(silenced SilenceTester) {
	if s.quiet {
		slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications")
		return
	}
	for n, states := range s.pendingNotifications {
		for _, st := range states {
			ak := st.AlertKey
			alert := s.RuleConf.GetAlert(ak.Name())
			if alert == nil {
				continue
			}
			silenced := silenced(ak) != nil
			if st.CurrentStatus == models.StUnknown {
				if silenced {
					slog.Infoln("silencing unknown", ak)
					continue
				}
				s.pendingUnknowns[n] = append(s.pendingUnknowns[n], st)
			} else if silenced {
				slog.Infof("silencing %s", ak)
				continue
			} else if !alert.Log && (!st.Open || !st.NeedAck) {
				slog.Errorf("Cannot notify acked or closed alert %s. Clearing.", ak)
				if err := s.DataAccess.Notifications().ClearNotifications(ak); err != nil {
					slog.Error(err)
				}
				continue
			} else {
				s.notify(st, n)
			}
			if n.Next != nil {
				s.QueueNotification(ak, n.Next, utcNow())
			}
		}
	}
}
// If there is Route53 data for this row, then populate the Route 53 item.
func (b *billLineItem) fetchR53(awsBilling *awsBillingConfig) {
	if b.ProductCode == "AmazonRoute53" { // Don't do anything if we don't have any R53 info to get.
		// The billing ID has a huge resource ID; we only need the last part of it.
		zoneID := strings.Split(b.ResourceID, "/")[1]
		// Check if we have a copy of this zone in our local cache.
		cachedR53Zone, ok := awsBillingR53zones[zoneID]
		if ok { // If we have a copy of the zone, then use that.
			b.Route53Zone = cachedR53Zone.HostedZone
			return
		}
		// Otherwise we need to fetch it from Route 53.
		thisR53, fetchErr := awsBilling.r53svc.GetHostedZone(&route53.GetHostedZoneInput{
			Id: aws.String(zoneID),
		})
		if fetchErr != nil {
			slog.Infoln("Cannot fetch Route53 hosted zone", b.ResourceID, fetchErr)
			// Without a fetched zone there is nothing to cache or assign,
			// and dereferencing thisR53 below would panic.
			return
		}
		awsBillingR53zones[zoneID] = *thisR53 // Store the fetched zone in the cache.
		b.Route53Zone = thisR53.HostedZone    // And assign.
	}
}
func c_bacula_status(user, pass, dbase string) (opentsdb.MultiDataPoint, error) {
	dsn := fmt.Sprintf("%s:%s@/%s", user, pass, dbase)
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		slog.Error("Failed to connect to database")
		return nil, err
	}
	defer db.Close()
	var md opentsdb.MultiDataPoint
	var name string
	var value int
	var tagSet opentsdb.TagSet
	rate := metadata.Gauge
	unit := metadata.Item
	description := "Successful backup jobs in the last week"
	rows, err := db.Query("SELECT DISTINCT(Name) from Job")
	if err != nil {
		slog.Error("Query Error: " + err.Error())
		return nil, err
	}
	defer rows.Close()
	for rows.Next() {
		if err := rows.Scan(&name); err != nil {
			slog.Error(err)
			continue
		}
		r := db.QueryRow("SELECT count(JobId) as value from Job where RealEndTime>SUBTIME(now(), '7 0:0:0') and JobStatus='T' and Name=?", name)
		if err := r.Scan(&value); err != nil {
			slog.Error(err)
			continue
		}
		slog.Infoln(name, value)
		Add(&md, "bacula."+name+".last_week", value, tagSet, rate, unit, description)
	}
	return md, nil
}
func watch(root, pattern string, f func()) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		slog.Fatal(err)
	}
	filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Skip paths we could not stat; info would be nil here.
			return err
		}
		if matched, err := filepath.Match(pattern, info.Name()); err != nil {
			slog.Fatal(err)
		} else if !matched {
			return nil
		}
		err = watcher.Add(path)
		if err != nil {
			slog.Fatal(err)
		}
		return nil
	})
	slog.Infoln("watching", pattern, "in", root)
	wait := time.Now()
	go func() {
		for {
			select {
			case event := <-watcher.Events:
				if wait.After(time.Now()) {
					continue
				}
				if event.Op&fsnotify.Write == fsnotify.Write {
					f()
					wait = time.Now().Add(time.Second * 2)
				}
			case err := <-watcher.Errors:
				slog.Errorln("error:", err)
			}
		}
	}()
}
func main() {
	flag.Parse()
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("bosun"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	runtime.GOMAXPROCS(runtime.NumCPU())
	c, err := conf.ParseFile(*flagConf)
	if err != nil {
		slog.Fatal(err)
	}
	if *flagTest {
		os.Exit(0)
	}
	httpListen := &url.URL{
		Scheme: "http",
		Host:   c.HTTPListen,
	}
	if strings.HasPrefix(httpListen.Host, ":") {
		httpListen.Host = "localhost" + httpListen.Host
	}
	if err := metadata.Init(httpListen, false); err != nil {
		slog.Fatal(err)
	}
	if err := sched.Load(c); err != nil {
		slog.Fatal(err)
	}
	if c.RelayListen != "" {
		go func() {
			mux := http.NewServeMux()
			mux.Handle("/api/", httputil.NewSingleHostReverseProxy(httpListen))
			s := &http.Server{
				Addr:    c.RelayListen,
				Handler: mux,
			}
			slog.Fatal(s.ListenAndServe())
		}()
	}
	if c.TSDBHost != "" {
		if err := collect.Init(httpListen, "bosun"); err != nil {
			slog.Fatal(err)
		}
		tsdbHost := &url.URL{
			Scheme: "http",
			Host:   c.TSDBHost,
		}
		if *flagReadonly {
			rp := httputil.NewSingleHostReverseProxy(tsdbHost)
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				if r.URL.Path == "/api/put" {
					w.WriteHeader(204)
					return
				}
				rp.ServeHTTP(w, r)
			}))
			slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
			tsdbHost, _ = url.Parse(ts.URL)
			c.TSDBHost = tsdbHost.Host
		}
	}
	if *flagQuiet {
		c.Quiet = true
	}
	go func() {
		slog.Fatal(web.Listen(c.HTTPListen, *flagDev, c.TSDBHost))
	}()
	go func() {
		if !*flagNoChecks {
			sched.Run()
		}
	}()
	go func() {
		sc := make(chan os.Signal, 1)
		signal.Notify(sc, os.Interrupt)
		killing := false
		for range sc {
			if killing {
				slog.Infoln("Second interrupt: exiting")
				os.Exit(1)
			}
			killing = true
			go func() {
				slog.Infoln("Interrupt: closing down...")
				sched.Close()
				slog.Infoln("done")
				os.Exit(1)
			}()
		}
	}()
	if *flagWatch {
		watch(".", "*.go", quit)
		watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
		base := filepath.Join("web", "static", "js")
		watch(base, "*.ts", web.RunTsc)
	}
	select {}
}
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	if conf.PProf != "" {
		go func() {
			slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf)
			slog.Fatal(http.ListenAndServe(conf.PProf, nil))
		}()
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
	}
	if err := collect.SetHostname(util.Hostname); err != nil {
		slog.Fatal(err)
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	collectors.Init(conf)
	for _, r := range conf.MetricFilters {
		check(collectors.AddMetricFilters(r))
	}
	for _, rmq := range conf.RabbitMQ {
		check(collectors.RabbitMQ(rmq.URL))
	}
	for _, cfg := range conf.SNMP {
		check(collectors.SNMP(cfg, conf.MIBS))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, p := range conf.ProcessDotNet {
		check(collectors.AddProcessDotNetConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera))
		}
	}
	for _, r := range conf.Riak {
		check(collectors.Riak(r.URL))
	}
	for _, x := range conf.ExtraHop {
		check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent))
	}
	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()
	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if *flagPrint {
		u = &url.URL{Scheme: "http", Host: "localhost:0"}
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	if conf.BatchSize < 0 {
		slog.Fatal("BatchSize must be > 0")
	}
	if conf.BatchSize != 0 {
		collect.BatchSize = conf.BatchSize
	}
	collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp, cquit := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}
	if version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}
	go func() {
		const maxMem = 500 * 1024 * 1024 // 500MB
		var m runtime.MemStats
		for range time.Tick(time.Minute) {
			runtime.ReadMemStats(&m)
			if m.Alloc > maxMem {
				panic("memory max reached")
			}
		}
	}()
	sChan := make(chan os.Signal)
	signal.Notify(sChan, os.Interrupt)
	<-sChan
	close(cquit)
	// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
	time.AfterFunc(5*time.Second, func() { os.Exit(0) })
	collect.Flush()
}
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()
	s.Notifications = nil
	decode := func(name string, dst interface{}) error {
		var data []byte
		err := s.db.View(func(tx *bolt.Tx) error {
			b := tx.Bucket([]byte(dbBucket))
			if b == nil {
				return fmt.Errorf("unknown bucket: %v", dbBucket)
			}
			data = b.Get([]byte(name))
			return nil
		})
		if err != nil {
			return err
		}
		gr, err := gzip.NewReader(bytes.NewReader(data))
		if err != nil {
			return err
		}
		defer gr.Close()
		return gob.NewDecoder(gr).Decode(dst)
	}
	if err := decode(dbMetadata, &s.Metadata); err != nil {
		slog.Errorln(dbMetadata, err)
	}
	if err := decode(dbMetricMetadata, &s.metricMetadata); err != nil {
		slog.Errorln(dbMetricMetadata, err)
	}
	for k, v := range s.Metadata {
		if k.Name == "desc" || k.Name == "rate" || k.Name == "unit" {
			s.PutMetadata(k, v.Value)
			delete(s.Metadata, k)
		}
	}
	if err := decode(dbMetric, &s.Search.Metric); err != nil {
		slog.Errorln(dbMetric, err)
	}
	if err := decode(dbTagk, &s.Search.Tagk); err != nil {
		slog.Errorln(dbTagk, err)
	}
	if err := decode(dbTagv, &s.Search.Tagv); err != nil {
		slog.Errorln(dbTagv, err)
	}
	if err := decode(dbMetricTags, &s.Search.MetricTags); err != nil {
		slog.Errorln(dbMetricTags, err)
	}
	notifications := make(map[expr.AlertKey]map[string]time.Time)
	if err := decode(dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}
	if err := decode(dbSilence, &s.Silence); err != nil {
		slog.Errorln(dbSilence, err)
	}
	if err := decode(dbIncidents, &s.Incidents); err != nil {
		slog.Errorln(dbIncidents, err)
	}
	if err := decode(dbErrors, &s.AlertStatuses); err != nil {
		slog.Errorln(dbErrors, err)
	}
	// Calculate next incident id.
	for _, i := range s.Incidents {
		if i.Id > s.maxIncidentId {
			s.maxIncidentId = i.Id
		}
	}
	status := make(States)
	if err := decode(dbStatus, &status); err != nil {
		slog.Errorln(dbStatus, err)
	}
	clear := func(r *Result) {
		if r == nil {
			return
		}
		r.Computations = nil
	}
	for ak, st := range status {
		a, present := s.Conf.Alerts[ak.Name()]
		if !present {
			slog.Errorln("sched: alert no longer present, ignoring:", ak)
			continue
		} else if s.Conf.Squelched(a, st.Group) {
			slog.Infoln("sched: alert now squelched:", ak)
			continue
		} else {
			t := a.Unknown
			if t == 0 {
				t = s.Conf.CheckFrequency
			}
			if t == 0 && st.Last().Status == StUnknown {
				st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
			}
		}
		clear(st.Result)
		newHistory := []Event{}
		for _, e := range st.History {
			clear(e.Warn)
			clear(e.Crit)
			// Remove error events which no longer are a thing.
			if e.Status <= StUnknown {
				newHistory = append(newHistory, e)
			}
		}
		st.History = newHistory
		s.status[ak] = st
		if a.Log && st.Open {
			st.Open = false
			slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
		}
		for name, t := range notifications[ak] {
			n, present := s.Conf.Notifications[name]
			if !present {
				slog.Infoln("sched: notification not present during restore:", name)
				continue
			}
			if a.Log {
				slog.Infoln("sched: alert is now log, removing notification:", ak)
				continue
			}
			s.AddNotification(ak, n, t)
		}
	}
	if s.maxIncidentId == 0 {
		s.createHistoricIncidents()
	}
	s.Search.Copy()
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	ua := "Scollector/" + version.ShortVersion()
	if conf.UserAgentMessage != "" {
		ua += fmt.Sprintf(" (%s)", conf.UserAgentMessage)
	}
	client := &http.Client{
		Transport: &scollectorHTTPTransport{
			ua,
			&httpcontrol.Transport{
				RequestTimeout: time.Minute,
			},
		},
	}
	http.DefaultClient = client
	collect.DefaultClient = client
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagNtlm {
		conf.UseNtlm = *flagNtlm
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	if conf.PProf != "" {
		go func() {
			slog.Infof("Starting pprof at http://%s/debug/pprof/", conf.PProf)
			slog.Fatal(http.ListenAndServe(conf.PProf, nil))
		}()
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
	}
	if err := collect.SetHostname(util.Hostname); err != nil {
		slog.Fatal(err)
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	if conf.SNMPTimeout > 0 {
		snmp.Timeout = conf.SNMPTimeout
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	collectors.Init(conf)
	for _, r := range conf.MetricFilters {
		slog.Infof("Adding MetricFilter: %v\n", r)
		check(collectors.AddMetricFilters(r))
	}
	for _, rmq := range conf.RabbitMQ {
		check(collectors.RabbitMQ(rmq.URL))
	}
	for _, cfg := range conf.SNMP {
		check(collectors.SNMP(cfg, conf.MIBS))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region, a.BillingProductCodesRegex, a.BillingBucketName, a.BillingBucketPath, a.BillingPurgeDays))
	}
	for _, ea := range conf.AzureEA {
		check(collectors.AzureEABilling(ea.EANumber, ea.APIKey, ea.LogBillingDetails))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, p := range conf.ProcessDotNet {
		check(collectors.AddProcessDotNetConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		var freq time.Duration
		var parseerr error
		if h.Freq == "" {
			freq = time.Minute * 5
		} else {
			freq, parseerr = time.ParseDuration(h.Freq)
			if parseerr != nil {
				slog.Fatal(parseerr)
			}
			if freq < time.Second {
				slog.Fatalf("Invalid HTTPUnit frequency %s, cannot be less than 1 second.", h.Freq)
			}
		}
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML, freq))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera, freq))
		}
	}
	for _, r := range conf.Riak {
		check(collectors.Riak(r.URL))
	}
	for _, x := range conf.ExtraHop {
		check(collectors.ExtraHop(x.Host, x.APIKey, x.FilterBy, x.FilterPercent, x.AdditionalMetrics, x.CertificateSubjectMatch, x.CertificateActivityGroup))
	}
	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()
	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	err = collectors.AddTagOverrides(c, conf.TagOverride)
	if err != nil {
		slog.Fatalf("Error adding tag overrides: %s", err)
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if *flagPrint {
		u = &url.URL{Scheme: "http", Host: "localhost:0"}
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	if conf.BatchSize < 0 {
		slog.Fatal("BatchSize must be > 0")
	}
	if conf.BatchSize != 0 {
		collect.BatchSize = conf.BatchSize
	}
	collect.Tags = conf.Tags.Copy().Merge(opentsdb.TagSet{"os": runtime.GOOS})
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp, cquit := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	collect.UseNtlm = conf.UseNtlm
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}
	if !collect.DisableDefaultCollectors && version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}
	if conf.MaxQueueLen != 0 {
		if conf.MaxQueueLen < collect.BatchSize {
			slog.Fatalf("MaxQueueLen must be >= %d (BatchSize)", collect.BatchSize)
		}
		collect.MaxQueueLen = conf.MaxQueueLen
	}
	maxMemMB := uint64(500)
	if conf.MaxMem != 0 {
		maxMemMB = conf.MaxMem
	}
	go func() {
		var m runtime.MemStats
		for range time.Tick(time.Second * 30) {
			runtime.ReadMemStats(&m)
			allocMB := m.Alloc / 1024 / 1024
			if allocMB > maxMemMB {
				slog.Fatalf("memory max runtime reached: (current alloc: %v megabytes, max: %v megabytes)", allocMB, maxMemMB)
			}
			// See process_windows.go and process_linux.go for total process
			// memory usage. Note that in Linux the rss metric includes shared
			// pages, whereas in Windows the private working set does not
			// include shared memory. Total memory used seems to scale
			// linearly with m.Alloc. But we want this to catch a memory leak
			// outside the runtime (WMI/CGO), so for now just add any runtime
			// allocations to the allowed total limit.
			maxMemTotalMB := maxMemMB + allocMB
			if collectors.TotalScollectorMemoryMB > maxMemTotalMB {
				slog.Fatalf("memory max total reached: (current total: %v megabytes, current runtime alloc: %v megabytes, max: %v megabytes)", collectors.TotalScollectorMemoryMB, allocMB, maxMemTotalMB)
			}
		}
	}()
	sChan := make(chan os.Signal)
	signal.Notify(sChan, os.Interrupt)
	<-sChan
	close(cquit)
	// try to flush all datapoints on sigterm, but quit after 5 seconds no matter what.
	time.AfterFunc(5*time.Second, func() { os.Exit(0) })
	collect.Flush()
}
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()
	s.Notifications = nil
	db := s.db
	notifications := make(map[models.AlertKey]map[string]time.Time)
	if err := decode(db, dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}
	// status := make(States)
	// if err := decode(db, dbStatus, &status); err != nil {
	// 	slog.Errorln(dbStatus, err)
	// }
	// clear := func(r *models.Result) {
	// 	if r == nil {
	// 		return
	// 	}
	// 	r.Computations = nil
	// }
	// TODO: ???
	// for ak, st := range status {
	// 	a, present := s.Conf.Alerts[ak.Name()]
	// 	if !present {
	// 		slog.Errorln("sched: alert no longer present, ignoring:", ak)
	// 		continue
	// 	} else if s.Conf.Squelched(a, st.Group) {
	// 		slog.Infoln("sched: alert now squelched:", ak)
	// 		continue
	// 	} else {
	// 		t := a.Unknown
	// 		if t == 0 {
	// 			t = s.Conf.CheckFrequency
	// 		}
	// 		if t == 0 && st.Last().Status == StUnknown {
	// 			st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
	// 		}
	// 	}
	// 	clear(st.Result)
	// 	newHistory := []Event{}
	// 	for _, e := range st.History {
	// 		clear(e.Warn)
	// 		clear(e.Crit)
	// 		// Remove error events which no longer are a thing.
	// 		if e.Status <= StUnknown {
	// 			newHistory = append(newHistory, e)
	// 		}
	// 	}
	// 	st.History = newHistory
	// 	s.status[ak] = st
	// 	if a.Log && st.Open {
	// 		st.Open = false
	// 		slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
	// 	}
	// 	for name, t := range notifications[ak] {
	// 		n, present := s.Conf.Notifications[name]
	// 		if !present {
	// 			slog.Infoln("sched: notification not present during restore:", name)
	// 			continue
	// 		}
	// 		if a.Log {
	// 			slog.Infoln("sched: alert is now log, removing notification:", ak)
	// 			continue
	// 		}
	// 		s.AddNotification(ak, n, t)
	// 	}
	// }
	if err := migrateOldDataToRedis(db, s.DataAccess); err != nil {
		return err
	}
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
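// Note: the package-level decode used above is not shown in this section. A
// minimal sketch, assuming it mirrors the closure defined in the older
// RestoreState: read a gzipped gob blob for the named key out of the BoltDB
// state bucket and decode it into dst.
func decode(db *bolt.DB, name string, dst interface{}) error {
	var data []byte
	err := db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte(dbBucket))
		if b == nil {
			return fmt.Errorf("unknown bucket: %v", dbBucket)
		}
		data = b.Get([]byte(name))
		return nil
	})
	if err != nil {
		return err
	}
	gr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return err
	}
	defer gr.Close()
	return gob.NewDecoder(gr).Decode(dst)
}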
func c_snmp_bridge(community, host string) (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	vlanRaw, err := snmp_subtree(host, community, vtpVlanState)
	if err != nil {
		return md, err
	}
	vlans := []string{}
	for vlan, state := range vlanRaw {
		Add(&md, "cisco.net.vlan_state", state, opentsdb.TagSet{"host": host, "vlan": vlan}, metadata.Gauge, metadata.StatusCode, "")
		vlans = append(vlans, vlan)
	}
	ifMacs := make(map[string][]string)
	for _, vlan := range vlans {
		// community string indexing: http://www.cisco.com/c/en/us/support/docs/ip/simple-network-management-protocol-snmp/40367-camsnmp40367.html
		macRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dTpFdbAddress)
		if err != nil {
			slog.Infoln(err)
			// continue since it might just be the one vlan
			continue
		}
		remoteMacAddresses := make(map[string]string)
		for k, v := range macRaw {
			if ba, ok := v.([]byte); ok {
				remoteMacAddresses[k] = strings.ToUpper(hex.EncodeToString(ba))
			}
		}
		toPort := make(map[string]string)
		toPortRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dTpFdbPort)
		if err != nil {
			slog.Infoln(err)
		}
		for k, v := range toPortRaw {
			toPort[k] = fmt.Sprintf("%v", v)
		}
		portToIfIndex := make(map[string]string)
		portToIfIndexRaw, err := snmp_subtree(host, community+"@"+vlan, dot1dBasePortIfIndex)
		if err != nil {
			slog.Infoln(err)
		}
		for k, v := range portToIfIndexRaw {
			portToIfIndex[k] = fmt.Sprintf("%v", v)
		}
		for port, mac := range remoteMacAddresses {
			if port, ok := toPort[port]; ok {
				if ifIndex, ok := portToIfIndex[port]; ok {
					// append handles the nil-slice case, so no need to check
					// whether the key already exists.
					ifMacs[ifIndex] = append(ifMacs[ifIndex], mac)
				}
			}
		}
	}
	for iface, macs := range ifMacs {
		j, err := json.Marshal(macs)
		if err != nil {
			return md, err
		}
		metadata.AddMeta("", opentsdb.TagSet{"host": host, "iface": iface}, "remoteMacs", string(j), false)
	}
	return md, nil
}
// CheckNotifications processes past notification events. It returns the
// duration until the soonest notification triggers.
func (s *Schedule) CheckNotifications() time.Duration {
	silenced := s.Silenced()
	s.Lock("CheckNotifications")
	defer s.Unlock()
	notifications := s.Notifications
	s.Notifications = nil
	for ak, ns := range notifications {
		if si := silenced(ak); si != nil {
			slog.Infoln("silencing", ak)
			continue
		}
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(time.Now())
			if remaining > 0 {
				s.AddNotification(ak, n, t)
				continue
			}
			// If alert is currently unevaluated because of a dependency,
			// simply requeue it until the dependency resolves itself.
			_, uneval := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name())
			unevaluated := false
			for _, un := range uneval {
				if un == ak {
					unevaluated = true
					break
				}
			}
			if unevaluated {
				s.AddNotification(ak, n, t)
				continue
			}
			st, err := s.DataAccess.State().GetLatestIncident(ak)
			if err != nil {
				slog.Error(err)
				continue
			}
			if st == nil {
				continue
			}
			s.Notify(st, n)
		}
	}
	s.sendNotifications(silenced)
	s.pendingNotifications = nil
	timeout := time.Hour
	now := time.Now()
	for _, ns := range s.Notifications {
		for name, t := range ns {
			n, present := s.Conf.Notifications[name]
			if !present {
				continue
			}
			remaining := t.Add(n.Timeout).Sub(now)
			if remaining < timeout {
				timeout = remaining
			}
		}
	}
	return timeout
}
// RestoreState restores notification and alert state from the file on disk.
func (s *Schedule) RestoreState() error {
	defer func() {
		bosunStartupTime = time.Now()
	}()
	slog.Infoln("RestoreState")
	start := time.Now()
	s.Lock("RestoreState")
	defer s.Unlock()
	s.Search.Lock()
	defer s.Search.Unlock()
	s.Notifications = nil
	db := s.db
	notifications := make(map[expr.AlertKey]map[string]time.Time)
	if err := decode(db, dbNotifications, &notifications); err != nil {
		slog.Errorln(dbNotifications, err)
	}
	if err := decode(db, dbSilence, &s.Silence); err != nil {
		slog.Errorln(dbSilence, err)
	}
	if err := decode(db, dbIncidents, &s.Incidents); err != nil {
		slog.Errorln(dbIncidents, err)
	}
	// Calculate next incident id.
	for _, i := range s.Incidents {
		if i.Id > s.maxIncidentId {
			s.maxIncidentId = i.Id
		}
	}
	status := make(States)
	if err := decode(db, dbStatus, &status); err != nil {
		slog.Errorln(dbStatus, err)
	}
	clear := func(r *Result) {
		if r == nil {
			return
		}
		r.Computations = nil
	}
	for ak, st := range status {
		a, present := s.Conf.Alerts[ak.Name()]
		if !present {
			slog.Errorln("sched: alert no longer present, ignoring:", ak)
			continue
		} else if s.Conf.Squelched(a, st.Group) {
			slog.Infoln("sched: alert now squelched:", ak)
			continue
		} else {
			t := a.Unknown
			if t == 0 {
				t = s.Conf.CheckFrequency
			}
			if t == 0 && st.Last().Status == StUnknown {
				st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId})
			}
		}
		clear(st.Result)
		newHistory := []Event{}
		for _, e := range st.History {
			clear(e.Warn)
			clear(e.Crit)
			// Remove error events which no longer are a thing.
			if e.Status <= StUnknown {
				newHistory = append(newHistory, e)
			}
		}
		st.History = newHistory
		s.status[ak] = st
		if a.Log && st.Open {
			st.Open = false
			slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status())
		}
		for name, t := range notifications[ak] {
			n, present := s.Conf.Notifications[name]
			if !present {
				slog.Infoln("sched: notification not present during restore:", name)
				continue
			}
			if a.Log {
				slog.Infoln("sched: alert is now log, removing notification:", ak)
				continue
			}
			s.AddNotification(ak, n, t)
		}
	}
	if s.maxIncidentId == 0 {
		s.createHistoricIncidents()
	}
	migrateOldDataToRedis(db, s.DataAccess)
	// delete metrictags if they exist.
	deleteKey(s.db, "metrictags")
	slog.Infoln("RestoreState done in", time.Since(start))
	return nil
}
func main() {
	flag.Parse()
	if *flagToToml != "" {
		toToml(*flagToToml)
		fmt.Println("toml conversion complete; remove all empty values by hand (empty strings, 0)")
		return
	}
	if *flagPrint || *flagDebug {
		slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
	}
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("scollector"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	conf := readConf()
	if *flagHost != "" {
		conf.Host = *flagHost
	}
	if *flagFilter != "" {
		conf.Filter = strings.Split(*flagFilter, ",")
	}
	if !conf.Tags.Valid() {
		slog.Fatalf("invalid tags: %v", conf.Tags)
	} else if conf.Tags["host"] != "" {
		slog.Fatalf("host not supported in custom tags, use Hostname instead")
	}
	collectors.AddTags = conf.Tags
	util.FullHostname = conf.FullHost
	util.Set()
	if conf.Hostname != "" {
		util.Hostname = conf.Hostname
		if err := collect.SetHostname(conf.Hostname); err != nil {
			slog.Fatal(err)
		}
	}
	if conf.ColDir != "" {
		collectors.InitPrograms(conf.ColDir)
	}
	var err error
	check := func(e error) {
		if e != nil {
			err = e
		}
	}
	for _, h := range conf.HAProxy {
		for _, i := range h.Instances {
			collectors.HAProxy(h.User, h.Password, i.Tier, i.URL)
		}
	}
	for _, s := range conf.SNMP {
		check(collectors.SNMP(s.Community, s.Host))
	}
	for _, i := range conf.ICMP {
		check(collectors.ICMP(i.Host))
	}
	for _, a := range conf.AWS {
		check(collectors.AWS(a.AccessKey, a.SecretKey, a.Region))
	}
	for _, v := range conf.Vsphere {
		check(collectors.Vsphere(v.User, v.Password, v.Host))
	}
	for _, p := range conf.Process {
		check(collectors.AddProcessConfig(p))
	}
	for _, h := range conf.HTTPUnit {
		if h.TOML != "" {
			check(collectors.HTTPUnitTOML(h.TOML))
		}
		if h.Hiera != "" {
			check(collectors.HTTPUnitHiera(h.Hiera))
		}
	}
	if err != nil {
		slog.Fatal(err)
	}
	collectors.KeepalivedCommunity = conf.KeepalivedCommunity
	// Add all process collectors. This is platform specific.
	collectors.WatchProcesses()
	collectors.WatchProcessesDotNet()
	if *flagFake > 0 {
		collectors.InitFake(*flagFake)
	}
	collect.Debug = *flagDebug
	util.Debug = *flagDebug
	collect.DisableDefaultCollectors = conf.DisableSelf
	c := collectors.Search(conf.Filter)
	if len(c) == 0 {
		slog.Fatalf("Filter %v matches no collectors.", conf.Filter)
	}
	for _, col := range c {
		col.Init()
	}
	u, err := parseHost(conf.Host)
	if *flagList {
		list(c)
		return
	} else if err != nil {
		slog.Fatalf("invalid host %v: %v", conf.Host, err)
	}
	freq := time.Second * time.Duration(conf.Freq)
	if freq <= 0 {
		slog.Fatal("freq must be > 0")
	}
	collectors.DefaultFreq = freq
	collect.Freq = freq
	collect.Tags = opentsdb.TagSet{"os": runtime.GOOS}
	if *flagPrint {
		collect.Print = true
	}
	if !*flagDisableMetadata {
		if err := metadata.Init(u, *flagDebug); err != nil {
			slog.Fatal(err)
		}
	}
	cdp := collectors.Run(c)
	if u != nil {
		slog.Infoln("OpenTSDB host:", u)
	}
	if err := collect.InitChan(u, "scollector", cdp); err != nil {
		slog.Fatal(err)
	}
	if version.VersionDate != "" {
		v, err := strconv.ParseInt(version.VersionDate, 10, 64)
		if err == nil {
			go func() {
				metadata.AddMetricMeta("scollector.version", metadata.Gauge, metadata.None,
					"Scollector version number, which indicates when scollector was built.")
				for {
					if err := collect.Put("version", collect.Tags, v); err != nil {
						slog.Error(err)
					}
					time.Sleep(time.Hour)
				}
			}()
		}
	}
	if *flagBatchSize > 0 {
		collect.BatchSize = *flagBatchSize
	}
	go func() {
		const maxMem = 500 * 1024 * 1024 // 500MB
		var m runtime.MemStats
		for range time.Tick(time.Minute) {
			runtime.ReadMemStats(&m)
			if m.Alloc > maxMem {
				panic("memory max reached")
			}
		}
	}()
	select {}
}
func Listen(listenAddr string, devMode bool, tsdbHost string, reloadFunc func() error) error {
	if devMode {
		slog.Infoln("using local web assets")
	}
	webFS := FS(devMode)
	indexTemplate = func() *template.Template {
		str := FSMustString(devMode, "/templates/index.html")
		templates, err := template.New("").Parse(str)
		if err != nil {
			slog.Fatal(err)
		}
		return templates
	}
	reload = reloadFunc
	if !devMode {
		tpl := indexTemplate()
		indexTemplate = func() *template.Template {
			return tpl
		}
	}
	if tsdbHost != "" {
		router.HandleFunc("/api/index", IndexTSDB)
		router.Handle("/api/put", Relay(tsdbHost))
	}
	router.HandleFunc("/api/", APIRedirect)
	router.Handle("/api/action", JSON(Action))
	router.Handle("/api/alerts", JSON(Alerts))
	router.Handle("/api/config", miniprofiler.NewHandler(Config))
	router.Handle("/api/config_test", miniprofiler.NewHandler(ConfigTest))
	router.Handle("/api/save_enabled", JSON(SaveEnabled))
	if schedule.SystemConf.ReloadEnabled() { // Is true if save is enabled.
		router.Handle("/api/reload", JSON(Reload)).Methods(http.MethodPost)
	}
	if schedule.SystemConf.SaveEnabled() {
		router.Handle("/api/config/bulkedit", miniprofiler.NewHandler(BulkEdit)).Methods(http.MethodPost)
		router.Handle("/api/config/save", miniprofiler.NewHandler(SaveConfig)).Methods(http.MethodPost)
		router.Handle("/api/config/diff", miniprofiler.NewHandler(DiffConfig)).Methods(http.MethodPost)
		router.Handle("/api/config/running_hash", JSON(ConfigRunningHash))
	}
	router.Handle("/api/egraph/{bs}.{format:svg|png}", JSON(ExprGraph))
	router.Handle("/api/errors", JSON(ErrorHistory))
	router.Handle("/api/expr", JSON(Expr))
	router.Handle("/api/graph", JSON(Graph))
	router.Handle("/api/health", JSON(HealthCheck))
	router.Handle("/api/host", JSON(Host))
	router.Handle("/api/last", JSON(Last))
	router.Handle("/api/quiet", JSON(Quiet))
	router.Handle("/api/incidents", JSON(Incidents))
	router.Handle("/api/incidents/open", JSON(ListOpenIncidents))
	router.Handle("/api/incidents/events", JSON(IncidentEvents))
	router.Handle("/api/metadata/get", JSON(GetMetadata))
	router.Handle("/api/metadata/metrics", JSON(MetadataMetrics))
	router.Handle("/api/metadata/put", JSON(PutMetadata))
	router.Handle("/api/metadata/delete", JSON(DeleteMetadata)).Methods("DELETE")
	router.Handle("/api/metric", JSON(UniqueMetrics))
	router.Handle("/api/metric/{tagk}", JSON(MetricsByTagKey))
	router.Handle("/api/metric/{tagk}/{tagv}", JSON(MetricsByTagPair))
	router.Handle("/api/rule", JSON(Rule))
	router.HandleFunc("/api/shorten", Shorten)
	router.Handle("/api/silence/clear", JSON(SilenceClear))
	router.Handle("/api/silence/get", JSON(SilenceGet))
	router.Handle("/api/silence/set", JSON(SilenceSet))
	router.Handle("/api/status", JSON(Status))
	router.Handle("/api/tagk/{metric}", JSON(TagKeysByMetric))
	router.Handle("/api/tagv/{tagk}", JSON(TagValuesByTagKey))
	router.Handle("/api/tagv/{tagk}/{metric}", JSON(TagValuesByMetricTagKey))
	router.Handle("/api/tagsets/{metric}", JSON(FilteredTagsetsByMetric))
	router.Handle("/api/opentsdb/version", JSON(OpenTSDBVersion))
	router.Handle("/api/annotate", JSON(AnnotateEnabled))
	// Annotations
	if schedule.SystemConf.AnnotateEnabled() {
		index := schedule.SystemConf.GetAnnotateIndex()
		if index == "" {
			index = "annotate"
		}
		annotateBackend = backend.NewElastic(schedule.SystemConf.GetAnnotateElasticHosts(), index)
		go func() {
			for {
				err := annotateBackend.InitBackend()
				if err == nil {
					return
				}
				slog.Warningf("could not initialize annotate backend, will try again: %v", err)
				time.Sleep(time.Second * 30)
			}
		}()
		web.AddRoutes(router, "/api", []backend.Backend{annotateBackend}, false, false)
	}
	router.HandleFunc("/api/version", Version)
	router.Handle("/api/debug/schedlock", JSON(ScheduleLockStatus))
	http.Handle("/", miniprofiler.NewHandler(Index))
	http.Handle("/api/", router)
	fs := http.FileServer(webFS)
	http.Handle("/partials/", fs)
	http.Handle("/static/", http.StripPrefix("/static/", fs))
	http.Handle("/favicon.ico", fs)
	slog.Infoln("bosun web listening on:", listenAddr)
	slog.Infoln("tsdb host:", tsdbHost)
	return http.ListenAndServe(listenAddr, nil)
}
func Listen(listenAddr string, devMode bool, tsdbHost string) error {
	if devMode {
		slog.Infoln("using local web assets")
	}
	webFS := FS(devMode)
	indexTemplate = func() *template.Template {
		str := FSMustString(devMode, "/templates/index.html")
		templates, err := template.New("").Parse(str)
		if err != nil {
			slog.Fatal(err)
		}
		return templates
	}
	if !devMode {
		tpl := indexTemplate()
		indexTemplate = func() *template.Template {
			return tpl
		}
	}
	if tsdbHost != "" {
		router.HandleFunc("/api/index", IndexTSDB)
		router.Handle("/api/put", Relay(tsdbHost))
	}
	router.HandleFunc("/api/", APIRedirect)
	router.Handle("/api/action", JSON(Action))
	router.Handle("/api/alerts", JSON(Alerts))
	router.Handle("/api/backup", JSON(Backup))
	router.Handle("/api/config", miniprofiler.NewHandler(Config))
	router.Handle("/api/config_test", miniprofiler.NewHandler(ConfigTest))
	router.Handle("/api/egraph/{bs}.svg", JSON(ExprGraph))
	router.Handle("/api/errors", JSON(ErrorHistory))
	router.Handle("/api/expr", JSON(Expr))
	router.Handle("/api/graph", JSON(Graph))
	router.Handle("/api/health", JSON(HealthCheck))
	router.Handle("/api/host", JSON(Host))
	router.Handle("/api/last", JSON(Last))
	router.Handle("/api/incidents", JSON(Incidents))
	router.Handle("/api/incidents/events", JSON(IncidentEvents))
	router.Handle("/api/metadata/get", JSON(GetMetadata))
	router.Handle("/api/metadata/metrics", JSON(MetadataMetrics))
	router.Handle("/api/metadata/put", JSON(PutMetadata))
	router.Handle("/api/metadata/delete", JSON(DeleteMetadata)).Methods("DELETE")
	router.Handle("/api/metric", JSON(UniqueMetrics))
	router.Handle("/api/metric/{tagk}/{tagv}", JSON(MetricsByTagPair))
	router.Handle("/api/rule", JSON(Rule))
	router.HandleFunc("/api/shorten", Shorten)
	router.Handle("/api/silence/clear", JSON(SilenceClear))
	router.Handle("/api/silence/get", JSON(SilenceGet))
	router.Handle("/api/silence/set", JSON(SilenceSet))
	router.Handle("/api/status", JSON(Status))
	router.Handle("/api/tagk/{metric}", JSON(TagKeysByMetric))
	router.Handle("/api/tagv/{tagk}", JSON(TagValuesByTagKey))
	router.Handle("/api/tagv/{tagk}/{metric}", JSON(TagValuesByMetricTagKey))
	router.Handle("/api/tagsets/{metric}", JSON(FilteredTagsetsByMetric))
	router.Handle("/api/opentsdb/version", JSON(OpenTSDBVersion))
	router.HandleFunc("/api/version", Version)
	router.Handle("/api/debug/schedlock", JSON(ScheduleLockStatus))
	http.Handle("/", miniprofiler.NewHandler(Index))
	http.Handle("/api/", router)
	fs := http.FileServer(webFS)
	http.Handle("/partials/", fs)
	http.Handle("/static/", http.StripPrefix("/static/", fs))
	http.Handle("/favicon.ico", fs)
	slog.Infoln("bosun web listening on:", listenAddr)
	slog.Infoln("tsdb host:", tsdbHost)
	return http.ListenAndServe(listenAddr, nil)
}
func (n *Notification) DoPrint(payload string) {
	slog.Infoln(payload)
}
func main() {
	flag.Parse()
	if *flagVersion {
		fmt.Println(version.GetVersionInfo("bosun"))
		os.Exit(0)
	}
	for _, m := range mains {
		m()
	}
	systemConf, err := conf.LoadSystemConfigFile(*flagConf)
	if err != nil {
		slog.Fatalf("couldn't read system configuration: %v", err)
	}
	sysProvider, err := systemConf.GetSystemConfProvider()
	if err != nil {
		slog.Fatal(err)
	}
	ruleConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), systemConf.EnabledBackends())
	if err != nil {
		slog.Fatalf("couldn't read rules: %v", err)
	}
	if *flagTest {
		os.Exit(0)
	}
	var ruleProvider conf.RuleConfProvider = ruleConf
	httpListen := &url.URL{
		Scheme: "http",
		Host:   sysProvider.GetHTTPListen(),
	}
	if strings.HasPrefix(httpListen.Host, ":") {
		httpListen.Host = "localhost" + httpListen.Host
	}
	if err := metadata.Init(httpListen, false); err != nil {
		slog.Fatal(err)
	}
	if err := sched.Load(sysProvider, ruleProvider, *flagSkipLast, *flagQuiet); err != nil {
		slog.Fatal(err)
	}
	if sysProvider.GetRelayListen() != "" {
		go func() {
			mux := http.NewServeMux()
			mux.Handle("/api/", util.NewSingleHostProxy(httpListen))
			s := &http.Server{
				Addr:    sysProvider.GetRelayListen(),
				Handler: mux,
			}
			slog.Fatal(s.ListenAndServe())
		}()
	}
	if sysProvider.GetTSDBHost() != "" {
		if err := collect.Init(httpListen, "bosun"); err != nil {
			slog.Fatal(err)
		}
		tsdbHost := &url.URL{
			Scheme: "http",
			Host:   sysProvider.GetTSDBHost(),
		}
		if *flagReadonly {
			rp := util.NewSingleHostProxy(tsdbHost)
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				if r.URL.Path == "/api/put" {
					w.WriteHeader(204)
					return
				}
				rp.ServeHTTP(w, r)
			}))
			slog.Infoln("readonly relay at", ts.URL, "to", tsdbHost)
			tsdbHost, _ = url.Parse(ts.URL)
			sysProvider.SetTSDBHost(tsdbHost.Host)
		}
	}
	if systemConf.GetPing() {
		go ping.PingHosts(sched.DefaultSched.Search, systemConf.GetPingDuration())
	}
	if sysProvider.GetInternetProxy() != "" {
		web.InternetProxy, err = url.Parse(sysProvider.GetInternetProxy())
		if err != nil {
			slog.Fatalf("InternetProxy error: %s", err)
		}
	}
	var cmdHook conf.SaveHook
	if hookPath := sysProvider.GetCommandHookPath(); hookPath != "" {
		cmdHook, err = conf.MakeSaveCommandHook(hookPath)
		if err != nil {
			slog.Fatal(err)
		}
		ruleProvider.SetSaveHook(cmdHook)
	}
	var reload func() error
	reloading := make(chan bool, 1) // a lock that we can give up acquiring
	reload = func() error {
		select {
		case reloading <- true:
			// Got lock
		default:
			return fmt.Errorf("not reloading, reload in progress")
		}
		defer func() { <-reloading }()
		newConf, err := rule.ParseFile(sysProvider.GetRuleFilePath(), sysProvider.EnabledBackends())
		if err != nil {
			return err
		}
		newConf.SetSaveHook(cmdHook)
		newConf.SetReload(reload)
		oldSched := sched.DefaultSched
		oldDA := oldSched.DataAccess
		oldSearch := oldSched.Search
		sched.Close(true)
		sched.Reset()
		newSched := sched.DefaultSched
		newSched.Search = oldSearch
		newSched.DataAccess = oldDA
		slog.Infoln("schedule shutdown, loading new schedule")
		// Load does not set the DataAccess or Search if it is already set
		if err := sched.Load(sysProvider, newConf, *flagSkipLast, *flagQuiet); err != nil {
			slog.Fatal(err)
		}
		web.ResetSchedule() // Signal web to point to the new DefaultSchedule
		go func() {
			slog.Infoln("running new schedule")
			if !*flagNoChecks {
				sched.Run()
			}
		}()
		slog.Infoln("config reload complete")
		return nil
	}
	ruleProvider.SetReload(reload)
	go func() {
		slog.Fatal(web.Listen(sysProvider.GetHTTPListen(), *flagDev, sysProvider.GetTSDBHost(), reload))
	}()
	go func() {
		if !*flagNoChecks {
			sched.Run()
		}
	}()
	go func() {
		sc := make(chan os.Signal, 1)
		signal.Notify(sc, os.Interrupt, syscall.SIGTERM)
		killing := false
		for range sc {
			if killing {
				slog.Infoln("Second interrupt: exiting")
				os.Exit(1)
			}
			killing = true
			go func() {
				slog.Infoln("Interrupt: closing down...")
				sched.Close(false)
				slog.Infoln("done")
				os.Exit(1)
			}()
		}
	}()
	if *flagWatch {
		watch(".", "*.go", quit)
		watch(filepath.Join("web", "static", "templates"), "*.html", web.RunEsc)
		base := filepath.Join("web", "static", "js")
		watch(base, "*.ts", web.RunTsc)
	}
	select {}
}
func (n *Notification) DoPrint(subject string) {
	slog.Infoln(subject)
}