func detectInitd(root string) (InitSystem, error) { ctlpath := root + "etc/init.d/" result, err := util.FileExists(ctlpath) if err != nil { return nil, err } if !result { util.Debug("init.d not detected in " + ctlpath) return nil, nil } matches, err := filepath.Glob(ctlpath + "*") if err != nil { return nil, err } if !result { util.Debug("init.d not detected in " + ctlpath) return nil, nil } if len(matches) > 0 { util.Info("Detected init.d in " + ctlpath) return &Initd{ctlpath, root + "var/run/", pidForString}, nil } util.Info(ctlpath + " exists but appears to be empty") return nil, nil }
/* * So many hacks in this. OSX support can be seen as "bad" at best. */ func (ps *processStorage) capturePs(pid int) error { cmd := exec.Command("ps", "So", "rss,time,utime", "-p", strconv.Itoa(pid)) sout, err := util.SafeRun(cmd) if err != nil { return err } lines, err := util.ReadLines(sout) if err != nil { return err } if len(lines) < 2 { return errors.New("Insufficient output from ps") } fields := strings.Fields(lines[1]) val, err := strconv.ParseInt(fields[0], 10, 64) if err != nil { return err } ps.Save("memory", "rss", float64(1024*val)) times := timeRegexp.FindStringSubmatch(fields[1]) if times == nil { util.Debug("Unable to parse CPU time in " + lines[1]) return nil } min, _ := strconv.ParseUint(times[1], 10, 32) sec, _ := strconv.ParseUint(times[2], 10, 32) cs, _ := strconv.ParseUint(times[3], 10, 32) ticks := min*60*100 + sec*100 + cs times = timeRegexp.FindStringSubmatch(fields[2]) if times == nil { util.Debug("Unable to parse User time in " + lines[1]) return nil } min, _ = strconv.ParseUint(times[1], 10, 32) sec, _ = strconv.ParseUint(times[2], 10, 32) cs, _ = strconv.ParseUint(times[3], 10, 32) uticks := min*60*100 + sec*100 + cs ps.Save("cpu", "user", float64(uticks)) ps.Save("cpu", "system", float64(ticks-uticks)) return nil }
func (hs *hostStorage) collectDisk(path string) error { var lines []string if path == "" { cmd := exec.Command("df", "-P") sout, err := util.SafeRun(cmd) if err != nil { return err } lines, err = util.ReadLines(sout) if err != nil { return err } } else { data, err := ioutil.ReadFile(path) if err != nil { return err } lines, err = util.ReadLines(data) if err != nil { return err } } usage := map[string]float64{} for _, line := range lines { if line[0] == '/' { items := strings.Fields(line) if len(items) < 5 { util.Debug("Cannot parse df output: %v", items) continue } pct := items[4] if pct[len(pct)-1] == '%' { val, err := strconv.ParseInt(pct[0:len(pct)-1], 10, 32) if err != nil { util.Debug("Cannot parse df output: " + line) } usage[items[len(items)-1]] = float64(val) } } } for name, used := range usage { hs.saveType("disk", name, used, Gauge) } return nil }
func (l *Launchd) LookupService(serviceName string) (*ProcessStatus, error) { cmd := exec.Command("launchctl", "list") sout, err := util.SafeRun(cmd) if err != nil { return nil, &ServiceError{l.Name(), serviceName, err} } lines, err := util.ReadLines(sout) if err != nil { return nil, &ServiceError{l.Name(), serviceName, err} } for _, line := range lines { if strings.Contains(line, serviceName) { util.Debug("launchctl found " + serviceName) parts := strings.SplitN(line, "\t", 3) pid, err := strconv.ParseInt(parts[0], 10, 32) if err != nil { return nil, &ServiceError{l.Name(), serviceName, err} } return &ProcessStatus{int(pid), Up}, nil } } path := l.resolvePlist(serviceName) if path != "" { return &ProcessStatus{0, Down}, nil } return nil, &ServiceError{l.Name(), serviceName, ErrServiceNotFound} }
/* Resolve each defined service to its managing init system. Called only at startup, this is what maps services to init and fires ProcessDoesNotExist events. */ func (svc *Service) Resolve(mgrs []services.InitSystem) error { for _, sm := range mgrs { // TODO There's a bizarre race condition here. Figure out // why this is necessary. We shouldn't be multi-threaded yet. if sm == nil { continue } ps, err := sm.LookupService(svc.Name()) if err != nil { serr := err.(*services.ServiceError) if serr.Err == services.ErrServiceNotFound { util.Debug(sm.Name() + " doesn't have " + svc.Name()) continue } return err } util.Info("Found %s/%s with status %s", sm.Name(), svc.Name(), ps) svc.Manager = sm svc.Transition(ps, func(et EventType) { counters.Add("events", 1) err = svc.EventHandler.Trigger(&Event{et, svc, nil}) if err != nil { util.Warn("Error firing event: %s", err.Error()) } }) break } if svc.Manager == nil { return fmt.Errorf("Could not find service %s, did you misspell it?", svc.Name()) } return nil }
func (ps *processStorage) AddSource(name string, config map[string]string) (Source, error) { for _, x := range ps.daemonSpecific { if x.Name() == name { return x, nil } } builder := Sources[name] if builder == nil { return nil, nil } util.Info("Activating metrics for %s", name) src, err := builder(config) if err != nil { return nil, err } m, ok := src.(MandatorySource) if ok && m.Mandatory() { util.Debug("Registering all metrics for %s", name) descs := src.ValidMetrics() for _, d := range descs { if d.MetricType == Counter { ps.DeclareCounter(name, d.Name, nil, d.Display) } else { ps.DeclareGauge(name, d.Name, d.Display) } } } ps.daemonSpecific = append(ps.daemonSpecific, src) return src, nil }
func sendSlackAlert(url string, params url.Values) error { util.Debug("Sending slack alert to %s", url) resp, err := http.PostForm(url, params) if resp != nil { resp.Body.Close() } return err }
func triggeredHandler(rule *Rule, tripped bool) *Event { if !tripped { util.Info("%s[%s] recovered.", rule.EntityName(), rule.Metric()) rule.State = Recovered return nil } util.Debug("%s[%s] still triggered. Current: %.1f, Threshold: %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue, rule.Threshold) return nil }
func defaultClient(host string, port string, ep string) ([]byte, error) { url := fmt.Sprintf("http://%s:%s%s", host, port, ep) util.Debug("Fetching nginx status from %s", url) resp, err := http.Get(url) if err != nil { return nil, err } defer resp.Body.Close() return ioutil.ReadAll(resp.Body) }
func okHandler(rule *Rule, tripped bool) *Event { if tripped && rule.TrippedCount == rule.CycleCount { util.Warn("%s[%s] triggered. Current value = %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue) rule.State = Triggered return &Event{RuleFailed, rule.Entity, rule} } if tripped { util.Debug("%s[%s] tripped. Current: %.1f, Threshold: %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue, rule.Threshold) } return nil }
func (i *Inspeqtor) Start() { util.Debug("Starting command socket") err := i.openSocket(i.SocketPath) if err != nil { util.Warn("Could not create Unix socket: %s", err.Error()) exit(i) } go func() { for { if !i.safelyAccept() { util.Debug("Shutting down command socket") return } } }() // if expose_port is 0, disable the feature altogether if i.GlobalConfig.ExposePort != 0 { sock, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", i.GlobalConfig.ExposePort)) if err != nil { util.Warn("Could not listen on port %d: %s", i.GlobalConfig.ExposePort, err.Error()) exit(i) } i.Expose = sock go func() { // TODO How do we error handling here? util.Info("Expose now available at port %d", i.GlobalConfig.ExposePort) err := http.Serve(i.Expose, nil) // Don't log an "error" when we shut down normally and close the socket if err != nil && !strings.Contains(err.Error(), "use of closed network") { util.Warn("HTTP server error: %s", err.Error()) } }() } util.Debug("Starting main run loop") go i.runLoop() Singleton = i }
func (svc *Service) Reload() error { go func() { util.Debug("Reloading %s", svc.Name()) err := svc.Manager.Reload(svc.Name()) if err != nil { util.Warn(err.Error()) } else { util.DebugDebug("Reloaded %s", svc.Name()) } }() return nil }
func Watch(i *inspeqtor.Inspeqtor, jobs map[string]*Job) { util.Debug("Starting recurring job watcher") go func() { for { untilNext := check(jobs) select { case <-i.Stopping: // reloading inspeqtor util.Debug("Shutting down recurring job watcher") return case <-runNotifier: // we just got notified a job ran, // verify we don't need to fire JobRan case <-time.After(untilNext + time.Minute): // a job is due at this point in time. // add an extra minute to allow for race conditions // and slow performance } } }() }
func HandleSignals() { signals := make(chan os.Signal) for k := range SignalHandlers { signal.Notify(signals, k) } for { sig := <-signals util.Debug("Received signal %d", sig) funk := SignalHandlers[sig] funk(Singleton) } }
func parseJobs(global *inspeqtor.ConfigFile, confDir string) (map[string]*Job, error) { util.Debug("Parsing jobs in " + confDir) files, err := filepath.Glob(confDir + "/jobs.d/*.inq") if err != nil { return nil, err } jobs := map[string]*Job{} for _, filename := range files { util.DebugDebug("Parsing " + filename) data, err := ioutil.ReadFile(filename) if err != nil { return nil, err } s := lexer.NewLexer([]byte(data)) p := parser.NewParser() obj, err := p.Parse(s) if err != nil { util.Warn("Unable to parse " + filename + ": " + err.Error()) continue } astcontent := obj.(*ast.Content) for _, astjob := range astcontent.Jobs { if _, ok := jobs[astjob.Name]; ok { return nil, fmt.Errorf("Duplicate job %s", astjob.Name) } j := New(astjob.Name, astjob.Interval, astcontent.Parameters) owner := j.Parameters["owner"] route := global.AlertRoutes[owner] if owner == "" && route == nil { return nil, fmt.Errorf("No default alert route configured!") } if route == nil { return nil, fmt.Errorf("No such alert route: %s", owner) } alert, err := inspeqtor.Actions["alert"](j, route) if err != nil { return nil, err } j.alerter = alert jobs[astjob.Name] = j } } return jobs, nil }
func (svc *Service) Restart() error { svc.Process.Pid = 0 svc.Process.Status = services.Starting go func() { util.Debug("Restarting %s", svc.Name()) err := svc.Manager.Restart(svc.Name()) if err != nil { util.Warn(err.Error()) } else { util.DebugDebug("Restarted %s", svc.Name()) } }() return nil }
func sendEmail(e *EmailNotifier, doc bytes.Buffer) error { if strings.Index(e.To, "@example.com") > 0 { util.Warn("Invalid email configured: %s", e.To) util.Warn(string(doc.Bytes())) } else { util.Debug("Sending email to %s", e.To) util.Debug("Sending email:\n%s", string(doc.Bytes())) if e.Username != "" { auth := smtp.PlainAuth("", e.Username, e.Password, e.Host) err := smtp.SendMail(e.Host+":"+e.TLSPort, auth, e.From, []string{e.To}, doc.Bytes()) if err != nil { return err } } else { err := smtp.SendMail(e.Host+":25", nil, e.From, []string{e.To}, doc.Bytes()) if err != nil { return err } } } return nil }
func detectUpstart(path string) (InitSystem, error) { result, err := util.FileExists(path) if err != nil { return nil, err } if !result { util.Debug("upstart not detected, no " + path) return nil, nil } matches, err := filepath.Glob(path + "/*.conf") if err != nil { return nil, err } if len(matches) > 0 { util.Info("Detected upstart in " + path) return &Upstart{path, nil}, nil } util.Debug("upstart not detected, empty " + path) return nil, nil }
func (i *Inspeqtor) TestAlertRoutes() int { bad := 0 util.Info("Testing alert routes") for _, route := range i.GlobalConfig.AlertRoutes { nm := route.Name if nm == "" { nm = "default" } util.Debug("Creating notification for %s/%s", route.Channel, nm) notifier, err := Actions["alert"](i.Host, route) if err != nil { bad++ util.Warn("Error creating %s/%s route: %s", route.Channel, nm, err.Error()) continue } util.Debug("Triggering notification for %s/%s", route.Channel, nm) err = notifier.Trigger(&Event{RuleFailed, i.Host, i.Host.Rules()[0]}) if err != nil { bad++ util.Warn("Error firing %s/%s route: %s", route.Channel, nm, err.Error()) } } return bad }
func detectSystemd(path string) (InitSystem, error) { result, err := util.FileExists(path) if err != nil { return nil, err } if !result { util.Debug("systemd not detected, no " + path) return nil, nil } matches, err := filepath.Glob(path + "/*.conf") if err != nil { return nil, err } if len(matches) > 0 { util.Info("Detected systemd in " + path) return &Systemd{path, "", ""}, nil } util.Debug("systemd not detected, empty " + path) return nil, nil }
func convertService(global *ConfigFile, inqsvc *ast.ProcessCheck) (*Service, error) { rules := make([]*Rule, len(inqsvc.Rules)) storage := metrics.NewProcessStore("/proc", global.CycleTime) svc := &Service{&Entity{inqsvc.Name, nil, storage, inqsvc.Parameters}, nil, services.NewStatus(), nil} action, err := BuildAction(global, svc, &ast.SimpleAction{ActionName: "alert"}) if err != nil { return nil, err } svc.EventHandler = action for idx, rule := range inqsvc.Rules { rule, err := convertRule(global, svc, rule) if err != nil { return nil, err } util.DebugDebug("Rule: %+v", *rule) rules[idx] = rule } svc.rules = rules for _, r := range rules { _, err := storage.AddSource(r.MetricFamily, svc.Parameters()) if err != nil { return nil, err } err = storage.Watch(r.MetricFamily, r.MetricName) if err != nil { return nil, err } util.Debug("Watching %s:%s", r.MetricFamily, r.MetricName) } if len(inqsvc.Exposed) > 0 { err := BuildExpose(global, svc, inqsvc.Exposed, inqsvc.Parameters) if err != nil { return nil, err } } err = storage.Prepare() if err != nil { return nil, err } return svc, nil }
func ParseGlobal(rootDir string) (*ConfigFile, error) { path := rootDir + "/inspeqtor.conf" exists, err := util.FileExists(path) if err != nil { return nil, err } if exists { util.Debug("Parsing " + path) data, err := ioutil.ReadFile(path) if err != nil { return nil, err } s := lexer.NewLexer([]byte(data)) p := parser.NewParser() obj, err := p.Parse(s) if err != nil { return nil, err } ast := obj.(ast.Config) config := ConfigFile{Defaults, map[string]*AlertRoute{}} config.Variables = ast.Variables if val, has := ast.Variables["log_level"]; has { util.SetLogLevel(val) } parseValue(ast, &config.CycleTime, "cycle_time", 15) parseValue(ast, &config.DeployLength, "deploy_length", 300) parseValue(ast, &config.ExposePort, "expose_port", 4677) for _, v := range ast.Routes { ar, err := ValidateChannel(v.Name, v.Channel, v.Config) if err != nil { return nil, err } if _, ok := config.AlertRoutes[v.Name]; ok { return nil, fmt.Errorf("Duplicate alert config for '%s'", v.Name) } config.AlertRoutes[v.Name] = ar } return &config, nil } util.Info("No configuration file found at " + rootDir + "/inspector.conf") return &ConfigFile{Defaults, nil}, nil }
func (i *Inspeqtor) scan() { start := time.Now() var barrier sync.WaitGroup barrier.Add(1) barrier.Add(len(i.Services)) go i.Host.Collect(i.silenced(), func(_ Checkable) { barrier.Done() }) for _, svc := range i.Services { go svc.Collect(i.silenced(), func(_ Checkable) { barrier.Done() }) } barrier.Wait() util.Debug("Collection complete in " + time.Now().Sub(start).String()) }
func sendHipchatAlert(url, token string, msg url.Values) error { util.Debug("Sending hipchat alert to %s", url) client := &http.Client{} req, err := http.NewRequest("POST", url, strings.NewReader(msg.Encode())) if err != nil { return err } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") resp, err := client.Do(req) if resp != nil { if resp.StatusCode != 200 { util.Warn("Unable to send hipchat alert: %d", resp.StatusCode) } resp.Body.Close() } return err }
// this method never returns. // // since we can't test this method in an automated fashion, it should // contain as little logic as possible. func (i *Inspeqtor) runLoop() { util.DebugDebug("Resolving services") for _, svc := range i.Services { err := svc.Resolve(i.ServiceManagers) if err != nil { util.Warn(err.Error()) } } i.scanSystem() for { select { case <-time.After(time.Duration(i.GlobalConfig.CycleTime) * time.Second): i.scanSystem() case <-i.Stopping: util.Debug("Shutting down main run loop") return } } }
func statsdReload(_ *inspeqtor.Inspeqtor, newi *inspeqtor.Inspeqtor) error { val, ok := newi.GlobalConfig.Variables["statsd_location"] if !ok { util.Debug("No statsd_location configured, skipping...") return nil } util.Info("Pushing metrics to statsd at %s", val) conn, err := statsd.Dial(val) if err != nil { return err } newi.Listen("cycleComplete", func(ins *inspeqtor.Inspeqtor) error { return statsd.Export(conn, ins) }) newi.Listen("shutdown", func(ins *inspeqtor.Inspeqtor) error { return conn.Close() }) return nil }
/* Parses the service-specific rules in /etc/inspeqtor/services.d/*.inq */ func ParseServices(global *ConfigFile, confDir string) ([]Checkable, error) { util.Debug("Parsing config in " + confDir) files, err := filepath.Glob(confDir + "/*.inq") if err != nil { return nil, err } var checks []Checkable for _, filename := range files { util.DebugDebug("Parsing " + filename) data, err := ioutil.ReadFile(filename) if err != nil { return nil, err } s := lexer.NewLexer([]byte(data)) p := parser.NewParser() obj, err := p.Parse(s) if err != nil { util.Warn("Unable to parse " + filename + ": " + err.Error()) continue } switch x := obj.(type) { case *ast.ProcessCheck: svc, err := BuildService(global, x) if err != nil { return nil, err } util.DebugDebug("Service: %+v", *svc) checks = append(checks, svc) default: return nil, fmt.Errorf("Invalid configuration file: %s", filename) } } return checks, nil }
func (svc *Service) Verify() []*Event { events := []*Event{} if svc.Process.Status != services.Up { // we probably shouldn't verify anything that isn't actually Up util.Debug("%s is %s, skipping...", svc.Name(), svc.Process.Status) return events } for _, r := range svc.Rules() { evt := r.Check(svc.CycleTime()) if evt != nil { events = append(events, evt) for _, a := range r.Actions { err := a.Trigger(evt) if err != nil { util.Warn("Error firing event: %s", err.Error()) } } } } return events }
func sendCampfireAlert(url, token string, msg map[string]map[string]string) error { util.Debug("Sending campfire alert to %s", url) client := &http.Client{} jsonMsg, err := json.Marshal(msg) if err != nil { return err } req, err := http.NewRequest("POST", url, bytes.NewReader(jsonMsg)) if err != nil { return err } req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", "Inspeqtor Pro") req.SetBasicAuth(token, "X") resp, err := client.Do(req) if resp != nil { if resp.StatusCode != 201 { util.Warn("Unable to send campfire alert: %d", resp.StatusCode) } resp.Body.Close() } return err }