func check(jobs map[string]*Job) time.Duration { min := time.Hour for _, j := range jobs { now := time.Now() due := j.LastRun.Add(j.Interval) if due.After(now) && min > due.Sub(now) { // calculate the delay time until the next job check min = due.Sub(now) } if due.Before(now) && j.state == inspeqtor.Ok { util.Warn("Recurring job \"%s\" is overdue", j.JobName) j.state = inspeqtor.Triggered err := j.alert(JobOverdue) if err != nil { util.Warn(fmt.Sprintf("Error firing cron job alert: %s", err.Error())) } } if !due.Before(now) && j.state == inspeqtor.Triggered { util.Info("Recurring job \"%s\" has recovered", j.JobName) err := j.alert(JobRan) if err != nil { util.Warn(fmt.Sprintf("Error firing cron job alert: %s", err.Error())) } j.state = inspeqtor.Ok } } return min }
func reload(i *Inspeqtor) { util.Info(Name + " reloading") newi, err := New(i.RootDir, i.SocketPath) if err != nil { util.Warn("Unable to reload: %s", err.Error()) return } err = newi.Parse() if err != nil { util.Warn("Unable to reload: %s", err.Error()) return } // we're reloading and newcopy will become the new // singleton. Pro hooks into this to reload its features too. for _, callback := range Reloaders { err := callback(i, newi) if err != nil { util.Warn("Unable to reload: %s", err.Error()) return } } // TODO proper reloading would not throw away the existing metric data // in i but defining new metrics can change the storage tree. Implement // deep metric tree ring buffer sync if possible in basicReloader? i.Shutdown() newi.Start() }
func (rs *redisSource) runCli(funk executor) (metrics.Map, error) { sout, err := funk("redis-cli", rs.buildArgs(), nil) lines, err := util.ReadLines(sout) if err != nil { return nil, err } values := map[string]float64{} for _, line := range lines { if line == "" || line[0] == '#' { continue } parts := strings.Split(line, ":") if rs.metrics[parts[0]] { val, err := strconv.ParseInt(parts[1], 10, 64) if err != nil { return nil, errors.New("Invalid metric input for '" + line + "': " + err.Error()) } values[parts[0]] = float64(val) } } if len(rs.metrics) > len(values) { for k := range rs.metrics { if _, ok := values[k]; !ok { util.Warn("Could not find metric redis(%s), did you spell it right?", k) } } } return values, nil }
func (h *Host) Collect(silenced bool, completeCallback func(Checkable)) { defer completeCallback(h) err := h.Metrics().Collect(0) if err != nil { util.Warn("Error collecting host metrics: %s", err.Error()) } }
/* Resolve each defined service to its managing init system. Called only at startup, this is what maps services to init and fires ProcessDoesNotExist events. */ func (svc *Service) Resolve(mgrs []services.InitSystem) error { for _, sm := range mgrs { // TODO There's a bizarre race condition here. Figure out // why this is necessary. We shouldn't be multi-threaded yet. if sm == nil { continue } ps, err := sm.LookupService(svc.Name()) if err != nil { serr := err.(*services.ServiceError) if serr.Err == services.ErrServiceNotFound { util.Debug(sm.Name() + " doesn't have " + svc.Name()) continue } return err } util.Info("Found %s/%s with status %s", sm.Name(), svc.Name(), ps) svc.Manager = sm svc.Transition(ps, func(et EventType) { counters.Add("events", 1) err = svc.EventHandler.Trigger(&Event{et, svc, nil}) if err != nil { util.Warn("Error firing event: %s", err.Error()) } }) break } if svc.Manager == nil { return fmt.Errorf("Could not find service %s, did you misspell it?", svc.Name()) } return nil }
func recoveredHandler(rule *Rule, tripped bool) *Event { if tripped && rule.TrippedCount == rule.CycleCount { util.Warn("%s[%s] flapped. Current value = %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue) rule.State = Triggered return nil } rule.State = Ok return &Event{RuleRecovered, rule.Entity, rule} }
func (i *Inspeqtor) safelyAccept() bool { defer func() { if err := recover(); err != nil { // TODO Is there a way to print out the backtrace of the goroutine where it crashed? util.Warn("Command crashed:\n%s", err) } }() return i.acceptCommand() }
func (store *storage) Get(family string, name string) float64 { metric, _ := store.find(family, name) if metric == nil { // This can happen when using an Inspeqtor Pro .inq file // with Inspeqtor, if the metric only exists in Pro (e.g. memstats) util.Warn("BUG: Metric %s:%s does not exist", family, name) return 0 } return metric.Get() }
func parseValue(ast ast.Config, store *uint, name string, def uint) { if val, has := ast.Variables[name]; has { ival, err := strconv.ParseUint(val, 10, 32) if err != nil { util.Warn("Invalid %s: %d", name, val) ival = uint64(def) } *store = uint(ival) } }
func okHandler(rule *Rule, tripped bool) *Event { if tripped && rule.TrippedCount == rule.CycleCount { util.Warn("%s[%s] triggered. Current value = %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue) rule.State = Triggered return &Event{RuleFailed, rule.Entity, rule} } if tripped { util.Debug("%s[%s] tripped. Current: %.1f, Threshold: %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue, rule.Threshold) } return nil }
func (i *Inspeqtor) Shutdown() { close(i.Stopping) if i.Socket != nil { err := i.Socket.Close() if err != nil { util.Warn(err.Error()) } } if i.Expose != nil { err := i.Expose.Close() if err != nil { util.Warn(err.Error()) } } i.Fire("shutdown") // let other goroutines log their exit time.Sleep(time.Millisecond) }
func (i *Inspeqtor) Start() { util.Debug("Starting command socket") err := i.openSocket(i.SocketPath) if err != nil { util.Warn("Could not create Unix socket: %s", err.Error()) exit(i) } go func() { for { if !i.safelyAccept() { util.Debug("Shutting down command socket") return } } }() // if expose_port is 0, disable the feature altogether if i.GlobalConfig.ExposePort != 0 { sock, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", i.GlobalConfig.ExposePort)) if err != nil { util.Warn("Could not listen on port %d: %s", i.GlobalConfig.ExposePort, err.Error()) exit(i) } i.Expose = sock go func() { // TODO How do we error handling here? util.Info("Expose now available at port %d", i.GlobalConfig.ExposePort) err := http.Serve(i.Expose, nil) // Don't log an "error" when we shut down normally and close the socket if err != nil && !strings.Contains(err.Error(), "use of closed network") { util.Warn("HTTP server error: %s", err.Error()) } }() } util.Debug("Starting main run loop") go i.runLoop() Singleton = i }
func Detect() []InitSystem { var inits []InitSystem for name, funk := range SupportedInits { sm, err := funk() if err != nil { util.Warn("Couldn't detect %s: %s", name, err.Error()) continue } if sm != nil { inits = append(inits, sm) } } if len(inits) == 0 { util.Warn("No init system detected. Inspeqtor cannot control any services!") } return inits }
func (svc *Service) Reload() error { go func() { util.Debug("Reloading %s", svc.Name()) err := svc.Manager.Reload(svc.Name()) if err != nil { util.Warn(err.Error()) } else { util.DebugDebug("Reloaded %s", svc.Name()) } }() return nil }
func parseJobs(global *inspeqtor.ConfigFile, confDir string) (map[string]*Job, error) { util.Debug("Parsing jobs in " + confDir) files, err := filepath.Glob(confDir + "/jobs.d/*.inq") if err != nil { return nil, err } jobs := map[string]*Job{} for _, filename := range files { util.DebugDebug("Parsing " + filename) data, err := ioutil.ReadFile(filename) if err != nil { return nil, err } s := lexer.NewLexer([]byte(data)) p := parser.NewParser() obj, err := p.Parse(s) if err != nil { util.Warn("Unable to parse " + filename + ": " + err.Error()) continue } astcontent := obj.(*ast.Content) for _, astjob := range astcontent.Jobs { if _, ok := jobs[astjob.Name]; ok { return nil, fmt.Errorf("Duplicate job %s", astjob.Name) } j := New(astjob.Name, astjob.Interval, astcontent.Parameters) owner := j.Parameters["owner"] route := global.AlertRoutes[owner] if owner == "" && route == nil { return nil, fmt.Errorf("No default alert route configured!") } if route == nil { return nil, fmt.Errorf("No such alert route: %s", owner) } alert, err := inspeqtor.Actions["alert"](j, route) if err != nil { return nil, err } j.alerter = alert jobs[astjob.Name] = j } } return jobs, nil }
func sendEmail(e *EmailNotifier, doc bytes.Buffer) error { if strings.Index(e.To, "@example.com") > 0 { util.Warn("Invalid email configured: %s", e.To) util.Warn(string(doc.Bytes())) } else { util.Debug("Sending email to %s", e.To) util.Debug("Sending email:\n%s", string(doc.Bytes())) if e.Username != "" { auth := smtp.PlainAuth("", e.Username, e.Password, e.Host) err := smtp.SendMail(e.Host+":"+e.TLSPort, auth, e.From, []string{e.To}, doc.Bytes()) if err != nil { return err } } else { err := smtp.SendMail(e.Host+":25", nil, e.From, []string{e.To}, doc.Bytes()) if err != nil { return err } } } return nil }
func (svc *Service) Restart() error { svc.Process.Pid = 0 svc.Process.Status = services.Starting go func() { util.Debug("Restarting %s", svc.Name()) err := svc.Manager.Restart(svc.Name()) if err != nil { util.Warn(err.Error()) } else { util.DebugDebug("Restarted %s", svc.Name()) } }() return nil }
func (i *Inspeqtor) acceptCommand() bool { c, err := i.Socket.Accept() if err != nil { select { case <-i.Stopping: // we're stopping or reloading, no big deal... default: util.Warn("%v", err) } return false } defer c.Close() c.SetDeadline(time.Now().Add(2 * time.Second)) reader := bufio.NewReader(c) line, err := reader.ReadString('\n') if err != nil { util.Info("Did not receive command line in time: %s", err.Error()) return true } fields := strings.Fields(line) if len(fields) == 0 { showHelp(i, []string{}, c) return true } funk := CommandHandlers[fields[0]] if funk == nil { util.Warn("Unknown command: %s", strings.TrimSpace(line)) io.WriteString(c, "Unknown command: "+line) return true } funk(i, fields[1:], c) return true }
func main() { inspeqtor.Name = "Inspeqtor Pro" cli.StartupInfo = func() { } cli.SetupLogging() options := cli.ParseArguments() _, err := verifyLicense(options.ConfigDirectory) if err != nil { util.Warn("Error verifying license file: %s", err) os.Exit(127) } ins, err := inspeqtor.New(options.ConfigDirectory, options.SocketPath) if err != nil { log.Fatalln(err) } err = ins.Parse() if err != nil { log.Fatalln(err) } err = bootstrapJobs(ins, options.ConfigDirectory) if err != nil { log.Fatalln(err) } err = bootstrapStatsd(ins, options.ConfigDirectory) if err != nil { log.Fatalln(err) } err = expose.Bootstrap(ins) if err != nil { log.Fatalln(err) } if options.TestConfig { util.Info("Configuration parsed ok.") os.Exit(0) } else if options.TestAlertRoutes { ins.TestAlertRoutes() } else { ins.Start() inspeqtor.HandleSignals() } }
func (i *Inspeqtor) TestAlertRoutes() int { bad := 0 util.Info("Testing alert routes") for _, route := range i.GlobalConfig.AlertRoutes { nm := route.Name if nm == "" { nm = "default" } util.Debug("Creating notification for %s/%s", route.Channel, nm) notifier, err := Actions["alert"](i.Host, route) if err != nil { bad++ util.Warn("Error creating %s/%s route: %s", route.Channel, nm, err.Error()) continue } util.Debug("Triggering notification for %s/%s", route.Channel, nm) err = notifier.Trigger(&Event{RuleFailed, i.Host, i.Host.Rules()[0]}) if err != nil { bad++ util.Warn("Error firing %s/%s route: %s", route.Channel, nm, err.Error()) } } return bad }
func (h *Host) Verify() []*Event { events := []*Event{} for _, r := range h.Rules() { // When running "make real", the race detector will complain // of a race condition here. I believe it's harmless. evt := r.Check(h.CycleTime()) if evt != nil { events = append(events, evt) for _, a := range r.Actions { err := a.Trigger(evt) if err != nil { util.Warn("Error firing event: %s", err.Error()) } } } } return events }
func sendHipchatAlert(url, token string, msg url.Values) error { util.Debug("Sending hipchat alert to %s", url) client := &http.Client{} req, err := http.NewRequest("POST", url, strings.NewReader(msg.Encode())) if err != nil { return err } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") resp, err := client.Do(req) if resp != nil { if resp.StatusCode != 200 { util.Warn("Unable to send hipchat alert: %d", resp.StatusCode) } resp.Body.Close() } return err }
// this method never returns. // // since we can't test this method in an automated fashion, it should // contain as little logic as possible. func (i *Inspeqtor) runLoop() { util.DebugDebug("Resolving services") for _, svc := range i.Services { err := svc.Resolve(i.ServiceManagers) if err != nil { util.Warn(err.Error()) } } i.scanSystem() for { select { case <-time.After(time.Duration(i.GlobalConfig.CycleTime) * time.Second): i.scanSystem() case <-i.Stopping: util.Debug("Shutting down main run loop") return } } }
/* Parses the service-specific rules in /etc/inspeqtor/services.d/*.inq */ func ParseServices(global *ConfigFile, confDir string) ([]Checkable, error) { util.Debug("Parsing config in " + confDir) files, err := filepath.Glob(confDir + "/*.inq") if err != nil { return nil, err } var checks []Checkable for _, filename := range files { util.DebugDebug("Parsing " + filename) data, err := ioutil.ReadFile(filename) if err != nil { return nil, err } s := lexer.NewLexer([]byte(data)) p := parser.NewParser() obj, err := p.Parse(s) if err != nil { util.Warn("Unable to parse " + filename + ": " + err.Error()) continue } switch x := obj.(type) { case *ast.ProcessCheck: svc, err := BuildService(global, x) if err != nil { return nil, err } util.DebugDebug("Service: %+v", *svc) checks = append(checks, svc) default: return nil, fmt.Errorf("Invalid configuration file: %s", filename) } } return checks, nil }
func (svc *Service) Verify() []*Event { events := []*Event{} if svc.Process.Status != services.Up { // we probably shouldn't verify anything that isn't actually Up util.Debug("%s is %s, skipping...", svc.Name(), svc.Process.Status) return events } for _, r := range svc.Rules() { evt := r.Check(svc.CycleTime()) if evt != nil { events = append(events, evt) for _, a := range r.Actions { err := a.Trigger(evt) if err != nil { util.Warn("Error firing event: %s", err.Error()) } } } } return events }
func (rs *nginxSource) runCli() (metrics.Map, error) { sout, err := rs.client(rs.Hostname, rs.Port, rs.Endpoint) if err != nil { return nil, err } if sout[0] != 0x41 { // first char should be 'A' util.Warn(string(sout)) return nil, errors.New("Unknown nginx status output") } values := map[string]float64{} results := digits.FindAllStringSubmatch(string(sout), 7) if results == nil || len(results) != 7 { return nil, errors.New("Unknown nginx input") } for idx, met := range nginxMetrics { if !rs.metrics[met.Name] { continue } val, err := strconv.ParseInt(results[idx][0], 10, 64) if err != nil { return nil, err } values[met.Name] = float64(val) } if len(rs.metrics) > len(values) { for k := range rs.metrics { if _, ok := values[k]; !ok { util.Info("Could not find metric %s(%s), did you spell it right?", rs.Name(), k) } } } return values, nil }
func sendCampfireAlert(url, token string, msg map[string]map[string]string) error { util.Debug("Sending campfire alert to %s", url) client := &http.Client{} jsonMsg, err := json.Marshal(msg) if err != nil { return err } req, err := http.NewRequest("POST", url, bytes.NewReader(jsonMsg)) if err != nil { return err } req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", "Inspeqtor Pro") req.SetBasicAuth(token, "X") resp, err := client.Do(req) if resp != nil { if resp.StatusCode != 201 { util.Warn("Unable to send campfire alert: %d", resp.StatusCode) } resp.Body.Close() } return err }
func (rs *mysqlSource) runStatus(funk executor) (metrics.Map, error) { args := rs.buildArgs() args = append(args, "-e") args = append(args, "show global status") sout, err := funk("mysql", args, nil) lines, err := util.ReadLines(sout) if err != nil { return nil, err } values := map[string]float64{} for _, line := range lines { if line == "" || line[0] == '#' { continue } parts := strings.Fields(line) if rs.metrics[parts[0]] { val, err := strconv.ParseInt(parts[1], 10, 64) if err != nil { return nil, errors.New("Invalid metric input for '" + line + "': " + err.Error()) } values[parts[0]] = float64(val) } } if len(rs.metrics) > len(values) { for k := range rs.metrics { if _, ok := values[k]; !ok { util.Warn("Could not find metric mysql(%s), did you spell it right?", k) } } } return values, nil }
func (hs *hostStorage) collectMemory() error { ok, err := util.FileExists(hs.path + "/meminfo") if err != nil { return err } if ok { contentBytes, err := ioutil.ReadFile(hs.path + "/meminfo") if err != nil { return err } lines := strings.Split(string(contentBytes), "\n") memMetrics := make(map[string]float64) for _, line := range lines { if line == "" { continue } results := meminfoParser.FindStringSubmatch(line) if results == nil { util.Warn("Unknown input: " + line) continue } val, err := strconv.ParseInt(results[2], 10, 64) if err != nil { util.Warn("Unexpected input: " + results[2] + " in " + line) return err } memMetrics[results[1]] = float64(val) } free := memMetrics["SwapFree"] total := memMetrics["SwapTotal"] if free == 0 { hs.Save("swap", "", 100) } else if free == total { hs.Save("swap", "", 0) } else { hs.Save("swap", "", float64(100-int8(100*(float64(free)/float64(total))))) } } else { cmd := exec.Command("sysctl", "-n", "vm.swapusage") cmd.Env = []string{"LANG=C"} sout, err := util.SafeRun(cmd) if err != nil { return err } lines, err := util.ReadLines(sout) if err != nil { return err } rest := lines[0] matches := swapRegexp.FindStringSubmatch(rest) total := matches[1] rest = matches[2] matches = swapRegexp.FindStringSubmatch(rest) used := matches[1] tot, err := strconv.ParseFloat(total[0:len(total)-1], 64) if err != nil { return err } usd, err := strconv.ParseFloat(used[0:len(used)-1], 64) if err != nil { return err } t := normalizeSwap(tot, rune(total[len(total)-1])) u := normalizeSwap(usd, rune(used[len(used)-1])) if t == 0 { hs.Save("swap", "", 100) } else { hs.Save("swap", "", float64(100*(u/t))) } } return nil }
func buildSparkline(target Checkable, metric string, buf func(string, string) displayable) string { family, name := parseMetric(metric) buff := buf(family, name) if buff == nil { return fmt.Sprintf("Unknown metric: %s\n", metric) } sz := buff.Size() values := make([]float64, sz) for i := 0; i > -sz; i-- { v := buff.At(i) if v == nil { util.Warn("BUG: Nil data in ring buffer: %d %d", sz, i) return "Inspeqtor bug, error building graph\n" } values[-i] = *v } // does not work for some reason, SO to the rescue! //sort.Reverse(sort.Float64Slice(values)) for i, j := 0, len(values)-1; i < j; i, j = i+1, j-1 { values[i], values[j] = values[j], values[i] } var min, max, sum, avg float64 min = math.MaxFloat64 for _, val := range values { if min > val { min = val } if max < val { max = val } sum += val } if len(values) > 0 { avg = sum / float64(len(values)) } var resp bytes.Buffer resp.WriteString(fmt.Sprintf("%s %s min %s max %s avg %s\n", target.Name(), metric, buff.Displayable(min), buff.Displayable(max), buff.Displayable(avg))) runes := []string{"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"} tick := (max - min) / 8 for _, x := range values { diff := int((x - min) / tick) if diff > 7 { diff = 7 } if diff < 0 { diff = 0 } resp.WriteString(runes[diff]) } resp.WriteString("\n") return string(resp.Bytes()) }