Beispiel #1
0
// check inspects every recurring job, alerts on state transitions and returns
// the delay until the next check is needed.
//
// A job whose due time (LastRun + Interval) has passed while its state is Ok
// is marked Triggered and a JobOverdue alert is fired.  A Triggered job whose
// due time is no longer in the past is reset to Ok and a JobRan alert is
// fired.  Alert failures are logged, not returned.
//
// The result is the shortest time until a not-yet-due job becomes due, capped
// at one hour.
func check(jobs map[string]*Job) time.Duration {
	delay := time.Hour

	for _, j := range jobs {
		now := time.Now()
		due := j.LastRun.Add(j.Interval)
		if wait := due.Sub(now); due.After(now) && wait < delay {
			// this job comes due sooner than anything seen so far
			delay = wait
		}

		overdue := due.Before(now)
		switch {
		case overdue && j.state == inspeqtor.Ok:
			util.Warn("Recurring job \"%s\" is overdue", j.JobName)
			j.state = inspeqtor.Triggered
			if err := j.alert(JobOverdue); err != nil {
				// The error text is passed as an argument so that any '%' it
				// contains is never interpreted as a formatting verb.
				util.Warn("Error firing cron job alert: %s", err.Error())
			}
		case !overdue && j.state == inspeqtor.Triggered:
			util.Info("Recurring job \"%s\" has recovered", j.JobName)
			if err := j.alert(JobRan); err != nil {
				util.Warn("Error firing cron job alert: %s", err.Error())
			}
			j.state = inspeqtor.Ok
		}
	}
	return delay
}
Beispiel #2
0
// reload replaces the running instance i with a freshly configured one.
//
// A new Inspeqtor is created from i's root directory and socket path and its
// configuration is parsed; every registered reloader is then called with the
// old and new instances.  If any of those steps fails the problem is logged
// and reload returns before i is shut down.  Otherwise i is shut down and the
// new instance is started.
func reload(i *Inspeqtor) {
	util.Info(Name + " reloading")

	fresh, err := New(i.RootDir, i.SocketPath)
	if err == nil {
		err = fresh.Parse()
	}
	if err != nil {
		util.Warn("Unable to reload: %s", err.Error())
		return
	}

	// The fresh instance is about to become the new singleton.  Pro hooks
	// into this list to reload its own features as well.
	for _, hook := range Reloaders {
		if hookErr := hook(i, fresh); hookErr != nil {
			util.Warn("Unable to reload: %s", hookErr.Error())
			return
		}
	}

	// TODO proper reloading would not throw away the existing metric data
	// in i but defining new metrics can change the storage tree.  Implement
	// deep metric tree ring buffer sync if possible in basicReloader?
	i.Shutdown()
	fresh.Start()
}
Beispiel #3
0
// runCli runs redis-cli through funk and extracts the metrics enabled in
// rs.metrics from its "name:value" output lines.
//
// Blank lines, lines starting with '#' and lines without a ':' separator are
// skipped.  An error is returned when funk reports one, when its output cannot
// be split into lines, or when the value of a requested metric is not a
// base-10 integer.  Requested metrics that never appear in the output are only
// logged.
func (rs *redisSource) runCli(funk executor) (metrics.Map, error) {
	sout, err := funk("redis-cli", rs.buildArgs(), nil)
	if err != nil {
		// previously this error was silently overwritten by the next call
		return nil, err
	}
	lines, err := util.ReadLines(sout)
	if err != nil {
		return nil, err
	}

	values := map[string]float64{}

	for _, line := range lines {
		if line == "" || line[0] == '#' {
			continue
		}
		parts := strings.Split(line, ":")
		if len(parts) < 2 {
			// not a name:value pair, indexing parts[1] would panic
			continue
		}
		if !rs.metrics[parts[0]] {
			continue
		}
		val, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return nil, errors.New("Invalid metric input for '" + line + "': " + err.Error())
		}
		values[parts[0]] = float64(val)
	}

	if len(rs.metrics) > len(values) {
		for k := range rs.metrics {
			if _, ok := values[k]; !ok {
				util.Warn("Could not find metric redis(%s), did you spell it right?", k)
			}
		}
	}

	return values, nil
}
Beispiel #4
0
// Collect gathers the host's metrics.  A collection error is logged rather
// than returned.  completeCallback is always invoked with h when Collect
// finishes.  The silenced flag is not consulted by this implementation.
func (h *Host) Collect(silenced bool, completeCallback func(Checkable)) {
	defer completeCallback(h)

	if err := h.Metrics().Collect(0); err != nil {
		util.Warn("Error collecting host metrics: %s", err.Error())
	}
}
Beispiel #5
0
// Resolve maps the service to the init system that manages it.  Called only
// at startup, this is what maps services to init and fires
// ProcessDoesNotExist events.
//
// Each init system in mgrs is asked for the service in order.  The first one
// that knows it becomes svc.Manager and the reported status is fed through
// svc.Transition, triggering the service's event handler for each resulting
// event.  A lookup error other than "service not found" is returned as is.
// If no init system knows the service an error is returned.
func (svc *Service) Resolve(mgrs []services.InitSystem) error {
	for _, sm := range mgrs {
		// TODO There's a bizarre race condition here. Figure out
		// why this is necessary.  We shouldn't be multi-threaded yet.
		if sm == nil {
			continue
		}

		ps, err := sm.LookupService(svc.Name())
		if err != nil {
			// Only a "not found" ServiceError means "try the next init
			// system".  The checked assertion keeps any other error type
			// from causing a panic; it is simply returned.
			if serr, ok := err.(*services.ServiceError); ok && serr.Err == services.ErrServiceNotFound {
				util.Debug("%s doesn't have %s", sm.Name(), svc.Name())
				continue
			}
			return err
		}
		util.Info("Found %s/%s with status %s", sm.Name(), svc.Name(), ps)
		svc.Manager = sm
		svc.Transition(ps, func(et EventType) {
			counters.Add("events", 1)
			if err := svc.EventHandler.Trigger(&Event{et, svc, nil}); err != nil {
				util.Warn("Error firing event: %s", err.Error())
			}
		})
		break
	}
	if svc.Manager == nil {
		return fmt.Errorf("Could not find service %s, did you misspell it?", svc.Name())
	}
	return nil
}
Beispiel #6
0
// recoveredHandler decides what happens to a rule that is in the process of
// recovering.
//
// If the rule tripped again and its tripped count equals its cycle count, the
// rule has flapped: a warning is logged, the state goes back to Triggered and
// no event is produced.  In every other case the rule is marked Ok and a
// RuleRecovered event is returned.
func recoveredHandler(rule *Rule, tripped bool) *Event {
	flapped := tripped && rule.TrippedCount == rule.CycleCount
	if !flapped {
		rule.State = Ok
		return &Event{RuleRecovered, rule.Entity, rule}
	}

	util.Warn("%s[%s] flapped.  Current value = %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue)
	rule.State = Triggered
	return nil
}
Beispiel #7
0
// safelyAccept runs acceptCommand and recovers from any panic it raises,
// logging the panic value instead of crashing the process.
//
// It returns acceptCommand's result.  Because the result is unnamed, a
// recovered panic makes safelyAccept return false.
func (i *Inspeqtor) safelyAccept() bool {
	defer func() {
		if err := recover(); err != nil {
			// TODO Is there a way to print out the backtrace of the goroutine where it crashed?
			// %v rather than %s: a panic value can be of any type, not just
			// a string or an error.
			util.Warn("Command crashed:\n%v", err)
		}
	}()

	return i.acceptCommand()
}
Beispiel #8
0
// Get returns the current value of the metric identified by family and name.
// If no such metric is found a warning is logged and 0 is returned.
func (store *storage) Get(family string, name string) float64 {
	if metric, _ := store.find(family, name); metric != nil {
		return metric.Get()
	}

	// A missing metric can happen when an Inspeqtor Pro .inq file is used
	// with Inspeqtor and the metric only exists in Pro (e.g. memstats).
	util.Warn("BUG: Metric %s:%s does not exist", family, name)
	return 0
}
Beispiel #9
0
// parseValue reads the variable called name from the parsed configuration and
// stores it in *store as an unsigned integer.
//
// If the variable is present but is not a valid base-10 number that fits in 32
// bits, a warning is logged and def is stored instead.  If the variable is
// absent, *store is left untouched.
func parseValue(ast ast.Config, store *uint, name string, def uint) {
	val, has := ast.Variables[name]
	if !has {
		return
	}

	ival, err := strconv.ParseUint(val, 10, 32)
	if err != nil {
		// val is a string, so it needs a string verb; %d printed garbage
		util.Warn("Invalid %s: %q", name, val)
		ival = uint64(def)
	}
	*store = uint(ival)
}
Beispiel #10
0
// okHandler decides what happens to a rule that is currently Ok.
//
// Nothing happens unless the rule tripped.  A tripped rule whose tripped count
// equals its cycle count is logged, moved to Triggered and reported with a
// RuleFailed event.  Any other tripped rule is only logged at debug level.
func okHandler(rule *Rule, tripped bool) *Event {
	if !tripped {
		return nil
	}

	if rule.TrippedCount != rule.CycleCount {
		util.Debug("%s[%s] tripped. Current: %.1f, Threshold: %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue, rule.Threshold)
		return nil
	}

	util.Warn("%s[%s] triggered.  Current value = %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue)
	rule.State = Triggered
	return &Event{RuleFailed, rule.Entity, rule}
}
Beispiel #11
0
// Shutdown stops the instance: it closes the Stopping channel, closes the
// command socket and the expose listener when they are set, fires the
// "shutdown" event and then pauses briefly.  Close errors are logged, not
// returned.
func (i *Inspeqtor) Shutdown() {
	close(i.Stopping)

	if i.Socket != nil {
		if err := i.Socket.Close(); err != nil {
			// log the text as an argument so a '%' in it is printed verbatim
			util.Warn("%s", err.Error())
		}
	}
	if i.Expose != nil {
		if err := i.Expose.Close(); err != nil {
			util.Warn("%s", err.Error())
		}
	}
	i.Fire("shutdown")

	// let other goroutines log their exit
	time.Sleep(time.Millisecond)
}
Beispiel #12
0
// Start brings the instance up: it opens the command socket, launches the
// goroutine that accepts commands, optionally starts the HTTP expose listener
// on localhost, launches the main run loop and finally registers i as the
// Singleton.  A failure to open either listener is logged and handed to exit.
func (i *Inspeqtor) Start() {
	util.Debug("Starting command socket")
	if err := i.openSocket(i.SocketPath); err != nil {
		util.Warn("Could not create Unix socket: %s", err.Error())
		exit(i)
	}

	go func() {
		// keep serving commands until safelyAccept reports false
		for i.safelyAccept() {
		}
		util.Debug("Shutting down command socket")
	}()

	// if expose_port is 0, disable the feature altogether
	if i.GlobalConfig.ExposePort != 0 {
		addr := fmt.Sprintf("localhost:%d", i.GlobalConfig.ExposePort)
		listener, err := net.Listen("tcp", addr)
		if err != nil {
			util.Warn("Could not listen on port %d: %s", i.GlobalConfig.ExposePort, err.Error())
			exit(i)
		}
		i.Expose = listener

		go func() {
			// TODO How do we error handling here?
			util.Info("Expose now available at port %d", i.GlobalConfig.ExposePort)
			serveErr := http.Serve(i.Expose, nil)
			// Don't log an "error" when we shut down normally and close the socket
			if serveErr == nil || strings.Contains(serveErr.Error(), "use of closed network") {
				return
			}
			util.Warn("HTTP server error: %s", serveErr.Error())
		}()
	}

	util.Debug("Starting main run loop")
	go i.runLoop()

	Singleton = i
}
Beispiel #13
0
// Detect probes every entry in SupportedInits and returns the init systems
// that were found.  A probe that fails is logged and skipped; a probe that
// yields nil without an error is silently skipped.  A warning is logged when
// nothing at all was detected.
func Detect() []InitSystem {
	var found []InitSystem

	for name, probe := range SupportedInits {
		mgr, err := probe()
		switch {
		case err != nil:
			util.Warn("Couldn't detect %s: %s", name, err.Error())
		case mgr != nil:
			found = append(found, mgr)
		}
	}

	if len(found) == 0 {
		util.Warn("No init system detected.  Inspeqtor cannot control any services!")
	}

	return found
}
Beispiel #14
0
// Reload asks the service's manager to reload the service in a background
// goroutine.  It always returns nil immediately; a failure of the reload
// itself is only logged.
func (svc *Service) Reload() error {
	go func() {
		util.Debug("Reloading %s", svc.Name())
		if err := svc.Manager.Reload(svc.Name()); err != nil {
			// log the text as an argument so a '%' in it is printed verbatim
			util.Warn("%s", err.Error())
			return
		}
		util.DebugDebug("Reloaded %s", svc.Name())
	}()
	return nil
}
Beispiel #15
0
// parseJobs loads every job definition matching confDir/jobs.d/*.inq and
// returns the jobs keyed by name, each wired to its alert route.
//
// A file that fails to parse is logged and skipped.  An error is returned when
// a file cannot be read, when the parsed result is not job content, when a job
// name is defined twice, when the job's "owner" parameter does not name a
// configured alert route (or no default route exists), or when the alert
// action cannot be created.
func parseJobs(global *inspeqtor.ConfigFile, confDir string) (map[string]*Job, error) {
	util.Debug("Parsing jobs in %s", confDir)
	files, err := filepath.Glob(confDir + "/jobs.d/*.inq")
	if err != nil {
		return nil, err
	}

	jobs := map[string]*Job{}

	for _, filename := range files {
		util.DebugDebug("Parsing %s", filename)
		data, err := ioutil.ReadFile(filename)
		if err != nil {
			return nil, err
		}

		s := lexer.NewLexer(data)
		p := parser.NewParser()
		obj, err := p.Parse(s)
		if err != nil {
			// file names and parser messages are passed as arguments so a
			// '%' in them is never treated as a formatting verb
			util.Warn("Unable to parse %s: %s", filename, err.Error())
			continue
		}

		// A checked assertion: an unexpected parse result is reported as an
		// error instead of crashing with a panic.
		astcontent, ok := obj.(*ast.Content)
		if !ok {
			return nil, fmt.Errorf("Invalid configuration file: %s", filename)
		}

		for _, astjob := range astcontent.Jobs {
			if _, dup := jobs[astjob.Name]; dup {
				return nil, fmt.Errorf("Duplicate job %s", astjob.Name)
			}

			j := New(astjob.Name, astjob.Interval, astcontent.Parameters)

			owner := j.Parameters["owner"]
			route := global.AlertRoutes[owner]
			if owner == "" && route == nil {
				return nil, fmt.Errorf("No default alert route configured!")
			}
			if route == nil {
				return nil, fmt.Errorf("No such alert route: %s", owner)
			}
			alert, err := inspeqtor.Actions["alert"](j, route)
			if err != nil {
				return nil, err
			}
			j.alerter = alert
			jobs[astjob.Name] = j
		}
	}

	return jobs, nil
}
Beispiel #16
0
// sendEmail delivers doc to e.To over SMTP.
//
// When the recipient contains "@example.com" after its first character,
// nothing is sent: the address and the message are logged as warnings and nil
// is returned.  Otherwise the message is sent with PLAIN authentication to
// e.Host on e.TLSPort when a username is configured, or unauthenticated to
// e.Host on port 25 when it is not.  The SMTP error, if any, is returned.
func sendEmail(e *EmailNotifier, doc bytes.Buffer) error {
	if strings.Index(e.To, "@example.com") > 0 {
		util.Warn("Invalid email configured: %s", e.To)
		// The message body is data, not a format string: it may well
		// contain '%' characters.
		util.Warn("%s", doc.String())
		return nil
	}

	util.Debug("Sending email to %s", e.To)
	util.Debug("Sending email:\n%s", doc.String())

	if e.Username == "" {
		return smtp.SendMail(e.Host+":25", nil, e.From, []string{e.To}, doc.Bytes())
	}

	auth := smtp.PlainAuth("", e.Username, e.Password, e.Host)
	return smtp.SendMail(e.Host+":"+e.TLSPort, auth, e.From, []string{e.To}, doc.Bytes())
}
Beispiel #17
0
// Restart clears the recorded PID, marks the process as Starting and asks the
// service's manager to restart the service in a background goroutine.  It
// always returns nil immediately; a failure of the restart itself is only
// logged.
func (svc *Service) Restart() error {
	svc.Process.Pid = 0
	svc.Process.Status = services.Starting
	go func() {
		util.Debug("Restarting %s", svc.Name())
		if err := svc.Manager.Restart(svc.Name()); err != nil {
			// log the text as an argument so a '%' in it is printed verbatim
			util.Warn("%s", err.Error())
			return
		}
		util.DebugDebug("Restarted %s", svc.Name())
	}()
	return nil
}
Beispiel #18
0
// acceptCommand waits for one connection on the command socket, reads a single
// newline-terminated command line from it and dispatches it to the matching
// entry in CommandHandlers.
//
// It returns false only when accepting the connection fails, and true in every
// other case, including unreadable, empty and unknown commands.  An empty line
// shows the help; an unknown command is logged and echoed back to the client.
// The connection gets a two second deadline and is closed on return.
func (i *Inspeqtor) acceptCommand() bool {
	conn, err := i.Socket.Accept()
	if err != nil {
		select {
		case <-i.Stopping:
			// we're stopping or reloading, no big deal...
		default:
			util.Warn("%v", err)
		}
		return false
	}
	defer conn.Close()
	conn.SetDeadline(time.Now().Add(2 * time.Second))

	line, err := bufio.NewReader(conn).ReadString('\n')
	if err != nil {
		util.Info("Did not receive command line in time: %s", err.Error())
		return true
	}

	fields := strings.Fields(line)
	if len(fields) == 0 {
		showHelp(i, []string{}, conn)
		return true
	}

	if handler := CommandHandlers[fields[0]]; handler != nil {
		handler(i, fields[1:], conn)
	} else {
		util.Warn("Unknown command: %s", strings.TrimSpace(line))
		io.WriteString(conn, "Unknown command: "+line)
	}
	return true
}
Beispiel #19
0
// main is the entry point: it sets up logging, parses the command line,
// verifies the license, builds and parses the Inspeqtor instance and
// bootstraps jobs, statsd and expose.  Depending on the options it then
// reports a successful configuration test, tests the alert routes, or starts
// the instance and waits for signals.  Any setup failure terminates the
// process.
func main() {
	inspeqtor.Name = "Inspeqtor Pro"

	// intentionally empty startup banner
	cli.StartupInfo = func() {
	}

	cli.SetupLogging()
	options := cli.ParseArguments()

	if _, err := verifyLicense(options.ConfigDirectory); err != nil {
		util.Warn("Error verifying license file: %s", err)
		os.Exit(127)
	}

	ins, err := inspeqtor.New(options.ConfigDirectory, options.SocketPath)
	if err != nil {
		log.Fatalln(err)
	}
	if err = ins.Parse(); err != nil {
		log.Fatalln(err)
	}
	if err = bootstrapJobs(ins, options.ConfigDirectory); err != nil {
		log.Fatalln(err)
	}
	if err = bootstrapStatsd(ins, options.ConfigDirectory); err != nil {
		log.Fatalln(err)
	}
	if err = expose.Bootstrap(ins); err != nil {
		log.Fatalln(err)
	}

	switch {
	case options.TestConfig:
		util.Info("Configuration parsed ok.")
		os.Exit(0)
	case options.TestAlertRoutes:
		ins.TestAlertRoutes()
	default:
		ins.Start()
		inspeqtor.HandleSignals()
	}
}
Beispiel #20
0
// TestAlertRoutes sends a test RuleFailed notification through every
// configured alert route and returns the number of routes that failed.
//
// A route counts as failed when its notifier cannot be created, when the host
// has no rule to build the test event from, or when triggering the notifier
// returns an error.  Each failure is logged.  An unnamed route is reported as
// "default".
func (i *Inspeqtor) TestAlertRoutes() int {
	bad := 0
	util.Info("Testing alert routes")
	for _, route := range i.GlobalConfig.AlertRoutes {
		nm := route.Name
		if nm == "" {
			nm = "default"
		}
		util.Debug("Creating notification for %s/%s", route.Channel, nm)
		notifier, err := Actions["alert"](i.Host, route)
		if err != nil {
			bad++
			util.Warn("Error creating %s/%s route: %s", route.Channel, nm, err.Error())
			continue
		}

		// The test event borrows the host's first rule.  Without any rule
		// the index expression would panic, so report the route as failed.
		rules := i.Host.Rules()
		if len(rules) == 0 {
			bad++
			util.Warn("Error firing %s/%s route: %s", route.Channel, nm, "host has no rules to build a test event from")
			continue
		}

		util.Debug("Triggering notification for %s/%s", route.Channel, nm)
		err = notifier.Trigger(&Event{RuleFailed, i.Host, rules[0]})
		if err != nil {
			bad++
			util.Warn("Error firing %s/%s route: %s", route.Channel, nm, err.Error())
		}
	}
	return bad
}
Beispiel #21
0
// Verify checks every host rule against the host's cycle time.  Each rule that
// produces an event has all of its actions triggered with that event; trigger
// errors are logged.  The produced events are returned, as an empty non-nil
// slice when there are none.
func (h *Host) Verify() []*Event {
	events := []*Event{}
	for _, rule := range h.Rules() {
		// When running "make real", the race detector will complain
		// of a race condition here.  I believe it's harmless.
		evt := rule.Check(h.CycleTime())
		if evt == nil {
			continue
		}

		events = append(events, evt)
		for _, action := range rule.Actions {
			if err := action.Trigger(evt); err != nil {
				util.Warn("Error firing event: %s", err.Error())
			}
		}
	}
	return events
}
Beispiel #22
0
// sendHipchatAlert POSTs msg, form-encoded, to url.
//
// It returns the error from building or sending the request.  A response with
// a status other than 200 is only logged; it does not produce an error.  The
// token parameter is not used by this function.
func sendHipchatAlert(url, token string, msg url.Values) error {
	util.Debug("Sending hipchat alert to %s", url)

	payload := strings.NewReader(msg.Encode())
	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	resp, err := (&http.Client{}).Do(req)
	if resp == nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		util.Warn("Unable to send hipchat alert: %d", resp.StatusCode)
	}
	return err
}
Beispiel #23
0
// runLoop resolves every configured service, scans the system once and then
// rescans it every GlobalConfig.CycleTime seconds.  It only returns once the
// Stopping channel is signalled.
//
// since we can't test this method in an automated fashion, it should
// contain as little logic as possible.
func (i *Inspeqtor) runLoop() {
	util.DebugDebug("Resolving services")
	for _, svc := range i.Services {
		if err := svc.Resolve(i.ServiceManagers); err != nil {
			// log the text as an argument so a '%' in it is printed verbatim
			util.Warn("%s", err.Error())
		}
	}

	i.scanSystem()

	for {
		select {
		case <-time.After(time.Duration(i.GlobalConfig.CycleTime) * time.Second):
			i.scanSystem()
		case <-i.Stopping:
			util.Debug("Shutting down main run loop")
			return
		}
	}
}
Beispiel #24
0
// ParseServices parses the service-specific rules in every *.inq file directly
// inside confDir and returns one Checkable per file.
//
// A file that fails to parse is logged and skipped.  An error is returned when
// a file cannot be read, when a file parses to anything other than a process
// check, or when the service cannot be built from it.
func ParseServices(global *ConfigFile, confDir string) ([]Checkable, error) {
	util.Debug("Parsing config in %s", confDir)
	files, err := filepath.Glob(confDir + "/*.inq")
	if err != nil {
		return nil, err
	}

	var checks []Checkable

	for _, filename := range files {
		util.DebugDebug("Parsing %s", filename)
		data, err := ioutil.ReadFile(filename)
		if err != nil {
			return nil, err
		}

		s := lexer.NewLexer(data)
		p := parser.NewParser()
		obj, err := p.Parse(s)
		if err != nil {
			// file names and parser messages are passed as arguments so a
			// '%' in them is never treated as a formatting verb
			util.Warn("Unable to parse %s: %s", filename, err.Error())
			continue
		}

		switch x := obj.(type) {
		case *ast.ProcessCheck:
			svc, err := BuildService(global, x)
			if err != nil {
				return nil, err
			}
			util.DebugDebug("Service: %+v", *svc)
			checks = append(checks, svc)
		default:
			return nil, fmt.Errorf("Invalid configuration file: %s", filename)
		}
	}

	return checks, nil
}
Beispiel #25
0
// Verify checks the service's rules, but only while its process is Up; for any
// other status it logs the fact and returns no events.  Each rule that
// produces an event has all of its actions triggered with that event; trigger
// errors are logged.  The result is never nil.
func (svc *Service) Verify() []*Event {
	events := []*Event{}

	if svc.Process.Status != services.Up {
		// we probably shouldn't verify anything that isn't actually Up
		util.Debug("%s is %s, skipping...", svc.Name(), svc.Process.Status)
		return events
	}

	for _, rule := range svc.Rules() {
		evt := rule.Check(svc.CycleTime())
		if evt == nil {
			continue
		}

		events = append(events, evt)
		for _, action := range rule.Actions {
			if err := action.Trigger(evt); err != nil {
				util.Warn("Error firing event: %s", err.Error())
			}
		}
	}
	return events
}
Beispiel #26
0
// runCli fetches the nginx status output through rs.client and extracts the
// metrics enabled in rs.metrics.
//
// The output must be non-empty, start with 'A' and contain exactly seven
// matches of the digits pattern; the match at index idx supplies the metric at
// the same index in nginxMetrics.  An error is returned when the fetch fails,
// when the output does not have that shape, or when a requested value is not a
// base-10 integer.  Requested metrics that could not be found are only logged.
func (rs *nginxSource) runCli() (metrics.Map, error) {
	sout, err := rs.client(rs.Hostname, rs.Port, rs.Endpoint)
	if err != nil {
		return nil, err
	}
	// Checking the length first keeps an empty response from panicking on
	// sout[0]; the first char should be 'A'.
	if len(sout) == 0 || sout[0] != 'A' {
		// the response is data, never a format string
		util.Warn("%s", string(sout))
		return nil, errors.New("Unknown nginx status output")
	}

	values := map[string]float64{}
	results := digits.FindAllStringSubmatch(string(sout), 7)
	if len(results) != 7 {
		return nil, errors.New("Unknown nginx input")
	}

	for idx, met := range nginxMetrics {
		if !rs.metrics[met.Name] {
			continue
		}
		val, err := strconv.ParseInt(results[idx][0], 10, 64)
		if err != nil {
			return nil, err
		}
		values[met.Name] = float64(val)
	}

	if len(rs.metrics) > len(values) {
		for k := range rs.metrics {
			if _, ok := values[k]; !ok {
				util.Info("Could not find metric %s(%s), did you spell it right?", rs.Name(), k)
			}
		}
	}

	return values, nil
}
Beispiel #27
0
// sendCampfireAlert POSTs msg as JSON to url, authenticating with token as the
// basic-auth user name and "X" as the password.
//
// It returns the error from encoding the message or from building or sending
// the request.  A response with a status other than 201 is only logged; it
// does not produce an error.
func sendCampfireAlert(url, token string, msg map[string]map[string]string) error {
	util.Debug("Sending campfire alert to %s", url)

	payload, err := json.Marshal(msg)
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", url, bytes.NewReader(payload))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", "Inspeqtor Pro")
	req.SetBasicAuth(token, "X")

	resp, err := (&http.Client{}).Do(req)
	if resp == nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 201 {
		util.Warn("Unable to send campfire alert: %d", resp.StatusCode)
	}
	return err
}
Beispiel #28
0
// runStatus runs `mysql ... -e "show global status"` through funk and extracts
// the metrics enabled in rs.metrics from its whitespace-separated
// "name value" output lines.
//
// Blank lines, lines starting with '#' and lines with fewer than two fields
// are skipped.  An error is returned when funk reports one, when its output
// cannot be split into lines, or when the value of a requested metric is not a
// base-10 integer.  Requested metrics that never appear in the output are only
// logged.
func (rs *mysqlSource) runStatus(funk executor) (metrics.Map, error) {
	args := rs.buildArgs()
	args = append(args, "-e", "show global status")
	sout, err := funk("mysql", args, nil)
	if err != nil {
		// previously this error was silently overwritten by the next call
		return nil, err
	}
	lines, err := util.ReadLines(sout)
	if err != nil {
		return nil, err
	}

	values := map[string]float64{}

	for _, line := range lines {
		if line == "" || line[0] == '#' {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) < 2 {
			// whitespace-only or value-less line, indexing would panic
			continue
		}
		if !rs.metrics[parts[0]] {
			continue
		}
		val, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return nil, errors.New("Invalid metric input for '" + line + "': " + err.Error())
		}
		values[parts[0]] = float64(val)
	}

	if len(rs.metrics) > len(values) {
		for k := range rs.metrics {
			if _, ok := values[k]; !ok {
				util.Warn("Could not find metric mysql(%s), did you spell it right?", k)
			}
		}
	}

	return values, nil
}
Beispiel #29
0
// collectMemory saves the percentage of swap in use as the "swap" metric.
//
// When hs.path/meminfo exists, the percentage is derived from its SwapFree and
// SwapTotal entries: a SwapFree of zero (or a missing entry) is saved as 100,
// SwapFree equal to SwapTotal as 0, and anything else as 100 minus the free
// percentage truncated to a whole number.  Lines that do not match
// meminfoParser are logged and skipped.
//
// Otherwise the output of `sysctl -n vm.swapusage` is parsed with swapRegexp,
// first for the total and then for the used amount; each amount is a number
// followed by a one-character unit that normalizeSwap interprets.  A total of
// zero is saved as 100.
//
// An error is returned when a file or command cannot be read or when a number
// cannot be parsed.  Unexpected sysctl output is logged and reported as
// strconv.ErrSyntax instead of causing an index-out-of-range panic.
func (hs *hostStorage) collectMemory() error {
	ok, err := util.FileExists(hs.path + "/meminfo")
	if err != nil {
		return err
	}

	if ok {
		contentBytes, err := ioutil.ReadFile(hs.path + "/meminfo")
		if err != nil {
			return err
		}
		lines := strings.Split(string(contentBytes), "\n")

		memMetrics := make(map[string]float64)
		for _, line := range lines {
			if line == "" {
				continue
			}

			results := meminfoParser.FindStringSubmatch(line)
			if results == nil {
				// file content is data, never a format string
				util.Warn("Unknown input: %s", line)
				continue
			}
			val, err := strconv.ParseInt(results[2], 10, 64)
			if err != nil {
				util.Warn("Unexpected input: %s in %s", results[2], line)
				return err
			}
			memMetrics[results[1]] = float64(val)
		}

		free := memMetrics["SwapFree"]
		total := memMetrics["SwapTotal"]
		switch {
		case free == 0:
			hs.Save("swap", "", 100)
		case free == total:
			hs.Save("swap", "", 0)
		default:
			// the int8 conversion truncates the free percentage
			hs.Save("swap", "", float64(100-int8(100*(free/total))))
		}
		return nil
	}

	cmd := exec.Command("sysctl", "-n", "vm.swapusage")
	cmd.Env = []string{"LANG=C"}
	sout, err := util.SafeRun(cmd)
	if err != nil {
		return err
	}
	lines, err := util.ReadLines(sout)
	if err != nil {
		return err
	}
	if len(lines) == 0 {
		util.Warn("Unexpected input: no output from sysctl vm.swapusage")
		return strconv.ErrSyntax
	}

	// The first match yields the total plus the remainder of the line, the
	// second match on that remainder yields the used amount.
	rest := lines[0]
	matches := swapRegexp.FindStringSubmatch(rest)
	if len(matches) < 3 || matches[1] == "" {
		util.Warn("Unexpected input: %s", rest)
		return strconv.ErrSyntax
	}
	total := matches[1]
	rest = matches[2]

	matches = swapRegexp.FindStringSubmatch(rest)
	if len(matches) < 2 || matches[1] == "" {
		util.Warn("Unexpected input: %s", rest)
		return strconv.ErrSyntax
	}
	used := matches[1]

	// the last character of each amount is its unit
	tot, err := strconv.ParseFloat(total[0:len(total)-1], 64)
	if err != nil {
		return err
	}
	usd, err := strconv.ParseFloat(used[0:len(used)-1], 64)
	if err != nil {
		return err
	}

	t := normalizeSwap(tot, rune(total[len(total)-1]))
	u := normalizeSwap(usd, rune(used[len(used)-1]))
	if t == 0 {
		hs.Save("swap", "", 100)
	} else {
		hs.Save("swap", "", float64(100*(u/t)))
	}

	return nil
}
Beispiel #30
0
// buildSparkline renders the buffered history of metric for target as two
// lines of text: a summary ("<target> <metric> min .. max .. avg ..") and a
// sparkline with one block character per sample.
//
// buf looks up the sample buffer for the metric's family and name.  If it
// returns nil, an "Unknown metric" message is returned instead.  Samples are
// read with At(0), At(-1), ... At(-(Size-1)) and drawn in the reverse of that
// order (presumably oldest first, to be confirmed against the ring buffer).
// A nil sample is logged and yields an error message.
//
// Each sample is scaled into one of eight levels between the smallest and the
// largest value.  When all samples are equal every sample is drawn at the
// lowest level; with no samples min, max and avg are reported as zero.
func buildSparkline(target Checkable, metric string, buf func(string, string) displayable) string {
	family, name := parseMetric(metric)

	buff := buf(family, name)
	if buff == nil {
		return fmt.Sprintf("Unknown metric: %s\n", metric)
	}

	sz := buff.Size()
	values := make([]float64, sz)

	for i := 0; i > -sz; i-- {
		v := buff.At(i)
		if v == nil {
			util.Warn("BUG: Nil data in ring buffer: %d %d", sz, i)
			return "Inspeqtor bug, error building graph\n"
		}
		// Store straight into the mirrored slot so no separate reversal
		// pass is needed: At(0) ends up last, At(-(sz-1)) first.
		values[sz-1+i] = *v
	}

	// Seed the extremes from the data itself; seeding the maximum with zero
	// reported a wrong maximum whenever every sample was negative.
	var lo, hi, sum, avg float64
	if len(values) > 0 {
		lo, hi = values[0], values[0]
		for _, val := range values {
			if val < lo {
				lo = val
			}
			if val > hi {
				hi = val
			}
			sum += val
		}
		avg = sum / float64(len(values))
	}

	var resp bytes.Buffer

	fmt.Fprintf(&resp, "%s %s min %s max %s avg %s\n",
		target.Name(),
		metric,
		buff.Displayable(lo),
		buff.Displayable(hi),
		buff.Displayable(avg))

	runes := []string{"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}
	tick := (hi - lo) / 8

	for _, x := range values {
		// Clamp in floating point before converting: with a zero tick the
		// quotient is NaN, and converting NaN or an out-of-range float to
		// int is implementation-dependent.
		level := 0
		if tick > 0 {
			if scaled := (x - lo) / tick; scaled >= 7 {
				level = 7
			} else if scaled > 0 {
				level = int(scaled)
			}
		}

		resp.WriteString(runes[level])
	}

	resp.WriteString("\n")
	return resp.String()
}