Exemple #1
0
// LookupService reports the status of serviceName as seen through init.d.
//
// When no control script named serviceName exists under i.ctlPath, a
// *ServiceError carrying ErrServiceNotFound is returned. Otherwise the
// candidate PID files under i.varrunPath are tried in order and the first
// status produced by readPidFile is returned. If no candidate yields a
// status, &ProcessStatus{0, Down} is returned.
func (i *Initd) LookupService(serviceName string) (*ProcessStatus, error) {
	// Only the boolean from FileExists is consulted; its error is discarded,
	// so a failed check takes the same path as a missing script.
	if found, _ := util.FileExists(i.ctlPath + serviceName); !found {
		// No script in etc/init.d means the service is not under init.d control.
		return nil, &ServiceError{i.Name(), serviceName, ErrServiceNotFound}
	}

	// PID file locations to try under the run directory, in priority order.
	candidates := [...]string{
		i.varrunPath + serviceName + ".pid",
		i.varrunPath + serviceName + "/" + serviceName + ".pid",
	}

	for _, candidate := range candidates {
		status, err := i.readPidFile(candidate)
		switch {
		case err != nil:
			util.Info("Error processing PID file %s: %s", candidate, err.Error())
		case status != nil:
			return status, nil
		default:
			util.Info("No such pidfile %s", candidate)
		}
	}

	return &ProcessStatus{0, Down}, nil
}
Exemple #2
0
// detectInitd checks for an init.d control directory below root.
//
// root is concatenated directly with "etc/init.d/" and "var/run/", so it
// must already end in a path separator.
//
// It returns (nil, nil) when the directory is missing or has no entries, an
// *Initd when at least one entry exists, and a non-nil error when the
// existence check or the glob fails.
func detectInitd(root string) (InitSystem, error) {
	ctlpath := root + "etc/init.d/"
	result, err := util.FileExists(ctlpath)
	if err != nil {
		return nil, err
	}

	if !result {
		util.Debug("init.d not detected in " + ctlpath)
		return nil, nil
	}

	matches, err := filepath.Glob(ctlpath + "*")
	if err != nil {
		return nil, err
	}

	// The directory exists at this point; the only remaining question is
	// whether it holds any scripts at all.
	if len(matches) == 0 {
		util.Info(ctlpath + " exists but appears to be empty")
		return nil, nil
	}

	util.Info("Detected init.d in " + ctlpath)
	return &Initd{ctlpath, root + "var/run/", pidForString}, nil
}
Exemple #3
0
// AddSource returns the daemon-specific source called name, building it from
// config the first time it is requested.
//
// A source already held in ps.daemonSpecific under that name is returned
// unchanged. When Sources has no builder for name, (nil, nil) is returned. A
// builder error is passed straight back to the caller. If the new source
// implements MandatorySource and reports Mandatory(), every metric listed by
// ValidMetrics is declared immediately, as a counter or as a gauge.
func (ps *processStorage) AddSource(name string, config map[string]string) (Source, error) {
	for _, existing := range ps.daemonSpecific {
		if existing.Name() == name {
			return existing, nil
		}
	}

	build := Sources[name]
	if build == nil {
		return nil, nil
	}

	util.Info("Activating metrics for %s", name)
	src, err := build(config)
	if err != nil {
		return nil, err
	}

	if mandatory, ok := src.(MandatorySource); ok && mandatory.Mandatory() {
		util.Debug("Registering all metrics for %s", name)
		for _, desc := range src.ValidMetrics() {
			switch desc.MetricType {
			case Counter:
				ps.DeclareCounter(name, desc.Name, nil, desc.Display)
			default:
				ps.DeclareGauge(name, desc.Name, desc.Display)
			}
		}
	}

	ps.daemonSpecific = append(ps.daemonSpecific, src)
	return src, nil
}
Exemple #4
0
// reload replaces the running instance i with a freshly configured one.
//
// A new Inspeqtor is created from i.RootDir and i.SocketPath and parsed, then
// every callback in Reloaders is invoked with the old and new instances. The
// first failure in any of these steps is logged as a warning and the reload
// is abandoned before i is shut down. On success i is shut down and the new
// instance is started.
func reload(i *Inspeqtor) {
	util.Info(Name + " reloading")

	replacement, err := New(i.RootDir, i.SocketPath)
	if err == nil {
		err = replacement.Parse()
	}
	if err != nil {
		util.Warn("Unable to reload: %s", err.Error())
		return
	}

	// The replacement is about to become the new singleton. Pro hooks into
	// this list to reload its own features as well.
	for _, hook := range Reloaders {
		if err := hook(i, replacement); err != nil {
			util.Warn("Unable to reload: %s", err.Error())
			return
		}
	}

	// TODO proper reloading would not throw away the existing metric data
	// in i but defining new metrics can change the storage tree.  Implement
	// deep metric tree ring buffer sync if possible in basicReloader?
	i.Shutdown()
	replacement.Start()
}
Exemple #5
0
// main sets up logging, parses the command line, then builds and parses an
// Inspeqtor instance; a failure in either of the last two steps is fatal.
// Depending on the options it then exits after the config check, runs the
// alert route test, or starts the instance and hands control to the signal
// handlers.
func main() {
	cli.SetupLogging()
	options := cli.ParseArguments()

	ins, err := inspeqtor.New(options.ConfigDirectory, options.SocketPath)
	if err == nil {
		err = ins.Parse()
	}
	if err != nil {
		log.Fatalln(err)
	}

	switch {
	case options.TestConfig:
		util.Info("Configuration parsed ok.")
		os.Exit(0)
	case options.TestAlertRoutes:
		ins.TestAlertRoutes()
	default:
		// Fire up the Inspeqtor singleton.
		ins.Start()

		// Install the global signal handlers.
		// This call never returns.
		inspeqtor.HandleSignals()
	}
}
Exemple #6
0
// check walks every job, fires overdue/recovered alerts on state changes, and
// returns how long the caller may wait before the next check.
//
// The returned delay is the shortest time until any job that is not yet due
// becomes due, capped at one hour (which is also the result for an empty or
// nil map). A job in state Ok whose due time has passed is moved to Triggered
// and a JobOverdue alert is sent. A job in state Triggered whose due time has
// not passed is sent a JobRan alert and moved back to Ok. Alert failures are
// logged and do not stop the walk.
func check(jobs map[string]*Job) time.Duration {
	delay := time.Hour

	for _, j := range jobs {
		// Sampled per job because sending an alert may take a noticeable
		// amount of time.
		now := time.Now()
		due := j.LastRun.Add(j.Interval)

		if due.After(now) {
			// Track the shortest wait until the next job becomes due.
			if wait := due.Sub(now); wait < delay {
				delay = wait
			}
		}

		overdue := due.Before(now)
		switch {
		case overdue && j.state == inspeqtor.Ok:
			util.Warn("Recurring job \"%s\" is overdue", j.JobName)
			j.state = inspeqtor.Triggered
			if err := j.alert(JobOverdue); err != nil {
				// Pass the error as an argument, not as part of the format
				// string, so a '%' in the message is printed verbatim.
				util.Warn("Error firing cron job alert: %s", err.Error())
			}
		case !overdue && j.state == inspeqtor.Triggered:
			util.Info("Recurring job \"%s\" has recovered", j.JobName)
			if err := j.alert(JobRan); err != nil {
				util.Warn("Error firing cron job alert: %s", err.Error())
			}
			j.state = inspeqtor.Ok
		}
	}
	return delay
}
Exemple #7
0
/*
  Resolve each defined service to its managing init system.  Called only
  at startup, this is what maps services to init and fires ProcessDoesNotExist events.

  The managers are tried in order.  A manager that reports
  services.ErrServiceNotFound is skipped; any other lookup error aborts the
  resolution and is returned as is.  The first manager that knows the service
  becomes svc.Manager and the service transitions to the reported status.  An
  error is returned when svc.Manager is still nil after all managers were tried.
*/
func (svc *Service) Resolve(mgrs []services.InitSystem) error {
	for _, sm := range mgrs {
		// TODO There's a bizarre race condition here. Figure out
		// why this is necessary.  We shouldn't be multi-threaded yet.
		if sm == nil {
			continue
		}

		ps, err := sm.LookupService(svc.Name())
		if err != nil {
			// Checked assertion: an error that is not a *ServiceError is
			// returned to the caller instead of panicking here.
			if serr, ok := err.(*services.ServiceError); ok && serr.Err == services.ErrServiceNotFound {
				util.Debug(sm.Name() + " doesn't have " + svc.Name())
				continue
			}
			return err
		}
		util.Info("Found %s/%s with status %s", sm.Name(), svc.Name(), ps)
		svc.Manager = sm
		svc.Transition(ps, func(et EventType) {
			counters.Add("events", 1)
			// The trigger error stays local to the callback so it cannot
			// overwrite the lookup error of the enclosing loop.
			if err := svc.EventHandler.Trigger(&Event{et, svc, nil}); err != nil {
				util.Warn("Error firing event: %s", err.Error())
			}
		})
		break
	}
	if svc.Manager == nil {
		return fmt.Errorf("Could not find service %s, did you misspell it?", svc.Name())
	}
	return nil
}
Exemple #8
0
// startDeploy handles the start-of-deploy command. It pushes i.SilenceUntil
// forward by GlobalConfig.DeployLength seconds from now, sets the "deploy"
// counter to 1 and writes a confirmation line to resp. args is not used.
func startDeploy(i *Inspeqtor, args []string, resp io.Writer) {
	silence := time.Duration(i.GlobalConfig.DeployLength) * time.Second
	i.SilenceUntil = time.Now().Add(silence)

	deployFlag := counters.Get("deploy").(*expvar.Int)
	deployFlag.Set(1)

	util.Info("Starting deploy")
	io.WriteString(resp, "Starting deploy, now silenced\n")
}
Exemple #9
0
// triggeredHandler processes a check result for a rule that is already in the
// triggered state. While the rule is still tripped it only logs the current
// and threshold values; once it is no longer tripped the rule's State is set
// to Recovered. No event is produced in either case, so the result is always
// nil.
func triggeredHandler(rule *Rule, tripped bool) *Event {
	if tripped {
		util.Debug("%s[%s] still triggered. Current: %.1f, Threshold: %.1f", rule.EntityName(), rule.Metric(), rule.CurrentValue, rule.Threshold)
		return nil
	}

	util.Info("%s[%s] recovered.", rule.EntityName(), rule.Metric())
	rule.State = Recovered
	return nil
}
Exemple #10
0
// finishDeploy handles the end-of-deploy command. It sets i.SilenceUntil to
// GlobalConfig.CycleTime seconds from now, sets the "deploy" counter to 0 and
// writes a confirmation line to resp. args is not used.
func finishDeploy(i *Inspeqtor, args []string, resp io.Writer) {
	// Stay quiet for one more cycle so processes get a little time to settle
	// before alerting resumes. A restart that happened during the deploy
	// should not send email.
	oneCycle := time.Duration(i.GlobalConfig.CycleTime) * time.Second
	i.SilenceUntil = time.Now().Add(oneCycle)

	deployFlag := counters.Get("deploy").(*expvar.Int)
	deployFlag.Set(0)

	util.Info("Finished deploy")
	io.WriteString(resp, "Finished deploy, volume turned to 11\n")
}
Exemple #11
0
// main is the entry point of the Pro binary. It overrides the product name
// and startup banner, sets up logging, parses the command line and verifies
// the license (exiting with status 127 on failure). It then creates the
// Inspeqtor instance and runs the setup steps in order: configuration
// parsing, job bootstrap, statsd bootstrap and expose bootstrap. The first
// failing step is fatal. Finally it either exits after the config check, runs
// the alert route test, or starts the instance and installs the signal
// handlers.
func main() {
	inspeqtor.Name = "Inspeqtor Pro"
	cli.StartupInfo = func() {}

	cli.SetupLogging()
	options := cli.ParseArguments()

	if _, err := verifyLicense(options.ConfigDirectory); err != nil {
		util.Warn("Error verifying license file: %s", err)
		os.Exit(127)
	}

	ins, err := inspeqtor.New(options.ConfigDirectory, options.SocketPath)
	if err != nil {
		log.Fatalln(err)
	}

	// Setup steps, executed strictly in this order.
	steps := []func() error{
		func() error { return ins.Parse() },
		func() error { return bootstrapJobs(ins, options.ConfigDirectory) },
		func() error { return bootstrapStatsd(ins, options.ConfigDirectory) },
		func() error { return expose.Bootstrap(ins) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			log.Fatalln(err)
		}
	}

	switch {
	case options.TestConfig:
		util.Info("Configuration parsed ok.")
		os.Exit(0)
	case options.TestAlertRoutes:
		ins.TestAlertRoutes()
	default:
		ins.Start()
		inspeqtor.HandleSignals()
	}
}
Exemple #12
0
// Parse registers the "job_done" command handler and loads the recurring job
// definitions via parseJobs.
//
// A parseJobs error is returned unchanged. When no jobs are defined the
// result is (nil, nil) and the package-level jobs variable is left as it was.
// Otherwise the parsed jobs are stored in jobs and returned.
func Parse(global *inspeqtor.ConfigFile, confDir string) (map[string]*Job, error) {
	inspeqtor.CommandHandlers["job_done"] = jobDone

	found, err := parseJobs(global, confDir)
	switch {
	case err != nil:
		return nil, err
	case len(found) == 0:
		return nil, nil
	}

	jobs = found
	util.Info("Watching for %d recurring jobs", len(found))
	return found, nil
}
Exemple #13
0
// ParseGlobal loads the global configuration from rootDir + "/inspeqtor.conf".
//
// When the file does not exist, a ConfigFile holding Defaults and a nil alert
// route map is returned. Otherwise the file is lexed and parsed, the
// "log_level" variable (if present) is applied through util.SetLogLevel,
// cycle_time / deploy_length / expose_port are read with fallbacks of 15, 300
// and 4677, and every route is validated and stored by name.
//
// An error is returned when the existence check, the read, the parse or a
// route validation fails, when the parser yields something other than an
// ast.Config, or when two routes share a name.
func ParseGlobal(rootDir string) (*ConfigFile, error) {
	path := rootDir + "/inspeqtor.conf"
	exists, err := util.FileExists(path)
	if err != nil {
		return nil, err
	}

	if !exists {
		// Report the path that was actually checked.
		util.Info("No configuration file found at " + path)
		return &ConfigFile{Defaults, nil}, nil
	}

	util.Debug("Parsing " + path)
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}

	s := lexer.NewLexer(data)
	p := parser.NewParser()
	obj, err := p.Parse(s)
	if err != nil {
		return nil, err
	}
	// Checked assertion, and a local name that does not shadow the ast
	// package.
	parsed, ok := obj.(ast.Config)
	if !ok {
		return nil, fmt.Errorf("unexpected parse result %T for %s", obj, path)
	}

	config := ConfigFile{Defaults, map[string]*AlertRoute{}}

	config.Variables = parsed.Variables
	if val, has := parsed.Variables["log_level"]; has {
		util.SetLogLevel(val)
	}
	parseValue(parsed, &config.CycleTime, "cycle_time", 15)
	parseValue(parsed, &config.DeployLength, "deploy_length", 300)
	parseValue(parsed, &config.ExposePort, "expose_port", 4677)

	for _, v := range parsed.Routes {
		ar, err := ValidateChannel(v.Name, v.Channel, v.Config)
		if err != nil {
			return nil, err
		}
		if _, ok := config.AlertRoutes[v.Name]; ok {
			return nil, fmt.Errorf("Duplicate alert config for '%s'", v.Name)
		}
		config.AlertRoutes[v.Name] = ar
	}
	return &config, nil
}
Exemple #14
0
// detectLaunchd returns a *Launchd when util.Darwin() reports true and
// (nil, nil) otherwise. The Launchd is configured with the current user's
// LaunchAgents directory followed by three system-wide launchd directories.
// A failure to look up the current user is returned as the error. rootDir is
// not used.
func detectLaunchd(rootDir string) (InitSystem, error) {
	if !util.Darwin() {
		return nil, nil
	}
	util.Info("Detected OSX, using launchd")

	current, err := user.Current()
	if err != nil {
		return nil, err
	}

	// Per-user agents come first, then the system-wide locations.
	searchPaths := make([]string, 0, 4)
	searchPaths = append(searchPaths,
		current.HomeDir+"/Library/LaunchAgents",
		"/Library/LaunchAgents",
		"/Library/LaunchDaemons",
		"/System/Library/LaunchDaemons",
	)
	return &Launchd{searchPaths}, nil
}
Exemple #15
0
// Start brings the instance up: it opens the command socket and accepts
// commands in a goroutine, optionally serves HTTP on
// localhost:<GlobalConfig.ExposePort>, launches the main run loop in another
// goroutine and finally publishes i as Singleton. If the command socket or
// the expose listener cannot be opened, a warning is logged and exit(i) is
// called.
func (i *Inspeqtor) Start() {
	util.Debug("Starting command socket")
	if err := i.openSocket(i.SocketPath); err != nil {
		util.Warn("Could not create Unix socket: %s", err.Error())
		exit(i)
	}

	// Keep accepting commands until safelyAccept reports false.
	go func() {
		for i.safelyAccept() {
		}
		util.Debug("Shutting down command socket")
	}()

	// An expose_port of 0 disables the feature altogether.
	if i.GlobalConfig.ExposePort != 0 {
		addr := fmt.Sprintf("localhost:%d", i.GlobalConfig.ExposePort)
		listener, err := net.Listen("tcp", addr)
		if err != nil {
			util.Warn("Could not listen on port %d: %s", i.GlobalConfig.ExposePort, err.Error())
			exit(i)
		}
		i.Expose = listener

		go func() {
			// TODO How do we error handling here?
			util.Info("Expose now available at port %d", i.GlobalConfig.ExposePort)
			serveErr := http.Serve(i.Expose, nil)
			if serveErr == nil {
				return
			}
			// A normal shutdown closes the socket; that is not worth a warning.
			if strings.Contains(serveErr.Error(), "use of closed network") {
				return
			}
			util.Warn("HTTP server error: %s", serveErr.Error())
		}()
	}

	util.Debug("Starting main run loop")
	go i.runLoop()

	Singleton = i
}
Exemple #16
0
// statsdReload wires statsd export into newi. When the "statsd_location"
// variable is not configured it does nothing and returns nil. Otherwise it
// dials that location, registers a "cycleComplete" listener that exports
// metrics over the connection and a "shutdown" listener that closes it. A
// dial failure is returned to the caller. The first argument is ignored.
func statsdReload(_ *inspeqtor.Inspeqtor, newi *inspeqtor.Inspeqtor) error {
	location, configured := newi.GlobalConfig.Variables["statsd_location"]
	if !configured {
		util.Debug("No statsd_location configured, skipping...")
		return nil
	}

	util.Info("Pushing metrics to statsd at %s", location)
	conn, err := statsd.Dial(location)
	if err != nil {
		return err
	}

	export := func(ins *inspeqtor.Inspeqtor) error {
		return statsd.Export(conn, ins)
	}
	closeConn := func(ins *inspeqtor.Inspeqtor) error {
		return conn.Close()
	}
	newi.Listen("cycleComplete", export)
	newi.Listen("shutdown", closeConn)

	return nil
}
Exemple #17
0
// collectCPU reads the first line of hs.path + "/stat" and saves the total
// CPU counter plus the user, system, iowait and steal counters under the
// "cpu" family.
//
// When the stat file does not exist, an informational message is logged and
// nil is returned. An error from the existence check or from reading the file
// is returned. A counter that is missing from the line, or that does not
// parse as an integer, is recorded the same way a parse failure always was:
// with whatever strconv.ParseInt yields, which is 0 for a missing or
// malformed field.
func (hs *hostStorage) collectCPU() error {
	statPath := hs.path + "/stat"
	ok, err := util.FileExists(statPath)
	if err != nil {
		return err
	}

	if !ok {
		// TODO
		util.Info("Cannot collect host CPU metrics, not implemented on this platform")
		return nil
	}

	contents, err := ioutil.ReadFile(statPath)
	if err != nil {
		return err
	}

	// strings.Split always yields at least one element, so lines[0] is safe
	// even for an empty file; the field list, however, may be short or empty.
	lines := strings.Split(string(contents), "\n")
	fields := strings.Fields(lines[0])

	// counter returns the integer at position idx, or 0 when the line does
	// not have that many fields, instead of indexing out of range.
	counter := func(idx int) int64 {
		if idx >= len(fields) {
			return 0
		}
		v, _ := strconv.ParseInt(fields[idx], 10, 64)
		return v
	}

	user := counter(1)
	nice := counter(2)
	system := counter(3)
	// Position 4 is deliberately left out of the total, as before.
	iowait := counter(5)
	irq := counter(6)
	softIrq := counter(7)
	steal := counter(8)
	total := user + nice + system + iowait + irq + softIrq + steal

	// These are the five I can envision writing rules against.
	// Open an issue if you want access to the other values.
	hs.Save("cpu", "", float64(total))
	hs.Save("cpu", "user", float64(user))
	hs.Save("cpu", "system", float64(system))
	hs.Save("cpu", "iowait", float64(iowait))
	hs.Save("cpu", "steal", float64(steal))
	return nil
}
Exemple #18
0
// runCli fetches the nginx status output through rs.client and converts it
// into a metric map.
//
// The output must be non-empty, start with 'A', and contain exactly seven
// matches of the digits pattern. The matches are assigned to nginxMetrics by
// position, and only metrics enabled in rs.metrics are returned. Requested
// metrics that could not be filled are reported with an informational log
// line.
//
// An error is returned when the client call fails, when the output is empty
// or does not have the expected shape, or when a matched value does not parse
// as an integer.
func (rs *nginxSource) runCli() (metrics.Map, error) {
	sout, err := rs.client(rs.Hostname, rs.Port, rs.Endpoint)
	if err != nil {
		return nil, err
	}
	// Guard the index: an empty response is rejected like any other
	// unexpected output instead of panicking on sout[0].
	if len(sout) == 0 || sout[0] != 'A' {
		// Log the output as an argument so a '%' in it is printed verbatim.
		util.Warn("%s", string(sout))
		return nil, errors.New("Unknown nginx status output")
	}

	values := map[string]float64{}
	results := digits.FindAllStringSubmatch(string(sout), 7)
	if len(results) != 7 {
		return nil, errors.New("Unknown nginx input")
	}

	for idx, met := range nginxMetrics {
		if idx >= len(results) {
			// NOTE(review): only reachable if nginxMetrics has more entries
			// than the seven matches; confirm against its definition.
			break
		}
		if !rs.metrics[met.Name] {
			continue
		}
		val, err := strconv.ParseInt(results[idx][0], 10, 64)
		if err != nil {
			return nil, err
		}
		values[met.Name] = float64(val)
	}

	if len(rs.metrics) > len(values) {
		for k := range rs.metrics {
			if _, ok := values[k]; !ok {
				util.Info("Could not find metric %s(%s), did you spell it right?", rs.Name(), k)
			}
		}
	}

	return values, nil
}
Exemple #19
0
// acceptCommand accepts one connection on the command socket and serves a
// single command on it.
//
// It returns false only when Accept fails; that failure is logged as a
// warning unless i.Stopping is ready to receive from. In every other case it
// returns true after closing the connection. The command line must arrive
// within a two second deadline. An empty line shows the help, an unknown
// command is reported back to the client, and a known command is dispatched
// to its handler with the remaining words as arguments.
func (i *Inspeqtor) acceptCommand() bool {
	conn, err := i.Socket.Accept()
	if err != nil {
		select {
		case <-i.Stopping:
			// Stopping or reloading; nothing worth reporting.
		default:
			util.Warn("%v", err)
		}
		return false
	}
	defer conn.Close()
	conn.SetDeadline(time.Now().Add(2 * time.Second))

	line, err := bufio.NewReader(conn).ReadString('\n')
	if err != nil {
		util.Info("Did not receive command line in time: %s", err.Error())
		return true
	}

	words := strings.Fields(line)
	if len(words) == 0 {
		showHelp(i, []string{}, conn)
		return true
	}

	if handler := CommandHandlers[words[0]]; handler != nil {
		handler(i, words[1:], conn)
	} else {
		util.Warn("Unknown command: %s", strings.TrimSpace(line))
		io.WriteString(conn, "Unknown command: "+line)
	}
	return true
}
Exemple #20
0
// TestAlertRoutes sends a test RuleFailed event through every configured
// alert route and returns the number of routes that failed, either while the
// notifier was being created or while it was triggered. A route without a
// name is labelled "default" in the log output.
func (i *Inspeqtor) TestAlertRoutes() int {
	failures := 0
	util.Info("Testing alert routes")

	for _, route := range i.GlobalConfig.AlertRoutes {
		label := route.Name
		if label == "" {
			label = "default"
		}

		util.Debug("Creating notification for %s/%s", route.Channel, label)
		notifier, err := Actions["alert"](i.Host, route)
		if err != nil {
			failures++
			util.Warn("Error creating %s/%s route: %s", route.Channel, label, err.Error())
			continue
		}

		util.Debug("Triggering notification for %s/%s", route.Channel, label)
		// NOTE(review): the first host rule is used as the sample rule; this
		// presumably panics if the host has no rules — confirm.
		sample := &Event{RuleFailed, i.Host, i.Host.Rules()[0]}
		if err := notifier.Trigger(sample); err != nil {
			failures++
			util.Warn("Error firing %s/%s route: %s", route.Channel, label, err.Error())
		}
	}
	return failures
}
Exemple #21
0
// runCli runs "nc <host> <port>" through funk with "stats\n" as input and
// converts the reply into a metric map.
//
// Only non-empty lines starting with 'S' that have at least three
// whitespace-separated fields are considered: the second field is the metric
// name and the third its value. Metrics not enabled in rs.metrics are
// ignored. Requested metrics missing from the reply are reported with an
// informational log line.
//
// An error is returned when the command fails, when its output cannot be
// split into lines, or when the value of a requested metric does not parse as
// a float.
func (rs *memcachedSource) runCli(funk executor) (metrics.Map, error) {
	sout, err := funk("nc", []string{rs.Hostname, rs.Port}, []byte("stats\n"))
	if err != nil {
		return nil, err
	}
	lines, err := util.ReadLines(sout)
	if err != nil {
		return nil, err
	}

	values := map[string]float64{}

	for _, line := range lines {
		if line == "" || line[0] != 'S' {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) < 3 {
			// An 'S' line without both a name and a value cannot be a
			// metric; skip it rather than index past the end of parts.
			continue
		}
		name := parts[1]
		if !rs.metrics[name] {
			continue
		}
		val, err := strconv.ParseFloat(parts[2], 64)
		if err != nil {
			return nil, errors.New("Invalid metric input for '" + line + "': " + err.Error())
		}
		values[name] = val
	}

	if len(rs.metrics) > len(values) {
		for k := range rs.metrics {
			if _, ok := values[k]; !ok {
				util.Info("Could not find metric %s(%s), did you spell it right?", rs.Name(), k)
			}
		}
	}

	return values, nil
}
Exemple #22
0
// detectUpstart returns an *Upstart rooted at path when path exists and holds
// at least one "*.conf" file. It returns (nil, nil) when path is missing or
// has no such file, and a non-nil error when the existence check or the glob
// fails.
func detectUpstart(path string) (InitSystem, error) {
	exists, err := util.FileExists(path)
	switch {
	case err != nil:
		return nil, err
	case !exists:
		util.Debug("upstart not detected, no " + path)
		return nil, nil
	}

	confs, err := filepath.Glob(path + "/*.conf")
	if err != nil {
		return nil, err
	}
	if len(confs) == 0 {
		util.Debug("upstart not detected, empty " + path)
		return nil, nil
	}

	util.Info("Detected upstart in " + path)
	return &Upstart{path, nil}, nil
}
Exemple #23
0
// detectSystemd returns a *Systemd rooted at path when path exists and holds
// at least one "*.conf" file. It returns (nil, nil) when path is missing or
// has no such file, and a non-nil error when the existence check or the glob
// fails.
func detectSystemd(path string) (InitSystem, error) {
	exists, err := util.FileExists(path)
	switch {
	case err != nil:
		return nil, err
	case !exists:
		util.Debug("systemd not detected, no " + path)
		return nil, nil
	}

	confs, err := filepath.Glob(path + "/*.conf")
	if err != nil {
		return nil, err
	}
	if len(confs) == 0 {
		util.Debug("systemd not detected, empty " + path)
		return nil, nil
	}

	util.Info("Detected systemd in " + path)
	return &Systemd{path, "", ""}, nil
}
Exemple #24
0
// exit logs that the program is exiting, shuts down i and terminates the
// process with status 0. It never returns to its caller.
func exit(i *Inspeqtor) {
	util.Info(Name + " exiting")

	// Shut the instance down before the process ends; os.Exit does not run
	// deferred functions.
	i.Shutdown()
	os.Exit(0)
}
Exemple #25
0
/*
  Collect runs once per service per cycle, in parallel with the other
  services, so it must be thread-safe.  It executes in a goroutine, which
  is why errors are handled and logged here rather than returned.
  completeCallback is always invoked with the service when Collect returns.

  Steps taken each cycle:
  1. if the service is not Up, look it up again and transition to the
     reported status
  2. if it is Up, capture process metrics
  3. if metric capture fails and the process no longer answers signal 0,
     mark the service Down and immediately look for a replacement PID
  4. events raised by a transition are triggered unless silenced is set
*/
func (svc *Service) Collect(silenced bool, completeCallback func(Checkable)) {
	defer completeCallback(svc)

	if svc.Manager == nil {
		// Never resolved at startup, so there is nothing to collect.
		return
	}

	// notify forwards a transition event to the event handler unless alerts
	// are currently silenced.
	notify := func(et EventType) {
		if silenced {
			return
		}
		counters.Add("events", 1)
		if err := svc.EventHandler.Trigger(&Event{et, svc, nil}); err != nil {
			util.Warn("Error firing event: %s", err.Error())
		}
	}

	// refresh asks the manager for the current status and applies it.
	refresh := func() {
		status, err := svc.Manager.LookupService(svc.Name())
		if err != nil {
			util.Warn("%s", err)
			return
		}
		svc.Transition(status, notify)
	}

	if svc.Process.Status != services.Up {
		refresh()
	}

	// The refresh above may have changed the status, so test it again.
	if svc.Process.Status != services.Up {
		return
	}

	merr := svc.Metrics().Collect(svc.Process.Pid)
	if merr == nil {
		return
	}

	// Signal 0 only checks whether the process can still be signalled.
	if err := syscall.Kill(svc.Process.Pid, syscall.Signal(0)); err != nil {
		// Process disappeared in the last cycle, mark it as Down.
		util.Info("Service %s with process %d does not exist: %s", svc.Name(), svc.Process.Pid, err)
		svc.Transition(services.WithStatus(0, services.Down), notify)

		// Immediately try to find the replacement PID so we don't have
		// to wait for another cycle to mark it as Up.
		refresh()
		return
	}

	util.Warn("Error capturing metrics for process %d: %s", svc.Process.Pid, merr)
}