func executeNecessityCheck(wg *sync.WaitGroup, cmd buddha.Command, check buddha.Check, done chan bool, fail chan error) { defer wg.Done() for i := 1; true; i++ { log.Println(log.LevelInfo, "Check %s: checking...", check.String()) err := check.Execute(cmd.Timeout.Duration()) if err != nil { switch e := err.(type) { case buddha.CheckFalse: log.Println(log.LevelInfo, "Check %s: deemed job unnecessary: %s", check.String(), e) done <- false return default: // unexpected failure log.Println(log.LevelInfo, "Check %d/%d: %s: returned error: %s", i, cmd.Failures, check.String(), e) if i < cmd.Failures { log.Println(log.LevelInfo, "Check %d/%d: %s: waiting %s...", i, cmd.Failures, check.String(), cmd.Interval) time.Sleep(cmd.Interval.Duration()) } else { fail <- err return } } } else { log.Println(log.LevelInfo, "Check %s: deemed job necessary", check.String()) done <- true return } } }
func main() { var jobs buddha.Jobs var err error if *ConfigFile != "" { // load manual job configuration file jobs, err = buddha.OpenFile(*ConfigFile) if err != nil { log.Println(log.LevelFail, "fatal: could not read config file %s", *ConfigFile) } } else if *ConfigStdin { // load job configuration from stdin jobs, err = buddha.Open(os.Stdin) if err != nil { log.Println(log.LevelFail, "fatal: could not read config from STDIN") } } else { jobs, err = buddha.OpenDir(*ConfigDir) if err != nil { log.Println(log.LevelFail, "fatal: could not read config directory %s", *ConfigDir) } } if err != nil { log.Println(log.LevelFail, "fatal: %s", err) os.Exit(2) return } // exit with status code of run os.Exit(run(jobs)) }
func (c CheckTCP) Execute(timeout time.Duration) error { conn, err := net.DialTimeout("tcp", c.Addr, timeout) if err != nil { log.Println(log.LevelInfo, "TCP connection failed: %s", err) return CheckFalse(fmt.Sprintf("TCP connection failed: %s", err)) } defer conn.Close() return nil }
func run(jobs buddha.Jobs) int { lock, err := flock.Lock(*LockPath) if err != nil { if err == flock.ErrLocked { log.Println(log.LevelFail, "fatal: another instance of buddha is running") return 2 } log.Println(log.LevelFail, "fatal: could not obtain exclusive lock at %s", *LockPath) log.Println(log.LevelFail, "fatal: %s", err) return 1 } defer lock.Close() // sort jobs by name sort.Sort(jobs) jobsToRun := flag.Args() if len(jobsToRun) == 0 { log.Println(log.LevelFail, "please specify job names, or 'all' to run all") return 2 } // if not running all jobs, filter job list if jobsToRun[0] != "all" { var missing []string jobs, missing = jobs.Select(jobsToRun) if len(missing) > 0 { log.Println(log.LevelInfo, "info: missing jobs %v", missing) } } // perform sanity checks against jobs for i := 0; i < len(jobs); i++ { if jobs[i].Root && (os.Getuid() != 0) { log.Println(log.LevelFail, "fatal: job %s requires root privileges", jobs[i].Name) return 1 } } // execute jobs for i := 0; i < len(jobs); i++ { err := runJob(jobs[i]) if err != nil { log.Println(log.LevelFail, "fatal: job %s failed with unexpected error: %s", jobs[i].Name, err) return 1 } } return 0 }
// pipe exec stdout to log func execStdout(line string) { log.Println(log.LevelInfo, line) }
func runJob(job *buddha.Job) error { log.Println(log.LevelPrim, "Job: %s", job.Name) for _, cmd := range job.Commands { log.Println(log.LevelPrim, "Command: %s", cmd.Name) log.Println(log.LevelScnd, "Executing necessity checks") isNecessaryResults, err := executeChecks(cmd, cmd.Necessity, executeNecessityCheck) if err != nil { log.Println(log.LevelFail, "fatal: unexpected error from necessity check, ending run") return err } if allFalse(isNecessaryResults) { switch *OnUnnecessary { case ContinueBehaviour: log.Println(log.LevelFail, "warning: job unnecessary, continuing anyway") default: log.Println(log.LevelInfo, "Job deemed unnecessary, skipping") continue } } // execute before health checks // these will execute once and depending on --on-before-fail skip this job log.Println(log.LevelScnd, "Executing before checks") checksResults, err := executeChecks(cmd, cmd.Before, executeHealthCheck) if err != nil { log.Println(log.LevelFail, "fatal: unexpected error from before check, ending run") return err } if anyFalse(checksResults) { switch *OnBeforeFail { case StopBehaviour: log.Println(log.LevelFail, "fatal: before returned false, ending run") return nil case ContinueBehaviour: log.Println(log.LevelFail, "warning: before returned false, continuing anyway") default: log.Println(log.LevelFail, "warning: before returned false, skipping job") continue } } // execute command log.Println(log.LevelScnd, "Executing Command: %s %s", cmd.Path, strings.Join(cmd.Args, " ")) cmd.Stdout = execStdout err = cmd.Execute() if err != nil { log.Println(log.LevelFail, "fatal: %s", err) return err } // grace period between executing command and executing health checks/next command log.Println(log.LevelInfo, "Waiting %s grace...", cmd.Grace) time.Sleep(cmd.Grace.Duration()) // execute after health checks log.Println(log.LevelScnd, "Executing after checks") checksResults, err = executeChecks(cmd, cmd.After, executeHealthCheck) if err != nil { log.Println(log.LevelFail, "fatal: unexpected error from after check, ending run. err: %s", err) return err } if anyFalse(checksResults) { if *OnAfterFail == ContinueBehaviour { log.Println(log.LevelFail, "warning: after checks failed, continuing anyway") continue } log.Println(log.LevelFail, "fatal: after checks failed, ending run") return err } } return nil }