func main() { if *debugMode { log.SetLevel(log.DebugLevel) } else if *noWarn { log.SetLevel(log.ErrorLevel) } // Create channel for ProcessStats to trigger a sample psChan := make(chan agents.ProcessStatCommand) // Incoming remote command channel (new with each reconnect) rcChan := make(chan chan agents.RemoteControlCommand) args := flag.Args() var cmdArgs []string var cmd string if len(args) > 0 { cmd = args[0] } else { log.Fatal("Did you forget a command to run?") return } log.Debugf("cmd: %s cmdArgs: %q\n", cmd, cmdArgs) done := make(chan error) // Initialize job job, err := agents.NewControlledProcess(cmd, args, done, *stdoutByteLimit) if err != nil { log.Fatalf("Failed to create a NewControlledProcess: %s\n", err) return } log.Debugf("%#v\n", job) signals := make(chan os.Signal, 1) signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT) timer, err := agents.NewTimer(*wallclockTimeout) if err != nil { log.Warnln("Error returned creating timeout agent", err) } log.Debugf("Starting timer with timeout of: %v\n", *wallclockTimeout) timer.Start() sess := session{ job: job, exchange: *exchange, rcRoutingKey: *rmtKey, psChan: psChan, rcChan: rcChan, amqpConfig: amqp.Config{ Properties: amqp.Table{ "product": "proc_box", "version": "master", }, }, multiRmtKey: *multiRmtKey, } go redial(sess) go monitor(signals, rcChan, job, psChan, timer, done) err = <-done elapsedTime, _ := timer.ElapsedTime() // Print to standard out fmt.Printf("Task elapsed time: %.2f seconds.\n", elapsedTime.Seconds()) if err != nil { if exiterr, ok := err.(*exec.ExitError); ok { // Non-zero exit code if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { exitStatus := status.ExitStatus() log.Debugf("Exit Status: %d\n", exitStatus) os.Exit(exitStatus) } } else { log.Debugf("cmd.Wait: %v\n", err) } } else { os.Exit(0) } }
func monitor( signals chan os.Signal, rcChanChan chan chan agents.RemoteControlCommand, job agents.JobControl, psChan chan agents.ProcessStatCommand, timer agents.Timer, done chan error, ) { // Catch any panics here to ensure we kill the child process before going // to our own doom. defer func() { if e := recover(); e != nil { job.Kill(-9) panic(e) } }() var logSampling <-chan time.Time var err error if *stdoutByteLimit > 0 { ticker := time.NewTicker(100 * time.Millisecond) // Replace the time channel with an actual ticker if this is in use logSampling = ticker.C } var rcChan chan agents.RemoteControlCommand for { select { case rcChan = <-rcChanChan: // Catch incoming signals and operate on them as if they were remote commands case sig := <-signals: switch sig { case syscall.SIGINT: log.Debugln("Caught SIGINT, graceful shutdown") // Initiate non-blocking send select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGINT failed to send a sample msg on the psChan") } err = job.Stop() case syscall.SIGTERM: log.Debugln("Caught SIGTERM, end abruptly") job.Kill(-9) case syscall.SIGHUP: log.Debugln("Caught SIGHUP, emit stats") // Initiate non-blocking send select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGHUP failed to send a sample msg on the psChan") } case syscall.SIGQUIT: log.Debugln("Caught SIGQUIT, graceful shutdown") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGQUIT failed to send a sample msg on the psChan") } err = job.Stop() } // Process incoming remote commands, toss unknown requests case cmd := <-rcChan: log.Debugf("Got a command %#v\n", cmd) switch cmd.Command { case "suspend": log.Debugln("RemoteCommand: Suspend") job.Suspend() case "resume": log.Debugln("RemoteCommand: Resume") job.Resume() case "kill": log.Debugln("RemoteCommand: Kill") var args int64 if len(cmd.Arguments) == 0 { args = -9 } else { args, err = strconv.ParseInt(cmd.Arguments[0], 10, 32) if err != nil { log.Warnf("Unable to parse kill command argument[0] into int: %s\n", err) args = -9 } } job.Kill(args) case "stop": log.Debugln("RemoteCommand: Stop") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("RC Stop failed to send a sample msg on the psChan") } if err = job.Stop(); err != nil { log.Fatalf("Error received while stopping sub-process: %s\n", err) } case "sample": log.Debugln("RemoteCommand: Sample") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("RC Sample failed to send a sample msg on the psChan") } case "change_sample_rate": log.Debugln("RemoteCommand: Change Stats Sample Rate") if len(cmd.Arguments) > 0 { log.Debugf("change_sample_rate arg[0]: %s\n", cmd.Arguments[0]) d, err := time.ParseDuration(cmd.Arguments[0]) if err == nil { select { case psChan <- agents.ProcessStatCommand{ TimeUpdate: true, NewTime: d}: log.Debugln("Sending psChan a msg to update the ticker") default: log.Debugln("RC change_sample_rate failed to send a msg") } } else { log.Warnf("Unparseable duration argument to command change_sample_rate") } } else { log.Warnf("Missing argument to command change_sample_rate") } case "timer_reset": log.Debugln("RemoteCommand: Timer Reset") if err = timer.Reset(); err != nil { log.Fatalf("Error received from timer calling Reset: %s\n", err) } case "timer_start": log.Debugln("RemoteCommand: Timer Start") if err = timer.Start(); err != nil { log.Fatalf("Error received from timer calling Start: %s\n", err) } case "timer_stop": log.Debugln("RemoteCommand: Timer Stop") if err = timer.Stop(); err != nil { log.Fatalf("Error received from timer calling Stop: %s\n", err) } case "timer_resume": log.Debugln("RemoteCommand: Timer Resume") if err = timer.Resume(); err != nil { log.Fatalf("Error received from timer calling Resume: %s\n", err) } default: log.Debugf("Unknown command: %s\n", cmd) } case timeoutMsg := <-timer.Done(): log.Debugf("Timer timeout message: %s\n", timeoutMsg) if err = job.Stop(); err != nil { log.Fatalf("Error received while stopping sub-process: %v\n", err) // If there was an error stopping the process, kill the porcess. job.Kill(-9) } case jobDone := <-job.Done(): log.Debugln("Command exited gracefully; shutting down.") done <- jobDone case _ = <-logSampling: if job.StdoutByteCount() > 2*(*stdoutByteLimit) { err = job.Kill(-9) } else if job.StdoutByteCount() > *stdoutByteLimit { err = job.Stop() } } } }