// Kill forcefully stops a process func (j *Job) Kill(sig int64) error { var err error switch sig { case -9: log.Debugln("Sending process Kill (-9) signal") err = syscall.Kill(-j.Pgid, syscall.SIGKILL) default: signal := syscall.Signal(sig) err = syscall.Kill(-j.Pgid, signal) } if err != nil { log.Warnf("Error received calling kill on sub-process: %s", err) return err } return nil }
func timerTicker(timer *WallclockTimer) { for t := range timer.ticker.C { timer.incrementTimer() select { case command := <-timer.command: log.Debugln("Command tick at: ", t) log.Debugf("Command received: %s\n", command) switch command { case "reset": log.Debugln("Received a Reset") timer.tick = false timer.elapsedTime = time.Duration(0) case "start": log.Debugln("Received a start") timer.tick = true timer.elapsedTime = time.Duration(0) timer.previousTime = time.Now() case "stop": log.Debugln("Received a stop") timer.tick = false case "resume": log.Debugln("Received a resume") timer.tick = true timer.previousTime = time.Now() default: log.Errorln("Unknown command received") } default: //log.Debugln("No commands received, just keep ticking") } //log.Debugf("Timer elapsed time is: %v", timer.elapsedTime) //log.Debugf("Timer timeout time is: %v", timer.timeoutTime) if timer.elapsedTime > timer.timeoutTime { log.Debugln("Elapsed time has exceeded timeout time.") timer.done <- errors.New("The timer has timed out.") timer.tick = false // Possible bug? Reset timer to equal timeout timer.elapsedTime = timer.timeoutTime } } }
func monitor( signals chan os.Signal, rcChanChan chan chan agents.RemoteControlCommand, job agents.JobControl, psChan chan agents.ProcessStatCommand, timer agents.Timer, done chan error, ) { // Catch any panics here to ensure we kill the child process before going // to our own doom. defer func() { if e := recover(); e != nil { job.Kill(-9) panic(e) } }() var logSampling <-chan time.Time var err error if *stdoutByteLimit > 0 { ticker := time.NewTicker(100 * time.Millisecond) // Replace the time channel with an actual ticker if this is in use logSampling = ticker.C } var rcChan chan agents.RemoteControlCommand for { select { case rcChan = <-rcChanChan: // Catch incoming signals and operate on them as if they were remote commands case sig := <-signals: switch sig { case syscall.SIGINT: log.Debugln("Caught SIGINT, graceful shutdown") // Initiate non-blocking send select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGINT failed to send a sample msg on the psChan") } err = job.Stop() case syscall.SIGTERM: log.Debugln("Caught SIGTERM, end abruptly") job.Kill(-9) case syscall.SIGHUP: log.Debugln("Caught SIGHUP, emit stats") // Initiate non-blocking send select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGHUP failed to send a sample msg on the psChan") } case syscall.SIGQUIT: log.Debugln("Caught SIGQUIT, graceful shutdown") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("SIGQUIT failed to send a sample msg on the psChan") } err = job.Stop() } // Process incoming remote commands, toss unknown requests case cmd := <-rcChan: log.Debugf("Got a command %#v\n", cmd) switch cmd.Command { case "suspend": log.Debugln("RemoteCommand: Suspend") job.Suspend() case "resume": log.Debugln("RemoteCommand: Resume") job.Resume() case "kill": log.Debugln("RemoteCommand: Kill") var args int64 if len(cmd.Arguments) == 0 { args = -9 } else { args, err = strconv.ParseInt(cmd.Arguments[0], 10, 32) if err != nil { log.Warnf("Unable to parse kill command argument[0] into int: %s\n", err) args = -9 } } job.Kill(args) case "stop": log.Debugln("RemoteCommand: Stop") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("RC Stop failed to send a sample msg on the psChan") } if err = job.Stop(); err != nil { log.Fatalf("Error received while stopping sub-process: %s\n", err) } case "sample": log.Debugln("RemoteCommand: Sample") select { case psChan <- agents.ProcessStatCommand{}: log.Debugln("Sending psChan a msg to sample") default: log.Debugln("RC Sample failed to send a sample msg on the psChan") } case "change_sample_rate": log.Debugln("RemoteCommand: Change Stats Sample Rate") if len(cmd.Arguments) > 0 { log.Debugf("change_sample_rate arg[0]: %s\n", cmd.Arguments[0]) d, err := time.ParseDuration(cmd.Arguments[0]) if err == nil { select { case psChan <- agents.ProcessStatCommand{ TimeUpdate: true, NewTime: d}: log.Debugln("Sending psChan a msg to update the ticker") default: log.Debugln("RC change_sample_rate failed to send a msg") } } else { log.Warnf("Unparseable duration argument to command change_sample_rate") } } else { log.Warnf("Missing argument to command change_sample_rate") } case "timer_reset": log.Debugln("RemoteCommand: Timer Reset") if err = timer.Reset(); err != nil { log.Fatalf("Error received from timer calling Reset: %s\n", err) } case "timer_start": log.Debugln("RemoteCommand: Timer Start") if err = timer.Start(); err != nil { log.Fatalf("Error received from timer calling Start: %s\n", err) } case "timer_stop": log.Debugln("RemoteCommand: Timer Stop") if err = timer.Stop(); err != nil { log.Fatalf("Error received from timer calling Stop: %s\n", err) } case "timer_resume": log.Debugln("RemoteCommand: Timer Resume") if err = timer.Resume(); err != nil { log.Fatalf("Error received from timer calling Resume: %s\n", err) } default: log.Debugf("Unknown command: %s\n", cmd) } case timeoutMsg := <-timer.Done(): log.Debugf("Timer timeout message: %s\n", timeoutMsg) if err = job.Stop(); err != nil { log.Fatalf("Error received while stopping sub-process: %v\n", err) // If there was an error stopping the process, kill the porcess. job.Kill(-9) } case jobDone := <-job.Done(): log.Debugln("Command exited gracefully; shutting down.") done <- jobDone case _ = <-logSampling: if job.StdoutByteCount() > 2*(*stdoutByteLimit) { err = job.Kill(-9) } else if job.StdoutByteCount() > *stdoutByteLimit { err = job.Stop() } } } }
func redial(sess session) { var err error var stats agents.ProcessStats var rc *agents.RemoteControl // Initialize mini-router for incoming stats agent requests go func() { for s := range sess.psChan { if stats == nil { log.Warnln("No stats agents available (yet), dropping request") } else if s.TimeUpdate { stats.NewTicker(s.NewTime) } else { stats.Sample() } } }() rcKeys := []string{*rmtKey} if sess.multiRmtKey { rcKeys = deleteEmpty(strings.Split(*rmtKey, ",")) } for { sess.amqpConn, err = amqp.DialConfig(*uri, sess.amqpConfig) if err != nil { log.Warnf("Failed to connect to AMQP: %q", err) // Rate limit reconnection attempts time.Sleep(5 * time.Second) } else { rc, err = agents.NewRemoteControl(sess.amqpConn, rcKeys, *exchange) if err != nil { log.Warnf("Failed creating NewRemoteControl: %s", err) } else { sess.rcChan <- rc.Commands } if stats == nil { // initial setup stats, err = agents.NewProcessStats( sess.amqpConn, *procStatsKey, *exchange, &sess.job, *statsInterval, *msgTimeout, *userJSON, ) if err != nil { log.Warnf("Failed creating NewProcessStats: %s", err) } } else { err = stats.ReinitializeConnection(sess.amqpConn) if err != nil { log.Warnf("Failed to reinitialize process stats: %s", err) } } closings := sess.amqpConn.NotifyClose(make(chan *amqp.Error)) // Wait for close notification and loop back around to reconnect _ = <-closings log.Debugln("Saw a notification for closed connection, looping") } } }
// NewControlledProcess creates the child proc. func NewControlledProcess(cmd string, arguments []string, doneChan chan error, stdoutLimit int64) (JobControl, error) { var err error j := &Job{ nil, nil, 0, 0, doneChan, stdoutLimit, nil, } // Drop command from cmdline arguments and pass the rest as arguments separately var args []string if len(arguments) > 0 { args = arguments[1:] } j.Cmd = exec.Command(cmd, args...) // Collect stdout from the process to redirect to real stdout stdoutpipe, err := j.Cmd.StdoutPipe() if err != nil { return nil, fmt.Errorf("Failed to acquire stdout: %s", err) } stdout := iocontrol.NewMeasuredReader(stdoutpipe) j.stdoutReader = stdout var wg sync.WaitGroup stdin, err := j.Cmd.StdinPipe() if err != nil { return nil, fmt.Errorf("Failed to acquire stdin: %s", err) } stderr, err := j.Cmd.StderrPipe() if err != nil { return nil, fmt.Errorf("Failed to acquire stderr: %s", err) } // Map all child processes under this tree so Kill really ends everything. j.Cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, // Set process group ID } log.Debugf("%#v\n", j.Cmd) // Start the sub-process but don't wait for completion to pickup the Pid // for resource monitoring. err = j.Cmd.Start() if err != nil { return nil, fmt.Errorf("Failed to execute sub-process: %s\n", err) } j.Pid = j.Cmd.Process.Pid j.Pgid, err = syscall.Getpgid(j.Pid) if err != nil { return nil, fmt.Errorf("Failed syscall.Getpgid: %s\n", err) } j.Proc, err = process.NewProcess(int32(j.Pgid)) if err != nil { return nil, fmt.Errorf("Unable to create process.NewProcess: %s\n", err) } wg.Add(1) go func(wg *sync.WaitGroup, r io.Reader) { defer wg.Done() io.Copy(os.Stdout, r) log.Debugln("child closed stdout") }(&wg, stdout) go func(w io.WriteCloser) { io.Copy(w, os.Stdin) }(stdin) wg.Add(1) go func(wg *sync.WaitGroup, r io.Reader) { defer wg.Done() io.Copy(os.Stderr, r) log.Debugln("child closed stderr") }(&wg, stderr) // Background waiting for the job to finish and emit a done channel message // when complete. go func(wg *sync.WaitGroup, j *Job) { log.Debugln("Waiting on wg.Wait()") wg.Wait() log.Debugln("Waiting on Cmd.Wait()") err := j.Cmd.Wait() log.Debugf("Job finished: %q\n", err) j.done <- err }(&wg, j) return j, nil }