Example #1
0
func main() {
	if *debugMode {
		log.SetLevel(log.DebugLevel)
	} else if *noWarn {
		log.SetLevel(log.ErrorLevel)
	}

	// Create channel for ProcessStats to trigger a sample
	psChan := make(chan agents.ProcessStatCommand)
	// Incoming remote command channel (new with each reconnect)
	rcChan := make(chan chan agents.RemoteControlCommand)

	args := flag.Args()
	var cmdArgs []string
	var cmd string
	if len(args) > 0 {
		cmd = args[0]
	} else {
		log.Fatal("Did you forget a command to run?")
		return
	}

	log.Debugf("cmd: %s cmdArgs: %q\n", cmd, cmdArgs)

	done := make(chan error)

	// Initialize job
	job, err := agents.NewControlledProcess(cmd, args, done, *stdoutByteLimit)
	if err != nil {
		log.Fatalf("Failed to create a NewControlledProcess: %s\n", err)
		return
	}

	log.Debugf("%#v\n", job)

	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT)

	timer, err := agents.NewTimer(*wallclockTimeout)
	if err != nil {
		log.Warnln("Error returned creating timeout agent", err)
	}
	log.Debugf("Starting timer with timeout of: %v\n", *wallclockTimeout)
	timer.Start()

	sess := session{
		job:          job,
		exchange:     *exchange,
		rcRoutingKey: *rmtKey,
		psChan:       psChan,
		rcChan:       rcChan,
		amqpConfig: amqp.Config{
			Properties: amqp.Table{
				"product": "proc_box",
				"version": "master",
			},
		},
		multiRmtKey: *multiRmtKey,
	}
	go redial(sess)

	go monitor(signals, rcChan, job, psChan, timer, done)

	err = <-done

	elapsedTime, _ := timer.ElapsedTime()
	// Print to standard out
	fmt.Printf("Task elapsed time: %.2f seconds.\n", elapsedTime.Seconds())

	if err != nil {
		if exiterr, ok := err.(*exec.ExitError); ok {
			// Non-zero exit code
			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
				exitStatus := status.ExitStatus()
				log.Debugf("Exit Status: %d\n", exitStatus)
				os.Exit(exitStatus)
			}
		} else {
			log.Debugf("cmd.Wait: %v\n", err)
		}
	} else {
		os.Exit(0)
	}
}
Example #2
0
func monitor(
	signals chan os.Signal,
	rcChanChan chan chan agents.RemoteControlCommand,
	job agents.JobControl,
	psChan chan agents.ProcessStatCommand,
	timer agents.Timer,
	done chan error,
) {

	// Catch any panics here to ensure we kill the child process before going
	// to our own doom.
	defer func() {
		if e := recover(); e != nil {
			job.Kill(-9)
			panic(e)
		}
	}()
	var logSampling <-chan time.Time
	var err error

	if *stdoutByteLimit > 0 {
		ticker := time.NewTicker(100 * time.Millisecond)
		// Replace the time channel with an actual ticker if this is in use
		logSampling = ticker.C
	}

	var rcChan chan agents.RemoteControlCommand

	for {
		select {
		case rcChan = <-rcChanChan:
		// Catch incoming signals and operate on them as if they were remote commands
		case sig := <-signals:
			switch sig {
			case syscall.SIGINT:
				log.Debugln("Caught SIGINT, graceful shutdown")
				// Initiate non-blocking send
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGINT failed to send a sample msg on the psChan")
				}
				err = job.Stop()
			case syscall.SIGTERM:
				log.Debugln("Caught SIGTERM, end abruptly")
				job.Kill(-9)
			case syscall.SIGHUP:
				log.Debugln("Caught SIGHUP, emit stats")
				// Initiate non-blocking send
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGHUP failed to send a sample msg on the psChan")
				}
			case syscall.SIGQUIT:
				log.Debugln("Caught SIGQUIT, graceful shutdown")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGQUIT failed to send a sample msg on the psChan")
				}
				err = job.Stop()
			}
		// Process incoming remote commands, toss unknown requests
		case cmd := <-rcChan:
			log.Debugf("Got a command %#v\n", cmd)
			switch cmd.Command {
			case "suspend":
				log.Debugln("RemoteCommand: Suspend")
				job.Suspend()
			case "resume":
				log.Debugln("RemoteCommand: Resume")
				job.Resume()
			case "kill":
				log.Debugln("RemoteCommand: Kill")
				var args int64
				if len(cmd.Arguments) == 0 {
					args = -9
				} else {
					args, err = strconv.ParseInt(cmd.Arguments[0], 10, 32)
					if err != nil {
						log.Warnf("Unable to parse kill command argument[0] into int: %s\n", err)
						args = -9
					}
				}
				job.Kill(args)
			case "stop":
				log.Debugln("RemoteCommand: Stop")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("RC Stop failed to send a sample msg on the psChan")
				}
				if err = job.Stop(); err != nil {
					log.Fatalf("Error received while stopping sub-process: %s\n", err)
				}
			case "sample":
				log.Debugln("RemoteCommand: Sample")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("RC Sample failed to send a sample msg on the psChan")
				}
			case "change_sample_rate":
				log.Debugln("RemoteCommand: Change Stats Sample Rate")
				if len(cmd.Arguments) > 0 {
					log.Debugf("change_sample_rate arg[0]: %s\n", cmd.Arguments[0])
					d, err := time.ParseDuration(cmd.Arguments[0])
					if err == nil {
						select {
						case psChan <- agents.ProcessStatCommand{
							TimeUpdate: true,
							NewTime:    d}:
							log.Debugln("Sending psChan a msg to update the ticker")
						default:
							log.Debugln("RC change_sample_rate failed to send a msg")
						}
					} else {
						log.Warnf("Unparseable duration argument to command change_sample_rate")
					}
				} else {
					log.Warnf("Missing argument to command change_sample_rate")
				}
			case "timer_reset":
				log.Debugln("RemoteCommand: Timer Reset")
				if err = timer.Reset(); err != nil {
					log.Fatalf("Error received from timer calling Reset: %s\n", err)
				}
			case "timer_start":
				log.Debugln("RemoteCommand: Timer Start")
				if err = timer.Start(); err != nil {
					log.Fatalf("Error received from timer calling Start: %s\n", err)
				}
			case "timer_stop":
				log.Debugln("RemoteCommand: Timer Stop")
				if err = timer.Stop(); err != nil {
					log.Fatalf("Error received from timer calling Stop: %s\n", err)
				}
			case "timer_resume":
				log.Debugln("RemoteCommand: Timer Resume")
				if err = timer.Resume(); err != nil {
					log.Fatalf("Error received from timer calling Resume: %s\n", err)
				}
			default:
				log.Debugf("Unknown command: %s\n", cmd)
			}
		case timeoutMsg := <-timer.Done():
			log.Debugf("Timer timeout message: %s\n", timeoutMsg)
			if err = job.Stop(); err != nil {
				log.Fatalf("Error received while stopping sub-process: %v\n", err)
				// If there was an error stopping the process, kill the porcess.
				job.Kill(-9)
			}
		case jobDone := <-job.Done():
			log.Debugln("Command exited gracefully; shutting down.")
			done <- jobDone
		case _ = <-logSampling:
			if job.StdoutByteCount() > 2*(*stdoutByteLimit) {
				err = job.Kill(-9)
			} else if job.StdoutByteCount() > *stdoutByteLimit {
				err = job.Stop()
			}
		}
	}
}