Пример #1
0
// Kill delivers a signal to the job's process group.
//
// A sig value of -9 is treated as a request for SIGKILL; any other value is
// converted directly into a syscall.Signal. The signal is addressed to
// -j.Pgid, i.e. to the process group rather than to a single pid. If
// syscall.Kill fails, the error is logged as a warning and returned.
func (j *Job) Kill(sig int64) error {
	target := syscall.Signal(sig)
	if sig == -9 {
		log.Debugln("Sending process Kill (-9) signal")
		target = syscall.SIGKILL
	}

	if killErr := syscall.Kill(-j.Pgid, target); killErr != nil {
		log.Warnf("Error received calling kill on sub-process: %s", killErr)
		return killErr
	}
	return nil
}
Пример #2
0
// timerTicker drives a WallclockTimer from its ticker channel.
//
// For every value received on timer.ticker.C it:
//  1. calls timer.incrementTimer,
//  2. polls timer.command without blocking and applies at most one of the
//     "reset", "start", "stop" or "resume" commands (anything else is
//     logged as an error), and
//  3. if the elapsed time is greater than the timeout, sends an error on
//     timer.done, clears timer.tick and sets the elapsed time equal to the
//     timeout.
//
// The send on timer.done is a plain (blocking) channel send. The function
// returns only if the range over timer.ticker.C ends.
func timerTicker(timer *WallclockTimer) {
	for tickTime := range timer.ticker.C {
		timer.incrementTimer()

		// Non-blocking poll: handle a pending command if there is one,
		// otherwise fall straight through to the timeout check.
		select {
		case cmd := <-timer.command:
			log.Debugln("Command tick at: ", tickTime)
			log.Debugf("Command received: %s\n", cmd)

			switch cmd {
			case "reset":
				log.Debugln("Received a Reset")
				timer.elapsedTime = time.Duration(0)
				timer.tick = false
			case "start":
				log.Debugln("Received a start")
				timer.previousTime = time.Now()
				timer.elapsedTime = time.Duration(0)
				timer.tick = true
			case "stop":
				log.Debugln("Received a stop")
				timer.tick = false
			case "resume":
				log.Debugln("Received a resume")
				timer.previousTime = time.Now()
				timer.tick = true
			default:
				log.Errorln("Unknown command received")
			}
		default:
			// No command pending; just keep ticking.
		}

		// Nothing more to do until the timeout has been exceeded.
		if timer.elapsedTime <= timer.timeoutTime {
			continue
		}

		log.Debugln("Elapsed time has exceeded timeout time.")
		timer.done <- errors.New("The timer has timed out.")
		timer.tick = false
		// Possible bug? Pin the elapsed time to the timeout value.
		timer.elapsedTime = timer.timeoutTime
	}
}
Пример #3
0
// monitor is the supervising event loop for the child job. It never returns
// on its own; it multiplexes the following event sources forever:
//
//   - rcChanChan: delivers the channel on which remote control commands
//     arrive. Until the first value is received rcChan is nil, so the remote
//     command case simply never fires.
//   - signals: SIGINT and SIGQUIT request a stats sample and stop the job,
//     SIGTERM kills it with -9, SIGHUP only requests a stats sample.
//   - rcChan: remote commands ("suspend", "resume", "kill", "stop",
//     "sample", "change_sample_rate", "timer_*"); unknown commands are
//     logged and dropped.
//   - timer.Done(): the wallclock timer expired; the job is stopped and, if
//     stopping fails, killed.
//   - job.Done(): the job finished; its result is forwarded on done.
//   - logSampling: only active when *stdoutByteLimit > 0. Every 100ms the
//     job's stdout byte count is checked; above the limit the job is
//     stopped, above twice the limit it is killed.
//
// All sends on psChan are non-blocking: if nobody is ready to receive, the
// request is dropped and a debug message is logged.
func monitor(
	signals chan os.Signal,
	rcChanChan chan chan agents.RemoteControlCommand,
	job agents.JobControl,
	psChan chan agents.ProcessStatCommand,
	timer agents.Timer,
	done chan error,
) {

	// Catch any panics here to ensure we kill the child process before going
	// to our own doom.
	defer func() {
		if e := recover(); e != nil {
			job.Kill(-9)
			panic(e)
		}
	}()

	// requestSample asks the stats agent for a sample without blocking.
	// source only labels the debug message emitted when the request is
	// dropped.
	requestSample := func(source string) {
		select {
		case psChan <- agents.ProcessStatCommand{}:
			log.Debugln("Sending psChan a msg to sample")
		default:
			log.Debugln(source + " failed to send a sample msg on the psChan")
		}
	}

	// stopJob stops the job and reports (rather than silently discards) a
	// failure to do so.
	stopJob := func() {
		if err := job.Stop(); err != nil {
			log.Warnf("Error received while stopping sub-process: %s\n", err)
		}
	}

	// A nil channel blocks forever, so the sampling case below stays
	// dormant unless a stdout byte limit is configured.
	var logSampling <-chan time.Time
	if *stdoutByteLimit > 0 {
		ticker := time.NewTicker(100 * time.Millisecond)
		defer ticker.Stop()
		// Replace the time channel with an actual ticker if this is in use
		logSampling = ticker.C
	}

	var rcChan chan agents.RemoteControlCommand

	for {
		select {
		case rcChan = <-rcChanChan:
		// Catch incoming signals and operate on them as if they were remote commands
		case sig := <-signals:
			switch sig {
			case syscall.SIGINT:
				log.Debugln("Caught SIGINT, graceful shutdown")
				requestSample("SIGINT")
				stopJob()
			case syscall.SIGTERM:
				log.Debugln("Caught SIGTERM, end abruptly")
				job.Kill(-9)
			case syscall.SIGHUP:
				log.Debugln("Caught SIGHUP, emit stats")
				requestSample("SIGHUP")
			case syscall.SIGQUIT:
				log.Debugln("Caught SIGQUIT, graceful shutdown")
				requestSample("SIGQUIT")
				stopJob()
			}
		// Process incoming remote commands, toss unknown requests
		case cmd := <-rcChan:
			log.Debugf("Got a command %#v\n", cmd)
			switch cmd.Command {
			case "suspend":
				log.Debugln("RemoteCommand: Suspend")
				job.Suspend()
			case "resume":
				log.Debugln("RemoteCommand: Resume")
				job.Resume()
			case "kill":
				log.Debugln("RemoteCommand: Kill")
				// Default to -9 when no argument is given or when it
				// cannot be parsed as an integer.
				sigArg := int64(-9)
				if len(cmd.Arguments) > 0 {
					parsed, parseErr := strconv.ParseInt(cmd.Arguments[0], 10, 32)
					if parseErr != nil {
						log.Warnf("Unable to parse kill command argument[0] into int: %s\n", parseErr)
					} else {
						sigArg = parsed
					}
				}
				job.Kill(sigArg)
			case "stop":
				log.Debugln("RemoteCommand: Stop")
				requestSample("RC Stop")
				if err := job.Stop(); err != nil {
					log.Fatalf("Error received while stopping sub-process: %s\n", err)
				}
			case "sample":
				log.Debugln("RemoteCommand: Sample")
				requestSample("RC Sample")
			case "change_sample_rate":
				log.Debugln("RemoteCommand: Change Stats Sample Rate")
				if len(cmd.Arguments) > 0 {
					log.Debugf("change_sample_rate arg[0]: %s\n", cmd.Arguments[0])
					d, parseErr := time.ParseDuration(cmd.Arguments[0])
					if parseErr == nil {
						select {
						case psChan <- agents.ProcessStatCommand{
							TimeUpdate: true,
							NewTime:    d}:
							log.Debugln("Sending psChan a msg to update the ticker")
						default:
							log.Debugln("RC change_sample_rate failed to send a msg")
						}
					} else {
						log.Warnf("Unparseable duration argument to command change_sample_rate")
					}
				} else {
					log.Warnf("Missing argument to command change_sample_rate")
				}
			case "timer_reset":
				log.Debugln("RemoteCommand: Timer Reset")
				if err := timer.Reset(); err != nil {
					log.Fatalf("Error received from timer calling Reset: %s\n", err)
				}
			case "timer_start":
				log.Debugln("RemoteCommand: Timer Start")
				if err := timer.Start(); err != nil {
					log.Fatalf("Error received from timer calling Start: %s\n", err)
				}
			case "timer_stop":
				log.Debugln("RemoteCommand: Timer Stop")
				if err := timer.Stop(); err != nil {
					log.Fatalf("Error received from timer calling Stop: %s\n", err)
				}
			case "timer_resume":
				log.Debugln("RemoteCommand: Timer Resume")
				if err := timer.Resume(); err != nil {
					log.Fatalf("Error received from timer calling Resume: %s\n", err)
				}
			default:
				log.Debugf("Unknown command: %s\n", cmd)
			}
		case timeoutMsg := <-timer.Done():
			log.Debugf("Timer timeout message: %s\n", timeoutMsg)
			if err := job.Stop(); err != nil {
				// Fatalf terminates the process, so logging fatally here
				// would make the fallback kill unreachable and leave the
				// child running. Warn first, kill the child, and only give
				// up when the kill fails as well.
				log.Warnf("Error received while stopping sub-process: %v\n", err)
				// If there was an error stopping the process, kill the process.
				if killErr := job.Kill(-9); killErr != nil {
					log.Fatalf("Error received while killing sub-process: %v\n", killErr)
				}
			}
		case jobDone := <-job.Done():
			log.Debugln("Command exited gracefully; shutting down.")
			done <- jobDone
		case <-logSampling:
			// Read the counter once so both thresholds are judged against
			// the same value.
			written := job.StdoutByteCount()
			switch {
			case written > 2*(*stdoutByteLimit):
				if err := job.Kill(-9); err != nil {
					log.Warnf("Error received while killing sub-process: %s\n", err)
				}
			case written > *stdoutByteLimit:
				stopJob()
			}
		}
	}
}
Пример #4
0
// redial keeps the AMQP-backed agents of a session connected, forever.
//
// It first starts a goroutine that routes requests arriving on sess.psChan
// to the process stats agent: requests are dropped with a warning while no
// agent exists, a request with TimeUpdate set changes the agent's ticker,
// and any other request triggers a sample.
//
// It then loops: dial AMQP (sleeping 5 seconds after a failed attempt),
// create a remote control and publish its command channel on sess.rcChan,
// create the stats agent on the first successful connection or
// re-initialise it on later ones, and finally block until the connection
// reports that it was closed before starting over.
//
// NOTE(review): stats is assigned by the reconnect loop and read by the
// routing goroutine with no synchronisation visible in this function —
// confirm whether this needs a mutex.
func redial(sess session) {
	var (
		stats agents.ProcessStats
		rc    *agents.RemoteControl
		err   error
	)

	// Mini-router for incoming stats agent requests.
	go func() {
		for req := range sess.psChan {
			switch {
			case stats == nil:
				log.Warnln("No stats agents available (yet), dropping request")
			case req.TimeUpdate:
				stats.NewTicker(req.NewTime)
			default:
				stats.Sample()
			}
		}
	}()

	// Either one routing key, or a comma separated list with the empty
	// entries removed.
	var rcKeys []string
	if sess.multiRmtKey {
		rcKeys = deleteEmpty(strings.Split(*rmtKey, ","))
	} else {
		rcKeys = []string{*rmtKey}
	}

	for {
		sess.amqpConn, err = amqp.DialConfig(*uri, sess.amqpConfig)
		if err != nil {
			log.Warnf("Failed to connect to AMQP: %q", err)
			// Rate limit reconnection attempts
			time.Sleep(5 * time.Second)
			continue
		}

		if rc, err = agents.NewRemoteControl(sess.amqpConn, rcKeys, *exchange); err != nil {
			log.Warnf("Failed creating NewRemoteControl: %s", err)
		} else {
			sess.rcChan <- rc.Commands
		}

		if stats != nil {
			// An agent already exists: point it at the new connection.
			if err = stats.ReinitializeConnection(sess.amqpConn); err != nil {
				log.Warnf("Failed to reinitialize process stats: %s", err)
			}
		} else {
			// Initial setup.
			stats, err = agents.NewProcessStats(
				sess.amqpConn,
				*procStatsKey,
				*exchange,
				&sess.job,
				*statsInterval,
				*msgTimeout,
				*userJSON,
			)
			if err != nil {
				log.Warnf("Failed creating NewProcessStats: %s", err)
			}
		}

		// Wait for the close notification, then loop back around to
		// reconnect.
		closings := sess.amqpConn.NotifyClose(make(chan *amqp.Error))
		<-closings
		log.Debugln("Saw a notification for closed connection, looping")
	}
}
Пример #5
0
// NewControlledProcess creates the child proc.
//
// cmd is the executable to run. arguments is expected to carry the command
// itself as its first element; that element is dropped and the remainder is
// passed to the child as its arguments. doneChan receives the result of
// waiting on the child once it has exited and its stdout and stderr have
// been fully copied. stdoutLimit is stored on the returned Job.
//
// The child is started in its own process group and its stdin, stdout and
// stderr are wired to this process' stdin, stdout and stderr through
// background goroutines; stdout is read through a measured reader.
//
// On error the returned JobControl is nil. If the failure happens after the
// child was started, the child is killed and reaped before returning.
func NewControlledProcess(cmd string, arguments []string, doneChan chan error, stdoutLimit int64) (JobControl, error) {
	var err error

	j := &Job{
		nil,
		nil,
		0,
		0,
		doneChan,
		stdoutLimit,
		nil,
	}

	// Drop command from cmdline arguments and pass the rest as arguments separately
	var args []string
	if len(arguments) > 0 {
		args = arguments[1:]
	}
	j.Cmd = exec.Command(cmd, args...)

	// Collect stdout from the process to redirect to real stdout
	stdoutpipe, err := j.Cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stdout: %s", err)
	}
	stdout := iocontrol.NewMeasuredReader(stdoutpipe)
	j.stdoutReader = stdout

	var wg sync.WaitGroup

	stdin, err := j.Cmd.StdinPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stdin: %s", err)
	}

	stderr, err := j.Cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stderr: %s", err)
	}

	// Map all child processes under this tree so Kill really ends everything.
	j.Cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true, // Set process group ID
	}

	log.Debugf("%#v\n", j.Cmd)

	// Start the sub-process but don't wait for completion to pickup the Pid
	// for resource monitoring.
	err = j.Cmd.Start()
	if err != nil {
		return nil, fmt.Errorf("Failed to execute sub-process: %s\n", err)
	}

	// reapChild tears down a child that was started but cannot be handed to
	// the caller, so that it is not left running unsupervised. Both calls
	// are best effort: we are already on an error path and the original
	// error is the one worth reporting.
	reapChild := func() {
		_ = j.Cmd.Process.Kill()
		_ = j.Cmd.Wait()
	}

	j.Pid = j.Cmd.Process.Pid
	j.Pgid, err = syscall.Getpgid(j.Pid)
	if err != nil {
		reapChild()
		return nil, fmt.Errorf("Failed syscall.Getpgid: %s\n", err)
	}

	j.Proc, err = process.NewProcess(int32(j.Pgid))
	if err != nil {
		reapChild()
		return nil, fmt.Errorf("Unable to create process.NewProcess: %s\n", err)
	}

	wg.Add(1)
	go func(wg *sync.WaitGroup, r io.Reader) {
		defer wg.Done()
		io.Copy(os.Stdout, r)
		log.Debugln("child closed stdout")
	}(&wg, stdout)

	go func(w io.WriteCloser) {
		// Close the child's stdin once our own stdin is exhausted;
		// otherwise a child that reads until EOF would never see it and
		// would never exit.
		defer w.Close()
		io.Copy(w, os.Stdin)
	}(stdin)

	wg.Add(1)
	go func(wg *sync.WaitGroup, r io.Reader) {
		defer wg.Done()
		io.Copy(os.Stderr, r)
		log.Debugln("child closed stderr")
	}(&wg, stderr)

	// Background waiting for the job to finish and emit a done channel message
	// when complete. The pipe readers must drain before Cmd.Wait is called,
	// because Wait closes the pipes.
	go func(wg *sync.WaitGroup, j *Job) {
		log.Debugln("Waiting on wg.Wait()")
		wg.Wait()
		log.Debugln("Waiting on Cmd.Wait()")
		err := j.Cmd.Wait()
		// %v rather than %q: err is nil on a clean exit.
		log.Debugf("Job finished: %v\n", err)
		j.done <- err
	}(&wg, j)

	return j, nil
}