Exemplo n.º 1
0
func timerTicker(timer *WallclockTimer) {
	for t := range timer.ticker.C {
		timer.incrementTimer()
		select {
		case command := <-timer.command:
			log.Debugln("Command tick at: ", t)
			log.Debugf("Command received: %s\n", command)
			switch command {
			case "reset":
				log.Debugln("Received a Reset")
				timer.tick = false
				timer.elapsedTime = time.Duration(0)
			case "start":
				log.Debugln("Received a start")
				timer.tick = true
				timer.elapsedTime = time.Duration(0)
				timer.previousTime = time.Now()
			case "stop":
				log.Debugln("Received a stop")
				timer.tick = false
			case "resume":
				log.Debugln("Received a resume")
				timer.tick = true
				timer.previousTime = time.Now()
			default:
				log.Errorln("Unknown command received")
			}
		default:
			//log.Debugln("No commands received, just keep ticking")
		}
		//log.Debugf("Timer elapsed time is: %v", timer.elapsedTime)
		//log.Debugf("Timer timeout time is: %v", timer.timeoutTime)
		if timer.elapsedTime > timer.timeoutTime {
			log.Debugln("Elapsed time has exceeded timeout time.")
			timer.done <- errors.New("The timer has timed out.")
			timer.tick = false
			// Possible bug? Reset timer to equal timeout
			timer.elapsedTime = timer.timeoutTime
		}
	}
}
Exemplo n.º 2
0
func main() {
	if *debugMode {
		log.SetLevel(log.DebugLevel)
	} else if *noWarn {
		log.SetLevel(log.ErrorLevel)
	}

	// Create channel for ProcessStats to trigger a sample
	psChan := make(chan agents.ProcessStatCommand)
	// Incoming remote command channel (new with each reconnect)
	rcChan := make(chan chan agents.RemoteControlCommand)

	args := flag.Args()
	var cmdArgs []string
	var cmd string
	if len(args) > 0 {
		cmd = args[0]
	} else {
		log.Fatal("Did you forget a command to run?")
		return
	}

	log.Debugf("cmd: %s cmdArgs: %q\n", cmd, cmdArgs)

	done := make(chan error)

	// Initialize job
	job, err := agents.NewControlledProcess(cmd, args, done, *stdoutByteLimit)
	if err != nil {
		log.Fatalf("Failed to create a NewControlledProcess: %s\n", err)
		return
	}

	log.Debugf("%#v\n", job)

	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT)

	timer, err := agents.NewTimer(*wallclockTimeout)
	if err != nil {
		log.Warnln("Error returned creating timeout agent", err)
	}
	log.Debugf("Starting timer with timeout of: %v\n", *wallclockTimeout)
	timer.Start()

	sess := session{
		job:          job,
		exchange:     *exchange,
		rcRoutingKey: *rmtKey,
		psChan:       psChan,
		rcChan:       rcChan,
		amqpConfig: amqp.Config{
			Properties: amqp.Table{
				"product": "proc_box",
				"version": "master",
			},
		},
		multiRmtKey: *multiRmtKey,
	}
	go redial(sess)

	go monitor(signals, rcChan, job, psChan, timer, done)

	err = <-done

	elapsedTime, _ := timer.ElapsedTime()
	// Print to standard out
	fmt.Printf("Task elapsed time: %.2f seconds.\n", elapsedTime.Seconds())

	if err != nil {
		if exiterr, ok := err.(*exec.ExitError); ok {
			// Non-zero exit code
			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
				exitStatus := status.ExitStatus()
				log.Debugf("Exit Status: %d\n", exitStatus)
				os.Exit(exitStatus)
			}
		} else {
			log.Debugf("cmd.Wait: %v\n", err)
		}
	} else {
		os.Exit(0)
	}
}
Exemplo n.º 3
0
func monitor(
	signals chan os.Signal,
	rcChanChan chan chan agents.RemoteControlCommand,
	job agents.JobControl,
	psChan chan agents.ProcessStatCommand,
	timer agents.Timer,
	done chan error,
) {

	// Catch any panics here to ensure we kill the child process before going
	// to our own doom.
	defer func() {
		if e := recover(); e != nil {
			job.Kill(-9)
			panic(e)
		}
	}()
	var logSampling <-chan time.Time
	var err error

	if *stdoutByteLimit > 0 {
		ticker := time.NewTicker(100 * time.Millisecond)
		// Replace the time channel with an actual ticker if this is in use
		logSampling = ticker.C
	}

	var rcChan chan agents.RemoteControlCommand

	for {
		select {
		case rcChan = <-rcChanChan:
		// Catch incoming signals and operate on them as if they were remote commands
		case sig := <-signals:
			switch sig {
			case syscall.SIGINT:
				log.Debugln("Caught SIGINT, graceful shutdown")
				// Initiate non-blocking send
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGINT failed to send a sample msg on the psChan")
				}
				err = job.Stop()
			case syscall.SIGTERM:
				log.Debugln("Caught SIGTERM, end abruptly")
				job.Kill(-9)
			case syscall.SIGHUP:
				log.Debugln("Caught SIGHUP, emit stats")
				// Initiate non-blocking send
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGHUP failed to send a sample msg on the psChan")
				}
			case syscall.SIGQUIT:
				log.Debugln("Caught SIGQUIT, graceful shutdown")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("SIGQUIT failed to send a sample msg on the psChan")
				}
				err = job.Stop()
			}
		// Process incoming remote commands, toss unknown requests
		case cmd := <-rcChan:
			log.Debugf("Got a command %#v\n", cmd)
			switch cmd.Command {
			case "suspend":
				log.Debugln("RemoteCommand: Suspend")
				job.Suspend()
			case "resume":
				log.Debugln("RemoteCommand: Resume")
				job.Resume()
			case "kill":
				log.Debugln("RemoteCommand: Kill")
				var args int64
				if len(cmd.Arguments) == 0 {
					args = -9
				} else {
					args, err = strconv.ParseInt(cmd.Arguments[0], 10, 32)
					if err != nil {
						log.Warnf("Unable to parse kill command argument[0] into int: %s\n", err)
						args = -9
					}
				}
				job.Kill(args)
			case "stop":
				log.Debugln("RemoteCommand: Stop")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("RC Stop failed to send a sample msg on the psChan")
				}
				if err = job.Stop(); err != nil {
					log.Fatalf("Error received while stopping sub-process: %s\n", err)
				}
			case "sample":
				log.Debugln("RemoteCommand: Sample")
				select {
				case psChan <- agents.ProcessStatCommand{}:
					log.Debugln("Sending psChan a msg to sample")
				default:
					log.Debugln("RC Sample failed to send a sample msg on the psChan")
				}
			case "change_sample_rate":
				log.Debugln("RemoteCommand: Change Stats Sample Rate")
				if len(cmd.Arguments) > 0 {
					log.Debugf("change_sample_rate arg[0]: %s\n", cmd.Arguments[0])
					d, err := time.ParseDuration(cmd.Arguments[0])
					if err == nil {
						select {
						case psChan <- agents.ProcessStatCommand{
							TimeUpdate: true,
							NewTime:    d}:
							log.Debugln("Sending psChan a msg to update the ticker")
						default:
							log.Debugln("RC change_sample_rate failed to send a msg")
						}
					} else {
						log.Warnf("Unparseable duration argument to command change_sample_rate")
					}
				} else {
					log.Warnf("Missing argument to command change_sample_rate")
				}
			case "timer_reset":
				log.Debugln("RemoteCommand: Timer Reset")
				if err = timer.Reset(); err != nil {
					log.Fatalf("Error received from timer calling Reset: %s\n", err)
				}
			case "timer_start":
				log.Debugln("RemoteCommand: Timer Start")
				if err = timer.Start(); err != nil {
					log.Fatalf("Error received from timer calling Start: %s\n", err)
				}
			case "timer_stop":
				log.Debugln("RemoteCommand: Timer Stop")
				if err = timer.Stop(); err != nil {
					log.Fatalf("Error received from timer calling Stop: %s\n", err)
				}
			case "timer_resume":
				log.Debugln("RemoteCommand: Timer Resume")
				if err = timer.Resume(); err != nil {
					log.Fatalf("Error received from timer calling Resume: %s\n", err)
				}
			default:
				log.Debugf("Unknown command: %s\n", cmd)
			}
		case timeoutMsg := <-timer.Done():
			log.Debugf("Timer timeout message: %s\n", timeoutMsg)
			if err = job.Stop(); err != nil {
				log.Fatalf("Error received while stopping sub-process: %v\n", err)
				// If there was an error stopping the process, kill the porcess.
				job.Kill(-9)
			}
		case jobDone := <-job.Done():
			log.Debugln("Command exited gracefully; shutting down.")
			done <- jobDone
		case _ = <-logSampling:
			if job.StdoutByteCount() > 2*(*stdoutByteLimit) {
				err = job.Kill(-9)
			} else if job.StdoutByteCount() > *stdoutByteLimit {
				err = job.Stop()
			}
		}
	}
}
Exemplo n.º 4
0
func (ps *ProcessStatCollector) collectSample() error {
	var err error
	job := *ps.job
	proc := job.Process()

	stat := ProcessStatSample{}

	stat.Pid = proc.Pid

	curTime := time.Now()
	stat.TimeUTC = curTime.UTC()
	stat.TimeUnix = curTime.Unix()

	hostinfo, err := host.HostInfo()
	var hostnameRoutingKey string
	if err != nil {
		log.Warnf("Error encountered collecting host stats: %s", err)
		hostnameRoutingKey = ""
	} else {
		stat.Host = *hostinfo
		hostnameRoutingKey = fmt.Sprintf(".%s", stat.Host.Hostname)
	}

	// Collect for parent
	stat.aggregateStatForProc(proc)

	children, err := proc.Children()
	// Collect on children (if any)
	if err == nil {
		for _, cproc := range children {
			stat.ChildPids = append(stat.ChildPids, cproc.Pid)
			stat.aggregateStatForProc(cproc)
		}
	}

	stat.StdoutBytes = job.StdoutByteCount()

	stat.UserData = ps.userJSON

	log.Debugf("Sample: %#v\n", stat)

	var body []byte
	body, err = json.Marshal(stat)

	routingKey := fmt.Sprintf("%s%s", ps.routingKey, hostnameRoutingKey)
	err = ps.channel.Publish(
		ps.exchange, // publish to an exchange
		routingKey,  // routing to queues
		false,       // mandatory
		false,       // immediate
		amqp.Publishing{
			Headers:         amqp.Table{},
			ContentType:     "text/javascript",
			ContentEncoding: "",
			Body:            body,
			DeliveryMode:    amqp.Transient, // non-persistent
			Priority:        0,              // 0-9
		},
	)
	if err != nil {
		log.Warnf("Error publishing statistics sample %s", err)
	}

	return err
}
Exemplo n.º 5
0
// NewControlledProcess creates the child proc.
func NewControlledProcess(cmd string, arguments []string, doneChan chan error, stdoutLimit int64) (JobControl, error) {
	var err error

	j := &Job{
		nil,
		nil,
		0,
		0,
		doneChan,
		stdoutLimit,
		nil,
	}

	// Drop command from cmdline arguments and pass the rest as arguments separately
	var args []string
	if len(arguments) > 0 {
		args = arguments[1:]
	}
	j.Cmd = exec.Command(cmd, args...)

	// Collect stdout from the process to redirect to real stdout
	stdoutpipe, err := j.Cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stdout: %s", err)
	}
	stdout := iocontrol.NewMeasuredReader(stdoutpipe)
	j.stdoutReader = stdout

	var wg sync.WaitGroup

	stdin, err := j.Cmd.StdinPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stdin: %s", err)
	}

	stderr, err := j.Cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("Failed to acquire stderr: %s", err)
	}

	// Map all child processes under this tree so Kill really ends everything.
	j.Cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true, // Set process group ID
	}

	log.Debugf("%#v\n", j.Cmd)

	// Start the sub-process but don't wait for completion to pickup the Pid
	// for resource monitoring.
	err = j.Cmd.Start()
	if err != nil {
		return nil, fmt.Errorf("Failed to execute sub-process: %s\n", err)
	}

	j.Pid = j.Cmd.Process.Pid
	j.Pgid, err = syscall.Getpgid(j.Pid)
	if err != nil {
		return nil, fmt.Errorf("Failed syscall.Getpgid: %s\n", err)
	}

	j.Proc, err = process.NewProcess(int32(j.Pgid))
	if err != nil {
		return nil, fmt.Errorf("Unable to create process.NewProcess: %s\n", err)
	}

	wg.Add(1)
	go func(wg *sync.WaitGroup, r io.Reader) {
		defer wg.Done()
		io.Copy(os.Stdout, r)
		log.Debugln("child closed stdout")
	}(&wg, stdout)

	go func(w io.WriteCloser) {
		io.Copy(w, os.Stdin)
	}(stdin)

	wg.Add(1)
	go func(wg *sync.WaitGroup, r io.Reader) {
		defer wg.Done()
		io.Copy(os.Stderr, r)
		log.Debugln("child closed stderr")
	}(&wg, stderr)

	// Background waiting for the job to finish and emit a done channel message
	// when complete.
	go func(wg *sync.WaitGroup, j *Job) {
		log.Debugln("Waiting on wg.Wait()")
		wg.Wait()
		log.Debugln("Waiting on Cmd.Wait()")
		err := j.Cmd.Wait()
		log.Debugf("Job finished: %q\n", err)
		j.done <- err
	}(&wg, j)

	return j, nil
}