// Process runs the Gearman job by running the configured task.
// We need to implement the Task interface, so we return ([]byte, error)
// though the []byte is always nil.
func (conf TaskConfig) Process(job baseworker.Job) (b []byte, returnErr error) {
	jobID := getJobID(job)
	if jobID == "" {
		jobID = strconv.Itoa(rand.Int())
		lg.InfoD("rand-job-id", logger.M{"msg": "no job id parsed, random assigned."})
	}
	jobData := string(job.Data())
	data := logger.M{
		"function": conf.FunctionName,
		"job_id":   jobID,
		"job_data": jobData,
	}

	// This wraps the actual processing to do some logging.
	lg.InfoD("START", data)
	start := time.Now()

	for try := 0; try < conf.RetryCount+1; try++ {
		// We create a temporary directory to be used as the work directory of the process.
		// A new work directory is created for every retry of the process.
		// We try to use MESOS_SANDBOX; if it is unset, ioutil.TempDir falls back to the
		// system temp directory.
		tempDirPath, err := ioutil.TempDir(os.Getenv("MESOS_SANDBOX"),
			fmt.Sprintf("%s-%s-%d-", conf.FunctionName, jobID, try))
		if err != nil {
			lg.CriticalD("tempdir-failure", logger.M{"error": err.Error()})
			return nil, err
		}
		defer os.RemoveAll(tempDirPath)

		// Insert the job ID and the work directory path into the environment.
		extraEnvVars := []string{
			fmt.Sprintf("JOB_ID=%s", jobID),
			fmt.Sprintf("WORK_DIR=%s", tempDirPath)}

		err = conf.doProcess(job, extraEnvVars, try)
		end := time.Now()
		data["type"] = "gauge"

		// Return if the job was successful.
		if err == nil {
			lg.InfoD("SUCCESS", logger.M{
				"type":     "counter",
				"function": conf.FunctionName})
			legacyLg.InfoD("success", logger.M{
				"type":     "counter",
				"function": conf.FunctionName})
			data["value"] = 1
			data["success"] = true
			lg.InfoD("END", data)
			// Hopefully none of our jobs last long enough for a uint64...
			// Note that we cannot use lg.GaugeIntD because duration is uint64.
			lg.InfoD("duration", logger.M{
				"value":    uint64(end.Sub(start).Seconds() * 1000),
				"type":     "gauge",
				"function": conf.FunctionName,
				"job_id":   jobID,
				"job_data": jobData})
			return nil, nil
		}

		data["value"] = 0
		data["success"] = false
		data["error_message"] = err.Error()
		returnErr = err

		if try != conf.RetryCount {
			lg.ErrorD("RETRY", data)
		}
	}
	lg.InfoD("FAILURE", logger.M{"type": "counter", "function": conf.FunctionName})
	legacyLg.InfoD("failure", logger.M{"type": "counter", "function": conf.FunctionName})
	lg.ErrorD("END", data)
	return nil, returnErr
}
func (conf TaskConfig) doProcess(job baseworker.Job, envVars []string, tryCount int) error {
	defer func() {
		// If we panicked then set the panic message as a warning. Gearman-go will
		// handle marking this job as failed.
		if r := recover(); r != nil {
			err := r.(error)
			job.SendWarning([]byte(err.Error()))
		}
	}()

	// shutdownTicker controls the execution of the heartbeat ticker goroutine below.
	shutdownTicker := make(chan interface{})
	defer func() {
		shutdownTicker <- 1
	}()

	// Every minute we output a heartbeat kayvee log for the job.
	tickUnit := time.Minute
	ticker := time.NewTicker(tickUnit)
	go func() {
		defer ticker.Stop()
		units := 0
		for {
			select {
			case <-shutdownTicker:
				close(shutdownTicker)
				return
			case <-ticker.C:
				units++
				lg.GaugeIntD("heartbeat", units, logger.M{
					"try_number": tryCount,
					"function":   job.Fn(),
					"job_id":     getJobID(job),
					"unit":       tickUnit.String(),
				})
			}
		}
	}()

	var args []string
	var err error
	if conf.ParseArgs {
		args, err = argsparser.ParseArgs(string(job.Data()))
		if err != nil {
			return fmt.Errorf("Failed to parse args: %s", err.Error())
		}
	} else {
		args = []string{string(job.Data())}
	}
	cmd := exec.Command(conf.FunctionCmd, args...)

	// Insert the provided env vars into the job's environment.
	cmd.Env = append(os.Environ(), envVars...)

	// Create a new pgid for this process so we can later kill all subprocesses launched by it.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	// Write the stdout and stderr of the process to both this process' stdout and stderr,
	// and also write it to a byte buffer so that we can return it with the Gearman job
	// data as necessary.
	var stderrbuf bytes.Buffer
	cmd.Stderr = io.MultiWriter(os.Stderr, &stderrbuf)
	defer sendStderrWarnings(&stderrbuf, job, conf.WarningLines)

	stdoutReader, stdoutWriter := io.Pipe()
	cmd.Stdout = io.MultiWriter(os.Stdout, stdoutWriter)

	done := make(chan error)
	// Track when the job has started so that we don't try to sigterm a nil process.
	started := make(chan struct{})
	go func() {
		defer close(done)
		finishedProcessingStdout := make(chan error)
		go func() {
			finishedProcessingStdout <- streamToGearman(stdoutReader, job)
		}()

		if err := cmd.Start(); err != nil {
			done <- err
			return
		}
		close(started)

		// Save the cmdErr. We want to process stdout and stderr before we return it.
		cmdErr := cmd.Wait()
		stdoutWriter.Close()

		stdoutErr := <-finishedProcessingStdout
		if cmdErr != nil {
			done <- cmdErr
		} else if stdoutErr != nil {
			done <- stdoutErr
		}
	}()
	<-started

	// No timeout
	if conf.CmdTimeout == 0 {
		select {
		case err := <-done:
			// Will be nil if the channel was closed without any errors.
			return err
		case <-conf.Halt:
			if err := stopProcess(cmd.Process, conf.CmdTimeout); err != nil {
				return fmt.Errorf("error stopping process: %s", err)
			}
			return fmt.Errorf("killed process due to sigterm")
		}
	}
	select {
	case err := <-done:
		// Will be nil if the channel was closed without any errors.
		return err
	case <-conf.Halt:
		if err := stopProcess(cmd.Process, conf.CmdTimeout); err != nil {
			return fmt.Errorf("error stopping process: %s", err)
		}
		return nil
	case <-time.After(conf.CmdTimeout):
		if err := stopProcess(cmd.Process, 0); err != nil {
			return fmt.Errorf("error timing out process after %s: %s", conf.CmdTimeout.String(), err)
		}
		return fmt.Errorf("process timed out after %s", conf.CmdTimeout.String())
	}
}
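
// The Setpgid attribute set in doProcess places the command in its own process
// group so the command and anything it spawns can be signalled together. Below is
// a minimal sketch of that pattern for illustration only: killProcessGroupExample
// is a hypothetical helper and is not the worker's actual termination logic, which
// lives in stopProcess.
func killProcessGroupExample(p *os.Process) error {
	// Look up the process group that Setpgid created for the command.
	pgid, err := syscall.Getpgid(p.Pid)
	if err != nil {
		return err
	}
	// Signalling a negative pid delivers the signal to every process in the
	// group, not just the immediate child.
	return syscall.Kill(-pgid, syscall.SIGKILL)
}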