func (cmd *Cmd) Kill() error { switch cmd.State { case Running: cmd.State = Terminated lg.Debugf("Cmd:\tKilling %v\n", cmd.JobID) pgid, err := syscall.Getpgid(cmd.Cmd.Process.Pid) if err != nil { // Fall-back on error. Kill the main process only. cmd.Cmd.Process.Kill() break } // Kill the whole process group. syscall.Kill(-pgid, 15) case Finished: lg.Debug("Cmd:\tKilling pgroup %v\n", cmd.JobID) pgid, err := syscall.Getpgid(cmd.Cmd.Process.Pid) if err != nil { break } // Make sure to kill the whole process group, // so there are no subprocesses left. syscall.Kill(-pgid, 15) case Initialized: // This one is tricky, as the cmd's Start() might have // been called and is already in progress, but the cmd's // state is not Running yet. usCallingStartOnce := false cmd.StartOnce.Do(func() { cmd.WaitOnce.Do(func() { cmd.State = Invalidated cmd.StatusCode = -2 cmd.Err = errors.New("invalidated") lg.Debugf("Cmd: Invalidating %v\n", cmd.JobID) close(cmd.Finished) }) close(cmd.Started) usCallingStartOnce = true }) if !usCallingStartOnce { // It was cmd.Start() that called StartOnce.Do(), not us, // thus we need to wait for Started and try to Kill again: <-cmd.Started cmd.Kill() } } return cmd.Err }
func (qmd *Qmd) ListenQueue() { qmd.WaitListenQueue.Add(1) defer qmd.WaitListenQueue.Done() lg.Debug("Queue:\tListening") for { select { // Wait for some worker to become available. case worker := <-qmd.Workers: // Dequeue job or try again. job, err := qmd.Dequeue() if err != nil { qmd.Workers <- worker break } lg.Debugf("Queue:\tDequeued job %v", job.ID) // Send the job to the worker. worker <- job case <-qmd.ClosingListenQueue: lg.Debug("Queue:\tStopped listening") return } } }
func (cmd *Cmd) waitOnce() { err := cmd.Cmd.Wait() cmd.Duration = time.Since(cmd.StartTime) cmd.EndTime = cmd.StartTime.Add(cmd.Duration) if cmd.State != Terminated { cmd.State = Finished } if err != nil { cmd.Err = err if e, ok := err.(*exec.ExitError); ok { if s, ok := e.Sys().(syscall.WaitStatus); ok { cmd.StatusCode = s.ExitStatus() } } } if f, err := os.Open(cmd.QmdOutFile); err == nil { _, err := cmd.QmdOut.ReadFrom(f) if err != nil { cmd.Err = err } f.Close() } close(cmd.Finished) lg.Debugf("Cmd:\tFinished %v", cmd.JobID) }
func (cmd *Cmd) Cleanup() error { lg.Debugf("Cmd:\tCleaning %v\n", cmd.JobID) // Make sure to kill the whole process group, // so there are no subprocesses left. cmd.Kill() // Remove working directory. err := os.RemoveAll(cmd.Cmd.Dir) if err != nil { return err } return nil }
// Update walks ScriptDir directory for shell scripts and updates the files cache. func (s *Scripts) Update(dir string) error { info, err := os.Stat(dir) if err != nil { return errors.New("script_dir=\"" + dir + "\": no such directory") } if !info.IsDir() { return errors.New("script_dir=\"" + dir + "\": not a directory") } files := map[string]string{} if err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error { if !info.IsDir() { if path.Ext(file) == ".sh" { rel, err := filepath.Rel(dir, file) if err != nil { return err } files[rel] = file } } return nil }); err != nil { return err } if len(files) == 0 { return errors.New("script_dir=\"" + dir + "\" is empty") } s.Lock() defer s.Unlock() if !reflect.DeepEqual(s.files, files) { lg.Debug("Scripts: Loading new files from script_dir:") for rel, _ := range files { lg.Debugf("Scripts: - %v", rel) } } s.files = files return nil }
func CreateJob(ctx context.Context, w http.ResponseWriter, r *http.Request) { // Low, high and urgent priorities only (high is default). priority := r.URL.Query().Get("priority") switch priority { case "low", "high", "urgent": // NOP. case "": priority = "high" default: http.Error(w, "unknown priority \""+priority+"\"", 422) return } // Decode request data. var req *api.ScriptsRequest err := json.NewDecoder(r.Body).Decode(&req) if err != nil { http.Error(w, "parse request body: "+err.Error(), 422) return } req.Script = chi.URLParams(ctx)["filename"] // Make sure ASYNC callback is valid URL. if req.CallbackURL != "" { req.CallbackURL, err = urlx.NormalizeString(req.CallbackURL) if err != nil { http.Error(w, "parse request body: "+err.Error(), 422) return } } // Enqueue the request. data, err := json.Marshal(req) if err != nil { http.Error(w, err.Error(), 500) return } lg.Debugf("Handler:\tEnqueue \"%v\" request", priority) job, err := Qmd.Enqueue(string(data), priority) if err != nil { http.Error(w, err.Error(), 500) return } // Async. if req.CallbackURL != "" { resp, _ := Qmd.GetAsyncResponse(req, job.ID) w.Write(resp) lg.Debugf("Handler:\tResponded with job %s ASYNC result", job.ID) go func() { //TODO: Retry callback if it failed? err := Qmd.PostResponseCallback(req, job.ID) if err != nil { lg.Errorf("can't post callback to %v", err) } }() return } // Sync. lg.Debugf("Handler:\tWaiting for job %s", job.ID) resp, _ := Qmd.GetResponse(job.ID) w.Write(resp) lg.Debugf("Handler:\tResponded with job %s result", job.ID) // // Kill the job, if client closes the connection before // // it receives the data. // done := make(chan struct{}) // defer close(done) // connClosed := w.(http.CloseNotifier).CloseNotify() // go func() { // select { // case <-connClosed: // job.Kill() // case <-done: // } // }() }
func (cmd *Cmd) startOnce() { lg.Debugf("Cmd:\tStarting %v", cmd.JobID) cmd.Cmd.Dir += "/" + cmd.JobID cmd.QmdOutFile = cmd.Cmd.Dir + "/QMD_OUT" cmd.Cmd.Env = append(os.Environ(), "QMD_TMP="+cmd.Cmd.Dir, "QMD_STORE="+cmd.StoreDir, "QMD_OUT="+cmd.QmdOutFile, ) cmd.Cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, //TODO: Chroot: cmd.Cmd.Dir, } cmd.Cmd.Stdout = &cmd.CmdOut cmd.Cmd.Stderr = &cmd.CmdOut // Create working directory. err := os.MkdirAll(cmd.Cmd.Dir, 0777) if err != nil { cmd.Err = err } // Create QMD_OUT file. qmdOut, err := os.Create(cmd.QmdOutFile) if err != nil { cmd.Err = err } qmdOut.Close() for file, data := range cmd.ExtraWorkDirFiles { // Must be a simple filename without slashes. if strings.Index(file, "/") != -1 { cmd.Err = errors.New("extra files must not contain any slashes in the path") goto failedToStart } err = ioutil.WriteFile(cmd.Cmd.Dir+"/"+file, []byte(data), 0644) if err != nil { cmd.Err = err goto failedToStart } } if err := cmd.Cmd.Start(); err != nil { cmd.Err = err goto failedToStart } cmd.StartTime = time.Now() cmd.State = Running close(cmd.Started) cmd.Err = nil return failedToStart: cmd.StatusCode = -1 cmd.State = Failed cmd.WaitOnce.Do(func() { close(cmd.Finished) }) close(cmd.Started) lg.Debugf("Cmd:\tFailed to start %v: %v", cmd.JobID, cmd.Err) }
func (qmd *Qmd) startWorker(id int, workers chan Worker) { qmd.WaitWorkers.Add(1) defer qmd.WaitWorkers.Done() worker := make(Worker) for { // Mark this worker as available. workers <- worker select { // Wait for a job. case job := <-worker: msg := fmt.Errorf("Worker %v:\tGot \"%v\" job %v/jobs/%v", id, job.Queue, qmd.Config.URL, job.ID) lg.Error(msg) qmd.Slack.Notify(msg.Error()) var req *api.ScriptsRequest err := json.Unmarshal([]byte(job.Data), &req) if err != nil { qmd.Queue.Ack(job) msg := fmt.Errorf("Worker %v:\tfailed: %v", err) lg.Error(msg) qmd.Slack.Notify(msg.Error()) break } script, err := qmd.GetScript(req.Script) if err != nil { qmd.Queue.Ack(job) msg := fmt.Errorf("Worker %v:\tfailed: %v", err) lg.Error(msg) qmd.Slack.Notify(msg.Error()) break } // Create QMD job to run the command. cmd, err := qmd.Cmd(exec.Command(script, req.Args...)) if err != nil { qmd.Queue.Ack(job) msg := fmt.Errorf("Worker %v:\tfailed: %v", err) lg.Error(msg) qmd.Slack.Notify(msg.Error()) break } cmd.JobID = job.ID cmd.CallbackURL = req.CallbackURL cmd.ExtraWorkDirFiles = req.Files // Run a job. go cmd.Run() <-cmd.Started select { // Wait for the job to finish. case <-cmd.Finished: // Or kill it, if it doesn't finish in a specified time. case <-time.After(time.Duration(qmd.Config.MaxExecTime) * time.Second): cmd.Kill() cmd.Wait() cmd.Cleanup() // Or kill it, if QMD is closing. case <-qmd.ClosingWorkers: lg.Debugf("Worker %d:\tStopping (busy)", id) cmd.Kill() cmd.Cleanup() qmd.Queue.Nack(job) msg := fmt.Errorf("Worker %d:\tNACKed job %v/jobs/%v", id, qmd.Config.URL, job.ID) lg.Error(msg) qmd.Slack.Notify(msg.Error()) return } // Response. resp := api.ScriptsResponse{ ID: job.ID, Script: req.Script, Args: req.Args, Files: req.Files, } // "OK" and "ERR" for backward compatibility. if cmd.StatusCode == 0 { resp.Status = "OK" } else { resp.Status = "ERR" } resp.EndTime = cmd.EndTime resp.Duration = fmt.Sprintf("%f", cmd.Duration.Seconds()) resp.QmdOut = cmd.QmdOut.String() resp.ExecLog = cmd.CmdOut.String() resp.StartTime = cmd.StartTime if cmd.Err != nil { resp.Err = cmd.Err.Error() } qmd.DB.SaveResponse(&resp) qmd.Queue.Ack(job) msg = fmt.Errorf("Worker %v:\tACKed job %v/jobs/%v", id, qmd.Config.URL, job.ID) lg.Error(msg) qmd.Slack.Notify(msg.Error()) case <-qmd.ClosingWorkers: lg.Debugf("Worker %d:\tStopping (idle)", id) return } } }
func (qmd *Qmd) StartWorkers() { lg.Debugf("Starting %v QMD workers", qmd.Config.MaxJobs) for i := 0; i < qmd.Config.MaxJobs; i++ { go qmd.startWorker(i, qmd.Workers) } }