// getJobsByRepGroup gets jobs in the given group (current and complete)
func (s *Server) getJobsByRepGroup(q *queue.Queue, repgroup string, limit int, state string, getStd bool, getEnv bool) (jobs []*Job, srerr string, qerr string) {
	// look in the in-memory queue for matching jobs
	s.rpl.RLock()
	for key := range s.rpl.lookup[repgroup] {
		item, err := q.Get(key)
		if err == nil && item != nil {
			job := s.itemToJob(item, false, false)
			jobs = append(jobs, job)
		}
	}
	s.rpl.RUnlock()

	// look in the permanent store for matching jobs
	if state == "" || state == "complete" {
		var complete []*Job
		complete, srerr, qerr = s.getCompleteJobsByRepGroup(repgroup)
		if len(complete) > 0 {
			// a job is stored in the db with only the single most recent
			// RepGroup it had, but we're able to retrieve jobs based on any of
			// the RepGroups it ever had; set the RepGroup to the one the user
			// requested *** may want to change RepGroup to store a slice of
			// RepGroups? But that could be massive...
			for _, cj := range complete {
				cj.RepGroup = repgroup
			}
			jobs = append(jobs, complete...)
		}
	}

	if limit > 0 || state != "" {
		jobs = s.limitJobs(jobs, limit, state, getStd, getEnv)
	}

	return
}

// adjust our count of how many jobs with this schedulerGroup we need in the job
// scheduler.
func (s *Server) decrementGroupCount(schedulerGroup string, q *queue.Queue) {
	if s.rc != "" {
		doSchedule := false
		doTrigger := false
		s.sgcmutex.Lock()
		if _, existed := s.sgroupcounts[schedulerGroup]; existed {
			s.sgroupcounts[schedulerGroup]--
			doSchedule = true
			//log.Printf("decremented group [%s] to %d\n", schedulerGroup, s.sgroupcounts[schedulerGroup])
			if count, set := s.sgrouptrigs[schedulerGroup]; set {
				s.sgrouptrigs[schedulerGroup]++
				if count >= 100 {
					// roughly every 100 completed jobs in this group we
					// consider re-triggering the ready callback, but only if a
					// meaningful number of jobs (more than 10) still remain
					s.sgrouptrigs[schedulerGroup] = 0
					if s.sgroupcounts[schedulerGroup] > 10 {
						doTrigger = true
					}
				}
			}
		}
		s.sgcmutex.Unlock()

		if doTrigger {
			// we most likely have completed 100 more jobs for this group, so
			// we'll trigger our ready callback which will re-calculate the
			// best resource requirements for the remaining jobs in the group
			// and then call scheduleRunners
			q.TriggerReadyAddedCallback()
		} else if doSchedule {
			// notify the job scheduler that we now need fewer jobs for this
			// job's cmd; it will remove extraneous ones from its queue
			s.scheduleRunners(q, schedulerGroup)
		}
	}
}

// getJobsByKeys gets jobs with the given keys (current and complete)
func (s *Server) getJobsByKeys(q *queue.Queue, keys []string, getStd bool, getEnv bool) (jobs []*Job, srerr string, qerr string) {
	var notfound []string
	for _, jobkey := range keys {
		// try to get the job from the in-memory queue
		item, err := q.Get(jobkey)
		var job *Job
		if err == nil && item != nil {
			job = s.itemToJob(item, getStd, getEnv)
		} else {
			notfound = append(notfound, jobkey)
		}
		if job != nil {
			jobs = append(jobs, job)
		}
	}

	if len(notfound) > 0 {
		// try to get the jobs from the permanent store
		found, err := s.db.retrieveCompleteJobsByKeys(notfound, getStd, getEnv)
		if err != nil {
			srerr = ErrDBError
			qerr = err.Error()
		} else if len(found) > 0 {
			jobs = append(jobs, found...)
		}
	}

	return
}

// getJobsCurrent gets all current (incomplete) jobs
func (s *Server) getJobsCurrent(q *queue.Queue, limit int, state string, getStd bool, getEnv bool) (jobs []*Job) {
	for _, item := range q.AllItems() {
		jobs = append(jobs, s.itemToJob(item, false, false))
	}

	if limit > 0 || state != "" {
		jobs = s.limitJobs(jobs, limit, state, getStd, getEnv)
	}

	return
}

// enqueueItems adds new items to a queue, for when we have new jobs to handle.
func (s *Server) enqueueItems(q *queue.Queue, itemdefs []*queue.ItemDef) (added int, dups int, err error) {
	added, dups, err = q.AddMany(itemdefs)
	if err != nil {
		return
	}

	// add to our lookup of job RepGroup to key
	s.rpl.Lock()
	for _, itemdef := range itemdefs {
		rp := itemdef.Data.(*Job).RepGroup
		if _, exists := s.rpl.lookup[rp]; !exists {
			s.rpl.lookup[rp] = make(map[string]bool)
		}
		s.rpl.lookup[rp][itemdef.Key] = true
	}
	s.rpl.Unlock()

	return
}

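// enqueueJobsSketch is a minimal, hypothetical sketch (not part of the original
// API) of how enqueueItems might be driven by a caller that has new jobs: each
// job becomes a queue.ItemDef keyed on the job's key, with the job itself as
// the item data. A real caller would likely also set scheduling details such
// as priority, delay and TTR on each ItemDef; those are omitted here.
func (s *Server) enqueueJobsSketch(q *queue.Queue, newJobs []*Job) (added int, dups int, err error) {
	var itemdefs []*queue.ItemDef
	for _, job := range newJobs {
		itemdefs = append(itemdefs, &queue.ItemDef{Key: job.key(), Data: job})
	}
	// enqueueItems adds the items to the queue and records each job's
	// RepGroup -> key mapping for later lookups by getJobsByRepGroup
	return s.enqueueItems(q, itemdefs)
}
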
// for the many j* methods in handleRequest, we do this common stuff to get // the desired item and job. func (s *Server) getij(cr *clientRequest, q *queue.Queue) (item *queue.Item, job *Job, errs string) { // clientRequest must have a Job if cr.Job == nil { errs = ErrBadRequest return } item, err := q.Get(cr.Job.key()) if err != nil || item.Stats().State != "run" { errs = ErrBadJob return } job = item.Data.(*Job) if !uuid.Equal(cr.ClientID, job.ReservedBy) { errs = ErrMustReserve } return }
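// scheduleRunners asks the job scheduler to run the appropriate number of
// runner clients for the given scheduler group. s.rc is an fmt format string
// for the runner command; its value is configured elsewhere, but from the
// Sprintf call below it receives, in order: the queue name, the scheduler
// group, the deployment, the server address, the scheduler's reserve timeout
// and the maximum queue time in minutes. Purely as an illustrative
// (hypothetical) example, it might look something like:
//
//	"runner --queue '%s' --schedgrp '%s' --deployment %s --server '%s' --reserve %d --max_mins %d"
//
// If the group's count has dropped below zero, nothing more is scheduled and
// the scheduler group is cleared instead.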
func (s *Server) scheduleRunners(q *queue.Queue, group string) {
	if s.rc == "" {
		return
	}

	s.sgcmutex.Lock()
	req, hadreq := s.sgtr[group]
	if !hadreq {
		s.sgcmutex.Unlock()
		return
	}

	doClear := false
	groupCount := s.sgroupcounts[group]
	if groupCount < 0 {
		s.sgroupcounts[group] = 0
		groupCount = 0
		doClear = true
	}
	s.sgcmutex.Unlock()

	if !doClear {
		err := s.scheduler.Schedule(fmt.Sprintf(s.rc, q.Name, group, s.ServerInfo.Deployment, s.ServerInfo.Addr, s.scheduler.ReserveTimeout(), int(s.scheduler.MaxQueueTime(req).Minutes())), req, groupCount)
		if err != nil {
			problem := true
			if serr, ok := err.(scheduler.Error); ok && serr.Err == scheduler.ErrImpossible {
				// bury all jobs in this scheduler group
				problem = false
				rf := func(data interface{}) bool {
					job := data.(*Job)
					if job.schedulerGroup == group {
						return true
					}
					return false
				}
				s.sgcmutex.Lock()
				for {
					item, err := q.ReserveFiltered(rf)
					if err != nil {
						problem = true
						break
					}
					if item == nil {
						break
					}
					job := item.Data.(*Job)
					job.FailReason = FailReasonResource
					q.Bury(item.Key)
					s.sgroupcounts[group]--
				}
				s.sgcmutex.Unlock()
				if !problem {
					doClear = true
				}
			}

			if problem {
				// log the error *** and inform (by email) the user about this
				// problem if it's persistent, once per hour (day?)
				log.Println(err)

				// retry the schedule in a while
				go func() {
					<-time.After(1 * time.Minute)
					s.scheduleRunners(q, group)
				}()
				return
			}
		}
	}

	if doClear {
		//log.Printf("group [%s] count dropped to 0, will clear\n", group)
		s.clearSchedulerGroup(group, q)
	}
}