func (s *Server) scheduleRunners(q *queue.Queue, group string) { if s.rc == "" { return } s.sgcmutex.Lock() req, hadreq := s.sgtr[group] if !hadreq { s.sgcmutex.Unlock() return } doClear := false groupCount := s.sgroupcounts[group] if groupCount < 0 { s.sgroupcounts[group] = 0 groupCount = 0 doClear = true } s.sgcmutex.Unlock() if !doClear { err := s.scheduler.Schedule(fmt.Sprintf(s.rc, q.Name, group, s.ServerInfo.Deployment, s.ServerInfo.Addr, s.scheduler.ReserveTimeout(), int(s.scheduler.MaxQueueTime(req).Minutes())), req, groupCount) if err != nil { problem := true if serr, ok := err.(scheduler.Error); ok && serr.Err == scheduler.ErrImpossible { // bury all jobs in this scheduler group problem = false rf := func(data interface{}) bool { job := data.(*Job) if job.schedulerGroup == group { return true } return false } s.sgcmutex.Lock() for { item, err := q.ReserveFiltered(rf) if err != nil { problem = true break } if item == nil { break } job := item.Data.(*Job) job.FailReason = FailReasonResource q.Bury(item.Key) s.sgroupcounts[group]-- } s.sgcmutex.Unlock() if !problem { doClear = true } } if problem { // log the error *** and inform (by email) the user about this // problem if it's persistent, once per hour (day?) log.Println(err) // retry the schedule in a while go func() { <-time.After(1 * time.Minute) s.scheduleRunners(q, group) }() return } } } if doClear { //log.Printf("group [%s] count dropped to 0, will clear\n", group) s.clearSchedulerGroup(group, q) } }