Example #1
// getJobsByRepGroup gets jobs in the given group (current and complete)
func (s *Server) getJobsByRepGroup(q *queue.Queue, repgroup string, limit int, state string, getStd bool, getEnv bool) (jobs []*Job, srerr string, qerr string) {
	// look in the in-memory queue for matching jobs
	s.rpl.RLock()
	for key := range s.rpl.lookup[repgroup] {
		item, err := q.Get(key)
		if err == nil && item != nil {
			job := s.itemToJob(item, false, false)
			jobs = append(jobs, job)
		}
	}
	s.rpl.RUnlock()

	// look in the permanent store for matching jobs
	if state == "" || state == "complete" {
		var complete []*Job
		complete, srerr, qerr = s.getCompleteJobsByRepGroup(repgroup)
		if len(complete) > 0 {
			// a job is stored in the db with only the single most recent
			// RepGroup it had, but we're able to retrieve jobs based on any of
			// the RepGroups it ever had; set the RepGroup to the one the user
			// requested *** may want to change RepGroup to store a slice of
			// RepGroups? But that could be massive...
			for _, cj := range complete {
				cj.RepGroup = repgroup
			}
			jobs = append(jobs, complete...)
		}
	}

	if limit > 0 || state != "" {
		jobs = s.limitJobs(jobs, limit, state, getStd, getEnv)
	}

	return
}
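
The in-memory half of this lookup is an RWMutex-guarded map from RepGroup to a set of job keys, read under RLock. Below is a minimal, self-contained sketch of that pattern; the repLookup type and its fields are simplified stand-ins for wr's actual rpl structure, not its real API.

package main

import (
	"fmt"
	"sync"
)

// repLookup is a simplified stand-in for the server's rpl field: a map
// from RepGroup to the set of job keys, guarded by an RWMutex so many
// readers can scan it concurrently while writers stay exclusive.
type repLookup struct {
	sync.RWMutex
	lookup map[string]map[string]bool
}

// keysFor collects the job keys recorded under a RepGroup, reading
// under RLock just as getJobsByRepGroup does.
func (r *repLookup) keysFor(repgroup string) []string {
	r.RLock()
	defer r.RUnlock()
	var keys []string
	for key := range r.lookup[repgroup] {
		keys = append(keys, key)
	}
	return keys
}

func main() {
	rpl := &repLookup{lookup: map[string]map[string]bool{
		"rg1": {"key1": true, "key2": true},
	}}
	fmt.Println(len(rpl.keysFor("rg1"))) // 2
}
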
Example #2
// decrementGroupCount adjusts our count of how many jobs with this
// schedulerGroup we need in the job scheduler.
func (s *Server) decrementGroupCount(schedulerGroup string, q *queue.Queue) {
	if s.rc != "" {
		doSchedule := false
		doTrigger := false
		s.sgcmutex.Lock()
		if _, existed := s.sgroupcounts[schedulerGroup]; existed {
			s.sgroupcounts[schedulerGroup]--
			doSchedule = true
			//log.Printf("decremented group [%s] to %d\n", schedulerGroup, s.sgroupcounts[schedulerGroup])
			if count, set := s.sgrouptrigs[schedulerGroup]; set {
				s.sgrouptrigs[schedulerGroup]++
				if count >= 100 {
					s.sgrouptrigs[schedulerGroup] = 0
					if s.sgroupcounts[schedulerGroup] > 10 {
						doTrigger = true
					}
				}
			}
		}
		s.sgcmutex.Unlock()

		if doTrigger {
			// we most likely have completed 100 more jobs for this group, so
			// we'll trigger our ready callback which will re-calculate the
			// best resource requirements for the remaining jobs in the group
			// and then call scheduleRunners
			q.TriggerReadyAddedCallback()
		} else if doSchedule {
			// notify the job scheduler that we now need fewer jobs for
			// this job's cmd; it will remove extraneous ones from its queue
			s.scheduleRunners(q, schedulerGroup)
		}
	}
}
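
Note the locking shape here: all map bookkeeping happens under sgcmutex, the decision about what to do next is captured in booleans, and the scheduling or callback triggering only happens after the unlock, so those calls cannot deadlock against the mutex. A minimal sketch of that decide-under-lock, act-after-unlock pattern, with hypothetical names:

package main

import (
	"fmt"
	"sync"
)

// groupCounts is a hypothetical, simplified stand-in for sgroupcounts.
type groupCounts struct {
	mu     sync.Mutex
	counts map[string]int
}

// decrement updates the count under the lock, but defers the follow-up
// action until after unlocking, so notify can never deadlock on mu.
func (g *groupCounts) decrement(group string, notify func(group string, remaining int)) {
	doNotify := false
	remaining := 0
	g.mu.Lock()
	if _, existed := g.counts[group]; existed {
		g.counts[group]--
		remaining = g.counts[group]
		doNotify = true
	}
	g.mu.Unlock()

	if doNotify {
		notify(group, remaining)
	}
}

func main() {
	g := &groupCounts{counts: map[string]int{"g1": 3}}
	g.decrement("g1", func(group string, remaining int) {
		fmt.Printf("group %s now needs %d runners\n", group, remaining)
	})
}
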
Example #3
// getJobsByKeys gets jobs with the given keys (current and complete)
func (s *Server) getJobsByKeys(q *queue.Queue, keys []string, getStd bool, getEnv bool) (jobs []*Job, srerr string, qerr string) {
	var notfound []string
	for _, jobkey := range keys {
		// try and get the job from the in-memory queue
		item, err := q.Get(jobkey)
		var job *Job
		if err == nil && item != nil {
			job = s.itemToJob(item, getStd, getEnv)
		} else {
			notfound = append(notfound, jobkey)
		}

		if job != nil {
			jobs = append(jobs, job)
		}
	}

	if len(notfound) > 0 {
		// try and get the jobs from the permanent store
		found, err := s.db.retrieveCompleteJobsByKeys(notfound, getStd, getEnv)
		if err != nil {
			srerr = ErrDBError
			qerr = err.Error()
		} else if len(found) > 0 {
			jobs = append(jobs, found...)
		}
	}

	return
}
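
The structure is a classic two-tier lookup: satisfy what you can from the fast in-memory queue, batch up the misses, and resolve them against the slower permanent store in a single call. A self-contained sketch of that shape, with the store mocked as a function:

package main

import "fmt"

// getMany tries the in-memory map first, collects the misses, then
// resolves them in one batch against a (mocked) permanent store.
func getMany(keys []string, mem map[string]string, store func([]string) map[string]string) []string {
	var found []string
	var misses []string
	for _, key := range keys {
		if v, ok := mem[key]; ok {
			found = append(found, v)
		} else {
			misses = append(misses, key)
		}
	}
	if len(misses) > 0 {
		fromStore := store(misses)
		for _, key := range misses {
			if v, ok := fromStore[key]; ok {
				found = append(found, v)
			}
		}
	}
	return found
}

func main() {
	mem := map[string]string{"a": "live job a"}
	store := func(keys []string) map[string]string {
		return map[string]string{"b": "complete job b"} // "c" stays missing
	}
	fmt.Println(getMany([]string{"a", "b", "c"}, mem, store))
}
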
Example #4
// getJobsCurrent gets all current (incomplete) jobs
func (s *Server) getJobsCurrent(q *queue.Queue, limit int, state string, getStd bool, getEnv bool) (jobs []*Job) {
	for _, item := range q.AllItems() {
		jobs = append(jobs, s.itemToJob(item, false, false))
	}

	if limit > 0 || state != "" {
		jobs = s.limitJobs(jobs, limit, state, getStd, getEnv)
	}

	return
}
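
limitJobs itself is not shown in these examples; purely as an illustration of what a limit/state step can look like (not wr's actual implementation), filtering by state and capping the result might be sketched as:

package main

import "fmt"

// jobStub is a hypothetical cut-down job with only the fields this
// illustration needs; the real wr Job is much richer.
type jobStub struct {
	key   string
	state string
}

// filterAndCap keeps jobs matching the requested state (or all jobs if
// state is empty), then caps the result at limit. It only illustrates
// the general idea behind a limit/state step.
func filterAndCap(jobs []*jobStub, limit int, state string) []*jobStub {
	var out []*jobStub
	for _, j := range jobs {
		if state == "" || j.state == state {
			out = append(out, j)
		}
	}
	if limit > 0 && len(out) > limit {
		out = out[:limit]
	}
	return out
}

func main() {
	jobs := []*jobStub{{"a", "run"}, {"b", "delay"}, {"c", "run"}}
	fmt.Println(len(filterAndCap(jobs, 1, "run"))) // 1
}
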
Example #5
// enqueueItems adds new items to a queue, for when we have new jobs to handle.
func (s *Server) enqueueItems(q *queue.Queue, itemdefs []*queue.ItemDef) (added int, dups int, err error) {
	added, dups, err = q.AddMany(itemdefs)
	if err != nil {
		return
	}

	// add to our lookup of job RepGroup to key
	s.rpl.Lock()
	for _, itemdef := range itemdefs {
		rp := itemdef.Data.(*Job).RepGroup
		if _, exists := s.rpl.lookup[rp]; !exists {
			s.rpl.lookup[rp] = make(map[string]bool)
		}
		s.rpl.lookup[rp][itemdef.Key] = true
	}
	s.rpl.Unlock()

	return
}
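
This is the write side of the RepGroup index that getJobsByRepGroup reads in Example #1: under the full lock, the inner set is created on first use and the key recorded. A simplified stand-alone sketch:

package main

import (
	"fmt"
	"sync"
)

// repIndex is a simplified stand-in for the server's rpl field, showing
// the writer side of the RepGroup -> job key index.
type repIndex struct {
	mu     sync.Mutex
	lookup map[string]map[string]bool
}

// add records a job key under its RepGroup, creating the inner set the
// first time a RepGroup is seen, just as enqueueItems does.
func (ix *repIndex) add(repgroup, key string) {
	ix.mu.Lock()
	defer ix.mu.Unlock()
	if _, exists := ix.lookup[repgroup]; !exists {
		ix.lookup[repgroup] = make(map[string]bool)
	}
	ix.lookup[repgroup][key] = true
}

func main() {
	ix := &repIndex{lookup: make(map[string]map[string]bool)}
	ix.add("rg1", "key1")
	ix.add("rg1", "key2")
	fmt.Println(len(ix.lookup["rg1"])) // 2
}
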
Example #6
// getij does the common work needed by the many j* methods in
// handleRequest: it retrieves the desired item and job for a client request.
func (s *Server) getij(cr *clientRequest, q *queue.Queue) (item *queue.Item, job *Job, errs string) {
	// clientRequest must have a Job
	if cr.Job == nil {
		errs = ErrBadRequest
		return
	}

	item, err := q.Get(cr.Job.key())
	if err != nil || item.Stats().State != "run" {
		errs = ErrBadJob
		return
	}
	job = item.Data.(*Job)

	if !uuid.Equal(cr.ClientID, job.ReservedBy) {
		errs = ErrMustReserve
	}

	return
}
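
Each guard clause fails fast with a distinct error string (ErrBadRequest, ErrBadJob, ErrMustReserve). A self-contained sketch of the same validation chain, with hypothetical stand-in types and errors:

package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins for the server's error strings.
var (
	errBadRequest  = errors.New("bad request")
	errBadJob      = errors.New("job not running")
	errMustReserve = errors.New("client did not reserve this job")
)

// job is a cut-down stand-in with only the fields the checks need.
type job struct {
	state      string
	reservedBy string
}

// validate applies the same fail-fast checks in the same order: request
// sanity, job state, then reservation ownership.
func validate(j *job, clientID string) error {
	if j == nil {
		return errBadRequest
	}
	if j.state != "run" {
		return errBadJob
	}
	if clientID != j.reservedBy {
		return errMustReserve
	}
	return nil
}

func main() {
	fmt.Println(validate(&job{state: "run", reservedBy: "c1"}, "c1")) // <nil>
	fmt.Println(validate(&job{state: "run", reservedBy: "c1"}, "c2")) // client did not reserve this job
}
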
Example #7
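// scheduleRunners asks the job scheduler to run the appropriate runner
// command for the jobs in the given scheduler group. Jobs that are
// impossible to schedule get buried, and the group is cleared once
// nothing in it remains to be scheduled.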
func (s *Server) scheduleRunners(q *queue.Queue, group string) {
	if s.rc == "" {
		return
	}

	s.sgcmutex.Lock()
	req, hadreq := s.sgtr[group]
	if !hadreq {
		s.sgcmutex.Unlock()
		return
	}

	doClear := false
	groupCount := s.sgroupcounts[group]
	if groupCount < 0 {
		s.sgroupcounts[group] = 0
		groupCount = 0
		doClear = true
	}
	s.sgcmutex.Unlock()

	if !doClear {
		err := s.scheduler.Schedule(fmt.Sprintf(s.rc, q.Name, group, s.ServerInfo.Deployment, s.ServerInfo.Addr, s.scheduler.ReserveTimeout(), int(s.scheduler.MaxQueueTime(req).Minutes())), req, groupCount)
		if err != nil {
			problem := true
			if serr, ok := err.(scheduler.Error); ok && serr.Err == scheduler.ErrImpossible {
				// bury all jobs in this scheduler group
				problem = false
				rf := func(data interface{}) bool {
					return data.(*Job).schedulerGroup == group
				}
				s.sgcmutex.Lock()
				for {
					item, err := q.ReserveFiltered(rf)
					if err != nil {
						problem = true
						break
					}
					if item == nil {
						break
					}
					job := item.Data.(*Job)
					job.FailReason = FailReasonResource
					q.Bury(item.Key)
					s.sgroupcounts[group]--
				}
				s.sgcmutex.Unlock()
				if !problem {
					doClear = true
				}
			}

			if problem {
				// log the error *** and inform (by email) the user about this
				// problem if it's persistent, once per hour (day?)
				log.Println(err)

				// retry the schedule in a while
				go func() {
					<-time.After(1 * time.Minute)
					s.scheduleRunners(q, group)
				}()
				return
			}
		}
	}

	if doClear {
		//log.Printf("group [%s] count dropped to 0, will clear\n", group)
		s.clearSchedulerGroup(group, q)
	}
}
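
When a schedule attempt fails for any reason other than impossible resource requirements, the function logs the error and retries itself from a goroutine after a delay rather than blocking the caller. A minimal sketch of that delayed-retry pattern; the interval and the attempt signature are assumptions for illustration:

package main

import (
	"fmt"
	"time"
)

// retryLater runs attempt once in a goroutine after the given delay,
// returning immediately so the caller is never blocked.
func retryLater(attempt func() error, after time.Duration) {
	go func() {
		<-time.After(after)
		if err := attempt(); err != nil {
			fmt.Println("retry failed:", err)
		}
	}()
}

func main() {
	done := make(chan struct{})
	retryLater(func() error {
		close(done)
		return nil
	}, 10*time.Millisecond)
	<-done // wait so main doesn't exit before the retry fires
}
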