Example #1
// initialize finds out about the local machine. Only compatible with
// Linux-like systems that have /proc/meminfo!
func (s *local) initialize(config interface{}) (err error) {
	s.config = config.(*ConfigLocal)
	s.maxCores = runtime.NumCPU()
	s.maxRAM, err = s.procMeminfoMBs()
	if err != nil {
		return
	}

	// make our queue
	s.queue = queue.New(localPlace)
	s.running = make(map[string]int)

	// set our functions for use in schedule() and processQueue()
	s.reqCheckFunc = s.reqCheck
	s.canCountFunc = s.canCount
	s.runCmdFunc = s.runCmd

	return
}
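
maxRAM above comes from parsing /proc/meminfo, which is why the doc comment limits this scheduler to Linux-like systems. Below is a minimal standalone sketch of how such a helper could work; it is an illustration of the idea, not the package's actual procMeminfoMBs implementation:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// meminfoMBs returns the machine's total memory in MB by scanning
// /proc/meminfo for its MemTotal line (a sketch, not wr's real code)
func meminfoMBs() (int, error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		// lines look like "MemTotal:       16384516 kB"
		fields := strings.Fields(scanner.Text())
		if len(fields) >= 2 && fields[0] == "MemTotal:" {
			kb, err := strconv.Atoi(fields[1])
			if err != nil {
				return 0, err
			}
			return kb / 1024, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	return 0, fmt.Errorf("MemTotal not found in /proc/meminfo")
}

func main() {
	mbs, err := meminfoMBs()
	fmt.Println(mbs, err)
}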
Example #2
// initialize sets up an openstack scheduler.
func (s *opst) initialize(config interface{}) (err error) {
	s.config = config.(*ConfigOpenStack)
	if s.config.OSRAM == 0 {
		s.config.OSRAM = 2048
	}

	// create a cloud provider that we'll use to interact with openstack
	provider, err := cloud.New("openstack", s.config.ResourceName, s.config.SavePath)
	if err != nil {
		return
	}
	s.provider = provider

	err = provider.Deploy(s.config.ServerPorts)
	if err != nil {
		return
	}

	// query our quota maximums for cpu, memory and total number of
	// instances; 0 means unlimited
	quota, err := provider.GetQuota()
	if err != nil {
		return
	}
	s.quotaMaxCores = quota.MaxCores
	s.quotaMaxRAM = quota.MaxRAM
	s.quotaMaxInstances = quota.MaxInstances
	if s.config.MaxInstances > 0 && s.config.MaxInstances < s.quotaMaxInstances {
		s.quotaMaxInstances = s.config.MaxInstances
	}

	// initialize our job queue and other trackers
	s.queue = queue.New(localPlace)
	s.running = make(map[string]int)

	// initialise our servers map with details of ourselves
	s.servers = make(map[string]*cloud.Server)
	maxRAM, err := s.procMeminfoMBs()
	if err != nil {
		return
	}
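	// estimate this machine's total disk space from the filesystem the
	// current directory is on (du is assumed to be a disk-usage helper
	// package whose Size() returns bytes)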
	usage := du.NewDiskUsage(".")
	s.servers["localhost"] = &cloud.Server{
		IP: "127.0.0.1",
		Flavor: cloud.Flavor{
			RAM:   maxRAM,
			Cores: runtime.NumCPU(),
			Disk:  int(usage.Size() / gb),
		},
	}

	// set our functions for use in schedule() and processQueue()
	s.reqCheckFunc = s.reqCheck
	s.canCountFunc = s.canCount
	s.runCmdFunc = s.runCmd

	// pass through our shell config to our local embed
	s.local.config = &ConfigLocal{Shell: s.config.Shell}

	s.standins = make(map[string]*standin)
	s.stopWaitingToSpawn = make(chan bool)

	return
}
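
Note how the instance cap is derived: the provider's quota is tightened by the configured maximum, and 0 means unlimited on both sides. A standalone sketch of that clamping logic (the helper name is hypothetical):

package main

import "fmt"

// effectiveMaxInstances mirrors the clamping above: lower the quota to
// the configured maximum only when that is set (non-zero) and stricter
func effectiveMaxInstances(quotaMax, configMax int) int {
	if configMax > 0 && configMax < quotaMax {
		return configMax
	}
	return quotaMax
}

func main() {
	fmt.Println(effectiveMaxInstances(100, 10)) // 10
	fmt.Println(effectiveMaxInstances(100, 0))  // 100 (no configured cap)
	fmt.Println(effectiveMaxInstances(0, 10))   // 0 (an unlimited quota is never tightened, as above)
}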
Example #3
// getOrCreateQueue returns a queue with the given name, creating one if we
// haven't already done so.
func (s *Server) getOrCreateQueue(qname string) *queue.Queue {
	s.Lock()
	q, existed := s.qs[qname]
	if !existed {
		q = queue.New(qname)
		s.qs[qname] = q

		// we set a callback for things entering this queue's ready sub-queue.
		// This function will be called in a goroutine and receives a slice of
		// all the ready jobs. Based on their requirements, we add a
		// schedulerGroup to each job, which the runners we spawn can pass to
		// ReserveFiltered so that they run the correct jobs for the machine
		// and resource reservations the job scheduler runs them under.
		q.SetReadyAddedCallback(func(queuename string, allitemdata []interface{}) {
			if s.drain {
				return
			}

			// calculate, set and count jobs by schedulerGroup
			groups := make(map[string]int)
			recMBs := make(map[string]int)
			recSecs := make(map[string]int)
			noRecGroups := make(map[string]bool)
			for _, inter := range allitemdata {
				job := inter.(*Job)
				// depending on job.Override, get memory and time
				// recommendations, which are rounded to give fewer,
				// larger groups
				noRec := false
				if job.Override != 2 {
					var recm int
					var done bool
					var err error
					if recm, done = recMBs[job.ReqGroup]; !done {
						recm, err = s.db.recommendedReqGroupMemory(job.ReqGroup)
						if err == nil {
							recMBs[job.ReqGroup] = recm
						}
					}
					if recm == 0 {
						recm = job.RAM
						noRec = true
					}

					var recs int
					if recs, done = recSecs[job.ReqGroup]; !done {
						recs, err = s.db.recommendedReqGroupTime(job.ReqGroup)
						if err == nil {
							recSecs[job.ReqGroup] = recs
						}
					}
					if recs == 0 {
						recs = int(job.Time.Seconds())
						noRec = true
					}

					if job.Override == 1 {
						if recm > job.RAM {
							job.RAM = recm
						}
						if recs > int(job.Time.Seconds()) {
							job.Time = time.Duration(recs) * time.Second
						}
					} else {
						job.RAM = recm
						job.Time = time.Duration(recs) * time.Second
					}
				}

				// our req will be job memory + 100MB to allow some leeway in
				// case the job scheduler calculates used memory differently,
				// and for other vagaries of memory usage
				req := &scheduler.Requirements{
					RAM:   job.RAM + 100,
					Time:  job.Time,
					Cores: job.Cores,
					Disk:  0,
					Other: "", // *** how to pass though scheduler extra args?
				}
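				// the group key is "RAM:minutes:cores", so jobs with
				// identical rounded requirements share a schedulerGroup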
				job.schedulerGroup = fmt.Sprintf("%d:%.0f:%d", req.RAM, req.Time.Minutes(), req.Cores)

				if s.rc != "" {
					groups[job.schedulerGroup]++

					if noRec {
						noRecGroups[job.schedulerGroup] = true
					}

					s.sgcmutex.Lock()
					if _, set := s.sgtr[job.schedulerGroup]; !set {
						s.sgtr[job.schedulerGroup] = req
					}
					s.sgcmutex.Unlock()
				}
			}

			if s.rc != "" {
				// schedule runners for each group in the job scheduler
				for group, count := range groups {
					// we also keep a count of how many we request for this
					// group, so that when we Archive() or Bury() we can
					// decrement the count and re-call Schedule() to get rid
					// of no-longer-needed pending runners in the job
					// scheduler
					s.sgcmutex.Lock()
					s.sgroupcounts[group] = count
					s.sgcmutex.Unlock()
					s.scheduleRunners(q, group)

					if _, noRec := noRecGroups[group]; noRec && count > 100 {
						s.sgrouptrigs[group] = 0
					}
				}

				// clear out groups we no longer need
				for group := range s.sgroupcounts {
					if _, needed := groups[group]; !needed {
						s.clearSchedulerGroup(group, q)
					}
				}
				for group := range s.sgrouptrigs {
					if _, needed := groups[group]; !needed {
						delete(s.sgrouptrigs, group)
					}
				}
			}
		})

		// we set a callback for things changing in the queue, which lets us
		// update the status webpage with minimal work and data transfer
		q.SetChangedCallback(func(from string, to string, data []interface{}) {
			if to == "removed" {
				// things are removed from the queue if deleted or completed;
				// disambiguate
				to = "deleted"
				for _, inter := range data {
					job := inter.(*Job)
					if job.State == "complete" {
						to = "complete"
						break
					}
				}
			}

			// overall count
			s.statusCaster.Send(&jstateCount{"+all+", from, to, len(data)})

			// counts per RepGroup
			groups := make(map[string]int)
			for _, inter := range data {
				job := inter.(*Job)
				groups[job.RepGroup]++
			}
			for group, count := range groups {
				s.statusCaster.Send(&jstateCount{group, from, to, count})
			}
		})
	}
	s.Unlock()

	return q
}
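
The schedulerGroup key built in the ready callback is just the rounded requirements joined into a "RAM:minutes:cores" string, so jobs with matching needs collapse into one group. A minimal sketch reproducing that key format, matching the Sprintf above:

package main

import (
	"fmt"
	"time"
)

// schedulerGroup reproduces the key format from the callback above:
// RAM in MB, run time in whole minutes, and core count
func schedulerGroup(ramMB int, t time.Duration, cores int) string {
	return fmt.Sprintf("%d:%.0f:%d", ramMB, t.Minutes(), cores)
}

func main() {
	a := schedulerGroup(1124, 30*time.Minute, 1)
	b := schedulerGroup(1124, 30*time.Minute, 1)
	fmt.Println(a, a == b) // 1124:30:1 true
}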