Example #1
// Schedule all of the tasks to be run.  Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues.  Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (self *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")
	err := model.UpdateStaticHosts(self.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")

	runnableTasks, err := self.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := self.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	taskIdToMinQueuePos := make(map[string]int)

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := self.GetExpectedDurations(runnableTasks)

	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	// prioritize the tasks, one distro at a time
	taskQueueItems := make(map[string][]model.TaskQueueItem)
	for _, d := range distros {
		runnableTasksForDistro := tasksByDistro[d.Id]
		evergreen.Logger.Logf(slogger.INFO, "Prioritizing %v tasks for distro %v...",
			len(runnableTasksForDistro), d.Id)

		prioritizedTasks, err := self.PrioritizeTasks(self.Settings,
			runnableTasksForDistro)
		if err != nil {
			return fmt.Errorf("Error prioritizing tasks: %v", err)
		}

		// Update the running minimums of queue position
		// The value is 1-based primarily so that we can differentiate between
		// no value and being first in a queue
		for i, prioritizedTask := range prioritizedTasks {
			minQueuePos, ok := taskIdToMinQueuePos[prioritizedTask.Id]
			if ok {
				taskIdToMinQueuePos[prioritizedTask.Id] =
					int(math.Min(float64(minQueuePos), float64(i+1)))
			} else {
				taskIdToMinQueuePos[prioritizedTask.Id] = i + 1
			}
		}

		// persist the queue of tasks
		evergreen.Logger.Logf(slogger.INFO, "Saving task queue for distro %v...",
			d.Id)
		queuedTasks, err := self.PersistTaskQueue(d.Id, prioritizedTasks,
			taskExpectedDuration)
		if err != nil {
			return fmt.Errorf("Error saving task queue: %v", err)
		}

		// track scheduled time for prioritized tasks
		err = model.SetTasksScheduledTime(prioritizedTasks, time.Now())
		if err != nil {
			return fmt.Errorf("Error setting scheduled time for prioritized "+
				"tasks: %v", err)
		}

		taskQueueItems[d.Id] = queuedTasks
	}

	err = model.UpdateMinQueuePos(taskIdToMinQueuePos)
	if err != nil {
		return fmt.Errorf("Error updating tasks with queue positions: %v", err)
	}

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := self.NewHostsNeeded(hostAllocatorData, self.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v",
			err)
	}

	// spawn up the hosts
	hostsSpawned, err := self.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	return nil
}
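
The Schedule method above is meant to be run as one pass of a recurring scheduling loop. Below is a minimal driver sketch, assuming a Scheduler constructed elsewhere and a fixed polling interval; the function name and the interval handling are hypothetical and only illustrate how Schedule would be called, not how the real Evergreen runner is wired up.

// Hypothetical driver: runs one scheduling pass per interval and logs any
// failure instead of exiting. The actual Evergreen runner entry point differs;
// this only shows the intended call pattern for Schedule.
func runSchedulerLoop(s *Scheduler, interval time.Duration) {
	for {
		if err := s.Schedule(); err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "scheduling pass failed: %v", err)
		}
		time.Sleep(interval)
	}
}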
Example #2
// Schedule all of the tasks to be run.  Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues.  Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (s *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")

	err := model.UpdateStaticHosts(s.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")

	runnableTasks, err := s.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := s.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := s.GetExpectedDurations(runnableTasks)

	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	distroInputChan := make(chan distroSchedulerInput, len(distros))

	// put all of the needed input for the distro scheduler into a channel to be read by the
	// distro scheduling loop
	for distroId, tasks := range tasksByDistro {
		distroInputChan <- distroSchedulerInput{
			distroId:               distroId,
			runnableTasksForDistro: tasks,
		}
	}
	// close the channel to signal that the loop reading from it can terminate
	close(distroInputChan)
	workers := runtime.NumCPU()

	wg := sync.WaitGroup{}
	wg.Add(workers)

	// make a channel to collect the results from scheduling each distro
	distroSchedulerResultChan := make(chan *distroSchedulerResult)

	// for each worker, create a new goroutine
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			// read the inputs for scheduling this distro
			for d := range distroInputChan {
				// schedule the distro
				res := s.scheduleDistro(d.distroId, d.runnableTasksForDistro, taskExpectedDuration)
				if res.err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "%v", res.err)
				}

				// write the results out to a results channel
				distroSchedulerResultChan <- res

			}
		}()
	}

	// initialize a map of scheduler events, keyed by distro
	schedulerEvents := map[string]event.TaskQueueInfo{}

	// map of distro id to the task queue that was persisted for it
	taskQueueItems := make(map[string][]model.TaskQueueItem)

	var errResult error

	// collect the results from the distro scheduler goroutines; the channel is
	// drained completely so the workers never block on send, and done is closed
	// once every result has been read
	done := make(chan struct{})
	go func() {
		defer close(done)
		for res := range distroSchedulerResultChan {
			if res.err != nil {
				if errResult == nil {
					errResult = fmt.Errorf("error scheduling tasks on distro %v: %v", res.distroId, res.err)
				}
				continue
			}
			schedulerEvents[res.distroId] = res.schedulerEvent
			taskQueueItems[res.distroId] = res.taskQueueItem
		}
	}()

	// wait for the distro scheduler goroutines to complete
	wg.Wait()
	// the wait group has terminated, so the results channel can be closed
	close(distroSchedulerResultChan)
	// wait for the collector goroutine to finish reading the results
	<-done

	if errResult != nil {
		return errResult
	}

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// record how many hosts are currently running for each distro in the event data
	for distroId, hosts := range hostsByDistro {
		taskQueueInfo := schedulerEvents[distroId]
		taskQueueInfo.NumHostsRunning = len(hosts)
		schedulerEvents[distroId] = taskQueueInfo
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := s.NewHostsNeeded(hostAllocatorData, s.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v",
			err)
	}

	// spawn up the hosts
	hostsSpawned, err := s.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}

			taskQueueInfo := schedulerEvents[distro]
			taskQueueInfo.NumHostsRunning += len(hosts)
			schedulerEvents[distro] = taskQueueInfo
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	for d, t := range schedulerEvents {
		eventLog := event.SchedulerEventData{
			ResourceType:  event.ResourceTypeScheduler,
			TaskQueueInfo: t,
			DistroId:      d,
		}
		event.LogSchedulerEvent(eventLog)
	}

	return nil
}
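
Example #2 fans the per-distro scheduling out to a pool of worker goroutines through the distroSchedulerInput and distroSchedulerResult types, which are not shown in the snippet. Below is a rough sketch of what those types might look like, inferred from how their fields are used above; the actual definitions live elsewhere in the scheduler package and may differ, and the element type of the task slice is assumed here to be model.Task.

// Assumed shapes, reconstructed from field usage in Example #2; not the
// authoritative definitions.
type distroSchedulerInput struct {
	distroId               string       // distro the tasks should run on
	runnableTasksForDistro []model.Task // tasks ready to run on that distro (element type assumed)
}

type distroSchedulerResult struct {
	distroId       string                // distro that was scheduled
	schedulerEvent event.TaskQueueInfo   // queue info recorded in the scheduler event log
	taskQueueItem  []model.TaskQueueItem // persisted task queue entries for the distro
	err            error                 // non-nil if scheduling this distro failed
}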