// Schedule all of the tasks to be run. Works by finding all of the tasks that // are ready to be run, splitting them by distro, prioritizing them, and saving // the per-distro queues. Then determines the number of new hosts to spin up // for each distro, and spins them up. func (self *Scheduler) Schedule() error { // make sure the correct static hosts are in the database evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...") err := model.UpdateStaticHosts(self.Settings) if err != nil { return fmt.Errorf("error updating static hosts: %v", err) } // find all tasks ready to be run evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...") runnableTasks, err := self.FindRunnableTasks() if err != nil { return fmt.Errorf("Error finding runnable tasks: %v", err) } evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks)) // split the tasks by distro tasksByDistro, taskRunDistros, err := self.splitTasksByDistro(runnableTasks) if err != nil { return fmt.Errorf("Error splitting tasks by distro to run on: %v", err) } // load in all of the distros distros, err := distro.Find(distro.All) if err != nil { return fmt.Errorf("Error finding distros: %v", err) } taskIdToMinQueuePos := make(map[string]int) // get the expected run duration of all runnable tasks taskExpectedDuration, err := self.GetExpectedDurations(runnableTasks) if err != nil { return fmt.Errorf("Error getting expected task durations: %v", err) } // prioritize the tasks, one distro at a time taskQueueItems := make(map[string][]model.TaskQueueItem) for _, d := range distros { runnableTasksForDistro := tasksByDistro[d.Id] evergreen.Logger.Logf(slogger.INFO, "Prioritizing %v tasks for distro %v...", len(runnableTasksForDistro), d.Id) prioritizedTasks, err := self.PrioritizeTasks(self.Settings, runnableTasksForDistro) if err != nil { return fmt.Errorf("Error prioritizing tasks: %v", err) } // Update the running minimums of queue position // The value is 1-based primarily so that we can differentiate between // no value and being first in a queue for i, prioritizedTask := range prioritizedTasks { minQueuePos, ok := taskIdToMinQueuePos[prioritizedTask.Id] if ok { taskIdToMinQueuePos[prioritizedTask.Id] = int(math.Min(float64(minQueuePos), float64(i+1))) } else { taskIdToMinQueuePos[prioritizedTask.Id] = i + 1 } } // persist the queue of tasks evergreen.Logger.Logf(slogger.INFO, "Saving task queue for distro %v...", d.Id) queuedTasks, err := self.PersistTaskQueue(d.Id, prioritizedTasks, taskExpectedDuration) if err != nil { return fmt.Errorf("Error saving task queue: %v", err) } // track scheduled time for prioritized tasks err = model.SetTasksScheduledTime(prioritizedTasks, time.Now()) if err != nil { return fmt.Errorf("Error setting scheduled time for prioritized "+ "tasks: %v", err) } taskQueueItems[d.Id] = queuedTasks } err = model.UpdateMinQueuePos(taskIdToMinQueuePos) if err != nil { return fmt.Errorf("Error updating tasks with queue positions: %v", err) } // split distros by name distrosByName := make(map[string]distro.Distro) for _, d := range distros { distrosByName[d.Id] = d } // fetch all hosts, split by distro allHosts, err := host.Find(host.IsLive) if err != nil { return fmt.Errorf("Error finding live hosts: %v", err) } // figure out all hosts we have up - per distro hostsByDistro := make(map[string][]host.Host) for _, liveHost := range allHosts { hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id], liveHost) } // construct the data that will be needed by the host allocator hostAllocatorData := HostAllocatorData{ existingDistroHosts: hostsByDistro, distros: distrosByName, taskQueueItems: taskQueueItems, taskRunDistros: taskRunDistros, projectTaskDurations: taskExpectedDuration, } // figure out how many new hosts we need newHostsNeeded, err := self.NewHostsNeeded(hostAllocatorData, self.Settings) if err != nil { return fmt.Errorf("Error determining how many new hosts are needed: %v", err) } // spawn up the hosts hostsSpawned, err := self.spawnHosts(newHostsNeeded) if err != nil { return fmt.Errorf("Error spawning new hosts: %v", err) } if len(hostsSpawned) != 0 { evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ", len(hostsSpawned)) for distro, hosts := range hostsSpawned { evergreen.Logger.Logf(slogger.INFO, " %v ->", distro) for _, host := range hosts { evergreen.Logger.Logf(slogger.INFO, " %v", host.Id) } } } else { evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned") } return nil }
// Schedule all of the tasks to be run. Works by finding all of the tasks that // are ready to be run, splitting them by distro, prioritizing them, and saving // the per-distro queues. Then determines the number of new hosts to spin up // for each distro, and spins them up. func (s *Scheduler) Schedule() error { // make sure the correct static hosts are in the database evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...") err := model.UpdateStaticHosts(s.Settings) if err != nil { return fmt.Errorf("error updating static hosts: %v", err) } // find all tasks ready to be run evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...") runnableTasks, err := s.FindRunnableTasks() if err != nil { return fmt.Errorf("Error finding runnable tasks: %v", err) } evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks)) // split the tasks by distro tasksByDistro, taskRunDistros, err := s.splitTasksByDistro(runnableTasks) if err != nil { return fmt.Errorf("Error splitting tasks by distro to run on: %v", err) } // load in all of the distros distros, err := distro.Find(distro.All) if err != nil { return fmt.Errorf("Error finding distros: %v", err) } // get the expected run duration of all runnable tasks taskExpectedDuration, err := s.GetExpectedDurations(runnableTasks) if err != nil { return fmt.Errorf("Error getting expected task durations: %v", err) } distroInputChan := make(chan distroSchedulerInput, len(distros)) // put all of the needed input for the distro scheduler into a channel to be read by the // distro scheduling loop for distroId, task := range tasksByDistro { distroInputChan <- distroSchedulerInput{ distroId: distroId, runnableTasksForDistro: task, } } // close the channel to signal that the loop reading from it can terminate close(distroInputChan) workers := runtime.NumCPU() wg := sync.WaitGroup{} wg.Add(workers) // make a channel to collect all of function results from scheduling the distros distroSchedulerResultChan := make(chan *distroSchedulerResult) // for each worker, create a new goroutine for i := 0; i < workers; i++ { go func() { defer wg.Done() // read the inputs for scheduling this distro for d := range distroInputChan { // schedule the distro res := s.scheduleDistro(d.distroId, d.runnableTasksForDistro, taskExpectedDuration) if res.err != nil { evergreen.Logger.Logf(slogger.ERROR, "%v", err) } // write the results out to a results channel distroSchedulerResultChan <- res } }() } // signal the errCollector goroutine that it can terminate it's loop // intialize a map of scheduler events schedulerEvents := map[string]event.TaskQueueInfo{} // prioritize the tasks, one distro at a time taskQueueItems := make(map[string][]model.TaskQueueItem) var errResult error go func() { for res := range distroSchedulerResultChan { if res.err != nil { errResult = fmt.Errorf("error scheduling tasks on distro %v: %v", res.distroId, err) return } schedulerEvents[res.distroId] = res.schedulerEvent taskQueueItems[res.distroId] = res.taskQueueItem } }() if errResult != nil { return errResult } // wait for the distro scheduler goroutines to complete to complete wg.Wait() // wait group has terminated so scheduler channel can be closed close(distroSchedulerResultChan) // split distros by name distrosByName := make(map[string]distro.Distro) for _, d := range distros { distrosByName[d.Id] = d } // fetch all hosts, split by distro allHosts, err := host.Find(host.IsLive) if err != nil { return fmt.Errorf("Error finding live hosts: %v", err) } // figure out all hosts we have up - per distro hostsByDistro := make(map[string][]host.Host) for _, liveHost := range allHosts { hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id], liveHost) } // add the length of the host lists of hosts that are running to the event log. for distroId, hosts := range hostsByDistro { taskQueueInfo := schedulerEvents[distroId] taskQueueInfo.NumHostsRunning = len(hosts) schedulerEvents[distroId] = taskQueueInfo } // construct the data that will be needed by the host allocator hostAllocatorData := HostAllocatorData{ existingDistroHosts: hostsByDistro, distros: distrosByName, taskQueueItems: taskQueueItems, taskRunDistros: taskRunDistros, projectTaskDurations: taskExpectedDuration, } // figure out how many new hosts we need newHostsNeeded, err := s.NewHostsNeeded(hostAllocatorData, s.Settings) if err != nil { return fmt.Errorf("Error determining how many new hosts are needed: %v", err) } // spawn up the hosts hostsSpawned, err := s.spawnHosts(newHostsNeeded) if err != nil { return fmt.Errorf("Error spawning new hosts: %v", err) } if len(hostsSpawned) != 0 { evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ", len(hostsSpawned)) for distro, hosts := range hostsSpawned { evergreen.Logger.Logf(slogger.INFO, " %v ->", distro) for _, host := range hosts { evergreen.Logger.Logf(slogger.INFO, " %v", host.Id) } taskQueueInfo := schedulerEvents[distro] taskQueueInfo.NumHostsRunning += len(hosts) schedulerEvents[distro] = taskQueueInfo } } else { evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned") } for d, t := range schedulerEvents { eventLog := event.SchedulerEventData{ ResourceType: event.ResourceTypeScheduler, TaskQueueInfo: t, DistroId: d, } event.LogSchedulerEvent(eventLog) } return nil }