func (as *APIServer) listDistros(w http.ResponseWriter, r *http.Request) {
	distros, err := distro.Find(distro.BySpawnAllowed())
	if err != nil {
		as.LoggedError(w, r, http.StatusInternalServerError, err)
		return
	}
	distroList := []string{}
	for _, d := range distros {
		distroList = append(distroList, d.Id)
	}
	as.WriteJSON(w, http.StatusOK, spawnResponse{Distros: distroList})
}
// create a slice of all valid distro names
func getDistroIds() ([]string, error) {
	// create a slice of all known distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return nil, err
	}
	distroIds := []string{}
	for _, d := range distros {
		if !util.SliceContains(distroIds, d.Id) {
			distroIds = append(distroIds, d.Id)
		}
	}
	return distroIds, nil
}
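// A minimal sketch of how getDistroIds might back a configuration check. The
// validator below (checkTaskRunDistros) and its argument are illustrative
// assumptions, not part of the original code: it flags any "run on" distro id
// that is not present in the database.
func checkTaskRunDistros(runOn []string) ([]string, error) {
	distroIds, err := getDistroIds()
	if err != nil {
		return nil, err
	}
	unknown := []string{}
	for _, id := range runOn {
		if !util.SliceContains(distroIds, id) {
			unknown = append(unknown, id)
		}
	}
	return unknown, nil
}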
// create a slice of all valid distro names
func populateDistroIds() *ValidationError {
	// create a slice of all known distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return &ValidationError{
			Message: fmt.Sprintf("error finding distros: %v", err),
			Level:   Error,
		}
	}
	distroIds = []string{}
	for _, d := range distros {
		if !util.SliceContains(distroIds, d.Id) {
			distroIds = append(distroIds, d.Id)
		}
	}
	return nil
}
func UpdateStaticHosts(e *evergreen.Settings) error {
	distros, err := distro.Find(distro.ByProvider(static.ProviderName))
	if err != nil {
		return err
	}
	activeStaticHosts := make([]string, 0)
	settings := &static.Settings{}
	for _, d := range distros {
		err = mapstructure.Decode(d.ProviderSettings, settings)
		if err != nil {
			return fmt.Errorf("invalid static settings for '%v'", d.Id)
		}
		for _, h := range settings.Hosts {
			hostInfo, err := util.ParseSSHInfo(h.Name)
			if err != nil {
				return err
			}
			user := hostInfo.User
			if user == "" {
				user = d.User
			}
			staticHost := host.Host{
				Id:           h.Name,
				User:         user,
				Host:         h.Name,
				Distro:       d,
				CreationTime: time.Now(),
				Provider:     evergreen.HostTypeStatic,
				StartedBy:    evergreen.User,
				Status:       evergreen.HostRunning,
				Provisioned:  true,
			}
			// upsert the host
			_, err = staticHost.Upsert()
			if err != nil {
				return err
			}
			activeStaticHosts = append(activeStaticHosts, h.Name)
		}
	}
	return host.DecommissionInactiveStaticHosts(activeStaticHosts)
}
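// For reference, a hedged sketch of the decode that UpdateStaticHosts relies
// on. The struct shapes below are inferred from how the function iterates
// settings.Hosts and reads h.Name; the actual static.Settings definition and
// its mapstructure tags may differ.
type sketchStaticHost struct {
	Name string `mapstructure:"name"`
}

type sketchStaticSettings struct {
	Hosts []sketchStaticHost `mapstructure:"hosts"`
}

// decodeStaticSettings shows the mapstructure.Decode call in isolation:
// d.ProviderSettings is stored as a generic document (a map), and Decode
// copies the matching keys into the typed settings struct.
func decodeStaticSettings(providerSettings map[string]interface{}) (*sketchStaticSettings, error) {
	settings := &sketchStaticSettings{}
	if err := mapstructure.Decode(providerSettings, settings); err != nil {
		return nil, fmt.Errorf("invalid static settings: %v", err)
	}
	return settings, nil
}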
func (uis *UIServer) listSpawnableDistros(w http.ResponseWriter, r *http.Request) {
	// load in the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		uis.LoggedError(w, r, http.StatusInternalServerError,
			fmt.Errorf("Error loading distros: %v", err))
		return
	}
	distroList := []map[string]interface{}{}
	for _, d := range distros {
		if d.SpawnAllowed {
			distroList = append(distroList, map[string]interface{}{
				"name":             d.Id,
				"userDataFile":     d.UserData.File,
				"userDataValidate": d.UserData.Validate,
			})
		}
	}
	uis.WriteJSON(w, http.StatusOK, distroList)
}
func (uis *UIServer) distrosPage(w http.ResponseWriter, r *http.Request) {
	projCtx := MustHaveProjectContext(r)

	distros, err := distro.Find(distro.All)
	if err != nil {
		message := fmt.Sprintf("error fetching distros: %v", err)
		PushFlash(uis.CookieStore, r, w, NewErrorFlash(message))
		http.Error(w, message, http.StatusInternalServerError)
		return
	}

	sort.Sort(&sortableDistro{distros})

	uis.WriteHTML(w, http.StatusOK, struct {
		Distros     []distro.Distro
		Keys        map[string]string
		User        *user.DBUser
		ProjectData projectContext
		Flashes     []interface{}
	}{distros, uis.Settings.Keys, GetUser(r), projCtx, PopFlashes(uis.CookieStore, r, w)},
		"base", "distros.html", "base_angular.html", "menu.html")
}
// run all monitoring functions
func RunAllMonitoring(settings *evergreen.Settings) error {
	// load in all of the distros
	distros, err := distro.Find(db.Q{})
	if err != nil {
		return fmt.Errorf("error finding distros: %v", err)
	}

	// fetch the project refs, which we will use to get all of the projects
	projectRefs, err := model.FindAllProjectRefs()
	if err != nil {
		return fmt.Errorf("error loading in project refs: %v", err)
	}

	// turn the project refs into a map of the project id -> project
	projects := map[string]model.Project{}
	for _, ref := range projectRefs {
		// only monitor projects that are enabled
		if !ref.Enabled {
			continue
		}
		project, err := model.FindProject("", &ref)

		// continue on error to stop the whole monitoring process from
		// being held up
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "error finding project %v: %v",
				ref.Identifier, err)
			continue
		}

		if project == nil {
			evergreen.Logger.Logf(slogger.ERROR, "no project entry found for"+
				" ref %v", ref.Identifier)
			continue
		}

		projects[project.Identifier] = *project
	}

	// initialize the task monitor
	taskMonitor := &TaskMonitor{
		flaggingFuncs: defaultTaskFlaggingFuncs,
	}

	// clean up any necessary tasks
	errs := taskMonitor.CleanupTasks(projects)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error cleaning up tasks: %v", err)
	}

	// initialize the host monitor
	hostMonitor := &HostMonitor{
		flaggingFuncs:   defaultHostFlaggingFuncs,
		monitoringFuncs: defaultHostMonitoringFuncs,
	}

	// clean up any necessary hosts
	errs = hostMonitor.CleanupHosts(distros, settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error cleaning up hosts: %v", err)
	}

	// run monitoring checks
	errs = hostMonitor.RunMonitoringChecks(settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error running host monitoring checks: %v", err)
	}

	// initialize the notifier
	notifier := &Notifier{
		notificationBuilders: defaultNotificationBuilders,
	}

	// send notifications
	errs = notifier.Notify(settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error sending notifications: %v", err)
	}

	// Do alerts for spawnhosts - collect all hosts expiring in the next 12 hours.
	// The trigger logic will filter out any hosts that aren't in a notification
	// window, or that already have alerts sent.
	now := time.Now()
	thresholdTime := now.Add(12 * time.Hour)
	expiringSoonHosts, err := host.Find(host.ByExpiringBetween(now, thresholdTime))
	if err != nil {
		return err
	}

	for _, h := range expiringSoonHosts {
		err := alerts.RunSpawnWarningTriggers(&h)
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "Error queueing alert: %v", err)
		}
	}

	return nil
}
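// A minimal sketch of how a caller might drive RunAllMonitoring on a fixed
// cadence. The Runner type, its Run loop, and the notion of an interval are
// illustrative assumptions, not part of the original code.
type Runner struct {
	Interval time.Duration
}

// Run loops forever, invoking one monitoring pass per interval and logging,
// rather than aborting on, any error the pass returns.
func (r *Runner) Run(settings *evergreen.Settings) {
	ticker := time.NewTicker(r.Interval)
	defer ticker.Stop()
	for range ticker.C {
		if err := RunAllMonitoring(settings); err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "monitoring pass failed: %v", err)
		}
	}
}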
// Schedule all of the tasks to be run. Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues. Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (self *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")
	err := model.UpdateStaticHosts(self.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")
	runnableTasks, err := self.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}
	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run",
		len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := self.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	taskIdToMinQueuePos := make(map[string]int)

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := self.GetExpectedDurations(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	// prioritize the tasks, one distro at a time
	taskQueueItems := make(map[string][]model.TaskQueueItem)
	for _, d := range distros {
		runnableTasksForDistro := tasksByDistro[d.Id]
		evergreen.Logger.Logf(slogger.INFO, "Prioritizing %v tasks for distro %v...",
			len(runnableTasksForDistro), d.Id)

		prioritizedTasks, err := self.PrioritizeTasks(self.Settings,
			runnableTasksForDistro)
		if err != nil {
			return fmt.Errorf("Error prioritizing tasks: %v", err)
		}

		// Update the running minimums of queue position
		// The value is 1-based primarily so that we can differentiate between
		// no value and being first in a queue
		for i, prioritizedTask := range prioritizedTasks {
			minQueuePos, ok := taskIdToMinQueuePos[prioritizedTask.Id]
			if ok {
				taskIdToMinQueuePos[prioritizedTask.Id] =
					int(math.Min(float64(minQueuePos), float64(i+1)))
			} else {
				taskIdToMinQueuePos[prioritizedTask.Id] = i + 1
			}
		}

		// persist the queue of tasks
		evergreen.Logger.Logf(slogger.INFO, "Saving task queue for distro %v...", d.Id)
		queuedTasks, err := self.PersistTaskQueue(d.Id, prioritizedTasks,
			taskExpectedDuration)
		if err != nil {
			return fmt.Errorf("Error saving task queue: %v", err)
		}

		// track scheduled time for prioritized tasks
		err = model.SetTasksScheduledTime(prioritizedTasks, time.Now())
		if err != nil {
			return fmt.Errorf("Error setting scheduled time for prioritized "+
				"tasks: %v", err)
		}

		taskQueueItems[d.Id] = queuedTasks
	}

	err = model.UpdateMinQueuePos(taskIdToMinQueuePos)
	if err != nil {
		return fmt.Errorf("Error updating tasks with queue positions: %v", err)
	}

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := self.NewHostsNeeded(hostAllocatorData, self.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v", err)
	}

	// spawn up the hosts
	hostsSpawned, err := self.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	return nil
}
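// The composite literal above implies a HostAllocatorData shaped roughly as
// follows. The field types are inferred from how the values are built in
// Schedule (maps keyed by distro id, the queue items persisted per distro)
// and should be read as assumptions rather than the real definition.
type HostAllocatorData struct {
	// currently live hosts, keyed by distro id
	existingDistroHosts map[string][]host.Host
	// all known distros, keyed by distro id
	distros map[string]distro.Distro
	// the persisted task queue for each distro
	taskQueueItems map[string][]model.TaskQueueItem
	// the distros each task is allowed to run on (assumed task id -> distro ids)
	taskRunDistros map[string][]string
	// expected task durations used when sizing the queues (assumed type)
	projectTaskDurations model.ProjectTaskDurations
}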
// Schedule all of the tasks to be run. Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues. Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (s *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")
	err := model.UpdateStaticHosts(s.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")
	runnableTasks, err := s.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}
	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run",
		len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := s.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := s.GetExpectedDurations(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	distroInputChan := make(chan distroSchedulerInput, len(distros))
	// put all of the needed input for the distro scheduler into a channel to be
	// read by the distro scheduling loop
	for distroId, task := range tasksByDistro {
		distroInputChan <- distroSchedulerInput{
			distroId:               distroId,
			runnableTasksForDistro: task,
		}
	}
	// close the channel to signal that the loop reading from it can terminate
	close(distroInputChan)

	workers := runtime.NumCPU()

	wg := sync.WaitGroup{}
	wg.Add(workers)

	// make a channel to collect all of the function results from scheduling the distros
	distroSchedulerResultChan := make(chan *distroSchedulerResult)

	// for each worker, create a new goroutine
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			// read the inputs for scheduling this distro
			for d := range distroInputChan {
				// schedule the distro
				res := s.scheduleDistro(d.distroId, d.runnableTasksForDistro,
					taskExpectedDuration)
				if res.err != nil {
					evergreen.Logger.Logf(slogger.ERROR, "%v", res.err)
				}
				// write the results out to a results channel
				distroSchedulerResultChan <- res
			}
		}()
	}

	// initialize a map of scheduler events
	schedulerEvents := map[string]event.TaskQueueInfo{}

	// collect the queued task items for each distro
	taskQueueItems := make(map[string][]model.TaskQueueItem)

	var errResult error
	go func() {
		for res := range distroSchedulerResultChan {
			if res.err != nil {
				errResult = fmt.Errorf("error scheduling tasks on distro %v: %v",
					res.distroId, res.err)
				return
			}
			schedulerEvents[res.distroId] = res.schedulerEvent
			taskQueueItems[res.distroId] = res.taskQueueItem
		}
	}()

	if errResult != nil {
		return errResult
	}

	// wait for the distro scheduler goroutines to complete
	wg.Wait()
	// the wait group has terminated, so the scheduler result channel can be
	// closed, signaling the collecting goroutine that it can terminate its loop
	close(distroSchedulerResultChan)

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// add the number of running hosts per distro to the event log
	for distroId, hosts := range hostsByDistro {
		taskQueueInfo := schedulerEvents[distroId]
		taskQueueInfo.NumHostsRunning = len(hosts)
		schedulerEvents[distroId] = taskQueueInfo
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := s.NewHostsNeeded(hostAllocatorData, s.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v", err)
	}

	// spawn up the hosts
	hostsSpawned, err := s.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}
			taskQueueInfo := schedulerEvents[distro]
			taskQueueInfo.NumHostsRunning += len(hosts)
			schedulerEvents[distro] = taskQueueInfo
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	for d, t := range schedulerEvents {
		eventLog := event.SchedulerEventData{
			ResourceType:  event.ResourceTypeScheduler,
			TaskQueueInfo: t,
			DistroId:      d,
		}
		event.LogSchedulerEvent(eventLog)
	}

	return nil
}
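// The concurrent version of Schedule relies on two helper types that are not
// shown here. A plausible shape, inferred only from the fields the function
// reads and writes (distroId, runnableTasksForDistro, err, schedulerEvent,
// taskQueueItem), is sketched below; the element type of the task slice is an
// assumption.
type distroSchedulerInput struct {
	distroId               string
	runnableTasksForDistro []model.Task
}

type distroSchedulerResult struct {
	distroId       string
	schedulerEvent event.TaskQueueInfo
	taskQueueItem  []model.TaskQueueItem
	err            error
}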
func (uis *UIServer) allTaskQueues(w http.ResponseWriter, r *http.Request) {
	projCtx := MustHaveProjectContext(r)

	taskQueues, err := model.FindAllTaskQueues()
	if err != nil {
		uis.LoggedError(w, r, http.StatusInternalServerError,
			fmt.Errorf("Error finding task queues: %v", err))
		return
	}

	// find all distros so that we only display task queues of distros that exist.
	allDistros, err := distro.Find(distro.All.WithFields(distro.IdKey))
	if err != nil {
		message := fmt.Sprintf("error fetching distros: %v", err)
		http.Error(w, message, http.StatusInternalServerError)
		return
	}
	distroIds := []string{}
	for _, d := range allDistros {
		distroIds = append(distroIds, d.Id)
	}

	// cached map of version id to relevant patch
	cachedPatches := map[string]*patch.Patch{}

	// convert the task queues to the ui versions
	uiTaskQueues := []uiTaskQueue{}
	for _, tQ := range taskQueues {
		asUI := uiTaskQueue{
			Distro: tQ.Distro,
			Queue:  []uiTaskQueueItem{},
		}
		if len(tQ.Queue) == 0 {
			uiTaskQueues = append(uiTaskQueues, asUI)
			continue
		}

		// convert the individual task queue items
		taskIds := []string{}
		for _, item := range tQ.Queue {
			// cache the ids, for fetching the tasks from the db
			taskIds = append(taskIds, item.Id)
			queueItemAsUI := uiTaskQueueItem{
				Id:                  item.Id,
				DisplayName:         item.DisplayName,
				BuildVariant:        item.BuildVariant,
				RevisionOrderNumber: item.RevisionOrderNumber,
				Requester:           item.Requester,
				Revision:            item.Revision,
				Project:             item.Project,
				ExpectedDuration:    item.ExpectedDuration,
				Priority:            item.Priority,
			}
			asUI.Queue = append(asUI.Queue, queueItemAsUI)
		}

		// find all the relevant tasks
		tasks, err := task.Find(task.ByIds(taskIds).WithFields(task.VersionKey, task.BuildIdKey))
		if err != nil {
			msg := fmt.Sprintf("Error finding tasks: %v", err)
			evergreen.Logger.Errorf(slogger.ERROR, msg)
			http.Error(w, msg, http.StatusInternalServerError)
			return
		}

		// store all of the version and build ids in the relevant task queue
		// items
		for _, task := range tasks {
			// this sucks, but it's because we're not guaranteed the order out
			// of the db
			for idx, queueItemAsUI := range asUI.Queue {
				if queueItemAsUI.Id == task.Id {
					queueItemAsUI.Version = task.Version
					queueItemAsUI.Build = task.BuildId
					asUI.Queue[idx] = queueItemAsUI
				}
			}
		}

		// add all of the necessary patch info into the relevant task queue
		// items
		for idx, queueItemAsUI := range asUI.Queue {
			if queueItemAsUI.Requester == evergreen.PatchVersionRequester {
				// fetch the patch, if necessary
				var p *patch.Patch
				var ok bool
				if p, ok = cachedPatches[queueItemAsUI.Version]; ok {
					queueItemAsUI.User = p.Author
					asUI.Queue[idx] = queueItemAsUI
				} else {
					p, err = patch.FindOne(
						patch.ByVersion(queueItemAsUI.Version).WithFields(patch.AuthorKey),
					)
					if err != nil {
						msg := fmt.Sprintf("Error finding patch: %v", err)
						evergreen.Logger.Errorf(slogger.ERROR, msg)
						http.Error(w, msg, http.StatusInternalServerError)
						return
					}
					if p == nil {
						msg := fmt.Sprintf("Couldn't find patch for version %v",
							queueItemAsUI.Version)
						evergreen.Logger.Errorf(slogger.ERROR, msg)
						http.Error(w, msg, http.StatusInternalServerError)
						return
					}
					cachedPatches[queueItemAsUI.Version] = p
				}
				queueItemAsUI.User = p.Author
				asUI.Queue[idx] = queueItemAsUI
			}
		}

		uiTaskQueues = append(uiTaskQueues, asUI)
	}

	// add other useful statistics to view alongside queue
	idleHosts, err := host.Find(host.IsIdle)
	if err != nil {
		msg := fmt.Sprintf("Error finding idle hosts: %v", err)
		evergreen.Logger.Errorf(slogger.ERROR, msg)
		http.Error(w, msg, http.StatusInternalServerError)
		return
	}
	activeHosts, err := host.Find(host.IsLive)
	if err != nil {
		msg := fmt.Sprintf("Error finding active hosts: %v", err)
		evergreen.Logger.Errorf(slogger.ERROR, msg)
		http.Error(w, msg, http.StatusInternalServerError)
		return
	}
	idleStaticHostsCount := 0
	for _, host := range idleHosts {
		if host.Provider == evergreen.HostTypeStatic {
			idleStaticHostsCount++
		}
	}
	activeStaticHostsCount := 0
	for _, host := range activeHosts {
		if host.Provider == evergreen.HostTypeStatic {
			activeStaticHostsCount++
		}
	}
	hostStats := uiHostStatistics{
		ActiveHosts:       len(activeHosts),
		ActiveStaticHosts: activeStaticHostsCount,
		IdleHosts:         len(idleHosts),
		IdleStaticHosts:   idleStaticHostsCount,
	}

	uis.WriteHTML(w, http.StatusOK, struct {
		ProjectData projectContext
		User        *user.DBUser
		Flashes     []interface{}
		Data        uiResourceInfo
	}{projCtx, GetUser(r), []interface{}{}, uiResourceInfo{uiTaskQueues, hostStats, distroIds}},
		"base", "task_queues.html", "base_angular.html", "menu.html")
}