Example 1
// monitorReachability is a hostMonitoringFunc responsible for seeing if
// hosts are reachable or not. returns a slice of any errors that occur
func monitorReachability(settings *evergreen.Settings) []error {

	evergreen.Logger.Logf(slogger.INFO, "Running reachability checks...")

	// used to store any errors that occur
	var errors []error

	// fetch all hosts that have not been checked recently
	// (> 10 minutes ago)
	threshold := time.Now().Add(-ReachabilityCheckInterval)
	hosts, err := host.Find(host.ByNotMonitoredSince(threshold))
	if err != nil {
		errors = append(errors, fmt.Errorf("error finding hosts not"+
			" monitored recently: %v", err))
		return errors
	}

	// check all of the hosts. continue on error so that other hosts can be
	// checked successfully
	for _, host := range hosts {

		if err := checkHostReachability(host, settings); err != nil {
			errors = append(errors, fmt.Errorf("error checking reachability"+
				" for host %v: %v", host.Id, err))
			continue
		}

	}

	evergreen.Logger.Logf(slogger.INFO, "Finished running host reachability checks")

	return errors
}
Example 2
// flagDecommissionedHosts is a hostFlaggingFunc to get all hosts which should
// be terminated because they are decommissioned
func flagDecommissionedHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	hosts, err := host.Find(host.IsDecommissioned)
	if err != nil {
		return nil, fmt.Errorf("error finding decommissioned hosts: %v", err)
	}
	return hosts, nil
}
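
For context, flagging functions like flagDecommissionedHosts are consumed by the HostMonitor shown in Example 26. The sketch below illustrates one plausible way such functions could be driven; the hostFlaggingFunc type definition and the cleanupFlaggedHosts helper are assumptions inferred from the signatures used throughout these examples (and assume the same distro, host, evergreen, and slogger imports), not the actual Evergreen implementation.

// hostFlaggingFunc mirrors the signature shared by the flagging functions in
// these examples; this definition is an inference, not the Evergreen source.
type hostFlaggingFunc func([]distro.Distro, *evergreen.Settings) ([]host.Host, error)

// cleanupFlaggedHosts is a hypothetical sketch of how a monitor could run a set
// of flagging functions, continuing past individual failures so that one broken
// check does not block the others.
func cleanupFlaggedHosts(flaggers []hostFlaggingFunc, distros []distro.Distro, s *evergreen.Settings) []error {
	var errs []error
	for _, flag := range flaggers {
		flagged, err := flag(distros, s)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		for _, h := range flagged {
			// in the real monitor, each flagged host would be terminated here
			evergreen.Logger.Logf(slogger.INFO, "Host %v flagged for termination", h.Id)
		}
	}
	return errs
}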
Example 3
// monitorReachability is a hostMonitoringFunc responsible for seeing if
// hosts are reachable or not. returns a slice of any errors that occur
func monitorReachability(settings *evergreen.Settings) []error {

	evergreen.Logger.Logf(slogger.INFO, "Running reachability checks...")

	// used to store any errors that occur
	var errors []error

	// fetch all hosts that have not been checked recently
	// (> 10 minutes ago)
	threshold := time.Now().Add(-ReachabilityCheckInterval)
	hosts, err := host.Find(host.ByNotMonitoredSince(threshold))
	if err != nil {
		errors = append(errors, fmt.Errorf("error finding hosts not monitored recently: %v", err))
		return errors
	}

	workers := NumReachabilityWorkers
	if len(hosts) < workers {
		workers = len(hosts)
	}

	wg := sync.WaitGroup{}

	wg.Add(workers)

	hostsChan := make(chan host.Host, workers)
	errChan := make(chan error, workers)

	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for host := range hostsChan {
				if err := checkHostReachability(host, settings); err != nil {
					errChan <- err
				}
			}
		}()
	}

	errDone := make(chan struct{})
	go func() {
		defer close(errDone)
		for err := range errChan {
			errors = append(errors, fmt.Errorf("error checking reachability: %v", err))
		}
	}()

	// feed the hosts to the worker goroutines; errors are collected separately
	// so that the remaining hosts can still be checked
	for _, host := range hosts {
		hostsChan <- host
	}
	close(hostsChan)
	wg.Wait()
	close(errChan)

	<-errDone
	return errors
}
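
The function above uses a bounded pool of worker goroutines plus a dedicated error-collector goroutine so that slow or failing hosts never block one another. The following stand-alone sketch reproduces just that pattern in plain Go; checkAll, its parameters, and the example in main are hypothetical stand-ins, not Evergreen code.

package main

import (
	"fmt"
	"sync"
)

// checkAll runs check over items using at most maxWorkers goroutines and
// gathers any errors through a separate collector goroutine, mirroring the
// channel/WaitGroup layout of monitorReachability above.
func checkAll(items []string, maxWorkers int, check func(string) error) []error {
	workers := maxWorkers
	if len(items) < workers {
		workers = len(items)
	}

	itemsChan := make(chan string, workers)
	errChan := make(chan error, workers)

	wg := sync.WaitGroup{}
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for item := range itemsChan {
				if err := check(item); err != nil {
					errChan <- err
				}
			}
		}()
	}

	// drain errors concurrently so the workers never block on a full errChan
	var errs []error
	errDone := make(chan struct{})
	go func() {
		defer close(errDone)
		for err := range errChan {
			errs = append(errs, err)
		}
	}()

	for _, item := range items {
		itemsChan <- item
	}
	close(itemsChan)
	wg.Wait()
	close(errChan)
	<-errDone
	return errs
}

func main() {
	errs := checkAll([]string{"h1", "h2", "h3"}, 2, func(id string) error {
		if id == "h2" {
			return fmt.Errorf("host %v unreachable", id)
		}
		return nil
	})
	fmt.Println(errs) // [host h2 unreachable]
}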
Example 4
// Call out to the embedded CloudManager to spawn hosts.  Takes in a map of
// distro -> number of hosts to spawn for the distro.
// Returns a map of distro -> hosts spawned, and an error if one occurs.
func (s *Scheduler) spawnHosts(newHostsNeeded map[string]int) (
	map[string][]host.Host, error) {

	// loop over the distros, spawning up the appropriate number of hosts
	// for each distro
	hostsSpawnedPerDistro := make(map[string][]host.Host)
	for distroId, numHostsToSpawn := range newHostsNeeded {

		if numHostsToSpawn == 0 {
			continue
		}

		hostsSpawnedPerDistro[distroId] = make([]host.Host, 0, numHostsToSpawn)
		for i := 0; i < numHostsToSpawn; i++ {
			d, err := distro.FindOne(distro.ById(distroId))
			if err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "Failed to find distro '%v': %v", distroId, err)
			}

			allDistroHosts, err := host.Find(host.ByDistroId(distroId))
			if err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "Error getting hosts for distro %v: %v", distroId, err)
				continue
			}

			if len(allDistroHosts) >= d.PoolSize {
				evergreen.Logger.Logf(slogger.ERROR, "Already at max (%v) hosts for distro '%v'",
					distroId,
					d.PoolSize)
				continue
			}

			cloudManager, err := providers.GetCloudManager(d.Provider, s.Settings)
			if err != nil {
				evergreen.Logger.Errorf(slogger.ERROR, "Error getting cloud manager for distro: %v", err)
				continue
			}

			hostOptions := cloud.HostOptions{
				UserName: evergreen.User,
				UserHost: false,
			}
			newHost, err := cloudManager.SpawnInstance(d, hostOptions)
			if err != nil {
				evergreen.Logger.Errorf(slogger.ERROR, "Error spawning instance: %v,",
					err)
				continue
			}
			hostsSpawnedPerDistro[distroId] =
				append(hostsSpawnedPerDistro[distroId], *newHost)

		}
		// if none were spawned successfully
		if len(hostsSpawnedPerDistro[distroId]) == 0 {
			delete(hostsSpawnedPerDistro, distroId)
		}
	}
	return hostsSpawnedPerDistro, nil
}
Example 5
// FindAvailableHosts finds all hosts available to have a task run on them.
// It fetches hosts from the database whose status is "running" and who have
// no task currently being run on them.
func (self *DBHostFinder) FindAvailableHosts() ([]host.Host, error) {
	// find and return any hosts not currently running a task
	availableHosts, err := host.Find(host.IsAvailableAndFree)
	if err != nil {
		return nil, err
	}
	return availableHosts, nil
}
Example 6
// flagUnreachableHosts is a hostFlaggingFunc to get all hosts which should
// be terminated because they are unreachable
func flagUnreachableHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	threshold := time.Now().Add(-1 * UnreachableCutoff)
	hosts, err := host.Find(host.ByUnreachableBefore(threshold))
	if err != nil {
		return nil, fmt.Errorf("error finding hosts unreachable since before %v: %v", threshold, err)
	}
	return hosts, nil
}
Example 7
// FindAvailableHostsForDistro finds all hosts of a certain distro
// available to have a task run on them.
func (self *DBHostFinder) FindAvailableHostsForDistro(d string) ([]host.Host, error) {
	// find and return any hosts not currently running a task
	availableHosts, err := host.Find(host.ByAvailableForDistro(d))
	if err != nil {
		return nil, err
	}
	return availableHosts, nil
}
Example 8
func TestCreateHostBuckets(t *testing.T) {
	testutil.HandleTestingErr(db.ClearCollections(host.Collection), t, "couldn't reset host")
	Convey("With a starting time and a minute bucket size and inserting dynamic hosts with different time frames", t, func() {
		now := time.Now()
		bucketSize := time.Duration(10) * time.Second

		// -20 -> 20
		beforeStartHost := host.Host{Id: "beforeStartHost", CreationTime: now.Add(time.Duration(-20) * time.Second), TerminationTime: now.Add(time.Duration(20) * time.Second), Provider: "ec2"}
		So(beforeStartHost.Insert(), ShouldBeNil)

		// 80 -> 120
		afterEndHost := host.Host{Id: "afterEndHost", CreationTime: now.Add(time.Duration(80) * time.Second), TerminationTime: now.Add(time.Duration(120) * time.Second), Provider: "ec2"}
		So(afterEndHost.Insert(), ShouldBeNil)

		// 20 -> 40
		h1 := host.Host{Id: "h1", CreationTime: now.Add(time.Duration(20) * time.Second), TerminationTime: now.Add(time.Duration(40) * time.Second), Provider: "ec2"}
		So(h1.Insert(), ShouldBeNil)

		// 10 -> 80
		h2 := host.Host{Id: "h2", CreationTime: now.Add(time.Duration(10) * time.Second), TerminationTime: now.Add(time.Duration(80) * time.Second), Provider: "ec2"}
		So(h2.Insert(), ShouldBeNil)

		// 20 ->
		h3 := host.Host{Id: "h3", CreationTime: now.Add(time.Duration(20) * time.Second), TerminationTime: util.ZeroTime, Provider: "ec2", Status: evergreen.HostRunning}
		So(h3.Insert(), ShouldBeNil)

		// 5 -> 7
		sameBucket := host.Host{Id: "sameBucket", CreationTime: now.Add(time.Duration(5) * time.Second), TerminationTime: now.Add(time.Duration(7) * time.Second), Provider: "ec2"}
		So(sameBucket.Insert(), ShouldBeNil)

		// 5 -> 30
		h4 := host.Host{Id: "h4", CreationTime: now.Add(time.Duration(5) * time.Second), TerminationTime: now.Add(time.Duration(30) * time.Second), Provider: "ec2"}
		So(h4.Insert(), ShouldBeNil)

		Convey("for three buckets of 10 seconds, should only retrieve pertinent host docs", func() {

			endTime := now.Add(time.Duration(30) * time.Second)
			hosts, err := host.Find(host.ByDynamicWithinTime(now, endTime))
			So(err, ShouldBeNil)
			So(len(hosts), ShouldEqual, 6)
			frameBounds := FrameBounds{
				StartTime:     now,
				EndTime:       endTime,
				BucketSize:    bucketSize,
				NumberBuckets: 3,
			}
			Convey("should create the correct buckets and bucket time accordingly", func() {
				buckets, errors := CreateHostBuckets(hosts, frameBounds)
				So(errors, ShouldBeEmpty)
				So(len(buckets), ShouldEqual, 3)
				So(int(buckets[0].TotalTime.Seconds()), ShouldEqual, 17)
				So(int(buckets[1].TotalTime.Seconds()), ShouldEqual, 30)
				So(int(math.Ceil(buckets[2].TotalTime.Seconds())), ShouldEqual, 40)
			})
		})

	})
}
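
The expected totals in this test (17, 30, and 40 seconds) are consistent with each bucket summing the overlap between every host's creation/termination interval and that bucket's window, with the still-running host h3 counted up to the end of the frame. The runnable sketch below reproduces that arithmetic as an illustrative model of what CreateHostBuckets appears to compute; it is not the actual implementation.

package main

import (
	"fmt"
	"time"
)

// overlap returns how much of the interval [start, end) falls inside the
// bucket [bucketStart, bucketEnd).
func overlap(start, end, bucketStart, bucketEnd time.Duration) time.Duration {
	if start < bucketStart {
		start = bucketStart
	}
	if end > bucketEnd {
		end = bucketEnd
	}
	if end <= start {
		return 0
	}
	return end - start
}

func main() {
	// host lifetimes from the test, in seconds relative to "now"; the running
	// host h3 is clamped to the frame end (30s), and afterEndHost is excluded
	// because it never intersects the frame
	hosts := [][2]time.Duration{
		{-20 * time.Second, 20 * time.Second}, // beforeStartHost
		{20 * time.Second, 40 * time.Second},  // h1
		{10 * time.Second, 80 * time.Second},  // h2
		{20 * time.Second, 30 * time.Second},  // h3 (still running)
		{5 * time.Second, 7 * time.Second},    // sameBucket
		{5 * time.Second, 30 * time.Second},   // h4
	}

	bucketSize := 10 * time.Second
	for i := 0; i < 3; i++ {
		var total time.Duration
		for _, h := range hosts {
			total += overlap(h[0], h[1], time.Duration(i)*bucketSize, time.Duration(i+1)*bucketSize)
		}
		fmt.Printf("bucket %v: %v seconds\n", i, total.Seconds()) // 17, 30, 40
	}
}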
Example 9
// flagExpiredHosts is a hostFlaggingFunc to get all user-spawned hosts
// that have expired
func flagExpiredHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	// fetch the expired hosts
	hosts, err := host.Find(host.ByExpiredSince(time.Now()))
	if err != nil {
		return nil, fmt.Errorf("error finding expired spawned hosts: %v", err)
	}
	return hosts, nil

}
Example 10
// flagUnprovisionedHosts is a hostFlaggingFunc to get all hosts that are
// taking too long to provision
func flagUnprovisionedHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	// fetch all hosts that are taking too long to provision
	threshold := time.Now().Add(-ProvisioningCutoff)
	hosts, err := host.Find(host.ByUnprovisionedSince(threshold))
	if err != nil {
		return nil, fmt.Errorf("error finding unprovisioned hosts: %v", err)
	}
	return hosts, nil
}
Example 11
// flagProvisioningFailedHosts is a hostFlaggingFunc to get all hosts
// whose provisioning failed
func flagProvisioningFailedHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	// fetch all hosts whose provisioning failed
	hosts, err := host.Find(host.IsProvisioningFailure)
	if err != nil {
		return nil, fmt.Errorf("error finding hosts whose provisioning"+
			" failed: %v", err)
	}
	return hosts, nil

}
Example 12
func (uis *UIServer) modifyHosts(w http.ResponseWriter, r *http.Request) {
	_ = MustHaveUser(r)

	opts := &uiParams{}
	err := util.ReadJSONInto(r.Body, opts)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	hostIds := opts.HostIds
	if len(hostIds) == 1 && strings.TrimSpace(hostIds[0]) == "" {
		http.Error(w, "No host ID's found in request", http.StatusBadRequest)
		return
	}

	// fetch all relevant hosts
	hosts, err := host.Find(host.ByIds(hostIds))

	if err != nil {
		uis.LoggedError(w, r, http.StatusInternalServerError, fmt.Errorf("Error finding hosts: %v", err))
		return
	}
	if len(hosts) == 0 {
		http.Error(w, "No matching hosts found.", http.StatusBadRequest)
		return
	}

	// determine what action needs to be taken
	switch opts.Action {
	case "updateStatus":
		newStatus := opts.Status
		if !util.SliceContains(validUpdateToStatuses, newStatus) {
			http.Error(w, fmt.Sprintf("Invalid status: %v", opts.Status), http.StatusBadRequest)
			return
		}
		numHostsUpdated := 0

		for _, host := range hosts {
			err := host.SetStatus(newStatus)
			if err != nil {
				uis.LoggedError(w, r, http.StatusInternalServerError, fmt.Errorf("Error updating host %v", err))
				return
			}
			numHostsUpdated += 1
		}
		msg := NewSuccessFlash(fmt.Sprintf("%v host(s) status successfully updated to '%v'",
			numHostsUpdated, newStatus))
		PushFlash(uis.CookieStore, r, w, msg)
		return
	default:
		http.Error(w, fmt.Sprintf("Unrecognized action: %v", opts.Action), http.StatusBadRequest)
		return
	}
}
Example 13
// slowProvisioningWarnings is a notificationBuilder to build any necessary
// warnings about hosts that are taking a long time to provision
func slowProvisioningWarnings(settings *evergreen.Settings) ([]notification,
	error) {

	evergreen.Logger.Logf(slogger.INFO, "Building warnings for hosts taking a long"+
		" time to provision...")

	if settings.Notify.SMTP == nil {
		return []notification{}, fmt.Errorf("no notification emails configured")
	}

	// fetch all hosts that are taking too long to provision
	threshold := time.Now().Add(-slowProvisioningThreshold)
	hosts, err := host.Find(host.ByUnprovisionedSince(threshold))
	if err != nil {
		return nil, fmt.Errorf("error finding unprovisioned hosts: %v", err)
	}

	// the list of warning notifications that will be returned
	warnings := []notification{}

	for _, h := range hosts {

		// if a warning has been sent for the host, skip it
		if h.Notifications[slowProvisioningWarning] {
			continue
		}

		evergreen.Logger.Logf(slogger.INFO, "Slow-provisioning warning needs to"+
			" be sent for host %v", h.Id)

		// build the notification
		hostNotification := notification{
			recipient: settings.Notify.SMTP.AdminEmail[0],
			subject: fmt.Sprintf("Host %v taking a long time to provision",
				h.Id),
			message: fmt.Sprintf("See %v/ui/host/%v",
				settings.Ui.Url, h.Id),
			threshold: slowProvisioningWarning,
			host:      h,
			callback: func(h host.Host, s string) error {
				return h.SetExpirationNotification(s)
			},
		}

		// add it to the final list
		warnings = append(warnings, hostNotification)

	}

	evergreen.Logger.Logf(slogger.INFO, "Built %v warnings about hosts taking a"+
		" long time to provision", len(warnings))

	return warnings, nil
}
Example 14
// CreateAllHostUtilizationBuckets aggregates each bucket by creating a time frame given the number of days back
// and the granularity wanted (ie. days, minutes, seconds, hours) all in seconds. It returns a list of Host utilization
// information for each bucket.
func CreateAllHostUtilizationBuckets(daysBack, granularity int) ([]HostUtilizationBucket, error) {
	bounds := CalculateBounds(daysBack, granularity)
	// find non-static hosts
	dynamicHosts, err := host.Find(host.ByDynamicWithinTime(bounds.StartTime, bounds.EndTime))
	if err != nil {
		return nil, err
	}
	// find static hosts
	staticHosts, err := host.Find(host.AllStatic)
	if err != nil {
		return nil, err
	}

	dynamicBuckets, _ := CreateHostBuckets(dynamicHosts, bounds)
	staticBuckets, _ := CreateHostBuckets(staticHosts, bounds)

	tasks, err := task.Find(task.ByTimeRun(bounds.StartTime, bounds.EndTime).WithFields(task.StartTimeKey, task.FinishTimeKey, task.HostIdKey))
	if err != nil {
		return nil, err
	}

	oldTasks, err := task.FindOld(task.ByTimeRun(bounds.StartTime, bounds.EndTime))
	if err != nil {
		return nil, err
	}

	taskBuckets, _ := CreateTaskBuckets(tasks, oldTasks, bounds)
	bucketData := []HostUtilizationBucket{}
	for i, staticBucket := range staticBuckets {
		b := HostUtilizationBucket{
			StaticHost:  staticBucket.TotalTime,
			DynamicHost: dynamicBuckets[i].TotalTime,
			Task:        taskBuckets[i].TotalTime,
			StartTime:   bounds.StartTime.Add(time.Duration(i) * bounds.BucketSize),
			EndTime:     bounds.StartTime.Add(time.Duration(i+1) * bounds.BucketSize),
		}
		bucketData = append(bucketData, b)

	}
	return bucketData, nil
}
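
CalculateBounds itself is not shown in these examples. Based on its arguments here and the FrameBounds fields used in Example 8, a plausible sketch follows; the name CalculateBoundsSketch is hypothetical, the snippet assumes the same imports as the example above, and the real Evergreen implementation may differ.

// CalculateBoundsSketch is a hypothetical reconstruction of CalculateBounds:
// the frame ends now, reaches daysBack days into the past, and is divided into
// buckets of `granularity` seconds each.
func CalculateBoundsSketch(daysBack, granularity int) FrameBounds {
	endTime := time.Now()
	frameLength := time.Duration(daysBack) * 24 * time.Hour
	bucketSize := time.Duration(granularity) * time.Second
	return FrameBounds{
		StartTime:     endTime.Add(-frameLength),
		EndTime:       endTime,
		BucketSize:    bucketSize,
		NumberBuckets: int(frameLength / bucketSize),
	}
}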
Example 15
func (uis *UIServer) getSpawnedHosts(w http.ResponseWriter, r *http.Request) {
	user := MustHaveUser(r)

	hosts, err := host.Find(host.ByUserWithRunningStatus(user.Username()))
	if err != nil {
		uis.LoggedError(w, r, http.StatusInternalServerError,
			fmt.Errorf("Error finding running hosts for user %v: %v", user.Username(), err))
		return
	}

	uis.WriteJSON(w, http.StatusOK, hosts)
}
Example 16
// returns info on all of the hosts spawned by a user
func (as *APIServer) hostsInfoForUser(w http.ResponseWriter, r *http.Request) {
	vars := mux.Vars(r)
	user := vars["user"]

	hosts, err := host.Find(host.ByUserWithUnterminatedStatus(user))
	if err != nil {
		as.LoggedError(w, r, http.StatusInternalServerError, err)
		return
	}

	as.WriteJSON(w, http.StatusOK, spawnResponse{Hosts: hosts})
}
Example 17
// flagExcessHosts is a hostFlaggingFunc to get all hosts that push their
// distros over the specified max hosts
func flagExcessHosts(distros []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	// will ultimately contain all the hosts that can be terminated
	excessHosts := []host.Host{}

	// figure out the excess hosts for each distro
	for _, d := range distros {

		// fetch any hosts for the distro that count towards max hosts
		allHostsForDistro, err := host.Find(host.ByDistroId(d.Id))
		if err != nil {
			return nil, fmt.Errorf("error fetching hosts for distro %v: %v", d.Id, err)
		}

		// if there are more than the specified max hosts, then terminate
		// some, if they are not running tasks
		numExcessHosts := len(allHostsForDistro) - d.PoolSize
		if numExcessHosts > 0 {

			// track how many hosts for the distro are terminated
			counter := 0

			for _, host := range allHostsForDistro {

				// if the host is not dynamically spun up (and can
				// thus be terminated), skip it
				canTerminate, err := hostCanBeTerminated(host, s)
				if err != nil {
					return nil, fmt.Errorf("error checking if host %v can be terminated: %v", host.Id, err)
				}
				if !canTerminate {
					continue
				}

				// if the host is not running a task, it can be
				// safely terminated
				if host.RunningTask == "" {
					excessHosts = append(excessHosts, host)
					counter++
				}

				// break if we've marked enough to be terminated
				if counter == numExcessHosts {
					break
				}
			}

			evergreen.Logger.Logf(slogger.INFO, "Found %v excess hosts for distro %v", counter, d.Id)

		}

	}
	return excessHosts, nil
}
Example 18
func getHostsData(includeSpawnedHosts bool) (*hostsData, error) {
	data := &hostsData{}

	// get all of the hosts
	var dbHosts []host.Host
	var err error
	if includeSpawnedHosts {
		dbHosts, err = host.Find(host.IsRunning)
	} else {
		dbHosts, err = host.Find(host.ByUserWithRunningStatus(evergreen.User))
	}

	if err != nil {
		return nil, err
	}

	// convert the hosts to the ui models
	uiHosts := make([]uiHost, len(dbHosts))
	for idx, dbHost := range dbHosts {
		// we only need the distro id for the hosts page
		dbHost.Distro = distro.Distro{Id: dbHost.Distro.Id}
		host := uiHost{
			Host:        dbHost,
			RunningTask: nil,
		}

		uiHosts[idx] = host
		// get the task running on this host
		task, err := model.FindTask(dbHost.RunningTask)
		if err != nil {
			return nil, err
		}
		if task != nil {
			uiHosts[idx].RunningTask = task
		}
	}
	data.Hosts = uiHosts
	return data, nil
}
Example 19
// flagIdleHosts is a hostFlaggingFunc to get all hosts which have spent too
// long without running a task
func flagIdleHosts(d []distro.Distro, s *evergreen.Settings) ([]host.Host, error) {
	// will ultimately contain all of the hosts determined to be idle
	idleHosts := []host.Host{}

	// fetch all hosts not currently running a task
	freeHosts, err := host.Find(host.IsFree)
	if err != nil {
		return nil, fmt.Errorf("error finding free hosts: %v", err)
	}

	// go through the hosts, and see if they have idled long enough to
	// be terminated
	for _, host := range freeHosts {

		// ask the host how long it has been idle
		idleTime := host.IdleTime()

		// get a cloud manager for the host
		cloudManager, err := providers.GetCloudManager(host.Provider, s)
		if err != nil {
			return nil, fmt.Errorf("error getting cloud manager for host %v: %v", host.Id, err)
		}

		// if the host is not dynamically spun up (and can thus be terminated),
		// skip it
		canTerminate, err := hostCanBeTerminated(host, s)
		if err != nil {
			return nil, fmt.Errorf("error checking if host %v can be terminated: %v", host.Id, err)
		}
		if !canTerminate {
			continue
		}

		// ask how long until the next payment for the host
		tilNextPayment := cloudManager.TimeTilNextPayment(&host)

		// current determinants for idle:
		//  idle for at least 15 minutes and
		//  less than 5 minutes til next payment
		if idleTime >= 15*time.Minute && tilNextPayment <= 5*time.Minute {
			idleHosts = append(idleHosts, host)
		}
	}

	return idleHosts, nil
}
Example 20
// spawnHostExpirationWarnings is a notificationBuilder to build any necessary
// warnings about hosts that will be expiring soon (but haven't expired yet)
func spawnHostExpirationWarnings(settings *evergreen.Settings) ([]notification,
	error) {

	evergreen.Logger.Logf(slogger.INFO, "Building spawned host expiration"+
		" warnings...")

	// sanity check, since the thresholds are supplied in code
	if len(spawnWarningThresholds) == 0 {
		evergreen.Logger.Logf(slogger.WARN, "there are no currently set warning"+
			" thresholds for spawned hosts - users will not receive emails"+
			" warning them of imminent host expiration")
		return nil, nil
	}

	// assumed to be the first warning threshold (the least recent one)
	firstWarningThreshold :=
		spawnWarningThresholds[len(spawnWarningThresholds)-1]

	// find all spawned hosts that have passed at least one warning threshold
	now := time.Now()
	thresholdTime := now.Add(firstWarningThreshold)
	hosts, err := host.Find(host.ByExpiringBetween(now, thresholdTime))
	if err != nil {
		return nil, fmt.Errorf("error finding spawned hosts that will be"+
			" expiring soon: %v", err)
	}

	// the eventual list of warning notifications to be sent
	warnings := []notification{}

	for _, h := range hosts {

		// figure out the most recent expiration notification threshold the host
		// has crossed
		threshold := lastWarningThresholdCrossed(&h)

		// for keying into the host's notifications map
		thresholdKey := strconv.Itoa(int(threshold.Minutes()))

		// if a notification has already been sent for the threshold for this
		// host, skip it
		if h.Notifications[thresholdKey] {
			continue
		}

		evergreen.Logger.Logf(slogger.INFO, "Warning needs to be sent for threshold"+
			" '%v' for host %v", thresholdKey, h.Id)

		// we need to send a notification for the threshold for this host
		hostNotification := notification{
			recipient: h.StartedBy,
			subject:   fmt.Sprintf("%v host termination reminder", h.Distro.Id),
			message: fmt.Sprintf("Your %v host with id %v will be terminated"+
				" at %v, in %v minutes. Visit %v to extend its lifetime.",
				h.Distro.Id, h.Id,
				h.ExpirationTime.Format(time.RFC850),
				h.ExpirationTime.Sub(time.Now()),
				settings.Ui.Url+"/ui/spawn"),
			threshold: thresholdKey,
			host:      h,
			callback: func(h host.Host, thresholdKey string) error {
				return h.SetExpirationNotification(thresholdKey)
			},
		}

		// add it to the list
		warnings = append(warnings, hostNotification)

	}

	evergreen.Logger.Logf(slogger.INFO, "Built %v warnings about imminently"+
		" expiring hosts", len(warnings))

	return warnings, nil
}
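
lastWarningThresholdCrossed is referenced but not shown. Given that spawnWarningThresholds is ordered so that its last element is the earliest (least recent) warning threshold, one plausible reading is sketched below; the name lastWarningThresholdCrossedSketch and the ordering assumption are hypothetical, and the real implementation may differ.

// lastWarningThresholdCrossedSketch guesses at the referenced helper: walk the
// thresholds from most imminent to earliest and return the first one that
// already covers the host's remaining lifetime.
func lastWarningThresholdCrossedSketch(h *host.Host) time.Duration {
	tilExpiration := h.ExpirationTime.Sub(time.Now())
	for _, threshold := range spawnWarningThresholds {
		if tilExpiration <= threshold {
			return threshold
		}
	}
	// hosts returned by ByExpiringBetween should always cross at least one
	// threshold; fall back to the earliest warning threshold just in case
	return spawnWarningThresholds[len(spawnWarningThresholds)-1]
}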
Example 21
// setupReadyHosts runs the distro setup script of all hosts that are up and reachable.
func (init *HostInit) setupReadyHosts() error {
	// set SSH timeout duration
	if timeoutSecs := init.Settings.HostInit.SSHTimeoutSeconds; timeoutSecs <= 0 {
		evergreen.Logger.Logf(slogger.WARN, "SSH timeout set to %vs (<= 0s) using %vs instead", timeoutSecs, SSHTimeoutSeconds)
	} else {
		SSHTimeoutSeconds = timeoutSecs
	}

	// find all hosts in the uninitialized state
	uninitializedHosts, err := host.Find(host.IsUninitialized)
	if err != nil {
		return fmt.Errorf("error fetching uninitialized hosts: %v", err)
	}

	evergreen.Logger.Logf(slogger.DEBUG, "There are %v uninitialized hosts",
		len(uninitializedHosts))

	// used for making sure we don't exit before a setup script is done
	wg := &sync.WaitGroup{}

	for _, h := range uninitializedHosts {

		// check whether or not the host is ready for its setup script to be run
		ready, err := init.IsHostReady(&h)
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "Error checking host %v for readiness: %v",
				h.Id, err)
			continue
		}

		// if the host isn't ready (for instance, it might not be up yet), skip it
		if !ready {
			evergreen.Logger.Logf(slogger.DEBUG, "Host %v not ready for setup", h.Id)
			continue
		}

		evergreen.Logger.Logf(slogger.INFO, "Running setup script for host %v", h.Id)

		// kick off the setup, in its own goroutine, so pending setups don't have
		// to wait for it to finish
		wg.Add(1)
		go func(h host.Host) {

			if err := init.ProvisionHost(&h); err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "Error provisioning host %v: %v",
					h.Id, err)

				// notify the admins of the failure
				subject := fmt.Sprintf("%v Evergreen provisioning failure on %v",
					notify.ProvisionFailurePreface, h.Distro.Id)
				hostLink := fmt.Sprintf("%v/host/%v", init.Settings.Ui.Url, h.Id)
				message := fmt.Sprintf("Provisioning failed on %v host -- %v: see %v",
					h.Distro.Id, h.Id, hostLink)
				if err := notify.NotifyAdmins(subject, message, init.Settings); err != nil {
					evergreen.Logger.Errorf(slogger.ERROR, "Error sending email: %v", err)
				}
			}

			wg.Done()

		}(h)

	}

	// let all setup routines finish
	wg.Wait()

	return nil
}
Example 22
// Validate returns an instance of BadOptionsErr if the SpawnOptions object contains invalid
// data, SpawnLimitErr if the user is already at the spawned host limit, or some other untyped
// instance of Error if something fails during validation.
func (sm Spawn) Validate(so Options) error {
	d, err := distro.FindOne(distro.ById(so.Distro))
	if err != nil {
		return BadOptionsErr{fmt.Sprintf("Invalid dist %v", so.Distro)}
	}

	if !d.SpawnAllowed {
		return BadOptionsErr{fmt.Sprintf("Spawning not allowed for dist %v", so.Distro)}
	}

	// if the user already has too many active spawned hosts, deny the request
	activeSpawnedHosts, err := host.Find(host.ByUserWithRunningStatus(so.UserName))
	if err != nil {
		return fmt.Errorf("Error occurred finding user's current hosts: %v", err)
	}

	if len(activeSpawnedHosts) >= MaxPerUser {
		return SpawnLimitErr
	}

	// validate public key
	rsa := "ssh-rsa"
	dss := "ssh-dss"
	isRSA := strings.HasPrefix(so.PublicKey, rsa)
	isDSS := strings.HasPrefix(so.PublicKey, dss)
	if !isRSA && !isDSS {
		return BadOptionsErr{"key does not start with ssh-rsa or ssh-dss"}
	}

	sections := strings.Split(so.PublicKey, " ")
	if len(sections) < 2 {
		keyType := rsa
		if sections[0] == dss {
			keyType = dss
		}
		return BadOptionsErr{fmt.Sprintf("missing space after '%v'", keyType)}
	}

	// check for valid base64
	if _, err = base64.StdEncoding.DecodeString(sections[1]); err != nil {
		return BadOptionsErr{"key contains invalid base64 string"}
	}

	if d.UserData.File != "" {
		if strings.TrimSpace(so.UserData) == "" {
			return BadOptionsErr{}
		}

		var err error
		switch d.UserData.Validate {
		case distro.UserDataFormatFormURLEncoded:
			_, err = url.ParseQuery(so.UserData)
		case distro.UserDataFormatJSON:
			var out map[string]interface{}
			err = json.Unmarshal([]byte(so.UserData), &out)
		case distro.UserDataFormatYAML:
			var out map[string]interface{}
			err = yaml.Unmarshal([]byte(so.UserData), &out)
		}

		if err != nil {
			return BadOptionsErr{fmt.Sprintf("invalid %v: %v", d.UserData.Validate, err)}
		}
	}
	return nil
}
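
The public-key portion of Validate (prefix check plus base64 body) can be exercised on its own. The stand-alone sketch below repeats just those two checks with placeholder names; it is an illustration, not part of the Evergreen spawn package.

package main

import (
	"encoding/base64"
	"fmt"
	"strings"
)

// validatePublicKey applies the same two checks used in Validate above: the
// key must start with ssh-rsa or ssh-dss, and its second field must decode as
// base64.
func validatePublicKey(key string) error {
	if !strings.HasPrefix(key, "ssh-rsa") && !strings.HasPrefix(key, "ssh-dss") {
		return fmt.Errorf("key does not start with ssh-rsa or ssh-dss")
	}
	sections := strings.Split(key, " ")
	if len(sections) < 2 {
		return fmt.Errorf("missing space after the key type")
	}
	if _, err := base64.StdEncoding.DecodeString(sections[1]); err != nil {
		return fmt.Errorf("key contains invalid base64 string")
	}
	return nil
}

func main() {
	fmt.Println(validatePublicKey("ssh-rsa AAAA user@example.com")) // <nil>
	fmt.Println(validatePublicKey("not-a-key"))                     // prefix error
}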
Example 23
// Schedule all of the tasks to be run.  Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues.  Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (s *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")

	err := model.UpdateStaticHosts(s.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")

	runnableTasks, err := s.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := s.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := s.GetExpectedDurations(runnableTasks)

	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	distroInputChan := make(chan distroSchedulerInput, len(distros))

	// put all of the needed input for the distro scheduler into a channel to be read by the
	// distro scheduling loop
	for distroId, task := range tasksByDistro {
		distroInputChan <- distroSchedulerInput{
			distroId:               distroId,
			runnableTasksForDistro: task,
		}
	}
	// close the channel to signal that the loop reading from it can terminate
	close(distroInputChan)
	workers := runtime.NumCPU()

	wg := sync.WaitGroup{}
	wg.Add(workers)

	// make a channel to collect all of the results from scheduling the distros
	distroSchedulerResultChan := make(chan *distroSchedulerResult)

	// for each worker, create a new goroutine
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			// read the inputs for scheduling this distro
			for d := range distroInputChan {
				// schedule the distro
				res := s.scheduleDistro(d.distroId, d.runnableTasksForDistro, taskExpectedDuration)
				if res.err != nil {
					evergreen.Logger.Logf(slogger.ERROR, "%v", err)
				}

				// write the results out to a results channel
				distroSchedulerResultChan <- res

			}
		}()
	}

	// initialize a map of scheduler events
	schedulerEvents := map[string]event.TaskQueueInfo{}

	// map of distro id -> the prioritized task queue items for that distro
	taskQueueItems := make(map[string][]model.TaskQueueItem)

	// collect the results of scheduling each distro; record the first error
	// but keep draining the channel so the worker goroutines never block
	var errResult error
	resultsDone := make(chan struct{})
	go func() {
		defer close(resultsDone)
		for res := range distroSchedulerResultChan {
			if res.err != nil {
				if errResult == nil {
					errResult = fmt.Errorf("error scheduling tasks on distro %v: %v", res.distroId, res.err)
				}
				continue
			}
			schedulerEvents[res.distroId] = res.schedulerEvent
			taskQueueItems[res.distroId] = res.taskQueueItem
		}
	}()

	// wait for the distro scheduler goroutines to complete
	wg.Wait()
	// all workers are done, so the results channel can be closed
	close(distroSchedulerResultChan)
	// wait for the result collector to finish draining before checking for errors
	<-resultsDone

	if errResult != nil {
		return errResult
	}

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// add the length of the host lists of hosts that are running to the event log.
	for distroId, hosts := range hostsByDistro {
		taskQueueInfo := schedulerEvents[distroId]
		taskQueueInfo.NumHostsRunning = len(hosts)
		schedulerEvents[distroId] = taskQueueInfo
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := s.NewHostsNeeded(hostAllocatorData, s.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v",
			err)
	}

	// spawn up the hosts
	hostsSpawned, err := s.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}

			taskQueueInfo := schedulerEvents[distro]
			taskQueueInfo.NumHostsRunning += len(hosts)
			schedulerEvents[distro] = taskQueueInfo
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	for d, t := range schedulerEvents {
		eventLog := event.SchedulerEventData{
			ResourceType:  event.ResourceTypeScheduler,
			TaskQueueInfo: t,
			DistroId:      d,
		}
		event.LogSchedulerEvent(eventLog)
	}

	return nil
}
Example 24
func (uis *UIServer) allTaskQueues(w http.ResponseWriter, r *http.Request) {
	projCtx := MustHaveProjectContext(r)

	taskQueues, err := model.FindAllTaskQueues()
	if err != nil {
		uis.LoggedError(w, r, http.StatusInternalServerError,
			fmt.Errorf("Error finding task queues: %v", err))
		return
	}

	// cached map of version id to relevant patch
	cachedPatches := map[string]*patch.Patch{}

	// convert the task queues to the ui versions
	uiTaskQueues := []uiTaskQueue{}
	for _, tQ := range taskQueues {
		asUI := uiTaskQueue{
			Distro: tQ.Distro,
			Queue:  []uiTaskQueueItem{},
		}

		if len(tQ.Queue) == 0 {
			uiTaskQueues = append(uiTaskQueues, asUI)
			continue
		}

		// convert the individual task queue items
		taskIds := []string{}
		for _, item := range tQ.Queue {

			// cache the ids, for fetching the tasks from the db
			taskIds = append(taskIds, item.Id)
			queueItemAsUI := uiTaskQueueItem{
				Id:                  item.Id,
				DisplayName:         item.DisplayName,
				BuildVariant:        item.BuildVariant,
				RevisionOrderNumber: item.RevisionOrderNumber,
				Requester:           item.Requester,
				Revision:            item.Revision,
				Project:             item.Project,
			}
			asUI.Queue = append(asUI.Queue, queueItemAsUI)
		}

		// find all the relevant tasks
		tasks, err := task.Find(task.ByIds(taskIds).WithFields(task.VersionKey, task.BuildIdKey))
		if err != nil {
			msg := fmt.Sprintf("Error finding tasks: %v", err)
			evergreen.Logger.Errorf(slogger.ERROR, msg)
			http.Error(w, msg, http.StatusInternalServerError)
			return
		}

		// store all of the version and build ids in the relevant task queue
		// items
		for _, task := range tasks {
			// this sucks, but it's because we're not guaranteed the order out
			// of the db
			for idx, queueItemAsUI := range asUI.Queue {
				if queueItemAsUI.Id == task.Id {
					queueItemAsUI.Version = task.Version
					queueItemAsUI.Build = task.BuildId
					asUI.Queue[idx] = queueItemAsUI
				}
			}
		}

		// add all of the necessary patch info into the relevant task queue
		// items
		for idx, queueItemAsUI := range asUI.Queue {
			if queueItemAsUI.Requester == evergreen.PatchVersionRequester {
				// fetch the patch, if necessary
				p, ok := cachedPatches[queueItemAsUI.Version]
				if !ok {
					p, err = patch.FindOne(
						patch.ByVersion(queueItemAsUI.Version).WithFields(patch.AuthorKey),
					)
					if err != nil {
						msg := fmt.Sprintf("Error finding patch: %v", err)
						evergreen.Logger.Errorf(slogger.ERROR, msg)
						http.Error(w, msg, http.StatusInternalServerError)
						return
					}
					if p == nil {
						msg := fmt.Sprintf("Couldn't find patch for version %v", queueItemAsUI.Version)
						evergreen.Logger.Errorf(slogger.ERROR, msg)
						http.Error(w, msg, http.StatusInternalServerError)
						return
					}
					cachedPatches[queueItemAsUI.Version] = p
				}
				queueItemAsUI.User = p.Author
				asUI.Queue[idx] = queueItemAsUI

			}
		}

		uiTaskQueues = append(uiTaskQueues, asUI)

	}

	// add other useful statistics to view alongside queue
	idleHosts, err := host.Find(host.IsIdle)
	if err != nil {
		msg := fmt.Sprintf("Error finding idle hosts: %v", err)
		evergreen.Logger.Errorf(slogger.ERROR, msg)
		http.Error(w, msg, http.StatusInternalServerError)
		return
	}
	activeHosts, err := host.Find(host.IsLive)
	if err != nil {
		msg := fmt.Sprintf("Error finding active hosts: %v", err)
		evergreen.Logger.Errorf(slogger.ERROR, msg)
		http.Error(w, msg, http.StatusInternalServerError)
		return
	}
	idleStaticHostsCount := 0
	for _, host := range idleHosts {
		if host.Provider == evergreen.HostTypeStatic {
			idleStaticHostsCount++
		}
	}
	activeStaticHostsCount := 0
	for _, host := range activeHosts {
		if host.Provider == evergreen.HostTypeStatic {
			activeStaticHostsCount++
		}
	}
	hostStats := uiHostStatistics{
		ActiveHosts:       len(activeHosts),
		ActiveStaticHosts: activeStaticHostsCount,
		IdleHosts:         len(idleHosts),
		IdleStaticHosts:   idleStaticHostsCount,
	}

	uis.WriteHTML(w, http.StatusOK, struct {
		ProjectData projectContext
		User        *user.DBUser
		Flashes     []interface{}
		Data        uiResourceInfo
	}{projCtx, GetUser(r), []interface{}{}, uiResourceInfo{uiTaskQueues, hostStats}},
		"base", "task_queues.html", "base_angular.html", "menu.html")
}
Example 25
// spawnHostExpirationWarnings is a notificationBuilder to build any necessary
// warnings about hosts that will be expiring soon (but haven't expired yet)
func spawnHostExpirationWarnings(settings *evergreen.Settings) ([]notification,
	error) {

	evergreen.Logger.Logf(slogger.INFO, "Building spawned host expiration"+
		" warnings...")

	// sanity check, since the thresholds are supplied in code
	if len(spawnWarningThresholds) == 0 {
		evergreen.Logger.Logf(slogger.WARN, "there are no currently set warning"+
			" thresholds for spawned hosts - users will not receive emails"+
			" warning them of imminent host expiration")
		return nil, nil
	}

	// assumed to be the first warning threshold (the least recent one)
	firstWarningThreshold :=
		spawnWarningThresholds[len(spawnWarningThresholds)-1]

	// find all spawned hosts that have passed at least one warning threshold
	now := time.Now()
	thresholdTime := now.Add(firstWarningThreshold)
	hosts, err := host.Find(host.ByExpiringBetween(now, thresholdTime))
	if err != nil {
		return nil, fmt.Errorf("error finding spawned hosts that will be"+
			" expiring soon: %v", err)
	}

	// the eventual list of warning notifications to be sent
	warnings := []notification{}

	for _, h := range hosts {

		// figure out the most recent expiration notification threshold the host
		// has crossed
		threshold := lastWarningThresholdCrossed(&h)

		// for keying into the host's notifications map
		thresholdKey := strconv.Itoa(int(threshold.Minutes()))

		// if a notification has already been sent for the threshold for this
		// host, skip it
		if h.Notifications[thresholdKey] {
			continue
		}

		evergreen.Logger.Logf(slogger.INFO, "Warning needs to be sent for threshold"+
			" '%v' for host %v", thresholdKey, h.Id)

		// fetch information about the user we are notifying
		userToNotify, err := user.FindOne(user.ById(h.StartedBy))
		if err != nil {
			return nil, fmt.Errorf("error finding user to notify by Id %v: %v", h.StartedBy, err)

		}

		// if we didn't find a user (in the case of testing) set the timezone to ""
		// to avoid triggering a nil pointer exception
		timezone := ""
		if userToNotify != nil {
			timezone = userToNotify.Settings.Timezone
		}

		var expirationTimeFormatted string
		// use our fetched information to load proper time zone to notify the user with
		// (if time zone is empty, defaults to UTC)
		loc, err := time.LoadLocation(timezone)
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "Error loading timezone for email format with user_id %v: %v", userToNotify.Id, err)
			expirationTimeFormatted = h.ExpirationTime.Format(time.RFC1123)
		} else {
			expirationTimeFormatted = h.ExpirationTime.In(loc).Format(time.RFC1123)
		}
		// we need to send a notification for the threshold for this host
		hostNotification := notification{
			recipient: h.StartedBy,
			subject:   fmt.Sprintf("%v host termination reminder", h.Distro.Id),
			message: fmt.Sprintf("Your %v host with id %v will be terminated"+
				" at %v, in %v minutes. Visit %v to extend its lifetime.",
				h.Distro.Id, h.Id,
				expirationTimeFormatted,
				h.ExpirationTime.Sub(time.Now()),
				settings.Ui.Url+"/ui/spawn"),
			threshold: thresholdKey,
			host:      h,
			callback: func(h host.Host, thresholdKey string) error {
				return h.SetExpirationNotification(thresholdKey)
			},
		}

		// add it to the list
		warnings = append(warnings, hostNotification)

	}

	evergreen.Logger.Logf(slogger.INFO, "Built %v warnings about imminently"+
		" expiring hosts", len(warnings))

	return warnings, nil
}
Example 26
// run all monitoring functions
func RunAllMonitoring(settings *evergreen.Settings) error {

	// load in all of the distros
	distros, err := distro.Find(db.Q{})
	if err != nil {
		return fmt.Errorf("error finding distros: %v", err)
	}

	// fetch the project refs, which we will use to get all of the projects
	projectRefs, err := model.FindAllProjectRefs()
	if err != nil {
		return fmt.Errorf("error loading in project refs: %v", err)
	}

	// turn the project refs into a map of the project id -> project
	projects := map[string]model.Project{}
	for _, ref := range projectRefs {
		// only monitor projects that are enabled
		if !ref.Enabled {
			continue
		}
		project, err := model.FindProject("", &ref)

		// continue on error to stop the whole monitoring process from
		// being held up
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "error finding project %v: %v",
				ref.Identifier, err)
			continue
		}

		if project == nil {
			evergreen.Logger.Logf(slogger.ERROR, "no project entry found for"+
				" ref %v", ref.Identifier)
			continue
		}

		projects[project.Identifier] = *project
	}

	// initialize the task monitor
	taskMonitor := &TaskMonitor{
		flaggingFuncs: defaultTaskFlaggingFuncs,
	}

	// clean up any necessary tasks
	errs := taskMonitor.CleanupTasks(projects)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error cleaning up tasks: %v", err)
	}

	// initialize the host monitor
	hostMonitor := &HostMonitor{
		flaggingFuncs:   defaultHostFlaggingFuncs,
		monitoringFuncs: defaultHostMonitoringFuncs,
	}

	// clean up any necessary hosts
	errs = hostMonitor.CleanupHosts(distros, settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error cleaning up hosts: %v", err)
	}

	// run monitoring checks
	errs = hostMonitor.RunMonitoringChecks(settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error running host monitoring checks: %v", err)
	}

	// initialize the notifier
	notifier := &Notifier{
		notificationBuilders: defaultNotificationBuilders,
	}

	// send notifications
	errs = notifier.Notify(settings)
	for _, err := range errs {
		evergreen.Logger.Logf(slogger.ERROR, "Error sending notifications: %v", err)
	}

	// Do alerts for spawnhosts - collect all hosts expiring in the next 12 hours.
	// The trigger logic will filter out any hosts that aren't in a notification window, or that
	// have already had alerts sent.
	now := time.Now()
	thresholdTime := now.Add(12 * time.Hour)
	expiringSoonHosts, err := host.Find(host.ByExpiringBetween(now, thresholdTime))
	if err != nil {
		return err
	}

	for _, h := range expiringSoonHosts {
		err := alerts.RunSpawnWarningTriggers(&h)

		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "Error queueing alert: %v", err)
		}
	}

	return nil

}
Example 27
// Schedule all of the tasks to be run.  Works by finding all of the tasks that
// are ready to be run, splitting them by distro, prioritizing them, and saving
// the per-distro queues.  Then determines the number of new hosts to spin up
// for each distro, and spins them up.
func (self *Scheduler) Schedule() error {
	// make sure the correct static hosts are in the database
	evergreen.Logger.Logf(slogger.INFO, "Updating static hosts...")
	err := model.UpdateStaticHosts(self.Settings)
	if err != nil {
		return fmt.Errorf("error updating static hosts: %v", err)
	}

	// find all tasks ready to be run
	evergreen.Logger.Logf(slogger.INFO, "Finding runnable tasks...")

	runnableTasks, err := self.FindRunnableTasks()
	if err != nil {
		return fmt.Errorf("Error finding runnable tasks: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "There are %v tasks ready to be run", len(runnableTasks))

	// split the tasks by distro
	tasksByDistro, taskRunDistros, err := self.splitTasksByDistro(runnableTasks)
	if err != nil {
		return fmt.Errorf("Error splitting tasks by distro to run on: %v", err)
	}

	// load in all of the distros
	distros, err := distro.Find(distro.All)
	if err != nil {
		return fmt.Errorf("Error finding distros: %v", err)
	}

	taskIdToMinQueuePos := make(map[string]int)

	// get the expected run duration of all runnable tasks
	taskExpectedDuration, err := self.GetExpectedDurations(runnableTasks)

	if err != nil {
		return fmt.Errorf("Error getting expected task durations: %v", err)
	}

	// prioritize the tasks, one distro at a time
	taskQueueItems := make(map[string][]model.TaskQueueItem)
	for _, d := range distros {
		runnableTasksForDistro := tasksByDistro[d.Id]
		evergreen.Logger.Logf(slogger.INFO, "Prioritizing %v tasks for distro %v...",
			len(runnableTasksForDistro), d.Id)

		prioritizedTasks, err := self.PrioritizeTasks(self.Settings,
			runnableTasksForDistro)
		if err != nil {
			return fmt.Errorf("Error prioritizing tasks: %v", err)
		}

		// Update the running minimums of queue position
		// The value is 1-based primarily so that we can differentiate between
		// no value and being first in a queue
		for i, prioritizedTask := range prioritizedTasks {
			minQueuePos, ok := taskIdToMinQueuePos[prioritizedTask.Id]
			if ok {
				taskIdToMinQueuePos[prioritizedTask.Id] =
					int(math.Min(float64(minQueuePos), float64(i+1)))
			} else {
				taskIdToMinQueuePos[prioritizedTask.Id] = i + 1
			}
		}

		// persist the queue of tasks
		evergreen.Logger.Logf(slogger.INFO, "Saving task queue for distro %v...",
			d.Id)
		queuedTasks, err := self.PersistTaskQueue(d.Id, prioritizedTasks,
			taskExpectedDuration)
		if err != nil {
			return fmt.Errorf("Error saving task queue: %v", err)
		}

		// track scheduled time for prioritized tasks
		err = model.SetTasksScheduledTime(prioritizedTasks, time.Now())
		if err != nil {
			return fmt.Errorf("Error setting scheduled time for prioritized "+
				"tasks: %v", err)
		}

		taskQueueItems[d.Id] = queuedTasks
	}

	err = model.UpdateMinQueuePos(taskIdToMinQueuePos)
	if err != nil {
		return fmt.Errorf("Error updating tasks with queue positions: %v", err)
	}

	// split distros by name
	distrosByName := make(map[string]distro.Distro)
	for _, d := range distros {
		distrosByName[d.Id] = d
	}

	// fetch all hosts, split by distro
	allHosts, err := host.Find(host.IsLive)
	if err != nil {
		return fmt.Errorf("Error finding live hosts: %v", err)
	}

	// figure out all hosts we have up - per distro
	hostsByDistro := make(map[string][]host.Host)
	for _, liveHost := range allHosts {
		hostsByDistro[liveHost.Distro.Id] = append(hostsByDistro[liveHost.Distro.Id],
			liveHost)
	}

	// construct the data that will be needed by the host allocator
	hostAllocatorData := HostAllocatorData{
		existingDistroHosts:  hostsByDistro,
		distros:              distrosByName,
		taskQueueItems:       taskQueueItems,
		taskRunDistros:       taskRunDistros,
		projectTaskDurations: taskExpectedDuration,
	}

	// figure out how many new hosts we need
	newHostsNeeded, err := self.NewHostsNeeded(hostAllocatorData, self.Settings)
	if err != nil {
		return fmt.Errorf("Error determining how many new hosts are needed: %v",
			err)
	}

	// spawn up the hosts
	hostsSpawned, err := self.spawnHosts(newHostsNeeded)
	if err != nil {
		return fmt.Errorf("Error spawning new hosts: %v", err)
	}

	if len(hostsSpawned) != 0 {
		evergreen.Logger.Logf(slogger.INFO, "Hosts spawned (%v total), by distro: ",
			len(hostsSpawned))
		for distro, hosts := range hostsSpawned {
			evergreen.Logger.Logf(slogger.INFO, "  %v ->", distro)
			for _, host := range hosts {
				evergreen.Logger.Logf(slogger.INFO, "    %v", host.Id)
			}
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "No new hosts spawned")
	}

	return nil
}