Example #1
0
// Run kicks off pending tasks on every host that is free to take one. It
// finds the available hosts, splits them by distro, loads each distro's task
// queue, and pairs queued tasks with free hosts. Each pairing is started in
// its own goroutine through RunTaskOnHost.
//
// Run does not return until every goroutine it started has finished; this
// holds on the error paths as well as on success. It returns an error if
// finding the available hosts, loading a task queue, or dispatching a task
// fails. Failures inside the kick-off goroutines are logged, not returned.
func (self *TaskRunner) Run() error {

	evergreen.Logger.Logf(slogger.INFO, "Finding hosts available to take a task...")
	// find all hosts available to take a task
	availableHosts, err := self.FindAvailableHosts()
	if err != nil {
		return fmt.Errorf("error finding available hosts: %v", err)
	}
	evergreen.Logger.Logf(slogger.INFO, "Found %v host(s) available to take a task",
		len(availableHosts))

	// split the hosts by distro
	hostsByDistro := self.splitHostsByDistro(availableHosts)

	// Tracks the kick-off goroutines. The deferred Wait guarantees that an
	// early error return below cannot leave goroutines from earlier
	// iterations running after Run has returned.
	waitGroup := &sync.WaitGroup{}
	defer waitGroup.Wait()

	// assign the free hosts for each distro to the tasks they need to run
	for distroId, freeHostsForDistro := range hostsByDistro {
		evergreen.Logger.Logf(slogger.INFO, "Kicking off tasks on distro %v...",
			distroId)

		// load in the queue of tasks for the distro
		taskQueue, err := self.FindTaskQueue(distroId)
		if err != nil {
			return fmt.Errorf("error finding task queue for distro %v: %v",
				distroId, err)
		}

		if taskQueue == nil {
			evergreen.Logger.Logf(slogger.ERROR, "nil task queue found for distro '%v'", distroId)
			continue
		}

		// while there are both free hosts and pending tasks left, pin
		// tasks to hosts
		for !taskQueue.IsEmpty() && len(freeHostsForDistro) > 0 {
			nextHost := freeHostsForDistro[0]
			nextTask, err := DispatchTaskForHost(taskQueue, &nextHost)
			if err != nil {
				return err
			}

			// No task was handed out. This relies on DispatchTaskForHost
			// returning nil only once the queue is empty, so that the loop
			// condition ends the loop (NOTE(review): confirm against
			// DispatchTaskForHost; otherwise this would spin).
			if nextTask == nil {
				continue
			}

			// once allocated to a task, pop the host off the distro's free host
			// list
			freeHostsForDistro = freeHostsForDistro[1:]

			// take a copy of the task before starting the goroutine
			dereferencedTask := *nextTask

			// kick the task off on the host in the background; nextHost,
			// nextTask and dereferencedTask are declared inside the loop body,
			// so every goroutine captures its own copies
			waitGroup.Add(1)
			go func() {
				defer waitGroup.Done()
				agentRevision, err := self.RunTaskOnHost(self.Settings,
					dereferencedTask, nextHost)
				if err != nil {
					evergreen.Logger.Logf(slogger.ERROR, "error kicking off task %v"+
						" on host %v: %v", dereferencedTask.Id, nextHost.Id, err)

					// the kick-off failed, so mark the task undispatched
					if undispatchErr := model.MarkTaskUndispatched(nextTask); undispatchErr != nil {
						evergreen.Logger.Logf(slogger.ERROR, "error marking task %v as undispatched "+
							"on host %v: %v", dereferencedTask.Id, nextHost.Id, undispatchErr)
					}
					return
				}
				evergreen.Logger.Logf(slogger.INFO, "Task %v successfully kicked"+
					" off on host %v", dereferencedTask.Id, nextHost.Id)

				// now update the host's running task/agent revision fields
				// accordingly
				err = nextHost.SetRunningTask(dereferencedTask.Id, agentRevision, time.Now())
				if err != nil {
					evergreen.Logger.Logf(slogger.ERROR, "error updating running "+
						"task %v on host %v: %v", dereferencedTask.Id,
						nextHost.Id, err)
				}
			}()
		}
	}

	// wait for everything to finish before reporting completion; the deferred
	// Wait then returns immediately
	waitGroup.Wait()

	evergreen.Logger.Logf(slogger.INFO, "Finished kicking off all pending tasks")

	return nil
}
Example #2
0
// processDistro kicks off pending tasks on the free hosts of the given
// distro. It acquires a global lock keyed on the runner name and distro id,
// loads the distro's available hosts and task queue, and pairs queued tasks
// with free hosts. Each pairing is started in its own goroutine through
// RunTaskOnHost.
//
// The global lock is released only after every goroutine started here has
// finished; this holds on the error paths as well as on success. It returns
// an error if the lock cannot be acquired (including on timeout), or if
// loading the hosts, loading the task queue, or dispatching a task fails.
// Failures inside the kick-off goroutines are logged, not returned.
func (self *TaskRunner) processDistro(distroId string) error {
	lockKey := fmt.Sprintf("%v.%v", RunnerName, distroId)
	// sleep for 1 second to give other spinning locks a chance to preempt this one
	time.Sleep(time.Second)
	lockAcquired, err := db.WaitTillAcquireGlobalLock(lockKey, db.LockTimeout)
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "error acquiring global lock for %v: %v", lockKey, err)
	}
	if !lockAcquired {
		return evergreen.Logger.Errorf(slogger.ERROR, "timed out acquiring global lock for %v", lockKey)
	}
	defer func() {
		err := db.ReleaseGlobalLock(lockKey)
		if err != nil {
			evergreen.Logger.Errorf(slogger.ERROR, "error releasing global lock for %v: %v", lockKey, err)
		}
	}()

	// Tracks the kick-off goroutines. This defer is registered after the
	// lock-release defer, so it runs first: on every return path the lock is
	// held until all in-flight goroutines have finished.
	waitGroup := &sync.WaitGroup{}
	defer waitGroup.Wait()

	freeHostsForDistro, err := self.FindAvailableHostsForDistro(distroId)
	if err != nil {
		return fmt.Errorf("loading available %v hosts: %v", distroId, err)
	}
	evergreen.Logger.Logf(slogger.INFO, "Found %v %v host(s) available to take a task",
		len(freeHostsForDistro), distroId)
	evergreen.Logger.Logf(slogger.INFO, "Kicking off tasks on distro %v...", distroId)
	taskQueue, err := self.FindTaskQueue(distroId)
	if err != nil {
		return fmt.Errorf("error finding task queue for distro %v: %v",
			distroId, err)
	}
	if taskQueue == nil {
		evergreen.Logger.Logf(slogger.INFO, "nil task queue found for distro '%v'", distroId)
		return nil // nothing to do
	}

	// while there are both free hosts and pending tasks left, pin tasks to hosts
	for !taskQueue.IsEmpty() && len(freeHostsForDistro) > 0 {
		nextHost := freeHostsForDistro[0]
		nextTask, err := DispatchTaskForHost(taskQueue, &nextHost)
		if err != nil {
			return err
		}

		// No task was handed out. This relies on DispatchTaskForHost
		// returning nil only once the queue is empty, so that the loop
		// condition ends the loop (NOTE(review): confirm against
		// DispatchTaskForHost; otherwise this would spin).
		if nextTask == nil {
			continue
		}

		// once allocated to a task, pop the host off the distro's free host
		// list
		freeHostsForDistro = freeHostsForDistro[1:]

		// kick the task off on the host in the background; the goroutine gets
		// its own copy of the task as t, and nextHost/nextTask are declared
		// inside the loop body, so each goroutine captures its own
		waitGroup.Add(1)
		go func(t task.Task) {
			defer waitGroup.Done()
			agentRevision, err := self.RunTaskOnHost(self.Settings,
				t, nextHost)
			if err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "error kicking off task %v"+
					" on host %v: %v", t.Id, nextHost.Id, err)

				// the kick-off failed, so mark the task undispatched
				if undispatchErr := model.MarkTaskUndispatched(nextTask); undispatchErr != nil {
					evergreen.Logger.Logf(slogger.ERROR, "error marking task %v as undispatched "+
						"on host %v: %v", t.Id, nextHost.Id, undispatchErr)
				}
				return
			}
			evergreen.Logger.Logf(slogger.INFO, "Task %v successfully kicked"+
				" off on host %v", t.Id, nextHost.Id)

			// now update the host's running task/agent revision fields
			// accordingly
			err = nextHost.SetRunningTask(t.Id, agentRevision, time.Now())
			if err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "error updating running "+
					"task %v on host %v: %v", t.Id,
					nextHost.Id, err)
			}
		}(*nextTask)
	}

	// the deferred waitGroup.Wait() blocks here until all kick-offs finish
	return nil
}