Example #1
func markHostRunningTaskFinished(h *host.Host, task *model.Task, newTaskId string) {
	// update the given host's running_task field accordingly
	if err := h.UpdateRunningTask(task.Id, newTaskId, time.Now()); err != nil {
		evergreen.Logger.Errorf(slogger.ERROR, "Error updating running task "+
			"%v on host %v to '%v': %v", task.Id, h.Id, newTaskId, err)
	}
}
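Note the shape of Example #1: the running-task update is best-effort, so a failure is logged but never returned to the caller. A minimal, stdlib-only sketch of that "log, don't propagate" pattern (updateRunningTask below is a hypothetical stand-in for host.Host.UpdateRunningTask, not the real API):

package main

import (
	"errors"
	"log"
	"time"
)

// updateRunningTask is a stand-in for host.Host.UpdateRunningTask.
func updateRunningTask(prevTaskId, newTaskId string, at time.Time) error {
	return errors.New("simulated database error")
}

func markFinished(hostId, taskId, newTaskId string) {
	// best-effort update: log the failure, but don't surface it to the caller
	if err := updateRunningTask(taskId, newTaskId, time.Now()); err != nil {
		log.Printf("Error updating running task %v on host %v to '%v': %v",
			taskId, hostId, newTaskId, err)
	}
}

func main() {
	markFinished("h1", "t1", "t2") // logs the simulated error and returns
}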
Example #2
// setupHost runs the specified setup script for an individual host. Returns
// the output from running the script remotely, as well as any error that
// occurs. If the script exits with a non-zero exit code, the error will be non-nil.
func (init *HostInit) setupHost(targetHost *host.Host) (string, error) {

	// fetch the appropriate cloud provider for the host
	cloudMgr, err := providers.GetCloudManager(targetHost.Provider, init.Settings)
	if err != nil {
		return "",
			fmt.Errorf("failed to get cloud manager for host %v with provider %v: %v",
				targetHost.Id, targetHost.Provider, err)
	}

	// mark the host as initializing
	if err := targetHost.SetInitializing(); err != nil {
		if err == mgo.ErrNotFound {
			return "", ErrHostAlreadyInitializing
		}
		return "", fmt.Errorf("database error: %v", err)
	}

	/* TESTING ONLY
	setupDebugSSHTunnel(path_to_ssh_key, targetHost.User, targetHost.Host)
	*/

	// run the function scheduled for when the host is up
	err = cloudMgr.OnUp(targetHost)
	if err != nil {
		// if this fails it is probably due to an API hiccup, so we keep going.
		evergreen.Logger.Logf(slogger.WARN, "OnUp callback failed for host '%v': '%v'", targetHost.Id, err)
	}
	cloudHost, err := providers.GetCloudHost(targetHost, init.Settings)
	if err != nil {
		return "", fmt.Errorf("failed to get cloud host for %v: %v", targetHost.Id, err)
	}
	sshOptions, err := cloudHost.GetSSHOptions()
	if err != nil {
		return "", fmt.Errorf("error getting ssh options for host %v: %v", targetHost.Id, err)
	}

	if targetHost.Distro.Teardown != "" {
		err = init.copyScript(targetHost, teardownScriptName, targetHost.Distro.Teardown)
		if err != nil {
			return "", fmt.Errorf("error copying script %v to host %v: %v",
				teardownScriptName, targetHost.Id, err)
		}
	}

	if targetHost.Distro.Setup != "" {
		err = init.copyScript(targetHost, setupScriptName, targetHost.Distro.Setup)
		if err != nil {
			return "", fmt.Errorf("error copying script %v to host %v: %v",
				setupScriptName, targetHost.Id, err)
		}
		logs, err := hostutil.RunRemoteScript(targetHost, setupScriptName, sshOptions)
		if err != nil {
			return logs, fmt.Errorf("error running setup script over ssh: %v", err)
		}
		return logs, nil
	}
	return "", nil
}
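The mgo.ErrNotFound branch above is how setupHost detects that another hostinit process already claimed the host; callers are expected to treat the sentinel as a benign race rather than a failure. A self-contained sketch of that sentinel-error contract (the names below are stand-ins, not the actual Evergreen API):

package main

import (
	"errors"
	"fmt"
)

var errAlreadyInitializing = errors.New("host already initializing")

// setupHost simulates losing the race to another hostinit process.
func setupHost(hostId string) (string, error) {
	return "", errAlreadyInitializing
}

func provision(hostId string) error {
	output, err := setupHost(hostId)
	if err == errAlreadyInitializing {
		return nil // another process beat us there; nothing to do
	}
	if err != nil {
		return fmt.Errorf("setup failed for %v (output %q): %v", hostId, output, err)
	}
	return nil
}

func main() {
	fmt.Println(provision("h1")) // <nil>
}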
Example #3
// Provision the host, and update the database accordingly.
func (init *HostInit) ProvisionHost(h *host.Host) error {

	// run the setup script
	evergreen.Logger.Logf(slogger.INFO, "Setting up host %v", h.Id)
	output, err := init.setupHost(h)

	// deal with any errors that occurred while running the setup
	if err != nil {
		evergreen.Logger.Logf(slogger.ERROR, "Error running setup script: %v", err)

		// another hostinit process beat us there
		if err == ErrHostAlreadyInitializing {
			evergreen.Logger.Logf(slogger.DEBUG, "Attempted to initialize already initializing host %v", h.Id)
			return nil
		}

		alerts.RunHostProvisionFailTriggers(h)
		event.LogProvisionFailed(h.Id, output)

		// setup script failed, mark the host's provisioning as failed
		if err := h.SetUnprovisioned(); err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "unprovisioning host %v failed: %v", h.Id, err)
		}

		return fmt.Errorf("error initializing host %v: %v", h.Id, err)

	}

	evergreen.Logger.Logf(slogger.INFO, "Setup complete for host %v", h.Id)

	if h.ProvisionOptions != nil &&
		h.ProvisionOptions.LoadCLI &&
		h.ProvisionOptions.OwnerId != "" {
		evergreen.Logger.Logf(slogger.INFO, "Uploading client binary to host %v", h.Id)
		lcr, err := init.LoadClient(h)
		if err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "Failed to load client binary onto host %v: %v", h.Id, err)
		} else if len(h.ProvisionOptions.TaskId) > 0 {
			evergreen.Logger.Logf(slogger.INFO, "Fetching data for task %v onto host %v", h.ProvisionOptions.TaskId, h.Id)
			if err = init.fetchRemoteTaskData(h.ProvisionOptions.TaskId, lcr.BinaryPath, lcr.ConfigPath, h); err != nil {
				evergreen.Logger.Logf(slogger.ERROR, "Failed to fetch data onto host %v: %v", h.Id, err)
			}
		}
	}

	// the setup was successful. update the host accordingly in the database
	if err := h.MarkAsProvisioned(); err != nil {
		return fmt.Errorf("error marking host %v as provisioned: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "Host %v successfully provisioned", h.Id)

	return nil
}
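Since ProvisionHost touches only its own host, a runner can plausibly fan it out with one goroutine per host. A stdlib sketch of that fan-out shape (provisionHost is a stand-in for the method above, not the actual runner code):

package main

import (
	"fmt"
	"sync"
)

// provisionHost stands in for (*HostInit).ProvisionHost.
func provisionHost(id string) error {
	fmt.Println("provisioning", id)
	return nil
}

func main() {
	hosts := []string{"h1", "h2", "h3"}
	var wg sync.WaitGroup
	for _, id := range hosts {
		wg.Add(1)
		go func(id string) {
			defer wg.Done()
			// a failure on one host should not stop the others
			if err := provisionHost(id); err != nil {
				fmt.Println("provision failed:", err)
			}
		}(id)
	}
	wg.Wait()
}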
Example #4
func (cloudManager *EC2SpotManager) TerminateInstance(host *host.Host) error {
	// terminate the instance
	if host.Status == evergreen.HostTerminated {
		errMsg := fmt.Errorf("Can not terminate %v - already marked as "+
			"terminated!", host.Id)
		evergreen.Logger.Errorf(slogger.ERROR, errMsg.Error())
		return errMsg
	}

	spotDetails, err := cloudManager.describeSpotRequest(host.Id)
	if err != nil {
		ec2err, ok := err.(*ec2.Error)
		if ok && ec2err.Code == EC2ErrorSpotRequestNotFound {
			// EC2 says the spot request is not found - assume this means amazon
			// terminated our spot instance
			evergreen.Logger.Logf(slogger.WARN, "EC2 could not find spot instance '%v', "+
				"marking as terminated", host.Id)
			return host.Terminate()
		}
		return evergreen.Logger.Errorf(slogger.ERROR, "Couldn't terminate, "+
			"failed to get spot request info for %v: %v", host.Id, err)
	}

	evergreen.Logger.Logf(slogger.INFO, "Cancelling spot request %v", host.Id)
	// first, cancel the spot request
	ec2Handle := getUSEast(*cloudManager.awsCredentials)
	_, err = ec2Handle.CancelSpotRequests([]string{host.Id})
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "Failed to cancel spot request for host %v: %v",
			host.Id, err)
	}

	// canceling the spot request doesn't terminate the instance that fulfilled it,
	// if it was fulfilled. We need to terminate the instance explicitly
	if spotDetails.InstanceId != "" {
		evergreen.Logger.Logf(slogger.INFO, "Spot request %v cancelled, now terminating instance %v",
			spotDetails.InstanceId, host.Id)
		resp, err := ec2Handle.TerminateInstances([]string{spotDetails.InstanceId})
		if err != nil {
			return evergreen.Logger.Errorf(slogger.ERROR, "Failed to terminate host %v: %v", host.Id, err)
		}

		for _, stateChange := range resp.StateChanges {
			evergreen.Logger.Logf(slogger.INFO, "Terminated %v", stateChange.InstanceId)
		}
	} else {
		evergreen.Logger.Logf(slogger.INFO, "Spot request %v cancelled (no instances have fulfilled it)", host.Id)
	}

	// set the host status as terminated and update its termination time
	return host.Terminate()
}
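The describeSpotRequest error handling above relies on type-asserting the error to the provider's concrete type and branching on its code. A stdlib-only sketch of that pattern (EC2Error and the code value are illustrative stand-ins for *ec2.Error):

package main

import "fmt"

// EC2Error is a stand-in for the provider's concrete error type (*ec2.Error).
type EC2Error struct{ Code, Message string }

func (e *EC2Error) Error() string { return e.Code + ": " + e.Message }

const errorSpotRequestNotFound = "InvalidSpotInstanceRequestID.NotFound" // assumed code

func describeSpotRequest(id string) error {
	return &EC2Error{Code: errorSpotRequestNotFound, Message: "no such request"}
}

func main() {
	err := describeSpotRequest("sir-123")
	if ec2err, ok := err.(*EC2Error); ok && ec2err.Code == errorSpotRequestNotFound {
		// the request is gone; treat the host as already terminated
		fmt.Println("spot request not found; marking host terminated")
		return
	}
	fmt.Println("unexpected error:", err)
}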
Example #5
// check reachability for a single host, and take any necessary action
func checkHostReachability(host host.Host, settings *evergreen.Settings) error {
	evergreen.Logger.Logf(slogger.INFO, "Running reachability check for host %v...", host.Id)

	// get a cloud version of the host
	cloudHost, err := providers.GetCloudHost(&host, settings)
	if err != nil {
		return fmt.Errorf("error getting cloud host for host %v: %v", host.Id, err)
	}

	// get the cloud status for the host
	cloudStatus, err := cloudHost.GetInstanceStatus()
	if err != nil {
		return fmt.Errorf("error getting cloud status for host %v: %v", host.Id, err)
	}

	// take different action, depending on how the cloud provider reports the host's status
	switch cloudStatus {
	case cloud.StatusRunning:
		// check if the host is reachable via SSH
		reachable, err := cloudHost.IsSSHReachable()
		if err != nil {
			return fmt.Errorf("error checking ssh reachability for host %v: %v", host.Id, err)
		}

		// log the status update if the reachability of the host is changing
		if host.Status == evergreen.HostUnreachable && reachable {
			evergreen.Logger.Logf(slogger.INFO, "Setting host %v as reachable", host.Id)
		} else if host.Status != evergreen.HostUnreachable && !reachable {
			evergreen.Logger.Logf(slogger.INFO, "Setting host %v as unreachable", host.Id)
		}

		// mark the host appropriately
		if err := host.UpdateReachability(reachable); err != nil {
			return fmt.Errorf("error updating reachability for host %v: %v", host.Id, err)
		}
	case cloud.StatusTerminated:
		evergreen.Logger.Logf(slogger.INFO, "Host %v terminated externally; updating db status to terminated", host.Id)

		// the instance was terminated from outside our control
		if err := host.SetTerminated(); err != nil {
			return fmt.Errorf("error setting host %v terminated: %v", host.Id, err)
		}
	}

	// success
	return nil

}
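The switch above only acts on two of the provider's states and silently ignores the rest. A compact sketch of that dispatch shape, with a hypothetical status enum:

package main

import "fmt"

type cloudStatus int

const (
	statusRunning cloudStatus = iota
	statusTerminated
	statusPending // states like this intentionally fall through
)

func handleStatus(hostId string, s cloudStatus) {
	switch s {
	case statusRunning:
		fmt.Println("checking SSH reachability for", hostId)
	case statusTerminated:
		fmt.Println("marking", hostId, "as terminated in the db")
	default:
		// no action needed for intermediate states
	}
}

func main() {
	handleStatus("h1", statusTerminated)
}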
Example #6
// Provision the host, and update the database accordingly.
func (init *HostInit) ProvisionHost(h *host.Host) error {

	// run the setup script
	output, err := init.setupHost(h)

	// deal with any errors that occurred while running the setup
	if err != nil {

		evergreen.Logger.Logf(slogger.ERROR, "Error running setup script: %v", err)

		// another hostinit process beat us there
		if err == ErrHostAlreadyInitializing {
			evergreen.Logger.Logf(slogger.DEBUG,
				"Attempted to initialize already initializing host %v", h.Id)
			return nil
		}

		// log the provisioning failure
		setupLog := ""
		if output != nil {
			setupLog = string(output)
		}
		alerts.RunHostProvisionFailTriggers(h)
		event.LogProvisionFailed(h.Id, setupLog)

		// setup script failed, mark the host's provisioning as failed
		if err := h.SetUnprovisioned(); err != nil {
			evergreen.Logger.Logf(slogger.ERROR, "unprovisioning host %v failed: %v", h.Id, err)
		}

		return fmt.Errorf("error initializing host %v: %v", h.Id, err)

	}

	// the setup was successful. update the host accordingly in the database
	if err := h.MarkAsProvisioned(); err != nil {
		return fmt.Errorf("error marking host %v as provisioned: %v", err)
	}

	evergreen.Logger.Logf(slogger.INFO, "Host %v successfully provisioned", h.Id)

	return nil

}
Example #7
func UpdateStaticHosts(e *evergreen.Settings) error {
	distros, err := distro.Find(distro.ByProvider(static.ProviderName))
	if err != nil {
		return err
	}
	activeStaticHosts := make([]string, 0)
	settings := &static.Settings{}

	for _, d := range distros {
		err = mapstructure.Decode(d.ProviderSettings, settings)
		if err != nil {
			return fmt.Errorf("invalid static settings for '%v'", d.Id)
		}
		for _, h := range settings.Hosts {
			hostInfo, err := util.ParseSSHInfo(h.Name)
			if err != nil {
				return err
			}
			user := hostInfo.User
			if user == "" {
				user = d.User
			}
			staticHost := host.Host{
				Id:           h.Name,
				User:         user,
				Host:         h.Name,
				Distro:       d,
				CreationTime: time.Now(),
				Provider:     evergreen.HostTypeStatic,
				StartedBy:    evergreen.User,
				Status:       evergreen.HostRunning,
				Provisioned:  true,
			}

			// upsert the host
			_, err = staticHost.Upsert()
			if err != nil {
				return err
			}
			activeStaticHosts = append(activeStaticHosts, h.Name)
		}
	}
	return host.DecommissionInactiveStaticHosts(activeStaticHosts)
}
Example #8
// IsHostReady returns whether or not the specified host is ready for its setup script
// to be run.
func (init *HostInit) IsHostReady(host *host.Host) (bool, error) {

	// fetch the appropriate cloud provider for the host
	cloudMgr, err := providers.GetCloudManager(host.Distro.Provider, init.Settings)
	if err != nil {
		return false,
			fmt.Errorf("failed to get cloud manager for provider %v: %v", host.Distro.Provider, err)
	}

	// ask for the instance's status
	hostStatus, err := cloudMgr.GetInstanceStatus(host)
	if err != nil {
		return false, fmt.Errorf("error checking instance status of host %v: %v", host.Id, err)
	}

	// if the host isn't up yet, we can't do anything
	if hostStatus != cloud.StatusRunning {
		return false, nil
	}

	// set the host's dns name, if it is not set
	if host.Host == "" {

		// get the DNS name for the host
		hostDNS, err := cloudMgr.GetDNSName(host)
		if err != nil {
			return false, fmt.Errorf("error checking DNS name for host %v: %v", host.Id, err)
		}

		// sanity check for the host DNS name
		if hostDNS == "" {
			return false, fmt.Errorf("instance %v is running but not returning a DNS name",
				host.Id)
		}

		// update the host's DNS name
		if err := host.SetDNSName(hostDNS); err != nil {
			return false, fmt.Errorf("error setting DNS name for host %v: %v", host.Id, err)
		}

	}

	// check if the host is reachable via SSH
	cloudHost, err := providers.GetCloudHost(host, init.Settings)
	if err != nil {
		return false, fmt.Errorf("failed to get cloud host for %v: %v", host.Id, err)
	}
	reachable, err := cloudHost.IsSSHReachable()
	if err != nil {
		return false, fmt.Errorf("error checking if host %v is reachable: %v", host.Id, err)
	}

	// at this point, we can run the setup if the host is reachable
	return reachable, nil
}
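IsHostReady returning (false, nil) is a "try again later" signal, which suggests the caller polls. A hedged stdlib sketch of such a polling loop (the interval and deadline are assumptions, not Evergreen's actual values; isHostReady is a stand-in):

package main

import (
	"fmt"
	"time"
)

// isHostReady stands in for (*HostInit).IsHostReady.
func isHostReady(id string) (bool, error) {
	return true, nil
}

func main() {
	deadline := time.Now().Add(2 * time.Minute)
	for time.Now().Before(deadline) {
		ready, err := isHostReady("h1")
		if err != nil {
			fmt.Println("readiness check failed:", err)
			return
		}
		if ready {
			fmt.Println("host is ready; running setup script")
			return
		}
		time.Sleep(5 * time.Second) // not up yet; try again shortly
	}
	fmt.Println("timed out waiting for host to become ready")
}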
Example #9
// terminate an instance
func (staticMgr *StaticManager) TerminateInstance(host *host.Host) error {
	// a decommissioned static host will be removed from the database
	if host.Status == evergreen.HostDecommissioned {
		evergreen.Logger.Logf(slogger.DEBUG, "Removing decommissioned %v "+
			"static host (%v)", host.Distro.Id, host.Host)
		if err := host.Remove(); err != nil {
			evergreen.Logger.Errorf(slogger.ERROR, "Error removing "+
				"decommissioned %v static host (%v): %v",
				host.Distro.Id, host.Host, err)
			return err
		}
		return nil
	}
	evergreen.Logger.Logf(slogger.DEBUG, "Not terminating static %v host: %v", host.Distro.Id, host.Host)
	return nil
}
Example #10
// TerminateInstance destroys a droplet.
func (digoMgr *DigitalOceanManager) TerminateInstance(host *host.Host) error {
	hostIdAsInt, err := strconv.Atoi(host.Id)
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "Can't terminate '%v': DigitalOcean host id's must be integers", host.Id)
	}
	response, err := digoMgr.account.DestroyDroplet(hostIdAsInt)
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "Failed to destroy droplet '%v': %v", host.Id, err)
	}

	if response.Status != "OK" {
		return evergreen.Logger.Errorf(slogger.ERROR, "Failed to destroy droplet '%v': error message %v", err, response.ErrorMessage)
	}

	return host.Terminate()
}
Example #11
// clean up a task whose heartbeat has timed out
func cleanUpTimedOutHeartbeat(task model.Task, project model.Project, host *host.Host) error {
	// mock up the failure details of the task
	detail := &apimodels.TaskEndDetail{
		Description: model.AgentHeartbeat,
		TimedOut:    true,
	}

	// try to reset the task
	if err := task.TryReset("", RunnerName, &project, detail); err != nil {
		return fmt.Errorf("error trying to reset task %v: %v", task.Id, err)
	}

	// clear out the host's running task
	if err := host.UpdateRunningTask(task.Id, "", time.Now()); err != nil {
		return fmt.Errorf("error clearing running task %v from host %v: %v",
			task.Id, host.Id, err)
	}

	// success
	return nil
}
Example #12
func (cloudManager *EC2Manager) StopInstance(host *host.Host) error {
	ec2Handle := getUSEast(*cloudManager.awsCredentials)
	// stop the instance
	resp, err := ec2Handle.StopInstances(host.Id)

	if err != nil {
		return err
	}

	for _, stateChange := range resp.StateChanges {
		evergreen.Logger.Logf(slogger.INFO, "Stopped %v", stateChange.InstanceId)
	}

	err = host.ClearRunningTask()

	if err != nil {
		return err
	}

	return nil
}
Example #13
// TerminateInstance destroys a container.
func (dockerMgr *DockerManager) TerminateInstance(host *host.Host) error {
	dockerClient, _, err := generateClient(&host.Distro)
	if err != nil {
		return err
	}

	err = dockerClient.StopContainer(host.Id, TimeoutSeconds)
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "Failed to stop container '%v': %v", host.Id, err)
	}

	err = dockerClient.RemoveContainer(
		docker.RemoveContainerOptions{
			ID: host.Id,
		},
	)
	if err != nil {
		return evergreen.Logger.Errorf(slogger.ERROR, "Failed to remove container '%v': %v", host.Id, err)
	}

	return host.Terminate()
}
Example #14
func (cloudManager *EC2Manager) TerminateInstance(host *host.Host) error {
	// terminate the instance
	if host.Status == evergreen.HostTerminated {
		errMsg := fmt.Errorf("Can not terminate %v - already marked as "+
			"terminated!", host.Id)
		evergreen.Logger.Errorf(slogger.ERROR, errMsg.Error())
		return errMsg
	}

	ec2Handle := getUSEast(*cloudManager.awsCredentials)
	resp, err := ec2Handle.TerminateInstances([]string{host.Id})

	if err != nil {
		return err
	}

	for _, stateChange := range resp.StateChanges {
		evergreen.Logger.Logf(slogger.INFO, "Terminated %v", stateChange.InstanceId)
	}

	// set the host status as terminated and update its termination time
	return host.Terminate()
}
Example #15
// setupHost runs the specified setup script for an individual host. Returns
// the output from running the script remotely, as well as any error that
// occurs. If the script exits with a non-zero exit code, the error will be non-nil.
func (init *HostInit) setupHost(targetHost *host.Host) ([]byte, error) {

	// fetch the appropriate cloud provider for the host
	cloudMgr, err := providers.GetCloudManager(targetHost.Provider, init.Settings)
	if err != nil {
		return nil,
			fmt.Errorf("failed to get cloud manager for host %v with provider %v: %v",
				targetHost.Id, targetHost.Provider, err)
	}

	// mark the host as initializing
	if err := targetHost.SetInitializing(); err != nil {
		if err == mgo.ErrNotFound {
			return nil, ErrHostAlreadyInitializing
		}
		return nil, fmt.Errorf("database error: %v", err)
	}

	// run the function scheduled for when the host is up
	err = cloudMgr.OnUp(targetHost)
	if err != nil {
		// if this fails it is probably due to an API hiccup, so we keep going.
		evergreen.Logger.Logf(slogger.WARN, "OnUp callback failed for host '%v': '%v'", targetHost.Id, err)
	}

	// run the remote setup script as sudo, if appropriate
	sudoStr := ""
	if targetHost.Distro.SetupAsSudo {
		sudoStr = "sudo "
	}

	// parse the hostname into the user, host and port
	hostInfo, err := util.ParseSSHInfo(targetHost.Host)
	if err != nil {
		return nil, err
	}
	user := targetHost.Distro.User
	if hostInfo.User != "" {
		user = hostInfo.User
	}

	// create a temp file for the setup script
	fileName := "setup.sh"
	file, err := ioutil.TempFile("", fileName)
	if err != nil {
		return nil, fmt.Errorf("error creating setup script: %v", err)
	}
	defer func() {
		file.Close()
		os.Remove(file.Name())
	}()

	// build the setup script
	setup, err := init.buildSetupScript(targetHost)
	if err != nil {
		return nil, fmt.Errorf("error building setup script for host %v: %v", targetHost.Id, err)
	}

	// write the setup script to the file
	if _, err := file.Write([]byte(setup)); err != nil {
		return nil, fmt.Errorf("error writing remote setup script: %v", err)
	}

	cloudHost, err := providers.GetCloudHost(targetHost, init.Settings)
	if err != nil {
		return nil, fmt.Errorf("Failed to get cloud host for %v: %v", targetHost.Id, err)
	}
	sshOptions, err := cloudHost.GetSSHOptions()
	if err != nil {
		return nil, fmt.Errorf("Error getting ssh options for host %v: %v", targetHost.Id, err)
	}

	// copy setup script over to the remote machine
	var scpSetupCmdStderr bytes.Buffer
	scpSetupCmd := &command.ScpCommand{
		Source:         file.Name(),
		Dest:           fileName,
		Stdout:         &scpSetupCmdStderr,
		Stderr:         &scpSetupCmdStderr,
		RemoteHostName: hostInfo.Hostname,
		User:           user,
		Options:        append([]string{"-P", hostInfo.Port}, sshOptions...),
	}

	// run the command to scp the setup script with a timeout
	err = util.RunFunctionWithTimeout(
		scpSetupCmd.Run,
		SCPTimeout,
	)
	if err != nil {
		if err == util.ErrTimedOut {
			scpSetupCmd.Stop()
			return nil, fmt.Errorf("scp-ing setup script timed out")
		}
		return nil, fmt.Errorf("error (%v) copying setup script to remote "+
			"machine: %v", err, scpSetupCmdStderr.String())
	}

	// run command to ssh into remote machine and execute setup script
	var sshSetupCmdStderr bytes.Buffer
	runSetupCmd := &command.RemoteCommand{
		CmdString:      sudoStr + "sh " + fileName,
		Stdout:         &sshSetupCmdStderr,
		Stderr:         &sshSetupCmdStderr,
		RemoteHostName: hostInfo.Hostname,
		User:           user,
		Options:        []string{"-p", hostInfo.Port},
		Background:     false,
	}

	// only force creation of a tty if sudo
	if targetHost.Distro.SetupAsSudo {
		runSetupCmd.Options = []string{"-t", "-t", "-p", hostInfo.Port}
	}
	runSetupCmd.Options = append(runSetupCmd.Options, sshOptions...)

	// run the ssh command with given timeout
	err = util.RunFunctionWithTimeout(
		runSetupCmd.Run,
		time.Duration(SSHTimeoutSeconds)*time.Second,
	)

	return sshSetupCmdStderr.Bytes(), err
}
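Both the scp and ssh steps above are wrapped in util.RunFunctionWithTimeout. A stdlib-only sketch of one plausible way such a wrapper works, assuming nothing about the real implementation: run the function in a goroutine and race it against a timer.

package main

import (
	"errors"
	"fmt"
	"time"
)

var errTimedOut = errors.New("function timed out")

func runWithTimeout(f func() error, timeout time.Duration) error {
	done := make(chan error, 1) // buffered so a late finisher doesn't block forever
	go func() { done <- f() }()
	select {
	case err := <-done:
		return err
	case <-time.After(timeout):
		return errTimedOut
	}
}

func main() {
	err := runWithTimeout(func() error {
		time.Sleep(50 * time.Millisecond) // simulate a slow remote command
		return nil
	}, 10*time.Millisecond)
	fmt.Println(err) // function timed out
}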
Example #16
func startEC2Instance(ec2Handle *ec2.EC2, options *ec2.RunInstancesOptions,
	intentHost *host.Host) (*host.Host, *ec2.RunInstancesResp, error) {
	// start the instance
	resp, err := ec2Handle.RunInstances(options)

	if err != nil {
		// remove the intent host document
		rmErr := intentHost.Remove()
		if rmErr != nil {
			evergreen.Logger.Errorf(slogger.ERROR, "Could not remove intent host "+
				"“%v”: %v", intentHost.Id, rmErr)
		}
		return nil, nil, evergreen.Logger.Errorf(slogger.ERROR,
			"EC2 RunInstances API call returned error: %v", err)
	}

	evergreen.Logger.Logf(slogger.DEBUG, "Spawned %v instance", len(resp.Instances))

	// the instance should have been successfully spawned
	instance := resp.Instances[0]
	evergreen.Logger.Logf(slogger.DEBUG, "Started %v", instance.InstanceId)
	evergreen.Logger.Logf(slogger.DEBUG, "Key name: %v", string(options.KeyName))

	// find old intent host
	host, err := host.FindOne(host.ById(intentHost.Id))
	if host == nil {
		return nil, nil, evergreen.Logger.Errorf(slogger.ERROR, "Can't locate "+
			"record inserted for intended host “%v”", intentHost.Id)
	}
	if err != nil {
		return nil, nil, evergreen.Logger.Errorf(slogger.ERROR, "Can't locate "+
			"record inserted for intended host “%v” due to error: %v",
			intentHost.Id, err)
	}

	// we found the old document now we can insert the new one
	host.Id = instance.InstanceId
	err = host.Insert()
	if err != nil {
		return nil, nil, evergreen.Logger.Errorf(slogger.ERROR, "Could not insert "+
			"updated host information for “%v” with “%v”: %v", intentHost.Id,
			host.Id, err)
	}

	// remove the intent host document
	err = intentHost.Remove()
	if err != nil {
		return nil, nil, evergreen.Logger.Errorf(slogger.ERROR, "Could not remove "+
			"insert host “%v” (replaced by “%v”): %v", intentHost.Id, host.Id,
			err)
	}

	var infoResp *ec2.DescribeInstancesResp
	instanceInfoRetryCount := 0
	instanceInfoMaxRetries := 5
	for {
		infoResp, err = ec2Handle.DescribeInstances([]string{instance.InstanceId}, nil)
		if err != nil {
			instanceInfoRetryCount++
			if instanceInfoRetryCount == instanceInfoMaxRetries {
				evergreen.Logger.Errorf(slogger.ERROR, "There was an error querying for the "+
					"instance's information and retries are exhausted. The insance may "+
					"be up.")
				return nil, resp, err
			}

			evergreen.Logger.Errorf(slogger.DEBUG, "There was an error querying for the "+
				"instance's information. Retrying in 30 seconds. Error: %v", err)
			time.Sleep(30 * time.Second)
			continue
		}
		break
	}

	reservations := infoResp.Reservations
	if len(reservations) < 1 {
		return nil, resp, fmt.Errorf("Reservation was returned as nil, you " +
			"may have to check manually")
	}

	instancesInfo := reservations[0].Instances
	if len(instancesInfo) < 1 {
		return nil, resp, fmt.Errorf("Reservation appears to have no " +
			"associated instances")
	}
	return host, resp, nil
}
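The DescribeInstances loop above is a bounded retry with a fixed delay. Distilled to a stdlib sketch (describeInstance is a stand-in for the flaky EC2 query, and the delay is shortened so the example runs quickly):

package main

import (
	"errors"
	"fmt"
	"time"
)

// describeInstance stands in for the flaky EC2 DescribeInstances query.
func describeInstance(id string) error {
	return errors.New("transient API error")
}

func main() {
	const maxRetries = 5
	var err error
	for i := 0; i < maxRetries; i++ {
		if err = describeInstance("i-123"); err == nil {
			break
		}
		fmt.Println("query failed, retrying:", err)
		time.Sleep(30 * time.Millisecond) // the original waits 30 seconds
	}
	if err != nil {
		fmt.Println("retries exhausted; the instance may still be up")
	}
}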
Example #17
func TestFlaggingIdleHosts(t *testing.T) {

	testConfig := evergreen.TestConfig()

	db.SetGlobalSessionProvider(db.SessionFactoryFromConfig(testConfig))

	Convey("When flagging idle hosts to be terminated", t, func() {

		// reset the db
		testutil.HandleTestingErr(db.ClearCollections(host.Collection),
			t, "error clearing hosts collection")

		Convey("hosts currently running a task should never be"+
			" flagged", func() {

			// insert a host that is currently running a task - but whose
			// creation time would otherwise indicate it has been idle a while
			host1 := host.Host{
				Id:           "h1",
				Provider:     mock.ProviderName,
				CreationTime: time.Now().Add(-30 * time.Minute),
				RunningTask:  "t1",
				Status:       evergreen.HostRunning,
				StartedBy:    evergreen.User,
			}
			testutil.HandleTestingErr(host1.Insert(), t, "error inserting host")

			// finding idle hosts should not return the host
			idle, err := flagIdleHosts(nil, nil)
			So(err, ShouldBeNil)
			So(len(idle), ShouldEqual, 0)

		})

		Convey("hosts not currently running a task should be flagged if they"+
			" have been idle at least 15 minutes and will incur a payment in"+
			" less than 10 minutes", func() {

			// insert two hosts - one whose last task was more than 15 minutes
			// ago, one whose last task was less than 15 minutes ago

			host1 := host.Host{
				Id:                    "h1",
				Provider:              mock.ProviderName,
				LastTaskCompleted:     "t1",
				LastTaskCompletedTime: time.Now().Add(-time.Minute * 20),
				Status:                evergreen.HostRunning,
				StartedBy:             evergreen.User,
			}
			testutil.HandleTestingErr(host1.Insert(), t, "error inserting host")

			host2 := host.Host{
				Id:                    "h2",
				Provider:              mock.ProviderName,
				LastTaskCompleted:     "t2",
				LastTaskCompletedTime: time.Now().Add(-time.Minute * 5),
				Status:                evergreen.HostRunning,
				StartedBy:             evergreen.User,
			}
			testutil.HandleTestingErr(host2.Insert(), t, "error inserting host")

			// finding idle hosts should only return the first host
			idle, err := flagIdleHosts(nil, nil)
			So(err, ShouldBeNil)
			So(len(idle), ShouldEqual, 1)
			So(idle[0].Id, ShouldEqual, "h1")

		})

	})

}
Example #18
func TestMarkAsDispatched(t *testing.T) {

	var (
		taskId  string
		hostId  string
		buildId string
		task    *Task
		myHost  *host.Host
		b       *build.Build
	)

	Convey("With a task", t, func() {

		taskId = "t1"
		hostId = "h1"
		buildId = "b1"

		task = &Task{
			Id:      taskId,
			BuildId: buildId,
		}

		myHost = &host.Host{
			Id: hostId,
		}

		b = &build.Build{
			Id: buildId,
			Tasks: []build.TaskCache{
				{Id: taskId},
			},
		}

		testutil.HandleTestingErr(
			db.ClearCollections(TasksCollection, build.Collection, host.Collection),
			t, "Error clearing test collections")

		So(task.Insert(), ShouldBeNil)
		So(myHost.Insert(), ShouldBeNil)
		So(b.Insert(), ShouldBeNil)

		Convey("when marking the task as dispatched, the fields for"+
			" the task, the host it is on, and the build it is a part of"+
			" should be set to reflect this", func() {

			// mark the task as dispatched
			So(task.MarkAsDispatched(myHost, time.Now()), ShouldBeNil)

			// make sure the task's fields were updated, both in memory and
			// in the db
			So(task.DispatchTime, ShouldNotResemble, time.Unix(0, 0))
			So(task.Status, ShouldEqual, evergreen.TaskDispatched)
			So(task.HostId, ShouldEqual, myHost.Id)
			So(task.LastHeartbeat, ShouldResemble, task.DispatchTime)
			task, err := FindTask(taskId)
			So(err, ShouldBeNil)
			So(task.DispatchTime, ShouldNotResemble, time.Unix(0, 0))
			So(task.Status, ShouldEqual, evergreen.TaskDispatched)
			So(task.HostId, ShouldEqual, myHost.Id)
			So(task.LastHeartbeat, ShouldResemble, task.DispatchTime)

			// make sure the build's fields were updated in the db
			b, err = build.FindOne(build.ById(buildId))
			So(err, ShouldBeNil)
			So(b.Tasks[0].Status, ShouldEqual, evergreen.TaskDispatched)

		})

	})

}
Example #19
func TestCreateHostBuckets(t *testing.T) {
	testutil.HandleTestingErr(db.ClearCollections(host.Collection), t, "couldnt reset host")
	Convey("With a starting time and a minute bucket size and inserting dynamic hosts with different time frames", t, func() {
		now := time.Now()
		bucketSize := time.Duration(10) * time.Second

		// -20 -> 20
		beforeStartHost := host.Host{Id: "beforeStartHost", CreationTime: now.Add(time.Duration(-20) * time.Second), TerminationTime: now.Add(time.Duration(20) * time.Second), Provider: "ec2"}
		So(beforeStartHost.Insert(), ShouldBeNil)

		// 80 -> 120
		afterEndHost := host.Host{Id: "afterEndHost", CreationTime: now.Add(time.Duration(80) * time.Second), TerminationTime: now.Add(time.Duration(120) * time.Second), Provider: "ec2"}
		So(afterEndHost.Insert(), ShouldBeNil)

		// 20 -> 40
		h1 := host.Host{Id: "h1", CreationTime: now.Add(time.Duration(20) * time.Second), TerminationTime: now.Add(time.Duration(40) * time.Second), Provider: "ec2"}
		So(h1.Insert(), ShouldBeNil)

		// 10 -> 80
		h2 := host.Host{Id: "h2", CreationTime: now.Add(time.Duration(10) * time.Second), TerminationTime: now.Add(time.Duration(80) * time.Second), Provider: "ec2"}
		So(h2.Insert(), ShouldBeNil)

		// 20 ->
		h3 := host.Host{Id: "h3", CreationTime: now.Add(time.Duration(20) * time.Second), TerminationTime: util.ZeroTime, Provider: "ec2", Status: evergreen.HostRunning}
		So(h3.Insert(), ShouldBeNil)

		// 5 -> 7
		sameBucket := host.Host{Id: "sameBucket", CreationTime: now.Add(time.Duration(5) * time.Second), TerminationTime: now.Add(time.Duration(7) * time.Second), Provider: "ec2"}
		So(sameBucket.Insert(), ShouldBeNil)

		// 5 -> 30
		h4 := host.Host{Id: "h4", CreationTime: now.Add(time.Duration(5) * time.Second), TerminationTime: now.Add(time.Duration(30) * time.Second), Provider: "ec2"}
		So(h4.Insert(), ShouldBeNil)

		Convey("for three buckets of 10 seconds, should only retrieve pertinent host docs", func() {

			endTime := now.Add(time.Duration(30) * time.Second)
			hosts, err := host.Find(host.ByDynamicWithinTime(now, endTime))
			So(err, ShouldBeNil)
			So(len(hosts), ShouldEqual, 6)
			frameBounds := FrameBounds{
				StartTime:     now,
				EndTime:       endTime,
				BucketSize:    bucketSize,
				NumberBuckets: 3,
			}
			Convey("should create the correct buckets and bucket time accordingly", func() {
				buckets, errors := CreateHostBuckets(hosts, frameBounds)
				So(errors, ShouldBeEmpty)
				So(len(buckets), ShouldEqual, 3)
				So(int(buckets[0].TotalTime.Seconds()), ShouldEqual, 17)
				So(int(buckets[1].TotalTime.Seconds()), ShouldEqual, 30)
				So(int(math.Ceil(buckets[2].TotalTime.Seconds())), ShouldEqual, 40)
			})
		})

	})
}
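The expected totals in this test fall out of clipping each host's [creation, termination) interval to the frame and spreading the overlap across fixed-size buckets. A hedged sketch of that overlap arithmetic, for a single host; this is a guess at what CreateHostBuckets computes, and the real implementation may differ:

package main

import (
	"fmt"
	"time"
)

// bucketize spreads the interval [start, end) across n buckets of the given
// size, beginning at frameStart, and returns the per-bucket overlap.
func bucketize(start, end, frameStart time.Time, bucket time.Duration, n int) []time.Duration {
	totals := make([]time.Duration, n)
	for i := 0; i < n; i++ {
		bStart := frameStart.Add(time.Duration(i) * bucket)
		bEnd := bStart.Add(bucket)
		// overlap of [start, end) with [bStart, bEnd)
		lo, hi := start, end
		if lo.Before(bStart) {
			lo = bStart
		}
		if hi.After(bEnd) {
			hi = bEnd
		}
		if hi.After(lo) {
			totals[i] = hi.Sub(lo)
		}
	}
	return totals
}

func main() {
	now := time.Now()
	// a host alive from +5s to +30s against three 10-second buckets
	fmt.Println(bucketize(now.Add(5*time.Second), now.Add(30*time.Second), now, 10*time.Second, 3))
	// → [5s 10s 10s]
}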