Example #1
// runCmd runs the command on the next available server, or creates a new server if
// none are available. NB: we only return an error if we can't start the cmd,
// not if the command fails (schedule() only guarantees that the cmds are run
// count times, not that they are /successful/ that many times).
func (s *opst) runCmd(cmd string, req *Requirements) error {
	// look through space on existing servers to see if we can run cmd on one
	// of them
	s.mutex.Lock()
	var server *cloud.Server
	for sid, thisServer := range s.servers {
		if thisServer.Destroyed() {
			delete(s.servers, sid)
			continue
		}
		if thisServer.HasSpaceFor(req.Cores, req.RAM, req.Disk) > 0 {
			server = thisServer
			break
		}
	}

	// else see if there will be space on a soon-to-be-spawned server
	// *** this is untested
	if server == nil {
		for _, standinServer := range s.standins {
			if standinServer.hasSpaceFor(req) > 0 {
				standinServer.allocate(req)
				s.mutex.Unlock()
				server = standinServer.waitForServer()
				s.mutex.Lock()
				if server != nil {
					break
				}
			}
		}
	}

	// else spawn the smallest server that can run this cmd, recording our new
	// quota usage.
	if server == nil {
		reqForSpawn := req
		if req.RAM < s.config.OSRAM {
			reqForSpawn = &Requirements{
				RAM:   s.config.OSRAM,
				Time:  req.Time,
				Cores: req.Cores,
				Disk:  req.Disk,
				Other: req.Other,
			}
		}
		flavor, err := s.determineFlavor(reqForSpawn)
		if err != nil {
			s.mutex.Unlock()
			return err
		}

		// because spawning can take a while, we record that we're going to use
		// up some of our quota and unlock so other things can proceed
		numSpawning := s.waitingToSpawn + s.spawningNow
		if numSpawning == 0 {
			s.nextSpawnTime = time.Now().Add(10 * time.Second)
			s.spawningNow++
		} else {
			s.waitingToSpawn++
		}
		s.reservedInstances++
		s.reservedCores += flavor.Cores
		s.reservedRAM += flavor.RAM

		standinID := uuid.NewV4().String()
		standinServer := newStandin(standinID, flavor)
		standinServer.allocate(req)
		s.standins[standinID] = standinServer
		s.mutex.Unlock()

		// now spawn, but don't overload the system by trying to spawn too many
		// at once; wait at least 10 seconds between each spawn
		if numSpawning > 0 {
			done := make(chan error)
			go func() {
				ticker := time.NewTicker(1 * time.Second)
				for {
					select {
					case <-ticker.C:
						s.mutex.Lock()
						if time.Now().After(s.nextSpawnTime) {
							s.nextSpawnTime = time.Now().Add(10 * time.Second)
							s.waitingToSpawn--
							s.spawningNow++
							s.mutex.Unlock()
							ticker.Stop()
							done <- nil
							return
						}
						s.mutex.Unlock()
						continue
					case <-s.stopWaitingToSpawn:
						ticker.Stop()
						s.mutex.Lock()
						s.waitingToSpawn--
						s.reservedInstances--
						s.reservedCores -= flavor.Cores
						s.reservedRAM -= flavor.RAM
						standinServer.failed()
						delete(s.standins, standinID)
						s.mutex.Unlock()
						done <- errors.New("giving up waiting to spawn")
						return
					}
				}
			}()
			err = <-done
			if err != nil {
				return err
			}
		}
		server, err = s.provider.Spawn(s.config.OSPrefix, s.config.OSUser, flavor.ID, s.config.ServerKeepTime, false, s.config.PostCreationScript)

		if err == nil {
			// check that the exe of the cmd we're supposed to run exists on the
			// new server, and if not, copy it over *** this is just a hack to
			// get wr working, need to think of a better way of doing this...
			exe := strings.Split(cmd, " ")[0]
			if _, err = os.Stat(exe); err == nil {
				var stdout string
				if stdout, err = server.RunCmd("file "+exe, false); err == nil {
					if strings.Contains(stdout, "No such file") {
						err = server.UploadFile(exe, exe)
						if err == nil {
							server.RunCmd("chmod u+x "+exe, false)
						} else {
							server.Destroy()
						}
					}
				} else {
					server.Destroy()
				}
			} else {
				server.Destroy()
			}
		}

		// handle Spawn() or upload-of-exe errors now, by noting we failed and
		// unreserving resources
		if err != nil {
			s.mutex.Lock()
			s.spawningNow--
			s.reservedInstances--
			s.reservedCores -= flavor.Cores
			s.reservedRAM -= flavor.RAM
			standinServer.failed()
			delete(s.standins, standinID)
			s.mutex.Unlock()
			return err
		}

		s.mutex.Lock()
		s.spawningNow--
		s.reservedInstances--
		s.reservedCores -= flavor.Cores
		s.reservedRAM -= flavor.RAM
		s.servers[server.ID] = server
		standinServer.worked(server)
		delete(s.standins, standinID)
	}

	server.Allocate(req.Cores, req.RAM, req.Disk)
	s.mutex.Unlock()

	// now we have a server, ssh over and run the cmd on it
	var err error
	if server.IP == "127.0.0.1" {
		err = s.local.runCmd(cmd, req)
	} else {
		_, err = server.RunCmd(cmd, false)
	}

	// having run a command, this server is now available for another; signal a
	// runCmd call that is waiting its turn to spawn a new server to give up
	// waiting and potentially get scheduled on us instead
	s.mutex.Lock()
	server.Release(req.Cores, req.RAM, req.Disk)
	if s.waitingToSpawn > 0 && server.IP != "127.0.0.1" {
		s.mutex.Unlock()
		s.stopWaitingToSpawn <- true
	} else {
		s.mutex.Unlock()
	}

	return err
}
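runCmd above depends on a "standin" server (newStandin, hasSpaceFor, allocate, waitForServer, worked, failed) whose implementation is not part of this listing. As a rough, self-contained sketch of the idea, assuming the standin simply tracks reservations against the flavor being spawned and lets callers block until the spawn resolves, something like the following would fit the way runCmd calls it; all names and details here are illustrative guesses, not the project's code.

// A sketch of the standin idea; the type, field and method names here are
// assumptions about the interface runCmd uses, not the project's real code.
package main

import (
	"fmt"
	"sync"
	"time"
)

// requirements mirrors the fields runCmd reads from *Requirements.
type requirements struct {
	Cores, RAM, Disk int
}

// server stands in for *cloud.Server; only an ID is needed for the demo.
type server struct{ ID string }

// standin is a placeholder for a server that has been promised but not yet
// spawned: space can be reserved on it, and callers can block until the real
// server arrives (or the spawn fails).
type standin struct {
	mu                           sync.Mutex
	cores, ram, disk             int           // the flavor being spawned
	usedCores, usedRAM, usedDisk int           // reservations made so far
	ready                        chan struct{} // closed once the spawn resolves
	srv                          *server       // nil if the spawn failed
}

func newStandin(cores, ram, disk int) *standin {
	return &standin{cores: cores, ram: ram, disk: disk, ready: make(chan struct{})}
}

// hasSpaceFor returns how many more cmds with these (positive) requirements
// the future server could hold (0 if none).
func (s *standin) hasSpaceFor(req *requirements) int {
	s.mu.Lock()
	defer s.mu.Unlock()
	n := (s.cores - s.usedCores) / req.Cores
	if m := (s.ram - s.usedRAM) / req.RAM; m < n {
		n = m
	}
	if m := (s.disk - s.usedDisk) / req.Disk; m < n {
		n = m
	}
	if n < 0 {
		n = 0
	}
	return n
}

// allocate reserves req's resources on the future server.
func (s *standin) allocate(req *requirements) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.usedCores += req.Cores
	s.usedRAM += req.RAM
	s.usedDisk += req.Disk
}

// worked records the spawned server and wakes anyone blocked in waitForServer.
func (s *standin) worked(srv *server) {
	s.mu.Lock()
	s.srv = srv
	s.mu.Unlock()
	close(s.ready)
}

// failed wakes waiters without a server; waitForServer will return nil.
func (s *standin) failed() {
	close(s.ready)
}

// waitForServer blocks until worked or failed is called.
func (s *standin) waitForServer() *server {
	<-s.ready
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.srv
}

func main() {
	ss := newStandin(8, 32000, 100) // pretend we chose an 8-core flavor
	req := &requirements{Cores: 1, RAM: 1000, Disk: 1}
	if ss.hasSpaceFor(req) > 0 {
		ss.allocate(req)
	}
	go func() {
		time.Sleep(50 * time.Millisecond) // pretend Spawn() took this long
		ss.worked(&server{ID: "srv-1"})
	}()
	fmt.Println("got server:", ss.waitForServer().ID)
}

Closing the ready channel releases every waiter at once; calling both worked and failed on the same standin would panic, which matches runCmd calling exactly one of them before deleting the standin from s.standins.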
Example #2
File: cloud.go Project: sb10/vrpipe
func bootstrapOnRemote(provider *cloud.Provider, server *cloud.Server, exe string, mp int, wp int, wrMayHaveStarted bool) {
	// upload ourselves
	remoteExe := filepath.Join(cloudBinDir, "wr")
	err := server.UploadFile(exe, remoteExe)
	if err != nil && !wrMayHaveStarted {
		provider.TearDown()
		die("failed to upload wr to the server at %s: %s", server.IP, err)
	}

	// create a config file on the remote to have the remote wr work on the same
	// ports that we'd use locally
	err = server.CreateFile(fmt.Sprintf("managerport: \"%d\"\nmanagerweb: \"%d\"\n", mp, wp), wrConfigFileName)
	if err != nil {
		provider.TearDown()
		die("failed to create our config file on the server at %s: %s", server.IP, err)
	}

	_, err = server.RunCmd("chmod u+x "+remoteExe, false)
	if err != nil && !wrMayHaveStarted {
		provider.TearDown()
		die("failed to make remote wr executable: %s", err)
	}

	// copy over our cloud resource details, including our ssh key
	localResourceFile := filepath.Join(config.ManagerDir, "cloud_resources."+providerName+".wr-"+config.Deployment)
	remoteResourceFile := filepath.Join("./.wr_"+config.Deployment, "cloud_resources."+providerName+".wr-"+config.Deployment)
	err = server.UploadFile(localResourceFile, remoteResourceFile)
	if err != nil && !wrMayHaveStarted {
		provider.TearDown()
		die("failed to upload wr cloud resources file to the server at %s: %s", server.IP, err)
	}
	localKeyFile := filepath.Join(config.ManagerDir, "cloud_resources."+providerName+".key")
	err = ioutil.WriteFile(localKeyFile, []byte(provider.PrivateKey()), 0600)
	if err != nil {
		provider.TearDown()
		die("failed to create key file %s: %s", localKeyFile, err)
	}
	remoteKeyFile := filepath.Join("./.wr_"+config.Deployment, "cloud_resources."+providerName+".key")
	err = server.UploadFile(localKeyFile, remoteKeyFile)
	if err != nil && !wrMayHaveStarted {
		provider.TearDown()
		die("failed to upload wr cloud key file to the server at %s: %s", server.IP, err)
	}

	// start up the manager
	var alreadyStarted bool
	if wrMayHaveStarted {
		response, err := server.RunCmd(fmt.Sprintf("%s manager status --deployment %s", remoteExe, config.Deployment), false)
		if err == nil && response == "started\n" {
			alreadyStarted = true
		}
	}
	if !alreadyStarted {
		// build a command prefix that sets all the required env vars for this
		// provider
		envvarPrefix := ""
		envvars, _ := cloud.RequiredEnv(providerName)
		for _, envvar := range envvars {
			envvarPrefix += fmt.Sprintf("%s=\"%s\" ", envvar, os.Getenv(envvar))
		}

		var postCreationArg string
		if postCreationScript != "" {
			// copy over the post creation script to the server so the remote
			// manager can use it
			remoteScriptFile := filepath.Join("./.wr_"+config.Deployment, "cloud_resources."+providerName+".script")
			err = server.UploadFile(postCreationScript, remoteScriptFile)
			if err != nil && !wrMayHaveStarted {
				provider.TearDown()
				die("failed to upload wr cloud script file to the server at %s: %s", server.IP, err)
			}

			postCreationArg = " -p " + remoteScriptFile
		}

		var flavorArg string
		if flavorRegex != "" {
			flavorArg = " -l '" + flavorRegex + "'"
		}

		// get the manager running
		mCmd := fmt.Sprintf("%s%s manager start --deployment %s -s %s -k %d -o '%s' -r %d -m %d -u %s%s%s", envvarPrefix, remoteExe, config.Deployment, providerName, serverKeepAlive, osPrefix, osRAM, maxServers, osUsername, postCreationArg, flavorArg)
		_, err = server.RunCmd(mCmd, false)
		if err != nil {
			provider.TearDown()
			die("failed to start wr manager on the remote server: %s", err)
		}

		// wait a few seconds for the manager to start listening on its ports
		<-time.After(3 * time.Second)
	}
}
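The manager is started through a single non-interactive ssh command, so the remote shell does not inherit the provider credentials present in the local environment; that is why bootstrapOnRemote prepends them as KEY="value" pairs. A small runnable sketch of just that assembly step is below; the paths, deployment name, provider and env var values are hypothetical stand-ins for what config, the command-line flags and cloud.RequiredEnv supply in the real code.

package main

import (
	"fmt"
	"os"
)

// buildRemoteManagerCmd mimics the prefix-building loop in bootstrapOnRemote:
// each required provider env var is prepended as KEY="value" so the remote,
// non-interactive shell sees the same credentials the local one has. Only a
// subset of the real flags is shown; extraArgs stands in for the rest.
func buildRemoteManagerCmd(remoteExe, deployment, providerName string, envvars []string, extraArgs string) string {
	envvarPrefix := ""
	for _, envvar := range envvars {
		envvarPrefix += fmt.Sprintf("%s=\"%s\" ", envvar, os.Getenv(envvar))
	}
	return fmt.Sprintf("%s%s manager start --deployment %s -s %s%s",
		envvarPrefix, remoteExe, deployment, providerName, extraArgs)
}

func main() {
	// hypothetical values; in the real code these come from config, flags and
	// cloud.RequiredEnv(providerName)
	os.Setenv("OS_AUTH_URL", "https://keystone.example.com:5000/v3")
	cmd := buildRemoteManagerCmd("./.wr_production/bin/wr", "production", "openstack",
		[]string{"OS_AUTH_URL"}, " -l 'm1.*'")
	fmt.Println(cmd)
	// OS_AUTH_URL="https://keystone.example.com:5000/v3" ./.wr_production/bin/wr manager start --deployment production -s openstack -l 'm1.*'
}

Note that the values are dropped into the shell command verbatim, so anything containing double quotes or other shell metacharacters would need proper escaping, which neither this sketch nor the loop above attempts.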