Example #1
0
// runCmd runs the command on the next available server, or creates a new
// server if none are available. NB: we only return an error if we can't start
// the cmd, not if the command fails (schedule() only guarantees that the cmds
// are run count times, not that they are /successful/ that many times).
func (s *opst) runCmd(cmd string, req *Requirements) error {
	// look through space on existing servers to see if we can run cmd on one
	// of them
	s.mutex.Lock()
	var server *cloud.Server
	for sid, thisServer := range s.servers {
		if thisServer.Destroyed() {
			delete(s.servers, sid)
			continue
		}
		if thisServer.HasSpaceFor(req.Cores, req.RAM, req.Disk) > 0 {
			server = thisServer
			break
		}
	}

	// else see if there will be space on a soon-to-be-spawned server
	// *** this is untested
	if server == nil {
		for _, standinServer := range s.standins {
			if standinServer.hasSpaceFor(req) > 0 {
				standinServer.allocate(req)
				s.mutex.Unlock()
				server = standinServer.waitForServer()
				s.mutex.Lock()
				if server != nil {
					// stop once we have a server; continuing the loop could
					// allocate our req on a second standin as well,
					// double-booking resources
					break
				}
			}
		}
	}

	// else spawn the smallest server that can run this cmd, recording our new
	// quota usage.
	if server == nil {
		// make sure the server is big enough for the OS itself, not just the
		// cmd's own requirements
		reqForSpawn := req
		if req.RAM < s.config.OSRAM {
			reqForSpawn = &Requirements{
				RAM:   s.config.OSRAM,
				Time:  req.Time,
				Cores: req.Cores,
				Disk:  req.Disk,
				Other: req.Other,
			}
		}
		flavor, err := s.determineFlavor(reqForSpawn)
		if err != nil {
			s.mutex.Unlock()
			return err
		}

		// because spawning can take a while, we record that we're going to use
		// up some of our quota and unlock so other things can proceed
		numSpawning := s.waitingToSpawn + s.spawningNow
		if numSpawning == 0 {
			s.nextSpawnTime = time.Now().Add(10 * time.Second)
			s.spawningNow++
		} else {
			s.waitingToSpawn++
		}
		s.reservedInstances++
		s.reservedCores += flavor.Cores
		s.reservedRAM += flavor.RAM

		// a standin lets other runCmd calls schedule onto this
		// yet-to-be-spawned server while we wait for it to come up
		standinID := uuid.NewV4().String()
		standinServer := newStandin(standinID, flavor)
		standinServer.allocate(req)
		s.standins[standinID] = standinServer
		s.mutex.Unlock()

		// now spawn, but don't overload the system by trying to spawn too many
		// at once; wait at least 10 seconds between each spawn
		if numSpawning > 0 {
			done := make(chan error)
			go func() {
				ticker := time.NewTicker(1 * time.Second)
				for {
					select {
					case <-ticker.C:
						s.mutex.Lock()
						if time.Now().After(s.nextSpawnTime) {
							s.nextSpawnTime = time.Now().Add(10 * time.Second)
							s.waitingToSpawn--
							s.spawningNow++
							s.mutex.Unlock()
							ticker.Stop()
							done <- nil
							return
						}
						s.mutex.Unlock()
						continue
					case <-s.stopWaitingToSpawn:
						ticker.Stop()
						s.mutex.Lock()
						s.waitingToSpawn--
						standinServer.failed()
						delete(s.standins, standinID)
						s.mutex.Unlock()
						done <- errors.New("giving up waiting to spawn")
						return
					}
				}
			}()
			err = <-done
			if err != nil {
				// we never spawned, so release the quota we reserved above;
				// the goroutine has already undone waitingToSpawn and removed
				// the standin, but not these
				s.mutex.Lock()
				s.reservedInstances--
				s.reservedCores -= flavor.Cores
				s.reservedRAM -= flavor.RAM
				s.mutex.Unlock()
				return err
			}
		}
		server, err = s.provider.Spawn(s.config.OSPrefix, s.config.OSUser, flavor.ID, s.config.ServerKeepTime, false, s.config.PostCreationScript)

		if err == nil {
			// check that the exe of the cmd we're supposed to run exists on the
			// new server, and if not, copy it over *** this is just a hack to
			// get wr working, need to think of a better way of doing this...
			exe := strings.Split(cmd, " ")[0]
			if _, err = os.Stat(exe); err == nil {
				// NB: assign to the outer err (no :=) so that RunCmd/
				// UploadFile failures reach the error handling below; with a
				// shadowed err we would register a destroyed server as usable
				var stdout string
				stdout, err = server.RunCmd("file "+exe, false)
				if err == nil {
					if strings.Contains(stdout, "No such file") {
						err = server.UploadFile(exe, exe)
						if err == nil {
							server.RunCmd("chmod u+x "+exe, false)
						} else {
							server.Destroy()
						}
					}
				} else {
					server.Destroy()
				}
			} else {
				server.Destroy()
			}
		}

		// handle Spawn() or upload-of-exe errors now, by noting we failed and
		// unreserving resources
		if err != nil {
			s.mutex.Lock()
			s.spawningNow--
			s.reservedInstances--
			s.reservedCores -= flavor.Cores
			s.reservedRAM -= flavor.RAM
			standinServer.failed()
			delete(s.standins, standinID)
			s.mutex.Unlock()
			return err
		}

		// success: move our reservation on to the real server and resolve the
		// standin so anyone waiting on it gets this server
		s.mutex.Lock()
		s.spawningNow--
		s.reservedInstances--
		s.reservedCores -= flavor.Cores
		s.reservedRAM -= flavor.RAM
		s.servers[server.ID] = server
		standinServer.worked(server)
		delete(s.standins, standinID)
	}

	server.Allocate(req.Cores, req.RAM, req.Disk)
	s.mutex.Unlock()

	// now we have a server, ssh over and run the cmd on it
	var err error
	if server.IP == "127.0.0.1" {
		err = s.local.runCmd(cmd, req)
	} else {
		_, err = server.RunCmd(cmd, false)
	}

	// having run a command, this server is now available for another; signal a
	// runCmd call that is waiting its turn to spawn a new server to give up
	// waiting and potentially get scheduled on us instead
	s.mutex.Lock()
	server.Release(req.Cores, req.RAM, req.Disk)
	if s.waitingToSpawn > 0 && server.IP != "127.0.0.1" {
		// NOTE(review): this send blocks; if the last waiter transitions to
		// spawningNow between our Unlock and this send, we could stall until
		// another waiter appears — confirm whether a select/default is safe
		s.mutex.Unlock()
		s.stopWaitingToSpawn <- true
	} else {
		s.mutex.Unlock()
	}

	return err
}