// runCmd runs the command on next available server, or creates a new server if // none are available. NB: we only return an error if we can't start the cmd, // not if the command fails (schedule() only guarantees that the cmds are run // count times, not that they are /successful/ that many times). func (s *opst) runCmd(cmd string, req *Requirements) error { // look through space on existing servers to see if we can run cmd on one // of them s.mutex.Lock() var server *cloud.Server for sid, thisServer := range s.servers { if thisServer.Destroyed() { delete(s.servers, sid) continue } if thisServer.HasSpaceFor(req.Cores, req.RAM, req.Disk) > 0 { server = thisServer break } } // else see if there will be space on a soon-to-be-spawned server // *** this is untested if server == nil { for _, standinServer := range s.standins { if standinServer.hasSpaceFor(req) > 0 { standinServer.allocate(req) s.mutex.Unlock() server = standinServer.waitForServer() s.mutex.Lock() } } } // else spawn the smallest server that can run this cmd, recording our new // quota usage. if server == nil { reqForSpawn := req if req.RAM < s.config.OSRAM { reqForSpawn = &Requirements{ RAM: s.config.OSRAM, Time: req.Time, Cores: req.Cores, Disk: req.Disk, Other: req.Other, } } flavor, err := s.determineFlavor(reqForSpawn) if err != nil { s.mutex.Unlock() return err } // because spawning can take a while, we record that we're going to use // up some of our quota and unlock so other things can proceed numSpawning := s.waitingToSpawn + s.spawningNow if numSpawning == 0 { s.nextSpawnTime = time.Now().Add(10 * time.Second) s.spawningNow++ } else { s.waitingToSpawn++ } s.reservedInstances++ s.reservedCores += flavor.Cores s.reservedRAM += flavor.RAM standinID := uuid.NewV4().String() standinServer := newStandin(standinID, flavor) standinServer.allocate(req) s.standins[standinID] = standinServer s.mutex.Unlock() // now spawn, but don't overload the system by trying to spawn too many // at once; wait at least 10 seconds between each spawn if numSpawning > 0 { done := make(chan error) go func() { ticker := time.NewTicker(1 * time.Second) for { select { case <-ticker.C: s.mutex.Lock() if time.Now().After(s.nextSpawnTime) { s.nextSpawnTime = time.Now().Add(10 * time.Second) s.waitingToSpawn-- s.spawningNow++ s.mutex.Unlock() ticker.Stop() done <- nil return } s.mutex.Unlock() continue case <-s.stopWaitingToSpawn: ticker.Stop() s.mutex.Lock() s.waitingToSpawn-- standinServer.failed() delete(s.standins, standinID) s.mutex.Unlock() done <- errors.New("giving up waiting to spawn") return } } }() err = <-done if err != nil { return err } } server, err = s.provider.Spawn(s.config.OSPrefix, s.config.OSUser, flavor.ID, s.config.ServerKeepTime, false, s.config.PostCreationScript) if err == nil { // check that the exe of the cmd we're supposed to run exists on the // new server, and if not, copy it over *** this is just a hack to // get wr working, need to think of a better way of doing this... exe := strings.Split(cmd, " ")[0] if _, err = os.Stat(exe); err == nil { if stdout, err := server.RunCmd("file "+exe, false); err == nil { if strings.Contains(stdout, "No such file") { err = server.UploadFile(exe, exe) if err == nil { server.RunCmd("chmod u+x "+exe, false) } else { server.Destroy() } } } else { server.Destroy() } } else { server.Destroy() } } // handle Spawn() or upload-of-exe errors now, by noting we failed and // unreserving resources if err != nil { s.mutex.Lock() s.spawningNow-- s.reservedInstances-- s.reservedCores -= flavor.Cores s.reservedRAM -= flavor.RAM standinServer.failed() delete(s.standins, standinID) s.mutex.Unlock() return err } s.mutex.Lock() s.spawningNow-- s.reservedInstances-- s.reservedCores -= flavor.Cores s.reservedRAM -= flavor.RAM s.servers[server.ID] = server standinServer.worked(server) delete(s.standins, standinID) } server.Allocate(req.Cores, req.RAM, req.Disk) s.mutex.Unlock() // now we have a server, ssh over and run the cmd on it var err error if server.IP == "127.0.0.1" { err = s.local.runCmd(cmd, req) } else { _, err = server.RunCmd(cmd, false) } // having run a command, this server is now available for another; signal a // runCmd call that is waiting its turn to spawn a new server to give up // waiting and potentially get scheduled on us instead s.mutex.Lock() server.Release(req.Cores, req.RAM, req.Disk) if s.waitingToSpawn > 0 && server.IP != "127.0.0.1" { s.mutex.Unlock() s.stopWaitingToSpawn <- true } else { s.mutex.Unlock() } return err }