示例#1
0
func main() {
	flag.Parse()

	//make the out dir
	logger.Component = "SIMULATOR"
	if outDir == "" {
		logger.Fatal("out.dir.unspecified")
	}
	err := os.MkdirAll(outDir, 0777)
	if err != nil {
		logger.Fatal("out.dir.creation.failed", err)
	}

	//set up logging
	outputFile, err := os.Create(filepath.Join(outDir, "simulator.log"))
	if err != nil {
		logger.Fatal("failed.to.create.simulator.log", err)
	}
	logger.Writer = io.MultiWriter(os.Stdout, outputFile)
	cleanup.Register(func() {
		outputFile.Sync()
	})

	//compile the executor
	logger.Info("compiling.executor")
	output, err := exec.Command("go", "install", "github.com/cloudfoundry-incubator/simulator/game_executor").CombinedOutput()
	if err != nil {
		logger.Fatal("failed.to.compile.executor", string(output))
	}

	//write info to the output dir
	writeInfo()

	//start etcd
	logger.Info("starting.etcd", etcdNodes)
	etcd = etcdstorerunner.NewETCDClusterRunner(4001, etcdNodes)
	etcd.Start()

	//set up the bbs
	pool := workerpool.NewWorkerPool(50)
	etcdAdapter = etcdstoreadapter.NewETCDStoreAdapter(etcd.NodeURLS(), pool)
	etcdAdapter.Connect()
	bbs = Bbs.New(etcdAdapter, timeprovider.NewTimeProvider())

	//monitor etcd
	monitorETCD()

	//start executors
	startExecutors()

	cleanup.Register(func() {
		logger.Info("stopping.etcd", etcdNodes)
		etcd.Stop()
	})

	//run the simulator
	runSimulation()

	cleanup.Exit(0)
}
示例#2
0
func startAndMonitorExecutor(index int, output *os.File, ready *sync.WaitGroup) {
	cmd := commandForExecutor(index, output)

	logger.Info("starting.executor", index)
	cmd.Start()
	time.Sleep(100 * time.Millisecond) //give it a second...
	ready.Done()

	shuttingDown := false
	cleanup.Register(func() {
		shuttingDown = true
		if cmd.Process != nil {
			cmd.Process.Kill()
		}
	})

	restartCount := 0
	for {
		err := cmd.Wait()
		logger.Error("executor.exited", index, err)
		if shuttingDown {
			return
		}
		restartCount++
		logger.Info("restarting.executor", index, restartCount)
		cmd = commandForExecutor(index, output)
		cmd.Start()
	}
}
示例#3
0
func convergeRunOnces(bbs Bbs.ExecutorBBS) {
	statusChannel, releaseLock, err := bbs.MaintainConvergeLock(*convergenceInterval, *executorID)
	if err != nil {
		logger.Fatal("executor.converge-lock.acquire-failed", err)
	}

	tasks.Add(1)

	for {
		select {
		case locked, ok := <-statusChannel:
			if !ok {
				tasks.Done()
				return
			}

			if locked {
				t := time.Now()
				logger.Info("converging")
				bbs.ConvergeRunOnce(*timeToClaimRunOnce)
				logger.Info("converged", time.Since(t))
			} else {
				logger.Error("lost.convergence.lock")
			}
		case <-stop:
			releaseLock <- nil
		}
	}
}
示例#4
0
func desireAllRunOnces() {
	logger.Info("desiring.runonces", nRunOnces)

	dt := over / time.Duration(nRunOnces)

	allDesired := &sync.WaitGroup{}
	for index := 1; index <= nRunOnces; index++ {
		allDesired.Add(1)
		runOnce := models.RunOnce{
			Guid:     fmt.Sprintf("%d", index),
			MemoryMB: runOnceMemory,
		}
		innerIndex := index
		go func() {
			defer allDesired.Done()
			logger.Info("desiring.runonce", innerIndex)
			registerDesired(innerIndex)
			err := bbs.DesireRunOnce(&runOnce)
			if err != nil {
				logger.Error("desire.runonce.failed", innerIndex, err)
			}
			logger.Info("desired.runonce", innerIndex)
		}()
		time.Sleep(dt)
	}
	allDesired.Wait()
	logger.Info("all.runonces.desired")
}
示例#5
0
func handleCompletedRunOnce(runOnce *models.RunOnce) {
	simulationLock.Lock()
	data, ok := runOnceTracker[runOnce.Guid]
	if !ok {
		logger.Error("uknown.runonce.completed", runOnce.Guid, "executor", runOnce.ExecutorID)
		simulationLock.Unlock()
		return
	}

	data.CompletionTime = float64(time.Now().UnixNano()) / 1e9
	logger.Info("runonce.completed", runOnce.Guid, "executor", runOnce.ExecutorID, "duration", data.CompletionTime-data.DesiredTime)
	data.ExecutorIndex, _ = strconv.Atoi(runOnce.ExecutorID)
	data.NumCompletions++
	numCompletions := data.NumCompletions
	simulationLock.Unlock()

	if numCompletions == 1 {
		defer simulationWait.Done()
		logger.Info("runonce.resolve", runOnce.Guid)
		err := bbs.ResolveRunOnce(runOnce)
		if err != nil {
			logger.Error("runonce.resolve.error", runOnce.Guid, err)
			return
		}
		logger.Info("runonce.resolved", runOnce.Guid)
	}
}
示例#6
0
func handleRunOnces(bbs Bbs.ExecutorBBS) {
	tasks.Add(1)

	for {
		logger.Info("watch.desired")
		runOnces, stopWatching, errors := bbs.WatchForDesiredRunOnce()

	INNER:
		for {
			select {
			case runOnce, ok := <-runOnces:
				if !ok {
					logger.Info("watch.desired.closed")
					break INNER
				}

				tasks.Add(1)
				go func() {
					handleRunOnce(bbs, runOnce)
					tasks.Done()
				}()
			case err, ok := <-errors:
				if ok && err != nil {
					logger.Error("watch.desired.error", err)
				}
				break INNER
			case <-stop:
				stopWatching <- true
				tasks.Done()
			}
		}
	}
}
示例#7
0
func handleRunOnce(bbs Bbs.ExecutorBBS, runOnce *models.RunOnce) {
	//hesitate
	logger.Info("handling.runonce", runOnce.Guid)
	sleepForARandomInterval("sleep.claim", 0, 100)

	//reserve memory
	ok := reserveMemory(runOnce.MemoryMB)
	if !ok {
		logger.Info("reserve.memory.failed", runOnce.Guid)
		return
	}
	defer releaseMemory(runOnce.MemoryMB)

	//mark claimed
	logger.Info("claiming.runonce", runOnce.Guid)

	err := bbs.ClaimRunOnce(runOnce, *executorID)
	if err != nil {
		logger.Info("claim.runonce.failed", runOnce.Guid, err)
		return
	}

	logger.Info("claimed.runonce", runOnce.Guid)

	//create container

	sleepForContainerCreationInterval()

	//mark started

	logger.Info("starting.runonce", runOnce.Guid)

	err = bbs.StartRunOnce(runOnce, "container")
	if err != nil {
		logger.Error("start.runonce.failed", runOnce.Guid, err)
		return
	}

	logger.Info("started.runonce", runOnce.Guid)

	//run

	sleepForRunInterval()

	//mark completed

	logger.Info("completing.runonce", runOnce.Guid)

	err = bbs.CompleteRunOnce(runOnce, false, "", "")
	if err != nil {
		logger.Error("complete.runonce.failed", runOnce.Guid, err)
		return
	}

	logger.Info("completed.runonce", runOnce.Guid)
}
示例#8
0
func monitorRunOnces(out io.Writer) {
	go func() {
		ticker := time.NewTicker(time.Second)
		for {
			<-ticker.C
			t := time.Now()
			logger.Info("fetch.etcd.runonce.data")
			runOnceNodes, err := etcdAdapter.ListRecursively(Bbs.RunOnceSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.runOnceNodes.error", err)
			}

			executorNode, err := etcdAdapter.ListRecursively(Bbs.ExecutorSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.executorNode.error", err)
			}
			readTime := time.Since(t)

			d := etcdData{
				Time:              float64(time.Now().UnixNano()) / 1e9,
				RunningByExecutor: map[string]int{},
				PresentExecutors:  len(executorNode.ChildNodes),
				ReadTime:          float64(readTime) / 1e9,
			}

			for _, node := range runOnceNodes.ChildNodes {
				runOnce, err := models.NewRunOnceFromJSON(node.Value)
				if err != nil {
					logger.Error("etcd.decode.runonce", err)
					continue
				}

				switch runOnce.State {
				case models.RunOnceStatePending:
					d.Pending++
				case models.RunOnceStateClaimed:
					d.Claimed++
				case models.RunOnceStateRunning:
					d.Running++
					d.RunningByExecutor[runOnce.ExecutorID]++
				case models.RunOnceStateCompleted:
					d.Completed++
				}
			}

			logger.Info("fetched.etcd.runonce.data", time.Since(t), d.String())
			out.Write(d.toJson())
			out.Write([]byte("\n"))
		}
	}()
}
示例#9
0
func runSimulation() {
	simulationLock = &sync.Mutex{}
	simulationWait = &sync.WaitGroup{}
	runOnceTracker = map[string]*runOnceData{}

	t := time.Now()
	logger.Info("simulation.start", nRunOnces)
	simulationWait.Add(nRunOnces)
	go watchForCompletedRunOnces()
	desireAllRunOnces()
	simulationWait.Wait()
	dt := time.Since(t)
	logger.Info("simulation.end", nRunOnces, "runtime", dt)
	simulationResult(dt)
}
示例#10
0
func startExecutors() {
	executorOutput, err := os.Create(filepath.Join(outDir, "executors.log"))
	if err != nil {
		logger.Fatal("executor.output.file.create.failed", err)
	}
	cleanup.Register(func() {
		executorOutput.Sync()
	})

	logger.Info("starting.all.executors", nExecutors)
	allExecutorsStarted := &sync.WaitGroup{}
	for index := 1; index <= nExecutors; index++ {
		allExecutorsStarted.Add(1)
		go startAndMonitorExecutor(index, executorOutput, allExecutorsStarted)
	}
	allExecutorsStarted.Wait()
	logger.Info("started.all.executors", nExecutors)
}
示例#11
0
func main() {
	flag.Parse()
	cleanup.Register(func() {
		logger.Info("executor.shuttingdown")
		close(stop)
		tasks.Wait()
		logger.Info("executor.shutdown")
	})
	logger.Component = fmt.Sprintf("EXECUTOR %s", *executorID)

	lock = &sync.Mutex{}
	currentMemory = *maxMemory

	etcdAdapter := etcdstoreadapter.NewETCDStoreAdapter(
		strings.Split(*etcdCluster, ","),
		workerpool.NewWorkerPool(10),
	)
	err := etcdAdapter.Connect()
	if err != nil {
		logger.Fatal("etcd.connect.fatal", err)
	}

	tasks = &sync.WaitGroup{}
	stop = make(chan bool)

	bbs := Bbs.New(etcdAdapter, timeprovider.NewTimeProvider())

	ready := make(chan bool, 1)

	err = maintainPresence(bbs, ready)
	if err != nil {
		logger.Fatal("executor.initializing-presence.failed", err)
	}

	go handleRunOnces(bbs)
	go convergeRunOnces(bbs)

	<-ready

	logger.Info("executor.up")

	select {}
}
示例#12
0
func watchForCompletedRunOnces() {
	for {
		logger.Info("watch.completed")
		runOnces, _, errs := bbs.WatchForCompletedRunOnce()
	waitForRunOnce:
		for {
			select {
			case runOnce, ok := <-runOnces:
				if !ok {
					logger.Info("watch.completed.closed")
					break waitForRunOnce
				}
				go handleCompletedRunOnce(runOnce)
			case err, ok := <-errs:
				if ok && err != nil {
					logger.Info("watch.completed.error", err)
				}
				break waitForRunOnce
			}
		}
	}
}
示例#13
0
func writeInfo() {
	data := fmt.Sprintf(`{
    etcd_nodes:%d,
    executors:%d,
    run_onces:%d,
    executor_available_memory:%d,
    run_once_memory:%d,
    over:%.4f
}
`, etcdNodes, nExecutors, nRunOnces, executorMemory, runOnceMemory, float64(over)/float64(time.Second))

	logger.Info("simulator.running", data)

	ioutil.WriteFile(filepath.Join(outDir, "info.json"), []byte(data), 0777)
}
示例#14
0
func sleepForARandomInterval(reason string, minSleepTime, maxSleepTime int) {
	interval := RAND.Intn(maxSleepTime-minSleepTime) + minSleepTime
	logger.Info(reason, fmt.Sprintf("%dms", interval))
	time.Sleep(time.Duration(interval) * time.Millisecond)
}