Example #1
0
func main() {
	flag.Parse()

	//make the out dir
	logger.Component = "SIMULATOR"
	if outDir == "" {
		logger.Fatal("out.dir.unspecified")
	}
	err := os.MkdirAll(outDir, 0777)
	if err != nil {
		logger.Fatal("out.dir.creation.failed", err)
	}

	//set up logging
	outputFile, err := os.Create(filepath.Join(outDir, "simulator.log"))
	if err != nil {
		logger.Fatal("failed.to.create.simulator.log", err)
	}
	logger.Writer = io.MultiWriter(os.Stdout, outputFile)
	cleanup.Register(func() {
		outputFile.Sync()
	})

	//compile the executor
	logger.Info("compiling.executor")
	output, err := exec.Command("go", "install", "github.com/cloudfoundry-incubator/simulator/game_executor").CombinedOutput()
	if err != nil {
		logger.Fatal("failed.to.compile.executor", string(output))
	}

	//write info to the output dir
	writeInfo()

	//start etcd
	logger.Info("starting.etcd", etcdNodes)
	etcd = etcdstorerunner.NewETCDClusterRunner(4001, etcdNodes)
	etcd.Start()

	//set up the bbs
	pool := workerpool.NewWorkerPool(50)
	etcdAdapter = etcdstoreadapter.NewETCDStoreAdapter(etcd.NodeURLS(), pool)
	etcdAdapter.Connect()
	bbs = Bbs.New(etcdAdapter, timeprovider.NewTimeProvider())

	//monitor etcd
	monitorETCD()

	//start executors
	startExecutors()

	cleanup.Register(func() {
		logger.Info("stopping.etcd", etcdNodes)
		etcd.Stop()
	})

	//run the simulator
	runSimulation()

	cleanup.Exit(0)
}
Example #2
0
func monitorETCD(etcdAdapter *etcdstoreadapter.ETCDStoreAdapter) {
	out, err := os.Create(filepath.Join(outDir, "etcdstats.log"))
	if err != nil {
		logger.Fatal("etcd.log.creation.failure", err)
	}

	cleanup.Register(func() {
		out.Sync()
	})

	go func() {
		ticker := time.NewTicker(time.Second)
		for {
			<-ticker.C
			t := time.Now()
			logger.Info("fetch.etcd.runonce.data")
			runOnceNodes, err := etcdAdapter.ListRecursively(Bbs.TaskSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.runOnceNodes.error", err)
			}

			executorNode, err := etcdAdapter.ListRecursively(Bbs.ExecutorSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.executorNode.error", err)
			}
			readTime := time.Since(t)

			d := etcdData{
				Time:              float64(time.Now().UnixNano()) / 1e9,
				RunningByExecutor: map[string]int{},
				PresentExecutors:  len(executorNode.ChildNodes),
				ReadTime:          float64(readTime) / 1e9,
			}

			for _, node := range runOnceNodes.ChildNodes {
				runOnce, err := models.NewTaskFromJSON(node.Value)
				if err != nil {
					logger.Error("etcd.decode.runonce", err)
					continue
				}

				switch runOnce.State {
				case models.TaskStatePending:
					d.Pending++
				case models.TaskStateClaimed:
					d.Claimed++
				case models.TaskStateRunning:
					d.Running++
					d.RunningByExecutor[runOnce.ExecutorID]++
				case models.TaskStateCompleted:
					d.Completed++
				}
			}

			logger.Info("fetched.etcd.runonce.data", time.Since(t), d.String())
			out.Write(d.toJson())
			out.Write([]byte("\n"))
		}
	}()
}
Example #3
0
func startAndMonitorExecutor(index int, output *os.File, ready *sync.WaitGroup) {
	cmd := commandForExecutor(index, output)

	logger.Info("starting.executor", index)
	cmd.Start()
	time.Sleep(100 * time.Millisecond) //give it a second...
	ready.Done()

	shuttingDown := false
	cleanup.Register(func() {
		shuttingDown = true
		if cmd.Process != nil {
			cmd.Process.Kill()
		}
	})

	restartCount := 0
	for {
		err := cmd.Wait()
		logger.Error("executor.exited", index, err)
		if shuttingDown {
			return
		}
		restartCount++
		logger.Info("restarting.executor", index, restartCount)
		cmd = commandForExecutor(index, output)
		cmd.Start()
	}
}
Example #4
0
func monitorETCD() {
	outputFile, err := os.Create(filepath.Join(outDir, "etcdstats.log"))
	if err != nil {
		logger.Fatal("etcd.log.creation.failure", err)
	}
	cleanup.Register(func() {
		outputFile.Sync()
	})

	go monitorRunOnces(outputFile)
}
Example #5
0
func startExecutors() {
	executorOutput, err := os.Create(filepath.Join(outDir, "executors.log"))
	if err != nil {
		logger.Fatal("executor.output.file.create.failed", err)
	}
	cleanup.Register(func() {
		executorOutput.Sync()
	})

	logger.Info("starting.all.executors", nExecutors)
	allExecutorsStarted := &sync.WaitGroup{}
	for index := 1; index <= nExecutors; index++ {
		allExecutorsStarted.Add(1)
		go startAndMonitorExecutor(index, executorOutput, allExecutorsStarted)
	}
	allExecutorsStarted.Wait()
	logger.Info("started.all.executors", nExecutors)
}
Example #6
0
func main() {
	flag.Parse()
	cleanup.Register(func() {
		logger.Info("executor.shuttingdown")
		close(stop)
		tasks.Wait()
		logger.Info("executor.shutdown")
	})
	logger.Component = fmt.Sprintf("EXECUTOR %s", *executorID)

	lock = &sync.Mutex{}
	currentMemory = *maxMemory

	etcdAdapter := etcdstoreadapter.NewETCDStoreAdapter(
		strings.Split(*etcdCluster, ","),
		workerpool.NewWorkerPool(10),
	)
	err := etcdAdapter.Connect()
	if err != nil {
		logger.Fatal("etcd.connect.fatal", err)
	}

	tasks = &sync.WaitGroup{}
	stop = make(chan bool)

	bbs := Bbs.New(etcdAdapter, timeprovider.NewTimeProvider())

	ready := make(chan bool, 1)

	err = maintainPresence(bbs, ready)
	if err != nil {
		logger.Fatal("executor.initializing-presence.failed", err)
	}

	go handleRunOnces(bbs)
	go convergeRunOnces(bbs)

	<-ready

	logger.Info("executor.up")

	select {}
}
Example #7
0
func main() {
	flag.Parse()

	runtime.GOMAXPROCS(runtime.NumCPU())

	//make the out dir
	logger.Component = "SIMULATOR"
	if outDir == "" {
		logger.Fatal("out.dir.unspecified")
	}
	err := os.MkdirAll(outDir, 0777)
	if err != nil {
		logger.Fatal("out.dir.creation.failed", err)
	}

	//set up logging
	outputFile, err := os.Create(filepath.Join(outDir, "simulator.log"))
	if err != nil {
		logger.Fatal("failed.to.create.simulator.log", err)
	}

	logger.Writer = io.MultiWriter(os.Stdout, outputFile)

	cleanup.Register(func() {
		outputFile.Sync()
	})

	//start etcd
	natsClient := yagnats.NewClient()

	natsMembers := []yagnats.ConnectionProvider{}

	for _, addr := range strings.Split(*natsAddresses, ",") {
		natsMembers = append(
			natsMembers,
			&yagnats.ConnectionInfo{addr, *natsUsername, *natsPassword},
		)
	}

	natsInfo := &yagnats.ConnectionCluster{Members: natsMembers}

	err = natsClient.Connect(natsInfo)
	if err != nil {
		logger.Fatal("could not connect to nats:", err)
	}

	logger.Component = "simulator"

	etcdAdapter := etcdstoreadapter.NewETCDStoreAdapter(
		strings.Split(*etcdCluster, ","),
		workerpool.NewWorkerPool(10),
	)

	err = etcdAdapter.Connect()
	if err != nil {
		logger.Fatal("etcd.connect-failed", map[string]interface{}{
			"error": err.Error(),
		})
	}

	//write info to the output dir
	writeInfo()

	//monitor etcd
	monitorETCD(etcdAdapter)

	//run the simulator
	runSimulation(natsClient)

	cleanup.Exit(0)
}
Example #8
0
func main() {
	var err error

	runtime.GOMAXPROCS(runtime.NumCPU())

	rand.Seed(time.Now().UnixNano())

	flag.Parse()

	executorUUID, err := uuid.NewV4()
	if err != nil {
		log.Fatalln("could not generate guid:", err)
	}

	executorID = executorUUID.String()

	cleanup.Register(func() {
		once.Do(func() {
			logger.Info("shutting-down", map[string]interface{}{})
			close(stop)
			tasks.Wait()
			logger.Info("shutdown", map[string]interface{}{})
		})
	})

	natsClient := yagnats.NewClient()

	natsMembers := []yagnats.ConnectionProvider{}

	for _, addr := range strings.Split(*natsAddresses, ",") {
		natsMembers = append(
			natsMembers,
			&yagnats.ConnectionInfo{addr, *natsUsername, *natsPassword},
		)
	}

	natsInfo := &yagnats.ConnectionCluster{Members: natsMembers}

	err = logger.Connect(natsInfo)
	if err != nil {
		log.Fatalln("could not connect logger:", err)
	}

	err = natsClient.Connect(natsInfo)
	if err != nil {
		log.Fatalln("could not connect to nats:", err)
	}

	logger.Component = fmt.Sprintf("executor.%s", executorID)

	etcdAdapter := etcdstoreadapter.NewETCDStoreAdapter(
		strings.Split(*etcdCluster, ","),
		workerpool.NewWorkerPool(10),
	)
	err = etcdAdapter.Connect()
	if err != nil {
		logger.Fatal("etcd.connect-failed", map[string]interface{}{
			"error": err.Error(),
		})
	}

	bbs := bbs.New(bbs.NewHurlerKicker(*hurlerAddress), etcdAdapter, timeprovider.NewTimeProvider())

	ready := make(chan bool, 1)

	err = maintainPresence(bbs, ready)
	if err != nil {
		logger.Fatal("initializing-presence", map[string]interface{}{
			"error": err.Error(),
		})
	}

	err = registerHandler(etcdAdapter, *listenAddr, ready)
	if err != nil {
		logger.Fatal("initializing-route", map[string]interface{}{
			"error": err.Error(),
		})
	}

	go handleTasks(bbs, *listenAddr)
	go convergeTasks(bbs)

	<-ready
	<-ready

	logger.Info("up", map[string]interface{}{
		"executor": executorID,
	})

	select {}
}
Example #9
0
func runSimulation(natsClient yagnats.NATSClient) {
	simulationLock = &sync.Mutex{}
	simulationWait = &sync.WaitGroup{}
	taskTracker = map[string]*taskData{}

	msg := stagingMessage{
		Count:    nTasks,
		MemoryMB: taskMemory,
	}

	payload, err := json.Marshal(msg)
	if err != nil {
		panic(err)
	}

	t := time.Now()

	logger.Info("simulation.start", nTasks)

	simulationWait.Add(nTasks)

	_, err = natsClient.Subscribe("info.stager.*.staging-request.desire", func(msg *yagnats.Message) {
		var desiredLog struct {
			Timestamp time.Time   `json:"_timestamp"`
			Task      models.Task `json:"task"`
		}

		err := json.Unmarshal(msg.Payload, &desiredLog)
		if err != nil {
			panic(err)
		}

		registerDesired(desiredLog.Task.Guid, desiredLog.Timestamp)
	})

	_, err = natsClient.Subscribe("error.>", func(msg *yagnats.Message) {
		var errorLog struct {
			Timestamp time.Time `json:"_timestamp"`
			Error     string    `json:"error"`
		}

		err := json.Unmarshal(msg.Payload, &errorLog)
		if err != nil {
			panic(err)
		}

		registerError(msg.Subject+": "+errorLog.Error, errorLog.Timestamp)
	})

	_, err = natsClient.Subscribe("fatal.>", func(msg *yagnats.Message) {
		var errorLog struct {
			Timestamp time.Time `json:"_timestamp"`
			Error     string    `json:"error"`
		}

		err := json.Unmarshal(msg.Payload, &errorLog)
		if err != nil {
			panic(err)
		}

		registerError(msg.Subject+": "+errorLog.Error, errorLog.Timestamp)
	})

	executorIndexes := map[string]int{}

	_, err = natsClient.Subscribe("completed-task", func(msg *yagnats.Message) {
		defer func() {
			e := recover()
			if e != nil {
				logger.Error("RECOVERED PANIC:", e)
			}
		}()

		var task *models.Task

		err := json.Unmarshal(msg.Payload, &task)
		if err != nil {
			panic(err)
		}

		simulationLock.Lock()

		index, ok := executorIndexes[task.ExecutorID]
		if !ok {
			index = len(executorIndexes) + 1
			executorIndexes[task.ExecutorID] = index
		}

		data, ok := taskTracker[task.Guid]
		if !ok {
			logger.Error("uknown.runonce.completed", task.Guid, "executor", task.ExecutorID)
			simulationLock.Unlock()
			return
		}

		data.CompletionTime = float64(time.Now().UnixNano()) / 1e9
		logger.Info("runonce.completed", task.Guid, "executor", task.ExecutorID, "duration", data.CompletionTime-data.DesiredTime)
		data.ExecutorIndex = index
		data.NumCompletions++

		simulationLock.Unlock()

		simulationWait.Done()
	})
	if err != nil {
		panic(err)
	}

	err = natsClient.PublishWithReplyTo("stage", "completed-task", payload)
	if err != nil {
		panic(err)
	}

	cleanup.Register(func() {
		dt := time.Since(t)
		logger.Info("simulation.end", nTasks, "runtime", dt)

		simulationResult(dt)
		simulationErrors()
	})

	simulationWait.Wait()
}