Beispiel #1
0
// handleCompletedRunOnce records the completion of runOnce in runOnceTracker
// and, the first time a given Guid completes, resolves it through the BBS.
//
// Bookkeeping (completion time, executor index, completion count) is updated
// while holding simulationLock. The lock is released before the BBS call so
// that the resolve request does not run inside the critical section.
// Completions for Guids that are not present in runOnceTracker are logged
// and otherwise ignored.
func handleCompletedRunOnce(runOnce *models.RunOnce) {
	simulationLock.Lock()
	data, ok := runOnceTracker[runOnce.Guid]
	if !ok {
		logger.Error("uknown.runonce.completed", runOnce.Guid, "executor", runOnce.ExecutorID)
		simulationLock.Unlock()
		return
	}

	// Completion time is stored as fractional seconds since the Unix epoch.
	data.CompletionTime = float64(time.Now().UnixNano()) / 1e9
	logger.Info("runonce.completed", runOnce.Guid, "executor", runOnce.ExecutorID, "duration", data.CompletionTime-data.DesiredTime)

	// A non-numeric executor ID still yields index 0 (as before), but the
	// conversion failure is now reported instead of being silently dropped,
	// so the bogus index can be told apart from a real executor 0.
	executorIndex, convErr := strconv.Atoi(runOnce.ExecutorID)
	if convErr != nil {
		logger.Error("runonce.completed.invalid-executor-id", runOnce.Guid, "executor", runOnce.ExecutorID, convErr)
	}
	data.ExecutorIndex = executorIndex

	data.NumCompletions++
	numCompletions := data.NumCompletions
	simulationLock.Unlock()

	// Only the first completion of a Guid is resolved and counted towards
	// simulationWait; later completions of the same Guid are bookkeeping only.
	if numCompletions == 1 {
		defer simulationWait.Done()
		logger.Info("runonce.resolve", runOnce.Guid)
		err := bbs.ResolveRunOnce(runOnce)
		if err != nil {
			logger.Error("runonce.resolve.error", runOnce.Guid, err)
			return
		}
		logger.Info("runonce.resolved", runOnce.Guid)
	}
}
Beispiel #2
0
// handleRunOnce walks a single RunOnce through its simulated lifecycle:
// hesitate, reserve memory, claim, create the container, start, run and
// complete. Any failing step is logged and aborts the remaining steps.
// Memory that was successfully reserved is released when the function
// returns, whichever step ended it.
func handleRunOnce(bbs Bbs.ExecutorBBS, runOnce *models.RunOnce) {
	// Hesitate for a random interval before trying to claim.
	logger.Info("handling.runonce", runOnce.Guid)
	sleepForARandomInterval("sleep.claim", 0, 100)

	// Reserve memory; give up if the reservation is refused.
	if reserved := reserveMemory(runOnce.MemoryMB); !reserved {
		logger.Info("reserve.memory.failed", runOnce.Guid)
		return
	}
	defer releaseMemory(runOnce.MemoryMB)

	// Claim the RunOnce for this executor.
	logger.Info("claiming.runonce", runOnce.Guid)
	if claimErr := bbs.ClaimRunOnce(runOnce, *executorID); claimErr != nil {
		logger.Info("claim.runonce.failed", runOnce.Guid, claimErr)
		return
	}
	logger.Info("claimed.runonce", runOnce.Guid)

	// Simulate container creation.
	sleepForContainerCreationInterval()

	// Mark the RunOnce as started.
	logger.Info("starting.runonce", runOnce.Guid)
	if startErr := bbs.StartRunOnce(runOnce, "container"); startErr != nil {
		logger.Error("start.runonce.failed", runOnce.Guid, startErr)
		return
	}
	logger.Info("started.runonce", runOnce.Guid)

	// Simulate the actual run.
	sleepForRunInterval()

	// Mark the RunOnce as completed.
	logger.Info("completing.runonce", runOnce.Guid)
	if completeErr := bbs.CompleteRunOnce(runOnce, false, "", ""); completeErr != nil {
		logger.Error("complete.runonce.failed", runOnce.Guid, completeErr)
		return
	}
	logger.Info("completed.runonce", runOnce.Guid)
}
Beispiel #3
0
// startAndMonitorExecutor launches the executor process for index, signals
// ready once it has been given a moment to start, and then restarts the
// process every time it exits until the registered cleanup callback has run.
//
// output is passed through to commandForExecutor. ready.Done() is called
// exactly once, after the initial start attempt, whether or not it succeeded.
//
// The cleanup callback can only run while this function is sitting in its
// monitor loop, so it is presumably invoked from another goroutine. The
// current command and the shutdown flag are therefore guarded by a mutex:
// either the callback runs first and the loop stops restarting, or the
// restart happens first and the callback kills the new process.
func startAndMonitorExecutor(index int, output *os.File, ready *sync.WaitGroup) {
	var mu sync.Mutex // guards cmd and shuttingDown
	shuttingDown := false

	cmd := commandForExecutor(index, output)

	logger.Info("starting.executor", index)
	if err := cmd.Start(); err != nil {
		logger.Error("executor.start.failed", index, err)
	}
	time.Sleep(100 * time.Millisecond) // give the process a moment to come up
	ready.Done()

	cleanup.Register(func() {
		mu.Lock()
		defer mu.Unlock()
		shuttingDown = true
		if cmd.Process != nil {
			cmd.Process.Kill()
		}
	})

	restartCount := 0
	for {
		// Snapshot the command under the lock, but wait on it without the
		// lock so the cleanup callback is never blocked behind Wait.
		mu.Lock()
		current := cmd
		mu.Unlock()

		err := current.Wait()
		logger.Error("executor.exited", index, err)

		mu.Lock()
		if shuttingDown {
			mu.Unlock()
			return
		}
		restartCount++
		logger.Info("restarting.executor", index, restartCount)
		cmd = commandForExecutor(index, output)
		startErr := cmd.Start()
		mu.Unlock()

		if startErr != nil {
			logger.Error("executor.start.failed", index, startErr)
			// Wait on a command that never started returns immediately;
			// pause briefly so a persistent start failure does not spin.
			time.Sleep(100 * time.Millisecond)
		}
	}
}
Beispiel #4
0
// desireAllRunOnces desires nRunOnces RunOnces through the BBS, spacing the
// requests evenly over the duration `over`, and returns once every request
// has finished (successfully or not).
//
// Each RunOnce gets its 1-based index as Guid and runOnceMemory as its
// memory requirement. Every request runs in its own goroutine so that a slow
// BBS call does not delay the following ones.
func desireAllRunOnces() {
	logger.Info("desiring.runonces", nRunOnces)

	// With nothing to desire there is no spacing to compute; bail out early
	// rather than dividing by zero below.
	if nRunOnces <= 0 {
		logger.Info("all.runonces.desired")
		return
	}

	dt := over / time.Duration(nRunOnces)

	allDesired := &sync.WaitGroup{}
	for index := 1; index <= nRunOnces; index++ {
		allDesired.Add(1)
		runOnce := models.RunOnce{
			Guid:     fmt.Sprintf("%d", index),
			MemoryMB: runOnceMemory,
		}
		innerIndex := index
		go func() {
			defer allDesired.Done()
			logger.Info("desiring.runonce", innerIndex)
			registerDesired(innerIndex)
			if err := bbs.DesireRunOnce(&runOnce); err != nil {
				logger.Error("desire.runonce.failed", innerIndex, err)
				// Do not report success for a request that failed.
				return
			}
			logger.Info("desired.runonce", innerIndex)
		}()
		time.Sleep(dt)
	}
	allDesired.Wait()
	logger.Info("all.runonces.desired")
}
Beispiel #5
0
// convergeRunOnces maintains the convergence lock through the BBS and runs a
// convergence pass every time the status channel reports that the lock is
// held. It registers itself with the tasks wait group for as long as it runs.
//
// When a stop signal arrives, release of the lock is requested once; the
// function then keeps draining the status channel and returns when that
// channel is closed.
func convergeRunOnces(bbs Bbs.ExecutorBBS) {
	statusChannel, releaseLock, err := bbs.MaintainConvergeLock(*convergenceInterval, *executorID)
	if err != nil {
		logger.Fatal("executor.converge-lock.acquire-failed", err)
		// Only reached if Fatal does not terminate the process; without a
		// lock there is nothing to maintain.
		return
	}

	tasks.Add(1)
	defer tasks.Done()

	// Local copy of the stop channel so it can be disabled after the first
	// signal. NOTE(review): if stop is signalled by closing it, receiving
	// from it succeeds forever and the release request would otherwise be
	// sent again and again, possibly blocking for good — confirm how stop
	// is signalled.
	stopSignal := stop

	for {
		select {
		case locked, ok := <-statusChannel:
			if !ok {
				return
			}

			if locked {
				t := time.Now()
				logger.Info("converging")
				bbs.ConvergeRunOnce(*timeToClaimRunOnce)
				logger.Info("converged", time.Since(t))
			} else {
				logger.Error("lost.convergence.lock")
			}
		case <-stopSignal:
			releaseLock <- nil
			// A nil channel never becomes ready in a select, so the release
			// is requested exactly once.
			stopSignal = nil
		}
	}
}
Beispiel #6
0
// handleRunOnces watches the BBS for desired RunOnces and handles each one
// in its own goroutine. It registers itself, and every handler goroutine it
// spawns, with the tasks wait group.
//
// Whenever the watch ends (the RunOnce channel is closed, or anything
// arrives on the error channel) a new watch is started. When a stop signal
// arrives, the current watch is told to stop and the function returns.
func handleRunOnces(bbs Bbs.ExecutorBBS) {
	tasks.Add(1)

	for {
		logger.Info("watch.desired")
		runOnces, stopWatching, errors := bbs.WatchForDesiredRunOnce()

	INNER:
		for {
			select {
			case runOnce, ok := <-runOnces:
				if !ok {
					logger.Info("watch.desired.closed")
					break INNER
				}

				tasks.Add(1)
				go func() {
					handleRunOnce(bbs, runOnce)
					tasks.Done()
				}()
			case err, ok := <-errors:
				if ok && err != nil {
					logger.Error("watch.desired.error", err)
				}
				break INNER
			case <-stop:
				stopWatching <- true
				tasks.Done()
				// Return once the stop has been handled. Staying in the
				// loop could start a fresh watch, run this branch again,
				// or call tasks.Done() a second time.
				return
			}
		}
	}
}
Beispiel #7
0
// toJson returns the JSON encoding of d. A marshalling failure is logged and
// whatever json.Marshal returned alongside the error is handed back as-is.
func (d *etcdData) toJson() []byte {
	encoded, marshalErr := json.Marshal(d)
	if marshalErr != nil {
		logger.Error("etcd.marshal.etcdData.failed", marshalErr)
	}
	return encoded
}
Beispiel #8
0
// releaseMemory adds memory back to currentMemory while holding lock. If the
// result exceeds *maxMemory, the inconsistency is logged and currentMemory
// is clamped to *maxMemory.
func releaseMemory(memory int) {
	lock.Lock()
	defer lock.Unlock()

	currentMemory += memory
	if currentMemory <= *maxMemory {
		return
	}

	logger.Error("bookkeeping.fail", "current memory exceeds original max memory... how?")
	currentMemory = *maxMemory
}
Beispiel #9
0
// monitorRunOnces starts a background goroutine that, once per second, lists
// the RunOnce and executor subtrees from etcd, tallies the RunOnces by state
// and writes the resulting snapshot to out as one JSON document followed by
// a newline.
//
// Fetch errors are logged and the tick continues with whatever was returned;
// RunOnce nodes that fail to decode are logged and skipped. The goroutine is
// never stopped by this function.
func monitorRunOnces(out io.Writer) {
	// sample performs the work of a single tick.
	sample := func() {
		started := time.Now()
		logger.Info("fetch.etcd.runonce.data")

		runOnceNodes, listErr := etcdAdapter.ListRecursively(Bbs.RunOnceSchemaRoot)
		if listErr != nil {
			logger.Info("fetch.etcd.runOnceNodes.error", listErr)
		}

		executorNode, listErr := etcdAdapter.ListRecursively(Bbs.ExecutorSchemaRoot)
		if listErr != nil {
			logger.Info("fetch.etcd.executorNode.error", listErr)
		}
		readTime := time.Since(started)

		// Timestamps and durations are recorded as fractional seconds.
		d := etcdData{
			Time:              float64(time.Now().UnixNano()) / 1e9,
			RunningByExecutor: map[string]int{},
			PresentExecutors:  len(executorNode.ChildNodes),
			ReadTime:          float64(readTime) / 1e9,
		}

		for _, child := range runOnceNodes.ChildNodes {
			runOnce, decodeErr := models.NewRunOnceFromJSON(child.Value)
			if decodeErr != nil {
				logger.Error("etcd.decode.runonce", decodeErr)
				continue
			}

			switch runOnce.State {
			case models.RunOnceStatePending:
				d.Pending++
			case models.RunOnceStateClaimed:
				d.Claimed++
			case models.RunOnceStateRunning:
				d.Running++
				d.RunningByExecutor[runOnce.ExecutorID]++
			case models.RunOnceStateCompleted:
				d.Completed++
			}
		}

		logger.Info("fetched.etcd.runonce.data", time.Since(started), d.String())
		out.Write(d.toJson())
		out.Write([]byte("\n"))
	}

	go func() {
		ticker := time.NewTicker(time.Second)
		for range ticker.C {
			sample()
		}
	}()
}