Example #1
0
func (d *etcdData) toJson() []byte {
	data, err := json.Marshal(d)
	if err != nil {
		logger.Error("etcd.marshal.etcdData.failed", err)
	}
	return data
}
Example #2
0
func monitorETCD(etcdAdapter *etcdstoreadapter.ETCDStoreAdapter) {
	out, err := os.Create(filepath.Join(outDir, "etcdstats.log"))
	if err != nil {
		logger.Fatal("etcd.log.creation.failure", err)
	}

	cleanup.Register(func() {
		out.Sync()
	})

	go func() {
		ticker := time.NewTicker(time.Second)
		for {
			<-ticker.C
			t := time.Now()
			logger.Info("fetch.etcd.runonce.data")
			runOnceNodes, err := etcdAdapter.ListRecursively(Bbs.TaskSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.runOnceNodes.error", err)
			}

			executorNode, err := etcdAdapter.ListRecursively(Bbs.ExecutorSchemaRoot)
			if err != nil {
				logger.Info("fetch.etcd.executorNode.error", err)
			}
			readTime := time.Since(t)

			d := etcdData{
				Time:              float64(time.Now().UnixNano()) / 1e9,
				RunningByExecutor: map[string]int{},
				PresentExecutors:  len(executorNode.ChildNodes),
				ReadTime:          float64(readTime) / 1e9,
			}

			for _, node := range runOnceNodes.ChildNodes {
				runOnce, err := models.NewTaskFromJSON(node.Value)
				if err != nil {
					logger.Error("etcd.decode.runonce", err)
					continue
				}

				switch runOnce.State {
				case models.TaskStatePending:
					d.Pending++
				case models.TaskStateClaimed:
					d.Claimed++
				case models.TaskStateRunning:
					d.Running++
					d.RunningByExecutor[runOnce.ExecutorID]++
				case models.TaskStateCompleted:
					d.Completed++
				}
			}

			logger.Info("fetched.etcd.runonce.data", time.Since(t), d.String())
			out.Write(d.toJson())
			out.Write([]byte("\n"))
		}
	}()
}
Example #3
0
func runSimulation(natsClient yagnats.NATSClient) {
	simulationLock = &sync.Mutex{}
	simulationWait = &sync.WaitGroup{}
	taskTracker = map[string]*taskData{}

	msg := stagingMessage{
		Count:    nTasks,
		MemoryMB: taskMemory,
	}

	payload, err := json.Marshal(msg)
	if err != nil {
		panic(err)
	}

	t := time.Now()

	logger.Info("simulation.start", nTasks)

	simulationWait.Add(nTasks)

	_, err = natsClient.Subscribe("info.stager.*.staging-request.desire", func(msg *yagnats.Message) {
		var desiredLog struct {
			Timestamp time.Time   `json:"_timestamp"`
			Task      models.Task `json:"task"`
		}

		err := json.Unmarshal(msg.Payload, &desiredLog)
		if err != nil {
			panic(err)
		}

		registerDesired(desiredLog.Task.Guid, desiredLog.Timestamp)
	})

	_, err = natsClient.Subscribe("error.>", func(msg *yagnats.Message) {
		var errorLog struct {
			Timestamp time.Time `json:"_timestamp"`
			Error     string    `json:"error"`
		}

		err := json.Unmarshal(msg.Payload, &errorLog)
		if err != nil {
			panic(err)
		}

		registerError(msg.Subject+": "+errorLog.Error, errorLog.Timestamp)
	})

	_, err = natsClient.Subscribe("fatal.>", func(msg *yagnats.Message) {
		var errorLog struct {
			Timestamp time.Time `json:"_timestamp"`
			Error     string    `json:"error"`
		}

		err := json.Unmarshal(msg.Payload, &errorLog)
		if err != nil {
			panic(err)
		}

		registerError(msg.Subject+": "+errorLog.Error, errorLog.Timestamp)
	})

	executorIndexes := map[string]int{}

	_, err = natsClient.Subscribe("completed-task", func(msg *yagnats.Message) {
		defer func() {
			e := recover()
			if e != nil {
				logger.Error("RECOVERED PANIC:", e)
			}
		}()

		var task *models.Task

		err := json.Unmarshal(msg.Payload, &task)
		if err != nil {
			panic(err)
		}

		simulationLock.Lock()

		index, ok := executorIndexes[task.ExecutorID]
		if !ok {
			index = len(executorIndexes) + 1
			executorIndexes[task.ExecutorID] = index
		}

		data, ok := taskTracker[task.Guid]
		if !ok {
			logger.Error("uknown.runonce.completed", task.Guid, "executor", task.ExecutorID)
			simulationLock.Unlock()
			return
		}

		data.CompletionTime = float64(time.Now().UnixNano()) / 1e9
		logger.Info("runonce.completed", task.Guid, "executor", task.ExecutorID, "duration", data.CompletionTime-data.DesiredTime)
		data.ExecutorIndex = index
		data.NumCompletions++

		simulationLock.Unlock()

		simulationWait.Done()
	})
	if err != nil {
		panic(err)
	}

	err = natsClient.PublishWithReplyTo("stage", "completed-task", payload)
	if err != nil {
		panic(err)
	}

	cleanup.Register(func() {
		dt := time.Since(t)
		logger.Info("simulation.end", nTasks, "runtime", dt)

		simulationResult(dt)
		simulationErrors()
	})

	simulationWait.Wait()
}