Example #1
// GetDemand calculates demand for each task
func (de *LocalEngine) GetDemand(tasks *demand.Tasks, demandUpdate chan struct{}) {
	var gettingMetrics sync.WaitGroup

	// On each tick we need to collect the metrics, calculate demand, and trigger a demand update if it changed
	demandTimeout := time.NewTicker(constGetDemandSleep * time.Millisecond)
	for range demandTimeout.C {
		tasks.Lock()
		log.Debug("Getting demand")

		for _, task := range tasks.Tasks {
			gettingMetrics.Add(1)
			go func(task *demand.Task) {
				defer gettingMetrics.Done()
				log.Debugf("Getting metric for %s", task.Name)
				task.Metric.UpdateCurrent()
			}(task)
		}

		gettingMetrics.Wait()

		demandChanged := scalingCalculation(tasks)

		tasks.Unlock()
		if demandChanged {
			demandUpdate <- struct{}{}
		}
	}
}
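
A minimal sketch, not taken from the project, of how the demandUpdate channel might be consumed: GetDemand signals whenever the calculated demand changes, and the consumer responds by asking the scheduler to scale. The runScaling name and loop shape are assumptions; the parameter types come from the receivers and arguments used in these examples.

// Illustrative only: wiring GetDemand to a scheduler (helper name and structure assumed).
func runScaling(engine *LocalEngine, sched scheduler.Scheduler, tasks *demand.Tasks) {
	demandUpdate := make(chan struct{}, 1)

	// GetDemand loops forever on its ticker, signalling demandUpdate whenever demand changes.
	go engine.GetDemand(tasks, demandUpdate)

	// Each signal triggers a scaling pass on the scheduler.
	for range demandUpdate {
		if err := sched.StopStartTasks(tasks); err != nil {
			log.Errorf("Failed to stop / start tasks: %v", err)
		}
	}
}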
Example #2
// CountAllTasks for the Toy scheduler simply reflects back what has been requested
func (t *ToyScheduler) CountAllTasks(running *demand.Tasks) error {
	running.Lock()
	defer running.Unlock()

	for _, task := range running.Tasks {
		task.Running = task.Requested
	}
	return nil
}
Example #3
// StopStartTasks asks the scheduler to bring the number of running tasks up to task.Demand.
func (t *ToyScheduler) StopStartTasks(tasks *demand.Tasks) error {
	tasks.Lock()
	defer tasks.Unlock()

	for _, task := range tasks.Tasks {
		task.Requested = task.Demand
		log.Debugf("Toy scheduler setting Requested for %s to %d", task.Name, task.Requested)
	}

	return nil
}
Example #4
// cleanup resets demand for all tasks to 0 before we quit
func cleanup(s scheduler.Scheduler, tasks *demand.Tasks) {
	tasks.Lock()
	for _, task := range tasks.Tasks {
		task.Demand = 0
	}
	tasks.Unlock()

	log.Debug("Reset tasks to 0 for cleanup")
	err := s.StopStartTasks(tasks)
	if err != nil {
		log.Errorf("Failed to cleanup tasks. %v", err)
	}
}
Example #5
// StopStartTasks by calling the Marathon scaling API.
func (m *MarathonScheduler) StopStartTasks(tasks *demand.Tasks) error {
	// Create tasks if there aren't enough of them, and stop them if there are too many
	var tooMany []*demand.Task
	var tooFew []*demand.Task
	var err error

	// Check we're not already backed off. This could easily happen if a demand update arrives
	// while we are in the midst of a previous backoff.
	if m.backoff.Waiting() {
		log.Debug("Backoff timer still running")
		return nil
	}

	tasks.Lock()
	defer tasks.Unlock()

	// TODO: Consider checking the number running before we start & stop
	for _, task := range tasks.Tasks {
		if task.Demand > task.Requested {
			// There aren't enough of these containers yet
			tooFew = append(tooFew, task)
		}
		if task.Demand < task.Requested {
			// There are too many of these containers
			tooMany = append(tooMany, task)
		}
	}

	// Concatenate the two lists - scale down first to free up resources
	tasksToScale := append(tooMany, tooFew...)
	for _, task := range tasksToScale {
		blocked, err := m.stopStartTask(task)
		if blocked {
			// Marathon can't make scale changes at the moment.
			// Trigger a new scaling operation by signalling a demandUpdate after a backoff delay
			err = m.backoff.Backoff(m.demandUpdate)
			return err
		}

		if err != nil {
			log.Errorf("Couldn't scale %s: %v ", task.Name, err)
			return err
		}

		// Clear any backoffs on success
		m.backoff.Reset()
		log.Debugf("Now have %s: %d", task.Name, task.Requested)
	}

	return err
}
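
The backoff helper used above is not shown in these examples. The interface below is a sketch inferred only from the three calls in StopStartTasks (Waiting, Backoff, Reset); it is an assumption, not the project's actual definition.

// Inferred, not actual source: the method set the Marathon scheduler appears to rely on.
type backoffTimer interface {
	Waiting() bool                            // true while a previous backoff delay is still running
	Backoff(demandUpdate chan struct{}) error // start a delay, then signal demandUpdate so scaling is retried
	Reset()                                   // clear any accumulated delay after a successful scaling call
}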
Example #6
// StopStartTasks creates containers if there aren't enough of them, and stops them if there are too many
func (c *DockerScheduler) StopStartTasks(tasks *demand.Tasks) error {
	var tooMany []*demand.Task
	var tooFew []*demand.Task
	var diff int
	var err error

	tasks.Lock()
	defer tasks.Unlock()

	// TODO: Consider checking the number running before we start & stop
	// Don't do more scaling if this task is already changing
	for _, task := range tasks.Tasks {
		if task.Demand > task.Requested && task.Requested == task.Running {
			// There aren't enough of these containers yet
			tooFew = append(tooFew, task)
		}

		if task.Demand < task.Requested && task.Requested == task.Running {
			// There are too many of these containers
			tooMany = append(tooMany, task)
		}
	}

	// Scale down first to free up resources
	for _, task := range tooMany {
		diff = task.Requested - task.Demand
		log.Infof("Stop %d of task %s", diff, task.Name)
		for i := 0; i < diff; i++ {
			err = c.stopTask(task)
			if err != nil {
				log.Errorf("Couldn't stop %s: %v ", task.Name, err)
			}
			task.Requested--
		}
	}

	// Now we can scale up
	for _, task := range tooFew {
		diff = task.Demand - task.Requested
		log.Infof("Start %d of task %s", diff, task.Name)
		for i := 0; i < diff; i++ {
			c.startTask(task)
			task.Requested++
		}
	}

	// Don't return until all the scale tasks are complete
	scaling.Wait()
	return err
}
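
The startTask and stopTask helpers are not shown here. The call to scaling.Wait() suggests a package-level sync.WaitGroup that those helpers add to; the declaration below is an assumption, not part of the example.

// Assumed, inferred from scaling.Wait() above: a package-level WaitGroup that the unshown
// startTask/stopTask helpers Add to before doing their container work and Done when it
// finishes, so StopStartTasks only returns once all start/stop operations have completed.
var scaling sync.WaitGroup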
Example #7
func updateTasks(dp api.DemandPayload, tasks *demand.Tasks) (demandChanged bool) {
	demandChanged = false
	tasks.Lock()
	defer tasks.Unlock()

	for _, taskFromServer := range dp.Demand.Tasks {
		name := taskFromServer.App

		if existingTask, err := tasks.GetTask(name); err == nil {
			if existingTask.Demand != taskFromServer.DemandCount {
				demandChanged = true
			}
			existingTask.Demand = taskFromServer.DemandCount
		}
	}
	return demandChanged
}
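
api.DemandPayload is not defined in this example. The shape below is inferred from the field accesses in updateTasks (dp.Demand.Tasks, taskFromServer.App, taskFromServer.DemandCount); the JSON tags are guesses.

// Assumed shape of the demand payload, inferred from usage in updateTasks.
type DemandPayload struct {
	Demand struct {
		Tasks []struct {
			App         string `json:"app"`
			DemandCount int    `json:"demandCount"`
		} `json:"tasks"`
	} `json:"demand"`
}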
Example #8
// SendMetrics sends the current state of tasks to the API
func SendMetrics(ws *websocket.Conn, userID string, tasks *demand.Tasks) error {
	var err error
	var index int

	metrics := metrics{
		Tasks:     make([]taskMetrics, len(tasks.Tasks)),
		CreatedAt: time.Now().Unix(),
	}

	tasks.Lock()
	for _, task := range tasks.Tasks {
		metrics.Tasks[index] = taskMetrics{App: task.Name, RunningCount: task.Running, PendingCount: task.Requested}

		if task.Metric != nil {
			metrics.Tasks[index].Metric = task.Metric.Current()
		}

		index++
	}
	tasks.Unlock()

	payload := metricsPayload{
		User:    userID,
		Metrics: metrics,
	}

	b, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("Failed to encode API json. %v", err)
	}

	log.Debug("Sending metrics message")
	_, err = ws.Write(b)
	if err != nil {
		return fmt.Errorf("Failed to send metrics: %v", err)
	}

	return err
}
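
The metrics, taskMetrics and metricsPayload types are referenced but not defined in this example. The sketch below is consistent with how SendMetrics populates them; the type of Metric and the JSON tags are guesses.

// Assumed shapes, inferred from how SendMetrics fills them in.
type taskMetrics struct {
	App          string `json:"app"`
	RunningCount int    `json:"runningCount"`
	PendingCount int    `json:"pendingCount"`
	Metric       int    `json:"metric,omitempty"`
}

type metrics struct {
	Tasks     []taskMetrics `json:"tasks"`
	CreatedAt int64         `json:"createdAt"`
}

type metricsPayload struct {
	User    string  `json:"user"`
	Metrics metrics `json:"metrics"`
}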
Example #9
// CountAllTasks tells us how many instances of each task are currently running.
func (m *MarathonScheduler) CountAllTasks(running *demand.Tasks) error {
	var (
		err         error
		appsMessage AppsMessage
	)

	running.Lock()
	defer running.Unlock()

	url := m.baseMarathonURL + "apps/"

	body, err := utils.GetJSON(url)
	if err != nil {
		log.Errorf("Error getting Marathon Apps %v", err)
		return err
	}

	err = json.Unmarshal(body, &appsMessage)
	if err != nil {
		log.Errorf("Error %v unmarshalling from %s", err, string(body[:]))
		return err
	}

	appCounts := make(map[string]int)

	// Remove leading slash from App IDs and set the instance counts.
	for _, app := range appsMessage.Apps {
		appCounts[strings.Replace(app.ID, "/", "", 1)] = app.Instances
	}

	// Set running counts. Defaults to 0 if the App does not exist.
	tasks := running.Tasks
	for _, t := range tasks {
		t.Running = appCounts[t.Name]
	}

	return err
}
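
AppsMessage is not defined in this snippet. The type below is a sketch inferred from the two fields CountAllTasks reads from the Marathon apps response (ID and Instances); the JSON tags are guesses.

// Assumed shape of the Marathon apps response, inferred from usage above.
type AppsMessage struct {
	Apps []struct {
		ID        string `json:"id"`
		Instances int    `json:"instances"`
	} `json:"apps"`
}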
Example #10
// CountAllTasks checks how many of each task are running
func (c *DockerScheduler) CountAllTasks(running *demand.Tasks) error {
	// Docker Remote API https://docs.docker.com/reference/api/docker_remote_api_v1.20/
	// get /containers/json
	var err error
	var containers []docker.APIContainers
	containers, err = c.client.ListContainers(docker.ListContainersOptions{})
	if err != nil {
		return fmt.Errorf("Failed to list containers: %v", err)
	}

	running.Lock()
	defer running.Unlock()
	c.Lock()
	defer c.Unlock()

	// Reset all the running counts to 0
	tasks := running.Tasks
	for _, t := range tasks {
		t.Running = 0

		for _, cc := range c.taskContainers[t.Name] {
			cc.updated = false
		}
	}

	var taskName string
	var present bool

	for i := range containers {
		labels := containers[i].Labels
		taskName, present = labels[labelMap]
		if present {
			// Only update tasks that are already in our task map - don't try to manage anything else
			// log.Debugf("Found a container with labels %v", labels)
			t, err := running.GetTask(taskName)
			if err != nil {
				log.Errorf("Received info about task %s that we're not managing", taskName)
			} else {
				newState := statusToState(containers[i].Status)
				id := containers[i].ID[:12]
				thisContainer, ok := c.taskContainers[taskName][id]
				if !ok {
					log.Infof("We have no previous record of container %s, state %s", id, newState)
					thisContainer = &dockerContainer{}
					c.taskContainers[taskName][id] = thisContainer
				}

				switch newState {
				case "running":
					t.Running++
					// We could be moving from starting to running, or it could be a container that's totally new to us
					if thisContainer.state == "starting" || thisContainer.state == "" {
						thisContainer.state = newState
					}
				case "removing":
					if thisContainer.state != "removing" {
						log.Errorf("Container %s is being removed, but we didn't terminate it", id)
					}
				case "exited":
					if thisContainer.state != "stopping" && thisContainer.state != "exited" {
						log.Errorf("Container %s has exited, but we didn't stop it", id)
					}
				case "dead":
					if thisContainer.state != "dead" {
						log.Errorf("Container %s is dead", id)
					}
					thisContainer.state = newState
				}

				thisContainer.updated = true
			}
		}
	}

	for _, task := range tasks {
		log.Debugf("  %s: internally running %d, requested %d", task.Name, task.Running, task.Requested)
		for id, cc := range c.taskContainers[task.Name] {
			log.Debugf("  %s - %s", id, cc.state)
			if !cc.updated {
				if cc.state == "removing" || cc.state == "exited" {
					log.Debugf("    Deleting %s", id)
					delete(c.taskContainers[task.Name], id)
				} else if cc.state != "created" && cc.state != "starting" && cc.state != "stopping" {
					log.Errorf("Bad state for container %s: %s", id, cc.state)
				}
			}
		}
	}

	return err
}
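
The dockerContainer type and the taskContainers map are not shown. The sketch below is inferred from their usage in CountAllTasks: a per-task map keyed by the first 12 characters of the container ID, tracking a state string and an updated flag.

// Assumed bookkeeping types, inferred from usage in CountAllTasks.
type dockerContainer struct {
	state   string // "created", "starting", "running", "stopping", "removing", "exited" or "dead"
	updated bool   // true if this container was seen in the latest ListContainers call
}

// On DockerScheduler (assumed): taskContainers map[string]map[string]*dockerContainer,
// keyed first by task name and then by the truncated container ID.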