Example #1
0
// StatusUpdate records a status report for a task. It loads the stored task
// (or starts a fresh record when the loaded one has no ID), adjusts the
// TasksRunning and TasksTerminated metrics, appends the reported state to the
// task, re-queues a task that failed without ever running (until maxRetries is
// reached), notifies the callback handler on terminal states and finally
// writes the task back to the database.
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	taskID := status.TaskId.GetValue()
	stateName := status.State.String()

	log.Debugf("Received task status [%s] for task [%s]", stateName, taskID)

	task, err := database.ReadTask(taskID)
	if err != nil {
		// A failed read is only logged; the update is still processed below.
		log.Debugf("Error reading task from database: %s", err)
	}

	if task.ID == "" {
		task = types.EremeticTask{ID: taskID, SlaveId: status.SlaveId.GetValue()}
	}

	// The metrics are adjusted before the new status is added to the task, so
	// IsRunning/WasRunning here still describe the previously stored statuses.
	if !task.IsRunning() && *status.State == mesos.TaskState_TASK_RUNNING {
		TasksRunning.Inc()
	}

	if types.IsTerminal(status.State) {
		terminated := TasksTerminated.With(prometheus.Labels{"status": stateName})
		terminated.Inc()
		if task.WasRunning() {
			TasksRunning.Dec()
		}
	}

	task.UpdateStatus(types.Status{Status: stateName, Time: time.Now().Unix()})

	failedBeforeRunning := *status.State == mesos.TaskState_TASK_FAILED && !task.WasRunning()
	if failedBeforeRunning {
		if task.Retry < maxRetries {
			log.Infof("task %s was never running. re-scheduling", taskID)
			task.UpdateStatus(types.Status{
				Status: mesos.TaskState_TASK_STAGING.String(),
				Time:   time.Now().Unix(),
			})
			task.Retry++
			// Re-queue from a separate goroutine so this callback does not
			// wait on the send to s.tasks.
			go func() {
				QueueSize.Inc()
				s.tasks <- taskID
			}()
		} else {
			log.Warnf("giving up on %s after %d retry attempts", taskID, task.Retry)
		}
	}

	if types.IsTerminal(status.State) {
		handler.NotifyCallback(&task)
	}

	database.PutTask(&task)
}
Example #2
0
// updateStatusForTask copies the state carried by a status update onto the
// stored task with the same ID and writes the task back to the database.
//
// If the task cannot be read the update is dropped and the failure is logged:
// writing back the zero-value task returned alongside an error would persist
// a record that carries only the status string.
func updateStatusForTask(status *mesos.TaskStatus) {
	id := status.TaskId.GetValue()
	log.Debugf("TaskId [%s] status [%s]", id, status.State)

	task, err := database.ReadTask(id)
	if err != nil {
		log.Errorf("Unable to read task %s, dropping status update: %s", id, err)
		return
	}

	task.Status = status.State.String()
	database.PutTask(&task)
}
Example #3
0
// NotifyCallback posts the task's most recent status as JSON to the callback
// URI stored on the task.
//
// Nothing is sent when the task has no callback URI or no recorded status.
// The HTTP request is made from a separate goroutine, so the function returns
// before the POST completes; the outcome of the request is only logged.
func NotifyCallback(task *types.EremeticTask) {
	if len(task.CallbackURI) == 0 {
		return
	}

	// Guard the index below: an empty history would otherwise panic.
	if len(task.Status) == 0 {
		log.Debugf("Task %s has no status to report, skipping callback", task.ID)
		return
	}

	latest := task.Status[len(task.Status)-1]
	cbData := callbackData{
		Time:   latest.Time,
		Status: latest.Status,
		TaskID: task.ID,
	}

	body, err := json.Marshal(cbData)
	if err != nil {
		log.Errorf("Unable to create message for task %s, target uri %s", task.ID, task.CallbackURI)
		return
	}

	// Copy the URI so the goroutine does not touch the caller's task after
	// this function has returned.
	uri := task.CallbackURI

	go func() {
		resp, err := http.Post(uri, "application/json", bytes.NewBuffer(body))
		if err != nil {
			log.Error(err.Error())
			return
		}
		// The response body must be closed or the underlying connection leaks.
		defer resp.Body.Close()

		log.Debugf("Sent callback to %s", uri)
	}()
}
Example #4
0
// Reregistered logs the re-registration and triggers task reconciliation. The
// first time either this path runs with s.initialised unset it sends a
// reconciliation request with an empty status list and marks the scheduler as
// initialised; on later calls it delegates to s.Reconcile.
func (s *eremeticScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework re-registered with master %s", masterInfo)

	if s.initialised {
		s.Reconcile(driver)
		return
	}

	driver.ReconcileTasks([]*mesos.TaskStatus{})
	s.initialised = true
}
Example #5
0
// Registered logs the framework ID and master hostname, then triggers task
// reconciliation. While s.initialised is unset it sends a reconciliation
// request with an empty status list and marks the scheduler as initialised;
// otherwise it delegates to s.Reconcile.
func (s *eremeticScheduler) Registered(driver sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework %s registered with master %s", frameworkID.GetValue(), masterInfo.GetHostname())

	if s.initialised {
		s.Reconcile(driver)
		return
	}

	driver.ReconcileTasks([]*mesos.TaskStatus{})
	s.initialised = true
}
Example #6
0
// ResourceOffers consumes a batch of resource offers. While offers remain it
// takes queued task IDs from s.tasks, pairs each with a matching offer and
// launches it. The loop stops on shutdown, when the queue is empty, or when no
// offer matches the current task; every offer still unused is then declined.
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))

dispatch:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			log.Info("Shutting down: declining offers")
			break dispatch

		case queuedID := <-s.tasks:
			log.Debugf("Trying to find offer to launch %s with", queuedID)
			stored, _ := database.ReadTask(queuedID)

			var chosen *mesos.Offer
			chosen, offers = matchOffer(stored, offers)
			if chosen == nil {
				log.Warnf("Could not find a matching offer for %s", queuedID)
				TasksDelayed.Inc()
				// Put the ID back on the queue from a separate goroutine so
				// this callback does not wait on the send.
				go func() { s.tasks <- queuedID }()
				break dispatch
			}

			log.Debugf("Preparing to launch task %s with offer %s", queuedID, chosen.Id.GetValue())
			stored, info := s.newTask(stored, chosen)
			database.PutTask(&stored)
			driver.LaunchTasks([]*mesos.OfferID{chosen.Id}, []*mesos.TaskInfo{info}, defaultFilter)
			TasksLaunched.Inc()
			QueueSize.Dec()

		default:
			// Nothing queued right now.
			break dispatch
		}
	}

	log.Trace("No tasks to launch. Declining offers.")
	for i := range offers {
		driver.DeclineOffer(offers[i].Id, defaultFilter)
	}
}
Example #7
0
// matchOffer returns the first offer accepted by the matcher built for task,
// together with the offers that remain. The chosen offer is removed by moving
// the last element into its slot and shrinking the slice, so the order of the
// remaining offers is not preserved and the caller's backing array is
// modified. When nothing matches it returns nil and the offers unchanged.
func matchOffer(task types.EremeticTask, offers []*mesos.Offer) (*mesos.Offer, []*mesos.Offer) {
	m := createMatcher(task)

	for idx := range offers {
		candidate := offers[idx]
		if !matches(m, candidate) {
			log.Debugf("%s does not match: %s", candidate.Id.GetValue(), m.Description())
			continue
		}

		last := len(offers) - 1
		offers[idx] = offers[last]
		return candidate, offers[:last]
	}

	return nil, offers
}
Example #8
0
// GetTaskInfo looks up the task named by the "taskId" route variable and
// writes it to the response. Requests whose Accept header contains "text/html"
// are rendered through renderHTML; all others receive JSON, with a 404 status
// when the loaded task equals the zero value.
func GetTaskInfo(w http.ResponseWriter, r *http.Request) {
	id := mux.Vars(r)["taskId"]
	log.Debugf("Fetching task for id: %s", id)
	task, _ := database.ReadTask(id)

	wantsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
	if wantsHTML {
		renderHTML(w, r, task, id)
		return
	}

	if task == (types.EremeticTask{}) {
		writeJSON(http.StatusNotFound, nil, w)
		return
	}

	writeJSON(http.StatusOK, task, w)
}
Example #9
0
// scheduleTask names the request using the next ID obtained from nextId,
// builds a task from it, stores the task and sends its ID to s.tasks. It
// returns the new task's ID, or the error from createEremeticTask.
func scheduleTask(s *eremeticScheduler, request types.Request) (string, error) {
	log.Debugf("Adding task running on %s to queue", request.DockerImage)

	seq := nextId(s)
	request.Name = fmt.Sprintf("Eremetic task %d", seq)

	task, err := createEremeticTask(request)
	if err != nil {
		return "", err
	}

	database.PutTask(&task)

	taskID := task.ID
	s.tasks <- taskID
	return taskID, nil
}
Example #10
0
// ScheduleTask names the request using the next ID obtained from nextID,
// builds a task from it, bumps the TasksCreated and QueueSize metrics, stores
// the task and sends its ID to s.tasks. It returns the new task's ID, or the
// (logged) error from createEremeticTask.
func (s *eremeticScheduler) ScheduleTask(request types.Request) (string, error) {
	log.Debugf("Adding task running on %s to queue", request.DockerImage)

	seq := nextID(s)
	request.Name = fmt.Sprintf("Eremetic task %d", seq)

	task, err := createEremeticTask(request)
	if err != nil {
		log.Error(err.Error())
		return "", err
	}

	TasksCreated.Inc()
	QueueSize.Inc()
	database.PutTask(&task)

	taskID := task.ID
	s.tasks <- taskID
	return taskID, nil
}
Example #11
0
// handleError reports err to the client as a JSON object with "error" and
// "message" fields and HTTP status 422. It does nothing when err is nil. If
// the response itself cannot be written, that failure is logged and the
// function panics with it.
func handleError(err error, w http.ResponseWriter, message string) {
	if err == nil {
		return
	}

	log.Debugf("%v", err.Error())

	payload := struct {
		Error   string `json:"error"`
		Message string `json:"message"`
	}{
		Error:   err.Error(),
		Message: message,
	}

	writeErr := writeJSON(422, payload, w)
	if writeErr == nil {
		return
	}

	log.Errorf("%v", writeErr.Error())
	panic(writeErr)
}
Example #12
0
// FrameworkMessage handles a message sent by an executor. Messages from the
// executor named "eremetic-executor" are checked to be valid JSON and then
// logged; messages from any other executor are only reported as coming from
// an unknown source.
func (s *eremeticScheduler) FrameworkMessage(
	driver sched.SchedulerDriver,
	executorID *mesos.ExecutorID,
	slaveID *mesos.SlaveID,
	message string) {

	log.Debug("Getting a framework message")

	source := *executorID.Value
	if source != "eremetic-executor" {
		log.Debugf("Received a framework message from some unknown source: %s", source)
		return
	}

	// The decoded value is not used; decoding only validates the payload.
	var decoded interface{}
	if err := json.Unmarshal([]byte(message), &decoded); err != nil {
		log.Errorf("Error deserializing Result: [%s]", err)
		return
	}
	log.Debug(message)
}
Example #13
0
// ResourceOffers walks the offers one by one. For each offer it launches the
// next queued task from s.tasks if one is available; otherwise (shutdown in
// progress or empty queue) the offer is declined.
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))

	for _, offer := range offers {
		select {
		case <-s.shutdown:
			log.Infof("Shutting down: declining offer on [%s]", offer.Hostname)

		case queuedID := <-s.tasks:
			log.Debugf("Preparing to launch task %s with offer %s", queuedID, offer.Id.GetValue())
			stored, _ := database.ReadTask(queuedID)
			info := s.newTask(offer, &stored)
			database.PutTask(&stored)
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{info}, defaultFilter)
			// The offer has been used; skip the decline below.
			continue

		default:
			log.Trace("No tasks to launch. Declining offer.")
		}

		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
Example #14
0
// ExecutorLost logs, at debug level, which executor was lost on which slave.
// The driver and status arguments are not used.
func (s *eremeticScheduler) ExecutorLost(
	_ sched.SchedulerDriver,
	executorID *mesos.ExecutorID,
	slaveID *mesos.SlaveID,
	status int,
) {
	log.Debugf("Executor %s on slave %s was lost", executorID, slaveID)
}
Example #15
0
// ReconcileTasks starts a background goroutine that repeatedly asks the driver
// to reconcile the non-terminal tasks listed in the database, and returns a
// Reconcile holding the goroutine's cancel and done channels.
//
// Each round first drops tasks that can no longer be read or whose
// LastUpdated time is not before the start of the job, then sends a
// reconciliation request for the rest. The wait between rounds starts at zero
// and grows as 10 << round seconds, capped at maxReconciliationDelay. The done
// channel is closed when the goroutine exits for any reason.
func ReconcileTasks(driver sched.SchedulerDriver) *Reconcile {
	cancel := make(chan struct{})
	done := make(chan struct{})

	go func() {
		// Every exit path below signals completion exactly once.
		defer close(done)

		var (
			round uint
			delay int
		)

		tasks, err := database.ListNonTerminalTasks()
		if err != nil {
			log.Errorf("Failed to list non-terminal tasks: %s", err)
			return
		}

		log.Infof("Trying to reconcile with %d task(s)", len(tasks))
		start := time.Now()

		for len(tasks) > 0 {
			select {
			case <-cancel:
				log.Info("Cancelling reconciliation job")
				return

			case <-time.After(time.Duration(delay) * time.Second):
				// Keep only the tasks that have not been updated since the
				// job started.
				pending := []*types.EremeticTask{}
				for _, known := range tasks {
					current, readErr := database.ReadTask(known.ID)
					if readErr != nil {
						log.Warnf("Task %s not found in database", known.ID)
						continue
					}
					if current.LastUpdated().Before(start) {
						pending = append(pending, &current)
					}
				}
				tasks = pending

				// Ask for the state of whatever is still outstanding.
				if len(tasks) > 0 {
					var statuses []*mesos.TaskStatus
					for _, outstanding := range tasks {
						request := &mesos.TaskStatus{
							State:   mesos.TaskState_TASK_STAGING.Enum(),
							TaskId:  &mesos.TaskID{Value: proto.String(outstanding.ID)},
							SlaveId: &mesos.SlaveID{Value: proto.String(outstanding.SlaveId)},
						}
						statuses = append(statuses, request)
					}
					log.Debugf("Sending reconciliation request #%d", round)
					driver.ReconcileTasks(statuses)
				}

				// Back off until the cap is reached; once there the delay is
				// left alone.
				if delay < maxReconciliationDelay {
					delay = 10 << round
					if delay >= maxReconciliationDelay {
						delay = maxReconciliationDelay
					}
				}

				round++
			}
		}

		log.Info("Reconciliation done")
	}()

	return &Reconcile{cancel: cancel, done: done}
}
Example #16
0
// Reregistered logs, at debug level, the master the framework re-registered
// with. The driver argument is not used.
func (s *eremeticScheduler) Reregistered(
	_ sched.SchedulerDriver,
	masterInfo *mesos.MasterInfo,
) {
	log.Debugf("Framework re-registered with master %s", masterInfo)
}
Example #17
0
// Disconnected logs, at debug level, that the framework lost its connection to
// the master. The driver argument is not used.
func (s *eremeticScheduler) Disconnected(sched.SchedulerDriver) {
	// The message has no format verbs, so the plain Debug call logs the same
	// text.
	log.Debug("Framework disconnected with master")
}
Example #18
0
// StatusUpdate logs the reported state and task ID, then hands the status to
// updateStatusForTask.
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	state := status.State.String()
	taskID := *status.TaskId.Value
	log.Debugf("Received task status [%s] for task [%s]", state, taskID)

	updateStatusForTask(status)
}
Example #19
0
// Error logs the received error message at debug level. The driver argument
// is not used.
func (s *eremeticScheduler) Error(
	_ sched.SchedulerDriver,
	err string,
) {
	log.Debugf("Receiving an error: %s", err)
}
Example #20
0
// Registered logs, at debug level, the framework ID and the hostname of the
// master the framework registered with. The driver argument is not used.
func (s *eremeticScheduler) Registered(_ sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	framework := frameworkID.GetValue()
	master := masterInfo.GetHostname()
	log.Debugf("Framework %s registered with master %s", framework, master)
}
Example #21
0
// SlaveLost logs, at debug level, the ID of the slave that was lost. The
// driver argument is not used.
func (s *eremeticScheduler) SlaveLost(
	_ sched.SchedulerDriver,
	slaveID *mesos.SlaveID,
) {
	log.Debugf("Slave %s lost", slaveID)
}
Example #22
0
// OfferRescinded logs, at debug level, the ID of the offer that was rescinded.
// The driver argument is not used.
func (s *eremeticScheduler) OfferRescinded(
	_ sched.SchedulerDriver,
	offerID *mesos.OfferID,
) {
	log.Debugf("Offer %s rescinded", offerID)
}