// StatusUpdate takes care of updating the status
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	id := status.TaskId.GetValue()

	log.Debugf("Received task status [%s] for task [%s]", status.State.String(), id)

	task, err := database.ReadTask(id)
	if err != nil {
		log.Debugf("Error reading task from database: %s", err)
	}

	if task.ID == "" {
		task = types.EremeticTask{
			ID:      id,
			SlaveId: status.SlaveId.GetValue(),
		}
	}

	if !task.IsRunning() && *status.State == mesos.TaskState_TASK_RUNNING {
		TasksRunning.Inc()
	}

	if types.IsTerminal(status.State) {
		TasksTerminated.With(prometheus.Labels{"status": status.State.String()}).Inc()
		if task.WasRunning() {
			TasksRunning.Dec()
		}
	}

	task.UpdateStatus(types.Status{
		Status: status.State.String(),
		Time:   time.Now().Unix(),
	})

	if *status.State == mesos.TaskState_TASK_FAILED && !task.WasRunning() {
		if task.Retry >= maxRetries {
			log.Warnf("giving up on %s after %d retry attempts", id, task.Retry)
		} else {
			log.Infof("task %s was never running. re-scheduling", id)
			task.UpdateStatus(types.Status{
				Status: mesos.TaskState_TASK_STAGING.String(),
				Time:   time.Now().Unix(),
			})
			task.Retry++
			go func() {
				QueueSize.Inc()
				s.tasks <- id
			}()
		}
	}

	if types.IsTerminal(status.State) {
		handler.NotifyCallback(&task)
	}

	database.PutTask(&task)
}
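// TasksRunning, TasksTerminated, QueueSize and the other counters used by
// this scheduler are Prometheus collectors that are not declared anywhere
// in this section. A minimal sketch of plausible declarations, consistent
// with how they are called above; the namespace and help strings are
// assumptions, not taken from the source.
var (
	TasksRunning = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "eremetic",
		Name:      "tasks_running",
		Help:      "Number of tasks currently running.",
	})
	TasksTerminated = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "eremetic",
		Name:      "tasks_terminated_total",
		Help:      "Terminated tasks, labelled by terminal status.",
	}, []string{"status"})
	QueueSize = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "eremetic",
		Name:      "queue_size",
		Help:      "Number of tasks waiting to be launched.",
	})
	// TasksCreated, TasksLaunched and TasksDelayed would follow the same
	// pattern as plain counters.
)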
func updateStatusForTask(status *mesos.TaskStatus) {
	id := status.TaskId.GetValue()
	log.Debugf("TaskId [%s] status [%s]", id, status.State)
	task, err := database.ReadTask(id)
	if err != nil {
		log.Debugf("Error reading task from database: %s", err)
	}
	task.Status = status.State.String()
	database.PutTask(&task)
}
// NotifyCallback handles posting a JSON back to the URI given with the task.
func NotifyCallback(task *types.EremeticTask) {
	if len(task.CallbackURI) == 0 {
		return
	}

	cbData := callbackData{
		Time:   task.Status[len(task.Status)-1].Time,
		Status: task.Status[len(task.Status)-1].Status,
		TaskID: task.ID,
	}

	body, err := json.Marshal(cbData)
	if err != nil {
		log.Errorf("Unable to create message for task %s, target uri %s", task.ID, task.CallbackURI)
		return
	}

	go func() {
		_, err = http.Post(task.CallbackURI, "application/json", bytes.NewBuffer(body))
		if err != nil {
			log.Error(err.Error())
		} else {
			log.Debugf("Sent callback to %s", task.CallbackURI)
		}
	}()
}
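// callbackData is referenced above but never defined in this section. A
// minimal sketch of what the payload type might look like, matching the
// three fields populated in NotifyCallback; the JSON tags are assumptions,
// not confirmed by the surrounding code.
type callbackData struct {
	Time   int64  `json:"time"`
	Status string `json:"status"`
	TaskID string `json:"task_id"`
}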
// Reregistered is called when the Scheduler is Reregistered
func (s *eremeticScheduler) Reregistered(driver sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework re-registered with master %s", masterInfo)
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
// Registered is called when the Scheduler is Registered
func (s *eremeticScheduler) Registered(driver sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework %s registered with master %s", frameworkID.GetValue(), masterInfo.GetHostname())
	if !s.initialised {
		driver.ReconcileTasks([]*mesos.TaskStatus{})
		s.initialised = true
	} else {
		s.Reconcile(driver)
	}
}
// ResourceOffers handles the Resource Offers
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))
	var offer *mesos.Offer

loop:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			log.Info("Shutting down: declining offers")
			break loop
		case tid := <-s.tasks:
			log.Debugf("Trying to find offer to launch %s with", tid)
			t, _ := database.ReadTask(tid)
			offer, offers = matchOffer(t, offers)

			if offer == nil {
				log.Warnf("Could not find a matching offer for %s", tid)
				TasksDelayed.Inc()
				go func() { s.tasks <- tid }()
				break loop
			}

			log.Debugf("Preparing to launch task %s with offer %s", tid, offer.Id.GetValue())
			t, task := s.newTask(t, offer)
			database.PutTask(&t)
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{task}, defaultFilter)
			TasksLaunched.Inc()
			QueueSize.Dec()
			continue
		default:
			break loop
		}
	}

	log.Trace("No tasks to launch. Declining offers.")
	for _, offer := range offers {
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
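// defaultFilter is used above but not declared in this section. A plausible
// declaration sketch; the 10-second refuse interval is an assumption, not a
// value confirmed by the source.
var defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(10)}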
func matchOffer(task types.EremeticTask, offers []*mesos.Offer) (*mesos.Offer, []*mesos.Offer) {
	var matcher = createMatcher(task)
	for i, off := range offers {
		if matches(matcher, off) {
			// Remove the matched offer by swapping in the last element.
			offers[i] = offers[len(offers)-1]
			offers = offers[:len(offers)-1]
			return off, offers
		}
		log.Debugf("%s does not match: %s", off.Id.GetValue(), matcher.Description())
	}
	return nil, offers
}
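// createMatcher and matches are not shown in this section. A sketch of the
// interface the calls above imply: createMatcher builds a predicate from the
// task's requirements, matches applies it to a single offer, and Description
// explains a failed match. The CPU/memory matcher and the TaskCPUs/TaskMem
// field names below are illustrative assumptions, not the project's actual
// implementation.
type offerMatcher interface {
	Matches(offer *mesos.Offer) bool
	Description() string
}

type resourceMatcher struct {
	cpus, mem float64
}

func (m resourceMatcher) Matches(offer *mesos.Offer) bool {
	// Sum the scalar resources in the offer and compare against the task.
	available := map[string]float64{}
	for _, res := range offer.GetResources() {
		available[res.GetName()] += res.GetScalar().GetValue()
	}
	return available["cpus"] >= m.cpus && available["mem"] >= m.mem
}

func (m resourceMatcher) Description() string {
	return fmt.Sprintf("cpus >= %.2f and mem >= %.2f", m.cpus, m.mem)
}

func createMatcher(task types.EremeticTask) offerMatcher {
	return resourceMatcher{cpus: task.TaskCPUs, mem: task.TaskMem}
}

func matches(m offerMatcher, offer *mesos.Offer) bool {
	return m.Matches(offer)
}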
// GetTaskInfo returns information about the given task.
func GetTaskInfo(w http.ResponseWriter, r *http.Request) {
	vars := mux.Vars(r)
	id := vars["taskId"]
	log.Debugf("Fetching task for id: %s", id)
	task, _ := database.ReadTask(id)

	if strings.Contains(r.Header.Get("Accept"), "text/html") {
		renderHTML(w, r, task, id)
	} else {
		if task == (types.EremeticTask{}) {
			writeJSON(http.StatusNotFound, nil, w)
			return
		}
		writeJSON(http.StatusOK, task, w)
	}
}
func scheduleTask(s *eremeticScheduler, request types.Request) (string, error) {
	log.Debugf("Adding task running on %s to queue", request.DockerImage)

	request.Name = fmt.Sprintf("Eremetic task %d", nextId(s))

	task, err := createEremeticTask(request)
	if err != nil {
		return "", err
	}

	database.PutTask(&task)
	s.tasks <- task.ID
	return task.ID, nil
}
func (s *eremeticScheduler) ScheduleTask(request types.Request) (string, error) {
	log.Debugf("Adding task running on %s to queue", request.DockerImage)

	request.Name = fmt.Sprintf("Eremetic task %d", nextID(s))

	task, err := createEremeticTask(request)
	if err != nil {
		log.Error(err.Error())
		return "", err
	}

	TasksCreated.Inc()
	QueueSize.Inc()

	database.PutTask(&task)
	s.tasks <- task.ID
	return task.ID, nil
}
func handleError(err error, w http.ResponseWriter, message string) {
	if err == nil {
		return
	}

	log.Debugf("%v", err.Error())

	var errorMessage = struct {
		Error   string `json:"error"`
		Message string `json:"message"`
	}{
		err.Error(),
		message,
	}

	if err = writeJSON(http.StatusUnprocessableEntity, errorMessage, w); err != nil {
		log.Errorf("%v", err.Error())
		panic(err)
	}
}
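// A usage sketch, not taken from the source: how an HTTP handler might wire
// ScheduleTask and handleError together. The AddTask name, the request
// decoding, and the 202 response shape are illustrative assumptions.
func AddTask(scheduler *eremeticScheduler) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var request types.Request
		if err := json.NewDecoder(r.Body).Decode(&request); err != nil {
			handleError(err, w, "Unable to parse request body.")
			return
		}

		taskID, err := scheduler.ScheduleTask(request)
		if err != nil {
			handleError(err, w, "Unable to schedule task.")
			return
		}

		writeJSON(http.StatusAccepted, map[string]string{"task_id": taskID}, w)
	}
}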
func (s *eremeticScheduler) FrameworkMessage(
	driver sched.SchedulerDriver,
	executorID *mesos.ExecutorID,
	slaveID *mesos.SlaveID,
	message string) {

	log.Debug("Getting a framework message")
	switch *executorID.Value {
	case "eremetic-executor":
		var result interface{}
		err := json.Unmarshal([]byte(message), &result)
		if err != nil {
			log.Errorf("Error deserializing Result: [%s]", err)
			return
		}
		log.Debug(message)
	default:
		log.Debugf("Received a framework message from some unknown source: %s", *executorID.Value)
	}
}
// ResourceOffers handles the Resource Offers
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))

	for _, offer := range offers {
		select {
		case <-s.shutdown:
			log.Infof("Shutting down: declining offer on [%s]", offer.Hostname)
			driver.DeclineOffer(offer.Id, defaultFilter)
			continue
		case tid := <-s.tasks:
			log.Debugf("Preparing to launch task %s with offer %s", tid, offer.Id.GetValue())
			t, _ := database.ReadTask(tid)
			task := s.newTask(offer, &t)
			database.PutTask(&t)
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{task}, defaultFilter)
			continue
		default:
		}

		log.Trace("No tasks to launch. Declining offer.")
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
func (s *eremeticScheduler) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) {
	log.Debugf("Executor %s on slave %s was lost", executorID, slaveID)
}
func ReconcileTasks(driver sched.SchedulerDriver) *Reconcile {
	cancel := make(chan struct{})
	done := make(chan struct{})

	go func() {
		var (
			c     uint
			delay int
		)

		tasks, err := database.ListNonTerminalTasks()
		if err != nil {
			log.Errorf("Failed to list non-terminal tasks: %s", err)
			close(done)
			return
		}

		log.Infof("Trying to reconcile with %d task(s)", len(tasks))
		start := time.Now()

		for len(tasks) > 0 {
			select {
			case <-cancel:
				log.Info("Cancelling reconciliation job")
				close(done)
				return
			case <-time.After(time.Duration(delay) * time.Second):
				// Filter out tasks that have received a status update since
				// reconciliation started.
				ntasks := []*types.EremeticTask{}
				for _, t := range tasks {
					nt, err := database.ReadTask(t.ID)
					if err != nil {
						log.Warnf("Task %s not found in database", t.ID)
						continue
					}
					if nt.LastUpdated().Before(start) {
						ntasks = append(ntasks, &nt)
					}
				}
				tasks = ntasks

				// Send a reconciliation request for the remaining tasks.
				if len(tasks) > 0 {
					var statuses []*mesos.TaskStatus
					for _, t := range tasks {
						statuses = append(statuses, &mesos.TaskStatus{
							State:   mesos.TaskState_TASK_STAGING.Enum(),
							TaskId:  &mesos.TaskID{Value: proto.String(t.ID)},
							SlaveId: &mesos.SlaveID{Value: proto.String(t.SlaveId)},
						})
					}
					log.Debugf("Sending reconciliation request #%d", c)
					driver.ReconcileTasks(statuses)
				}

				// Back off exponentially (10 << c), capped at maxReconciliationDelay.
				if delay < maxReconciliationDelay {
					delay = 10 << c
					if delay >= maxReconciliationDelay {
						delay = maxReconciliationDelay
					}
				}

				c++
			}
		}

		log.Info("Reconciliation done")
		close(done)
	}()

	return &Reconcile{
		cancel: cancel,
		done:   done,
	}
}
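// The retry delay above grows as 10 << c and is capped at
// maxReconciliationDelay, whose value is defined elsewhere. A small sketch
// of the resulting progression, assuming (for illustration only) a cap of
// 120 seconds.
func reconciliationDelays(maxDelay int) []int {
	var delays []int
	delay := 0
	for c := uint(0); delay < maxDelay; c++ {
		delay = 10 << c
		if delay > maxDelay {
			delay = maxDelay
		}
		delays = append(delays, delay)
	}
	return delays // maxDelay=120 yields [10 20 40 80 120]
}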
// Reregistered is called when the Scheduler is Reregistered
func (s *eremeticScheduler) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework re-registered with master %s", masterInfo)
}
// Disconnected is called when the Scheduler is Disconnected
func (s *eremeticScheduler) Disconnected(sched.SchedulerDriver) {
	log.Debugf("Framework disconnected from master")
}
// StatusUpdate takes care of updating the status
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Debugf("Received task status [%s] for task [%s]", status.State.String(), *status.TaskId.Value)
	updateStatusForTask(status)
}
func (s *eremeticScheduler) Error(_ sched.SchedulerDriver, err string) {
	log.Debugf("Received an error: %s", err)
}
// Registered is called when the Scheduler is Registered
func (s *eremeticScheduler) Registered(_ sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) {
	log.Debugf("Framework %s registered with master %s", frameworkID.GetValue(), masterInfo.GetHostname())
}
func (s *eremeticScheduler) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) {
	log.Debugf("Slave %s lost", slaveID)
}
func (s *eremeticScheduler) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) {
	log.Debugf("Offer %s rescinded", offerID)
}