func main() { if len(os.Args) == 2 && os.Args[1] == "--version" { fmt.Println(Version) os.Exit(0) } readConfig() setupLogging() setupMetrics() defer database.Close() bind := fmt.Sprintf("%s:%d", viper.GetString("address"), viper.GetInt("port")) // Catch interrupt go func() { c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt, os.Kill) s := <-c if s != os.Interrupt && s != os.Kill { return } log.Info("Eremetic is shutting down") os.Exit(0) }() sched := scheduler.Create() router := routes.Create(sched) log.Infof("listening to %s", bind) go scheduler.Run(sched) err := http.ListenAndServe(bind, router) if err != nil { log.Error(err.Error()) os.Exit(1) } }
// Run the eremetic scheduler func Run(s *eremeticScheduler) { driver, err := createDriver(s) if err != nil { log.Errorf("Unable to create scheduler driver: %s", err) return } defer close(s.shutdown) defer driver.Stop(false) if status, err := driver.Run(); err != nil { log.Errorf("Framework stopped with status %s and error: %s\n", status.String(), err.Error()) } log.Info("Exiting...") }
// ResourceOffers handles the Resource Offers func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Tracef("Received %d resource offers", len(offers)) var offer *mesos.Offer loop: for len(offers) > 0 { select { case <-s.shutdown: log.Info("Shutting down: declining offers") break loop case tid := <-s.tasks: log.Debugf("Trying to find offer to launch %s with", tid) t, _ := database.ReadTask(tid) offer, offers = matchOffer(t, offers) if offer == nil { log.Warnf("Could not find a matching offer for %s", tid) TasksDelayed.Inc() go func() { s.tasks <- tid }() break loop } log.Debugf("Preparing to launch task %s with offer %s", tid, offer.Id.GetValue()) t, task := s.newTask(t, offer) database.PutTask(&t) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{task}, defaultFilter) TasksLaunched.Inc() QueueSize.Dec() continue default: break loop } } log.Trace("No tasks to launch. Declining offers.") for _, offer := range offers { driver.DeclineOffer(offer.Id, defaultFilter) } }
func ReconcileTasks(driver sched.SchedulerDriver) *Reconcile { cancel := make(chan struct{}) done := make(chan struct{}) go func() { var ( c uint delay int ) tasks, err := database.ListNonTerminalTasks() if err != nil { log.Errorf("Failed to list non-terminal tasks: %s", err) close(done) return } log.Infof("Trying to reconcile with %d task(s)", len(tasks)) start := time.Now() for len(tasks) > 0 { select { case <-cancel: log.Info("Cancelling reconciliation job") close(done) return case <-time.After(time.Duration(delay) * time.Second): // Filter tasks that has received a status update ntasks := []*types.EremeticTask{} for _, t := range tasks { nt, err := database.ReadTask(t.ID) if err != nil { log.Warnf("Task %s not found in database", t.ID) continue } if nt.LastUpdated().Before(start) { ntasks = append(ntasks, &nt) } } tasks = ntasks // Send reconciliation request if len(tasks) > 0 { var statuses []*mesos.TaskStatus for _, t := range tasks { statuses = append(statuses, &mesos.TaskStatus{ State: mesos.TaskState_TASK_STAGING.Enum(), TaskId: &mesos.TaskID{Value: proto.String(t.ID)}, SlaveId: &mesos.SlaveID{Value: proto.String(t.SlaveId)}, }) } log.Debugf("Sending reconciliation request #%d", c) driver.ReconcileTasks(statuses) } if delay < maxReconciliationDelay { delay = 10 << c if delay >= maxReconciliationDelay { delay = maxReconciliationDelay } } c += 1 } } log.Info("Reconciliation done") close(done) }() return &Reconcile{ cancel: cancel, done: done, } }