Exemple #1
0
// StatusUpdate takes care of updating the status
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	id := status.TaskId.GetValue()

	log.Debugf("Received task status [%s] for task [%s]", status.State.String(), id)

	task, err := database.ReadTask(id)
	if err != nil {
		log.Debugf("Error reading task from database: %s", err)
	}

	if task.ID == "" {
		task = types.EremeticTask{
			ID:      id,
			SlaveId: status.SlaveId.GetValue(),
		}
	}

	if !task.IsRunning() && *status.State == mesos.TaskState_TASK_RUNNING {
		TasksRunning.Inc()
	}

	if types.IsTerminal(status.State) {
		TasksTerminated.With(prometheus.Labels{"status": status.State.String()}).Inc()
		if task.WasRunning() {
			TasksRunning.Dec()
		}
	}

	task.UpdateStatus(types.Status{
		Status: status.State.String(),
		Time:   time.Now().Unix(),
	})

	if *status.State == mesos.TaskState_TASK_FAILED && !task.WasRunning() {
		if task.Retry >= maxRetries {
			log.Warnf("giving up on %s after %d retry attempts", id, task.Retry)
		} else {
			log.Infof("task %s was never running. re-scheduling", id)
			task.UpdateStatus(types.Status{
				Status: mesos.TaskState_TASK_STAGING.String(),
				Time:   time.Now().Unix(),
			})
			task.Retry++
			go func() {
				QueueSize.Inc()
				s.tasks <- id
			}()
		}
	}

	if types.IsTerminal(status.State) {
		handler.NotifyCallback(&task)
	}

	database.PutTask(&task)
}
Exemple #2
0
// ResourceOffers handles the Resource Offers
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))
	var offer *mesos.Offer

loop:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			log.Info("Shutting down: declining offers")
			break loop
		case tid := <-s.tasks:
			log.Debugf("Trying to find offer to launch %s with", tid)
			t, _ := database.ReadTask(tid)
			offer, offers = matchOffer(t, offers)

			if offer == nil {
				log.Warnf("Could not find a matching offer for %s", tid)
				TasksDelayed.Inc()
				go func() { s.tasks <- tid }()
				break loop
			}

			log.Debugf("Preparing to launch task %s with offer %s", tid, offer.Id.GetValue())
			t, task := s.newTask(t, offer)
			database.PutTask(&t)
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{task}, defaultFilter)
			TasksLaunched.Inc()
			QueueSize.Dec()

			continue
		default:
			break loop
		}
	}

	log.Trace("No tasks to launch. Declining offers.")
	for _, offer := range offers {
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
Exemple #3
0
func ReconcileTasks(driver sched.SchedulerDriver) *Reconcile {
	cancel := make(chan struct{})
	done := make(chan struct{})

	go func() {
		var (
			c     uint
			delay int
		)

		tasks, err := database.ListNonTerminalTasks()
		if err != nil {
			log.Errorf("Failed to list non-terminal tasks: %s", err)
			close(done)
			return
		}

		log.Infof("Trying to reconcile with %d task(s)", len(tasks))
		start := time.Now()

		for len(tasks) > 0 {
			select {
			case <-cancel:
				log.Info("Cancelling reconciliation job")
				close(done)
				return
			case <-time.After(time.Duration(delay) * time.Second):
				// Filter tasks that has received a status update
				ntasks := []*types.EremeticTask{}
				for _, t := range tasks {
					nt, err := database.ReadTask(t.ID)
					if err != nil {
						log.Warnf("Task %s not found in database", t.ID)
						continue
					}
					if nt.LastUpdated().Before(start) {
						ntasks = append(ntasks, &nt)
					}
				}
				tasks = ntasks

				// Send reconciliation request
				if len(tasks) > 0 {
					var statuses []*mesos.TaskStatus
					for _, t := range tasks {
						statuses = append(statuses, &mesos.TaskStatus{
							State:   mesos.TaskState_TASK_STAGING.Enum(),
							TaskId:  &mesos.TaskID{Value: proto.String(t.ID)},
							SlaveId: &mesos.SlaveID{Value: proto.String(t.SlaveId)},
						})
					}
					log.Debugf("Sending reconciliation request #%d", c)
					driver.ReconcileTasks(statuses)
				}

				if delay < maxReconciliationDelay {
					delay = 10 << c
					if delay >= maxReconciliationDelay {
						delay = maxReconciliationDelay
					}
				}

				c += 1
			}
		}

		log.Info("Reconciliation done")
		close(done)
	}()

	return &Reconcile{
		cancel: cancel,
		done:   done,
	}
}