Beispiel #1
0
// StatusUpdate takes care of updating the status
func (s *eremeticScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	id := status.TaskId.GetValue()

	log.Debugf("Received task status [%s] for task [%s]", status.State.String(), id)

	task, err := database.ReadTask(id)
	if err != nil {
		log.Debugf("Error reading task from database: %s", err)
	}

	if task.ID == "" {
		task = types.EremeticTask{
			ID:      id,
			SlaveId: status.SlaveId.GetValue(),
		}
	}

	if !task.IsRunning() && *status.State == mesos.TaskState_TASK_RUNNING {
		TasksRunning.Inc()
	}

	if types.IsTerminal(status.State) {
		TasksTerminated.With(prometheus.Labels{"status": status.State.String()}).Inc()
		if task.WasRunning() {
			TasksRunning.Dec()
		}
	}

	task.UpdateStatus(types.Status{
		Status: status.State.String(),
		Time:   time.Now().Unix(),
	})

	if *status.State == mesos.TaskState_TASK_FAILED && !task.WasRunning() {
		if task.Retry >= maxRetries {
			log.Warnf("giving up on %s after %d retry attempts", id, task.Retry)
		} else {
			log.Infof("task %s was never running. re-scheduling", id)
			task.UpdateStatus(types.Status{
				Status: mesos.TaskState_TASK_STAGING.String(),
				Time:   time.Now().Unix(),
			})
			task.Retry++
			go func() {
				QueueSize.Inc()
				s.tasks <- id
			}()
		}
	}

	if types.IsTerminal(status.State) {
		handler.NotifyCallback(&task)
	}

	database.PutTask(&task)
}
Beispiel #2
0
// GetTaskInfo returns information about the given task.
func GetTaskInfo(scheduler types.Scheduler) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		vars := mux.Vars(r)
		id := vars["taskId"]
		log.Debugf("Fetching task for id: %s", id)
		task, _ := database.ReadTask(id)

		if strings.Contains(r.Header.Get("Accept"), "text/html") {
			renderHTML(w, r, task, id)
		} else {
			if reflect.DeepEqual(task, (types.EremeticTask{})) {
				writeJSON(http.StatusNotFound, nil, w)
				return
			}
			writeJSON(http.StatusOK, task, w)
		}
	}
}
Beispiel #3
0
// ResourceOffers handles the Resource Offers
func (s *eremeticScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Tracef("Received %d resource offers", len(offers))
	var offer *mesos.Offer

loop:
	for len(offers) > 0 {
		select {
		case <-s.shutdown:
			log.Info("Shutting down: declining offers")
			break loop
		case tid := <-s.tasks:
			log.Debugf("Trying to find offer to launch %s with", tid)
			t, _ := database.ReadTask(tid)
			offer, offers = matchOffer(t, offers)

			if offer == nil {
				log.Warnf("Could not find a matching offer for %s", tid)
				TasksDelayed.Inc()
				go func() { s.tasks <- tid }()
				break loop
			}

			log.Debugf("Preparing to launch task %s with offer %s", tid, offer.Id.GetValue())
			t, task := s.newTask(t, offer)
			database.PutTask(&t)
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, []*mesos.TaskInfo{task}, defaultFilter)
			TasksLaunched.Inc()
			QueueSize.Dec()

			continue
		default:
			break loop
		}
	}

	log.Trace("No tasks to launch. Declining offers.")
	for _, offer := range offers {
		driver.DeclineOffer(offer.Id, defaultFilter)
	}
}
Beispiel #4
0
func ReconcileTasks(driver sched.SchedulerDriver) *Reconcile {
	cancel := make(chan struct{})
	done := make(chan struct{})

	go func() {
		var (
			c     uint
			delay int
		)

		tasks, err := database.ListNonTerminalTasks()
		if err != nil {
			log.Errorf("Failed to list non-terminal tasks: %s", err)
			close(done)
			return
		}

		log.Infof("Trying to reconcile with %d task(s)", len(tasks))
		start := time.Now()

		for len(tasks) > 0 {
			select {
			case <-cancel:
				log.Info("Cancelling reconciliation job")
				close(done)
				return
			case <-time.After(time.Duration(delay) * time.Second):
				// Filter tasks that has received a status update
				ntasks := []*types.EremeticTask{}
				for _, t := range tasks {
					nt, err := database.ReadTask(t.ID)
					if err != nil {
						log.Warnf("Task %s not found in database", t.ID)
						continue
					}
					if nt.LastUpdated().Before(start) {
						ntasks = append(ntasks, &nt)
					}
				}
				tasks = ntasks

				// Send reconciliation request
				if len(tasks) > 0 {
					var statuses []*mesos.TaskStatus
					for _, t := range tasks {
						statuses = append(statuses, &mesos.TaskStatus{
							State:   mesos.TaskState_TASK_STAGING.Enum(),
							TaskId:  &mesos.TaskID{Value: proto.String(t.ID)},
							SlaveId: &mesos.SlaveID{Value: proto.String(t.SlaveId)},
						})
					}
					log.Debugf("Sending reconciliation request #%d", c)
					driver.ReconcileTasks(statuses)
				}

				if delay < maxReconciliationDelay {
					delay = 10 << c
					if delay >= maxReconciliationDelay {
						delay = maxReconciliationDelay
					}
				}

				c += 1
			}
		}

		log.Info("Reconciliation done")
		close(done)
	}()

	return &Reconcile{
		cancel: cancel,
		done:   done,
	}
}
Beispiel #5
0
func TestReconcile(t *testing.T) {
	dir, _ := os.Getwd()
	database.NewDB(fmt.Sprintf("%s/../db/test.db", dir))
	database.Clean()
	defer database.Close()

	maxReconciliationDelay = 1

	Convey("ReconcileTasks", t, func() {
		Convey("Finishes when there are no tasks", func() {
			driver := NewMockScheduler()
			r := ReconcileTasks(driver)

			select {
			case <-r.done:
			}

			So(driver.AssertNotCalled(t, "ReconcileTasks"), ShouldBeTrue)
		})

		Convey("Sends reconcile request", func() {
			driver := NewMockScheduler()
			driver.On("ReconcileTasks").Run(func(mock.Arguments) {
				t, err := database.ReadTask("1234")
				if err != nil {
					panic("mock error")
				}
				t.UpdateStatus(types.Status{
					Status: mesos.TaskState_TASK_RUNNING.String(),
					Time:   time.Now().Unix() + 1,
				})
				database.PutTask(&t)
			}).Once()

			database.PutTask(&types.EremeticTask{
				ID: "1234",
				Status: []types.Status{
					types.Status{
						Status: mesos.TaskState_TASK_STAGING.String(),
						Time:   time.Now().Unix(),
					},
				},
			})

			r := ReconcileTasks(driver)

			select {
			case <-r.done:
			}

			So(driver.AssertCalled(t, "ReconcileTasks"), ShouldBeTrue)
		})

		Convey("Cancel reconciliation", func() {
			driver := NewMockScheduler()

			database.PutTask(&types.EremeticTask{
				ID: "1234",
				Status: []types.Status{
					types.Status{
						Status: mesos.TaskState_TASK_STAGING.String(),
						Time:   time.Now().Unix(),
					},
				},
			})

			r := ReconcileTasks(driver)
			r.Cancel()

			select {
			case <-r.done:
			}

			So(driver.AssertNotCalled(t, "ReconcileTasks"), ShouldBeTrue)
		})
	})
}
Beispiel #6
0
func TestScheduler(t *testing.T) {
	dir, _ := os.Getwd()
	database.NewDB(fmt.Sprintf("%s/../db/test.db", dir))
	database.Clean()
	defer database.Close()

	Convey("eremeticScheduler", t, func() {
		s := eremeticScheduler{}
		id := "eremetic-task.9999"
		database.PutTask(&types.EremeticTask{ID: id})

		Convey("newTask", func() {
			task := types.EremeticTask{
				ID: "eremetic-task.1234",
			}
			offer := mesos.Offer{
				FrameworkId: &mesos.FrameworkID{
					Value: proto.String("framework-id"),
				},
				SlaveId: &mesos.SlaveID{
					Value: proto.String("slave-id"),
				},
				Hostname: proto.String("hostname"),
			}
			taskData, mesosTask := s.newTask(task, &offer)

			So(mesosTask.GetTaskId().GetValue(), ShouldEqual, task.ID)
			So(taskData.SlaveId, ShouldEqual, "slave-id")
		})

		Convey("createEremeticScheduler", func() {
			s := createEremeticScheduler()
			So(s.tasksCreated, ShouldEqual, 0)
		})

		Convey("API", func() {
			Convey("Registered", func() {
				driver := NewMockScheduler()
				driver.On("ReconcileTasks").Return("ok").Once()
				fID := mesos.FrameworkID{Value: proto.String("1234")}
				mInfo := mesos.MasterInfo{}
				s.Registered(driver, &fID, &mInfo)
				So(driver.AssertCalled(t, "ReconcileTasks"), ShouldBeTrue)
			})

			Convey("Reregistered", func() {
				driver := NewMockScheduler()
				driver.On("ReconcileTasks").Return("ok").Once()
				database.Clean()
				s.Reregistered(driver, &mesos.MasterInfo{})
				So(driver.AssertCalled(t, "ReconcileTasks"), ShouldBeTrue)
			})

			Convey("Disconnected", func() {
				s.Disconnected(nil)
			})

			Convey("ResourceOffers", func() {
				driver := NewMockScheduler()
				var offers []*mesos.Offer

				Convey("No offers", func() {
					s.ResourceOffers(driver, offers)
					So(driver.AssertNotCalled(t, "DeclineOffer"), ShouldBeTrue)
					So(driver.AssertNotCalled(t, "LaunchTasks"), ShouldBeTrue)
				})

				Convey("No tasks", func() {
					offers = append(offers, &mesos.Offer{Id: &mesos.OfferID{Value: proto.String("1234")}})
					driver.On("DeclineOffer").Return("declined").Once()
					s.ResourceOffers(driver, offers)
					So(driver.AssertCalled(t, "DeclineOffer"), ShouldBeTrue)
					So(driver.AssertNotCalled(t, "LaunchTasks"), ShouldBeTrue)
				})
			})

			Convey("StatusUpdate", func() {
				Convey("Running then failing", func() {
					s.StatusUpdate(nil, &mesos.TaskStatus{
						TaskId: &mesos.TaskID{
							Value: proto.String(id),
						},
						State: mesos.TaskState_TASK_RUNNING.Enum(),
					})
					task, _ := database.ReadTask(id)
					So(len(task.Status), ShouldEqual, 1)
					So(task.Status[0].Status, ShouldEqual, mesos.TaskState_TASK_RUNNING.String())

					s.StatusUpdate(nil, &mesos.TaskStatus{
						TaskId: &mesos.TaskID{
							Value: proto.String(id),
						},
						State: mesos.TaskState_TASK_FAILED.Enum(),
					})
					task, _ = database.ReadTask(id)

					So(len(task.Status), ShouldEqual, 2)
					So(task.Status[0].Status, ShouldEqual, mesos.TaskState_TASK_RUNNING.String())
					So(task.Status[1].Status, ShouldEqual, mesos.TaskState_TASK_FAILED.String())
				})

				Convey("Failing immediatly", func() {
					s.tasks = make(chan string, 100)
					s.StatusUpdate(nil, &mesos.TaskStatus{
						TaskId: &mesos.TaskID{
							Value: proto.String(id),
						},
						State: mesos.TaskState_TASK_FAILED.Enum(),
					})
					task, _ := database.ReadTask(id)
					So(len(task.Status), ShouldEqual, 2)
					So(task.Status[0].Status, ShouldEqual, mesos.TaskState_TASK_FAILED.String())
					So(task.Status[1].Status, ShouldEqual, mesos.TaskState_TASK_STAGING.String())

					select {
					case c := <-s.tasks:
						So(c, ShouldEqual, id)
					}
				})
			})

			Convey("FrameworkMessage", func() {
				driver := NewMockScheduler()
				message := `{"message": "this is a message"}`
				Convey("From Eremetic", func() {
					source := "eremetic-executor"
					executor := mesos.ExecutorID{
						Value: proto.String(source),
					}
					s.FrameworkMessage(driver, &executor, &mesos.SlaveID{}, message)
				})

				Convey("From an unknown source", func() {
					source := "other-source"
					executor := mesos.ExecutorID{
						Value: proto.String(source),
					}
					s.FrameworkMessage(driver, &executor, &mesos.SlaveID{}, message)
				})

				Convey("A bad json", func() {
					source := "eremetic-executor"
					executor := mesos.ExecutorID{
						Value: proto.String(source),
					}
					s.FrameworkMessage(driver, &executor, &mesos.SlaveID{}, "not a json")
				})
			})

			Convey("OfferRescinded", func() {
				s.OfferRescinded(nil, &mesos.OfferID{})
			})

			Convey("SlaveLost", func() {
				s.SlaveLost(nil, &mesos.SlaveID{})
			})

			Convey("ExecutorLost", func() {
				s.ExecutorLost(nil, &mesos.ExecutorID{}, &mesos.SlaveID{}, 2)
			})

			Convey("Error", func() {
				s.Error(nil, "Error")
			})
		})
	})

	Convey("ScheduleTask", t, func() {
		Convey("Given a valid Request", func() {
			scheduler := &eremeticScheduler{
				tasks: make(chan string, 100),
			}

			request := types.Request{
				TaskCPUs:    0.5,
				TaskMem:     22.0,
				DockerImage: "busybox",
				Command:     "echo hello",
			}

			Convey("It should put a task id on the channel", func() {
				taskID, err := scheduler.ScheduleTask(request)

				So(err, ShouldBeNil)

				select {
				case c := <-scheduler.tasks:
					So(c, ShouldEqual, taskID)
					task, _ := database.ReadTask(taskID)
					So(task.TaskCPUs, ShouldEqual, request.TaskCPUs)
					So(task.TaskMem, ShouldEqual, request.TaskMem)
					So(task.Command, ShouldEqual, request.Command)
					So(task.User, ShouldEqual, "root")
					So(task.Environment, ShouldBeEmpty)
					So(task.Image, ShouldEqual, request.DockerImage)
					So(task.ID, ShouldStartWith, "eremetic-task.")
				}
			})
		})
	})
}