Example #1
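// reconcile triggers task reconciliation, rate-limited by ReconcileDelay: implicit
// reconciliation asks Mesos to report on every task it knows about, explicit
// reconciliation sends a status list for the tracked tasks. Once ReconcileMaxTries
// is exceeded, the remaining tasks are killed.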
func (r *Reconciler) reconcile(driver scheduler.SchedulerDriver, implicit bool) {
	if time.Since(r.reconcileTime) >= r.ReconcileDelay {
		r.taskLock.Lock()
		defer r.taskLock.Unlock()

		r.reconciles++
		r.reconcileTime = time.Now()

		if r.reconciles > r.ReconcileMaxTries {
			for task := range r.tasks {
				Logger.Info("Reconciling exceeded %d tries, sending killTask for task %s", r.ReconcileMaxTries, task)
				driver.KillTask(util.NewTaskID(task))
			}
			r.reconciles = 0
		} else {
			if implicit {
				driver.ReconcileTasks(nil)
			} else {
				statuses := make([]*mesos.TaskStatus, 0)
				for task := range r.tasks {
					Logger.Debug("Reconciling %d/%d task state for task id %s", r.reconciles, r.ReconcileMaxTries, task)
					statuses = append(statuses, util.NewTaskStatus(util.NewTaskID(task), mesos.TaskState_TASK_STAGING))
				}
				driver.ReconcileTasks(statuses)
			}
		}
	}
}
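// reconcileTasks is a cluster-aware variant of the loop above: the retry counter is
// reset once the cluster is no longer reconciling, and only tasks still stuck in
// TaskStateReconciling are killed after reconcileMaxTries attempts.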
func (s *Scheduler) reconcileTasks(force bool) {
	if time.Since(s.reconcileTime) >= reconcileDelay {
		if !s.cluster.IsReconciling() {
			s.reconciles = 0
		}
		s.reconciles++
		s.reconcileTime = time.Now()

		if s.reconciles > reconcileMaxTries {
			for _, task := range s.cluster.GetTasksWithState(TaskStateReconciling) {
				if task.Data().TaskID != "" {
					Logger.Infof("Reconciling exceeded %d tries for task %s, sending killTask for task %s", reconcileMaxTries, task.Data().ID, task.Data().TaskID)
					s.driver.KillTask(util.NewTaskID(task.Data().TaskID))

					task.Data().ResetTaskInfo()
				}
			}
		} else {
			if force {
				s.driver.ReconcileTasks(nil)
			} else {
				statuses := make([]*mesos.TaskStatus, 0)
				for _, task := range s.cluster.GetAllTasks() {
					if task.Data().TaskID != "" {
						task.Data().State = TaskStateReconciling
						Logger.Infof("Reconciling %d/%d task state for id %s, task id %s", s.reconciles, reconcileMaxTries, task.Data().ID, task.Data().TaskID)
						statuses = append(statuses, util.NewTaskStatus(util.NewTaskID(task.Data().TaskID), mesos.TaskState_TASK_STAGING))
					}
				}
				s.driver.ReconcileTasks(statuses)
			}
		}
	}
}
Example #3
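// TestScheduler drives a running -> running -> failed status-update sequence through
// the scheduler, with reconciliation stubbed out and a mocked driver.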
func TestScheduler(t *gotesting.T) {
	mockdriver := &MockSchedulerDriver{}

	ntasks := 1
	chillFactor := 0
	testScheduler := NewEtcdScheduler(
		ntasks,
		chillFactor,
		0,
		false,
		[]*mesos.CommandInfo_URI{},
		false,
		4096,
		1,
		256,
	)

	// Skip initialization logic, tested in TestStartup.
	testScheduler.state = Mutable

	reconciliation := map[string]string{}
	testScheduler.reconciliationInfoFunc = func([]string, string, string) (map[string]string, error) {
		return reconciliation, nil
	}
	testScheduler.updateReconciliationInfoFunc = func(info map[string]string, _ []string, _ string, _ string) error {
		reconciliation = info
		return nil
	}

	taskStatus_task_starting := util.NewTaskStatus(
		util.NewTaskID("etcd-1 localhost 1 1 1"),
		mesos.TaskState_TASK_RUNNING,
	)
	testScheduler.StatusUpdate(mockdriver, taskStatus_task_starting)

	taskStatus_task_running := util.NewTaskStatus(
		util.NewTaskID("etcd-1 localhost 1 1 1"),
		mesos.TaskState_TASK_RUNNING,
	)
	testScheduler.StatusUpdate(mockdriver, taskStatus_task_running)

	taskStatus_task_failed := util.NewTaskStatus(
		util.NewTaskID("etcd-1 localhost 1 1 1"),
		mesos.TaskState_TASK_FAILED,
	)
	testScheduler.StatusUpdate(mockdriver, taskStatus_task_failed)

	// assert that the mock was invoked
	mockdriver.AssertExpectations(t)
}
func (suite *SchedulerTestSuite) TestSchdulerDriverKillTask() {
	messenger := messenger.NewMockedMessenger()
	messenger.On("Start").Return(nil)
	messenger.On("UPID").Return(&upid.UPID{})
	messenger.On("Send").Return(nil)
	messenger.On("Stop").Return(nil)
	messenger.On("Route").Return(nil)

	driver, err := newTestSchedulerDriver(NewMockScheduler(), suite.framework, suite.master, nil)
	suite.NoError(err)
	driver.messenger = messenger
	suite.True(driver.Stopped())

	go func() {
		driver.Run()
	}()
	time.Sleep(time.Millisecond * 1)
	driver.setConnected(true) // simulated
	suite.False(driver.Stopped())
	suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())

	stat, err := driver.KillTask(util.NewTaskID("test-task-1"))
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #5
// deletes the pod and task associated with the task identified by tid and sends a task
// status update to mesos. also attempts to reset the suicide watch.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
	task, ok := k.tasks[tid]
	if !ok {
		log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
		return
	}
	delete(k.tasks, tid)
	k.resetSuicideWatch(driver)

	pid := task.podName
	if _, found := k.pods[pid]; !found {
		log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
	} else {
		log.V(2).Infof("deleting pod %v for task %v", pid, tid)
		delete(k.pods, pid)

		// Send the pod updates to the channel.
		update := kubelet.PodUpdate{Op: kubelet.SET}
		for _, p := range k.pods {
			update.Pods = append(update.Pods, p)
		}
		k.updateChan <- update
	}
	// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
	k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
}
func (suite *SchedulerTestSuite) TestSchdulerDriverLaunchTasks() {
	driver := newTestDriver(suite.T(), driverConfigMessenger(mock_scheduler.New(), suite.framework, suite.master, nil, mockedMessenger()))

	go func() {
		driver.Run()
	}()
	<-driver.Started()
	driver.SetConnected(true) // simulated
	suite.True(driver.Running())

	task := util.NewTaskInfo(
		"simple-task",
		util.NewTaskID("simpe-task-1"),
		util.NewSlaveID("slave-1"),
		[]*mesos.Resource{util.NewScalarResource("mem", 400)},
	)
	task.Command = util.NewCommandInfo("pwd")
	tasks := []*mesos.TaskInfo{task}

	stat, err := driver.LaunchTasks(
		[]*mesos.OfferID{{}},
		tasks,
		&mesos.Filters{},
	)
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #7
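// Init performs startup cleanup (killing stale kubelet containers, resetting the
// suicide watch) and installs a pod-event filter that kills pod-tasks on incompatible
// updates and reports deleted pod-tasks to Mesos as TASK_KILLED.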
func (k *Executor) Init(driver bindings.ExecutorDriver) {
	defer close(k.initCompleted)

	k.killKubeletContainers()
	k.resetSuicideWatch(driver)

	k.watcher.addFilter(func(podEvent *PodEvent) bool {
		switch podEvent.eventType {
		case PodEventIncompatibleUpdate:
			log.Warningf("killing %s because of an incompatible update", podEvent.FormatShort())
			k.killPodTask(driver, podEvent.taskID)
			// halt processing of this event; when the pod is deleted we'll receive another
			// event for that.
			return false

		case PodEventDeleted:
			// an active pod-task was deleted, alert mesos:
			// send back a TASK_KILLED status, we completed the pod-task lifecycle normally.
			k.resetSuicideWatch(driver)
			k.sendStatus(driver, newStatus(mutil.NewTaskID(podEvent.taskID), mesos.TaskState_TASK_KILLED, "pod-deleted"))
		}
		return true
	})

	//TODO(jdef) monitor kubeletFinished and shutdown if it happens
}
Example #8
// deletes the pod and task associated with the task identified by tid and sends a task
// status update to mesos. also attempts to reset the suicide watch.
// Assumes that the caller is locking around pod and task state.
func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
	task, ok := k.tasks[tid]
	if !ok {
		log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
		return
	}
	delete(k.tasks, tid)
	k.resetSuicideWatch(driver)

	pid := task.podName
	pod, found := k.pods[pid]
	if !found {
		log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
	} else {
		log.V(2).Infof("deleting pod %v for task %v", pid, tid)
		delete(k.pods, pid)

		// tell the kubelet to remove the pod
		k.sendPodUpdate(&kubetypes.PodUpdate{
			Op:   kubetypes.REMOVE,
			Pods: []*api.Pod{pod},
		})
	}
	// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
	k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
}
func (suite *SchedulerTestSuite) TestSchdulerDriverReconcileTasks() {
	messenger := messenger.NewMockedMessenger()
	messenger.On("Start").Return(nil)
	messenger.On("UPID").Return(&upid.UPID{})
	messenger.On("Send").Return(nil)
	messenger.On("Stop").Return(nil)
	messenger.On("Route").Return(nil)

	driver, err := newTestSchedulerDriver(NewMockScheduler(), suite.framework, suite.master, nil)
	suite.NoError(err)
	driver.messenger = messenger
	suite.True(driver.Stopped())

	driver.Start()
	driver.setConnected(true) // simulated
	suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())

	stat, err := driver.ReconcileTasks(
		[]*mesos.TaskStatus{
			util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_FINISHED),
		},
	)
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #10
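// fakeStatusUpdate builds a master-sourced TaskStatus with an empty JSON data
// payload, for use in tests.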
func fakeStatusUpdate(taskId string, state mesos.TaskState) *mesos.TaskStatus {
	status := mesosutil.NewTaskStatus(mesosutil.NewTaskID(taskId), state)
	status.Data = []byte("{}") // empty json
	masterSource := mesos.TaskStatus_SOURCE_MASTER
	status.Source = &masterSource
	return status
}
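// stopTask kills the task if it is still staging or running, then marks it inactive
// and clears its TaskInfo.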
func (s *Scheduler) stopTask(task Task) {
	if task.Data().State == TaskStateRunning || task.Data().State == TaskStateStaging {
		Logger.Infof("Stopping task %s", task.Data().TaskID)
		s.driver.KillTask(util.NewTaskID(task.Data().TaskID))
	}

	task.Data().State = TaskStateInactive
	task.Data().ResetTaskInfo()
}
Example #12
// Test that we can handle different status updates; TODO: check state transitions.
func TestStatus_Update(t *testing.T) {

	mockdriver := MockSchedulerDriver{}
	// setup expectations
	mockdriver.On("KillTask", util.NewTaskID("test-task-001")).Return(mesos.Status_DRIVER_RUNNING, nil)

	testFramework := &framework{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames: newSlaveRegistry(),
		driver:         &mockdriver,
		sched:          mockScheduler(),
	}

	taskStatus_task_starting := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_RUNNING,
	)
	testFramework.StatusUpdate(testFramework.driver, taskStatus_task_starting)

	taskStatus_task_running := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_RUNNING,
	)
	testFramework.StatusUpdate(testFramework.driver, taskStatus_task_running)

	taskStatus_task_failed := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesos.TaskState_TASK_FAILED,
	)
	testFramework.StatusUpdate(testFramework.driver, taskStatus_task_failed)

	// assert that the mock was invoked
	mockdriver.AssertExpectations(t)
}
func TestExecutorDriverRunTaskEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send runtask event to driver
	pbMsg := &mesos.RunTaskMessage{
		FrameworkId: util.NewFrameworkID(frameworkID),
		Framework: util.NewFrameworkInfo(
			"test", "test-framework-001", util.NewFrameworkID(frameworkID),
		),
		Pid: proto.String(server.PID.String()),
		Task: util.NewTaskInfo(
			"test-task",
			util.NewTaskID("test-task-001"),
			util.NewSlaveID(slaveID),
			[]*mesos.Resource{
				util.NewScalarResource("mem", 112),
				util.NewScalarResource("cpus", 2),
			},
		),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-ch:
	case <-time.After(time.Second * 2):
		log.Errorf("Tired of waiting...")
	}

}
func (suite *SchedulerTestSuite) TestSchdulerDriverAcceptOffersWithError() {
	sched := mock_scheduler.New()
	sched.On("StatusUpdate").Return(nil)
	sched.On("Error").Return()

	msgr := mockedMessenger()
	driver := newTestDriver(suite.T(), driverConfigMessenger(sched, suite.framework, suite.master, nil, msgr))
	driver.OnDispatch(func(_ context.Context, _ *upid.UPID, _ proto.Message) error {
		return fmt.Errorf("Unable to send message")
	})

	go func() {
		driver.Run()
	}()
	<-driver.Started()
	driver.SetConnected(true) // simulated
	suite.True(driver.Running())

	// setup an offer
	offer := util.NewOffer(
		util.NewOfferID("test-offer-001"),
		suite.framework.Id,
		util.NewSlaveID("test-slave-001"),
		"test-slave(1)@localhost:5050",
	)

	pid, err := upid.Parse("test-slave(1)@localhost:5050")
	suite.NoError(err)
	driver.CacheOffer(offer, pid)

	// launch task
	task := util.NewTaskInfo(
		"simple-task",
		util.NewTaskID("simpe-task-1"),
		util.NewSlaveID("test-slave-001"),
		[]*mesos.Resource{util.NewScalarResourceWithReservation("mem", 400, "principal", "role")},
	)
	task.Command = util.NewCommandInfo("pwd")
	task.Executor = util.NewExecutorInfo(util.NewExecutorID("test-exec"), task.Command)
	tasks := []*mesos.TaskInfo{task}

	operations := []*mesos.Offer_Operation{util.NewLaunchOperation(tasks)}

	stat, err := driver.AcceptOffers(
		[]*mesos.OfferID{offer.Id},
		operations,
		&mesos.Filters{},
	)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
	suite.Error(err)
}
func (suite *SchedulerTestSuite) TestSchdulerDriverKillTask() {
	driver := newTestDriver(suite.T(), driverConfigMessenger(mock_scheduler.New(), suite.framework, suite.master, nil, mockedMessenger()))

	go func() {
		driver.Run()
	}()
	<-driver.Started()
	driver.SetConnected(true) // simulated
	suite.True(driver.Running())

	stat, err := driver.KillTask(util.NewTaskID("test-task-1"))
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #16
// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/
func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error {
	log.Info("explicit reconcile tasks")

	// tell mesos to send us the latest status updates for all the non-terminal tasks that we know about
	statusList := []*mesos.TaskStatus{}
	remaining := sets.KeySet(reflect.ValueOf(taskToSlave))
	for taskId, slaveId := range taskToSlave {
		if slaveId == "" {
			delete(taskToSlave, taskId)
			continue
		}
		statusList = append(statusList, &mesos.TaskStatus{
			TaskId:  mutil.NewTaskID(taskId),
			SlaveId: mutil.NewSlaveID(slaveId),
			State:   mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality
		})
	}

	select {
	case <-cancel:
		return reconciliationCancelledErr
	default:
		if _, err := driver.ReconcileTasks(statusList); err != nil {
			return err
		}
	}

	start := time.Now()
	first := true
	for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 {
		first = false
		// nothing to do here other than wait for status updates..
		if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
			backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
		}
		select {
		case <-cancel:
			return reconciliationCancelledErr
		case <-time.After(backoff):
			for taskId := range remaining {
				if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
					// keep this task in remaining list
					continue
				}
				remaining.Delete(taskId)
			}
		}
	}
	return nil
}
func (suite *SchedulerTestSuite) TestSchdulerDriverReconcileTasks() {
	driver := newTestDriver(suite.T(), driverConfigMessenger(mock_scheduler.New(), suite.framework, suite.master, nil, mockedMessenger()))

	driver.Start()
	driver.SetConnected(true) // simulated
	suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())

	stat, err := driver.ReconcileTasks(
		[]*mesos.TaskStatus{
			util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_FINISHED),
		},
	)
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #18
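// TestStatusUpdateAckRace_Issue103 exercises the race between handling a status-update
// acknowledgement and sending a fresh status update (issue #103).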
func TestStatusUpdateAckRace_Issue103(t *testing.T) {
	driver, _, _ := createTestExecutorDriver(t)
	_, err := driver.Start()
	assert.NoError(t, err)

	msg := &mesosproto.StatusUpdateAcknowledgementMessage{}
	go driver.statusUpdateAcknowledgement(nil, msg)

	taskStatus := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesosproto.TaskState_TASK_STAGING,
	)

	driver.SendStatusUpdate(taskStatus)
}
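// reconcile is another take on the reconciler from Example #1: the retry counter is
// reset once no task is reconciling anymore, and only tasks still in
// TaskStateReconciling are killed after ReconcileMaxTries attempts.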
func (r *Reconciler) reconcile(driver scheduler.SchedulerDriver, implicit bool) {
	if time.Since(r.reconcileTime) >= r.ReconcileDelay {
		if !r.tasks.IsReconciling() {
			r.reconciles = 0
		}
		r.reconciles++
		r.reconcileTime = time.Now()

		if r.reconciles > r.ReconcileMaxTries {
			for _, task := range r.tasks.GetWithFilter(func(task Task) bool {
				return task.Data().State == TaskStateReconciling
			}) {
				if task.Data().TaskID != "" {
					Logger.Info("Reconciling exceeded %d tries for task %s, sending killTask for task %s", r.ReconcileMaxTries, task.Data().ID, task.Data().TaskID)
					driver.KillTask(util.NewTaskID(task.Data().TaskID))

					task.Data().Reset()
				}
			}
		} else {
			if implicit {
				driver.ReconcileTasks(nil)
			} else {
				statuses := make([]*mesos.TaskStatus, 0)
				for _, task := range r.tasks.GetAll() {
					if task.Data().TaskID != "" {
						task.Data().State = TaskStateReconciling
						Logger.Info("Reconciling %d/%d task state for id %s, task id %s", r.reconciles, r.ReconcileMaxTries, task.Data().ID, task.Data().TaskID)
						statuses = append(statuses, util.NewTaskStatus(util.NewTaskID(task.Data().TaskID), mesos.TaskState_TASK_STAGING))
					}
				}
				driver.ReconcileTasks(statuses)
			}
		}
	}
}
Example #20
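// prepareTaskInfo assembles a TaskInfo for the given command on the offered slave and
// increments the launched-task counter.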
func (sched *NoneScheduler) prepareTaskInfo(offer *mesos.Offer, c *Command) *mesos.TaskInfo {
	sched.tasksLaunched++

	task := &mesos.TaskInfo{
		Name:    proto.String("none-task-" + c.Id),
		TaskId:  util.NewTaskID(c.Id),
		SlaveId: offer.SlaveId,
		Command: c.GetCommandInfo(),
		// TODO: add role to resource allocation?
		Resources: c.GetResources(),
		Container: c.ContainerInfo,
	}
	log.Infof("Prepared task: %s with offer %s for launch\n", task.GetName(), offer.Id.GetValue())

	return task
}
Example #21
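// BuildTaskInfo maps the pod task's precomputed Spec onto a Mesos TaskInfo, failing
// if no Spec has been set.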
func (t *T) BuildTaskInfo() (*mesos.TaskInfo, error) {
	if t.Spec == nil {
		return nil, errors.New("no podtask.T.Spec given, cannot build task info")
	}

	info := &mesos.TaskInfo{
		Name:      proto.String(generateTaskName(&t.Pod)),
		TaskId:    mutil.NewTaskID(t.ID),
		Executor:  t.Spec.Executor,
		Data:      t.Spec.Data,
		Resources: t.Spec.Resources,
		SlaveId:   mutil.NewSlaveID(t.Spec.SlaveID),
	}

	return info, nil
}
Example #22
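// This BuildTaskInfo variant derives cpu, memory and port resources directly from the
// Spec instead of using a precomputed resource list.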
func (t *T) BuildTaskInfo() *mesos.TaskInfo {
	info := &mesos.TaskInfo{
		Name:     proto.String(generateTaskName(&t.Pod)),
		TaskId:   mutil.NewTaskID(t.ID),
		SlaveId:  mutil.NewSlaveID(t.Spec.SlaveID),
		Executor: t.executor,
		Data:     t.Spec.Data,
		Resources: []*mesos.Resource{
			mutil.NewScalarResource("cpus", float64(t.Spec.CPU)),
			mutil.NewScalarResource("mem", float64(t.Spec.Memory)),
		},
	}
	if portsResource := rangeResource("ports", t.Spec.Ports); portsResource != nil {
		info.Resources = append(info.Resources, portsResource)
	}
	return info
}
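For context, a minimal sketch of how the built TaskInfo might be launched, assuming a
connected SchedulerDriver named driver and a cached offer (both names are illustrative;
the LaunchTasks call mirrors the scheduler test examples above):

info := t.BuildTaskInfo()
stat, err := driver.LaunchTasks(
	[]*mesos.OfferID{offer.Id},
	[]*mesos.TaskInfo{info},
	&mesos.Filters{},
)
if err != nil {
	log.Errorf("launch failed in driver state %s: %v", stat, err)
}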
Example #23
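// TASK_STAGING updates may only originate from Mesos itself, so sending one from an
// executor is expected to fail and abort the driver.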
func TestExecutorDriverSendStatusUpdateStaging(t *testing.T) {

	driver, _ := createTestExecutorDriver(t)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)

	taskStatus := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesosproto.TaskState_TASK_STAGING,
	)

	stat, err = driver.SendStatusUpdate(taskStatus)
	assert.Error(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
}
Example #24
// KillJobTasks monitors all Jobs marked as STOPPED and kills all of their running Tasks
//
// KillJobTasks runs in a separate goroutine started in Worker.Start call
// It returns error if it can't read Job Store or the goroutine it is running in has been stopped
func (bw *BasicWorker) KillJobTasks(driver scheduler.SchedulerDriver) error {
	state := taurus.STOPPED
	errChan := make(chan error)
	ticker := time.NewTicker(StoreScanTick)
	go func() {
		var killErr error
	killer:
		for {
			select {
			case <-bw.done:
				ticker.Stop()
				killErr = nil
				log.Printf("Finishing %s Task queuer", state)
				break killer
			case <-ticker.C:
				jobs, err := bw.store.GetJobs(state)
				if err != nil {
					killErr = fmt.Errorf("Error reading %s Jobs: %s", state, err)
					break killer
				}
				for _, job := range jobs {
					ctx, cancel := context.WithTimeout(context.Background(), MasterTimeout)
					mesosTasks, err := taurus.MesosTasks(ctx, bw.master, job.Id, mesos.TaskState_TASK_RUNNING.Enum())
					if err != nil {
						log.Printf("Failed to read tasks for Job %s: %s", job.Id, err)
						cancel()
						continue
					}
					for taskId := range mesosTasks {
						mesosTaskId := mesosutil.NewTaskID(taskId)
						killStatus, err := driver.KillTask(mesosTaskId)
						if err != nil {
							log.Printf("Mesos in state %s failed to kill the task %s: %s", killStatus, taskId, err)
							continue
						}
					}
					// release the per-iteration context once its tasks have been processed
					cancel()
				}
			}
		}
		errChan <- killErr
		log.Printf("%s tasks killer ticker stopped", state)
	}()

	return <-errChan
}
Example #25
func TestExecutorDriverSendStatusUpdate(t *testing.T) {

	driver, _, _ := createTestExecutorDriver(t)

	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
	driver.connected = true
	driver.stopped = false

	taskStatus := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesosproto.TaskState_TASK_RUNNING,
	)

	stat, err = driver.SendStatusUpdate(taskStatus)
	assert.NoError(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
}
func (suite *SchedulerTestSuite) TestSchdulerDriverLaunchTasks() {
	messenger := messenger.NewMockedMessenger()
	messenger.On("Start").Return(nil)
	messenger.On("UPID").Return(&upid.UPID{})
	messenger.On("Send").Return(nil)
	messenger.On("Stop").Return(nil)
	messenger.On("Route").Return(nil)

	driver, err := newTestSchedulerDriver(NewMockScheduler(), suite.framework, suite.master, nil)
	suite.NoError(err)
	driver.messenger = messenger
	suite.True(driver.Stopped())

	go func() {
		driver.Run()
	}()
	time.Sleep(time.Millisecond * 1)
	driver.setConnected(true) // simulated
	suite.False(driver.Stopped())
	suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())

	task := util.NewTaskInfo(
		"simple-task",
		util.NewTaskID("simpe-task-1"),
		util.NewSlaveID("slave-1"),
		[]*mesos.Resource{util.NewScalarResource("mem", 400)},
	)
	task.Command = util.NewCommandInfo("pwd")
	tasks := []*mesos.TaskInfo{task}

	stat, err := driver.LaunchTasks(
		[]*mesos.OfferID{{}},
		tasks,
		&mesos.Filters{},
	)
	suite.NoError(err)
	suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
}
Example #27
func TestExecutorDriverSendStatusUpdateStaging(t *testing.T) {

	driver, _, _ := createTestExecutorDriver(t)

	exec := NewMockedExecutor()
	exec.On("Error").Return(nil)
	driver.exec = exec

	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat)
	driver.connected = true
	driver.stopped = false

	taskStatus := util.NewTaskStatus(
		util.NewTaskID("test-task-001"),
		mesosproto.TaskState_TASK_STAGING,
	)

	stat, err = driver.SendStatusUpdate(taskStatus)
	assert.Error(t, err)
	assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat)
}
Example #28
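// This BuildTaskInfo variant clones the executor prototype per task and pins the
// executor's --hostname-override to the assigned slave so the kubelet reports the
// node name the scheduler expects.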
func (t *T) BuildTaskInfo(prototype *mesos.ExecutorInfo) *mesos.TaskInfo {
	info := &mesos.TaskInfo{
		Name:     proto.String(generateTaskName(&t.Pod)),
		TaskId:   mutil.NewTaskID(t.ID),
		SlaveId:  mutil.NewSlaveID(t.Spec.SlaveID),
		Executor: proto.Clone(prototype).(*mesos.ExecutorInfo),
		Data:     t.Spec.Data,
		Resources: []*mesos.Resource{
			mutil.NewScalarResource("cpus", float64(t.Spec.CPU)),
			mutil.NewScalarResource("mem", float64(t.Spec.Memory)),
		},
	}

	if portsResource := rangeResource("ports", t.Spec.Ports); portsResource != nil {
		info.Resources = append(info.Resources, portsResource)
	}

	// The hostname of the executor needs to match that of the offer, otherwise
	// the kubelet node status checker/updater is very unhappy.
	setCommandArgument(info.Executor, "--hostname-override", t.Spec.AssignedSlave, true)

	return info
}
func TestExecutorDriverStatusUpdateAcknowledgement(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send ACK from server
	pbMsg := &mesos.StatusUpdateAcknowledgementMessage{
		SlaveId:     util.NewSlaveID(slaveID),
		FrameworkId: util.NewFrameworkID(frameworkID),
		TaskId:      util.NewTaskID("test-task-001"),
		Uuid:        []byte(uuid.NewRandom().String()),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)
	<-time.After(time.Second * 1)
}
func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverStatusUpdatedEvent() {
	t := suite.T()
	var wg sync.WaitGroup
	wg.Add(2)
	suite.config = mockServerConfigurator(func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite) {
		defaultMockServerConfigurator(frameworkId, suite)
		suite.server.On("/master/mesos.internal.StatusUpdateAcknowledgementMessage").Do(func(rsp http.ResponseWriter, req *http.Request) {
			log.Infoln("Master cvd ACK")
			data, _ := ioutil.ReadAll(req.Body)
			defer req.Body.Close()
			assert.NotNil(t, data)
			wg.Done()
			log.Infof("MockMaster - Done with wait group")
		})
		suite.sched.wg = &wg
	})

	ok := suite.configureServerWithRegisteredFramework()
	suite.True(ok, "failed to establish running test server and driver")

	// Send a event to this SchedulerDriver (via http) to test handlers.
	pbMsg := &mesos.StatusUpdateMessage{
		Update: util.NewStatusUpdate(
			suite.registeredFrameworkId,
			util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_STARTING),
			float64(time.Now().Unix()),
			[]byte("test-abcd-ef-3455-454-001"),
		),
		// note: cannot use driver's pid here if we want an ACK
		Pid: proto.String("test-slave-001(1)@foo.bar:1234"),
	}
	pbMsg.Update.SlaveId = &mesos.SlaveID{Value: proto.String("test-slave-001")}

	c := suite.newMockClient()
	c.SendMessage(suite.driver.UPID(), pbMsg)
	wg.Wait()
}