// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an intial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "",
		*pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data
	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update about a new pod to "+
			"the updates chan when creating a new one.")

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}
	mockDriver.AssertExpectations(t)
}
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze.  When a task is lost, the executor needs to clean up
// its state.  When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// create fake apiserver
	podListWatch := NewMockPodsListWatch(api.PodList{})
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	// create and start executor
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
		ShutdownAlert: func() {
			close(kubeletFinished)
		},
		KubeletFinished: kubeletFinished,
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	pod := NewTestPod(1)
	podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
		*pod, &mesosproto.ExecutorInfo{})

	taskInfo := podTask.BuildTaskInfo()
	data, _ := testapi.Codec().Encode(pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running  _launchTask, __launchTask methods
	// when removing the task from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.FrameworkMessage(mockDriver, "task-lost:foo")
	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
}
Exemple #3
0
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("disabled due to flakiness; see #10795")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	testScheduler := New(Config{
		Executor:     executor,
		Client:       client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}),
		ScheduleFunc: FCFSScheduleFunc,
		Schedcfg:     *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	launchedTasks := make(chan *mesos.TaskInfo, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(taskInfos))
		launchedTasks <- taskInfos[0]
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod1 := NewTestPod(1)
	podListWatch.Add(pod1, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers1 := []*mesos.Offer{NewTestOffer(1)}
	testScheduler.ResourceOffers(nil, offers1)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))

		// and wait that framework message is sent to executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// start another pod
	podNum := 1
	startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
		podNum = podNum + 1

		// create pod and matching offer
		pod := NewTestPod(podNum)
		podListWatch.Add(pod, true) // notify watchers
		testScheduler.ResourceOffers(mockDriver, offers)
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask

		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil
		}
	}

	pod, launchedTask := startPod(offers1)

	// mock drvier.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver

	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcilePod will access apiserver for pod %v", pod.Name)
	}

	// 1. with pod deleted from the apiserver
	pod, launchedTask = startPod(offers1)
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 2. with pod still on the apiserver, not bound
	pod, launchedTask = startPod(offers1)
	failPodFromExecutor(launchedTask)

	// 3. with pod still on the apiserver, bound i.e. host!=""
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask)
}