Example #1
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze.  When a task is lost, the executor needs to clean up
// its state.  When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// create fake apiserver
	podListWatch := NewMockPodsListWatch(api.PodList{})
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	// create and start executor
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
		ShutdownAlert: func() {
			close(kubeletFinished)
		},
		KubeletFinished: kubeletFinished,
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	pod := NewTestPod(1)
	podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
		*pod, &mesosproto.ExecutorInfo{})

	taskInfo := podTask.BuildTaskInfo()
	data, _ := testapi.Default.Codec().Encode(pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// wait until the pod is really running, because otherwise a TASK_FAILED could be
	// triggered by the asynchronously running _launchTask and __launchTask methods
	// when the task is removed from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.FrameworkMessage(mockDriver, "task-lost:foo")
	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
}
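
The expectations above work because MockExecutorDriver records each SendStatusUpdate call keyed by the task state, so On("SendStatusUpdate", TaskState_TASK_RUNNING) can match it. A minimal sketch of how such a mock could be built on testify's mock package; the concrete type in the real test helpers may differ:

// MockExecutorDriver is sketched here under the assumption that it embeds
// testify's mock.Mock and records SendStatusUpdate calls keyed by task state.
type MockExecutorDriver struct {
	mock.Mock
}

func (m *MockExecutorDriver) SendStatusUpdate(status *mesosproto.TaskStatus) (mesosproto.Status, error) {
	// Record the call keyed by the task state rather than the full status,
	// so expectations can match on TaskState_TASK_STARTING and friends.
	args := m.Called(status.GetState())
	return args.Get(0).(mesosproto.Status), args.Error(1)
}

func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) {
	args := m.Called()
	return args.Get(0).(mesosproto.Status), args.Error(1)
}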
Example #2
// TestPlugin_LifeCycle creates the scheduler plugin with the config returned by the
// scheduler and plays through the whole life cycle of the plugin while creating,
// deleting, and failing pods.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("This test is flaky, see #11901")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor info with some data for static pods
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	as := NewAllocationStrategy(
		podtask.DefaultPredicate,
		podtask.NewDefaultProcurement(mresource.DefaultDefaultContainerCPULimit, mresource.DefaultDefaultContainerMemLimit))
	testScheduler := New(Config{
		Executor: executor,
		Client:   client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Default.Version()}),
		Scheduler: NewFCFSPodScheduler(as, func(node string) *api.Node {
			obj, _, _ := nodeStore.GetByKey(node)
			if obj == nil {
				return nil
			}
			return obj.(*api.Node)
		}),
		Schedcfg: *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c).(*schedulingPlugin)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	type LaunchedTask struct {
		offerId  mesos.OfferID
		taskInfo *mesos.TaskInfo
	}
	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))
		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
	mockDriver.On("DeclineOffer", mAny("*mesosproto.OfferID"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait until elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	podListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	testScheduler.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST))

		// and check that a framework message is sent to the executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// Launch a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

		// supply a matching offer
		testScheduler.ResourceOffers(mockDriver, offers)

		// and wait to get scheduled
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}
	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		podListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask, offer
		}

		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock driver.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED))

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod on a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	testScheduler.OfferRescinded(mockDriver, offers[0].Id)
	testScheduler.OfferRescinded(mockDriver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver

	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, time.Second, func() bool {
		t, _ := p.api.tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	//    expected: nothing happens, since pod updates are not supported; see the ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
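
The started and killTaskCalled channels come from the joinableDriver's Upon() helper, which closes a channel the first time the most recently declared expectation fires. A plausible sketch, assuming testify's exported ExpectedCalls slice and RunFn hook; the real helper may be wired differently:

// Upon (hypothetical sketch) returns a channel that is closed the first time
// the most recently declared expectation on the mock is invoked, wrapping any
// Run callback that was already attached to it.
func (m *joinableDriver) Upon() chan struct{} {
	called := make(chan struct{})
	var once sync.Once
	last := m.ExpectedCalls[len(m.ExpectedCalls)-1]
	prev := last.RunFn
	last.RunFn = func(args mock.Arguments) {
		if prev != nil {
			prev(args)
		}
		once.Do(func() { close(called) })
	}
	return called
}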
Example #3
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod list watch to back the fake apiserver below
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an initial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "",
		*pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Default.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data
	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update to the updates chan "+
			"when creating a new pod.")

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}
	mockDriver.AssertExpectations(t)
}
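
kmruntime.After is what lets the blocking statusUpdateCalls.Wait race against a timeout in the selects above; presumably it is little more than the following:

// After (sketch) runs f in a goroutine and returns a channel that is closed
// once f returns, so a blocking call like sync.WaitGroup.Wait can take part
// in a select with a timeout.
func After(f func()) <-chan struct{} {
	done := make(chan struct{})
	go func() {
		defer close(done)
		f()
	}()
	return done
}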
Example #4
func (srv *TestServer) WaitForNode(name string) {
	assertext.EventuallyTrue(srv.t, util.ForeverTestTimeout, func() bool {
		return srv.LookupNode(name) != nil
	})
}
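
WaitForNode leans on assertext.EventuallyTrue, which polls a condition until it holds or the timeout expires. A minimal sketch in that spirit; the real helper's polling interval and failure message may differ:

// EventuallyTrue (sketch) re-evaluates fn at a fixed interval until it
// returns true or the timeout elapses, failing the test on timeout.
func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) {
	deadline := time.Now().Add(timeout)
	for !fn() {
		if time.Now().After(deadline) {
			t.Fatalf("condition not met within %v: %v", timeout, msgAndArgs)
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
}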
Example #5
// TestScheduler_LifeCycle creates a scheduler plugin with the config returned by the scheduler,
// and plays through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestScheduler_LifeCycle(t *testing.T) {
	assert := &EventAssertions{*assert.New(t)}
	lt := newLifecycleTest(t)
	defer lt.Close()

	// run plugin
	launchedTasks := lt.Start()
	defer lt.End()

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	lt.podsListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(lt.eventObs, controller.FailedScheduling, "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	lt.framework.ResourceOffers(nil, offers)

	// first offer is declined because node is not available yet
	lt.apiServer.WaitForNode("some_hostname")

	// add one more offer
	lt.framework.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(lt.eventObs, controller.Scheduled)
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING),
		)

		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING),
		)

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		lt.driver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)

		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST),
		)

		// and check that a framework message is sent to the executor
		lt.driver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(util.ForeverTestTimeout):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	offeredNodes := make(map[string]struct{})

	// Launch a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(lt.eventObs, controller.FailedScheduling, "failedScheduling event not received")

		// supply a matching offer
		lt.framework.ResourceOffers(lt.driver, offers)
		for _, offer := range offers {
			if _, ok := offeredNodes[offer.GetHostname()]; !ok {
				offeredNodes[offer.GetHostname()] = struct{}{}
				lt.apiServer.WaitForNode(offer.GetHostname())

				// reoffer since it must have been declined above
				lt.framework.ResourceOffers(lt.driver, []*mesos.Offer{offer})
			}
		}

		// and wait to get scheduled
		assert.EventWithReason(lt.eventObs, controller.Scheduled)

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(util.ForeverTestTimeout):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}

	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		lt.podsListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			lt.framework.StatusUpdate(
				lt.driver,
				newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING),
			)
			lt.framework.StatusUpdate(
				lt.driver,
				newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING),
			)

			return pod, launchedTask, offer
		}

		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock driver.KillTask, should be invoked when a pod is deleted
	lt.driver.On("KillTask",
		mock.AnythingOfType("*mesosproto.TaskID"),
	).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := lt.driver.Upon()

	// stop it again via the apiserver mock
	lt.podsListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED),
		)

	case <-time.After(util.ForeverTestTimeout):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod on a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	lt.framework.OfferRescinded(lt.driver, offers[0].Id)
	lt.framework.OfferRescinded(lt.driver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver

	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := lt.apiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		lt.framework.StatusUpdate(lt.driver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, util.ForeverTestTimeout, func() bool {
			return lt.apiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	lt.podsListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, util.ForeverTestTimeout, func() bool {
		t, _ := lt.sched.Tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	//    expected: nothing happens, since pod updates are not supported; see the ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	lt.podsListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
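
Every status transition in this test is built by newTaskStatusForTask. Its shape is presumably close to the following sketch, pairing the launched task's ID with the desired state:

// newTaskStatusForTask (hypothetical sketch) wraps a task's ID and a desired
// state into the TaskStatus protobuf that StatusUpdate expects.
func newTaskStatusForTask(task *mesos.TaskInfo, state mesos.TaskState) *mesos.TaskStatus {
	return &mesos.TaskStatus{
		TaskId: task.TaskId,
		State:  &state,
	}
}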
Example #6
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze.  When a task is lost, the executor needs to clean up
// its state.  When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// TODO(jdef): Fix the unexpected call in the mocking system.
	t.Skip("This test started failing when panic catching was disabled.")
	var (
		mockDriver      = &MockExecutorDriver{}
		kubeletFinished = make(chan struct{})
		registry        = newFakeRegistry()
		executor        = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			ShutdownAlert: func() {
				close(kubeletFinished)
			},
			KubeletFinished: kubeletFinished,
			Registry:        registry,
		})
		pod         = NewTestPod(1)
		mockKubeAPI = &mockKubeAPI{}
	)

	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	executorinfo := &mesosproto.ExecutorInfo{}
	podTask, _ := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			ID:               "foo",
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}
	podTask.Spec = &podtask.Spec{
		Executor: executorinfo,
	}

	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, _ := runtime.Encode(testapi.Default.Codec(), pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// must wait for this, otherwise phase changes may not apply
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// wait until the pod is really running, because otherwise a TASK_FAILED could be
	// triggered by the asynchronously running executor methods when the task is
	// removed from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
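
The pod is serialized into taskInfo.Data with runtime.Encode so the executor can rebuild it during LaunchTask. The decode side, sketched under the assumption that the executor uses the same codec (decodeTaskPod is a hypothetical helper name):

// decodeTaskPod (hypothetical) reverses the encoding above: the pod stored
// in TaskInfo.Data is decoded back with the codec the test encoded it with.
func decodeTaskPod(data []byte) (*api.Pod, error) {
	obj, err := runtime.Decode(testapi.Default.Codec(), data)
	if err != nil {
		return nil, err
	}
	pod, ok := obj.(*api.Pod)
	if !ok {
		return nil, fmt.Errorf("expected *api.Pod, got %T", obj)
	}
	return pod, nil
}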
Example #7
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	var (
		mockDriver = &MockExecutorDriver{}
		registry   = newFakeRegistry()
		executor   = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			Registry:  registry,
		})
		mockKubeAPI  = &mockKubeAPI{}
		pod          = NewTestPod(1)
		executorinfo = &mesosproto.ExecutorInfo{}
	)
	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	podTask, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}

	podTask.Spec = &podtask.Spec{Executor: executorinfo}
	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")

	taskInfo.Data = data
	var statusUpdateCalls sync.WaitGroup
	statusUpdateCalls.Add(1)
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	// simulate a pod source update; normally this update is generated when binding a pod
	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)

	// simulate a pod source update; normally this update is generated by the kubelet once the pod is healthy
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.KillTask(mockDriver, taskInfo.TaskId)
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
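
The killPod expectation works because mockKubeAPI stands in for the executor's kubeAPI dependency via testify. A plausible sketch, assuming the interface exposes a killPod(namespace, name) method:

// mockKubeAPI (sketch) records killPod calls so the test can both assert the
// call happened and run the registry cleanup side effect above.
type mockKubeAPI struct {
	mock.Mock
}

func (m *mockKubeAPI) killPod(namespace, name string) error {
	args := m.Called(namespace, name)
	return args.Error(0)
}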