// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// create fake apiserver
	podListWatch := NewMockPodsListWatch(api.PodList{})
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	// create and start executor
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
		ShutdownAlert:   func() { close(kubeletFinished) },
		KubeletFinished: kubeletFinished,
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	pod := NewTestPod(1)
	podTask, _ := podtask.New(api.NewDefaultContext(), "foo", *pod, &mesosproto.ExecutorInfo{})

	taskInfo := podTask.BuildTaskInfo()
	data, _ := testapi.Default.Codec().Encode(pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running _launchTask, __launchTask methods
	// when removing the task from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(), "executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
}
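// The expectations above key SendStatusUpdate on the task state alone, which
// suggests that MockExecutorDriver forwards only the state to testify's mock
// machinery. A minimal sketch of such a mock, assuming the mesos-go
// bindings.ExecutorDriver signature for SendStatusUpdate (the real mock in
// this package may record more detail):
//
//	type MockExecutorDriver struct {
//		mock.Mock
//	}
//
//	func (m *MockExecutorDriver) SendStatusUpdate(status *mesosproto.TaskStatus) (mesosproto.Status, error) {
//		// forward only the task state so tests can match on it
//		args := m.Called(*status.State)
//		return args.Get(0).(mesosproto.Status), args.Error(1)
//	}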
// TestPlugin_LifeCycle creates the scheduler plugin with the config returned by the scheduler,
// and plays through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("This test is flaky, see #11901")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	as := NewAllocationStrategy(
		podtask.DefaultPredicate,
		podtask.NewDefaultProcurement(mresource.DefaultDefaultContainerCPULimit, mresource.DefaultDefaultContainerMemLimit))
	testScheduler := New(Config{
		Executor: executor,
		Client:   client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Default.Version()}),
		Scheduler: NewFCFSPodScheduler(as, func(node string) *api.Node {
			obj, _, _ := nodeStore.GetByKey(node)
			if obj == nil {
				return nil
			}
			return obj.(*api.Node)
		}),
		Schedcfg: *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c).(*schedulingPlugin)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	type LaunchedTask struct {
		offerId  mesos.OfferID
		taskInfo *mesos.TaskInfo
	}
	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))
		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
	mockDriver.On("DeclineOffer", mAny("*mesosproto.OfferID"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	podListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	testScheduler.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST))

		// and wait until the framework message is sent to the executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// Launch a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

		// supply a matching offer
		testScheduler.ResourceOffers(mockDriver, offers)

		// and wait to get scheduled
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}

	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		podListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask, offer
		}

		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock driver.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED))

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod with a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	testScheduler.OfferRescinded(mockDriver, offers[0].Id)
	testScheduler.OfferRescinded(mockDriver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver
	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, time.Second, func() bool {
		t, _ := p.api.tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	//    expected: nothing, pod updates not supported, compare ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
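// newTaskStatusForTask is used throughout the lifecycle tests to fabricate the
// status updates that mesos would normally deliver. A plausible sketch,
// assuming it only needs to carry the task ID and the state (the real helper
// may populate more fields, such as a source or slave ID):
//
//	func newTaskStatusForTask(task *mesos.TaskInfo, state mesos.TaskState) *mesos.TaskStatus {
//		return &mesos.TaskStatus{
//			TaskId: task.TaskId,
//			State:  &state,
//		}
//	}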
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an initial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "", *pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Default.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update about a new pod to "+
			"the updates chan when creating a new one.")

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
}
// WaitForNode blocks until the test server knows about a node with the given
// name, or until the test timeout expires.
func (srv *TestServer) WaitForNode(name string) {
	assertext.EventuallyTrue(srv.t, util.ForeverTestTimeout, func() bool {
		return srv.LookupNode(name) != nil
	})
}
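// WaitForNode leans on assertext.EventuallyTrue to poll until the node shows
// up. Purely as an illustration (the real assertext helper may differ, e.g. in
// its polling interval), such a helper can be sketched as:
//
//	func eventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool {
//		deadline := time.Now().Add(timeout)
//		for time.Now().Before(deadline) {
//			if fn() {
//				return true
//			}
//			time.Sleep(10 * time.Millisecond)
//		}
//		return assert.Fail(t, "condition never became true", msgAndArgs...)
//	}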
// TestScheduler_LifeCycle creates a scheduler plugin with the config returned by the scheduler,
// and plays through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestScheduler_LifeCycle(t *testing.T) {
	assert := &EventAssertions{*assert.New(t)}
	lt := newLifecycleTest(t)
	defer lt.Close()

	// run plugin
	launchedTasks := lt.Start()
	defer lt.End()

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	lt.podsListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(lt.eventObs, controller.FailedScheduling, "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	lt.framework.ResourceOffers(nil, offers)

	// first offer is declined because node is not available yet
	lt.apiServer.WaitForNode("some_hostname")

	// add one more offer
	lt.framework.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(lt.eventObs, controller.Scheduled)
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING),
		)
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING),
		)

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		lt.driver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST),
		)

		// and wait until the framework message is sent to the executor
		lt.driver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(util.ForeverTestTimeout):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	offeredNodes := make(map[string]struct{})

	// Launch a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(lt.eventObs, controller.FailedScheduling, "failedScheduling event not received")

		// supply a matching offer
		lt.framework.ResourceOffers(lt.driver, offers)
		for _, offer := range offers {
			if _, ok := offeredNodes[offer.GetHostname()]; !ok {
				offeredNodes[offer.GetHostname()] = struct{}{}
				lt.apiServer.WaitForNode(offer.GetHostname())

				// reoffer since it must have been declined above
				lt.framework.ResourceOffers(lt.driver, []*mesos.Offer{offer})
			}
		}

		// and wait to get scheduled
		assert.EventWithReason(lt.eventObs, controller.Scheduled)

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(util.ForeverTestTimeout):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}

	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		lt.podsListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			lt.framework.StatusUpdate(
				lt.driver,
				newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING),
			)
			lt.framework.StatusUpdate(
				lt.driver,
				newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING),
			)
			return pod, launchedTask, offer
		}

		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock driver.KillTask, should be invoked when a pod is deleted
	lt.driver.On("KillTask",
		mock.AnythingOfType("*mesosproto.TaskID"),
	).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := lt.driver.Upon()

	// stop it again via the apiserver mock
	lt.podsListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		lt.framework.StatusUpdate(
			lt.driver,
			newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED),
		)

	case <-time.After(util.ForeverTestTimeout):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod with a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	lt.framework.OfferRescinded(lt.driver, offers[0].Id)
	lt.framework.OfferRescinded(lt.driver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver
	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := lt.apiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		lt.framework.StatusUpdate(lt.driver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, util.ForeverTestTimeout, func() bool {
			return lt.apiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	lt.podsListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, util.ForeverTestTimeout, func() bool {
		t, _ := lt.sched.Tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	//    expected: nothing, pod updates not supported, compare ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	lt.podsListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// TODO(jdef): Fix the unexpected call in the mocking system.
	t.Skip("This test started failing when panic catching was disabled.")
	var (
		mockDriver      = &MockExecutorDriver{}
		kubeletFinished = make(chan struct{})
		registry        = newFakeRegistry()
		executor        = New(Config{
			Docker:          dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos:       make(chan NodeInfo, 1),
			ShutdownAlert:   func() { close(kubeletFinished) },
			KubeletFinished: kubeletFinished,
			Registry:        registry,
		})
		pod         = NewTestPod(1)
		mockKubeAPI = &mockKubeAPI{}
	)

	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)
	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	executorinfo := &mesosproto.ExecutorInfo{}
	podTask, _ := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			ID:               "foo",
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}
	podTask.Spec = &podtask.Spec{
		Executor: executorinfo,
	}

	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, _ := runtime.Encode(testapi.Default.Codec(), pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// must wait for this otherwise phase changes may not apply
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running executor methods when removing the task
	// from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(), "executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	var (
		mockDriver = &MockExecutorDriver{}
		registry   = newFakeRegistry()
		executor   = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			Registry:  registry,
		})
		mockKubeAPI  = &mockKubeAPI{}
		pod          = NewTestPod(1)
		executorinfo = &mesosproto.ExecutorInfo{}
	)
	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	podTask, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}

	podTask.Spec = &podtask.Spec{Executor: executorinfo}
	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")

	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateCalls.Add(1)
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	// simulate a pod source update; normally this update is generated when binding a pod
	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)

	// simulate a pod source update; normally this update is generated by the kubelet
	// once the pod is healthy
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
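// The executor tests above stub out apiserver pod deletion through
// executor.kubeAPI. Given the expectations set on "killPod" with a namespace
// and a name, a minimal sketch of that mock, assuming a kubeAPI interface with
// a killPod(namespace, name string) error method (a hypothetical shape; the
// real interface in this package may differ):
//
//	type mockKubeAPI struct {
//		mock.Mock
//	}
//
//	func (m *mockKubeAPI) killPod(ns, name string) error {
//		// record the call so tests can attach Run callbacks and assert on it
//		args := m.Called(ns, name)
//		return args.Error(0)
//	}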