func TestDeleteOne_Running(t *testing.T) {
	assert := assert.New(t)
	obj := &types.MockScheduler{}
	reg := podtask.NewInMemoryRegistry()
	obj.On("Tasks").Return(reg)
	pod := &queuer.Pod{Pod: &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:      "foo",
			UID:       "foo0",
			Namespace: api.NamespaceDefault,
		}}}

	task, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			ID:               "bar",
			Prototype:        &mesosproto.ExecutorInfo{},
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod.Pod,
	)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	task, err = reg.Register(task)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	task.Set(podtask.Launched)
	err = reg.Update(task)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// preconditions
	q := queue.NewDelayFIFO()
	qr := queuer.New(q, nil)
	q.Add(pod, queue.ReplaceExisting)
	assert.Equal(1, len(q.List()))
	_, found := q.Get("default/foo")
	assert.True(found)

	obj.On("KillTask", task.ID).Return(nil)

	// exec & post conditions
	d := New(obj, qr)
	err = d.DeleteOne(pod)
	assert.Nil(err)
	_, found = q.Get("foo0")
	assert.False(found)
	assert.Equal(0, len(q.List()))
	obj.AssertExpectations(t)
}
func TestDeleteOne_Running(t *testing.T) {
	assert := assert.New(t)
	obj := &MockScheduler{}
	reg := podtask.NewInMemoryRegistry()
	obj.On("tasks").Return(reg)
	pod := &Pod{Pod: &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:      "foo",
			UID:       "foo0",
			Namespace: api.NamespaceDefault,
		}}}

	task, err := podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	task, err = reg.Register(task)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	task.Set(podtask.Launched)
	err = reg.Update(task)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// preconditions
	qr := newQueuer(nil)
	qr.podQueue.Add(pod, queue.ReplaceExisting)
	assert.Equal(1, len(qr.podQueue.List()))
	_, found := qr.podQueue.Get("default/foo")
	assert.True(found)

	obj.On("killTask", task.ID).Return(nil)

	// exec & post conditions
	d := &deleter{
		api: obj,
		qr:  qr,
	}
	err = d.deleteOne(pod)
	assert.Nil(err)
	_, found = qr.podQueue.Get("foo0")
	assert.False(found)
	assert.Equal(0, len(qr.podQueue.List()))
	obj.AssertExpectations(t)
}
func TestDeleteOne_PendingPod(t *testing.T) {
	assert := assert.New(t)
	obj := &types.MockScheduler{}
	reg := podtask.NewInMemoryRegistry()
	obj.On("Tasks").Return(reg)
	pod := &queuer.Pod{Pod: &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:      "foo",
			UID:       "foo0",
			Namespace: api.NamespaceDefault,
		}}}

	task, err := podtask.New(
		api.NewDefaultContext(),
		"bar",
		pod.Pod,
		&mesosproto.ExecutorInfo{},
		nil,
		nil,
	)
	if err != nil {
		t.Fatalf("failed to create task: %v", err)
	}
	_, err = reg.Register(task)
	if err != nil {
		t.Fatalf("failed to register task: %v", err)
	}

	// preconditions
	q := queue.NewDelayFIFO()
	qr := queuer.New(q, nil)
	q.Add(pod, queue.ReplaceExisting)
	assert.Equal(1, len(q.List()))
	_, found := q.Get("default/foo")
	assert.True(found)

	// exec & post conditions
	d := New(obj, qr)
	err = d.DeleteOne(pod)
	assert.Nil(err)
	_, found = q.Get("foo0")
	assert.False(found)
	assert.Equal(0, len(q.List()))
	obj.AssertExpectations(t)
}
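// Sketch (not part of the original file): a companion case for a pod that is
// neither queued nor registered. It reuses only identifiers that already
// appear above (types.MockScheduler, queue.NewDelayFIFO, queuer.New, New,
// DeleteOne) and assumes DeleteOne surfaces errors.NoSuchPodErr when it
// cannot resolve the pod to a task; verify that error value against the
// deleter implementation before relying on this.
func TestDeleteOne_NonexistentPod(t *testing.T) {
	assert := assert.New(t)
	obj := &types.MockScheduler{}
	reg := podtask.NewInMemoryRegistry()
	obj.On("Tasks").Return(reg)

	// empty queue: the pod below was never registered or queued
	q := queue.NewDelayFIFO()
	qr := queuer.New(q, nil)
	assert.Equal(0, len(q.List()))

	pod := &queuer.Pod{Pod: &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name:      "foo",
			UID:       "foo0",
			Namespace: api.NamespaceDefault,
		}}}

	d := New(obj, qr)
	err := d.DeleteOne(pod)
	assert.Equal(errors.NoSuchPodErr, err) // assumed error value, see note above
	obj.AssertExpectations(t)
}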
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// create fake apiserver
	podListWatch := NewMockPodsListWatch(api.PodList{})
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	// create and start executor
	mockDriver := &MockExecutorDriver{}
	kubeletFinished := make(chan struct{})
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: make(chan interface{}, 1024),
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
		ShutdownAlert:   func() { close(kubeletFinished) },
		KubeletFinished: kubeletFinished,
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	pod := NewTestPod(1)
	podTask, _ := podtask.New(api.NewDefaultContext(), "foo", *pod, &mesosproto.ExecutorInfo{})

	taskInfo := podTask.BuildTaskInfo()
	data, _ := testapi.Default.Codec().Encode(pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running _launchTask, __launchTask methods
	// when removing the task from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an initial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "", *pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Default.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update about a new pod to "+
			"the updates chan when creating a new one.")

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
}
func (k *k8smScheduler) createPodTask(ctx api.Context, pod *api.Pod) (*podtask.T, error) {
	return podtask.New(ctx, "", *pod, k.internal.executor)
}
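// Sketch (not from the original source): the same helper written against the
// Config-based podtask.New signature used by the newer tests in this file
// (podtask.Config{ID, Prototype, HostPortStrategy}). The executor prototype
// field (k.internal.executor) and the wildcard host-port strategy are carried
// over here as assumptions; check them against the actual scheduler struct.
func (k *k8smScheduler) createPodTaskFromConfig(ctx api.Context, pod *api.Pod) (*podtask.T, error) {
	return podtask.New(
		ctx,
		podtask.Config{
			Prototype:        k.internal.executor,       // assumed: the scheduler's ExecutorInfo prototype
			HostPortStrategy: hostport.StrategyWildcard, // assumed default, as in the tests above
		},
		pod, // note: the Config-based New takes the pod by pointer
	)
}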
// TestExecutorFrameworkMessage ensures that the executor is able to
// handle messages from the framework, specifically about lost tasks
// and Kamikaze. When a task is lost, the executor needs to clean up
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
	// TODO(jdef): Fix the unexpected call in the mocking system.
	t.Skip("This test started failing when panic catching was disabled.")

	var (
		mockDriver      = &MockExecutorDriver{}
		kubeletFinished = make(chan struct{})
		registry        = newFakeRegistry()
		executor        = New(Config{
			Docker:          dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos:       make(chan NodeInfo, 1),
			ShutdownAlert:   func() { close(kubeletFinished) },
			KubeletFinished: kubeletFinished,
			Registry:        registry,
		})
		pod         = NewTestPod(1)
		mockKubeAPI = &mockKubeAPI{}
	)

	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	executor.FrameworkMessage(mockDriver, "test framework message")

	// set up a pod to then lose
	executorinfo := &mesosproto.ExecutorInfo{}
	podTask, _ := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			ID:               "foo",
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}
	podTask.Spec = &podtask.Spec{
		Executor: executorinfo,
	}

	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, _ := runtime.Encode(testapi.Default.Codec(), pod)
	taskInfo.Data = data

	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Once()

	called := make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	// must wait for this otherwise phase changes may not apply
	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// waiting until the pod is really running b/c otherwise a TASK_FAILED could be
	// triggered by the asynchronously running executor methods when removing the task
	// from k.tasks through the "task-lost:foo" message below.
	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate for the running task")
	}

	// send task-lost message for it
	called = make(chan struct{})
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_LOST,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.FrameworkMessage(mockDriver, "task-lost:foo")

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	select {
	case <-called:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for SendStatusUpdate")
	}

	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()

	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
	assert.Equal(t, true, executor.isDone(),
		"executor should have shut down after receiving a Kamikaze message")

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	var (
		mockDriver = &MockExecutorDriver{}
		registry   = newFakeRegistry()
		executor   = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			Registry:  registry,
		})
		mockKubeAPI  = &mockKubeAPI{}
		pod          = NewTestPod(1)
		executorinfo = &mesosproto.ExecutorInfo{}
	)
	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	podTask, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}

	podTask.Spec = &podtask.Spec{Executor: executorinfo}
	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")

	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateCalls.Add(1)
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	// simulate a pod source update; normally this update is generated when binding a pod
	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)

	// simulate a pod source update; normally this update is generated by the kubelet once the pod is healthy
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// Allow some time for asynchronous requests to the driver.
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate", mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	// Allow some time for asynchronous requests to the driver.
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}
func (k *schedulerAlgorithm) Schedule(pod *api.Pod) (string, error) {
	log.Infof("Try to schedule pod %v\n", pod.Name)
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)

	// default upstream scheduler passes pod.Name as binding.PodID
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		return "", err
	}

	k.sched.Lock()
	defer k.sched.Unlock()

	switch task, state := k.sched.Tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// There's a bit of a potential race here, a pod could have been yielded() and
		// then before we get *here* it could be deleted.
		// We use meta to index the pod in the store since that's what k8s reflector does.
		podName, err := cache.MetaNamespaceKeyFunc(pod)
		if err != nil {
			log.Warningf("aborting Schedule, unable to understand pod object %+v", pod)
			return "", errors.NoSuchPodErr
		}

		if deleted := k.podUpdates.Poll(podName, queue.DELETE_EVENT); deleted {
			// avoid scheduling a pod that's been deleted between yieldPod() and Schedule()
			log.Infof("aborting Schedule, pod has been deleted %+v", pod)
			return "", errors.NoSuchPodErr
		}

		// write resource limits into the pod spec.
		// From here on we can expect that the pod spec of a task has proper limits for CPU and memory.
		k.limitPod(pod)

		podTask, err := podtask.New(ctx, "", pod, k.prototype, k.frameworkRoles, k.defaultPodRoles)
		if err != nil {
			log.Warningf("aborting Schedule, unable to create podtask object %+v: %v", pod, err)
			return "", err
		}

		podTask, err = k.sched.Tasks().Register(podTask)
		if err != nil {
			return "", err
		}

		return k.doSchedule(podTask)

	//TODO(jdef) it's possible that the pod state has diverged from what
	//we knew previously, we should probably update the task.Pod state here
	//before proceeding with scheduling
	case podtask.StatePending:
		if pod.UID != task.Pod.UID {
			// we're dealing with a brand new pod spec here, so the old one must have been
			// deleted -- and so our task store is out of sync w/ respect to reality
			//TODO(jdef) reconcile task
			return "", fmt.Errorf("task %v spec is out of sync with pod %v spec, aborting schedule", task.ID, pod.Name)
		} else if task.Has(podtask.Launched) {
			// task has been marked as "launched" but the pod binding creation may have failed in k8s,
			// but we're going to let someone else handle it, probably the mesos task error handler
			return "", fmt.Errorf("task %s has already been launched, aborting schedule", task.ID)
		} else {
			return k.doSchedule(task)
		}

	default:
		return "", fmt.Errorf("task %s is not pending, nothing to schedule", task.ID)
	}
}