func TestExecutorDriverRunTaskEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)

	// Mock slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})
	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send RunTask event to driver
	pbMsg := &mesos.RunTaskMessage{
		FrameworkId: util.NewFrameworkID(frameworkID),
		Framework: util.NewFrameworkInfo(
			"test", "test-framework-001", util.NewFrameworkID(frameworkID),
		),
		Pid: proto.String(server.PID.String()),
		Task: util.NewTaskInfo(
			"test-task",
			util.NewTaskID("test-task-001"),
			util.NewSlaveID(slaveID),
			[]*mesos.Resource{
				util.NewScalarResource("mem", 112),
				util.NewScalarResource("cpus", 2),
			},
		),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-ch:
	case <-time.After(time.Second * 2):
		t.Errorf("timed out waiting for RunTask event to reach the executor")
	}
}
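// The executor driver event tests in this section all stub the slave with the
// same HTTP handler. A sketch of factoring just that handler out (hypothetical
// helper, not part of the original suite; usage would be
// testutil.NewMockSlaveHttpServer(t, acceptAndLog(t))):
func acceptAndLog(t *testing.T) func(rsp http.ResponseWriter, req *http.Request) {
	return func(rsp http.ResponseWriter, req *http.Request) {
		// Log the unescaped request path, then accept unconditionally.
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	}
}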
func TestOffer(t *testing.T) {
	offer := util.NewOffer(
		util.NewOfferID("487c73d8-9951-f23c-34bd-8085bfd30c49"),
		util.NewFrameworkID("20150903-065451-84125888-5050-10715-0053"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S1"),
		"slave0",
	)
	if Offer(offer) != "slave0#30c49" {
		t.Errorf(`expected "slave0#30c49"; actual %s`, Offer(offer))
	}

	offer.Resources = []*mesos.Resource{
		util.NewScalarResource("cpus", 4),
		util.NewScalarResource("mem", 512),
		util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(31000, 32000)}),
	}
	if Offer(offer) != "slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000]" {
		t.Errorf("expected slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000]; actual %s", Offer(offer))
	}

	offer.Attributes = []*mesos.Attribute{&mesos.Attribute{
		Name:   proto.String("rack"),
		Type:   mesos.Value_SCALAR.Enum(),
		Scalar: &mesos.Value_Scalar{Value: proto.Float64(2)},
	}}
	if Offer(offer) != "slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000] rack:2.00" {
		t.Errorf("expected slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000] rack:2.00; actual %s", Offer(offer))
	}

	offer.Resources = nil
	if Offer(offer) != "slave0#30c49 rack:2.00" {
		t.Errorf("expected slave0#30c49 rack:2.00; actual %s", Offer(offer))
	}
}
func createTestOffer(idSuffix string) *mesos.Offer {
	return util.NewOffer(
		util.NewOfferID("test-offer-"+idSuffix),
		util.NewFrameworkID("test-framework-"+idSuffix),
		util.NewSlaveID("test-slave-"+idSuffix),
		"localhost."+idSuffix,
	)
}
func TestOffers(t *testing.T) {
	offer1 := util.NewOffer(
		util.NewOfferID("487c73d8-9951-f23c-34bd-8085bfd30c49"),
		util.NewFrameworkID("20150903-065451-84125888-5050-10715-0053"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S1"),
		"slave0",
	)
	offer1.Resources = []*mesos.Resource{
		util.NewScalarResource("cpus", 4),
		util.NewScalarResource("mem", 512),
		util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(31000, 32000)}),
	}

	offer2 := util.NewOffer(
		util.NewOfferID("26d5b34c-ef81-638d-5ad5-32c743c9c033"),
		util.NewFrameworkID("20150903-065451-84125888-5050-10715-0037"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S0"),
		"master",
	)
	offer2.Resources = []*mesos.Resource{
		util.NewScalarResource("cpus", 2),
		util.NewScalarResource("mem", 1024),
		util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(4000, 7000)}),
	}
	offer2.Attributes = []*mesos.Attribute{&mesos.Attribute{
		Name:   proto.String("rack"),
		Type:   mesos.Value_SCALAR.Enum(),
		Scalar: &mesos.Value_Scalar{Value: proto.Float64(2)},
	}}

	offers := Offers([]*mesos.Offer{offer1, offer2})
	if len(strings.Split(offers, "\n")) != 2 {
		t.Errorf("Offers([]*mesos.Offer{offer1, offer2}) should contain two offers split by new line, actual: %s", offers)
	}
}
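// The two tests above pin down the contract of the Offer/Offers formatters:
// hostname plus "#" plus the last five characters of the offer ID, then scalar
// resources as name:%.2f, ranges as name:[lo..hi], then scalar attributes, all
// space-separated; Offers joins entries with newlines. A minimal sketch of a
// conforming formatter (assumed implementation; the real Offer may differ):
func offerString(o *mesos.Offer) string {
	parts := []string{fmt.Sprintf("%s#%s", o.GetHostname(), shortID(o.GetId().GetValue()))}
	for _, r := range o.GetResources() {
		switch r.GetType() {
		case mesos.Value_SCALAR:
			parts = append(parts, fmt.Sprintf("%s:%.2f", r.GetName(), r.GetScalar().GetValue()))
		case mesos.Value_RANGES:
			ranges := make([]string, 0, len(r.GetRanges().GetRange()))
			for _, rng := range r.GetRanges().GetRange() {
				ranges = append(ranges, fmt.Sprintf("%d..%d", rng.GetBegin(), rng.GetEnd()))
			}
			parts = append(parts, fmt.Sprintf("%s:[%s]", r.GetName(), strings.Join(ranges, ",")))
		}
	}
	for _, a := range o.GetAttributes() {
		if a.GetType() == mesos.Value_SCALAR {
			parts = append(parts, fmt.Sprintf("%s:%.2f", a.GetName(), a.GetScalar().GetValue()))
		}
	}
	return strings.Join(parts, " ")
}

// shortID keeps the last five characters of an ID, matching the "#30c49"
// suffix asserted above (hypothetical helper).
func shortID(id string) string {
	if len(id) > 5 {
		return id[len(id)-5:]
	}
	return id
}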
func TestStartup(t *gotesting.T) {
	mockdriver := &MockSchedulerDriver{}
	testScheduler := NewEtcdScheduler(1, 0, 0, false, []*mesos.CommandInfo_URI{}, false, 4096, 1, 256)
	testScheduler.running = map[string]*config.Node{
		"etcd-1": nil,
		"etcd-2": nil,
	}

	reconciliation := map[string]string{
		"etcd-1": "slave-1",
		"etcd-2": "slave-2",
	}
	testScheduler.reconciliationInfoFunc = func([]string, string, string) (map[string]string, error) {
		return reconciliation, nil
	}
	testScheduler.updateReconciliationInfoFunc = func(info map[string]string, _ []string, _ string, _ string) error {
		reconciliation = info
		return nil
	}

	// On registration, ReconcileTasks should be called.
	mockdriver.Lock()
	mockdriver.On("ReconcileTasks", 0).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.On("ReconcileTasks", 2).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.Unlock()

	masterInfo := util.NewMasterInfo("master-1", 0, 0)
	masterInfo.Hostname = proto.String("test-host")
	testScheduler.Registered(
		mockdriver,
		util.NewFrameworkID("framework-1"),
		masterInfo,
	)

	assert.Equal(t, Immutable, testScheduler.state,
		"Scheduler should be placed in the Immutable state after registration "+
			"as we wait for status updates to arrive in response to ReconcileTasks.")
	assert.Equal(t, 0, len(testScheduler.running),
		"Scheduler's running list should be cleared on registration, "+
			"to be populated by ReconcileTasks.")

	time.Sleep(50 * time.Millisecond)

	mockdriver.Lock()
	defer mockdriver.Unlock()
	mockdriver.AssertExpectations(t)
}
func (s *SchedulerTestSuiteCore) SetupTest() {
	s.master = "127.0.0.1:8080"
	s.masterUpid = "master(2)@" + s.master
	s.masterId = "some-master-id-uuid"
	s.frameworkID = "some-framework-id-uuid"
	s.framework = util.NewFrameworkInfo(
		"test-user",
		"test-name",
		util.NewFrameworkID(s.frameworkID),
	)
}
func TestExecutorDriverExecutorRegisteredEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)

	// Mock slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})
	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	defer driver.Stop()

	// simulate sending ExecutorRegisteredMessage from server to exec pid.
	pbMsg := &mesos.ExecutorRegisteredMessage{
		ExecutorInfo:  util.NewExecutorInfo(util.NewExecutorID(executorID), util.NewCommandInfo("ls -l")),
		FrameworkId:   util.NewFrameworkID(frameworkID),
		FrameworkInfo: util.NewFrameworkInfo("test", "test-framework", util.NewFrameworkID(frameworkID)),
		SlaveId:       util.NewSlaveID(slaveID),
		SlaveInfo:     &mesos.SlaveInfo{Hostname: proto.String("localhost")},
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	connected := driver.connectionListener()
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-connected:
	case <-time.After(time.Second * 1):
		t.Errorf("timed out waiting for the driver to report connected")
	}
}
func TestFilterResources(t *testing.T) {
	rf := ResourceFilter{}
	o := util.NewOffer(
		util.NewOfferID("offerid"),
		util.NewFrameworkID("frameworkid"),
		util.NewSlaveID("slaveId"),
		"hostname",
	)
	o.Resources = []*mesos.Resource{
		util.NewScalarResource("name", 1.0),
		util.NewScalarResource("ub0r-resource", 2.0),
		util.NewScalarResource("ub0r-resource", 3.0),
	}

	res := rf.FilterResources(o, "ub0r-resource")
	assert.Equal(t, 2, len(res))
	assert.Equal(t, "ub0r-resource", res[0].GetName())
}
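// FilterResources, as exercised above, returns every resource on the offer
// whose name matches the filter. A minimal sketch of what such a filter can
// look like (assumed implementation with a hypothetical name; the real
// ResourceFilter method may behave differently, e.g. also matching on type):
func filterResourcesByName(o *mesos.Offer, name string) []*mesos.Resource {
	var matched []*mesos.Resource
	for _, r := range o.GetResources() {
		if r.GetName() == name {
			matched = append(matched, r)
		}
	}
	return matched
}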
func (s *Scheduler) Start() error {
	Logger.Infof("Starting scheduler with configuration: \n%s", Config)
	sched = s // set this scheduler reachable for http server

	ctrlc := make(chan os.Signal, 1)
	signal.Notify(ctrlc, os.Interrupt)

	s.cluster = NewCluster()
	s.cluster.Load()

	s.httpServer = NewHttpServer(Config.Api)
	go s.httpServer.Start()

	frameworkInfo := &mesos.FrameworkInfo{
		User:            proto.String(Config.User),
		Name:            proto.String(Config.FrameworkName),
		Role:            proto.String(Config.FrameworkRole),
		FailoverTimeout: proto.Float64(float64(Config.FrameworkTimeout / 1e9)), // nanoseconds -> seconds
		Checkpoint:      proto.Bool(true),
	}
	if s.cluster.frameworkID != "" {
		frameworkInfo.Id = util.NewFrameworkID(s.cluster.frameworkID)
	}

	driverConfig := scheduler.DriverConfig{
		Scheduler: s,
		Framework: frameworkInfo,
		Master:    Config.Master,
	}

	driver, err := scheduler.NewMesosSchedulerDriver(driverConfig)
	if err != nil {
		return fmt.Errorf("Unable to create SchedulerDriver: %s", err)
	}
	s.schedulerDriver = driver

	go func() {
		if stat, err := driver.Run(); err != nil {
			Logger.Infof("Framework stopped with status %s and error: %s\n", stat.String(), err)
			panic(err)
		}
	}()

	<-ctrlc
	return nil
}
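// Hypothetical companion to Start (not in the original source): Start returns
// on SIGINT while the driver's Run goroutine is still alive, so a shutdown
// hook could stop the driver explicitly. Stop(false) is the mesos-go
// SchedulerDriver API, where the argument tells the master whether to expect
// a failover. A sketch, assuming s.schedulerDriver was set as above:
func (s *Scheduler) Shutdown() {
	Logger.Infof("Shutting down scheduler")
	if s.schedulerDriver != nil {
		if stat, err := s.schedulerDriver.Stop(false); err != nil {
			Logger.Infof("Failed to stop scheduler driver (status %s): %s", stat.String(), err)
		}
	}
}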
func NewOffer(id string) *mesos.Offer {
	return &mesos.Offer{
		Id:          util.NewOfferID(id),
		FrameworkId: util.NewFrameworkID("test-etcd-framework"),
		SlaveId:     util.NewSlaveID("slave-" + id),
		Hostname:    proto.String("localhost"),
		Resources: []*mesos.Resource{
			util.NewScalarResource("cpus", 1),
			util.NewScalarResource("mem", 256),
			util.NewScalarResource("disk", 4096),
			util.NewRangesResource("ports", []*mesos.Value_Range{
				util.NewValueRange(uint64(0), uint64(65535)),
			}),
		},
	}
}
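// Illustrative only (hypothetical helper, not in the original source): pulling
// the port range back out of the canned offer above, e.g. to assert on it in
// a test. For NewOffer("1") this would return (0, 65535, true).
func offerPortRange(o *mesos.Offer) (begin, end uint64, ok bool) {
	for _, r := range o.GetResources() {
		if r.GetName() == "ports" && r.GetType() == mesos.Value_RANGES {
			if rs := r.GetRanges().GetRange(); len(rs) > 0 {
				return rs[0].GetBegin(), rs[0].GetEnd(), true
			}
		}
	}
	return 0, 0, false
}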
func (s *StackDeployScheduler) Start() error {
	Logger.Info("Starting scheduler")

	frameworkInfo := &mesos.FrameworkInfo{
		User:            proto.String(s.User),
		Name:            proto.String(s.FrameworkName),
		Role:            proto.String(s.FrameworkRole),
		FailoverTimeout: proto.Float64(float64(s.FailoverTimeout / 1e9)), // nanoseconds -> seconds
		Checkpoint:      proto.Bool(true),
	}
	if s.Storage.FrameworkID != "" {
		frameworkInfo.Id = mesosutil.NewFrameworkID(s.Storage.FrameworkID)
	}

	driverConfig := scheduler.DriverConfig{
		Scheduler: s,
		Framework: frameworkInfo,
		Master:    s.Master,
	}

	driver, err := scheduler.NewMesosSchedulerDriver(driverConfig)
	if err != nil {
		return fmt.Errorf("Unable to create SchedulerDriver: %s", err)
	}

	go func() {
		if stat, err := driver.Run(); err != nil {
			Logger.Info("Framework stopped with status %s and error: %s", stat.String(), err)
			panic(err)
		}
	}()

	s.Cron.Start()
	go func() {
		for {
			Logger.Info("Cron entries: %v\n", s.Cron.Entries())
			for _, entry := range s.Cron.Entries() {
				Logger.Info("Entry: %v, %v, %v, %v", entry.Prev, entry.Next, entry.Schedule, entry.Job)
			}
			time.Sleep(10 * time.Second)
		}
	}()

	return nil
}
func TestExecutorDriverFrameworkToExecutorMessageEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)

	// Mock slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})
	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send framework-to-executor message event to driver
	pbMsg := &mesos.FrameworkToExecutorMessage{
		SlaveId:     util.NewSlaveID(slaveID),
		ExecutorId:  util.NewExecutorID(executorID),
		FrameworkId: util.NewFrameworkID(frameworkID),
		Data:        []byte("Hello-Test"),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-ch:
	case <-time.After(time.Second * 1):
		t.Errorf("timed out waiting for framework message to reach the executor")
	}
}
func (s *SchedulerServer) fetchFrameworkID(client tools.EtcdGetSet) (*mesos.FrameworkID, error) {
	if s.FailoverTimeout > 0 {
		if response, err := client.Get(meta.FrameworkIDKey, false, false); err != nil {
			if !tools.IsEtcdNotFound(err) {
				return nil, fmt.Errorf("unexpected failure attempting to load framework ID from etcd: %v", err)
			}
			log.V(1).Infof("did not find framework ID in etcd")
		} else if response.Node.Value != "" {
			log.Infof("configuring FrameworkInfo with Id found in etcd: '%s'", response.Node.Value)
			return mutil.NewFrameworkID(response.Node.Value), nil
		}
	} else {
		//TODO(jdef) this seems like a totally hackish way to clean up the framework ID
		if _, err := client.Delete(meta.FrameworkIDKey, true); err != nil {
			if !tools.IsEtcdNotFound(err) {
				return nil, fmt.Errorf("failed to delete framework ID from etcd: %v", err)
			}
			log.V(1).Infof("nothing to delete: did not find framework ID in etcd")
		}
	}
	return nil, nil
}
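// fetchFrameworkID's natural counterpart persists the ID after registration so
// a failover within FailoverTimeout can reuse it. A minimal sketch, assuming
// tools.EtcdGetSet also exposes go-etcd's Set(key, value string, ttl uint64);
// the helper name and exact signature are assumptions, not the original code:
func (s *SchedulerServer) storeFrameworkID(client tools.EtcdGetSet, id *mesos.FrameworkID) error {
	// Expire the stored ID together with the failover window.
	if _, err := client.Set(meta.FrameworkIDKey, id.GetValue(), uint64(s.FailoverTimeout)); err != nil {
		return fmt.Errorf("failed to persist framework ID to etcd: %v", err)
	}
	return nil
}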
func TestExecutorDriverStatusUpdateAcknowledgement(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)

	// Mock slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})
	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send ACK from server
	pbMsg := &mesos.StatusUpdateAcknowledgementMessage{
		SlaveId:     util.NewSlaveID(slaveID),
		FrameworkId: util.NewFrameworkID(frameworkID),
		TaskId:      util.NewTaskID("test-task-001"),
		Uuid:        []byte(uuid.NewRandom().String()),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)
	<-time.After(time.Second * 1)
}
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("This test is flaky, see #11901")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	as := NewAllocationStrategy(
		podtask.DefaultPredicate,
		podtask.NewDefaultProcurement(mresource.DefaultDefaultContainerCPULimit, mresource.DefaultDefaultContainerMemLimit))
	testScheduler := New(Config{
		Executor: executor,
		Client:   client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Default.Version()}),
		Scheduler: NewFCFSPodScheduler(as, func(node string) *api.Node {
			obj, _, _ := nodeStore.GetByKey(node)
			if obj == nil {
				return nil
			}
			return obj.(*api.Node)
		}),
		Schedcfg: *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c).(*schedulingPlugin)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	type LaunchedTask struct {
		offerId  mesos.OfferID
		taskInfo *mesos.TaskInfo
	}
	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))
		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
	mockDriver.On("DeclineOffer", mAny("*mesosproto.OfferID"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	podListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	testScheduler.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST))

		// and wait that framework message is sent to executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// Schedule a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

		// supply a matching offer
		testScheduler.ResourceOffers(mockDriver, offers)

		// and wait to get scheduled
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}

	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		podListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask, offer
		}
		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock driver.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED))
	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod on a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	testScheduler.OfferRescinded(mockDriver, offers[0].Id)
	testScheduler.OfferRescinded(mockDriver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver
	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, time.Second, func() bool {
		t, _ := p.api.tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	//    expected: nothing, pod updates not supported, compare ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
func (lt lifecycleTest) Start() <-chan LaunchedTask {
	assert := &EventAssertions{*assert.New(lt.t)}
	lt.sched.Run(lt.schedulerProc.Terminal())

	// init framework
	err := lt.framework.Init(
		lt.sched,
		lt.schedulerProc.Master(),
		http.DefaultServeMux,
	)
	assert.NoError(err)

	lt.driver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := lt.driver.Upon()

	lt.driver.On("ReconcileTasks",
		mock.AnythingOfType("[]*mesosproto.TaskStatus"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	lt.driver.On("SendFrameworkMessage",
		mock.AnythingOfType("*mesosproto.ExecutorID"),
		mock.AnythingOfType("*mesosproto.SlaveID"),
		mock.AnythingOfType("string"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))
		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}

	lt.driver.On("LaunchTasks",
		mock.AnythingOfType("[]*mesosproto.OfferID"),
		mock.AnythingOfType("[]*mesosproto.TaskInfo"),
		mock.AnythingOfType("*mesosproto.Filters"),
	).Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksFunc)

	lt.driver.On("DeclineOffer",
		mock.AnythingOfType("*mesosproto.OfferID"),
		mock.AnythingOfType("*mesosproto.Filters"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return lt.driver, nil
	})
	lt.schedulerProc.Elect(driverFactory)
	elected := lt.schedulerProc.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	lt.framework.Registered(
		lt.driver,
		mesosutil.NewFrameworkID("kubernetes-id"),
		mesosutil.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	return launchedTasks
}
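// Sketch of how a test typically consumes the channel returned by Start:
// supply an offer, then block on the mock driver's LaunchTasks call. This
// assumes the framework exposes ResourceOffers as in the plugin test above;
// names are illustrative only.
//
//	launchedTasks := lt.Start()
//	lt.framework.ResourceOffers(lt.driver, []*mesos.Offer{NewTestOffer("offer1")})
//	select {
//	case task := <-launchedTasks:
//		// assert on task.taskInfo / task.offerId here
//	case <-time.After(5 * time.Second):
//		lt.t.Fatal("timed out waiting for LaunchTasks")
//	}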
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("disabled due to flakiness; see #10795")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	testScheduler := New(Config{
		Executor:     executor,
		Client:       client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}),
		ScheduleFunc: FCFSScheduleFunc,
		Schedcfg:     *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	launchedTasks := make(chan *mesos.TaskInfo, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(taskInfos))
		launchedTasks <- taskInfos[0]
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod1 := NewTestPod(1)
	podListWatch.Add(pod1, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers1 := []*mesos.Offer{NewTestOffer(1)}
	testScheduler.ResourceOffers(nil, offers1)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))

		// and wait that framework message is sent to executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// start another pod
	podNum := 1
	startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
		podNum = podNum + 1

		// create pod and matching offer
		pod := NewTestPod(podNum)
		podListWatch.Add(pod, true) // notify watchers
		testScheduler.ResourceOffers(mockDriver, offers)
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil
		}
	}
	pod, launchedTask := startPod(offers1)

	// mock driver.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))
	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver
	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcilePod will access apiserver for pod %v", pod.Name)
	}

	// 1. with pod deleted from the apiserver
	pod, launchedTask = startPod(offers1)
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 2. with pod still on the apiserver, not bound
	pod, launchedTask = startPod(offers1)
	failPodFromExecutor(launchedTask)

	// 3. with pod still on the apiserver, bound i.e. host!=""
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask)
}
func TestReconciliationOnStartup(t *gotesting.T) {
	testScheduler := NewEtcdScheduler(3, 0, 0, true, []*mesos.CommandInfo_URI{}, false, 4096, 1, 256)
	mockdriver := &MockSchedulerDriver{
		runningStatuses: make(chan *mesos.TaskStatus, 10),
		scheduler:       testScheduler,
	}

	reconciliation := map[string]string{
		"etcd-1": "slave-1",
		"etcd-2": "slave-2",
		"etcd-3": "slave-3",
	}
	testScheduler.reconciliationInfoFunc = func([]string, string, string) (map[string]string, error) {
		return reconciliation, nil
	}
	testScheduler.updateReconciliationInfoFunc = func(info map[string]string, _ []string, _ string, _ string) error {
		reconciliation = info
		return nil
	}

	// Valid reconciled tasks should be added to the running list.
	for _, taskStatus := range []*mesos.TaskStatus{
		util.NewTaskStatus(
			util.NewTaskID("etcd-1 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
		util.NewTaskStatus(
			util.NewTaskID("etcd-2 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
		util.NewTaskStatus(
			util.NewTaskID("etcd-3 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
	} {
		mockdriver.runningStatuses <- taskStatus
	}

	mockdriver.Lock()
	mockdriver.On("ReconcileTasks", 0).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.On("ReconcileTasks", 3).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.Unlock()

	masterInfo := util.NewMasterInfo("master-1", 0, 0)
	masterInfo.Hostname = proto.String("test-host")
	testScheduler.Registered(
		mockdriver,
		util.NewFrameworkID("framework-1"),
		masterInfo,
	)

	time.Sleep(50 * time.Millisecond)

	mockdriver.Lock()
	defer mockdriver.Unlock()
	assert.Equal(t, 3, len(testScheduler.running),
		"Scheduler should reconcile tasks properly.")
	mockdriver.AssertExpectations(t)
}
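// The integer expectations above ("ReconcileTasks", 0 and then 3) suggest the
// mock matches on the *count* of statuses passed in and replays any queued
// runningStatuses back to the scheduler. A sketch of a compatible mock method
// (assumed; the real MockSchedulerDriver may differ):
func (m *MockSchedulerDriver) ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) {
	// Record the call against the testify expectations by status count.
	args := m.Called(len(statuses))

	// Replay queued reconciliation results as status updates so the
	// scheduler can repopulate its running list.
	for {
		select {
		case status := <-m.runningStatuses:
			m.scheduler.StatusUpdate(m, status)
		default:
			return args.Get(0).(mesos.Status), args.Error(1)
		}
	}
}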