func TestExecutorDriverRunTaskEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send runtask event to driver
	pbMsg := &mesos.RunTaskMessage{
		FrameworkId: util.NewFrameworkID(frameworkID),
		Framework: util.NewFrameworkInfo(
			"test", "test-framework-001", util.NewFrameworkID(frameworkID),
		),
		Pid: proto.String(server.PID.String()),
		Task: util.NewTaskInfo(
			"test-task",
			util.NewTaskID("test-task-001"),
			util.NewSlaveID(slaveID),
			[]*mesos.Resource{
				util.NewScalarResource("mem", 112),
				util.NewScalarResource("cpus", 2),
			},
		),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-ch:
	case <-time.After(time.Second * 2):
		log.Errorf("Tired of waiting...")
	}

}
Example #2
0
func TestOffer(t *testing.T) {
	offer := util.NewOffer(util.NewOfferID("487c73d8-9951-f23c-34bd-8085bfd30c49"), util.NewFrameworkID("20150903-065451-84125888-5050-10715-0053"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S1"), "slave0")

	if Offer(offer) != "slave0#30c49" {
		t.Errorf(`util.NewOffer(util.NewOfferID("487c73d8-9951-f23c-34bd-8085bfd30c49"), util.NewFrameworkID("20150903-065451-84125888-5050-10715-0053"), util.NewSlaveID("20150903-065451-84125888-5050-10715-S1"), "slave0") != "slave0#30c49"; actual %s`, Offer(offer))
	}

	offer.Resources = []*mesos.Resource{util.NewScalarResource("cpus", 4), util.NewScalarResource("mem", 512), util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(31000, 32000)})}
	if Offer(offer) != "slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000]" {
		t.Errorf("Expected slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000]; actual %s", Offer(offer))
	}

	offer.Attributes = []*mesos.Attribute{&mesos.Attribute{
		Name:   proto.String("rack"),
		Type:   mesos.Value_SCALAR.Enum(),
		Scalar: &mesos.Value_Scalar{Value: proto.Float64(2)},
	}}
	if Offer(offer) != "slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000] rack:2.00" {
		t.Errorf("Expected slave0#30c49 cpus:4.00 mem:512.00 ports:[31000..32000] rack:2.00; actual %s", Offer(offer))
	}

	offer.Resources = nil
	if Offer(offer) != "slave0#30c49 rack:2.00" {
		t.Errorf("Expected slave0#30c49 rack:2.00; actual %s", Offer(offer))
	}
}
func createTestOffer(idSuffix string) *mesos.Offer {
	return util.NewOffer(
		util.NewOfferID("test-offer-"+idSuffix),
		util.NewFrameworkID("test-framework-"+idSuffix),
		util.NewSlaveID("test-slave-"+idSuffix),
		"localhost."+idSuffix,
	)
}
Example #4
0
func TestOffers(t *testing.T) {
	offer1 := util.NewOffer(util.NewOfferID("487c73d8-9951-f23c-34bd-8085bfd30c49"), util.NewFrameworkID("20150903-065451-84125888-5050-10715-0053"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S1"), "slave0")
	offer1.Resources = []*mesos.Resource{util.NewScalarResource("cpus", 4), util.NewScalarResource("mem", 512), util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(31000, 32000)})}

	offer2 := util.NewOffer(util.NewOfferID("26d5b34c-ef81-638d-5ad5-32c743c9c033"), util.NewFrameworkID("20150903-065451-84125888-5050-10715-0037"),
		util.NewSlaveID("20150903-065451-84125888-5050-10715-S0"), "master")
	offer2.Resources = []*mesos.Resource{util.NewScalarResource("cpus", 2), util.NewScalarResource("mem", 1024), util.NewRangesResource("ports", []*mesos.Value_Range{util.NewValueRange(4000, 7000)})}
	offer2.Attributes = []*mesos.Attribute{&mesos.Attribute{
		Name:   proto.String("rack"),
		Type:   mesos.Value_SCALAR.Enum(),
		Scalar: &mesos.Value_Scalar{Value: proto.Float64(2)},
	}}

	offers := Offers([]*mesos.Offer{offer1, offer2})
	if len(strings.Split(offers, "\n")) != 2 {
		t.Errorf("Offers([]*mesos.Offer{offer1, offer2}) should contain two offers split by new line, actual: %s", offers)
	}
}
Example #5
0
func TestStartup(t *gotesting.T) {
	mockdriver := &MockSchedulerDriver{}
	testScheduler := NewEtcdScheduler(1, 0, 0, false, []*mesos.CommandInfo_URI{}, false, 4096, 1, 256)
	testScheduler.running = map[string]*config.Node{
		"etcd-1": nil,
		"etcd-2": nil,
	}

	reconciliation := map[string]string{
		"etcd-1": "slave-1",
		"etcd-2": "slave-2",
	}
	testScheduler.reconciliationInfoFunc = func([]string, string, string) (map[string]string, error) {
		return reconciliation, nil
	}
	testScheduler.updateReconciliationInfoFunc = func(info map[string]string, _ []string, _ string, _ string) error {
		reconciliation = info
		return nil
	}

	// On registration, ReconcileTasks should be called.
	mockdriver.Lock()
	mockdriver.On(
		"ReconcileTasks",
		0,
	).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.On(
		"ReconcileTasks",
		2,
	).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.Unlock()

	masterInfo := util.NewMasterInfo("master-1", 0, 0)
	masterInfo.Hostname = proto.String("test-host")

	testScheduler.Registered(
		mockdriver,
		util.NewFrameworkID("framework-1"),
		masterInfo,
	)

	assert.Equal(t, Immutable, testScheduler.state,
		"Scheduler should be placed in the Immutable state after registration "+
			"as we wait for status updates to arrive in response to ReconcileTasks.")

	assert.Equal(t, 0, len(testScheduler.running),
		"Scheduler's running list should be cleared on registration, "+
			"to be populated by ReconcileTasks.")

	time.Sleep(50 * time.Millisecond)
	mockdriver.Lock()
	defer mockdriver.Unlock()
	mockdriver.AssertExpectations(t)
}
func (s *SchedulerTestSuiteCore) SetupTest() {
	s.master = "127.0.0.1:8080"
	s.masterUpid = "master(2)@" + s.master
	s.masterId = "some-master-id-uuid"
	s.frameworkID = "some-framework-id-uuid"
	s.framework = util.NewFrameworkInfo(
		"test-user",
		"test-name",
		util.NewFrameworkID(s.frameworkID),
	)
}
func TestExecutorDriverExecutorRegisteredEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	defer driver.Stop()

	//simulate sending ExecutorRegisteredMessage from server to exec pid.
	pbMsg := &mesos.ExecutorRegisteredMessage{
		ExecutorInfo:  util.NewExecutorInfo(util.NewExecutorID(executorID), util.NewCommandInfo("ls -l")),
		FrameworkId:   util.NewFrameworkID(frameworkID),
		FrameworkInfo: util.NewFrameworkInfo("test", "test-framework", util.NewFrameworkID(frameworkID)),
		SlaveId:       util.NewSlaveID(slaveID),
		SlaveInfo:     &mesos.SlaveInfo{Hostname: proto.String("localhost")},
	}
	c := testutil.NewMockMesosClient(t, server.PID)
	connected := driver.connectionListener()
	c.SendMessage(driver.self, pbMsg)
	select {
	case <-connected:
	case <-time.After(time.Second * 1):
		log.Errorf("Tired of waiting...")
	}
}
Example #8
0
func TestFilterResources(t *testing.T) {
	rf := ResourceFilter{}
	o := util.NewOffer(util.NewOfferID("offerid"), util.NewFrameworkID("frameworkid"), util.NewSlaveID("slaveId"), "hostname")
	o.Resources = []*mesos.Resource{
		util.NewScalarResource("name", 1.0),
		util.NewScalarResource("ub0r-resource", 2.0),
		util.NewScalarResource("ub0r-resource", 3.0),
	}

	res := rf.FilterResources(o, "ub0r-resource")

	assert.Equal(t, 2, len(res))
	assert.Equal(t, "ub0r-resource", res[0].GetName())
}
func (s *Scheduler) Start() error {
	Logger.Infof("Starting scheduler with configuration: \n%s", Config)
	sched = s // set this scheduler reachable for http server

	ctrlc := make(chan os.Signal, 1)
	signal.Notify(ctrlc, os.Interrupt)

	s.cluster = NewCluster()
	s.cluster.Load()

	s.httpServer = NewHttpServer(Config.Api)
	go s.httpServer.Start()

	frameworkInfo := &mesos.FrameworkInfo{
		User:            proto.String(Config.User),
		Name:            proto.String(Config.FrameworkName),
		Role:            proto.String(Config.FrameworkRole),
		FailoverTimeout: proto.Float64(float64(Config.FrameworkTimeout / 1e9)),
		Checkpoint:      proto.Bool(true),
	}

	if s.cluster.frameworkID != "" {
		frameworkInfo.Id = util.NewFrameworkID(s.cluster.frameworkID)
	}

	driverConfig := scheduler.DriverConfig{
		Scheduler: s,
		Framework: frameworkInfo,
		Master:    Config.Master,
	}

	driver, err := scheduler.NewMesosSchedulerDriver(driverConfig)
	s.schedulerDriver = driver

	if err != nil {
		return fmt.Errorf("Unable to create SchedulerDriver: %s", err)
	}

	go func() {
		if stat, err := driver.Run(); err != nil {
			Logger.Infof("Framework stopped with status %s and error: %s\n", stat.String(), err)
			panic(err)
		}
	}()

	<-ctrlc
	return nil
}
Example #10
0
func NewOffer(id string) *mesos.Offer {
	return &mesos.Offer{
		Id:          util.NewOfferID(id),
		FrameworkId: util.NewFrameworkID("test-etcd-framework"),
		SlaveId:     util.NewSlaveID("slave-" + id),
		Hostname:    proto.String("localhost"),
		Resources: []*mesos.Resource{
			util.NewScalarResource("cpus", 1),
			util.NewScalarResource("mem", 256),
			util.NewScalarResource("disk", 4096),
			util.NewRangesResource("ports", []*mesos.Value_Range{
				util.NewValueRange(uint64(0), uint64(65535)),
			}),
		},
	}
}
Example #11
0
func (s *StackDeployScheduler) Start() error {
	Logger.Info("Starting scheduler")

	frameworkInfo := &mesos.FrameworkInfo{
		User:            proto.String(s.User),
		Name:            proto.String(s.FrameworkName),
		Role:            proto.String(s.FrameworkRole),
		FailoverTimeout: proto.Float64(float64(s.FailoverTimeout / 1e9)),
		Checkpoint:      proto.Bool(true),
	}

	if s.Storage.FrameworkID != "" {
		frameworkInfo.Id = mesosutil.NewFrameworkID(s.Storage.FrameworkID)
	}

	driverConfig := scheduler.DriverConfig{
		Scheduler: s,
		Framework: frameworkInfo,
		Master:    s.Master,
	}

	driver, err := scheduler.NewMesosSchedulerDriver(driverConfig)

	if err != nil {
		return fmt.Errorf("Unable to create SchedulerDriver: %s", err)
	}

	go func() {
		if stat, err := driver.Run(); err != nil {
			Logger.Info("Framework stopped with status %s and error: %s", stat.String(), err)
			panic(err)
		}
	}()

	s.Cron.Start()
	go func() {
		for {
			Logger.Info("Cron entries: %v\n", s.Cron.Entries())
			for _, entry := range s.Cron.Entries() {
				Logger.Info("Entry: %v, %v, %v, %v", entry.Prev, entry.Next, entry.Schedule, entry.Job)
			}
			time.Sleep(10 * time.Second)
		}
	}()

	return nil
}
func TestExecutorDriverFrameworkToExecutorMessageEvent(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send runtask event to driver
	pbMsg := &mesos.FrameworkToExecutorMessage{
		SlaveId:     util.NewSlaveID(slaveID),
		ExecutorId:  util.NewExecutorID(executorID),
		FrameworkId: util.NewFrameworkID(frameworkID),
		Data:        []byte("Hello-Test"),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)

	select {
	case <-ch:
	case <-time.After(time.Second * 1):
		log.Errorf("Tired of waiting...")
	}
}
Example #13
0
func (s *SchedulerServer) fetchFrameworkID(client tools.EtcdGetSet) (*mesos.FrameworkID, error) {
	if s.FailoverTimeout > 0 {
		if response, err := client.Get(meta.FrameworkIDKey, false, false); err != nil {
			if !tools.IsEtcdNotFound(err) {
				return nil, fmt.Errorf("unexpected failure attempting to load framework ID from etcd: %v", err)
			}
			log.V(1).Infof("did not find framework ID in etcd")
		} else if response.Node.Value != "" {
			log.Infof("configuring FrameworkInfo with Id found in etcd: '%s'", response.Node.Value)
			return mutil.NewFrameworkID(response.Node.Value), nil
		}
	} else {
		//TODO(jdef) this seems like a totally hackish way to clean up the framework ID
		if _, err := client.Delete(meta.FrameworkIDKey, true); err != nil {
			if !tools.IsEtcdNotFound(err) {
				return nil, fmt.Errorf("failed to delete framework ID from etcd: %v", err)
			}
			log.V(1).Infof("nothing to delete: did not find framework ID in etcd")
		}
	}
	return nil, nil
}
func TestExecutorDriverStatusUpdateAcknowledgement(t *testing.T) {
	setTestEnv(t)
	ch := make(chan bool, 2)
	// Mock Slave process to respond to registration event.
	server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) {
		reqPath, err := url.QueryUnescape(req.URL.String())
		assert.NoError(t, err)
		log.Infoln("RCVD request", reqPath)
		rsp.WriteHeader(http.StatusAccepted)
	})

	defer server.Close()

	exec := newTestExecutor(t)
	exec.ch = ch
	exec.t = t

	// start
	driver := newIntegrationTestDriver(t, exec)
	stat, err := driver.Start()
	assert.NoError(t, err)
	assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat)
	driver.setConnected(true)
	defer driver.Stop()

	// send ACK from server
	pbMsg := &mesos.StatusUpdateAcknowledgementMessage{
		SlaveId:     util.NewSlaveID(slaveID),
		FrameworkId: util.NewFrameworkID(frameworkID),
		TaskId:      util.NewTaskID("test-task-001"),
		Uuid:        []byte(uuid.NewRandom().String()),
	}

	c := testutil.NewMockMesosClient(t, server.PID)
	c.SendMessage(driver.self, pbMsg)
	<-time.After(time.Second * 1)
}
Example #15
0
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("This test is flaky, see #11901")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	as := NewAllocationStrategy(
		podtask.DefaultPredicate,
		podtask.NewDefaultProcurement(mresource.DefaultDefaultContainerCPULimit, mresource.DefaultDefaultContainerMemLimit))
	testScheduler := New(Config{
		Executor: executor,
		Client:   client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Default.Version()}),
		Scheduler: NewFCFSPodScheduler(as, func(node string) *api.Node {
			obj, _, _ := nodeStore.GetByKey(node)
			if obj == nil {
				return nil
			}
			return obj.(*api.Node)
		}),
		Schedcfg: *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c).(*schedulingPlugin)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	type LaunchedTask struct {
		offerId  mesos.OfferID
		taskInfo *mesos.TaskInfo
	}
	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))
		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
	mockDriver.On("DeclineOffer", mAny("*mesosproto.OfferID"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod, i := NewTestPod()
	podListWatch.Add(pod, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
	testScheduler.ResourceOffers(nil, offers)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.taskInfo.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_LOST))

		// and wait that framework message is sent to executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// Launch a pod and wait until the scheduler driver is called
	schedulePodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// wait for failedScheduling event because there is no offer
		assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

		// supply a matching offer
		testScheduler.ResourceOffers(mockDriver, offers)

		// and wait to get scheduled
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			for _, offer := range offers {
				if offer.Id.GetValue() == launchedTask.offerId.GetValue() {
					return pod, &launchedTask, offer
				}
			}
			t.Fatalf("unknown offer used to start a pod")
			return nil, nil, nil
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil, nil
		}
	}
	// Launch a pod and wait until the scheduler driver is called
	launchPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		podListWatch.Add(pod, true)
		return schedulePodWithOffers(pod, offers)
	}

	// Launch a pod, wait until the scheduler driver is called and report back that it is running
	startPodWithOffers := func(pod *api.Pod, offers []*mesos.Offer) (*api.Pod, *LaunchedTask, *mesos.Offer) {
		// notify about pod, offer resources and wait for scheduling
		pod, launchedTask, offer := launchPodWithOffers(pod, offers)
		if pod != nil {
			// report back status
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask, offer
		}

		return nil, nil, nil
	}

	startTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return startPodWithOffers(pod, offers)
	}

	// start another pod
	pod, launchedTask, _ := startTestPod()

	// mock drvier.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.taskInfo.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask.taskInfo, mesos.TaskState_TASK_FINISHED))

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start a pod with on a given NodeName and check that it is scheduled to the right host
	pod, i = NewTestPod()
	pod.Spec.NodeName = "hostname1"
	offers = []*mesos.Offer{}
	for j := 0; j < 3; j++ {
		offer := NewTestOffer(fmt.Sprintf("offer%d_%d", i, j))
		hostname := fmt.Sprintf("hostname%d", j)
		offer.Hostname = &hostname
		offers = append(offers, offer)
	}

	_, _, usedOffer := startPodWithOffers(pod, offers)

	assert.Equal(offers[1].Id.GetValue(), usedOffer.Id.GetValue())
	assert.Equal(pod.Spec.NodeName, *usedOffer.Hostname)

	testScheduler.OfferRescinded(mockDriver, offers[0].Id)
	testScheduler.OfferRescinded(mockDriver, offers[2].Id)

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver

	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcileTask will access apiserver for pod %v", pod.Name)
	}

	launchTestPod := func() (*api.Pod, *LaunchedTask, *mesos.Offer) {
		pod, i := NewTestPod()
		offers := []*mesos.Offer{NewTestOffer(fmt.Sprintf("offer%d", i))}
		return launchPodWithOffers(pod, offers)
	}

	// 1. with pod deleted from the apiserver
	//    expected: pod is removed from internal task registry
	pod, launchedTask, _ = launchTestPod()
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask.taskInfo)

	podKey, _ := podtask.MakePodKey(api.NewDefaultContext(), pod.Name)
	assertext.EventuallyTrue(t, time.Second, func() bool {
		t, _ := p.api.tasks().ForPod(podKey)
		return t == nil
	})

	// 2. with pod still on the apiserver, not bound
	//    expected: pod is rescheduled
	pod, launchedTask, _ = launchTestPod()
	failPodFromExecutor(launchedTask.taskInfo)

	retryOffers := []*mesos.Offer{NewTestOffer("retry-offer")}
	schedulePodWithOffers(pod, retryOffers)

	// 3. with pod still on the apiserver, bound, notified via ListWatch
	// expected: nothing, pod updates not supported, compare ReconcileTask function
	pod, launchedTask, usedOffer = startTestPod()
	pod.Annotations = map[string]string{
		meta.BindingHostKey: *usedOffer.Hostname,
	}
	pod.Spec.NodeName = *usedOffer.Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask.taskInfo)
}
Example #16
0
func (lt lifecycleTest) Start() <-chan LaunchedTask {
	assert := &EventAssertions{*assert.New(lt.t)}
	lt.sched.Run(lt.schedulerProc.Terminal())

	// init framework
	err := lt.framework.Init(
		lt.sched,
		lt.schedulerProc.Master(),
		http.DefaultServeMux,
	)
	assert.NoError(err)

	lt.driver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := lt.driver.Upon()

	lt.driver.On("ReconcileTasks",
		mock.AnythingOfType("[]*mesosproto.TaskStatus"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	lt.driver.On("SendFrameworkMessage",
		mock.AnythingOfType("*mesosproto.ExecutorID"),
		mock.AnythingOfType("*mesosproto.SlaveID"),
		mock.AnythingOfType("string"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	launchedTasks := make(chan LaunchedTask, 1)
	launchTasksFunc := func(args mock.Arguments) {
		offerIDs := args.Get(0).([]*mesos.OfferID)
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(offerIDs))
		assert.Equal(1, len(taskInfos))

		launchedTasks <- LaunchedTask{
			offerId:  *offerIDs[0],
			taskInfo: taskInfos[0],
		}
	}

	lt.driver.On("LaunchTasks",
		mock.AnythingOfType("[]*mesosproto.OfferID"),
		mock.AnythingOfType("[]*mesosproto.TaskInfo"),
		mock.AnythingOfType("*mesosproto.Filters"),
	).Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksFunc)

	lt.driver.On("DeclineOffer",
		mock.AnythingOfType("*mesosproto.OfferID"),
		mock.AnythingOfType("*mesosproto.Filters"),
	).Return(mesos.Status_DRIVER_RUNNING, nil)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return lt.driver, nil
	})
	lt.schedulerProc.Elect(driverFactory)
	elected := lt.schedulerProc.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	lt.framework.Registered(
		lt.driver,
		mesosutil.NewFrameworkID("kubernetes-id"),
		mesosutil.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected
	return launchedTasks
}
Example #17
0
// Test to create the scheduler plugin with the config returned by the scheduler,
// and play through the whole life cycle of the plugin while creating pods, deleting
// and failing them.
func TestPlugin_LifeCycle(t *testing.T) {
	t.Skip("disabled due to flakiness; see #10795")
	assert := &EventAssertions{*assert.New(t)}

	// create a fake pod watch. We use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch)
	defer testApiServer.server.Close()

	// create executor with some data for static pods if set
	executor := util.NewExecutorInfo(
		util.NewExecutorID("executor-id"),
		util.NewCommandInfo("executor-cmd"),
	)
	executor.Data = []byte{0, 1, 2}

	// create scheduler
	testScheduler := New(Config{
		Executor:     executor,
		Client:       client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}),
		ScheduleFunc: FCFSScheduleFunc,
		Schedcfg:     *schedcfg.CreateDefaultConfig(),
	})

	assert.NotNil(testScheduler.client, "client is nil")
	assert.NotNil(testScheduler.executor, "executor is nil")
	assert.NotNil(testScheduler.offers, "offer registry is nil")

	// create scheduler process
	schedulerProcess := ha.New(testScheduler)

	// get plugin config from it
	c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch)
	assert.NotNil(c)

	// make events observable
	eventObserver := NewEventObserver()
	c.Recorder = eventObserver

	// create plugin
	p := NewPlugin(c)
	assert.NotNil(p)

	// run plugin
	p.Run(schedulerProcess.Terminal())
	defer schedulerProcess.End()

	// init scheduler
	err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux)
	assert.NoError(err)

	// create mock mesos scheduler driver
	mockDriver := &joinableDriver{}
	mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	started := mockDriver.Upon()

	mAny := mock.AnythingOfType
	mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil)
	mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")).
		Return(mesos.Status_DRIVER_RUNNING, nil)

	launchedTasks := make(chan *mesos.TaskInfo, 1)
	launchTasksCalledFunc := func(args mock.Arguments) {
		taskInfos := args.Get(1).([]*mesos.TaskInfo)
		assert.Equal(1, len(taskInfos))
		launchedTasks <- taskInfos[0]
	}
	mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")).
		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)

	// elect master with mock driver
	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
		return mockDriver, nil
	})
	schedulerProcess.Elect(driverFactory)
	elected := schedulerProcess.Elected()

	// driver will be started
	<-started

	// tell scheduler to be registered
	testScheduler.Registered(
		mockDriver,
		util.NewFrameworkID("kubernetes-id"),
		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
	)

	// wait for being elected
	<-elected

	//TODO(jdef) refactor things above here into a test suite setup of some sort

	// fake new, unscheduled pod
	pod1 := NewTestPod(1)
	podListWatch.Add(pod1, true) // notify watchers

	// wait for failedScheduling event because there is no offer
	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")

	// add some matching offer
	offers1 := []*mesos.Offer{NewTestOffer(1)}
	testScheduler.ResourceOffers(nil, offers1)

	// and wait for scheduled pod
	assert.EventWithReason(eventObserver, "scheduled")
	select {
	case launchedTask := <-launchedTasks:
		// report back that the task has been staged, and then started by mesos
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))

		// check that ExecutorInfo.data has the static pod data
		assert.Len(launchedTask.Executor.Data, 3)

		// report back that the task has been lost
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))

		// and wait that framework message is sent to executor
		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)

	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for launchTasks call")
	}

	// start another pod
	podNum := 1
	startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
		podNum = podNum + 1

		// create pod and matching offer
		pod := NewTestPod(podNum)
		podListWatch.Add(pod, true) // notify watchers
		testScheduler.ResourceOffers(mockDriver, offers)
		assert.EventWithReason(eventObserver, "scheduled")

		// wait for driver.launchTasks call
		select {
		case launchedTask := <-launchedTasks:
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
			return pod, launchedTask

		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for launchTasks")
			return nil, nil
		}
	}

	pod, launchedTask := startPod(offers1)

	// mock drvier.KillTask, should be invoked when a pod is deleted
	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
		killedTaskId := *(args.Get(0).(*mesos.TaskID))
		assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
	})
	killTaskCalled := mockDriver.Upon()

	// stop it again via the apiserver mock
	podListWatch.Delete(pod, true) // notify watchers

	// and wait for the driver killTask call with the correct TaskId
	select {
	case <-killTaskCalled:
		// report back that the task is finished
		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for KillTask")
	}

	// start pods:
	// - which are failing while binding,
	// - leading to reconciliation
	// - with different states on the apiserver

	failPodFromExecutor := func(task *mesos.TaskInfo) {
		beforePodLookups := testApiServer.Stats(pod.Name)
		status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED)
		message := messages.CreateBindingFailure
		status.Message = &message
		testScheduler.StatusUpdate(mockDriver, status)

		// wait until pod is looked up at the apiserver
		assertext.EventuallyTrue(t, time.Second, func() bool {
			return testApiServer.Stats(pod.Name) == beforePodLookups+1
		}, "expect that reconcilePod will access apiserver for pod %v", pod.Name)
	}

	// 1. with pod deleted from the apiserver
	pod, launchedTask = startPod(offers1)
	podListWatch.Delete(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 2. with pod still on the apiserver, not bound
	pod, launchedTask = startPod(offers1)
	failPodFromExecutor(launchedTask)

	// 3. with pod still on the apiserver, bound i.e. host!=""
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, false) // not notifying the watchers
	failPodFromExecutor(launchedTask)

	// 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch
	pod, launchedTask = startPod(offers1)
	pod.Spec.NodeName = *offers1[0].Hostname
	podListWatch.Modify(pod, true) // notifying the watchers
	time.Sleep(time.Second / 2)
	failPodFromExecutor(launchedTask)
}
Example #18
0
func TestReconciliationOnStartup(t *gotesting.T) {
	testScheduler := NewEtcdScheduler(3, 0, 0, true, []*mesos.CommandInfo_URI{}, false, 4096, 1, 256)
	mockdriver := &MockSchedulerDriver{
		runningStatuses: make(chan *mesos.TaskStatus, 10),
		scheduler:       testScheduler,
	}

	reconciliation := map[string]string{
		"etcd-1": "slave-1",
		"etcd-2": "slave-2",
		"etcd-3": "slave-3",
	}
	testScheduler.reconciliationInfoFunc = func([]string, string, string) (map[string]string, error) {
		return reconciliation, nil
	}
	testScheduler.updateReconciliationInfoFunc = func(info map[string]string, _ []string, _ string, _ string) error {
		reconciliation = info
		return nil
	}
	// Valid reconciled tasks should be added to the running list.
	for _, taskStatus := range []*mesos.TaskStatus{
		util.NewTaskStatus(
			util.NewTaskID("etcd-1 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
		util.NewTaskStatus(
			util.NewTaskID("etcd-2 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
		util.NewTaskStatus(
			util.NewTaskID("etcd-3 localhost 0 0 0"),
			mesos.TaskState_TASK_RUNNING,
		),
	} {
		mockdriver.runningStatuses <- taskStatus
	}

	mockdriver.Lock()
	mockdriver.On(
		"ReconcileTasks",
		0,
	).Return(mesos.Status_DRIVER_RUNNING, nil).Once()

	mockdriver.On(
		"ReconcileTasks",
		3,
	).Return(mesos.Status_DRIVER_RUNNING, nil).Once()
	mockdriver.Unlock()

	masterInfo := util.NewMasterInfo("master-1", 0, 0)
	masterInfo.Hostname = proto.String("test-host")

	testScheduler.Registered(
		mockdriver,
		util.NewFrameworkID("framework-1"),
		masterInfo,
	)

	time.Sleep(50 * time.Millisecond)

	mockdriver.Lock()
	defer mockdriver.Unlock()
	assert.Equal(t, 3, len(testScheduler.running),
		"Scheduler should reconcile tasks properly.")

	mockdriver.AssertExpectations(t)
}