Example #1
0
// New creates a new KubernetesScheduler wired up from the given Config.
// The offer registry's Compat/DeclineOffer callbacks close over the scheduler
// pointer k; they only run after construction completes, so the late binding
// is safe.
func New(config Config) *KubernetesScheduler {
	var k *KubernetesScheduler
	k = &KubernetesScheduler{
		schedcfg:          &config.Schedcfg,
		RWMutex:           new(sync.RWMutex),
		executor:          config.Executor,
		executorGroup:     uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
		PodScheduler:      config.Scheduler,
		client:            config.Client,
		etcdClient:        config.EtcdClient,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}

				// the executor IDs must not identify a kubelet-executor with a group that doesn't match ours
				for _, eid := range o.GetExecutorIds() {
					execuid := uid.Parse(eid.GetValue())
					if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
						return false
					}
				}

				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// Report must observe the value of err at return time.
					// A plain `defer errOnce.Report(err)` would evaluate err
					// immediately (nil) and silently drop any decline error.
					defer func() {
						errOnce.Report(err)
					}()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     config.Schedcfg.OfferLingerTTL.Duration,
			TTL:           config.Schedcfg.OfferTTL.Duration,
			ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
		}),
		slaveHostNames:    slave.NewRegistry(),
		taskRegistry:      podtask.NewInMemoryRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
	}
	return k
}
Example #2
0
// TestSlave_Lost verifies that losing a slave removes all of its cached
// offers while the slave hostname registry keeps its entries.
func TestSlave_Lost(t *testing.T) {
	assert := assert.New(t)

	fw := &framework{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// keep expired offers around so that we can tell whether a
			// previously scheduled offer relied on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames: newSlaveRegistry(),
		sched:          mockScheduler(),
	}

	// helper: build an offer whose slave ID equals its hostname
	newOffer := func(id, host string) *mesos.Offer {
		return &mesos.Offer{Id: util.NewOfferID(id), Hostname: &host, SlaveId: util.NewSlaveID(host)}
	}

	// two offers from the first slave, a third from a different slave
	host1, host2 := "h1", "h2"
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test1", host1)})
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test2", host1)})
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test3", host2)})

	// precondition: three cached offers, two known slaves
	assert.Equal(3, getNumberOffers(fw.offers))
	assert.Equal(2, len(fw.slaveHostNames.SlaveIDs()))

	// losing the first slave drops its offers but keeps both hostnames
	fw.SlaveLost(nil, util.NewSlaveID(host1))
	assert.Equal(1, getNumberOffers(fw.offers))
	assert.Equal(2, len(fw.slaveHostNames.SlaveIDs()))

	// losing the second slave drops the remaining offer
	fw.SlaveLost(nil, util.NewSlaveID(host2))
	assert.Equal(0, getNumberOffers(fw.offers))
	assert.Equal(2, len(fw.slaveHostNames.SlaveIDs()))

	// losing an unknown slave must be harmless
	fw.SlaveLost(nil, util.NewSlaveID("notExist"))
}
Example #3
0
// TestResourceOffer_Add_Rescind checks that resource offers are added to
// the offer registry and the slave registry, and that rescinding removes
// them from the registry again.
func TestResourceOffer_Add_Rescind(t *testing.T) {
	assert := assert.New(t)

	fw := &framework{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat:       func(o *mesos.Offer) bool { return true },
			DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) },
			// keep expired offers around so that we can tell whether a
			// previously scheduled offer relied on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames: newSlaveRegistry(),
		sched:          mockScheduler(),
	}

	// helper: build an offer whose slave ID equals its hostname
	newOffer := func(id, host string) *mesos.Offer {
		return &mesos.Offer{Id: util.NewOfferID(id), Hostname: &host, SlaveId: util.NewSlaveID(host)}
	}

	// first offer from slave h1
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test1", "h1")})
	assert.Equal(1, getNumberOffers(fw.offers))
	assert.Equal(1, len(fw.slaveHostNames.SlaveIDs()))

	// second offer from a different slave h2
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test2", "h2")})
	assert.Equal(2, getNumberOffers(fw.offers))
	assert.Equal(2, len(fw.slaveHostNames.SlaveIDs()))

	// rescinding removes each offer from the registry
	fw.OfferRescinded(nil, util.NewOfferID("test1"))
	assert.Equal(1, getNumberOffers(fw.offers))
	fw.OfferRescinded(nil, util.NewOfferID("test2"))
	assert.Equal(0, getNumberOffers(fw.offers))

	// rescinding an unknown offer ID must be harmless
	fw.OfferRescinded(nil, util.NewOfferID("notExist"))
}
Example #4
0
// TestResourceOffer_Add checks that resource offers are added to the offer
// registry and the slave registry, and that the node registrator records
// the offering node.
func TestResourceOffer_Add(t *testing.T) {
	assert := assert.New(t)

	registrator := &mockRegistrator{cache.NewStore(cache.MetaNamespaceKeyFunc)}
	fw := &framework{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat:       func(o *mesos.Offer) bool { return true },
			DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) },
			// keep expired offers around so that we can tell whether a
			// previously scheduled offer relied on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames:  newSlaveRegistry(),
		nodeRegistrator: registrator,
		sched:           mockScheduler(),
	}

	// helper: build an offer whose slave ID equals its hostname
	newOffer := func(id, host string) *mesos.Offer {
		return &mesos.Offer{Id: util.NewOfferID(id), Hostname: &host, SlaveId: util.NewSlaveID(host)}
	}

	// first offer: cached, slave recorded, node stored by the registrator
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test1", "h1")})
	assert.Equal(1, len(registrator.store.List()))
	assert.Equal(1, getNumberOffers(fw.offers))
	assert.Equal(1, len(fw.slaveHostNames.SlaveIDs()))

	// an offer from a second slave is cached as well
	fw.ResourceOffers(nil, []*mesos.Offer{newOffer("test2", "h2")})
	assert.Equal(2, getNumberOffers(fw.offers))
	assert.Equal(2, len(fw.slaveHostNames.SlaveIDs()))
}
Example #5
0
// TestStatus_Update feeds a sequence of task status updates (RUNNING,
// RUNNING, FAILED) through the framework and verifies the driver mock's
// KillTask expectation is satisfied. TODO check state transitions.
func TestStatus_Update(t *testing.T) {

	driver := MockSchedulerDriver{}
	// expectation: KillTask is invoked for the task at some point
	driver.On("KillTask", util.NewTaskID("test-task-001")).Return(mesos.Status_DRIVER_RUNNING, nil)

	fw := &framework{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool { return true },
			// keep expired offers around so that we can tell whether a
			// previously scheduled offer relied on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames: newSlaveRegistry(),
		driver:         &driver,
		sched:          mockScheduler(),
	}

	// helper: deliver one status update for the fixed test task
	send := func(state mesos.TaskState) {
		fw.StatusUpdate(fw.driver, util.NewTaskStatus(
			util.NewTaskID("test-task-001"),
			state,
		))
	}

	send(mesos.TaskState_TASK_RUNNING)
	send(mesos.TaskState_TASK_RUNNING)
	send(mesos.TaskState_TASK_FAILED)

	// assert that the mock expectation above was met
	driver.AssertExpectations(t)
}
Example #6
0
// TestResourceOffer_Add checks that resource offers are added to the offer
// registry and to the slave storage.
func TestResourceOffer_Add(t *testing.T) {
	assert := assert.New(t)

	s := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat:       func(o *mesos.Offer) bool { return true },
			DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) },
			// keep expired offers around so that we can tell whether a
			// previously scheduled offer relied on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaves: newSlaveStorage(),
	}

	// helper: build an offer whose slave ID equals its hostname
	newOffer := func(id, host string) *mesos.Offer {
		return &mesos.Offer{Id: util.NewOfferID(id), Hostname: &host, SlaveId: util.NewSlaveID(host)}
	}

	// first offer from slave h1
	s.ResourceOffers(nil, []*mesos.Offer{newOffer("test1", "h1")})
	assert.Equal(1, getNumberOffers(s.offers))
	assert.Equal(1, len(s.slaves.getSlaveIds()))

	// second offer from a different slave h2
	s.ResourceOffers(nil, []*mesos.Offer{newOffer("test2", "h2")})
	assert.Equal(2, getNumberOffers(s.offers))
	assert.Equal(2, len(s.slaves.getSlaveIds()))
}
//test when we lose connection to master we invalidate all cached offers
func TestDisconnect(t *testing.T) {
	assert := assert.New(t)

	//
	testScheduler := &KubernetesScheduler{
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				return true
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     schedcfg.DefaultOfferLingerTTL,
			TTL:           schedcfg.DefaultOfferTTL,
			ListenerDelay: schedcfg.DefaultListenerDelay,
		}),
		slaveHostNames: slave.NewRegistry(),
	}

	hostname := "h1"
	offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers1 := []*mesos.Offer{offer1}
	testScheduler.ResourceOffers(nil, offers1)
	offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
	offers2 := []*mesos.Offer{offer2}
	testScheduler.ResourceOffers(nil, offers2)

	//add another offer from different slaveID; use a unique offer ID "test3"
	//(the previous "test2" collided with offer2's ID)
	hostname2 := "h2"
	offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
	offers3 := []*mesos.Offer{offer3}
	testScheduler.ResourceOffers(nil, offers3)

	//test precondition: all three offers are cached
	assert.Equal(3, getNumberOffers(testScheduler.offers))

	//disconnect
	testScheduler.Disconnected(nil)

	//all offers should be removed
	assert.Equal(0, getNumberOffers(testScheduler.offers))
	//slave hostnames should still be all present
	assert.Equal(2, len(testScheduler.slaveHostNames.SlaveIDs()))
}
Example #8
0
// New creates a new Framework wired up from the given Config.
// The offer registry's Compat/DeclineOffer callbacks close over the
// framework pointer k; they only run after construction completes, so the
// late binding is safe.
func New(config Config) Framework {
	var k *framework
	k = &framework{
		schedulerConfig:   &config.SchedulerConfig,
		RWMutex:           new(sync.RWMutex),
		client:            config.Client,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		executorId:        config.ExecutorId,
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}

				eids := len(o.GetExecutorIds())
				switch {
				case eids > 1:
					// at most one executor id expected. More than one means that
					// the given node is seriously in trouble.
					return false

				case eids == 1:
					// the executor id must match, otherwise the running executor
					// is incompatible with the current scheduler configuration.
					if eid := o.GetExecutorIds()[0]; eid.GetValue() != config.ExecutorId.GetValue() {
						return false
					}
				}

				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// Report must observe the value of err at return time.
					// A plain `defer errOnce.Report(err)` would evaluate err
					// immediately (nil) and silently drop any decline error.
					defer func() {
						errOnce.Report(err)
					}()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previously scheduled offer relies on one
			LingerTTL:     config.SchedulerConfig.OfferLingerTTL.Duration,
			TTL:           config.SchedulerConfig.OfferTTL.Duration,
			ListenerDelay: config.SchedulerConfig.ListenerDelay.Duration,
		}),
		slaveHostNames:    newSlaveRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
		storeFrameworkId: config.StoreFrameworkId,
		lookupNode:       config.LookupNode,
	}
	return k
}
Example #9
0
// TestInMemoryRegistry_Update verifies which parts of a registered pod task
// may be updated in each lifecycle state: offer, spec, and flags while
// pending; flags only while running; nothing once finished.
func TestInMemoryRegistry_Update(t *testing.T) {
	assert := assert.New(t)

	// build an offer registry and stash a single offer in it
	ttl := time.Second / 4
	storage := offers.CreateRegistry(offers.RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) },
		Compat:       func(o *mesos.Offer) bool { return true },
		TTL:          ttl,
		LingerTTL:    2 * ttl,
	})

	offerId := mesosutil.NewOfferID("foo")
	storage.Add([]*mesos.Offer{{Id: offerId}})
	offer, ok := storage.Get(offerId.GetValue())
	assert.True(ok)

	// register a clone so the local task can be mutated freely below
	registry := NewInMemoryRegistry()
	task, _ := fakePodTask("a")
	registry.Register(task.Clone(), nil)

	// state changes are ignored
	task.State = StateRunning
	assert.NoError(registry.Update(task))
	stored, _ := registry.Get(task.ID)
	assert.Equal(StatePending, stored.State)

	// offer is updated while pending
	task.Offer = offer
	assert.NoError(registry.Update(task))
	stored, _ = registry.Get(task.ID)
	assert.Equal(offer.Id(), stored.Offer.Id())

	// spec is updated while pending
	task.Spec = Spec{SlaveID: "slave-1"}
	assert.NoError(registry.Update(task))
	stored, _ = registry.Get(task.ID)
	assert.Equal("slave-1", stored.Spec.SlaveID)

	// flags are updated while pending
	task.Flags[Launched] = struct{}{}
	assert.NoError(registry.Update(task))
	stored, _ = registry.Get(task.ID)
	_, launched := stored.Flags[Launched]
	assert.True(launched)

	// flags are updated while running
	registry.UpdateStatus(fakeStatusUpdate(task.ID, mesos.TaskState_TASK_RUNNING))
	task.Flags[Bound] = struct{}{}
	assert.NoError(registry.Update(task))
	stored, _ = registry.Get(task.ID)
	_, launched = stored.Flags[Launched]
	assert.True(launched)
	_, bound := stored.Flags[Bound]
	assert.True(bound)

	// spec is ignored while running
	task.Spec = Spec{SlaveID: "slave-2"}
	assert.NoError(registry.Update(task))
	stored, _ = registry.Get(task.ID)
	assert.Equal("slave-1", stored.Spec.SlaveID)

	// updating a finished task is an error
	registry.UpdateStatus(fakeStatusUpdate(task.ID, mesos.TaskState_TASK_FINISHED))
	assert.Error(registry.Update(task))

	// updating an unknown task is an error
	unknown, _ := fakePodTask("unknown-task")
	assert.Error(registry.Update(unknown))

	// updating a nil task yields no error
	assert.Nil(registry.Update(nil))
}