// reconciler action factory, performs explicit task reconciliation for non-terminal // tasks identified by annotations in the Kubernetes pod registry. func (k *KubernetesScheduler) makePodRegistryReconciler() ReconcilerAction { return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error { ctx := api.NewDefaultContext() podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything()) if err != nil { return proc.ErrorChanf("failed to reconcile pod registry: %v", err) } taskToSlave := make(map[string]string) for _, pod := range podList.Items { if len(pod.Annotations) == 0 { continue } taskId, found := pod.Annotations[meta.TaskIdKey] if !found { continue } slaveId, found := pod.Annotations[meta.SlaveIdKey] if !found { continue } taskToSlave[taskId] = slaveId } return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel)) }) }
// returns a Process instance that will only execute a proc.Action if the scheduler is the elected master func (self *SchedulerProcess) Master() proc.Process { return &masterProcess{ SchedulerProcess: self, doer: proc.DoWith(self, proc.DoerFunc(func(a proc.Action) <-chan error { return proc.ErrorChan(masterStage.When(self, a)) })), } }
// reconciler action factory, performs explicit task reconciliation for non-terminal // tasks listed in the scheduler's internal taskRegistry. func (k *KubernetesScheduler) makeTaskRegistryReconciler() ReconcilerAction { return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error { taskToSlave := make(map[string]string) for _, t := range k.taskRegistry.List(explicitTaskFilter) { if t.Spec.SlaveID != "" { taskToSlave[t.ID] = t.Spec.SlaveID } } return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel)) }) }
//test adding of ressource offer, should be added to offer registry and slavesf func TestResourceOffer_Add_Rescind(t *testing.T) { assert := assert.New(t) testFramework := &framework{ offers: offers.CreateRegistry(offers.RegistryConfig{ Compat: func(o *mesos.Offer) bool { return true }, DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) }, // remember expired offers so that we can tell if a previously scheduler offer relies on one LingerTTL: schedcfg.DefaultOfferLingerTTL, TTL: schedcfg.DefaultOfferTTL, ListenerDelay: schedcfg.DefaultListenerDelay, }), slaveHostNames: newSlaveRegistry(), sched: mockScheduler(), } hostname := "h1" offerID1 := util.NewOfferID("test1") offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)} offers1 := []*mesos.Offer{offer1} testFramework.ResourceOffers(nil, offers1) assert.Equal(1, getNumberOffers(testFramework.offers)) //check slave hostname assert.Equal(1, len(testFramework.slaveHostNames.SlaveIDs())) //add another offer hostname2 := "h2" offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)} offers2 := []*mesos.Offer{offer2} testFramework.ResourceOffers(nil, offers2) assert.Equal(2, getNumberOffers(testFramework.offers)) //check slave hostnames assert.Equal(2, len(testFramework.slaveHostNames.SlaveIDs())) //next whether offers can be rescinded testFramework.OfferRescinded(nil, offerID1) assert.Equal(1, getNumberOffers(testFramework.offers)) //next whether offers can be rescinded testFramework.OfferRescinded(nil, util.NewOfferID("test2")) //walk offers again and check it is removed from registry assert.Equal(0, getNumberOffers(testFramework.offers)) //remove non existing ID testFramework.OfferRescinded(nil, util.NewOfferID("notExist")) }
func TestListen(t *testing.T) { ttl := time.Second / 4 config := RegistryConfig{ DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) }, Compat: func(o *mesos.Offer) bool { return true }, TTL: ttl, ListenerDelay: ttl / 2, } storage := CreateRegistry(config) done := make(chan struct{}) storage.Init(done) // Create two listeners with a hostname filter hostname1 := "hostname1" hostname2 := "hostname2" listener1 := storage.Listen("listener1", func(offer *mesos.Offer) bool { return offer.GetHostname() == hostname1 }) listener2 := storage.Listen("listener2", func(offer *mesos.Offer) bool { return offer.GetHostname() == hostname2 }) // Add hostname1 offer id := util.NewOfferID("foo") o := &mesos.Offer{Id: id, Hostname: &hostname1} storage.Add([]*mesos.Offer{o}) // listener1 is notified by closing channel select { case _, more := <-listener1: if more { t.Error("listener1 is not closed") } } // listener2 is not notified within ttl select { case <-listener2: t.Error("listener2 is notified") case <-time.After(ttl): } close(done) } // TestListen
//test adding of ressource offer, should be added to offer registry and slaves func TestResourceOffer_Add(t *testing.T) { assert := assert.New(t) registrator := &mockRegistrator{cache.NewStore(cache.MetaNamespaceKeyFunc)} testFramework := &framework{ offers: offers.CreateRegistry(offers.RegistryConfig{ Compat: func(o *mesos.Offer) bool { return true }, DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) }, // remember expired offers so that we can tell if a previously scheduler offer relies on one LingerTTL: schedcfg.DefaultOfferLingerTTL, TTL: schedcfg.DefaultOfferTTL, ListenerDelay: schedcfg.DefaultListenerDelay, }), slaveHostNames: newSlaveRegistry(), nodeRegistrator: registrator, sched: mockScheduler(), } hostname := "h1" offerID1 := util.NewOfferID("test1") offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)} offers1 := []*mesos.Offer{offer1} testFramework.ResourceOffers(nil, offers1) assert.Equal(1, len(registrator.store.List())) assert.Equal(1, getNumberOffers(testFramework.offers)) //check slave hostname assert.Equal(1, len(testFramework.slaveHostNames.SlaveIDs())) //add another offer hostname2 := "h2" offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)} offers2 := []*mesos.Offer{offer2} testFramework.ResourceOffers(nil, offers2) //check it is stored in registry assert.Equal(2, getNumberOffers(testFramework.offers)) //check slave hostnames assert.Equal(2, len(testFramework.slaveHostNames.SlaveIDs())) }
//test adding of ressource offer, should be added to offer registry and slavesf func TestResourceOffer_Add(t *testing.T) { assert := assert.New(t) testScheduler := &KubernetesScheduler{ offers: offers.CreateRegistry(offers.RegistryConfig{ Compat: func(o *mesos.Offer) bool { return true }, DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) }, // remember expired offers so that we can tell if a previously scheduler offer relies on one LingerTTL: schedcfg.DefaultOfferLingerTTL, TTL: schedcfg.DefaultOfferTTL, ListenerDelay: schedcfg.DefaultListenerDelay, }), slaves: newSlaveStorage(), } hostname := "h1" offerID1 := util.NewOfferID("test1") offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)} offers1 := []*mesos.Offer{offer1} testScheduler.ResourceOffers(nil, offers1) assert.Equal(1, getNumberOffers(testScheduler.offers)) //check slave hostname assert.Equal(1, len(testScheduler.slaves.getSlaveIds())) //add another offer hostname2 := "h2" offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)} offers2 := []*mesos.Offer{offer2} testScheduler.ResourceOffers(nil, offers2) //check it is stored in registry assert.Equal(2, getNumberOffers(testScheduler.offers)) //check slave hostnames assert.Equal(2, len(testScheduler.slaves.getSlaveIds())) }
// reconciler action factory, performs explicit task reconciliation for non-terminal // tasks identified by annotations in the Kubernetes pod registry. func (k *framework) makePodRegistryReconciler() taskreconciler.Action { return taskreconciler.Action(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error { podList, err := k.client.Core().Pods(api.NamespaceAll).List(api.ListOptions{}) if err != nil { return proc.ErrorChanf("failed to reconcile pod registry: %v", err) } taskToSlave := make(map[string]string) for _, pod := range podList.Items { if len(pod.Annotations) == 0 { continue } taskId, found := pod.Annotations[meta.TaskIdKey] if !found { continue } slaveId, found := pod.Annotations[meta.SlaveIdKey] if !found { continue } taskToSlave[taskId] = slaveId } return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel)) }) }
// TestInMemoryRegistry_Update exercises Update() across the task lifecycle:
// which fields may be mutated while the task is pending vs. running, and the
// error cases (finished task, unknown task, nil task). The order of the steps
// below matters — each section builds on the registry state left by the
// previous one.
func TestInMemoryRegistry_Update(t *testing.T) {
	assert := assert.New(t)

	// create offers registry
	ttl := time.Second / 4
	config := offers.RegistryConfig{
		DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) },
		Compat:       func(o *mesos.Offer) bool { return true },
		TTL:          ttl,
		LingerTTL:    2 * ttl,
	}
	storage := offers.CreateRegistry(config)

	// Add offer
	offerId := mesosutil.NewOfferID("foo")
	mesosOffer := &mesos.Offer{Id: offerId}
	storage.Add([]*mesos.Offer{mesosOffer})
	offer, ok := storage.Get(offerId.GetValue())
	assert.True(ok)

	// create registry
	registry := NewInMemoryRegistry()
	a, _ := fakePodTask("a")
	registry.Register(a.Clone(), nil) // here clone a because we change it below

	// state changes are ignored: the registry keeps its own view of the state
	a.State = StateRunning
	err := registry.Update(a)
	assert.NoError(err)
	a_clone, _ := registry.Get(a.ID)
	assert.Equal(StatePending, a_clone.State)

	// offer is updated while pending
	a.Offer = offer
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal(offer.Id(), a_clone.Offer.Id())

	// spec is updated while pending
	a.Spec = Spec{SlaveID: "slave-1"}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal("slave-1", a_clone.Spec.SlaveID)

	// flags are updated while pending
	a.Flags[Launched] = struct{}{}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	_, found_launched := a_clone.Flags[Launched]
	assert.True(found_launched)

	// flags are updated while running (status update moves the task to running)
	registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING))
	a.Flags[Bound] = struct{}{}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	_, found_launched = a_clone.Flags[Launched]
	assert.True(found_launched)
	_, found_bound := a_clone.Flags[Bound]
	assert.True(found_bound)

	// spec is ignored while running: the earlier slave assignment sticks
	a.Spec = Spec{SlaveID: "slave-2"}
	err = registry.Update(a)
	assert.NoError(err)
	a_clone, _ = registry.Get(a.ID)
	assert.Equal("slave-1", a_clone.Spec.SlaveID)

	// error when finished
	registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_FINISHED))
	err = registry.Update(a)
	assert.Error(err)

	// update unknown task
	unknown_task, _ := fakePodTask("unknown-task")
	err = registry.Update(unknown_task)
	assert.Error(err)

	// update nil task: treated as a no-op, returns nil — presumably part of
	// Update's contract; verify against the registry implementation.
	err = registry.Update(nil)
	assert.Nil(err)
}
func TestOfferStorage(t *testing.T) { ttl := time.Second / 4 var declinedNum int32 getDeclinedNum := func() int32 { return atomic.LoadInt32(&declinedNum) } config := RegistryConfig{ DeclineOffer: func(offerId string) <-chan error { atomic.AddInt32(&declinedNum, 1) return proc.ErrorChan(nil) }, Compat: func(o *mesos.Offer) bool { return o.Hostname == nil || *o.Hostname != "incompatiblehost" }, TTL: ttl, LingerTTL: 2 * ttl, } storage := CreateRegistry(config) done := make(chan struct{}) storage.Init(done) // Add offer id := util.NewOfferID("foo") o := &mesos.Offer{Id: id} storage.Add([]*mesos.Offer{o}) // Added offer should be in the storage if obj, ok := storage.Get(id.GetValue()); obj == nil || !ok { t.Error("offer not added") } if obj, _ := storage.Get(id.GetValue()); obj.Details() != o { t.Error("added offer differs from returned offer") } // Not-added offer is not in storage if obj, ok := storage.Get("bar"); obj != nil || ok { t.Error("offer bar should not exist in storage") } // Deleted offer lingers in storage, is acquired and declined offer, _ := storage.Get(id.GetValue()) declinedNumBefore := getDeclinedNum() storage.Delete(id.GetValue(), "deleted for test") if obj, _ := storage.Get(id.GetValue()); obj == nil { t.Error("deleted offer is not lingering") } if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() { t.Error("deleted offer is no expired") } if ok := offer.Acquire(); ok { t.Error("deleted offer can be acquired") } if getDeclinedNum() <= declinedNumBefore { t.Error("deleted offer was not declined") } // Acquired offer is only declined after 2*ttl id = util.NewOfferID("foo2") o = &mesos.Offer{Id: id} storage.Add([]*mesos.Offer{o}) offer, _ = storage.Get(id.GetValue()) declinedNumBefore = getDeclinedNum() offer.Acquire() storage.Delete(id.GetValue(), "deleted for test") if getDeclinedNum() > declinedNumBefore { t.Error("acquired offer is declined") } offer.Release() time.Sleep(3 * ttl) if getDeclinedNum() <= declinedNumBefore { t.Error("released 
offer is not declined after 2*ttl") } // Added offer should be expired after ttl, but lingering id = util.NewOfferID("foo3") o = &mesos.Offer{Id: id} storage.Add([]*mesos.Offer{o}) time.Sleep(2 * ttl) obj, ok := storage.Get(id.GetValue()) if obj == nil || !ok { t.Error("offer not lingering after ttl") } if !obj.HasExpired() { t.Error("offer is not expired after ttl") } // Should be deleted when waiting longer than LingerTTL time.Sleep(2 * ttl) if obj, ok := storage.Get(id.GetValue()); obj != nil || ok { t.Error("offer not deleted after LingerTTL") } // Incompatible offer is declined id = util.NewOfferID("foo4") incompatibleHostname := "incompatiblehost" o = &mesos.Offer{Id: id, Hostname: &incompatibleHostname} declinedNumBefore = getDeclinedNum() storage.Add([]*mesos.Offer{o}) if obj, ok := storage.Get(id.GetValue()); obj != nil || ok { t.Error("incompatible offer not rejected") } if getDeclinedNum() <= declinedNumBefore { t.Error("incompatible offer is not declined") } // Invalidated offer are not declined, but expired id = util.NewOfferID("foo5") o = &mesos.Offer{Id: id} storage.Add([]*mesos.Offer{o}) offer, _ = storage.Get(id.GetValue()) declinedNumBefore = getDeclinedNum() storage.Invalidate(id.GetValue()) if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() { t.Error("invalidated offer is not expired") } if getDeclinedNum() > declinedNumBefore { t.Error("invalidated offer is declined") } if ok := offer.Acquire(); ok { t.Error("invalidated offer can be acquired") } // Invalidate "" will invalidate all offers id = util.NewOfferID("foo6") o = &mesos.Offer{Id: id} storage.Add([]*mesos.Offer{o}) id2 := util.NewOfferID("foo7") o2 := &mesos.Offer{Id: id2} storage.Add([]*mesos.Offer{o2}) storage.Invalidate("") if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() { t.Error("invalidated offer is not expired") } if obj2, _ := storage.Get(id2.GetValue()); !obj2.HasExpired() { t.Error("invalidated offer is not expired") } // InvalidateForSlave invalides all 
offers for that slave, but only those id = util.NewOfferID("foo8") slaveId := util.NewSlaveID("test-slave") o = &mesos.Offer{Id: id, SlaveId: slaveId} storage.Add([]*mesos.Offer{o}) id2 = util.NewOfferID("foo9") o2 = &mesos.Offer{Id: id2} storage.Add([]*mesos.Offer{o2}) storage.InvalidateForSlave(slaveId.GetValue()) if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() { t.Error("invalidated offer for test-slave is not expired") } if obj2, _ := storage.Get(id2.GetValue()); obj2.HasExpired() { t.Error("invalidated offer another slave is expired") } close(done) } // TestOfferStorage
func TestWalk(t *testing.T) { t.Parallel() config := RegistryConfig{ DeclineOffer: func(offerId string) <-chan error { return proc.ErrorChan(nil) }, TTL: 0 * time.Second, LingerTTL: 0 * time.Second, ListenerDelay: 0 * time.Second, } storage := CreateRegistry(config) acceptedOfferId := "" walked := 0 walker1 := func(p Perishable) (bool, error) { walked++ if p.Acquire() { acceptedOfferId = p.Details().Id.GetValue() return true, nil } return false, nil } // sanity check err := storage.Walk(walker1) if err != nil { t.Fatalf("received impossible error %v", err) } if walked != 0 { t.Fatal("walked empty storage") } if acceptedOfferId != "" { t.Fatal("somehow found an offer when registry was empty") } impl, ok := storage.(*offerStorage) if !ok { t.Fatal("unexpected offer storage impl") } // single offer ttl := 2 * time.Second now := time.Now() o := &liveOffer{&mesos.Offer{Id: util.NewOfferID("foo")}, now.Add(ttl), 0} impl.offers.Add(o) err = storage.Walk(walker1) if err != nil { t.Fatalf("received impossible error %v", err) } if walked != 1 { t.Fatalf("walk count %d", walked) } if acceptedOfferId != "foo" { t.Fatalf("found offer %v", acceptedOfferId) } acceptedOfferId = "" err = storage.Walk(walker1) if err != nil { t.Fatalf("received impossible error %v", err) } if walked != 2 { t.Fatalf("walk count %d", walked) } if acceptedOfferId != "" { t.Fatalf("found offer %v", acceptedOfferId) } walker2 := func(p Perishable) (bool, error) { walked++ return true, nil } err = storage.Walk(walker2) if err != nil { t.Fatalf("received impossible error %v", err) } if walked != 3 { t.Fatalf("walk count %d", walked) } if acceptedOfferId != "" { t.Fatalf("found offer %v", acceptedOfferId) } walker3 := func(p Perishable) (bool, error) { walked++ return true, errors.New("baz") } err = storage.Walk(walker3) if err == nil { t.Fatal("expected error") } if walked != 4 { t.Fatalf("walk count %d", walked) } }