func newReconciler(doer proc.Doer, action ReconcilerAction,
	cooldown, explicitReconciliationAbortTimeout time.Duration, done <-chan struct{}) *Reconciler {
	return &Reconciler{
		Doer:                               doer,
		explicit:                           make(chan struct{}, 1),
		implicit:                           make(chan struct{}, 1),
		cooldown:                           cooldown,
		explicitReconciliationAbortTimeout: explicitReconciliationAbortTimeout,
		done:                               done,
		Action: func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
			// trigger the reconciler action in the doer's execution context,
			// but note that it could take a while, and the scheduler needs to
			// be able to process updates, the callbacks for which ALSO execute
			// in the SAME deferred execution context -- so the action MUST be
			// executed async.
			errOnce := proc.NewErrorOnce(cancel)
			return errOnce.Send(doer.Do(func() {
				// doer.Do only triggers the action if we're the currently
				// elected, registered master; run the action itself async.
				go func() {
					var err <-chan error
					// send via a closure so that the channel returned by
					// action is forwarded, not the nil value of err captured
					// at defer time
					defer func() { errOnce.Send(err) }()
					err = action(driver, cancel)
				}()
			})).Err()
		},
	}
}
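// The proc.ErrorOnce used above guarantees that exactly one error value
// (possibly nil) is delivered to waiters, no matter how many goroutines race
// to report. A minimal, self-contained sketch of the idea -- illustrative
// only, not the actual proc package implementation:
//
//	type errorOnce struct {
//		once sync.Once
//		ch   chan error
//	}
//
//	func newErrorOnce() *errorOnce {
//		return &errorOnce{ch: make(chan error, 1)}
//	}
//
//	// Report delivers err to waiters; only the first call wins.
//	func (e *errorOnce) Report(err error) {
//		e.once.Do(func() {
//			e.ch <- err
//			close(e.ch)
//		})
//	}
//
//	// Err returns the channel from which waiters receive the single result.
//	func (e *errorOnce) Err() <-chan error {
//		return e.ch
//	}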
// Do executes some action in the deferred context of the process, but only if
// we match the stage of the process at the time the action is executed.
func (stage stageType) Do(p *SchedulerProcess, a proc.Action) <-chan error {
	errOnce := proc.NewErrorOnce(p.fin)
	errOuter := p.Do(proc.Action(func() {
		switch stage {
		case standbyStage:
			// await standby signal or death
			select {
			case <-p.standby:
			case <-p.Done():
			}
		case masterStage:
			// await elected signal or death
			select {
			case <-p.elected:
			case <-p.Done():
			}
		case finStage:
			errOnce.Reportf("scheduler process is dying, dropping action")
			return
		default:
		}
		errOnce.Report(stage.When(p, a))
	}))
	return errOnce.Send(errOuter).Err()
}
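// Usage sketch (an illustrative call site, not part of this file): a caller
// that must only touch the driver once this process has been elected master
// gates the action on masterStage; the action is deferred until p.elected is
// closed, or fails fast if the process is already dying:
//
//	errCh := masterStage.Do(p, proc.Action(func() {
//		// runs in the process's deferred context, post-election
//	}))
//	if err := <-errCh; err != nil {
//		log.Errorf("deferred action failed: %v", err)
//	}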
// New creates a new KubernetesScheduler
func New(config Config) *KubernetesScheduler {
	// k is declared ahead of the literal so the closures below can capture it
	var k *KubernetesScheduler
	k = &KubernetesScheduler{
		schedcfg:          &config.Schedcfg,
		RWMutex:           new(sync.RWMutex),
		executor:          config.Executor,
		executorGroup:     uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
		PodScheduler:      config.Scheduler,
		client:            config.Client,
		etcdClient:        config.EtcdClient,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}
				// the executor IDs must not identify a kubelet-executor whose
				// group doesn't match ours
				for _, eid := range o.GetExecutorIds() {
					execuid := uid.Parse(eid.GetValue())
					if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
						return false
					}
				}
				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// report via a closure so the final value of err is
					// observed, not the nil captured at defer time
					defer func() { errOnce.Report(err) }()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previously
			// scheduled offer relies on one
			LingerTTL:     config.Schedcfg.OfferLingerTTL.Duration,
			TTL:           config.Schedcfg.OfferTTL.Duration,
			ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
		}),
		slaveHostNames:    slave.NewRegistry(),
		taskRegistry:      podtask.NewInMemoryRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
	}
	return k
}
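// The executor-group check above compares components of parsed executor UIDs.
// A sketch of the intent, assuming ids of the "<group-hex>_<name>" form that
// uid.Parse understands (the values below are made up):
//
//	execuid := uid.Parse("1a2b3c4d_" + execcfg.DefaultInfoID)
//	execuid.Name()  // execcfg.DefaultInfoID -- identifies a kubelet-executor
//	execuid.Group() // the group component -- must equal k.executorGroup,
//	                // otherwise the offer carries an executor from an
//	                // incompatible scheduler incarnation and is rejected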
func (self *SchedulerProcess) Elect(newDriver DriverFactory) {
	errOnce := proc.NewErrorOnce(self.fin)
	proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() {
		if !(&self.stage).transition(standbyStage, masterStage) {
			log.Errorf("failed to transition from standby to master stage, aborting")
			self.End()
			return
		}
		log.Infoln("scheduler process entered master stage")
		drv, err := newDriver()
		if err != nil {
			log.Errorf("failed to fetch scheduler driver: %v", err)
			self.End()
			return
		}
		log.V(1).Infoln("starting driver...")
		stat, err := drv.Start()
		if stat == mesos.Status_DRIVER_RUNNING && err == nil {
			log.Infoln("driver started successfully and is running")
			close(self.elected)
			go func() {
				defer self.End()
				_, err := drv.Join()
				if err != nil {
					log.Errorf("driver failed with error: %v", err)
				}
				errOnce.Report(err)
			}()
			return
		}
		defer self.End()
		if err != nil {
			log.Errorf("failed to start scheduler driver: %v", err)
		} else {
			log.Errorf("expected RUNNING status, not %v", stat)
		}
	}))).Err(), func(err error) {
		defer self.End()
		log.Errorf("failed to handle election event, aborting: %v", err)
	}, self.fin)
}
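// The transition call above is a compare-and-swap guard on the process stage,
// ensuring that concurrent Elect attempts cannot both enter the master stage.
// A minimal sketch of how such a guard can be built, assuming stageType has an
// int32 underlying type (illustrative; the real implementation lives with
// stageType's declaration):
//
//	func (stage *stageType) transition(from, to stageType) bool {
//		return atomic.CompareAndSwapInt32((*int32)(stage), int32(from), int32(to))
//	}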
// Run executes task reconciliation, returning when r.done is closed. Intended
// to run as a goroutine. If reconciliation is requested while another is in
// progress, the in-progress operation will be cancelled before the new
// reconciliation operation begins.
func (r *Reconciler) Run(driver bindings.SchedulerDriver) {
	var cancel, finished chan struct{}
requestLoop:
	for {
		select {
		case <-r.done:
			return
		default: // proceed
		}
		select {
		case <-r.implicit:
			metrics.ReconciliationRequested.WithLabelValues("implicit").Inc()
			select {
			case <-r.done:
				return
			case <-r.explicit:
				break // give preference to a pending request for explicit
			default: // continue
				// don't run implicit reconciliation while explicit is ongoing
				if finished != nil {
					select {
					case <-finished: // continue w/ implicit
					default:
						log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing")
						continue requestLoop
					}
				}
				errOnce := proc.NewErrorOnce(r.done)
				errCh := r.Do(func() {
					var err error
					// report via a closure so the final value of err is
					// observed, not the nil captured at defer time
					defer func() { errOnce.Report(err) }()
					log.Infoln("implicit reconcile tasks")
					metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc()
					if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
						log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err)
					}
				})
				proc.OnError(errOnce.Send(errCh).Err(), func(err error) {
					log.Errorf("failed to run implicit reconciliation: %v", err)
				}, r.done)
				goto slowdown
			}
		case <-r.done:
			return
		case <-r.explicit: // continue
			metrics.ReconciliationRequested.WithLabelValues("explicit").Inc()
		}

		if cancel != nil {
			close(cancel)
			cancel = nil

			// play nice and wait for the prior operation to finish; complain
			// if it doesn't
			select {
			case <-r.done:
				return
			case <-finished: // noop, expected
			case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected
				log.Error("reconciler action failed to stop upon cancellation")
			}
		}

		// copy 'finished' to 'fin' here in case we end up with simultaneous
		// goroutines; if cancellation takes too long or fails, we don't want
		// to close the same chan more than once
		cancel = make(chan struct{})
		finished = make(chan struct{})
		go func(fin chan struct{}) {
			startedAt := time.Now()
			defer func() {
				metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt)))
			}()

			metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc()
			defer close(fin)
			err := <-r.Action(driver, cancel)
			if err == reconciliationCancelledErr {
				metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc()
				log.Infoln(err.Error())
			} else if err != nil {
				log.Errorf("reconciler action failed: %v", err)
			}
		}(finished)

	slowdown:
		// don't allow reconciliation to run very frequently, either explicit or implicit
		select {
		case <-r.done:
			return
		case <-time.After(r.cooldown): // noop
		}
	} // for
}
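// Requests arrive on the buffered (capacity 1) explicit/implicit channels, so
// a non-blocking send is enough to coalesce duplicate requests while one is
// already pending. A sketch of such a trigger helper (assumed, not shown in
// this excerpt):
//
//	func (r *Reconciler) RequestExplicit() {
//		select {
//		case r.explicit <- struct{}{}: // queued
//		default: // request already pending; coalesce
//		}
//	}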
// New creates a new Framework
func New(config Config) Framework {
	// k is declared ahead of the literal so the closures below can capture it
	var k *framework
	k = &framework{
		schedulerConfig:   &config.SchedulerConfig,
		RWMutex:           new(sync.RWMutex),
		client:            config.Client,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		executorId:        config.ExecutorId,
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}

				eids := len(o.GetExecutorIds())
				switch {
				case eids > 1:
					// at most one executor id is expected. More than one means
					// that the given node is seriously in trouble.
					return false

				case eids == 1:
					// the executor id must match, otherwise the running executor
					// is incompatible with the current scheduler configuration.
					if eid := o.GetExecutorIds()[0]; eid.GetValue() != config.ExecutorId.GetValue() {
						return false
					}
				}

				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// report via a closure so the final value of err is
					// observed, not the nil captured at defer time
					defer func() { errOnce.Report(err) }()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},

			// remember expired offers so that we can tell if a previously
			// scheduled offer relies on one
			LingerTTL:     config.SchedulerConfig.OfferLingerTTL.Duration,
			TTL:           config.SchedulerConfig.OfferTTL.Duration,
			ListenerDelay: config.SchedulerConfig.ListenerDelay.Duration,
		}),
		slaveHostNames:    newSlaveRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
		storeFrameworkId: config.StoreFrameworkId,
		lookupNode:       config.LookupNode,
	}
	return k
}
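// A table-driven test sketch for the Compat rule above (hypothetical helper
// names; assumes a newOffer constructor that sets hostname and executor ids,
// and a registered, up-to-date node "host1"):
//
//	for _, tc := range []struct {
//		eids []string
//		want bool
//	}{
//		{nil, true},                   // no executor on the node yet
//		{[]string{"k8sm-exec"}, true}, // equals config.ExecutorId
//		{[]string{"other"}, false},    // mismatched executor
//		{[]string{"a", "b"}, false},   // >1 executor: node in trouble
//	} {
//		if got := compat(newOffer("host1", tc.eids...)); got != tc.want {
//			t.Errorf("eids=%v: got %t, want %t", tc.eids, got, tc.want)
//		}
//	}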