// New creates a new kubernetes executor.
func New(config Config) *Executor {
    launchGracePeriod := config.LaunchGracePeriod
    if launchGracePeriod == 0 {
        // this is the equivalent of saying "the timer never expires" and simplifies nil
        // timer checks elsewhere in the code. it's a little hacky but less code to
        // maintain than alternative approaches.
        launchGracePeriod = time.Duration(math.MaxInt64)
    }
    k := &Executor{
        state:                disconnectedState,
        terminate:            make(chan struct{}),
        outgoing:             make(chan func() (mesos.Status, error), 1024),
        dockerClient:         config.Docker,
        suicideTimeout:       config.SuicideTimeout,
        kubeletFinished:      config.KubeletFinished,
        suicideWatch:         &suicideTimer{},
        shutdownAlert:        config.ShutdownAlert,
        exitFunc:             config.ExitFunc,
        staticPodsConfigPath: config.StaticPodsConfigPath,
        launchGracePeriod:    launchGracePeriod,
        nodeInfos:            config.NodeInfos,
        initCompleted:        make(chan struct{}),
        registry:             config.Registry,
        kubeAPI:              &clientAPIWrapper{config.APIClient},
        nodeAPI:              &clientAPIWrapper{config.APIClient},
    }

    runtime.On(k.initCompleted, k.runSendLoop)

    k.watcher = newWatcher(k.registry.watch())
    runtime.On(k.initCompleted, k.watcher.run)

    return k
}
// New creates a new kubernetes executor.
func New(config Config) *Executor {
    launchGracePeriod := config.LaunchGracePeriod
    if launchGracePeriod == 0 {
        // this is the equivalent of saying "the timer never expires" and simplifies nil
        // timer checks elsewhere in the code. it's a little hacky but less code to
        // maintain than alternative approaches.
        launchGracePeriod = time.Duration(math.MaxInt64)
    }
    k := &Executor{
        updateChan:           config.Updates,
        state:                disconnectedState,
        tasks:                make(map[string]*kuberTask),
        pods:                 make(map[string]*api.Pod),
        client:               config.APIClient,
        terminate:            make(chan struct{}),
        outgoing:             make(chan func() (mesos.Status, error), 1024),
        dockerClient:         config.Docker,
        suicideTimeout:       config.SuicideTimeout,
        kubeletFinished:      config.KubeletFinished,
        suicideWatch:         &suicideTimer{},
        shutdownAlert:        config.ShutdownAlert,
        exitFunc:             config.ExitFunc,
        podStatusFunc:        config.PodStatusFunc,
        staticPodsConfigPath: config.StaticPodsConfigPath,
        launchGracePeriod:    launchGracePeriod,
        nodeInfos:            config.NodeInfos,
        initCompleted:        make(chan struct{}),
    }

    runtime.On(k.initCompleted, k.runSendLoop)

    po := newPodObserver(config.PodLW, k.updateTask, k.terminate)
    runtime.On(k.initCompleted, po.run)

    return k
}
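// Editor's sketch (not part of the original source): a minimal, standalone illustration of why
// substituting time.Duration(math.MaxInt64) for an unset grace period behaves like a timer that
// never expires, letting callers always create a timer instead of special-casing a nil one.
package main

import (
    "fmt"
    "math"
    "time"
)

func main() {
    launchGracePeriod := time.Duration(0) // e.g. "unset" in the config
    if launchGracePeriod == 0 {
        // ~292 years; for all practical purposes the timer never fires.
        launchGracePeriod = time.Duration(math.MaxInt64)
    }

    timer := time.NewTimer(launchGracePeriod)
    defer timer.Stop()

    select {
    case <-timer.C:
        fmt.Println("grace period expired") // effectively unreachable
    case <-time.After(10 * time.Millisecond):
        fmt.Println("still within the (practically infinite) grace period")
    }
}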
func New(c *config.Config, fw framework.Framework, ps podschedulers.PodScheduler, client *client.Client, recorder record.EventRecorder, terminate <-chan struct{}, mux *http.ServeMux, lw *cache.ListWatch) scheduler.Scheduler {
    core := &sched{
        framework:    fw,
        taskRegistry: podtask.NewInMemoryRegistry(),
    }

    // Watch and queue pods that need scheduling.
    podUpdatesBypass := make(chan queue.Entry, c.UpdatesBacklog)
    podUpdates := &podStoreAdapter{queue.NewHistorical(podUpdatesBypass)}
    reflector := cache.NewReflector(lw, &api.Pod{}, podUpdates, 0)

    q := queuer.New(queue.NewDelayFIFO(), podUpdates)

    algorithm := algorithm.New(core, podUpdates, ps)

    podDeleter := deleter.New(core, q)

    core.podReconciler = podreconciler.New(core, client, q, podDeleter)

    bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)

    newBC := func(podKey string) queue.BreakChan {
        return queue.BreakChan(core.Offers().Listen(podKey, func(offer *mesos.Offer) bool {
            core.Lock()
            defer core.Unlock()
            switch task, state := core.Tasks().ForPod(podKey); state {
            case podtask.StatePending:
                // Assess fitness of pod with the current offer. The scheduler normally
                // "backs off" when it can't find an offer that matches up with a pod.
                // The backoff period for a pod can terminate sooner if an offer becomes
                // available that matches up.
                return !task.Has(podtask.Launched) && ps.FitPredicate()(task, offer, nil)
            default:
                // no point in continuing to check for matching offers
                return true
            }
        }))
    }

    errorHandler := errorhandler.New(core, bo, q, newBC)

    binder := binder.New(core)

    startLatch := make(chan struct{})

    runtime.On(startLatch, func() {
        reflector.Run() // TODO(jdef) should listen for termination
        podDeleter.Run(podUpdatesBypass, terminate)
        q.Run(terminate)

        q.InstallDebugHandlers(mux)
        podtask.InstallDebugHandlers(core.Tasks(), mux)
    })

    core.controller = controller.New(client, algorithm, recorder, q.Yield, errorHandler.Error, binder, startLatch)
    return core
}
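// Editor's sketch (hypothetical names, not the project's backoff or queue packages): the general
// "break channel" idea behind newBC above — a backoff wait that ends early when a matching offer
// arrives on a signal channel, instead of always sleeping for the full backoff period.
package main

import (
    "fmt"
    "time"
)

// waitOrBreak sleeps for the backoff duration but returns early if breakCh fires first.
func waitOrBreak(backoff time.Duration, breakCh <-chan struct{}) (interrupted bool) {
    timer := time.NewTimer(backoff)
    defer timer.Stop()
    select {
    case <-breakCh:
        return true // an offer matched; stop backing off immediately
    case <-timer.C:
        return false // full backoff elapsed without a matching offer
    }
}

func main() {
    breakCh := make(chan struct{})
    go func() {
        time.Sleep(50 * time.Millisecond)
        close(breakCh) // simulate a matching offer arriving
    }()
    fmt.Println("interrupted:", waitOrBreak(5*time.Second, breakCh))
}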
func (k *KubernetesScheduler) NewPluginConfig(terminate <-chan struct{}, mux *http.ServeMux, podsWatcher *cache.ListWatch) *PluginConfig {
    // Watch and queue pods that need scheduling.
    updates := make(chan queue.Entry, k.schedcfg.UpdatesBacklog)
    podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
    reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)

    // lock that guards critical sections that involve transferring pods from
    // the store (cache) to the scheduling queue; its purpose is to maintain
    // an ordering (vs interleaving) of operations that's easier to reason about.
    kapi := &k8smScheduler{internal: k}
    q := newQueuer(podUpdates)
    podDeleter := &deleter{
        api: kapi,
        qr:  q,
    }
    eh := &errorHandler{
        api:     kapi,
        backoff: backoff.New(k.schedcfg.InitialPodBackoff.Duration, k.schedcfg.MaxPodBackoff.Duration),
        qr:      q,
    }
    startLatch := make(chan struct{})
    eventBroadcaster := record.NewBroadcaster()
    runtime.On(startLatch, func() {
        eventBroadcaster.StartRecordingToSink(k.client.Events(""))
        reflector.Run() // TODO(jdef) should listen for termination
        podDeleter.Run(updates, terminate)
        q.Run(terminate)

        q.installDebugHandlers(mux)
        podtask.InstallDebugHandlers(k.taskRegistry, mux)
    })
    return &PluginConfig{
        Config: &plugin.Config{
            MinionLister: nil,
            Algorithm: &kubeScheduler{
                api:                      kapi,
                podUpdates:               podUpdates,
                defaultContainerCPULimit: k.defaultContainerCPULimit,
                defaultContainerMemLimit: k.defaultContainerMemLimit,
            },
            Binder:   &binder{api: kapi},
            NextPod:  q.yield,
            Error:    eh.handleSchedulingError,
            Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}),
        },
        api:      kapi,
        client:   k.client,
        qr:       q,
        deleter:  podDeleter,
        starting: startLatch,
    }
}
func TestProc_manyEndings(t *testing.T) {
    p := New()
    const COUNT = 20
    var wg sync.WaitGroup
    wg.Add(COUNT)
    for i := 0; i < COUNT; i++ {
        runtime.On(p.End(), wg.Done)
    }
    wg.Wait()
    <-p.Done()
}
func TestProc_manyEndings(t *testing.T) {
    p := New()
    const COUNT = 20
    var wg sync.WaitGroup
    wg.Add(COUNT)
    for i := 0; i < COUNT; i++ {
        runtime.On(p.End(), wg.Done)
    }
    fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
    fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
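// Editor's sketch (standalone approximations, not the project's actual runtime.After or fatalAfter
// implementations): the test above relies on helpers shaped roughly like these — after() turns a
// blocking call into a signal channel, and fatalAfter() fails the test if that channel does not
// close before a deadline.
package proctest

import (
    "testing"
    "time"
)

// after runs f in a goroutine and returns a channel that closes when f returns.
func after(f func()) <-chan struct{} {
    ch := make(chan struct{})
    go func() {
        defer close(ch)
        f()
    }()
    return ch
}

// fatalAfter fails the test if ch does not close before the timeout elapses.
func fatalAfter(t *testing.T, ch <-chan struct{}, timeout time.Duration, msg string) {
    select {
    case <-ch:
    case <-time.After(timeout):
        t.Fatal(msg)
    }
}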
func New(sched bindings.Scheduler) *SchedulerProcess {
    p := &SchedulerProcess{
        Process:   proc.New(),
        Scheduler: sched,
        stage:     initStage,
        elected:   make(chan struct{}),
        failover:  make(chan struct{}),
        standby:   make(chan struct{}),
        fin:       make(chan struct{}),
    }
    runtime.On(p.Running(), p.begin)
    return p
}
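// Editor's sketch (a standalone approximation, not the project's runtime package): every snippet
// here wires deferred work with runtime.On(signalChan, fn); the helper plausibly behaves like the
// on() function below — wait until the channel is closed, then invoke fn in its own goroutine.
package main

import (
    "fmt"
    "time"
)

// on invokes fn (in a new goroutine) once the signal channel closes.
func on(signal <-chan struct{}, fn func()) {
    go func() {
        <-signal
        fn()
    }()
}

func main() {
    ready := make(chan struct{})
    on(ready, func() { fmt.Println("ran after ready was signalled") })
    close(ready)
    time.Sleep(10 * time.Millisecond) // give the goroutine time to run in this toy example
}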
// watch the scheduler process for failover signals and handle them appropriately. may never return.
func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterface, handler func() error) error {

    // we only want to return the first error (if any), everyone else can block forever
    errCh := make(chan error, 1)
    doFailover := func() error {
        // we really don't expect handler to return, if it does something went seriously wrong
        err := handler()
        if err != nil {
            defer schedulerProcess.End()
            err = fmt.Errorf("failover failed, scheduler will terminate: %v", err)
        }
        return err
    }

    // guard for failover signal processing, first signal processor wins
    failoverLatch := &runtime.Latch{}
    runtime.On(schedulerProcess.Terminal(), func() {
        if !failoverLatch.Acquire() {
            log.V(1).Infof("scheduler process ending, already failing over")
            select {}
        }
        var err error
        defer func() { errCh <- err }()
        select {
        case <-schedulerProcess.Failover():
            err = doFailover()
        default:
            if s.ha {
                err = fmt.Errorf("ha scheduler exiting instead of failing over")
            } else {
                log.Infof("exiting scheduler")
            }
        }
    })
    runtime.OnOSSignal(makeFailoverSigChan(), func(_ os.Signal) {
        if !failoverLatch.Acquire() {
            log.V(1).Infof("scheduler process signalled, already failing over")
            select {}
        }
        errCh <- doFailover()
    })
    return <-errCh
}
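// Editor's sketch (a standalone approximation of the first-wins latch pattern used above, not the
// project's runtime.Latch): Acquire returns true exactly once, so whichever signal handler acquires
// the latch first performs the failover and every later handler backs off.
package main

import (
    "fmt"
    "sync/atomic"
)

// Latch can be acquired at most once; later Acquire calls return false.
type Latch struct {
    acquired int32
}

// Acquire returns true only for the first caller.
func (l *Latch) Acquire() bool {
    return atomic.CompareAndSwapInt32(&l.acquired, 0, 1)
}

func main() {
    latch := &Latch{}
    fmt.Println(latch.Acquire()) // true: first signal processor wins
    fmt.Println(latch.Acquire()) // false: everyone else blocks or exits
}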
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, etcd.Client, *mesos.ExecutorID) {
    s.frameworkName = strings.TrimSpace(s.frameworkName)
    if s.frameworkName == "" {
        log.Fatalf("framework-name must be a non-empty string")
    }
    s.frameworkWebURI = strings.TrimSpace(s.frameworkWebURI)

    metrics.Register()
    runtime.Register()
    s.mux.Handle("/metrics", prometheus.Handler())
    healthz.InstallHandler(s.mux)

    if len(s.etcdServerList) == 0 {
        log.Fatalf("--etcd-servers must be specified")
    }

    if len(s.apiServerList) < 1 {
        log.Fatal("No api servers specified.")
    }

    clientConfig, err := s.createAPIServerClientConfig()
    if err != nil {
        log.Fatalf("Unable to make apiserver client config: %v", err)
    }
    s.client, err = clientset.NewForConfig(clientConfig)
    if err != nil {
        log.Fatalf("Unable to make apiserver clientset: %v", err)
    }

    if s.reconcileCooldown < defaultReconcileCooldown {
        s.reconcileCooldown = defaultReconcileCooldown
        log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.reconcileCooldown)
    }

    eiPrototype, err := s.prepareExecutorInfo(hks)
    if err != nil {
        log.Fatalf("misconfigured executor: %v", err)
    }

    // TODO(jdef): remove the dependency on etcd as soon as
    // (1) the generic config store is available for the FrameworkId storage
    // (2) the generic master election is provided by the apiserver
    // Compare docs/proposals/high-availability.md
    etcdClient, err := newEtcd(s.etcdServerList)
    if err != nil {
        log.Fatalf("misconfigured etcd: %v", err)
    }
    keysAPI := etcd.NewKeysAPI(etcdClient)

    // mirror all nodes into the nodeStore
    var eiRegistry executorinfo.Registry
    nodesClientConfig := *clientConfig
    nodesClient, err := clientset.NewForConfig(&nodesClientConfig)
    if err != nil {
        log.Fatalf("Cannot create client to watch nodes: %v", err)
    }
    nodeLW := cache.NewListWatchFromClient(nodesClient.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
    nodeStore, nodeCtl := controllerfw.NewInformer(nodeLW, &api.Node{}, s.nodeRelistPeriod, &controllerfw.ResourceEventHandlerFuncs{
        DeleteFunc: func(obj interface{}) {
            if eiRegistry != nil {
                // TODO(jdef) use controllerfw.DeletionHandlingMetaNamespaceKeyFunc at some point?
                nodeName := ""
                if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
                    nodeName = tombstone.Key
                } else if node, ok := obj.(*api.Node); ok {
                    nodeName = node.Name
                }
                if nodeName != "" {
                    log.V(2).Infof("deleting node %q from registry", nodeName)
                    eiRegistry.Invalidate(nodeName)
                }
            }
        },
    })

    lookupNode := func(hostName string) *api.Node {
        n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
        if n == nil {
            return nil
        }
        return n.(*api.Node)
    }

    execInfoCache, err := executorinfo.NewCache(defaultExecutorInfoCacheSize)
    if err != nil {
        log.Fatalf("cannot create executorinfo cache: %v", err)
    }

    eiRegistry, err = executorinfo.NewRegistry(lookupNode, eiPrototype, execInfoCache)
    if err != nil {
        log.Fatalf("cannot create executorinfo registry: %v", err)
    }

    pr := podtask.NewDefaultProcurement(eiPrototype, eiRegistry)
    fcfs := podschedulers.NewFCFSPodScheduler(pr, lookupNode)

    frameworkIDStorage, err := s.frameworkIDStorage(keysAPI)
    if err != nil {
        log.Fatalf("cannot init framework ID storage: %v", err)
    }

    framework := framework.New(framework.Config{
        SchedulerConfig:   *sc,
        Client:            s.client,
        FailoverTimeout:   s.failoverTimeout,
        ReconcileInterval: s.reconcileInterval,
        ReconcileCooldown: s.reconcileCooldown,
        LookupNode:        lookupNode,
        StoreFrameworkId:  frameworkIDStorage.Set,
        ExecutorId:        eiPrototype.GetExecutorId(),
    })

    masterUri := s.mesosMaster
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("Misconfigured mesos framework: %v", err)
    }

    schedulerProcess := ha.New(framework)

    // try publishing on the same IP as the slave
    var publishedAddress net.IP
    if libprocessIP := os.Getenv("LIBPROCESS_IP"); libprocessIP != "" {
        publishedAddress = net.ParseIP(libprocessIP)
    }
    if publishedAddress != nil {
        log.V(1).Infof("driver will publish address %v", publishedAddress)
    }

    dconfig := &bindings.DriverConfig{
        Scheduler:        schedulerProcess,
        Framework:        info,
        Master:           masterUri,
        Credential:       cred,
        BindingAddress:   s.address,
        BindingPort:      uint16(s.driverPort),
        PublishedAddress: publishedAddress,
        HostnameOverride: s.hostnameOverride,
        WithAuthContext: func(ctx context.Context) context.Context {
            ctx = auth.WithLoginProvider(ctx, s.mesosAuthProvider)
            ctx = sasl.WithBindingAddress(ctx, s.address)
            return ctx
        },
    }

    // create event recorder sending events to the "" namespace of the apiserver
    eventsClientConfig := *clientConfig
    eventsClient, err := clientset.NewForConfig(&eventsClientConfig)
    if err != nil {
        log.Fatalf("Invalid API configuration: %v", err)
    }
    broadcaster := record.NewBroadcaster()
    recorder := broadcaster.NewRecorder(api.EventSource{Component: api.DefaultSchedulerName})
    broadcaster.StartLogging(log.Infof)
    broadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: eventsClient.Events("")})

    lw := cache.NewListWatchFromClient(s.client.CoreClient, "pods", api.NamespaceAll, fields.Everything())

    hostPortStrategy := hostport.StrategyFixed
    if s.useHostPortEndpoints {
        hostPortStrategy = hostport.StrategyWildcard
    }

    // create scheduler core with all components arranged around it
    sched := components.New(
        sc,
        framework,
        fcfs,
        s.client,
        recorder,
        schedulerProcess.Terminal(),
        s.mux,
        lw,
        podtask.Config{
            DefaultPodRoles:              s.defaultPodRoles,
            FrameworkRoles:               s.frameworkRoles,
            GenerateTaskDiscoveryEnabled: s.generateTaskDiscovery,
            HostPortStrategy:             hostPortStrategy,
            Prototype:                    eiPrototype,
        },
        s.defaultContainerCPULimit,
        s.defaultContainerMemLimit,
    )

    runtime.On(framework.Registration(), func() { sched.Run(schedulerProcess.Terminal()) })
    runtime.On(framework.Registration(), s.newServiceWriter(publishedAddress, schedulerProcess.Terminal()))
    runtime.On(framework.Registration(), func() { nodeCtl.Run(schedulerProcess.Terminal()) })

    driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
        log.V(1).Infoln("performing deferred initialization")
        if err = framework.Init(sched, schedulerProcess.Master(), s.mux); err != nil {
            return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
        }
        log.V(1).Infoln("deferred init complete")

        if s.failoverTimeout > 0 {
            // defer obtaining framework ID to prevent multiple schedulers
            // from overwriting each other's framework IDs
            var frameworkID string
            frameworkID, err = frameworkIDStorage.Get(context.TODO())
            if err != nil {
                return nil, fmt.Errorf("failed to fetch framework ID from storage: %v", err)
            }
            if frameworkID != "" {
                log.Infof("configuring FrameworkInfo with ID found in storage: %q", frameworkID)
                dconfig.Framework.Id = &mesos.FrameworkID{Value: &frameworkID}
            } else {
                log.V(1).Infof("did not find framework ID in storage")
            }
        } else {
            // TODO(jdef) this is a hack, really for development, to simplify clean up of old framework IDs
            frameworkIDStorage.Remove(context.TODO())
        }

        log.V(1).Infoln("constructing mesos scheduler driver")
        drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
        if err != nil {
            return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
        }

        log.V(1).Infoln("constructed mesos scheduler driver:", drv)
        s.setDriver(drv)
        return drv, nil
    })

    return schedulerProcess, driverFactory, etcdClient, eiPrototype.GetExecutorId()
}
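// Editor's sketch (generic pattern with hypothetical names, not the project's ha.DriverFactory):
// the driver factories above defer contended setup — such as reading the framework ID — until a
// driver is actually constructed, so a standby scheduler that never wins the election never
// touches (or overwrites) the shared framework ID.
package main

import "fmt"

type driver struct{ frameworkID string }

// driverFactory performs deferred initialization when it is invoked.
type driverFactory func() (*driver, error)

func newDriverFactory(fetchFrameworkID func() (string, error)) driverFactory {
    return func() (*driver, error) {
        // deferred: only the scheduler that actually builds a driver reads the stored ID,
        // preventing multiple schedulers from overwriting each other's framework IDs.
        id, err := fetchFrameworkID()
        if err != nil {
            return nil, fmt.Errorf("failed to fetch framework ID: %v", err)
        }
        return &driver{frameworkID: id}, nil
    }
}

func main() {
    factory := newDriverFactory(func() (string, error) { return "framework-123", nil })
    drv, err := factory()
    fmt.Println(drv, err)
}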
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdClient, *uid.UID) {
    s.FrameworkName = strings.TrimSpace(s.FrameworkName)
    if s.FrameworkName == "" {
        log.Fatalf("framework-name must be a non-empty string")
    }
    s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)

    metrics.Register()
    runtime.Register()
    s.mux.Handle("/metrics", prometheus.Handler())
    healthz.InstallHandler(s.mux)

    if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
        log.Fatalf("specify either --etcd-servers or --etcd-config")
    }

    if len(s.APIServerList) < 1 {
        log.Fatal("No api servers specified.")
    }

    client, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Unable to make apiserver client: %v", err)
    }
    s.client = client

    if s.ReconcileCooldown < defaultReconcileCooldown {
        s.ReconcileCooldown = defaultReconcileCooldown
        log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
    }

    executor, eid, err := s.prepareExecutorInfo(hks)
    if err != nil {
        log.Fatalf("misconfigured executor: %v", err)
    }

    // TODO(jdef): remove the dependency on etcd as soon as
    // (1) the generic config store is available for the FrameworkId storage
    // (2) the generic master election is provided by the apiserver
    // Compare docs/proposals/high-availability.md
    etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
    if err != nil {
        log.Fatalf("misconfigured etcd: %v", err)
    }

    as := scheduler.NewAllocationStrategy(
        podtask.NewDefaultPredicate(
            s.DefaultContainerCPULimit,
            s.DefaultContainerMemLimit,
        ),
        podtask.NewDefaultProcurement(
            s.DefaultContainerCPULimit,
            s.DefaultContainerMemLimit,
        ),
    )

    // downgrade allocation strategy if user disables "account-for-pod-resources"
    if !s.AccountForPodResources {
        as = scheduler.NewAllocationStrategy(
            podtask.DefaultMinimalPredicate,
            podtask.DefaultMinimalProcurement)
    }

    // mirror all nodes into the nodeStore
    nodesClient, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Cannot create client to watch nodes: %v", err)
    }
    nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
    nodeLW := cache.NewListWatchFromClient(nodesClient, "nodes", api.NamespaceAll, fields.Everything())
    cache.NewReflector(nodeLW, &api.Node{}, nodeStore, s.nodeRelistPeriod).Run()

    lookupNode := func(hostName string) *api.Node {
        n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
        if n == nil {
            return nil
        }
        return n.(*api.Node)
    }

    fcfs := scheduler.NewFCFSPodScheduler(as, lookupNode)
    mesosPodScheduler := scheduler.New(scheduler.Config{
        Schedcfg:          *sc,
        Executor:          executor,
        Scheduler:         fcfs,
        Client:            client,
        EtcdClient:        etcdClient,
        FailoverTimeout:   s.FailoverTimeout,
        ReconcileInterval: s.ReconcileInterval,
        ReconcileCooldown: s.ReconcileCooldown,
        LookupNode:        lookupNode,
    })

    masterUri := s.MesosMaster
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("Misconfigured mesos framework: %v", err)
    }

    schedulerProcess := ha.New(mesosPodScheduler)
    dconfig := &bindings.DriverConfig{
        Scheduler:        schedulerProcess,
        Framework:        info,
        Master:           masterUri,
        Credential:       cred,
        BindingAddress:   s.Address,
        BindingPort:      uint16(s.DriverPort),
        HostnameOverride: s.HostnameOverride,
        WithAuthContext: func(ctx context.Context) context.Context {
            ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
            ctx = sasl.WithBindingAddress(ctx, s.Address)
            return ctx
        },
    }

    kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
    runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
    runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))

    driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
        log.V(1).Infoln("performing deferred initialization")
        if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
            return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
        }
        log.V(1).Infoln("deferred init complete")

        // defer obtaining framework ID to prevent multiple schedulers
        // from overwriting each other's framework IDs
        dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
        if err != nil {
            return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
        }

        log.V(1).Infoln("constructing mesos scheduler driver")
        drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
        if err != nil {
            return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
        }

        log.V(1).Infoln("constructed mesos scheduler driver:", drv)
        s.setDriver(drv)
        return drv, nil
    })

    return schedulerProcess, driverFactory, etcdClient, eid
}
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdClient, *uid.UID) {
    s.FrameworkName = strings.TrimSpace(s.FrameworkName)
    if s.FrameworkName == "" {
        log.Fatalf("framework-name must be a non-empty string")
    }
    s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)

    metrics.Register()
    runtime.Register()
    s.mux.Handle("/metrics", prometheus.Handler())

    if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
        log.Fatalf("specify either --etcd-servers or --etcd-config")
    }

    if len(s.APIServerList) < 1 {
        log.Fatal("No api servers specified.")
    }

    client, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Unable to make apiserver client: %v", err)
    }
    s.client = client

    if s.ReconcileCooldown < defaultReconcileCooldown {
        s.ReconcileCooldown = defaultReconcileCooldown
        log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
    }

    executor, eid, err := s.prepareExecutorInfo(hks)
    if err != nil {
        log.Fatalf("misconfigured executor: %v", err)
    }

    // TODO(jdef): remove the dependency on etcd as soon as
    // (1) the generic config store is available for the FrameworkId storage
    // (2) the generic master election is provided by the apiserver
    // Compare docs/proposals/high-availability.md
    etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
    if err != nil {
        log.Fatalf("misconfigured etcd: %v", err)
    }

    mesosPodScheduler := scheduler.New(scheduler.Config{
        Schedcfg:                 *sc,
        Executor:                 executor,
        ScheduleFunc:             scheduler.FCFSScheduleFunc,
        Client:                   client,
        EtcdClient:               etcdClient,
        FailoverTimeout:          s.FailoverTimeout,
        ReconcileInterval:        s.ReconcileInterval,
        ReconcileCooldown:        s.ReconcileCooldown,
        DefaultContainerCPULimit: s.DefaultContainerCPULimit,
        DefaultContainerMemLimit: s.DefaultContainerMemLimit,
    })

    masterUri := s.MesosMaster
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("Misconfigured mesos framework: %v", err)
    }

    schedulerProcess := ha.New(mesosPodScheduler)
    dconfig := &bindings.DriverConfig{
        Scheduler:        schedulerProcess,
        Framework:        info,
        Master:           masterUri,
        Credential:       cred,
        BindingAddress:   net.IP(s.Address),
        BindingPort:      uint16(s.DriverPort),
        HostnameOverride: s.HostnameOverride,
        WithAuthContext: func(ctx context.Context) context.Context {
            ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
            ctx = sasl.WithBindingAddress(ctx, net.IP(s.Address))
            return ctx
        },
    }

    kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
    runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
    runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))

    driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
        log.V(1).Infoln("performing deferred initialization")
        if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
            return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
        }
        log.V(1).Infoln("deferred init complete")

        // defer obtaining framework ID to prevent multiple schedulers
        // from overwriting each other's framework IDs
        dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
        if err != nil {
            return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
        }

        log.V(1).Infoln("constructing mesos scheduler driver")
        drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
        if err != nil {
            return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
        }

        log.V(1).Infoln("constructed mesos scheduler driver:", drv)
        s.setDriver(drv)
        return drv, nil
    })

    return schedulerProcess, driverFactory, etcdClient, eid
}
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, *etcd.Client, *mesos.ExecutorID) {
    s.frameworkName = strings.TrimSpace(s.frameworkName)
    if s.frameworkName == "" {
        log.Fatalf("framework-name must be a non-empty string")
    }
    s.frameworkWebURI = strings.TrimSpace(s.frameworkWebURI)

    metrics.Register()
    runtime.Register()
    s.mux.Handle("/metrics", prometheus.Handler())
    healthz.InstallHandler(s.mux)

    if (s.etcdConfigFile != "" && len(s.etcdServerList) != 0) || (s.etcdConfigFile == "" && len(s.etcdServerList) == 0) {
        log.Fatalf("specify either --etcd-servers or --etcd-config")
    }

    if len(s.apiServerList) < 1 {
        log.Fatal("No api servers specified.")
    }

    client, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Unable to make apiserver client: %v", err)
    }
    s.client = client

    if s.reconcileCooldown < defaultReconcileCooldown {
        s.reconcileCooldown = defaultReconcileCooldown
        log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.reconcileCooldown)
    }

    eiPrototype, err := s.prepareExecutorInfo(hks)
    if err != nil {
        log.Fatalf("misconfigured executor: %v", err)
    }

    // TODO(jdef): remove the dependency on etcd as soon as
    // (1) the generic config store is available for the FrameworkId storage
    // (2) the generic master election is provided by the apiserver
    // Compare docs/proposals/high-availability.md
    etcdClient, err := newEtcd(s.etcdConfigFile, s.etcdServerList)
    if err != nil {
        log.Fatalf("misconfigured etcd: %v", err)
    }

    // mirror all nodes into the nodeStore
    var eiRegistry executorinfo.Registry
    nodesClient, err := s.createAPIServerClient()
    if err != nil {
        log.Fatalf("Cannot create client to watch nodes: %v", err)
    }
    nodeLW := cache.NewListWatchFromClient(nodesClient, "nodes", api.NamespaceAll, fields.Everything())
    nodeStore, nodeCtl := controllerfw.NewInformer(nodeLW, &api.Node{}, s.nodeRelistPeriod, &controllerfw.ResourceEventHandlerFuncs{
        DeleteFunc: func(obj interface{}) {
            node := obj.(*api.Node)
            if eiRegistry != nil {
                log.V(2).Infof("deleting node %q from registry", node.Name)
                eiRegistry.Invalidate(node.Name)
            }
        },
    })

    lookupNode := func(hostName string) *api.Node {
        n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
        if n == nil {
            return nil
        }
        return n.(*api.Node)
    }

    execInfoCache, err := executorinfo.NewCache(defaultExecutorInfoCacheSize)
    if err != nil {
        log.Fatalf("cannot create executorinfo cache: %v", err)
    }

    eiRegistry, err = executorinfo.NewRegistry(lookupNode, eiPrototype, execInfoCache)
    if err != nil {
        log.Fatalf("cannot create executorinfo registry: %v", err)
    }

    pr := podtask.NewDefaultProcurement(eiPrototype, eiRegistry)
    fcfs := podschedulers.NewFCFSPodScheduler(pr, lookupNode)

    framework := framework.New(framework.Config{
        SchedulerConfig:   *sc,
        Client:            client,
        FailoverTimeout:   s.failoverTimeout,
        ReconcileInterval: s.reconcileInterval,
        ReconcileCooldown: s.reconcileCooldown,
        LookupNode:        lookupNode,
        StoreFrameworkId: func(id string) {
            // TODO(jdef): port FrameworkId store to generic Kubernetes config store as soon as available
            _, err := etcdClient.Set(meta.FrameworkIDKey, id, uint64(s.failoverTimeout))
            if err != nil {
                log.Errorf("failed to renew frameworkId TTL: %v", err)
            }
        },
        ExecutorId: eiPrototype.GetExecutorId(),
    })

    masterUri := s.mesosMaster
    info, cred, err := s.buildFrameworkInfo()
    if err != nil {
        log.Fatalf("Misconfigured mesos framework: %v", err)
    }

    schedulerProcess := ha.New(framework)
    dconfig := &bindings.DriverConfig{
        Scheduler:        schedulerProcess,
        Framework:        info,
        Master:           masterUri,
        Credential:       cred,
        BindingAddress:   s.address,
        BindingPort:      uint16(s.driverPort),
        HostnameOverride: s.hostnameOverride,
        WithAuthContext: func(ctx context.Context) context.Context {
            ctx = auth.WithLoginProvider(ctx, s.mesosAuthProvider)
            ctx = sasl.WithBindingAddress(ctx, s.address)
            return ctx
        },
    }

    // create event recorder sending events to the "" namespace of the apiserver
    broadcaster := record.NewBroadcaster()
    recorder := broadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
    broadcaster.StartRecordingToSink(client.Events(""))

    // create scheduler core with all components arranged around it
    lw := cache.NewListWatchFromClient(client, "pods", api.NamespaceAll, fields.Everything())
    sched := components.New(
        sc,
        framework,
        fcfs,
        client,
        recorder,
        schedulerProcess.Terminal(),
        s.mux,
        lw,
        eiPrototype,
        s.mesosRoles,
        s.defaultContainerCPULimit,
        s.defaultContainerMemLimit,
    )

    runtime.On(framework.Registration(), func() { sched.Run(schedulerProcess.Terminal()) })
    runtime.On(framework.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))
    runtime.On(framework.Registration(), func() { nodeCtl.Run(schedulerProcess.Terminal()) })

    driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
        log.V(1).Infoln("performing deferred initialization")
        if err = framework.Init(sched, schedulerProcess.Master(), s.mux); err != nil {
            return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
        }
        log.V(1).Infoln("deferred init complete")

        // defer obtaining framework ID to prevent multiple schedulers
        // from overwriting each other's framework IDs
        dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
        if err != nil {
            return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
        }

        log.V(1).Infoln("constructing mesos scheduler driver")
        drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
        if err != nil {
            return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
        }

        log.V(1).Infoln("constructed mesos scheduler driver:", drv)
        s.setDriver(drv)
        return drv, nil
    })

    return schedulerProcess, driverFactory, etcdClient, eiPrototype.GetExecutorId()
}