Example #1
0
// New creates a new kubernetes executor.
func New(config Config) *Executor {
	launchGracePeriod := config.LaunchGracePeriod
	if launchGracePeriod == 0 {
		// this is the equivalent of saying "the timer never expires" and simplies nil
		// timer checks elsewhere in the code. it's a little hacky but less code to
		// maintain that alternative approaches.
		launchGracePeriod = time.Duration(math.MaxInt64)
	}
	k := &Executor{
		state:                disconnectedState,
		terminate:            make(chan struct{}),
		outgoing:             make(chan func() (mesos.Status, error), 1024),
		dockerClient:         config.Docker,
		suicideTimeout:       config.SuicideTimeout,
		kubeletFinished:      config.KubeletFinished,
		suicideWatch:         &suicideTimer{},
		shutdownAlert:        config.ShutdownAlert,
		exitFunc:             config.ExitFunc,
		staticPodsConfigPath: config.StaticPodsConfigPath,
		launchGracePeriod:    launchGracePeriod,
		nodeInfos:            config.NodeInfos,
		initCompleted:        make(chan struct{}),
		registry:             config.Registry,
		kubeAPI:              &clientAPIWrapper{config.APIClient},
		nodeAPI:              &clientAPIWrapper{config.APIClient},
	}

	runtime.On(k.initCompleted, k.runSendLoop)

	k.watcher = newWatcher(k.registry.watch())
	runtime.On(k.initCompleted, k.watcher.run)

	return k
}
Example #2
0
// New creates a new kubernetes executor.
func New(config Config) *Executor {
	launchGracePeriod := config.LaunchGracePeriod
	if launchGracePeriod == 0 {
		// this is the equivalent of saying "the timer never expires" and simplies nil
		// timer checks elsewhere in the code. it's a little hacky but less code to
		// maintain that alternative approaches.
		launchGracePeriod = time.Duration(math.MaxInt64)
	}
	k := &Executor{
		updateChan:           config.Updates,
		state:                disconnectedState,
		tasks:                make(map[string]*kuberTask),
		pods:                 make(map[string]*api.Pod),
		client:               config.APIClient,
		terminate:            make(chan struct{}),
		outgoing:             make(chan func() (mesos.Status, error), 1024),
		dockerClient:         config.Docker,
		suicideTimeout:       config.SuicideTimeout,
		kubeletFinished:      config.KubeletFinished,
		suicideWatch:         &suicideTimer{},
		shutdownAlert:        config.ShutdownAlert,
		exitFunc:             config.ExitFunc,
		podStatusFunc:        config.PodStatusFunc,
		staticPodsConfigPath: config.StaticPodsConfigPath,
		launchGracePeriod:    launchGracePeriod,
		nodeInfos:            config.NodeInfos,
		initCompleted:        make(chan struct{}),
	}
	runtime.On(k.initCompleted, k.runSendLoop)

	po := newPodObserver(config.PodLW, k.updateTask, k.terminate)
	runtime.On(k.initCompleted, po.run)

	return k
}
func New(c *config.Config, fw framework.Framework, ps podschedulers.PodScheduler,
	client *client.Client, recorder record.EventRecorder, terminate <-chan struct{}, mux *http.ServeMux, lw *cache.ListWatch) scheduler.Scheduler {

	core := &sched{
		framework:    fw,
		taskRegistry: podtask.NewInMemoryRegistry(),
	}

	// Watch and queue pods that need scheduling.
	podUpdatesBypass := make(chan queue.Entry, c.UpdatesBacklog)
	podUpdates := &podStoreAdapter{queue.NewHistorical(podUpdatesBypass)}
	reflector := cache.NewReflector(lw, &api.Pod{}, podUpdates, 0)

	q := queuer.New(queue.NewDelayFIFO(), podUpdates)

	algorithm := algorithm.New(core, podUpdates, ps)

	podDeleter := deleter.New(core, q)

	core.podReconciler = podreconciler.New(core, client, q, podDeleter)

	bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)
	newBC := func(podKey string) queue.BreakChan {
		return queue.BreakChan(core.Offers().Listen(podKey, func(offer *mesos.Offer) bool {
			core.Lock()
			defer core.Unlock()
			switch task, state := core.Tasks().ForPod(podKey); state {
			case podtask.StatePending:
				// Assess fitness of pod with the current offer. The scheduler normally
				// "backs off" when it can't find an offer that matches up with a pod.
				// The backoff period for a pod can terminate sooner if an offer becomes
				// available that matches up.
				return !task.Has(podtask.Launched) && ps.FitPredicate()(task, offer, nil)
			default:
				// no point in continuing to check for matching offers
				return true
			}
		}))
	}
	errorHandler := errorhandler.New(core, bo, q, newBC)

	binder := binder.New(core)

	startLatch := make(chan struct{})

	runtime.On(startLatch, func() {
		reflector.Run() // TODO(jdef) should listen for termination
		podDeleter.Run(podUpdatesBypass, terminate)
		q.Run(terminate)

		q.InstallDebugHandlers(mux)
		podtask.InstallDebugHandlers(core.Tasks(), mux)
	})

	core.controller = controller.New(client, algorithm, recorder, q.Yield, errorHandler.Error, binder, startLatch)
	return core
}
Example #4
0
func (k *KubernetesScheduler) NewPluginConfig(terminate <-chan struct{}, mux *http.ServeMux,
	podsWatcher *cache.ListWatch) *PluginConfig {

	// Watch and queue pods that need scheduling.
	updates := make(chan queue.Entry, k.schedcfg.UpdatesBacklog)
	podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
	reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)

	// lock that guards critial sections that involve transferring pods from
	// the store (cache) to the scheduling queue; its purpose is to maintain
	// an ordering (vs interleaving) of operations that's easier to reason about.
	kapi := &k8smScheduler{internal: k}
	q := newQueuer(podUpdates)
	podDeleter := &deleter{
		api: kapi,
		qr:  q,
	}
	eh := &errorHandler{
		api:     kapi,
		backoff: backoff.New(k.schedcfg.InitialPodBackoff.Duration, k.schedcfg.MaxPodBackoff.Duration),
		qr:      q,
	}
	startLatch := make(chan struct{})
	eventBroadcaster := record.NewBroadcaster()
	runtime.On(startLatch, func() {
		eventBroadcaster.StartRecordingToSink(k.client.Events(""))
		reflector.Run() // TODO(jdef) should listen for termination
		podDeleter.Run(updates, terminate)
		q.Run(terminate)

		q.installDebugHandlers(mux)
		podtask.InstallDebugHandlers(k.taskRegistry, mux)
	})
	return &PluginConfig{
		Config: &plugin.Config{
			MinionLister: nil,
			Algorithm: &kubeScheduler{
				api:                      kapi,
				podUpdates:               podUpdates,
				defaultContainerCPULimit: k.defaultContainerCPULimit,
				defaultContainerMemLimit: k.defaultContainerMemLimit,
			},
			Binder:   &binder{api: kapi},
			NextPod:  q.yield,
			Error:    eh.handleSchedulingError,
			Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}),
		},
		api:      kapi,
		client:   k.client,
		qr:       q,
		deleter:  podDeleter,
		starting: startLatch,
	}
}
Example #5
0
func TestProc_manyEndings(t *testing.T) {
	p := New()
	const COUNT = 20
	var wg sync.WaitGroup
	wg.Add(COUNT)
	for i := 0; i < COUNT; i++ {
		runtime.On(p.End(), wg.Done)
	}
	wg.Wait()
	<-p.Done()
}
Example #6
0
func TestProc_manyEndings(t *testing.T) {
	p := New()
	const COUNT = 20
	var wg sync.WaitGroup
	wg.Add(COUNT)
	for i := 0; i < COUNT; i++ {
		runtime.On(p.End(), wg.Done)
	}
	fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
Example #7
0
func New(sched bindings.Scheduler) *SchedulerProcess {
	p := &SchedulerProcess{
		Process:   proc.New(),
		Scheduler: sched,
		stage:     initStage,
		elected:   make(chan struct{}),
		failover:  make(chan struct{}),
		standby:   make(chan struct{}),
		fin:       make(chan struct{}),
	}
	runtime.On(p.Running(), p.begin)
	return p
}
Example #8
0
// watch the scheduler process for failover signals and properly handle such. may never return.
func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterface, handler func() error) error {

	// we only want to return the first error (if any), everyone else can block forever
	errCh := make(chan error, 1)
	doFailover := func() error {
		// we really don't expect handler to return, if it does something went seriously wrong
		err := handler()
		if err != nil {
			defer schedulerProcess.End()
			err = fmt.Errorf("failover failed, scheduler will terminate: %v", err)
		}
		return err
	}

	// guard for failover signal processing, first signal processor wins
	failoverLatch := &runtime.Latch{}
	runtime.On(schedulerProcess.Terminal(), func() {
		if !failoverLatch.Acquire() {
			log.V(1).Infof("scheduler process ending, already failing over")
			select {}
		}
		var err error
		defer func() { errCh <- err }()
		select {
		case <-schedulerProcess.Failover():
			err = doFailover()
		default:
			if s.ha {
				err = fmt.Errorf("ha scheduler exiting instead of failing over")
			} else {
				log.Infof("exiting scheduler")
			}
		}
	})
	runtime.OnOSSignal(makeFailoverSigChan(), func(_ os.Signal) {
		if !failoverLatch.Acquire() {
			log.V(1).Infof("scheduler process signalled, already failing over")
			select {}
		}
		errCh <- doFailover()
	})
	return <-errCh
}
Example #9
0
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, etcd.Client, *mesos.ExecutorID) {
	s.frameworkName = strings.TrimSpace(s.frameworkName)
	if s.frameworkName == "" {
		log.Fatalf("framework-name must be a non-empty string")
	}
	s.frameworkWebURI = strings.TrimSpace(s.frameworkWebURI)

	metrics.Register()
	runtime.Register()
	s.mux.Handle("/metrics", prometheus.Handler())
	healthz.InstallHandler(s.mux)

	if len(s.etcdServerList) == 0 {
		log.Fatalf("specify --etcd-servers must be specified")
	}

	if len(s.apiServerList) < 1 {
		log.Fatal("No api servers specified.")
	}

	clientConfig, err := s.createAPIServerClientConfig()
	if err != nil {
		log.Fatalf("Unable to make apiserver client config: %v", err)
	}
	s.client, err = clientset.NewForConfig(clientConfig)
	if err != nil {
		log.Fatalf("Unable to make apiserver clientset: %v", err)
	}

	if s.reconcileCooldown < defaultReconcileCooldown {
		s.reconcileCooldown = defaultReconcileCooldown
		log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.reconcileCooldown)
	}

	eiPrototype, err := s.prepareExecutorInfo(hks)
	if err != nil {
		log.Fatalf("misconfigured executor: %v", err)
	}

	// TODO(jdef): remove the dependency on etcd as soon as
	// (1) the generic config store is available for the FrameworkId storage
	// (2) the generic master election is provided by the apiserver
	// Compare docs/proposals/high-availability.md
	etcdClient, err := newEtcd(s.etcdServerList)
	if err != nil {
		log.Fatalf("misconfigured etcd: %v", err)
	}
	keysAPI := etcd.NewKeysAPI(etcdClient)

	// mirror all nodes into the nodeStore
	var eiRegistry executorinfo.Registry
	nodesClientConfig := *clientConfig
	nodesClient, err := clientset.NewForConfig(&nodesClientConfig)
	if err != nil {
		log.Fatalf("Cannot create client to watch nodes: %v", err)
	}
	nodeLW := cache.NewListWatchFromClient(nodesClient.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
	nodeStore, nodeCtl := controllerfw.NewInformer(nodeLW, &api.Node{}, s.nodeRelistPeriod, &controllerfw.ResourceEventHandlerFuncs{
		DeleteFunc: func(obj interface{}) {
			if eiRegistry != nil {
				// TODO(jdef) use controllerfw.DeletionHandlingMetaNamespaceKeyFunc at some point?
				nodeName := ""
				if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
					nodeName = tombstone.Key
				} else if node, ok := obj.(*api.Node); ok {
					nodeName = node.Name
				}
				if nodeName != "" {
					log.V(2).Infof("deleting node %q from registry", nodeName)
					eiRegistry.Invalidate(nodeName)
				}
			}
		},
	})

	lookupNode := func(hostName string) *api.Node {
		n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
		if n == nil {
			return nil
		}
		return n.(*api.Node)
	}

	execInfoCache, err := executorinfo.NewCache(defaultExecutorInfoCacheSize)
	if err != nil {
		log.Fatalf("cannot create executorinfo cache: %v", err)
	}

	eiRegistry, err = executorinfo.NewRegistry(lookupNode, eiPrototype, execInfoCache)
	if err != nil {
		log.Fatalf("cannot create executorinfo registry: %v", err)
	}

	pr := podtask.NewDefaultProcurement(eiPrototype, eiRegistry)
	fcfs := podschedulers.NewFCFSPodScheduler(pr, lookupNode)
	frameworkIDStorage, err := s.frameworkIDStorage(keysAPI)
	if err != nil {
		log.Fatalf("cannot init framework ID storage: %v", err)
	}
	framework := framework.New(framework.Config{
		SchedulerConfig:   *sc,
		Client:            s.client,
		FailoverTimeout:   s.failoverTimeout,
		ReconcileInterval: s.reconcileInterval,
		ReconcileCooldown: s.reconcileCooldown,
		LookupNode:        lookupNode,
		StoreFrameworkId:  frameworkIDStorage.Set,
		ExecutorId:        eiPrototype.GetExecutorId(),
	})
	masterUri := s.mesosMaster
	info, cred, err := s.buildFrameworkInfo()
	if err != nil {
		log.Fatalf("Misconfigured mesos framework: %v", err)
	}

	schedulerProcess := ha.New(framework)

	// try publishing on the same IP as the slave
	var publishedAddress net.IP
	if libprocessIP := os.Getenv("LIBPROCESS_IP"); libprocessIP != "" {
		publishedAddress = net.ParseIP(libprocessIP)
	}
	if publishedAddress != nil {
		log.V(1).Infof("driver will publish address %v", publishedAddress)
	}

	dconfig := &bindings.DriverConfig{
		Scheduler:        schedulerProcess,
		Framework:        info,
		Master:           masterUri,
		Credential:       cred,
		BindingAddress:   s.address,
		BindingPort:      uint16(s.driverPort),
		PublishedAddress: publishedAddress,
		HostnameOverride: s.hostnameOverride,
		WithAuthContext: func(ctx context.Context) context.Context {
			ctx = auth.WithLoginProvider(ctx, s.mesosAuthProvider)
			ctx = sasl.WithBindingAddress(ctx, s.address)
			return ctx
		},
	}

	// create event recorder sending events to the "" namespace of the apiserver
	eventsClientConfig := *clientConfig
	eventsClient, err := clientset.NewForConfig(&eventsClientConfig)
	if err != nil {
		log.Fatalf("Invalid API configuration: %v", err)
	}
	broadcaster := record.NewBroadcaster()
	recorder := broadcaster.NewRecorder(api.EventSource{Component: api.DefaultSchedulerName})
	broadcaster.StartLogging(log.Infof)
	broadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: eventsClient.Events("")})

	lw := cache.NewListWatchFromClient(s.client.CoreClient, "pods", api.NamespaceAll, fields.Everything())

	hostPortStrategy := hostport.StrategyFixed
	if s.useHostPortEndpoints {
		hostPortStrategy = hostport.StrategyWildcard
	}

	// create scheduler core with all components arranged around it
	sched := components.New(
		sc,
		framework,
		fcfs,
		s.client,
		recorder,
		schedulerProcess.Terminal(),
		s.mux,
		lw,
		podtask.Config{
			DefaultPodRoles:              s.defaultPodRoles,
			FrameworkRoles:               s.frameworkRoles,
			GenerateTaskDiscoveryEnabled: s.generateTaskDiscovery,
			HostPortStrategy:             hostPortStrategy,
			Prototype:                    eiPrototype,
		},
		s.defaultContainerCPULimit,
		s.defaultContainerMemLimit,
	)

	runtime.On(framework.Registration(), func() { sched.Run(schedulerProcess.Terminal()) })
	runtime.On(framework.Registration(), s.newServiceWriter(publishedAddress, schedulerProcess.Terminal()))
	runtime.On(framework.Registration(), func() { nodeCtl.Run(schedulerProcess.Terminal()) })

	driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
		log.V(1).Infoln("performing deferred initialization")
		if err = framework.Init(sched, schedulerProcess.Master(), s.mux); err != nil {
			return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
		}

		log.V(1).Infoln("deferred init complete")
		if s.failoverTimeout > 0 {
			// defer obtaining framework ID to prevent multiple schedulers
			// from overwriting each other's framework IDs
			var frameworkID string
			frameworkID, err = frameworkIDStorage.Get(context.TODO())
			if err != nil {
				return nil, fmt.Errorf("failed to fetch framework ID from storage: %v", err)
			}
			if frameworkID != "" {
				log.Infof("configuring FrameworkInfo with ID found in storage: %q", frameworkID)
				dconfig.Framework.Id = &mesos.FrameworkID{Value: &frameworkID}
			} else {
				log.V(1).Infof("did not find framework ID in storage")
			}
		} else {
			// TODO(jdef) this is a hack, really for development, to simplify clean up of old framework IDs
			frameworkIDStorage.Remove(context.TODO())
		}

		log.V(1).Infoln("constructing mesos scheduler driver")
		drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
		if err != nil {
			return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
		}

		log.V(1).Infoln("constructed mesos scheduler driver:", drv)
		s.setDriver(drv)
		return drv, nil
	})

	return schedulerProcess, driverFactory, etcdClient, eiPrototype.GetExecutorId()
}
Example #10
0
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdClient, *uid.UID) {

	s.FrameworkName = strings.TrimSpace(s.FrameworkName)
	if s.FrameworkName == "" {
		log.Fatalf("framework-name must be a non-empty string")
	}
	s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)

	metrics.Register()
	runtime.Register()
	s.mux.Handle("/metrics", prometheus.Handler())
	healthz.InstallHandler(s.mux)

	if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
		log.Fatalf("specify either --etcd-servers or --etcd-config")
	}

	if len(s.APIServerList) < 1 {
		log.Fatal("No api servers specified.")
	}

	client, err := s.createAPIServerClient()
	if err != nil {
		log.Fatalf("Unable to make apiserver client: %v", err)
	}
	s.client = client

	if s.ReconcileCooldown < defaultReconcileCooldown {
		s.ReconcileCooldown = defaultReconcileCooldown
		log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
	}

	executor, eid, err := s.prepareExecutorInfo(hks)
	if err != nil {
		log.Fatalf("misconfigured executor: %v", err)
	}

	// TODO(jdef): remove the dependency on etcd as soon as
	// (1) the generic config store is available for the FrameworkId storage
	// (2) the generic master election is provided by the apiserver
	// Compare docs/proposals/high-availability.md
	etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
	if err != nil {
		log.Fatalf("misconfigured etcd: %v", err)
	}

	as := scheduler.NewAllocationStrategy(
		podtask.NewDefaultPredicate(
			s.DefaultContainerCPULimit,
			s.DefaultContainerMemLimit,
		),
		podtask.NewDefaultProcurement(
			s.DefaultContainerCPULimit,
			s.DefaultContainerMemLimit,
		),
	)

	// downgrade allocation strategy if user disables "account-for-pod-resources"
	if !s.AccountForPodResources {
		as = scheduler.NewAllocationStrategy(
			podtask.DefaultMinimalPredicate,
			podtask.DefaultMinimalProcurement)
	}

	// mirror all nodes into the nodeStore
	nodesClient, err := s.createAPIServerClient()
	if err != nil {
		log.Fatalf("Cannot create client to watch nodes: %v", err)
	}
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(nodesClient, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, s.nodeRelistPeriod).Run()

	lookupNode := func(hostName string) *api.Node {
		n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
		if n == nil {
			return nil
		}
		return n.(*api.Node)
	}

	fcfs := scheduler.NewFCFSPodScheduler(as, lookupNode)
	mesosPodScheduler := scheduler.New(scheduler.Config{
		Schedcfg:          *sc,
		Executor:          executor,
		Scheduler:         fcfs,
		Client:            client,
		EtcdClient:        etcdClient,
		FailoverTimeout:   s.FailoverTimeout,
		ReconcileInterval: s.ReconcileInterval,
		ReconcileCooldown: s.ReconcileCooldown,
		LookupNode:        lookupNode,
	})

	masterUri := s.MesosMaster
	info, cred, err := s.buildFrameworkInfo()
	if err != nil {
		log.Fatalf("Misconfigured mesos framework: %v", err)
	}

	schedulerProcess := ha.New(mesosPodScheduler)
	dconfig := &bindings.DriverConfig{
		Scheduler:        schedulerProcess,
		Framework:        info,
		Master:           masterUri,
		Credential:       cred,
		BindingAddress:   s.Address,
		BindingPort:      uint16(s.DriverPort),
		HostnameOverride: s.HostnameOverride,
		WithAuthContext: func(ctx context.Context) context.Context {
			ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
			ctx = sasl.WithBindingAddress(ctx, s.Address)
			return ctx
		},
	}

	kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
	runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
	runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))

	driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
		log.V(1).Infoln("performing deferred initialization")
		if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
			return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
		}
		log.V(1).Infoln("deferred init complete")
		// defer obtaining framework ID to prevent multiple schedulers
		// from overwriting each other's framework IDs
		dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
		}
		log.V(1).Infoln("constructing mesos scheduler driver")
		drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
		if err != nil {
			return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
		}
		log.V(1).Infoln("constructed mesos scheduler driver:", drv)
		s.setDriver(drv)
		return drv, nil
	})

	return schedulerProcess, driverFactory, etcdClient, eid
}
Example #11
0
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdClient, *uid.UID) {

	s.FrameworkName = strings.TrimSpace(s.FrameworkName)
	if s.FrameworkName == "" {
		log.Fatalf("framework-name must be a non-empty string")
	}
	s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI)

	metrics.Register()
	runtime.Register()
	s.mux.Handle("/metrics", prometheus.Handler())

	if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) {
		log.Fatalf("specify either --etcd-servers or --etcd-config")
	}

	if len(s.APIServerList) < 1 {
		log.Fatal("No api servers specified.")
	}

	client, err := s.createAPIServerClient()
	if err != nil {
		log.Fatalf("Unable to make apiserver client: %v", err)
	}
	s.client = client

	if s.ReconcileCooldown < defaultReconcileCooldown {
		s.ReconcileCooldown = defaultReconcileCooldown
		log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown)
	}

	executor, eid, err := s.prepareExecutorInfo(hks)
	if err != nil {
		log.Fatalf("misconfigured executor: %v", err)
	}

	// TODO(jdef): remove the dependency on etcd as soon as
	// (1) the generic config store is available for the FrameworkId storage
	// (2) the generic master election is provided by the apiserver
	// Compare docs/proposals/high-availability.md
	etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList)
	if err != nil {
		log.Fatalf("misconfigured etcd: %v", err)
	}

	mesosPodScheduler := scheduler.New(scheduler.Config{
		Schedcfg:                 *sc,
		Executor:                 executor,
		ScheduleFunc:             scheduler.FCFSScheduleFunc,
		Client:                   client,
		EtcdClient:               etcdClient,
		FailoverTimeout:          s.FailoverTimeout,
		ReconcileInterval:        s.ReconcileInterval,
		ReconcileCooldown:        s.ReconcileCooldown,
		DefaultContainerCPULimit: s.DefaultContainerCPULimit,
		DefaultContainerMemLimit: s.DefaultContainerMemLimit,
	})

	masterUri := s.MesosMaster
	info, cred, err := s.buildFrameworkInfo()
	if err != nil {
		log.Fatalf("Misconfigured mesos framework: %v", err)
	}

	schedulerProcess := ha.New(mesosPodScheduler)
	dconfig := &bindings.DriverConfig{
		Scheduler:        schedulerProcess,
		Framework:        info,
		Master:           masterUri,
		Credential:       cred,
		BindingAddress:   net.IP(s.Address),
		BindingPort:      uint16(s.DriverPort),
		HostnameOverride: s.HostnameOverride,
		WithAuthContext: func(ctx context.Context) context.Context {
			ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider)
			ctx = sasl.WithBindingAddress(ctx, net.IP(s.Address))
			return ctx
		},
	}

	kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux))
	runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) })
	runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))

	driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
		log.V(1).Infoln("performing deferred initialization")
		if err = mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil {
			return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
		}
		log.V(1).Infoln("deferred init complete")
		// defer obtaining framework ID to prevent multiple schedulers
		// from overwriting each other's framework IDs
		dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
		}
		log.V(1).Infoln("constructing mesos scheduler driver")
		drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
		if err != nil {
			return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
		}
		log.V(1).Infoln("constructed mesos scheduler driver:", drv)
		s.setDriver(drv)
		return drv, nil
	})

	return schedulerProcess, driverFactory, etcdClient, eid
}
Example #12
0
func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, *etcd.Client, *mesos.ExecutorID) {
	s.frameworkName = strings.TrimSpace(s.frameworkName)
	if s.frameworkName == "" {
		log.Fatalf("framework-name must be a non-empty string")
	}
	s.frameworkWebURI = strings.TrimSpace(s.frameworkWebURI)

	metrics.Register()
	runtime.Register()
	s.mux.Handle("/metrics", prometheus.Handler())
	healthz.InstallHandler(s.mux)

	if (s.etcdConfigFile != "" && len(s.etcdServerList) != 0) || (s.etcdConfigFile == "" && len(s.etcdServerList) == 0) {
		log.Fatalf("specify either --etcd-servers or --etcd-config")
	}

	if len(s.apiServerList) < 1 {
		log.Fatal("No api servers specified.")
	}

	client, err := s.createAPIServerClient()
	if err != nil {
		log.Fatalf("Unable to make apiserver client: %v", err)
	}
	s.client = client

	if s.reconcileCooldown < defaultReconcileCooldown {
		s.reconcileCooldown = defaultReconcileCooldown
		log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.reconcileCooldown)
	}

	eiPrototype, err := s.prepareExecutorInfo(hks)
	if err != nil {
		log.Fatalf("misconfigured executor: %v", err)
	}

	// TODO(jdef): remove the dependency on etcd as soon as
	// (1) the generic config store is available for the FrameworkId storage
	// (2) the generic master election is provided by the apiserver
	// Compare docs/proposals/high-availability.md
	etcdClient, err := newEtcd(s.etcdConfigFile, s.etcdServerList)
	if err != nil {
		log.Fatalf("misconfigured etcd: %v", err)
	}

	// mirror all nodes into the nodeStore
	var eiRegistry executorinfo.Registry
	nodesClient, err := s.createAPIServerClient()
	if err != nil {
		log.Fatalf("Cannot create client to watch nodes: %v", err)
	}
	nodeLW := cache.NewListWatchFromClient(nodesClient, "nodes", api.NamespaceAll, fields.Everything())
	nodeStore, nodeCtl := controllerfw.NewInformer(nodeLW, &api.Node{}, s.nodeRelistPeriod, &controllerfw.ResourceEventHandlerFuncs{
		DeleteFunc: func(obj interface{}) {
			node := obj.(*api.Node)
			if eiRegistry != nil {
				log.V(2).Infof("deleting node %q from registry", node.Name)
				eiRegistry.Invalidate(node.Name)
			}
		},
	})

	lookupNode := func(hostName string) *api.Node {
		n, _, _ := nodeStore.GetByKey(hostName) // ignore error and return nil then
		if n == nil {
			return nil
		}
		return n.(*api.Node)
	}

	execInfoCache, err := executorinfo.NewCache(defaultExecutorInfoCacheSize)
	if err != nil {
		log.Fatalf("cannot create executorinfo cache: %v", err)
	}

	eiRegistry, err = executorinfo.NewRegistry(lookupNode, eiPrototype, execInfoCache)
	if err != nil {
		log.Fatalf("cannot create executorinfo registry: %v", err)
	}

	pr := podtask.NewDefaultProcurement(eiPrototype, eiRegistry)
	fcfs := podschedulers.NewFCFSPodScheduler(pr, lookupNode)

	framework := framework.New(framework.Config{
		SchedulerConfig:   *sc,
		Client:            client,
		FailoverTimeout:   s.failoverTimeout,
		ReconcileInterval: s.reconcileInterval,
		ReconcileCooldown: s.reconcileCooldown,
		LookupNode:        lookupNode,
		StoreFrameworkId: func(id string) {
			// TODO(jdef): port FrameworkId store to generic Kubernetes config store as soon as available
			_, err := etcdClient.Set(meta.FrameworkIDKey, id, uint64(s.failoverTimeout))
			if err != nil {
				log.Errorf("failed to renew frameworkId TTL: %v", err)
			}
		},
		ExecutorId: eiPrototype.GetExecutorId(),
	})

	masterUri := s.mesosMaster
	info, cred, err := s.buildFrameworkInfo()
	if err != nil {
		log.Fatalf("Misconfigured mesos framework: %v", err)
	}

	schedulerProcess := ha.New(framework)
	dconfig := &bindings.DriverConfig{
		Scheduler:        schedulerProcess,
		Framework:        info,
		Master:           masterUri,
		Credential:       cred,
		BindingAddress:   s.address,
		BindingPort:      uint16(s.driverPort),
		HostnameOverride: s.hostnameOverride,
		WithAuthContext: func(ctx context.Context) context.Context {
			ctx = auth.WithLoginProvider(ctx, s.mesosAuthProvider)
			ctx = sasl.WithBindingAddress(ctx, s.address)
			return ctx
		},
	}

	// create event recorder sending events to the "" namespace of the apiserver
	broadcaster := record.NewBroadcaster()
	recorder := broadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
	broadcaster.StartRecordingToSink(client.Events(""))

	// create scheduler core with all components arranged around it
	lw := cache.NewListWatchFromClient(client, "pods", api.NamespaceAll, fields.Everything())
	sched := components.New(
		sc,
		framework,
		fcfs,
		client,
		recorder,
		schedulerProcess.Terminal(),
		s.mux,
		lw,
		eiPrototype,
		s.mesosRoles,
		s.defaultContainerCPULimit,
		s.defaultContainerMemLimit,
	)

	runtime.On(framework.Registration(), func() { sched.Run(schedulerProcess.Terminal()) })
	runtime.On(framework.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))
	runtime.On(framework.Registration(), func() { nodeCtl.Run(schedulerProcess.Terminal()) })

	driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
		log.V(1).Infoln("performing deferred initialization")
		if err = framework.Init(sched, schedulerProcess.Master(), s.mux); err != nil {
			return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
		}
		log.V(1).Infoln("deferred init complete")
		// defer obtaining framework ID to prevent multiple schedulers
		// from overwriting each other's framework IDs
		dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err)
		}
		log.V(1).Infoln("constructing mesos scheduler driver")
		drv, err = bindings.NewMesosSchedulerDriver(*dconfig)
		if err != nil {
			return nil, fmt.Errorf("failed to construct scheduler driver: %v", err)
		}
		log.V(1).Infoln("constructed mesos scheduler driver:", drv)
		s.setDriver(drv)
		return drv, nil
	})

	return schedulerProcess, driverFactory, etcdClient, eiPrototype.GetExecutorId()
}