Example #1
// makePodRegistryReconciler is a reconciler action factory: the returned action
// performs explicit task reconciliation for non-terminal tasks identified by
// annotations in the Kubernetes pod registry.
func (k *KubernetesScheduler) makePodRegistryReconciler() ReconcilerAction {
	return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
		ctx := api.NewDefaultContext()
		podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
		if err != nil {
			return proc.ErrorChanf("failed to reconcile pod registry: %v", err)
		}
		taskToSlave := make(map[string]string)
		for _, pod := range podList.Items {
			if len(pod.Annotations) == 0 {
				continue
			}
			taskId, found := pod.Annotations[meta.TaskIdKey]
			if !found {
				continue
			}
			slaveId, found := pod.Annotations[meta.SlaveIdKey]
			if !found {
				continue
			}
			taskToSlave[taskId] = slaveId
		}
		return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
	})
}
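A minimal usage sketch (not part of the project listing): it assumes k is an initialized *KubernetesScheduler and drv is a connected bindings.SchedulerDriver, both provided by the surrounding scheduler setup.

// runExplicitReconciliation is a hypothetical helper showing how the returned
// action might be driven once.
func runExplicitReconciliation(k *KubernetesScheduler, drv bindings.SchedulerDriver) error {
	action := k.makePodRegistryReconciler()
	cancel := make(chan struct{}) // close(cancel) to abort a long-running reconciliation
	defer close(cancel)
	return <-action(drv, cancel) // nil once reconciliation completed successfully
}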
Example #2
// New creates a new KubernetesScheduler
func New(config Config) *KubernetesScheduler {
	var k *KubernetesScheduler
	k = &KubernetesScheduler{
		schedcfg:          &config.Schedcfg,
		RWMutex:           new(sync.RWMutex),
		executor:          config.Executor,
		executorGroup:     uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
		PodScheduler:      config.Scheduler,
		client:            config.Client,
		etcdClient:        config.EtcdClient,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}

				// the executor IDs must not identify a kubelet-executor with a group that doesn't match ours
				for _, eid := range o.GetExecutorIds() {
					execuid := uid.Parse(eid.GetValue())
					if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
						return false
					}
				}

				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// wrap Report in a closure so err is evaluated when the deferred
					// call runs; a plain `defer errOnce.Report(err)` would capture the
					// still-nil err at the moment defer is declared
					defer func() { errOnce.Report(err) }()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previously scheduled pod relies on one
			LingerTTL:     config.Schedcfg.OfferLingerTTL.Duration,
			TTL:           config.Schedcfg.OfferTTL.Duration,
			ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
		}),
		slaveHostNames:    slave.NewRegistry(),
		taskRegistry:      podtask.NewInMemoryRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
	}
	return k
}
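A rough wiring sketch, not taken from the project: every right-hand-side identifier below (schedConfig, executorInfo, podScheduler, kubeClient, etcdClient, lookupNode and the duration values) is a placeholder for a value the caller is assumed to build during startup.

sched := New(Config{
	Schedcfg:          schedConfig,       // scheduler tuning: offer TTLs, listener delay, ...
	Executor:          executorInfo,      // ExecutorInfo describing the kubelet-executor
	Scheduler:         podScheduler,      // pod fitting/placement strategy
	Client:            kubeClient,        // Kubernetes API client
	EtcdClient:        etcdClient,
	LookupNode:        lookupNode,        // resolves an offer's hostname to a registered node
	FailoverTimeout:   failoverTimeout,
	ReconcileInterval: reconcileInterval,
	ReconcileCooldown: reconcileCooldown,
})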
Example #3
func (k *KubernetesScheduler) Init(electedMaster proc.Process, pl PluginInterface, mux *http.ServeMux) error {
	log.V(1).Infoln("initializing kubernetes mesos scheduler")

	k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
		if !k.registered {
			return proc.ErrorChanf("failed to execute action, scheduler is disconnected")
		}
		return electedMaster.Do(a)
	})
	k.terminate = electedMaster.Done()
	k.plugin = pl
	k.offers.Init(k.terminate)
	k.InstallDebugHandlers(mux)
	return k.recoverTasks()
}
Example #4
func (k *framework) Init(sched scheduler.Scheduler, electedMaster proc.Process, mux *http.ServeMux) error {
	log.V(1).Infoln("initializing kubernetes mesos scheduler")

	k.sched = sched
	k.mux = mux
	k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
		if !k.registered {
			return proc.ErrorChanf("failed to execute action, scheduler is disconnected")
		}
		return electedMaster.Do(a)
	})
	k.terminate = electedMaster.Done()
	k.offers.Init(k.terminate)
	k.nodeRegistrator.Run(k.terminate)
	return k.recoverTasks()
}
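A sketch of the surrounding startup step (assumptions: fw is the Framework value returned by New and is assumed here to expose Init, sched is the scheduler.Scheduler implementation, and electedMaster is the proc.Process of the elected scheduler; none of them are defined in this listing).

mux := http.NewServeMux()
if err := fw.Init(sched, electedMaster, mux); err != nil {
	log.Fatalf("failed to initialize the scheduler framework: %v", err)
}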
Example #5
// makePodRegistryReconciler is a reconciler action factory: the returned action
// performs explicit task reconciliation for non-terminal tasks identified by
// annotations in the Kubernetes pod registry.
func (k *framework) makePodRegistryReconciler() taskreconciler.Action {
	return taskreconciler.Action(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
		podList, err := k.client.Core().Pods(api.NamespaceAll).List(api.ListOptions{})
		if err != nil {
			return proc.ErrorChanf("failed to reconcile pod registry: %v", err)
		}
		taskToSlave := make(map[string]string)
		for _, pod := range podList.Items {
			if len(pod.Annotations) == 0 {
				continue
			}
			taskId, found := pod.Annotations[meta.TaskIdKey]
			if !found {
				continue
			}
			slaveId, found := pod.Annotations[meta.SlaveIdKey]
			if !found {
				continue
			}
			taskToSlave[taskId] = slaveId
		}
		return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
	})
}
Example #6
// New creates a new Framework
func New(config Config) Framework {
	var k *framework
	k = &framework{
		schedulerConfig:   &config.SchedulerConfig,
		RWMutex:           new(sync.RWMutex),
		client:            config.Client,
		failoverTimeout:   config.FailoverTimeout,
		reconcileInterval: config.ReconcileInterval,
		nodeRegistrator:   node.NewRegistrator(config.Client, config.LookupNode),
		executorId:        config.ExecutorId,
		offers: offers.CreateRegistry(offers.RegistryConfig{
			Compat: func(o *mesos.Offer) bool {
				// the node must be registered and have up-to-date labels
				n := config.LookupNode(o.GetHostname())
				if n == nil || !node.IsUpToDate(n, node.SlaveAttributesToLabels(o.GetAttributes())) {
					return false
				}

				eids := len(o.GetExecutorIds())
				switch {
				case eids > 1:
					// at most one executor id expected. More than one means that
					// the given node is seriously in trouble.
					return false

				case eids == 1:
					// the executor id must match, otherwise the running executor
					// is incompatible with the current scheduler configuration.
					if eid := o.GetExecutorIds()[0]; eid.GetValue() != config.ExecutorId.GetValue() {
						return false
					}
				}

				return true
			},
			DeclineOffer: func(id string) <-chan error {
				errOnce := proc.NewErrorOnce(k.terminate)
				errOuter := k.asRegisteredMaster.Do(func() {
					var err error
					// wrap Report in a closure so err is evaluated when the deferred
					// call runs; a plain `defer errOnce.Report(err)` would capture the
					// still-nil err at the moment defer is declared
					defer func() { errOnce.Report(err) }()
					offerId := mutil.NewOfferID(id)
					filters := &mesos.Filters{}
					_, err = k.driver.DeclineOffer(offerId, filters)
				})
				return errOnce.Send(errOuter).Err()
			},
			// remember expired offers so that we can tell if a previously scheduled pod relies on one
			LingerTTL:     config.SchedulerConfig.OfferLingerTTL.Duration,
			TTL:           config.SchedulerConfig.OfferTTL.Duration,
			ListenerDelay: config.SchedulerConfig.ListenerDelay.Duration,
		}),
		slaveHostNames:    newSlaveRegistry(),
		reconcileCooldown: config.ReconcileCooldown,
		registration:      make(chan struct{}),
		asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
			return proc.ErrorChanf("cannot execute action with unregistered scheduler")
		}),
		storeFrameworkId: config.StoreFrameworkId,
		lookupNode:       config.LookupNode,
	}
	return k
}
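As with example #2, a hedged wiring sketch; the right-hand-side identifiers are placeholders for values the caller is assumed to have prepared elsewhere.

fw := New(Config{
	SchedulerConfig:   schedulerConfig,  // offer TTLs, listener delay, ...
	ExecutorId:        executorId,       // *mesos.ExecutorID shared with the launched executors
	Client:            kubeClient,
	LookupNode:        lookupNode,
	StoreFrameworkId:  storeFrameworkId, // persists the registered Mesos framework id
	FailoverTimeout:   failoverTimeout,
	ReconcileInterval: reconcileInterval,
	ReconcileCooldown: reconcileCooldown,
})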