// Run launches the proxy and executor subprocesses and blocks until a minion
// exit is requested via ms.exit.
func (ms *MinionServer) Run(hks hyperkube.Interface, _ []string) error {
	if ms.privateMountNS {
		// only the Linux version will do anything
		enterPrivateMountNamespace()
	}

	// create apiserver client
	clientConfig, err := ms.KubeletExecutorServer.CreateAPIServerClientConfig()
	if err != nil {
		// required for k8sm since we need to send api.Binding information
		// back to the apiserver
		log.Fatalf("No API client: %v", err)
	}
	ms.clientConfig = clientConfig

	// run subprocesses until ms.done is closed on return of this function
	defer close(ms.done)
	if ms.runProxy {
		go runtime.Until(ms.launchProxyServer, 5*time.Second, ms.done)
	}
	go runtime.Until(ms.launchExecutorServer, 5*time.Second, ms.done)

	// wait until minion exit is requested
	// don't close ms.exit here to avoid panics of go routines writing an error to it
	return <-ms.exit
}
func (m *MockScheduler) Run(done <-chan struct{}) {
	_ = m.Called()
	runtime.Until(func() {
		time.Sleep(time.Second)
	}, time.Second, done)
}
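Every snippet in this listing drives its background work through runtime.Until(f, period, done). For reference, the sketch below shows one plausible shape of that contract: run f repeatedly (recovering from panics, as the comment in the proc backlog handler further down relies on), pause for period between runs, and stop once done is closed. untilSketch is a hypothetical stand-in, not the project's implementation, and assumes only the standard library's "fmt" and "time" packages.

// untilSketch is a minimal, hypothetical stand-in for the runtime.Until helper used
// throughout these snippets; it is not the project's actual implementation.
func untilSketch(f func(), period time.Duration, done <-chan struct{}) {
	for {
		select {
		case <-done:
			return
		default:
		}
		func() {
			defer func() {
				if r := recover(); r != nil {
					fmt.Printf("recovered from panic in until loop: %v\n", r)
				}
			}()
			f()
		}()
		select {
		case <-done:
			return
		case <-time.After(period):
		}
	}
}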
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedulerConfig.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}

		// wait until we've written the framework ID at least once before proceeding
		firstStore := make(chan struct{})
		go runtime.Until(func() {
			// only close firstStore once
			select {
			case <-firstStore:
			default:
				defer close(firstStore)
			}
			err := k.storeFrameworkId(context.TODO(), k.frameworkId.GetValue())
			if err != nil {
				log.Errorf("failed to store framework ID: %v", err)
				if err == frameworkid.ErrMismatch {
					// we detected a framework ID in storage that doesn't match what we're trying
					// to save. this is a dangerous state:
					// (1) perhaps we failed to initially recover the framework ID and so mesos
					//     issued us a new one. now that we're trying to save it there's a mismatch.
					// (2) we've somehow bungled the framework ID and we're out of alignment with
					//     what mesos is expecting.
					// (3) multiple schedulers were launched at the same time, and both have
					//     registered with mesos (because when they each checked, there was no ID in
					//     storage, so they asked for a new one). one of them has already written the
					//     ID to storage -- we lose.
					log.Error("aborting due to framework ID mismatch")
					driver.Abort()
				}
			}
		}, refreshInterval, k.terminate)

		// wait for the first store attempt of the framework ID
		select {
		case <-firstStore:
		case <-k.terminate:
		}
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()
	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, taskreconciler.MakeComposite(k.terminate, r1, r2),
		k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() {
			runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
	}

	k.installDebugHandlers(k.mux)
}
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedulerConfig.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}
		go runtime.Until(func() {
			k.storeFrameworkId(k.frameworkId.GetValue())
		}, refreshInterval, k.terminate)
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()
	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, taskreconciler.MakeComposite(k.terminate, r1, r2),
		k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() {
			runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
	}

	k.installDebugHandlers(k.mux)
}
func (self *procImpl) begin() runtime.Signal {
	if !self.state.transition(stateNew, stateRunning) {
		panic(fmt.Errorf("failed to transition from New to Running state"))
	}
	defer log.V(2).Infof("started process %d", self.pid)
	var entered runtime.Latch

	// execute actions on the backlog chan
	return runtime.After(func() {
		runtime.Until(func() {
			if entered.Acquire() {
				close(self.running)
				self.wg.Add(1)
			}
			for action := range self.backlog {
				select {
				case <-self.terminate:
					return
				default:
					// signal to indicate there's room in the backlog now
					self.changed.Broadcast()

					// rely on Until to handle action panics
					action()
				}
			}
		}, self.actionHandlerCrashDelay, self.terminate)
	}).Then(func() {
		log.V(2).Infof("finished processing action backlog for process %d", self.pid)
		if !entered.Acquire() {
			self.wg.Done()
		}
	})
}
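begin() chains runtime.After(...).Then(...) so that a cleanup step runs once the backlog loop has exited. The sketch below shows one plausible shape of that pattern; afterSketch, signalSketch and thenSketch are made-up names, not the real runtime.Signal API.

// signalSketch stands in for a completion signal such as runtime.Signal (assumed shape).
type signalSketch <-chan struct{}

// afterSketch runs fn in a goroutine and returns a signal that closes when fn returns.
func afterSketch(fn func()) signalSketch {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		fn()
	}()
	return signalSketch(ch)
}

// thenSketch waits for s to fire, then runs fn, returning a signal for the combined work.
func (s signalSketch) thenSketch(fn func()) signalSketch {
	return afterSketch(func() {
		<-s
		fn()
	})
}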
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							break
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							break
						}
					sendElected:
						for {
							select {
							case <-abort:
								return
							case n.masters <- electedMaster:
								break sendElected
							default: // ring full, discard old value and add the new
								select {
								case <-abort:
									return
								case <-n.masters:
								default: // ring was cleared for us?!
								}
							}
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
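The n.masters channel above is a one-slot "ring": the watcher keeps only the most recent election result and never blocks waiting for the service loop. A hypothetical, stripped-down version of that send path (abort handling omitted; publishLatest is not part of the original code):

// publishLatest delivers v into a one-slot mailbox, evicting any stale value so the
// receiver always sees the most recent Master. This mirrors the sendElected loop above,
// minus the abort channel; it is an illustration, not the project's code.
func publishLatest(mailbox chan Master, v Master) {
	for {
		select {
		case mailbox <- v: // room available: latest value delivered
			return
		default:
			select {
			case <-mailbox: // mailbox full: drop the stale value, then retry the send
			default: // the receiver drained it first; just retry
			}
		}
	}
}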
func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
	if n := len(s.frameworkRoles); n == 0 || n > 2 || (n == 2 && s.frameworkRoles[0] != "*" && s.frameworkRoles[1] != "*") {
		log.Fatalf(`only one custom role allowed in addition to "*"`)
	}

	fwSet := sets.NewString(s.frameworkRoles...)
	podSet := sets.NewString(s.defaultPodRoles...)
	if !fwSet.IsSuperset(podSet) {
		log.Fatalf("all default pod roles %q must be included in framework roles %q", s.defaultPodRoles, s.frameworkRoles)
	}

	// get scheduler low-level config
	sc := schedcfg.CreateDefaultConfig()
	if s.schedulerConfigFileName != "" {
		f, err := os.Open(s.schedulerConfigFileName)
		if err != nil {
			log.Fatalf("Cannot open scheduler config file: %v", err)
		}
		defer f.Close()

		err = sc.Read(bufio.NewReader(f))
		if err != nil {
			log.Fatalf("Invalid scheduler config file: %v", err)
		}
	}

	schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)

	if s.enableProfiling {
		profile.InstallHandler(s.mux)
	}
	go runtime.Until(func() {
		log.V(1).Info("Starting HTTP interface")
		log.Error(http.ListenAndServe(net.JoinHostPort(s.address.String(), strconv.Itoa(s.port)), s.mux))
	}, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())

	if s.ha {
		validation := ha.ValidationFunc(validateLeadershipTransition)
		srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
		path := meta.ElectionPath(s.frameworkName)
		uuid := eid.GetValue() + ":" + uuid.New() // unique for each scheduler instance
		log.Infof("registering for election at %v with id %v", path, uuid)
		go election.Notify(
			election.NewEtcdMasterElector(etcdClient),
			path,
			uuid,
			srv,
			nil)
	} else {
		log.Infoln("self-electing in non-HA mode")
		schedulerProcess.Elect(driverFactory)
	}
	return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) })
}
func (s *offerStorage) Init(done <-chan struct{}) {
	// zero delay, reap offers as soon as they expire
	go runtime.Until(s.ageOffers, 0, done)

	// cached offer ids for the purposes of listener notification
	idCache := &stringsCache{
		refill: func() sets.String {
			result := sets.NewString()
			for _, v := range s.offers.List() {
				if offer, ok := v.(Perishable); ok {
					result.Insert(offer.Id())
				}
			}
			return result
		},
		ttl: offerIdCacheTTL,
	}
	go runtime.Until(func() { s.notifyListeners(idCache.Strings) }, notifyListenersDelay, done)
}
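Init wires notifyListeners to idCache.Strings, a TTL-guarded snapshot of the current offer ids. The stringsCache definition is not shown in this listing; the sketch below is one plausible reconstruction that reuses the refill/ttl field names and the Strings method visible above, with everything else assumed.

// stringsCacheSketch is a hypothetical reconstruction of the stringsCache used in Init:
// Strings returns a cached sets.String and re-runs refill once ttl has elapsed.
// It is not safe for concurrent use; Init only calls it from a single goroutine.
type stringsCacheSketch struct {
	refill    func() sets.String
	ttl       time.Duration
	cached    sets.String
	expiresAt time.Time
}

func (c *stringsCacheSketch) Strings() sets.String {
	now := time.Now()
	if c.cached == nil || now.After(c.expiresAt) {
		c.cached = c.refill()
		c.expiresAt = now.Add(c.ttl)
	}
	return c.cached
}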
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s}
	n.changed = make(chan struct{})
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							break
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							break
						}
						n.lock.Lock()
						n.desired = electedMaster
						n.lock.Unlock()

						// notify serviceLoop, but don't block. If a change
						// is queued already it will see the new n.desired.
						select {
						case n.changed <- struct{}{}:
						default:
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
// perform one-time initialization actions upon the first registration event received from Mesos.
func (k *KubernetesScheduler) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		refreshInterval := k.schedcfg.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedcfg.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}
		go runtime.Until(k.storeFrameworkId, refreshInterval, k.terminate)
	}

	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()
	k.reconciler = newReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2),
		k.reconcileCooldown, k.schedcfg.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.reconciler.Run(driver)

	if k.reconcileInterval > 0 {
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedcfg.InitialImplicitReconciliationDelay.Duration, func() {
			runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedcfg.InitialImplicitReconciliationDelay.Duration)
	}
}
// currently monitors for "pod deleted" events, upon which deleteOne()
// is invoked.
func (k *deleter) Run(updates <-chan queue.Entry, done <-chan struct{}) {
	go runtime.Until(func() {
		for {
			entry := <-updates
			pod := entry.Value().(*Pod)
			if entry.Is(queue.DELETE_EVENT) {
				if err := k.deleteOne(pod); err != nil {
					log.Error(err)
				}
			} else if !entry.Is(queue.POP_EVENT) {
				k.qr.updatesAvailable()
			}
		}
	}, 1*time.Second, done)
}
// spawns a go-routine to watch for unscheduled pods and queue them up
// for scheduling. returns immediately.
func (q *queuer) Run(done <-chan struct{}) {
	go runtime.Until(func() {
		log.Info("Watching for newly created pods")
		q.lock.Lock()
		defer q.lock.Unlock()

		for {
			// limit blocking here for short intervals so that scheduling
			// may proceed even if there have been no recent pod changes
			p := q.podUpdates.Await(enqueuePopTimeout)
			if p == nil {
				signalled := runtime.After(q.deltaCond.Wait)
				// we've yielded the lock
				select {
				case <-time.After(enqueueWaitTimeout):
					q.deltaCond.Broadcast() // abort Wait()
					<-signalled             // wait for lock re-acquisition
					log.V(4).Infoln("timed out waiting for a pod update")
				case <-signalled:
					// we've acquired the lock and there may be
					// changes for us to process now
				}
				continue
			}

			pod := p.(*Pod)
			if recoverAssignedSlave(pod.Pod) != "" {
				log.V(3).Infof("dequeuing assigned pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else if pod.InGracefulTermination() {
				// pods which are pre-scheduled (i.e. NodeName is set) may be gracefully deleted,
				// even though they are not running yet.
				log.V(3).Infof("dequeuing graceful deleted pre-scheduled pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else {
				// use ReplaceExisting because we are always pushing the latest state
				now := time.Now()
				pod.deadline = &now
				if q.podQueue.Offer(pod, queue.ReplaceExisting) {
					q.unscheduledCond.Broadcast()
					log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name)
				} else {
					log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name)
				}
			}
		}
	}, 1*time.Second, done)
}
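The deltaCond handling above is the trickiest part of queuer.Run: Wait runs inside a runtime.After goroutine so the caller can bound it with a timer, and Broadcast aborts the Wait on timeout before the lock is handed back. A hypothetical helper capturing the same idea follows; waitWithTimeout is not part of the original code and assumes cond.L is already held by the caller, exactly as q.lock is held above.

// waitWithTimeout waits on cond until it is signalled or timeout elapses. cond.L must be
// held when calling; Wait releases it while blocked and re-acquires it before this helper
// returns, mirroring the deltaCond/enqueueWaitTimeout pattern above. Illustration only.
func waitWithTimeout(cond *sync.Cond, timeout time.Duration) (timedOut bool) {
	signalled := make(chan struct{})
	go func() {
		cond.Wait() // releases cond.L while waiting, re-acquires it before returning
		close(signalled)
	}()
	select {
	case <-time.After(timeout):
		cond.Broadcast() // abort the Wait()
		<-signalled      // wait for the lock to be re-acquired
		return true
	case <-signalled:
		return false
	}
}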
func (r *clientRegistrator) Run(terminate <-chan struct{}) error {
	loop := func() {
	RegistrationLoop:
		for {
			obj := r.queue.Pop(terminate)
			log.V(3).Infof("registration event observed")
			if obj == nil {
				break RegistrationLoop
			}
			select {
			case <-terminate:
				break RegistrationLoop
			default:
			}

			rg := obj.(*registration)
			n, needsUpdate := r.updateNecessary(rg.hostName, rg.labels)
			if !needsUpdate {
				log.V(2).Infof("no update needed, skipping for %s: %v", rg.hostName, rg.labels)
				continue
			}

			if n == nil {
				log.V(2).Infof("creating node %s with labels %v", rg.hostName, rg.labels)
				_, err := CreateOrUpdate(r.client, rg.hostName, rg.labels, nil)
				if err != nil {
					log.Errorf("error creating the node %s: %v", rg.hostName, err)
				}
			} else {
				log.V(2).Infof("updating node %s with labels %v", rg.hostName, rg.labels)
				_, err := Update(r.client, rg.hostName, rg.labels, nil)
				if err != nil && errors.IsNotFound(err) {
					// last chance when our store was out of date
					_, err = Create(r.client, rg.hostName, rg.labels, nil)
				}
				if err != nil {
					log.Errorf("error updating the node %s: %v", rg.hostName, err)
				}
			}
		}
	}
	go runtime.Until(loop, time.Second, terminate)
	return nil
}
func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod).Run()

	monitor := func() {
		// build up a slave set of nodes without kubelet
		slavesWithoutKubeletList, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error while updating slave nodes: %v", err)
			return
		}
		slavesWithoutKubelet := make(map[string]struct{}, len(slavesWithoutKubeletList))
		for _, s := range slavesWithoutKubeletList {
			slavesWithoutKubelet[s] = struct{}{}
		}

		// update status for nodes which do not have a kubelet running and
		// which are still existing as slave. This status update must be done
		// before the node controller counts down the NodeMonitorGracePeriod
		obj, err := nodeLW.List()
		if err != nil {
			log.Errorf("Error listing the nodes for status updates: %v", err)
			return
		}
		nl, _ := obj.(*api.NodeList)
		nodes := nl.Items
		for i := range nodes {
			if _, ok := slavesWithoutKubelet[nodes[i].Spec.ExternalID]; !ok {
				// let the kubelet do its job updating the status, or the
				// node controller will remove this node if the node does not even
				// exist anymore
				continue
			}

			err := u.updateStatus(&nodes[i])
			if err != nil {
				log.Errorf("Error updating node status: %v", err)
			}
		}
	}

	go runtime.Until(monitor, u.heartBeatPeriod, terminate)
	return nil
}
func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod).Run()

	monitor := func() {
		// build up a set of listed slave nodes without a kubelet
		slaves, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error listing slaves without kubelet: %v", err)
			return
		}
		slavesWithoutKubelet := sets.NewString(slaves...)

		// update status for nodes which do not have a kubelet running and
		// which are still existing as slave. This status update must be done
		// before the node controller counts down the NodeMonitorGracePeriod
		nodes := nodeStore.List()
		for _, n := range nodes {
			node := n.(*api.Node)
			if !slavesWithoutKubelet.Has(node.Spec.ExternalID) {
				// let the kubelet do its job updating the status, or the
				// node controller will remove this node if the node does not even
				// exist anymore
				continue
			}

			err := u.updateStatus(node)
			if err != nil {
				log.Errorf("Error updating node status: %v", err)
			}
		}
	}

	go runtime.Until(monitor, u.heartBeatPeriod, terminate)
	return nil
}
func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
	// get scheduler low-level config
	sc := schedcfg.CreateDefaultConfig()
	if s.SchedulerConfigFileName != "" {
		f, err := os.Open(s.SchedulerConfigFileName)
		if err != nil {
			log.Fatalf("Cannot open scheduler config file: %v", err)
		}

		err = sc.Read(bufio.NewReader(f))
		if err != nil {
			log.Fatalf("Invalid scheduler config file: %v", err)
		}
	}

	schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)

	if s.EnableProfiling {
		profile.InstallHandler(s.mux)
	}
	go runtime.Until(func() {
		log.V(1).Info("Starting HTTP interface")
		log.Error(http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), s.mux))
	}, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())

	if s.HA {
		validation := ha.ValidationFunc(validateLeadershipTransition)
		srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
		path := fmt.Sprintf(meta.DefaultElectionFormat, s.FrameworkName)
		sid := uid.New(eid.Group(), "").String()
		log.Infof("registering for election at %v with id %v", path, sid)
		go election.Notify(election.NewEtcdMasterElector(etcdClient), path, sid, srv, nil)
	} else {
		log.Infoln("self-electing in non-HA mode")
		schedulerProcess.Elect(driverFactory)
	}
	return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) })
}
func (s *schedulingPlugin) Run(done <-chan struct{}) {
	defer close(s.starting)
	go runtime.Until(s.scheduleOne, pluginRecoveryDelay, done)
}
func (s *controller) Run(done <-chan struct{}) {
	defer close(s.started)
	go runtime.Until(s.scheduleOne, recoveryDelay, done)
}