Example #1
0
// Run prepares the minion and supervises its subprocesses. It optionally
// enters a private mount namespace, builds the API server client config
// (exiting the process via log.Fatalf on failure), hands the proxy launcher
// (when ms.runProxy is set) and the executor launcher to runtime.Until, and
// then blocks until a value is received from ms.exit. That value is returned.
// ms.done is closed when Run returns.
func (ms *MinionServer) Run(hks hyperkube.Interface, _ []string) error {
	// only the Linux version will do anything
	if ms.privateMountNS {
		enterPrivateMountNamespace()
	}

	// An API server client is required for k8sm since api.Binding
	// information has to be sent back to the apiserver.
	cfg, err := ms.KubeletExecutorServer.CreateAPIServerClientConfig()
	if err != nil {
		log.Fatalf("No API client: %v", err)
	}
	ms.clientConfig = cfg

	// period handed to runtime.Until for each subprocess launcher
	const relaunchPeriod = 5 * time.Second

	// subprocesses run until ms.done is closed, which happens on return
	defer close(ms.done)
	if ms.runProxy {
		go runtime.Until(ms.launchProxyServer, relaunchPeriod, ms.done)
	}
	go runtime.Until(ms.launchExecutorServer, relaunchPeriod, ms.done)

	// Block until minion exit is requested. ms.exit is deliberately left
	// open so that goroutines writing an error to it do not panic.
	exitErr := <-ms.exit
	return exitErr
}
Example #2
0
// Run records the call on the mock and then hands a one-second sleep to
// runtime.Until with a one-second period and the done channel. It returns
// when runtime.Until returns.
func (m *MockScheduler) Run(done <-chan struct{}) {
	m.Called()

	idle := func() { time.Sleep(time.Second) }
	runtime.Until(idle, time.Second, done)
}
Example #3
0
// onInitialRegistration performs one-time initialization actions upon the
// first registration event received from Mesos: it starts the periodic
// framework ID store (when a failover timeout is configured), waits for the
// first store attempt, starts the task reconciler, optionally schedules
// implicit reconciliation, and installs the debug handlers.
// k.registration is closed when this function returns.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		// use the configured refresh interval unless the failover timeout (in
		// seconds) is shorter; then refresh at half the failover timeout, but
		// never more often than once per second. The float-to-Duration
		// conversion drops any fractional second.
		refreshInterval := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		if k.failoverTimeout < k.schedulerConfig.FrameworkIdRefreshInterval.Duration.Seconds() {
			refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
		}

		// wait until we've written the framework ID at least once before proceeding
		firstStore := make(chan struct{})
		go runtime.Until(func() {
			// only close firstStore once: if it is already closed the receive
			// succeeds and nothing is deferred; otherwise the close is deferred
			// so that it fires after this store attempt completes.
			select {
			case <-firstStore:
			default:
				defer close(firstStore)
			}
			err := k.storeFrameworkId(context.TODO(), k.frameworkId.GetValue())
			if err != nil {
				log.Errorf("failed to store framework ID: %v", err)
				if err == frameworkid.ErrMismatch {
					// we detected a framework ID in storage that doesn't match what we're trying
					// to save. this is a dangerous state:
					// (1) perhaps we failed to initially recover the framework ID and so mesos
					// issued us a new one. now that we're trying to save it there's a mismatch.
					// (2) we've somehow bungled the framework ID and we're out of alignment with
					// what mesos is expecting.
					// (3) multiple schedulers were launched at the same time, and both have
					// registered with mesos (because when they each checked, there was no ID in
					// storage, so they asked for a new one). one of them has already written the
					// ID to storage -- we lose.
					log.Error("aborting due to framework ID mismatch")
					driver.Abort()
				}
			}
		}, refreshInterval, k.terminate)

		// wait for the first store attempt of the framework ID (successful or
		// not), or give up waiting if termination is signalled first
		select {
		case <-firstStore:
		case <-k.terminate:
		}
	}

	// reconcile against both the task registry and the pod registry
	r1 := k.makeTaskRegistryReconciler()
	r2 := k.makePodRegistryReconciler()

	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, taskreconciler.MakeComposite(k.terminate, r1, r2),
		k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		// k.reconcileInterval is interpreted as a number of seconds; the
		// periodic implicit reconciliation starts after the configured
		// initial delay
		ri := time.Duration(k.reconcileInterval) * time.Second
		time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate) })
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
	}

	k.installDebugHandlers(k.mux)
}
Example #4
0
// onInitialRegistration runs the one-time setup triggered by the first
// registration event received from Mesos: periodic framework ID storage (when
// a failover timeout is configured), the task reconciler, optional implicit
// reconciliation, and the debug handlers. k.registration is closed on return.
func (k *framework) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		// refresh at the configured interval, unless the failover timeout
		// (seconds) is shorter: then use half of it, at least one second
		configured := k.schedulerConfig.FrameworkIdRefreshInterval.Duration
		refreshInterval := configured
		if k.failoverTimeout < configured.Seconds() {
			halfTimeout := math.Max(1, k.failoverTimeout/2)
			refreshInterval = time.Duration(halfTimeout) * time.Second
		}
		persistID := func() {
			k.storeFrameworkId(k.frameworkId.GetValue())
		}
		go runtime.Until(persistID, refreshInterval, k.terminate)
	}

	// the reconciler works on a composite of the task and pod registries
	taskRegistry := k.makeTaskRegistryReconciler()
	podRegistry := k.makePodRegistryReconciler()
	composite := taskreconciler.MakeComposite(k.terminate, taskRegistry, podRegistry)
	abortTimeout := k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration

	k.tasksReconciler = taskreconciler.New(k.asRegisteredMaster, composite, k.reconcileCooldown, abortTimeout, k.terminate)
	go k.tasksReconciler.Run(driver, k.terminate)

	if k.reconcileInterval > 0 {
		// k.reconcileInterval counts seconds
		ri := time.Duration(k.reconcileInterval) * time.Second
		initialDelay := k.schedulerConfig.InitialImplicitReconciliationDelay.Duration
		time.AfterFunc(initialDelay, func() {
			runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, initialDelay)
	}

	k.installDebugHandlers(k.mux)
}
Example #5
0
// begin transitions the process from the New state to the Running state and
// starts executing actions taken from the backlog channel. It panics if the
// state transition fails. The returned signal is the one produced by
// runtime.After(...).Then(...) around the action loop.
func (self *procImpl) begin() runtime.Signal {
	if !self.state.transition(stateNew, stateRunning) {
		// the attempted target state is stateRunning, so name it correctly
		panic(fmt.Errorf("failed to transition from New to Running state"))
	}
	defer log.V(2).Infof("started process %d", self.pid)

	// entered guards the one-time bookkeeping below.
	// NOTE(review): presumably Acquire() returns true only for its first
	// caller -- confirm against runtime.Latch.
	var entered runtime.Latch

	// execute actions on the backlog chan
	return runtime.After(func() {
		runtime.Until(func() {
			if entered.Acquire() {
				// first entry into the action loop: announce that we're
				// running and register with the wait group
				close(self.running)
				self.wg.Add(1)
			}
			for action := range self.backlog {
				select {
				case <-self.terminate:
					return
				default:
					// signal to indicate there's room in the backlog now
					self.changed.Broadcast()
					// rely on Until to handle action panics
					action()
				}
			}
		}, self.actionHandlerCrashDelay, self.terminate)
	}).Then(func() {
		log.V(2).Infof("finished processing action backlog for process %d", self.pid)
		// only balance wg.Add(1) if the action loop actually acquired the
		// latch before us
		if !entered.Acquire() {
			self.wg.Done()
		}
	})
}
Example #6
0
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'.
//
// One goroutine watches election results and forwards the most recently
// elected master through the single-slot n.masters channel, discarding a
// stale queued value when the slot is full. The calling goroutine runs
// n.serviceLoop via runtime.Until. Closing abort stops the watcher.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
			watchLoop:
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							// The watch is finished. A bare break would only
							// leave the select and then spin on the closed
							// channel, so leave the watch loop and re-elect.
							break watchLoop
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							// skip this event and keep watching
							continue
						}

					sendElected:
						for {
							select {
							case <-abort:
								return
							case n.masters <- electedMaster:
								break sendElected
							default: // ring full, discard old value and add the new
								select {
								case <-abort:
									return
								case <-n.masters:
								default: // ring was cleared for us?!
								}
							}
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
Example #7
0
// Run validates the configured roles, loads the optional scheduler config
// file, bootstraps the scheduler, starts the HTTP interface, and then either
// registers for leader election (HA mode) or self-elects. It returns the
// result of s.awaitFailover. Validation and config errors terminate the
// process through log.Fatalf.
func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
	// one or two framework roles are allowed; with two, one must be "*"
	roles := s.frameworkRoles
	twoCustomRoles := len(roles) == 2 && roles[0] != "*" && roles[1] != "*"
	if len(roles) == 0 || len(roles) > 2 || twoCustomRoles {
		log.Fatalf(`only one custom role allowed in addition to "*"`)
	}

	// every default pod role has to be a framework role as well
	if !sets.NewString(s.frameworkRoles...).IsSuperset(sets.NewString(s.defaultPodRoles...)) {
		log.Fatalf("all default pod roles %q must be included in framework roles %q", s.defaultPodRoles, s.frameworkRoles)
	}

	// get scheduler low-level config
	sc := schedcfg.CreateDefaultConfig()
	if cfgFile := s.schedulerConfigFileName; cfgFile != "" {
		f, err := os.Open(cfgFile)
		if err != nil {
			log.Fatalf("Cannot open scheduler config file: %v", err)
		}
		defer f.Close()

		if err = sc.Read(bufio.NewReader(f)); err != nil {
			log.Fatalf("Invalid scheduler config file: %v", err)
		}
	}

	schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)

	if s.enableProfiling {
		profile.InstallHandler(s.mux)
	}

	// the HTTP listener is handed to runtime.Until together with the
	// configured bind interval and the scheduler process' terminal signal
	serveHTTP := func() {
		log.V(1).Info("Starting HTTP interface")
		bindAddr := net.JoinHostPort(s.address.String(), strconv.Itoa(s.port))
		log.Error(http.ListenAndServe(bindAddr, s.mux))
	}
	go runtime.Until(serveHTTP, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())

	if !s.ha {
		log.Infoln("self-electing in non-HA mode")
		schedulerProcess.Elect(driverFactory)
	} else {
		validation := ha.ValidationFunc(validateLeadershipTransition)
		candidate := ha.NewCandidate(schedulerProcess, driverFactory, validation)
		path := meta.ElectionPath(s.frameworkName)
		// unique for each scheduler instance
		electionID := eid.GetValue() + ":" + uuid.New()
		log.Infof("registering for election at %v with id %v", path, electionID)
		go election.Notify(election.NewEtcdMasterElector(etcdClient), path, electionID, candidate, nil)
	}

	onFailover := func() error { return s.failover(s.getDriver(), hks) }
	return s.awaitFailover(schedulerProcess, onFailover)
}
Example #8
0
// Init starts two background loops via runtime.Until, both bound to done:
// one that ages offers with zero delay, and one that notifies listeners
// every notifyListenersDelay using a cached set of offer ids.
func (s *offerStorage) Init(done <-chan struct{}) {
	// zero delay, reap offers as soon as they expire
	go runtime.Until(s.ageOffers, 0, done)

	// collectIDs gathers the ids of all stored items that are Perishable
	collectIDs := func() sets.String {
		ids := sets.NewString()
		for _, item := range s.offers.List() {
			offer, isPerishable := item.(Perishable)
			if !isPerishable {
				continue
			}
			ids.Insert(offer.Id())
		}
		return ids
	}

	// cached offer ids for the purposes of listener notification
	idCache := &stringsCache{refill: collectIDs, ttl: offerIdCacheTTL}

	notify := func() { s.notifyListeners(idCache.Strings) }
	go runtime.Until(notify, notifyListenersDelay, done)
}
Example #9
0
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'.
//
// One goroutine watches election results, records the most recently elected
// master in n.desired (guarded by n.lock) and signals n.changed. The calling
// goroutine runs n.serviceLoop via runtime.Until. Closing abort stops the
// watcher, including while it is waiting to deliver a change signal.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s}
	n.changed = make(chan struct{})
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
			watchLoop:
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							// The watch is finished. A bare break would only
							// leave the select and then spin on the closed
							// channel, so leave the watch loop and re-elect.
							break watchLoop
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							// skip this event and keep watching
							continue
						}

						n.lock.Lock()
						n.desired = electedMaster
						n.lock.Unlock()

						// Notify serviceLoop. n.changed is unbuffered, so this
						// send waits until serviceLoop receives; also watch
						// abort so that shutdown cannot leave this goroutine
						// stuck on the send.
						select {
						case <-abort:
							return
						case n.changed <- struct{}{}:
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
Example #10
0
// onInitialRegistration runs the one-time setup triggered by the first
// registration event received from Mesos: periodic framework ID storage (when
// a failover timeout is configured), the reconciler, and optional implicit
// reconciliation. k.registration is closed on return.
func (k *KubernetesScheduler) onInitialRegistration(driver bindings.SchedulerDriver) {
	defer close(k.registration)

	if k.failoverTimeout > 0 {
		// refresh at the configured interval, unless the failover timeout
		// (seconds) is shorter: then use half of it, at least one second
		configured := k.schedcfg.FrameworkIdRefreshInterval.Duration
		refreshInterval := configured
		if k.failoverTimeout < configured.Seconds() {
			halfTimeout := math.Max(1, k.failoverTimeout/2)
			refreshInterval = time.Duration(halfTimeout) * time.Second
		}
		go runtime.Until(k.storeFrameworkId, refreshInterval, k.terminate)
	}

	// the reconciler works on a composite of the task and pod registries
	taskRegistry := k.makeTaskRegistryReconciler()
	podRegistry := k.makePodRegistryReconciler()
	composite := k.makeCompositeReconciler(taskRegistry, podRegistry)
	abortTimeout := k.schedcfg.ExplicitReconciliationAbortTimeout.Duration

	k.reconciler = newReconciler(k.asRegisteredMaster, composite, k.reconcileCooldown, abortTimeout, k.terminate)
	go k.reconciler.Run(driver)

	if k.reconcileInterval > 0 {
		// k.reconcileInterval counts seconds
		ri := time.Duration(k.reconcileInterval) * time.Second
		initialDelay := k.schedcfg.InitialImplicitReconciliationDelay.Duration
		time.AfterFunc(initialDelay, func() {
			runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate)
		})
		log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, initialDelay)
	}
}
Example #11
0
// Run spawns a goroutine that consumes entries from updates until done is
// closed. For "pod deleted" events deleteOne is invoked on the pod; for any
// other event except a pop event, the queuer is told that updates are
// available. Run itself returns immediately.
func (k *deleter) Run(updates <-chan queue.Entry, done <-chan struct{}) {
	go runtime.Until(func() {
		for {
			// Wait for the next entry, but stop as soon as done is closed so
			// that this goroutine does not outlive its owner. A closed updates
			// channel also ends this pass instead of yielding an empty entry.
			var entry queue.Entry
			select {
			case <-done:
				return
			case e, open := <-updates:
				if !open {
					return
				}
				entry = e
			}

			pod := entry.Value().(*Pod)
			if entry.Is(queue.DELETE_EVENT) {
				if err := k.deleteOne(pod); err != nil {
					log.Error(err)
				}
			} else if !entry.Is(queue.POP_EVENT) {
				k.qr.updatesAvailable()
			}
		}
	}, 1*time.Second, done)
}
Example #12
0
// Run spawns a go-routine to watch for unscheduled pods and queue them up
// for scheduling. returns immediately.
//
// The spawned function holds q.lock for its whole lifetime (released by the
// deferred Unlock when it returns); the lock is only given up temporarily
// while waiting on q.deltaCond.
func (q *queuer) Run(done <-chan struct{}) {
	go runtime.Until(func() {
		log.Info("Watching for newly created pods")
		q.lock.Lock()
		defer q.lock.Unlock()

		for {
			// limit blocking here for short intervals so that scheduling
			// may proceed even if there have been no recent pod changes
			p := q.podUpdates.Await(enqueuePopTimeout)
			if p == nil {
				// no pod update arrived in time: wait on the condition in the
				// background so the wait can be bounded by a timeout.
				// NOTE(review): assumes q.deltaCond is tied to q.lock so that
				// Wait releases and re-acquires it -- confirm.
				signalled := runtime.After(q.deltaCond.Wait)
				// we've yielded the lock
				select {
				case <-time.After(enqueueWaitTimeout):
					q.deltaCond.Broadcast() // abort Wait()
					<-signalled             // wait for lock re-acquisition
					log.V(4).Infoln("timed out waiting for a pod update")
				case <-signalled:
					// we've acquired the lock and there may be
					// changes for us to process now
				}
				continue
			}

			pod := p.(*Pod)
			if recoverAssignedSlave(pod.Pod) != "" {
				// the pod already has an assigned slave: remove it from the queue
				log.V(3).Infof("dequeuing assigned pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else if pod.InGracefulTermination() {
				// pods which are pre-scheduled (i.e. NodeName is set) may be gracefully deleted,
				// even though they are not running yet.
				log.V(3).Infof("dequeuing graceful deleted pre-scheduled pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else {
				// use ReplaceExisting because we are always pushing the latest state
				now := time.Now()
				pod.deadline = &now
				if q.podQueue.Offer(pod, queue.ReplaceExisting) {
					// wake up anyone waiting for unscheduled pods
					q.unscheduledCond.Broadcast()
					log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name)
				} else {
					log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name)
				}
			}
		}
	}, 1*time.Second, done)
}
Example #13
0
// Run spawns a goroutine that pops registrations from r.queue and creates or
// updates the corresponding node through r.client whenever
// r.updateNecessary reports that an update is needed. The loop ends when the
// queue yields nil or terminate is closed. Run returns nil immediately.
func (r *clientRegistrator) Run(terminate <-chan struct{}) error {
	loop := func() {
	RegistrationLoop:
		for {
			obj := r.queue.Pop(terminate)
			log.V(3).Infof("registration event observed")
			if obj == nil {
				break RegistrationLoop
			}
			// don't act on a registration popped after termination was requested
			select {
			case <-terminate:
				break RegistrationLoop
			default:
			}

			rg := obj.(*registration)
			n, needsUpdate := r.updateNecessary(rg.hostName, rg.labels)
			if !needsUpdate {
				log.V(2).Infof("no update needed, skipping for %s: %v", rg.hostName, rg.labels)
				continue
			}

			if n == nil {
				log.V(2).Infof("creating node %s with labels %v", rg.hostName, rg.labels)
				_, err := CreateOrUpdate(r.client, rg.hostName, rg.labels, nil)
				if err != nil {
					// report the actual error, not just the labels
					log.Errorf("error creating the node %s with labels %v: %v", rg.hostName, rg.labels, err)
				}
			} else {
				log.V(2).Infof("updating node %s with labels %v", rg.hostName, rg.labels)
				_, err := Update(r.client, rg.hostName, rg.labels, nil)
				if err != nil && errors.IsNotFound(err) {
					// last chance when our store was out of date
					_, err = Create(r.client, rg.hostName, rg.labels, nil)
				}
				if err != nil {
					// report the actual error, not just the labels
					log.Errorf("error updating the node %s with labels %v: %v", rg.hostName, rg.labels, err)
				}
			}
		}
	}
	go runtime.Until(loop, time.Second, terminate)

	return nil
}
// Run starts a node reflector and spawns a goroutine that, every
// u.heartBeatPeriod until terminate is closed, updates the status of those
// nodes whose ExternalID is reported by the Mesos cloud provider as a slave
// without a kubelet. Run returns nil.
// NOTE(review): assumes Reflector.Run() does not block -- confirm.
func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client, "nodes", api.NamespaceAll, fields.Everything())
	cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod).Run()

	monitor := func() {
		// build up a slave set of nodes without kubelet
		slavesWithoutKubeletList, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error while updating slave nodes: %v", err)
			return
		}
		slavesWithoutKubelet := make(map[string]struct{}, len(slavesWithoutKubeletList))
		for _, s := range slavesWithoutKubeletList {
			slavesWithoutKubelet[s] = struct{}{}
		}

		// update status for nodes which do not have a kubelet running and
		// which are still existing as slave. This status update must be done
		// before the node controller counts down the NodeMonitorGracePeriod
		obj, err := nodeLW.List()
		if err != nil {
			// nothing to iterate over; try again on the next heartbeat
			log.Errorf("Error listing the nodes for status updates: %v", err)
			return
		}
		nl, ok := obj.(*api.NodeList)
		if !ok || nl == nil {
			// guard against dereferencing a nil list below
			log.Errorf("Error listing the nodes for status updates: unexpected object %T", obj)
			return
		}
		nodes := nl.Items

		for i := range nodes {
			if _, ok := slavesWithoutKubelet[nodes[i].Spec.ExternalID]; !ok {
				// let the kubelet do its job updating the status, or the
				// node controller will remove this node if the node does not even
				// exist anymore
				continue
			}

			err := u.updateStatus(&nodes[i])
			if err != nil {
				log.Errorf("Error updating node status: %v", err)
			}
		}
	}
	go runtime.Until(monitor, u.heartBeatPeriod, terminate)

	return nil
}
Example #15
0
// Run starts a reflector that feeds a local node store and spawns a
// goroutine that, every u.heartBeatPeriod until terminate is closed, updates
// the status of the stored nodes whose ExternalID is reported by the Mesos
// cloud provider as a slave without a kubelet. Run returns nil.
func (u *StatusUpdater) Run(terminate <-chan struct{}) error {
	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	nodeLW := cache.NewListWatchFromClient(u.client.CoreClient, "nodes", api.NamespaceAll, fields.Everything())
	reflector := cache.NewReflector(nodeLW, &api.Node{}, nodeStore, u.relistPeriod)
	reflector.Run()

	go runtime.Until(func() {
		// build up a set of listed slave nodes without a kubelet
		slaves, err := mesos.CloudProvider.ListWithoutKubelet()
		if err != nil {
			log.Errorf("Error listing slaves without kubelet: %v", err)
			return
		}
		kubeletless := sets.NewString(slaves...)

		// Only nodes without a running kubelet that still exist as a slave
		// are updated here, and this must happen before the node controller
		// counts down the NodeMonitorGracePeriod. All other nodes are left to
		// the kubelet, or to the node controller which removes nodes that do
		// not exist anymore.
		for _, item := range nodeStore.List() {
			node := item.(*api.Node)
			if kubeletless.Has(node.Spec.ExternalID) {
				if err := u.updateStatus(node); err != nil {
					log.Errorf("Error updating node status: %v", err)
				}
			}
		}
	}, u.heartBeatPeriod, terminate)

	return nil
}
Example #16
0
// Run loads the optional scheduler config file, bootstraps the scheduler,
// starts the HTTP interface, and then either registers for leader election
// (HA mode) or self-elects. It returns the result of s.awaitFailover. Config
// file errors terminate the process through log.Fatalf.
func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
	// get scheduler low-level config
	sc := schedcfg.CreateDefaultConfig()
	if s.SchedulerConfigFileName != "" {
		f, err := os.Open(s.SchedulerConfigFileName)
		if err != nil {
			log.Fatalf("Cannot open scheduler config file: %v", err)
		}
		// release the file handle; it was previously never closed
		defer f.Close()

		err = sc.Read(bufio.NewReader(f))
		if err != nil {
			log.Fatalf("Invalid scheduler config file: %v", err)
		}
	}

	schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc)

	if s.EnableProfiling {
		profile.InstallHandler(s.mux)
	}
	// the HTTP listener is handed to runtime.Until together with the
	// configured bind interval and the scheduler process' terminal signal
	go runtime.Until(func() {
		log.V(1).Info("Starting HTTP interface")
		log.Error(http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), s.mux))
	}, sc.HttpBindInterval.Duration, schedulerProcess.Terminal())

	if s.HA {
		validation := ha.ValidationFunc(validateLeadershipTransition)
		srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
		path := fmt.Sprintf(meta.DefaultElectionFormat, s.FrameworkName)
		// election id derived from the group of the id returned by bootstrap
		sid := uid.New(eid.Group(), "").String()
		log.Infof("registering for election at %v with id %v", path, sid)
		go election.Notify(election.NewEtcdMasterElector(etcdClient), path, sid, srv, nil)
	} else {
		log.Infoln("self-electing in non-HA mode")
		schedulerProcess.Elect(driverFactory)
	}
	return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) })
}
Example #17
0
// Run hands s.scheduleOne to runtime.Until in a new goroutine, using
// pluginRecoveryDelay as the period and done as the stop channel, then
// closes s.starting and returns.
func (s *schedulingPlugin) Run(done <-chan struct{}) {
	go runtime.Until(s.scheduleOne, pluginRecoveryDelay, done)

	// signal that the scheduling goroutine has been launched
	close(s.starting)
}
Example #18
0
// Run hands s.scheduleOne to runtime.Until in a new goroutine, using
// recoveryDelay as the period and done as the stop channel, then closes
// s.started and returns.
func (s *controller) Run(done <-chan struct{}) {
	go runtime.Until(s.scheduleOne, recoveryDelay, done)

	// signal that the scheduling goroutine has been launched
	close(s.started)
}