// Runs e; will not return until stopCh is closed. workers determines how many
// endpoints will be handled in parallel.
func (e *EndpointController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer e.queue.ShutDown()

    go e.serviceController.Run(stopCh)
    go e.podController.Run(stopCh)
    if !cache.WaitForCacheSync(stopCh, e.podStoreSynced) {
        return
    }
    for i := 0; i < workers; i++ {
        go wait.Until(e.worker, time.Second, stopCh)
    }
    go func() {
        defer utilruntime.HandleCrash()
        time.Sleep(5 * time.Minute) // give time for our cache to fill
        e.checkLeftoverEndpoints()
    }()
    if e.internalPodInformer != nil {
        go e.internalPodInformer.Run(stopCh)
    }
    <-stopCh
}
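// NOTE: the worker functions handed to wait.Until above (e.worker here, and the
// *worker/runWorker methods in the controllers below) are not shown in these
// listings. The following is a minimal sketch of the conventional shape of such
// a loop, not code from any of these controllers; the field and method names
// (queue, syncHandler) and the workqueue.RateLimitingInterface type are
// assumptions following the common controller pattern.
type exampleController struct {
    queue       workqueue.RateLimitingInterface // rate-limited work queue of object keys
    syncHandler func(key string) error          // reconciles a single object by key
}

func (c *exampleController) worker() {
    for {
        key, quit := c.queue.Get()
        if quit {
            // The queue was shut down (e.g. by the deferred queue.ShutDown()
            // in Run), so the worker exits and wait.Until stops with stopCh.
            return
        }
        func() {
            defer c.queue.Done(key)
            if err := c.syncHandler(key.(string)); err != nil {
                utilruntime.HandleError(fmt.Errorf("error syncing %q: %v", key, err))
                c.queue.AddRateLimited(key) // retry later with backoff
                return
            }
            c.queue.Forget(key) // success: reset this key's backoff
        }()
    }
}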
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run() {
    go func() {
        defer utilruntime.HandleCrash()

        if !cache.WaitForCacheSync(wait.NeverStop, nc.nodeInformer.Informer().HasSynced, nc.podInformer.Informer().HasSynced, nc.daemonSetInformer.Informer().HasSynced) {
            utilruntime.HandleError(errors.New("NodeController timed out while waiting for informers to sync..."))
            return
        }

        // Incorporate the results of node status pushed from kubelet to master.
        go wait.Until(func() {
            if err := nc.monitorNodeStatus(); err != nil {
                glog.Errorf("Error monitoring node status: %v", err)
            }
        }, nc.nodeMonitorPeriod, wait.NeverStop)

        // Managing eviction of nodes:
        // When we delete pods off a node, if the node was not empty at the time we then
        // queue an eviction watcher. If we hit an error, retry deletion.
        go wait.Until(func() {
            nc.evictorLock.Lock()
            defer nc.evictorLock.Unlock()
            for k := range nc.zonePodEvictor {
                nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
                    obj, exists, err := nc.nodeStore.GetByKey(value.Value)
                    if err != nil {
                        glog.Warningf("Failed to get Node %v from the nodeStore: %v", value.Value, err)
                    } else if !exists {
                        glog.Warningf("Node %v no longer present in nodeStore!", value.Value)
                    } else {
                        node, _ := obj.(*v1.Node)
                        zone := utilnode.GetZoneKey(node)
                        EvictionsNumber.WithLabelValues(zone).Inc()
                    }

                    nodeUid, _ := value.UID.(string)
                    remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nodeUid, nc.daemonSetStore)
                    if err != nil {
                        utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
                        return false, 0
                    }
                    if remaining {
                        glog.Infof("Pods awaiting deletion due to NodeController eviction")
                    }
                    return true, 0
                })
            }
        }, nodeEvictionPeriod, wait.NeverStop)
    }()
}
// Run the main goroutine responsible for watching and syncing jobs.
func (jm *JobController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer jm.queue.ShutDown()

    if !cache.WaitForCacheSync(stopCh, jm.podStoreSynced, jm.jobStoreSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(jm.worker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down Job Manager")
}
// Run begins watching and syncing.
func (dc *DeploymentController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer dc.queue.ShutDown()

    glog.Infof("Starting deployment controller")

    if !cache.WaitForCacheSync(stopCh, dc.dListerSynced, dc.rsListerSynced, dc.podListerSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(dc.worker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down deployment controller")
}
// Run begins watching and syncing daemon sets.
func (dsc *DaemonSetsController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer dsc.queue.ShutDown()

    glog.Infof("Starting Daemon Sets controller manager")

    if !cache.WaitForCacheSync(stopCh, dsc.podStoreSynced, dsc.nodeStoreSynced, dsc.dsStoreSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(dsc.runWorker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down Daemon Set Controller")
}
// Run begins watching and syncing service accounts.
func (c *ServiceAccountsController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer c.queue.ShutDown()

    glog.Infof("Starting ServiceAccount controller")

    if !cache.WaitForCacheSync(stopCh, c.saSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(c.runWorker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down ServiceAccount controller")
}
// Run begins watching and syncing.
func (rsc *ReplicaSetController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer rsc.queue.ShutDown()

    glog.Infof("Starting ReplicaSet controller")

    if !cache.WaitForCacheSync(stopCh, rsc.podListerSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(rsc.worker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down ReplicaSet Controller")
}
// Run begins watching and syncing.
func (rm *ReplicationManager) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer rm.queue.ShutDown()

    glog.Infof("Starting RC Manager")

    if rm.internalPodInformer != nil {
        go rm.internalPodInformer.Run(stopCh)
    }

    if !cache.WaitForCacheSync(stopCh, rm.podListerSynced) {
        return
    }

    for i := 0; i < workers; i++ {
        go wait.Until(rm.worker, time.Second, stopCh)
    }

    <-stopCh
    glog.Infof("Shutting down RC Manager")
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run() {
    go nc.nodeController.Run(wait.NeverStop)
    go nc.podController.Run(wait.NeverStop)
    go nc.daemonSetController.Run(wait.NeverStop)
    if nc.internalPodInformer != nil {
        go nc.internalPodInformer.Run(wait.NeverStop)
    }

    go func() {
        defer utilruntime.HandleCrash()

        if !cache.WaitForCacheSync(wait.NeverStop, nc.nodeController.HasSynced, nc.podController.HasSynced, nc.daemonSetController.HasSynced) {
            utilruntime.HandleError(errors.New("NodeController timed out while waiting for informers to sync..."))
            return
        }

        // Incorporate the results of node status pushed from kubelet to master.
        go wait.Until(func() {
            if err := nc.monitorNodeStatus(); err != nil {
                glog.Errorf("Error monitoring node status: %v", err)
            }
        }, nc.nodeMonitorPeriod, wait.NeverStop)

        // Managing eviction of nodes:
        // 1. when we delete pods off a node, if the node was not empty at the time we then
        //    queue a termination watcher
        //    a. If we hit an error, retry deletion
        // 2. The terminator loop ensures that pods are eventually cleaned and we never
        //    terminate a pod in a time period less than nc.maximumGracePeriod. AddedAt
        //    is the time from which we measure "has this pod been terminating too long",
        //    after which we will delete the pod with grace period 0 (force delete).
        //    a. If we hit errors, retry instantly
        //    b. If there are no pods left terminating, exit
        //    c. If there are pods still terminating, wait for their estimated completion
        //       before retrying
        go wait.Until(func() {
            nc.evictorLock.Lock()
            defer nc.evictorLock.Unlock()
            for k := range nc.zonePodEvictor {
                nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
                    obj, exists, err := nc.nodeStore.GetByKey(value.Value)
                    if err != nil {
                        glog.Warningf("Failed to get Node %v from the nodeStore: %v", value.Value, err)
                    } else if !exists {
                        glog.Warningf("Node %v no longer present in nodeStore!", value.Value)
                    } else {
                        node, _ := obj.(*api.Node)
                        zone := utilnode.GetZoneKey(node)
                        EvictionsNumber.WithLabelValues(zone).Inc()
                    }

                    nodeUid, _ := value.UID.(string)
                    remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nodeUid, nc.daemonSetStore)
                    if err != nil {
                        utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
                        return false, 0
                    }
                    if remaining {
                        nc.zoneTerminationEvictor[k].Add(value.Value, value.UID)
                    }
                    return true, 0
                })
            }
        }, nodeEvictionPeriod, wait.NeverStop)

        // TODO: replace with a controller that ensures pods that are terminating complete
        // in a particular time period
        go wait.Until(func() {
            nc.evictorLock.Lock()
            defer nc.evictorLock.Unlock()
            for k := range nc.zoneTerminationEvictor {
                nc.zoneTerminationEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
                    nodeUid, _ := value.UID.(string)
                    completed, remaining, err := terminatePods(nc.kubeClient, nc.recorder, value.Value, nodeUid, value.AddedAt, nc.maximumGracePeriod)
                    if err != nil {
                        utilruntime.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
                        return false, 0
                    }
                    if completed {
                        glog.V(2).Infof("All pods terminated on %s", value.Value)
                        recordNodeEvent(nc.recorder, value.Value, nodeUid, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
                        return true, 0
                    }

                    glog.V(2).Infof("Pods terminating since %s on %q, estimated completion %s", value.AddedAt, value.Value, remaining)
                    // clamp very short intervals
                    if remaining < nodeEvictionPeriod {
                        remaining = nodeEvictionPeriod
                    }
                    return false, remaining
                })
            }
        }, nodeEvictionPeriod, wait.NeverStop)

        go wait.Until(func() {
            pods, err := nc.podStore.List(labels.Everything())
            if err != nil {
                utilruntime.HandleError(err)
                return
            }
            cleanupOrphanedPods(pods, nc.nodeStore.Store, nc.forcefullyDeletePod)
        }, 30*time.Second, wait.NeverStop)
    }()
}
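// NOTE: a minimal, hypothetical sketch (not part of the listings above) of how
// these Run methods are typically driven from a controller-manager style
// process: one shared stop channel, one goroutine per blocking Run, and a
// fixed worker count per controller. The runControllers name and the choice of
// five workers are assumptions for illustration only.
func runControllers(endpoints *EndpointController, jobs *JobController, deployments *DeploymentController, nodes *NodeController, stopCh <-chan struct{}) {
    const workers = 5 // concurrency per controller; tune per workload

    // These Run methods block until stopCh is closed, so each gets its own goroutine.
    go endpoints.Run(workers, stopCh)
    go jobs.Run(workers, stopCh)
    go deployments.Run(workers, stopCh)

    // NodeController.Run takes no stop channel and returns immediately; its
    // internal loops are bound to wait.NeverStop and end only when the process exits.
    nodes.Run()

    <-stopCh
}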