// NewDeploymentController creates a new DeploymentController.
func NewDeploymentController(dInformer informers.DeploymentInformer, rsInformer informers.ReplicaSetInformer, podInformer informers.PodInformer, client clientset.Interface) *DeploymentController {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	// TODO: remove the wrapper when all clients have moved to use the clientset.
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.Core().Events("")})

	if client != nil && client.Core().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("deployment_controller", client.Core().RESTClient().GetRateLimiter())
	}
	dc := &DeploymentController{
		client:        client,
		eventRecorder: eventBroadcaster.NewRecorder(v1.EventSource{Component: "deployment-controller"}),
		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "deployment"),
		progressQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "progress-check"),
	}
	dc.rsControl = controller.RealRSControl{
		KubeClient: client,
		Recorder:   dc.eventRecorder,
	}

	dInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dc.addDeployment,
		UpdateFunc: dc.updateDeployment,
		// This will enter the sync loop and no-op, because the deployment has been deleted from the store.
		DeleteFunc: dc.deleteDeployment,
	})
	rsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dc.addReplicaSet,
		UpdateFunc: dc.updateReplicaSet,
		DeleteFunc: dc.deleteReplicaSet,
	})
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		DeleteFunc: dc.deletePod,
	})

	dc.syncHandler = dc.syncDeployment
	dc.enqueueDeployment = dc.enqueue

	dc.dLister = dInformer.Lister()
	dc.rsLister = rsInformer.Lister()
	dc.podLister = podInformer.Lister()
	// Use the informer that actually backs each lister, so cache sync waits
	// on the ReplicaSet and Pod caches as well, not just the Deployment cache.
	dc.dListerSynced = dInformer.Informer().HasSynced
	dc.rsListerSynced = rsInformer.Informer().HasSynced
	dc.podListerSynced = podInformer.Informer().HasSynced
	return dc
}
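All of the constructors in this section funnel informer events into a named rate-limiting workqueue and leave the draining to worker goroutines started from the controller's Run loop; only the wiring happens here. The following is a rough, self-contained sketch of how such a queue is consumed, assuming the client-go workqueue package (the import path differs in older Kubernetes trees) and a hypothetical processItem standing in for a real syncHandler such as syncDeployment:

package main

import (
	"fmt"

	// In older Kubernetes trees this lived under pkg/util/workqueue.
	"k8s.io/client-go/util/workqueue"
)

// processItem is a hypothetical stand-in for a controller's syncHandler.
func processItem(key string) error {
	fmt.Println("syncing", key)
	return nil
}

func main() {
	queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "deployment")

	// Event handlers would call queue.Add(key); simulate one enqueue here.
	queue.Add("default/nginx")

	// One iteration of a worker loop: take an item, sync it, then either
	// forget it (success) or requeue it with backoff (failure).
	item, shutdown := queue.Get()
	if shutdown {
		return
	}
	key := item.(string)
	if err := processItem(key); err != nil {
		queue.AddRateLimited(key) // retry later with exponential backoff
	} else {
		queue.Forget(key) // clear the item's rate-limit history
	}
	queue.Done(item)
}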
// NewReplicaSetController configures a replica set controller with the specified event recorder.
func NewReplicaSetController(rsInformer informers.ReplicaSetInformer, podInformer informers.PodInformer, kubeClient clientset.Interface, burstReplicas int, lookupCacheSize int, garbageCollectorEnabled bool) *ReplicaSetController {
	if kubeClient != nil && kubeClient.Core().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("replicaset_controller", kubeClient.Core().RESTClient().GetRateLimiter())
	}
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: kubeClient.Core().Events("")})

	rsc := &ReplicaSetController{
		kubeClient: kubeClient,
		podControl: controller.RealPodControl{
			KubeClient: kubeClient,
			Recorder:   eventBroadcaster.NewRecorder(api.EventSource{Component: "replicaset-controller"}),
		},
		burstReplicas:           burstReplicas,
		expectations:            controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()),
		queue:                   workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "replicaset"),
		garbageCollectorEnabled: garbageCollectorEnabled,
	}

	rsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    rsc.enqueueReplicaSet,
		UpdateFunc: rsc.updateRS,
		// This will enter the sync loop and no-op, because the replica set has been deleted from the store.
		// Note that deleting a replica set immediately after scaling it to 0 will not work. The recommended
		// way of achieving this is by performing a `stop` operation on the replica set.
		DeleteFunc: rsc.enqueueReplicaSet,
	})
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: rsc.addPod,
		// This invokes the ReplicaSet sync for every pod change, e.g. host assignment. Though this might seem
		// like overkill, the most frequent pod update is status, and the associated ReplicaSet will only list
		// from local storage, so it should be OK.
		UpdateFunc: rsc.updatePod,
		DeleteFunc: rsc.deletePod,
	})

	rsc.syncHandler = rsc.syncReplicaSet
	rsc.rsLister = rsInformer.Lister()
	rsc.podLister = podInformer.Lister()
	rsc.podListerSynced = podInformer.Informer().HasSynced
	rsc.lookupCache = controller.NewMatchingCache(lookupCacheSize)
	return rsc
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
// This method returns an error if it is unable to initialize the CIDR bitmap with
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error.
func NewNodeController(
	podInformer informers.PodInformer,
	nodeInformer informers.NodeInformer,
	daemonSetInformer informers.DaemonSetInformer,
	cloud cloudprovider.Interface,
	kubeClient clientset.Interface,
	podEvictionTimeout time.Duration,
	evictionLimiterQPS float32,
	secondaryEvictionLimiterQPS float32,
	largeClusterThreshold int32,
	unhealthyZoneThreshold float32,
	nodeMonitorGracePeriod time.Duration,
	nodeStartupGracePeriod time.Duration,
	nodeMonitorPeriod time.Duration,
	clusterCIDR *net.IPNet,
	serviceCIDR *net.IPNet,
	nodeCIDRMaskSize int,
	allocateNodeCIDRs bool) (*NodeController, error) {

	eventBroadcaster := record.NewBroadcaster()
	recorder := eventBroadcaster.NewRecorder(v1.EventSource{Component: "controllermanager"})
	eventBroadcaster.StartLogging(glog.Infof)
	if kubeClient != nil {
		glog.V(0).Infof("Sending events to api server.")
		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.Core().Events("")})
	} else {
		glog.V(0).Infof("No api server defined - no events will be sent to API server.")
	}

	if kubeClient != nil && kubeClient.Core().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("node_controller", kubeClient.Core().RESTClient().GetRateLimiter())
	}

	if allocateNodeCIDRs {
		if clusterCIDR == nil {
			glog.Fatal("NodeController: Must specify clusterCIDR if allocateNodeCIDRs == true.")
		}
		mask := clusterCIDR.Mask
		if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSize {
			glog.Fatal("NodeController: Invalid clusterCIDR, mask size of clusterCIDR must be less than nodeCIDRMaskSize.")
		}
	}

	nc := &NodeController{
		cloud:                     cloud,
		knownNodeSet:              make(map[string]*v1.Node),
		kubeClient:                kubeClient,
		recorder:                  recorder,
		podEvictionTimeout:        podEvictionTimeout,
		maximumGracePeriod:        5 * time.Minute,
		zonePodEvictor:            make(map[string]*RateLimitedTimedQueue),
		nodeStatusMap:             make(map[string]nodeStatusData),
		nodeMonitorGracePeriod:    nodeMonitorGracePeriod,
		nodeMonitorPeriod:         nodeMonitorPeriod,
		nodeStartupGracePeriod:    nodeStartupGracePeriod,
		lookupIP:                  net.LookupIP,
		now:                       metav1.Now,
		clusterCIDR:               clusterCIDR,
		serviceCIDR:               serviceCIDR,
		allocateNodeCIDRs:         allocateNodeCIDRs,
		forcefullyDeletePod:       func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) },
		nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
		evictionLimiterQPS:          evictionLimiterQPS,
		secondaryEvictionLimiterQPS: secondaryEvictionLimiterQPS,
		largeClusterThreshold:       largeClusterThreshold,
		unhealthyZoneThreshold:      unhealthyZoneThreshold,
		zoneStates:                  make(map[string]zoneState),
		podInformer:                 podInformer,
		nodeInformer:                nodeInformer,
		daemonSetInformer:           daemonSetInformer,
	}
	nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
	nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
	nc.computeZoneStateFunc = nc.ComputeZoneState

	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    nc.maybeDeleteTerminatingPod,
		UpdateFunc: func(_, obj interface{}) { nc.maybeDeleteTerminatingPod(obj) },
	})
	nc.podStore = *podInformer.Lister()

	nodeEventHandlerFuncs := cache.ResourceEventHandlerFuncs{}
	if nc.allocateNodeCIDRs {
		var nodeList *v1.NodeList
		var err error
		// We must poll because the apiserver might not be up. This error causes
		// the controller manager to restart.
		if pollErr := wait.Poll(10*time.Second, apiserverStartupGracePeriod, func() (bool, error) {
			nodeList, err = kubeClient.Core().Nodes().List(v1.ListOptions{
				FieldSelector: fields.Everything().String(),
				LabelSelector: labels.Everything().String(),
			})
			if err != nil {
				glog.Errorf("Failed to list all nodes: %v", err)
				return false, nil
			}
			return true, nil
		}); pollErr != nil {
			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod)
		}
		nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
		if err != nil {
			return nil, err
		}

		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{
			AddFunc: func(originalObj interface{}) {
				obj, err := api.Scheme.DeepCopy(originalObj)
				if err != nil {
					utilruntime.HandleError(err)
					return
				}
				node := obj.(*v1.Node)

				if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil {
					utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
				}
			},
			UpdateFunc: func(_, obj interface{}) {
				node := obj.(*v1.Node)
				// If the PodCIDR is not empty we either:
				// - already processed a Node that already had a CIDR after NC restarted
				//   (cidr is marked as used),
				// - already processed a Node successfully and allocated a CIDR for it
				//   (cidr is marked as used),
				// - already processed a Node but saw a "timeout" response while the
				//   request eventually got through; in this case we haven't released
				//   the allocated CIDR (cidr is still marked as used).
				// There's a possible error here:
				// - NC sees a new Node and assigns CIDR X to it,
				// - the Update Node call fails with a timeout,
				// - the Node is updated by some other component, NC sees an update and
				//   assigns CIDR Y to the Node,
				// - both CIDR X and CIDR Y are marked as used in the local cache,
				//   even though the Node only has CIDR Y.
				// The problem is that the in-memory cache keeps CIDR X marked as used,
				// which prevents it from being assigned to any new node, even though the
				// cluster state itself is correct. Restarting NC fixes the issue.
				if node.Spec.PodCIDR == "" {
					nodeCopy, err := api.Scheme.Copy(node)
					if err != nil {
						utilruntime.HandleError(err)
						return
					}

					if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil {
						utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
					}
				}
			},
			DeleteFunc: func(originalObj interface{}) {
				obj, err := api.Scheme.DeepCopy(originalObj)
				if err != nil {
					utilruntime.HandleError(err)
					return
				}

				node, isNode := obj.(*v1.Node)
				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692
				if !isNode {
					deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
					if !ok {
						glog.Errorf("Received unexpected object: %v", obj)
						return
					}
					node, ok = deletedState.Obj.(*v1.Node)
					if !ok {
						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
						return
					}
				}
				if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil {
					glog.Errorf("Error releasing CIDR: %v", err)
				}
			},
		}
	}

	nodeInformer.Informer().AddEventHandler(nodeEventHandlerFuncs)
	nc.nodeStore = *nodeInformer.Lister()

	nc.daemonSetStore = *daemonSetInformer.Lister()

	return nc, nil
}
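The allocateNodeCIDRs validation above requires that the cluster CIDR's mask size not exceed nodeCIDRMaskSize, since each node's pod range is carved out of the cluster range. A minimal standalone illustration of that arithmetic, using only the standard library (the /16 and /24 values below are invented examples, not defaults from this code):

package main

import (
	"fmt"
	"net"
)

func main() {
	// Example values: a /16 cluster CIDR split into /24 per-node ranges.
	_, clusterCIDR, err := net.ParseCIDR("10.244.0.0/16")
	if err != nil {
		panic(err)
	}
	nodeCIDRMaskSize := 24

	clusterMaskSize, _ := clusterCIDR.Mask.Size()
	if clusterMaskSize > nodeCIDRMaskSize {
		// Mirrors the fatal check in NewNodeController: per-node CIDRs cannot be
		// wider than the cluster CIDR they are allocated from.
		fmt.Println("invalid: clusterCIDR mask is larger than nodeCIDRMaskSize")
		return
	}
	// Each extra mask bit doubles the number of per-node ranges:
	// 2^(24-16) = 256 node CIDRs are available in this example.
	fmt.Printf("cluster %s can hold %d node CIDRs of size /%d\n",
		clusterCIDR, 1<<uint(nodeCIDRMaskSize-clusterMaskSize), nodeCIDRMaskSize)
}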
// NewDaemonSetsController creates a new DaemonSetsController.
func NewDaemonSetsController(daemonSetInformer informers.DaemonSetInformer, podInformer informers.PodInformer, nodeInformer informers.NodeInformer, kubeClient clientset.Interface, lookupCacheSize int) *DaemonSetsController {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	// TODO: remove the wrapper when all clients have moved to use the clientset.
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.Core().Events("")})

	if kubeClient != nil && kubeClient.Core().RESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("daemon_controller", kubeClient.Core().RESTClient().GetRateLimiter())
	}
	dsc := &DaemonSetsController{
		kubeClient:    kubeClient,
		eventRecorder: eventBroadcaster.NewRecorder(v1.EventSource{Component: "daemonset-controller"}),
		podControl: controller.RealPodControl{
			KubeClient: kubeClient,
			Recorder:   eventBroadcaster.NewRecorder(v1.EventSource{Component: "daemon-set"}),
		},
		burstReplicas: BurstReplicas,
		expectations:  controller.NewControllerExpectations(),
		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "daemonset"),
	}

	daemonSetInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			ds := obj.(*extensions.DaemonSet)
			glog.V(4).Infof("Adding daemon set %s", ds.Name)
			dsc.enqueueDaemonSet(ds)
		},
		UpdateFunc: func(old, cur interface{}) {
			oldDS := old.(*extensions.DaemonSet)
			curDS := cur.(*extensions.DaemonSet)
			// We should invalidate the whole lookup cache if a DS's selector has been updated.
			//
			// Imagine that you have two DaemonSets:
			// * old DS1
			// * new DS2
			// You also have a pod that is attached to DS2 (because it doesn't match DS1's selector).
			// Now imagine that you change DS1's selector so that it now matches that pod; in such a
			// case we must invalidate the whole cache so that the pod can be adopted by DS1.
			//
			// This makes the lookup cache less helpful, but selector updates do not happen often,
			// so it's not a big problem.
			if !reflect.DeepEqual(oldDS.Spec.Selector, curDS.Spec.Selector) {
				dsc.lookupCache.InvalidateAll()
			}

			glog.V(4).Infof("Updating daemon set %s", oldDS.Name)
			dsc.enqueueDaemonSet(curDS)
		},
		DeleteFunc: dsc.deleteDaemonset,
	})
	dsc.dsStore = daemonSetInformer.Lister()
	dsc.dsStoreSynced = daemonSetInformer.Informer().HasSynced

	// Watch for creation/deletion of pods. The reason we watch is that we don't want a daemon set to create/delete
	// more pods until all the effects (expectations) of a daemon set's create/delete have been observed.
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addPod,
		UpdateFunc: dsc.updatePod,
		DeleteFunc: dsc.deletePod,
	})
	dsc.podStore = podInformer.Lister()
	dsc.podStoreSynced = podInformer.Informer().HasSynced

	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dsc.addNode,
		UpdateFunc: dsc.updateNode,
	})
	dsc.nodeStoreSynced = nodeInformer.Informer().HasSynced
	dsc.nodeStore = nodeInformer.Lister()

	dsc.syncHandler = dsc.syncDaemonSet
	dsc.lookupCache = controller.NewMatchingCache(lookupCacheSize)
	return dsc
}