func NewMetricsGrabber(c *client.Client, kubelets bool, scheduler bool, controllers bool, apiServer bool) (*MetricsGrabber, error) {
	registeredMaster := false
	masterName := ""
	nodeList, err := c.Nodes().List(api.ListOptions{})
	if err != nil {
		return nil, err
	}
	if len(nodeList.Items) < 1 {
		glog.Warning("Can't find any Nodes in the API server to grab metrics from")
	}
	for _, node := range nodeList.Items {
		if system.IsMasterNode(&node) {
			registeredMaster = true
			masterName = node.Name
			break
		}
	}
	if !registeredMaster {
		scheduler = false
		controllers = false
		glog.Warningf("Master node is not registered. Grabbing metrics from Scheduler and ControllerManager is disabled.")
	}
	return &MetricsGrabber{
		client:                    c,
		grabFromApiServer:         apiServer,
		grabFromControllerManager: controllers,
		grabFromKubelets:          kubelets,
		grabFromScheduler:         scheduler,
		masterName:                masterName,
		registeredMaster:          registeredMaster,
	}, nil
}
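
A minimal sketch of how a caller might construct the grabber, assuming a configured *client.Client named c; the error handling shown here is illustrative and not part of the listing above:

	// Illustrative only: request all metric sources. NewMetricsGrabber itself
	// disables Scheduler/ControllerManager grabbing when no master node is registered.
	grabber, err := NewMetricsGrabber(c, true /*kubelets*/, true /*scheduler*/, true /*controllers*/, true /*apiServer*/)
	if err != nil {
		glog.Errorf("Failed to create MetricsGrabber: %v", err)
		return
	}
	_ = grabber // hand the grabber to whatever collects the metrics
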
func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOptions) (*containerResourceGatherer, error) {
	g := containerResourceGatherer{
		client:               c,
		stopCh:               make(chan struct{}),
		containerIDToNameMap: make(map[string]string),
		containerIDs:         make([]string, 0),
		options:              options,
	}

	if options.inKubemark {
		g.workerWg.Add(1)
		g.workers = append(g.workers, resourceGatherWorker{
			inKubemark: true,
			stopCh:     g.stopCh,
			wg:         &g.workerWg,
			finished:   false,
		})
	} else {
		pods, err := c.Core().Pods("kube-system").List(v1.ListOptions{})
		if err != nil {
			Logf("Error while listing Pods: %v", err)
			return nil, err
		}
		for _, pod := range pods.Items {
			for _, container := range pod.Status.ContainerStatuses {
				containerID := strings.TrimPrefix(container.ContainerID, "docker:/")
				g.containerIDToNameMap[containerID] = pod.Name + "/" + container.Name
				g.containerIDs = append(g.containerIDs, containerID)
			}
		}
		nodeList, err := c.Core().Nodes().List(v1.ListOptions{})
		if err != nil {
			Logf("Error while listing Nodes: %v", err)
			return nil, err
		}

		for _, node := range nodeList.Items {
			if !options.masterOnly || system.IsMasterNode(node.Name) {
				g.workerWg.Add(1)
				g.workers = append(g.workers, resourceGatherWorker{
					c:                    c,
					nodeName:             node.Name,
					wg:                   &g.workerWg,
					containerIDToNameMap: g.containerIDToNameMap,
					containerIDs:         g.containerIDs,
					stopCh:               g.stopCh,
					finished:             false,
					inKubemark:           false,
				})
				if options.masterOnly {
					break
				}
			}
		}
	}
	return &g, nil
}
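
A similar sketch for the resource gatherer, assuming a clientset.Interface named c; the ResourceGathererOptions field names come from the listing above, everything else is illustrative:

	// Illustrative only: gather container resource usage from the master node alone.
	gatherer, err := NewResourceUsageGatherer(c, ResourceGathererOptions{masterOnly: true})
	if err != nil {
		Logf("Error while creating resource gatherer: %v", err)
		return
	}
	_ = gatherer // the test framework is expected to start the workers and later summarize their results
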
// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
	// We are listing nodes from local cache as we can tolerate some small delays
	// comparing to state from etcd and there is eventual consistency anyway.
	nodes, err := nc.nodeStore.List()
	if err != nil {
		return err
	}
	added, deleted := nc.checkForNodeAddedDeleted(&nodes)
	for i := range added {
		glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
		recordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
		nc.knownNodeSet[added[i].Name] = added[i]
		// When adding new Nodes we need to check if new zone appeared, and if so add new evictor.
		zone := utilnode.GetZoneKey(added[i])
		if _, found := nc.zonePodEvictor[zone]; !found {
			nc.zonePodEvictor[zone] = NewRateLimitedTimedQueue(
				flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
			// Init the metric for the new zone.
			glog.Infof("Initializing eviction metric for zone: %v", zone)
			EvictionsNumber.WithLabelValues(zone).Add(0)
		}
		nc.cancelPodEviction(added[i])
	}

	for i := range deleted {
		glog.V(1).Infof("NodeController observed a Node deletion: %v", deleted[i].Name)
		recordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", deleted[i].Name))
		delete(nc.knownNodeSet, deleted[i].Name)
	}

	zoneToNodeConditions := map[string][]*v1.NodeCondition{}
	for i := range nodes.Items {
		var gracePeriod time.Duration
		var observedReadyCondition v1.NodeCondition
		var currentReadyCondition *v1.NodeCondition
		nodeCopy, err := api.Scheme.DeepCopy(&nodes.Items[i])
		if err != nil {
			utilruntime.HandleError(err)
			continue
		}
		node := nodeCopy.(*v1.Node)
		for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
			if err == nil {
				break
			}
			name := node.Name
			node, err = nc.kubeClient.Core().Nodes().Get(name, metav1.GetOptions{})
			if err != nil {
				glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
				break
			}
			time.Sleep(retrySleepTime)
		}
		if err != nil {
			glog.Errorf("Update status of Node %v from NodeController exceeds retry count."+
				"Skipping - no pods will be evicted.", node.Name)
			continue
		}

		// We do not treat a master node as a part of the cluster for network disruption checking.
		if !system.IsMasterNode(node.Name) {
			zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
		}

		decisionTimestamp := nc.now()
		if currentReadyCondition != nil {
			// Check eviction timeout against decisionTimestamp
			if observedReadyCondition.Status == v1.ConditionFalse &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(2).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
				}
			}
			if observedReadyCondition.Status == v1.ConditionUnknown &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(2).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
				}
			}
			if observedReadyCondition.Status == v1.ConditionTrue {
				if nc.cancelPodEviction(node) {
					glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
				}
			}

			// Report node event.
			if currentReadyCondition.Status != v1.ConditionTrue && observedReadyCondition.Status == v1.ConditionTrue {
				recordNodeStatusChange(nc.recorder, node, "NodeNotReady")
				if err = markAllPodsNotReady(nc.kubeClient, node); err != nil {
					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
				}
			}

			// Check with the cloud provider to see if the node still exists. If it
			// doesn't, delete the node immediately.
			if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
				exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
				if err != nil {
					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
					continue
				}
				if !exists {
					glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
					recordNodeEvent(nc.recorder, node.Name, string(node.UID), v1.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
					go func(nodeName string) {
						defer utilruntime.HandleCrash()
						// Kubelet is not reporting and Cloud Provider says node
						// is gone. Delete it without worrying about grace
						// periods.
						if err := forcefullyDeleteNode(nc.kubeClient, nodeName); err != nil {
							glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
						}
					}(node.Name)
				}
			}
		}
	}
	nc.handleDisruption(zoneToNodeConditions, &nodes)

	return nil
}
By("Cleaning up the replication controller") err := framework.DeleteRC(c, ns, RCName) framework.ExpectNoError(err) } }) f := framework.NewDefaultFramework("sched-pred") BeforeEach(func() { c = f.Client ns = f.Namespace.Name nodeList = &api.NodeList{} nodes, err := c.Nodes().List(api.ListOptions{}) masterNodes = sets.NewString() for _, node := range nodes.Items { if system.IsMasterNode(&node) { masterNodes.Insert(node.Name) } else { nodeList.Items = append(nodeList.Items, node) } } err = framework.CheckTestingNSDeletedExcept(c, ns) framework.ExpectNoError(err) // Every test case in this suite assumes that cluster add-on pods stay stable and // cannot be run in parallel with any other test that touches Nodes or Pods. // It is so because we need to have precise control on what's running in the cluster. systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels) Expect(err).NotTo(HaveOccurred()) systemPodsNo = 0
// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
	nodes, err := nc.kubeClient.Core().Nodes().List(api.ListOptions{})
	if err != nil {
		return err
	}
	for _, node := range nodes.Items {
		if !nc.knownNodeSet.Has(node.Name) {
			glog.V(1).Infof("NodeController observed a new Node: %#v", node)
			nc.recordNodeEvent(node.Name, api.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", node.Name))
			nc.cancelPodEviction(node.Name)
			nc.knownNodeSet.Insert(node.Name)
		}
	}
	// If there's a difference between lengths of known Nodes and observed nodes
	// we must have removed some Node.
	if len(nc.knownNodeSet) != len(nodes.Items) {
		observedSet := make(sets.String)
		for _, node := range nodes.Items {
			observedSet.Insert(node.Name)
		}
		deleted := nc.knownNodeSet.Difference(observedSet)
		for nodeName := range deleted {
			glog.V(1).Infof("NodeController observed a Node deletion: %v", nodeName)
			nc.recordNodeEvent(nodeName, api.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", nodeName))
			nc.evictPods(nodeName)
			nc.knownNodeSet.Delete(nodeName)
		}
	}

	seenReady := false
	for i := range nodes.Items {
		var gracePeriod time.Duration
		var observedReadyCondition api.NodeCondition
		var currentReadyCondition *api.NodeCondition
		node := &nodes.Items[i]
		for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
			if err == nil {
				break
			}
			name := node.Name
			node, err = nc.kubeClient.Core().Nodes().Get(name)
			if err != nil {
				glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
				break
			}
		}
		if err != nil {
			glog.Errorf("Update status of Node %v from NodeController exceeds retry count."+
				"Skipping - no pods will be evicted.", node.Name)
			continue
		}

		decisionTimestamp := nc.now()
		if currentReadyCondition != nil {
			// Check eviction timeout against decisionTimestamp
			if observedReadyCondition.Status == api.ConditionFalse &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node.Name) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
				}
			}
			if observedReadyCondition.Status == api.ConditionUnknown &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node.Name) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
				}
			}
			if observedReadyCondition.Status == api.ConditionTrue {
				// We do not treat a master node as a part of the cluster for network segmentation checking.
				if !system.IsMasterNode(node) {
					seenReady = true
				}
				if nc.cancelPodEviction(node.Name) {
					glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
				}
			}

			// Report node event.
			if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue {
				nc.recordNodeStatusChange(node, "NodeNotReady")
				if err = nc.markAllPodsNotReady(node.Name); err != nil {
					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
				}
			}

			// Check with the cloud provider to see if the node still exists. If it
			// doesn't, delete the node immediately.
			if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil {
				exists, err := nc.nodeExistsInCloudProvider(node.Name)
				if err != nil {
					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
					continue
				}
				if !exists {
					glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
					nc.recordNodeEvent(node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
					go func(nodeName string) {
						defer utilruntime.HandleCrash()
						// Kubelet is not reporting and Cloud Provider says node
						// is gone. Delete it without worrying about grace
						// periods.
						if err := nc.forcefullyDeleteNode(nodeName); err != nil {
							glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
						}
					}(node.Name)
					continue
				}
			}
		}
	}

	// NC doesn't see any Ready Node. We assume that the network is segmented and Nodes cannot connect to the API server
	// and update their statuses. NC enters network segmentation mode and cancels all evictions in progress.
	if !seenReady {
		nc.networkSegmentationMode = true
		nc.stopAllPodEvictions()
		glog.V(2).Info("NodeController is entering network segmentation mode.")
	} else {
		if nc.networkSegmentationMode {
			nc.forceUpdateAllProbeTimes()
			nc.networkSegmentationMode = false
			glog.V(2).Info("NodeController exited network segmentation mode.")
		}
	}
	return nil
}
"pattern": "` + tempMessage + `" }, { "type": "permanent", "condition": "` + string(condition) + `", "reason": "` + permReason + `", "pattern": "` + permMessage + `" } ] }` By("Get a non master node to run the pod") nodes, err := c.Nodes().List(api.ListOptions{}) Expect(err).NotTo(HaveOccurred()) node = nil for _, n := range nodes.Items { if !system.IsMasterNode(&n) { node = &n break } } Expect(node).NotTo(BeNil()) By("Generate event list options") selector := fields.Set{ "involvedObject.kind": "Node", "involvedObject.name": node.Name, "involvedObject.namespace": api.NamespaceAll, "source": source, }.AsSelector() eventListOptions = api.ListOptions{FieldSelector: selector} By("Create the test log file") tmpDir = "/tmp/" + name
"pattern": "` + tempMessage + `" }, { "type": "permanent", "condition": "` + string(condition) + `", "reason": "` + permReason + `", "pattern": "` + permMessage + `" } ] }` By("Get a non master node to run the pod") nodes, err := c.Core().Nodes().List(v1.ListOptions{}) Expect(err).NotTo(HaveOccurred()) node = nil for _, n := range nodes.Items { if !system.IsMasterNode(n.Name) { node = &n break } } Expect(node).NotTo(BeNil()) By("Generate event list options") selector := fields.Set{ "involvedObject.kind": "Node", "involvedObject.name": node.Name, "involvedObject.namespace": v1.NamespaceAll, "source": source, }.AsSelector().String() eventListOptions = v1.ListOptions{FieldSelector: selector} By("Create the test log file") tmpDir = "/tmp/" + name
// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
	nodes, err := nc.kubeClient.Core().Nodes().List(api.ListOptions{})
	if err != nil {
		return err
	}
	added, deleted := nc.checkForNodeAddedDeleted(nodes)
	for i := range added {
		glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
		recordNodeEvent(nc.recorder, added[i].Name, api.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
		nc.knownNodeSet[added[i].Name] = added[i]
		// When adding new Nodes we need to check if new zone appeared, and if so add new evictor.
		zone := utilnode.GetZoneKey(added[i])
		if _, found := nc.zonePodEvictor[zone]; !found {
			nc.zonePodEvictor[zone] = NewRateLimitedTimedQueue(
				flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
		}
		if _, found := nc.zoneTerminationEvictor[zone]; !found {
			nc.zoneTerminationEvictor[zone] = NewRateLimitedTimedQueue(
				flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
		}
		nc.cancelPodEviction(added[i])
	}

	for i := range deleted {
		glog.V(1).Infof("NodeController observed a Node deletion: %v", deleted[i].Name)
		recordNodeEvent(nc.recorder, deleted[i].Name, api.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", deleted[i].Name))
		nc.evictPods(deleted[i])
		delete(nc.knownNodeSet, deleted[i].Name)
	}

	zoneToNodeConditions := map[string][]*api.NodeCondition{}
	for i := range nodes.Items {
		var gracePeriod time.Duration
		var observedReadyCondition api.NodeCondition
		var currentReadyCondition *api.NodeCondition
		node := &nodes.Items[i]
		for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
			if err == nil {
				break
			}
			name := node.Name
			node, err = nc.kubeClient.Core().Nodes().Get(name)
			if err != nil {
				glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
				break
			}
		}
		if err != nil {
			glog.Errorf("Update status of Node %v from NodeController exceeds retry count."+
				"Skipping - no pods will be evicted.", node.Name)
			continue
		}

		// We do not treat a master node as a part of the cluster for network segmentation checking.
		if !system.IsMasterNode(node) {
			zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
		}

		decisionTimestamp := nc.now()
		if currentReadyCondition != nil {
			// Check eviction timeout against decisionTimestamp
			if observedReadyCondition.Status == api.ConditionFalse &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
				}
			}
			if observedReadyCondition.Status == api.ConditionUnknown &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
				}
			}
			if observedReadyCondition.Status == api.ConditionTrue {
				if nc.cancelPodEviction(node) {
					glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
				}
			}

			// Report node event.
			if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue {
				recordNodeStatusChange(nc.recorder, node, "NodeNotReady")
				if err = markAllPodsNotReady(nc.kubeClient, node.Name); err != nil {
					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
				}
			}

			// Check with the cloud provider to see if the node still exists. If it
			// doesn't, delete the node immediately.
			if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil {
				exists, err := nc.nodeExistsInCloudProvider(node.Name)
				if err != nil {
					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
					continue
				}
				if !exists {
					glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
					recordNodeEvent(nc.recorder, node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
					go func(nodeName string) {
						defer utilruntime.HandleCrash()
						// Kubelet is not reporting and Cloud Provider says node
						// is gone. Delete it without worrying about grace
						// periods.
						if err := forcefullyDeleteNode(nc.kubeClient, nodeName, nc.forcefullyDeletePod); err != nil {
							glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
						}
					}(node.Name)
					continue
				}
			}
		}
	}

	for k, v := range zoneToNodeConditions {
		newState := nc.computeZoneStateFunc(v)
		if newState == nc.zoneStates[k] {
			continue
		}
		if newState == stateFullSegmentation {
			glog.V(2).Infof("NodeController is entering network segmentation mode in zone %v.", k)
		} else if newState == stateNormal {
			glog.V(2).Infof("NodeController exited network segmentation mode in zone %v.", k)
		}
		for i := range nodes.Items {
			if utilnode.GetZoneKey(&nodes.Items[i]) == k {
				if newState == stateFullSegmentation {
					// When zone is fully segmented we stop the eviction all together.
					nc.cancelPodEviction(&nodes.Items[i])
				}
				if newState == stateNormal && nc.zoneStates[k] == stateFullSegmentation {
					// When exiting segmentation mode update probe timestamps on all Nodes.
					now := nc.now()
					v := nc.nodeStatusMap[nodes.Items[i].Name]
					v.probeTimestamp = now
					v.readyTransitionTimestamp = now
					nc.nodeStatusMap[nodes.Items[i].Name] = v
				}
			}
		}
		nc.zoneStates[k] = newState
	}
	return nil
}
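
For context, computeZoneStateFunc maps a zone's collected Ready conditions to one of the zone states used above. The sketch below shows only the simplest possible policy (a zone counts as fully segmented when none of its nodes report Ready); the helper name is hypothetical and this is not the controller's actual implementation, which distinguishes additional states:

	// Illustrative sketch only: classify a zone from its nodes' Ready conditions.
	func naiveZoneState(conditions []*api.NodeCondition) zoneState {
		for _, c := range conditions {
			if c != nil && c.Status == api.ConditionTrue {
				// At least one Ready node: treat the zone as healthy.
				return stateNormal
			}
		}
		// No node in the zone is Ready: assume the zone is cut off from the API server.
		return stateFullSegmentation
	}
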