// StartPods checks for numPods in TestNS. If they exist, it no-ops; otherwise it starts up
// a temp rc, scales it to match numPods, then deletes the rc, leaving behind the pods.
func StartPods(numPods int, host string, restClient *client.Client) error {
	start := time.Now()
	defer func() {
		glog.Infof("StartPods took %v with numPods %d", time.Since(start), numPods)
	}()

	hostField := fields.OneTermEqualSelector(client.PodHost, host)
	pods, err := restClient.Pods(TestNS).List(labels.Everything(), hostField)
	if err != nil || len(pods.Items) == numPods {
		return err
	}
	glog.Infof("Found %d pods that match host %v, require %d", len(pods.Items), hostField, numPods)

	// For the sake of simplicity, assume all pods in TestNS have selectors matching TestRCManifest.
	controller := RCFromManifest(TestRCManifest)

	// Make the rc unique to the given host.
	controller.Spec.Replicas = numPods
	controller.Spec.Template.Spec.NodeName = host
	controller.Name = controller.Name + host
	controller.Spec.Selector["host"] = host
	controller.Spec.Template.Labels["host"] = host

	if rc, err := StartRC(controller, restClient); err != nil {
		return err
	} else {
		// Delete the rc, otherwise when we restart master components for the next benchmark
		// the rc controller will race with the pods controller in the rc manager.
		return restClient.ReplicationControllers(TestNS).Delete(rc.Name)
	}
}
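// Usage sketch (illustrative, not part of the original source): seeding a fixed
// number of pods on each of several synthetic hosts before a benchmark run.
// The "host.%d" naming mirrors the convention used by BenchmarkPodList below;
// seedHosts is a hypothetical helper name, assumed to live alongside StartPods
// in the same package.
func seedHosts(numHosts, podsPerHost int, restClient *client.Client) error {
	for i := 0; i < numHosts; i++ {
		host := fmt.Sprintf("host.%d", i)
		if err := StartPods(podsPerHost, host, restClient); err != nil {
			return fmt.Errorf("failed to start pods on %s: %v", host, err)
		}
	}
	return nil
}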
// deletePods deletes, via the master, all pods running on the given node, and returns
// true if any pods were deleted.
func (nc *NodeController) deletePods(nodeName string) (bool, error) {
	remaining := false
	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
		fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return remaining, err
	}

	if len(pods.Items) > 0 {
		nc.recordNodeEvent(nodeName, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName))
	}

	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}
		// If the pod has already been marked for deletion, ignore it.
		if pod.DeletionGracePeriodSeconds != nil {
			continue
		}

		glog.V(2).Infof("Starting deletion of pod %v", pod.Name)
		nc.recorder.Eventf(&pod, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
		if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
			return false, err
		}
		remaining = true
	}
	return remaining, nil
}
// hasPods returns true if the provided node still has pods scheduled to it, or an error
// if the server could not be contacted.
func (nc *NodeController) hasPods(nodeName string) (bool, error) {
	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
		fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return false, err
	}
	return len(pods.Items) > 0, nil
}
// terminatePods ensures all pods on the given node that are in terminating state are
// eventually cleaned up. It returns true if the node has no pods in terminating state,
// a duration that indicates how long to wait before checking again (the next deadline
// for a pod to complete), or an error.
func (nc *NodeController) terminatePods(nodeName string, since time.Time) (bool, time.Duration, error) {
	// The time before we should try again.
	nextAttempt := time.Duration(0)
	// Have we deleted all pods?
	complete := true

	pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
		fields.OneTermEqualSelector(client.PodHost, nodeName))
	if err != nil {
		return false, nextAttempt, err
	}

	now := time.Now()
	elapsed := now.Sub(since)
	for _, pod := range pods.Items {
		// Defensive check, also needed for tests.
		if pod.Spec.NodeName != nodeName {
			continue
		}
		// Only clean up pods that are already terminating.
		if pod.DeletionGracePeriodSeconds == nil {
			continue
		}

		// The user's requested grace period, capped at the controller maximum.
		grace := time.Duration(*pod.DeletionGracePeriodSeconds) * time.Second
		if grace > nc.maximumGracePeriod {
			grace = nc.maximumGracePeriod
		}

		// The time remaining before the pod should have been deleted.
		remaining := grace - elapsed
		if remaining < 0 {
			remaining = 0
			glog.V(2).Infof("Removing pod %v after %s grace period", pod.Name, grace)
			nc.recordNodeEvent(nodeName, "TerminatingEvictedPod", fmt.Sprintf("Pod %s has exceeded the grace period for deletion after being evicted from Node %q and is being force killed", pod.Name, nodeName))
			if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil {
				glog.Errorf("Error completing deletion of pod %s: %v", pod.Name, err)
				complete = false
			}
		} else {
			glog.V(2).Infof("Pod %v still terminating, requested grace period %s, %s remaining", pod.Name, grace, remaining)
			complete = false
		}

		if nextAttempt < remaining {
			nextAttempt = remaining
		}
	}
	return complete, nextAttempt, nil
}
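// Illustrative sketch (not part of the original controller) of how deletePods
// and terminatePods compose into a single eviction pass: mark every pod on the
// node for deletion, then poll until all terminating pods are gone, honoring
// the nextAttempt hint between checks. evictAndWait is a hypothetical helper.
func (nc *NodeController) evictAndWait(nodeName string) error {
	deleted, err := nc.deletePods(nodeName)
	if err != nil {
		return err
	}
	if !deleted {
		return nil // nothing was running on the node
	}
	since := time.Now()
	for {
		complete, nextAttempt, err := nc.terminatePods(nodeName, since)
		if err != nil {
			return err
		}
		if complete {
			return nil
		}
		if nextAttempt < time.Second {
			nextAttempt = time.Second // floor to avoid busy-looping on force-delete errors
		}
		time.Sleep(nextAttempt)
	}
}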
// BenchmarkPodList benchmarks pod listing by waiting on `Tasks` listers to list `Pods` pods via `Workers`.
func BenchmarkPodList(b *testing.B) {
	b.StopTimer()
	m := framework.NewMasterComponents(&framework.Config{nil, true, false, 250.0, 500})
	defer m.Stop(true, true)

	numPods, numTasks, iter := getPods(b.N), getTasks(b.N), getIterations(b.N)
	podsPerNode := numPods / numTasks
	if podsPerNode < 1 {
		podsPerNode = 1
	}
	glog.Infof("Starting benchmark: b.N %d, pods %d, workers %d, podsPerNode %d",
		b.N, numPods, numTasks, podsPerNode)

	startPodsOnNodes(numPods, numTasks, m.RestClient)
	// Stop the rc manager so it doesn't steal resources.
	m.Stop(false, true)

	b.StartTimer()
	for i := 0; i < iter; i++ {
		framework.RunParallel(func(id int) error {
			host := fmt.Sprintf("host.%d", id)
			now := time.Now()
			defer func() {
				glog.V(3).Infof("Worker %d: Node %v listing pods took %v", id, host, time.Since(now))
			}()
			if pods, err := m.RestClient.Pods(framework.TestNS).List(
				labels.Everything(),
				fields.OneTermEqualSelector(client.PodHost, host)); err != nil {
				return err
			} else if len(pods.Items) < podsPerNode {
				glog.Fatalf("List retrieved %d pods, which is less than %d", len(pods.Items), podsPerNode)
			}
			return nil
		}, numTasks, Workers)
	}
	b.StopTimer()
}
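// From the call site above, framework.RunParallel takes a task function, a task
// count, and a worker-pool bound. A minimal sketch of such a fan-out helper,
// assuming only that contract (this is not the actual framework implementation,
// and it assumes the sync package is imported):
func runParallel(task func(id int) error, numTasks, workers int) error {
	ids := make(chan int, numTasks)
	for i := 0; i < numTasks; i++ {
		ids <- i
	}
	close(ids)

	errs := make(chan error, numTasks)
	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Each worker drains task ids until the channel is exhausted.
			for id := range ids {
				errs <- task(id)
			}
		}()
	}
	wg.Wait()
	close(errs)

	// Surface the first task error, if any.
	for err := range errs {
		if err != nil {
			return err
		}
	}
	return nil
}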
// NewSourceApiserver creates a config source that watches and pulls from the apiserver.
func NewSourceApiserver(c *client.Client, nodeName string, updates chan<- interface{}) {
	lw := cache.NewListWatchFromClient(c, "pods", api.NamespaceAll,
		fields.OneTermEqualSelector(client.PodHost, nodeName))
	newSourceApiserverFromLW(lw, updates)
}
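// Usage sketch (illustrative): wiring the apiserver source into a consumer of
// pod updates for one node. The concrete payload type delivered on the channel
// is determined by newSourceApiserverFromLW; the loop below only demonstrates
// the channel plumbing. watchMyPods is a hypothetical name.
func watchMyPods(c *client.Client, nodeName string) {
	updates := make(chan interface{})
	NewSourceApiserver(c, nodeName, updates)
	go func() {
		for u := range updates {
			glog.V(4).Infof("pod update from apiserver: %v", u)
		}
	}()
}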
// ListResource returns a function that handles retrieving a list of resources from a rest.Storage object.
func ListResource(r rest.Lister, rw rest.Watcher, scope RequestScope, forceWatch bool, minRequestTimeout time.Duration) restful.RouteFunction {
	return func(req *restful.Request, res *restful.Response) {
		w := res.ResponseWriter

		namespace, err := scope.Namer.Namespace(req)
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}

		// Watches for single objects are routed to this function.
		// Treat a /name parameter the same as a field selector entry.
		hasName := true
		_, name, err := scope.Namer.Name(req)
		if err != nil {
			hasName = false
		}

		ctx := scope.ContextFunc(req)
		ctx = api.WithNamespace(ctx, namespace)

		out, err := queryToObject(req.Request.URL.Query(), scope, "ListOptions")
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		opts := *out.(*api.ListOptions)

		// Transform fields.
		// TODO: queryToObject should do this.
		fn := func(label, value string) (newLabel, newValue string, err error) {
			return scope.Convertor.ConvertFieldLabel(scope.APIVersion, scope.Kind, label, value)
		}
		if opts.FieldSelector, err = opts.FieldSelector.Transform(fn); err != nil {
			// TODO: allow bad request to set field causes based on query parameters
			err = errors.NewBadRequest(err.Error())
			errorJSON(err, scope.Codec, w)
			return
		}

		if hasName {
			// metadata.name is the canonical internal name.
			// generic.SelectionPredicate will notice that this is
			// a request for a single object and optimize the
			// storage query accordingly.
			nameSelector := fields.OneTermEqualSelector("metadata.name", name)
			if opts.FieldSelector != nil && !opts.FieldSelector.Empty() {
				// It doesn't make sense to ask for both a name
				// and a field selector, since just the name is
				// sufficient to narrow down the request to a
				// single object.
				errorJSON(
					errors.NewBadRequest("both a name and a field selector provided; please provide one or the other."),
					scope.Codec,
					w,
				)
				return
			}
			opts.FieldSelector = nameSelector
		}

		if (opts.Watch || forceWatch) && rw != nil {
			watcher, err := rw.Watch(ctx, opts.LabelSelector, opts.FieldSelector, opts.ResourceVersion)
			if err != nil {
				errorJSON(err, scope.Codec, w)
				return
			}
			serveWatch(watcher, scope, w, req, minRequestTimeout)
			return
		}

		result, err := r.List(ctx, opts.LabelSelector, opts.FieldSelector)
		if err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		if err := setListSelfLink(result, req, scope.Namer); err != nil {
			errorJSON(err, scope.Codec, w)
			return
		}
		write(http.StatusOK, scope.APIVersion, scope.Codec, result, w, req.Request)
	}
}
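// Request-shape note (illustrative; paths are examples, not taken from the
// source above): for this handler, a single-object request such as
//
//	GET /api/v1/namespaces/default/pods/mypod
//
// is folded into the equivalent of
//
//	GET /api/v1/namespaces/default/pods?fieldSelector=metadata.name%3Dmypod
//
// while supplying both a /name path segment and a non-empty fieldSelector
// query yields a 400 Bad Request, since the name alone already narrows the
// request to a single object.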
func (ks *KubeletExecutorServer) createAndInitKubelet(
	kc *app.KubeletConfig,
	hks hyperkube.Interface,
	clientConfig *client.Config,
	shutdownCloser io.Closer,
) (app.KubeletBootstrap, *kconfig.PodConfig, error) {
	// TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop
	// up into "per source" synchronizations

	// TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods
	// used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing
	// a nil pointer to it when what we really want is a nil interface.
	var kubeClient client.Interface
	if kc.KubeClient == nil {
		kubeClient = nil
	} else {
		kubeClient = kc.KubeClient
	}

	gcPolicy := kubelet.ContainerGCPolicy{
		MinAge:             kc.MinimumGCAge,
		MaxPerPodContainer: kc.MaxPerPodContainerCount,
		MaxContainers:      kc.MaxContainerCount,
	}

	pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kc.Recorder)
	updates := pc.Channel(MESOS_CFG_SOURCE)

	klet, err := kubelet.NewMainKubelet(
		kc.Hostname,
		kc.NodeName,
		kc.DockerClient,
		kubeClient,
		kc.RootDirectory,
		kc.PodInfraContainerImage,
		kc.SyncFrequency,
		float32(kc.RegistryPullQPS),
		kc.RegistryBurst,
		kc.EventRecordQPS,
		kc.EventBurst,
		gcPolicy,
		pc.SeenAllSources,
		kc.RegisterNode,
		kc.StandaloneMode,
		kc.ClusterDomain,
		net.IP(kc.ClusterDNS),
		kc.MasterServiceNamespace,
		kc.VolumePlugins,
		kc.NetworkPlugins,
		kc.NetworkPluginName,
		kc.StreamingConnectionIdleTimeout,
		kc.Recorder,
		kc.CAdvisorInterface,
		kc.ImageGCPolicy,
		kc.DiskSpacePolicy,
		kc.Cloud,
		kc.NodeStatusUpdateFrequency,
		kc.ResourceContainer,
		kc.OSInterface,
		kc.CgroupRoot,
		kc.ContainerRuntime,
		kc.RktPath,
		kc.RktStage1Image,
		kc.Mounter,
		kc.Writer,
		kc.DockerDaemonContainer,
		kc.SystemContainer,
		kc.ConfigureCBR0,
		kc.PodCIDR,
		kc.MaxPods,
		kc.DockerExecHandler,
		kc.ResolverConfig,
		kc.CPUCFSQuota,
		&api.NodeDaemonEndpoints{
			KubeletEndpoint: api.DaemonEndpoint{Port: int(kc.Port)},
		},
		true, // Serialize image pulls.
	)
	if err != nil {
		return nil, nil, err
	}

	//TODO(jdef) either configure Watch here with something useful, or else
	// get rid of it from executor.Config
	kubeletFinished := make(chan struct{})
	staticPodsConfigPath := filepath.Join(kc.RootDirectory, "static-pods")
	exec := executor.New(executor.Config{
		Kubelet:         klet,
		Updates:         updates,
		SourceName:      MESOS_CFG_SOURCE,
		APIClient:       kc.KubeClient,
		Docker:          kc.DockerClient,
		SuicideTimeout:  ks.SuicideTimeout,
		KubeletFinished: kubeletFinished,
		ShutdownAlert: func() {
			if shutdownCloser != nil {
				if e := shutdownCloser.Close(); e != nil {
					log.Warningf("failed to signal shutdown to external watcher: %v", e)
				}
			}
		},
		ExitFunc: os.Exit,
		PodStatusFunc: func(_ executor.KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return klet.GetRuntime().GetPodStatus(pod)
		},
		StaticPodsConfigPath: staticPodsConfigPath,
		PodLW: cache.NewListWatchFromClient(kc.KubeClient, "pods", api.NamespaceAll,
			fields.OneTermEqualSelector(client.PodHost, kc.NodeName)),
	})

	go exec.InitializeStaticPodsSource(func() {
		// Create file source only when we are called back. Otherwise, it is never marked unseen.
		fileSourceUpdates := pc.Channel(kubelet.FileSource)
		kconfig.NewSourceFile(staticPodsConfigPath, kc.Hostname, kc.FileCheckFrequency, fileSourceUpdates)
	})

	k := &kubeletExecutor{
		Kubelet:         klet,
		address:         ks.Address,
		dockerClient:    kc.DockerClient,
		hks:             hks,
		kubeletFinished: kubeletFinished,
		executorDone:    exec.Done(),
		clientConfig:    clientConfig,
	}

	dconfig := bindings.DriverConfig{
		Executor:         exec,
		HostnameOverride: ks.HostnameOverride,
		BindingAddress:   ks.Address,
	}
	if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil {
		log.Fatalf("failed to create executor driver: %v", err)
	} else {
		k.driver = driver
	}

	log.V(2).Infof("Initialize executor driver...")

	k.BirthCry()
	exec.Init(k.driver)

	k.StartGarbageCollection()

	return k, pc, nil
}
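// Aside on the nil-interface check near the top of createAndInitKubelet
// (illustrative, self-contained): in Go, storing a nil pointer in an interface
// variable produces a non-nil interface, which is why kc.KubeClient is compared
// against nil before assignment rather than assigned unconditionally.
func nilInterfaceGotcha() {
	type T struct{}
	var p *T              // nil pointer
	var i interface{} = p // interface now carries a non-nil (*T) type descriptor
	fmt.Println(p == nil) // true
	fmt.Println(i == nil) // false: the interface holds a typed nil
}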
// rebootNode takes node name on provider through the following steps using c:
//  - ensures the node is ready
//  - ensures all pods on the node are running and ready
//  - reboots the node (by executing rebootCmd over ssh)
//  - ensures the node reaches some non-ready state
//  - ensures the node becomes ready again
//  - ensures all pods on the node become running and ready again
//
// It returns true through result only if all of the steps pass; at the first
// failed step, it will return false through result and not run the rest.
func rebootNode(c *client.Client, provider, name, rebootCmd string, result chan bool) {
	// Setup
	ns := api.NamespaceSystem
	ps := newPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(client.PodHost, name))
	defer ps.Stop()

	// Get the node initially.
	Logf("Getting %s", name)
	node, err := c.Nodes().Get(name)
	if err != nil {
		Logf("Couldn't get node %s", name)
		result <- false
		return
	}

	// Node sanity check: ensure it is "ready".
	if !waitForNodeToBeReady(c, name, nodeReadyInitialTimeout) {
		result <- false
		return
	}

	// Get all the pods on the node that don't have a liveness probe set.
	// A liveness probe may cause restart of a pod during node reboot, and the pod may not be running.
	pods := ps.List()
	podNames := []string{}
	for _, p := range pods {
		probe := false
		for _, c := range p.Spec.Containers {
			if c.LivenessProbe != nil {
				probe = true
				break
			}
		}
		if !probe {
			podNames = append(podNames, p.ObjectMeta.Name)
		}
	}
	Logf("Node %s has %d pods: %v", name, len(podNames), podNames)

	// For each pod, we do a sanity check to ensure it's running / healthy
	// now, as that's what we'll be checking later.
	if !checkPodsRunningReady(c, ns, podNames, podReadyBeforeTimeout) {
		result <- false
		return
	}

	// Reboot the node.
	if err = issueSSHCommand(node, provider, rebootCmd); err != nil {
		Logf("Error while issuing ssh command: %v", err)
		result <- false
		return
	}

	// Wait for some kind of "not ready" status.
	if !waitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) {
		result <- false
		return
	}

	// Wait for some kind of "ready" status.
	if !waitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) {
		result <- false
		return
	}

	// Ensure all of the pods that we found on this node before the reboot are
	// running / healthy.
	if !checkPodsRunningReady(c, ns, podNames, rebootPodReadyAgainTimeout) {
		result <- false
		return
	}

	Logf("Reboot successful on node %s", name)
	result <- true
}
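// Usage sketch (illustrative): rebootNode reports through a channel so a caller
// can reboot many nodes concurrently and gate on the aggregate result.
// rebootAll and nodeNames are hypothetical; the real test derives its node list
// elsewhere.
func rebootAll(c *client.Client, provider, rebootCmd string, nodeNames []string) bool {
	result := make(chan bool, len(nodeNames))
	for _, name := range nodeNames {
		go rebootNode(c, provider, name, rebootCmd, result)
	}
	ok := true
	for range nodeNames {
		if !<-result {
			ok = false
		}
	}
	return ok
}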