// Scale updates a ReplicationController to a new size, with optional precondition check (if preconditions is not nil), // optional retries (if retry is not nil), and then optionally waits for it's replica count to reach the new value // (if wait is not nil). func (scaler *ReplicationControllerScaler) Scale(namespace, name string, newSize uint, preconditions *ScalePrecondition, retry, waitForReplicas *RetryParams) error { if preconditions == nil { preconditions = &ScalePrecondition{-1, ""} } if retry == nil { // Make it try only once, immediately retry = &RetryParams{Interval: time.Millisecond, Timeout: time.Millisecond} } var updatedResourceVersion string cond := ScaleCondition(scaler, preconditions, namespace, name, newSize, &updatedResourceVersion) if err := wait.PollImmediate(retry.Interval, retry.Timeout, cond); err != nil { return err } if waitForReplicas != nil { checkRC := func(rc *api.ReplicationController) bool { if uint(rc.Spec.Replicas) != newSize { // the size is changed by other party. Don't need to wait for the new change to complete. return true } return rc.Status.ObservedGeneration >= rc.Generation && rc.Status.Replicas == rc.Spec.Replicas } // If number of replicas doesn't change, then the update may not event // be sent to underlying databse (we don't send no-op changes). // In such case, <updatedResourceVersion> will have value of the most // recent update (which may be far in the past) so we may get "too old // RV" error from watch or potentially no ReplicationController events // will be deliver, since it may already be in the expected state. // To protect from these two, we first issue Get() to ensure that we // are not already in the expected state. 
currentRC, err := scaler.c.ReplicationControllers(namespace).Get(name, metav1.GetOptions{}) if err != nil { return err } if !checkRC(currentRC) { watchOptions := api.ListOptions{ FieldSelector: fields.OneTermEqualSelector("metadata.name", name), ResourceVersion: updatedResourceVersion, } watcher, err := scaler.c.ReplicationControllers(namespace).Watch(watchOptions) if err != nil { return err } _, err = watch.Until(waitForReplicas.Timeout, watcher, func(event watch.Event) (bool, error) { if event.Type != watch.Added && event.Type != watch.Modified { return false, nil } return checkRC(event.Object.(*api.ReplicationController)), nil }) if err == wait.ErrWaitTimeout { return fmt.Errorf("timed out waiting for %q to be synced", name) } return err } } return nil }
// Returns true if a node update matching the predicate was emitted from the // system after performing the supplied action. func observeNodeUpdateAfterAction(f *framework.Framework, nodeName string, nodePredicate func(*v1.Node) bool, action func() error) (bool, error) { observedMatchingNode := false nodeSelector := fields.OneTermEqualSelector("metadata.name", nodeName) informerStartedChan := make(chan struct{}) var informerStartedGuard sync.Once _, controller := cache.NewInformer( &cache.ListWatch{ ListFunc: func(options v1.ListOptions) (runtime.Object, error) { options.FieldSelector = nodeSelector.String() ls, err := f.ClientSet.Core().Nodes().List(options) return ls, err }, WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { options.FieldSelector = nodeSelector.String() w, err := f.ClientSet.Core().Nodes().Watch(options) // Signal parent goroutine that watching has begun. informerStartedGuard.Do(func() { close(informerStartedChan) }) return w, err }, }, &v1.Node{}, 0, cache.ResourceEventHandlerFuncs{ UpdateFunc: func(oldObj, newObj interface{}) { n, ok := newObj.(*v1.Node) Expect(ok).To(Equal(true)) if nodePredicate(n) { observedMatchingNode = true } }, }, ) // Start the informer and block this goroutine waiting for the started signal. informerStopChan := make(chan struct{}) defer func() { close(informerStopChan) }() go controller.Run(informerStopChan) <-informerStartedChan // Invoke the action function. err := action() if err != nil { return false, err } // Poll whether the informer has found a matching node update with a timeout. // Wait up 2 minutes polling every second. timeout := 2 * time.Minute interval := 1 * time.Second err = wait.Poll(interval, timeout, func() (bool, error) { return observedMatchingNode, nil }) return err == nil, err }
// deletePods will delete all pods from master running on given node, and return true // if any pods were deleted, or were found pending deletion. func deletePods(kubeClient clientset.Interface, recorder record.EventRecorder, nodeName, nodeUID string, daemonStore cache.StoreToDaemonSetLister) (bool, error) { remaining := false selector := fields.OneTermEqualSelector(api.PodHostField, nodeName).String() options := v1.ListOptions{FieldSelector: selector} pods, err := kubeClient.Core().Pods(v1.NamespaceAll).List(options) var updateErrList []error if err != nil { return remaining, err } if len(pods.Items) > 0 { recordNodeEvent(recorder, nodeName, nodeUID, v1.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName)) } for _, pod := range pods.Items { // Defensive check, also needed for tests. if pod.Spec.NodeName != nodeName { continue } // Set reason and message in the pod object. if _, err = setPodTerminationReason(kubeClient, &pod, nodeName); err != nil { if errors.IsConflict(err) { updateErrList = append(updateErrList, fmt.Errorf("update status failed for pod %q: %v", format.Pod(&pod), err)) continue } } // if the pod has already been marked for deletion, we still return true that there are remaining pods. if pod.DeletionGracePeriodSeconds != nil { remaining = true continue } // if the pod is managed by a daemonset, ignore it _, err := daemonStore.GetPodDaemonSets(&pod) if err == nil { // No error means at least one daemonset was found continue } glog.V(2).Infof("Starting deletion of pod %v", pod.Name) recorder.Eventf(&pod, v1.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName) if err := kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil { return false, err } remaining = true } if len(updateErrList) > 0 { return false, utilerrors.NewAggregate(updateErrList) } return remaining, nil }
// Benchmark pod listing by waiting on `Tasks` listers to list `Pods` pods via `Workers`. func BenchmarkPodList(b *testing.B) { b.StopTimer() m := framework.NewMasterComponents(&framework.Config{nil, true, false, 250.0, 500}) defer m.Stop(true, true) ns := framework.CreateTestingNamespace("benchmark-pod-list", s, t) defer framework.DeleteTestingNamespace(ns, s, t) numPods, numTasks, iter := getPods(b.N), getTasks(b.N), getIterations(b.N) podsPerNode := numPods / numTasks if podsPerNode < 1 { podsPerNode = 1 } glog.Infof("Starting benchmark: b.N %d, pods %d, workers %d, podsPerNode %d", b.N, numPods, numTasks, podsPerNode) startPodsOnNodes(ns.Name, numPods, numTasks, m.RestClient) // Stop the rc manager so it doesn't steal resources m.Stop(false, true) b.StartTimer() for i := 0; i < iter; i++ { framework.RunParallel(func(id int) error { host := fmt.Sprintf("host.%d", id) now := time.Now() defer func() { glog.V(3).Infof("Worker %d: Node %v listing pods took %v", id, host, time.Since(now)) }() if pods, err := m.ClientSet.Core().Pods(ns.Name).List(api.ListOptions{ LabelSelector: labels.Everything(), FieldSelector: fields.OneTermEqualSelector(api.PodHostField, host), }); err != nil { return err } else if len(pods.Items) < podsPerNode { glog.Fatalf("List retrieved %d pods, which is less than %d", len(pods.Items), podsPerNode) } return nil }, numTasks, Workers) } b.StopTimer() }
// update ready status of all pods running on given node from master // return true if success func markAllPodsNotReady(kubeClient clientset.Interface, node *v1.Node) error { // Don't set pods to NotReady if the kubelet is running a version that // doesn't understand how to correct readiness. // TODO: Remove this check when we no longer guarantee backward compatibility // with node versions < 1.2.0. if nodeRunningOutdatedKubelet(node) { return nil } nodeName := node.Name glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName) opts := v1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName).String()} pods, err := kubeClient.Core().Pods(v1.NamespaceAll).List(opts) if err != nil { return err } errMsg := []string{} for _, pod := range pods.Items { // Defensive check, also needed for tests. if pod.Spec.NodeName != nodeName { continue } for i, cond := range pod.Status.Conditions { if cond.Type == v1.PodReady { pod.Status.Conditions[i].Status = v1.ConditionFalse glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name) _, err := kubeClient.Core().Pods(pod.Namespace).UpdateStatus(&pod) if err != nil { glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err) errMsg = append(errMsg, fmt.Sprintf("%v", err)) } break } } } if len(errMsg) == 0 { return nil } return fmt.Errorf("%v", strings.Join(errMsg, "; ")) }
// SingleObject returns a ListOptions for watching a single object. func SingleObject(meta metav1.ObjectMeta) ListOptions { return ListOptions{ FieldSelector: fields.OneTermEqualSelector("metadata.name", meta.Name), ResourceVersion: meta.ResourceVersion, } }
// Expect to observe:
// 1. Node is marked NotReady after timeout by nodecontroller (40 seconds)
// 2. All pods on node are marked NotReady shortly after #1
// 3. Node and pods return to Ready after connectivity recovers
It("All pods on the unreachable node should be marked as NotReady upon the node turn NotReady "+
	"AND all pods should be mark back to Ready when the node get back to Ready before pod eviction timeout", func() {
	By("choose a node - we will block all network traffic on this node")
	var podOpts v1.ListOptions
	nodeOpts := v1.ListOptions{}
	nodes, err := c.Core().Nodes().List(nodeOpts)
	Expect(err).NotTo(HaveOccurred())
	// The predicate accepts a node only if it is currently Ready and listing
	// the pods bound to it succeeds with at least one result.
	// NOTE(review): presumably FilterNodes keeps the nodes for which the
	// predicate returns true - confirm against the framework helper.
	framework.FilterNodes(nodes, func(node v1.Node) bool {
		if !framework.IsNodeConditionSetAsExpected(&node, v1.NodeReady, true) {
			return false
		}
		podOpts = v1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
		pods, err := c.Core().Pods(v1.NamespaceAll).List(podOpts)
		if err != nil || len(pods.Items) <= 0 {
			return false
		}
		return true
	})
	if len(nodes.Items) <= 0 {
		framework.Failf("No eligible node were found: %d", len(nodes.Items))
	}
	// Use the first remaining node as the one to isolate.
	node := nodes.Items[0]
	// podOpts was reassigned inside the predicate for each node inspected, so
	// rebuild it for the node actually chosen.
	podOpts = v1.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name).String()}
	// Precondition: the pods selected by podOpts must satisfy
	// testutils.PodRunningReady within podReadyTimeout before the test proceeds.
	if err = framework.WaitForMatchPodsCondition(c, podOpts, "Running and Ready", podReadyTimeout, testutils.PodRunningReady); err != nil {
		framework.Failf("Pods on node %s are not ready and running within %v: %v", node.Name, podReadyTimeout, err)
	}
// ListResource returns a function that handles retrieving a list of resources from a rest.Storage object. func ListResource(r rest.Lister, rw rest.Watcher, scope RequestScope, forceWatch bool, minRequestTimeout time.Duration) restful.RouteFunction { return func(req *restful.Request, res *restful.Response) { // For performance tracking purposes. trace := util.NewTrace("List " + req.Request.URL.Path) w := res.ResponseWriter namespace, err := scope.Namer.Namespace(req) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // Watches for single objects are routed to this function. // Treat a /name parameter the same as a field selector entry. hasName := true _, name, err := scope.Namer.Name(req) if err != nil { hasName = false } ctx := scope.ContextFunc(req) ctx = request.WithNamespace(ctx, namespace) opts := api.ListOptions{} if err := scope.ParameterCodec.DecodeParameters(req.Request.URL.Query(), scope.Kind.GroupVersion(), &opts); err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // transform fields // TODO: DecodeParametersInto should do this. if opts.FieldSelector != nil { fn := func(label, value string) (newLabel, newValue string, err error) { return scope.Convertor.ConvertFieldLabel(scope.Kind.GroupVersion().String(), scope.Kind.Kind, label, value) } if opts.FieldSelector, err = opts.FieldSelector.Transform(fn); err != nil { // TODO: allow bad request to set field causes based on query parameters err = errors.NewBadRequest(err.Error()) scope.err(err, res.ResponseWriter, req.Request) return } } if hasName { // metadata.name is the canonical internal name. // SelectionPredicate will notice that this is // a request for a single object and optimize the // storage query accordingly. 
nameSelector := fields.OneTermEqualSelector("metadata.name", name) if opts.FieldSelector != nil && !opts.FieldSelector.Empty() { // It doesn't make sense to ask for both a name // and a field selector, since just the name is // sufficient to narrow down the request to a // single object. scope.err(errors.NewBadRequest("both a name and a field selector provided; please provide one or the other."), res.ResponseWriter, req.Request) return } opts.FieldSelector = nameSelector } if (opts.Watch || forceWatch) && rw != nil { watcher, err := rw.Watch(ctx, &opts) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // TODO: Currently we explicitly ignore ?timeout= and use only ?timeoutSeconds=. timeout := time.Duration(0) if opts.TimeoutSeconds != nil { timeout = time.Duration(*opts.TimeoutSeconds) * time.Second } if timeout == 0 && minRequestTimeout > 0 { timeout = time.Duration(float64(minRequestTimeout) * (rand.Float64() + 1.0)) } serveWatch(watcher, scope, req, res, timeout) return } // Log only long List requests (ignore Watch). defer trace.LogIfLong(500 * time.Millisecond) trace.Step("About to List from storage") result, err := r.List(ctx, &opts) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } trace.Step("Listing from storage done") numberOfItems, err := setListSelfLink(result, req, scope.Namer) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } trace.Step("Self-linking done") // Ensure empty lists return a non-nil items slice if numberOfItems == 0 && meta.IsListType(result) { if err := meta.SetList(result, []runtime.Object{}); err != nil { scope.err(err, res.ResponseWriter, req.Request) return } } responsewriters.WriteObject(http.StatusOK, scope.Kind.GroupVersion(), scope.Serializer, result, w, req.Request) trace.Step(fmt.Sprintf("Writing http response done (%d items)", numberOfItems)) } }
// rebootNode takes node name on provider through the following steps using c: // - ensures the node is ready // - ensures all pods on the node are running and ready // - reboots the node (by executing rebootCmd over ssh) // - ensures the node reaches some non-ready state // - ensures the node becomes ready again // - ensures all pods on the node become running and ready again // // It returns true through result only if all of the steps pass; at the first // failed step, it will return false through result and not run the rest. func rebootNode(c clientset.Interface, provider, name, rebootCmd string) bool { // Setup ns := api.NamespaceSystem ps := testutils.NewPodStore(c, ns, labels.Everything(), fields.OneTermEqualSelector(api.PodHostField, name)) defer ps.Stop() // Get the node initially. framework.Logf("Getting %s", name) node, err := c.Core().Nodes().Get(name, metav1.GetOptions{}) if err != nil { framework.Logf("Couldn't get node %s", name) return false } // Node sanity check: ensure it is "ready". if !framework.WaitForNodeToBeReady(c, name, framework.NodeReadyInitialTimeout) { return false } // Get all the pods on the node that don't have liveness probe set. // Liveness probe may cause restart of a pod during node reboot, and the pod may not be running. pods := ps.List() podNames := []string{} for _, p := range pods { probe := false for _, c := range p.Spec.Containers { if c.LivenessProbe != nil { probe = true break } } if !probe { podNames = append(podNames, p.ObjectMeta.Name) } } framework.Logf("Node %s has %d assigned pods with no liveness probes: %v", name, len(podNames), podNames) // For each pod, we do a sanity check to ensure it's running / healthy // or succeeded now, as that's what we'll be checking later. if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, framework.PodReadyBeforeTimeout) { printStatusAndLogsForNotReadyPods(c, ns, podNames, pods) return false } // Reboot the node. 
if err = framework.IssueSSHCommand(rebootCmd, provider, node); err != nil { framework.Logf("Error while issuing ssh command: %v", err) return false } // Wait for some kind of "not ready" status. if !framework.WaitForNodeToBeNotReady(c, name, rebootNodeNotReadyTimeout) { return false } // Wait for some kind of "ready" status. if !framework.WaitForNodeToBeReady(c, name, rebootNodeReadyAgainTimeout) { return false } // Ensure all of the pods that we found on this node before the reboot are // running / healthy, or succeeded. if !framework.CheckPodsRunningReadyOrSucceeded(c, ns, podNames, rebootPodReadyAgainTimeout) { newPods := ps.List() printStatusAndLogsForNotReadyPods(c, ns, podNames, newPods) return false } framework.Logf("Reboot successful on node %s", name) return true }
// NewSourceApiserver creates a config source that watches and pulls from the apiserver. func NewSourceApiserver(c *clientset.Clientset, nodeName types.NodeName, updates chan<- interface{}) { lw := cache.NewListWatchFromClient(c.Core().RESTClient(), "pods", v1.NamespaceAll, fields.OneTermEqualSelector(api.PodHostField, string(nodeName))) newSourceApiserverFromLW(lw, updates) }
// RequestNodeCertificate will create a certificate signing request and send it to API server, // then it will watch the object's status, once approved by API server, it will return the API // server's issued certificate (pem-encoded). If there is any errors, or the watch timeouts, // it will return an error. This is intended for use on nodes (kubelet and kubeadm). func RequestNodeCertificate(client certificatesclient.CertificateSigningRequestInterface, privateKeyData []byte, nodeName types.NodeName) (certData []byte, err error) { subject := &pkix.Name{ Organization: []string{"system:nodes"}, CommonName: fmt.Sprintf("system:node:%s", nodeName), } privateKey, err := certutil.ParsePrivateKeyPEM(privateKeyData) if err != nil { return nil, fmt.Errorf("invalid private key for certificate request: %v", err) } csr, err := certutil.MakeCSR(privateKey, subject, nil, nil) if err != nil { return nil, fmt.Errorf("unable to generate certificate request: %v", err) } req, err := client.Create(&certificates.CertificateSigningRequest{ // Username, UID, Groups will be injected by API server. TypeMeta: metav1.TypeMeta{Kind: "CertificateSigningRequest"}, ObjectMeta: metav1.ObjectMeta{GenerateName: "csr-"}, Spec: certificates.CertificateSigningRequestSpec{ Request: csr, Usages: []certificates.KeyUsage{ certificates.UsageDigitalSignature, certificates.UsageKeyEncipherment, certificates.UsageClientAuth, }, }, }) if err != nil { return nil, fmt.Errorf("cannot create certificate signing request: %v", err) } // Make a default timeout = 3600s. 
var defaultTimeoutSeconds int64 = 3600 resultCh, err := client.Watch(v1.ListOptions{ Watch: true, TimeoutSeconds: &defaultTimeoutSeconds, FieldSelector: fields.OneTermEqualSelector("metadata.name", req.Name).String(), }) if err != nil { return nil, fmt.Errorf("cannot watch on the certificate signing request: %v", err) } var status certificates.CertificateSigningRequestStatus ch := resultCh.ResultChan() for { event, ok := <-ch if !ok { break } if event.Type == watch.Modified || event.Type == watch.Added { if event.Object.(*certificates.CertificateSigningRequest).UID != req.UID { continue } status = event.Object.(*certificates.CertificateSigningRequest).Status for _, c := range status.Conditions { if c.Type == certificates.CertificateDenied { return nil, fmt.Errorf("certificate signing request is not approved, reason: %v, message: %v", c.Reason, c.Message) } if c.Type == certificates.CertificateApproved && status.Certificate != nil { return status.Certificate, nil } } } } return nil, fmt.Errorf("watch channel closed") }