// update ready status of all pods running on given node from master // return true if success func (nc *NodeController) markAllPodsNotReady(nodeName string) error { glog.V(2).Infof("Update ready status of pods on node [%v]", nodeName) opts := api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, nodeName)} pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(opts) if err != nil { return err } errMsg := []string{} for _, pod := range pods.Items { // Defensive check, also needed for tests. if pod.Spec.NodeName != nodeName { continue } for i, cond := range pod.Status.Conditions { if cond.Type == api.PodReady { pod.Status.Conditions[i].Status = api.ConditionFalse glog.V(2).Infof("Updating ready status of pod %v to false", pod.Name) _, err := nc.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(&pod) if err != nil { glog.Warningf("Failed to update status for pod %q: %v", format.Pod(&pod), err) errMsg = append(errMsg, fmt.Sprintf("%v", err)) } break } } } if len(errMsg) == 0 { return nil } return fmt.Errorf("%v", strings.Join(errMsg, "; ")) }
// terminatePods will ensure all pods on the given node that are in terminating state are eventually // cleaned up. Returns true if the node has no pods in terminating state, a duration that indicates how // long before we should check again (the next deadline for a pod to complete), or an error. func (nc *NodeController) terminatePods(nodeName string, since time.Time) (bool, time.Duration, error) { // the time before we should try again nextAttempt := time.Duration(0) // have we deleted all pods complete := true selector := fields.OneTermEqualSelector(api.PodHostField, nodeName) options := api.ListOptions{FieldSelector: selector} pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(options) if err != nil { return false, nextAttempt, err } now := time.Now() elapsed := now.Sub(since) for _, pod := range pods.Items { // Defensive check, also needed for tests. if pod.Spec.NodeName != nodeName { continue } // only clean terminated pods if pod.DeletionGracePeriodSeconds == nil { continue } // the user's requested grace period grace := time.Duration(*pod.DeletionGracePeriodSeconds) * time.Second if grace > nc.maximumGracePeriod { grace = nc.maximumGracePeriod } // the time remaining before the pod should have been deleted remaining := grace - elapsed if remaining < 0 { remaining = 0 glog.V(2).Infof("Removing pod %v after %s grace period", pod.Name, grace) nc.recordNodeEvent(nodeName, api.EventTypeNormal, "TerminatingEvictedPod", fmt.Sprintf("Pod %s has exceeded the grace period for deletion after being evicted from Node %q and is being force killed", pod.Name, nodeName)) if err := nc.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil { glog.Errorf("Error completing deletion of pod %s: %v", pod.Name, err) complete = false } } else { glog.V(2).Infof("Pod %v still terminating, requested grace period %s, %s remaining", pod.Name, grace, remaining) complete = false } if nextAttempt < remaining { nextAttempt = remaining } } return complete, nextAttempt, nil }
// forcefullyDeleteNode immediately deletes all pods on the node, and then // deletes the node itself. func (nc *NodeController) forcefullyDeleteNode(nodeName string) error { selector := fields.OneTermEqualSelector(api.PodHostField, nodeName) options := api.ListOptions{FieldSelector: selector} pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(options) if err != nil { return fmt.Errorf("unable to list pods on node %q: %v", nodeName, err) } for _, pod := range pods.Items { if pod.Spec.NodeName != nodeName { continue } if err := nc.forcefullyDeletePod(&pod); err != nil { return fmt.Errorf("unable to delete pod %q on node %q: %v", pod.Name, nodeName, err) } } if err := nc.kubeClient.Core().Nodes().Delete(nodeName, nil); err != nil { return fmt.Errorf("unable to delete node %q: %v", nodeName, err) } return nil }
// deletePods will delete all pods from master running on given node, and return true // if any pods were deleted. func (nc *NodeController) deletePods(nodeName string) (bool, error) { remaining := false selector := fields.OneTermEqualSelector(api.PodHostField, nodeName) options := api.ListOptions{FieldSelector: selector} pods, err := nc.kubeClient.Core().Pods(api.NamespaceAll).List(options) if err != nil { return remaining, err } if len(pods.Items) > 0 { nc.recordNodeEvent(nodeName, api.EventTypeNormal, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeName)) } for _, pod := range pods.Items { // Defensive check, also needed for tests. if pod.Spec.NodeName != nodeName { continue } // if the pod has already been deleted, ignore it if pod.DeletionGracePeriodSeconds != nil { continue } // if the pod is managed by a daemonset, ignore it _, err := nc.daemonSetStore.GetPodDaemonSets(&pod) if err == nil { // No error means at least one daemonset was found continue } glog.V(2).Infof("Starting deletion of pod %v", pod.Name) nc.recorder.Eventf(&pod, api.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName) if err := nc.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil { return false, err } remaining = true } return remaining, nil }
// SingleObject returns a ListOptions for watching a single object. func SingleObject(meta ObjectMeta) ListOptions { return ListOptions{ FieldSelector: fields.OneTermEqualSelector("metadata.name", meta.Name), ResourceVersion: meta.ResourceVersion, } }
// ListResource returns a function that handles retrieving a list of resources from a rest.Storage object. func ListResource(r rest.Lister, rw rest.Watcher, scope RequestScope, forceWatch bool, minRequestTimeout time.Duration) restful.RouteFunction { return func(req *restful.Request, res *restful.Response) { // For performance tracking purposes. trace := util.NewTrace("List " + req.Request.URL.Path) w := res.ResponseWriter namespace, err := scope.Namer.Namespace(req) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // Watches for single objects are routed to this function. // Treat a /name parameter the same as a field selector entry. hasName := true _, name, err := scope.Namer.Name(req) if err != nil { hasName = false } ctx := scope.ContextFunc(req) ctx = api.WithNamespace(ctx, namespace) opts := api.ListOptions{} if err := scope.ParameterCodec.DecodeParameters(req.Request.URL.Query(), scope.Kind.GroupVersion(), &opts); err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // transform fields // TODO: DecodeParametersInto should do this. if opts.FieldSelector != nil { fn := func(label, value string) (newLabel, newValue string, err error) { return scope.Convertor.ConvertFieldLabel(scope.Kind.GroupVersion().String(), scope.Kind.Kind, label, value) } if opts.FieldSelector, err = opts.FieldSelector.Transform(fn); err != nil { // TODO: allow bad request to set field causes based on query parameters err = errors.NewBadRequest(err.Error()) scope.err(err, res.ResponseWriter, req.Request) return } } if hasName { // metadata.name is the canonical internal name. // generic.SelectionPredicate will notice that this is // a request for a single object and optimize the // storage query accordingly. nameSelector := fields.OneTermEqualSelector("metadata.name", name) if opts.FieldSelector != nil && !opts.FieldSelector.Empty() { // It doesn't make sense to ask for both a name // and a field selector, since just the name is // sufficient to narrow down the request to a // single object. scope.err(errors.NewBadRequest("both a name and a field selector provided; please provide one or the other."), res.ResponseWriter, req.Request) return } opts.FieldSelector = nameSelector } if (opts.Watch || forceWatch) && rw != nil { watcher, err := rw.Watch(ctx, &opts) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } // TODO: Currently we explicitly ignore ?timeout= and use only ?timeoutSeconds=. timeout := time.Duration(0) if opts.TimeoutSeconds != nil { timeout = time.Duration(*opts.TimeoutSeconds) * time.Second } if timeout == 0 && minRequestTimeout > 0 { timeout = time.Duration(float64(minRequestTimeout) * (rand.Float64() + 1.0)) } serveWatch(watcher, scope, req, res, timeout) return } // Log only long List requests (ignore Watch). defer trace.LogIfLong(500 * time.Millisecond) trace.Step("About to List from storage") result, err := r.List(ctx, &opts) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } trace.Step("Listing from storage done") numberOfItems, err := setListSelfLink(result, req, scope.Namer) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } trace.Step("Self-linking done") write(http.StatusOK, scope.Kind.GroupVersion(), scope.Serializer, result, w, req.Request) trace.Step(fmt.Sprintf("Writing http response done (%d items)", numberOfItems)) } }