// Because openshift-sdn uses an overlay and doesn't need GCE Routes, we need to // clear the NetworkUnavailable condition that kubelet adds to initial node // status when using GCE. // TODO: make upstream kubelet more flexible with overlays and GCE so this // condition doesn't get added for network plugins that don't want it, and then // we can remove this function. func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi.Node) { knode := node cleared := false resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { var err error if knode != node { knode, err = master.kClient.Nodes().Get(node.ObjectMeta.Name) if err != nil { return err } } // Let caller modify knode's status, then push to api server. _, condition := kapi.GetNodeCondition(&node.Status, kapi.NodeNetworkUnavailable) if condition != nil && condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" { condition.Status = kapi.ConditionFalse condition.Reason = "RouteCreated" condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated" condition.LastTransitionTime = kapiunversioned.Now() knode, err = master.kClient.Nodes().UpdateStatus(knode) if err == nil { cleared = true } } return err }) if resultErr != nil { utilruntime.HandleError(fmt.Errorf("Status update failed for local node: %v", resultErr)) } else if cleared { log.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.ObjectMeta.Name) } }
// updateRcWithRetries retries updating the given rc on conflict with the following steps:
// 1. Get latest resource
// 2. applyUpdate
// 3. Update the resource
//
// On a conflict, the closure re-fetches the controller by name and the next
// retry re-applies applyUpdate to that fresh copy. If err is non-nil the
// returned controller cannot be trusted; if err is nil it contains the applied
// update.
func updateRcWithRetries(rcClient coreclient.ReplicationControllersGetter, namespace string, rc *api.ReplicationController, applyUpdate updateRcFunc) (*api.ReplicationController, error) {
	// Deep copy the rc in case we failed on Get during retry loop
	obj, err := api.Scheme.Copy(rc)
	if err != nil {
		return nil, fmt.Errorf("failed to deep copy rc before updating it: %v", err)
	}
	oldRc := obj.(*api.ReplicationController)
	err = retry.RetryOnConflict(retry.DefaultBackoff, func() (e error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, e = rcClient.ReplicationControllers(namespace).Update(rc); e == nil {
			// rc contains the latest controller post update
			return
		}
		updateErr := e
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, e = rcClient.ReplicationControllers(namespace).Get(oldRc.Name); e != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// Only return the error from update so RetryOnConflict decides
		// whether to retry based on the Update failure, not the Get failure.
		return updateErr
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
// UpdateRSWithRetries updates a RS with given applyUpdate function, retrying
// on conflict. The RS is fetched from the lister cache, deep-copied so the
// cache is never mutated, passed to applyUpdate, and then pushed to the
// apiserver. A precondition-violated error from applyUpdate is swallowed
// (logged at V(4)) and the RS is returned un-updated.
// NOTE(review): the previous comment mentioned a "returned bool value", but
// this function returns (*extensions.ReplicaSet, error) — there is no bool.
func UpdateRSWithRetries(rsClient unversionedextensions.ReplicaSetInterface, rsLister *cache.StoreToReplicaSetLister, namespace, name string, applyUpdate updateRSFunc) (*extensions.ReplicaSet, error) {
	var rs *extensions.ReplicaSet

	retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		var err error
		rs, err = rsLister.ReplicaSets(namespace).Get(name)
		if err != nil {
			return err
		}
		// Deep-copy before mutating: the lister shares cached objects.
		obj, deepCopyErr := api.Scheme.DeepCopy(rs)
		if deepCopyErr != nil {
			return deepCopyErr
		}
		rs = obj.(*extensions.ReplicaSet)
		// Apply the update, then attempt to push it to the apiserver.
		if applyErr := applyUpdate(rs); applyErr != nil {
			return applyErr
		}
		rs, err = rsClient.Update(rs)
		return err
	})

	// Ignore the precondition violated error, but the RS isn't updated.
	if retryErr == errorsutil.ErrPreconditionViolated {
		glog.V(4).Infof("Replica set %s/%s precondition doesn't hold, skip updating it.", namespace, name)
		retryErr = nil
	}

	return rs, retryErr
}
// UpdatePodWithRetries updates a pod with given applyUpdate function, retrying
// on conflict. The pod is fetched from the lister cache, deep-copied so the
// cache is never mutated, passed to applyUpdate, and then pushed to the
// apiserver. A precondition-violated error from applyUpdate (e.g. the pod
// already carries the desired label) is swallowed and logged at V(4).
// NOTE(review): the previous comment mentioned a "returned bool value", but
// this function returns (*v1.Pod, error) — there is no bool.
func UpdatePodWithRetries(podClient v1core.PodInterface, podLister *cache.StoreToPodLister, namespace, name string, applyUpdate updatePodFunc) (*v1.Pod, error) {
	var pod *v1.Pod

	retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		var err error
		pod, err = podLister.Pods(namespace).Get(name)
		if err != nil {
			return err
		}
		// Deep-copy before mutating: the lister shares cached objects.
		obj, deepCopyErr := api.Scheme.DeepCopy(pod)
		if deepCopyErr != nil {
			return deepCopyErr
		}
		pod = obj.(*v1.Pod)
		// Apply the update, then attempt to push it to the apiserver.
		if applyErr := applyUpdate(pod); applyErr != nil {
			return applyErr
		}
		pod, err = podClient.Update(pod)
		return err
	})

	// Ignore the precondition violated error, this pod is already updated
	// with the desired label.
	if retryErr == errorsutil.ErrPreconditionViolated {
		glog.V(4).Infof("Pod %s/%s precondition doesn't hold, skip updating it.", namespace, name)
		retryErr = nil
	}

	return pod, retryErr
}
// syncSecret processes one item from the secret work queue: it re-checks the
// referenced token secret and either cleans up stale service-account
// references (secret gone), deletes the token (service account gone), or
// regenerates the token if needed. The deferred retryOrForget re-queues the
// key whenever a retriable error was seen.
func (e *TokensController) syncSecret() {
	key, quit := e.syncSecretQueue.Get()
	if quit {
		return
	}
	defer e.syncSecretQueue.Done(key)

	// Track whether or not we should retry this sync
	retry := false
	defer func() {
		e.retryOrForget(e.syncSecretQueue, key, retry)
	}()

	secretInfo, err := parseSecretQueueKey(key)
	if err != nil {
		// Malformed key: log and drop (no retry — it would never parse).
		glog.Error(err)
		return
	}

	secret, err := e.getSecret(secretInfo.namespace, secretInfo.name, secretInfo.uid, false)
	switch {
	case err != nil:
		glog.Error(err)
		retry = true
	case secret == nil:
		// If the service account exists
		if sa, saErr := e.getServiceAccount(secretInfo.namespace, secretInfo.saName, secretInfo.saUID, false); saErr == nil && sa != nil {
			// secret no longer exists, so delete references to this secret from the service account
			if err := clientretry.RetryOnConflict(RemoveTokenBackoff, func() error {
				return e.removeSecretReference(secretInfo.namespace, secretInfo.saName, secretInfo.saUID, secretInfo.name)
			}); err != nil {
				glog.Error(err)
			}
		}
	default:
		// Ensure service account exists
		sa, saErr := e.getServiceAccount(secretInfo.namespace, secretInfo.saName, secretInfo.saUID, true)
		switch {
		case saErr != nil:
			glog.Error(saErr)
			retry = true
		case sa == nil:
			// Delete token: the owning service account is gone.
			glog.V(4).Infof("syncSecret(%s/%s), service account does not exist, deleting token", secretInfo.namespace, secretInfo.name)
			if retriable, err := e.deleteToken(secretInfo.namespace, secretInfo.name, secretInfo.uid); err != nil {
				glog.Errorf("error deleting serviceaccount token %s/%s for service account %s: %v", secretInfo.namespace, secretInfo.name, secretInfo.saName, err)
				retry = retriable
			}
		default:
			// Update token if needed (helper decides whether the error is retriable).
			if retriable, err := e.generateTokenIfNeeded(sa, secret); err != nil {
				glog.Errorf("error populating serviceaccount token %s/%s for service account %s: %v", secretInfo.namespace, secretInfo.name, secretInfo.saName, err)
				retry = retriable
			}
		}
	}
}
// ensureOpenShiftInfraNamespace is called as part of global policy initialization to ensure infra namespace exists func (c *MasterConfig) ensureOpenShiftInfraNamespace() { ns := c.Options.PolicyConfig.OpenShiftInfrastructureNamespace // Ensure namespace exists namespace, err := c.KubeClientset().Namespaces().Create(&kapi.Namespace{ObjectMeta: kapi.ObjectMeta{Name: ns}}) if kapierror.IsAlreadyExists(err) { // Get the persisted namespace namespace, err = c.KubeClientset().Namespaces().Get(ns) if err != nil { glog.Errorf("Error getting namespace %s: %v", ns, err) return } } else if err != nil { glog.Errorf("Error creating namespace %s: %v", ns, err) return } roleAccessor := policy.NewClusterRoleBindingAccessor(c.ServiceAccountRoleBindingClient()) for _, saName := range bootstrappolicy.InfraSAs.GetServiceAccounts() { _, err := c.KubeClientset().ServiceAccounts(ns).Create(&kapi.ServiceAccount{ObjectMeta: kapi.ObjectMeta{Name: saName}}) if err != nil && !kapierror.IsAlreadyExists(err) { glog.Errorf("Error creating service account %s/%s: %v", ns, saName, err) } role, _ := bootstrappolicy.InfraSAs.RoleFor(saName) reconcileRole := &policy.ReconcileClusterRolesOptions{ RolesToReconcile: []string{role.Name}, Confirmed: true, Union: true, Out: ioutil.Discard, RoleClient: c.PrivilegedLoopbackOpenShiftClient.ClusterRoles(), } if err := reconcileRole.RunReconcileClusterRoles(nil, nil); err != nil { glog.Errorf("Could not reconcile %v: %v\n", role.Name, err) } addRole := &policy.RoleModificationOptions{ RoleName: role.Name, RoleBindingAccessor: roleAccessor, Subjects: []kapi.ObjectReference{{Namespace: ns, Name: saName, Kind: "ServiceAccount"}}, } if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { return addRole.AddRole() }); err != nil { glog.Errorf("Could not add %v service accounts to the %v cluster role: %v\n", saName, role.Name, err) } else { glog.V(2).Infof("Added %v service accounts to the %v cluster role: %v\n", saName, role.Name, err) } } 
c.ensureNamespaceServiceAccountRoleBindings(namespace) }
// UpdateConfigWithRetries will try to update a deployment config and ignore any update conflicts. func UpdateConfigWithRetries(dn DeploymentConfigsNamespacer, namespace, name string, applyUpdate updateConfigFunc) (*deployapi.DeploymentConfig, error) { var config *deployapi.DeploymentConfig resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { var err error config, err = dn.DeploymentConfigs(namespace).Get(name) if err != nil { return err } // Apply the update, then attempt to push it to the apiserver. applyUpdate(config) config, err = dn.DeploymentConfigs(namespace).Update(config) return err }) return config, resultErr }
func retryBuildStatusUpdate(build *api.Build, client client.BuildInterface, sourceRev *api.SourceRevision) error { return retry.RetryOnConflict(retry.DefaultBackoff, func() error { // before updating, make sure we are using the latest version of the build latestBuild, err := client.Get(build.Name) if err != nil { // usually this means we failed to get resources due to the missing // privilleges return err } if sourceRev != nil { latestBuild.Spec.Revision = sourceRev latestBuild.ResourceVersion = "" } latestBuild.Status.Reason = build.Status.Reason latestBuild.Status.Message = build.Status.Message if _, err := client.UpdateDetails(latestBuild); err != nil { return err } return nil }) }
// updatePodWithRetries retries updating the given pod on conflict with the following steps:
// 1. Get latest resource
// 2. applyUpdate
// 3. Update the resource
//
// On a conflict, the closure re-fetches the pod by name and the next retry
// re-applies applyUpdate to that fresh copy. If err is non-nil the returned
// pod cannot be trusted; if err is nil it contains the applied update.
func updatePodWithRetries(podClient coreclient.PodsGetter, namespace string, pod *api.Pod, applyUpdate updatePodFunc) (*api.Pod, error) {
	// Deep copy the pod in case we failed on Get during retry loop
	obj, err := api.Scheme.Copy(pod)
	if err != nil {
		return nil, fmt.Errorf("failed to deep copy pod before updating it: %v", err)
	}
	oldPod := obj.(*api.Pod)
	err = retry.RetryOnConflict(retry.DefaultBackoff, func() (e error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(pod)
		if pod, e = podClient.Pods(namespace).Update(pod); e == nil {
			return
		}
		updateErr := e
		// Re-fetch by the saved name; if the update failed we can't trust pod,
		// so fall back to the deep-copied original on Get failure.
		if pod, e = podClient.Pods(namespace).Get(oldPod.Name); e != nil {
			pod = oldPod
		}
		// Only return the error from update so RetryOnConflict decides
		// whether to retry based on the Update failure, not the Get failure.
		return updateErr
	})
	// If the error is non-nil the returned pod cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return pod, err
}
// ensureNamespaceServiceAccountRoleBindings initializes roles for service accounts in the namespace func (c *MasterConfig) ensureNamespaceServiceAccountRoleBindings(namespace *kapi.Namespace) { const ServiceAccountRolesInitializedAnnotation = "openshift.io/sa.initialized-roles" // Short-circuit if we're already initialized if namespace.Annotations[ServiceAccountRolesInitializedAnnotation] == "true" { return } hasErrors := false for _, binding := range bootstrappolicy.GetBootstrapServiceAccountProjectRoleBindings(namespace.Name) { addRole := &policy.RoleModificationOptions{ RoleName: binding.RoleRef.Name, RoleNamespace: binding.RoleRef.Namespace, RoleBindingAccessor: policy.NewLocalRoleBindingAccessor(namespace.Name, c.ServiceAccountRoleBindingClient()), Subjects: binding.Subjects, } if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { return addRole.AddRole() }); err != nil { glog.Errorf("Could not add service accounts to the %v role in the %q namespace: %v\n", binding.RoleRef.Name, namespace.Name, err) hasErrors = true } } // If we had errors, don't register initialization so we can try again if hasErrors { return } if namespace.Annotations == nil { namespace.Annotations = map[string]string{} } namespace.Annotations[ServiceAccountRolesInitializedAnnotation] = "true" // Log any error other than a conflict (the update will be retried and recorded again on next startup in that case) if _, err := c.KubeClientset().Namespaces().Update(namespace); err != nil && !kapierror.IsConflict(err) { glog.Errorf("Error recording adding service account roles to %q namespace: %v", namespace.Name, err) } }
// RunOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
// Conflict errors from the underlying runOnce pass are retried with the
// default backoff; any other error (or retry exhaustion) is returned.
func (c *Repair) RunOnce() error {
	return retry.RetryOnConflict(retry.DefaultBackoff, c.runOnce)
}
func TestEnforcingServiceAccount(t *testing.T) { testutil.RequireEtcd(t) defer testutil.DumpEtcdOnFailure(t) masterConfig, err := testserver.DefaultMasterOptions() masterConfig.ServiceAccountConfig.LimitSecretReferences = false if err != nil { t.Fatalf("unexpected error: %v", err) } clusterAdminConfig, err := testserver.StartConfiguredMaster(masterConfig) if err != nil { t.Fatalf("unexpected error: %v", err) } clusterAdminKubeClient, err := testutil.GetClusterAdminKubeClient(clusterAdminConfig) if err != nil { t.Fatalf("unexpected error: %v", err) } // Get a service account token saToken, err := waitForServiceAccountToken(clusterAdminKubeClient, api.NamespaceDefault, serviceaccountadmission.DefaultServiceAccountName, 20, time.Second) if err != nil { t.Errorf("unexpected error: %v", err) } if len(saToken) == 0 { t.Errorf("token was not created") } pod := &api.Pod{} pod.Name = "foo" pod.Namespace = api.NamespaceDefault pod.Spec.ServiceAccountName = serviceaccountadmission.DefaultServiceAccountName container := api.Container{} container.Name = "foo" container.Image = "openshift/hello-openshift" pod.Spec.Containers = []api.Container{container} secretVolume := api.Volume{} secretVolume.Name = "bar-vol" secretVolume.Secret = &api.SecretVolumeSource{} secretVolume.Secret.SecretName = "bar" pod.Spec.Volumes = []api.Volume{secretVolume} err = wait.Poll(100*time.Millisecond, 5*time.Second, func() (bool, error) { if _, err := clusterAdminKubeClient.Pods(api.NamespaceDefault).Create(pod); err != nil { // The SA admission controller cache seems to take forever to update. 
This check comes after the limit check, so until we get it sorted out // check if we're getting this particular error if strings.Contains(err.Error(), "no API token found for service account") { return true, nil } t.Log(err) return false, nil } return true, nil }) if err != nil { t.Errorf("unexpected error: %v", err) } clusterAdminKubeClient.Pods(api.NamespaceDefault).Delete(pod.Name, nil) err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { sa, err := clusterAdminKubeClient.ServiceAccounts(api.NamespaceDefault).Get(bootstrappolicy.DeployerServiceAccountName) if err != nil { t.Fatalf("unexpected error: %v", err) } if sa.Annotations == nil { sa.Annotations = map[string]string{} } sa.Annotations[serviceaccountadmission.EnforceMountableSecretsAnnotation] = "true" _, err = clusterAdminKubeClient.ServiceAccounts(api.NamespaceDefault).Update(sa) return err }) if err != nil { t.Fatalf("unexpected error: %v", err) } expectedMessage := "is not allowed because service account deployer does not reference that secret" pod.Spec.ServiceAccountName = bootstrappolicy.DeployerServiceAccountName err = wait.Poll(100*time.Millisecond, 5*time.Second, func() (bool, error) { if _, err := clusterAdminKubeClient.Pods(api.NamespaceDefault).Create(pod); err == nil || !strings.Contains(err.Error(), expectedMessage) { clusterAdminKubeClient.Pods(api.NamespaceDefault).Delete(pod.Name, nil) return false, nil } return true, nil }) if err != nil { t.Errorf("unexpected error: %v", err) } }
// RunBuildPodControllerTest drives the build-pod controller through a series
// of pod phase transitions and verifies that the corresponding build reaches
// the expected build phase for each transition.
// NOTE(review): t.Fatalf is called from a watch goroutine below, which the
// testing package forbids outside the test goroutine — TODO confirm/route
// through errChan instead.
func RunBuildPodControllerTest(t testingT, osClient *client.Client, kClient *kclientset.Clientset) {
	ns := testutil.Namespace()
	waitTime := BuildPodControllerTestWait

	// Each test lists the pod-phase -> expected-build-phase transitions to drive.
	tests := []buildControllerPodTest{
		{
			Name: "running state test",
			States: []buildControllerPodState{
				{
					PodPhase:   kapi.PodRunning,
					BuildPhase: buildapi.BuildPhaseRunning,
				},
			},
		},
		{
			Name: "build succeeded",
			States: []buildControllerPodState{
				{
					PodPhase:   kapi.PodRunning,
					BuildPhase: buildapi.BuildPhaseRunning,
				},
				{
					PodPhase:   kapi.PodSucceeded,
					BuildPhase: buildapi.BuildPhaseComplete,
				},
			},
		},
		{
			Name: "build failed",
			States: []buildControllerPodState{
				{
					PodPhase:   kapi.PodRunning,
					BuildPhase: buildapi.BuildPhaseRunning,
				},
				{
					PodPhase:   kapi.PodFailed,
					BuildPhase: buildapi.BuildPhaseFailed,
				},
			},
		},
	}
	for _, test := range tests {
		// Setup communications channels
		podReadyChan := make(chan *kapi.Pod) // Will receive a value when a build pod is ready
		errChan := make(chan error)          // Will receive a value when an error occurs
		stateReached := int32(0)

		// Create a build
		b, err := osClient.Builds(ns).Create(mockBuild())
		if err != nil {
			t.Fatal(err)
		}

		// Watch build pod for transition to pending
		podWatch, err := kClient.Pods(ns).Watch(kapi.ListOptions{FieldSelector: fields.OneTermEqualSelector("metadata.name", buildapi.GetBuildPodName(b))})
		if err != nil {
			t.Fatal(err)
		}
		go func() {
			for e := range podWatch.ResultChan() {
				pod, ok := e.Object.(*kapi.Pod)
				if !ok {
					// NOTE(review): Fatalf from a goroutine — see function comment.
					t.Fatalf("%s: unexpected object received: %#v\n", test.Name, e.Object)
				}
				if pod.Status.Phase == kapi.PodPending {
					podReadyChan <- pod
					break
				}
			}
		}()

		var pod *kapi.Pod
		select {
		case pod = <-podReadyChan:
			if pod.Status.Phase != kapi.PodPending {
				t.Errorf("Got wrong pod phase: %s", pod.Status.Phase)
				podWatch.Stop()
				continue
			}
		case <-time.After(BuildControllersWatchTimeout):
			t.Errorf("Timed out waiting for build pod to be ready")
			podWatch.Stop()
			continue
		}
		podWatch.Stop()

		for _, state := range test.States {
			// Push the pod into the target phase, retrying on conflicts.
			if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
				// Update pod state and verify that corresponding build state happens accordingly
				pod, err := kClient.Pods(ns).Get(pod.Name)
				if err != nil {
					return err
				}
				// If the pod is already in the target phase, someone else beat
				// us to it — surface that as an error.
				if pod.Status.Phase == state.PodPhase {
					return fmt.Errorf("another client altered the pod phase to %s: %#v", state.PodPhase, pod)
				}
				pod.Status.Phase = state.PodPhase
				_, err = kClient.Pods(ns).UpdateStatus(pod)
				return err
			}); err != nil {
				t.Fatal(err)
			}

			buildWatch, err := osClient.Builds(ns).Watch(kapi.ListOptions{FieldSelector: fields.OneTermEqualSelector("metadata.name", b.Name), ResourceVersion: b.ResourceVersion})
			if err != nil {
				t.Fatal(err)
			}
			// NOTE(review): defer inside a loop — these Stops only run when the
			// whole function returns (Stop is also called explicitly below).
			defer buildWatch.Stop()
			go func() {
				done := false
				for e := range buildWatch.ResultChan() {
					var ok bool
					b, ok = e.Object.(*buildapi.Build)
					if !ok {
						errChan <- fmt.Errorf("%s: unexpected object received: %#v", test.Name, e.Object)
					}
					if e.Type != watchapi.Modified {
						errChan <- fmt.Errorf("%s: unexpected event received: %s, object: %#v", test.Name, e.Type, e.Object)
					}
					if done {
						errChan <- fmt.Errorf("%s: unexpected build state: %#v", test.Name, e.Object)
					} else if b.Status.Phase == state.BuildPhase {
						done = true
						atomic.StoreInt32(&stateReached, 1)
					}
				}
			}()

			select {
			case err := <-errChan:
				buildWatch.Stop()
				t.Errorf("%s: Error: %v\n", test.Name, err)
				break
			case <-time.After(waitTime):
				buildWatch.Stop()
				if atomic.LoadInt32(&stateReached) != 1 {
					t.Errorf("%s: Did not reach desired build state: %s", test.Name, state.BuildPhase)
					break
				}
			}
		}
	}
}
// syncServiceAccount ensures the service account identified by key has a
// generated dockercfg secret wired up as both a mountable secret and an image
// pull secret. If a generated secret already exists in exactly one of the two
// lists, it is copied to the other; otherwise a new pull secret is created and
// attached to the SA with conflict retries, cleaning the secret up again if
// the work turns out to be unnecessary or the update ultimately fails.
func (e *DockercfgController) syncServiceAccount(key string) error {
	obj, exists, err := e.serviceAccountCache.GetByKey(key)
	if err != nil {
		glog.V(4).Infof("Unable to retrieve service account %v from store: %v", key, err)
		return err
	}
	if !exists {
		glog.V(4).Infof("Service account has been deleted %v", key)
		return nil
	}
	if !needsDockercfgSecret(obj.(*api.ServiceAccount)) {
		return nil
	}

	// Deep-copy before mutating: the cache shares objects.
	uncastSA, err := api.Scheme.DeepCopy(obj)
	if err != nil {
		return err
	}
	serviceAccount := uncastSA.(*api.ServiceAccount)

	mountableDockercfgSecrets, imageDockercfgPullSecrets := getGeneratedDockercfgSecretNames(serviceAccount)

	// If we have a pull secret in one list, use it for the other. It must only be in one list because
	// otherwise we wouldn't "needsDockercfgSecret"
	foundPullSecret := len(imageDockercfgPullSecrets) > 0
	foundMountableSecret := len(mountableDockercfgSecrets) > 0
	if foundPullSecret || foundMountableSecret {
		switch {
		case foundPullSecret:
			serviceAccount.Secrets = append(serviceAccount.Secrets, api.ObjectReference{Name: imageDockercfgPullSecrets.List()[0]})
		case foundMountableSecret:
			serviceAccount.ImagePullSecrets = append(serviceAccount.ImagePullSecrets, api.LocalObjectReference{Name: mountableDockercfgSecrets.List()[0]})
		}
		// Clear the pending token annotation when updating
		delete(serviceAccount.Annotations, PendingTokenAnnotation)

		updatedSA, err := e.client.Core().ServiceAccounts(serviceAccount.Namespace).Update(serviceAccount)
		if err == nil {
			// Record the write in the mutation cache so we don't fight our own update.
			e.serviceAccountCache.Mutation(updatedSA)
		}
		return err
	}

	// No generated secret exists yet — make one.
	dockercfgSecret, created, err := e.createDockerPullSecret(serviceAccount)
	if err != nil {
		return err
	}
	if !created {
		glog.V(5).Infof("The dockercfg secret was not created for service account %s/%s, will retry", serviceAccount.Namespace, serviceAccount.Name)
		return nil
	}

	// Attach the new secret to the SA; on conflict retries (first == false),
	// re-read the SA from the cache and re-validate that the work is still needed.
	first := true
	err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		if !first {
			obj, exists, err := e.serviceAccountCache.GetByKey(key)
			if err != nil {
				return err
			}
			if !exists || !needsDockercfgSecret(obj.(*api.ServiceAccount)) || serviceAccount.UID != obj.(*api.ServiceAccount).UID {
				// somehow a dockercfg secret appeared or the SA disappeared. cleanup the secret we made and return
				glog.V(2).Infof("Deleting secret because the work is already done %s/%s", dockercfgSecret.Namespace, dockercfgSecret.Name)
				e.client.Core().Secrets(dockercfgSecret.Namespace).Delete(dockercfgSecret.Name, nil)
				return nil
			}

			uncastSA, err := api.Scheme.DeepCopy(obj)
			if err != nil {
				return err
			}
			serviceAccount = uncastSA.(*api.ServiceAccount)
		}
		first = false

		serviceAccount.Secrets = append(serviceAccount.Secrets, api.ObjectReference{Name: dockercfgSecret.Name})
		serviceAccount.ImagePullSecrets = append(serviceAccount.ImagePullSecrets, api.LocalObjectReference{Name: dockercfgSecret.Name})
		// Clear the pending token annotation when updating
		delete(serviceAccount.Annotations, PendingTokenAnnotation)

		updatedSA, err := e.client.Core().ServiceAccounts(serviceAccount.Namespace).Update(serviceAccount)
		if err == nil {
			e.serviceAccountCache.Mutation(updatedSA)
		}
		return err
	})

	if err != nil {
		// nothing to do. Our choice was stale or we got a conflict. Either way that means that the service account was updated. We simply need to return because we'll get an update notification later
		// we do need to clean up our dockercfgSecret. token secrets are cleaned up by the controller handling service account dockercfg secret deletes
		glog.V(2).Infof("Deleting secret %s/%s (err=%v)", dockercfgSecret.Namespace, dockercfgSecret.Name, err)
		e.client.Core().Secrets(dockercfgSecret.Namespace).Delete(dockercfgSecret.Name, nil)
	}
	return err
}
// Create attempts to create a new eviction. That is, it tries to evict a pod.
// If exactly one PodDisruptionBudget covers the pod, its allowance is
// check-and-decremented (with conflict retries); more than one PDB yields a
// 500 status, a disallowed eviction yields a 429 status, and only after the
// PDB step succeeds (or no PDB applies) is the pod actually deleted.
func (r *EvictionREST) Create(ctx api.Context, obj runtime.Object) (runtime.Object, error) {
	eviction := obj.(*policy.Eviction)

	obj, err := r.store.Get(ctx, eviction.Name, &metav1.GetOptions{})
	if err != nil {
		return nil, err
	}
	pod := obj.(*api.Pod)
	var rtStatus *metav1.Status
	var pdbName string
	err = retry.RetryOnConflict(EvictionsRetry, func() error {
		pdbs, err := r.getPodDisruptionBudgets(ctx, pod)
		if err != nil {
			return err
		}

		if len(pdbs) > 1 {
			rtStatus = &metav1.Status{
				Status:  metav1.StatusFailure,
				Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.",
				Code:    500,
			}
			return nil
		} else if len(pdbs) == 1 {
			pdb := pdbs[0]
			pdbName = pdb.Name
			// Try to verify-and-decrement

			// If it was false already, or if it becomes false during the course of our retries,
			// raise an error marked as a 429.
			ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb)
			if err != nil {
				return err
			}

			if !ok {
				rtStatus = &metav1.Status{
					Status: metav1.StatusFailure,
					// TODO(mml): Include some more details about why the eviction is disallowed.
					// Ideally any such text is generated by the DisruptionController (offline).
					Message: "Cannot evict pod as it would violate the pod's disruption budget.",
					Code:    429,
					// TODO(mml): Add a Retry-After header.  Once there are time-based
					// budgets, we can sometimes compute a sensible suggested value.  But
					// even without that, we can give a suggestion (10 minutes?) that
					// prevents well-behaved clients from hammering us.
				}
			}
		}
		return nil
	})
	// RetryOnConflict reports exhausted retries as ErrWaitTimeout; translate
	// that into a client-facing timeout error naming the contested PDB.
	if err == wait.ErrWaitTimeout {
		err = errors.NewTimeoutError(fmt.Sprintf("couldn't update PodDisruptionBudget %q due to conflicts", pdbName), 10)
	}
	if err != nil {
		return nil, err
	}

	if rtStatus != nil {
		return rtStatus, nil
	}

	// At this point there was either no PDB or we succeeded in decrementing

	// Try the delete
	_, err = r.store.Delete(ctx, eviction.Name, eviction.DeleteOptions)
	if err != nil {
		return nil, err
	}

	// Success!
	return &metav1.Status{Status: metav1.StatusSuccess}, nil
}