func (o *DrainOptions) evictPods(pods []api.Pod, policyGroupVersion string, getPodFn func(namespace, name string) (*api.Pod, error)) error { doneCh := make(chan bool, len(pods)) errCh := make(chan error, 1) for _, pod := range pods { go func(pod api.Pod, doneCh chan bool, errCh chan error) { var err error for { err = o.evictPod(pod, policyGroupVersion) if err == nil { break } else if apierrors.IsTooManyRequests(err) { time.Sleep(5 * time.Second) } else { errCh <- fmt.Errorf("error when evicting pod %q: %v", pod.Name, err) return } } podArray := []api.Pod{pod} _, err = o.waitForDelete(podArray, kubectl.Interval, time.Duration(math.MaxInt64), true, getPodFn) if err == nil { doneCh <- true } else { errCh <- fmt.Errorf("error when waiting for pod %q terminating: %v", pod.Name, err) } }(pod, doneCh, errCh) } doneCount := 0 // 0 timeout means infinite, we use MaxInt64 to represent it. var globalTimeout time.Duration if o.Timeout == 0 { globalTimeout = time.Duration(math.MaxInt64) } else { globalTimeout = o.Timeout } for { select { case err := <-errCh: return err case <-doneCh: doneCount++ if doneCount == len(pods) { return nil } case <-time.After(globalTimeout): return fmt.Errorf("Drain did not complete within %v", globalTimeout) } } }
// RunDrain runs the 'drain' command func (o *DrainOptions) RunDrain() error { if err := o.RunCordonOrUncordon(true); err != nil { return err } err := o.deleteOrEvictPodsSimple() // TODO: update IsTooManyRequests() when the TooManyRequests(429) error returned from the API server has a non-empty Reason field for i := 1; i <= maxPatchRetry && apierrors.IsTooManyRequests(err); i++ { if i > triesBeforeBackOff { currBackOffPeriod := time.Duration(math.Exp2(float64(i-triesBeforeBackOff))) * backOffPeriod fmt.Fprintf(o.errOut, "Retry in %v\n", currBackOffPeriod) o.backOff.Sleep(currBackOffPeriod) } fmt.Fprintf(o.errOut, "Retrying\n") err = o.deleteOrEvictPodsSimple() } if err == nil { cmdutil.PrintSuccess(o.mapper, false, o.out, "node", o.nodeInfo.Name, false, "drained") } return err }
func TestConcurrentEvictionRequests(t *testing.T) { podNameFormat := "test-pod-%d" s, rm, podInformer, clientSet := rmSetup(t) defer s.Close() ns := framework.CreateTestingNamespace("concurrent-eviction-requests", s, t) defer framework.DeleteTestingNamespace(ns, s, t) stopCh := make(chan struct{}) go podInformer.Run(stopCh) go rm.Run(stopCh) config := restclient.Config{Host: s.URL} clientSet, err := clientset.NewForConfig(&config) var gracePeriodSeconds int64 = 30 deleteOption := &v1.DeleteOptions{ GracePeriodSeconds: &gracePeriodSeconds, } // Generate 10 pods to evict for i := 0; i < 10; i++ { podName := fmt.Sprintf(podNameFormat, i) pod := newPod(podName) if _, err := clientSet.Core().Pods(ns.Name).Create(pod); err != nil { t.Errorf("Failed to create pod: %v", err) } addPodConditionReady(pod) if _, err := clientSet.Core().Pods(ns.Name).UpdateStatus(pod); err != nil { t.Fatal(err) } } waitToObservePods(t, podInformer, 10) pdb := newPDB() if _, err := clientSet.Policy().PodDisruptionBudgets(ns.Name).Create(pdb); err != nil { t.Errorf("Failed to create PodDisruptionBudget: %v", err) } waitPDBStable(t, clientSet, 10, ns.Name, pdb.Name) doneCh := make(chan bool, 10) errCh := make(chan error, 1) // spawn 10 goroutine to concurrently evict the pods for i := 0; i < 10; i++ { go func(id int, doneCh chan bool, errCh chan error) { evictionName := fmt.Sprintf(podNameFormat, id) eviction := newEviction(ns.Name, evictionName, deleteOption) var e error for { e = clientSet.Policy().Evictions(ns.Name).Evict(eviction) if errors.IsTooManyRequests(e) { time.Sleep(5 * time.Second) } else { break } } if e != nil { if errors.IsConflict(err) { fmt.Errorf("Unexpected Conflict (409) error caused by failing to handle concurrent PDB updates: %v", e) } else { errCh <- e } return } doneCh <- true }(i, doneCh, errCh) } doneCount := 0 for { select { case err := <-errCh: t.Errorf("%v", err) return case <-doneCh: doneCount++ if doneCount == 10 { return } case <-time.After(defaultTimeout): t.Errorf("Eviction did not complete within %v", defaultTimeout) } } for i := 0; i < 10; i++ { podName := fmt.Sprintf(podNameFormat, i) _, err := clientSet.Core().Pods(ns.Name).Get(podName, metav1.GetOptions{}) if !errors.IsNotFound(err) { t.Errorf("Pod %q is expected to be evicted", podName) } } if err := clientSet.Policy().PodDisruptionBudgets(ns.Name).Delete(pdb.Name, deleteOption); err != nil { t.Errorf("Failed to delete PodDisruptionBudget: %v", err) } close(stopCh) }