// maxUnavailable returns the maximum unavailable pods a rolling deployment can take. func maxUnavailable(deployment extensions.Deployment) int32 { if !deploymentutil.IsRollingUpdate(&deployment) { return int32(0) } // Error caught by validation _, maxUnavailable, _ := deploymentutil.ResolveFenceposts(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, &deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas) return maxUnavailable }
// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate". // Need check maxUnavailable to ensure availability func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) (int, error) { _, maxUnavailable, err := deploymentutil.ResolveFenceposts(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, &deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas) if err != nil { return 0, err } // Check if we can scale down. minAvailable := deployment.Spec.Replicas - maxUnavailable minReadySeconds := deployment.Spec.MinReadySeconds // Find the number of ready pods. readyPodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, allRSs, minReadySeconds) if err != nil { return 0, fmt.Errorf("could not find available pods: %v", err) } if readyPodCount <= minAvailable { // Cannot scale down. return 0, nil } sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs)) totalScaledDown := 0 totalScaleDownCount := readyPodCount - minAvailable for _, targetRS := range oldRSs { if totalScaledDown >= totalScaleDownCount { // No further scaling required. break } if targetRS.Spec.Replicas == 0 { // cannot scale down this ReplicaSet. continue } // Scale down. scaleDownCount := integer.IntMin(targetRS.Spec.Replicas, totalScaleDownCount-totalScaledDown) newReplicasCount := targetRS.Spec.Replicas - scaleDownCount _, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment) if err != nil { return totalScaledDown, err } totalScaledDown += scaleDownCount } return totalScaledDown, nil }
// Update all pods for a ReplicationController (oldRc) by creating a new // controller (newRc) with 0 replicas, and synchronously scaling oldRc and // newRc until oldRc has 0 replicas and newRc has the original # of desired // replicas. Cleanup occurs based on a RollingUpdaterCleanupPolicy. // // Each interval, the updater will attempt to make progress however it can // without violating any availability constraints defined by the config. This // means the amount scaled up or down each interval will vary based on the // timeliness of readiness and the updater will always try to make progress, // even slowly. // // If an update from newRc to oldRc is already in progress, we attempt to // drive it to completion. If an error occurs at any step of the update, the // error will be returned. // // A scaling event (either up or down) is considered progress; if no progress // is made within the config.Timeout, an error is returned. // // TODO: make this handle performing a rollback of a partially completed // rollout. func (r *RollingUpdater) Update(config *RollingUpdaterConfig) error { out := config.Out oldRc := config.OldRc scaleRetryParams := NewRetryParams(config.Interval, config.Timeout) // Find an existing controller (for continuing an interrupted update) or // create a new one if necessary. sourceId := fmt.Sprintf("%s:%s", oldRc.Name, oldRc.UID) newRc, existed, err := r.getOrCreateTargetController(config.NewRc, sourceId) if err != nil { return err } if existed { fmt.Fprintf(out, "Continuing update with existing controller %s.\n", newRc.Name) } else { fmt.Fprintf(out, "Created %s\n", newRc.Name) } // Extract the desired replica count from the controller. desiredAnnotation, err := strconv.Atoi(newRc.Annotations[desiredReplicasAnnotation]) if err != nil { return fmt.Errorf("Unable to parse annotation for %s: %s=%s", newRc.Name, desiredReplicasAnnotation, newRc.Annotations[desiredReplicasAnnotation]) } desired := int32(desiredAnnotation) // Extract the original replica count from the old controller, adding the // annotation if it doesn't yet exist. _, hasOriginalAnnotation := oldRc.Annotations[originalReplicasAnnotation] if !hasOriginalAnnotation { existing, err := r.c.ReplicationControllers(oldRc.Namespace).Get(oldRc.Name) if err != nil { return err } originReplicas := strconv.Itoa(int(existing.Spec.Replicas)) applyUpdate := func(rc *api.ReplicationController) { if rc.Annotations == nil { rc.Annotations = map[string]string{} } rc.Annotations[originalReplicasAnnotation] = originReplicas } if oldRc, err = updateRcWithRetries(r.c, existing.Namespace, existing, applyUpdate); err != nil { return err } } // maxSurge is the maximum scaling increment and maxUnavailable are the maximum pods // that can be unavailable during a rollout. maxSurge, maxUnavailable, err := deployment.ResolveFenceposts(&config.MaxSurge, &config.MaxUnavailable, desired) if err != nil { return err } // Validate maximums. if desired > 0 && maxUnavailable == 0 && maxSurge == 0 { return fmt.Errorf("one of maxSurge or maxUnavailable must be specified") } // The minumum pods which must remain available througout the update // calculated for internal convenience. minAvailable := int32(integer.IntMax(0, int(desired-maxUnavailable))) // If the desired new scale is 0, then the max unavailable is necessarily // the effective scale of the old RC regardless of the configuration // (equivalent to 100% maxUnavailable). if desired == 0 { maxUnavailable = oldRc.Spec.Replicas minAvailable = 0 } fmt.Fprintf(out, "Scaling up %s from %d to %d, scaling down %s from %d to 0 (keep %d pods available, don't exceed %d pods)\n", newRc.Name, newRc.Spec.Replicas, desired, oldRc.Name, oldRc.Spec.Replicas, minAvailable, desired+maxSurge) // give a caller incremental notification and allow them to exit early goal := desired - newRc.Spec.Replicas if goal < 0 { goal = -goal } progress := func(complete bool) error { if config.OnProgress == nil { return nil } progress := desired - newRc.Spec.Replicas if progress < 0 { progress = -progress } percentage := 100 if !complete && goal > 0 { percentage = int((goal - progress) * 100 / goal) } return config.OnProgress(oldRc, newRc, percentage) } // Scale newRc and oldRc until newRc has the desired number of replicas and // oldRc has 0 replicas. progressDeadline := time.Now().UnixNano() + config.Timeout.Nanoseconds() for newRc.Spec.Replicas != desired || oldRc.Spec.Replicas != 0 { // Store the existing replica counts for progress timeout tracking. newReplicas := newRc.Spec.Replicas oldReplicas := oldRc.Spec.Replicas // Scale up as much as possible. scaledRc, err := r.scaleUp(newRc, oldRc, desired, maxSurge, maxUnavailable, scaleRetryParams, config) if err != nil { return err } newRc = scaledRc // notify the caller if necessary if err := progress(false); err != nil { return err } // Wait between scaling operations for things to settle. time.Sleep(config.UpdatePeriod) // Scale down as much as possible. scaledRc, err = r.scaleDown(newRc, oldRc, desired, minAvailable, maxUnavailable, maxSurge, config) if err != nil { return err } oldRc = scaledRc // notify the caller if necessary if err := progress(false); err != nil { return err } // If we are making progress, continue to advance the progress deadline. // Otherwise, time out with an error. progressMade := (newRc.Spec.Replicas != newReplicas) || (oldRc.Spec.Replicas != oldReplicas) if progressMade { progressDeadline = time.Now().UnixNano() + config.Timeout.Nanoseconds() } else if time.Now().UnixNano() > progressDeadline { return fmt.Errorf("timed out waiting for any update progress to be made") } } // notify the caller if necessary if err := progress(true); err != nil { return err } // Housekeeping and cleanup policy execution. return r.cleanup(oldRc, newRc, config) }
func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment *extensions.Deployment) (bool, error) { oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs) if oldPodsCount == 0 { // Can't scale down further return false, nil } minReadySeconds := deployment.Spec.MinReadySeconds allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs) newRSAvailablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, []*extensions.ReplicaSet{newRS}, minReadySeconds) if err != nil { return false, fmt.Errorf("could not find available pods: %v", err) } _, maxUnavailable, err := deploymentutil.ResolveFenceposts(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, &deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas) if err != nil { return false, err } // Check if we can scale down. We can scale down in the following 2 cases: // * Some old replica sets have unhealthy replicas, we could safely scale down those unhealthy replicas since that won't further // increase unavailability. // * New replica set has scaled up and it's replicas becomes ready, then we can scale down old replica sets in a further step. // // maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable // take into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from // the newRS, so that the unavailable pods from the newRS would not make us scale down old replica sets in a further // step(that will increase unavailability). // // Concrete example: // // * 10 replicas // * 2 maxUnavailable (absolute number, not percent) // * 3 maxSurge (absolute number, not percent) // // case 1: // * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5. // * The new replica set pods crashloop and never become available. // * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5. // * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down. // * The user notices the crashloop and does kubectl rollout undo to rollback. // * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down. // * The total number of pods will then be 9 and the newRS can be scaled up to 10. // // case 2: // Same example, but pushing a new pod template instead of rolling back (aka "roll over"): // * The new replica set created must start with 0 replicas because allPodsCount is already at 13. // * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then // allow the new replica set to be scaled up by 5. minAvailable := deployment.Spec.Replicas - maxUnavailable newRSUnavailablePodCount := newRS.Spec.Replicas - newRSAvailablePodCount maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount if maxScaledDown <= 0 { return false, nil } // Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment // and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737 cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, maxScaledDown) if err != nil { return false, nil } // Scale down old replica sets, need check maxUnavailable to ensure we can scale down scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment) if err != nil { return false, nil } totalScaledDown := cleanupCount + scaledDownCount return totalScaledDown > 0, nil }