// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the following is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas
// 2) Max number of pods allowed is reached: deployment's replicas + maxSurge == all RSs' replicas
func NewRSNewReplicas(deployment *extensions.Deployment, allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet) (int32, error) {
	switch deployment.Spec.Strategy.Type {
	case extensions.RollingUpdateDeploymentStrategyType:
		// Check if we can scale up.
		maxSurge, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(deployment.Spec.Replicas), true)
		if err != nil {
			return 0, err
		}
		// Find the total number of pods.
		currentPodCount := GetReplicaCountForReplicaSets(allRSs)
		maxTotalPods := deployment.Spec.Replicas + int32(maxSurge)
		if currentPodCount >= maxTotalPods {
			// Cannot scale up.
			return newRS.Spec.Replicas, nil
		}
		// Scale up.
		scaleUpCount := maxTotalPods - currentPodCount
		// Do not exceed the number of desired replicas.
		scaleUpCount = int32(integer.IntMin(int(scaleUpCount), int(deployment.Spec.Replicas-newRS.Spec.Replicas)))
		return newRS.Spec.Replicas + scaleUpCount, nil
	case extensions.RecreateDeploymentStrategyType:
		return deployment.Spec.Replicas, nil
	default:
		return 0, fmt.Errorf("deployment type %v isn't supported", deployment.Spec.Strategy.Type)
	}
}
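As a rough illustration of the arithmetic above, here is a self-contained sketch with made-up numbers; resolveSurge is a simplified stand-in for GetValueFromIntOrPercent's round-up behavior, not the real API. With 10 desired replicas and maxSurge of 25%, the surge resolves to 3, the pod budget is 13, and a new RS with 5 replicas and 11 existing pods may grow by 2.

package main

import "fmt"

// resolveSurge is a simplified stand-in for GetValueFromIntOrPercent with
// round-up semantics: ceil(percent% of desired).
func resolveSurge(percent, desired int) int {
	return (percent*desired + 99) / 100
}

func main() {
	desired := 10
	maxSurge := resolveSurge(25, desired) // 3
	maxTotalPods := desired + maxSurge    // 13

	currentPodCount := 11 // pods across all replica sets
	newRSReplicas := 5    // current size of the new RS

	scaleUpCount := maxTotalPods - currentPodCount // 2
	if room := desired - newRSReplicas; scaleUpCount > room {
		scaleUpCount = room // never exceed the desired replica count
	}
	fmt.Println(newRSReplicas + scaleUpCount) // 7
}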
func (dc *DeploymentController) reconcileNewReplicaSet(allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment extensions.Deployment) (bool, error) {
	if newRS.Spec.Replicas == deployment.Spec.Replicas {
		// Scaling not required.
		return false, nil
	}
	if newRS.Spec.Replicas > deployment.Spec.Replicas {
		// Scale down.
		_, err := dc.scaleReplicaSetAndRecordEvent(newRS, deployment.Spec.Replicas, deployment)
		return true, err
	}
	// Check if we can scale up.
	maxSurge, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Replicas)
	if err != nil {
		return false, err
	}
	// Find the total number of pods.
	currentPodCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	maxTotalPods := deployment.Spec.Replicas + maxSurge
	if currentPodCount >= maxTotalPods {
		// Cannot scale up.
		return false, nil
	}
	// Scale up.
	scaleUpCount := maxTotalPods - currentPodCount
	// Do not exceed the number of desired replicas.
	scaleUpCount = integer.IntMin(scaleUpCount, deployment.Spec.Replicas-newRS.Spec.Replicas)
	newReplicasCount := newRS.Spec.Replicas + scaleUpCount
	_, err = dc.scaleReplicaSetAndRecordEvent(newRS, newReplicasCount, deployment)
	return true, err
}
// ResolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one
// step. For example:
//
// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1)
// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1)
// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1)
// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1)
func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired int32) (int32, int32, error) {
	surge, err := intstrutil.GetValueFromIntOrPercent(maxSurge, int(desired), true)
	if err != nil {
		return 0, 0, err
	}
	unavailable, err := intstrutil.GetValueFromIntOrPercent(maxUnavailable, int(desired), false)
	if err != nil {
		return 0, 0, err
	}
	if surge == 0 && unavailable == 0 {
		// Validation should never allow the user to explicitly use zero values for both maxSurge
		// and maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero.
		// If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the
		// theory that surge might not work due to quota.
		unavailable = 1
	}
	return int32(surge), int32(unavailable), nil
}
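A standalone sketch of the fencepost behavior, assuming a simplified percentage resolver in place of intstrutil.GetValueFromIntOrPercent: maxSurge percentages round up, maxUnavailable percentages round down, and when both come out to zero, unavailable is bumped to 1 so a rollout can still make progress.

package main

import "fmt"

// resolve is a simplified stand-in for the percentage resolution:
// round up for maxSurge, round down for maxUnavailable.
func resolve(percent, desired int, roundUp bool) int {
	if roundUp {
		return (percent*desired + 99) / 100
	}
	return percent * desired / 100
}

func resolveFenceposts(surgePct, unavailPct, desired int) (int, int) {
	surge := resolve(surgePct, desired, true)
	unavailable := resolve(unavailPct, desired, false)
	if surge == 0 && unavailable == 0 {
		// Mirror the fallback above: never leave both at zero.
		unavailable = 1
	}
	return surge, unavailable
}

func main() {
	fmt.Println(resolveFenceposts(0, 1, 2))  // 0 1: both rounded to zero, unavailable bumped to 1
	fmt.Println(resolveFenceposts(1, 25, 2)) // 1 0: surge 1% rounds up to 1, unavailable 25% rounds down to 0
}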
// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when the deployment strategy is "RollingUpdate".
// We need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment extensions.Deployment) (int, error) {
	maxUnavailable, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas)
	if err != nil {
		return 0, err
	}
	// Check if we can scale down.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	minReadySeconds := deployment.Spec.MinReadySeconds
	// Find the number of ready pods.
	readyPodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, allRSs, minReadySeconds)
	if err != nil {
		return 0, fmt.Errorf("could not find available pods: %v", err)
	}
	if readyPodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	totalScaledDown := 0
	totalScaleDownCount := readyPodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// Cannot scale down this replica set.
			continue
		}
		// Scale down.
		scaleDownCount := integer.IntMin(targetRS.Spec.Replicas, totalScaleDownCount-totalScaledDown)
		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
		_, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}
		totalScaledDown += scaleDownCount
	}
	return totalScaledDown, nil
}
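The loop above spreads the scale-down budget over the oldest replica sets first. A minimal standalone sketch with made-up numbers (plain ints instead of ReplicaSet objects): with 10 desired, maxUnavailable of 2 and 10 ready pods, minAvailable is 8, so at most 2 pods may be removed, taken from the oldest replica set first.

package main

import "fmt"

func main() {
	desired, maxUnavailable := 10, 2
	readyPodCount := 10

	minAvailable := desired - maxUnavailable            // 8
	totalScaleDownCount := readyPodCount - minAvailable // 2

	// Replica counts of the old replica sets, oldest first.
	oldRSs := []int{3, 2}
	totalScaledDown := 0
	for i, replicas := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			break // no further scaling required
		}
		scaleDown := totalScaleDownCount - totalScaledDown
		if scaleDown > replicas {
			scaleDown = replicas
		}
		oldRSs[i] = replicas - scaleDown
		totalScaledDown += scaleDown
	}
	fmt.Println(oldRSs, totalScaledDown) // [1 2] 2
}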
func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*api.Pod) (expectedCount, desiredHealthy int32, err error) {
	err = nil
	// TODO(davidopp): consider making the way expectedCount and the rules about
	// permitted controller configurations (specifically, considering it an error
	// if a pod covered by a PDB has 0 controllers or > 1 controller) are handled
	// the same way for integer and percentage minAvailable.
	if pdb.Spec.MinAvailable.Type == intstr.Int {
		desiredHealthy = pdb.Spec.MinAvailable.IntVal
		expectedCount = int32(len(pods))
	} else if pdb.Spec.MinAvailable.Type == intstr.String {
		// When the user specifies a fraction of pods that must be available, we
		// use as the fraction's denominator
		//   SUM_{all c in C} scale(c)
		// where C is the union of C_p1, C_p2, ..., C_pN
		// and each C_pi is the set of controllers controlling the pod pi.
		// k8s only defines what will happen when 0 or 1 controllers control a
		// given pod. We explicitly exclude the 0-controllers case here, and we
		// report an error if we find a pod with more than 1 controller. Thus in
		// practice each C_pi is a set of exactly 1 controller.

		// A mapping from controllers to their scale.
		controllerScale := map[types.UID]int32{}

		// 1. Find the controller(s) for each pod. If any pod has 0 controllers,
		// that's an error. If any pod has more than 1 controller, that's also an
		// error.
		for _, pod := range pods {
			controllerCount := 0
			for _, finder := range dc.finders() {
				var controllers []controllerAndScale
				controllers, err = finder(pod)
				if err != nil {
					return
				}
				for _, controller := range controllers {
					controllerScale[controller.UID] = controller.scale
					controllerCount++
				}
			}
			if controllerCount == 0 {
				err = fmt.Errorf("asked for percentage, but found no controllers for pod %q", pod.Name)
				dc.recorder.Event(pdb, api.EventTypeWarning, "NoControllers", err.Error())
				return
			} else if controllerCount > 1 {
				err = fmt.Errorf("pod %q has %v>1 controllers", pod.Name, controllerCount)
				dc.recorder.Event(pdb, api.EventTypeWarning, "TooManyControllers", err.Error())
				return
			}
		}

		// 2. Add up all the controllers.
		expectedCount = 0
		for _, count := range controllerScale {
			expectedCount += count
		}

		// 3. Do the math.
		var dh int
		dh, err = intstr.GetValueFromIntOrPercent(&pdb.Spec.MinAvailable, int(expectedCount), true)
		if err != nil {
			return
		}
		desiredHealthy = int32(dh)
	}

	return
}
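For the percentage branch, a tiny standalone sketch with hypothetical controller scales shows how expectedCount and desiredHealthy are derived; the helper arithmetic mirrors the round-up behavior used above, it is not the disruption controller API.

package main

import "fmt"

func main() {
	// Hypothetical scales of the (unique) controllers covering the selected pods.
	controllerScale := map[string]int{"rc-a": 5, "rc-b": 3}

	expectedCount := 0
	for _, scale := range controllerScale {
		expectedCount += scale
	}

	// minAvailable of "60%": round up, as the code above does for percentages.
	minAvailablePct := 60
	desiredHealthy := (minAvailablePct*expectedCount + 99) / 100

	fmt.Println(expectedCount, desiredHealthy) // 8 5
}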
// Set expectationsCheck to false to bypass the expectations check when testing.
func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment extensions.Deployment, expectationsCheck bool) (bool, error) {
	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further.
		return false, nil
	}

	// Check the expectations of the deployment before reconciling.
	dKey, err := controller.KeyFunc(&deployment)
	if err != nil {
		return false, fmt.Errorf("Couldn't get key for deployment %#v: %v", deployment, err)
	}
	if expectationsCheck && !dc.podExpectations.SatisfiedExpectations(dKey) {
		glog.V(4).Infof("Pod expectations not met yet before reconciling old replica sets\n")
		return false, nil
	}

	minReadySeconds := deployment.Spec.MinReadySeconds
	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	newRSAvailablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, []*extensions.ReplicaSet{newRS}, minReadySeconds)
	if err != nil {
		return false, fmt.Errorf("could not find available pods: %v", err)
	}

	maxUnavailable, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas)
	if err != nil {
		return false, err
	}

	// Check if we can scale down. We can scale down in the following 2 cases:
	// * Some old replica sets have unhealthy replicas; we can safely scale down those unhealthy replicas since that won't further
	//   increase unavailability.
	// * The new replica set has scaled up and its replicas have become ready; then we can scale down old replica sets in a further step.
	//
	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
	// This takes into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from
	// the newRS, so that unavailable pods from the newRS don't cause us to scale down old replica sets in a further
	// step (which would increase unavailability).
	//
	// Concrete example:
	//
	// * 10 replicas
	// * 2 maxUnavailable (absolute number, not percent)
	// * 3 maxSurge (absolute number, not percent)
	//
	// case 1:
	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
	// * The new replica set pods crashloop and never become available.
	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
	// * The user notices the crashloop and does kubectl rollout undo to rollback.
	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
	//
	// case 2:
	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
	//   allow the new replica set to be scaled up by 5.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	newRSUnavailablePodCount := newRS.Spec.Replicas - newRSAvailablePodCount
	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
	if maxScaledDown <= 0 {
		return false, nil
	}

	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block the deployment
	// and cause a timeout. See https://github.com/kubernetes/kubernetes/issues/16737
	cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, maxScaledDown)
	if err != nil {
		return false, err
	}

	// Scale down old replica sets; we need to check maxUnavailable to ensure we can scale down.
	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment)
	if err != nil {
		return false, err
	}

	totalScaledDown := cleanupCount + scaledDownCount
	if expectationsCheck {
		dc.podExpectations.ExpectDeletions(dKey, totalScaledDown)
	}
	return totalScaledDown > 0, nil
}
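The "case 1" arithmetic from the comment in this function can be checked with a few lines of standalone Go; the values are taken from that example, and maxScaledDown here is just the formula, not the controller code.

package main

import "fmt"

// maxScaledDown is just the formula from the comment above:
// allPodsCount - minAvailable - newRSUnavailablePodCount.
func maxScaledDown(allPods, minAvailable, newRSUnavailable int) int {
	return allPods - minAvailable - newRSUnavailable
}

func main() {
	desired, maxUnavailable := 10, 2
	minAvailable := desired - maxUnavailable // 8

	fmt.Println(maxScaledDown(13, minAvailable, 5)) // 0: the crashlooping new RS blocks any scale down
	fmt.Println(maxScaledDown(13, minAvailable, 1)) // 4: after the rollback, 4 crashlooping pods can go
}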
// Update all pods for a ReplicationController (oldRc) by creating a new
// controller (newRc) with 0 replicas, and synchronously scaling oldRc and
// newRc until oldRc has 0 replicas and newRc has the original # of desired
// replicas. Cleanup occurs based on a RollingUpdaterCleanupPolicy.
//
// Each interval, the updater will attempt to make progress however it can
// without violating any availability constraints defined by the config. This
// means the amount scaled up or down each interval will vary based on the
// timeliness of readiness and the updater will always try to make progress,
// even slowly.
//
// If an update from newRc to oldRc is already in progress, we attempt to
// drive it to completion. If an error occurs at any step of the update, the
// error will be returned.
//
// A scaling event (either up or down) is considered progress; if no progress
// is made within the config.Timeout, an error is returned.
//
// TODO: make this handle performing a rollback of a partially completed
// rollout.
func (r *RollingUpdater) Update(config *RollingUpdaterConfig) error {
	out := config.Out
	oldRc := config.OldRc
	scaleRetryParams := NewRetryParams(config.Interval, config.Timeout)

	// Find an existing controller (for continuing an interrupted update) or
	// create a new one if necessary.
	sourceId := fmt.Sprintf("%s:%s", oldRc.Name, oldRc.UID)
	newRc, existed, err := r.getOrCreateTargetController(config.NewRc, sourceId)
	if err != nil {
		return err
	}
	if existed {
		fmt.Fprintf(out, "Continuing update with existing controller %s.\n", newRc.Name)
	} else {
		fmt.Fprintf(out, "Created %s\n", newRc.Name)
	}

	// Extract the desired replica count from the controller.
	desired, err := strconv.Atoi(newRc.Annotations[desiredReplicasAnnotation])
	if err != nil {
		return fmt.Errorf("Unable to parse annotation for %s: %s=%s",
			newRc.Name, desiredReplicasAnnotation, newRc.Annotations[desiredReplicasAnnotation])
	}

	// Extract the original replica count from the old controller, adding the
	// annotation if it doesn't yet exist.
	_, hasOriginalAnnotation := oldRc.Annotations[originalReplicasAnnotation]
	if !hasOriginalAnnotation {
		existing, err := r.c.ReplicationControllers(oldRc.Namespace).Get(oldRc.Name)
		if err != nil {
			return err
		}
		if existing.Annotations == nil {
			existing.Annotations = map[string]string{}
		}
		existing.Annotations[originalReplicasAnnotation] = strconv.Itoa(existing.Spec.Replicas)
		updated, err := r.c.ReplicationControllers(existing.Namespace).Update(existing)
		if err != nil {
			return err
		}
		oldRc = updated
	}
	original, err := strconv.Atoi(oldRc.Annotations[originalReplicasAnnotation])
	if err != nil {
		return fmt.Errorf("Unable to parse annotation for %s: %s=%s\n",
			oldRc.Name, originalReplicasAnnotation, oldRc.Annotations[originalReplicasAnnotation])
	}

	// The maximum number of pods which can go unavailable during the update.
	maxUnavailable, err := intstr.GetValueFromIntOrPercent(&config.MaxUnavailable, desired, false)
	if err != nil {
		return err
	}
	// The maximum scaling increment.
	maxSurge, err := intstr.GetValueFromIntOrPercent(&config.MaxSurge, desired, true)
	if err != nil {
		return err
	}
	// Validate maximums.
	if desired > 0 && maxUnavailable == 0 && maxSurge == 0 {
		return fmt.Errorf("one of maxSurge or maxUnavailable must be specified")
	}
	// The minimum number of pods which must remain available throughout the update,
	// calculated for internal convenience.
	minAvailable := integer.IntMax(0, desired-maxUnavailable)
	// If the desired new scale is 0, then the max unavailable is necessarily
	// the effective scale of the old RC regardless of the configuration
	// (equivalent to 100% maxUnavailable).
	if desired == 0 {
		maxUnavailable = original
		minAvailable = 0
	}

	fmt.Fprintf(out, "Scaling up %s from %d to %d, scaling down %s from %d to 0 (keep %d pods available, don't exceed %d pods)\n",
		newRc.Name, newRc.Spec.Replicas, desired, oldRc.Name, oldRc.Spec.Replicas, minAvailable, original+maxSurge)

	// Scale newRc and oldRc until newRc has the desired number of replicas and
	// oldRc has 0 replicas.
	progressDeadline := time.Now().UnixNano() + config.Timeout.Nanoseconds()
	for newRc.Spec.Replicas != desired || oldRc.Spec.Replicas != 0 {
		// Store the existing replica counts for progress timeout tracking.
		newReplicas := newRc.Spec.Replicas
		oldReplicas := oldRc.Spec.Replicas
		// Scale up as much as possible.
		scaledRc, err := r.scaleUp(newRc, oldRc, original, desired, maxSurge, maxUnavailable, scaleRetryParams, config)
		if err != nil {
			return err
		}
		newRc = scaledRc
		// Wait between scaling operations for things to settle.
		time.Sleep(config.UpdatePeriod)
		// Scale down as much as possible.
		scaledRc, err = r.scaleDown(newRc, oldRc, desired, minAvailable, maxUnavailable, maxSurge, config)
		if err != nil {
			return err
		}
		oldRc = scaledRc
		// If we are making progress, continue to advance the progress deadline.
		// Otherwise, time out with an error.
		progressMade := (newRc.Spec.Replicas != newReplicas) || (oldRc.Spec.Replicas != oldReplicas)
		if progressMade {
			progressDeadline = time.Now().UnixNano() + config.Timeout.Nanoseconds()
		} else if time.Now().UnixNano() > progressDeadline {
			return fmt.Errorf("timed out waiting for any update progress to be made")
		}
	}

	// Housekeeping and cleanup policy execution.
	return r.cleanup(oldRc, newRc, config)
}
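To make the availability bookkeeping concrete, here is a small standalone sketch (hypothetical values, not the kubectl API) of how minAvailable is clamped at zero and how a desired count of 0 degenerates into 100% maxUnavailable, so the old controller can be drained immediately.

package main

import "fmt"

// bounds sketches the availability bookkeeping: minAvailable is clamped at zero,
// and a desired count of 0 behaves like 100% maxUnavailable.
func bounds(desired, maxUnavailable, original int) (minAvailable, effectiveUnavailable int) {
	minAvailable = desired - maxUnavailable
	if minAvailable < 0 {
		minAvailable = 0
	}
	effectiveUnavailable = maxUnavailable
	if desired == 0 {
		minAvailable = 0
		effectiveUnavailable = original // everything may go unavailable
	}
	return minAvailable, effectiveUnavailable
}

func main() {
	fmt.Println(bounds(10, 3, 10)) // 7 3
	fmt.Println(bounds(0, 1, 10))  // 0 10
}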