// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// We need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) (int32, error) {
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)

	// Check if we can scale down.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	minReadySeconds := deployment.Spec.MinReadySeconds
	// Find the number of ready pods.
	// TODO: use dc.getAvailablePodsForReplicaSets instead
	availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, allRSs, minReadySeconds)
	if err != nil {
		return 0, fmt.Errorf("could not find available pods: %v", err)
	}
	if availablePodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	glog.V(4).Infof("Found %d available pods in deployment %s, scaling down old RSes", availablePodCount, deployment.Name)

	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))

	totalScaledDown := int32(0)
	totalScaleDownCount := availablePodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := int32(integer.IntMin(int(targetRS.Spec.Replicas), int(totalScaleDownCount-totalScaledDown)))
		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
		if newReplicasCount > targetRS.Spec.Replicas {
			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
		}
		_, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}

		totalScaledDown += scaleDownCount
	}

	return totalScaledDown, nil
}
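// The scale-down budget above is simple arithmetic over the deployment's availability
// constraints. The helper below is an illustrative sketch only (exampleScaleDownBudget is a
// hypothetical name, not part of the controller): with 10 desired replicas, maxUnavailable=2
// and 11 available pods, old replica sets may lose at most 11-8=3 pods in one pass.
func exampleScaleDownBudget(desiredReplicas, maxUnavailable, availablePodCount int32) int32 {
	// minAvailable is the floor the rolling update must not dip below.
	minAvailable := desiredReplicas - maxUnavailable
	if availablePodCount <= minAvailable {
		// Cannot scale down without violating maxUnavailable.
		return 0
	}
	// Anything above the floor can be removed from old replica sets, oldest first.
	return availablePodCount - minAvailable
}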
// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment, minReadySeconds, maxCleanupCount int32) ([]*extensions.ReplicaSet, int32, error) {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. The replica set will sort its pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := int32(0)
	for i, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this replica set.
			continue
		}
		// TODO: use dc.getAvailablePodsForReplicaSets instead
		availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, []*extensions.ReplicaSet{targetRS}, minReadySeconds)
		if err != nil {
			return nil, totalScaledDown, fmt.Errorf("could not find available pods: %v", err)
		}
		glog.V(4).Infof("Found %d available pods in old RS %s/%s", availablePodCount, targetRS.Namespace, targetRS.Name)
		if targetRS.Spec.Replicas == availablePodCount {
			// no unhealthy replicas found, no scaling required.
			continue
		}

		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(targetRS.Spec.Replicas-availablePodCount)))
		newReplicasCount := targetRS.Spec.Replicas - scaledDownCount
		if newReplicasCount > targetRS.Spec.Replicas {
			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
		}
		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return nil, totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
		oldRSs[i] = updatedOldRS
	}
	return oldRSs, totalScaledDown, nil
}
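// A sketch of the per-replica-set arithmetic used above, for illustration only
// (exampleUnhealthyCleanupCount is a hypothetical name, not part of the controller):
// the number of replicas removed from one old replica set is capped both by the remaining
// cleanup budget and by how many of its replicas are not yet available, so cleanup never
// removes more pods than there are unhealthy ones.
func exampleUnhealthyCleanupCount(rsReplicas, availablePods, remainingBudget int32) int32 {
	unhealthy := rsReplicas - availablePods
	if unhealthy <= 0 || remainingBudget <= 0 {
		// Nothing unhealthy in this replica set, or the cleanup budget is exhausted.
		return 0
	}
	if unhealthy < remainingBudget {
		return unhealthy
	}
	return remainingBudget
}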
// reconcileOldReplicaSets scales down the old replica sets of a rolling-update deployment as far as
// the deployment's availability constraints allow. It returns true if anything was scaled down.
func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment *extensions.Deployment) (bool, error) {
	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further
		return false, nil
	}

	minReadySeconds := deployment.Spec.MinReadySeconds
	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	// TODO: use dc.getAvailablePodsForReplicaSets instead
	newRSAvailablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, []*extensions.ReplicaSet{newRS}, minReadySeconds)
	if err != nil {
		return false, fmt.Errorf("could not find available pods: %v", err)
	}
	glog.V(4).Infof("New RS %s/%s has %d available pods.", newRS.Namespace, newRS.Name, newRSAvailablePodCount)

	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)

	// Check if we can scale down. We can scale down in the following 2 cases:
	// * Some old replica sets have unhealthy replicas; we can safely scale those unhealthy replicas down since that won't
	//   further increase unavailability.
	// * The new replica set has scaled up and its replicas have become ready; then we can scale down old replica sets in a
	//   further step.
	//
	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
	// This takes into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods
	// from the newRS, so that unavailable pods from the newRS don't make us scale down old replica sets in a further step
	// (which would increase unavailability).
	//
	// Concrete example:
	//
	// * 10 replicas
	// * 2 maxUnavailable (absolute number, not percent)
	// * 3 maxSurge (absolute number, not percent)
	//
	// case 1:
	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
	// * The new replica set pods crashloop and never become available.
	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
	// * The user notices the crashloop and does kubectl rollout undo to roll back.
	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
	//
	// case 2:
	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
	//   allow the new replica set to be scaled up by 5.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	newRSUnavailablePodCount := newRS.Spec.Replicas - newRSAvailablePodCount
	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
	if maxScaledDown <= 0 {
		return false, nil
	}

	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block the deployment
	// and cause a timeout. See https://github.com/kubernetes/kubernetes/issues/16737
	oldRSs, cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, deployment.Spec.MinReadySeconds, maxScaledDown)
	if err != nil {
		return false, err
	}
	glog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d", cleanupCount)

	// Scale down old replica sets; we need to check maxUnavailable to ensure we can scale down.
	allRSs = append(oldRSs, newRS)
	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment)
	if err != nil {
		return false, err
	}
	glog.V(4).Infof("Scaled down old RSes of deployment %s by %d", deployment.Name, scaledDownCount)

	totalScaledDown := cleanupCount + scaledDownCount
	return totalScaledDown > 0, nil
}
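// An illustrative sketch of the maxScaledDown formula documented in reconcileOldReplicaSets
// (exampleMaxScaledDown is a hypothetical name, not part of the controller). Plugging in
// case 1 from the comment: 13 - (10-2) - 5 = 0 while the new pods crashloop, and
// 13 - (10-2) - 1 = 4 after the rollback, when the good replica set becomes the new RS and
// only the node-failure pod is unavailable.
func exampleMaxScaledDown(allPodsCount, desiredReplicas, maxUnavailable, newRSReplicas, newRSAvailable int32) int32 {
	minAvailable := desiredReplicas - maxUnavailable
	newRSUnavailable := newRSReplicas - newRSAvailable
	return allPodsCount - minAvailable - newRSUnavailable
}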