// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*extensions.ReplicaSet, deployment extensions.Deployment, maxCleanupCount int) (int, error) {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := 0
	for _, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this replica set.
			continue
		}
		readyPodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, []*extensions.ReplicaSet{targetRS}, 0)
		if err != nil {
			return totalScaledDown, fmt.Errorf("could not find available pods: %v", err)
		}
		if targetRS.Spec.Replicas == readyPodCount {
			// no unhealthy replicas found, no scaling required.
			continue
		}
		scaledDownCount := integer.IntMin(maxCleanupCount-totalScaledDown, targetRS.Spec.Replicas-readyPodCount)
		newReplicasCount := targetRS.Spec.Replicas - scaledDownCount
		_, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
	}
	return totalScaledDown, nil
}
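// The key step in cleanupUnhealthyReplicas above is the per-ReplicaSet scale-down budget:
// remove no more than the remaining cleanup allowance, and no more than the number of
// unhealthy replicas in that ReplicaSet. A minimal, self-contained sketch of that arithmetic
// (hypothetical names and toy values, not the controller's actual plumbing):
package main

import "fmt"

// scaleDownBudget mirrors the scaledDownCount computation: the amount removed from one
// ReplicaSet is capped both by the remaining cleanup allowance and by its unhealthy count.
func scaleDownBudget(maxCleanupCount, totalScaledDown, specReplicas, readyReplicas int) int {
	remaining := maxCleanupCount - totalScaledDown
	unhealthy := specReplicas - readyReplicas
	if remaining < unhealthy {
		return remaining
	}
	return unhealthy
}

func main() {
	// 5 desired replicas, 2 ready, 2 cleanups left in the budget:
	// delete 2 of the 3 unhealthy pods, not all 3.
	fmt.Println(scaleDownBudget(4, 2, 5, 2)) // 2
}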
// getDeploymentForPod returns the deployment managing the given Pod.
func (dc *DeploymentController) getDeploymentForPod(pod *v1.Pod) *extensions.Deployment {
	// Find the owning replica set
	var rs *extensions.ReplicaSet
	var err error
	// Look at the owner reference
	controllerRef := controller.GetControllerOf(&pod.ObjectMeta)
	if controllerRef != nil {
		// Not a pod owned by a replica set.
		if controllerRef.Kind != extensions.SchemeGroupVersion.WithKind("ReplicaSet").Kind {
			return nil
		}
		rs, err = dc.rsLister.ReplicaSets(pod.Namespace).Get(controllerRef.Name)
		if err != nil {
			glog.V(4).Infof("Cannot get replicaset %q for pod %q: %v", controllerRef.Name, pod.Name, err)
			return nil
		}
	} else {
		// Fallback to listing replica sets.
		rss, err := dc.rsLister.GetPodReplicaSets(pod)
		if err != nil {
			glog.V(4).Infof("Cannot list replica sets for pod %q: %v", pod.Name, err)
			return nil
		}
		// TODO: Handle multiple replica sets gracefully
		// For now we return the oldest replica set.
		if len(rss) > 1 {
			utilruntime.HandleError(fmt.Errorf("more than one ReplicaSet is selecting pod %q with labels: %+v", pod.Name, pod.Labels))
			sort.Sort(controller.ReplicaSetsByCreationTimestamp(rss))
		}
		rs = rss[0]
	}
	return dc.getDeploymentForReplicaSet(rs)
}
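// When no ownerReference pins down the controller, getDeploymentForPod falls back to listing
// all ReplicaSets whose selectors match the pod and picking the oldest. A small standalone
// sketch of that tie-break (the rs struct here is a hypothetical stand-in, not the real type):
package main

import (
	"fmt"
	"sort"
	"time"
)

type rs struct {
	name    string
	created time.Time
}

// oldestOf mirrors the fallback branch: sort candidates by creation timestamp
// and return the oldest, so repeated lookups stay deterministic.
func oldestOf(candidates []rs) rs {
	sort.Slice(candidates, func(i, j int) bool {
		return candidates[i].created.Before(candidates[j].created)
	})
	return candidates[0]
}

func main() {
	now := time.Now()
	picked := oldestOf([]rs{
		{name: "rs-new", created: now},
		{name: "rs-old", created: now.Add(-time.Hour)},
	})
	fmt.Println(picked.name) // rs-old
}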
// cleanupDeployment is responsible for cleaning up a deployment, i.e. it deletes all but the latest N old replica sets,
// where N=d.Spec.RevisionHistoryLimit. Old replica sets are older versions of the pod template of a deployment kept
// around by default 1) for historical reasons and 2) for the ability to roll back a deployment.
func (dc *DeploymentController) cleanupDeployment(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) error {
	if deployment.Spec.RevisionHistoryLimit == nil {
		return nil
	}
	diff := int32(len(oldRSs)) - *deployment.Spec.RevisionHistoryLimit
	if diff <= 0 {
		return nil
	}
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	var errList []error
	// TODO: This should be parallelized.
	for i := int32(0); i < diff; i++ {
		rs := oldRSs[i]
		// Avoid deleting replica sets with non-zero replica counts
		if rs.Status.Replicas != 0 || *(rs.Spec.Replicas) != 0 || rs.Generation > rs.Status.ObservedGeneration {
			continue
		}
		if err := dc.client.Extensions().ReplicaSets(rs.Namespace).Delete(rs.Name, nil); err != nil && !errors.IsNotFound(err) {
			glog.V(2).Infof("Failed deleting old replica set %v for deployment %v: %v", rs.Name, deployment.Name, err)
			errList = append(errList, err)
		}
	}
	return utilerrors.NewAggregate(errList)
}
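// The pruning decision in cleanupDeployment reduces to simple arithmetic: with a non-nil
// RevisionHistoryLimit, the diff between the number of old ReplicaSets and the limit is how
// many of the oldest ones are candidates for deletion. A sketch of just that computation
// (pruneCount is a hypothetical helper, for illustration only):
package main

import "fmt"

// pruneCount returns how many of the oldest ReplicaSets may be deleted.
// A nil limit means "keep all history".
func pruneCount(oldRSCount int, revisionHistoryLimit *int32) int32 {
	if revisionHistoryLimit == nil {
		return 0
	}
	diff := int32(oldRSCount) - *revisionHistoryLimit
	if diff <= 0 {
		return 0
	}
	return diff
}

func main() {
	limit := int32(2)
	fmt.Println(pruneCount(5, &limit)) // 3: the three oldest are deletable
	fmt.Println(pruneCount(2, &limit)) // 0: nothing to prune
	fmt.Println(pruneCount(5, nil))    // 0: nil limit keeps everything
}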
// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment, maxCleanupCount int32) ([]*extensions.ReplicaSet, int32, error) {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := int32(0)
	for i, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if *(targetRS.Spec.Replicas) == 0 {
			// cannot scale down this replica set.
			continue
		}
		glog.V(4).Infof("Found %d available pods in old RS %s/%s", targetRS.Status.AvailableReplicas, targetRS.Namespace, targetRS.Name)
		if *(targetRS.Spec.Replicas) == targetRS.Status.AvailableReplicas {
			// no unhealthy replicas found, no scaling required.
			continue
		}
		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(*(targetRS.Spec.Replicas)-targetRS.Status.AvailableReplicas)))
		newReplicasCount := *(targetRS.Spec.Replicas) - scaledDownCount
		if newReplicasCount > *(targetRS.Spec.Replicas) {
			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
		}
		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return nil, totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
		oldRSs[i] = updatedOldRS
	}
	return oldRSs, totalScaledDown, nil
}
// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// Need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment extensions.Deployment) (int, error) {
	maxUnavailable, isPercent, err := util.GetIntOrPercentValue(&deployment.Spec.Strategy.RollingUpdate.MaxUnavailable)
	if err != nil {
		return 0, fmt.Errorf("invalid value for MaxUnavailable: %v", err)
	}
	if isPercent {
		maxUnavailable = util.GetValueFromPercent(maxUnavailable, deployment.Spec.Replicas)
	}
	// Check if we can scale down.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	minReadySeconds := deployment.Spec.MinReadySeconds
	// Find the number of ready pods.
	readyPodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, allRSs, minReadySeconds)
	if err != nil {
		return 0, fmt.Errorf("could not find available pods: %v", err)
	}
	if readyPodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	totalScaledDown := 0
	totalScaleDownCount := readyPodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := int(math.Min(float64(targetRS.Spec.Replicas), float64(totalScaleDownCount-totalScaledDown)))
		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
		_, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}
		totalScaledDown += scaleDownCount
	}
	return totalScaledDown, nil
}
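// The availability bookkeeping above is the interesting part: maxUnavailable (possibly a
// percentage of desired replicas) yields minAvailable, and the difference between currently
// ready pods and minAvailable is the total scale-down headroom. A standalone sketch of that
// math; note the round-up in the percentage conversion is an assumption of this sketch, not
// a claim about util.GetValueFromPercent's exact rounding:
package main

import (
	"fmt"
	"math"
)

// maxUnavailableFromPercent converts a percentage into a pod count
// (rounding up here, as a conservative choice for the illustration).
func maxUnavailableFromPercent(percent, replicas int) int {
	return int(math.Ceil(float64(percent) * float64(replicas) / 100))
}

func main() {
	replicas := 10
	maxUnavailable := maxUnavailableFromPercent(25, replicas) // 3
	minAvailable := replicas - maxUnavailable                 // 7

	// With 9 pods currently ready, old replica sets may shrink by at
	// most readyPodCount - minAvailable pods in this reconciliation.
	readyPodCount := 9
	fmt.Println(readyPodCount - minAvailable) // 2
}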
// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// Need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment) (int32, error) {
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)
	// Check if we can scale down.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	minReadySeconds := deployment.Spec.MinReadySeconds
	// Find the number of ready pods.
	// TODO: use dc.getAvailablePodsForReplicaSets instead
	availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, allRSs, minReadySeconds)
	if err != nil {
		return 0, fmt.Errorf("could not find available pods: %v", err)
	}
	if availablePodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	glog.V(4).Infof("Found %d available pods in deployment %s, scaling down old RSes", availablePodCount, deployment.Name)
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	totalScaledDown := int32(0)
	totalScaleDownCount := availablePodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := int32(integer.IntMin(int(targetRS.Spec.Replicas), int(totalScaleDownCount-totalScaledDown)))
		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
		if newReplicasCount > targetRS.Spec.Replicas {
			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
		}
		_, _, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}
		totalScaledDown += scaleDownCount
	}
	return totalScaledDown, nil
}
// FindActiveOrLatest returns the only active or the latest replica set in case there is at most one active
// replica set. If there are more active replica sets, then we should proportionally scale them.
func FindActiveOrLatest(newRS *extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet) *extensions.ReplicaSet {
	if newRS == nil && len(oldRSs) == 0 {
		return nil
	}
	sort.Sort(sort.Reverse(controller.ReplicaSetsByCreationTimestamp(oldRSs)))
	allRSs := controller.FilterActiveReplicaSets(append(oldRSs, newRS))
	switch len(allRSs) {
	case 0:
		// If there is no active replica set then we should return the newest.
		if newRS != nil {
			return newRS
		}
		return oldRSs[0]
	case 1:
		return allRSs[0]
	default:
		return nil
	}
}
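// FindActiveOrLatest's switch is easiest to see on toy data: exactly one active ReplicaSet
// wins outright; none active falls back to the newest; several active means the caller must
// scale proportionally, signalled by nil. A simplified sketch (rs and activeOrLatest are
// illustrative stand-ins, with "newest first" assumed for the input order):
package main

import "fmt"

type rs struct {
	name     string
	replicas int
}

// activeOrLatest mirrors the switch in FindActiveOrLatest over a
// newest-first slice: 0 active -> newest, 1 active -> that one,
// more -> nil (proportional scaling is the caller's job).
func activeOrLatest(newestFirst []rs) *rs {
	var active []*rs
	for i := range newestFirst {
		if newestFirst[i].replicas > 0 {
			active = append(active, &newestFirst[i])
		}
	}
	switch len(active) {
	case 0:
		return &newestFirst[0]
	case 1:
		return active[0]
	default:
		return nil
	}
}

func main() {
	out := activeOrLatest([]rs{{"new", 0}, {"old-1", 3}, {"old-2", 0}})
	fmt.Println(out.name) // old-1, the only active replica set
}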
// getPodReplicaSet returns the replica set managing the given pod.
// TODO: Surface that we are ignoring multiple replica sets for a single pod.
// TODO: use ownerReference.Controller to determine if the rs controls the pod.
func (rsc *ReplicaSetController) getPodReplicaSet(pod *v1.Pod) *extensions.ReplicaSet {
	// look up in the cache; if cached and the cache is valid, just return the cached value
	if obj, cached := rsc.lookupCache.GetMatchingObject(pod); cached {
		rs, ok := obj.(*extensions.ReplicaSet)
		if !ok {
			// This should not happen
			utilruntime.HandleError(fmt.Errorf("lookup cache does not return a ReplicaSet object"))
			return nil
		}
		if cached && rsc.isCacheValid(pod, rs) {
			return rs
		}
	}
	// if not cached or the cached value is invalid, search all the replica sets to find the matching one, and update the cache
	rss, err := rsc.rsLister.GetPodReplicaSets(pod)
	if err != nil {
		glog.V(4).Infof("No ReplicaSets found for pod %v, ReplicaSet controller will avoid syncing", pod.Name)
		return nil
	}
	// In theory, overlapping ReplicaSets is user error. This sorting will not prevent
	// oscillation of replicas in all cases, eg:
	//   rs1 (older rs): [(k1=v1)], replicas=1
	//   rs2:            [(k2=v2)], replicas=2
	//   pod: [(k1:v1), (k2:v2)] will wake both rs1 and rs2, and we will sync rs1.
	//   pod: [(k2:v2)] will wake rs2 which creates a new replica.
	if len(rss) > 1 {
		// More than one item in this list indicates user error. If two replicasets
		// overlap, sort by creation timestamp, subsort by name, then pick
		// the first.
		utilruntime.HandleError(fmt.Errorf("user error! more than one ReplicaSet is selecting pods with labels: %+v", pod.Labels))
		sort.Sort(controller.ReplicaSetsByCreationTimestamp(rss))
	}
	// update lookup cache
	rsc.lookupCache.Update(pod, rss[0])
	return rss[0]
}
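// The overlap tie-break above (creation timestamp, then name) is what keeps repeated syncs
// deterministic when two ReplicaSets select the same pod. A self-contained sketch of the
// same ordering (pickOwner and the rs struct are hypothetical, for illustration only):
package main

import (
	"fmt"
	"sort"
	"time"
)

type rs struct {
	name    string
	created time.Time
}

// pickOwner sorts by creation timestamp, sub-sorts by name, and picks the
// first, so the same pod always resolves to the same ReplicaSet.
func pickOwner(matching []rs) rs {
	sort.Slice(matching, func(i, j int) bool {
		if matching[i].created.Equal(matching[j].created) {
			return matching[i].name < matching[j].name
		}
		return matching[i].created.Before(matching[j].created)
	})
	return matching[0]
}

func main() {
	t := time.Now()
	winner := pickOwner([]rs{{"rs-b", t}, {"rs-a", t}})
	fmt.Println(winner.name) // rs-a: equal timestamps fall back to name order
}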
// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*extensions.ReplicaSet, deployment *extensions.Deployment, minReadySeconds, maxCleanupCount int32) ([]*extensions.ReplicaSet, int32, error) {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := int32(0)
	for i, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this replica set.
			continue
		}
		// TODO: use dc.getAvailablePodsForReplicaSets instead
		availablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, deployment, []*extensions.ReplicaSet{targetRS}, minReadySeconds)
		if err != nil {
			return nil, totalScaledDown, fmt.Errorf("could not find available pods: %v", err)
		}
		if targetRS.Spec.Replicas == availablePodCount {
			// no unhealthy replicas found, no scaling required.
			continue
		}
		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(targetRS.Spec.Replicas-availablePodCount)))
		newReplicasCount := targetRS.Spec.Replicas - scaledDownCount
		if newReplicasCount > targetRS.Spec.Replicas {
			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, targetRS.Spec.Replicas, newReplicasCount)
		}
		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return nil, totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
		oldRSs[i] = updatedOldRS
	}
	return oldRSs, totalScaledDown, nil
}
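// This variant threads minReadySeconds into the availability check: a replica only counts as
// available once it has been ready for at least that long. A simplified illustration of that
// rule (isAvailable is a hypothetical helper, not the controller's actual predicate):
package main

import (
	"fmt"
	"time"
)

// isAvailable reports whether a pod that became ready at readySince has
// stayed ready long enough to count as available.
func isAvailable(readySince time.Time, minReadySeconds int32, now time.Time) bool {
	return !readySince.IsZero() &&
		now.Sub(readySince) >= time.Duration(minReadySeconds)*time.Second
}

func main() {
	now := time.Now()
	fmt.Println(isAvailable(now.Add(-5*time.Second), 10, now))  // false: not ready long enough
	fmt.Println(isAvailable(now.Add(-30*time.Second), 10, now)) // true
}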