Example #1
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the following is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas
// 2) Max number of pods allowed is reached: deployment's replicas + maxSurge == all RSs' replicas
func NewRSNewReplicas(deployment *extensions.Deployment, allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet) (int32, error) {
	switch deployment.Spec.Strategy.Type {
	case extensions.RollingUpdateDeploymentStrategyType:
		// Check if we can scale up.
		maxSurge, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(deployment.Spec.Replicas), true)
		if err != nil {
			return 0, err
		}
		// Find the total number of pods
		currentPodCount := GetReplicaCountForReplicaSets(allRSs)
		maxTotalPods := deployment.Spec.Replicas + int32(maxSurge)
		if currentPodCount >= maxTotalPods {
			// Cannot scale up.
			return newRS.Spec.Replicas, nil
		}
		// Scale up.
		scaleUpCount := maxTotalPods - currentPodCount
		// Do not exceed the number of desired replicas.
		scaleUpCount = int32(integer.IntMin(int(scaleUpCount), int(deployment.Spec.Replicas-newRS.Spec.Replicas)))
		return newRS.Spec.Replicas + scaleUpCount, nil
	case extensions.RecreateDeploymentStrategyType:
		return deployment.Spec.Replicas, nil
	default:
		return 0, fmt.Errorf("deployment type %v isn't supported", deployment.Spec.Strategy.Type)
	}
}
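
The following is a minimal, standalone sketch of the RollingUpdate branch above, with plain ints standing in for the Kubernetes API types; the function name and the sample numbers are illustrative only, not part of the original source.

package main

import "fmt"

// newRSReplicas mirrors the RollingUpdate arithmetic of NewRSNewReplicas:
// the new RS may grow until the total pod count across all RSs reaches
// desired+maxSurge, and its own count never exceeds the desired replicas.
func newRSReplicas(desired, maxSurge, allRSsReplicas, newRSReplicasNow int) int {
	maxTotalPods := desired + maxSurge
	if allRSsReplicas >= maxTotalPods {
		return newRSReplicasNow // no surge headroom left
	}
	scaleUp := maxTotalPods - allRSsReplicas
	if remaining := desired - newRSReplicasNow; scaleUp > remaining {
		scaleUp = remaining // never exceed the desired replica count
	}
	return newRSReplicasNow + scaleUp
}

func main() {
	// 10 desired, maxSurge 3: 13 pods already exist, so the new RS stays at 5.
	fmt.Println(newRSReplicas(10, 3, 13, 5)) // 5
	// Only 11 pods exist, so the new RS can grow by 2, to 7.
	fmt.Println(newRSReplicas(10, 3, 11, 5)) // 7
}
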
func (dc *DeploymentController) reconcileNewReplicaSet(allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment extensions.Deployment) (bool, error) {
	if newRS.Spec.Replicas == deployment.Spec.Replicas {
		// Scaling not required.
		return false, nil
	}
	if newRS.Spec.Replicas > deployment.Spec.Replicas {
		// Scale down.
		_, err := dc.scaleReplicaSetAndRecordEvent(newRS, deployment.Spec.Replicas, deployment)
		return true, err
	}
	// Check if we can scale up.
	maxSurge, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Replicas)
	if err != nil {
		return false, err
	}

	// Find the total number of pods
	currentPodCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	maxTotalPods := deployment.Spec.Replicas + maxSurge
	if currentPodCount >= maxTotalPods {
		// Cannot scale up.
		return false, nil
	}
	// Scale up.
	scaleUpCount := maxTotalPods - currentPodCount
	// Do not exceed the number of desired replicas.
	scaleUpCount = integer.IntMin(scaleUpCount, deployment.Spec.Replicas-newRS.Spec.Replicas)
	newReplicasCount := newRS.Spec.Replicas + scaleUpCount
	_, err = dc.scaleReplicaSetAndRecordEvent(newRS, newReplicasCount, deployment)
	return true, err
}
Example #3
// ResolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one
// step. For example:
//
// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1)
// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1)
// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1)
// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1)
func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired int32) (int32, int32, error) {
	surge, err := intstrutil.GetValueFromIntOrPercent(maxSurge, int(desired), true)
	if err != nil {
		return 0, 0, err
	}
	unavailable, err := intstrutil.GetValueFromIntOrPercent(maxUnavailable, int(desired), false)
	if err != nil {
		return 0, 0, err
	}

	if surge == 0 && unavailable == 0 {
		// Validation should never allow the user to explicitly use zero values for both maxSurge
		// and maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero.
		// If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the
		// theory that surge might not work due to quota.
		unavailable = 1
	}

	return int32(surge), int32(unavailable), nil
}
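
The rounding directions are what make the examples in the comment work: maxSurge resolves a percentage by rounding up, maxUnavailable by rounding down, and a double zero is bumped to one unavailable pod. Below is a standalone sketch of that resolution for percentage inputs, using plain float64 percentages instead of intstrutil.IntOrString; names and values are illustrative.

package main

import (
	"fmt"
	"math"
)

// resolveFenceposts sketches the percentage case: maxSurge rounds up,
// maxUnavailable rounds down, and if both resolve to zero the rollout
// would deadlock, so maxUnavailable is forced to 1.
func resolveFenceposts(surgePercent, unavailablePercent float64, desired int) (surge, unavailable int) {
	surge = int(math.Ceil(surgePercent / 100 * float64(desired)))
	unavailable = int(math.Floor(unavailablePercent / 100 * float64(desired)))
	if surge == 0 && unavailable == 0 {
		unavailable = 1
	}
	return surge, unavailable
}

func main() {
	fmt.Println(resolveFenceposts(0, 1, 2))  // 0 1: must scale old down first
	fmt.Println(resolveFenceposts(1, 25, 2)) // 1 0: must scale new up first
	fmt.Println(resolveFenceposts(1, 0, 2))  // 1 0: must scale new up first
}
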
// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// We need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, deployment extensions.Deployment) (int, error) {
	maxUnavailable, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas)
	if err != nil {
		return 0, err
	}

	// Check if we can scale down.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	minReadySeconds := deployment.Spec.MinReadySeconds
	// Find the number of ready pods.
	readyPodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, allRSs, minReadySeconds)
	if err != nil {
		return 0, fmt.Errorf("could not find available pods: %v", err)
	}
	if readyPodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}

	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))

	totalScaledDown := 0
	totalScaleDownCount := readyPodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if targetRS.Spec.Replicas == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := integer.IntMin(targetRS.Spec.Replicas, totalScaleDownCount-totalScaledDown)
		newReplicasCount := targetRS.Spec.Replicas - scaleDownCount
		_, err = dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}

		totalScaledDown += scaleDownCount
	}

	return totalScaledDown, nil
}
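
As a rough illustration of the scale-down budget above (readyPodCount minus the minAvailable floor, spent oldest-first across the old replica sets), here is a standalone sketch with a plain struct standing in for a ReplicaSet; all names and numbers are illustrative.

package main

import "fmt"

// oldRS stands in for an old ReplicaSet; only the replica count matters here.
type oldRS struct {
	name     string
	replicas int
}

// scaleDownOld distributes the scale-down budget (readyPods - (desired - maxUnavailable))
// across old replica sets, oldest first, and returns the new counts plus the total removed.
func scaleDownOld(oldRSs []oldRS, desired, maxUnavailable, readyPods int) ([]oldRS, int) {
	minAvailable := desired - maxUnavailable
	if readyPods <= minAvailable {
		return oldRSs, 0 // scaling down would violate availability
	}
	budget := readyPods - minAvailable
	total := 0
	for i := range oldRSs {
		if total >= budget {
			break
		}
		cut := oldRSs[i].replicas
		if remaining := budget - total; cut > remaining {
			cut = remaining
		}
		oldRSs[i].replicas -= cut
		total += cut
	}
	return oldRSs, total
}

func main() {
	rss := []oldRS{{"old-a", 4}, {"old-b", 4}}
	// 10 desired, maxUnavailable 2, 9 pods ready: the budget is 9-8 = 1.
	rss, n := scaleDownOld(rss, 10, 2, 9)
	fmt.Println(rss, n) // [{old-a 3} {old-b 4}] 1
}
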
func (dc *DisruptionController) getExpectedPodCount(pdb *policy.PodDisruptionBudget, pods []*api.Pod) (expectedCount, desiredHealthy int32, err error) {
	err = nil
	// TODO(davidopp): consider whether expectedCount and the rules about
	// permitted controller configurations (specifically, treating it as an error
	// if a pod covered by a PDB has 0 controllers or more than 1 controller)
	// should be handled the same way for integer and percentage minAvailable
	if pdb.Spec.MinAvailable.Type == intstr.Int {
		desiredHealthy = pdb.Spec.MinAvailable.IntVal
		expectedCount = int32(len(pods))
	} else if pdb.Spec.MinAvailable.Type == intstr.String {
		// When the user specifies a fraction of pods that must be available, we
		// use as the fraction's denominator
		// SUM_{all c in C} scale(c)
		// where C is the union of C_p1, C_p2, ..., C_pN
		// and each C_pi is the set of controllers controlling the pod pi

		// k8s only defines what will happen when 0 or 1 controllers control a
		// given pod.  We explicitly exclude the 0 controllers case here, and we
		// report an error if we find a pod with more than 1 controller.  Thus in
		// practice each C_pi is a set of exactly 1 controller.

		// A mapping from controllers to their scale.
		controllerScale := map[types.UID]int32{}

		// 1. Find the controller(s) for each pod.  If any pod has 0 controllers,
		// that's an error.  If any pod has more than 1 controller, that's also an
		// error.
		for _, pod := range pods {
			controllerCount := 0
			for _, finder := range dc.finders() {
				var controllers []controllerAndScale
				controllers, err = finder(pod)
				if err != nil {
					return
				}
				for _, controller := range controllers {
					controllerScale[controller.UID] = controller.scale
					controllerCount++
				}
			}
			if controllerCount == 0 {
				err = fmt.Errorf("asked for percentage, but found no controllers for pod %q", pod.Name)
				dc.recorder.Event(pdb, api.EventTypeWarning, "NoControllers", err.Error())
				return
			} else if controllerCount > 1 {
				err = fmt.Errorf("pod %q has %v>1 controllers", pod.Name, controllerCount)
				dc.recorder.Event(pdb, api.EventTypeWarning, "TooManyControllers", err.Error())
				return
			}
		}

		// 2. Add up all the controllers.
		expectedCount = 0
		for _, count := range controllerScale {
			expectedCount += count
		}

		// 3. Do the math.
		var dh int
		dh, err = intstr.GetValueFromIntOrPercent(&pdb.Spec.MinAvailable, int(expectedCount), true)
		if err != nil {
			return
		}
		desiredHealthy = int32(dh)
	}

	return
}
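
For the percentage case, the denominator is the summed scale of the distinct controllers covering the PDB's pods, and desiredHealthy is that percentage of the sum rounded up. Here is a standalone sketch, with a plain map standing in for the controller-to-scale mapping and hypothetical controller names.

package main

import (
	"fmt"
	"math"
)

// expectedAndDesired sketches the percentage branch of getExpectedPodCount:
// expectedCount is the sum of the scales of all distinct controllers that
// cover the PDB's pods, and desiredHealthy is the minAvailable percentage of
// that sum, rounded up.
func expectedAndDesired(controllerScale map[string]int, minAvailablePercent float64) (expected, desiredHealthy int) {
	for _, scale := range controllerScale {
		expected += scale
	}
	desiredHealthy = int(math.Ceil(minAvailablePercent / 100 * float64(expected)))
	return expected, desiredHealthy
}

func main() {
	// Two hypothetical controllers with scales 3 and 5, minAvailable "60%".
	scales := map[string]int{"rc-a": 3, "rc-b": 5}
	fmt.Println(expectedAndDesired(scales, 60)) // 8 5 (ceil(0.6*8) = 5)
}
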
// Set expectationsCheck to false to bypass expectations check when testing
func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*extensions.ReplicaSet, oldRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, deployment extensions.Deployment, expectationsCheck bool) (bool, error) {
	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further
		return false, nil
	}

	// Check the expectations of deployment before reconciling
	dKey, err := controller.KeyFunc(&deployment)
	if err != nil {
		return false, fmt.Errorf("Couldn't get key for deployment %#v: %v", deployment, err)
	}
	if expectationsCheck && !dc.podExpectations.SatisfiedExpectations(dKey) {
		glog.V(4).Infof("Pod expectations not met yet before reconciling old replica sets\n")
		return false, nil
	}

	minReadySeconds := deployment.Spec.MinReadySeconds
	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	newRSAvailablePodCount, err := deploymentutil.GetAvailablePodsForReplicaSets(dc.client, []*extensions.ReplicaSet{newRS}, minReadySeconds)
	if err != nil {
		return false, fmt.Errorf("could not find available pods: %v", err)
	}

	maxUnavailable, err := intstrutil.GetValueFromIntOrPercent(&deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, deployment.Spec.Replicas)
	if err != nil {
		return false, err
	}

	// Check if we can scale down. We can scale down in the following 2 cases:
	// * Some old replica sets have unhealthy replicas; we can safely scale down those unhealthy replicas since that won't further
	//   increase unavailability.
	// * The new replica set has scaled up and its replicas have become ready, so we can scale down old replica sets in a further step.
	//
	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
	// This takes into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from
	// the newRS, so that unavailable pods from the newRS don't make us scale down old replica sets in a further
	// step (which would increase unavailability).
	//
	// Concrete example:
	//
	// * 10 replicas
	// * 2 maxUnavailable (absolute number, not percent)
	// * 3 maxSurge (absolute number, not percent)
	//
	// case 1:
	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
	// * The new replica set pods crashloop and never become available.
	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
	// * The user notices the crashloop and does kubectl rollout undo to rollback.
	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
	//
	// case 2:
	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
	// allow the new replica set to be scaled up by 5.
	minAvailable := deployment.Spec.Replicas - maxUnavailable
	newRSUnavailablePodCount := newRS.Spec.Replicas - newRSAvailablePodCount
	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
	if maxScaledDown <= 0 {
		return false, nil
	}

	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
	// and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737
	cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, maxScaledDown)
	if err != nil {
		return false, err
	}

	// Scale down old replica sets, need check maxUnavailable to ensure we can scale down
	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment)
	if err != nil {
		return false, err
	}

	totalScaledDown := cleanupCount + scaledDownCount
	if expectationsCheck {
		dc.podExpectations.ExpectDeletions(dKey, totalScaledDown)
	}

	return totalScaledDown > 0, nil
}
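
The maxScaledDown bound described in the comment above can be reproduced with the comment's own numbers; this is a standalone sketch using plain ints, not the controller code itself.

package main

import "fmt"

// maxScaledDown mirrors the bound used above: old replica sets may only be
// scaled down by what is left after reserving minAvailable pods and after
// discounting the new RS's own unavailable pods.
func maxScaledDown(allPods, desired, maxUnavailable, newRSUnavailable int) int {
	minAvailable := desired - maxUnavailable
	return allPods - minAvailable - newRSUnavailable
}

func main() {
	// Case 1 from the comment: 13 pods total, 10 desired, maxUnavailable 2,
	// 5 unavailable pods in the crashlooping new RS -> no scale-down allowed.
	fmt.Println(maxScaledDown(13, 10, 2, 5)) // 0
	// After rolling back, only 1 new-RS pod is unavailable -> 4 pods may go.
	fmt.Println(maxScaledDown(13, 10, 2, 1)) // 4
}
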
// Update all pods for a ReplicationController (oldRc) by creating a new
// controller (newRc) with 0 replicas, and synchronously scaling oldRc and
// newRc until oldRc has 0 replicas and newRc has the original # of desired
// replicas. Cleanup occurs based on a RollingUpdaterCleanupPolicy.
//
// Each interval, the updater will attempt to make progress however it can
// without violating any availability constraints defined by the config. This
// means the amount scaled up or down each interval will vary based on the
// timeliness of readiness and the updater will always try to make progress,
// even slowly.
//
// If an update from newRc to oldRc is already in progress, we attempt to
// drive it to completion. If an error occurs at any step of the update, the
// error will be returned.
//
// A scaling event (either up or down) is considered progress; if no progress
// is made within the config.Timeout, an error is returned.
//
// TODO: make this handle performing a rollback of a partially completed
// rollout.
func (r *RollingUpdater) Update(config *RollingUpdaterConfig) error {
	out := config.Out
	oldRc := config.OldRc
	scaleRetryParams := NewRetryParams(config.Interval, config.Timeout)

	// Find an existing controller (for continuing an interrupted update) or
	// create a new one if necessary.
	sourceId := fmt.Sprintf("%s:%s", oldRc.Name, oldRc.UID)
	newRc, existed, err := r.getOrCreateTargetController(config.NewRc, sourceId)
	if err != nil {
		return err
	}
	if existed {
		fmt.Fprintf(out, "Continuing update with existing controller %s.\n", newRc.Name)
	} else {
		fmt.Fprintf(out, "Created %s\n", newRc.Name)
	}
	// Extract the desired replica count from the controller.
	desired, err := strconv.Atoi(newRc.Annotations[desiredReplicasAnnotation])
	if err != nil {
		return fmt.Errorf("Unable to parse annotation for %s: %s=%s",
			newRc.Name, desiredReplicasAnnotation, newRc.Annotations[desiredReplicasAnnotation])
	}
	// Extract the original replica count from the old controller, adding the
	// annotation if it doesn't yet exist.
	_, hasOriginalAnnotation := oldRc.Annotations[originalReplicasAnnotation]
	if !hasOriginalAnnotation {
		existing, err := r.c.ReplicationControllers(oldRc.Namespace).Get(oldRc.Name)
		if err != nil {
			return err
		}
		if existing.Annotations == nil {
			existing.Annotations = map[string]string{}
		}
		existing.Annotations[originalReplicasAnnotation] = strconv.Itoa(existing.Spec.Replicas)
		updated, err := r.c.ReplicationControllers(existing.Namespace).Update(existing)
		if err != nil {
			return err
		}
		oldRc = updated
	}
	original, err := strconv.Atoi(oldRc.Annotations[originalReplicasAnnotation])
	if err != nil {
		return fmt.Errorf("Unable to parse annotation for %s: %s=%s\n",
			oldRc.Name, originalReplicasAnnotation, oldRc.Annotations[originalReplicasAnnotation])
	}
	// The maximum pods which can go unavailable during the update.
	maxUnavailable, err := intstr.GetValueFromIntOrPercent(&config.MaxUnavailable, desired, false)
	if err != nil {
		return err
	}
	// The maximum scaling increment.
	maxSurge, err := intstr.GetValueFromIntOrPercent(&config.MaxSurge, desired, true)
	if err != nil {
		return err
	}
	// Validate maximums.
	if desired > 0 && maxUnavailable == 0 && maxSurge == 0 {
		return fmt.Errorf("one of maxSurge or maxUnavailable must be specified")
	}
	// The minimum number of pods which must remain available throughout the update,
	// calculated for internal convenience.
	minAvailable := integer.IntMax(0, desired-maxUnavailable)
	// If the desired new scale is 0, then the max unavailable is necessarily
	// the effective scale of the old RC regardless of the configuration
	// (equivalent to 100% maxUnavailable).
	if desired == 0 {
		maxUnavailable = original
		minAvailable = 0
	}

	fmt.Fprintf(out, "Scaling up %s from %d to %d, scaling down %s from %d to 0 (keep %d pods available, don't exceed %d pods)\n",
		newRc.Name, newRc.Spec.Replicas, desired, oldRc.Name, oldRc.Spec.Replicas, minAvailable, original+maxSurge)

	// Scale newRc and oldRc until newRc has the desired number of replicas and
	// oldRc has 0 replicas.
	progressDeadline := time.Now().UnixNano() + config.Timeout.Nanoseconds()
	for newRc.Spec.Replicas != desired || oldRc.Spec.Replicas != 0 {
		// Store the existing replica counts for progress timeout tracking.
		newReplicas := newRc.Spec.Replicas
		oldReplicas := oldRc.Spec.Replicas

		// Scale up as much as possible.
		scaledRc, err := r.scaleUp(newRc, oldRc, original, desired, maxSurge, maxUnavailable, scaleRetryParams, config)
		if err != nil {
			return err
		}
		newRc = scaledRc

		// Wait between scaling operations for things to settle.
		time.Sleep(config.UpdatePeriod)

		// Scale down as much as possible.
		scaledRc, err = r.scaleDown(newRc, oldRc, desired, minAvailable, maxUnavailable, maxSurge, config)
		if err != nil {
			return err
		}
		oldRc = scaledRc

		// If we are making progress, continue to advance the progress deadline.
		// Otherwise, time out with an error.
		progressMade := (newRc.Spec.Replicas != newReplicas) || (oldRc.Spec.Replicas != oldReplicas)
		if progressMade {
			progressDeadline = time.Now().UnixNano() + config.Timeout.Nanoseconds()
		} else if time.Now().UnixNano() > progressDeadline {
			return fmt.Errorf("timed out waiting for any update progress to be made")
		}
	}

	// Housekeeping and cleanup policy execution.
	return r.cleanup(oldRc, newRc, config)
}
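
As a rough sketch of the bounds the updater derives before the loop (keep at least desired-maxUnavailable pods available, never exceed original+maxSurge pods in total, and drop the availability floor entirely when scaling to zero), here is a standalone illustration with plain ints; the names and numbers are assumptions for the example, not part of the original source.

package main

import "fmt"

// updateBounds sketches the bounds computed above: the rollout keeps at least
// minAvailable pods running and never exceeds original+maxSurge pods in total.
// With desired == 0 the old RC may drop straight to zero (100% unavailable).
func updateBounds(original, desired, maxSurge, maxUnavailable int) (minAvailable, maxTotal int) {
	minAvailable = desired - maxUnavailable
	if minAvailable < 0 {
		minAvailable = 0
	}
	if desired == 0 {
		minAvailable = 0
	}
	return minAvailable, original + maxSurge
}

func main() {
	// 10 original, 10 desired, surge 3, unavailable 2:
	// keep at least 8 pods available, never exceed 13 pods.
	fmt.Println(updateBounds(10, 10, 3, 2)) // 8 13
	// Scaling to zero: no availability floor.
	fmt.Println(updateBounds(10, 0, 1, 0)) // 0 11
}
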