// checkPausedConditions checks if the given deployment is paused or not and adds an appropriate condition.
// These conditions are needed so that we won't accidentally report lack of progress for resumed deployments
// that were paused for longer than progressDeadlineSeconds.
func (dc *DeploymentController) checkPausedConditions(d *extensions.Deployment) error {
	if d.Spec.ProgressDeadlineSeconds == nil {
		return nil
	}
	cond := deploymentutil.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
	if cond != nil && cond.Reason == deploymentutil.TimedOutReason {
		// If we have reported lack of progress, do not overwrite it with a paused condition.
		return nil
	}
	pausedCondExists := cond != nil && cond.Reason == deploymentutil.PausedDeployReason

	needsUpdate := false
	if d.Spec.Paused && !pausedCondExists {
		condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.PausedDeployReason, "Deployment is paused")
		deploymentutil.SetDeploymentCondition(&d.Status, *condition)
		needsUpdate = true
	} else if !d.Spec.Paused && pausedCondExists {
		condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.ResumedDeployReason, "Deployment is resumed")
		deploymentutil.SetDeploymentCondition(&d.Status, *condition)
		needsUpdate = true
	}

	if !needsUpdate {
		return nil
	}

	var err error
	d, err = dc.client.Extensions().Deployments(d.Namespace).UpdateStatus(d)
	return err
}
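// A minimal sketch (hypothetical helper, not part of the controller) of how a
// caller could read back the condition that checkPausedConditions sets: a
// deployment is reported as paused while its Progressing condition carries
// the PausedDeployReason.
func isReportedPaused(d *extensions.Deployment) bool {
	cond := deploymentutil.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
	return cond != nil && cond.Reason == deploymentutil.PausedDeployReason
}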
// Status returns a message describing deployment status, and a bool value indicating if the status is considered done.
func (s *DeploymentStatusViewer) Status(namespace, name string, revision int64) (string, bool, error) {
	deployment, err := s.c.Deployments(namespace).Get(name)
	if err != nil {
		return "", false, err
	}
	if revision > 0 {
		deploymentRev, err := util.Revision(deployment)
		if err != nil {
			return "", false, fmt.Errorf("cannot get the revision of deployment %q: %v", deployment.Name, err)
		}
		if revision != deploymentRev {
			return "", false, fmt.Errorf("desired revision (%d) is different from the running revision (%d)", revision, deploymentRev)
		}
	}
	if deployment.Generation <= deployment.Status.ObservedGeneration {
		cond := util.GetDeploymentCondition(deployment.Status, extensions.DeploymentProgressing)
		if cond != nil && cond.Reason == util.TimedOutReason {
			return "", false, fmt.Errorf("deployment %q exceeded its progress deadline", name)
		}
		if deployment.Status.UpdatedReplicas < deployment.Spec.Replicas {
			return fmt.Sprintf("Waiting for rollout to finish: %d out of %d new replicas have been updated...\n", deployment.Status.UpdatedReplicas, deployment.Spec.Replicas), false, nil
		}
		if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
			return fmt.Sprintf("Waiting for rollout to finish: %d old replicas are pending termination...\n", deployment.Status.Replicas-deployment.Status.UpdatedReplicas), false, nil
		}
		if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
			return fmt.Sprintf("Waiting for rollout to finish: %d of %d updated replicas are available...\n", deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas), false, nil
		}
		return fmt.Sprintf("deployment %q successfully rolled out\n", name), true, nil
	}
	return "Waiting for deployment spec update to be observed...\n", false, nil
}
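// A usage sketch (hypothetical, not part of kubectl): poll Status with revision 0,
// which skips the revision check above, until the rollout is reported as done.
// Assumes the fmt and time packages are imported.
func waitForRollout(viewer *DeploymentStatusViewer, namespace, name string) error {
	for {
		msg, done, err := viewer.Status(namespace, name, 0)
		if err != nil {
			return err
		}
		fmt.Print(msg)
		if done {
			return nil
		}
		// The poll interval is an arbitrary choice for this sketch.
		time.Sleep(2 * time.Second)
	}
}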
// hasFailed determines if a deployment has failed or not by estimating its progress.
// Progress for a deployment is made when a new replica set is created or adopted,
// and when new pods scale up or old pods scale down. Progress is not estimated for
// paused deployments, or when users don't really care about it, i.e. when
// progressDeadlineSeconds is not specified.
func (dc *DeploymentController) hasFailed(d *extensions.Deployment) (bool, error) {
	if d.Spec.ProgressDeadlineSeconds == nil || d.Spec.RollbackTo != nil || d.Spec.Paused {
		return false, nil
	}

	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, false)
	if err != nil {
		return false, err
	}
	// There is a template change so we don't need to check for any progress right now.
	if newRS == nil {
		return false, nil
	}

	// Look at the status of the deployment - if there is already a NewRSAvailableReason
	// then we don't need to estimate any progress. This is needed in order to avoid
	// estimating progress for scaling events after a rollout has finished.
	cond := util.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
	if cond != nil && cond.Reason == util.NewRSAvailableReason {
		return false, nil
	}

	// TODO: Look for permanent failures here.
	// See https://github.com/kubernetes/kubernetes/issues/18568

	allRSs := append(oldRSs, newRS)
	newStatus := dc.calculateStatus(allRSs, newRS, d)

	// If the deployment is complete or it is progressing, there is no need to check if it
	// has timed out.
	if util.DeploymentComplete(d, &newStatus) || util.DeploymentProgressing(d, &newStatus) {
		return false, nil
	}

	// Check if the deployment has timed out.
	return util.DeploymentTimedOut(d, &newStatus), nil
}
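// A hedged sketch of the timeout check that hasFailed delegates to; the real
// util.DeploymentTimedOut handles more cases. The core idea is that a deployment
// times out when its Progressing condition hasn't been updated for longer than
// progressDeadlineSeconds.
func timedOutSketch(d *extensions.Deployment, cond *extensions.DeploymentCondition) bool {
	if cond == nil || d.Spec.ProgressDeadlineSeconds == nil {
		return false
	}
	delta := time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second
	return cond.LastUpdateTime.Add(delta).Before(time.Now())
}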
// checkForProgress checks the progress for the provided deployment. Meant to be called
// by the progressWorker and work on items synced in a secondary queue.
func (dc *DeploymentController) checkForProgress(key string) (bool, error) {
	obj, exists, err := dc.dLister.Indexer.GetByKey(key)
	if err != nil {
		glog.V(2).Infof("Cannot retrieve deployment %q found in the secondary queue: %#v", key, err)
		return false, err
	}
	if !exists {
		return false, nil
	}
	deployment := obj.(*extensions.Deployment)
	cond := util.GetDeploymentCondition(deployment.Status, extensions.DeploymentProgressing)
	// Already marked with a terminal reason - no need to add it back to the main queue.
	if cond != nil && (cond.Reason == util.TimedOutReason || cond.Reason == util.NewRSAvailableReason) {
		return false, nil
	}
	// Deep-copy otherwise we may mutate our cache.
	// TODO: Remove deep-copying from here. This worker does not need to sync the annotations
	// in the deployment.
	d, err := util.DeploymentDeepCopy(deployment)
	if err != nil {
		return false, err
	}
	return dc.hasFailed(d)
}
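// A hypothetical sketch of the worker loop that drives checkForProgress. The
// progressQueue field is an assumption for illustration; only dc.queue appears
// elsewhere in this controller. When a deployment is found to have failed, it
// is handed back to the main queue so syncRolloutStatus can record the timeout
// condition.
func (dc *DeploymentController) progressWorker() {
	for {
		key, quit := dc.progressQueue.Get() // assumed field
		if quit {
			return
		}
		if failed, err := dc.checkForProgress(key.(string)); err == nil && failed {
			dc.queue.Add(key)
		}
		dc.progressQueue.Done(key)
	}
}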
// syncRolloutStatus updates the status of a deployment during a rollout. There are
// cases this helper will run that cannot be prevented by the scaling detection,
// for example a resync of the deployment after it was scaled up. In those cases,
// we shouldn't try to estimate any progress.
func (dc *DeploymentController) syncRolloutStatus(allRSs []*extensions.ReplicaSet, newRS *extensions.ReplicaSet, d *extensions.Deployment) error {
	newStatus := dc.calculateStatus(allRSs, newRS, d)

	// If there is no progressDeadlineSeconds set, remove any Progressing condition.
	if d.Spec.ProgressDeadlineSeconds == nil {
		util.RemoveDeploymentCondition(&newStatus, extensions.DeploymentProgressing)
	}

	// If there is only one active replica set then we are not running a new rollout;
	// this is a resync where we shouldn't estimate any progress for this deployment.
	currentCond := util.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
	isResyncEvent := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == util.NewRSAvailableReason
	// Check for progress only if there is a progress deadline set and the latest rollout
	// hasn't completed yet.
	if d.Spec.ProgressDeadlineSeconds != nil && !isResyncEvent {
		switch {
		case util.DeploymentComplete(d, &newStatus):
			// Update the deployment conditions with a message for the new replica set that
			// was successfully deployed. If the condition already exists, we ignore this update.
			msg := fmt.Sprintf("Replica set %q has successfully progressed.", newRS.Name)
			condition := util.NewDeploymentCondition(extensions.DeploymentProgressing, api.ConditionTrue, util.NewRSAvailableReason, msg)
			util.SetDeploymentCondition(&newStatus, *condition)

		case util.DeploymentProgressing(d, &newStatus):
			// If there is any progress made, continue by not checking if the deployment failed.
			// This behavior emulates the rolling updater progressDeadline check.
			msg := fmt.Sprintf("Replica set %q is progressing.", newRS.Name)
			condition := util.NewDeploymentCondition(extensions.DeploymentProgressing, api.ConditionTrue, util.ReplicaSetUpdatedReason, msg)
			// Update the current Progressing condition or add a new one if it doesn't exist.
			// If a Progressing condition with status=true already exists, we should update
			// everything but lastTransitionTime. SetDeploymentCondition already does that but
			// it also does not update conditions when the reason of the new condition is the
			// same as the old one. The Progressing condition is a special case because we want
			// to update with the same reason and change just lastUpdateTime iff we notice any
			// progress. That's why we handle it here.
			if currentCond != nil {
				if currentCond.Status == api.ConditionTrue {
					condition.LastTransitionTime = currentCond.LastTransitionTime
				}
				util.RemoveDeploymentCondition(&newStatus, extensions.DeploymentProgressing)
			}
			util.SetDeploymentCondition(&newStatus, *condition)

		case util.DeploymentTimedOut(d, &newStatus):
			// Update the deployment with a timeout condition. If the condition already exists,
			// we ignore this update.
			msg := fmt.Sprintf("Replica set %q has timed out progressing.", newRS.Name)
			condition := util.NewDeploymentCondition(extensions.DeploymentProgressing, api.ConditionFalse, util.TimedOutReason, msg)
			util.SetDeploymentCondition(&newStatus, *condition)
		}
	}

	// Move failure conditions of all replica sets into deployment conditions. For now,
	// only one failure condition is returned from getReplicaFailures.
	if replicaFailureCond := dc.getReplicaFailures(allRSs, newRS); len(replicaFailureCond) > 0 {
		// There will be only one ReplicaFailure condition on the replica set.
		util.SetDeploymentCondition(&newStatus, replicaFailureCond[0])
	} else {
		util.RemoveDeploymentCondition(&newStatus, extensions.DeploymentReplicaFailure)
	}

	// Do not update if there is nothing new to add.
	if reflect.DeepEqual(d.Status, newStatus) {
		// TODO: If there is no sign of progress at this point then there is a high chance that the
		// deployment is stuck. We should resync this deployment at some point in the future[1] and
		// check if it has timed out. We definitely need this, otherwise we depend on the controller
		// resync interval. See https://github.com/kubernetes/kubernetes/issues/34458.
		//
		// [1] At time.Now() + progressDeadlineSeconds - lastUpdateTime (of the Progressing condition),
		//     using dc.queue.AddAfter.
		return nil
	}

	newDeployment := d
	newDeployment.Status = newStatus
	_, err := dc.client.Extensions().Deployments(newDeployment.Namespace).UpdateStatus(newDeployment)
	return err
}
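// A sketch of the requeue delay suggested by the TODO above (an assumption, not
// implemented in this controller): resync the deployment once its progress
// deadline may have expired, then hand the result to dc.queue.AddAfter.
func requeueAfterSketch(d *extensions.Deployment, progressingCond *extensions.DeploymentCondition) time.Duration {
	deadline := time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second
	// Delay until the Progressing condition would be considered timed out,
	// clamped at zero so an already-expired deadline resyncs immediately.
	after := progressingCond.LastUpdateTime.Add(deadline).Sub(time.Now())
	if after < 0 {
		return 0
	}
	return after
}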
// getNewReplicaSet returns a replica set that matches the intent of the given deployment, or nil if the new replica set doesn't exist yet.
// 1. Get the existing new RS (the RS that the given deployment targets, whose pod template is the same as the deployment's).
// 2. If there's an existing new RS, update its revision number if it's smaller than (maxOldRevision + 1), where maxOldRevision is the max revision number among all old RSes.
// 3. If there's no existing new RS and createIfNotExisted is true, create one with the appropriate revision number (maxOldRevision + 1) and replicas.
// Note that the pod-template-hash will be added to adopted RSes and pods.
func (dc *DeploymentController) getNewReplicaSet(deployment *extensions.Deployment, rsList, oldRSs []*extensions.ReplicaSet, createIfNotExisted bool) (*extensions.ReplicaSet, error) {
	existingNewRS, err := deploymentutil.FindNewReplicaSet(deployment, rsList)
	if err != nil {
		return nil, err
	}

	// Calculate the max revision number among all old RSes.
	maxOldRevision := deploymentutil.MaxRevision(oldRSs)
	// Calculate the revision number for this new replica set.
	newRevision := strconv.FormatInt(maxOldRevision+1, 10)

	// Latest replica set exists. We need to sync its annotations (this includes copying all but
	// annotationsToSkip from the parent deployment, and updating revision, desiredReplicas,
	// and maxReplicas) and also update the revision annotation in the deployment with the
	// latest revision.
	if existingNewRS != nil {
		objCopy, err := api.Scheme.Copy(existingNewRS)
		if err != nil {
			return nil, err
		}
		rsCopy := objCopy.(*extensions.ReplicaSet)

		// Set the existing new replica set's annotations.
		annotationsUpdated := deploymentutil.SetNewReplicaSetAnnotations(deployment, rsCopy, newRevision, true)
		minReadySecondsNeedsUpdate := rsCopy.Spec.MinReadySeconds != deployment.Spec.MinReadySeconds
		if annotationsUpdated || minReadySecondsNeedsUpdate {
			rsCopy.Spec.MinReadySeconds = deployment.Spec.MinReadySeconds
			return dc.client.Extensions().ReplicaSets(rsCopy.ObjectMeta.Namespace).Update(rsCopy)
		}

		updateConditions := deploymentutil.SetDeploymentRevision(deployment, newRevision)
		// If no other Progressing condition has been recorded and we need to estimate the progress
		// of this deployment then it is likely that old users started caring about progress. In that
		// case we need to take into account the first time we noticed their new replica set.
		cond := deploymentutil.GetDeploymentCondition(deployment.Status, extensions.DeploymentProgressing)
		if deployment.Spec.ProgressDeadlineSeconds != nil && cond == nil {
			msg := fmt.Sprintf("Found new replica set %q", rsCopy.Name)
			condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionTrue, deploymentutil.FoundNewRSReason, msg)
			deploymentutil.SetDeploymentCondition(&deployment.Status, *condition)
			updateConditions = true
		}

		if updateConditions {
			if deployment, err = dc.client.Extensions().Deployments(deployment.Namespace).UpdateStatus(deployment); err != nil {
				return nil, err
			}
		}
		return rsCopy, nil
	}

	if !createIfNotExisted {
		return nil, nil
	}

	// The new ReplicaSet does not exist; create one.
	namespace := deployment.Namespace
	podTemplateSpecHash := podutil.GetPodTemplateSpecHash(deployment.Spec.Template)
	newRSTemplate := deploymentutil.GetNewReplicaSetTemplate(deployment)
	// Add the podTemplateHash label to the selector.
	newRSSelector := labelsutil.CloneSelectorAndAddLabel(deployment.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)

	// Create the new ReplicaSet.
	newRS := extensions.ReplicaSet{
		ObjectMeta: v1.ObjectMeta{
			// Make the name deterministic, to ensure idempotence.
			Name:      deployment.Name + "-" + fmt.Sprintf("%d", podTemplateSpecHash),
			Namespace: namespace,
		},
		Spec: extensions.ReplicaSetSpec{
			Replicas:        func(i int32) *int32 { return &i }(0),
			MinReadySeconds: deployment.Spec.MinReadySeconds,
			Selector:        newRSSelector,
			Template:        newRSTemplate,
		},
	}
	allRSs := append(oldRSs, &newRS)
	newReplicasCount, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, &newRS)
	if err != nil {
		return nil, err
	}

	*(newRS.Spec.Replicas) = newReplicasCount
	// Set the new replica set's annotations.
	deploymentutil.SetNewReplicaSetAnnotations(deployment, &newRS, newRevision, false)
	createdRS, err := dc.client.Extensions().ReplicaSets(namespace).Create(&newRS)
	switch {
	// We may end up hitting this due to a slow cache or a fast resync of the deployment.
	// TODO: Restore once https://github.com/kubernetes/kubernetes/issues/29735 is fixed,
	// i.e. once we start using a new hashing algorithm.
	case errors.IsAlreadyExists(err):
		return nil, err
		// return dc.rsLister.ReplicaSets(namespace).Get(newRS.Name)
	case err != nil:
		msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err)
		if deployment.Spec.ProgressDeadlineSeconds != nil {
			cond := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionFalse, deploymentutil.FailedRSCreateReason, msg)
			deploymentutil.SetDeploymentCondition(&deployment.Status, *cond)
			// We don't really care about this error at this point, since we have a bigger issue to report.
			// TODO: Update the rest of the Deployment status, too. We may need to do this every time we
			// error out in all the other places in the controller so that we let users know that their
			// deployments have been noticed by the controller, albeit with errors.
			// TODO: Identify which errors are permanent and switch DeploymentIsFailed to take these
			// reasons into account as well. Related issue: https://github.com/kubernetes/kubernetes/issues/18568
			_, _ = dc.client.Extensions().Deployments(deployment.ObjectMeta.Namespace).UpdateStatus(deployment)
		}
		dc.eventRecorder.Eventf(deployment, v1.EventTypeWarning, deploymentutil.FailedRSCreateReason, msg)
		return nil, err
	}
	if newReplicasCount > 0 {
		dc.eventRecorder.Eventf(deployment, v1.EventTypeNormal, "ScalingReplicaSet", "Scaled up replica set %s to %d", createdRS.Name, newReplicasCount)
	}

	deploymentutil.SetDeploymentRevision(deployment, newRevision)
	if deployment.Spec.ProgressDeadlineSeconds != nil {
		msg := fmt.Sprintf("Created new replica set %q", createdRS.Name)
		condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionTrue, deploymentutil.NewReplicaSetReason, msg)
		deploymentutil.SetDeploymentCondition(&deployment.Status, *condition)
	}
	_, err = dc.client.Extensions().Deployments(deployment.Namespace).UpdateStatus(deployment)
	return createdRS, err
}
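// A sketch (hypothetical helper) illustrating the deterministic naming above:
// the replica set name is derived from the deployment name plus the pod-template
// hash, so re-running getNewReplicaSet for an unchanged template recomputes the
// same name and the Create call fails with IsAlreadyExists instead of producing
// a duplicate replica set.
func newRSNameSketch(d *extensions.Deployment) string {
	hash := podutil.GetPodTemplateSpecHash(d.Spec.Template)
	return fmt.Sprintf("%s-%d", d.Name, hash)
}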