// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve rc %v from store: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for rc %q: %v", key, err)
		rm.queue.Add(key)
		return err
	}

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rcNeedsSync {
		rm.manageReplicas(filteredPods, &rc)
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, len(filteredPods)); err != nil {
		// Multiple things could lead to this update failing. Requeuing the controller ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rc.Namespace, rc.Name, err)
		rm.enqueueController(&rc)
	}
	return nil
}
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	trace.Step("Expectations restored")

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy it first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rm.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rc's selector
		// anymore but has the stale controller ref.
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rm.podControl, rc.ObjectMeta, labels.Set(rc.Spec.Selector).AsSelectorPreValidated(), getRCKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RC into work queue again. We need to try to free the
			// pods again otherwise they will stuck with the stale
			// controllerRef.
			rm.queue.Add(key)
			return aggregate
		}
	} else {
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelectorPreValidated())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rcNeedsSync && rc.DeletionTimestamp == nil {
		manageReplicasErr = rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replication controller, the matching pods may have more
	// labels than are in the template. Because the label of podTemplateSpec is
	// a superset of the selector of the replication controller, so the possible
	// matching pods must be part of the filteredPods.
	fullyLabeledReplicasCount := 0
	readyReplicasCount := 0
	availableReplicasCount := 0
	templateLabel := labels.Set(rc.Spec.Template.Labels).AsSelectorPreValidated()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
		if api.IsPodReady(pod) {
			readyReplicasCount++
			if api.IsPodAvailable(pod, rc.Spec.MinReadySeconds, unversioned.Now()) {
				availableReplicasCount++
			}
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(
		rm.kubeClient.Core().ReplicationControllers(rc.Namespace),
		rc,
		len(filteredPods),
		fullyLabeledReplicasCount,
		readyReplicasCount,
		availableReplicasCount,
	); err != nil {
		// Multiple things could lead to this update failing. Returning an error causes a requeue without forcing a hotloop
		return err
	}
	return manageReplicasErr
}
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve ReplicaSet %v from store: %v", key, err)
		rsc.queue.Add(key)
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	if !rsc.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing ReplicaSet %v", rs.Name)
		rsc.enqueueReplicaSet(&rs)
		return nil
	}

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
		return err
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := extensions.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		glog.Errorf("Error converting pod selector to selector: %v", err)
		return err
	}
	podList, err := rsc.podStore.Pods(rs.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for ReplicaSet %q: %v", key, err)
		rsc.queue.Add(key)
		return err
	}

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rsNeedsSync {
		rsc.manageReplicas(filteredPods, &rs)
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, len(filteredPods)); err != nil {
		// Multiple things could lead to this update failing. Requeuing the replica set ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rs.Namespace, rs.Name, err)
		rsc.enqueueReplicaSet(&rs)
	}
	return nil
}
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	job := *obj.(*extensions.Job)

	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	selector, _ := extensions.LabelSelectorAsSelector(job.Spec.Selector)
	podList, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	succeeded, failed := getStatus(podList.Items)
	conditions := len(job.Status.Conditions)
	if job.Status.StartTime == nil {
		now := unversioned.Now()
		job.Status.StartTime = &now
	}
	if pastActiveDeadline(&job) {
		// if job was finished previously, we don't want to redo the termination
		if isJobFinished(&job) {
			return nil
		}
		// TODO: below code should be replaced with pod termination resulting in
		// pod failures, rather than killing pods. Unfortunately none such solution
		// exists ATM. There's an open discussion in the topic in
		// https://github.com/kubernetes/kubernetes/issues/14602 which might give
		// some sort of solution to above problem.
		// kill remaining active pods
		wait := sync.WaitGroup{}
		wait.Add(active)
		for i := 0; i < active; i++ {
			go func(ix int) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name); err != nil {
					defer util.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(extensions.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
		jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline")
	} else {
		if jobNeedsSync {
			active = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		if completions == *job.Spec.Completions {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(extensions.JobComplete, "", ""))
			now := unversioned.Now()
			job.Status.CompletionTime = &now
		}
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed
		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing. Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	job := *obj.(*experimental.Job)

	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	podList, err := jm.podStore.Pods(job.Namespace).List(labels.Set(job.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	successful, unsuccessful := getStatus(podList.Items)
	if jobNeedsSync {
		active = jm.manageJob(activePods, successful, unsuccessful, &job)
	}
	completions := successful
	if completions == *job.Spec.Completions {
		job.Status.Conditions = append(job.Status.Conditions, newCondition())
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Successful != successful || job.Status.Unsuccessful != unsuccessful {
		job.Status.Active = active
		job.Status.Successful = successful
		job.Status.Unsuccessful = unsuccessful
		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing. Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rsc.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing ReplicaSet %v", key)
		rsc.queue.Add(key)
		return nil
	}

	obj, exists, err := rsc.rsStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve ReplicaSet %v from store: %v", key, err)
		rsc.queue.Add(key)
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
		return err
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		glog.Errorf("Error converting pod selector to selector: %v", err)
		return err
	}

	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs`s selector
		// anymore but has the stale controller ref.
		podList, err := rsc.podStore.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rs %q: %v", key, err)
			rsc.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(podList.Items)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
			// pods again otherwise they will stuck with the stale
			// controllerRef.
			rsc.queue.Add(key)
			return aggregate
		}
	} else {
		podList, err := rsc.podStore.Pods(rs.Namespace).List(selector)
		if err != nil {
			glog.Errorf("Error getting pods for rs %q: %v", key, err)
			rsc.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(podList.Items)
	}

	if rsNeedsSync && rs.DeletionTimestamp == nil {
		rsc.manageReplicas(filteredPods, &rs)
	}

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replicaSet, the matching pods may have more labels than
	// are in the template. Because the label of podTemplateSpec is a superset
	// of the selector of the replicaset, so the possible matching pods must be
	// part of the filteredPods.
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelector()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, len(filteredPods), fullyLabeledReplicasCount); err != nil {
		// Multiple things could lead to this update failing. Requeuing the replica set ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rs.Namespace, rs.Name, err)
		rsc.enqueueReplicaSet(&rs)
	}
	return nil
}
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve rc %v from store: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	trace.Step("Expectations restored")
	podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for rc %q: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	trace.Step("Pods listed")

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rcNeedsSync {
		rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replication controller, the matching pods may have more
	// labels than are in the template. Because the label of podTemplateSpec is
	// a superset of the selector of the replication controller, so the possible
	// matching pods must be part of the filteredPods.
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rc.Spec.Template.Labels).AsSelector()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, len(filteredPods), fullyLabeledReplicasCount); err != nil {
		// Multiple things could lead to this update failing. Requeuing the controller ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rc.Namespace, rc.Name, err)
		rm.enqueueController(&rc)
	}
	return nil
}
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	job := *obj.(*batch.Job)

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobNeedsSync := jm.expectations.SatisfiedExpectations(key)
	selector, _ := unversioned.LabelSelectorAsSelector(job.Spec.Selector)
	pods, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		return err
	}

	activePods := controller.FilterActivePods(pods)
	active := int32(len(activePods))
	succeeded, failed := getStatus(pods)
	conditions := len(job.Status.Conditions)
	if job.Status.StartTime == nil {
		now := unversioned.Now()
		job.Status.StartTime = &now
	}
	// if job was finished previously, we don't want to redo the termination
	if IsJobFinished(&job) {
		return nil
	}
	if pastActiveDeadline(&job) {
		// TODO: below code should be replaced with pod termination resulting in
		// pod failures, rather than killing pods. Unfortunately none such solution
		// exists ATM. There's an open discussion in the topic in
		// https://github.com/kubernetes/kubernetes/issues/14602 which might give
		// some sort of solution to above problem.
		// kill remaining active pods
		wait := sync.WaitGroup{}
		wait.Add(int(active))
		for i := int32(0); i < active; i++ {
			go func(ix int32) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, &job); err != nil {
					defer utilruntime.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
		jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline")
	} else {
		if jobNeedsSync && job.DeletionTimestamp == nil {
			active = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		complete := false
		if job.Spec.Completions == nil {
			// This type of job is complete when any pod exits with success.
			// Each pod is capable of determining whether or not the entire Job is done.
			// Subsequent pods are not expected to fail, but if they do, the failure is
			// ignored. Once any pod succeeds, the controller waits for remaining pods to
			// finish, and then the job is complete.
			if succeeded > 0 && active == 0 {
				complete = true
			}
		} else {
			// Job specifies a number of completions. This type of job signals
			// success by having that number of successes. Since we do not
			// start more pods than there are remaining completions, there should
			// not be any remaining active pods once this count is reached.
			if completions >= *job.Spec.Completions {
				complete = true
				if active > 0 {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached")
				}
				if completions > *job.Spec.Completions {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
				}
			}
		}
		if complete {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
			now := unversioned.Now()
			job.Status.CompletionTime = &now
		}
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed
		if err := jm.updateHandler(&job); err != nil {
			return err
		}
	}
	return nil
}
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsLister.Indexer.GetByKey(key)
	if !exists {
		glog.V(4).Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err))
		// Explicitly return nil to avoid re-enqueue bad key
		return nil
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(key)
	selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Error converting pod selector to selector: %v", err))
		return nil
	}

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy it first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs`s selector
		// anymore but has the stale controller ref.
		pods, err := rsc.podLister.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
			// pods again otherwise they will stuck with the stale
			// controllerRef.
			return aggregate
		}
	} else {
		pods, err := rsc.podLister.Pods(rs.Namespace).List(selector)
		if err != nil {
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rsNeedsSync && rs.DeletionTimestamp == nil {
		manageReplicasErr = rsc.manageReplicas(filteredPods, &rs)
	}

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replicaSet, the matching pods may have more labels than
	// are in the template. Because the label of podTemplateSpec is a superset
	// of the selector of the replicaset, so the possible matching pods must be
	// part of the filteredPods.
	fullyLabeledReplicasCount := 0
	readyReplicasCount := 0
	availableReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelectorPreValidated()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
		if api.IsPodReady(pod) {
			readyReplicasCount++
			if api.IsPodAvailable(pod, rs.Spec.MinReadySeconds, unversioned.Now()) {
				availableReplicasCount++
			}
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(
		rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace),
		rs,
		len(filteredPods),
		fullyLabeledReplicasCount,
		readyReplicasCount,
		availableReplicasCount,
	); err != nil {
		// Multiple things could lead to this update failing. Returning an error causes a
		// requeue without forcing a hotloop.
		return err
	}
	return manageReplicasErr
}
func TestControllerSyncJob(t *testing.T) {
	testCases := map[string]struct {
		// job setup
		parallelism int
		completions int

		// pod setup
		podControllerError error
		activePods         int
		succeededPods      int
		failedPods         int

		// expectations
		expectedCreations int
		expectedDeletions int
		expectedActive    int
		expectedSucceeded int
		expectedFailed    int
		expectedComplete  bool
	}{
		"job start": {
			2, 5, nil, 0, 0, 0,
			2, 0, 2, 0, 0, false,
		},
		"correct # of pods": {
			2, 5, nil, 2, 0, 0,
			0, 0, 2, 0, 0, false,
		},
		"too few active pods": {
			2, 5, nil, 1, 1, 0,
			1, 0, 2, 1, 0, false,
		},
		"too few active pods, with controller error": {
			2, 5, fmt.Errorf("Fake error"), 1, 1, 0,
			0, 0, 1, 1, 0, false,
		},
		"too many active pods": {
			2, 5, nil, 3, 0, 0,
			0, 1, 2, 0, 0, false,
		},
		"too many active pods, with controller error": {
			2, 5, fmt.Errorf("Fake error"), 3, 0, 0,
			0, 0, 3, 0, 0, false,
		},
		"failed pod": {
			2, 5, nil, 1, 1, 1,
			1, 0, 2, 1, 1, false,
		},
		"job finish": {
			2, 5, nil, 0, 5, 0,
			0, 0, 0, 5, 0, true,
		},
		"more active pods than completions": {
			2, 5, nil, 10, 0, 0,
			0, 8, 2, 0, 0, false,
		},
		"status change": {
			2, 5, nil, 2, 2, 0,
			0, 0, 2, 2, 0, false,
		},
	}

	for name, tc := range testCases {
		// job manager setup
		client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Default.GroupAndVersion()})
		manager := NewJobController(client, controller.NoResyncPeriodFunc)
		fakePodControl := controller.FakePodControl{Err: tc.podControllerError}
		manager.podControl = &fakePodControl
		var job *extensions.Job
		manager.podStoreSynced = func() bool {
			selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
			podList, _ := manager.podStore.Pods(job.Namespace).List(selector)
			active := len(controller.FilterActivePods(podList.Items))
			succeeded, failed := getStatus(podList.Items)
			return active == tc.activePods && succeeded == tc.succeededPods && failed == tc.failedPods
		}
		var actual *extensions.Job
		manager.updateHandler = func(job *extensions.Job) error {
			actual = job
			return nil
		}

		// job & pods setup
		job = newJob(tc.parallelism, tc.completions)
		manager.jobStore.Store.Add(job)
		for _, pod := range newPodList(tc.activePods, api.PodRunning, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.succeededPods, api.PodSucceeded, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.failedPods, api.PodFailed, job) {
			manager.podStore.Store.Add(&pod)
		}

		// run
		err := manager.syncJob(getKey(job, t))
		if err != nil {
			t.Errorf("%s: unexpected error when syncing jobs %v", name, err)
		}

		// validate created/deleted pods
		if len(fakePodControl.Templates) != tc.expectedCreations {
			t.Errorf("%s: unexpected number of creates. Expected %d, saw %d\n", name, tc.expectedCreations, len(fakePodControl.Templates))
		}
		if len(fakePodControl.DeletePodName) != tc.expectedDeletions {
			t.Errorf("%s: unexpected number of deletes. Expected %d, saw %d\n", name, tc.expectedDeletions, len(fakePodControl.DeletePodName))
		}

		// validate status
		if actual.Status.Active != tc.expectedActive {
			t.Errorf("%s: unexpected number of active pods. Expected %d, saw %d\n", name, tc.expectedActive, actual.Status.Active)
		}
		if actual.Status.Succeeded != tc.expectedSucceeded {
			t.Errorf("%s: unexpected number of succeeded pods. Expected %d, saw %d\n", name, tc.expectedSucceeded, actual.Status.Succeeded)
		}
		if actual.Status.Failed != tc.expectedFailed {
			t.Errorf("%s: unexpected number of failed pods. Expected %d, saw %d\n", name, tc.expectedFailed, actual.Status.Failed)
		}

		// validate conditions
		if tc.expectedComplete {
			completed := false
			for _, v := range actual.Status.Conditions {
				if v.Type == extensions.JobComplete && v.Status == api.ConditionTrue {
					completed = true
					break
				}
			}
			if !completed {
				t.Errorf("%s: expected completion condition. Got %v", name, actual.Status.Conditions)
			}
		}
	}
}
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsLister.Indexer.GetByKey(key)
	if !exists {
		glog.V(4).Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	rsNeedsSync := rsc.expectations.SatisfiedExpectations(key)
	selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Error converting pod selector to selector: %v", err))
		return nil
	}

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy it first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*v1.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs`s selector
		// anymore but has the stale controller ref.
		pods, err := rsc.podLister.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		// Adopt pods only if this replica set is not going to be deleted.
		if rs.DeletionTimestamp == nil {
			for _, pod := range matchesNeedsController {
				err := cm.AdoptPod(pod)
				// continue to next pod if adoption fails.
				if err != nil {
					// If the pod no longer exists, don't even log the error.
					if !errors.IsNotFound(err) {
						utilruntime.HandleError(err)
					}
				} else {
					matchesAndControlled = append(matchesAndControlled, pod)
				}
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
			// pods again otherwise they will stuck with the stale
			// controllerRef.
			return aggregate
		}
	} else {
		pods, err := rsc.podLister.Pods(rs.Namespace).List(selector)
		if err != nil {
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rsNeedsSync && rs.DeletionTimestamp == nil {
		manageReplicasErr = rsc.manageReplicas(filteredPods, &rs)
	}

	newStatus := calculateStatus(rs, filteredPods, manageReplicasErr)

	// Always updates status as pods come up or die.
	if err := updateReplicaSetStatus(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, newStatus); err != nil {
		// Multiple things could lead to this update failing. Returning an error causes a
		// requeue without forcing a hotloop.
		return err
	}
	return manageReplicasErr
}
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rc := *obj.(*v1.ReplicationController)

	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(key)
	trace.Step("Expectations restored")

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy it first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*v1.Pod
	if rm.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rc's selector
		// anymore but has the stale controller ref.
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rm.podControl, rc.ObjectMeta, labels.Set(rc.Spec.Selector).AsSelectorPreValidated(), getRCKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		// Adopt pods only if this replication controller is not going to be deleted.
		if rc.DeletionTimestamp == nil {
			for _, pod := range matchesNeedsController {
				err := cm.AdoptPod(pod)
				// continue to next pod if adoption fails.
				if err != nil {
					// If the pod no longer exists, don't even log the error.
					if !errors.IsNotFound(err) {
						utilruntime.HandleError(err)
					}
				} else {
					matchesAndControlled = append(matchesAndControlled, pod)
				}
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RC into work queue again. We need to try to free the
			// pods again otherwise they will stuck with the stale
			// controllerRef.
			rm.queue.Add(key)
			return aggregate
		}
	} else {
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelectorPreValidated())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rcNeedsSync && rc.DeletionTimestamp == nil {
		manageReplicasErr = rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	newStatus := calculateStatus(rc, filteredPods, manageReplicasErr)

	// Always updates status as pods come up or die.
	if err := updateReplicationControllerStatus(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, newStatus); err != nil {
		// Multiple things could lead to this update failing. Returning an error causes a requeue without forcing a hotloop
		return err
	}
	return manageReplicasErr
}
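// ---------------------------------------------------------------------------
// Illustrative sketch (not taken from the Kubernetes source): every variant
// above follows the same sync-loop shape - fetch the object from the local
// cache, skip reconciliation while expectations are unsatisfied or the object
// is being deleted, reconcile the pod count, then always write status back and
// rely on the work queue to retry when an error is returned. The skeleton
// below restates that shape with hypothetical names (Controller, sync,
// reconcile, updateStatus are placeholders, not real client-go APIs).
package main

import "fmt"

// Object is a stand-in for a cached controller object (RC, ReplicaSet, Job).
type Object struct {
	Deleted bool     // stand-in for a non-nil DeletionTimestamp
	Pods    []string // stand-in for the pods matching the selector
}

// Controller is a stand-in for ReplicationManager / ReplicaSetController / JobController.
type Controller struct {
	store        map[string]*Object              // local cache keyed by namespace/name
	expectations map[string]bool                 // true when no creates/deletes are still pending
	updateStatus func(key string, n int) error   // stand-in for updateReplicaCount / updateHandler
}

// sync mirrors the structure of the functions above: fetch from cache, drop
// expectations for deleted objects, reconcile only when expectations are
// satisfied, and always update status last.
func (c *Controller) sync(key string) error {
	obj, ok := c.store[key]
	if !ok {
		delete(c.expectations, key) // object gone: forget its expectations
		return nil
	}
	needsSync := c.expectations[key]
	active := len(obj.Pods) // stand-in for "list and filter active pods"

	var reconcileErr error
	if needsSync && !obj.Deleted {
		reconcileErr = c.reconcile(obj) // stand-in for manageReplicas / manageJob
	}
	// Always update status; returning an error lets the work queue requeue the key.
	if err := c.updateStatus(key, active); err != nil {
		return err
	}
	return reconcileErr
}

func (c *Controller) reconcile(obj *Object) error { return nil }

func main() {
	c := &Controller{
		store:        map[string]*Object{"default/rc": {Pods: []string{"p1", "p2"}}},
		expectations: map[string]bool{"default/rc": true},
		updateStatus: func(key string, n int) error { fmt.Println(key, "active:", n); return nil },
	}
	_ = c.sync("default/rc")
}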