// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", key)
		jm.queue.Add(key)
		return nil
	}

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	// Check the error before the exists flag; otherwise a store error would be
	// mistaken for a deleted job and silently swallowed.
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	job := *obj.(*batch.Job)

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	selector, _ := unversioned.LabelSelectorAsSelector(job.Spec.Selector)
	podList, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := int32(len(activePods))
	succeeded, failed := getStatus(podList.Items)
	conditions := len(job.Status.Conditions)
	if job.Status.StartTime == nil {
		now := unversioned.Now()
		job.Status.StartTime = &now
	}
	// If the job was finished previously, we don't want to redo the termination.
	if isJobFinished(&job) {
		return nil
	}
	if pastActiveDeadline(&job) {
		// TODO: the code below should be replaced with pod termination resulting in
		// pod failures, rather than killing pods. Unfortunately no such solution
		// exists at the moment. There's an open discussion of the topic in
		// https://github.com/kubernetes/kubernetes/issues/14602 which might give
		// some sort of solution to the above problem.
		// Kill the remaining active pods.
		wait := sync.WaitGroup{}
		wait.Add(int(active))
		for i := int32(0); i < active; i++ {
			go func(ix int32) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, &job); err != nil {
					defer utilruntime.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
		// Update the status values accordingly.
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
		jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline")
	} else {
		if jobNeedsSync {
			active = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		complete := false
		if job.Spec.Completions == nil {
			// This type of job is complete when any pod exits with success.
			// Each pod is capable of determining whether or not the entire Job
			// is done. Subsequent pods are not expected to fail, but if they do,
			// the failure is ignored. Once any pod succeeds, the controller waits
			// for remaining pods to finish, and then the job is complete.
			if succeeded > 0 && active == 0 {
				complete = true
			}
		} else {
			// The Job specifies a number of completions. This type of job signals
			// success by having that number of successes. Since we do not
			// start more pods than there are remaining completions, there should
			// not be any remaining active pods once this count is reached.
			if completions >= *job.Spec.Completions {
				complete = true
				if active > 0 {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached")
				}
				if completions > *job.Spec.Completions {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
				}
			}
		}
		if complete {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
			now := unversioned.Now()
			job.Status.CompletionTime = &now
		}
	}

	// No need to update the job if the status hasn't changed since last time.
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing. Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
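// Illustrative sketch (not part of the excerpt above): syncJob is typically
// driven by worker goroutines that pull keys off the controller's work queue.
// The shape below assumes a workqueue with Get/Done semantics, as in the
// upstream controllers of this era; the worker name and error handling here
// are hypothetical, shown only to place syncJob in context.
func (jm *JobController) worker() {
	for {
		// Get blocks until a key is available or the queue is shut down.
		key, quit := jm.queue.Get()
		if quit {
			return
		}
		func() {
			// Done must be called once processing of this key finishes, so the
			// queue can re-deliver it if it was re-added in the meantime.
			defer jm.queue.Done(key)
			if err := jm.syncJob(key.(string)); err != nil {
				// syncJob re-queues the key itself on the errors it treats as
				// retriable, so the worker only logs here.
				glog.Errorf("Error syncing job: %v", err)
			}
		}()
	}
}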
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rsc.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing ReplicaSet %v", key)
		rsc.queue.Add(key)
		return nil
	}

	obj, exists, err := rsc.rsStore.Store.GetByKey(key)
	// Check the error before the exists flag; otherwise a store error would be
	// mistaken for a deleted ReplicaSet and silently swallowed.
	if err != nil {
		glog.Infof("Unable to retrieve ReplicaSet %v from store: %v", key, err)
		rsc.queue.Add(key)
		return err
	}
	if !exists {
		glog.Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	rs := *obj.(*extensions.ReplicaSet)

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
		return err
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		glog.Errorf("Error converting pod selector to selector: %v", err)
		return err
	}
	podList, err := rsc.podStore.Pods(rs.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for ReplicaSet %q: %v", key, err)
		rsc.queue.Add(key)
		return err
	}

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rsNeedsSync {
		rsc.manageReplicas(filteredPods, &rs)
	}

	// Count the pods whose labels match the labels of the ReplicaSet's pod
	// template; matching pods may carry more labels than the template does.
	// Because the pod template's labels are a superset of the ReplicaSet's
	// selector, every matching pod must already be in filteredPods.
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelector()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}

	// Always update the status as pods come up or die.
	if err := updateReplicaCount(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, len(filteredPods), fullyLabeledReplicasCount); err != nil {
		// Multiple things could lead to this update failing. Requeuing the replica set ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rs.Namespace, rs.Name, err)
		rsc.enqueueReplicaSet(&rs)
	}
	return nil
}
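// Illustrative sketch (not part of the excerpt above): the expectations checked
// by SatisfiedExpectations are raised around pod creations and deletions. A
// manage function records how many creations it is about to issue, and the pod
// event handlers call CreationObserved/DeletionObserved as those pods appear in
// the store. The helper below is a simplified, hypothetical rendering of the
// creation half of that pattern, using the ControllerExpectations and
// PodControlInterface APIs from the controller package.
func (rsc *ReplicaSetController) createReplicas(rs *extensions.ReplicaSet, rsKey string, diff int) {
	// Record that `diff` pod creations are expected; SatisfiedExpectations will
	// return false for this key until they are all observed (or time out).
	rsc.expectations.ExpectCreations(rsKey, diff)
	var wg sync.WaitGroup
	wg.Add(diff)
	for i := 0; i < diff; i++ {
		go func() {
			defer wg.Done()
			if err := rsc.podControl.CreatePods(rs.Namespace, &rs.Spec.Template, rs); err != nil {
				// The creation failed, so the matching AddPod event will never
				// arrive; decrement the expectation ourselves so the ReplicaSet
				// is not stuck waiting for it until the expectations expire.
				rsc.expectations.CreationObserved(rsKey)
				utilruntime.HandleError(err)
			}
		}()
	}
	wg.Wait()
}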