// manageJob is the core method responsible for managing the number of running // pods according to what is specified in the job.Spec. func (jm *JobController) manageJob(activePods []*api.Pod, succeeded int32, job *batch.Job) int32 { var activeLock sync.Mutex active := int32(len(activePods)) parallelism := *job.Spec.Parallelism jobKey, err := controller.KeyFunc(job) if err != nil { glog.Errorf("Couldn't get key for job %#v: %v", job, err) return 0 } if active > parallelism { diff := active - parallelism jm.expectations.ExpectDeletions(jobKey, int(diff)) glog.V(4).Infof("Too many pods running job %q, need %d, deleting %d", jobKey, parallelism, diff) // Sort the pods in the order such that not-ready < ready, unscheduled // < scheduled, and pending < running. This ensures that we delete pods // in the earlier stages whenever possible. sort.Sort(controller.ActivePods(activePods)) active -= diff wait := sync.WaitGroup{} wait.Add(int(diff)) for i := int32(0); i < diff; i++ { go func(ix int32) { defer wait.Done() if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, job); err != nil { defer utilruntime.HandleError(err) // Decrement the expected number of deletes because the informer won't observe this deletion jm.expectations.DeletionObserved(jobKey) activeLock.Lock() active++ activeLock.Unlock() } }(i) } wait.Wait() } else if active < parallelism { wantActive := int32(0) if job.Spec.Completions == nil { // Job does not specify a number of completions. Therefore, number active // should be equal to parallelism, unless the job has seen at least // once success, in which leave whatever is running, running. if succeeded > 0 { wantActive = active } else { wantActive = parallelism } } else { // Job specifies a specific number of completions. Therefore, number // active should not ever exceed number of remaining completions. wantActive = *job.Spec.Completions - succeeded if wantActive > parallelism { wantActive = parallelism } } diff := wantActive - active if diff < 0 { glog.Errorf("More active than wanted: job %q, want %d, have %d", jobKey, wantActive, active) diff = 0 } jm.expectations.ExpectCreations(jobKey, int(diff)) glog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, wantActive, diff) active += diff wait := sync.WaitGroup{} wait.Add(int(diff)) for i := int32(0); i < diff; i++ { go func() { defer wait.Done() if err := jm.podControl.CreatePods(job.Namespace, &job.Spec.Template, job); err != nil { defer utilruntime.HandleError(err) // Decrement the expected number of creates because the informer won't observe this pod jm.expectations.CreationObserved(jobKey) activeLock.Lock() active-- activeLock.Unlock() } }() } wait.Wait() } return active }
// manageReplicas checks and updates replicas for the given ReplicaSet. func (rsc *ReplicaSetController) manageReplicas(filteredPods []*api.Pod, rs *extensions.ReplicaSet) { diff := len(filteredPods) - int(rs.Spec.Replicas) rsKey, err := controller.KeyFunc(rs) if err != nil { glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err) return } if diff < 0 { diff *= -1 if diff > rsc.burstReplicas { diff = rsc.burstReplicas } // TODO: Track UIDs of creates just like deletes. The problem currently // is we'd need to wait on the result of a create to record the pod's // UID, which would require locking *across* the create, which will turn // into a performance bottleneck. We should generate a UID for the pod // beforehand and store it via ExpectCreations. rsc.expectations.ExpectCreations(rsKey, diff) wait := sync.WaitGroup{} wait.Add(diff) glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rs.Namespace, rs.Name, rs.Spec.Replicas, diff) for i := 0; i < diff; i++ { go func() { defer wait.Done() if err := rsc.podControl.CreatePods(rs.Namespace, &rs.Spec.Template, rs); err != nil { // Decrement the expected number of creates because the informer won't observe this pod glog.V(2).Infof("Failed creation, decrementing expectations for replica set %q/%q", rs.Namespace, rs.Name) rsc.expectations.CreationObserved(rsKey) utilruntime.HandleError(err) } }() } wait.Wait() } else if diff > 0 { if diff > rsc.burstReplicas { diff = rsc.burstReplicas } glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", rs.Namespace, rs.Name, rs.Spec.Replicas, diff) // No need to sort pods if we are about to delete all of them if rs.Spec.Replicas != 0 { // Sort the pods in the order such that not-ready < ready, unscheduled // < scheduled, and pending < running. This ensures that we delete pods // in the earlier stages whenever possible. sort.Sort(controller.ActivePods(filteredPods)) } // Snapshot the UIDs (ns/name) of the pods we're expecting to see // deleted, so we know to record their expectations exactly once either // when we see it as an update of the deletion timestamp, or as a delete. // Note that if the labels on a pod/rs change in a way that the pod gets // orphaned, the rs will only wake up after the expectations have // expired even if other pods are deleted. deletedPodKeys := []string{} for i := 0; i < diff; i++ { deletedPodKeys = append(deletedPodKeys, controller.PodKey(filteredPods[i])) } rsc.expectations.ExpectDeletions(rsKey, deletedPodKeys) wait := sync.WaitGroup{} wait.Add(diff) for i := 0; i < diff; i++ { go func(ix int) { defer wait.Done() if err := rsc.podControl.DeletePod(rs.Namespace, filteredPods[ix].Name, rs); err != nil { // Decrement the expected number of deletes because the informer won't observe this deletion podKey := controller.PodKey(filteredPods[ix]) glog.V(2).Infof("Failed to delete %v, decrementing expectations for controller %q/%q", podKey, rs.Namespace, rs.Name) rsc.expectations.DeletionObserved(rsKey, podKey) utilruntime.HandleError(err) } }(i) } wait.Wait() } }