func ValidateJobSpec(spec *extensions.JobSpec) errs.ValidationErrorList {
	allErrs := errs.ValidationErrorList{}

	if spec.Parallelism != nil && *spec.Parallelism < 0 {
		allErrs = append(allErrs, errs.NewFieldInvalid("parallelism", spec.Parallelism, isNegativeErrorMsg))
	}
	if spec.Completions != nil && *spec.Completions < 0 {
		allErrs = append(allErrs, errs.NewFieldInvalid("completions", spec.Completions, isNegativeErrorMsg))
	}
	if spec.Selector == nil {
		allErrs = append(allErrs, errs.NewFieldRequired("selector"))
	} else {
		allErrs = append(allErrs, ValidatePodSelector(spec.Selector).Prefix("selector")...)
	}

	if selector, err := extensions.PodSelectorAsSelector(spec.Selector); err == nil {
		labels := labels.Set(spec.Template.Labels)
		if !selector.Matches(labels) {
			allErrs = append(allErrs, errs.NewFieldInvalid("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
		}
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template).Prefix("template")...)
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure &&
		spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
		allErrs = append(allErrs, errs.NewFieldValueNotSupported("template.spec.restartPolicy", spec.Template.Spec.RestartPolicy,
			[]string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
	}
	return allErrs
}
// GetPodJobs returns a list of jobs managing a pod. Returns an error only if no matching jobs are found.
func (s *StoreToJobLister) GetPodJobs(pod *api.Pod) (jobs []extensions.Job, err error) {
	var selector labels.Selector
	var job extensions.Job

	if len(pod.Labels) == 0 {
		err = fmt.Errorf("No jobs found for pod %v because it has no labels", pod.Name)
		return
	}

	for _, m := range s.Store.List() {
		job = *m.(*extensions.Job)
		if job.Namespace != pod.Namespace {
			continue
		}

		selector, _ = extensions.PodSelectorAsSelector(job.Spec.Selector)
		if !selector.Matches(labels.Set(pod.Labels)) {
			continue
		}
		jobs = append(jobs, job)
	}
	if len(jobs) == 0 {
		err = fmt.Errorf("Could not find jobs for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
	}
	return
}
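For orientation, here is a hedged sketch of how the job controller might use this lister from a pod event handler. `enqueueController` and `jobStore` appear elsewhere in this section; the `addPod` handler itself is an assumption, not code from this changeset.

// Sketch only: resolve a pod event to the jobs managing the pod and
// requeue them. addPod is a hypothetical handler name.
func (jm *JobController) addPod(obj interface{}) {
	pod := obj.(*api.Pod)
	jobs, err := jm.jobStore.GetPodJobs(pod)
	if err != nil {
		// GetPodJobs errors only when no job matches; nothing to sync.
		return
	}
	for i := range jobs {
		jm.enqueueController(&jobs[i])
	}
}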
func ValidateJobSpec(spec *extensions.JobSpec) validation.ErrorList {
	allErrs := validation.ErrorList{}

	if spec.Parallelism != nil {
		allErrs = append(allErrs, apivalidation.ValidatePositiveField(int64(*spec.Parallelism), "parallelism")...)
	}
	if spec.Completions != nil {
		allErrs = append(allErrs, apivalidation.ValidatePositiveField(int64(*spec.Completions), "completions")...)
	}
	if spec.Selector == nil {
		allErrs = append(allErrs, validation.NewRequiredError("selector"))
	} else {
		allErrs = append(allErrs, ValidatePodSelector(spec.Selector).Prefix("selector")...)
	}

	if selector, err := extensions.PodSelectorAsSelector(spec.Selector); err == nil {
		labels := labels.Set(spec.Template.Labels)
		if !selector.Matches(labels) {
			allErrs = append(allErrs, validation.NewInvalidError("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
		}
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template).Prefix("template")...)
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure &&
		spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
		allErrs = append(allErrs, validation.NewNotSupportedError("template.spec.restartPolicy", spec.Template.Spec.RestartPolicy,
			[]string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
	}
	return allErrs
}
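To make the selector/template coupling concrete, here is a minimal sketch of a JobSpec that satisfies the job-specific checks above (the generic pod template validation would additionally require containers and other pod fields). The `MatchLabels` field name on `extensions.PodSelector` is an assumption about this API group.

// Sketch only: a minimal JobSpec consistent with the job-specific checks
// in ValidateJobSpec. MatchLabels is an assumed field name.
func newMinimalJobSpec() *extensions.JobSpec {
	return &extensions.JobSpec{
		Selector: &extensions.PodSelector{
			MatchLabels: map[string]string{"job": "pi"},
		},
		Template: api.PodTemplateSpec{
			ObjectMeta: api.ObjectMeta{
				// Must be matched by Selector, or validation reports
				// "selector does not match template".
				Labels: map[string]string{"job": "pi"},
			},
			Spec: api.PodSpec{
				// Jobs accept only OnFailure or Never restart policies.
				RestartPolicy: api.RestartPolicyOnFailure,
			},
		},
	}
}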
// ValidateDaemonSetSpec tests if required fields in the DaemonSetSpec are set.
func ValidateDaemonSetSpec(spec *extensions.DaemonSetSpec) validation.ErrorList {
	allErrs := validation.ErrorList{}

	allErrs = append(allErrs, ValidatePodSelector(spec.Selector)...)

	if spec.Template == nil {
		allErrs = append(allErrs, validation.NewRequiredError("template"))
		return allErrs
	}

	selector, err := extensions.PodSelectorAsSelector(spec.Selector)
	if err == nil && !selector.Matches(labels.Set(spec.Template.Labels)) {
		allErrs = append(allErrs, validation.NewInvalidError("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(spec.Template).Prefix("template")...)
	// Daemons typically run on more than one node, so mark Read-Write persistent disks as invalid.
	allErrs = append(allErrs, apivalidation.ValidateReadOnlyPersistentDisks(spec.Template.Spec.Volumes).Prefix("template.spec.volumes")...)

	// RestartPolicy has already been first-order validated as per ValidatePodTemplateSpec().
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyAlways {
		allErrs = append(allErrs, validation.NewNotSupportedError("template.spec.restartPolicy", spec.Template.Spec.RestartPolicy,
			[]string{string(api.RestartPolicyAlways)}))
	}
	return allErrs
}
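The same selector/template coupling applies to daemon sets, with two differences visible above: `Template` is a pointer, and the only accepted restart policy is `Always`. A hedged sketch, with the same caveat that `MatchLabels` is an assumed field name:

// Sketch only: a minimal DaemonSetSpec consistent with the daemon-set
// specific checks in ValidateDaemonSetSpec. Note Template is a pointer
// here, unlike JobSpec above.
func newMinimalDaemonSetSpec() *extensions.DaemonSetSpec {
	return &extensions.DaemonSetSpec{
		Selector: &extensions.PodSelector{
			MatchLabels: map[string]string{"app": "log-agent"},
		},
		Template: &api.PodTemplateSpec{
			ObjectMeta: api.ObjectMeta{
				Labels: map[string]string{"app": "log-agent"},
			},
			Spec: api.PodSpec{
				// Daemon pods restart in place; only Always is allowed.
				RestartPolicy: api.RestartPolicyAlways,
			},
		},
	}
}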
// GetPodDaemonSets returns a list of daemon sets managing a pod.
// Returns an error if and only if no matching daemon sets are found.
func (s *StoreToDaemonSetLister) GetPodDaemonSets(pod *api.Pod) (daemonSets []extensions.DaemonSet, err error) {
	var selector labels.Selector
	var daemonSet extensions.DaemonSet

	if len(pod.Labels) == 0 {
		err = fmt.Errorf("No daemon sets found for pod %v because it has no labels", pod.Name)
		return
	}

	for _, m := range s.Store.List() {
		daemonSet = *m.(*extensions.DaemonSet)
		if daemonSet.Namespace != pod.Namespace {
			continue
		}

		selector, err = extensions.PodSelectorAsSelector(daemonSet.Spec.Selector)
		if err != nil {
			// this should not happen if the DaemonSet passed validation
			return nil, err
		}
		// If a daemonSet with a nil or empty selector creeps in, it should match nothing, not everything.
		if selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
			continue
		}
		daemonSets = append(daemonSets, daemonSet)
	}
	if len(daemonSets) == 0 {
		err = fmt.Errorf("Could not find daemon set for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
	}
	return
}
// getNodesToDaemonPods returns a map from nodes to daemon pods (corresponding to ds) running on the nodes.
func (dsc *DaemonSetsController) getNodesToDaemonPods(ds *extensions.DaemonSet) (map[string][]*api.Pod, error) {
	nodeToDaemonPods := make(map[string][]*api.Pod)
	selector, err := extensions.PodSelectorAsSelector(ds.Spec.Selector)
	if err != nil {
		return nil, err
	}
	daemonPods, err := dsc.podStore.Pods(ds.Namespace).List(selector)
	if err != nil {
		return nodeToDaemonPods, err
	}
	for i := range daemonPods.Items {
		nodeName := daemonPods.Items[i].Spec.NodeName
		nodeToDaemonPods[nodeName] = append(nodeToDaemonPods[nodeName], &daemonPods.Items[i])
	}
	return nodeToDaemonPods, nil
}
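As a hedged illustration of how this map is typically consumed, the sketch below flags nodes running duplicate daemon pods. It reuses `glog` from elsewhere in this section; the helper itself is hypothetical, not the controller's actual sync logic.

// Sketch only: walk the node -> daemon pods map and log nodes that run
// more than one daemon pod for ds (candidates for cleanup).
func (dsc *DaemonSetsController) logDuplicateDaemonPods(ds *extensions.DaemonSet) error {
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ds)
	if err != nil {
		return err
	}
	for node, daemonPods := range nodeToDaemonPods {
		if len(daemonPods) > 1 {
			glog.V(4).Infof("Node %q runs %d daemon pods for %q", node, len(daemonPods), ds.Name)
		}
	}
	return nil
}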
func (reaper *JobReaper) Stop(namespace, name string, timeout time.Duration, gracePeriod *api.DeleteOptions) error {
	jobs := reaper.Extensions().Jobs(namespace)
	pods := reaper.Pods(namespace)
	scaler, err := ScalerFor(extensions.Kind("Job"), *reaper)
	if err != nil {
		return err
	}
	job, err := jobs.Get(name)
	if err != nil {
		return err
	}
	if timeout == 0 {
		// we will never have more active pods than job.Spec.Parallelism
		parallelism := *job.Spec.Parallelism
		timeout = Timeout + time.Duration(10*parallelism)*time.Second
	}

	// TODO: handle overlapping jobs
	retry := NewRetryParams(reaper.pollInterval, reaper.timeout)
	waitForJobs := NewRetryParams(reaper.pollInterval, timeout)
	if err = scaler.Scale(namespace, name, 0, nil, retry, waitForJobs); err != nil {
		return err
	}

	// At this point only dead pods are left, and they should be removed.
	selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
	options := unversioned.ListOptions{LabelSelector: unversioned.LabelSelector{selector}}
	podList, err := pods.List(options)
	if err != nil {
		return err
	}
	errList := []error{}
	for _, pod := range podList.Items {
		if err := pods.Delete(pod.Name, gracePeriod); err != nil {
			errList = append(errList, err)
		}
	}
	if len(errList) > 0 {
		return utilerrors.NewAggregate(errList)
	}

	// Once all the pods are removed, we can safely remove the job itself.
	if err := jobs.Delete(name, gracePeriod); err != nil {
		return err
	}
	return nil
}
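A hedged sketch of invoking the reaper; how the JobReaper is constructed is outside this snippet, and `api.NewDeleteOptions` is assumed available for building DeleteOptions with a zero grace period.

// Sketch only: stop the job "pi" in the default namespace, letting Stop
// derive its own timeout (timeout == 0) and deleting pods immediately.
func stopJobExample(reaper *JobReaper) error {
	return reaper.Stop("default", "pi", 0, api.NewDeleteOptions(0))
}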
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	job := *obj.(*extensions.Job)
	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
	podList, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	succeeded, failed := getStatus(podList.Items)
	if jobNeedsSync {
		active = jm.manageJob(activePods, succeeded, &job)
	}
	completions := succeeded
	if completions == *job.Spec.Completions {
		job.Status.Conditions = append(job.Status.Conditions, newCondition())
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed
		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing. Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
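`getStatus` and `newCondition` are referenced above but not shown. Here is a plausible sketch of `getStatus`, consistent with its call sites (it must count pods in terminal phases); the real implementation may differ.

// Sketch of getStatus as implied by its call sites: tally pods in the
// terminal phases. Treat this as an assumption, not the shipped code.
func getStatus(pods []api.Pod) (succeeded, failed int) {
	for i := range pods {
		switch pods[i].Status.Phase {
		case api.PodSucceeded:
			succeeded++
		case api.PodFailed:
			failed++
		}
	}
	return
}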
func TestControllerSyncJob(t *testing.T) {
	testCases := map[string]struct {
		// job setup
		parallelism int
		completions int

		// pod setup
		podControllerError error
		activePods         int
		succeededPods      int
		failedPods         int

		// expectations
		expectedCreations int
		expectedDeletions int
		expectedActive    int
		expectedSucceeded int
		expectedFailed    int
		expectedComplete  bool
	}{
		"job start": {
			2, 5,
			nil, 0, 0, 0,
			2, 0, 2, 0, 0, false,
		},
		"correct # of pods": {
			2, 5,
			nil, 2, 0, 0,
			0, 0, 2, 0, 0, false,
		},
		"too few active pods": {
			2, 5,
			nil, 1, 1, 0,
			1, 0, 2, 1, 0, false,
		},
		"too few active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 1, 1, 0,
			0, 0, 1, 1, 0, false,
		},
		"too many active pods": {
			2, 5,
			nil, 3, 0, 0,
			0, 1, 2, 0, 0, false,
		},
		"too many active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 3, 0, 0,
			0, 0, 3, 0, 0, false,
		},
		"failed pod": {
			2, 5,
			nil, 1, 1, 1,
			1, 0, 2, 1, 1, false,
		},
		"job finish": {
			2, 5,
			nil, 0, 5, 0,
			0, 0, 0, 5, 0, true,
		},
		"more active pods than completions": {
			2, 5,
			nil, 10, 0, 0,
			0, 8, 2, 0, 0, false,
		},
		"status change": {
			2, 5,
			nil, 2, 2, 0,
			0, 0, 2, 2, 0, false,
		},
	}

	for name, tc := range testCases {
		// job manager setup
		client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Default.GroupAndVersion()})
		manager := NewJobController(client, controller.NoResyncPeriodFunc)
		fakePodControl := controller.FakePodControl{Err: tc.podControllerError}
		manager.podControl = &fakePodControl
		var job *extensions.Job
		manager.podStoreSynced = func() bool {
			selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
			podList, _ := manager.podStore.Pods(job.Namespace).List(selector)
			active := len(controller.FilterActivePods(podList.Items))
			succeeded, failed := getStatus(podList.Items)
			return active == tc.activePods && succeeded == tc.succeededPods && failed == tc.failedPods
		}
		var actual *extensions.Job
		manager.updateHandler = func(job *extensions.Job) error {
			actual = job
			return nil
		}

		// job & pods setup. Index into the slices so each stored pointer
		// refers to a distinct pod rather than aliasing the loop variable.
		job = newJob(tc.parallelism, tc.completions)
		manager.jobStore.Store.Add(job)
		activePods := newPodList(tc.activePods, api.PodRunning, job)
		for i := range activePods {
			manager.podStore.Store.Add(&activePods[i])
		}
		succeededPods := newPodList(tc.succeededPods, api.PodSucceeded, job)
		for i := range succeededPods {
			manager.podStore.Store.Add(&succeededPods[i])
		}
		failedPods := newPodList(tc.failedPods, api.PodFailed, job)
		for i := range failedPods {
			manager.podStore.Store.Add(&failedPods[i])
		}

		// run
		err := manager.syncJob(getKey(job, t))
		if err != nil {
			t.Errorf("%s: unexpected error when syncing jobs %v", name, err)
		}

		// validate created/deleted pods
		if len(fakePodControl.Templates) != tc.expectedCreations {
			t.Errorf("%s: unexpected number of creates. Expected %d, saw %d\n", name, tc.expectedCreations, len(fakePodControl.Templates))
		}
		if len(fakePodControl.DeletePodName) != tc.expectedDeletions {
			t.Errorf("%s: unexpected number of deletes. Expected %d, saw %d\n", name, tc.expectedDeletions, len(fakePodControl.DeletePodName))
		}

		// validate status
		if actual.Status.Active != tc.expectedActive {
			t.Errorf("%s: unexpected number of active pods. Expected %d, saw %d\n", name, tc.expectedActive, actual.Status.Active)
		}
		if actual.Status.Succeeded != tc.expectedSucceeded {
			t.Errorf("%s: unexpected number of succeeded pods. Expected %d, saw %d\n", name, tc.expectedSucceeded, actual.Status.Succeeded)
		}
		if actual.Status.Failed != tc.expectedFailed {
			t.Errorf("%s: unexpected number of failed pods. Expected %d, saw %d\n", name, tc.expectedFailed, actual.Status.Failed)
		}

		// validate conditions
		if tc.expectedComplete {
			completed := false
			for _, v := range actual.Status.Conditions {
				if v.Type == extensions.JobComplete && v.Status == api.ConditionTrue {
					completed = true
					break
				}
			}
			if !completed {
				t.Errorf("%s: expected completion condition. Got %v", name, actual.Status.Conditions)
			}
		}
	}
}