Example #1
func ValidateJobSpec(spec *extensions.JobSpec) errs.ValidationErrorList {
	allErrs := errs.ValidationErrorList{}

	if spec.Parallelism != nil && *spec.Parallelism < 0 {
		allErrs = append(allErrs, errs.NewFieldInvalid("parallelism", spec.Parallelism, isNegativeErrorMsg))
	}
	if spec.Completions != nil && *spec.Completions < 0 {
		allErrs = append(allErrs, errs.NewFieldInvalid("completions", spec.Completions, isNegativeErrorMsg))
	}
	if spec.Selector == nil {
		allErrs = append(allErrs, errs.NewFieldRequired("selector"))
	} else {
		allErrs = append(allErrs, ValidatePodSelector(spec.Selector).Prefix("selector")...)
	}

	if selector, err := extensions.PodSelectorAsSelector(spec.Selector); err == nil {
		labels := labels.Set(spec.Template.Labels)
		if !selector.Matches(labels) {
			allErrs = append(allErrs, errs.NewFieldInvalid("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
		}
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template).Prefix("template")...)
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure &&
		spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
		allErrs = append(allErrs, errs.NewFieldValueNotSupported("template.spec.restartPolicy",
			spec.Template.Spec.RestartPolicy, []string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
	}
	return allErrs
}
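A minimal caller sketch for the validator above. The literal shapes of PodSelector and PodTemplateSpec are assumptions reconstructed from the identifiers in the example, not checked against a specific release.

func exampleValidateJobSpec() {
	parallelism, completions := 2, 5
	spec := &extensions.JobSpec{
		Parallelism: &parallelism,
		Completions: &completions,
		Selector:    &extensions.PodSelector{MatchLabels: map[string]string{"app": "demo"}},
		Template: api.PodTemplateSpec{
			ObjectMeta: api.ObjectMeta{Labels: map[string]string{"app": "demo"}},
			Spec:       api.PodSpec{RestartPolicy: api.RestartPolicyOnFailure},
		},
	}
	if errs := ValidateJobSpec(spec); len(errs) != 0 {
		// Each entry names the offending field, e.g. "template.spec.restartPolicy".
		fmt.Printf("invalid JobSpec: %v\n", errs)
	}
}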
Example #2
// GetPodJobs returns a list of jobs managing a pod. Returns an error only if no matching jobs are found.
func (s *StoreToJobLister) GetPodJobs(pod *api.Pod) (jobs []extensions.Job, err error) {
	var selector labels.Selector
	var job extensions.Job

	if len(pod.Labels) == 0 {
		err = fmt.Errorf("No jobs found for pod %v because it has no labels", pod.Name)
		return
	}

	for _, m := range s.Store.List() {
		job = *m.(*extensions.Job)
		if job.Namespace != pod.Namespace {
			continue
		}

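		// A selector that survived validation converts cleanly, so the error is deliberately ignored.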
		selector, _ = extensions.PodSelectorAsSelector(job.Spec.Selector)
		if !selector.Matches(labels.Set(pod.Labels)) {
			continue
		}
		jobs = append(jobs, job)
	}
	if len(jobs) == 0 {
		err = fmt.Errorf("Could not find jobs for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
	}
	return
}
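A sketch of calling GetPodJobs; it assumes `lister` is a *StoreToJobLister whose underlying store has been populated elsewhere (e.g. by a reflector).

func exampleGetPodJobs(lister *StoreToJobLister) {
	pod := &api.Pod{ObjectMeta: api.ObjectMeta{
		Name:      "worker-1",
		Namespace: "default",
		Labels:    map[string]string{"job-name": "crunch"},
	}}
	jobs, err := lister.GetPodJobs(pod)
	if err != nil {
		// Per the contract above, err means no job's selector matched the pod.
		fmt.Printf("no jobs manage %s: %v\n", pod.Name, err)
		return
	}
	for i := range jobs {
		fmt.Println("managed by job", jobs[i].Name)
	}
}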
Example #3
func ValidateJobSpec(spec *extensions.JobSpec) validation.ErrorList {
	allErrs := validation.ErrorList{}

	if spec.Parallelism != nil {
		allErrs = append(allErrs, apivalidation.ValidatePositiveField(int64(*spec.Parallelism), "parallelism")...)
	}
	if spec.Completions != nil {
		allErrs = append(allErrs, apivalidation.ValidatePositiveField(int64(*spec.Completions), "completions")...)
	}
	if spec.Selector == nil {
		allErrs = append(allErrs, validation.NewRequiredError("selector"))
	} else {
		allErrs = append(allErrs, ValidatePodSelector(spec.Selector).Prefix("selector")...)
	}

	if selector, err := extensions.PodSelectorAsSelector(spec.Selector); err == nil {
		labels := labels.Set(spec.Template.Labels)
		if !selector.Matches(labels) {
			allErrs = append(allErrs, validation.NewInvalidError("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
		}
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(&spec.Template).Prefix("template")...)
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyOnFailure &&
		spec.Template.Spec.RestartPolicy != api.RestartPolicyNever {
		allErrs = append(allErrs, validation.NewNotSupportedError("template.spec.restartPolicy",
			spec.Template.Spec.RestartPolicy, []string{string(api.RestartPolicyOnFailure), string(api.RestartPolicyNever)}))
	}
	return allErrs
}
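This variant is the same validator as Example #1, rewritten against newer validation helpers: errs.ValidationErrorList becomes validation.ErrorList, the hand-rolled negative checks are folded into apivalidation.ValidatePositiveField, and the error constructors change names (NewFieldRequired → NewRequiredError, NewFieldInvalid → NewInvalidError, NewFieldValueNotSupported → NewNotSupportedError).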
Example #4
// ValidateDaemonSetSpec tests if required fields in the DaemonSetSpec are set.
func ValidateDaemonSetSpec(spec *extensions.DaemonSetSpec) validation.ErrorList {
	allErrs := validation.ErrorList{}

	allErrs = append(allErrs, ValidatePodSelector(spec.Selector)...)

	if spec.Template == nil {
		allErrs = append(allErrs, validation.NewRequiredError("template"))
		return allErrs
	}

	selector, err := extensions.PodSelectorAsSelector(spec.Selector)
	if err == nil && !selector.Matches(labels.Set(spec.Template.Labels)) {
		allErrs = append(allErrs, validation.NewInvalidError("template.metadata.labels", spec.Template.Labels, "selector does not match template"))
	}

	allErrs = append(allErrs, apivalidation.ValidatePodTemplateSpec(spec.Template).Prefix("template")...)
	// Daemons typically run on more than one node, so mark Read-Write persistent disks as invalid.
	allErrs = append(allErrs, apivalidation.ValidateReadOnlyPersistentDisks(spec.Template.Spec.Volumes).Prefix("template.spec.volumes")...)
	// RestartPolicy has already been first-order validated as per ValidatePodTemplateSpec().
	if spec.Template.Spec.RestartPolicy != api.RestartPolicyAlways {
		allErrs = append(allErrs, validation.NewNotSupportedError("template.spec.restartPolicy", spec.Template.Spec.RestartPolicy, []string{string(api.RestartPolicyAlways)}))
	}

	return allErrs
}
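Compared with the Job validators above, two details stand out: the pod template is a pointer that may be nil, so a missing template short-circuits the remaining checks, and the only restart policy a DaemonSet accepts is Always, since daemon pods are meant to keep running rather than terminate. The read-only persistent-disk check is also unique to this validator, because daemons typically run on more than one node.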
Example #5
// GetPodDaemonSets returns a list of daemon sets managing a pod.
// Returns an error only if no matching daemon sets are found (or a stored selector fails to convert, which should not happen once a DaemonSet has passed validation).
func (s *StoreToDaemonSetLister) GetPodDaemonSets(pod *api.Pod) (daemonSets []extensions.DaemonSet, err error) {
	var selector labels.Selector
	var daemonSet extensions.DaemonSet

	if len(pod.Labels) == 0 {
		err = fmt.Errorf("No daemon sets found for pod %v because it has no labels", pod.Name)
		return
	}

	for _, m := range s.Store.List() {
		daemonSet = *m.(*extensions.DaemonSet)
		if daemonSet.Namespace != pod.Namespace {
			continue
		}
		selector, err = extensions.PodSelectorAsSelector(daemonSet.Spec.Selector)
		if err != nil {
			// this should not happen if the DaemonSet passed validation
			return nil, err
		}

		// If a daemonSet with a nil or empty selector creeps in, it should match nothing, not everything.
		if selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
			continue
		}
		daemonSets = append(daemonSets, daemonSet)
	}
	if len(daemonSets) == 0 {
		err = fmt.Errorf("Could not find daemon set for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
	}
	return
}
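The shape mirrors GetPodJobs in Example #2, with two defensive differences: a selector that fails to convert is treated as a hard error rather than being ignored, and an empty selector is explicitly skipped so that a DaemonSet with a nil or empty selector matches nothing instead of everything.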
Example #6
// getNodesToDaemonPods returns a map from nodes to the daemon pods (corresponding to ds) running on them.
func (dsc *DaemonSetsController) getNodesToDaemonPods(ds *extensions.DaemonSet) (map[string][]*api.Pod, error) {
	nodeToDaemonPods := make(map[string][]*api.Pod)
	selector, err := extensions.PodSelectorAsSelector(ds.Spec.Selector)
	if err != nil {
		return nil, err
	}
	daemonPods, err := dsc.podStore.Pods(ds.Namespace).List(selector)
	if err != nil {
		return nodeToDaemonPods, err
	}
	for i := range daemonPods.Items {
		nodeName := daemonPods.Items[i].Spec.NodeName
		nodeToDaemonPods[nodeName] = append(nodeToDaemonPods[nodeName], &daemonPods.Items[i])
	}
	return nodeToDaemonPods, nil
}
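A short consumer sketch: the returned map keys on Spec.NodeName, so duplicate daemon pods on a node are easy to spot. It assumes dsc and ds are in scope as in the method above.

	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ds)
	if err != nil {
		return err
	}
	for node, pods := range nodeToDaemonPods {
		if len(pods) > 1 {
			// More than one daemon pod per node means the controller has cleanup to do.
			glog.V(4).Infof("node %s runs %d copies of %s", node, len(pods), ds.Name)
		}
	}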
Example #7
func (reaper *JobReaper) Stop(namespace, name string, timeout time.Duration, gracePeriod *api.DeleteOptions) error {
	jobs := reaper.Extensions().Jobs(namespace)
	pods := reaper.Pods(namespace)
	scaler, err := ScalerFor(extensions.Kind("Job"), *reaper)
	if err != nil {
		return err
	}
	job, err := jobs.Get(name)
	if err != nil {
		return err
	}
	if timeout == 0 {
		// we will never have more active pods than job.Spec.Parallelism
		parallelism := *job.Spec.Parallelism
		timeout = Timeout + time.Duration(10*parallelism)*time.Second
	}

	// TODO: handle overlapping jobs
	retry := NewRetryParams(reaper.pollInterval, reaper.timeout)
	waitForJobs := NewRetryParams(reaper.pollInterval, timeout)
	if err = scaler.Scale(namespace, name, 0, nil, retry, waitForJobs); err != nil {
		return err
	}
	// at this point only dead pods are left, and they should be removed
	selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
	options := unversioned.ListOptions{LabelSelector: unversioned.LabelSelector{selector}}
	podList, err := pods.List(options)
	if err != nil {
		return err
	}
	errList := []error{}
	for _, pod := range podList.Items {
		if err := pods.Delete(pod.Name, gracePeriod); err != nil {
			errList = append(errList, err)
		}
	}
	if len(errList) > 0 {
		return utilerrors.NewAggregate(errList)
	}
	// once we have all the pods removed we can safely remove the job itself
	if err := jobs.Delete(name, gracePeriod); err != nil {
		return err
	}
	return nil
}
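A usage sketch for the reaper; it assumes `reaper` was constructed elsewhere (the ReaperFor factory is an assumption from this era) and that api.NewDeleteOptions is the graceful-deletion helper. Passing a zero timeout lets Stop derive one from Spec.Parallelism as shown above.

	gracePeriod := api.NewDeleteOptions(30) // assumed helper: 30-second graceful deletion
	if err := reaper.Stop("default", "crunch", 0, gracePeriod); err != nil {
		fmt.Printf("failed to stop job: %v\n", err)
	}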
Example #8
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	job := *obj.(*extensions.Job)
	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
	podList, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	succeeded, failed := getStatus(podList.Items)
	if jobNeedsSync {
		active = jm.manageJob(activePods, succeeded, &job)
	}
	completions := succeeded
	if job.Spec.Completions != nil && completions == *job.Spec.Completions {
		job.Status.Conditions = append(job.Status.Conditions, newCondition())
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing.  Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
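For context, a sketch of the worker loop that would typically drive syncJob, modeled on the upstream controller pattern of this era; the exact queue methods are assumptions.

func (jm *JobController) worker() {
	for {
		key, quit := jm.queue.Get()
		if quit {
			return
		}
		// syncJob re-queues itself on transient errors (see above), so the
		// worker only logs and moves on.
		if err := jm.syncJob(key.(string)); err != nil {
			glog.Errorf("Error syncing job %v: %v", key, err)
		}
		jm.queue.Done(key)
	}
}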
Example #9
func TestControllerSyncJob(t *testing.T) {
	testCases := map[string]struct {
		// job setup
		parallelism int
		completions int

		// pod setup
		podControllerError error
		activePods         int
		succeededPods      int
		failedPods         int

		// expectations
		expectedCreations int
		expectedDeletions int
		expectedActive    int
		expectedSucceeded int
		expectedFailed    int
		expectedComplete  bool
	}{
		"job start": {
			2, 5,
			nil, 0, 0, 0,
			2, 0, 2, 0, 0, false,
		},
		"correct # of pods": {
			2, 5,
			nil, 2, 0, 0,
			0, 0, 2, 0, 0, false,
		},
		"too few active pods": {
			2, 5,
			nil, 1, 1, 0,
			1, 0, 2, 1, 0, false,
		},
		"too few active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 1, 1, 0,
			0, 0, 1, 1, 0, false,
		},
		"too many active pods": {
			2, 5,
			nil, 3, 0, 0,
			0, 1, 2, 0, 0, false,
		},
		"too many active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 3, 0, 0,
			0, 0, 3, 0, 0, false,
		},
		"failed pod": {
			2, 5,
			nil, 1, 1, 1,
			1, 0, 2, 1, 1, false,
		},
		"job finish": {
			2, 5,
			nil, 0, 5, 0,
			0, 0, 0, 5, 0, true,
		},
		"more active pods than completions": {
			2, 5,
			nil, 10, 0, 0,
			0, 8, 2, 0, 0, false,
		},
		"status change": {
			2, 5,
			nil, 2, 2, 0,
			0, 0, 2, 2, 0, false,
		},
	}

	for name, tc := range testCases {
		// job manager setup
		client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Default.GroupAndVersion()})
		manager := NewJobController(client, controller.NoResyncPeriodFunc)
		fakePodControl := controller.FakePodControl{Err: tc.podControllerError}
		manager.podControl = &fakePodControl
		var job *extensions.Job
		manager.podStoreSynced = func() bool {
			selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
			podList, _ := manager.podStore.Pods(job.Namespace).List(selector)
			active := len(controller.FilterActivePods(podList.Items))
			succeeded, failed := getStatus(podList.Items)
			return active == tc.activePods && succeeded == tc.succeededPods && failed == tc.failedPods
		}
		var actual *extensions.Job
		manager.updateHandler = func(job *extensions.Job) error {
			actual = job
			return nil
		}

		// job & pods setup
		job = newJob(tc.parallelism, tc.completions)
		manager.jobStore.Store.Add(job)
		for _, pod := range newPodList(tc.activePods, api.PodRunning, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.succeededPods, api.PodSucceeded, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.failedPods, api.PodFailed, job) {
			manager.podStore.Store.Add(&pod)
		}

		// run
		err := manager.syncJob(getKey(job, t))
		if err != nil {
			t.Errorf("%s: unexpected error when syncing jobs %v", err)
		}

		// validate created/deleted pods
		if len(fakePodControl.Templates) != tc.expectedCreations {
			t.Errorf("%s: unexpected number of creates.  Expected %d, saw %d\n", name, tc.expectedCreations, len(fakePodControl.Templates))
		}
		if len(fakePodControl.DeletePodName) != tc.expectedDeletions {
			t.Errorf("%s: unexpected number of deletes.  Expected %d, saw %d\n", name, tc.expectedDeletions, len(fakePodControl.DeletePodName))
		}
		// validate status
		if actual.Status.Active != tc.expectedActive {
			t.Errorf("%s: unexpected number of active pods.  Expected %d, saw %d\n", name, tc.expectedActive, actual.Status.Active)
		}
		if actual.Status.Succeeded != tc.expectedSucceeded {
			t.Errorf("%s: unexpected number of succeeded pods.  Expected %d, saw %d\n", name, tc.expectedSucceeded, actual.Status.Succeeded)
		}
		if actual.Status.Failed != tc.expectedFailed {
			t.Errorf("%s: unexpected number of failed pods.  Expected %d, saw %d\n", name, tc.expectedFailed, actual.Status.Failed)
		}
		// validate conditions
		if tc.expectedComplete {
			completed := false
			for _, v := range actual.Status.Conditions {
				if v.Type == extensions.JobComplete && v.Status == api.ConditionTrue {
					completed = true
					break
				}
			}
			if !completed {
				t.Errorf("%s: expected completion condition.  Got %v", name, actual.Status.Conditions)
			}
		}
	}
}