// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve rc %v from store: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for rc %q: %v", key, err)
		rm.queue.Add(key)
		return err
	}

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rcNeedsSync {
		rm.manageReplicas(filteredPods, &rc)
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, len(filteredPods)); err != nil {
		// Multiple things could lead to this update failing. Requeuing the controller ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rc.Namespace, rc.Name, err)
		rm.enqueueController(&rc)
	}
	return nil
}
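The comment on syncReplicationController says it is not meant to be invoked concurrently with the same key. Below is a minimal sketch of how that is usually arranged: a work queue deduplicates keys and hands a given key to a single worker until Done is called for it. The rm.syncHandler field and the exact queue semantics are assumptions based on the pattern in these snippets, not code copied from them.

// Sketch only: the worker loop that makes the "not meant to be invoked
// concurrently with the same key" contract hold in practice.
func (rm *ReplicationManager) worker() {
	for {
		key, quit := rm.queue.Get()
		if quit {
			return
		}
		func() {
			defer rm.queue.Done(key)
			// rm.syncHandler is assumed to point at syncReplicationController.
			if err := rm.syncHandler(key.(string)); err != nil {
				glog.Errorf("Error syncing replication controller %v: %v", key, err)
			}
		}()
	}
}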
Example #2
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	trace.Step("Expectations restored")

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy them first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rm.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rc's selector
		// anymore but still have a stale controller ref.
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rm.podControl, rc.ObjectMeta, labels.Set(rc.Spec.Selector).AsSelectorPreValidated(), getRCKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RC into work queue again. We need to try to free the
		// pods again otherwise they will be stuck with the stale
			// controllerRef.
			rm.queue.Add(key)
			return aggregate
		}
	} else {
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelectorPreValidated())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rcNeedsSync && rc.DeletionTimestamp == nil {
		manageReplicasErr = rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replication controller; the matching pods may have more
	// labels than are in the template. Because the labels of the podTemplateSpec
	// are a superset of the replication controller's selector, all such matching
	// pods must be part of filteredPods.
	fullyLabeledReplicasCount := 0
	readyReplicasCount := 0
	availableReplicasCount := 0
	templateLabel := labels.Set(rc.Spec.Template.Labels).AsSelectorPreValidated()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
		if api.IsPodReady(pod) {
			readyReplicasCount++
			if api.IsPodAvailable(pod, rc.Spec.MinReadySeconds, unversioned.Now()) {
				availableReplicasCount++
			}
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(
		rm.kubeClient.Core().ReplicationControllers(rc.Namespace),
		rc,
		len(filteredPods),
		fullyLabeledReplicasCount,
		readyReplicasCount,
		availableReplicasCount,
	); err != nil {
		// Multiple things could lead to this update failing.  Returning an error causes a requeue without forcing a hotloop
		return err
	}

	return manageReplicasErr
}
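SatisfiedExpectations only returns true again once the pod informer has observed the creations and deletions this controller asked for. A rough sketch of that other half follows; getPodController is a hypothetical lookup helper, and CreationObserved belongs to the same expectations helper as the SatisfiedExpectations and DeleteExpectations calls above.

// Sketch, not copied from the source: when the shared pod informer delivers a
// newly created pod, lower the rc's outstanding creation expectation and
// requeue it, so a later sync sees SatisfiedExpectations(rcKey) == true and is
// allowed to call manageReplicas again.
func (rm *ReplicationManager) addPod(obj interface{}) {
	pod := obj.(*api.Pod)
	rc := rm.getPodController(pod) // hypothetical: map the pod back to its rc
	if rc == nil {
		return
	}
	rcKey, err := controller.KeyFunc(rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return
	}
	rm.expectations.CreationObserved(rcKey)
	rm.enqueueController(rc)
}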
Example #3
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve ReplicaSet %v from store: %v", key, err)
		rsc.queue.Add(key)
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)
	if !rsc.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing ReplicaSet %v", rs.Name)
		rsc.enqueueReplicaSet(&rs)
		return nil
	}

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
		return err
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := extensions.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		glog.Errorf("Error converting pod selector to selector: %v", err)
		return err
	}
	podList, err := rsc.podStore.Pods(rs.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for ReplicaSet %q: %v", key, err)
		rsc.queue.Add(key)
		return err
	}

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rsNeedsSync {
		rsc.manageReplicas(filteredPods, &rs)
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, len(filteredPods)); err != nil {
		// Multiple things could lead to this update failing. Requeuing the replica set ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rs.Namespace, rs.Name, err)
		rsc.enqueueReplicaSet(&rs)
	}
	return nil
}
Example #4
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	job := *obj.(*extensions.Job)
	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	selector, _ := extensions.LabelSelectorAsSelector(job.Spec.Selector)
	podList, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	succeeded, failed := getStatus(podList.Items)
	conditions := len(job.Status.Conditions)
	if job.Status.StartTime == nil {
		now := unversioned.Now()
		job.Status.StartTime = &now
	}
	if pastActiveDeadline(&job) {
		// if job was finished previously, we don't want to redo the termination
		if isJobFinished(&job) {
			return nil
		}
		// TODO: the code below should be replaced with pod termination resulting in
		// pod failures, rather than killing pods. Unfortunately no such solution
		// exists at the moment. There's an open discussion on the topic in
		// https://github.com/kubernetes/kubernetes/issues/14602 which might give
		// some sort of solution to the above problem.
		// kill remaining active pods
		wait := sync.WaitGroup{}
		wait.Add(active)
		for i := 0; i < active; i++ {
			go func(ix int) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name); err != nil {
					defer util.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(extensions.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
		jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline")
	} else {
		if jobNeedsSync {
			active = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		if completions == *job.Spec.Completions {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(extensions.JobComplete, "", ""))
			now := unversioned.Now()
			job.Status.CompletionTime = &now
		}
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing.  Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
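pastActiveDeadline is referenced above but not shown. A minimal sketch of the check it needs to perform, assuming the Spec.ActiveDeadlineSeconds field and the Status.StartTime value set earlier in syncJob, is:

// Sketch (assumed shape, not the verbatim helper): a job is past its active
// deadline once it has been running longer than Spec.ActiveDeadlineSeconds,
// measured from Status.StartTime.
func pastActiveDeadlineSketch(job *extensions.Job) bool {
	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil {
		return false
	}
	duration := unversioned.Now().Time.Sub(job.Status.StartTime.Time)
	allowed := time.Duration(*job.Spec.ActiveDeadlineSeconds) * time.Second
	return duration >= allowed
}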
Example #5
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Errorf("Unable to retrieve job %v from store: %v", key, err)
		jm.queue.Add(key)
		return err
	}
	job := *obj.(*experimental.Job)
	if !jm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod)
		glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", job.Name)
		jm.enqueueController(&job)
		return nil
	}

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobKey, err := controller.KeyFunc(&job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return err
	}
	jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey)
	podList, err := jm.podStore.Pods(job.Namespace).List(labels.Set(job.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for job %q: %v", key, err)
		jm.queue.Add(key)
		return err
	}

	activePods := controller.FilterActivePods(podList.Items)
	active := len(activePods)
	successful, unsuccessful := getStatus(podList.Items)
	if jobNeedsSync {
		active = jm.manageJob(activePods, successful, unsuccessful, &job)
	}
	completions := successful
	if completions == *job.Spec.Completions {
		job.Status.Conditions = append(job.Status.Conditions, newCondition())
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Successful != successful || job.Status.Unsuccessful != unsuccessful {
		job.Status.Active = active
		job.Status.Successful = successful
		job.Status.Unsuccessful = unsuccessful

		if err := jm.updateHandler(&job); err != nil {
			glog.Errorf("Failed to update job %v, requeuing.  Error: %v", job.Name, err)
			jm.enqueueController(&job)
		}
	}
	return nil
}
Example #6
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rsc.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing ReplicaSet %v", key)
		rsc.queue.Add(key)
		return nil
	}

	obj, exists, err := rsc.rsStore.Store.GetByKey(key)
	if !exists {
		glog.Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve ReplicaSet %v from store: %v", key, err)
		rsc.queue.Add(key)
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		glog.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err)
		return err
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		glog.Errorf("Error converting pod selector to selector: %v", err)
		return err
	}

	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs's selector
		// anymore but still have a stale controller ref.
		podList, err := rsc.podStore.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rs %q: %v", key, err)
			rsc.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(podList.Items)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
		// pods again otherwise they will be stuck with the stale
			// controllerRef.
			rsc.queue.Add(key)
			return aggregate
		}
	} else {
		podList, err := rsc.podStore.Pods(rs.Namespace).List(selector)
		if err != nil {
			glog.Errorf("Error getting pods for rs %q: %v", key, err)
			rsc.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(podList.Items)
	}

	if rsNeedsSync && rs.DeletionTimestamp == nil {
		rsc.manageReplicas(filteredPods, &rs)
	}

	// Count the number of pods that have labels matching the labels of the pod
	// template of the ReplicaSet; the matching pods may have more labels than
	// are in the template. Because the labels of the podTemplateSpec are a
	// superset of the ReplicaSet's selector, all such matching pods must be
	// part of filteredPods.
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelector()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, len(filteredPods), fullyLabeledReplicasCount); err != nil {
		// Multiple things could lead to this update failing. Requeuing the replica set ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rs.Namespace, rs.Name, err)
		rsc.enqueueReplicaSet(&rs)
	}
	return nil
}
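Each of these snippets delegates the notion of an "active" pod to controller.FilterActivePods. The rule they all rely on is roughly the sketch below, written against the older value-slice signature; newer versions take []*api.Pod and the exact terminating-pod handling varies between releases.

// Sketch of the filtering rule implied above (assumed, not the verbatim
// helper): a pod counts as active if it has not already succeeded or failed
// and is not in the middle of being deleted.
func filterActivePodsSketch(pods []api.Pod) []*api.Pod {
	var active []*api.Pod
	for i := range pods {
		p := &pods[i]
		if p.Status.Phase != api.PodSucceeded &&
			p.Status.Phase != api.PodFailed &&
			p.DeletionTimestamp == nil {
			active = append(active, p)
		}
	}
	return active
}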
Example #7
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		glog.Infof("Unable to retrieve rc %v from store: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	rc := *obj.(*api.ReplicationController)

	// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
	rcKey, err := controller.KeyFunc(&rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return err
	}
	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
	trace.Step("Expectations restored")
	podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector())
	if err != nil {
		glog.Errorf("Error getting pods for rc %q: %v", key, err)
		rm.queue.Add(key)
		return err
	}
	trace.Step("Pods listed")

	// TODO: Do this in a single pass, or use an index.
	filteredPods := controller.FilterActivePods(podList.Items)
	if rcNeedsSync {
		rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	// Count the number of pods that have labels matching the labels of the pod
	// template of the replication controller; the matching pods may have more
	// labels than are in the template. Because the labels of the podTemplateSpec
	// are a superset of the replication controller's selector, all such matching
	// pods must be part of filteredPods.
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rc.Spec.Template.Labels).AsSelector()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, len(filteredPods), fullyLabeledReplicasCount); err != nil {
		// Multiple things could lead to this update failing. Requeuing the controller ensures
		// we retry with some fairness.
		glog.V(2).Infof("Failed to update replica count for controller %v/%v; requeuing; error: %v", rc.Namespace, rc.Name, err)
		rm.enqueueController(&rc)
	}
	return nil
}
Example #8
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (jm *JobController) syncJob(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := jm.jobStore.Store.GetByKey(key)
	if !exists {
		glog.V(4).Infof("Job has been deleted: %v", key)
		jm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	job := *obj.(*batch.Job)

	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
	jobNeedsSync := jm.expectations.SatisfiedExpectations(key)
	selector, _ := unversioned.LabelSelectorAsSelector(job.Spec.Selector)
	pods, err := jm.podStore.Pods(job.Namespace).List(selector)
	if err != nil {
		return err
	}

	activePods := controller.FilterActivePods(pods)
	active := int32(len(activePods))
	succeeded, failed := getStatus(pods)
	conditions := len(job.Status.Conditions)
	if job.Status.StartTime == nil {
		now := unversioned.Now()
		job.Status.StartTime = &now
	}
	// if job was finished previously, we don't want to redo the termination
	if IsJobFinished(&job) {
		return nil
	}
	if pastActiveDeadline(&job) {
		// TODO: the code below should be replaced with pod termination resulting in
		// pod failures, rather than killing pods. Unfortunately no such solution
		// exists at the moment. There's an open discussion on the topic in
		// https://github.com/kubernetes/kubernetes/issues/14602 which might give
		// some sort of solution to the above problem.
		// kill remaining active pods
		wait := sync.WaitGroup{}
		wait.Add(int(active))
		for i := int32(0); i < active; i++ {
			go func(ix int32) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, &job); err != nil {
					defer utilruntime.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
		// update status values accordingly
		failed += active
		active = 0
		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
		jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline")
	} else {
		if jobNeedsSync && job.DeletionTimestamp == nil {
			active = jm.manageJob(activePods, succeeded, &job)
		}
		completions := succeeded
		complete := false
		if job.Spec.Completions == nil {
			// This type of job is complete when any pod exits with success.
			// Each pod is capable of
			// determining whether or not the entire Job is done.  Subsequent pods are
			// not expected to fail, but if they do, the failure is ignored.  Once any
			// pod succeeds, the controller waits for remaining pods to finish, and
			// then the job is complete.
			if succeeded > 0 && active == 0 {
				complete = true
			}
		} else {
			// Job specifies a number of completions.  This type of job signals
			// success by having that number of successes.  Since we do not
			// start more pods than there are remaining completions, there should
			// not be any remaining active pods once this count is reached.
			if completions >= *job.Spec.Completions {
				complete = true
				if active > 0 {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached")
				}
				if completions > *job.Spec.Completions {
					jm.recorder.Event(&job, api.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
				}
			}
		}
		if complete {
			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
			now := unversioned.Now()
			job.Status.CompletionTime = &now
		}
	}

	// no need to update the job if the status hasn't changed since last time
	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
		job.Status.Active = active
		job.Status.Succeeded = succeeded
		job.Status.Failed = failed

		if err := jm.updateHandler(&job); err != nil {
			return err
		}
	}
	return nil
}
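getStatus is also referenced but not shown; a sketch of what it has to count, matching the []*api.Pod form used in this snippet (older examples pass podList.Items instead), could be:

// Sketch (assumed shape): count the pods that have finished successfully and
// the ones that have failed, which feed job.Status.Succeeded and
// job.Status.Failed above.
func getStatusSketch(pods []*api.Pod) (succeeded, failed int32) {
	for _, p := range pods {
		switch p.Status.Phase {
		case api.PodSucceeded:
			succeeded++
		case api.PodFailed:
			failed++
		}
	}
	return
}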
Example #9
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsLister.Indexer.GetByKey(key)
	if !exists {
		glog.V(4).Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	// Check the expectations of the ReplicaSet before counting active pods, otherwise a new pod can sneak
	// in and update the expectations after we've retrieved active pods from the store. If a new pod enters
	// the store after we've checked the expectation, the ReplicaSet sync is just deferred till the next
	// relist.
	rsKey, err := controller.KeyFunc(&rs)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Couldn't get key for ReplicaSet %#v: %v", rs, err))
		// Explicitly return nil to avoid re-enqueue bad key
		return nil
	}
	rsNeedsSync := rsc.expectations.SatisfiedExpectations(rsKey)
	selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Error converting pod selector to selector: %v", err))
		return nil
	}

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy them first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*api.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs's selector
		// anymore but still have a stale controller ref.
		pods, err := rsc.podLister.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		for _, pod := range matchesNeedsController {
			err := cm.AdoptPod(pod)
			// continue to next pod if adoption fails.
			if err != nil {
				// If the pod no longer exists, don't even log the error.
				if !errors.IsNotFound(err) {
					utilruntime.HandleError(err)
				}
			} else {
				matchesAndControlled = append(matchesAndControlled, pod)
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
		// pods again otherwise they will be stuck with the stale
			// controllerRef.
			return aggregate
		}
	} else {
		pods, err := rsc.podLister.Pods(rs.Namespace).List(selector)
		if err != nil {
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rsNeedsSync && rs.DeletionTimestamp == nil {
		manageReplicasErr = rsc.manageReplicas(filteredPods, &rs)
	}

	// Count the number of pods that have labels matching the labels of the pod
	// template of the ReplicaSet; the matching pods may have more labels than
	// are in the template. Because the labels of the podTemplateSpec are a
	// superset of the ReplicaSet's selector, all such matching pods must be
	// part of filteredPods.
	fullyLabeledReplicasCount := 0
	readyReplicasCount := 0
	availableReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelectorPreValidated()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
		if api.IsPodReady(pod) {
			readyReplicasCount++
			if api.IsPodAvailable(pod, rs.Spec.MinReadySeconds, unversioned.Now()) {
				availableReplicasCount++
			}
		}
	}

	// Always updates status as pods come up or die.
	if err := updateReplicaCount(
		rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace),
		rs,
		len(filteredPods),
		fullyLabeledReplicasCount,
		readyReplicasCount,
		availableReplicasCount,
	); err != nil {
		// Multiple things could lead to this update failing. Returning an error
		// causes a requeue without forcing a hotloop.
		return err
	}
	return manageReplicasErr
}
Example #10
func TestControllerSyncJob(t *testing.T) {
	testCases := map[string]struct {
		// job setup
		parallelism int
		completions int

		// pod setup
		podControllerError error
		activePods         int
		succeededPods      int
		failedPods         int

		// expectations
		expectedCreations int
		expectedDeletions int
		expectedActive    int
		expectedSucceeded int
		expectedFailed    int
		expectedComplete  bool
	}{
		"job start": {
			2, 5,
			nil, 0, 0, 0,
			2, 0, 2, 0, 0, false,
		},
		"correct # of pods": {
			2, 5,
			nil, 2, 0, 0,
			0, 0, 2, 0, 0, false,
		},
		"too few active pods": {
			2, 5,
			nil, 1, 1, 0,
			1, 0, 2, 1, 0, false,
		},
		"too few active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 1, 1, 0,
			0, 0, 1, 1, 0, false,
		},
		"too many active pods": {
			2, 5,
			nil, 3, 0, 0,
			0, 1, 2, 0, 0, false,
		},
		"too many active pods, with controller error": {
			2, 5,
			fmt.Errorf("Fake error"), 3, 0, 0,
			0, 0, 3, 0, 0, false,
		},
		"failed pod": {
			2, 5,
			nil, 1, 1, 1,
			1, 0, 2, 1, 1, false,
		},
		"job finish": {
			2, 5,
			nil, 0, 5, 0,
			0, 0, 0, 5, 0, true,
		},
		"more active pods than completions": {
			2, 5,
			nil, 10, 0, 0,
			0, 8, 2, 0, 0, false,
		},
		"status change": {
			2, 5,
			nil, 2, 2, 0,
			0, 0, 2, 2, 0, false,
		},
	}

	for name, tc := range testCases {
		// job manager setup
		client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Default.GroupAndVersion()})
		manager := NewJobController(client, controller.NoResyncPeriodFunc)
		fakePodControl := controller.FakePodControl{Err: tc.podControllerError}
		manager.podControl = &fakePodControl
		var job *extensions.Job
		manager.podStoreSynced = func() bool {
			selector, _ := extensions.PodSelectorAsSelector(job.Spec.Selector)
			podList, _ := manager.podStore.Pods(job.Namespace).List(selector)
			active := len(controller.FilterActivePods(podList.Items))
			succeeded, failed := getStatus(podList.Items)
			return active == tc.activePods && succeeded == tc.succeededPods && failed == tc.failedPods
		}
		var actual *extensions.Job
		manager.updateHandler = func(job *extensions.Job) error {
			actual = job
			return nil
		}

		// job & pods setup
		job = newJob(tc.parallelism, tc.completions)
		manager.jobStore.Store.Add(job)
		for _, pod := range newPodList(tc.activePods, api.PodRunning, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.succeededPods, api.PodSucceeded, job) {
			manager.podStore.Store.Add(&pod)
		}
		for _, pod := range newPodList(tc.failedPods, api.PodFailed, job) {
			manager.podStore.Store.Add(&pod)
		}

		// run
		err := manager.syncJob(getKey(job, t))
		if err != nil {
			t.Errorf("%s: unexpected error when syncing jobs %v", name, err)
		}

		// validate created/deleted pods
		if len(fakePodControl.Templates) != tc.expectedCreations {
			t.Errorf("%s: unexpected number of creates.  Expected %d, saw %d\n", name, tc.expectedCreations, len(fakePodControl.Templates))
		}
		if len(fakePodControl.DeletePodName) != tc.expectedDeletions {
			t.Errorf("%s: unexpected number of deletes.  Expected %d, saw %d\n", name, tc.expectedDeletions, len(fakePodControl.DeletePodName))
		}
		// validate status
		if actual.Status.Active != tc.expectedActive {
			t.Errorf("%s: unexpected number of active pods.  Expected %d, saw %d\n", name, tc.expectedActive, actual.Status.Active)
		}
		if actual.Status.Succeeded != tc.expectedSucceeded {
			t.Errorf("%s: unexpected number of succeeded pods.  Expected %d, saw %d\n", name, tc.expectedSucceeded, actual.Status.Succeeded)
		}
		if actual.Status.Failed != tc.expectedFailed {
			t.Errorf("%s: unexpected number of failed pods.  Expected %d, saw %d\n", name, tc.expectedFailed, actual.Status.Failed)
		}
		// validate conditions
		if tc.expectedComplete {
			completed := false
			for _, v := range actual.Status.Conditions {
				if v.Type == extensions.JobComplete && v.Status == api.ConditionTrue {
					completed = true
					break
				}
			}
			if !completed {
				t.Errorf("%s: expected completion condition.  Got %v", name, actual.Status.Conditions)
			}
		}
	}
}
Example #11
// syncReplicaSet will sync the ReplicaSet with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
func (rsc *ReplicaSetController) syncReplicaSet(key string) error {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing replica set %q (%v)", key, time.Now().Sub(startTime))
	}()

	obj, exists, err := rsc.rsLister.Indexer.GetByKey(key)
	if !exists {
		glog.V(4).Infof("ReplicaSet has been deleted %v", key)
		rsc.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rs := *obj.(*extensions.ReplicaSet)

	rsNeedsSync := rsc.expectations.SatisfiedExpectations(key)
	selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("Error converting pod selector to selector: %v", err))
		return nil
	}

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy them first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*v1.Pod
	if rsc.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rs's selector
		// anymore but still have a stale controller ref.
		pods, err := rsc.podLister.Pods(rs.Namespace).List(labels.Everything())
		if err != nil {
			return err
		}
		cm := controller.NewPodControllerRefManager(rsc.podControl, rs.ObjectMeta, selector, getRSKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		// Adopt pods only if this replica set is not going to be deleted.
		if rs.DeletionTimestamp == nil {
			for _, pod := range matchesNeedsController {
				err := cm.AdoptPod(pod)
				// continue to next pod if adoption fails.
				if err != nil {
					// If the pod no longer exists, don't even log the error.
					if !errors.IsNotFound(err) {
						utilruntime.HandleError(err)
					}
				} else {
					matchesAndControlled = append(matchesAndControlled, pod)
				}
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RS into work queue again. We need to try to free the
		// pods again otherwise they will be stuck with the stale
			// controllerRef.
			return aggregate
		}
	} else {
		pods, err := rsc.podLister.Pods(rs.Namespace).List(selector)
		if err != nil {
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rsNeedsSync && rs.DeletionTimestamp == nil {
		manageReplicasErr = rsc.manageReplicas(filteredPods, &rs)
	}

	newStatus := calculateStatus(rs, filteredPods, manageReplicasErr)

	// Always updates status as pods come up or die.
	if err := updateReplicaSetStatus(rsc.kubeClient.Extensions().ReplicaSets(rs.Namespace), rs, newStatus); err != nil {
		// Multiple things could lead to this update failing. Returning an error
		// causes a requeue without forcing a hotloop.
		return err
	}
	return manageReplicasErr
}
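calculateStatus replaces the inline counting loop from the earlier examples. A sketch of the replica-count part, assuming the Replicas and FullyLabeledReplicas fields of extensions.ReplicaSetStatus, might look like the following; the real helper also fills the ready/available counts and manages a failure condition from manageReplicasErr, which is omitted here.

// Sketch (assumed, not the verbatim helper): fold the per-pod counting loop
// from the earlier examples into a single status value.
func calculateStatusSketch(rs extensions.ReplicaSet, filteredPods []*v1.Pod, manageReplicasErr error) extensions.ReplicaSetStatus {
	newStatus := rs.Status
	fullyLabeledReplicasCount := 0
	templateLabel := labels.Set(rs.Spec.Template.Labels).AsSelectorPreValidated()
	for _, pod := range filteredPods {
		if templateLabel.Matches(labels.Set(pod.Labels)) {
			fullyLabeledReplicasCount++
		}
	}
	newStatus.Replicas = int32(len(filteredPods))
	newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
	return newStatus
}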
Example #12
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error {
	trace := util.NewTrace("syncReplicationController: " + key)
	defer trace.LogIfLong(250 * time.Millisecond)

	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime))
	}()

	if !rm.podStoreSynced() {
		// Sleep so we give the pod reflector goroutine a chance to run.
		time.Sleep(PodStoreSyncedPollPeriod)
		glog.Infof("Waiting for pods controller to sync, requeuing rc %v", key)
		rm.queue.Add(key)
		return nil
	}

	obj, exists, err := rm.rcStore.Indexer.GetByKey(key)
	if !exists {
		glog.Infof("Replication Controller has been deleted %v", key)
		rm.expectations.DeleteExpectations(key)
		return nil
	}
	if err != nil {
		return err
	}
	rc := *obj.(*v1.ReplicationController)

	trace.Step("ReplicationController restored")
	rcNeedsSync := rm.expectations.SatisfiedExpectations(key)
	trace.Step("Expectations restored")

	// NOTE: filteredPods are pointing to objects from cache - if you need to
	// modify them, you need to copy them first.
	// TODO: Do the List and Filter in a single pass, or use an index.
	var filteredPods []*v1.Pod
	if rm.garbageCollectorEnabled {
		// list all pods to include the pods that don't match the rc's selector
		// anymore but still have a stale controller ref.
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Everything())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		cm := controller.NewPodControllerRefManager(rm.podControl, rc.ObjectMeta, labels.Set(rc.Spec.Selector).AsSelectorPreValidated(), getRCKind())
		matchesAndControlled, matchesNeedsController, controlledDoesNotMatch := cm.Classify(pods)
		// Adopt pods only if this replication controller is not going to be deleted.
		if rc.DeletionTimestamp == nil {
			for _, pod := range matchesNeedsController {
				err := cm.AdoptPod(pod)
				// continue to next pod if adoption fails.
				if err != nil {
					// If the pod no longer exists, don't even log the error.
					if !errors.IsNotFound(err) {
						utilruntime.HandleError(err)
					}
				} else {
					matchesAndControlled = append(matchesAndControlled, pod)
				}
			}
		}
		filteredPods = matchesAndControlled
		// remove the controllerRef for the pods that no longer have matching labels
		var errlist []error
		for _, pod := range controlledDoesNotMatch {
			err := cm.ReleasePod(pod)
			if err != nil {
				errlist = append(errlist, err)
			}
		}
		if len(errlist) != 0 {
			aggregate := utilerrors.NewAggregate(errlist)
			// push the RC into work queue again. We need to try to free the
		// pods again otherwise they will be stuck with the stale
			// controllerRef.
			rm.queue.Add(key)
			return aggregate
		}
	} else {
		pods, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelectorPreValidated())
		if err != nil {
			glog.Errorf("Error getting pods for rc %q: %v", key, err)
			rm.queue.Add(key)
			return err
		}
		filteredPods = controller.FilterActivePods(pods)
	}

	var manageReplicasErr error
	if rcNeedsSync && rc.DeletionTimestamp == nil {
		manageReplicasErr = rm.manageReplicas(filteredPods, &rc)
	}
	trace.Step("manageReplicas done")

	newStatus := calculateStatus(rc, filteredPods, manageReplicasErr)

	// Always updates status as pods come up or die.
	if err := updateReplicationControllerStatus(rm.kubeClient.Core().ReplicationControllers(rc.Namespace), rc, newStatus); err != nil {
		// Multiple things could lead to this update failing.  Returning an error causes a requeue without forcing a hotloop
		return err
	}

	return manageReplicasErr
}