Example #1
0
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) {

	if schedulingErr == noSuchPodErr {
		log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
		return
	}

	log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
	defer util.HandleCrash()

	// default upstream scheduler passes pod.Name as binding.PodID
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
		return
	}

	k.backoff.GC()
	k.api.Lock()
	defer k.api.Unlock()

	switch task, state := k.api.tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// if we don't have a mapping here any more then someone deleted the pod
		log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey)
		return

	case podtask.StatePending:
		if task.Has(podtask.Launched) {
			log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
			return
		}
		breakoutEarly := queue.BreakChan(nil)
		if schedulingErr == noSuitableOffersErr {
			log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
			breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
				k.api.Lock()
				defer k.api.Unlock()
				switch task, state := k.api.tasks().Get(task.ID); state {
				case podtask.StatePending:
					// Assess fitness of pod with the current offer. The scheduler normally
					// "backs off" when it can't find an offer that matches up with a pod.
					// The backoff period for a pod can terminate sooner if an offer becomes
					// available that matches up.
					return !task.Has(podtask.Launched) && k.api.algorithm().FitPredicate()(task, offer, nil)
				default:
					// no point in continuing to check for matching offers
					return true
				}
			}))
		}
		delay := k.backoff.Get(podKey)
		log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
		k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})

	default:
		log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
	}
}
func New(c *config.Config, fw framework.Framework, ps podschedulers.PodScheduler,
	client *client.Client, recorder record.EventRecorder, terminate <-chan struct{}, mux *http.ServeMux, lw *cache.ListWatch) scheduler.Scheduler {

	core := &sched{
		framework:    fw,
		taskRegistry: podtask.NewInMemoryRegistry(),
	}

	// Watch and queue pods that need scheduling.
	podUpdatesBypass := make(chan queue.Entry, c.UpdatesBacklog)
	podUpdates := &podStoreAdapter{queue.NewHistorical(podUpdatesBypass)}
	reflector := cache.NewReflector(lw, &api.Pod{}, podUpdates, 0)

	q := queuer.New(queue.NewDelayFIFO(), podUpdates)

	algorithm := algorithm.New(core, podUpdates, ps)

	podDeleter := deleter.New(core, q)

	core.podReconciler = podreconciler.New(core, client, q, podDeleter)

	bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)
	newBC := func(podKey string) queue.BreakChan {
		return queue.BreakChan(core.Offers().Listen(podKey, func(offer *mesos.Offer) bool {
			core.Lock()
			defer core.Unlock()
			switch task, state := core.Tasks().ForPod(podKey); state {
			case podtask.StatePending:
				// Assess fitness of pod with the current offer. The scheduler normally
				// "backs off" when it can't find an offer that matches up with a pod.
				// The backoff period for a pod can terminate sooner if an offer becomes
				// available that matches up.
				return !task.Has(podtask.Launched) && ps.FitPredicate()(task, offer, nil)
			default:
				// no point in continuing to check for matching offers
				return true
			}
		}))
	}
	errorHandler := errorhandler.New(core, bo, q, newBC)

	binder := binder.New(core)

	startLatch := make(chan struct{})

	runtime.On(startLatch, func() {
		reflector.Run() // TODO(jdef) should listen for termination
		podDeleter.Run(podUpdatesBypass, terminate)
		q.Run(terminate)

		q.InstallDebugHandlers(mux)
		podtask.InstallDebugHandlers(core.Tasks(), mux)
	})

	core.controller = controller.New(client, algorithm, recorder, q.Yield, errorHandler.Error, binder, startLatch)
	return core
}
Example #3
0
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) Error(pod *api.Pod, schedulingErr error) {

	if schedulingErr == errors.NoSuchPodErr {
		log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
		return
	}

	log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
	defer runtime.HandleCrash()

	// default upstream scheduler passes pod.Name as binding.PodID
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
		return
	}

	k.backoff.GC()
	k.sched.Lock()
	defer k.sched.Unlock()

	switch task, state := k.sched.Tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// if we don't have a mapping here any more then someone deleted the pod
		log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey)
		return

	case podtask.StatePending:
		if task.Has(podtask.Launched) {
			log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
			return
		}
		breakoutEarly := queue.BreakChan(nil)
		if schedulingErr == errors.NoSuitableOffersErr {
			log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
			breakoutEarly = k.newBreakChan(podKey)
		}
		delay := k.backoff.Get(podKey)
		log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
		k.qr.Requeue(queuer.NewPod(pod, queuer.Delay(delay), queuer.Notify(breakoutEarly)))

	default:
		log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
	}
}