// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) { if schedulingErr == noSuchPodErr { log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name) return } log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr) defer util.HandleCrash() // default upstream scheduler passes pod.Name as binding.PodID ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace) podKey, err := podtask.MakePodKey(ctx, pod.Name) if err != nil { log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err) return } k.backoff.GC() k.api.Lock() defer k.api.Unlock() switch task, state := k.api.tasks().ForPod(podKey); state { case podtask.StateUnknown: // if we don't have a mapping here any more then someone deleted the pod log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey) return case podtask.StatePending: if task.Has(podtask.Launched) { log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey) return } breakoutEarly := queue.BreakChan(nil) if schedulingErr == noSuitableOffersErr { log.V(3).Infof("adding backoff breakout handler for pod %v", podKey) breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool { k.api.Lock() defer k.api.Unlock() switch task, state := k.api.tasks().Get(task.ID); state { case podtask.StatePending: // Assess fitness of pod with the current offer. The scheduler normally // "backs off" when it can't find an offer that matches up with a pod. // The backoff period for a pod can terminate sooner if an offer becomes // available that matches up. return !task.Has(podtask.Launched) && k.api.algorithm().FitPredicate()(task, offer, nil) default: // no point in continuing to check for matching offers return true } })) } delay := k.backoff.Get(podKey) log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay) k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly}) default: log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey) } }
func New(c *config.Config, fw framework.Framework, ps podschedulers.PodScheduler, client *client.Client, recorder record.EventRecorder, terminate <-chan struct{}, mux *http.ServeMux, lw *cache.ListWatch) scheduler.Scheduler { core := &sched{ framework: fw, taskRegistry: podtask.NewInMemoryRegistry(), } // Watch and queue pods that need scheduling. podUpdatesBypass := make(chan queue.Entry, c.UpdatesBacklog) podUpdates := &podStoreAdapter{queue.NewHistorical(podUpdatesBypass)} reflector := cache.NewReflector(lw, &api.Pod{}, podUpdates, 0) q := queuer.New(queue.NewDelayFIFO(), podUpdates) algorithm := algorithm.New(core, podUpdates, ps) podDeleter := deleter.New(core, q) core.podReconciler = podreconciler.New(core, client, q, podDeleter) bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration) newBC := func(podKey string) queue.BreakChan { return queue.BreakChan(core.Offers().Listen(podKey, func(offer *mesos.Offer) bool { core.Lock() defer core.Unlock() switch task, state := core.Tasks().ForPod(podKey); state { case podtask.StatePending: // Assess fitness of pod with the current offer. The scheduler normally // "backs off" when it can't find an offer that matches up with a pod. // The backoff period for a pod can terminate sooner if an offer becomes // available that matches up. return !task.Has(podtask.Launched) && ps.FitPredicate()(task, offer, nil) default: // no point in continuing to check for matching offers return true } })) } errorHandler := errorhandler.New(core, bo, q, newBC) binder := binder.New(core) startLatch := make(chan struct{}) runtime.On(startLatch, func() { reflector.Run() // TODO(jdef) should listen for termination podDeleter.Run(podUpdatesBypass, terminate) q.Run(terminate) q.InstallDebugHandlers(mux) podtask.InstallDebugHandlers(core.Tasks(), mux) }) core.controller = controller.New(client, algorithm, recorder, q.Yield, errorHandler.Error, binder, startLatch) return core }
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler func (k *errorHandler) Error(pod *api.Pod, schedulingErr error) { if schedulingErr == errors.NoSuchPodErr { log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name) return } log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr) defer runtime.HandleCrash() // default upstream scheduler passes pod.Name as binding.PodID ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace) podKey, err := podtask.MakePodKey(ctx, pod.Name) if err != nil { log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err) return } k.backoff.GC() k.sched.Lock() defer k.sched.Unlock() switch task, state := k.sched.Tasks().ForPod(podKey); state { case podtask.StateUnknown: // if we don't have a mapping here any more then someone deleted the pod log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey) return case podtask.StatePending: if task.Has(podtask.Launched) { log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey) return } breakoutEarly := queue.BreakChan(nil) if schedulingErr == errors.NoSuitableOffersErr { log.V(3).Infof("adding backoff breakout handler for pod %v", podKey) breakoutEarly = k.newBreakChan(podKey) } delay := k.backoff.Get(podKey) log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay) k.qr.Requeue(queuer.NewPod(pod, queuer.Delay(delay), queuer.Notify(breakoutEarly))) default: log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey) } }