func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) { //TODO(jdef) might be nice to spec an abort chan here runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) { log.Warningf("decline failed for offer id %v: %v", offerId, err) }, nil)).Then(func() { metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc() }) }
func (k *KubernetesScheduler) InstallDebugHandlers(mux *http.ServeMux) { wrappedHandler := func(uri string, h http.Handler) { mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) { ch := make(chan struct{}) closer := runtime.Closer(ch) proc.OnError(k.asMaster().Do(func() { defer closer() h.ServeHTTP(w, r) }), func(err error) { defer closer() log.Warningf("failed HTTP request for %s: %v", uri, err) w.WriteHeader(http.StatusServiceUnavailable) }, k.terminate) select { case <-time.After(k.schedcfg.HttpHandlerTimeout.Duration): log.Warningf("timed out waiting for request to be processed") w.WriteHeader(http.StatusServiceUnavailable) return case <-ch: // noop } }) } requestReconciliation := func(uri string, requestAction func()) { wrappedHandler(uri, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { requestAction() w.WriteHeader(http.StatusNoContent) })) } requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit) requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit) wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { slaves := k.slaves.getSlaveIds() for _, slaveId := range slaves { _, err := k.driver.SendFrameworkMessage( k.executor.ExecutorId, mutil.NewSlaveID(slaveId), messages.Kamikaze) if err != nil { log.Warningf("failed to send kamikaze message to slave %s: %v", slaveId, err) } else { io.WriteString(w, fmt.Sprintf("kamikaze slave %s\n", slaveId)) } } io.WriteString(w, "OK") })) }
func (self *SchedulerProcess) Elect(newDriver DriverFactory) { errOnce := proc.NewErrorOnce(self.fin) proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() { if !(&self.stage).transition(standbyStage, masterStage) { log.Errorf("failed to transition from standby to master stage, aborting") self.End() return } log.Infoln("scheduler process entered master stage") drv, err := newDriver() if err != nil { log.Errorf("failed to fetch scheduler driver: %v", err) self.End() return } log.V(1).Infoln("starting driver...") stat, err := drv.Start() if stat == mesos.Status_DRIVER_RUNNING && err == nil { log.Infoln("driver started successfully and is running") close(self.elected) go func() { defer self.End() _, err := drv.Join() if err != nil { log.Errorf("driver failed with error: %v", err) } errOnce.Report(err) }() return } defer self.End() if err != nil { log.Errorf("failed to start scheduler driver: %v", err) } else { log.Errorf("expected RUNNING status, not %v", stat) } }))).Err(), func(err error) { defer self.End() log.Errorf("failed to handle election event, aborting: %v", err) }, self.fin) }
// execute task reconciliation, returns when r.done is closed. intended to run as a goroutine. // if reconciliation is requested while another is in progress, the in-progress operation will be // cancelled before the new reconciliation operation begins. func (r *Reconciler) Run(driver bindings.SchedulerDriver) { var cancel, finished chan struct{} requestLoop: for { select { case <-r.done: return default: // proceed } select { case <-r.implicit: metrics.ReconciliationRequested.WithLabelValues("implicit").Inc() select { case <-r.done: return case <-r.explicit: break // give preference to a pending request for explicit default: // continue // don't run implicit reconciliation while explicit is ongoing if finished != nil { select { case <-finished: // continue w/ implicit default: log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing") continue requestLoop } } errOnce := proc.NewErrorOnce(r.done) errCh := r.Do(func() { var err error defer errOnce.Report(err) log.Infoln("implicit reconcile tasks") metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc() if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil { log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err) } }) proc.OnError(errOnce.Send(errCh).Err(), func(err error) { log.Errorf("failed to run implicit reconciliation: %v", err) }, r.done) goto slowdown } case <-r.done: return case <-r.explicit: // continue metrics.ReconciliationRequested.WithLabelValues("explicit").Inc() } if cancel != nil { close(cancel) cancel = nil // play nice and wait for the prior operation to finish, complain // if it doesn't select { case <-r.done: return case <-finished: // noop, expected case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected log.Error("reconciler action failed to stop upon cancellation") } } // copy 'finished' to 'fin' here in case we end up with simultaneous go-routines, // if cancellation takes too long or fails - we don't want to close the same chan // more than once cancel = make(chan struct{}) finished = make(chan struct{}) go func(fin chan struct{}) { startedAt := time.Now() defer func() { metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt))) }() metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc() defer close(fin) err := <-r.Action(driver, cancel) if err == reconciliationCancelledErr { metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc() log.Infoln(err.Error()) } else if err != nil { log.Errorf("reconciler action failed: %v", err) } }(finished) slowdown: // don't allow reconciliation to run very frequently, either explicit or implicit select { case <-r.done: return case <-time.After(r.cooldown): // noop } } // for }