Beispiel #1
0
func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) {
	//TODO(jdef) might be nice to spec an abort chan here
	runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) {
		log.Warningf("decline failed for offer id %v: %v", offerId, err)
	}, nil)).Then(func() {
		metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc()
	})
}
Beispiel #2
0
func (k *QingYuanScheduler) InstallDebugHandlers(mux *http.ServeMux) {
	wrappedHandler := func(uri string, h http.Handler) {
		mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
			ch := make(chan struct{})
			closer := runtime.Closer(ch)
			proc.OnError(k.asMaster().Do(func() {
				defer closer()
				h.ServeHTTP(w, r)
			}), func(err error) {
				defer closer()
				log.Warningf("failed HTTP request for %s: %v", uri, err)
				w.WriteHeader(http.StatusServiceUnavailable)
			}, k.terminate)
			select {
			case <-time.After(k.schedcfg.HttpHandlerTimeout.Duration):
				log.Warningf("timed out waiting for request to be processed")
				w.WriteHeader(http.StatusServiceUnavailable)
				return
			case <-ch: // noop
			}
		})
	}
	requestReconciliation := func(uri string, requestAction func()) {
		wrappedHandler(uri, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			requestAction()
			w.WriteHeader(http.StatusNoContent)
		}))
	}
	requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit)
	requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit)

	wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		slaves := k.slaves.getSlaveIds()
		for _, slaveId := range slaves {
			_, err := k.driver.SendFrameworkMessage(
				k.executor.ExecutorId,
				mutil.NewSlaveID(slaveId),
				messages.Kamikaze)
			if err != nil {
				log.Warningf("failed to send kamikaze message to slave %s: %v", slaveId, err)
			} else {
				io.WriteString(w, fmt.Sprintf("kamikaze slave %s\n", slaveId))
			}
		}
		io.WriteString(w, "OK")
	}))
}
Beispiel #3
0
func (self *SchedulerProcess) Elect(newDriver DriverFactory) {
	errOnce := proc.NewErrorOnce(self.fin)
	proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() {
		if !(&self.stage).transition(standbyStage, masterStage) {
			log.Errorf("failed to transition from standby to master stage, aborting")
			self.End()
			return
		}
		log.Infoln("scheduler process entered master stage")
		drv, err := newDriver()
		if err != nil {
			log.Errorf("failed to fetch scheduler driver: %v", err)
			self.End()
			return
		}
		log.V(1).Infoln("starting driver...")
		stat, err := drv.Start()
		if stat == mesos.Status_DRIVER_RUNNING && err == nil {
			log.Infoln("driver started successfully and is running")
			close(self.elected)
			go func() {
				defer self.End()
				_, err := drv.Join()
				if err != nil {
					log.Errorf("driver failed with error: %v", err)
				}
				errOnce.Report(err)
			}()
			return
		}
		defer self.End()
		if err != nil {
			log.Errorf("failed to start scheduler driver: %v", err)
		} else {
			log.Errorf("expected RUNNING status, not %v", stat)
		}
	}))).Err(), func(err error) {
		defer self.End()
		log.Errorf("failed to handle election event, aborting: %v", err)
	}, self.fin)
}
Beispiel #4
0
// execute task reconciliation, returns when r.done is closed. intended to run as a goroutine.
// if reconciliation is requested while another is in progress, the in-progress operation will be
// cancelled before the new reconciliation operation begins.
func (r *Reconciler) Run(driver bindings.SchedulerDriver) {
	var cancel, finished chan struct{}
requestLoop:
	for {
		select {
		case <-r.done:
			return
		default: // proceed
		}
		select {
		case <-r.implicit:
			metrics.ReconciliationRequested.WithLabelValues("implicit").Inc()
			select {
			case <-r.done:
				return
			case <-r.explicit:
				break // give preference to a pending request for explicit
			default: // continue
				// don't run implicit reconciliation while explicit is ongoing
				if finished != nil {
					select {
					case <-finished: // continue w/ implicit
					default:
						log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing")
						continue requestLoop
					}
				}
				errOnce := proc.NewErrorOnce(r.done)
				errCh := r.Do(func() {
					var err error
					defer errOnce.Report(err)
					log.Infoln("implicit reconcile tasks")
					metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc()
					if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
						log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err)
					}
				})
				proc.OnError(errOnce.Send(errCh).Err(), func(err error) {
					log.Errorf("failed to run implicit reconciliation: %v", err)
				}, r.done)
				goto slowdown
			}
		case <-r.done:
			return
		case <-r.explicit: // continue
			metrics.ReconciliationRequested.WithLabelValues("explicit").Inc()
		}

		if cancel != nil {
			close(cancel)
			cancel = nil

			// play nice and wait for the prior operation to finish, complain
			// if it doesn't
			select {
			case <-r.done:
				return
			case <-finished: // noop, expected
			case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected
				log.Error("reconciler action failed to stop upon cancellation")
			}
		}
		// copy 'finished' to 'fin' here in case we end up with simultaneous go-routines,
		// if cancellation takes too long or fails - we don't want to close the same chan
		// more than once
		cancel = make(chan struct{})
		finished = make(chan struct{})
		go func(fin chan struct{}) {
			startedAt := time.Now()
			defer func() {
				metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt)))
			}()

			metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc()
			defer close(fin)
			err := <-r.Action(driver, cancel)
			if err == reconciliationCancelledErr {
				metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc()
				log.Infoln(err.Error())
			} else if err != nil {
				log.Errorf("reconciler action failed: %v", err)
			}
		}(finished)
	slowdown:
		// don't allow reconciliation to run very frequently, either explicit or implicit
		select {
		case <-r.done:
			return
		case <-time.After(r.cooldown): // noop
		}
	} // for
}