Ejemplo n.º 1
0
// isLeader reports whether the given lease is held by the machine identified
// by machID. A nil lease is never treated as held by any machine.
func isLeader(l lease.Lease, machID string) bool {
	return l != nil && l.MachineID() == machID
}
Ejemplo n.º 2
0
// renewLeadership asks the lease to renew itself for another ttl. On success
// the same lease is handed back; if Renew reports an error, the error is
// logged and nil is returned instead.
func renewLeadership(l lease.Lease, ttl time.Duration) lease.Lease {
	if err := l.Renew(ttl); err != nil {
		log.Errorf("Engine leadership lost, renewal failed: %v", err)
		return nil
	}

	log.Debugf("Engine leadership renewed")
	return l
}
Ejemplo n.º 3
0
// rpcLeadership renews the engine lease when this machine already holds it,
// or otherwise tries to acquire it, and stores the outcome in e.lease.
//
// Every observed change of lease holder is logged. When the holder differs
// from the one recorded before the call, the new holder's machine state is
// looked up and, if available, passed to updateEngineState in a separate
// goroutine.
//
// The returned value is the lease now stored in e.lease; it is nil when the
// renewal or acquisition did not yield a lease.
func (e *Engine) rpcLeadership(leaseTTL time.Duration, machID string) lease.Lease {
	// Remember who held the lease before this round so that a change can
	// still be detected once e.lease has been overwritten below.
	previousEngine := ""
	if e.lease != nil {
		previousEngine = e.lease.MachineID()
	}

	var l lease.Lease
	if !isLeader(e.lease, machID) {
		l = rpcAcquireLeadership(e.registry, e.lManager, machID, engineVersion, leaseTTL)
	} else {
		l = rpcRenewLeadership(e.lManager, e.lease, engineVersion, leaseTTL)
	}

	// log all leadership changes
	if l != nil {
		switch {
		case e.lease == nil:
			// No lease was known before: only report it when the holder is
			// some other machine.
			if l.MachineID() != machID {
				log.Infof("Engine leader is %s", l.MachineID())
			}
		case l.MachineID() != e.lease.MachineID():
			log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
		}
	}

	e.lease = l

	// Nothing more to do when there is no lease or the holder is unchanged.
	if e.lease == nil || previousEngine == e.lease.MachineID() {
		return e.lease
	}

	engineState, err := e.getMachineState(e.lease.MachineID())
	if err != nil {
		log.Errorf("Failed to get machine state for machine %s %v", e.lease.MachineID(), err)
	}
	if engineState != nil {
		log.Infof("Updating engine state... engineState: %v previous: %s lease: %v", engineState, previousEngine, e.lease)
		go e.updateEngineState(*engineState)
	}

	return e.lease
}
Ejemplo n.º 4
0
// rpcRenewLeadership renews lease l for ttl and returns it on success.
//
// If Renew fails with an *etcdErr.Error whose code is EcodeKeyNotFound, the
// lease is re-acquired through lManager under engineLeaseName for the same
// machine ID, and the newly acquired lease is returned. Any other renewal
// error, a failed re-acquisition, or a re-acquisition that yields no lease
// is logged and results in nil.
func rpcRenewLeadership(lManager lease.Manager, l lease.Lease, ver int, ttl time.Duration) lease.Lease {
	err := l.Renew(ttl)
	if err == nil {
		log.Debugf("Engine leadership renewed")
		return l
	}

	// Only a "key not found" etcd error is worth a re-acquisition attempt;
	// everything else means leadership is gone.
	eerr, ok := err.(*etcdErr.Error)
	if !ok || eerr.ErrorCode != etcdErr.EcodeKeyNotFound {
		log.Errorf("Engine leadership lost, renewal failed: %v", err)
		return nil
	}

	log.Errorf("Retry renew etcd operation that failed due to %v", err)
	l, err = lManager.AcquireLease(engineLeaseName, l.MachineID(), ver, ttl)
	switch {
	case err != nil:
		log.Errorf("Engine leadership re-acquisition failed: %v", err)
		return nil
	case l == nil:
		log.Infof("Unable to re-acquire engine leadership")
		return nil
	}

	log.Infof("Engine leadership re-acquired")
	return l
}
Ejemplo n.º 5
0
// Run builds the engine's reconcile step and hands it, together with ival and
// e.rStream, to a periodic reconciler that is then run with stop.
//
// Each reconcile step:
//  1. returns immediately if ensureEngineVersionMatch reports a mismatch;
//  2. renews the engine lease if this machine holds it, otherwise tries to
//     acquire it, logging any change of leader;
//  3. returns if this machine is not the leader afterwards;
//  4. runs e.rec.Reconcile with an abort channel that is closed when either
//     leaseTTL elapses or stop becomes readable.
//
// ival is the reconcile interval; the lease TTL is five times ival.
// NOTE(review): rec.Run presumably blocks until stop is signalled — confirm
// against pkg.PeriodicReconciler.
func (e *Engine) Run(ival time.Duration, stop chan bool) {
	leaseTTL := ival * 5
	machID := e.machine.State().ID

	reconcile := func() {
		if !ensureEngineVersionMatch(e.cRegistry, engineVersion) {
			return
		}

		var l lease.Lease
		if isLeader(e.lease, machID) {
			l = renewLeadership(e.lease, leaseTTL)
		} else {
			l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL)
		}

		// log all leadership changes
		if l != nil && e.lease == nil && l.MachineID() != machID {
			log.Infof("Engine leader is %s", l.MachineID())
		} else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() {
			log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
		}

		e.lease = l

		if !isLeader(e.lease, machID) {
			return
		}

		// abort is closed when reconciliation must stop prematurely, either
		// by a local timeout or the fleet server shutting down
		abort := make(chan struct{})

		// monitor is used to shut down the following goroutine
		monitor := make(chan struct{})

		go func() {
			// An explicit timer (rather than time.After) lets us release it
			// as soon as reconciliation finishes instead of keeping it alive
			// until leaseTTL expires.
			timeout := time.NewTimer(leaseTTL)
			defer timeout.Stop()

			select {
			case <-monitor:
				return
			case <-timeout.C:
				close(abort)
			case <-stop:
				close(abort)
			}
		}()

		start := time.Now()
		e.rec.Reconcile(e, abort)
		close(monitor)
		elapsed := time.Since(start)

		// A reconciliation that takes longer than one interval is worth a
		// warning; otherwise it is only debug output.
		msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed)
		if elapsed > ival {
			log.Warning(msg)
		} else {
			log.Debug(msg)
		}
	}

	rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream)
	rec.Run(stop)
}
Ejemplo n.º 6
0
// Run builds the engine's reconcile step and hands it, together with ival and
// e.rStream, to a periodic reconciler that is then run with stop.
//
// Each reconcile step:
//  1. returns immediately if ensureEngineVersionMatch reports a mismatch;
//  2. refreshes e.lease — through rpcLeadership when the local machine has the
//     gRPC capability, otherwise by renewing or acquiring the lease directly —
//     logging any change of leader;
//  3. returns if this machine is not the leader afterwards;
//  4. runs e.rec.Reconcile with an abort channel that is closed when either
//     leaseTTL elapses or stop becomes readable, then reports the run to
//     metrics.
//
// ival is the reconcile interval. The lease TTL is 5*ival, or 500000*ival
// when the machine has the gRPC capability.
// NOTE(review): rec.Run presumably blocks until stop is signalled — confirm
// against pkg.PeriodicReconciler.
func (e *Engine) Run(ival time.Duration, stop <-chan struct{}) {
	leaseTTL := ival * 5
	if e.machine.State().Capabilities.Has(machine.CapGRPC) {
		// With gRPC it doesn't make sense to keep the TTL of the etcd key at
		// a few seconds. A short TTL has a notable impact whenever the
		// cluster is under high workload, because it provokes constant
		// leader re-elections.
		// TODO: IMHO, this should be configurable via a flag to disable the TTL.
		leaseTTL = ival * 500000
	}
	machID := e.machine.State().ID

	reconcile := func() {
		if !ensureEngineVersionMatch(e.cRegistry, engineVersion) {
			return
		}

		if e.machine.State().Capabilities.Has(machine.CapGRPC) {
			// rpcLeadership gets the lease (leader) and applies changes to
			// the engine state if needed.
			e.lease = e.rpcLeadership(leaseTTL, machID)
		} else {
			var l lease.Lease
			if isLeader(e.lease, machID) {
				l = renewLeadership(e.lease, leaseTTL)
			} else {
				l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL)
			}

			// log all leadership changes
			if l != nil && e.lease == nil && l.MachineID() != machID {
				log.Infof("Engine leader is %s", l.MachineID())
			} else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() {
				log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
			}

			e.lease = l
		}

		if !isLeader(e.lease, machID) {
			return
		}

		// abort is closed when reconciliation must stop prematurely, either
		// by a local timeout or the fleet server shutting down
		abort := make(chan struct{})

		// monitor is used to shut down the following goroutine
		monitor := make(chan struct{})

		go func() {
			// An explicit timer (rather than time.After) is stopped as soon
			// as this goroutine exits. With the very long gRPC lease TTL, an
			// unstopped timer per reconcile cycle would otherwise pile up
			// for the whole TTL on runtimes that do not collect unreferenced
			// timers.
			timeout := time.NewTimer(leaseTTL)
			defer timeout.Stop()

			select {
			case <-monitor:
				return
			case <-timeout.C:
				close(abort)
			case <-stop:
				close(abort)
			}
		}()

		start := time.Now()
		e.rec.Reconcile(e, abort)
		close(monitor)
		elapsed := time.Since(start)
		metrics.ReportEngineReconcileSuccess(start)

		// A reconciliation that takes longer than one interval is worth a
		// warning; otherwise it is only debug output.
		msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed)
		if elapsed > ival {
			log.Warning(msg)
		} else {
			log.Debug(msg)
		}
	}

	rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream)
	rec.Run(stop)
}