func isLeader(l lease.Lease, machID string) bool { if l == nil { return false } if l.MachineID() != machID { return false } return true }
func renewLeadership(l lease.Lease, ttl time.Duration) lease.Lease { err := l.Renew(ttl) if err != nil { log.Errorf("Engine leadership lost, renewal failed: %v", err) return nil } log.Debugf("Engine leadership renewed") return l }
func (e *Engine) rpcLeadership(leaseTTL time.Duration, machID string) lease.Lease { var previousEngine string if e.lease != nil { previousEngine = e.lease.MachineID() } var l lease.Lease if isLeader(e.lease, machID) { l = rpcRenewLeadership(e.lManager, e.lease, engineVersion, leaseTTL) } else { l = rpcAcquireLeadership(e.registry, e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l if e.lease != nil && previousEngine != e.lease.MachineID() { engineState, err := e.getMachineState(e.lease.MachineID()) if err != nil { log.Errorf("Failed to get machine state for machine %s %v", e.lease.MachineID(), err) } if engineState != nil { log.Infof("Updating engine state... engineState: %v previous: %s lease: %v", engineState, previousEngine, e.lease) go e.updateEngineState(*engineState) } } return e.lease }
func rpcRenewLeadership(lManager lease.Manager, l lease.Lease, ver int, ttl time.Duration) lease.Lease { err := l.Renew(ttl) if err != nil { if eerr, ok := err.(*etcdErr.Error); ok && eerr.ErrorCode == etcdErr.EcodeKeyNotFound { log.Errorf("Retry renew etcd operation that failed due to %v", err) l, err = lManager.AcquireLease(engineLeaseName, l.MachineID(), ver, ttl) if err != nil { log.Errorf("Engine leadership re-acquisition failed: %v", err) return nil } else if l == nil { log.Infof("Unable to re-acquire engine leadership") return nil } log.Infof("Engine leadership re-acquired") return l } else { log.Errorf("Engine leadership lost, renewal failed: %v", err) return nil } } log.Debugf("Engine leadership renewed") return l }
func (e *Engine) Run(ival time.Duration, stop chan bool) { leaseTTL := ival * 5 machID := e.machine.State().ID reconcile := func() { if !ensureEngineVersionMatch(e.cRegistry, engineVersion) { return } var l lease.Lease if isLeader(e.lease, machID) { l = renewLeadership(e.lease, leaseTTL) } else { l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l if !isLeader(e.lease, machID) { return } // abort is closed when reconciliation must stop prematurely, either // by a local timeout or the fleet server shutting down abort := make(chan struct{}) // monitor is used to shut down the following goroutine monitor := make(chan struct{}) go func() { select { case <-monitor: return case <-time.After(leaseTTL): close(abort) case <-stop: close(abort) } }() start := time.Now() e.rec.Reconcile(e, abort) close(monitor) elapsed := time.Now().Sub(start) msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed) if elapsed > ival { log.Warning(msg) } else { log.Debug(msg) } } rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream) rec.Run(stop) }
func (e *Engine) Run(ival time.Duration, stop <-chan struct{}) { leaseTTL := ival * 5 if e.machine.State().Capabilities.Has(machine.CapGRPC) { // With grpc it doesn't make sense to set to 5secs the TTL of the etcd key. // This has a special impact whenever we have high worload in the cluster, cause // it'd provoke constant leader re-elections. // TODO: IMHO, this should be configurable via a flag to disable the TTL. leaseTTL = ival * 500000 } machID := e.machine.State().ID reconcile := func() { if !ensureEngineVersionMatch(e.cRegistry, engineVersion) { return } if e.machine.State().Capabilities.Has(machine.CapGRPC) { // rpcLeadership gets the lease (leader), and apply changes to the engine state if need it. e.lease = e.rpcLeadership(leaseTTL, machID) } else { var l lease.Lease if isLeader(e.lease, machID) { l = renewLeadership(e.lease, leaseTTL) } else { l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l } if !isLeader(e.lease, machID) { return } // abort is closed when reconciliation must stop prematurely, either // by a local timeout or the fleet server shutting down abort := make(chan struct{}) // monitor is used to shut down the following goroutine monitor := make(chan struct{}) go func() { select { case <-monitor: return case <-time.After(leaseTTL): close(abort) case <-stop: close(abort) } }() start := time.Now() e.rec.Reconcile(e, abort) close(monitor) elapsed := time.Now().Sub(start) metrics.ReportEngineReconcileSuccess(start) msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed) if elapsed > ival { log.Warning(msg) } else { log.Debug(msg) } } rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream) rec.Run(stop) }