func (r *Reconciler) Reconcile(e *Engine, stop chan struct{}) { log.Debugf("Polling Registry for actionable work") start := time.Now() clust, err := e.clusterState() if err != nil { log.Errorf("Failed getting current cluster state: %v", err) return } for t := range r.calculateClusterTasks(clust, stop) { err = doTask(t, e) if err != nil { log.Errorf("Failed resolving task: task=%s err=%v", t, err) } } metrics.ReportEngineReconcileSuccess(start) }
func (e *Engine) Run(ival time.Duration, stop <-chan struct{}) { leaseTTL := ival * 5 if e.machine.State().Capabilities.Has(machine.CapGRPC) { // With grpc it doesn't make sense to set to 5secs the TTL of the etcd key. // This has a special impact whenever we have high worload in the cluster, cause // it'd provoke constant leader re-elections. // TODO: IMHO, this should be configurable via a flag to disable the TTL. leaseTTL = ival * 500000 } machID := e.machine.State().ID reconcile := func() { if !ensureEngineVersionMatch(e.cRegistry, engineVersion) { return } if e.machine.State().Capabilities.Has(machine.CapGRPC) { // rpcLeadership gets the lease (leader), and apply changes to the engine state if need it. e.lease = e.rpcLeadership(leaseTTL, machID) } else { var l lease.Lease if isLeader(e.lease, machID) { l = renewLeadership(e.lease, leaseTTL) } else { l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l } if !isLeader(e.lease, machID) { return } // abort is closed when reconciliation must stop prematurely, either // by a local timeout or the fleet server shutting down abort := make(chan struct{}) // monitor is used to shut down the following goroutine monitor := make(chan struct{}) go func() { select { case <-monitor: return case <-time.After(leaseTTL): close(abort) case <-stop: close(abort) } }() start := time.Now() e.rec.Reconcile(e, abort) close(monitor) elapsed := time.Now().Sub(start) metrics.ReportEngineReconcileSuccess(start) msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed) if elapsed > ival { log.Warning(msg) } else { log.Debug(msg) } } rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream) rec.Run(stop) }