func (u *Uniter) loop(unitTag names.UnitTag) (err error) { if err := u.init(unitTag); err != nil { if err == worker.ErrTerminateAgent { return err } return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err) } logger.Infof("unit %q started", u.unit) // Install is a special case, as it must run before there // is any remote state, and before the remote state watcher // is started. var charmURL *corecharm.URL var charmModifiedVersion int opState := u.operationExecutor.State() if opState.Kind == operation.Install { logger.Infof("resuming charm install") op, err := u.operationFactory.NewInstall(opState.CharmURL) if err != nil { return errors.Trace(err) } if err := u.operationExecutor.Run(op); err != nil { return errors.Trace(err) } charmURL = opState.CharmURL } else { curl, err := u.unit.CharmURL() if err != nil { return errors.Trace(err) } charmURL = curl svc, err := u.unit.Service() if err != nil { return errors.Trace(err) } charmModifiedVersion, err = svc.CharmModifiedVersion() if err != nil { return errors.Trace(err) } } var ( watcher *remotestate.RemoteStateWatcher watcherMu sync.Mutex ) logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry) retryHookChan := make(chan struct{}, 1) retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{ Min: u.hookRetryStrategy.MinRetryTime, Max: u.hookRetryStrategy.MaxRetryTime, Jitter: u.hookRetryStrategy.JitterRetryTime, Factor: u.hookRetryStrategy.RetryTimeFactor, Func: func() { // Don't try to send on the channel if it's already full // This can happen if the timer fires off before the event is consumed // by the resolver loop select { case retryHookChan <- struct{}{}: default: } }, Clock: u.clock, }) defer func() { // Whenever we exit the uniter we want to stop a potentially // running timer so it doesn't trigger for nothing. retryHookTimer.Reset() }() restartWatcher := func() error { watcherMu.Lock() defer watcherMu.Unlock() if watcher != nil { // watcher added to catacomb, will kill uniter if there's an error. worker.Stop(watcher) } var err error watcher, err = remotestate.NewWatcher( remotestate.WatcherConfig{ State: remotestate.NewAPIState(u.st), LeadershipTracker: u.leadershipTracker, UnitTag: unitTag, UpdateStatusChannel: u.updateStatusAt, CommandChannel: u.commandChannel, RetryHookChannel: retryHookChan, }) if err != nil { return errors.Trace(err) } if err := u.catacomb.Add(watcher); err != nil { return errors.Trace(err) } return nil } onIdle := func() error { opState := u.operationExecutor.State() if opState.Kind != operation.Continue { // We should only set idle status if we're in // the "Continue" state, which indicates that // there is nothing to do and we're not in an // error state. return nil } return setAgentStatus(u, status.StatusIdle, "", nil) } clearResolved := func() error { if err := u.unit.ClearResolved(); err != nil { return errors.Trace(err) } watcher.ClearResolvedMode() return nil } for { if err = restartWatcher(); err != nil { err = errors.Annotate(err, "(re)starting watcher") break } uniterResolver := NewUniterResolver(ResolverConfig{ ClearResolved: clearResolved, ReportHookError: u.reportHookError, FixDeployer: u.deployer.Fix, ShouldRetryHooks: u.hookRetryStrategy.ShouldRetry, StartRetryHookTimer: retryHookTimer.Start, StopRetryHookTimer: retryHookTimer.Reset, Actions: actions.NewResolver(), Leadership: uniterleadership.NewResolver(), Relations: relation.NewRelationsResolver(u.relations), Storage: storage.NewResolver(u.storage), Commands: runcommands.NewCommandsResolver( u.commands, watcher.CommandCompleted, ), }) // We should not do anything until there has been a change // to the remote state. The watcher will trigger at least // once initially. select { case <-u.catacomb.Dying(): return u.catacomb.ErrDying() case <-watcher.RemoteStateChanged(): } localState := resolver.LocalState{ CharmURL: charmURL, CharmModifiedVersion: charmModifiedVersion, } for err == nil { err = resolver.Loop(resolver.LoopConfig{ Resolver: uniterResolver, Watcher: watcher, Executor: u.operationExecutor, Factory: u.operationFactory, Abort: u.catacomb.Dying(), OnIdle: onIdle, CharmDirGuard: u.charmDirGuard, }, &localState) switch cause := errors.Cause(err); cause { case nil: // Loop back around. case resolver.ErrLoopAborted: err = u.catacomb.ErrDying() case operation.ErrNeedsReboot: err = worker.ErrRebootMachine case operation.ErrHookFailed: // Loop back around. The resolver can tell that it is in // an error state by inspecting the operation state. err = nil case resolver.ErrTerminate: err = u.terminate() case resolver.ErrRestart: // make sure we update the two values used above in // creating LocalState. charmURL = localState.CharmURL charmModifiedVersion = localState.CharmModifiedVersion // leave err assigned, causing loop to break default: // We need to set conflicted from here, because error // handling is outside of the resolver's control. if operation.IsDeployConflictError(cause) { localState.Conflicted = true err = setAgentStatus(u, status.StatusError, "upgrade failed", nil) } else { reportAgentError(u, "resolver loop error", err) } } } if errors.Cause(err) != resolver.ErrRestart { break } } logger.Infof("unit %q shutting down: %s", u.unit, err) return err }
func (u *Uniter) loop(unitTag names.UnitTag) (err error) { if err := u.init(unitTag); err != nil { if err == worker.ErrTerminateAgent { return err } return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err) } logger.Infof("unit %q started", u.unit) // Install is a special case, as it must run before there // is any remote state, and before the remote state watcher // is started. var charmURL *corecharm.URL opState := u.operationExecutor.State() if opState.Kind == operation.Install { logger.Infof("resuming charm install") op, err := u.operationFactory.NewInstall(opState.CharmURL) if err != nil { return errors.Trace(err) } if err := u.operationExecutor.Run(op); err != nil { return errors.Trace(err) } charmURL = opState.CharmURL } else { curl, err := u.unit.CharmURL() if err != nil { return errors.Trace(err) } charmURL = curl } var ( watcher *remotestate.RemoteStateWatcher watcherMu sync.Mutex ) restartWatcher := func() error { watcherMu.Lock() defer watcherMu.Unlock() if watcher != nil { if err := watcher.Stop(); err != nil { return errors.Trace(err) } } var err error watcher, err = remotestate.NewWatcher( remotestate.WatcherConfig{ State: remotestate.NewAPIState(u.st), LeadershipTracker: u.leadershipTracker, UnitTag: unitTag, UpdateStatusChannel: u.updateStatusAt, }) if err != nil { return errors.Trace(err) } // Stop the uniter if the watcher fails. The watcher may be // stopped cleanly, so only kill the tomb if the error is // non-nil. go func(w *remotestate.RemoteStateWatcher) { if err := w.Wait(); err != nil { u.tomb.Kill(err) } }(watcher) return nil } // watcher may be replaced, so use a closure. u.addCleanup(func() error { watcherMu.Lock() defer watcherMu.Unlock() if watcher != nil { return watcher.Stop() } return nil }) onIdle := func() error { opState := u.operationExecutor.State() if opState.Kind != operation.Continue { // We should only set idle status if we're in // the "Continue" state, which indicates that // there is nothing to do and we're not in an // error state. return nil } return setAgentStatus(u, params.StatusIdle, "", nil) } clearResolved := func() error { if err := u.unit.ClearResolved(); err != nil { return errors.Trace(err) } watcher.ClearResolvedMode() return nil } for { if err = restartWatcher(); err != nil { err = errors.Annotate(err, "(re)starting watcher") break } uniterResolver := &uniterResolver{ clearResolved: clearResolved, reportHookError: u.reportHookError, fixDeployer: u.deployer.Fix, actionsResolver: actions.NewResolver(), leadershipResolver: uniterleadership.NewResolver(), relationsResolver: relation.NewRelationsResolver(u.relations), storageResolver: storage.NewResolver(u.storage), } // We should not do anything until there has been a change // to the remote state. The watcher will trigger at least // once initially. select { case <-u.tomb.Dying(): return tomb.ErrDying case <-watcher.RemoteStateChanged(): } localState := resolver.LocalState{CharmURL: charmURL} for err == nil { err = resolver.Loop(resolver.LoopConfig{ Resolver: uniterResolver, Watcher: watcher, Executor: u.operationExecutor, Factory: u.operationFactory, Dying: u.tomb.Dying(), OnIdle: onIdle, CharmDirLocker: u.charmDirLocker, }, &localState) switch cause := errors.Cause(err); cause { case nil: // Loop back around. case tomb.ErrDying: err = tomb.ErrDying case operation.ErrNeedsReboot: err = worker.ErrRebootMachine case operation.ErrHookFailed: // Loop back around. The resolver can tell that it is in // an error state by inspecting the operation state. err = nil case resolver.ErrTerminate: err = u.terminate() case resolver.ErrRestart: charmURL = localState.CharmURL // leave err assigned, causing loop to break default: // We need to set conflicted from here, because error // handling is outside of the resolver's control. if operation.IsDeployConflictError(cause) { localState.Conflicted = true err = setAgentStatus(u, params.StatusError, "upgrade failed", nil) } else { reportAgentError(u, "resolver loop error", err) } } } if errors.Cause(err) != resolver.ErrRestart { break } } logger.Infof("unit %q shutting down: %s", u.unit, err) return err }