Exemplo n.º 1
0
func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
	if err := u.init(unitTag); err != nil {
		if err == worker.ErrTerminateAgent {
			return err
		}
		return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err)
	}
	logger.Infof("unit %q started", u.unit)

	// Install is a special case, as it must run before there
	// is any remote state, and before the remote state watcher
	// is started.
	var charmURL *corecharm.URL
	var charmModifiedVersion int
	opState := u.operationExecutor.State()
	if opState.Kind == operation.Install {
		logger.Infof("resuming charm install")
		op, err := u.operationFactory.NewInstall(opState.CharmURL)
		if err != nil {
			return errors.Trace(err)
		}
		if err := u.operationExecutor.Run(op); err != nil {
			return errors.Trace(err)
		}
		charmURL = opState.CharmURL
	} else {
		curl, err := u.unit.CharmURL()
		if err != nil {
			return errors.Trace(err)
		}
		charmURL = curl
		svc, err := u.unit.Service()
		if err != nil {
			return errors.Trace(err)
		}
		charmModifiedVersion, err = svc.CharmModifiedVersion()
		if err != nil {
			return errors.Trace(err)
		}
	}

	var (
		watcher   *remotestate.RemoteStateWatcher
		watcherMu sync.Mutex
	)

	logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry)
	retryHookChan := make(chan struct{}, 1)
	retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{
		Min:    u.hookRetryStrategy.MinRetryTime,
		Max:    u.hookRetryStrategy.MaxRetryTime,
		Jitter: u.hookRetryStrategy.JitterRetryTime,
		Factor: u.hookRetryStrategy.RetryTimeFactor,
		Func: func() {
			// Don't try to send on the channel if it's already full
			// This can happen if the timer fires off before the event is consumed
			// by the resolver loop
			select {
			case retryHookChan <- struct{}{}:
			default:
			}
		},
		Clock: u.clock,
	})
	defer func() {
		// Whenever we exit the uniter we want to stop a potentially
		// running timer so it doesn't trigger for nothing.
		retryHookTimer.Reset()
	}()

	restartWatcher := func() error {
		watcherMu.Lock()
		defer watcherMu.Unlock()

		if watcher != nil {
			// watcher added to catacomb, will kill uniter if there's an error.
			worker.Stop(watcher)
		}
		var err error
		watcher, err = remotestate.NewWatcher(
			remotestate.WatcherConfig{
				State:               remotestate.NewAPIState(u.st),
				LeadershipTracker:   u.leadershipTracker,
				UnitTag:             unitTag,
				UpdateStatusChannel: u.updateStatusAt,
				CommandChannel:      u.commandChannel,
				RetryHookChannel:    retryHookChan,
			})
		if err != nil {
			return errors.Trace(err)
		}
		if err := u.catacomb.Add(watcher); err != nil {
			return errors.Trace(err)
		}
		return nil
	}

	onIdle := func() error {
		opState := u.operationExecutor.State()
		if opState.Kind != operation.Continue {
			// We should only set idle status if we're in
			// the "Continue" state, which indicates that
			// there is nothing to do and we're not in an
			// error state.
			return nil
		}
		return setAgentStatus(u, status.StatusIdle, "", nil)
	}

	clearResolved := func() error {
		if err := u.unit.ClearResolved(); err != nil {
			return errors.Trace(err)
		}
		watcher.ClearResolvedMode()
		return nil
	}

	for {
		if err = restartWatcher(); err != nil {
			err = errors.Annotate(err, "(re)starting watcher")
			break
		}

		uniterResolver := NewUniterResolver(ResolverConfig{
			ClearResolved:       clearResolved,
			ReportHookError:     u.reportHookError,
			FixDeployer:         u.deployer.Fix,
			ShouldRetryHooks:    u.hookRetryStrategy.ShouldRetry,
			StartRetryHookTimer: retryHookTimer.Start,
			StopRetryHookTimer:  retryHookTimer.Reset,
			Actions:             actions.NewResolver(),
			Leadership:          uniterleadership.NewResolver(),
			Relations:           relation.NewRelationsResolver(u.relations),
			Storage:             storage.NewResolver(u.storage),
			Commands: runcommands.NewCommandsResolver(
				u.commands, watcher.CommandCompleted,
			),
		})

		// We should not do anything until there has been a change
		// to the remote state. The watcher will trigger at least
		// once initially.
		select {
		case <-u.catacomb.Dying():
			return u.catacomb.ErrDying()
		case <-watcher.RemoteStateChanged():
		}

		localState := resolver.LocalState{
			CharmURL:             charmURL,
			CharmModifiedVersion: charmModifiedVersion,
		}
		for err == nil {
			err = resolver.Loop(resolver.LoopConfig{
				Resolver:      uniterResolver,
				Watcher:       watcher,
				Executor:      u.operationExecutor,
				Factory:       u.operationFactory,
				Abort:         u.catacomb.Dying(),
				OnIdle:        onIdle,
				CharmDirGuard: u.charmDirGuard,
			}, &localState)
			switch cause := errors.Cause(err); cause {
			case nil:
				// Loop back around.
			case resolver.ErrLoopAborted:
				err = u.catacomb.ErrDying()
			case operation.ErrNeedsReboot:
				err = worker.ErrRebootMachine
			case operation.ErrHookFailed:
				// Loop back around. The resolver can tell that it is in
				// an error state by inspecting the operation state.
				err = nil
			case resolver.ErrTerminate:
				err = u.terminate()
			case resolver.ErrRestart:
				// make sure we update the two values used above in
				// creating LocalState.
				charmURL = localState.CharmURL
				charmModifiedVersion = localState.CharmModifiedVersion
				// leave err assigned, causing loop to break
			default:
				// We need to set conflicted from here, because error
				// handling is outside of the resolver's control.
				if operation.IsDeployConflictError(cause) {
					localState.Conflicted = true
					err = setAgentStatus(u, status.StatusError, "upgrade failed", nil)
				} else {
					reportAgentError(u, "resolver loop error", err)
				}
			}
		}

		if errors.Cause(err) != resolver.ErrRestart {
			break
		}
	}

	logger.Infof("unit %q shutting down: %s", u.unit, err)
	return err
}
Exemplo n.º 2
0
func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
	if err := u.init(unitTag); err != nil {
		if err == worker.ErrTerminateAgent {
			return err
		}
		return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err)
	}
	logger.Infof("unit %q started", u.unit)

	// Install is a special case, as it must run before there
	// is any remote state, and before the remote state watcher
	// is started.
	var charmURL *corecharm.URL
	opState := u.operationExecutor.State()
	if opState.Kind == operation.Install {
		logger.Infof("resuming charm install")
		op, err := u.operationFactory.NewInstall(opState.CharmURL)
		if err != nil {
			return errors.Trace(err)
		}
		if err := u.operationExecutor.Run(op); err != nil {
			return errors.Trace(err)
		}
		charmURL = opState.CharmURL
	} else {
		curl, err := u.unit.CharmURL()
		if err != nil {
			return errors.Trace(err)
		}
		charmURL = curl
	}

	var (
		watcher   *remotestate.RemoteStateWatcher
		watcherMu sync.Mutex
	)

	restartWatcher := func() error {
		watcherMu.Lock()
		defer watcherMu.Unlock()

		if watcher != nil {
			if err := watcher.Stop(); err != nil {
				return errors.Trace(err)
			}
		}
		var err error
		watcher, err = remotestate.NewWatcher(
			remotestate.WatcherConfig{
				State:               remotestate.NewAPIState(u.st),
				LeadershipTracker:   u.leadershipTracker,
				UnitTag:             unitTag,
				UpdateStatusChannel: u.updateStatusAt,
			})
		if err != nil {
			return errors.Trace(err)
		}
		// Stop the uniter if the watcher fails. The watcher may be
		// stopped cleanly, so only kill the tomb if the error is
		// non-nil.
		go func(w *remotestate.RemoteStateWatcher) {
			if err := w.Wait(); err != nil {
				u.tomb.Kill(err)
			}
		}(watcher)
		return nil
	}

	// watcher may be replaced, so use a closure.
	u.addCleanup(func() error {
		watcherMu.Lock()
		defer watcherMu.Unlock()

		if watcher != nil {
			return watcher.Stop()
		}
		return nil
	})

	onIdle := func() error {
		opState := u.operationExecutor.State()
		if opState.Kind != operation.Continue {
			// We should only set idle status if we're in
			// the "Continue" state, which indicates that
			// there is nothing to do and we're not in an
			// error state.
			return nil
		}
		return setAgentStatus(u, params.StatusIdle, "", nil)
	}

	clearResolved := func() error {
		if err := u.unit.ClearResolved(); err != nil {
			return errors.Trace(err)
		}
		watcher.ClearResolvedMode()
		return nil
	}

	for {
		if err = restartWatcher(); err != nil {
			err = errors.Annotate(err, "(re)starting watcher")
			break
		}

		uniterResolver := &uniterResolver{
			clearResolved:      clearResolved,
			reportHookError:    u.reportHookError,
			fixDeployer:        u.deployer.Fix,
			actionsResolver:    actions.NewResolver(),
			leadershipResolver: uniterleadership.NewResolver(),
			relationsResolver:  relation.NewRelationsResolver(u.relations),
			storageResolver:    storage.NewResolver(u.storage),
		}

		// We should not do anything until there has been a change
		// to the remote state. The watcher will trigger at least
		// once initially.
		select {
		case <-u.tomb.Dying():
			return tomb.ErrDying
		case <-watcher.RemoteStateChanged():
		}

		localState := resolver.LocalState{CharmURL: charmURL}
		for err == nil {
			err = resolver.Loop(resolver.LoopConfig{
				Resolver:       uniterResolver,
				Watcher:        watcher,
				Executor:       u.operationExecutor,
				Factory:        u.operationFactory,
				Dying:          u.tomb.Dying(),
				OnIdle:         onIdle,
				CharmDirLocker: u.charmDirLocker,
			}, &localState)
			switch cause := errors.Cause(err); cause {
			case nil:
				// Loop back around.
			case tomb.ErrDying:
				err = tomb.ErrDying
			case operation.ErrNeedsReboot:
				err = worker.ErrRebootMachine
			case operation.ErrHookFailed:
				// Loop back around. The resolver can tell that it is in
				// an error state by inspecting the operation state.
				err = nil
			case resolver.ErrTerminate:
				err = u.terminate()
			case resolver.ErrRestart:
				charmURL = localState.CharmURL
				// leave err assigned, causing loop to break
			default:
				// We need to set conflicted from here, because error
				// handling is outside of the resolver's control.
				if operation.IsDeployConflictError(cause) {
					localState.Conflicted = true
					err = setAgentStatus(u, params.StatusError, "upgrade failed", nil)
				} else {
					reportAgentError(u, "resolver loop error", err)
				}
			}
		}

		if errors.Cause(err) != resolver.ErrRestart {
			break
		}
	}

	logger.Infof("unit %q shutting down: %s", u.unit, err)
	return err
}