// newMetricsManager creates a runner that will run the metricsmanagement workers.
func newMetricsManager(client metricsmanager.MetricsManagerClient, notify chan string) (worker.Runner, error) {
	// TODO(fwereade): break this out into separate manifolds (with their own facades).

	// Periodic workers automatically retry so none should return an error. If they do
	// it's ok to restart them individually.
	isFatal := func(error) bool {
		return false
	}
	// All errors are equal.
	moreImportant := func(error, error) bool {
		return false
	}
	runner := worker.NewRunner(isFatal, moreImportant, worker.RestartDelay)
	err := runner.StartWorker("sender", func() (worker.Worker, error) {
		return newSender(client, notify), nil
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	err = runner.StartWorker("cleanup", func() (worker.Worker, error) {
		return newCleanup(client, notify), nil
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return runner, nil
}
// Init initializes the command for running.
func (a *UnitAgent) Init(args []string) error {
	if a.UnitName == "" {
		return cmdutil.RequiredError("unit-name")
	}
	if !names.IsValidUnit(a.UnitName) {
		return fmt.Errorf(`--unit-name option expects "<service>/<n>" argument`)
	}
	if err := a.AgentConf.CheckArgs(args); err != nil {
		return err
	}
	a.runner = worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant, worker.RestartDelay)

	if !a.logToStdErr {
		if err := a.ReadConfig(a.Tag().String()); err != nil {
			return err
		}
		agentConfig := a.CurrentConfig()

		// The writer in ctx.stderr gets set as the loggo writer in github.com/juju/cmd/logging.go.
		a.ctx.Stderr = &lumberjack.Logger{
			Filename:   agent.LogFilename(agentConfig),
			MaxSize:    300, // megabytes
			MaxBackups: 2,
		}
	}
	return nil
}
func (*RunnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance, 3*time.Second)
	starter := newTestWorkerStarter()
	starter.stopWait = make(chan struct{})

	// Start a worker, and wait for it.
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)

	// XXX the above does not imply the *runner* knows it's started.
	// voodoo sleep ahoy!
	time.Sleep(testing.ShortWait)

	// Stop the worker, which will block...
	err = runner.StopWorker("id")
	c.Assert(err, jc.ErrorIsNil)

	// While it's still blocked, try to start another.
	err = runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)

	// Unblock the stopping worker, and check that the task is
	// restarted immediately without the usual restart timeout
	// delay.
	t0 := time.Now()
	close(starter.stopWait)
	starter.assertStarted(c, false) // stop notification
	starter.assertStarted(c, true)  // start notification
	restartDuration := time.Since(t0)
	if restartDuration > 1*time.Second {
		c.Fatalf("task did not restart immediately")
	}
	c.Assert(worker.Stop(runner), gc.IsNil)
}
// NewMetricsManager creates a runner that will run the metricsmanagement workers.
func NewMetricsManager(client metricsmanager.MetricsManagerClient) (worker.Runner, error) {
	// Periodic workers automatically retry so none should return an error. If they do
	// it's ok to restart them individually.
	isFatal := func(error) bool {
		return false
	}
	// All errors are equal.
	moreImportant := func(error, error) bool {
		return false
	}
	runner := worker.NewRunner(isFatal, moreImportant, worker.RestartDelay)
	err := runner.StartWorker("sender", func() (worker.Worker, error) {
		return NewSender(client), nil
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	err = runner.StartWorker("cleanup", func() (worker.Worker, error) {
		return NewCleanup(client), nil
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return runner, nil
}
func (a *agent) mongoWorker() (worker.Worker, error) {
	dialInfo := gitjujutesting.MgoDialInfo(coretesting.Certs, a.hostPort)
	session, err := mgo.DialWithInfo(dialInfo)
	if err != nil {
		return nil, err
	}
	mc := &mongoConn{
		localHostPort: a.hostPort,
		session:       session,
	}
	fn := func(err0, err1 error) bool { return true }
	runner := worker.NewRunner(connectionIsFatal(mc), fn, worker.RestartDelay)
	singularRunner, err := singular.New(runner, mc)
	if err != nil {
		return nil, fmt.Errorf("cannot start singular runner: %v", err)
	}
	a.notify.workerConnected()
	singularRunner.StartWorker(fmt.Sprint("worker-", a.notify.id), func() (worker.Worker, error) {
		return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
			return a.worker(session, stop)
		}), nil
	})
	return runner, nil
}
func (m *modelWorkerManager) loop() error {
	m.runner = worker.NewRunner(
		neverFatal, neverImportant, m.config.ErrorDelay,
	)
	if err := m.catacomb.Add(m.runner); err != nil {
		return errors.Trace(err)
	}
	watcher := m.config.Backend.WatchModels()
	if err := m.catacomb.Add(watcher); err != nil {
		return errors.Trace(err)
	}

	for {
		select {
		case <-m.catacomb.Dying():
			return m.catacomb.ErrDying()
		case uuids, ok := <-watcher.Changes():
			if !ok {
				return errors.New("changes stopped")
			}
			for _, modelUUID := range uuids {
				if err := m.ensure(m.config.ControllerUUID, modelUUID); err != nil {
					return errors.Trace(err)
				}
			}
		}
	}
}
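// The loop above relies on "never fatal, never important" policies so a broken
// per-model worker is restarted on its own instead of taking down the runner.
// Their exact definitions are not shown here; a minimal sketch of what they
// likely look like, mirroring the inline closures used in newMetricsManager:
func neverFatal(error) bool {
	// No error from a model worker is treated as fatal to the runner.
	return false
}

func neverImportant(error, error) bool {
	// All errors are considered equally (un)important.
	return false
}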
func (s *ContainerSetupSuite) setupContainerWorker(c *gc.C, tag names.MachineTag) (worker.StringsWatchHandler, worker.Runner) {
	testing.PatchExecutable(c, s, "ubuntu-cloudimg-query", containertesting.FakeLxcURLScript)
	runner := worker.NewRunner(allFatal, noImportance)
	pr := s.st.Provisioner()
	machine, err := pr.Machine(tag)
	c.Assert(err, jc.ErrorIsNil)
	err = machine.SetSupportedContainers(instance.ContainerTypes...)
	c.Assert(err, jc.ErrorIsNil)
	cfg := s.AgentConfigForTag(c, tag)

	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	params := provisioner.ContainerSetupParams{
		Runner:              runner,
		WorkerName:          watcherName,
		SupportedContainers: instance.ContainerTypes,
		ImageURLGetter:      &containertesting.MockURLGetter{},
		Machine:             machine,
		Provisioner:         pr,
		Config:              cfg,
		InitLock:            s.initLock,
	}
	handler := provisioner.NewContainerSetupHandler(params)
	runner.StartWorker(watcherName, func() (worker.Worker, error) {
		return worker.NewStringsWorker(handler), nil
	})
	return handler, runner
}
func (*runnerSuite) TestOneWorkerStartWhenStopping(c *gc.C) {
	worker.RestartDelay = 3 * time.Second
	runner := worker.NewRunner(allFatal, noImportance)
	starter := newTestWorkerStarter()
	starter.stopWait = make(chan struct{})
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	err = runner.StopWorker("id")
	c.Assert(err, jc.ErrorIsNil)
	err = runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	close(starter.stopWait)
	starter.assertStarted(c, false)

	// Check that the task is restarted immediately without
	// the usual restart timeout delay.
	t0 := time.Now()
	starter.assertStarted(c, true)
	restartDuration := time.Since(t0)
	if restartDuration > 1*time.Second {
		c.Fatalf("task did not restart immediately")
	}
	c.Assert(worker.Stop(runner), gc.IsNil)
}
func (a *UnitAgent) APIWorkers() (worker.Worker, error) {
	agentConfig := a.CurrentConfig()
	dataDir := agentConfig.DataDir()
	hookLock, err := hookExecutionLock(dataDir)
	if err != nil {
		return nil, err
	}
	st, entity, err := openAPIState(agentConfig, a)
	if err != nil {
		return nil, err
	}
	runner := worker.NewRunner(connectionIsFatal(st), moreImportant)
	runner.StartWorker("upgrader", func() (worker.Worker, error) {
		return upgrader.NewUpgrader(st.Upgrader(), agentConfig), nil
	})
	runner.StartWorker("logger", func() (worker.Worker, error) {
		return workerlogger.NewLogger(st.Logger(), agentConfig), nil
	})
	runner.StartWorker("uniter", func() (worker.Worker, error) {
		return uniter.NewUniter(st.Uniter(), entity.Tag(), dataDir, hookLock), nil
	})
	runner.StartWorker("apiaddressupdater", func() (worker.Worker, error) {
		return apiaddressupdater.NewAPIAddressUpdater(st.Uniter(), a), nil
	})
	runner.StartWorker("rsyslog", func() (worker.Worker, error) {
		return newRsyslogConfigWorker(st.Rsyslog(), agentConfig, rsyslog.RsyslogModeForwarding)
	})
	return newCloseWorker(runner, st), nil
}
func newRunner() worker.Runner {
	return worker.NewRunner(
		func(err error) bool { return err == errFatal },
		func(err0, err1 error) bool { return true },
	)
}
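// A minimal usage sketch for a runner built this way, assuming a hypothetical
// newDummyWorker constructor. It exercises the same StartWorker, StopWorker,
// and Stop calls that the tests below rely on:
func runExample() error {
	runner := newRunner()
	// Register and start a named worker; the start function is retried on
	// non-fatal failure.
	if err := runner.StartWorker("example", func() (worker.Worker, error) {
		return newDummyWorker(), nil // hypothetical worker constructor
	}); err != nil {
		return err
	}
	// Stop just that worker by name.
	if err := runner.StopWorker("example"); err != nil {
		return err
	}
	// Kill the runner and wait for it to finish, returning any error that
	// caused it to die.
	return worker.Stop(runner)
}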
func (*runnerSuite) TestOneWorkerStartFatalError(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance)
	starter := newTestWorkerStarter()
	starter.startErr = errors.New("cannot start test task")
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	err = runner.Wait()
	c.Assert(err, gc.Equals, starter.startErr)
}
func (*runnerSuite) TestOneWorkerStart(c *gc.C) {
	runner := worker.NewRunner(noneFatal, noImportance)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	c.Assert(worker.Stop(runner), gc.IsNil)
	starter.assertStarted(c, false)
}
func (*runnerSuite) TestOneWorkerStartStop(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance, time.Millisecond)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	err = runner.StopWorker("id")
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, false)
	c.Assert(worker.Stop(runner), gc.IsNil)
}
func (*runnerSuite) TestOneWorkerStopFatalError(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance)
	starter := newTestWorkerStarter()
	starter.stopErr = errors.New("stop error")
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	err = runner.StopWorker("id")
	c.Assert(err, jc.ErrorIsNil)
	err = runner.Wait()
	c.Assert(err, gc.Equals, starter.stopErr)
}
func (*runnerSuite) TestOneWorkerDieFatalError(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	dieErr := errors.New("error when running")
	starter.die <- dieErr
	err = runner.Wait()
	c.Assert(err, gc.Equals, dieErr)
	starter.assertStarted(c, false)
}
// Init initializes the command for running.
func (a *UnitAgent) Init(args []string) error {
	if a.UnitName == "" {
		return requiredError("unit-name")
	}
	if !names.IsUnit(a.UnitName) {
		return fmt.Errorf(`--unit-name option expects "<service>/<n>" argument`)
	}
	if err := a.AgentConf.CheckArgs(args); err != nil {
		return err
	}
	a.runner = worker.NewRunner(isFatal, moreImportant)
	return nil
}
func (*runnerSuite) TestOneWorkerFinish(c *gc.C) {
	runner := worker.NewRunner(noneFatal, noImportance, time.Millisecond)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)
	starter.die <- nil
	starter.assertStarted(c, false)
	starter.assertNeverStarted(c)
	c.Assert(worker.Stop(runner), gc.IsNil)
}
// NewEnvWorkerManager returns a Worker which manages workers that need to
// run on a per-environment basis. It takes a function which will be called
// to start a worker for a new environment; that worker will be killed when
// its environment goes away.
func NewEnvWorkerManager(
	st InitialState,
	startEnvWorker func(InitialState, *state.State) (worker.Worker, error),
) worker.Worker {
	m := &envWorkerManager{
		st:             st,
		startEnvWorker: startEnvWorker,
	}
	m.runner = worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant)
	go func() {
		defer m.tomb.Done()
		m.tomb.Kill(m.loop())
	}()
	return m
}
func (*runnerSuite) TestOneWorkerRestartDelay(c *gc.C) {
	worker.RestartDelay = 100 * time.Millisecond
	runner := worker.NewRunner(noneFatal, noImportance)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, gc.IsNil)
	starter.assertStarted(c, true)
	starter.die <- fmt.Errorf("non-fatal error")
	starter.assertStarted(c, false)
	t0 := time.Now()
	starter.assertStarted(c, true)
	restartDuration := time.Since(t0)
	if restartDuration < worker.RestartDelay {
		c.Fatalf("restart delay was not respected; got %v want %v", restartDuration, worker.RestartDelay)
	}
}
// MachineAgentFactoryFn returns a function which instantiates a // MachineAgent given a machineId. func MachineAgentFactoryFn( agentConfWriter AgentConfigWriter, bufferedLogs logsender.LogRecordCh, rootDir string, ) func(string) *MachineAgent { return func(machineId string) *MachineAgent { return NewMachineAgent( machineId, agentConfWriter, bufferedLogs, worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant, worker.RestartDelay), looputil.NewLoopDeviceManager(), rootDir, ) } }
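// A minimal sketch of how the factory might be used; the agentConf,
// bufferedLogs, and rootDir values stand in for whatever the calling command
// has already set up, and the machine id "42" is purely illustrative:
func buildMachineAgent(agentConf AgentConfigWriter, bufferedLogs logsender.LogRecordCh, rootDir string) *MachineAgent {
	newAgent := MachineAgentFactoryFn(agentConf, bufferedLogs, rootDir)
	// Each call produces an agent with its own freshly constructed Runner.
	return newAgent("42")
}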
func (s *ContainerSetupSuite) setupContainerWorker(c *gc.C, tag names.MachineTag) (worker.StringsWatchHandler, worker.Runner) {
	runner := worker.NewRunner(allFatal, noImportance)
	pr := s.st.Provisioner()
	machine, err := pr.Machine(tag)
	c.Assert(err, gc.IsNil)
	err = machine.SetSupportedContainers(instance.ContainerTypes...)
	c.Assert(err, gc.IsNil)
	cfg := s.AgentConfigForTag(c, tag)

	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	handler := provisioner.NewContainerSetupHandler(runner, watcherName, instance.ContainerTypes, machine, pr, cfg, s.initLock)
	runner.StartWorker(watcherName, func() (worker.Worker, error) {
		return worker.NewStringsWorker(handler), nil
	})
	return handler, runner
}
func (*runnerSuite) TestErrorImportance(c *gc.C) {
	moreImportant := func(err0, err1 error) bool {
		return err0.(errorLevel) > err1.(errorLevel)
	}
	id := func(i int) string { return fmt.Sprint(i) }
	runner := worker.NewRunner(allFatal, moreImportant)
	for i := 0; i < 10; i++ {
		starter := newTestWorkerStarter()
		starter.stopErr = errorLevel(i)
		err := runner.StartWorker(id(i), testWorkerStart(starter))
		c.Assert(err, jc.ErrorIsNil)
	}
	err := runner.StopWorker(id(4))
	c.Assert(err, jc.ErrorIsNil)
	err = runner.Wait()
	c.Assert(err, gc.Equals, errorLevel(9))
}
func (*runnerSuite) TestOneWorkerRestart(c *gc.C) {
	runner := worker.NewRunner(noneFatal, noImportance)
	starter := newTestWorkerStarter()
	err := runner.StartWorker("id", testWorkerStart(starter))
	c.Assert(err, jc.ErrorIsNil)
	starter.assertStarted(c, true)

	// Check it restarts a few times.
	for i := 0; i < 3; i++ {
		starter.die <- fmt.Errorf("an error")
		starter.assertStarted(c, false)
		starter.assertStarted(c, true)
	}
	c.Assert(worker.Stop(runner), gc.IsNil)
	starter.assertStarted(c, false)
}
// NewModelWorkerManager returns a Worker which manages workers that need to
// run on a per-model basis. It takes functions which will be called to start
// workers for a new model and for a dying model; those workers will be killed
// when a model goes away.
func NewModelWorkerManager(
	st InitialState,
	startModelWorker modelWorkersCreator,
	dyingModelWorker modelWorkersCreator,
	delay time.Duration,
) worker.Worker {
	m := &modelWorkerManager{
		st:               st,
		startModelWorker: startModelWorker,
		dyingModelWorker: dyingModelWorker,
	}
	m.runner = worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant, delay)
	go func() {
		defer m.tomb.Done()
		m.tomb.Kill(m.loop())
	}()
	return m
}
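// A minimal sketch of wiring this up, assuming (by analogy with
// NewEnvWorkerManager above) that modelWorkersCreator has the shape
// func(InitialState, *state.State) (worker.Worker, error); the
// newModelWorkers and newDyingModelWorkers helpers are hypothetical:
func startManager(st InitialState) worker.Worker {
	return NewModelWorkerManager(
		st,
		newModelWorkers,      // hypothetical: workers for a live model
		newDyingModelWorkers, // hypothetical: workers for a dying model
		worker.RestartDelay,
	)
}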
func (a *UnitAgent) APIWorkers() (worker.Worker, error) {
	agentConfig := a.CurrentConfig()
	dataDir := agentConfig.DataDir()
	hookLock, err := hookExecutionLock(dataDir)
	if err != nil {
		return nil, err
	}
	st, entity, err := openAPIState(agentConfig, a)
	if err != nil {
		return nil, err
	}

	// Before starting any workers, ensure we record the Juju version this unit
	// agent is running.
	currentTools := &tools.Tools{Version: version.Current}
	if err := st.Upgrader().SetVersion(agentConfig.Tag().String(), currentTools.Version); err != nil {
		return nil, errors.Annotate(err, "cannot set unit agent version")
	}

	runner := worker.NewRunner(connectionIsFatal(st), moreImportant)
	runner.StartWorker("upgrader", func() (worker.Worker, error) {
		return upgrader.NewUpgrader(
			st.Upgrader(),
			agentConfig,
			agentConfig.UpgradedToVersion(),
			func() bool { return false },
		), nil
	})
	runner.StartWorker("logger", func() (worker.Worker, error) {
		return workerlogger.NewLogger(st.Logger(), agentConfig), nil
	})
	runner.StartWorker("uniter", func() (worker.Worker, error) {
		return uniter.NewUniter(st.Uniter(), entity.Tag(), dataDir, hookLock), nil
	})
	runner.StartWorker("apiaddressupdater", func() (worker.Worker, error) {
		return apiaddressupdater.NewAPIAddressUpdater(st.Uniter(), a), nil
	})
	runner.StartWorker("rsyslog", func() (worker.Worker, error) {
		return newRsyslogConfigWorker(st.Rsyslog(), agentConfig, rsyslog.RsyslogModeForwarding)
	})
	return newCloseWorker(runner, st), nil
}
func (*runnerSuite) TestAllWorkersStoppedWhenOneDiesWithFatalError(c *gc.C) {
	runner := worker.NewRunner(allFatal, noImportance)
	var starters []*testWorkerStarter
	for i := 0; i < 10; i++ {
		starter := newTestWorkerStarter()
		err := runner.StartWorker(fmt.Sprint(i), testWorkerStart(starter))
		c.Assert(err, jc.ErrorIsNil)
		starters = append(starters, starter)
	}
	for _, starter := range starters {
		starter.assertStarted(c, true)
	}
	dieErr := errors.New("fatal error")
	starters[4].die <- dieErr
	err := runner.Wait()
	c.Assert(err, gc.Equals, dieErr)
	for _, starter := range starters {
		starter.assertStarted(c, false)
	}
}
func (*runnerSuite) TestFatalErrorWhileSelfStartWorker(c *gc.C) {
	// Original deadlock problem that this tests for:
	// A worker tries to call StartWorker in its start function
	// at the same time another worker dies with a fatal error.
	// It might not be able to send on startc.
	runner := worker.NewRunner(allFatal, noImportance)

	selfStarter := newTestWorkerStarter()
	// Make the startNotify channel synchronous so
	// we can delay the start indefinitely.
	selfStarter.startNotify = make(chan bool)
	selfStarter.hook = func() {
		runner.StartWorker("another", func() (worker.Worker, error) {
			return nil, fmt.Errorf("no worker started")
		})
	}
	err := runner.StartWorker("self starter", testWorkerStart(selfStarter))
	c.Assert(err, jc.ErrorIsNil)

	fatalStarter := newTestWorkerStarter()
	fatalStarter.startErr = fmt.Errorf("a fatal error")
	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
	c.Assert(err, jc.ErrorIsNil)

	// Wait for the runner loop to react to the fatal
	// error and go into final shutdown mode.
	time.Sleep(10 * time.Millisecond)

	// At this point, the loop is in shutdown mode, but the
	// selfStarter's worker is still in its start function.
	// When the start function continues (the first assertStarted
	// allows that to happen) it will try to create a new
	// worker. This failed in an earlier version of the code because the
	// loop was not ready to receive start requests.
	selfStarter.assertStarted(c, true)
	selfStarter.assertStarted(c, false)

	err = runner.Wait()
	c.Assert(err, gc.Equals, fatalStarter.startErr)
}
func (*runnerSuite) TestFatalErrorWhileStarting(c *gc.C) {
	// Original deadlock problem that this tests for:
	// A worker dies with a fatal error while another worker
	// is inside start(). runWorker can't send startInfo on startedc.
	runner := worker.NewRunner(allFatal, noImportance)

	slowStarter := newTestWorkerStarter()
	// Make the startNotify channel synchronous so
	// we can delay the start indefinitely.
	slowStarter.startNotify = make(chan bool)
	err := runner.StartWorker("slow starter", testWorkerStart(slowStarter))
	c.Assert(err, jc.ErrorIsNil)

	fatalStarter := newTestWorkerStarter()
	fatalStarter.startErr = fmt.Errorf("a fatal error")
	err = runner.StartWorker("fatal worker", testWorkerStart(fatalStarter))
	c.Assert(err, jc.ErrorIsNil)

	// Wait for the runner loop to react to the fatal
	// error and go into final shutdown mode.
	time.Sleep(10 * time.Millisecond)

	// At this point, the loop is in shutdown mode, but the
	// slowStarter's worker is still in its start function.
	// When the start function continues (the first assertStarted
	// allows that to happen) and returns the new Worker,
	// runWorker will try to send it on runner.startedc.
	// This test makes sure that succeeds ok.
	slowStarter.assertStarted(c, true)
	slowStarter.assertStarted(c, false)

	err = runner.Wait()
	c.Assert(err, gc.Equals, fatalStarter.startErr)
}
func (s *UpgradeSuite) TestLoginsDuringUpgrade(c *gc.C) {
	// Create a machine agent to upgrade.
	machine, machine0Conf, _ := s.primeAgentVersion(c, s.oldVersion, state.JobManageEnviron)
	a := s.newAgent(c, machine)

	// Mock out upgrade logic, using a channel so that the test knows
	// when upgrades have started and can control when upgrades
	// should finish.
	upgradeCh := make(chan bool)
	abort := make(chan bool)
	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
		// Signal that the upgrade has started.
		select {
		case upgradeCh <- true:
		case <-abort:
			return nil
		}
		// Wait for the signal that upgrades should finish.
		select {
		case <-upgradeCh:
		case <-abort:
			return nil
		}
		return nil
	}
	s.PatchValue(&upgradesPerformUpgrade, fakePerformUpgrade)

	// Start the API server and upgrade-steps workers just as the agent would.
	runner := worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant)
	defer func() {
		close(abort)
		runner.Kill()
		runner.Wait()
	}()
	certChangedChan := make(chan params.StateServingInfo)
	runner.StartWorker("apiserver", a.apiserverWorkerStarter(s.State, certChangedChan))
	runner.StartWorker("upgrade-steps", a.upgradeStepsWorkerStarter(
		s.APIState,
		[]multiwatcher.MachineJob{multiwatcher.JobManageEnviron},
	))

	// Set up a second machine to log in as. API logins are tested
	// manually so there's no need to actually start this machine.
	var machine1Conf agent.Config
	_, machine1Conf, _ = s.primeAgent(c, state.JobHostUnits)

	c.Assert(waitForUpgradeToStart(upgradeCh), jc.IsTrue)

	// Only user and local logins are allowed during upgrade. Users get a restricted API.
	s.checkLoginToAPIAsUser(c, machine0Conf, RestrictedAPIExposed)
	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsFalse)

	close(upgradeCh) // Allow the upgrade to complete.

	waitForUpgradeToFinish(c, machine0Conf)

	// Only user and local logins are allowed even after the upgrade steps
	// because the agent upgrade is not finished yet.
	s.checkLoginToAPIAsUser(c, machine0Conf, RestrictedAPIExposed)
	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsFalse)

	machineAPI := s.OpenAPIAsMachine(c, machine.Tag(), initialMachinePassword, agent.BootstrapNonce)
	runner.StartWorker("upgrader", a.agentUpgraderWorkerStarter(machineAPI.Upgrader(), machine0Conf))

	// Wait for the agent upgrade worker to determine that no
	// agent upgrades are required.
	select {
	case <-a.initialAgentUpgradeCheckComplete:
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timeout waiting for upgrade check")
	}

	// All logins are allowed after the upgrade.
	s.checkLoginToAPIAsUser(c, machine0Conf, FullAPIExposed)
	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsTrue)
}
func newConnRunner(conns ...cmdutil.Pinger) worker.Runner {
	return worker.NewRunner(cmdutil.ConnectionIsFatal(logger, conns...), cmdutil.MoreImportant, worker.RestartDelay)
}