// NewLock creates a gate.Lock to be used to synchronise workers which
// need to start after upgrades have completed. If no upgrade steps
// are required the Lock is unlocked and the version in the agent's
// configuration is updated to the currently running version.
//
// The returned Lock should be passed to NewWorker.
func NewLock(a agent.Agent) (gate.Lock, error) {
	lock := gate.NewLock()

	if wrench.IsActive("machine-agent", "always-try-upgrade") {
		// Always enter upgrade mode. This allows testing of upgrades
		// even when there are actually no upgrade steps to run.
		return lock, nil
	}

	err := a.ChangeConfig(func(agentConfig agent.ConfigSetter) error {
		if !upgrades.AreUpgradesDefined(agentConfig.UpgradedToVersion()) {
			logger.Infof("no upgrade steps required or upgrade steps for %v "+
				"have already been run.", jujuversion.Current)
			lock.Unlock()

			// Even if no upgrade is required the version number in
			// the agent's config still needs to be bumped.
			agentConfig.SetUpgradedToVersion(jujuversion.Current)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return lock, nil
}
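// The following is a hedged sketch, not part of the original source: it shows
// how a downstream worker might wait on the Lock returned by NewLock before
// starting its own work. It assumes gate.Lock exposes IsUnlocked() and an
// Unlocked() channel (as in juju's gate package); waitForUpgrade and the
// abort channel are hypothetical names introduced for illustration.
func waitForUpgrade(lock gate.Lock, abort <-chan struct{}) error {
	if lock.IsUnlocked() {
		// Upgrades have already completed (or none were required).
		return nil
	}
	select {
	case <-lock.Unlocked():
		return nil
	case <-abort:
		return errors.New("aborted while waiting for upgrades to complete")
	}
}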
func (s *wrenchSuite) TestSetEnabled(c *gc.C) {
	s.createWrenchDir(c)
	s.createWrenchFile(c, "foo", "bar")

	// Starts enabled.
	c.Assert(wrench.IsEnabled(), jc.IsTrue)
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsTrue)

	// Disable.
	c.Assert(wrench.SetEnabled(false), jc.IsTrue)
	c.Assert(wrench.IsEnabled(), jc.IsFalse)
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)

	// Enable again.
	c.Assert(wrench.SetEnabled(true), jc.IsFalse)
	c.Assert(wrench.IsEnabled(), jc.IsTrue)
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsTrue)
}
func (s *wrenchSuite) TestDirectoryNotOwnedByJujuUser(c *gc.C) {
	s.createWrenchDir(c)
	s.tweakOwner(c, s.wrenchDir)

	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)

	c.Assert(s.logWriter.Log(), jc.LogMatches, []jc.SimpleMessage{{
		loggo.ERROR,
		`wrench directory has incorrect ownership - wrench functionality disabled \(.+\)`,
	}})
}
func (s *wrenchSuite) TestFileNotOwnedByJujuUser(c *gc.C) {
	s.createWrenchDir(c)
	filename := s.createWrenchFile(c, "foo", "bar")
	s.tweakOwner(c, filename)

	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)

	c.Assert(s.logWriter.Log(), jc.LogMatches, []jc.SimpleMessage{{
		loggo.ERROR,
		`wrench file for foo/bar has incorrect ownership - ignoring ` + filename,
	}})
}
func (c *upgradeWorkerContext) run(stop <-chan struct{}) error {
	if wrench.IsActive("machine-agent", "fail-upgrade-start") {
		return nil // Make the worker stop
	}

	select {
	case <-c.UpgradeComplete:
		// Our work is already done (we're probably being restarted
		// because the API connection has gone down), so do nothing.
		return nil
	default:
	}

	c.agentConfig = c.agent.CurrentConfig()

	// If the machine agent is a state server, flag that state
	// needs to be opened before running upgrade steps.
	for _, job := range c.jobs {
		if job == params.JobManageEnviron {
			c.isStateServer = true
		}
	}

	// We need a *state.State for upgrades. We open it independently
	// of StateWorker, because we have no guarantees about when
	// and how often StateWorker might run.
	if c.isStateServer {
		var err error
		c.st, err = openStateForUpgrade(c.agent, c.agentConfig)
		if err != nil {
			return err
		}
		defer c.st.Close()
	}

	if err := c.runUpgrades(); err != nil {
		// Only return an error from the worker if the connection to
		// state went away (possible mongo master change). Returning
		// an error when the connection is lost will cause the agent
		// to restart.
		//
		// For other errors, the error is not returned because we want
		// the machine agent to stay running in an error state waiting
		// for user intervention.
		if isAPILostDuringUpgrade(err) {
			return err
		}
	} else {
		// Upgrade succeeded - signal that the upgrade is complete.
		close(c.UpgradeComplete)
	}
	return nil
}
func getUpgradeStartTimeout(isMaster bool) time.Duration {
	if wrench.IsActive("machine-agent", "short-upgrade-timeout") {
		// This duration is fairly arbitrary. During manual testing it
		// avoids the normal long wait but still provides a small
		// window to check the environment status and logs before the
		// timeout is triggered.
		return time.Minute
	}

	if isMaster {
		return upgradeStartTimeoutMaster
	}
	return upgradeStartTimeoutSecondary
}
func (s *wrenchSuite) TestFilePermsTooLoose(c *gc.C) {
	if runtime.GOOS == "windows" {
		c.Skip("Windows is not fully POSIX compliant")
	}
	s.createWrenchDir(c)
	filename := s.createWrenchFile(c, "foo", "bar")
	err := os.Chmod(filename, 0666)
	c.Assert(err, jc.ErrorIsNil)

	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)

	c.Assert(s.logWriter.Log(), jc.LogMatches, []jc.SimpleMessage{{
		loggo.ERROR,
		`wrench file for foo/bar should only be writable by owner - ignoring ` + filename,
	}})
}
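// Taken together, the wrench tests in this section pin down the on-disk
// format the package expects: one file per category inside the wrench
// directory, one feature name per line (whitespace and mixed line endings
// are tolerated), owned by the agent's user and writable only by the owner.
// The helper below is a hypothetical sketch for manual testing, not part of
// the original source; the wrenchDir argument is a placeholder for whatever
// directory the agent actually uses.
func dropWrench(wrenchDir string) error {
	if err := os.MkdirAll(wrenchDir, 0700); err != nil {
		return err
	}
	// 0600 keeps the file writable by the owner only, matching the
	// permission check exercised in TestFilePermsTooLoose.
	return ioutil.WriteFile(
		filepath.Join(wrenchDir, "machine-agent"),
		[]byte("always-try-upgrade\n"),
		0600,
	)
}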
func (task *provisionerTask) stopInstances(instances []instance.Instance) error {
	// Although calling StopInstance with an empty slice should produce no change in the
	// provider, environs like dummy do not consider this a noop.
	if len(instances) == 0 {
		return nil
	}

	if wrench.IsActive("provisioner", "stop-instances") {
		return errors.New("wrench in the works")
	}

	ids := make([]instance.Id, len(instances))
	for i, inst := range instances {
		ids[i] = inst.Id()
	}

	if err := task.broker.StopInstances(ids...); err != nil {
		return errors.Annotate(err, "broker failed to stop instances")
	}
	return nil
}
// InitializeUsingAgent sets up an upgradeWorkerContext from a machine agent instance.
// It may update the agent's configuration.
func (c *upgradeWorkerContext) InitializeUsingAgent(a upgradingMachineAgent) error {
	if wrench.IsActive("machine-agent", "always-try-upgrade") {
		// Always enter upgrade mode. This allows testing of upgrades
		// even when there are actually no upgrade steps to run.
		return nil
	}

	return a.ChangeConfig(func(agentConfig agent.ConfigSetter) error {
		if !upgrades.AreUpgradesDefined(agentConfig.UpgradedToVersion()) {
			logger.Infof("no upgrade steps required or upgrade steps for %v "+
				"have already been run.", version.Current.Number)
			close(c.UpgradeComplete)

			// Even if no upgrade is required the version number in
			// the agent's config still needs to be bumped.
			agentConfig.SetUpgradedToVersion(version.Current.Number)
		}
		return nil
	})
}
// runUpgrades runs the upgrade operations for each job type and
// updates the upgradedToVersion on success.
func (c *upgradeWorkerContext) runUpgrades() error {
	upgradeInfo, err := c.prepareForUpgrade()
	if err != nil {
		return err
	}

	if wrench.IsActive("machine-agent", "fail-upgrade") {
		return errors.New("wrench")
	}

	if err := c.agent.ChangeConfig(c.runUpgradeSteps); err != nil {
		return err
	}

	if err := c.finaliseUpgrade(upgradeInfo); err != nil {
		return err
	}
	return nil
}
func (s *wrenchSuite) TestIsActiveMultiFeaturesWithMixedNewlines(c *gc.C) {
	s.createWrenchDir(c)
	s.createWrenchFile(c, "foo", "one\ntwo\r\nthree\nbar\n")
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsTrue)
	s.AssertActivationLogged(c)
}
func (s *wrenchSuite) TestNoDirectory(c *gc.C) {
	s.PatchValue(wrench.WrenchDir, "/does/not/exist")
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)
	s.AssertDirErrorLogged(c)
}
func (s *wrenchSuite) TestIsActiveWithWhitespace(c *gc.C) {
	s.createWrenchDir(c)
	s.createWrenchFile(c, "foo", "\tbar ")
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsTrue)
	s.AssertActivationLogged(c)
}
func (c *upgradeWorkerContext) run(stop <-chan struct{}) error {
	if wrench.IsActive("machine-agent", "fail-upgrade-start") {
		return nil // Make the worker stop
	}

	select {
	case <-c.UpgradeComplete:
		// Our work is already done (we're probably being restarted
		// because the API connection has gone down), so do nothing.
		return nil
	default:
	}

	c.agentConfig = c.agent.CurrentConfig()

	c.fromVersion = c.agentConfig.UpgradedToVersion()
	c.toVersion = version.Current.Number
	if c.fromVersion == c.toVersion {
		logger.Infof("upgrade to %v already completed.", c.toVersion)
		close(c.UpgradeComplete)
		return nil
	}

	if err := c.initTag(c.agentConfig.Tag()); err != nil {
		return errors.Trace(err)
	}

	// If the machine agent is a state server, flag that state
	// needs to be opened before running upgrade steps.
	for _, job := range c.jobs {
		if job == multiwatcher.JobManageEnviron {
			c.isStateServer = true
		}
	}

	// We need a *state.State for upgrades. We open it independently
	// of StateWorker, because we have no guarantees about when
	// and how often StateWorker might run.
	if c.isStateServer {
		var err error
		if c.st, err = openStateForUpgrade(c.agent, c.agentConfig); err != nil {
			return err
		}
		defer c.st.Close()

		if c.isMaster, err = isMachineMaster(c.st, c.machineId); err != nil {
			return errors.Trace(err)
		}

		stor := storage.NewStorage(c.st.EnvironUUID(), c.st.MongoSession())
		registerSimplestreamsDataSource(stor)
	}

	if err := c.runUpgrades(); err != nil {
		// Only return an error from the worker if the connection to
		// state went away (possible mongo master change). Returning
		// an error when the connection is lost will cause the agent
		// to restart.
		//
		// For other errors, the error is not returned because we want
		// the machine agent to stay running in an error state waiting
		// for user intervention.
		if isAPILostDuringUpgrade(err) {
			return err
		}
		c.reportUpgradeFailure(err, false)
	} else {
		// Upgrade succeeded - signal that the upgrade is complete.
		logger.Infof("upgrade to %v completed successfully.", c.toVersion)
		c.agent.setMachineStatus(c.apiState, params.StatusStarted, "")
		close(c.UpgradeComplete)
	}
	return nil
}
func (w *upgradesteps) run() error {
	if wrench.IsActive("machine-agent", "fail-upgrade-start") {
		return nil // Make the worker stop
	}

	if w.upgradeComplete.IsUnlocked() {
		// Our work is already done (we're probably being restarted
		// because the API connection has gone down), so do nothing.
		return nil
	}

	w.fromVersion = w.agent.CurrentConfig().UpgradedToVersion()
	w.toVersion = jujuversion.Current
	if w.fromVersion == w.toVersion {
		logger.Infof("upgrade to %v already completed.", w.toVersion)
		w.upgradeComplete.Unlock()
		return nil
	}

	// If the machine agent is a controller, flag that state
	// needs to be opened before running upgrade steps.
	for _, job := range w.jobs {
		if job == multiwatcher.JobManageModel {
			w.isController = true
		}
	}

	// We need a *state.State for upgrades. We open it independently
	// of StateWorker, because we have no guarantees about when
	// and how often StateWorker might run.
	if w.isController {
		var err error
		if w.st, err = w.openState(); err != nil {
			return err
		}
		defer w.st.Close()

		if w.isMaster, err = IsMachineMaster(w.st, w.tag.Id()); err != nil {
			return errors.Trace(err)
		}
	}

	if err := w.runUpgrades(); err != nil {
		// Only return an error from the worker if the connection to
		// state went away (possible mongo master change). Returning
		// an error when the connection is lost will cause the agent
		// to restart.
		//
		// For other errors, the error is not returned because we want
		// the machine agent to stay running in an error state waiting
		// for user intervention.
		if isAPILostDuringUpgrade(err) {
			return err
		}
		w.reportUpgradeFailure(err, false)
	} else {
		// Upgrade succeeded - signal that the upgrade is complete.
		logger.Infof("upgrade to %v completed successfully.", w.toVersion)
		w.machine.SetStatus(status.Started, "", nil)
		w.upgradeComplete.Unlock()
	}
	return nil
}
func (s *wrenchSuite) TestMatchInOtherCategory(c *gc.C) {
	s.createWrenchDir(c)
	s.createWrenchFile(c, "other", "bar")
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)
	s.AssertFileErrorLogged(c)
}
func (s *wrenchSuite) TestNoFile(c *gc.C) {
	s.createWrenchDir(c)
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)
	s.AssertFileErrorLogged(c)
}
func (s *wrenchSuite) TestNotActive(c *gc.C) {
	s.createWrenchDir(c)
	s.createWrenchFile(c, "foo", "abc")
	c.Assert(wrench.IsActive("foo", "bar"), jc.IsFalse)
	s.AssertNothingLogged(c)
}
	if err != nil {
		// This shouldn't happen, and if it does, the state worker will have
		// found out before us, and already errored, or is likely to error out
		// very shortly. All we do here is return the error. The state worker
		// returns an error that will cause the agent to be terminated.
		return false, errors.Trace(err)
	}
	isMaster, err := mongo.IsMaster(st.MongoSession(), machine)
	if err != nil {
		return false, errors.Trace(err)
	}
	return isMaster, nil
}

var waitForOtherStateServers = func(st *state.State, isMaster bool) error {
	if wrench.IsActive("machine-agent", "fail-state-server-upgrade-wait") {
		return errors.New("failing other state servers check due to wrench")
	}
	// TODO(mjs) - for now, assume that the other state servers are
	// ready. This function will be fleshed out once the UpgradeInfo
	// work is done.
	return nil
}

var getUpgradeRetryStrategy = func() utils.AttemptStrategy {
	return utils.AttemptStrategy{
		Delay: 2 * time.Minute,
		Min:   5,
	}
}
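// A hedged sketch, not part of the original source, of how an
// utils.AttemptStrategy like the one returned by getUpgradeRetryStrategy is
// typically consumed: Start() yields an iterator whose Next() reports whether
// another attempt should be made, pausing for Delay between retries, while
// HasNext() says whether a further attempt remains. The tryUpgrade argument
// is a hypothetical stand-in for the real upgrade step runner.
func retryingUpgrade(tryUpgrade func() error) error {
	var err error
	for attempt := getUpgradeRetryStrategy().Start(); attempt.Next(); {
		if err = tryUpgrade(); err == nil {
			return nil
		}
		if attempt.HasNext() {
			logger.Errorf("upgrade attempt failed, will retry: %v", err)
		}
	}
	return err
}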