func (s *UpgradeSuite) assertUpgradeSteps(c *gc.C, job state.MachineJob) {
	s.agentSuite.PatchValue(&version.Current, s.upgradeToVersion)
	err := s.State.SetEnvironAgentVersion(s.upgradeToVersion.Number)
	c.Assert(err, gc.IsNil)

	oldVersion := s.upgradeToVersion
	oldVersion.Major = 1
	oldVersion.Minor = 16
	var oldConfig agent.Config
	s.machine, oldConfig, _ = s.primeAgent(c, oldVersion, job)

	a := s.newAgent(c, s.machine)
	go func() { c.Check(a.Run(nil), gc.IsNil) }()
	defer func() { c.Check(a.Stop(), gc.IsNil) }()

	// Wait for upgrade steps to run.
	success := false
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		conf, err := agent.ReadConfig(agent.ConfigPath(oldConfig.DataDir(), s.machine.Tag()))
		c.Assert(err, gc.IsNil)
		success = conf.UpgradedToVersion() == s.upgradeToVersion.Number
		if success {
			break
		}
	}
	// Upgrade worker has completed ok.
	c.Assert(success, jc.IsTrue)
}

func ensureMongoService(agentConfig agent.Config) error {
	var oplogSize int
	if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" {
		var err error
		if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil {
			return errors.Annotatef(err, "invalid oplog size: %q", oplogSizeString)
		}
	}

	var numaCtlPolicy bool
	if numaCtlString := agentConfig.Value(agent.NumaCtlPreference); numaCtlString != "" {
		var err error
		if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil {
			return errors.Annotatef(err, "invalid numactl preference: %q", numaCtlString)
		}
	}

	si, ok := agentConfig.StateServingInfo()
	if !ok {
		return errors.Errorf("agent config has no state serving info")
	}

	err := mongo.EnsureServiceInstalled(
		agentConfig.DataDir(),
		si.StatePort,
		oplogSize,
		numaCtlPolicy,
	)
	return errors.Annotate(err, "cannot ensure that mongo service start/stop scripts are in place")
}

func (a *MachineAgent) uninstallAgent(agentConfig agent.Config) error {
	var errors []error
	agentServiceName := agentConfig.Value(agent.AgentServiceName)
	if agentServiceName == "" {
		// For backwards compatibility, handle lack of AgentServiceName.
		agentServiceName = os.Getenv("UPSTART_JOB")
	}
	if agentServiceName != "" {
		if err := service.NewService(agentServiceName, common.Conf{}).Remove(); err != nil {
			errors = append(errors, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err))
		}
	}

	// Remove the juju-run symlink.
	if err := os.Remove(jujuRun); err != nil && !os.IsNotExist(err) {
		errors = append(errors, err)
	}

	namespace := agentConfig.Value(agent.Namespace)
	if err := mongo.RemoveService(namespace); err != nil {
		errors = append(errors, fmt.Errorf("cannot stop/remove mongo service with namespace %q: %v", namespace, err))
	}
	if err := os.RemoveAll(agentConfig.DataDir()); err != nil {
		errors = append(errors, err)
	}
	if len(errors) == 0 {
		return nil
	}
	return fmt.Errorf("uninstall failed: %v", errors)
}

// newEnsureServerParams creates an EnsureServerParams from an agent
// configuration.
func newEnsureServerParams(agentConfig agent.Config) (mongo.EnsureServerParams, error) {
	// If oplog size is specified in the agent configuration, use that.
	// Otherwise leave the default zero value to indicate to EnsureServer
	// that it should calculate the size.
	var oplogSize int
	if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" {
		var err error
		if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil {
			return mongo.EnsureServerParams{}, fmt.Errorf("invalid oplog size: %q", oplogSizeString)
		}
	}

	servingInfo, ok := agentConfig.StateServingInfo()
	if !ok {
		return mongo.EnsureServerParams{}, fmt.Errorf("agent config has no state serving info")
	}

	params := mongo.EnsureServerParams{
		StateServingInfo: servingInfo,
		DataDir:          agentConfig.DataDir(),
		Namespace:        agentConfig.Value(agent.Namespace),
		OplogSize:        oplogSize,
	}
	return params, nil
}

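// Hypothetical usage sketch (not from the source): how the params built by
// newEnsureServerParams would typically be consumed. The wrapper function
// name is illustrative, and it assumes mongo.EnsureServer accepts the
// EnsureServerParams value produced above.
func ensureMongoWithAgentConfig(agentConfig agent.Config) error {
	params, err := newEnsureServerParams(agentConfig)
	if err != nil {
		return err
	}
	// EnsureServer calculates a suitable oplog size itself when
	// params.OplogSize is left at its zero value.
	return mongo.EnsureServer(params)
}
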
func waitForUpgradeToFinish(c *gc.C, conf agent.Config) {
	success := false
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		diskConf := readConfigFromDisk(c, conf.DataDir(), conf.Tag())
		success = diskConf.UpgradedToVersion() == version.Current
		if success {
			break
		}
	}
	c.Assert(success, jc.IsTrue)
}

// NewUpgrader returns a new upgrader worker. It watches changes to the
// current version of the current agent (with the given tag) and tries to
// download the tools for any new version into the given data directory. If
// an upgrade is needed, the worker will exit with an UpgradeReadyError
// holding details of the requested upgrade. The tools will have been
// downloaded and unpacked.
func NewUpgrader(st *upgrader.State, agentConfig agent.Config) *Upgrader {
	u := &Upgrader{
		st:      st,
		dataDir: agentConfig.DataDir(),
		tag:     agentConfig.Tag(),
	}
	go func() {
		defer u.tomb.Done()
		u.tomb.Kill(u.loop())
	}()
	return u
}

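// Hypothetical usage sketch (not from the source): running the upgrader and
// reacting when it stops with the UpgradeReadyError described in the doc
// comment above. It assumes the Upgrader exposes the usual tomb-backed Wait
// method and that UpgradeReadyError is defined alongside this worker.
func runUpgraderExample(st *upgrader.State, agentConfig agent.Config) error {
	u := NewUpgrader(st, agentConfig)
	err := u.Wait()
	if _, ok := err.(*UpgradeReadyError); ok {
		// The new tools have already been downloaded and unpacked; the
		// surrounding agent is expected to restart itself using them.
	}
	return err
}
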
// PreUpgradeSteps runs various checks and prepares for performing an upgrade.
// If any check fails, an error is returned which aborts the upgrade.
func PreUpgradeSteps(st *state.State, agentConf agent.Config, isController, isMaster bool) error {
	if err := checkDiskSpace(agentConf.DataDir()); err != nil {
		return errors.Trace(err)
	}
	if isController {
		// Update distro info in case the new Juju controller version
		// is aware of new supported series. We'll keep going if this
		// fails, and the user can manually update it if they need to.
		logger.Infof("updating distro-info")
		if err := updateDistroInfo(); err != nil {
			logger.Warningf("failed to update distro-info: %v", err)
		}
	}
	return nil
}

// updateSupportedContainers records in state that a machine can run the specified containers.
// It starts a watcher and when a container of a given type is first added to the machine,
// the watcher is killed, the machine is set up to be able to start containers of the given type,
// and a suitable provisioner is started.
func (a *MachineAgent) updateSupportedContainers(
	runner worker.Runner,
	st *api.State,
	machineTag string,
	containers []instance.ContainerType,
	agentConfig agent.Config,
) error {
	pr := st.Provisioner()
	tag, err := names.ParseMachineTag(machineTag)
	if err != nil {
		return err
	}
	machine, err := pr.Machine(tag)
	if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
		return worker.ErrTerminateAgent
	}
	if err != nil {
		return errors.Annotatef(err, "cannot load machine %s from state", tag)
	}
	if len(containers) == 0 {
		if err := machine.SupportsNoContainers(); err != nil {
			return errors.Annotatef(err, "clearing supported containers for %s", tag)
		}
		return nil
	}
	if err := machine.SetSupportedContainers(containers...); err != nil {
		return errors.Annotatef(err, "setting supported containers for %s", tag)
	}
	initLock, err := hookExecutionLock(agentConfig.DataDir())
	if err != nil {
		return err
	}
	// Start the watcher to fire when a container is first requested on the machine.
	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	handler := provisioner.NewContainerSetupHandler(
		runner,
		watcherName,
		containers,
		machine,
		pr,
		agentConfig,
		initLock,
	)
	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
		return worker.NewStringsWorker(handler), nil
	})
	return nil
}

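// Hypothetical call-site sketch (not from the source): how the machine agent
// might invoke updateSupportedContainers once it knows which container types
// the machine can host. The wrapper name and the LXC/KVM list are
// illustrative assumptions.
func (a *MachineAgent) setupContainerSupportExample(
	runner worker.Runner,
	st *api.State,
	machineTag string,
	agentConfig agent.Config,
) error {
	supported := []instance.ContainerType{instance.LXC, instance.KVM}
	return a.updateSupportedContainers(runner, st, machineTag, supported, agentConfig)
}
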
func (c *BootstrapCommand) startMongo(addrs []network.Address, agentConfig agent.Config) error {
	logger.Debugf("starting mongo")

	info, ok := agentConfig.StateInfo()
	if !ok {
		return fmt.Errorf("no state info available")
	}
	// When bootstrapping, we need to allow enough time for mongo
	// to start as there's no retry loop in place.
	// 5 minutes should suffice.
	bootstrapDialOpts := mongo.DialOpts{Timeout: 5 * time.Minute}
	dialInfo, err := mongo.DialInfo(info.Info, bootstrapDialOpts)
	if err != nil {
		return err
	}
	servingInfo, ok := agentConfig.StateServingInfo()
	if !ok {
		return fmt.Errorf("agent config has no state serving info")
	}

	// Use localhost to dial the mongo server, because it's running in
	// auth mode and will refuse to perform any operations unless
	// we dial that address.
	dialInfo.Addrs = []string{
		net.JoinHostPort("127.0.0.1", fmt.Sprint(servingInfo.StatePort)),
	}

	logger.Debugf("calling ensureMongoServer")
	err = ensureMongoServer(
		agentConfig.DataDir(),
		agentConfig.Value(agent.Namespace),
		servingInfo,
	)
	if err != nil {
		return err
	}

	peerAddr := mongo.SelectPeerAddress(addrs)
	if peerAddr == "" {
		return fmt.Errorf("no appropriate peer address found in %q", addrs)
	}
	peerHostPort := net.JoinHostPort(peerAddr, fmt.Sprint(servingInfo.StatePort))

	return maybeInitiateMongoServer(peergrouper.InitiateMongoParams{
		DialInfo:       dialInfo,
		MemberHostPort: peerHostPort,
	})
}

// NewEnsureServerParams creates an EnsureServerParams from an agent
// configuration.
func NewEnsureServerParams(agentConfig agent.Config) (mongo.EnsureServerParams, error) {
	// If oplog size is specified in the agent configuration, use that.
	// Otherwise leave the default zero value to indicate to EnsureServer
	// that it should calculate the size.
	var oplogSize int
	if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" {
		var err error
		if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil {
			return mongo.EnsureServerParams{}, fmt.Errorf("invalid oplog size: %q", oplogSizeString)
		}
	}

	// If numa ctl preference is specified in the agent configuration, use that.
	// Otherwise leave the default false value to indicate to EnsureServer
	// that numactl should not be used.
	var numaCtlPolicy bool
	if numaCtlString := agentConfig.Value(agent.NumaCtlPreference); numaCtlString != "" {
		var err error
		if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil {
			return mongo.EnsureServerParams{}, fmt.Errorf("invalid numactl preference: %q", numaCtlString)
		}
	}

	si, ok := agentConfig.StateServingInfo()
	if !ok {
		return mongo.EnsureServerParams{}, fmt.Errorf("agent config has no state serving info")
	}

	params := mongo.EnsureServerParams{
		APIPort:        si.APIPort,
		StatePort:      si.StatePort,
		Cert:           si.Cert,
		PrivateKey:     si.PrivateKey,
		CAPrivateKey:   si.CAPrivateKey,
		SharedSecret:   si.SharedSecret,
		SystemIdentity: si.SystemIdentity,

		DataDir:   agentConfig.DataDir(),
		OplogSize: oplogSize,

		SetNumaControlPolicy: numaCtlPolicy,

		Version: agentConfig.MongoVersion(),
	}
	return params, nil
}

// NewUpgrader returns a new upgrader worker. It watches changes to the
// current version of the current agent (with the given tag) and tries to
// download the tools for any new version into the given data directory. If
// an upgrade is needed, the worker will exit with an UpgradeReadyError
// holding details of the requested upgrade. The tools will have been
// downloaded and unpacked.
func NewUpgrader(
	st *upgrader.State,
	agentConfig agent.Config,
	origAgentVersion version.Number,
	isUpgradeRunning func() bool,
) *Upgrader {
	u := &Upgrader{
		st:               st,
		dataDir:          agentConfig.DataDir(),
		tag:              agentConfig.Tag(),
		origAgentVersion: origAgentVersion,
		isUpgradeRunning: isUpgradeRunning,
	}
	go func() {
		defer u.tomb.Done()
		u.tomb.Kill(u.loop())
	}()
	return u
}

// updateSupportedContainers records in state that a machine can run the specified containers.
// It starts a watcher and when a container of a given type is first added to the machine,
// the watcher is killed, the machine is set up to be able to start containers of the given type,
// and a suitable provisioner is started.
func (a *MachineAgent) updateSupportedContainers(
	runner worker.Runner,
	st *api.State,
	tag string,
	containers []instance.ContainerType,
	agentConfig agent.Config,
) error {
	pr := st.Provisioner()
	machine, err := pr.Machine(tag)
	if err != nil {
		return fmt.Errorf("%s is not in state: %v", tag, err)
	}
	if len(containers) == 0 {
		if err := machine.SupportsNoContainers(); err != nil {
			return fmt.Errorf("clearing supported containers for %s: %v", tag, err)
		}
		return nil
	}
	if err := machine.SetSupportedContainers(containers...); err != nil {
		return fmt.Errorf("setting supported containers for %s: %v", tag, err)
	}
	initLock, err := hookExecutionLock(agentConfig.DataDir())
	if err != nil {
		return err
	}
	// Start the watcher to fire when a container is first requested on the machine.
	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	handler := provisioner.NewContainerSetupHandler(
		runner,
		watcherName,
		containers,
		machine,
		pr,
		agentConfig,
		initLock,
	)
	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
		return worker.NewStringsWorker(handler), nil
	})
	return nil
}

// NewAgentUpgrader returns a new upgrader worker. It watches changes to the
// current version of the current agent (with the given tag) and tries to
// download the tools for any new version into the given data directory. If
// an upgrade is needed, the worker will exit with an UpgradeReadyError
// holding details of the requested upgrade. The tools will have been
// downloaded and unpacked.
func NewAgentUpgrader(
	st *upgrader.State,
	agentConfig agent.Config,
	origAgentVersion version.Number,
	areUpgradeStepsRunning func() bool,
	agentUpgradeComplete chan struct{},
) *Upgrader {
	u := &Upgrader{
		st:                     st,
		dataDir:                agentConfig.DataDir(),
		tag:                    agentConfig.Tag(),
		origAgentVersion:       origAgentVersion,
		areUpgradeStepsRunning: areUpgradeStepsRunning,
		agentUpgradeComplete:   agentUpgradeComplete,
	}
	go func() {
		defer u.tomb.Done()
		u.tomb.Kill(u.loop())
	}()
	return u
}

func (a *MachineAgent) ensureMongoAdminUser(agentConfig agent.Config) (added bool, err error) {
	stateInfo, ok1 := agentConfig.MongoInfo()
	servingInfo, ok2 := agentConfig.StateServingInfo()
	if !ok1 || !ok2 {
		return false, fmt.Errorf("no state serving info configuration")
	}
	dialInfo, err := mongo.DialInfo(stateInfo.Info, mongo.DefaultDialOpts())
	if err != nil {
		return false, err
	}
	if len(dialInfo.Addrs) > 1 {
		logger.Infof("more than one state server; admin user must exist")
		return false, nil
	}
	return ensureMongoAdminUser(mongo.EnsureAdminUserParams{
		DialInfo:  dialInfo,
		Namespace: agentConfig.Value(agent.Namespace),
		DataDir:   agentConfig.DataDir(),
		Port:      servingInfo.StatePort,
		User:      stateInfo.Tag.String(),
		Password:  stateInfo.Password,
	})
}

// NewAgentUpgrader returns a new upgrader worker. It watches changes to the
// current version of the current agent (with the given tag) and tries to
// download the tools for any new version into the given data directory. If
// an upgrade is needed, the worker will exit with an UpgradeReadyError
// holding details of the requested upgrade. The tools will have been
// downloaded and unpacked.
func NewAgentUpgrader(
	st *upgrader.State,
	agentConfig agent.Config,
	origAgentVersion version.Number,
	upgradeStepsWaiter gate.Waiter,
	initialUpgradeCheckComplete gate.Unlocker,
) (*Upgrader, error) {
	u := &Upgrader{
		st:                          st,
		dataDir:                     agentConfig.DataDir(),
		tag:                         agentConfig.Tag(),
		origAgentVersion:            origAgentVersion,
		upgradeStepsWaiter:          upgradeStepsWaiter,
		initialUpgradeCheckComplete: initialUpgradeCheckComplete,
	}
	err := catacomb.Invoke(catacomb.Plan{
		Site: &u.catacomb,
		Work: u.loop,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return u, nil
}

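// Hypothetical sketch (not from the source): the Kill/Wait methods a
// catacomb-managed worker such as this Upgrader conventionally provides so
// it satisfies the worker.Worker interface. Shown only to illustrate the
// catacomb.Invoke pattern used above; the real implementation may differ.
func (u *Upgrader) Kill() {
	u.catacomb.Kill(nil)
}

func (u *Upgrader) Wait() error {
	return u.catacomb.Wait()
}
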
func refreshConfig(c *gc.C, config agent.Config) agent.ConfigSetterWriter {
	config1, err := agent.ReadConfig(agent.ConfigPath(config.DataDir(), config.Tag()))
	c.Assert(err, gc.IsNil)
	return config1
}

// updateSupportedContainers records in state that a machine can run the specified containers.
// It starts a watcher and when a container of a given type is first added to the machine,
// the watcher is killed, the machine is set up to be able to start containers of the given type,
// and a suitable provisioner is started.
func (a *MachineAgent) updateSupportedContainers(
	runner worker.Runner,
	st api.Connection,
	containers []instance.ContainerType,
	agentConfig agent.Config,
) error {
	pr := st.Provisioner()
	tag := agentConfig.Tag().(names.MachineTag)
	machine, err := pr.Machine(tag)
	if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
		return worker.ErrTerminateAgent
	}
	if err != nil {
		return errors.Annotatef(err, "cannot load machine %s from state", tag)
	}
	if len(containers) == 0 {
		if err := machine.SupportsNoContainers(); err != nil {
			return errors.Annotatef(err, "clearing supported containers for %s", tag)
		}
		return nil
	}
	if err := machine.SetSupportedContainers(containers...); err != nil {
		return errors.Annotatef(err, "setting supported containers for %s", tag)
	}
	initLock, err := cmdutil.HookExecutionLock(agentConfig.DataDir())
	if err != nil {
		return err
	}
	// Start the watcher to fire when a container is first requested on the machine.
	modelUUID, err := st.ModelTag()
	if err != nil {
		return err
	}
	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	// There may not be a CA certificate private key available, and without
	// it we can't ensure that other Juju nodes can connect securely, so only
	// use an image URL getter if there's a private key.
	var imageURLGetter container.ImageURLGetter
	if agentConfig.Value(agent.AllowsSecureConnection) == "true" {
		cfg, err := pr.ModelConfig()
		if err != nil {
			return errors.Annotate(err, "unable to get environ config")
		}
		imageURLGetter = container.NewImageURLGetter(
			// Explicitly call the non-named constructor so if anyone
			// adds additional fields, this fails.
			container.ImageURLGetterConfig{
				ServerRoot:        st.Addr(),
				ModelUUID:         modelUUID.Id(),
				CACert:            []byte(agentConfig.CACert()),
				CloudimgBaseUrl:   cfg.CloudImageBaseURL(),
				Stream:            cfg.ImageStream(),
				ImageDownloadFunc: container.ImageDownloadURL,
			})
	}
	params := provisioner.ContainerSetupParams{
		Runner:              runner,
		WorkerName:          watcherName,
		SupportedContainers: containers,
		ImageURLGetter:      imageURLGetter,
		Machine:             machine,
		Provisioner:         pr,
		Config:              agentConfig,
		InitLock:            initLock,
	}
	handler := provisioner.NewContainerSetupHandler(params)
	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
		w, err := watcher.NewStringsWorker(watcher.StringsConfig{
			Handler: handler,
		})
		if err != nil {
			return nil, errors.Annotatef(err, "cannot start %s worker", watcherName)
		}
		return w, nil
	})
	return nil
}

// ensureMongoServer ensures that mongo is installed and running,
// and ready for opening a state connection.
func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) error {
	a.mongoInitMutex.Lock()
	defer a.mongoInitMutex.Unlock()
	if a.mongoInitialized {
		logger.Debugf("mongo is already initialized")
		return nil
	}

	servingInfo, ok := agentConfig.StateServingInfo()
	if !ok {
		return fmt.Errorf("state worker was started with no state serving info")
	}
	namespace := agentConfig.Value(agent.Namespace)

	// When upgrading from a pre-HA-capable environment,
	// we must add machine-0 to the admin database and
	// initiate its replicaset.
	//
	// TODO(axw) remove this when we no longer need
	// to upgrade from pre-HA-capable environments.
	var shouldInitiateMongoServer bool
	var addrs []network.Address
	if isPreHAVersion(agentConfig.UpgradedToVersion()) {
		_, err := a.ensureMongoAdminUser(agentConfig)
		if err != nil {
			return err
		}
		if servingInfo.SharedSecret == "" {
			servingInfo.SharedSecret, err = mongo.GenerateSharedSecret()
			if err != nil {
				return err
			}
			if err = a.ChangeConfig(func(config agent.ConfigSetter) {
				config.SetStateServingInfo(servingInfo)
			}); err != nil {
				return err
			}
			agentConfig = a.CurrentConfig()
		}
		// Note: we set Direct=true in the mongo options because it's
		// possible that we've previously upgraded the mongo server's
		// configuration to form a replicaset, but failed to initiate it.
		st, m, err := openState(agentConfig, mongo.DialOpts{Direct: true})
		if err != nil {
			return err
		}
		if err := st.SetStateServingInfo(servingInfo); err != nil {
			st.Close()
			return fmt.Errorf("cannot set state serving info: %v", err)
		}
		st.Close()
		addrs = m.Addresses()
		shouldInitiateMongoServer = true
	}

	// ensureMongoServer installs/upgrades the upstart config as necessary.
	if err := ensureMongoServer(
		agentConfig.DataDir(),
		namespace,
		servingInfo,
	); err != nil {
		return err
	}
	if !shouldInitiateMongoServer {
		return nil
	}

	// Initiate the replicaset for upgraded environments.
	//
	// TODO(axw) remove this when we no longer need
	// to upgrade from pre-HA-capable environments.
	stateInfo, ok := agentConfig.StateInfo()
	if !ok {
		return fmt.Errorf("state worker was started with no state serving info")
	}
	dialInfo, err := mongo.DialInfo(stateInfo.Info, mongo.DefaultDialOpts())
	if err != nil {
		return err
	}
	peerAddr := mongo.SelectPeerAddress(addrs)
	if peerAddr == "" {
		return fmt.Errorf("no appropriate peer address found in %q", addrs)
	}
	if err := maybeInitiateMongoServer(peergrouper.InitiateMongoParams{
		DialInfo:       dialInfo,
		MemberHostPort: net.JoinHostPort(peerAddr, fmt.Sprint(servingInfo.StatePort)),
		User:           stateInfo.Tag,
		Password:       stateInfo.Password,
	}); err != nil {
		return err
	}
	a.mongoInitialized = true
	return nil
}
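
// Plausible sketch (not from the source) of the isPreHAVersion helper used
// above. The exact cut-off version is an assumption; the idea is simply that
// agents upgraded from a release without HA support need the extra admin-user
// and replicaset bootstrapping performed in ensureMongoServer.
func isPreHAVersion(v version.Number) bool {
	return v.Compare(version.MustParse("1.19.0")) < 0
}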