Ejemplo n.º 1
0
func openState(agentConfig agent.Config, dialOpts mongo.DialOpts) (_ *state.State, _ *state.Machine, err error) {
	info, ok := agentConfig.MongoInfo()
	if !ok {
		return nil, nil, fmt.Errorf("no state info available")
	}
	st, err := state.Open(agentConfig.Model(), info, dialOpts, environs.NewStatePolicy())
	if err != nil {
		return nil, nil, err
	}
	defer func() {
		if err != nil {
			st.Close()
		}
	}()
	m0, err := st.FindEntity(agentConfig.Tag())
	if err != nil {
		if errors.IsNotFound(err) {
			err = worker.ErrTerminateAgent
		}
		return nil, nil, err
	}
	m := m0.(*state.Machine)
	if m.Life() == state.Dead {
		return nil, nil, worker.ErrTerminateAgent
	}
	// Check the machine nonce as provisioned matches the agent.Conf value.
	if !m.CheckProvisioned(agentConfig.Nonce()) {
		// The agent is running on a different machine to the one it
		// should be according to state. It must stop immediately.
		logger.Errorf("running machine %v agent on inappropriate instance", m)
		return nil, nil, worker.ErrTerminateAgent
	}
	return st, m, nil
}
Ejemplo n.º 2
0
func (c *BootstrapCommand) startMongo(addrs []network.Address, agentConfig agent.Config) error {
	logger.Debugf("starting mongo")

	info, ok := agentConfig.MongoInfo()
	if !ok {
		return fmt.Errorf("no state info available")
	}
	// When bootstrapping, we need to allow enough time for mongo
	// to start as there's no retry loop in place.
	// 5 minutes should suffice.
	mongoDialOpts := mongo.DialOpts{Timeout: 5 * time.Minute}
	dialInfo, err := mongo.DialInfo(info.Info, mongoDialOpts)
	if err != nil {
		return err
	}
	servingInfo, ok := agentConfig.StateServingInfo()
	if !ok {
		return fmt.Errorf("agent config has no state serving info")
	}
	// Use localhost to dial the mongo server, because it's running in
	// auth mode and will refuse to perform any operations unless
	// we dial that address.
	dialInfo.Addrs = []string{
		net.JoinHostPort("127.0.0.1", fmt.Sprint(servingInfo.StatePort)),
	}

	logger.Debugf("calling ensureMongoServer")
	ensureServerParams, err := cmdutil.NewEnsureServerParams(agentConfig)
	if err != nil {
		return err
	}
	err = cmdutil.EnsureMongoServer(ensureServerParams)
	if err != nil {
		return err
	}

	peerAddr := mongo.SelectPeerAddress(addrs)
	if peerAddr == "" {
		return fmt.Errorf("no appropriate peer address found in %q", addrs)
	}
	peerHostPort := net.JoinHostPort(peerAddr, fmt.Sprint(servingInfo.StatePort))

	if err := initiateMongoServer(peergrouper.InitiateMongoParams{
		DialInfo:       dialInfo,
		MemberHostPort: peerHostPort,
	}); err != nil {
		return err
	}
	logger.Infof("started mongo")
	return nil
}
Ejemplo n.º 3
0
func (a *MachineAgent) ensureMongoAdminUser(agentConfig agent.Config) (added bool, err error) {
	stateInfo, ok1 := agentConfig.MongoInfo()
	servingInfo, ok2 := agentConfig.StateServingInfo()
	if !ok1 || !ok2 {
		return false, fmt.Errorf("no state serving info configuration")
	}
	dialInfo, err := mongo.DialInfo(stateInfo.Info, mongo.DefaultDialOpts())
	if err != nil {
		return false, err
	}
	if len(dialInfo.Addrs) > 1 {
		logger.Infof("more than one state server; admin user must exist")
		return false, nil
	}
	return ensureMongoAdminUser(mongo.EnsureAdminUserParams{
		DialInfo:  dialInfo,
		Namespace: agentConfig.Value(agent.Namespace),
		DataDir:   agentConfig.DataDir(),
		Port:      servingInfo.StatePort,
		User:      stateInfo.Tag.String(),
		Password:  stateInfo.Password,
	})
}
Ejemplo n.º 4
0
// ensureMongoServer ensures that mongo is installed and running,
// and ready for opening a state connection.
func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) (err error) {
	a.mongoInitMutex.Lock()
	defer a.mongoInitMutex.Unlock()
	if a.mongoInitialized {
		logger.Debugf("mongo is already initialized")
		return nil
	}
	defer func() {
		if err == nil {
			a.mongoInitialized = true
		}
	}()

	servingInfo, ok := agentConfig.StateServingInfo()
	if !ok {
		return fmt.Errorf("state worker was started with no state serving info")
	}

	// When upgrading from a pre-HA-capable environment,
	// we must add machine-0 to the admin database and
	// initiate its replicaset.
	//
	// TODO(axw) remove this when we no longer need
	// to upgrade from pre-HA-capable environments.
	var shouldInitiateMongoServer bool
	var addrs []network.Address
	if isPreHAVersion(a.previousAgentVersion) {
		_, err := a.ensureMongoAdminUser(agentConfig)
		if err != nil {
			return err
		}
		if servingInfo.SharedSecret == "" {
			servingInfo.SharedSecret, err = mongo.GenerateSharedSecret()
			if err != nil {
				return err
			}
			if err = a.ChangeConfig(func(config agent.ConfigSetter) error {
				config.SetStateServingInfo(servingInfo)
				return nil
			}); err != nil {
				return err
			}
			agentConfig = a.CurrentConfig()
		}
		// Note: we set Direct=true in the mongo options because it's
		// possible that we've previously upgraded the mongo server's
		// configuration to form a replicaset, but failed to initiate it.
		st, m, err := openState(agentConfig, mongo.DialOpts{Direct: true})
		if err != nil {
			return err
		}
		ssi := paramsStateServingInfoToStateStateServingInfo(servingInfo)
		if err := st.SetStateServingInfo(ssi); err != nil {
			st.Close()
			return fmt.Errorf("cannot set state serving info: %v", err)
		}
		st.Close()
		addrs = m.Addresses()
		shouldInitiateMongoServer = true
	}

	// ensureMongoServer installs/upgrades the upstart config as necessary.
	ensureServerParams, err := newEnsureServerParams(agentConfig)
	if err != nil {
		return err
	}
	if err := ensureMongoServer(ensureServerParams); err != nil {
		return err
	}
	if !shouldInitiateMongoServer {
		return nil
	}

	// Initiate the replicaset for upgraded environments.
	//
	// TODO(axw) remove this when we no longer need
	// to upgrade from pre-HA-capable environments.
	stateInfo, ok := agentConfig.MongoInfo()
	if !ok {
		return fmt.Errorf("state worker was started with no state serving info")
	}
	dialInfo, err := mongo.DialInfo(stateInfo.Info, mongo.DefaultDialOpts())
	if err != nil {
		return err
	}
	peerAddr := mongo.SelectPeerAddress(addrs)
	if peerAddr == "" {
		return fmt.Errorf("no appropriate peer address found in %q", addrs)
	}
	if err := maybeInitiateMongoServer(peergrouper.InitiateMongoParams{
		DialInfo:       dialInfo,
		MemberHostPort: net.JoinHostPort(peerAddr, fmt.Sprint(servingInfo.StatePort)),
		// TODO(dfc) InitiateMongoParams should take a Tag
		User:     stateInfo.Tag.String(),
		Password: stateInfo.Password,
	}); err != nil {
		return err
	}
	return nil
}
Ejemplo n.º 5
0
// upgradeWorker runs the required upgrade operations to upgrade to the current Juju version.
func (a *MachineAgent) upgradeWorker(
	apiState *api.State,
	jobs []params.MachineJob,
	agentConfig agent.Config,
) worker.Worker {
	return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
		select {
		case <-a.upgradeComplete:
			// Our work is already done (we're probably being restarted
			// because the API connection has gone down), so do nothing.
			<-stop
			return nil
		default:
		}
		// If the machine agent is a state server, flag that state
		// needs to be opened before running upgrade steps
		needsState := false
		for _, job := range jobs {
			if job == params.JobManageEnviron {
				needsState = true
			}
		}
		// We need a *state.State for upgrades. We open it independently
		// of StateWorker, because we have no guarantees about when
		// and how often StateWorker might run.
		var st *state.State
		if needsState {
			if err := a.ensureMongoServer(agentConfig); err != nil {
				return err
			}
			var err error
			info, ok := agentConfig.MongoInfo()
			if !ok {
				return fmt.Errorf("no state info available")
			}
			st, err = state.Open(info, mongo.DialOpts{}, environs.NewStatePolicy())
			if err != nil {
				return err
			}
			defer st.Close()
		}
		err := a.runUpgrades(st, apiState, jobs, agentConfig)
		if err == nil {
			// Only signal that the upgrade is complete if no error
			// was returned.
			close(a.upgradeComplete)
		} else if !isFatal(err) {
			// Only non-fatal errors are returned (this will trigger
			// the worker to be restarted).
			//
			// Fatal upgrade errors are not returned because user
			// intervention is required at that point. We don't want
			// the upgrade worker or the agent to restart. Status
			// output and the logs will report that the upgrade has
			// failed.
			return err
		}
		<-stop
		return nil
	})
}