// postgresKeeperSM runs one iteration of the keeper state machine: it reads
// the clusterview and keepers state from the store, converges the local
// postgres instance to the role the clusterview assigns to this keeper
// (master or standby), reconciles replication slots and parameters, and
// finally records the clusterview version it converged to.
// Errors abort the iteration (the caller re-invokes it on the next tick).
func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
	e := p.e
	pgm := p.pgm

	cv, _, err := e.GetClusterView()
	if err != nil {
		log.Errorf("error retrieving cluster view: %v", err)
		return
	}
	log.Debugf(spew.Sprintf("clusterView: %#v", cv))
	if cv == nil {
		log.Infof("no clusterview available, waiting for it to appear")
		return
	}
	followersIDs := cv.GetFollowersIDs(p.id)

	// Update cluster config
	clusterConfig := cv.Config.ToConfig()
	log.Debugf(spew.Sprintf("clusterConfig: %#v", clusterConfig))
	// This shouldn't need a lock
	p.clusterConfig = clusterConfig

	// Capture the currently applied parameters before overwriting them so we
	// can detect changes (and synchronous_standby_names transitions) below.
	prevPGParameters := pgm.GetParameters()
	// create postgres parameters
	pgParameters := p.createPGParameters(followersIDs)
	// update pgm postgres parameters
	pgm.SetParameters(pgParameters)

	keepersState, _, err := e.GetKeepersState()
	if err != nil {
		log.Errorf("err: %v", err)
		return
	}
	if keepersState == nil {
		keepersState = cluster.KeepersState{}
	}
	log.Debugf(spew.Sprintf("keepersState: %#v", keepersState))
	keeper := keepersState[p.id]
	log.Debugf(spew.Sprintf("keeperState: %#v", keeper))

	initialized, err := pgm.IsInitialized()
	if err != nil {
		log.Errorf("failed to detect if instance is initialized: %v", err)
		return
	}
	// Bootstrap: only initdb when no keeper roles exist yet (fresh cluster).
	if len(cv.KeepersRole) == 0 {
		if !initialized {
			log.Infof("Initializing database")
			err = pgm.Init()
			if err != nil {
				log.Errorf("failed to initialized postgres instance: %v", err)
				return
			}
			initialized = true
		}
	}
	// Best effort: a failure to query/start postgres is logged but only
	// fatal (below) when the clusterview says we are the master.
	started := false
	if initialized {
		started, err = pgm.IsStarted()
		if err != nil {
			log.Errorf("failed to retrieve instance status: %v", err)
		} else if !started {
			err = pgm.Start()
			if err != nil {
				log.Errorf("failed to start postgres: %v", err)
			} else {
				started = true
			}
		}
	}
	// NOTE(review): cv is guaranteed non-nil here (early return above), so
	// this guard is dead code kept for safety; confirm before removing.
	if cv != nil {
		if !started && p.id == cv.Master {
			// If the clusterView says we are master but we cannot get
			// instance status or start then stop here, if we are standby then we can
			// recover
			return
		}
	}
	role, err := pgm.GetRole()
	if err != nil {
		log.Infof("error retrieving current pg role: %v", err)
		return
	}
	isMaster := false
	if role == common.MasterRole {
		log.Infof("current pg state: master")
		isMaster = true
	} else {
		log.Infof("current pg state: standby")
	}

	// publish ourself for discovery
	if err := p.publish(); err != nil {
		log.Errorf("failed to publish ourself to the cluster: %v", err)
		return
	}

	// NOTE(review): dead check — cv was already verified non-nil above.
	if cv == nil {
		return
	}
	// cv != nil

	masterID := cv.Master
	log.Debugf("masterID: %q", masterID)

	master := keepersState[masterID]
	log.Debugf(spew.Sprintf("masterState: %#v", master))

	keeperRole, ok := cv.KeepersRole[p.id]
	if !ok {
		log.Infof("our keeper requested role is not available")
		return
	}
	if keeperRole.Follow == "" {
		// Requested role: master. Promote if needed, then reconcile slots.
		log.Infof("our cluster requested state is master")
		if role != common.MasterRole {
			log.Infof("promoting to master")
			err := pgm.Promote()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
		} else {
			log.Infof("already master")

			replSlots := []string{}
			replSlots, err = pgm.GetReplicatinSlots()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
			// Drop replication slots not belonging to a current follower.
			for _, slotName := range replSlots {
				if !util.StringInSlice(followersIDs, slotName) {
					log.Infof("dropping replication slot for keeper %q not marked as follower", slotName)
					err := pgm.DropReplicationSlot(slotName)
					if err != nil {
						log.Errorf("err: %v", err)
					}
				}
			}

			// Create missing replication slots for every follower (skip ourselves).
			for _, followerID := range followersIDs {
				if followerID == p.id {
					continue
				}
				if !util.StringInSlice(replSlots, followerID) {
					err := pgm.CreateReplicationSlot(followerID)
					if err != nil {
						log.Errorf("err: %v", err)
					}
				}
			}
		}
	} else {
		// Requested role: standby following keeperRole.Follow.
		log.Infof("our cluster requested state is standby following %q", keeperRole.Follow)
		if isMaster {
			// A master asked to become standby must be rebuilt from the master.
			if err := p.fullResync(master, initialized, started); err != nil {
				log.Errorf("failed to full resync from master: %v", err)
				return
			}
		} else {
			log.Infof("already standby")
			curConnParams, err := pgm.GetPrimaryConninfo()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
			log.Debugf(spew.Sprintf("curConnParams: %v", curConnParams))

			replConnString := p.getReplConnString(master)
			newConnParams, err := pg.URLToConnParams(replConnString)
			if err != nil {
				log.Errorf("cannot get conn params: %v", err)
				return
			}
			log.Debugf(spew.Sprintf("newConnParams: %v", newConnParams))

			// Check that we can sync with master

			// Check timeline history
			// We need to update our pgState to avoid dealing with
			// an old pgState not reflecting the real state
			p.updatePGState(pctx)
			pgState := p.getLastPGState()
			if pgState == nil {
				// NOTE(review): err here is stale (from URLToConnParams above,
				// which succeeded) — this will always log "<nil>"; the message
				// should not format err.
				log.Errorf("our pgstate is unknown: %v", err)
				return
			}
			mPGState := master.PGState
			if p.isDifferentTimelineBranch(mPGState, pgState) {
				if err := p.fullResync(master, initialized, started); err != nil {
					log.Errorf("failed to full resync from master: %v", err)
					return
				}
			}

			// TODO(sgotti) Check that the master has all the needed WAL segments

			// Update our primary_conninfo if replConnString changed
			if !curConnParams.Equals(newConnParams) {
				log.Infof("master connection parameters changed. Reconfiguring...")
				log.Infof("following %s with connection url %s", keeperRole.Follow, replConnString)
				err = pgm.BecomeStandby(replConnString)
				if err != nil {
					log.Errorf("err: %v", err)
					return
				}
				err = pgm.Restart(true)
				if err != nil {
					log.Errorf("err: %v", err)
					return
				}
			}
		}
	}

	// Log synchronous replication changes
	prevSyncStandbyNames := prevPGParameters["synchronous_standby_names"]
	syncStandbyNames := pgParameters["synchronous_standby_names"]
	if p.clusterConfig.SynchronousReplication {
		if prevSyncStandbyNames != syncStandbyNames {
			log.Infof("needed synchronous_standby_names changed from %q to %q", prevSyncStandbyNames, syncStandbyNames)
		}
	} else {
		if prevSyncStandbyNames != "" {
			log.Infof("sync replication disabled, removing current synchronous_standby_names %q", prevSyncStandbyNames)
		}
	}

	// Reload postgres only when the effective parameter set changed.
	if !pgParameters.Equals(prevPGParameters) {
		log.Infof("postgres parameters changed, reloading postgres instance")
		pgm.SetParameters(pgParameters)
		if err := pgm.Reload(); err != nil {
			log.Errorf("failed to reload postgres instance: %v", err)
		}
	} else {
		// for tests
		log.Debugf("postgres parameters not changed")
	}
	// Record the clusterview version we converged to.
	if err := p.saveCVVersion(cv.Version); err != nil {
		log.Errorf("err: %v", err)
		return
	}
}
// postgresKeeperSM runs one iteration of the keeper state machine (members
// variant): it loads the cluster config, clusterview and members state from
// the store, converges the local postgres instance to the role assigned to
// this member (master or standby), reconciles replication slots and
// synchronous replication settings, and records the converged clusterview
// version. Errors abort the iteration; the caller retries on the next tick.
func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
	e := p.e
	pgm := p.pgm

	// Update cluster config
	clusterConfig, _, err := e.GetClusterConfig()
	if err != nil {
		log.Errorf("cannot get cluster config: %v", err)
		return
	}
	log.Debugf(spew.Sprintf("clusterConfig: %#v", clusterConfig))
	// This shouldn't need a lock
	p.clusterConfig = clusterConfig

	// cv may legitimately be nil here (cluster not yet initialized); the
	// nil checks further down handle that case.
	cv, _, err := e.GetClusterView()
	if err != nil {
		log.Errorf("err: %v", err)
		return
	}
	log.Debugf(spew.Sprintf("clusterView: %#v", cv))

	membersState, _, err := e.GetMembersState()
	if err != nil {
		log.Errorf("err: %v", err)
		return
	}
	if membersState == nil {
		membersState = cluster.MembersState{}
	}
	log.Debugf(spew.Sprintf("membersState: %#v", membersState))
	member := membersState[p.id]
	log.Debugf(spew.Sprintf("memberState: %#v", member))

	initialized, err := pgm.IsInitialized()
	if err != nil {
		log.Errorf("failed to detect if instance is initialized: %v", err)
		return
	}
	// Bootstrap: no clusterview yet means a fresh cluster, so initdb here.
	if cv == nil {
		if !initialized {
			log.Infof("Initializing database")
			err = pgm.Init()
			if err != nil {
				log.Errorf("failed to initialized postgres instance: %v", err)
				return
			}
			initialized = true
		}
	}
	// Best effort: failures to query/start postgres are logged but only
	// fatal (below) when the clusterview says we are the master.
	started := false
	if initialized {
		started, err = pgm.IsStarted()
		if err != nil {
			log.Errorf("failed to retrieve instance status: %v", err)
		} else if !started {
			err = pgm.Start()
			if err != nil {
				log.Errorf("err: %v", err)
			} else {
				started = true
			}
		}
	}
	if cv != nil {
		if !started && p.id == cv.Master {
			// If the clusterView says we are master but we cannot get
			// instance status or start then stop here, if we are standby then we can
			// recover
			return
		}
	}
	role, err := pgm.GetRole()
	if err != nil {
		log.Infof("error retrieving current pg role: %v", err)
		return
	}
	isMaster := false
	if role == common.MasterRole {
		log.Infof("current pg state: master")
		isMaster = true
	} else {
		log.Infof("current pg state: standby")
	}

	// publish ourself for discovery
	if err := p.publish(); err != nil {
		log.Errorf("failed to publish ourself to the cluster: %v", err)
		return
	}

	// Without a clusterview there is no role to converge to yet.
	if cv == nil {
		return
	}
	// cv != nil

	masterID := cv.Master
	log.Debugf("masterID: %q", masterID)

	master := membersState[masterID]
	log.Debugf(spew.Sprintf("masterState: %#v", master))

	followersIDs := cv.GetFollowersIDs(p.id)

	memberRole, ok := cv.MembersRole[p.id]
	if !ok {
		log.Infof("our member state is not available")
		return
	}
	if memberRole.Follow == "" {
		// Requested role: master. Promote if needed, then reconcile slots
		// and synchronous replication settings.
		log.Infof("our cluster requested state is master")
		if role != common.MasterRole {
			log.Infof("promoting to master")
			err := pgm.Promote()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
		} else {
			log.Infof("already master")

			replSlots := []string{}
			replSlots, err = pgm.GetReplicatinSlots()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
			// Drop replication slots not belonging to a current follower.
			for _, slotName := range replSlots {
				if !util.StringInSlice(followersIDs, slotName) {
					log.Infof("dropping replication slot for member %q not marked as follower", slotName)
					err := pgm.DropReplicationSlot(slotName)
					if err != nil {
						log.Errorf("err: %v", err)
					}
				}
			}
			// Create missing replication slots for every follower (skip ourselves).
			for _, followerID := range followersIDs {
				if followerID == p.id {
					continue
				}
				if !util.StringInSlice(replSlots, followerID) {
					err := pgm.CreateReplicationSlot(followerID)
					if err != nil {
						log.Errorf("err: %v", err)
					}
				}
			}
			// Setup synchronous replication
			// NOTE(review): pgm.Reload() errors are silently ignored in both
			// branches below — confirm whether that is intentional best-effort.
			syncStandbyNames, _ := pgm.GetServerParameter("synchronous_standby_names")
			if p.clusterConfig.SynchronousReplication {
				newSyncStandbyNames := strings.Join(followersIDs, ",")
				if syncStandbyNames != newSyncStandbyNames {
					log.Infof("needed synchronous_standby_names changed from %q to %q, reconfiguring", syncStandbyNames, newSyncStandbyNames)
					pgm.SetServerParameter("synchronous_standby_names", newSyncStandbyNames)
					pgm.Reload()
				}
			} else {
				if syncStandbyNames != "" {
					log.Infof("sync replication disabled, removing current synchronous_standby_names %q", syncStandbyNames)
					pgm.SetServerParameter("synchronous_standby_names", "")
					pgm.Reload()
				}
			}
		}
	} else {
		// Requested role: standby following memberRole.Follow.
		log.Infof("our cluster requested state is standby following %q", memberRole.Follow)
		if isMaster {
			// A master asked to become standby must be rebuilt from the master.
			if err := p.fullResync(master, initialized, started); err != nil {
				log.Errorf("failed to full resync from master: %v", err)
				return
			}
		} else {
			log.Infof("already standby")
			curConnParams, err := pgm.GetPrimaryConninfo()
			if err != nil {
				log.Errorf("err: %v", err)
				return
			}
			log.Debugf(spew.Sprintf("curConnParams: %v", curConnParams))

			replConnString := p.getReplConnString(master)
			newConnParams, err := pg.URLToConnParams(replConnString)
			if err != nil {
				log.Errorf("cannot get conn params: %v", err)
				return
			}
			log.Debugf(spew.Sprintf("newConnParams: %v", newConnParams))

			// Check that we can sync with master

			// Check timeline history
			ctx, cancel := context.WithTimeout(context.Background(), p.clusterConfig.RequestTimeout)
			pgState, err := pg.GetPGState(ctx, p.getOurReplConnString())
			cancel()
			if err != nil {
				log.Errorf("cannot get our pgstate: %v", err)
				return
			}
			mPGState := master.PGState
			if p.isDifferentTimelineBranch(mPGState, pgState) {
				if err := p.fullResync(master, initialized, started); err != nil {
					log.Errorf("failed to full resync from master: %v", err)
					return
				}
			}

			// TODO(sgotti) Check that the master has all the needed WAL segments

			// Update our primary_conninfo if replConnString changed
			if !curConnParams.Equals(newConnParams) {
				log.Infof("master connection parameters changed. Reconfiguring...")
				log.Infof("following %s with connection url %s", memberRole.Follow, replConnString)
				err = pgm.BecomeStandby(replConnString)
				if err != nil {
					log.Errorf("err: %v", err)
					return
				}
				err = pgm.Restart(true)
				if err != nil {
					log.Errorf("err: %v", err)
					return
				}
			}
		}
	}
	// Record the clusterview version we converged to.
	if err := p.saveCVVersion(cv.Version); err != nil {
		log.Errorf("err: %v", err)
		return
	}
}
func (p *PostgresKeeper) GetPGState(pctx context.Context) (*cluster.PostgresState, error) { p.getPGStateMutex.Lock() defer p.getPGStateMutex.Unlock() // Just get one pgstate at a time to avoid exausting available connections pgState := &cluster.PostgresState{} p.localStateMutex.Lock() pgState.UID = p.dbLocalState.UID pgState.Generation = p.dbLocalState.Generation p.localStateMutex.Unlock() pgState.ListenAddress = p.pgListenAddress pgState.Port = p.pgPort initialized, err := p.pgm.IsInitialized() if err != nil { return nil, err } if initialized { pgParameters, err := p.pgm.GetConfigFilePGParameters() if err != nil { log.Error("cannot get configured pg parameters", zap.Error(err)) return pgState, nil } log.Debug("got configured pg parameters", zap.Object("pgParameters", pgParameters)) filteredPGParameters := common.Parameters{} for k, v := range pgParameters { if !util.StringInSlice(managedPGParameters, k) { filteredPGParameters[k] = v } } log.Debug("filtered out managed pg parameters", zap.Object("filteredPGParameters", filteredPGParameters)) pgState.PGParameters = filteredPGParameters sd, err := p.pgm.GetSystemData() if err != nil { log.Error("error getting pg state", zap.Error(err)) return pgState, nil } pgState.SystemID = sd.SystemID pgState.TimelineID = sd.TimelineID pgState.XLogPos = sd.XLogPos // if timeline <= 1 then no timeline history file exists. pgState.TimelinesHistory = cluster.PostgresTimelinesHistory{} if pgState.TimelineID > 1 { tlsh, err := p.pgm.GetTimelinesHistory(pgState.TimelineID) if err != nil { log.Error("error getting timeline history", zap.Error(err)) return pgState, nil } ctlsh := cluster.PostgresTimelinesHistory{} for _, tlh := range tlsh { ctlh := &cluster.PostgresTimelineHistory{ TimelineID: tlh.TimelineID, SwitchPoint: tlh.SwitchPoint, Reason: tlh.Reason, } ctlsh = append(ctlsh, ctlh) } pgState.TimelinesHistory = ctlsh } pgState.Healthy = true } return pgState, nil }
// postgresKeeperSM runs one iteration of the keeper state machine
// (clusterdata variant). It loads the clusterdata, finds the db assigned to
// this keeper, initializes/restores/adopts the local data directory as
// requested by db.Spec.InitMode, converges the instance to the target role
// (master or standby, including resync/pg_rewind handling), reconciles
// replication slots and parameters, and finally persists the converged db
// generation. Any error aborts the iteration; the caller retries on the
// next tick.
func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) {
	e := p.e
	pgm := p.pgm

	cd, _, err := e.GetClusterData()
	if err != nil {
		log.Error("error retrieving cluster data", zap.Error(err))
		return
	}
	log.Debug("cd dump", zap.String("cd", spew.Sdump(cd)))
	if cd == nil {
		log.Info("no cluster data available, waiting for it to appear")
		return
	}
	if cd.FormatVersion != cluster.CurrentCDFormatVersion {
		log.Error("unsupported clusterdata format version", zap.Uint64("version", cd.FormatVersion))
		return
	}
	if cd.Cluster != nil {
		// Pick up timing knobs and remember the cluster we belong to.
		p.sleepInterval = cd.Cluster.Spec.SleepInterval.Duration
		p.requestTimeout = cd.Cluster.Spec.RequestTimeout.Duration

		if p.keeperLocalState.ClusterUID != cd.Cluster.UID {
			p.keeperLocalState.ClusterUID = cd.Cluster.UID
			if err = p.saveKeeperLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
		}
	}

	k, ok := cd.Keepers[p.keeperLocalState.UID]
	if !ok {
		log.Info("our keeper data is not available, waiting for it to appear")
		return
	}
	// TODO(sgotti) Check that the Keeper.Status address:port has been updated

	db := cd.FindDB(k)
	if db == nil {
		log.Info("no db assigned")
		return
	}
	// TODO(sgotti) Check that the DB.Status address:port has been updated

	followersUIDs := db.Spec.Followers

	// Capture previously applied parameters so parameter/sync-replication
	// changes can be detected at the end of the iteration.
	prevPGParameters := pgm.GetParameters()
	// create postgres parameters
	pgParameters := p.createPGParameters(db)
	// update pgm postgres parameters
	pgm.SetParameters(pgParameters)

	dbls := p.dbLocalState
	if dbls.Initializing {
		// If we are here this means that the db initialization or
		// resync has failed so we have to clean up stale data
		log.Error("db failed to initialize or resync")
		// Clean up cluster db datadir
		if err = pgm.RemoveAll(); err != nil {
			log.Error("failed to remove the postgres data dir", zap.Error(err))
			return
		}
		// Reset current db local state since it's not valid anymore
		p.localStateMutex.Lock()
		dbls.UID = ""
		dbls.Generation = cluster.NoGeneration
		dbls.Initializing = false
		p.localStateMutex.Unlock()
		if err = p.saveDBLocalState(); err != nil {
			log.Error("error", zap.Error(err))
			return
		}
	}

	initialized, err := pgm.IsInitialized()
	if err != nil {
		log.Error("failed to detect if instance is initialized", zap.Error(err))
		return
	}

	started := false
	if initialized {
		started, err = pgm.IsStarted()
		if err != nil {
			// log error getting instance state but go ahead.
			log.Info("failed to retrieve instance status", zap.Error(err))
		}
	}

	log.Debug("db status", zap.Bool("started", started))

	// if the db is initialized but there isn't a db local state then generate a new one
	if initialized && dbls.UID == "" {
		p.localStateMutex.Lock()
		dbls.UID = common.UID()
		dbls.Generation = cluster.NoGeneration
		dbls.InitPGParameters = nil
		dbls.Initializing = false
		p.localStateMutex.Unlock()
		if err = p.saveDBLocalState(); err != nil {
			log.Error("error", zap.Error(err))
			return
		}
	}

	// Our local db differs from the one the clusterdata assigns us: build it
	// according to the requested init mode.
	if dbls.UID != db.UID {
		log.Info("current db UID different than cluster data db UID", zap.String("db", dbls.UID), zap.String("cdDB", db.UID))
		switch db.Spec.InitMode {
		case cluster.DBInitModeNew:
			// Fresh initdb: wipe the data dir and create a new database
			// cluster. Initializing stays true until the end of the
			// iteration so a crash mid-way triggers the cleanup above.
			log.Info("initializing the database cluster")
			p.localStateMutex.Lock()
			dbls.UID = db.UID
			// Set a no generation since we aren't already converged.
			dbls.Generation = cluster.NoGeneration
			dbls.InitPGParameters = nil
			dbls.Initializing = true
			p.localStateMutex.Unlock()
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if started {
				if err = pgm.Stop(true); err != nil {
					log.Error("failed to stop pg instance", zap.Error(err))
					return
				}
				started = false
			}
			if err = pgm.RemoveAll(); err != nil {
				log.Error("failed to remove the postgres data dir", zap.Error(err))
				return
			}
			if err = pgm.Init(); err != nil {
				log.Error("failed to initialize postgres database cluster", zap.Error(err))
				return
			}
			initialized = true

			if db.Spec.IncludeConfig {
				// Start on a temporary merged config so the generated
				// postgresql.conf parameters can be captured and reported.
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
				pgParameters, err = pgm.GetConfigFilePGParameters()
				if err != nil {
					// NOTE(review): message looks copy-pasted — the failing
					// operation is reading config file pg parameters, not
					// renaming postgresql.conf. Confirm and fix upstream.
					log.Error("failed to rename previous postgresql.conf", zap.Error(err))
					return
				}
				p.localStateMutex.Lock()
				dbls.InitPGParameters = pgParameters
				p.localStateMutex.Unlock()
			} else {
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
			}
			log.Info("setting roles")
			if err = pgm.SetupRoles(); err != nil {
				log.Error("failed to setup roles", zap.Error(err))
				return
			}
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if err = pgm.Stop(true); err != nil {
				log.Error("failed to stop pg instance", zap.Error(err))
				return
			}
		case cluster.DBInitModePITR:
			// Point-in-time recovery: wipe the data dir, restore from the
			// configured restore command and write a recovery.conf with the
			// archive recovery settings.
			log.Info("restoring the database cluster")
			p.localStateMutex.Lock()
			dbls.UID = db.UID
			// Set a no generation since we aren't already converged.
			dbls.Generation = cluster.NoGeneration
			dbls.InitPGParameters = nil
			dbls.Initializing = true
			p.localStateMutex.Unlock()
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if started {
				if err = pgm.Stop(true); err != nil {
					log.Error("failed to stop pg instance", zap.Error(err))
					return
				}
				started = false
			}
			if err = pgm.RemoveAll(); err != nil {
				log.Error("failed to remove the postgres data dir", zap.Error(err))
				return
			}
			if err = pgm.Restore(db.Spec.PITRConfig.DataRestoreCommand); err != nil {
				log.Error("failed to restore postgres database cluster", zap.Error(err))
				return
			}
			if err = pgm.WriteRecoveryConf(p.createRecoveryParameters(nil, db.Spec.PITRConfig.ArchiveRecoverySettings)); err != nil {
				log.Error("err", zap.Error(err))
				return
			}
			if db.Spec.IncludeConfig {
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
				pgParameters, err = pgm.GetConfigFilePGParameters()
				if err != nil {
					// NOTE(review): same copy-pasted message as above —
					// the failing operation is reading pg parameters.
					log.Error("failed to rename previous postgresql.conf", zap.Error(err))
					return
				}
				p.localStateMutex.Lock()
				dbls.InitPGParameters = pgParameters
				p.localStateMutex.Unlock()
			} else {
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
			}
			initialized = true
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if err = pgm.Stop(true); err != nil {
				log.Error("failed to stop pg instance", zap.Error(err))
				return
			}
		case cluster.DBInitModeExisting:
			// Adopt the existing data dir as the assigned db.
			// replace our current db uid with the required one.
			p.localStateMutex.Lock()
			dbls.UID = db.UID
			// Set a no generation since we aren't already converged.
			dbls.Generation = cluster.NoGeneration
			dbls.InitPGParameters = nil
			p.localStateMutex.Unlock()
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if started {
				if err = pgm.Stop(true); err != nil {
					log.Error("failed to stop pg instance", zap.Error(err))
					return
				}
				started = false
			}
			if db.Spec.IncludeConfig {
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
				pgParameters, err = pgm.GetConfigFilePGParameters()
				if err != nil {
					// NOTE(review): same copy-pasted message as above —
					// the failing operation is reading pg parameters.
					log.Error("failed to rename previous postgresql.conf", zap.Error(err))
					return
				}
				p.localStateMutex.Lock()
				dbls.InitPGParameters = pgParameters
				p.localStateMutex.Unlock()
			} else {
				if err = pgm.StartTmpMerged(); err != nil {
					log.Error("failed to start instance", zap.Error(err))
					return
				}
			}
			log.Info("updating our db UID with the cluster data provided db UID")
			// NOTE(review): the comment below says "replace our current db
			// uid" but the code assigns InitPGParameters (the UID was already
			// set at the top of this case). In the !IncludeConfig branch this
			// re-assigns the outer pgParameters. Looks like leftover code —
			// confirm intent upstream.
			// replace our current db uid with the required one.
			p.localStateMutex.Lock()
			dbls.InitPGParameters = pgParameters
			p.localStateMutex.Unlock()
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			if err = pgm.Stop(true); err != nil {
				log.Error("failed to stop pg instance", zap.Error(err))
				return
			}
		case cluster.DBInitModeNone:
			// No local initialization: just record the assigned db UID and
			// stop this iteration (a later role convergence will resync).
			// replace our current db uid with the required one.
			p.localStateMutex.Lock()
			dbls.UID = db.UID
			// Set a no generation since we aren't already converged.
			dbls.Generation = cluster.NoGeneration
			dbls.InitPGParameters = nil
			p.localStateMutex.Unlock()
			if err = p.saveDBLocalState(); err != nil {
				log.Error("error", zap.Error(err))
				return
			}
			return
		default:
			log.Error("unknown db init mode", zap.String("initMode", string(db.Spec.InitMode)))
			return
		}
	}

	// Re-apply parameters: the init branches may have replaced pgParameters
	// with the ones read from the generated config file.
	pgm.SetParameters(pgParameters)

	var localRole common.Role
	var systemID string
	if !initialized {
		log.Info("database cluster not initialized")
		localRole = common.RoleUndefined
	} else {
		localRole, err = pgm.GetRole()
		if err != nil {
			log.Error("error retrieving current pg role", zap.Error(err))
			return
		}
		// NOTE(review): "systemd" here presumably means the postgres
		// *system* identifier (GetSystemdID / message look like typos for
		// "system ID") — confirm against the pgm API.
		systemID, err = p.pgm.GetSystemdID()
		if err != nil {
			log.Error("error retrieving systemd ID", zap.Error(err))
			return
		}
	}

	targetRole := db.Spec.Role
	log.Debug("target role", zap.String("targetRole", string(targetRole)))

	switch targetRole {
	case common.RoleMaster:
		// We are the elected master
		log.Info("our db requested role is master")
		if localRole == common.RoleUndefined {
			log.Error("database cluster not initialized but requested role is master. This shouldn't happen!")
			return
		}
		if !started {
			if err = pgm.Start(); err != nil {
				log.Error("failed to start postgres", zap.Error(err))
				return
			}
			started = true
		}
		if localRole == common.RoleStandby {
			log.Info("promoting to master")
			if err = pgm.Promote(); err != nil {
				log.Error("err", zap.Error(err))
				return
			}
		} else {
			log.Info("already master")
		}

		var replSlots []string
		replSlots, err = pgm.GetReplicatinSlots()
		// NOTE(review): replSlots is logged before the error check below —
		// on error this logs a possibly invalid value; consider swapping.
		log.Debug("replication slots", zap.Object("replSlots", replSlots))
		if err != nil {
			log.Error("err", zap.Error(err))
			return
		}
		// Drop replication slots
		for _, slotName := range replSlots {
			// Only touch stolon-managed slots; leave user slots alone.
			if !common.IsStolonName(slotName) {
				continue
			}
			if !util.StringInSlice(followersUIDs, common.NameFromStolonName(slotName)) {
				log.Info("dropping replication slot since db not marked as follower", zap.String("slot", slotName), zap.String("db", common.NameFromStolonName(slotName)))
				if err = pgm.DropReplicationSlot(slotName); err != nil {
					log.Error("err", zap.Error(err))
				}
			}
		}
		// Create replication slots
		for _, followerUID := range followersUIDs {
			if followerUID == dbls.UID {
				continue
			}
			replSlot := common.StolonName(followerUID)
			if !util.StringInSlice(replSlots, replSlot) {
				log.Info("creating replication slot", zap.String("slot", replSlot), zap.String("db", followerUID))
				if err = pgm.CreateReplicationSlot(replSlot); err != nil {
					log.Error("err", zap.Error(err))
				}
			}
		}
	case common.RoleStandby:
		// We are a standby
		followedUID := db.Spec.FollowConfig.DBUID
		log.Info("our db requested role is standby", zap.String("followedDB", followedUID))
		followedDB, ok := cd.DBs[followedUID]
		if !ok {
			log.Error("no db data available for followed db", zap.String("followedDB", followedUID))
			return
		}
		switch localRole {
		case common.RoleMaster:
			// Demotion: currently a master, asked to become a standby.
			if systemID == followedDB.Status.SystemID {
				// There can be the possibility that this
				// database is on the same branch of the
				// current followed instance.
				// So we try to put it in recovery and then
				// check if it's on the same branch or force a
				// resync
				replConnParams := p.getReplConnParams(db, followedDB)
				standbySettings := &cluster.StandbySettings{PrimaryConninfo: replConnParams.ConnString(), PrimarySlotName: common.StolonName(db.UID)}
				if err = pgm.WriteRecoveryConf(p.createRecoveryParameters(standbySettings, nil)); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
				if !started {
					if err = pgm.Start(); err != nil {
						log.Error("err", zap.Error(err))
						return
					}
					started = true
				} else {
					if err = pgm.Restart(true); err != nil {
						log.Error("err", zap.Error(err))
						return
					}
				}
				// TODO(sgotti) pg_rewind considers databases on the same timeline as in sync and doesn't check if they diverged at different position in previous timelines.
				// So check that the db as been synced or resync again with pg_rewind disabled. Will need to report this upstream.

				// Check timeline history
				// We need to update our pgState to avoid dealing with
				// an old pgState not reflecting the real state
				var pgState *cluster.PostgresState
				pgState, err = p.GetPGState(pctx)
				if err != nil {
					log.Error("cannot get current pgstate", zap.Error(err))
					return
				}
				if p.isDifferentTimelineBranch(followedDB, pgState) {
					// First attempt uses pg_rewind; if still diverged, fall
					// back to a full resync below.
					if err = p.resync(db, followedDB, true, started); err != nil {
						log.Error("failed to resync from followed instance", zap.Error(err))
						return
					}
					if err = pgm.Start(); err != nil {
						log.Error("err", zap.Error(err))
						return
					}
					started = true
					// Check again if it was really synced
					pgState, err = p.GetPGState(pctx)
					if err != nil {
						log.Error("cannot get current pgstate", zap.Error(err))
						return
					}
					if p.isDifferentTimelineBranch(followedDB, pgState) {
						if err = p.resync(db, followedDB, false, started); err != nil {
							log.Error("failed to resync from followed instance", zap.Error(err))
							return
						}
						if err = pgm.Start(); err != nil {
							log.Error("err", zap.Error(err))
							return
						}
						started = true
					}
				}
			} else {
				// Different system ID: this data dir cannot follow the
				// target — full resync without pg_rewind.
				if err = p.resync(db, followedDB, false, started); err != nil {
					log.Error("failed to resync from followed instance", zap.Error(err))
					return
				}
				if err = pgm.Start(); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
				started = true
			}
		case common.RoleStandby:
			log.Info("already standby")
			if !started {
				replConnParams := p.getReplConnParams(db, followedDB)
				standbySettings := &cluster.StandbySettings{PrimaryConninfo: replConnParams.ConnString(), PrimarySlotName: common.StolonName(db.UID)}
				if err = pgm.WriteRecoveryConf(p.createRecoveryParameters(standbySettings, nil)); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
				if err = pgm.Start(); err != nil {
					log.Error("failed to start postgres", zap.Error(err))
					return
				}
				started = true
			}

			// Check that we can sync with followed instance

			// We need to update our pgState to avoid dealing with
			// an old pgState not reflecting the real state
			var pgState *cluster.PostgresState
			pgState, err = p.GetPGState(pctx)
			if err != nil {
				log.Error("cannot get current pgstate", zap.Error(err))
				return
			}
			needsResync := false
			tryPgrewind := false
			// If the db has a different systemdID then a resync is needed
			if systemID != followedDB.Status.SystemID {
				needsResync = true
				// Check timeline history
			} else if p.isDifferentTimelineBranch(followedDB, pgState) {
				needsResync = true
				tryPgrewind = true
			}
			if needsResync {
				// TODO(sgotti) pg_rewind considers databases on the same timeline as in sync and doesn't check if they diverged at different position in previous timelines.
				// So check that the db as been synced or resync again with pg_rewind disabled. Will need to report this upstream.
				if err = p.resync(db, followedDB, tryPgrewind, started); err != nil {
					log.Error("failed to full resync from followed instance", zap.Error(err))
					return
				}
				if err = pgm.Start(); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
				started = true
				// Check again if it was really synced
				pgState, err = p.GetPGState(pctx)
				if err != nil {
					log.Error("cannot get current pgstate", zap.Error(err))
					return
				}
				if p.isDifferentTimelineBranch(followedDB, pgState) {
					if err = p.resync(db, followedDB, false, started); err != nil {
						log.Error("failed to resync from followed instance", zap.Error(err))
						return
					}
					if err = pgm.Start(); err != nil {
						log.Error("err", zap.Error(err))
						return
					}
					started = true
				}
			}

			// TODO(sgotti) Check that the followed instance has all the needed WAL segments

			// Update our primary_conninfo if replConnString changed
			var curReplConnParams postgresql.ConnParams
			curReplConnParams, err = pgm.GetPrimaryConninfo()
			if err != nil {
				log.Error("err", zap.Error(err))
				return
			}
			log.Debug("curReplConnParams", zap.Object("curReplConnParams", curReplConnParams))

			newReplConnParams := p.getReplConnParams(db, followedDB)
			log.Debug("newReplConnParams", zap.Object("newReplConnParams", newReplConnParams))

			if !curReplConnParams.Equals(newReplConnParams) {
				log.Info("connection parameters changed. Reconfiguring.", zap.String("followedDB", followedUID), zap.Object("replConnParams", newReplConnParams))
				standbySettings := &cluster.StandbySettings{PrimaryConninfo: newReplConnParams.ConnString(), PrimarySlotName: common.StolonName(db.UID)}
				if err = pgm.WriteRecoveryConf(p.createRecoveryParameters(standbySettings, nil)); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
				if err = pgm.Restart(true); err != nil {
					log.Error("err", zap.Error(err))
					return
				}
			}
		case common.RoleUndefined:
			// Unknown local role: rebuild from the followed instance.
			if err = p.resync(db, followedDB, false, started); err != nil {
				log.Error("failed to full resync from followed instance", zap.Error(err))
				return
			}
			if err = pgm.Start(); err != nil {
				log.Error("err", zap.Error(err))
				return
			}
			started = true
		}
	case common.RoleUndefined:
		log.Info("our db requested role is none")
		return
	}

	// update pg parameters
	pgParameters = p.createPGParameters(db)

	// Log synchronous replication changes
	prevSyncStandbyNames := prevPGParameters["synchronous_standby_names"]
	syncStandbyNames := pgParameters["synchronous_standby_names"]
	if db.Spec.SynchronousReplication {
		if prevSyncStandbyNames != syncStandbyNames {
			log.Info("needed synchronous_standby_names changed", zap.String("prevSyncStandbyNames", prevSyncStandbyNames), zap.String("syncStandbyNames", syncStandbyNames))
		}
	} else {
		if prevSyncStandbyNames != "" {
			log.Info("sync replication disabled, removing current synchronous_standby_names", zap.String("syncStandbyNames", prevSyncStandbyNames))
		}
	}

	// Reload postgres only when the effective parameter set changed.
	if !pgParameters.Equals(prevPGParameters) {
		log.Info("postgres parameters changed, reloading postgres instance")
		pgm.SetParameters(pgParameters)
		if err := pgm.Reload(); err != nil {
			log.Error("failed to reload postgres instance", zap.Error(err))
		}
	} else {
		// for tests
		log.Info("postgres parameters not changed")
	}

	// If we are here, then all went well and we can update the db generation and save it locally
	p.localStateMutex.Lock()
	dbls.Generation = db.Generation
	dbls.Initializing = false
	p.localStateMutex.Unlock()
	if err := p.saveDBLocalState(); err != nil {
		log.Error("err", zap.Error(err))
		return
	}
}
func (s *Sentinel) updateCluster(cd *cluster.ClusterData) (*cluster.ClusterData, error) { newcd := cd.DeepCopy() switch cd.Cluster.Status.Phase { case cluster.ClusterPhaseInitializing: switch *cd.Cluster.DefSpec().InitMode { case cluster.ClusterInitModeNew: // Is there already a keeper choosed to be the new master? if cd.Cluster.Status.Master == "" { log.Info("trying to find initial master") k, err := s.findInitialKeeper(cd) if err != nil { return nil, fmt.Errorf("cannot choose initial master: %v", err) } log.Info("initializing cluster", zap.String("keeper", k.UID)) db := &cluster.DB{ UID: s.UIDFn(), Generation: cluster.InitialGeneration, ChangeTime: time.Now(), Spec: &cluster.DBSpec{ KeeperUID: k.UID, InitMode: cluster.DBInitModeNew, Role: common.RoleMaster, Followers: []string{}, IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters, }, } newcd.DBs[db.UID] = db newcd.Cluster.Status.Master = db.UID log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd))) } else { db, ok := cd.DBs[cd.Cluster.Status.Master] if !ok { panic(fmt.Errorf("db %q object doesn't exists. 
This shouldn't happen", cd.Cluster.Status.Master)) } // Check that the choosed db for being the master has correctly initialized switch s.dbConvergenceState(db, cd.Cluster.DefSpec().InitTimeout.Duration) { case Converged: if db.Status.Healthy { log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) // Set db initMode to none, not needed but just a security measure db.Spec.InitMode = cluster.DBInitModeNone // Don't include previous config anymore db.Spec.IncludeConfig = false // Replace reported pg parameters in cluster spec if *cd.Cluster.DefSpec().MergePgParameters { newcd.Cluster.Spec.PGParameters = db.Status.PGParameters } // Cluster initialized, switch to Normal state newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal } case Converging: log.Info("waiting for db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) case ConvergenceFailed: log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) // Empty DBs newcd.DBs = cluster.DBs{} // Unset master so another keeper can be choosen newcd.Cluster.Status.Master = "" } } case cluster.ClusterInitModeExisting: if cd.Cluster.Status.Master == "" { wantedKeeper := cd.Cluster.DefSpec().ExistingConfig.KeeperUID log.Info("trying to use keeper as initial master", zap.String("keeper", wantedKeeper)) k, ok := cd.Keepers[wantedKeeper] if !ok { return nil, fmt.Errorf("keeper %q state not available", wantedKeeper) } log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID)) db := &cluster.DB{ UID: s.UIDFn(), Generation: cluster.InitialGeneration, ChangeTime: time.Now(), Spec: &cluster.DBSpec{ KeeperUID: k.UID, InitMode: cluster.DBInitModeExisting, Role: common.RoleMaster, Followers: []string{}, IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters, }, } newcd.DBs[db.UID] = db newcd.Cluster.Status.Master = db.UID log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd))) } else 
{ db, ok := newcd.DBs[cd.Cluster.Status.Master] if !ok { panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master)) } // Check that the choosed db for being the master has correctly initialized if db.Status.Healthy && s.dbConvergenceState(db, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged { log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) // Don't include previous config anymore db.Spec.IncludeConfig = false // Replace reported pg parameters in cluster spec if *cd.Cluster.DefSpec().MergePgParameters { newcd.Cluster.Spec.PGParameters = db.Status.PGParameters } // Cluster initialized, switch to Normal state newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal } } case cluster.ClusterInitModePITR: // Is there already a keeper choosed to be the new master? if cd.Cluster.Status.Master == "" { log.Info("trying to find initial master") k, err := s.findInitialKeeper(cd) if err != nil { return nil, fmt.Errorf("cannot choose initial master: %v", err) } log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID)) db := &cluster.DB{ UID: s.UIDFn(), Generation: cluster.InitialGeneration, ChangeTime: time.Now(), Spec: &cluster.DBSpec{ KeeperUID: k.UID, InitMode: cluster.DBInitModePITR, PITRConfig: cd.Cluster.DefSpec().PITRConfig, Role: common.RoleMaster, Followers: []string{}, IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters, }, } newcd.DBs[db.UID] = db newcd.Cluster.Status.Master = db.UID log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd))) } else { db, ok := cd.DBs[cd.Cluster.Status.Master] if !ok { panic(fmt.Errorf("db %q object doesn't exists. 
This shouldn't happen", cd.Cluster.Status.Master)) } // Check that the choosed db for being the master has correctly initialized // TODO(sgotti) set a timeout (the max time for a restore operation) switch s.dbConvergenceState(db, 0) { case Converged: if db.Status.Healthy { log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) // Set db initMode to none, not needed but just a security measure db.Spec.InitMode = cluster.DBInitModeNone // Don't include previous config anymore db.Spec.IncludeConfig = false // Replace reported pg parameters in cluster spec if *cd.Cluster.DefSpec().MergePgParameters { newcd.Cluster.Spec.PGParameters = db.Status.PGParameters } // Cluster initialized, switch to Normal state newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal } case Converging: log.Info("waiting for db to converge", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) case ConvergenceFailed: log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) // Empty DBs newcd.DBs = cluster.DBs{} // Unset master so another keeper can be choosen newcd.Cluster.Status.Master = "" } } default: return nil, fmt.Errorf("unknown init mode %q", cd.Cluster.DefSpec().InitMode) } case cluster.ClusterPhaseNormal: // TODO(sgotti) When keeper removal is implemented, remove DBs for unexistent keepers // Calculate current master status curMasterDBUID := cd.Cluster.Status.Master wantedMasterDBUID := curMasterDBUID masterOK := true curMasterDB := cd.DBs[curMasterDBUID] if curMasterDB == nil { return nil, fmt.Errorf("db for keeper %q not available. This shouldn't happen!", curMasterDBUID) } log.Debug("db dump", zap.String("db", spew.Sdump(curMasterDB))) if !curMasterDB.Status.Healthy { log.Info("master db is failed", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID)) masterOK = false } // Check that the wanted master is in master state (i.e. 
check that promotion from standby to master happened) if s.dbConvergenceState(curMasterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == ConvergenceFailed { log.Info("db not converged", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID)) masterOK = false } if !masterOK { log.Info("trying to find a new master to replace failed master") bestNewMasters := s.findBestNewMasters(cd, curMasterDB) if len(bestNewMasters) == 0 { log.Error("no eligible masters") } else { // if synchronous replication is enabled, only choose new master in the synchronous replication standbys. var bestNewMasterDB *cluster.DB if *cd.Cluster.DefSpec().SynchronousReplication { onlyFake := true // if only fake synchronous standbys are defined we cannot choose any standby for _, dbUID := range curMasterDB.Spec.SynchronousStandbys { if dbUID != fakeStandbyName { onlyFake = false } } if !onlyFake { if !util.CompareStringSlice(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys) { log.Warn("cannot choose synchronous standby since the latest master reported synchronous standbys are different from the db spec ones", zap.Object("reported", spew.Sdump(curMasterDB.Status.SynchronousStandbys)), zap.Object("spec", spew.Sdump(curMasterDB.Spec.SynchronousStandbys))) } else { for _, nm := range bestNewMasters { if util.StringInSlice(curMasterDB.Spec.SynchronousStandbys, nm.UID) { bestNewMasterDB = nm break } } } } } else { bestNewMasterDB = bestNewMasters[0] } if bestNewMasterDB != nil { log.Info("electing db as the new master", zap.String("db", bestNewMasterDB.UID), zap.String("keeper", bestNewMasterDB.Spec.KeeperUID)) wantedMasterDBUID = bestNewMasterDB.UID } else { log.Error("no eligible masters") } } } // New master elected if curMasterDBUID != wantedMasterDBUID { // maintain the current role, remove followers oldMasterdb := newcd.DBs[curMasterDBUID] oldMasterdb.Spec.Followers = []string{} newcd.Cluster.Status.Master = wantedMasterDBUID 
newMasterDB := newcd.DBs[wantedMasterDBUID] newMasterDB.Spec.Role = common.RoleMaster newMasterDB.Spec.FollowConfig = nil // Tell proxy that there's currently no active master newcd.Proxy.Spec.MasterDBUID = "" newcd.Proxy.ChangeTime = time.Now() // Setup synchronous standbys to the one of the previous master (replacing ourself with the previous master) if *cd.Cluster.DefSpec().SynchronousReplication { for _, dbUID := range oldMasterdb.Spec.SynchronousStandbys { newMasterDB.Spec.SynchronousStandbys = []string{} if dbUID != newMasterDB.UID { newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, dbUID) } else { newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, oldMasterdb.UID) } } if len(newMasterDB.Spec.SynchronousStandbys) == 0 { newMasterDB.Spec.SynchronousStandbys = []string{fakeStandbyName} } } } // TODO(sgotti) Wait for the proxies being converged (closed connections to old master)? // Setup standbys, do this only when there's no master change if curMasterDBUID == wantedMasterDBUID { masterDB := newcd.DBs[curMasterDBUID] // Set standbys to follow master only if it's healthy and converged if masterDB.Status.Healthy && s.dbConvergenceState(masterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged { // Tell proxy that there's a new active master newcd.Proxy.Spec.MasterDBUID = wantedMasterDBUID newcd.Proxy.ChangeTime = time.Now() // Remove old masters toRemove := []*cluster.DB{} for _, db := range newcd.DBs { if db.UID == wantedMasterDBUID { continue } if s.dbType(newcd, db.UID) != dbTypeMaster { continue } log.Info("removing old master db", zap.String("db", db.UID)) toRemove = append(toRemove, db) } for _, db := range toRemove { delete(newcd.DBs, db.UID) } // Remove invalid dbs toRemove = []*cluster.DB{} for _, db := range newcd.DBs { if db.UID == wantedMasterDBUID { continue } if s.dbValidity(newcd, db.UID) != dbValidityInvalid { continue } log.Info("removing invalid db", 
zap.String("db", db.UID)) toRemove = append(toRemove, db) } for _, db := range toRemove { delete(newcd.DBs, db.UID) } goodStandbys, failedStandbys, convergingStandbys := s.validStandbysByStatus(newcd) goodStandbysCount := len(goodStandbys) failedStandbysCount := len(failedStandbys) convergingStandbysCount := len(convergingStandbys) log.Debug("standbys states", zap.Int("good", goodStandbysCount), zap.Int("failed", failedStandbysCount), zap.Int("converging", convergingStandbysCount)) // Setup synchronous standbys if *cd.Cluster.DefSpec().SynchronousReplication { // make a map of synchronous standbys starting from the current ones synchronousStandbys := map[string]struct{}{} for _, dbUID := range masterDB.Spec.SynchronousStandbys { // filter out fake standby if dbUID == fakeStandbyName { continue } synchronousStandbys[dbUID] = struct{}{} } // Check if the current synchronous standbys are healthy or remove them toRemove := map[string]struct{}{} for dbUID, _ := range synchronousStandbys { if _, ok := goodStandbys[dbUID]; !ok { log.Info("removing failed synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID)) toRemove[dbUID] = struct{}{} } } for dbUID, _ := range toRemove { delete(synchronousStandbys, dbUID) } // Remove synchronous standbys in excess if uint16(len(synchronousStandbys)) > *cd.Cluster.DefSpec().MaxSynchronousStandbys { rc := len(synchronousStandbys) - int(*cd.Cluster.DefSpec().MaxSynchronousStandbys) removedCount := 0 toRemove = map[string]struct{}{} for dbUID, _ := range synchronousStandbys { if removedCount >= rc { break } log.Info("removing synchronous standby in excess", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID)) toRemove[dbUID] = struct{}{} removedCount++ } for dbUID, _ := range toRemove { delete(synchronousStandbys, dbUID) } } // try to add missing standbys up to *cd.Cluster.DefSpec().MaxSynchronousStandbys bestStandbys := s.findBestStandbys(newcd, curMasterDB) ac := 
int(*cd.Cluster.DefSpec().MaxSynchronousStandbys) - len(synchronousStandbys) addedCount := 0 for _, bestStandby := range bestStandbys { if addedCount >= ac { break } if _, ok := synchronousStandbys[bestStandby.UID]; ok { continue } log.Info("adding synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("synchronousStandbyDB", bestStandby.UID)) synchronousStandbys[bestStandby.UID] = struct{}{} addedCount++ } // If there're not enough real synchronous standbys add a fake synchronous standby because we have to be strict and make the master block transactions until MaxSynchronousStandbys real standbys are available if len(synchronousStandbys) < int(*cd.Cluster.DefSpec().MinSynchronousStandbys) { log.Info("using a fake synchronous standby since there are not enough real standbys available", zap.String("masterDB", masterDB.UID), zap.Int("required", int(*cd.Cluster.DefSpec().MinSynchronousStandbys))) synchronousStandbys[fakeStandbyName] = struct{}{} } masterDB.Spec.SynchronousStandbys = []string{} for dbUID, _ := range synchronousStandbys { masterDB.Spec.SynchronousStandbys = append(masterDB.Spec.SynchronousStandbys, dbUID) } // Sort synchronousStandbys so we can compare the slice regardless of its order sort.Sort(sort.StringSlice(masterDB.Spec.SynchronousStandbys)) } // NotFailed != Good since there can be some dbs that are converging // it's the total number of standbys - the failed standbys // or the sum of good + converging standbys notFailedStandbysCount := goodStandbysCount + convergingStandbysCount // Remove dbs in excess if we have a good number >= MaxStandbysPerSender if uint16(goodStandbysCount) >= *cd.Cluster.DefSpec().MaxStandbysPerSender { toRemove := []*cluster.DB{} // Remove all non good standbys for _, db := range newcd.DBs { if s.dbType(newcd, db.UID) != dbTypeStandby { continue } if _, ok := goodStandbys[db.UID]; !ok { log.Info("removing non good standby", zap.String("db", db.UID)) toRemove = append(toRemove, db) } } // Remove good 
standbys in excess nr := int(uint16(goodStandbysCount) - *cd.Cluster.DefSpec().MaxStandbysPerSender) i := 0 for _, db := range goodStandbys { if i >= nr { break } // Don't remove standbys marked as synchronous standbys if util.StringInSlice(masterDB.Spec.SynchronousStandbys, db.UID) { continue } log.Info("removing good standby in excess", zap.String("db", db.UID)) toRemove = append(toRemove, db) i++ } for _, db := range toRemove { delete(newcd.DBs, db.UID) } } else { // Add new dbs to substitute failed dbs. we // don't remove failed db until the number of // good db is >= MaxStandbysPerSender since they can come back // define, if there're available keepers, new dbs // nc can be negative if MaxStandbysPerSender has been lowered nc := int(*cd.Cluster.DefSpec().MaxStandbysPerSender - uint16(notFailedStandbysCount)) // Add missing DBs until MaxStandbysPerSender freeKeepers := s.freeKeepers(newcd) nf := len(freeKeepers) for i := 0; i < nc && i < nf; i++ { freeKeeper := freeKeepers[i] db := &cluster.DB{ UID: s.UIDFn(), Generation: cluster.InitialGeneration, ChangeTime: time.Now(), Spec: &cluster.DBSpec{ KeeperUID: freeKeeper.UID, InitMode: cluster.DBInitModeResync, Role: common.RoleStandby, Followers: []string{}, FollowConfig: &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID}, }, } newcd.DBs[db.UID] = db log.Info("added new standby db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) } } // Reconfigure all standbys as followers of the current master for _, db := range newcd.DBs { if s.dbType(newcd, db.UID) != dbTypeStandby { continue } db.Spec.Role = common.RoleStandby // Remove followers db.Spec.Followers = []string{} db.Spec.FollowConfig = &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID} } // Set followers for master DB masterDB.Spec.Followers = []string{} for _, db := range newcd.DBs { if masterDB.UID == db.UID { continue } fc := db.Spec.FollowConfig if fc != nil { if fc.Type == 
cluster.FollowTypeInternal && fc.DBUID == wantedMasterDBUID { masterDB.Spec.Followers = append(masterDB.Spec.Followers, db.UID) } } } // Sort followers so the slice won't be considered changed due to different order of the same entries. sort.Strings(masterDB.Spec.Followers) } } default: return nil, fmt.Errorf("unknown cluster phase %s", cd.Cluster.Status.Phase) } // Copy the clusterSpec parameters to the dbSpec s.setDBSpecFromClusterSpec(newcd) // Update generation on DBs if they have changed for dbUID, db := range newcd.DBs { prevDB, ok := cd.DBs[dbUID] if !ok { continue } if !reflect.DeepEqual(db.Spec, prevDB.Spec) { log.Debug("db spec changed, updating generation", zap.String("prevDB", spew.Sdump(prevDB.Spec)), zap.String("db", spew.Sdump(db.Spec))) db.Generation++ db.ChangeTime = time.Now() } } return newcd, nil }