Example #1
File: sentinel.go Project: sgotti/stolon
func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo cluster.KeepersInfo, firstRun bool) (*cluster.ClusterData, KeeperInfoHistories) {
	// Create a copy of cd
	cd = cd.DeepCopy()

	kihs := s.keeperInfoHistories.DeepCopy()

	// Remove keepers with wrong cluster UID
	tmpKeepersInfo := keepersInfo.DeepCopy()
	for _, ki := range keepersInfo {
		if ki.ClusterUID != cd.Cluster.UID {
			delete(tmpKeepersInfo, ki.UID)
		}
	}
	keepersInfo = tmpKeepersInfo

	// On the first run just insert the keepers info into the history with
	// Seen set to false and don't change the keepers' state
	if firstRun {
		for keeperUID, ki := range keepersInfo {
			kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: false}
		}
		return cd, kihs
	}

	tmpKeepersInfo = keepersInfo.DeepCopy()
	// keep only updated keepers info
	for keeperUID, ki := range keepersInfo {
		if kih, ok := kihs[keeperUID]; ok {
			log.Debug("kih", zap.Object("kih", kih))
			if kih.KeeperInfo.InfoUID == ki.InfoUID {
				if !kih.Seen {
					// Remove since it was already there and wasn't updated
					delete(tmpKeepersInfo, ki.UID)
				} else if kih.Seen && timer.Since(kih.Timer) > s.sleepInterval {
					// Remove since it wasn't updated
					delete(tmpKeepersInfo, ki.UID)
				}
			}
			if kih.KeeperInfo.InfoUID != ki.InfoUID {
				kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: true, Timer: timer.Now()}
			}
		} else {
			kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: true, Timer: timer.Now()}
		}
	}
	keepersInfo = tmpKeepersInfo

	// Create new keepers from keepersInfo
	for keeperUID, ki := range keepersInfo {
		if _, ok := cd.Keepers[keeperUID]; !ok {
			k := cluster.NewKeeperFromKeeperInfo(ki)
			cd.Keepers[k.UID] = k
		}
	}

	// Mark keepers without a keeperInfo (the not-updated ones were removed
	// above) as in error
	for keeperUID := range cd.Keepers {
		if _, ok := keepersInfo[keeperUID]; !ok {
			s.SetKeeperError(keeperUID)
		} else {
			s.CleanKeeperError(keeperUID)
		}
	}

	// Update keepers' healthy states
	for _, k := range cd.Keepers {
		k.Status.Healthy = s.isKeeperHealthy(cd, k)
	}

	// Update dbs' states
	for _, db := range cd.DBs {
		// Mark DBs whose state was not reported as in error
		k, ok := keepersInfo[db.Spec.KeeperUID]
		if !ok {
			log.Error("no keeper info available", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
			s.SetDBError(db.UID)
			continue
		}
		dbs := k.PostgresState
		if dbs == nil {
			log.Error("no db state available", zap.String("db", db.UID))
			s.SetDBError(db.UID)
			continue
		}
		if dbs.UID != db.UID {
			log.Warn("received db state for unexpected db uid", zap.String("receivedDB", dbs.UID), zap.String("db", db.UID))
			s.SetDBError(db.UID)
			continue
		}
		log.Debug("received db state", zap.String("db", db.UID))
		db.Status.ListenAddress = dbs.ListenAddress
		db.Status.Port = dbs.Port
		db.Status.CurrentGeneration = dbs.Generation
		if dbs.Healthy {
			s.CleanDBError(db.UID)
			db.Status.SystemID = dbs.SystemID
			db.Status.TimelineID = dbs.TimelineID
			db.Status.XLogPos = dbs.XLogPos
			db.Status.TimelinesHistory = dbs.TimelinesHistory
			db.Status.PGParameters = cluster.PGParameters(dbs.PGParameters)
		} else {
			s.SetDBError(db.UID)
		}
	}

	// Update dbs' healthy state
	for _, db := range cd.DBs {
		db.Status.Healthy = s.isDBHealthy(cd, db)
	}

	return cd, kihs
}
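
Example #1 only refreshes the observed keeper and db state; the returned history map has to be stored back on the Sentinel and the updated ClusterData fed into updateCluster (examples #2 and #3). A minimal sketch of that wiring follows; reconcileOnce is a hypothetical helper and only updateKeepersStatus, updateCluster and the keeperInfoHistories field come from the examples on this page.

// Hypothetical wiring of the two example methods into one reconciliation
// pass; reconcileOnce itself is not part of stolon, it is an illustration.
func (s *Sentinel) reconcileOnce(cd *cluster.ClusterData, keepersInfo cluster.KeepersInfo, firstRun bool) (*cluster.ClusterData, error) {
	// Refresh keeper and db statuses from the latest keeper reports.
	cd, kihs := s.updateKeepersStatus(cd, keepersInfo, firstRun)
	// Keep the updated histories so the next pass can detect stale reports.
	s.keeperInfoHistories = kihs

	// Compute the desired cluster state from the observed one.
	return s.updateCluster(cd)
}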
Example #2
File: sentinel.go Project: sgotti/stolon
func (s *Sentinel) updateCluster(cd *cluster.ClusterData) (*cluster.ClusterData, error) {
	newcd := cd.DeepCopy()
	switch cd.Cluster.Status.Phase {
	case cluster.ClusterPhaseInitializing:
		switch cd.Cluster.Spec.InitMode {
		case cluster.ClusterInitModeNew:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeNew,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.Spec.MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				switch s.dbConvergenceState(cd, db, cd.Cluster.Spec.InitTimeout.Duration) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.Spec.MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		case cluster.ClusterInitModeExisting:
			if cd.Cluster.Status.Master == "" {
				wantedKeeper := cd.Cluster.Spec.ExistingConfig.KeeperUID
				log.Info("trying to use keeper as initial master", zap.String("keeper", wantedKeeper))

				k, ok := cd.Keepers[wantedKeeper]
				if !ok {
					return nil, fmt.Errorf("keeper %q state not available", wantedKeeper)
				}

				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))

				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeExisting,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.Spec.MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := newcd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				// TODO(sgotti) set a timeout (the max time for a noop operation, just a start/restart)
				if db.Status.Healthy && s.dbConvergenceState(cd, db, 0) == Converged {
					log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Don't include previous config anymore
					db.Spec.IncludeConfig = false
					// Replace reported pg parameters in cluster spec
					if *cd.Cluster.Spec.MergePgParameters {
						newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
					}
					// Cluster initialized, switch to Normal state
					newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
				}
			}
		case cluster.ClusterInitModePITR:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModePITR,
						PITRConfig:    cd.Cluster.Spec.PITRConfig,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.Spec.MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				// TODO(sgotti) set a timeout (the max time for a restore operation)
				switch s.dbConvergenceState(cd, db, 0) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.Spec.MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db to converge", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		default:
			return nil, fmt.Errorf("unknown init mode %q", cd.Cluster.Spec.InitMode)
		}
	case cluster.ClusterPhaseNormal:
		// Add missing DBs
		for _, k := range cd.Keepers {
			if db := cd.FindDB(k); db == nil {
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID: k.UID,
						InitMode:  cluster.DBInitModeNone,
						Role:      common.RoleUndefined,
						Followers: []string{},
					},
				}
				newcd.DBs[db.UID] = db
			}
		}

		// TODO(sgotti) When keeper removal is implemented, remove DBs for nonexistent keepers

		// Calculate current master status
		curMasterDBUID := cd.Cluster.Status.Master
		wantedMasterDBUID := curMasterDBUID

		masterOK := true
		curMasterDB := cd.DBs[curMasterDBUID]
		if curMasterDB == nil {
			return nil, fmt.Errorf("db for keeper %q not available. This shouldn't happen!", curMasterDBUID)
		}
		log.Debug("db dump", zap.String("db", spew.Sdump(curMasterDB)))

		if !curMasterDB.Status.Healthy {
			log.Info("master db is failed", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if s.dbConvergenceState(cd, curMasterDB, newcd.Cluster.Spec.ConvergenceTimeout.Duration) == ConvergenceFailed {
			log.Info("db not converged", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		if !masterOK {
			log.Info("trying to find a standby to replace failed master")
			bestStandbyDB, err := s.findBestStandby(cd, curMasterDB)
			if err != nil {
				log.Error("error trying to find the best standby", zap.Error(err))
			} else {
				log.Info("electing db as the new master", zap.String("db", bestStandbyDB.UID), zap.String("keeper", bestStandbyDB.Spec.KeeperUID))
				wantedMasterDBUID = bestStandbyDB.UID
			}
		}

		// New master elected
		if curMasterDBUID != wantedMasterDBUID {
			// maintain the current role, remove followers
			oldMasterdb := newcd.DBs[curMasterDBUID]
			oldMasterdb.Spec.Followers = []string{}

			newcd.Cluster.Status.Master = wantedMasterDBUID
			newMasterDB := newcd.DBs[wantedMasterDBUID]
			newMasterDB.Spec.Role = common.RoleMaster
			newMasterDB.Spec.FollowConfig = nil

			// Tell proxy that there's currently no active master
			newcd.Proxy.Spec.MasterDBUID = ""
			newcd.Proxy.ChangeTime = time.Now()
		}

		// TODO(sgotti) Wait for the proxies being converged (closed connections to old master)?

		// Set up standbys; do this only when there's no master change
		if curMasterDBUID == wantedMasterDBUID {
			masterDB := newcd.DBs[curMasterDBUID]
			// Set standbys to follow master only if it's healthy and converged
			if masterDB.Status.Healthy && s.dbConvergenceState(newcd, masterDB, newcd.Cluster.Spec.ConvergenceTimeout.Duration) == Converged {
				// Tell proxy that there's a new active master
				newcd.Proxy.Spec.MasterDBUID = wantedMasterDBUID
				newcd.Proxy.ChangeTime = time.Now()

				// TODO(sgotti) do this only for the defined number of MaxStandbysPerSender (needs also to detect unhealthy standbys and switch to healthy one)
				for id, db := range newcd.DBs {
					if id == wantedMasterDBUID {
						continue
					}
					db.Spec.Role = common.RoleStandby
					// Remove followers
					db.Spec.Followers = []string{}
					db.Spec.FollowConfig = &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID}
				}

				// Define followers for master DB
				masterDB.Spec.Followers = []string{}
				for _, db := range newcd.DBs {
					if masterDB.UID == db.UID {
						continue
					}
					fc := db.Spec.FollowConfig
					if fc != nil {
						if fc.Type == cluster.FollowTypeInternal && fc.DBUID == wantedMasterDBUID {
							masterDB.Spec.Followers = append(masterDB.Spec.Followers, db.UID)
							// Sort followers
							sort.Strings(masterDB.Spec.Followers)
						}
					}
				}
			}
		}

		// Update generation on DBs if they have changed
		for dbUID, db := range newcd.DBs {
			prevDB, ok := cd.DBs[dbUID]
			if !ok {
				continue
			}
			if !reflect.DeepEqual(db.Spec, prevDB.Spec) {
				log.Debug("db spec changed, updating generation", zap.String("prevDB", spew.Sdump(prevDB.Spec)), zap.String("db", spew.Sdump(db.Spec)))
				db.Generation++
				db.ChangeTime = time.Now()
			}
		}

	default:
		return nil, fmt.Errorf("unknown cluster phase %s", cd.Cluster.Status.Phase)
	}

	// Copy the clusterSpec parameters to the dbSpec
	s.setDBSpecFromClusterSpec(newcd)

	return newcd, nil
}
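
The generation bump at the end of updateCluster is what turns a spec change into work for the owning keeper: whenever a db's Spec differs from the previous copy, Generation is incremented and ChangeTime is refreshed. The following is a self-contained sketch of the same pattern, using trimmed-down, hypothetical types rather than stolon's own.

package main

import (
	"fmt"
	"reflect"
	"time"
)

// Hypothetical, trimmed-down stand-ins for the cluster types used above.
type DBSpec struct {
	KeeperUID string
	Role      string
}

type DB struct {
	Spec       DBSpec
	Generation int64
	ChangeTime time.Time
}

// bumpChangedGenerations increments Generation only for dbs whose desired
// spec differs from the previously stored one, mirroring the loop above.
func bumpChangedGenerations(prev, next map[string]*DB) {
	for uid, db := range next {
		prevDB, ok := prev[uid]
		if !ok {
			continue // newly created dbs keep their initial generation
		}
		if !reflect.DeepEqual(db.Spec, prevDB.Spec) {
			db.Generation++
			db.ChangeTime = time.Now()
		}
	}
}

func main() {
	prev := map[string]*DB{"db1": {Spec: DBSpec{KeeperUID: "k1", Role: "master"}, Generation: 1}}
	next := map[string]*DB{"db1": {Spec: DBSpec{KeeperUID: "k1", Role: "standby"}, Generation: 1}}
	bumpChangedGenerations(prev, next)
	fmt.Println(next["db1"].Generation) // prints 2
}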
Example #3
func (s *Sentinel) updateCluster(cd *cluster.ClusterData) (*cluster.ClusterData, error) {
	newcd := cd.DeepCopy()
	switch cd.Cluster.Status.Phase {
	case cluster.ClusterPhaseInitializing:
		switch *cd.Cluster.DefSpec().InitMode {
		case cluster.ClusterInitModeNew:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeNew,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				switch s.dbConvergenceState(db, cd.Cluster.DefSpec().InitTimeout.Duration) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.DefSpec().MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		case cluster.ClusterInitModeExisting:
			if cd.Cluster.Status.Master == "" {
				wantedKeeper := cd.Cluster.DefSpec().ExistingConfig.KeeperUID
				log.Info("trying to use keeper as initial master", zap.String("keeper", wantedKeeper))

				k, ok := cd.Keepers[wantedKeeper]
				if !ok {
					return nil, fmt.Errorf("keeper %q state not available", wantedKeeper)
				}

				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))

				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeExisting,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := newcd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				if db.Status.Healthy && s.dbConvergenceState(db, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged {
					log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Don't include previous config anymore
					db.Spec.IncludeConfig = false
					// Replace reported pg parameters in cluster spec
					if *cd.Cluster.DefSpec().MergePgParameters {
						newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
					}
					// Cluster initialized, switch to Normal state
					newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
				}
			}
		case cluster.ClusterInitModePITR:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModePITR,
						PITRConfig:    cd.Cluster.DefSpec().PITRConfig,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exists. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has initialized correctly
				// TODO(sgotti) set a timeout (the max time for a restore operation)
				switch s.dbConvergenceState(db, 0) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.DefSpec().MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db to converge", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		default:
			return nil, fmt.Errorf("unknown init mode %q", cd.Cluster.DefSpec().InitMode)
		}
	case cluster.ClusterPhaseNormal:
		// TODO(sgotti) When keeper removal is implemented, remove DBs for nonexistent keepers

		// Calculate current master status
		curMasterDBUID := cd.Cluster.Status.Master
		wantedMasterDBUID := curMasterDBUID

		masterOK := true
		curMasterDB := cd.DBs[curMasterDBUID]
		if curMasterDB == nil {
			return nil, fmt.Errorf("db for keeper %q not available. This shouldn't happen!", curMasterDBUID)
		}
		log.Debug("db dump", zap.String("db", spew.Sdump(curMasterDB)))

		if !curMasterDB.Status.Healthy {
			log.Info("master db is failed", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if s.dbConvergenceState(curMasterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == ConvergenceFailed {
			log.Info("db not converged", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		if !masterOK {
			log.Info("trying to find a new master to replace failed master")
			bestNewMasters := s.findBestNewMasters(cd, curMasterDB)
			if len(bestNewMasters) == 0 {
				log.Error("no eligible masters")
			} else {
				// if synchronous replication is enabled, choose the new master only among the synchronous standbys.
				var bestNewMasterDB *cluster.DB
				if *cd.Cluster.DefSpec().SynchronousReplication {
					onlyFake := true
					// if only fake synchronous standbys are defined we cannot choose any standby
					for _, dbUID := range curMasterDB.Spec.SynchronousStandbys {
						if dbUID != fakeStandbyName {
							onlyFake = false
						}
					}
					if !onlyFake {
						if !util.CompareStringSlice(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys) {
							log.Warn("cannot choose synchronous standby since the latest master reported synchronous standbys are different from the db spec ones", zap.Object("reported", spew.Sdump(curMasterDB.Status.SynchronousStandbys)), zap.Object("spec", spew.Sdump(curMasterDB.Spec.SynchronousStandbys)))
						} else {
							for _, nm := range bestNewMasters {
								if util.StringInSlice(curMasterDB.Spec.SynchronousStandbys, nm.UID) {
									bestNewMasterDB = nm
									break
								}
							}
						}
					}
				} else {
					bestNewMasterDB = bestNewMasters[0]
				}
				if bestNewMasterDB != nil {
					log.Info("electing db as the new master", zap.String("db", bestNewMasterDB.UID), zap.String("keeper", bestNewMasterDB.Spec.KeeperUID))
					wantedMasterDBUID = bestNewMasterDB.UID
				} else {
					log.Error("no eligible masters")
				}
			}
		}

		// New master elected
		if curMasterDBUID != wantedMasterDBUID {
			// maintain the current role, remove followers
			oldMasterdb := newcd.DBs[curMasterDBUID]
			oldMasterdb.Spec.Followers = []string{}

			newcd.Cluster.Status.Master = wantedMasterDBUID
			newMasterDB := newcd.DBs[wantedMasterDBUID]
			newMasterDB.Spec.Role = common.RoleMaster
			newMasterDB.Spec.FollowConfig = nil

			// Tell proxy that there's currently no active master
			newcd.Proxy.Spec.MasterDBUID = ""
			newcd.Proxy.ChangeTime = time.Now()

			// Set the new master's synchronous standbys to those of the previous master
			// (replacing ourselves with the previous master)
			if *cd.Cluster.DefSpec().SynchronousReplication {
				newMasterDB.Spec.SynchronousStandbys = []string{}
				for _, dbUID := range oldMasterdb.Spec.SynchronousStandbys {
					if dbUID != newMasterDB.UID {
						newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, dbUID)
					} else {
						newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, oldMasterdb.UID)
					}
				}
				if len(newMasterDB.Spec.SynchronousStandbys) == 0 {
					newMasterDB.Spec.SynchronousStandbys = []string{fakeStandbyName}
				}
			}
		}

		// TODO(sgotti) Wait for the proxies being converged (closed connections to old master)?

		// Set up standbys; do this only when there's no master change
		if curMasterDBUID == wantedMasterDBUID {
			masterDB := newcd.DBs[curMasterDBUID]
			// Set standbys to follow master only if it's healthy and converged
			if masterDB.Status.Healthy && s.dbConvergenceState(masterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged {
				// Tell proxy that there's a new active master
				newcd.Proxy.Spec.MasterDBUID = wantedMasterDBUID
				newcd.Proxy.ChangeTime = time.Now()

				// Remove old masters
				toRemove := []*cluster.DB{}
				for _, db := range newcd.DBs {
					if db.UID == wantedMasterDBUID {
						continue
					}
					if s.dbType(newcd, db.UID) != dbTypeMaster {
						continue
					}
					log.Info("removing old master db", zap.String("db", db.UID))
					toRemove = append(toRemove, db)
				}
				for _, db := range toRemove {
					delete(newcd.DBs, db.UID)
				}

				// Remove invalid dbs
				toRemove = []*cluster.DB{}
				for _, db := range newcd.DBs {
					if db.UID == wantedMasterDBUID {
						continue
					}
					if s.dbValidity(newcd, db.UID) != dbValidityInvalid {
						continue
					}
					log.Info("removing invalid db", zap.String("db", db.UID))
					toRemove = append(toRemove, db)
				}
				for _, db := range toRemove {
					delete(newcd.DBs, db.UID)
				}

				goodStandbys, failedStandbys, convergingStandbys := s.validStandbysByStatus(newcd)
				goodStandbysCount := len(goodStandbys)
				failedStandbysCount := len(failedStandbys)
				convergingStandbysCount := len(convergingStandbys)
				log.Debug("standbys states", zap.Int("good", goodStandbysCount), zap.Int("failed", failedStandbysCount), zap.Int("converging", convergingStandbysCount))

				// Setup synchronous standbys
				if *cd.Cluster.DefSpec().SynchronousReplication {
					// make a map of synchronous standbys starting from the current ones
					synchronousStandbys := map[string]struct{}{}
					for _, dbUID := range masterDB.Spec.SynchronousStandbys {
						// filter out fake standby
						if dbUID == fakeStandbyName {
							continue
						}
						synchronousStandbys[dbUID] = struct{}{}
					}

					// Check if the current synchronous standbys are healthy or remove them
					toRemove := map[string]struct{}{}
					for dbUID := range synchronousStandbys {
						if _, ok := goodStandbys[dbUID]; !ok {
							log.Info("removing failed synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID))
							toRemove[dbUID] = struct{}{}
						}
					}
					for dbUID := range toRemove {
						delete(synchronousStandbys, dbUID)
					}

					// Remove synchronous standbys in excess
					if uint16(len(synchronousStandbys)) > *cd.Cluster.DefSpec().MaxSynchronousStandbys {
						rc := len(synchronousStandbys) - int(*cd.Cluster.DefSpec().MaxSynchronousStandbys)
						removedCount := 0
						toRemove = map[string]struct{}{}
						for dbUID := range synchronousStandbys {
							if removedCount >= rc {
								break
							}
							log.Info("removing synchronous standby in excess", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID))
							toRemove[dbUID] = struct{}{}
							removedCount++
						}
						for dbUID := range toRemove {
							delete(synchronousStandbys, dbUID)
						}
					}

					// try to add missing standbys up to *cd.Cluster.DefSpec().MaxSynchronousStandbys
					bestStandbys := s.findBestStandbys(newcd, curMasterDB)
					ac := int(*cd.Cluster.DefSpec().MaxSynchronousStandbys) - len(synchronousStandbys)
					addedCount := 0
					for _, bestStandby := range bestStandbys {
						if addedCount >= ac {
							break
						}
						if _, ok := synchronousStandbys[bestStandby.UID]; ok {
							continue
						}
						log.Info("adding synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("synchronousStandbyDB", bestStandby.UID))
						synchronousStandbys[bestStandby.UID] = struct{}{}
						addedCount++
					}

					// If there aren't enough real synchronous standbys, add a fake synchronous standby so the master keeps blocking transactions until at least MinSynchronousStandbys real standbys are available
					if len(synchronousStandbys) < int(*cd.Cluster.DefSpec().MinSynchronousStandbys) {
						log.Info("using a fake synchronous standby since there are not enough real standbys available", zap.String("masterDB", masterDB.UID), zap.Int("required", int(*cd.Cluster.DefSpec().MinSynchronousStandbys)))
						synchronousStandbys[fakeStandbyName] = struct{}{}
					}

					masterDB.Spec.SynchronousStandbys = []string{}
					for dbUID := range synchronousStandbys {
						masterDB.Spec.SynchronousStandbys = append(masterDB.Spec.SynchronousStandbys, dbUID)
					}

					// Sort synchronousStandbys so we can compare the slice regardless of its order
					sort.Strings(masterDB.Spec.SynchronousStandbys)
				}

				// NotFailed != Good since there can be some dbs that are converging
				// it's the total number of standbys - the failed standbys
				// or the sum of good + converging standbys
				notFailedStandbysCount := goodStandbysCount + convergingStandbysCount

				// Remove dbs in excess if we have a good number >= MaxStandbysPerSender
				if uint16(goodStandbysCount) >= *cd.Cluster.DefSpec().MaxStandbysPerSender {
					toRemove := []*cluster.DB{}
					// Remove all non good standbys
					for _, db := range newcd.DBs {
						if s.dbType(newcd, db.UID) != dbTypeStandby {
							continue
						}
						if _, ok := goodStandbys[db.UID]; !ok {
							log.Info("removing non good standby", zap.String("db", db.UID))
							toRemove = append(toRemove, db)
						}
					}
					// Remove good standbys in excess
					nr := int(uint16(goodStandbysCount) - *cd.Cluster.DefSpec().MaxStandbysPerSender)
					i := 0
					for _, db := range goodStandbys {
						if i >= nr {
							break
						}
						// Don't remove standbys marked as synchronous standbys
						if util.StringInSlice(masterDB.Spec.SynchronousStandbys, db.UID) {
							continue
						}
						log.Info("removing good standby in excess", zap.String("db", db.UID))
						toRemove = append(toRemove, db)
						i++
					}
					for _, db := range toRemove {
						delete(newcd.DBs, db.UID)
					}

				} else {
					// Add new dbs to replace failed ones. We don't remove a
					// failed db until the number of good dbs is >=
					// MaxStandbysPerSender, since it can come back.

					// Define new dbs if there are free keepers.
					// nc can be negative if MaxStandbysPerSender has been lowered.
					nc := int(*cd.Cluster.DefSpec().MaxStandbysPerSender - uint16(notFailedStandbysCount))
					// Add missing DBs until MaxStandbysPerSender
					freeKeepers := s.freeKeepers(newcd)
					nf := len(freeKeepers)
					for i := 0; i < nc && i < nf; i++ {
						freeKeeper := freeKeepers[i]
						db := &cluster.DB{
							UID:        s.UIDFn(),
							Generation: cluster.InitialGeneration,
							ChangeTime: time.Now(),
							Spec: &cluster.DBSpec{
								KeeperUID:    freeKeeper.UID,
								InitMode:     cluster.DBInitModeResync,
								Role:         common.RoleStandby,
								Followers:    []string{},
								FollowConfig: &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID},
							},
						}
						newcd.DBs[db.UID] = db
						log.Info("added new standby db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					}
				}

				// Reconfigure all standbys as followers of the current master
				for _, db := range newcd.DBs {
					if s.dbType(newcd, db.UID) != dbTypeStandby {
						continue
					}

					db.Spec.Role = common.RoleStandby
					// Remove followers
					db.Spec.Followers = []string{}
					db.Spec.FollowConfig = &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID}
				}

				// Set followers for master DB
				masterDB.Spec.Followers = []string{}
				for _, db := range newcd.DBs {
					if masterDB.UID == db.UID {
						continue
					}
					fc := db.Spec.FollowConfig
					if fc != nil {
						if fc.Type == cluster.FollowTypeInternal && fc.DBUID == wantedMasterDBUID {
							masterDB.Spec.Followers = append(masterDB.Spec.Followers, db.UID)
						}
					}
				}
				// Sort followers so the slice won't be considered changed due to different order of the same entries.
				sort.Strings(masterDB.Spec.Followers)
			}
		}

	default:
		return nil, fmt.Errorf("unknown cluster phase %s", cd.Cluster.Status.Phase)
	}

	// Copy the clusterSpec parameters to the dbSpec
	s.setDBSpecFromClusterSpec(newcd)

	// Update generation on DBs if they have changed
	for dbUID, db := range newcd.DBs {
		prevDB, ok := cd.DBs[dbUID]
		if !ok {
			continue
		}
		if !reflect.DeepEqual(db.Spec, prevDB.Spec) {
			log.Debug("db spec changed, updating generation", zap.String("prevDB", spew.Sdump(prevDB.Spec)), zap.String("db", spew.Sdump(db.Spec)))
			db.Generation++
			db.ChangeTime = time.Now()
		}
	}

	return newcd, nil
}
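
Compared with example #2, this version of updateCluster also keeps the master's synchronous standby set consistent. Stripped of logging and of the full ClusterData plumbing, that bookkeeping is a small set reconciliation; the sketch below is a hypothetical simplification, with plain string UIDs and "fakestandby" standing in for the fakeStandbyName constant, and it is not stolon's actual helper.

// reconcileSyncStandbys keeps the still-good current synchronous standbys,
// caps the set at max, fills it from the best candidates and falls back to a
// fake entry when fewer than min real standbys are available.
func reconcileSyncStandbys(current []string, good map[string]bool, candidates []string, min, max int) []string {
	keep := map[string]bool{}
	// Keep current synchronous standbys that are still good (dropping the fake one
	// and anything beyond max).
	for _, uid := range current {
		if len(keep) >= max {
			break
		}
		if uid != "fakestandby" && good[uid] {
			keep[uid] = true
		}
	}
	// Fill up to max from the best candidates.
	for _, uid := range candidates {
		if len(keep) >= max {
			break
		}
		keep[uid] = true
	}
	// Not enough real standbys: add a fake one so the master keeps blocking writes.
	if len(keep) < min {
		keep["fakestandby"] = true
	}
	// Return a sorted slice so callers can compare it regardless of map iteration order.
	out := make([]string, 0, len(keep))
	for uid := range keep {
		out = append(out, uid)
	}
	sort.Strings(out)
	return out
}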