func (s *Sentinel) isDBHealthy(cd *cluster.ClusterData, db *cluster.DB) bool { t, ok := s.dbErrorTimers[db.UID] if !ok { return true } if timer.Since(t) > cd.Cluster.Spec.FailInterval.Duration { return false } return true }
func (s *Sentinel) isKeeperHealthy(cd *cluster.ClusterData, keeper *cluster.Keeper) bool { t, ok := s.keeperErrorTimers[keeper.UID] if !ok { return true } if timer.Since(t) > cd.Cluster.Spec.FailInterval.Duration { return false } return true }
func (s *Sentinel) dbConvergenceState(db *cluster.DB, timeout time.Duration) ConvergenceState { if db.Status.CurrentGeneration == db.Generation { return Converged } if timeout != 0 { d, ok := s.dbConvergenceInfos[db.UID] if !ok { panic(fmt.Errorf("no db convergence info for db %q, this shouldn't happen!", db.UID)) } if timer.Since(d.Timer) > timeout { return ConvergenceFailed } } return Converging }
func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo cluster.KeepersInfo, firstRun bool) (*cluster.ClusterData, KeeperInfoHistories) { // Create a copy of cd cd = cd.DeepCopy() kihs := s.keeperInfoHistories.DeepCopy() // Remove keepers with wrong cluster UID tmpKeepersInfo := keepersInfo.DeepCopy() for _, ki := range keepersInfo { if ki.ClusterUID != cd.Cluster.UID { delete(tmpKeepersInfo, ki.UID) } } keepersInfo = tmpKeepersInfo // On first run just insert keepers info in the history with Seen set // to false and don't do any change to the keepers' state if firstRun { for keeperUID, ki := range keepersInfo { kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: false} } return cd, kihs } tmpKeepersInfo = keepersInfo.DeepCopy() // keep only updated keepers info for keeperUID, ki := range keepersInfo { if kih, ok := kihs[keeperUID]; ok { log.Debug("kih", zap.Object("kih", kih)) if kih.KeeperInfo.InfoUID == ki.InfoUID { if !kih.Seen { //Remove since it was already there and wasn't updated delete(tmpKeepersInfo, ki.UID) } else if kih.Seen && timer.Since(kih.Timer) > s.sleepInterval { //Remove since it wasn't updated delete(tmpKeepersInfo, ki.UID) } } if kih.KeeperInfo.InfoUID != ki.InfoUID { kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: true, Timer: timer.Now()} } } else { kihs[keeperUID] = &KeeperInfoHistory{KeeperInfo: ki, Seen: true, Timer: timer.Now()} } } keepersInfo = tmpKeepersInfo // Create new keepers from keepersInfo for keeperUID, ki := range keepersInfo { if _, ok := cd.Keepers[keeperUID]; !ok { k := cluster.NewKeeperFromKeeperInfo(ki) cd.Keepers[k.UID] = k } } // Mark keepers without a keeperInfo (cleaned up above from not updated // ones) as in error for keeperUID, _ := range cd.Keepers { if _, ok := keepersInfo[keeperUID]; !ok { s.SetKeeperError(keeperUID) } else { s.CleanKeeperError(keeperUID) } } // Update keepers' healthy states for _, k := range cd.Keepers { k.Status.Healthy = s.isKeeperHealthy(cd, k) } // Update dbs' states for _, db := range cd.DBs { // Mark not found DBs in DBstates in error k, ok := keepersInfo[db.Spec.KeeperUID] if !ok { log.Error("no keeper info available", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID)) s.SetDBError(db.UID) continue } dbs := k.PostgresState if dbs == nil { log.Error("no db state available", zap.String("db", db.UID)) s.SetDBError(db.UID) continue } if dbs.UID != db.UID { log.Warn("received db state for unexpected db uid", zap.String("receivedDB", dbs.UID), zap.String("db", db.UID)) s.SetDBError(db.UID) continue } log.Debug("received db state", zap.String("db", db.UID)) db.Status.ListenAddress = dbs.ListenAddress db.Status.Port = dbs.Port db.Status.CurrentGeneration = dbs.Generation if dbs.Healthy { s.CleanDBError(db.UID) db.Status.SystemID = dbs.SystemID db.Status.TimelineID = dbs.TimelineID db.Status.XLogPos = dbs.XLogPos db.Status.TimelinesHistory = dbs.TimelinesHistory db.Status.PGParameters = cluster.PGParameters(dbs.PGParameters) } else { s.SetDBError(db.UID) } } // Update dbs' healthy state for _, db := range cd.DBs { db.Status.Healthy = s.isDBHealthy(cd, db) } return cd, kihs }