func (m *Monitor) repairCluster() error { log := monitorLogger.New("fn", "repairCluster") log.Info("initiating cluster repair") hosts, err := m.c.Hosts() if err != nil { return err } f := fixer.NewClusterFixer(hosts, m.c, log) // killing the schedulers to prevent interference f.KillSchedulers() // ensure postgres is working f.FixPostgres() // ensure controller api is working controllerService := discoverd.NewService("controller") controllerInstances, _ := controllerService.Instances() if len(controllerInstances) == 0 { controllerInstances, err = f.StartAppJob("controller", "web", "controller") if err != nil { return err } } // fix any formations and start the scheduler again if err := f.FixController(controllerInstances, true); err != nil { return err } // zero out the deadline timer m.deadline = time.Time{} return nil }
func (m *Monitor) repairCluster() error { log := monitorLogger.New("fn", "repairCluster") log.Info("initiating cluster repair") hosts, err := m.c.Hosts() if err != nil { return err } f := fixer.NewClusterFixer(hosts, m.c, log) // killing the schedulers to prevent interference f.KillSchedulers() log.Info("checking status of sirenia databases") for _, db := range []string{"postgres", "mariadb"} { log.Info("checking for database state", "db", db) if _, err := discoverd.NewService(db).GetMeta(); err != nil { if discoverd.IsNotFound(err) { log.Info("skipping recovery of db, no state in discoverd", "db", db) continue } log.Error("error checking database state", "db", db) return err } if err := f.CheckSirenia(db); err != nil { if err := f.FixSirenia(db); err != nil { if db == "postgres" { return err } else { log.Error("failed database recovery", "db", db) } } } } // ensure controller api is working controllerService := discoverd.NewService("controller") controllerInstances, _ := controllerService.Instances() if len(controllerInstances) == 0 { controllerInstances, err = f.StartAppJob("controller", "web", "controller") if err != nil { return err } } // fix any formations and start the scheduler again if err := f.FixController(controllerInstances, true); err != nil { return err } // zero out the deadline timer m.deadline = time.Time{} return nil }