func (f *ClusterFixer) FixController(instances []*discoverd.Instance, startScheduler bool) error { f.l.Info("found controller instance, checking critical formations") inst := instances[0] client, err := controller.NewClient("http://"+inst.Addr, inst.Meta["AUTH_KEY"]) if err != nil { return fmt.Errorf("unexpected error creating controller client: %s", err) } // check that formations for critical components are expected apps := []string{"controller", "router", "discoverd", "flannel", "postgres"} changes := make(map[string]*ct.Formation, len(apps)) var controllerFormation *ct.Formation for _, app := range apps { release, err := client.GetAppRelease(app) if err != nil { return fmt.Errorf("error getting %s release: %s", app, err) } formation, err := client.GetFormation(app, release.ID) if err != nil { // TODO: handle ErrNotFound return fmt.Errorf("error getting %s formation: %s", app, err) } if app == "controller" { controllerFormation = formation } for typ := range release.Processes { var want int if app == "postgres" && typ == "postgres" && len(f.hosts) > 1 && formation.Processes[typ] < 3 { want = 3 } else if formation.Processes[typ] < 1 { want = 1 } if want > 0 { f.l.Info("found broken formation", "app", app, "process", typ) if _, ok := changes[app]; !ok { if formation.Processes == nil { formation.Processes = make(map[string]int) } changes[app] = formation } changes[app].Processes[typ] = want } } } for app, formation := range changes { f.l.Info("fixing broken formation", "app", app) if err := client.PutFormation(formation); err != nil { return fmt.Errorf("error putting %s formation: %s", app, err) } } if startScheduler { if err := f.StartScheduler(client, controllerFormation); err != nil { return err } } return nil }
// CheckScale examines sirenia cluster formation to check if cluster // has been scaled up yet. // Returns true if scaled, false if not. func CheckScale(app, controllerKey, procName string, logger log15.Logger) (bool, error) { logger = logger.New("fn", "CheckScale") // Connect to controller. logger.Info("connecting to controller") client, err := controller.NewClient("", controllerKey) if err != nil { logger.Error("controller client error", "err", err) return false, err } // Retrieve app release. logger.Info("retrieving app release", "app", app) release, err := client.GetAppRelease(app) if err == controller.ErrNotFound { logger.Error("release not found", "app", app) return false, err } else if err != nil { logger.Error("get release error", "app", app, "err", err) return false, err } // Retrieve current formation. logger.Info("retrieving formation", "app", app, "release_id", release.ID) formation, err := client.GetFormation(app, release.ID) if err == controller.ErrNotFound { logger.Error("formation not found", "app", app, "release_id", release.ID) return false, err } else if err != nil { logger.Error("formation error", "app", app, "release_id", release.ID, "err", err) return false, err } // Database hasn't been scaled up yet if formation.Processes[procName] == 0 { return false, nil } return true, nil }
// ScaleUp scales up a dormant Sirenia cluster func ScaleUp(app, controllerKey, serviceAddr, procName, singleton string, logger log15.Logger) error { logger = logger.New("fn", "ScaleUp") sc := sirenia.NewClient(serviceAddr) logger.Info("checking status", "host", serviceAddr) if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite { logger.Info("database is up, skipping scale") // Skip the rest, the database is already available return nil } else if err != nil { logger.Info("error checking status", "err", err) } else { logger.Info("got status, but database is not read-write") } // Connect to controller. logger.Info("connecting to controller") client, err := controller.NewClient("", controllerKey) if err != nil { logger.Error("controller client error", "err", err) return err } // Retrieve the app release. logger.Info("retrieving app release", "app", app) release, err := client.GetAppRelease(app) if err == controller.ErrNotFound { logger.Error("release not found", "app", app) return errors.New("release not found") } else if err != nil { logger.Error("get release error", "app", app, "err", err) return err } // Retrieve current formation. logger.Info("retrieving formation", "app", app, "release_id", release.ID) formation, err := client.GetFormation(app, release.ID) if err == controller.ErrNotFound { logger.Error("formation not found", "app", app, "release_id", release.ID) return errors.New("formation not found") } else if err != nil { logger.Error("formation error", "app", app, "release_id", release.ID, "err", err) return err } // If database is running then exit. if formation.Processes[procName] > 0 { logger.Info("database is running, scaling not necessary") return nil } // Copy processes and increase database processes. processes := make(map[string]int, len(formation.Processes)) for k, v := range formation.Processes { processes[k] = v } if singleton == "true" { processes[procName] = 1 } else { processes[procName] = 3 } // Update formation. logger.Info("updating formation", "app", app, "release_id", release.ID) formation.Processes = processes if err := client.PutFormation(formation); err != nil { logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err) return err } if err := sc.WaitForReadWrite(5 * time.Minute); err != nil { logger.Error("wait for read write", "err", err) return errors.New("timed out while starting sirenia cluster") } logger.Info("scaling complete") return nil }
func (a *API) scaleUp() error { a.mtx.Lock() defer a.mtx.Unlock() // Ignore if already scaled up. if a.scaledUp { return nil } app := os.Getenv("FLYNN_APP_ID") logger := a.logger().New("fn", "scaleUp") sc := sirenia.NewClient(serviceHost + ":3306") logger.Info("checking status", "host", serviceHost) if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite { logger.Info("database is up, skipping scale") // Skip the rest, the database is already available a.scaledUp = true return nil } else if err != nil { logger.Info("error checking status", "err", err) } else { logger.Info("got status, but database is not read-write") } // Connect to controller. logger.Info("connecting to controller") client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY")) if err != nil { logger.Error("controller client error", "err", err) return err } // Retrieve mariadb release. logger.Info("retrieving app release", "app", app) release, err := client.GetAppRelease(app) if err == controller.ErrNotFound { logger.Error("release not found", "app", app) return errors.New("mariadb release not found") } else if err != nil { logger.Error("get release error", "app", app, "err", err) return err } // Retrieve current formation. logger.Info("retrieving formation", "app", app, "release_id", release.ID) formation, err := client.GetFormation(app, release.ID) if err == controller.ErrNotFound { logger.Error("formation not found", "app", app, "release_id", release.ID) return errors.New("mariadb formation not found") } else if err != nil { logger.Error("formation error", "app", app, "release_id", release.ID, "err", err) return err } // If mariadb is running then exit. if formation.Processes["mariadb"] > 0 { logger.Info("database is running, scaling not necessary") return nil } // Copy processes and increase database processes. processes := make(map[string]int, len(formation.Processes)) for k, v := range formation.Processes { processes[k] = v } if os.Getenv("SINGLETON") == "true" { processes["mariadb"] = 1 } else { processes["mariadb"] = 3 } // Update formation. logger.Info("updating formation", "app", app, "release_id", release.ID) formation.Processes = processes if err := client.PutFormation(formation); err != nil { logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err) return err } if err := sc.WaitForReadWrite(5 * time.Minute); err != nil { logger.Error("wait for read write", "err", err) return errors.New("timed out while starting mariadb cluster") } logger.Info("scaling complete") // Mark as successfully scaled up. a.scaledUp = true return nil }
func (a *API) ping(ctx context.Context, w http.ResponseWriter, req *http.Request) { app := os.Getenv("FLYNN_APP_ID") logger := a.logger().New("fn", "ping") logger.Info("checking status", "host", serviceHost) if status, err := sirenia.NewClient(serviceHost + ":3306").Status(); err == nil && status.Database != nil && status.Database.ReadWrite { logger.Info("database is up, skipping scale check") } else { // Connect to controller. logger.Info("connecting to controller") client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY")) if err != nil { logger.Error("controller client error", "err", err) httphelper.Error(w, err) return } // Retrieve mariadb release. logger.Info("retrieving app release", "app", app) release, err := client.GetAppRelease(app) if err == controller.ErrNotFound { logger.Error("release not found", "app", app) httphelper.Error(w, err) return } else if err != nil { logger.Error("get release error", "app", app, "err", err) httphelper.Error(w, err) return } // Retrieve current formation. logger.Info("retrieving formation", "app", app, "release_id", release.ID) formation, err := client.GetFormation(app, release.ID) if err == controller.ErrNotFound { logger.Error("formation not found", "app", app, "release_id", release.ID) httphelper.Error(w, err) return } else if err != nil { logger.Error("formation error", "app", app, "release_id", release.ID, "err", err) httphelper.Error(w, err) return } // MariaDB isn't running, just return healthy if formation.Processes["mariadb"] == 0 { w.WriteHeader(200) return } } db, err := a.connect() if err != nil { httphelper.Error(w, err) return } defer db.Close() if _, err := db.Exec("SELECT 1"); err != nil { httphelper.Error(w, err) return } w.WriteHeader(200) }
func runClusterBackup(args *docopt.Args) error { client, err := getClusterClient() if err != nil { return err } var bar *pb.ProgressBar if term.IsTerminal(os.Stderr.Fd()) { bar = pb.New(0) bar.SetUnits(pb.U_BYTES) bar.ShowBar = false bar.ShowSpeed = true bar.Output = os.Stderr bar.Start() } var dest io.Writer = os.Stdout if filename := args.String["--file"]; filename != "" { f, err := os.Create(filename) if err != nil { return err } defer f.Close() dest = f } fmt.Fprintln(os.Stderr, "Creating cluster backup...") tw := NewTarWriter("flynn-backup-"+time.Now().UTC().Format("2006-01-02_150405"), dest) defer tw.Close() // get app and release details for key apps data := make(map[string]*ct.ExpandedFormation, 4) for _, name := range []string{"postgres", "discoverd", "flannel", "controller"} { app, err := client.GetApp(name) if err != nil { return fmt.Errorf("error getting %s app details: %s", name, err) } release, err := client.GetAppRelease(app.ID) if err != nil { return fmt.Errorf("error getting %s app release: %s", name, err) } formation, err := client.GetFormation(app.ID, release.ID) if err != nil { return fmt.Errorf("error getting %s app formation: %s", name, err) } artifact, err := client.GetArtifact(release.ArtifactID) if err != nil { return fmt.Errorf("error getting %s app artifact: %s", name, err) } data[name] = &ct.ExpandedFormation{ App: app, Release: release, Artifact: artifact, Processes: formation.Processes, } } if err := tw.WriteJSON("flynn.json", data); err != nil { return err } config := &runConfig{ App: "postgres", Release: data["postgres"].Release.ID, Entrypoint: []string{"sh"}, Args: []string{"-c", "pg_dumpall --clean --if-exists | gzip -9"}, Env: map[string]string{ "PGHOST": "leader.postgres.discoverd", "PGUSER": "******", "PGPASSWORD": data["postgres"].Release.Env["PGPASSWORD"], }, DisableLog: true, } if err := tw.WriteCommandOutput(client, "postgres.sql.gz", config, bar); err != nil { return fmt.Errorf("error dumping database: %s", err) } if bar != nil { bar.Finish() } fmt.Fprintln(os.Stderr, "Backup complete.") return nil }