Esempio n. 1
0
func (f *ClusterFixer) FixController(instances []*discoverd.Instance, startScheduler bool) error {
	f.l.Info("found controller instance, checking critical formations")
	inst := instances[0]
	client, err := controller.NewClient("http://"+inst.Addr, inst.Meta["AUTH_KEY"])
	if err != nil {
		return fmt.Errorf("unexpected error creating controller client: %s", err)
	}

	// check that formations for critical components are expected
	apps := []string{"controller", "router", "discoverd", "flannel", "postgres"}
	changes := make(map[string]*ct.Formation, len(apps))
	var controllerFormation *ct.Formation
	for _, app := range apps {
		release, err := client.GetAppRelease(app)
		if err != nil {
			return fmt.Errorf("error getting %s release: %s", app, err)
		}
		formation, err := client.GetFormation(app, release.ID)
		if err != nil {
			// TODO: handle ErrNotFound
			return fmt.Errorf("error getting %s formation: %s", app, err)
		}
		if app == "controller" {
			controllerFormation = formation
		}
		for typ := range release.Processes {
			var want int
			if app == "postgres" && typ == "postgres" && len(f.hosts) > 1 && formation.Processes[typ] < 3 {
				want = 3
			} else if formation.Processes[typ] < 1 {
				want = 1
			}
			if want > 0 {
				f.l.Info("found broken formation", "app", app, "process", typ)
				if _, ok := changes[app]; !ok {
					if formation.Processes == nil {
						formation.Processes = make(map[string]int)
					}
					changes[app] = formation
				}
				changes[app].Processes[typ] = want
			}
		}
	}

	for app, formation := range changes {
		f.l.Info("fixing broken formation", "app", app)
		if err := client.PutFormation(formation); err != nil {
			return fmt.Errorf("error putting %s formation: %s", app, err)
		}
	}

	if startScheduler {
		if err := f.StartScheduler(client, controllerFormation); err != nil {
			return err
		}
	}
	return nil
}
Esempio n. 2
0
// CheckScale examines sirenia cluster formation to check if cluster
// has been scaled up yet.
// Returns true if scaled, false if not.
func CheckScale(app, controllerKey, procName string, logger log15.Logger) (bool, error) {
	logger = logger.New("fn", "CheckScale")
	// Connect to controller.
	logger.Info("connecting to controller")
	client, err := controller.NewClient("", controllerKey)
	if err != nil {
		logger.Error("controller client error", "err", err)
		return false, err
	}

	// Retrieve app release.
	logger.Info("retrieving app release", "app", app)
	release, err := client.GetAppRelease(app)
	if err == controller.ErrNotFound {
		logger.Error("release not found", "app", app)
		return false, err
	} else if err != nil {
		logger.Error("get release error", "app", app, "err", err)
		return false, err
	}

	// Retrieve current formation.
	logger.Info("retrieving formation", "app", app, "release_id", release.ID)
	formation, err := client.GetFormation(app, release.ID)
	if err == controller.ErrNotFound {
		logger.Error("formation not found", "app", app, "release_id", release.ID)
		return false, err
	} else if err != nil {
		logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
		return false, err
	}

	// Database hasn't been scaled up yet
	if formation.Processes[procName] == 0 {
		return false, nil
	}

	return true, nil

}
Esempio n. 3
0
// ScaleUp scales up a dormant Sirenia cluster
func ScaleUp(app, controllerKey, serviceAddr, procName, singleton string, logger log15.Logger) error {
	logger = logger.New("fn", "ScaleUp")
	sc := sirenia.NewClient(serviceAddr)

	logger.Info("checking status", "host", serviceAddr)
	if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale")
		// Skip the rest, the database is already available
		return nil
	} else if err != nil {
		logger.Info("error checking status", "err", err)
	} else {
		logger.Info("got status, but database is not read-write")
	}

	// Connect to controller.
	logger.Info("connecting to controller")
	client, err := controller.NewClient("", controllerKey)
	if err != nil {
		logger.Error("controller client error", "err", err)
		return err
	}

	// Retrieve the app release.
	logger.Info("retrieving app release", "app", app)
	release, err := client.GetAppRelease(app)
	if err == controller.ErrNotFound {
		logger.Error("release not found", "app", app)
		return errors.New("release not found")
	} else if err != nil {
		logger.Error("get release error", "app", app, "err", err)
		return err
	}

	// Retrieve current formation.
	logger.Info("retrieving formation", "app", app, "release_id", release.ID)
	formation, err := client.GetFormation(app, release.ID)
	if err == controller.ErrNotFound {
		logger.Error("formation not found", "app", app, "release_id", release.ID)
		return errors.New("formation not found")
	} else if err != nil {
		logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	// If database is running then exit.
	if formation.Processes[procName] > 0 {
		logger.Info("database is running, scaling not necessary")
		return nil
	}

	// Copy processes and increase database processes.
	processes := make(map[string]int, len(formation.Processes))
	for k, v := range formation.Processes {
		processes[k] = v
	}

	if singleton == "true" {
		processes[procName] = 1
	} else {
		processes[procName] = 3
	}

	// Update formation.
	logger.Info("updating formation", "app", app, "release_id", release.ID)
	formation.Processes = processes
	if err := client.PutFormation(formation); err != nil {
		logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	if err := sc.WaitForReadWrite(5 * time.Minute); err != nil {
		logger.Error("wait for read write", "err", err)
		return errors.New("timed out while starting sirenia cluster")
	}

	logger.Info("scaling complete")
	return nil
}
Esempio n. 4
0
func (a *API) scaleUp() error {
	a.mtx.Lock()
	defer a.mtx.Unlock()

	// Ignore if already scaled up.
	if a.scaledUp {
		return nil
	}

	app := os.Getenv("FLYNN_APP_ID")
	logger := a.logger().New("fn", "scaleUp")
	sc := sirenia.NewClient(serviceHost + ":3306")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale")
		// Skip the rest, the database is already available
		a.scaledUp = true
		return nil
	} else if err != nil {
		logger.Info("error checking status", "err", err)
	} else {
		logger.Info("got status, but database is not read-write")
	}

	// Connect to controller.
	logger.Info("connecting to controller")
	client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
	if err != nil {
		logger.Error("controller client error", "err", err)
		return err
	}

	// Retrieve mariadb release.
	logger.Info("retrieving app release", "app", app)
	release, err := client.GetAppRelease(app)
	if err == controller.ErrNotFound {
		logger.Error("release not found", "app", app)
		return errors.New("mariadb release not found")
	} else if err != nil {
		logger.Error("get release error", "app", app, "err", err)
		return err
	}

	// Retrieve current formation.
	logger.Info("retrieving formation", "app", app, "release_id", release.ID)
	formation, err := client.GetFormation(app, release.ID)
	if err == controller.ErrNotFound {
		logger.Error("formation not found", "app", app, "release_id", release.ID)
		return errors.New("mariadb formation not found")
	} else if err != nil {
		logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	// If mariadb is running then exit.
	if formation.Processes["mariadb"] > 0 {
		logger.Info("database is running, scaling not necessary")
		return nil
	}

	// Copy processes and increase database processes.
	processes := make(map[string]int, len(formation.Processes))
	for k, v := range formation.Processes {
		processes[k] = v
	}

	if os.Getenv("SINGLETON") == "true" {
		processes["mariadb"] = 1
	} else {
		processes["mariadb"] = 3
	}

	// Update formation.
	logger.Info("updating formation", "app", app, "release_id", release.ID)
	formation.Processes = processes
	if err := client.PutFormation(formation); err != nil {
		logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	if err := sc.WaitForReadWrite(5 * time.Minute); err != nil {
		logger.Error("wait for read write", "err", err)
		return errors.New("timed out while starting mariadb cluster")
	}

	logger.Info("scaling complete")

	// Mark as successfully scaled up.
	a.scaledUp = true

	return nil
}
Esempio n. 5
0
func (a *API) ping(ctx context.Context, w http.ResponseWriter, req *http.Request) {
	app := os.Getenv("FLYNN_APP_ID")
	logger := a.logger().New("fn", "ping")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sirenia.NewClient(serviceHost + ":3306").Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale check")
	} else {
		// Connect to controller.
		logger.Info("connecting to controller")
		client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
		if err != nil {
			logger.Error("controller client error", "err", err)
			httphelper.Error(w, err)
			return
		}

		// Retrieve mariadb release.
		logger.Info("retrieving app release", "app", app)
		release, err := client.GetAppRelease(app)
		if err == controller.ErrNotFound {
			logger.Error("release not found", "app", app)
			httphelper.Error(w, err)
			return
		} else if err != nil {
			logger.Error("get release error", "app", app, "err", err)
			httphelper.Error(w, err)
			return
		}

		// Retrieve current formation.
		logger.Info("retrieving formation", "app", app, "release_id", release.ID)
		formation, err := client.GetFormation(app, release.ID)
		if err == controller.ErrNotFound {
			logger.Error("formation not found", "app", app, "release_id", release.ID)
			httphelper.Error(w, err)
			return
		} else if err != nil {
			logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
			httphelper.Error(w, err)
			return
		}

		// MariaDB isn't running, just return healthy
		if formation.Processes["mariadb"] == 0 {
			w.WriteHeader(200)
			return
		}
	}

	db, err := a.connect()
	if err != nil {
		httphelper.Error(w, err)
		return
	}
	defer db.Close()

	if _, err := db.Exec("SELECT 1"); err != nil {
		httphelper.Error(w, err)
		return
	}
	w.WriteHeader(200)
}
Esempio n. 6
0
func runClusterBackup(args *docopt.Args) error {
	client, err := getClusterClient()
	if err != nil {
		return err
	}

	var bar *pb.ProgressBar
	if term.IsTerminal(os.Stderr.Fd()) {
		bar = pb.New(0)
		bar.SetUnits(pb.U_BYTES)
		bar.ShowBar = false
		bar.ShowSpeed = true
		bar.Output = os.Stderr
		bar.Start()
	}

	var dest io.Writer = os.Stdout
	if filename := args.String["--file"]; filename != "" {
		f, err := os.Create(filename)
		if err != nil {
			return err
		}
		defer f.Close()
		dest = f
	}

	fmt.Fprintln(os.Stderr, "Creating cluster backup...")

	tw := NewTarWriter("flynn-backup-"+time.Now().UTC().Format("2006-01-02_150405"), dest)
	defer tw.Close()

	// get app and release details for key apps
	data := make(map[string]*ct.ExpandedFormation, 4)
	for _, name := range []string{"postgres", "discoverd", "flannel", "controller"} {
		app, err := client.GetApp(name)
		if err != nil {
			return fmt.Errorf("error getting %s app details: %s", name, err)
		}
		release, err := client.GetAppRelease(app.ID)
		if err != nil {
			return fmt.Errorf("error getting %s app release: %s", name, err)
		}
		formation, err := client.GetFormation(app.ID, release.ID)
		if err != nil {
			return fmt.Errorf("error getting %s app formation: %s", name, err)
		}
		artifact, err := client.GetArtifact(release.ArtifactID)
		if err != nil {
			return fmt.Errorf("error getting %s app artifact: %s", name, err)
		}
		data[name] = &ct.ExpandedFormation{
			App:       app,
			Release:   release,
			Artifact:  artifact,
			Processes: formation.Processes,
		}
	}
	if err := tw.WriteJSON("flynn.json", data); err != nil {
		return err
	}

	config := &runConfig{
		App:        "postgres",
		Release:    data["postgres"].Release.ID,
		Entrypoint: []string{"sh"},
		Args:       []string{"-c", "pg_dumpall --clean --if-exists | gzip -9"},
		Env: map[string]string{
			"PGHOST":     "leader.postgres.discoverd",
			"PGUSER":     "******",
			"PGPASSWORD": data["postgres"].Release.Env["PGPASSWORD"],
		},
		DisableLog: true,
	}
	if err := tw.WriteCommandOutput(client, "postgres.sql.gz", config, bar); err != nil {
		return fmt.Errorf("error dumping database: %s", err)
	}

	if bar != nil {
		bar.Finish()
	}
	fmt.Fprintln(os.Stderr, "Backup complete.")

	return nil
}