Example #1
0
func (a *API) ping(w http.ResponseWriter, req *http.Request, _ httprouter.Params) {
	logger := a.logger().New("fn", "ping")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sirenia.NewClient(serviceHost + ":3306").Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale check")
	} else {
		scaled, err := scale.CheckScale(app, controllerKey, "mongodb", a.logger())
		if err != nil {
			httphelper.Error(w, err)
			return
		}

		// Cluster has yet to be scaled, return healthy
		if !scaled {
			w.WriteHeader(200)
			return
		}
	}

	session, err := mgo.DialWithInfo(&mgo.DialInfo{
		Addrs:    []string{net.JoinHostPort(serviceHost, "27017")},
		Username: "******",
		Password: os.Getenv("MONGO_PWD"),
		Database: "admin",
	})
	if err != nil {
		httphelper.Error(w, err)
		return
	}
	defer session.Close()

	w.WriteHeader(200)
}
Example #2
0
func (f *ClusterFixer) CheckSirenia(svc string) error {
	log := f.l.New("fn", "CheckSirenia", "service", svc)
	log.Info("checking sirenia cluster status")
	service := discoverd.NewService(svc)
	leader, _ := service.Leader()
	if leader == nil || leader.Addr == "" {
		log.Info("no running leader")
		leader = nil
	} else {
		log.Info("found running leader")
	}
	instances, _ := service.Instances()
	log.Info("found running instances", "count", len(instances))

	log.Info("getting sirenia status")
	var status *sirenia.Status
	if leader != nil && leader.Addr != "" {
		client := sirenia.NewClient(leader.Addr)
		var err error
		status, err = client.Status()
		if err != nil {
			log.Error("error getting status from leader", "error", err)
		}
	}
	if status != nil && status.Database != nil && status.Database.ReadWrite {
		log.Info("cluster claims to be read-write")
		return nil
	}
	return fmt.Errorf("cluster isn't read-write")
}
Example #3
0
func (a *API) ping(ctx context.Context, w http.ResponseWriter, req *http.Request) {
	logger := a.logger().New("fn", "ping")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sirenia.NewClient(serviceHost + ":3306").Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale check")
	} else {
		scaled, err := scale.CheckScale(app, controllerKey, "mariadb", a.logger())
		if err != nil {
			httphelper.Error(w, err)
			return
		}

		// Cluster has yet to be scaled, return healthy
		if !scaled {
			w.WriteHeader(200)
			return
		}
	}

	db, err := a.connect()
	if err != nil {
		httphelper.Error(w, err)
		return
	}
	defer db.Close()

	if _, err := db.Exec("SELECT 1"); err != nil {
		httphelper.Error(w, err)
		return
	}
	w.WriteHeader(200)
}
Example #4
0
func testSireniaDeploy(client controller.Client, disc *discoverd.Client, t *c.C, d *sireniaDeploy) {
	// create app
	app := &ct.App{Name: d.name, Strategy: "sirenia"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default app
	release, err := client.GetAppRelease(d.db.appName)
	t.Assert(err, c.IsNil)
	release.ID = ""
	release.Env[d.db.hostKey] = fmt.Sprintf("leader.%s.discoverd", d.name)
	release.Env[d.db.serviceKey] = d.name
	procName := release.Env["SIRENIA_PROCESS"]
	proc := release.Processes[procName]
	delete(proc.Env, "SINGLETON")
	proc.Service = d.name
	release.Processes[procName] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discService := disc.Service(d.name)
	discStream, err := discService.Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{procName: d.sireniaJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got cluster state: index=%d primary=%s sync=%s async=%s",
				event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var sireniaState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if sireniaState.Primary == nil {
			return false
		}
		if d.sireniaJobs > 1 && sireniaState.Sync == nil {
			return false
		}
		if d.sireniaJobs > 2 && len(sireniaState.Async) != d.sireniaJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			sireniaState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for formation")
		}
	}

	// wait for the primary to indicate downstream replication sync
	debug(t, "waiting for primary to indicate downstream replication sync")
	sireniaClient := sc.NewClient(sireniaState.Primary.Addr)
	t.Assert(sireniaClient.WaitForReplSync(sireniaState.Sync, 1*time.Minute), c.IsNil)

	// connect to the db so we can test writes
	d.db.initDb(t, release, d)

	// check currently writeable
	d.db.assertWriteable(t, release, d)

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is in the remaining states
	// that were expected, so handles the fact that some states don't happen, but the
	// states that do happen are expected and in-order.
	assertNextState := func(remaining []expectedSireniaState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with less asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		log := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				log("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				log("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				log("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				log("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				log("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					log("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected state")
		return -1
	}
	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != ct.JobStateUp && e.JobState != ct.JobStateDown {
				continue
			}
			switch e.JobType {
			case procName:
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			case "web":
				if e.JobState == ct.JobStateUp && e.ReleaseID == newRelease {
					newWebJobs++
				}
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for deployment")
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now deploy is complete
	d.db.assertWriteable(t, release, d)
}
Example #5
0
// ScaleUp scales up a dormant Sirenia cluster
func ScaleUp(app, controllerKey, serviceAddr, procName, singleton string, logger log15.Logger) error {
	logger = logger.New("fn", "ScaleUp")
	sc := sirenia.NewClient(serviceAddr)

	logger.Info("checking status", "host", serviceAddr)
	if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale")
		// Skip the rest, the database is already available
		return nil
	} else if err != nil {
		logger.Info("error checking status", "err", err)
	} else {
		logger.Info("got status, but database is not read-write")
	}

	// Connect to controller.
	logger.Info("connecting to controller")
	client, err := controller.NewClient("", controllerKey)
	if err != nil {
		logger.Error("controller client error", "err", err)
		return err
	}

	// Retrieve the app release.
	logger.Info("retrieving app release", "app", app)
	release, err := client.GetAppRelease(app)
	if err == controller.ErrNotFound {
		logger.Error("release not found", "app", app)
		return errors.New("release not found")
	} else if err != nil {
		logger.Error("get release error", "app", app, "err", err)
		return err
	}

	// Retrieve current formation.
	logger.Info("retrieving formation", "app", app, "release_id", release.ID)
	formation, err := client.GetFormation(app, release.ID)
	if err == controller.ErrNotFound {
		logger.Error("formation not found", "app", app, "release_id", release.ID)
		return errors.New("formation not found")
	} else if err != nil {
		logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	// If database is running then exit.
	if formation.Processes[procName] > 0 {
		logger.Info("database is running, scaling not necessary")
		return nil
	}

	// Copy processes and increase database processes.
	processes := make(map[string]int, len(formation.Processes))
	for k, v := range formation.Processes {
		processes[k] = v
	}

	if singleton == "true" {
		processes[procName] = 1
	} else {
		processes[procName] = 3
	}

	// Update formation.
	logger.Info("updating formation", "app", app, "release_id", release.ID)
	formation.Processes = processes
	if err := client.PutFormation(formation); err != nil {
		logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	if err := sc.WaitForReadWrite(5 * time.Minute); err != nil {
		logger.Error("wait for read write", "err", err)
		return errors.New("timed out while starting sirenia cluster")
	}

	logger.Info("scaling complete")
	return nil
}
Example #6
0
func (f *ClusterFixer) FixSirenia(svc string) error {
	log := f.l.New("fn", "FixSirenia", "service", svc)

	service := discoverd.NewService(svc)
	instances, _ := service.Instances()
	leader, _ := service.Leader()

	log.Info("getting service metadata")
	meta, err := service.GetMeta()
	if err != nil {
		return fmt.Errorf("error getting sirenia state from discoverd: %s", err)
	}

	var state state.State
	if err := json.Unmarshal(meta.Data, &state); err != nil {
		return fmt.Errorf("error decoding state: %s", err)
	}
	if state.Primary == nil {
		return fmt.Errorf("no primary in sirenia state")
	}

	log.Info("getting primary job info", "job.id", state.Primary.Meta["FLYNN_JOB_ID"])
	primaryJob, primaryHost, err := f.GetJob(state.Primary.Meta["FLYNN_JOB_ID"])
	if err != nil {
		log.Error("unable to get primary job info")
	}
	var syncJob *host.Job
	var syncHost *cluster.Host
	if state.Sync != nil {
		log.Info("getting sync job info", "job.id", state.Sync.Meta["FLYNN_JOB_ID"])
		syncJob, syncHost, err = f.GetJob(state.Sync.Meta["FLYNN_JOB_ID"])
		if err != nil {
			log.Error("unable to get sync job info")
		}
	}

	waitForInstance := func(jobID string) (func() (string, error), error) {
		watchCh := make(chan *discoverd.Event)
		upCh := make(chan string)
		stream, err := service.Watch(watchCh)
		if err != nil {
			return nil, fmt.Errorf("error watching discoverd service: %s", err)
		}
		go func() {
			var current bool
			for event := range watchCh {
				if event.Kind == discoverd.EventKindCurrent {
					current = true
					continue
				}
				if !current || event.Kind != discoverd.EventKindUp {
					continue
				}
				if event.Instance.Meta["FLYNN_JOB_ID"] == jobID {
					upCh <- event.Instance.Addr
				}
			}
		}()
		return func() (string, error) {
			log.Info("waiting for instance to start", "job.id", jobID)
			defer stream.Close()
			select {
			case addr := <-upCh:
				return addr, nil
			case <-time.After(time.Minute):
				return "", fmt.Errorf("timed out waiting for sirenia instance to come up")
			}
		}, nil
	}

	log.Info("terminating unassigned sirenia instances")
outer:
	for _, i := range instances {
		if i.Addr == state.Primary.Addr || (state.Sync != nil && i.Addr == state.Sync.Addr) {
			continue
		}
		for _, a := range state.Async {
			if i.Addr == a.Addr {
				continue outer
			}
		}
		// job not assigned in state, attempt to terminate it
		if jobID, ok := i.Meta["FLYNN_JOB_ID"]; ok {
			hostID, err := cluster.ExtractHostID(jobID)
			if err != nil {
				log.Error("error extracting host id from jobID", "jobID", jobID, "err", err)
			}
			h := f.Host(hostID)
			if h != nil {
				if err := h.StopJob(jobID); err != nil {
					log.Error("error stopping unassigned sirenia job", "jobID", jobID)
				}
			} else {
				log.Error("host not found", "hostID", hostID)
			}
		}
	}

	isRunning := func(addr string) bool {
		for _, i := range instances {
			if i.Addr == addr {
				return true
			}
		}
		return false
	}

	// if the leader isn't currently running then start it using primaryJob/primaryHost
	var wait func() (string, error)
	if !isRunning(state.Primary.Addr) {
		// if we don't have info about the primary job attempt to promote the sync
		if primaryJob == nil {
			if syncJob != nil {
				// set primary job to sync
				primaryJob = syncJob
				primaryHost = syncHost

				// nil out sync job now so we can re-allocate it.
				syncJob = nil
				syncHost = nil
			} else {
				return fmt.Errorf("neither primary or sync job info available")
			}
		}

		primaryJob.ID = cluster.GenerateJobID(primaryHost.ID(), "")
		f.FixJobEnv(primaryJob)
		log.Info("starting primary job", "job.id", primaryJob.ID)
		wait, err = waitForInstance(primaryJob.ID)
		if err != nil {
			return err
		}
		if err := primaryHost.AddJob(primaryJob); err != nil {
			return fmt.Errorf("error starting primary job on %s: %s", primaryHost.ID(), err)
		}
	}
	if !state.Singleton && !isRunning(state.Sync.Addr) {
		if syncHost == nil {
			for _, h := range f.hosts {
				if h.ID() != primaryHost.ID() {
					syncHost = h
					break
				}
			}
			if syncHost == nil {
				// if there are no other hosts, use the same one we put the primary on
				syncHost = primaryHost
			}
		}
		// if we don't have a sync job then copy the primary job
		// and provision a new volume
		if syncJob == nil {
			syncJob = primaryJob
			vol := &ct.VolumeReq{Path: "/data"}
			if _, err := utils.ProvisionVolume(vol, syncHost, syncJob); err != nil {
				return fmt.Errorf("error creating volume on %s: %s", syncHost.ID(), err)
			}
		}
		syncJob.ID = cluster.GenerateJobID(syncHost.ID(), "")
		f.FixJobEnv(syncJob)
		log.Info("starting sync job", "job.id", syncJob.ID)
		if wait == nil {
			wait, err = waitForInstance(syncJob.ID)
			if err != nil {
				return err
			}
		}
		if err := syncHost.AddJob(syncJob); err != nil {
			return fmt.Errorf("error starting additional job on %s: %s", syncHost.ID(), err)
		}
	}

	if wait != nil {
		addr, err := wait()
		if err != nil {
			return err
		}
		if leader != nil && leader.Addr != "" {
			addr = leader.Addr
		}
		log.Info("waiting for cluster to come up read-write", "addr", addr)
		return sirenia.NewClient(addr).WaitForReadWrite(5 * time.Minute)
	}
	return nil
}
Example #7
0
func (f *ClusterFixer) FixSirenia(svc string) error {
	log := f.l.New("fn", "FixSirenia", "service", svc)
	log.Info("checking sirenia cluster status")
	service := discoverd.NewService(svc)
	leader, _ := service.Leader()
	if leader == nil || leader.Addr == "" {
		log.Info("no running leader")
		leader = nil
	} else {
		log.Info("found running leader")
	}
	instances, _ := service.Instances()
	log.Info("found running instances", "count", len(instances))

	log.Info("getting sirenia status")
	var status *sirenia.Status
	if leader != nil && leader.Addr != "" {
		client := sirenia.NewClient(leader.Addr)
		var err error
		status, err = client.Status()
		if err != nil {
			log.Error("error getting status from leader", "error", err)
		}
	}
	if status != nil && status.Database != nil && status.Database.ReadWrite {
		log.Info("cluster claims to be read-write")
		return nil
	}

	log.Info("getting service metadata")
	meta, err := discoverd.NewService(svc).GetMeta()
	if err != nil {
		return fmt.Errorf("error getting sirenia state from discoverd: %s", err)
	}

	var state state.State
	if err := json.Unmarshal(meta.Data, &state); err != nil {
		return fmt.Errorf("error decoding state: %s", err)
	}
	if state.Primary == nil {
		return fmt.Errorf("no primary in sirenia state")
	}

	log.Info("getting primary job info", "job.id", state.Primary.Meta["FLYNN_JOB_ID"])
	job, host, err := f.GetJob(state.Primary.Meta["FLYNN_JOB_ID"])
	if err != nil {
		if state.Sync != nil {
			f.l.Error("unable to get primary job info", "error", err)
			f.l.Info("getting sync job info", "job.id", state.Sync.Meta["FLYNN_JOB_ID"])
			job, host, err = f.GetJob(state.Sync.Meta["FLYNN_JOB_ID"])
			if err != nil {
				return fmt.Errorf("unable to get primary or sync job details: %s", err)
			}
		} else {
			return fmt.Errorf("unable to get primary job details: %s", err)
		}
	}

	if leader != nil && state.Singleton {
		return fmt.Errorf("sirenia leader is running in singleton mode, unable to fix")
	}

	waitForInstance := func(jobID string) (func() (string, error), error) {
		watchCh := make(chan *discoverd.Event)
		upCh := make(chan string)
		stream, err := service.Watch(watchCh)
		if err != nil {
			return nil, fmt.Errorf("error watching discoverd service: %s", err)
		}
		go func() {
			var current bool
			for event := range watchCh {
				if event.Kind == discoverd.EventKindCurrent {
					current = true
					continue
				}
				if !current || event.Kind != discoverd.EventKindUp {
					continue
				}
				if event.Instance.Meta["FLYNN_JOB_ID"] == jobID {
					upCh <- event.Instance.Addr
				}
			}
		}()
		return func() (string, error) {
			log.Info("waiting for instance to start", "job.id", jobID)
			defer stream.Close()
			select {
			case addr := <-upCh:
				return addr, nil
			case <-time.After(time.Minute):
				return "", fmt.Errorf("timed out waiting for sirenia instance to come up")
			}
		}, nil
	}

	var wait func() (string, error)
	have := len(instances)
	want := 2
	if state.Singleton {
		want = 1
	}
	if have >= want {
		return fmt.Errorf("already have enough instances, unable to fix")
	}
	log.Info("attempting to start missing jobs", "want", want, "have", have)
	if leader == nil {
		job.ID = cluster.GenerateJobID(host.ID(), "")
		f.FixJobEnv(job)
		log.Info("starting primary job", "job.id", job.ID)
		wait, err = waitForInstance(job.ID)
		if err != nil {
			return err
		}
		if err := host.AddJob(job); err != nil {
			return fmt.Errorf("error starting primary job on %s: %s", host.ID(), err)
		}
		have++
	}
	if want > have {
		// if not enough postgres instances, start another
		var secondHost *cluster.Host
		for _, h := range f.hosts {
			if h.ID() != host.ID() {
				secondHost = h
				break
			}
		}
		if secondHost == nil {
			// if there are no other hosts, use the same one we put the primary on
			secondHost = host
		}
		job.ID = cluster.GenerateJobID(secondHost.ID(), "")
		f.FixJobEnv(job)
		log.Info("starting second job", "job.id", job.ID)
		if wait == nil {
			wait, err = waitForInstance(job.ID)
			if err != nil {
				return err
			}
		}
		if err := utils.ProvisionVolume(secondHost, job); err != nil {
			return fmt.Errorf("error creating volume on %s: %s", secondHost.ID(), err)
		}
		if err := secondHost.AddJob(job); err != nil {
			return fmt.Errorf("error starting additional job on %s: %s", secondHost.ID(), err)
		}
	}

	if wait != nil {
		addr, err := wait()
		if err != nil {
			return err
		}
		if leader != nil {
			addr = leader.Addr
		}
		log.Info("waiting for cluster to come up read-write")
		return sirenia.NewClient(addr).WaitForReadWrite(5 * time.Minute)
	}
	return nil
}
Example #8
0
func (a *API) scaleUp() error {
	a.mtx.Lock()
	defer a.mtx.Unlock()

	// Ignore if already scaled up.
	if a.scaledUp {
		return nil
	}

	app := os.Getenv("FLYNN_APP_ID")
	logger := a.logger().New("fn", "scaleUp")
	sc := sirenia.NewClient(serviceHost + ":3306")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sc.Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale")
		// Skip the rest, the database is already available
		a.scaledUp = true
		return nil
	} else if err != nil {
		logger.Info("error checking status", "err", err)
	} else {
		logger.Info("got status, but database is not read-write")
	}

	// Connect to controller.
	logger.Info("connecting to controller")
	client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
	if err != nil {
		logger.Error("controller client error", "err", err)
		return err
	}

	// Retrieve mariadb release.
	logger.Info("retrieving app release", "app", app)
	release, err := client.GetAppRelease(app)
	if err == controller.ErrNotFound {
		logger.Error("release not found", "app", app)
		return errors.New("mariadb release not found")
	} else if err != nil {
		logger.Error("get release error", "app", app, "err", err)
		return err
	}

	// Retrieve current formation.
	logger.Info("retrieving formation", "app", app, "release_id", release.ID)
	formation, err := client.GetFormation(app, release.ID)
	if err == controller.ErrNotFound {
		logger.Error("formation not found", "app", app, "release_id", release.ID)
		return errors.New("mariadb formation not found")
	} else if err != nil {
		logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	// If mariadb is running then exit.
	if formation.Processes["mariadb"] > 0 {
		logger.Info("database is running, scaling not necessary")
		return nil
	}

	// Copy processes and increase database processes.
	processes := make(map[string]int, len(formation.Processes))
	for k, v := range formation.Processes {
		processes[k] = v
	}

	if os.Getenv("SINGLETON") == "true" {
		processes["mariadb"] = 1
	} else {
		processes["mariadb"] = 3
	}

	// Update formation.
	logger.Info("updating formation", "app", app, "release_id", release.ID)
	formation.Processes = processes
	if err := client.PutFormation(formation); err != nil {
		logger.Error("put formation error", "app", app, "release_id", release.ID, "err", err)
		return err
	}

	if err := sc.WaitForReadWrite(5 * time.Minute); err != nil {
		logger.Error("wait for read write", "err", err)
		return errors.New("timed out while starting mariadb cluster")
	}

	logger.Info("scaling complete")

	// Mark as successfully scaled up.
	a.scaledUp = true

	return nil
}
Example #9
0
func (a *API) ping(ctx context.Context, w http.ResponseWriter, req *http.Request) {
	app := os.Getenv("FLYNN_APP_ID")
	logger := a.logger().New("fn", "ping")

	logger.Info("checking status", "host", serviceHost)
	if status, err := sirenia.NewClient(serviceHost + ":3306").Status(); err == nil && status.Database != nil && status.Database.ReadWrite {
		logger.Info("database is up, skipping scale check")
	} else {
		// Connect to controller.
		logger.Info("connecting to controller")
		client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
		if err != nil {
			logger.Error("controller client error", "err", err)
			httphelper.Error(w, err)
			return
		}

		// Retrieve mariadb release.
		logger.Info("retrieving app release", "app", app)
		release, err := client.GetAppRelease(app)
		if err == controller.ErrNotFound {
			logger.Error("release not found", "app", app)
			httphelper.Error(w, err)
			return
		} else if err != nil {
			logger.Error("get release error", "app", app, "err", err)
			httphelper.Error(w, err)
			return
		}

		// Retrieve current formation.
		logger.Info("retrieving formation", "app", app, "release_id", release.ID)
		formation, err := client.GetFormation(app, release.ID)
		if err == controller.ErrNotFound {
			logger.Error("formation not found", "app", app, "release_id", release.ID)
			httphelper.Error(w, err)
			return
		} else if err != nil {
			logger.Error("formation error", "app", app, "release_id", release.ID, "err", err)
			httphelper.Error(w, err)
			return
		}

		// MariaDB isn't running, just return healthy
		if formation.Processes["mariadb"] == 0 {
			w.WriteHeader(200)
			return
		}
	}

	db, err := a.connect()
	if err != nil {
		httphelper.Error(w, err)
		return
	}
	defer db.Close()

	if _, err := db.Exec("SELECT 1"); err != nil {
		httphelper.Error(w, err)
		return
	}
	w.WriteHeader(200)
}