Example #1
func (s *DeployerSuite) TestRollbackFailedJob(t *c.C) {
	// create a running release
	app, release := s.createRelease(t, "printer", "all-at-once")

	// deploy a release which will fail to start
	client := s.controllerClient(t)
	release.ID = ""
	printer := release.Processes["printer"]
	printer.Args = []string{"this-is-gonna-fail"}
	release.Processes["printer"] = printer
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// check the deployment fails
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := []*ct.DeploymentEvent{
		{ReleaseID: release.ID, JobType: "", JobState: "", Status: "pending"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateStarting, Status: "running"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateStarting, Status: "running"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateDown, Status: "running"},
		{ReleaseID: release.ID, JobType: "", JobState: "", Status: "failed", Error: `deployer: printer job failed to start: exec: "this-is-gonna-fail": executable file not found in $PATH`},
	}
	waitForDeploymentEvents(t, events, expected)

	s.assertRolledBack(t, deployment, map[string]int{"printer": 2})
}
Example #2
func (s *DeployerSuite) TestRollbackNoService(t *c.C) {
	// create a running release
	app, release := s.createRelease(t, "printer", "all-at-once")

	// deploy a release which will not register the service
	client := s.controllerClient(t)
	release.ID = ""
	printer := release.Processes["printer"]
	printer.Service = "printer"
	printer.Ports = []ct.Port{{
		Port:  12345,
		Proto: "tcp",
		Service: &host.Service{
			Name:   "printer",
			Create: true,
			Check: &host.HealthCheck{
				Type:         "tcp",
				Interval:     100 * time.Millisecond,
				Threshold:    1,
				KillDown:     true,
				StartTimeout: 100 * time.Millisecond,
			},
		},
	}}
	release.Processes["printer"] = printer
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// check the deployment fails
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := []*ct.DeploymentEvent{
		{ReleaseID: release.ID, JobType: "", JobState: "", Status: "pending"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateStarting, Status: "running"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateStarting, Status: "running"},
		{ReleaseID: release.ID, JobType: "printer", JobState: ct.JobStateDown, Status: "running"},
		{ReleaseID: release.ID, JobType: "", JobState: "", Status: "failed", Error: "printer process type failed to start, got down job event"},
	}
	waitForDeploymentEvents(t, events, expected)

	s.assertRolledBack(t, deployment, map[string]int{"printer": 2})

	// check a new deployment can be created
	_, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
}
Example #3
func (s *CLISuite) TestScaleAll(t *c.C) {
	client := s.controllerClient(t)
	app := s.newCliTestApp(t)
	release := app.release
	defer app.cleanup()

	scale := app.flynn("scale", "echoer=1", "printer=2")
	t.Assert(scale, Succeeds)

	scale = app.flynn("scale", "--all")
	t.Assert(scale, Succeeds)
	t.Assert(scale, SuccessfulOutputContains, fmt.Sprintf("%s (current)\n", release.ID))
	t.Assert(scale, SuccessfulOutputContains, "echoer=1")
	t.Assert(scale, SuccessfulOutputContains, "printer=2")

	prevRelease := release
	release = &ct.Release{
		ArtifactID: release.ArtifactID,
		Env:        release.Env,
		Meta:       release.Meta,
		Processes:  release.Processes,
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)

	scale = app.flynn("scale", "echoer=2", "printer=1")
	t.Assert(scale, Succeeds)

	scale = app.flynn("scale", "--all")
	t.Assert(scale, Succeeds)
	t.Assert(scale, SuccessfulOutputContains, fmt.Sprintf("%s (current)\n", release.ID))
	t.Assert(scale, SuccessfulOutputContains, "echoer=2")
	t.Assert(scale, SuccessfulOutputContains, "printer=1")
	t.Assert(scale, SuccessfulOutputContains, fmt.Sprintf("%s\n", prevRelease.ID))
	t.Assert(scale, SuccessfulOutputContains, "echoer=1")
	t.Assert(scale, SuccessfulOutputContains, "printer=2")

	scale = app.flynn("scale", "--all", "--release", release.ID)
	t.Assert(scale, c.Not(Succeeds))

	scale = app.flynn("scale", "--all", "echoer=3", "printer=3")
	t.Assert(scale, c.Not(Succeeds))
}
Example #4
func (s *ZDiscoverdSuite) TestDeploy(t *c.C) {
	// ensure we have enough hosts in the cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("cannot deploy discoverd in a single node cluster")
	}

	client := s.controllerClient(t)
	app, err := client.GetApp("discoverd")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			if event.Status == "complete" {
				debugf(t, "got deployment event: %s", event.Status)
				break loop
			}
			if event.Status == "failed" {
				t.Fatal("the deployment failed")
			}
			debugf(t, "got deployment event: %s %s", event.JobType, event.JobState)
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for deployment event")
		}
	}
}
Example #5
func (s *DeployerSuite) createDeployment(t *c.C, process, strategy, service string) *testDeploy {
	app, release := s.createRelease(t, process, strategy)

	if service != "" {
		debugf(t, "waiting for 2 %s services", service)
		events := make(chan *discoverd.Event)
		stream, err := s.discoverdClient(t).Service(service).Watch(events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		count := 0
	loop:
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service discovery stream closed unexpectedly")
				}
				if event.Kind == discoverd.EventKindUp {
					if id, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]; !ok || id != release.ID {
						continue
					}
					debugf(t, "got %s service up event", service)
					count++
				}
				if count == 2 {
					// although the services are up, give them a few more seconds
					// to make sure the deployer will also see them as up.
					time.Sleep(5 * time.Second)
					break loop
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for %s service to come up", service)
			}
		}
	}

	client := s.controllerClient(t)
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(app.ID, jobEvents)
	t.Assert(err, c.IsNil)

	// create a new release for the deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)

	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	debugf(t, "created deployment %s", deployment.ID)
	debugf(t, "deploying from release %s to %s", deployment.OldReleaseID, deployment.NewReleaseID)

	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)

	return &testDeploy{
		s:            s,
		t:            t,
		deployment:   deployment,
		deployEvents: deployEvents,
		deployStream: deployStream,
		jobEvents:    jobEvents,
		jobStream:    jobStream,
	}
}
Example #6
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := make([]*ct.Job, 0, 3*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			expected = append(expected, &ct.Job{
				ReleaseID: releaseID,
				Type:      "omni",
				State:     state,
			})
		}
	}
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	s.waitForDeploymentStatus(t, events, "complete")

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	// try creating the deployment multiple times to avoid getting a
	// "Cannot create deploy, one is already in progress" error (there
	// is no guarantee the previous deploy has finished yet)
	attempts := attempt.Strategy{Total: 10 * time.Second, Delay: 100 * time.Millisecond}
	err = attempts.Run(func() (err error) {
		deployment, err = client.CreateDeployment(app.ID, release.ID)
		return
	})
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	expected = make([]*ct.Job, 0, 4*totalJobs+1)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	s.waitForDeploymentStatus(t, events, "complete")
}
Example #7
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}
Example #8
func (s *SchedulerSuite) TestRollbackController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	currentReleaseID := release.ID

	// create a controller deployment that will fail
	release.ID = ""
	worker := release.Processes["worker"]
	worker.Entrypoint = []string{"/i/dont/exist"}
	release.Processes["worker"] = worker
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to fail
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				t.Fatal("the deployment succeeded when it should have failed")
			case "failed":
				break loop
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for the deploy to fail")
		}
	}

	// wait for jobs to come back up
	hosts, err := s.clusterClient(t).Hosts()
	expected := map[string]map[ct.JobState]int{
		"web":       {ct.JobStateUp: formation.Processes["web"]},
		"scheduler": {ct.JobStateUp: len(hosts)},
	}
	t.Assert(watcher.WaitFor(expected, scaleTimeout, nil), c.IsNil)

	// check the correct controller jobs are running
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if releaseID != currentReleaseID {
				continue
			}
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	t.Assert(actual, c.DeepEquals, map[string]map[string]int{
		currentReleaseID: {
			"web":       formation.Processes["web"],
			"scheduler": formation.Processes["scheduler"] * len(hosts),
			"worker":    formation.Processes["worker"],
		},
	})
}
Example #9
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			event := &ct.DeploymentEvent{
				ReleaseID: releaseID,
				JobType:   "omni",
				JobState:  state,
				Status:    "running",
			}
			expected = append(expected, event)
		}
	}
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)
}
Example #10
func (s *PostgresSuite) testDeploy(t *c.C, d *pgDeploy) {
	// create postgres app
	client := s.controllerClient(t)
	app := &ct.App{Name: d.name, Strategy: "postgres"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default postgres app
	release, err := client.GetAppRelease("postgres")
	t.Assert(err, c.IsNil)
	release.ID = ""
	proc := release.Processes["postgres"]
	delete(proc.Env, "SINGLETON")
	proc.Env["FLYNN_POSTGRES"] = d.name
	proc.Service = d.name
	release.Processes["postgres"] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discStream, err := s.discoverdClient(t).Service(d.name).Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"postgres": d.pgJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got pg cluster state: index=%d primary=%s sync=%s async=%s",
				event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var pgState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if pgState.Primary == nil {
			return false
		}
		if d.pgJobs > 1 && pgState.Sync == nil {
			return false
		}
		if d.pgJobs > 2 && len(pgState.Async) != d.pgJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			pgState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == "up" {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for postgres formation")
		}
	}

	// connect to the db so we can test writes
	db := postgres.Wait(d.name, fmt.Sprintf("dbname=postgres user=flynn password=%s", release.Env["PGPASSWORD"]))
	dbname := "deploy-test"
	t.Assert(db.Exec(fmt.Sprintf(`CREATE DATABASE "%s" WITH OWNER = "flynn"`, dbname)), c.IsNil)
	db.Close()
	db, err = postgres.Open(d.name, fmt.Sprintf("dbname=%s user=flynn password=%s", dbname, release.Env["PGPASSWORD"]))
	t.Assert(err, c.IsNil)
	defer db.Close()
	t.Assert(db.Exec(`CREATE TABLE deploy_test (data text)`), c.IsNil)
	assertWriteable := func() {
		debug(t, "writing to postgres database")
		t.Assert(db.Exec(`INSERT INTO deploy_test (data) VALUES ('data')`), c.IsNil)
	}

	// check currently writeable
	assertWriteable()

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is one of the remaining
	// expected states. This handles the fact that some expected states may not
	// occur, while those that do occur arrive in order.
	assertNextState := func(remaining []expectedPgState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with less asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for postgres cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		log := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				log("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				log("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				log("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				log("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				log("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					log("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected pg state")
		return -1
	}
	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != "up" && e.JobState != "down" {
				continue
			}
			switch e.JobType {
			case "postgres":
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			case "web":
				if e.JobState == "up" && e.ReleaseID == newRelease {
					newWebJobs++
				}
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for deployment")
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now deploy is complete
	assertWriteable()
}
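
All of the examples above share the same core pattern: clear the release ID, register it again with CreateRelease, start a deployment with CreateDeployment, then consume the StreamDeployment event stream until a "complete" or "failed" status arrives. The sketch below distills that pattern outside the test suite. It is a minimal, hedged example: the deployAndWait helper, the deployClient interface and the import paths are assumptions based on the package aliases and method calls seen above, not part of the Flynn test code, and exact signatures may differ between Flynn versions.

package example

import (
	"errors"
	"fmt"
	"time"

	ct "github.com/flynn/flynn/controller/types" // assumed path for the ct alias used above
	"github.com/flynn/flynn/pkg/stream"          // assumed path for the returned stream type
)

// deployClient lists only the controller-client methods the pattern needs;
// the client returned by controllerClient(t) in the suites is assumed to satisfy it.
type deployClient interface {
	CreateRelease(release *ct.Release) error
	CreateDeployment(appID, releaseID string) (*ct.Deployment, error)
	StreamDeployment(d *ct.Deployment, output chan *ct.DeploymentEvent) (stream.Stream, error)
}

// deployAndWait re-registers the given release, deploys it to the app and
// blocks until the deployment completes, fails or times out.
func deployAndWait(client deployClient, app *ct.App, release *ct.Release, timeout time.Duration) error {
	// clearing the ID makes CreateRelease register a new release with the
	// same artifact, env and process types (as the tests above do)
	release.ID = ""
	if err := client.CreateRelease(release); err != nil {
		return err
	}

	deployment, err := client.CreateDeployment(app.ID, release.ID)
	if err != nil {
		return err
	}

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	if err != nil {
		return err
	}
	defer eventStream.Close()

	for {
		select {
		case event, ok := <-events:
			if !ok {
				return errors.New("unexpected close of deployment event stream")
			}
			switch event.Status {
			case "complete":
				return nil
			case "failed":
				return fmt.Errorf("deployment failed: %s", event.Error)
			}
			// "pending" and "running" events carry per-job progress
			// (JobType, JobState) and are simply skipped here
		case <-time.After(timeout):
			return errors.New("timed out waiting for deployment")
		}
	}
}

A caller mirroring the tests would pass time.Duration(app.DeployTimeout) * time.Second as the timeout.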