func (c *Client) ExpectedScalingEvents(actual, expected map[string]int, releaseProcesses map[string]ct.ProcessType, clusterSize int) ct.JobEvents {
	events := make(ct.JobEvents, len(expected))
	for typ, count := range expected {
		diff := count
		val, ok := actual[typ]
		if ok {
			diff = count - val
		}
		proc, ok := releaseProcesses[typ]
		if ok && proc.Omni {
			diff *= clusterSize
		}
		if diff > 0 {
			events[typ] = ct.JobUpEvents(diff)
		} else if diff < 0 {
			events[typ] = ct.JobDownEvents(-diff)
		}
	}
	return events
}
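For illustration, here is a minimal sketch (not from the original source) of how ExpectedScalingEvents treats an omni process: the difference between expected and actual counts is multiplied by the cluster size, so scaling "web" from 1 to 3 on a 3-host cluster yields 6 up events. The variable names below are hypothetical.

	actual := map[string]int{"web": 1}
	expected := map[string]int{"web": 3}
	procs := map[string]ct.ProcessType{"web": {Omni: true}}

	// diff = (3 - 1) * 3 hosts = 6
	events := client.ExpectedScalingEvents(actual, expected, procs, 3)
	// events["web"] is equivalent to ct.JobUpEvents(6)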
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tag-based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)

	for _, typ := range processTypes {
		num := d.Processes[typ]
		// don't scale processes which no longer exist in the new release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			num = 0
		}
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}
		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}

			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down), returning
	// ErrSkipRollback if an error occurs (rolling back doesn't make a ton
	// of sense because it involves stopping the new working jobs).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		if count > 0 {
			diff[typ] = ct.JobDownEvents(count)
		}
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than potentially
	// waiting for the jobs to actually stop) as we can trust that the
	// scheduler will actually kill the jobs, so no need to delay the
	// deployment.
	log.Info("finished one-by-one deployment")
	return nil
}
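A quick sketch of the omni arithmetic above, using assumed values rather than anything from the source: the release state counts one job per host, so dividing by the host count recovers the per-host formation scale, and each one-by-one step then waits for hostCount job events.

	// assuming a 3-host cluster with 2 "worker" jobs on each host
	hostCount := 3
	releaseState := map[string]int{"worker": 6}
	oldScale := releaseState["worker"] / hostCount // == 2, the formation count
	diff := hostCount                              // each scaling step produces 3 job events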
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}

	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	// the new jobs have now started and they are up, so return
	// ErrSkipRollback from here on out if an error occurs (rolling
	// back doesn't make a ton of sense because it involves
	// stopping the new working jobs).
	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}

	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		if existing := d.oldReleaseState[typ]; existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		// the new jobs have now started and they are up, so return
		// ErrSkipRollback (rolling back doesn't make a ton of sense
		// because it involves stopping the new working jobs).
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than waiting for the
	// jobs to actually stop) as we can trust that the scheduler will
	// actually kill the jobs, so no need to delay the deployment.
	log.Info("finished all-at-once deployment")
	return nil
}
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)

	for _, typ := range processTypes {
		num := d.Processes[typ]
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}
		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.NewReleaseID,
					JobState:  ct.JobStateStarting,
					JobType:   typ,
				}
			}

			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.OldReleaseID,
					JobState:  ct.JobStateStopping,
					JobType:   typ,
				}
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		diff[typ] = ct.JobDownEvents(count)
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return err
	}
	if diff.Count() > 0 {
		log.Info(fmt.Sprintf("waiting for %d job down event(s)", diff.Count()))
		if err := d.waitForJobEvents(d.OldReleaseID, diff, log); err != nil {
			log.Error("error waiting for job down events", "err", err)
			return err
		}
	}

	log.Info("finished one-by-one deployment")
	return nil
}
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	for typ, n := range d.Processes {
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}

	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", d.Processes)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: d.Processes,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return err
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			// we have started the new jobs (and they are up) and
			// requested that the old jobs stop. at this point there's
			// not much more we can do. Rolling back doesn't make a ton
			// of sense because it involves stopping the new (working)
			// jobs.
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatalf("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}
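The test above assumes that the test-http-blocker service holds /block requests open until /unblock is hit and then responds with "done". A rough sketch of such a handler (an assumption for illustration, not the actual fixture source) looks like this:

	unblock := make(chan struct{})
	http.HandleFunc("/block", func(w http.ResponseWriter, r *http.Request) {
		<-unblock // hold the connection open until unblocked
		w.Write([]byte("done"))
	})
	http.HandleFunc("/unblock", func(w http.ResponseWriter, r *http.Request) {
		close(unblock)
	})
	http.HandleFunc("/ping", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})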
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// watch job events of the current release so we can wait for down
	// events later
	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment %s event", e.Status)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// wait for the old release to be fully scaled down
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	err = watcher.WaitFor(ct.JobEvents{
		"web":       ct.JobDownEvents(formation.Processes["web"]),
		"worker":    ct.JobDownEvents(formation.Processes["worker"]),
		"scheduler": ct.JobDownEvents(len(hosts)),
	}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// check the correct controller jobs are running
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}