func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	expected := make([]*ct.Job, 0, 3*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			expected = append(expected, &ct.Job{
				ReleaseID: releaseID,
				Type:      "omni",
				State:     state,
			})
		}
	}
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	s.waitForDeploymentStatus(t, events, "complete")

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)

	// try creating the deployment multiple times to avoid getting a
	// "Cannot create deploy, one is already in progress" error (there
	// is no guarantee the previous deploy has finished yet)
	attempts := attempt.Strategy{Total: 10 * time.Second, Delay: 100 * time.Millisecond}
	err = attempts.Run(func() (err error) {
		deployment, err = client.CreateDeployment(app.ID, release.ID)
		return
	})
	t.Assert(err, c.IsNil)

	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)

	expected = make([]*ct.Job, 0, 4*totalJobs+1)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	s.waitForDeploymentStatus(t, events, "complete")
}
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tag-based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs are on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs are distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host 1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add a different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		// forward only events for this job's backend onto the shared channel
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatalf("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			event := &ct.DeploymentEvent{
				ReleaseID: releaseID,
				JobType:   "omni",
				JobState:  state,
				Status:    "running",
			}
			expected = append(expected, event)
		}
	}
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)

	expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)
}
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// watch job events of the current release so we can wait for down
	// events later
	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment %s event", e.Status)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// wait for the old release to be fully scaled down
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	err = watcher.WaitFor(ct.JobEvents{
		"web":       ct.JobDownEvents(formation.Processes["web"]),
		"worker":    ct.JobDownEvents(formation.Processes["worker"]),
		"scheduler": ct.JobDownEvents(len(hosts)),
	}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// check the correct controller jobs are running
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}