func (s *HostSuite) TestAddFailingJob(t *c.C) { // get a host and watch events hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) t.Assert(hosts, c.Not(c.HasLen), 0) h := hosts[0] jobID := random.UUID() events := make(chan *host.Event) stream, err := h.StreamEvents(jobID, events) t.Assert(err, c.IsNil) defer stream.Close() // add a job with a non existent partition job := &host.Job{ ID: jobID, ImageArtifact: &host.Artifact{ Type: host.ArtifactTypeDocker, URI: "http://example.com?name=foo&id=bar", }, Partition: "nonexistent", } t.Assert(h.AddJob(job), c.IsNil) // check we get a create then error event actual := make(map[string]*host.Event, 2) loop: for { select { case e, ok := <-events: if !ok { t.Fatalf("job event stream closed unexpectedly: %s", stream.Err()) } if _, ok := actual[e.Event]; ok { t.Fatalf("unexpected event: %v", e) } actual[e.Event] = e if len(actual) >= 2 { break loop } case <-time.After(30 * time.Second): t.Fatal("timed out waiting for job event") } } t.Assert(actual[host.JobEventCreate], c.NotNil) e := actual[host.JobEventError] t.Assert(e, c.NotNil) t.Assert(e.Job, c.NotNil) t.Assert(e.Job.Error, c.NotNil) t.Assert(*e.Job.Error, c.Equals, `host: invalid job partition "nonexistent"`) }
func (s *DockerReceiveSuite) TestPushImage(t *c.C) { // build a Docker image repo := "docker-receive-test-push" s.buildDockerImage(t, repo, "RUN echo foo > /foo.txt") // subscribe to artifact events client := s.controllerClient(t) events := make(chan *ct.Event) stream, err := client.StreamEvents(ct.StreamEventsOptions{ ObjectTypes: []ct.EventType{ct.EventTypeArtifact}, }, events) t.Assert(err, c.IsNil) defer stream.Close() // push the Docker image to docker-receive u, err := url.Parse(s.clusterConf(t).DockerPushURL) t.Assert(err, c.IsNil) tag := fmt.Sprintf("%s/%s:latest", u.Host, repo) t.Assert(run(t, exec.Command("docker", "tag", "--force", repo, tag)), Succeeds) t.Assert(run(t, exec.Command("docker", "push", tag)), Succeeds) // wait for an artifact to be created var artifact ct.Artifact loop: for { select { case event, ok := <-events: if !ok { t.Fatalf("event stream closed unexpectedly: %s", stream.Err()) } t.Assert(json.Unmarshal(event.Data, &artifact), c.IsNil) if artifact.Meta["docker-receive.repository"] == repo { break loop } case <-time.After(30 * time.Second): t.Fatal("timed out waiting for artifact") } } // create a release with the Docker artifact app := &ct.App{} t.Assert(client.CreateApp(app), c.IsNil) release := &ct.Release{ArtifactIDs: []string{artifact.ID}} t.Assert(client.CreateRelease(release), c.IsNil) t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil) // check running a job uses the image t.Assert(flynn(t, "/", "-a", app.ID, "run", "cat", "/foo.txt"), SuccessfulOutputContains, "foo") }
func (s *DeployerSuite) waitForDeploymentStatus(t *c.C, events chan *ct.DeploymentEvent, status string) *ct.DeploymentEvent { for { select { case event := <-events: // ignore pending status if event.Status == "pending" { continue } if event.Status != status { t.Fatalf("expected deploy %s event, got %s", status, event.Status) } return event case <-time.After(60 * time.Second): t.Fatalf("timed out waiting for deploy %s event", status) } } return nil }
func (s *DeployerSuite) createDeployment(t *c.C, process, strategy, service string) *ct.Deployment { app, release := s.createRelease(t, process, strategy) if service != "" { debugf(t, "waiting for 2 %s services", service) events := make(chan *discoverd.Event) stream, err := s.discoverdClient(t).Service(service).Watch(events) t.Assert(err, c.IsNil) defer stream.Close() count := 0 loop: for { select { case event, ok := <-events: if !ok { t.Fatalf("service discovery stream closed unexpectedly") } if event.Kind == discoverd.EventKindUp { if id, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]; !ok || id != release.ID { continue } debugf(t, "got %s service up event", service) count++ } if count == 2 { // although the services are up, give them a few more seconds // to make sure the deployer will also see them as up. time.Sleep(5 * time.Second) break loop } case <-time.After(10 * time.Second): t.Fatalf("timed out waiting for %s service to come up", service) } } } // create a new release for the deployment release.ID = "" t.Assert(s.controllerClient(t).CreateRelease(release), c.IsNil) deployment, err := s.controllerClient(t).CreateDeployment(app.ID, release.ID) t.Assert(err, c.IsNil) return deployment }
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) { startTimeout := 20 * time.Second app, release := s.createApp(t) watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: map[string]int{"printer": 1}, }), c.IsNil) var id string var assignId = func(j *ct.Job) error { debugf(t, "got job event: %s %s", j.ID, j.State) id = j.ID return nil } err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, scaleTimeout, assignId) t.Assert(err, c.IsNil) waitForRestart := func(duration time.Duration) { start := time.Now() s.stopJob(t, id) debugf(t, "expecting new job to start in %s", duration) err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, duration+startTimeout, assignId) t.Assert(err, c.IsNil) actual := time.Now().Sub(start) if actual < duration { t.Fatalf("expected new job to start after %s but started after %s", duration, actual) } } waitForRestart(0) waitForRestart(0) waitForRestart(0) waitForRestart(0) }
func (s *CLISuite) TestDockerExportImport(t *c.C) { // release via docker-receive client := s.controllerClient(t) app := &ct.App{Name: "cli-test-docker-export"} t.Assert(client.CreateApp(app), c.IsNil) repo := "cli-test-export" s.buildDockerImage(t, repo, `CMD ["/bin/pingserv"]`) t.Assert(flynn(t, "/", "-a", app.Name, "docker", "push", repo), Succeeds) t.Assert(flynn(t, "/", "-a", app.Name, "scale", "app=1"), Succeeds) defer flynn(t, "/", "-a", app.Name, "scale", "app=0") // grab the Flynn image layers release, err := client.GetAppRelease(app.ID) t.Assert(err, c.IsNil) artifact, err := client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) layers := artifact.Manifest().Rootfs[0].Layers layerNames := make([]string, len(layers)) for i, layer := range layers { layerNames[i] = layer.ID + ".layer" } // check exporting to stdout works file := filepath.Join(t.MkDir(), "export.tar") cmd := exec.Command("sh", "-c", fmt.Sprintf("%s -a %s export > %s", args.CLI, app.Name, file)) cmd.Env = flynnEnv(flynnrc) var stderr bytes.Buffer cmd.Stderr = &stderr if args.Stream { cmd.Stderr = io.MultiWriter(os.Stderr, &stderr) } if err := cmd.Run(); err != nil { t.Fatalf("error exporting docker app to stdout: %s: %s", err, stderr.String()) } exportFiles := append([]string{ "app.json", "routes.json", "release.json", "artifacts.json", }, append(layerNames, "formation.json")...) assertExportContains(t, file, exportFiles...) // export the app directly to the file t.Assert(flynn(t, "/", "-a", app.Name, "export", "-f", file), Succeeds) assertExportContains(t, file, exportFiles...) 
// delete the image from the registry u, err := url.Parse(s.clusterConf(t).DockerPushURL) t.Assert(err, c.IsNil) uri := fmt.Sprintf("http://%s/v2/%s/manifests/%s", u.Host, app.Name, artifact.Meta["docker-receive.digest"]) req, err := http.NewRequest("DELETE", uri, nil) req.SetBasicAuth("", s.clusterConf(t).Key) t.Assert(err, c.IsNil) res, err := http.DefaultClient.Do(req) t.Assert(err, c.IsNil) res.Body.Close() // import to another app importApp := "cli-test-docker-import" t.Assert(flynn(t, "/", "import", "--name", importApp, "--file", file), Succeeds) defer flynn(t, "/", "-a", importApp, "scale", "app=0") // wait for it to start _, err = s.discoverdClient(t).Instances(importApp+"-web", 10*time.Second) t.Assert(err, c.IsNil) }
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) { client := s.controllerClient(t) // create app with gc.max_inactive_slug_releases=3 maxInactiveSlugReleases := 3 app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}} t.Assert(client.CreateApp(app), c.IsNil) // create an image artifact imageArtifact := s.createArtifact(t, "test-apps") // create 5 slug artifacts tmp, err := ioutil.TempFile("", "squashfs-") t.Assert(err, c.IsNil) defer os.Remove(tmp.Name()) defer tmp.Close() t.Assert(exec.Command("mksquashfs", t.MkDir(), tmp.Name(), "-noappend").Run(), c.IsNil) slug, err := ioutil.ReadAll(tmp) t.Assert(err, c.IsNil) slugHash := sha512.Sum512(slug) slugs := []string{ "http://blobstore.discoverd/layer/1.squashfs", "http://blobstore.discoverd/layer/2.squashfs", "http://blobstore.discoverd/layer/3.squashfs", "http://blobstore.discoverd/layer/4.squashfs", "http://blobstore.discoverd/layer/5.squashfs", } slugArtifacts := make([]*ct.Artifact, len(slugs)) put := func(url string, data []byte) { req, err := http.NewRequest("PUT", url, bytes.NewReader(data)) t.Assert(err, c.IsNil) res, err := http.DefaultClient.Do(req) t.Assert(err, c.IsNil) res.Body.Close() t.Assert(res.StatusCode, c.Equals, http.StatusOK) } for i, layerURL := range slugs { manifest := &ct.ImageManifest{ Type: ct.ImageManifestTypeV1, Rootfs: []*ct.ImageRootfs{{ Layers: []*ct.ImageLayer{{ ID: strconv.Itoa(i + 1), Type: ct.ImageLayerTypeSquashfs, Length: int64(len(slug)), Hashes: map[string]string{"sha512": hex.EncodeToString(slugHash[:])}, }}, }}, } data := manifest.RawManifest() url := fmt.Sprintf("http://blobstore.discoverd/image/%s.json", manifest.ID()) put(url, data) put(layerURL, slug) artifact := &ct.Artifact{ Type: ct.ArtifactTypeFlynn, URI: url, Meta: map[string]string{"blobstore": "true"}, RawManifest: data, Hashes: manifest.Hashes(), Size: int64(len(data)), LayerURLTemplate: "http://blobstore.discoverd/layer/{id}.squashfs", } 
t.Assert(client.CreateArtifact(artifact), c.IsNil) slugArtifacts[i] = artifact } // create 6 releases, the second being scaled up and having the // same slug as the third (so prevents the slug being deleted) releases := make([]*ct.Release, 6) for i, r := range []struct { slug *ct.Artifact active bool }{ {slugArtifacts[0], false}, {slugArtifacts[1], true}, {slugArtifacts[1], false}, {slugArtifacts[2], false}, {slugArtifacts[3], false}, {slugArtifacts[4], false}, } { release := &ct.Release{ ArtifactIDs: []string{imageArtifact.ID, r.slug.ID}, Processes: map[string]ct.ProcessType{ "app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}}, }, Meta: map[string]string{"git": "true"}, } t.Assert(client.CreateRelease(release), c.IsNil) procs := map[string]int{"app": 0} if r.active { procs["app"] = 1 } t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: procs, }), c.IsNil) releases[i] = release } // scale the last release so we can deploy it lastRelease := releases[len(releases)-1] watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: lastRelease.ID, Processes: map[string]int{"app": 1}, }), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil) t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil) // subscribe to garbage collection events gcEvents := make(chan *ct.Event) stream, err := client.StreamEvents(ct.StreamEventsOptions{ AppID: app.ID, ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection}, }, gcEvents) t.Assert(err, c.IsNil) defer stream.Close() // deploy a new release with the same slug as the last release timeoutCh := make(chan struct{}) time.AfterFunc(5*time.Minute, func() { close(timeoutCh) }) newRelease := *lastRelease newRelease.ID = "" t.Assert(client.CreateRelease(&newRelease), c.IsNil) 
t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil) // wait for garbage collection select { case event, ok := <-gcEvents: if !ok { t.Fatalf("event stream closed unexpectedly: %s", stream.Err()) } var e ct.AppGarbageCollectionEvent t.Assert(json.Unmarshal(event.Data, &e), c.IsNil) if e.Error != "" { t.Fatalf("garbage collection failed: %s", e.Error) } case <-time.After(60 * time.Second): t.Fatal("timed out waiting for garbage collection") } // check we have 4 distinct slug releases (so 5 in total, only 3 are // inactive) list, err := client.AppReleaseList(app.ID) t.Assert(err, c.IsNil) t.Assert(list, c.HasLen, maxInactiveSlugReleases+2) distinctSlugs := make(map[string]struct{}, len(list)) for _, release := range list { t.Assert(release.ArtifactIDs, c.HasLen, 2) distinctSlugs[release.ArtifactIDs[1]] = struct{}{} } t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1) // check the first and third releases got deleted, but the rest remain assertDeleted := func(release *ct.Release, deleted bool) { _, err := client.GetRelease(release.ID) if deleted { t.Assert(err, c.Equals, controller.ErrNotFound) } else { t.Assert(err, c.IsNil) } } assertDeleted(releases[0], true) assertDeleted(releases[1], false) assertDeleted(releases[2], true) assertDeleted(releases[3], false) assertDeleted(releases[4], false) assertDeleted(releases[5], false) assertDeleted(&newRelease, false) // check the first slug got deleted, but the rest remain s.assertURI(t, slugs[0], http.StatusNotFound) for i := 1; i < len(slugs); i++ { s.assertURI(t, slugs[i], http.StatusOK) } }
func (s *ReleaseSuite) TestReleaseImages(t *c.C) { if testCluster == nil { t.Skip("cannot boot release cluster") } // stream script output to t.Log logReader, logWriter := io.Pipe() defer logWriter.Close() go func() { buf := bufio.NewReader(logReader) for { line, err := buf.ReadString('\n') if err != nil { return } debug(t, line[0:len(line)-1]) } }() // boot the release cluster, release components to a blobstore and output the new images.json releaseCluster := s.addReleaseHosts(t) buildHost := releaseCluster.Instances[0] var imagesJSON bytes.Buffer var script bytes.Buffer slugImageID := random.UUID() releaseScript.Execute(&script, struct{ ControllerKey, SlugImageID string }{releaseCluster.ControllerKey, slugImageID}) t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: &imagesJSON, Stderr: logWriter}), c.IsNil) var images map[string]*ct.Artifact t.Assert(json.Unmarshal(imagesJSON.Bytes(), &images), c.IsNil) // install Flynn from the blobstore on the vanilla host blobstoreAddr := buildHost.IP + ":8080" installHost := releaseCluster.Instances[3] script.Reset() installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr}) var installOutput bytes.Buffer out := io.MultiWriter(logWriter, &installOutput) t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil) // check the flynn-host version is correct var hostVersion bytes.Buffer t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil) t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20161108.0-test") // check rebuilt images were downloaded assertInstallOutput := func(format string, v ...interface{}) { expected := fmt.Sprintf(format, v...) 
if !strings.Contains(installOutput.String(), expected) { t.Fatalf(`expected install to output %q`, expected) } } for name, image := range images { assertInstallOutput("pulling %s image", name) for _, layer := range image.Manifest().Rootfs[0].Layers { assertInstallOutput("pulling %s layer %s", name, layer.ID) } } // installing on an instance with Flynn running should fail script.Reset() installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr}) installOutput.Reset() err := buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}) if err == nil || !strings.Contains(installOutput.String(), "ERROR: Flynn is already installed.") { t.Fatal("expected Flynn install to fail but it didn't") } // create a controller client for the release cluster pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin) t.Assert(err, c.IsNil) client, err := controller.NewClientWithConfig( "https://"+buildHost.IP, releaseCluster.ControllerKey, controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain}, ) t.Assert(err, c.IsNil) // deploy a slug based app + Redis resource slugApp := &ct.App{} t.Assert(client.CreateApp(slugApp), c.IsNil) gitreceive, err := client.GetAppRelease("gitreceive") t.Assert(err, c.IsNil) imageArtifact, err := client.GetArtifact(gitreceive.Env["SLUGRUNNER_IMAGE_ID"]) t.Assert(err, c.IsNil) slugArtifact, err := client.GetArtifact(slugImageID) t.Assert(err, c.IsNil) resource, err := client.ProvisionResource(&ct.ResourceReq{ProviderID: "redis", Apps: []string{slugApp.ID}}) t.Assert(err, c.IsNil) release := &ct.Release{ ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID}, Processes: map[string]ct.ProcessType{"web": {Args: []string{"/runner/init", "bin/http"}}}, Meta: map[string]string{"git": "true"}, Env: resource.Env, } t.Assert(client.CreateRelease(release), c.IsNil) t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil) watcher, err := client.WatchJobEvents(slugApp.ID, release.ID) 
t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: slugApp.ID, ReleaseID: release.ID, Processes: map[string]int{"web": 1}, }), c.IsNil) err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil) t.Assert(err, c.IsNil) // run a cluster update from the blobstore updateHost := releaseCluster.Instances[1] script.Reset() updateScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr, "Discoverd": updateHost.IP + ":1111"}) var updateOutput bytes.Buffer out = io.MultiWriter(logWriter, &updateOutput) t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil) // check rebuilt images were downloaded for name := range images { for _, host := range releaseCluster.Instances[0:2] { expected := fmt.Sprintf(`"pulling %s image" host=%s`, name, host.ID) if !strings.Contains(updateOutput.String(), expected) { t.Fatalf(`expected update to download %s on host %s`, name, host.ID) } } } assertImage := func(uri, image string) { t.Assert(uri, c.Equals, images[image].URI) } // check system apps were deployed correctly for _, app := range updater.SystemApps { if app.ImageOnly { continue // we don't deploy ImageOnly updates } debugf(t, "checking new %s release is using image %s", app.Name, images[app.Name].URI) expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name) if !strings.Contains(updateOutput.String(), expected) { t.Fatalf(`expected update to deploy %s`, app.Name) } release, err := client.GetAppRelease(app.Name) t.Assert(err, c.IsNil) debugf(t, "new %s release ID: %s", app.Name, release.ID) artifact, err := client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) debugf(t, "new %s artifact: %+v", app.Name, artifact) assertImage(artifact.URI, app.Name) } // check gitreceive has the correct slug env vars gitreceive, err = client.GetAppRelease("gitreceive") t.Assert(err, c.IsNil) for _, name := range []string{"slugbuilder", "slugrunner"} { 
artifact, err := client.GetArtifact(gitreceive.Env[strings.ToUpper(name)+"_IMAGE_ID"]) t.Assert(err, c.IsNil) assertImage(artifact.URI, name) } // check slug based app was deployed correctly release, err = client.GetAppRelease(slugApp.Name) t.Assert(err, c.IsNil) imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) assertImage(imageArtifact.URI, "slugrunner") // check Redis app was deployed correctly release, err = client.GetAppRelease(resource.Env["FLYNN_REDIS"]) t.Assert(err, c.IsNil) imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) assertImage(imageArtifact.URI, "redis") }
func (s *HostSuite) TestNotifyOOM(t *c.C) { appID := random.UUID() // subscribe to init log messages from the logaggregator client, err := logaggc.New("") t.Assert(err, c.IsNil) opts := logagg.LogOpts{ Follow: true, StreamTypes: []logagg.StreamType{logagg.StreamTypeInit}, } rc, err := client.GetLog(appID, &opts) t.Assert(err, c.IsNil) defer rc.Close() msgs := make(chan *logaggc.Message) stream := stream.New() defer stream.Close() go func() { defer close(msgs) dec := json.NewDecoder(rc) for { var msg logaggc.Message if err := dec.Decode(&msg); err != nil { stream.Error = err return } select { case msgs <- &msg: case <-stream.StopCh: return } } }() // run the OOM job cmd := exec.CommandUsingCluster( s.clusterClient(t), s.createArtifact(t, "test-apps"), "/bin/oom", ) cmd.Meta = map[string]string{"flynn-controller.app": appID} runErr := make(chan error) go func() { runErr <- cmd.Run() }() // wait for the OOM notification for { select { case err := <-runErr: t.Assert(err, c.IsNil) case msg, ok := <-msgs: if !ok { t.Fatalf("message stream closed unexpectedly: %s", stream.Err()) } t.Log(msg.Msg) if strings.Contains(msg.Msg, "FATAL: a container process was killed due to lack of available memory") { return } case <-time.After(30 * time.Second): t.Fatal("timed out waiting for OOM notification") } } }
func (s *ZDomainMigrationSuite) migrateDomain(t *c.C, dm *ct.DomainMigration) *ct.DomainMigration { debugf(t, "migrating domain from %s to %s", dm.OldDomain, dm.Domain) client := s.controllerClient(t) events := make(chan *ct.Event) stream, err := client.StreamEvents(ct.StreamEventsOptions{ ObjectTypes: []ct.EventType{ct.EventTypeDomainMigration}, }, events) t.Assert(err, c.IsNil) defer stream.Close() prevRouterRelease, err := client.GetAppRelease("router") t.Assert(err, c.IsNil) err = client.PutDomain(dm) t.Assert(err, c.IsNil) waitEvent := func(typ string, timeout time.Duration) (event ct.DomainMigrationEvent) { debugf(t, "waiting for %s domain migration event", typ) var e *ct.Event var ok bool select { case e, ok = <-events: if !ok { t.Fatalf("event stream closed unexpectedly: %s", stream.Err()) } debugf(t, "got %s domain migration event", typ) case <-time.After(timeout): t.Fatalf("timed out waiting for %s domain migration event", typ) } t.Assert(e.Data, c.NotNil) t.Assert(json.Unmarshal(e.Data, &event), c.IsNil) return } // created event := waitEvent("initial", 2*time.Minute) t.Assert(event.Error, c.Equals, "") t.Assert(event.DomainMigration, c.NotNil) t.Assert(event.DomainMigration.ID, c.Equals, dm.ID) t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain) t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain) t.Assert(event.DomainMigration.OldTLSCert, c.NotNil) t.Assert(event.DomainMigration.CreatedAt, c.NotNil) t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true) t.Assert(event.DomainMigration.FinishedAt, c.IsNil) // complete event = waitEvent("final", 3*time.Minute) t.Assert(event.Error, c.Equals, "") t.Assert(event.DomainMigration, c.NotNil) t.Assert(event.DomainMigration.ID, c.Equals, dm.ID) t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain) t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain) t.Assert(event.DomainMigration.TLSCert, c.NotNil) t.Assert(event.DomainMigration.OldTLSCert, 
c.NotNil) t.Assert(event.DomainMigration.CreatedAt, c.NotNil) t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true) t.Assert(event.DomainMigration.FinishedAt, c.NotNil) cert := event.DomainMigration.TLSCert controllerRelease, err := client.GetAppRelease("controller") t.Assert(err, c.IsNil) t.Assert(controllerRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain) t.Assert(controllerRelease.Env["CA_CERT"], c.Equals, cert.CACert) routerRelease, err := client.GetAppRelease("router") t.Assert(err, c.IsNil) t.Assert(routerRelease.Env["TLSCERT"], c.Equals, cert.Cert) t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), "") t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), prevRouterRelease.Env["TLSKEY"]) dashboardRelease, err := client.GetAppRelease("dashboard") t.Assert(err, c.IsNil) t.Assert(dashboardRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain) t.Assert(dashboardRelease.Env["CONTROLLER_DOMAIN"], c.Equals, fmt.Sprintf("controller.%s", dm.Domain)) t.Assert(dashboardRelease.Env["URL"], c.Equals, fmt.Sprintf("https://dashboard.%s", dm.Domain)) t.Assert(dashboardRelease.Env["CA_CERT"], c.Equals, cert.CACert) routes, err := client.RouteList("controller") t.Assert(err, c.IsNil) t.Assert(len(routes), c.Equals, 2) // one for both new and old domain var route *router.Route for _, r := range routes { if strings.HasSuffix(r.Domain, dm.Domain) { route = r break } } t.Assert(route, c.Not(c.IsNil)) t.Assert(route.Domain, c.Equals, fmt.Sprintf("controller.%s", dm.Domain)) t.Assert(route.Certificate.Cert, c.Equals, strings.TrimSuffix(cert.Cert, "\n")) var doPing func(string, int) doPing = func(component string, retriesRemaining int) { url := fmt.Sprintf("http://%s.%s/ping", component, dm.Domain) httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}} res, err := httpClient.Get(url) if (err != nil || res.StatusCode != 200) && retriesRemaining > 0 { time.Sleep(100 * time.Millisecond) doPing(component, 
retriesRemaining-1) return } t.Assert(err, c.IsNil) t.Assert(res.StatusCode, c.Equals, 200, c.Commentf("failed to ping %s", component)) } doPing("controller", 3) doPing("dashboard", 3) return event.DomainMigration }
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) { client := s.controllerClient(t) // create app with gc.max_inactive_slug_releases=3 maxInactiveSlugReleases := 3 app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}} t.Assert(client.CreateApp(app), c.IsNil) // create an image artifact imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: imageURIs["test-apps"]} t.Assert(client.CreateArtifact(imageArtifact), c.IsNil) // create 5 slug artifacts var slug bytes.Buffer gz := gzip.NewWriter(&slug) t.Assert(tar.NewWriter(gz).Close(), c.IsNil) t.Assert(gz.Close(), c.IsNil) slugs := []string{ "http://blobstore.discoverd/1/slug.tgz", "http://blobstore.discoverd/2/slug.tgz", "http://blobstore.discoverd/3/slug.tgz", "http://blobstore.discoverd/4/slug.tgz", "http://blobstore.discoverd/5/slug.tgz", } slugArtifacts := make([]*ct.Artifact, len(slugs)) for i, uri := range slugs { req, err := http.NewRequest("PUT", uri, bytes.NewReader(slug.Bytes())) t.Assert(err, c.IsNil) res, err := http.DefaultClient.Do(req) t.Assert(err, c.IsNil) res.Body.Close() t.Assert(res.StatusCode, c.Equals, http.StatusOK) artifact := &ct.Artifact{ Type: host.ArtifactTypeFile, URI: uri, Meta: map[string]string{"blobstore": "true"}, } t.Assert(client.CreateArtifact(artifact), c.IsNil) slugArtifacts[i] = artifact } // create 6 releases, the second being scaled up and having the // same slug as the third (so prevents the slug being deleted) releases := make([]*ct.Release, 6) for i, r := range []struct { slug *ct.Artifact active bool }{ {slugArtifacts[0], false}, {slugArtifacts[1], true}, {slugArtifacts[1], false}, {slugArtifacts[2], false}, {slugArtifacts[3], false}, {slugArtifacts[4], false}, } { release := &ct.Release{ ArtifactIDs: []string{imageArtifact.ID, r.slug.ID}, Processes: map[string]ct.ProcessType{ "app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}}, }, } t.Assert(client.CreateRelease(release), 
c.IsNil) procs := map[string]int{"app": 0} if r.active { procs["app"] = 1 } t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: procs, }), c.IsNil) releases[i] = release } // scale the last release so we can deploy it lastRelease := releases[len(releases)-1] watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: lastRelease.ID, Processes: map[string]int{"app": 1}, }), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil) t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil) // subscribe to garbage collection events gcEvents := make(chan *ct.Event) stream, err := client.StreamEvents(ct.StreamEventsOptions{ AppID: app.ID, ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection}, }, gcEvents) t.Assert(err, c.IsNil) defer stream.Close() // deploy a new release with the same slug as the last release timeoutCh := make(chan struct{}) time.AfterFunc(5*time.Minute, func() { close(timeoutCh) }) newRelease := *lastRelease newRelease.ID = "" t.Assert(client.CreateRelease(&newRelease), c.IsNil) t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil) // wait for garbage collection select { case event, ok := <-gcEvents: if !ok { t.Fatalf("event stream closed unexpectedly: %s", stream.Err()) } var e ct.AppGarbageCollectionEvent t.Assert(json.Unmarshal(event.Data, &e), c.IsNil) if e.Error != "" { t.Fatalf("garbage collection failed: %s", e.Error) } case <-time.After(60 * time.Second): t.Fatal("timed out waiting for garbage collection") } // check we have 4 distinct slug releases (so 5 in total, only 3 are // inactive) list, err := client.AppReleaseList(app.ID) t.Assert(err, c.IsNil) t.Assert(list, c.HasLen, maxInactiveSlugReleases+2) distinctSlugs := make(map[string]struct{}, len(list)) for _, release := range list { files := 
release.FileArtifactIDs() t.Assert(files, c.HasLen, 1) distinctSlugs[files[0]] = struct{}{} } t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1) // check the first and third releases got deleted, but the rest remain assertDeleted := func(release *ct.Release, deleted bool) { _, err := client.GetRelease(release.ID) if deleted { t.Assert(err, c.Equals, controller.ErrNotFound) } else { t.Assert(err, c.IsNil) } } assertDeleted(releases[0], true) assertDeleted(releases[1], false) assertDeleted(releases[2], true) assertDeleted(releases[3], false) assertDeleted(releases[4], false) assertDeleted(releases[5], false) assertDeleted(&newRelease, false) // check the first slug got deleted, but the rest remain s.assertURI(t, slugs[0], http.StatusNotFound) for i := 1; i < len(slugs); i++ { s.assertURI(t, slugs[i], http.StatusOK) } }
// testSireniaDeploy checks that an app using the "sirenia" deployment
// strategy can be deployed while the cluster state moves through an expected
// sequence of states.
//
// It creates an app named d.name from a modified copy of the release of
// d.db.appName, scales it to d.sireniaJobs database processes and d.webJobs
// "web" processes, waits for the cluster to be ready, then deploys a fresh
// copy of the release. During the deploy, the cluster states published as
// discoverd service metadata are matched in order against
// d.expected(oldRelease, newRelease). The database is asserted writeable both
// before and after the deploy.
func testSireniaDeploy(client controller.Client, disc *discoverd.Client, t *c.C, d *sireniaDeploy) {
	// create app
	app := &ct.App{Name: d.name, Strategy: "sirenia"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default app, pointing it at a service named after
	// this deploy and removing SINGLETON from the database process env
	release, err := client.GetAppRelease(d.db.appName)
	t.Assert(err, c.IsNil)
	release.ID = ""
	release.Env[d.db.hostKey] = fmt.Sprintf("leader.%s.discoverd", d.name)
	release.Env[d.db.serviceKey] = d.name
	procName := release.Env["SIRENIA_PROCESS"]
	proc := release.Processes[procName]
	delete(proc.Env, "SINGLETON")
	proc.Service = d.name
	release.Processes[procName] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation (the discoverd and job event streams are opened
	// before the formation is put so events caused by it are not missed)
	discEvents := make(chan *discoverd.Event)
	discService := disc.Service(d.name)
	discStream, err := discService.Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{procName: d.sireniaJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	//
	// stateChange carries either a decoded cluster state or the error hit
	// while decoding it.
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		// decode every service metadata event into a cluster state and
		// forward it on stateCh; stateCh is unbuffered so each send waits
		// for the test to receive it
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			// NOTE: this variable shadows the state package for the rest
			// of the loop body
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				// report the decode error and stop watching
				stateCh <- stateChange{err: err}
				return
			}
			// collect node addresses purely for the debug line below
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got cluster state: index=%d primary=%s sync=%s async=%s", event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var sireniaState state.State
	var webJobs int
	// ready reports whether all web jobs are up and the cluster has a
	// primary, plus a sync when more than one database job was requested,
	// plus d.sireniaJobs-2 asyncs when more than two were requested
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if sireniaState.Primary == nil {
			return false
		}
		if d.sireniaJobs > 1 && sireniaState.Sync == nil {
			return false
		}
		if d.sireniaJobs > 2 && len(sireniaState.Async) != d.sireniaJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			sireniaState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			// the timer is recreated on every iteration, so this fires
			// after 30s without any event rather than 30s in total
			t.Fatal("timed out waiting for formation")
		}
	}

	// wait for the primary to indicate downstream replication sync
	debug(t, "waiting for primary to indicate downstream replication sync")
	sireniaClient := sc.NewClient(sireniaState.Primary.Addr)
	t.Assert(sireniaClient.WaitForReplSync(sireniaState.Sync, 1*time.Minute), c.IsNil)

	// connect to the db and run any initialisation required to later test writes
	debug(t, "initialising db")
	if d.db.initDb != nil {
		d.db.initDb(t, release, d)
	}

	// check currently writeable
	d.db.assertWriteable(t, release, d)

	// check a deploy completes with expected cluster state changes
	//
	// clearing the ID and creating the release again yields a new release
	// with the same content, which is what gets deployed
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is in the remaining states
	// that were expected, so handles the fact that some states don't happen, but the
	// states that do happen are expected and in-order.
	//
	// It returns the index within remaining of the state that matched (i.e.
	// the number of expected states that were skipped), and fails the test
	// if no remaining state matches.
	assertNextState := func(remaining []expectedSireniaState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with less asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		// logf records why a candidate expected state was passed over
		logf := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
		// compare the received state with each remaining expected state in
		// order, matching on the FLYNN_RELEASE_ID meta of each node
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				logf("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				// a state without a sync only matches an expectation
				// without a sync
				if expected.Sync == "" {
					return i
				}
				logf("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				logf("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				// likewise, a state without asyncs only matches an
				// expectation without asyncs
				if expected.Async == nil {
					return i
				}
				logf("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				logf("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			// NOTE: i and release are shadowed inside this inner loop; the
			// "return i" after it refers to the outer index into remaining
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					logf("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected state")
		return -1
	}
	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
	// consume deployment and job events until the deployment completes;
	// this select has no timeout case, so it relies on the deployment
	// eventually reporting "complete" or "failed" or a stream closing
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			// only up and down job events trigger a cluster state check
			if e.JobState != ct.JobStateUp && e.JobState != ct.JobStateDown {
				continue
			}
			if e.JobType == procName {
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				// advance past the matched state and any that were skipped
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			}
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("unexpected close of job event stream: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			// count web jobs which came up for the new release
			if e.Type == "web" && e.State == ct.JobStateUp && e.ReleaseID == newRelease {
				newWebJobs++
			}
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now deploy is complete
	d.db.assertWriteable(t, release, d)
}
func (s *SchedulerSuite) TestScaleTags(t *c.C) { // ensure we have more than 1 host to test with hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) if len(hosts) <= 1 { t.Skip("not enough hosts to test tagged based scheduling") } // stream the scheduler leader log so we can synchronize tag changes leader, err := s.discoverdClient(t).Service("controller-scheduler").Leader() t.Assert(err, c.IsNil) client := s.controllerClient(t) res, err := client.GetAppLog("controller", &ct.LogOpts{ Follow: true, JobID: leader.Meta["FLYNN_JOB_ID"], ProcessType: typeconv.StringPtr("scheduler"), Lines: typeconv.IntPtr(0), }) t.Assert(err, c.IsNil) defer res.Close() tagChange := make(chan struct{}) go func() { dec := json.NewDecoder(res) for { var msg logaggc.Message if err := dec.Decode(&msg); err != nil { return } if strings.Contains(msg.Msg, "host tags changed") { tagChange <- struct{}{} } } }() waitSchedulerTagChange := func() { select { case <-tagChange: return case <-time.After(10 * time.Second): t.Fatalf("timed out waiting for scheduler leader to see tag change") } } // watch service events so we can wait for tag changes events := make(chan *discoverd.Event) stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events) t.Assert(err, c.IsNil) defer stream.Close() waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event { for { select { case event, ok := <-events: if !ok { t.Fatalf("service event stream closed unexpectedly: %s", stream.Err()) } if event.Kind == kind { return event } case <-time.After(10 * time.Second): t.Fatalf("timed out waiting for service %s event", kind) } } } // wait for the watch to be current before changing tags waitServiceEvent(discoverd.EventKindCurrent) updateTags := func(host *cluster.Host, tags map[string]string) { debugf(t, "setting host tags: %s => %v", host.ID(), tags) t.Assert(host.UpdateTags(tags), c.IsNil) event := waitServiceEvent(discoverd.EventKindUpdate) t.Assert(event.Instance.Meta["id"], c.Equals, 
host.ID()) for key, val := range tags { t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val) } waitSchedulerTagChange() } // create an app with a tagged process and watch job events app, release := s.createApp(t) formation := &ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Tags: map[string]map[string]string{"printer": {"active": "true"}}, } watcher, err := client.WatchJobEvents(app.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() // add tag to host 1 host1 := hosts[0] updateTags(host1, map[string]string{"active": "true"}) // start jobs debug(t, "scaling printer=2") formation.Processes = map[string]int{"printer": 2} t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil) assertHostJobCounts := func(expected map[string]int) { jobs, err := client.JobList(app.ID) t.Assert(err, c.IsNil) actual := make(map[string]int) for _, job := range jobs { if job.State == ct.JobStateUp { actual[job.HostID]++ } } t.Assert(actual, c.DeepEquals, expected) } // check all jobs on host 1 assertHostJobCounts(map[string]int{host1.ID(): 2}) // add tag to host 2 host2 := hosts[1] updateTags(host2, map[string]string{"active": "true"}) // scale up debug(t, "scaling printer=4") formation.Processes["printer"] = 4 t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil) // check jobs distributed across hosts 1 and 2 assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2}) // remove tag from host 2 updateTags(host2, map[string]string{"active": ""}) // check jobs are moved to host1 jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{ ct.JobStateDown: 2, ct.JobStateUp: 2, }} t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host1.ID(): 4}) // remove tag from host 1 updateTags(host1, map[string]string{"active": ""}) assertStateCounts := 
func(expected map[ct.JobState]int) { jobs, err := client.JobList(app.ID) t.Assert(err, c.IsNil) actual := make(map[ct.JobState]int) for _, job := range jobs { actual[job.State]++ } t.Assert(actual, c.DeepEquals, expected) } // check 4 pending jobs, rest are stopped t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil) assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6}) // re-add tag to host 1 updateTags(host1, map[string]string{"active": "true"}) // check pending jobs are started on host 1 t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host1.ID(): 4}) assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6}) // add different tag to host 2 updateTags(host2, map[string]string{"disk": "ssd"}) // update formation tags, check jobs are moved to host 2 debug(t, "updating formation tags to disk=ssd") formation.Tags["printer"] = map[string]string{"disk": "ssd"} t.Assert(client.PutFormation(formation), c.IsNil) jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{ ct.JobStateDown: 4, ct.JobStateUp: 4, }} t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host2.ID(): 4}) assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10}) // scale down stops the jobs debug(t, "scaling printer=0") formation.Processes = nil t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil) assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14}) }
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) { app, release := s.createApp(t) client := s.controllerClient(t) debug(t, "scaling to blocker=1") watcher, err := client.WatchJobEvents(app.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: map[string]int{"blocker": 1}, }), c.IsNil) var jobID string err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error { jobID = job.ID return nil }) t.Assert(err, c.IsNil) jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second) t.Assert(err, c.IsNil) t.Assert(jobs, c.HasLen, 1) jobAddr := jobs[0].Addr debug(t, "subscribing to backend events from all routers") routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second) t.Assert(err, c.IsNil) routerEvents := make(chan *router.StreamEvent) for _, r := range routers { events := make(chan *router.StreamEvent) stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{ EventTypes: []router.EventType{ router.EventTypeBackendUp, router.EventTypeBackendDown, router.EventTypeBackendDrained, }, }, events) t.Assert(err, c.IsNil) defer stream.Close() go func(router *discoverd.Instance) { for event := range events { if event.Backend != nil && event.Backend.JobID == jobID { debugf(t, "got %s router event from %s", event.Event, router.Host()) routerEvents <- event } } }(r) } debug(t, "adding HTTP route with backend drain enabled") route := &router.HTTPRoute{ Domain: random.String(32) + ".com", Service: "test-http-blocker", DrainBackends: true, } t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil) waitForRouterEvents := func(typ router.EventType) { debugf(t, "waiting for %d router %s events", len(routers), typ) count := 0 for { select { case event := <-routerEvents: if event.Event != typ { t.Fatal("expected %s router event, got %s", typ, event.Event) } count++ if count 
== len(routers) { return } case <-time.After(30 * time.Second): t.Fatalf("timed out waiting for router %s events", typ) } } } waitForRouterEvents(router.EventTypeBackendUp) debug(t, "making blocked HTTP request through each router") reqErrs := make(chan error) for _, router := range routers { req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil) t.Assert(err, c.IsNil) req.Host = route.Domain res, err := http.DefaultClient.Do(req) t.Assert(err, c.IsNil) t.Assert(res.StatusCode, c.Equals, http.StatusOK) go func() { defer res.Body.Close() data, err := ioutil.ReadAll(res.Body) if err == nil && !bytes.Equal(data, []byte("done")) { err = fmt.Errorf("unexpected response: %q", data) } reqErrs <- err }() } debug(t, "scaling to blocker=0") t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: map[string]int{"blocker": 0}, }), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil) waitForRouterEvents(router.EventTypeBackendDown) debug(t, "checking new HTTP requests return 503") for _, router := range routers { req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil) t.Assert(err, c.IsNil) req.Host = route.Domain res, err := http.DefaultClient.Do(req) t.Assert(err, c.IsNil) res.Body.Close() t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable) } debug(t, "checking blocked HTTP requests are still blocked") select { case err := <-reqErrs: t.Fatal(err) default: } debug(t, "unblocking HTTP requests") res, err := http.Get("http://" + jobAddr + "/unblock") t.Assert(err, c.IsNil) t.Assert(res.StatusCode, c.Equals, http.StatusOK) debug(t, "checking the blocked HTTP requests completed without error") for range routers { if err := <-reqErrs; err != nil { t.Fatal(err) } } waitForRouterEvents(router.EventTypeBackendDrained) debug(t, "waiting for the job to exit") t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, 
scaleTimeout, nil), c.IsNil) }