func (s *VolumeSuite) TestInterhostVolumeTransmitAPI(t *c.C) { hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) if len(hosts) < 2 { t.Skip("need multiple hosts for this test") } s.doVolumeTransmitAPI(hosts[0], hosts[1], t) }
func (s *BlobstoreSuite) TestBlobstoreBackendAzure(t *c.C) { s3Config := os.Getenv("BLOBSTORE_AZURE_CONFIG") if s3Config == "" { // BLOBSTORE_AZURE_CONFIG should be set to a valid configuration like: // backend=azure account_name=xxx account_key=xxx container=blobstore-ci t.Skip("missing BLOBSTORE_AZURE_CONFIG env var") } s.testBlobstoreBackend(t, "azure", ".+blob.core.windows.net.+", `"BACKEND_AZURE=$BLOBSTORE_AZURE_CONFIG"`) }
func (s *BlobstoreSuite) TestBlobstoreBackendS3(t *c.C) { s3Config := os.Getenv("BLOBSTORE_S3_CONFIG") if s3Config == "" { // BLOBSTORE_S3_CONFIG should be set to a valid configuration like: // backend=s3 access_key_id=xxx secret_access_key=xxx bucket=blobstore-ci region=us-east-1 t.Skip("missing BLOBSTORE_S3_CONFIG env var") } s.testBlobstoreBackend(t, "s3", ".+s3.amazonaws.com.+", `"BACKEND_S3=$BLOBSTORE_S3_CONFIG"`) }
func (s *ZDiscoverdSuite) TestPromoteDemote(t *c.C) { if testCluster == nil { t.Skip("cannot boot new hosts") } // ensure we have 3 node cluster, TODO(jpg): Support running test on anything larger than 2 node cluster hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) if len(hosts) != 3 { t.Skip("promotion and demotion tests require a 3 node cluster") } // Check the original number of peers is correct initialPeers, err := s.discoverdClient(t).RaftPeers() t.Assert(err, c.IsNil) t.Assert(len(initialPeers), c.Equals, 3) // Add a new host to the cluster, initially it will join as a proxy newHost := s.addHost(t, "discoverd") defer s.removeHost(t, newHost, "discoverd") // Ping the new node until it comes up url := "http://" + newHost.IP + ":1111" dd := discoverd.NewClientWithURL(url) err = pingAttempts.Run(func() error { return dd.Ping(url) }) t.Assert(err, c.IsNil) // Promote the new node to a Raft member err = dd.Promote(url) t.Assert(err, c.IsNil) // Check that we now have one additional peer, also ensure our new peer is in the list newPeers, err := s.discoverdClient(t).RaftPeers() t.Assert(err, c.IsNil) t.Assert(len(newPeers), c.Equals, 4) t.Assert(peerPresent(newHost, newPeers), c.Equals, true) // Now demote the newly promoted node err = dd.Demote(url) t.Assert(err, c.IsNil) //XXX(jpg): Better way to wait for leadership? time.Sleep(2 * time.Second) // We are going to ask the leader for the list of peers as it's definitely canonical leader, err := s.discoverdClient(t).RaftLeader() t.Assert(err, c.IsNil) dd = discoverd.NewClientWithURL(leader.Host) // There should now be only the original peers, additionally make sure our host isn't one of them finalPeers, err := dd.RaftPeers() t.Assert(err, c.IsNil) t.Assert(len(finalPeers), c.Equals, 3) t.Assert(peerPresent(newHost, finalPeers), c.Equals, false) }
func (s *BlobstoreSuite) TestBlobstoreBackendGCS(t *c.C) { gcsConfig := os.Getenv("BLOBSTORE_GCS_CONFIG") if gcsConfig == "" { // BLOBSTORE_S3_CONFIG should be set to a JSON-encoded Google Cloud // Service Account key that includes an extra field named "bucket" that // specifies the bucket to use t.Skip("missing BLOBSTORE_GCS_CONFIG env var") } var data struct{ Bucket string } err := json.Unmarshal([]byte(gcsConfig), &data) t.Assert(err, c.IsNil) s.testBlobstoreBackend(t, "gcs", ".+google.+", fmt.Sprintf(`"BACKEND_GCS=backend=gcs bucket=%s"`, data.Bucket), `"BACKEND_GCS_KEY=$BLOBSTORE_GCS_CONFIG"`) }
func (s *ZZBackupSuite) TestClusterBackups(t *c.C) { if args.BootConfig.BackupsDir == "" { t.Skip("--backups-dir not set") } backups, err := ioutil.ReadDir(args.BootConfig.BackupsDir) t.Assert(err, c.IsNil) if len(backups) == 0 { t.Fatal("backups dir is empty") } for i, backup := range backups { s.testClusterBackup(t, i, filepath.Join(args.BootConfig.BackupsDir, backup.Name())) } }
func (s *ZDiscoverdSuite) TestDeploy(t *c.C) { // ensure we have enough hosts in the cluster hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) if len(hosts) <= 1 { t.Skip("cannot deploy discoverd in a single node cluster") } client := s.controllerClient(t) app, err := client.GetApp("discoverd") t.Assert(err, c.IsNil) release, err := client.GetAppRelease(app.ID) t.Assert(err, c.IsNil) release.ID = "" t.Assert(client.CreateRelease(release), c.IsNil) deployment, err := client.CreateDeployment(app.ID, release.ID) t.Assert(err, c.IsNil) events := make(chan *ct.DeploymentEvent) stream, err := client.StreamDeployment(deployment, events) t.Assert(err, c.IsNil) defer stream.Close() loop: for { select { case event, ok := <-events: if !ok { t.Fatal("unexpected close of deployment event stream") } if event.Status == "complete" { debugf(t, "got deployment event: %s", event.Status) break loop } if event.Status == "failed" { t.Fatal("the deployment failed") } debugf(t, "got deployment event: %s %s", event.JobType, event.JobState) case <-time.After(time.Duration(app.DeployTimeout) * time.Second): t.Fatal("timed out waiting for deployment event") } } }
func (s *SchedulerSuite) TestOmniJobs(t *c.C) { if testCluster == nil { t.Skip("cannot boot new hosts") } app, release := s.createApp(t) watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() formation := &ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: make(map[string]int), } current := make(map[string]int) updates := []map[string]int{ {"printer": 2}, {"printer": 3, "omni": 2}, {"printer": 1, "omni": 1}, } for _, procs := range updates { debugf(t, "scaling formation to %v", procs) formation.Processes = procs t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil) expected := s.controllerClient(t).ExpectedScalingEvents(current, procs, release.Processes, testCluster.Size()) err = watcher.WaitFor(expected, scaleTimeout, nil) t.Assert(err, c.IsNil) current = procs } // Check that new hosts get omni jobs newHosts := s.addHosts(t, 2, false, "router-api") defer s.removeHosts(t, newHosts, "router-api") err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: 2}}, scaleTimeout, nil) t.Assert(err, c.IsNil) }
func (s *DeployerSuite) TestOmniProcess(t *c.C) { if testCluster == nil { t.Skip("cannot determine test cluster size") } // create and scale an omni release omniScale := 2 totalJobs := omniScale * testCluster.Size() client := s.controllerClient(t) app, release := s.createApp(t) watcher, err := client.WatchJobEvents(app.Name, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: map[string]int{"omni": omniScale}, }), c.IsNil) err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil) t.Assert(err, c.IsNil) // deploy using all-at-once and check we get the correct events app.Strategy = "all-at-once" t.Assert(client.UpdateApp(app), c.IsNil) release.ID = "" t.Assert(client.CreateRelease(release), c.IsNil) deployment, err := client.CreateDeployment(app.ID, release.ID) t.Assert(err, c.IsNil) events := make(chan *ct.DeploymentEvent) stream, err := client.StreamDeployment(deployment, events) t.Assert(err, c.IsNil) defer stream.Close() expected := make([]*ct.Job, 0, 3*totalJobs+1) appendEvents := func(releaseID string, state ct.JobState, count int) { for i := 0; i < count; i++ { expected = append(expected, &ct.Job{ ReleaseID: releaseID, Type: "omni", State: state, }) } } appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs) appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs) s.waitForDeploymentStatus(t, events, "complete") // deploy using one-by-one and check we get the correct events app.Strategy = "one-by-one" t.Assert(client.UpdateApp(app), c.IsNil) release.ID = "" t.Assert(client.CreateRelease(release), c.IsNil) // try creating the deployment multiple times to avoid getting a // "Cannot create deploy, one is already in progress" error (there // is no guarantee the previous deploy has finished yet) attempts := attempt.Strategy{Total: 10 * time.Second, Delay: 100 * time.Millisecond} err = attempts.Run(func() (err error) { deployment, err = client.CreateDeployment(app.ID, release.ID) return }) t.Assert(err, c.IsNil) events = make(chan *ct.DeploymentEvent) stream, err = client.StreamDeployment(deployment, events) t.Assert(err, c.IsNil) expected = make([]*ct.Job, 0, 4*totalJobs+1) appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size()) appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size()) s.waitForDeploymentStatus(t, events, "complete") }
func (s *ReleaseSuite) TestReleaseImages(t *c.C) { if testCluster == nil { t.Skip("cannot boot release cluster") } // stream script output to t.Log logReader, logWriter := io.Pipe() defer logWriter.Close() go func() { buf := bufio.NewReader(logReader) for { line, err := buf.ReadString('\n') if err != nil { return } debug(t, line[0:len(line)-1]) } }() // boot the release cluster, release components to a blobstore and output the new images.json releaseCluster := s.addReleaseHosts(t) buildHost := releaseCluster.Instances[0] var imagesJSON bytes.Buffer var script bytes.Buffer slugImageID := random.UUID() releaseScript.Execute(&script, struct{ ControllerKey, SlugImageID string }{releaseCluster.ControllerKey, slugImageID}) t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: &imagesJSON, Stderr: logWriter}), c.IsNil) var images map[string]*ct.Artifact t.Assert(json.Unmarshal(imagesJSON.Bytes(), &images), c.IsNil) // install Flynn from the blobstore on the vanilla host blobstoreAddr := buildHost.IP + ":8080" installHost := releaseCluster.Instances[3] script.Reset() installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr}) var installOutput bytes.Buffer out := io.MultiWriter(logWriter, &installOutput) t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil) // check the flynn-host version is correct var hostVersion bytes.Buffer t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil) t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20161108.0-test") // check rebuilt images were downloaded assertInstallOutput := func(format string, v ...interface{}) { expected := fmt.Sprintf(format, v...) if !strings.Contains(installOutput.String(), expected) { t.Fatalf(`expected install to output %q`, expected) } } for name, image := range images { assertInstallOutput("pulling %s image", name) for _, layer := range image.Manifest().Rootfs[0].Layers { assertInstallOutput("pulling %s layer %s", name, layer.ID) } } // installing on an instance with Flynn running should fail script.Reset() installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr}) installOutput.Reset() err := buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}) if err == nil || !strings.Contains(installOutput.String(), "ERROR: Flynn is already installed.") { t.Fatal("expected Flynn install to fail but it didn't") } // create a controller client for the release cluster pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin) t.Assert(err, c.IsNil) client, err := controller.NewClientWithConfig( "https://"+buildHost.IP, releaseCluster.ControllerKey, controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain}, ) t.Assert(err, c.IsNil) // deploy a slug based app + Redis resource slugApp := &ct.App{} t.Assert(client.CreateApp(slugApp), c.IsNil) gitreceive, err := client.GetAppRelease("gitreceive") t.Assert(err, c.IsNil) imageArtifact, err := client.GetArtifact(gitreceive.Env["SLUGRUNNER_IMAGE_ID"]) t.Assert(err, c.IsNil) slugArtifact, err := client.GetArtifact(slugImageID) t.Assert(err, c.IsNil) resource, err := client.ProvisionResource(&ct.ResourceReq{ProviderID: "redis", Apps: []string{slugApp.ID}}) t.Assert(err, c.IsNil) release := &ct.Release{ ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID}, Processes: map[string]ct.ProcessType{"web": {Args: []string{"/runner/init", "bin/http"}}}, Meta: map[string]string{"git": "true"}, Env: resource.Env, } t.Assert(client.CreateRelease(release), c.IsNil) t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil) watcher, err := client.WatchJobEvents(slugApp.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: slugApp.ID, ReleaseID: release.ID, Processes: map[string]int{"web": 1}, }), c.IsNil) err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil) t.Assert(err, c.IsNil) // run a cluster update from the blobstore updateHost := releaseCluster.Instances[1] script.Reset() updateScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr, "Discoverd": updateHost.IP + ":1111"}) var updateOutput bytes.Buffer out = io.MultiWriter(logWriter, &updateOutput) t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil) // check rebuilt images were downloaded for name := range images { for _, host := range releaseCluster.Instances[0:2] { expected := fmt.Sprintf(`"pulling %s image" host=%s`, name, host.ID) if !strings.Contains(updateOutput.String(), expected) { t.Fatalf(`expected update to download %s on host %s`, name, host.ID) } } } assertImage := func(uri, image string) { t.Assert(uri, c.Equals, images[image].URI) } // check system apps were deployed correctly for _, app := range updater.SystemApps { if app.ImageOnly { continue // we don't deploy ImageOnly updates } debugf(t, "checking new %s release is using image %s", app.Name, images[app.Name].URI) expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name) if !strings.Contains(updateOutput.String(), expected) { t.Fatalf(`expected update to deploy %s`, app.Name) } release, err := client.GetAppRelease(app.Name) t.Assert(err, c.IsNil) debugf(t, "new %s release ID: %s", app.Name, release.ID) artifact, err := client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) debugf(t, "new %s artifact: %+v", app.Name, artifact) assertImage(artifact.URI, app.Name) } // check gitreceive has the correct slug env vars gitreceive, err = client.GetAppRelease("gitreceive") t.Assert(err, c.IsNil) for _, name := range []string{"slugbuilder", "slugrunner"} { artifact, err := client.GetArtifact(gitreceive.Env[strings.ToUpper(name)+"_IMAGE_ID"]) t.Assert(err, c.IsNil) assertImage(artifact.URI, name) } // check slug based app was deployed correctly release, err = client.GetAppRelease(slugApp.Name) t.Assert(err, c.IsNil) imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) assertImage(imageArtifact.URI, "slugrunner") // check Redis app was deployed correctly release, err = client.GetAppRelease(resource.Env["FLYNN_REDIS"]) t.Assert(err, c.IsNil) imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0]) t.Assert(err, c.IsNil) assertImage(imageArtifact.URI, "redis") }
func (s *DeployerSuite) TestOmniProcess(t *c.C) { if testCluster == nil { t.Skip("cannot determine test cluster size") } // create and scale an omni release omniScale := 2 totalJobs := omniScale * testCluster.Size() client := s.controllerClient(t) app, release := s.createApp(t) watcher, err := client.WatchJobEvents(app.Name, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() t.Assert(client.PutFormation(&ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Processes: map[string]int{"omni": omniScale}, }), c.IsNil) err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil) t.Assert(err, c.IsNil) // deploy using all-at-once and check we get the correct events app.Strategy = "all-at-once" t.Assert(client.UpdateApp(app), c.IsNil) release.ID = "" t.Assert(client.CreateRelease(release), c.IsNil) deployment, err := client.CreateDeployment(app.ID, release.ID) t.Assert(err, c.IsNil) events := make(chan *ct.DeploymentEvent) stream, err := client.StreamDeployment(deployment, events) t.Assert(err, c.IsNil) defer stream.Close() expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1) appendEvents := func(releaseID string, state ct.JobState, count int) { for i := 0; i < count; i++ { event := &ct.DeploymentEvent{ ReleaseID: releaseID, JobType: "omni", JobState: state, Status: "running", } expected = append(expected, event) } } expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"}) appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs) appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs) appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs) appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs) expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"}) waitForDeploymentEvents(t, events, expected) // deploy using one-by-one and check we get the correct events app.Strategy = "one-by-one" t.Assert(client.UpdateApp(app), c.IsNil) release.ID = "" t.Assert(client.CreateRelease(release), c.IsNil) deployment, err = client.CreateDeployment(app.ID, release.ID) t.Assert(err, c.IsNil) events = make(chan *ct.DeploymentEvent) stream, err = client.StreamDeployment(deployment, events) t.Assert(err, c.IsNil) expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1) expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"}) appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size()) appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size()) appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size()) appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size()) appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size()) expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"}) waitForDeploymentEvents(t, events, expected) }
func (s *HostUpdateSuite) TestUpdateLogs(t *c.C) { if testCluster == nil { t.Skip("cannot boot new hosts") } instance := s.addHost(t, "router-api") defer s.removeHost(t, instance, "router-api") httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}} client := cluster.NewHost(instance.ID, fmt.Sprintf("http://%s:1113", instance.IP), httpClient, nil) // start partial logger job cmd := exec.JobUsingHost( client, exec.DockerImage(imageURIs["test-apps"]), &host.Job{ Config: host.ContainerConfig{Args: []string{"/bin/partial-logger"}}, Metadata: map[string]string{ "flynn-controller.app": "partial-logger", }, }, ) t.Assert(cmd.Start(), c.IsNil) defer cmd.Kill() // wait for partial line _, err := s.discoverdClient(t).Instances("partial-logger", 10*time.Second) t.Assert(err, c.IsNil) // update flynn-host pid, err := client.Update("/usr/local/bin/flynn-host", "daemon", "--id", cmd.HostID) t.Assert(err, c.IsNil) // update the pid file so removeHost works t.Assert(instance.Run(fmt.Sprintf("echo -n %d | sudo tee /var/run/flynn-host.pid", pid), nil), c.IsNil) // stream the log from the logaggregator logc, err := logaggc.New("") t.Assert(err, c.IsNil) log, err := logc.GetLog("partial-logger", &logaggc.LogOpts{Follow: true}) t.Assert(err, c.IsNil) defer log.Close() msgs := make(chan *logaggc.Message) go func() { defer close(msgs) dec := json.NewDecoder(log) for { var msg logaggc.Message if err := dec.Decode(&msg); err != nil { debugf(t, "error decoding message: %s", err) return } msgs <- &msg } }() // finish logging t.Assert(client.SignalJob(cmd.Job.ID, int(syscall.SIGUSR1)), c.IsNil) // check we get a single log line for { select { case msg, ok := <-msgs: if !ok { t.Fatal("error getting log") } if msg.Stream == "stdout" { t.Assert(msg.Msg, c.Equals, "hello world") return } case <-time.After(10 * time.Second): t.Fatal("timed out waiting for log") } } }
func (s *SchedulerSuite) TestScaleTags(t *c.C) { // ensure we have more than 1 host to test with hosts, err := s.clusterClient(t).Hosts() t.Assert(err, c.IsNil) if len(hosts) <= 1 { t.Skip("not enough hosts to test tagged based scheduling") } // stream the scheduler leader log so we can synchronize tag changes leader, err := s.discoverdClient(t).Service("controller-scheduler").Leader() t.Assert(err, c.IsNil) client := s.controllerClient(t) res, err := client.GetAppLog("controller", &ct.LogOpts{ Follow: true, JobID: leader.Meta["FLYNN_JOB_ID"], ProcessType: typeconv.StringPtr("scheduler"), Lines: typeconv.IntPtr(0), }) t.Assert(err, c.IsNil) defer res.Close() tagChange := make(chan struct{}) go func() { dec := json.NewDecoder(res) for { var msg logaggc.Message if err := dec.Decode(&msg); err != nil { return } if strings.Contains(msg.Msg, "host tags changed") { tagChange <- struct{}{} } } }() waitSchedulerTagChange := func() { select { case <-tagChange: return case <-time.After(10 * time.Second): t.Fatalf("timed out waiting for scheduler leader to see tag change") } } // watch service events so we can wait for tag changes events := make(chan *discoverd.Event) stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events) t.Assert(err, c.IsNil) defer stream.Close() waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event { for { select { case event, ok := <-events: if !ok { t.Fatalf("service event stream closed unexpectedly: %s", stream.Err()) } if event.Kind == kind { return event } case <-time.After(10 * time.Second): t.Fatalf("timed out waiting for service %s event", kind) } } } // wait for the watch to be current before changing tags waitServiceEvent(discoverd.EventKindCurrent) updateTags := func(host *cluster.Host, tags map[string]string) { debugf(t, "setting host tags: %s => %v", host.ID(), tags) t.Assert(host.UpdateTags(tags), c.IsNil) event := waitServiceEvent(discoverd.EventKindUpdate) t.Assert(event.Instance.Meta["id"], c.Equals, host.ID()) for key, val := range tags { t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val) } waitSchedulerTagChange() } // create an app with a tagged process and watch job events app, release := s.createApp(t) formation := &ct.Formation{ AppID: app.ID, ReleaseID: release.ID, Tags: map[string]map[string]string{"printer": {"active": "true"}}, } watcher, err := client.WatchJobEvents(app.ID, release.ID) t.Assert(err, c.IsNil) defer watcher.Close() // add tag to host 1 host1 := hosts[0] updateTags(host1, map[string]string{"active": "true"}) // start jobs debug(t, "scaling printer=2") formation.Processes = map[string]int{"printer": 2} t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil) assertHostJobCounts := func(expected map[string]int) { jobs, err := client.JobList(app.ID) t.Assert(err, c.IsNil) actual := make(map[string]int) for _, job := range jobs { if job.State == ct.JobStateUp { actual[job.HostID]++ } } t.Assert(actual, c.DeepEquals, expected) } // check all jobs on host 1 assertHostJobCounts(map[string]int{host1.ID(): 2}) // add tag to host 2 host2 := hosts[1] updateTags(host2, map[string]string{"active": "true"}) // scale up debug(t, "scaling printer=4") formation.Processes["printer"] = 4 t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil) // check jobs distributed across hosts 1 and 2 assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2}) // remove tag from host 2 updateTags(host2, map[string]string{"active": ""}) // check jobs are moved to host1 jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{ ct.JobStateDown: 2, ct.JobStateUp: 2, }} t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host1.ID(): 4}) // remove tag from host 1 updateTags(host1, map[string]string{"active": ""}) assertStateCounts := func(expected map[ct.JobState]int) { jobs, err := client.JobList(app.ID) t.Assert(err, c.IsNil) actual := make(map[ct.JobState]int) for _, job := range jobs { actual[job.State]++ } t.Assert(actual, c.DeepEquals, expected) } // check 4 pending jobs, rest are stopped t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil) assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6}) // re-add tag to host 1 updateTags(host1, map[string]string{"active": "true"}) // check pending jobs are started on host 1 t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host1.ID(): 4}) assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6}) // add different tag to host 2 updateTags(host2, map[string]string{"disk": "ssd"}) // update formation tags, check jobs are moved to host 2 debug(t, "updating formation tags to disk=ssd") formation.Tags["printer"] = map[string]string{"disk": "ssd"} t.Assert(client.PutFormation(formation), c.IsNil) jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{ ct.JobStateDown: 4, ct.JobStateUp: 4, }} t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil) assertHostJobCounts(map[string]int{host2.ID(): 4}) assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10}) // scale down stops the jobs debug(t, "scaling printer=0") formation.Processes = nil t.Assert(client.PutFormation(formation), c.IsNil) t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil) assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14}) }