Example 1
func (s *HostSuite) TestAddFailingJob(t *c.C) {
	// get a host and watch events
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	h := hosts[0]
	jobID := random.UUID()
	events := make(chan *host.Event)
	stream, err := h.StreamEvents(jobID, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// add a job with a non-existent partition
	job := &host.Job{
		ID: jobID,
		ImageArtifact: &host.Artifact{
			Type: host.ArtifactTypeDocker,
			URI:  "http://example.com?name=foo&id=bar",
		},
		Partition: "nonexistent",
	}
	t.Assert(h.AddJob(job), c.IsNil)

	// check we get a create then error event
	actual := make(map[string]*host.Event, 2)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed unexpectedly: %s", stream.Err())
			}
			if _, ok := actual[e.Event]; ok {
				t.Fatalf("unexpected event: %v", e)
			}
			actual[e.Event] = e
			if len(actual) >= 2 {
				break loop
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for job event")
		}
	}
	t.Assert(actual[host.JobEventCreate], c.NotNil)
	e := actual[host.JobEventError]
	t.Assert(e, c.NotNil)
	t.Assert(e.Job, c.NotNil)
	t.Assert(e.Job.Error, c.NotNil)
	t.Assert(*e.Job.Error, c.Equals, `host: invalid job partition "nonexistent"`)
}
Example 2
func (s *DockerReceiveSuite) TestPushImage(t *c.C) {
	// build a Docker image
	repo := "docker-receive-test-push"
	s.buildDockerImage(t, repo, "RUN echo foo > /foo.txt")

	// subscribe to artifact events
	client := s.controllerClient(t)
	events := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		ObjectTypes: []ct.EventType{ct.EventTypeArtifact},
	}, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// push the Docker image to docker-receive
	u, err := url.Parse(s.clusterConf(t).DockerPushURL)
	t.Assert(err, c.IsNil)
	tag := fmt.Sprintf("%s/%s:latest", u.Host, repo)
	t.Assert(run(t, exec.Command("docker", "tag", "--force", repo, tag)), Succeeds)
	t.Assert(run(t, exec.Command("docker", "push", tag)), Succeeds)

	// wait for an artifact to be created
	var artifact ct.Artifact
loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
			}
			t.Assert(json.Unmarshal(event.Data, &artifact), c.IsNil)
			if artifact.Meta["docker-receive.repository"] == repo {
				break loop
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for artifact")
		}
	}

	// create a release with the Docker artifact
	app := &ct.App{}
	t.Assert(client.CreateApp(app), c.IsNil)
	release := &ct.Release{ArtifactIDs: []string{artifact.ID}}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)

	// check running a job uses the image
	t.Assert(flynn(t, "/", "-a", app.ID, "run", "cat", "/foo.txt"), SuccessfulOutputContains, "foo")
}
Example 3
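// waitForDeploymentStatus reads deployment events from the given channel,
// ignoring "pending" events, and returns the first non-pending event, failing
// the test if its status does not match the expected status or if no event
// arrives within 60 seconds.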
func (s *DeployerSuite) waitForDeploymentStatus(t *c.C, events chan *ct.DeploymentEvent, status string) *ct.DeploymentEvent {
	for {
		select {
		case event := <-events:
			// ignore pending status
			if event.Status == "pending" {
				continue
			}
			if event.Status != status {
				t.Fatalf("expected deploy %s event, got %s", status, event.Status)
			}
			return event
		case <-time.After(60 * time.Second):
			t.Fatalf("timed out waiting for deploy %s event", status)
		}
	}
}
Example 4
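// createDeployment creates a release for the given process type and strategy,
// optionally waits for two instances of the named service to register with
// service discovery, then creates and returns a deployment to a copy of that
// release.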
func (s *DeployerSuite) createDeployment(t *c.C, process, strategy, service string) *ct.Deployment {
	app, release := s.createRelease(t, process, strategy)

	if service != "" {
		debugf(t, "waiting for 2 %s services", service)
		events := make(chan *discoverd.Event)
		stream, err := s.discoverdClient(t).Service(service).Watch(events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		count := 0
	loop:
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service discovery stream closed unexpectedly")
				}
				if event.Kind == discoverd.EventKindUp {
					if id, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]; !ok || id != release.ID {
						continue
					}
					debugf(t, "got %s service up event", service)
					count++
				}
				if count == 2 {
					// although the services are up, give them a few more seconds
					// to make sure the deployer will also see them as up.
					time.Sleep(5 * time.Second)
					break loop
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for %s service to come up", service)
			}
		}
	}

	// create a new release for the deployment
	release.ID = ""
	t.Assert(s.controllerClient(t).CreateRelease(release), c.IsNil)

	deployment, err := s.controllerClient(t).CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	return deployment
}
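A minimal usage sketch tying the two helpers above together. It is illustrative only: the test name and the "one-by-one" strategy are assumptions, and it assumes the deployment stream emits only "pending" events before the final status; the "printer" process type, StreamDeployment and waitForDeploymentStatus come from the surrounding examples.

func (s *DeployerSuite) TestExampleDeploy(t *c.C) {
	// create a deployment for the "printer" process type with no discoverd
	// service to wait for ("one-by-one" is an assumed strategy name)
	deployment := s.createDeployment(t, "printer", "one-by-one", "")

	// stream the deployment events and wait for the deploy to complete
	events := make(chan *ct.DeploymentEvent)
	stream, err := s.controllerClient(t).StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	s.waitForDeploymentStatus(t, events, "complete")
}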
Example 5
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	startTimeout := 20 * time.Second

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	var id string
	var assignId = func(j *ct.Job) error {
		debugf(t, "got job event: %s %s", j.ID, j.State)
		id = j.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, scaleTimeout, assignId)
	t.Assert(err, c.IsNil)

	waitForRestart := func(duration time.Duration) {
		start := time.Now()
		s.stopJob(t, id)
		debugf(t, "expecting new job to start in %s", duration)
		err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, duration+startTimeout, assignId)
		t.Assert(err, c.IsNil)
		actual := time.Since(start)
		if actual < duration {
			t.Fatalf("expected new job to start after %s but started after %s", duration, actual)
		}
	}

	waitForRestart(0)
	waitForRestart(0)
	waitForRestart(0)
	waitForRestart(0)
}
Example 6
func (s *CLISuite) TestDockerExportImport(t *c.C) {
	// release via docker-receive
	client := s.controllerClient(t)
	app := &ct.App{Name: "cli-test-docker-export"}
	t.Assert(client.CreateApp(app), c.IsNil)
	repo := "cli-test-export"
	s.buildDockerImage(t, repo, `CMD ["/bin/pingserv"]`)
	t.Assert(flynn(t, "/", "-a", app.Name, "docker", "push", repo), Succeeds)
	t.Assert(flynn(t, "/", "-a", app.Name, "scale", "app=1"), Succeeds)
	defer flynn(t, "/", "-a", app.Name, "scale", "app=0")

	// grab the Flynn image layers
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	artifact, err := client.GetArtifact(release.ArtifactIDs[0])
	t.Assert(err, c.IsNil)
	layers := artifact.Manifest().Rootfs[0].Layers
	layerNames := make([]string, len(layers))
	for i, layer := range layers {
		layerNames[i] = layer.ID + ".layer"
	}

	// check exporting to stdout works
	file := filepath.Join(t.MkDir(), "export.tar")
	cmd := exec.Command("sh", "-c", fmt.Sprintf("%s -a %s export > %s", args.CLI, app.Name, file))
	cmd.Env = flynnEnv(flynnrc)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	if args.Stream {
		cmd.Stderr = io.MultiWriter(os.Stderr, &stderr)
	}
	if err := cmd.Run(); err != nil {
		t.Fatalf("error exporting docker app to stdout: %s: %s", err, stderr.String())
	}
	exportFiles := append([]string{
		"app.json", "routes.json", "release.json", "artifacts.json",
	}, append(layerNames, "formation.json")...)
	assertExportContains(t, file, exportFiles...)

	// export the app directly to the file
	t.Assert(flynn(t, "/", "-a", app.Name, "export", "-f", file), Succeeds)
	assertExportContains(t, file, exportFiles...)

	// delete the image from the registry
	u, err := url.Parse(s.clusterConf(t).DockerPushURL)
	t.Assert(err, c.IsNil)
	uri := fmt.Sprintf("http://%s/v2/%s/manifests/%s", u.Host, app.Name, artifact.Meta["docker-receive.digest"])
	req, err := http.NewRequest("DELETE", uri, nil)
	t.Assert(err, c.IsNil)
	req.SetBasicAuth("", s.clusterConf(t).Key)
	res, err := http.DefaultClient.Do(req)
	t.Assert(err, c.IsNil)
	res.Body.Close()

	// import to another app
	importApp := "cli-test-docker-import"
	t.Assert(flynn(t, "/", "import", "--name", importApp, "--file", file), Succeeds)
	defer flynn(t, "/", "-a", importApp, "scale", "app=0")

	// wait for it to start
	_, err = s.discoverdClient(t).Instances(importApp+"-web", 10*time.Second)
	t.Assert(err, c.IsNil)
}
Example 7
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := s.createArtifact(t, "test-apps")

	// create 5 slug artifacts
	tmp, err := ioutil.TempFile("", "squashfs-")
	t.Assert(err, c.IsNil)
	defer os.Remove(tmp.Name())
	defer tmp.Close()
	t.Assert(exec.Command("mksquashfs", t.MkDir(), tmp.Name(), "-noappend").Run(), c.IsNil)
	slug, err := ioutil.ReadAll(tmp)
	t.Assert(err, c.IsNil)
	slugHash := sha512.Sum512(slug)
	slugs := []string{
		"http://blobstore.discoverd/layer/1.squashfs",
		"http://blobstore.discoverd/layer/2.squashfs",
		"http://blobstore.discoverd/layer/3.squashfs",
		"http://blobstore.discoverd/layer/4.squashfs",
		"http://blobstore.discoverd/layer/5.squashfs",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	put := func(url string, data []byte) {
		req, err := http.NewRequest("PUT", url, bytes.NewReader(data))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
	}
	for i, layerURL := range slugs {
		manifest := &ct.ImageManifest{
			Type: ct.ImageManifestTypeV1,
			Rootfs: []*ct.ImageRootfs{{
				Layers: []*ct.ImageLayer{{
					ID:     strconv.Itoa(i + 1),
					Type:   ct.ImageLayerTypeSquashfs,
					Length: int64(len(slug)),
					Hashes: map[string]string{"sha512": hex.EncodeToString(slugHash[:])},
				}},
			}},
		}
		data := manifest.RawManifest()
		url := fmt.Sprintf("http://blobstore.discoverd/image/%s.json", manifest.ID())
		put(url, data)
		put(layerURL, slug)
		artifact := &ct.Artifact{
			Type:             ct.ArtifactTypeFlynn,
			URI:              url,
			Meta:             map[string]string{"blobstore": "true"},
			RawManifest:      data,
			Hashes:           manifest.Hashes(),
			Size:             int64(len(data)),
			LayerURLTemplate: "http://blobstore.discoverd/layer/{id}.squashfs",
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (which prevents that slug from being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
			Meta: map[string]string{"git": "true"},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slugs across the remaining releases (5 releases
	// in total, only 3 of which are inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		t.Assert(release.ArtifactIDs, c.HasLen, 2)
		distinctSlugs[release.ArtifactIDs[1]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
Example 8
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new images.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var imagesJSON bytes.Buffer
	var script bytes.Buffer
	slugImageID := random.UUID()
	releaseScript.Execute(&script, struct{ ControllerKey, SlugImageID string }{releaseCluster.ControllerKey, slugImageID})
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: &imagesJSON, Stderr: logWriter}), c.IsNil)
	var images map[string]*ct.Artifact
	t.Assert(json.Unmarshal(imagesJSON.Bytes(), &images), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstoreAddr := buildHost.IP + ":8080"
	installHost := releaseCluster.Instances[3]
	script.Reset()
	installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr})
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20161108.0-test")

	// check rebuilt images were downloaded
	assertInstallOutput := func(format string, v ...interface{}) {
		expected := fmt.Sprintf(format, v...)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to output %q`, expected)
		}
	}
	for name, image := range images {
		assertInstallOutput("pulling %s image", name)
		for _, layer := range image.Manifest().Rootfs[0].Layers {
			assertInstallOutput("pulling %s layer %s", name, layer.ID)
		}
	}

	// installing on an instance with Flynn running should fail
	script.Reset()
	installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr})
	installOutput.Reset()
	err := buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out})
	if err == nil || !strings.Contains(installOutput.String(), "ERROR: Flynn is already installed.") {
		t.Fatal("expected Flynn install to fail but it didn't")
	}

	// create a controller client for the release cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// deploy a slug based app + Redis resource
	slugApp := &ct.App{}
	t.Assert(client.CreateApp(slugApp), c.IsNil)
	gitreceive, err := client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	imageArtifact, err := client.GetArtifact(gitreceive.Env["SLUGRUNNER_IMAGE_ID"])
	t.Assert(err, c.IsNil)
	slugArtifact, err := client.GetArtifact(slugImageID)
	t.Assert(err, c.IsNil)
	resource, err := client.ProvisionResource(&ct.ResourceReq{ProviderID: "redis", Apps: []string{slugApp.ID}})
	t.Assert(err, c.IsNil)
	release := &ct.Release{
		ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID},
		Processes:   map[string]ct.ProcessType{"web": {Args: []string{"/runner/init", "bin/http"}}},
		Meta:        map[string]string{"git": "true"},
		Env:         resource.Env,
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil)
	watcher, err := client.WatchJobEvents(slugApp.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     slugApp.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script.Reset()
	updateScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr, "Discoverd": updateHost.IP + ":1111"})
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range images {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulling %s image" host=%s`, name, host.ID)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	assertImage := func(uri, image string) {
		t.Assert(uri, c.Equals, images[image].URI)
	}

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		if app.ImageOnly {
			continue // we don't deploy ImageOnly updates
		}
		debugf(t, "checking new %s release is using image %s", app.Name, images[app.Name].URI)
		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app.Name)
		}
		release, err := client.GetAppRelease(app.Name)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app.Name, release.ID)
		artifact, err := client.GetArtifact(release.ArtifactIDs[0])
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app.Name, artifact)
		assertImage(artifact.URI, app.Name)
	}

	// check gitreceive has the correct slug env vars
	gitreceive, err = client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	for _, name := range []string{"slugbuilder", "slugrunner"} {
		artifact, err := client.GetArtifact(gitreceive.Env[strings.ToUpper(name)+"_IMAGE_ID"])
		t.Assert(err, c.IsNil)
		assertImage(artifact.URI, name)
	}

	// check slug based app was deployed correctly
	release, err = client.GetAppRelease(slugApp.Name)
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0])
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "slugrunner")

	// check Redis app was deployed correctly
	release, err = client.GetAppRelease(resource.Env["FLYNN_REDIS"])
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0])
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "redis")
}
Example 9
func (s *HostSuite) TestNotifyOOM(t *c.C) {
	appID := random.UUID()

	// subscribe to init log messages from the logaggregator
	client, err := logaggc.New("")
	t.Assert(err, c.IsNil)
	opts := logagg.LogOpts{
		Follow:      true,
		StreamTypes: []logagg.StreamType{logagg.StreamTypeInit},
	}
	rc, err := client.GetLog(appID, &opts)
	t.Assert(err, c.IsNil)
	defer rc.Close()
	msgs := make(chan *logaggc.Message)
	stream := stream.New()
	defer stream.Close()
	go func() {
		defer close(msgs)
		dec := json.NewDecoder(rc)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				stream.Error = err
				return
			}
			select {
			case msgs <- &msg:
			case <-stream.StopCh:
				return
			}
		}
	}()

	// run the OOM job
	cmd := exec.CommandUsingCluster(
		s.clusterClient(t),
		s.createArtifact(t, "test-apps"),
		"/bin/oom",
	)
	cmd.Meta = map[string]string{"flynn-controller.app": appID}
	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()

	// wait for the OOM notification
	for {
		select {
		case err := <-runErr:
			t.Assert(err, c.IsNil)
		case msg, ok := <-msgs:
			if !ok {
				t.Fatalf("message stream closed unexpectedly: %s", stream.Err())
			}
			t.Log(msg.Msg)
			if strings.Contains(msg.Msg, "FATAL: a container process was killed due to lack of available memory") {
				return
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for OOM notification")
		}
	}
}
Example 10
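// migrateDomain runs a domain migration through the controller and asserts
// that the initial and final migration events arrive, that the controller,
// router and dashboard releases pick up the new domain and certificates, that
// a controller route exists for the new domain, and that the controller and
// dashboard respond to pings on it. It returns the completed migration.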
func (s *ZDomainMigrationSuite) migrateDomain(t *c.C, dm *ct.DomainMigration) *ct.DomainMigration {
	debugf(t, "migrating domain from %s to %s", dm.OldDomain, dm.Domain)
	client := s.controllerClient(t)

	events := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		ObjectTypes: []ct.EventType{ct.EventTypeDomainMigration},
	}, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	prevRouterRelease, err := client.GetAppRelease("router")
	t.Assert(err, c.IsNil)

	err = client.PutDomain(dm)
	t.Assert(err, c.IsNil)

	waitEvent := func(typ string, timeout time.Duration) (event ct.DomainMigrationEvent) {
		debugf(t, "waiting for %s domain migration event", typ)
		var e *ct.Event
		var ok bool
		select {
		case e, ok = <-events:
			if !ok {
				t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
			}
			debugf(t, "got %s domain migration event", typ)
		case <-time.After(timeout):
			t.Fatalf("timed out waiting for %s domain migration event", typ)
		}
		t.Assert(e.Data, c.NotNil)
		t.Assert(json.Unmarshal(e.Data, &event), c.IsNil)
		return
	}

	// created
	event := waitEvent("initial", 2*time.Minute)
	t.Assert(event.Error, c.Equals, "")
	t.Assert(event.DomainMigration, c.NotNil)
	t.Assert(event.DomainMigration.ID, c.Equals, dm.ID)
	t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain)
	t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain)
	t.Assert(event.DomainMigration.OldTLSCert, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true)
	t.Assert(event.DomainMigration.FinishedAt, c.IsNil)

	// complete
	event = waitEvent("final", 3*time.Minute)
	t.Assert(event.Error, c.Equals, "")
	t.Assert(event.DomainMigration, c.NotNil)
	t.Assert(event.DomainMigration.ID, c.Equals, dm.ID)
	t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain)
	t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain)
	t.Assert(event.DomainMigration.TLSCert, c.NotNil)
	t.Assert(event.DomainMigration.OldTLSCert, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true)
	t.Assert(event.DomainMigration.FinishedAt, c.NotNil)

	cert := event.DomainMigration.TLSCert

	controllerRelease, err := client.GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	t.Assert(controllerRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain)
	t.Assert(controllerRelease.Env["CA_CERT"], c.Equals, cert.CACert)

	routerRelease, err := client.GetAppRelease("router")
	t.Assert(err, c.IsNil)
	t.Assert(routerRelease.Env["TLSCERT"], c.Equals, cert.Cert)
	t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), "")
	t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), prevRouterRelease.Env["TLSKEY"])

	dashboardRelease, err := client.GetAppRelease("dashboard")
	t.Assert(err, c.IsNil)
	t.Assert(dashboardRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain)
	t.Assert(dashboardRelease.Env["CONTROLLER_DOMAIN"], c.Equals, fmt.Sprintf("controller.%s", dm.Domain))
	t.Assert(dashboardRelease.Env["URL"], c.Equals, fmt.Sprintf("https://dashboard.%s", dm.Domain))
	t.Assert(dashboardRelease.Env["CA_CERT"], c.Equals, cert.CACert)

	routes, err := client.RouteList("controller")
	t.Assert(err, c.IsNil)
	t.Assert(len(routes), c.Equals, 2) // one for the new domain and one for the old
	var route *router.Route
	for _, r := range routes {
		if strings.HasSuffix(r.Domain, dm.Domain) {
			route = r
			break
		}
	}
	t.Assert(route, c.Not(c.IsNil))
	t.Assert(route.Domain, c.Equals, fmt.Sprintf("controller.%s", dm.Domain))
	t.Assert(route.Certificate.Cert, c.Equals, strings.TrimSuffix(cert.Cert, "\n"))

	var doPing func(string, int)
	doPing = func(component string, retriesRemaining int) {
		url := fmt.Sprintf("http://%s.%s/ping", component, dm.Domain)
		httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}}
		res, err := httpClient.Get(url)
		if (err != nil || res.StatusCode != 200) && retriesRemaining > 0 {
			time.Sleep(100 * time.Millisecond)
			doPing(component, retriesRemaining-1)
			return
		}
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, 200, c.Commentf("failed to ping %s", component))
	}
	doPing("controller", 3)
	doPing("dashboard", 3)

	return event.DomainMigration
}
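A minimal caller sketch for the helper above. The old domain variable and the generated replacement domain are placeholders, and it assumes client.PutDomain populates dm.CreatedAt, which the helper later dereferences.

	dm := &ct.DomainMigration{
		OldDomain: oldDomain,                  // placeholder: the cluster's current default route domain
		Domain:    random.String(32) + ".dev", // placeholder replacement domain
	}
	dm = s.migrateDomain(t, dm)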
Example 11
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: imageURIs["test-apps"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)

	// create 5 slug artifacts
	var slug bytes.Buffer
	gz := gzip.NewWriter(&slug)
	t.Assert(tar.NewWriter(gz).Close(), c.IsNil)
	t.Assert(gz.Close(), c.IsNil)
	slugs := []string{
		"http://blobstore.discoverd/1/slug.tgz",
		"http://blobstore.discoverd/2/slug.tgz",
		"http://blobstore.discoverd/3/slug.tgz",
		"http://blobstore.discoverd/4/slug.tgz",
		"http://blobstore.discoverd/5/slug.tgz",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	for i, uri := range slugs {
		req, err := http.NewRequest("PUT", uri, bytes.NewReader(slug.Bytes()))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		artifact := &ct.Artifact{
			Type: host.ArtifactTypeFile,
			URI:  uri,
			Meta: map[string]string{"blobstore": "true"},
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (which prevents that slug from being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slugs across the remaining releases (5 releases
	// in total, only 3 of which are inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		files := release.FileArtifactIDs()
		t.Assert(files, c.HasLen, 1)
		distinctSlugs[files[0]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
Example 12
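// testSireniaDeploy deploys a Sirenia-managed database app using the
// "sirenia" deployment strategy: it copies the database's release, waits for
// the expected cluster state and web processes, checks the database is
// writeable, then deploys a new release and asserts that the observed cluster
// state transitions match the expected sequence and that the database is
// still writeable afterwards.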
func testSireniaDeploy(client controller.Client, disc *discoverd.Client, t *c.C, d *sireniaDeploy) {
	// create app
	app := &ct.App{Name: d.name, Strategy: "sirenia"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default app
	release, err := client.GetAppRelease(d.db.appName)
	t.Assert(err, c.IsNil)
	release.ID = ""
	release.Env[d.db.hostKey] = fmt.Sprintf("leader.%s.discoverd", d.name)
	release.Env[d.db.serviceKey] = d.name
	procName := release.Env["SIRENIA_PROCESS"]
	proc := release.Processes[procName]
	delete(proc.Env, "SINGLETON")
	proc.Service = d.name
	release.Processes[procName] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discService := disc.Service(d.name)
	discStream, err := discService.Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{procName: d.sireniaJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got cluster state: index=%d primary=%s sync=%s async=%s",
				event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var sireniaState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if sireniaState.Primary == nil {
			return false
		}
		if d.sireniaJobs > 1 && sireniaState.Sync == nil {
			return false
		}
		if d.sireniaJobs > 2 && len(sireniaState.Async) != d.sireniaJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			sireniaState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for formation")
		}
	}

	// wait for the primary to indicate downstream replication sync
	debug(t, "waiting for primary to indicate downstream replication sync")
	sireniaClient := sc.NewClient(sireniaState.Primary.Addr)
	t.Assert(sireniaClient.WaitForReplSync(sireniaState.Sync, 1*time.Minute), c.IsNil)

	// connect to the db and run any initialisation required to later test writes
	debug(t, "initialising db")
	if d.db.initDb != nil {
		d.db.initDb(t, release, d)
	}

	// check currently writeable
	d.db.assertWriteable(t, release, d)

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is in the remaining states
	// that were expected, so handles the fact that some states don't happen, but the
	// states that do happen are expected and in-order.
	assertNextState := func(remaining []expectedSireniaState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with fewer asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		logf := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				logf("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				logf("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				logf("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				logf("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				logf("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					logf("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected state")
		return -1
	}
	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != ct.JobStateUp && e.JobState != ct.JobStateDown {
				continue
			}
			if e.JobType == procName {
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			}
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("unexpected close of job event stream: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp && e.ReleaseID == newRelease {
				newWebJobs++
			}
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now deploy is complete
	d.db.assertWriteable(t, release, d)
}
Example 13
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tagged based scheduling")
	}

	// stream the scheduler leader log so we can synchronize tag changes
	leader, err := s.discoverdClient(t).Service("controller-scheduler").Leader()
	t.Assert(err, c.IsNil)
	client := s.controllerClient(t)
	res, err := client.GetAppLog("controller", &ct.LogOpts{
		Follow:      true,
		JobID:       leader.Meta["FLYNN_JOB_ID"],
		ProcessType: typeconv.StringPtr("scheduler"),
		Lines:       typeconv.IntPtr(0),
	})
	t.Assert(err, c.IsNil)
	defer res.Close()
	tagChange := make(chan struct{})
	go func() {
		dec := json.NewDecoder(res)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				return
			}
			if strings.Contains(msg.Msg, "host tags changed") {
				tagChange <- struct{}{}
			}
		}
	}()
	waitSchedulerTagChange := func() {
		select {
		case <-tagChange:
			return
		case <-time.After(10 * time.Second):
			t.Fatalf("timed out waiting for scheduler leader to see tag change")
		}
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
		waitSchedulerTagChange()
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
Example 14
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatal("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}