Example #1
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
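	// omni processes run one instance per host, so the expected total job count is the scale multiplied by the cluster size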
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := make([]*ct.Job, 0, 3*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			expected = append(expected, &ct.Job{
				ReleaseID: releaseID,
				Type:      "omni",
				State:     state,
			})
		}
	}
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	s.waitForDeploymentStatus(t, events, "complete")

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	// try creating the deployment multiple times to avoid getting a
	// "Cannot create deploy, one is already in progress" error (there
	// is no guarantee the previous deploy has finished yet)
	attempts := attempt.Strategy{Total: 10 * time.Second, Delay: 100 * time.Millisecond}
	err = attempts.Run(func() (err error) {
		deployment, err = client.CreateDeployment(app.ID, release.ID)
		return
	})
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	expected = make([]*ct.Job, 0, 4*totalJobs+1)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	s.waitForDeploymentStatus(t, events, "complete")
}
Example #2
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tagged based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
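	// waitServiceEvent blocks until a flynn-host service event of the given kind arrives,
	// failing the test if the stream closes or the wait times out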
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

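	// updateTags applies tags to a host and waits for the change to appear in the host's discoverd metadata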
	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

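	// assertHostJobCounts checks that the number of running jobs on each host matches the expected counts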
	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

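	// assertStateCounts checks how many of the app's jobs are in each state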
	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
Example #3
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
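	// capture the blocker job's ID so router backend events can be matched against it below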
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
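	// fan backend events for the blocker job from every router into a single channel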
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

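	// waitForRouterEvents waits for one event of the given type from each router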
	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatal("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}
Example #4
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
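	// appendEvents queues the given number of expected running-deployment events for a release, job state and the omni job type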
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			event := &ct.DeploymentEvent{
				ReleaseID: releaseID,
				JobType:   "omni",
				JobState:  state,
				Status:    "running",
			}
			expected = append(expected, event)
		}
	}
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)
}
Example #5
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// watch job events of the current release so we can wait for down
	// events later
	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment %s event", e.Status)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// wait for the old release to be fully scaled down
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	err = watcher.WaitFor(ct.JobEvents{
		"web":       ct.JobDownEvents(formation.Processes["web"]),
		"worker":    ct.JobDownEvents(formation.Processes["worker"]),
		"scheduler": ct.JobDownEvents(len(hosts)),
	}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// check the correct controller jobs are running
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}