Example #1
func (c *Client) ExpectedScalingEvents(actual, expected map[string]int, releaseProcesses map[string]ct.ProcessType, clusterSize int) ct.JobEvents {
	events := make(ct.JobEvents, len(expected))
	for typ, count := range expected {
		diff := count
		val, ok := actual[typ]
		if ok {
			diff = count - val
		}
		proc, ok := releaseProcesses[typ]
		if ok && proc.Omni {
			diff *= clusterSize
		}
		if diff > 0 {
			events[typ] = ct.JobUpEvents(diff)
		} else if diff < 0 {
			events[typ] = ct.JobDownEvents(-diff)
		}
	}
	return events
}
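A minimal usage sketch of the helper above, assuming client is an existing *Client and ct is the controller types package used throughout these examples; the values are illustrative:

	// With one "web" job already running, three desired, and an omni
	// "monitor" process on a 3-host cluster, the helper reports two up
	// events for "web" and three for "monitor".
	events := client.ExpectedScalingEvents(
		map[string]int{"web": 1},               // actual
		map[string]int{"web": 3, "monitor": 1}, // expected
		map[string]ct.ProcessType{"monitor": {Omni: true}},
		3, // cluster size
	)
	// events == ct.JobEvents{"web": ct.JobUpEvents(2), "monitor": ct.JobUpEvents(3)}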
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tag-based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host 1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
Example #3
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)
	for _, typ := range processTypes {
		num := d.Processes[typ]
		// don't scale processes which no longer exist in the new release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			num = 0
		}
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}

		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}
			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down), returning
	// ErrSkipRollback if an error occurs (rolling back doesn't make a ton
	// of sense because it involves stopping the new working jobs).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		if count > 0 {
			diff[typ] = ct.JobDownEvents(count)
		}
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than potentially
	// waiting for the jobs to actually stop) as we can trust that the
	// scheduler will actually kill the jobs, so no need to delay the
	// deployment.
	log.Info("finished one-by-one deployment")
	return nil
}
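The WaitJobsFn argument is not defined in this excerpt. Based on how it is called above, and on how waitForJobEvents is passed the same argument shape in a later variant, a plausible wrapper would be:

	// Hypothetical sketch: the real definitions of WaitJobsFn and
	// waitForJobEvents live elsewhere in the worker package.
	func (d *DeployJob) deployOneByOne() error {
		return d.deployOneByOneWithWaitFn(d.waitForJobEvents)
	}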
Example #4
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	// the new jobs have now started and they are up, so return
	// ErrSkipRollback from here on out if an error occurs (rolling
	// back doesn't make a ton of sense because it involves
	// stopping the new working jobs).
	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
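ErrSkipRollback is constructed from an error string above but is not defined in this excerpt; a sketch consistent with that usage might be:

	// Hypothetical definition inferred from the ErrSkipRollback{err.Error()}
	// literals above: an error whose type signals to the caller that rolling
	// back the deployment should be skipped.
	type ErrSkipRollback struct {
		Err string
	}

	func (e ErrSkipRollback) Error() string {
		return e.Err
	}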
Example #5
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		if existing := d.oldReleaseState[typ]; existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		// the new jobs have now started and they are up, so return
		// ErrSkipRollback (rolling back doesn't make a ton of sense
		// because it involves stopping the new working jobs).
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than waiting for the
	// jobs to actually stop) as we can trust that the scheduler will
	// actually kill the jobs, so no need to delay the deployment.
	log.Info("finished all-at-once deployment")
	return nil
}
Example #6
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)
	for _, typ := range processTypes {
		num := d.Processes[typ]
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}

		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.NewReleaseID,
					JobState:  ct.JobStateStarting,
					JobType:   typ,
				}
			}
			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.OldReleaseID,
					JobState:  ct.JobStateStopping,
					JobType:   typ,
				}
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		diff[typ] = ct.JobDownEvents(count)
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return err
	}
	if diff.Count() > 0 {
		log.Info(fmt.Sprintf("waiting for %d job down event(s)", diff.Count()))
		if err := d.waitForJobEvents(d.OldReleaseID, diff, log); err != nil {
			log.Error("error waiting for job down events", "err", err)
			return err
		}
	}

	log.Info("finished one-by-one deployment")
	return nil
}
Example #7
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	for typ, n := range d.Processes {
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", d.Processes)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: d.Processes,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return err
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			// we have started the new jobs (and they are up) and requested that the old jobs stop. at this point
			// there's not much more we can do. Rolling back doesn't make a ton of sense because it involves
			// stopping the new (working) jobs.
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
Example #8
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatalf("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}
Example #9
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// watch job events of the current release so we can wait for down
	// events later
	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// because when the deployer deploys itself, some events will not get
	// sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment %s event", e.Status)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// wait for the old release to be fully scaled down
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	err = watcher.WaitFor(ct.JobEvents{
		"web":       ct.JobDownEvents(formation.Processes["web"]),
		"worker":    ct.JobDownEvents(formation.Processes["worker"]),
		"scheduler": ct.JobDownEvents(len(hosts)),
	}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// check the correct controller jobs are running
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}