Ejemplo n.º 1
0
func (c *context) rollback(l log15.Logger, deployment *ct.Deployment, original *ct.Formation) error {
	log := l.New("fn", "rollback")

	log.Info("creating job watcher")
	jobWatcher, err := c.client.WatchJobEvents(deployment.AppID, deployment.OldReleaseID)
	if err != nil {
		log.Error("error opening job event stream", "err", err)
		return err
	}
	appJobs, err := c.client.JobList(deployment.AppID)
	if err != nil {
		log.Error("error listing app jobs", "err", err)
		return err
	}
	runningJobs := make(map[string]int)
	for _, j := range appJobs {
		if j.ReleaseID != deployment.OldReleaseID {
			continue
		}
		if j.State == ct.JobStateUp {
			runningJobs[j.Type]++
		}
	}
	expectedJobEvents := make(ct.JobEvents, len(original.Processes))
	for name, count := range original.Processes {
		count = count - runningJobs[name]
		if count > 0 {
			expectedJobEvents[name] = ct.JobUpEvents(count)
		}
	}

	log.Info("restoring the original formation", "release.id", original.ReleaseID)
	if err := c.client.PutFormation(original); err != nil {
		log.Error("error restoring the original formation", "err", err)
		return err
	}

	if len(expectedJobEvents) > 0 {
		log.Info("waiting for job events", "events", expectedJobEvents)
		callback := func(job *ct.Job) error {
			log.Info("got job event", "job.id", job.ID, "job.type", job.Type, "job.state", job.State)
			return nil
		}
		if err := jobWatcher.WaitFor(expectedJobEvents, 10*time.Second, callback); err != nil {
			log.Error("error waiting for job events", "err", err)
		}
	}

	log.Info("deleting the new formation")
	if err := c.client.DeleteFormation(deployment.AppID, deployment.NewReleaseID); err != nil {
		log.Error("error deleting the new formation:", "err", err)
		return err
	}

	log.Info("rollback complete")
	return nil
}
Ejemplo n.º 2
0
func (c *Client) ExpectedScalingEvents(actual, expected map[string]int, releaseProcesses map[string]ct.ProcessType, clusterSize int) ct.JobEvents {
	events := make(ct.JobEvents, len(expected))
	for typ, count := range expected {
		diff := count
		val, ok := actual[typ]
		if ok {
			diff = count - val
		}
		proc, ok := releaseProcesses[typ]
		if ok && proc.Omni {
			diff *= clusterSize
		}
		if diff > 0 {
			events[typ] = ct.JobUpEvents(diff)
		} else if diff < 0 {
			events[typ] = ct.JobDownEvents(-diff)
		}
	}
	return events
}
Ejemplo n.º 3
0
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := s.createArtifact(t, "test-apps")

	// create 5 slug artifacts
	tmp, err := ioutil.TempFile("", "squashfs-")
	t.Assert(err, c.IsNil)
	defer os.Remove(tmp.Name())
	defer tmp.Close()
	t.Assert(exec.Command("mksquashfs", t.MkDir(), tmp.Name(), "-noappend").Run(), c.IsNil)
	slug, err := ioutil.ReadAll(tmp)
	t.Assert(err, c.IsNil)
	slugHash := sha512.Sum512(slug)
	slugs := []string{
		"http://blobstore.discoverd/layer/1.squashfs",
		"http://blobstore.discoverd/layer/2.squashfs",
		"http://blobstore.discoverd/layer/3.squashfs",
		"http://blobstore.discoverd/layer/4.squashfs",
		"http://blobstore.discoverd/layer/5.squashfs",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	put := func(url string, data []byte) {
		req, err := http.NewRequest("PUT", url, bytes.NewReader(data))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
	}
	for i, layerURL := range slugs {
		manifest := &ct.ImageManifest{
			Type: ct.ImageManifestTypeV1,
			Rootfs: []*ct.ImageRootfs{{
				Layers: []*ct.ImageLayer{{
					ID:     strconv.Itoa(i + 1),
					Type:   ct.ImageLayerTypeSquashfs,
					Length: int64(len(slug)),
					Hashes: map[string]string{"sha512": hex.EncodeToString(slugHash[:])},
				}},
			}},
		}
		data := manifest.RawManifest()
		url := fmt.Sprintf("http://blobstore.discoverd/image/%s.json", manifest.ID())
		put(url, data)
		put(layerURL, slug)
		artifact := &ct.Artifact{
			Type:             ct.ArtifactTypeFlynn,
			URI:              url,
			Meta:             map[string]string{"blobstore": "true"},
			RawManifest:      data,
			Hashes:           manifest.Hashes(),
			Size:             int64(len(data)),
			LayerURLTemplate: "http://blobstore.discoverd/layer/{id}.squashfs",
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (so prevents the slug being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
			Meta: map[string]string{"git": "true"},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slug releases (so 5 in total, only 3 are
	// inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		t.Assert(release.ArtifactIDs, c.HasLen, 2)
		distinctSlugs[release.ArtifactIDs[1]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
Ejemplo n.º 4
0
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	// the new jobs have now started and they are up, so return
	// ErrSkipRollback from here on out if an error occurs (rolling
	// back doesn't make a ton of sense because it involves
	// stopping the new working jobs).
	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
Ejemplo n.º 5
0
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tagged based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
Ejemplo n.º 6
0
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: imageURIs["test-apps"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)

	// create 5 slug artifacts
	var slug bytes.Buffer
	gz := gzip.NewWriter(&slug)
	t.Assert(tar.NewWriter(gz).Close(), c.IsNil)
	t.Assert(gz.Close(), c.IsNil)
	slugs := []string{
		"http://blobstore.discoverd/1/slug.tgz",
		"http://blobstore.discoverd/2/slug.tgz",
		"http://blobstore.discoverd/3/slug.tgz",
		"http://blobstore.discoverd/4/slug.tgz",
		"http://blobstore.discoverd/5/slug.tgz",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	for i, uri := range slugs {
		req, err := http.NewRequest("PUT", uri, bytes.NewReader(slug.Bytes()))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		artifact := &ct.Artifact{
			Type: host.ArtifactTypeFile,
			URI:  uri,
			Meta: map[string]string{"blobstore": "true"},
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (so prevents the slug being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slug releases (so 5 in total, only 3 are
	// inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		files := release.FileArtifactIDs()
		t.Assert(files, c.HasLen, 1)
		distinctSlugs[files[0]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
Ejemplo n.º 7
0
func main() {
	client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
	if err != nil {
		log.Fatalln("Unable to connect to controller:", err)
	}

	usage := `
Usage: flynn-receiver <app> <rev> [-e <var>=<val>]... [-m <key>=<val>]...

Options:
	-e,--env <var>=<val>
	-m,--meta <key>=<val>
`[1:]
	args, _ := docopt.Parse(usage, nil, true, version.String(), false)

	appName := args.String["<app>"]
	env, err := parsePairs(args, "--env")
	if err != nil {
		log.Fatal(err)
	}
	meta, err := parsePairs(args, "--meta")
	if err != nil {
		log.Fatal(err)
	}

	app, err := client.GetApp(appName)
	if err == controller.ErrNotFound {
		log.Fatalf("Unknown app %q", appName)
	} else if err != nil {
		log.Fatalln("Error retrieving app:", err)
	}
	prevRelease, err := client.GetAppRelease(app.Name)
	if err == controller.ErrNotFound {
		prevRelease = &ct.Release{}
	} else if err != nil {
		log.Fatalln("Error getting current app release:", err)
	}

	fmt.Printf("-----> Building %s...\n", app.Name)

	var output bytes.Buffer
	slugURL := fmt.Sprintf("%s/%s.tgz", blobstoreURL, random.UUID())
	cmd := exec.Command(exec.DockerImage(os.Getenv("SLUGBUILDER_IMAGE_URI")), slugURL)
	cmd.Stdout = io.MultiWriter(os.Stdout, &output)
	cmd.Stderr = os.Stderr
	cmd.Meta = map[string]string{
		"flynn-controller.app":      app.ID,
		"flynn-controller.app_name": app.Name,
		"flynn-controller.release":  prevRelease.ID,
	}
	if len(prevRelease.Env) > 0 {
		stdin, err := cmd.StdinPipe()
		if err != nil {
			log.Fatalln(err)
		}
		go appendEnvDir(os.Stdin, stdin, prevRelease.Env)
	} else {
		cmd.Stdin = os.Stdin
	}
	cmd.Env = make(map[string]string)
	cmd.Env["BUILD_CACHE_URL"] = fmt.Sprintf("%s/%s-cache.tgz", blobstoreURL, app.ID)
	if buildpackURL, ok := env["BUILDPACK_URL"]; ok {
		cmd.Env["BUILDPACK_URL"] = buildpackURL
	} else if buildpackURL, ok := prevRelease.Env["BUILDPACK_URL"]; ok {
		cmd.Env["BUILDPACK_URL"] = buildpackURL
	}
	for _, k := range []string{"SSH_CLIENT_KEY", "SSH_CLIENT_HOSTS"} {
		if v := os.Getenv(k); v != "" {
			cmd.Env[k] = v
		}
	}

	if err := cmd.Run(); err != nil {
		log.Fatalln("Build failed:", err)
	}

	var types []string
	if match := typesPattern.FindSubmatch(output.Bytes()); match != nil {
		types = strings.Split(string(match[1]), ", ")
	}

	fmt.Printf("-----> Creating release...\n")

	artifact := &ct.Artifact{Type: "docker", URI: os.Getenv("SLUGRUNNER_IMAGE_URI")}
	if err := client.CreateArtifact(artifact); err != nil {
		log.Fatalln("Error creating artifact:", err)
	}

	release := &ct.Release{
		ArtifactID: artifact.ID,
		Env:        prevRelease.Env,
		Meta:       prevRelease.Meta,
	}
	if release.Meta == nil {
		release.Meta = make(map[string]string, len(meta))
	}
	if release.Env == nil {
		release.Env = make(map[string]string, len(env))
	}
	for k, v := range env {
		release.Env[k] = v
	}
	for k, v := range meta {
		release.Meta[k] = v
	}
	procs := make(map[string]ct.ProcessType)
	for _, t := range types {
		proc := prevRelease.Processes[t]
		proc.Cmd = []string{"start", t}
		if t == "web" || strings.HasSuffix(t, "-web") {
			proc.Service = app.Name + "-" + t
			proc.Ports = []ct.Port{{
				Port:  8080,
				Proto: "tcp",
				Service: &host.Service{
					Name:   proc.Service,
					Create: true,
					Check:  &host.HealthCheck{Type: "tcp"},
				},
			}}
		}
		procs[t] = proc
	}
	release.Processes = procs
	if release.Env == nil {
		release.Env = make(map[string]string)
	}
	release.Env["SLUG_URL"] = slugURL

	if err := client.CreateRelease(release); err != nil {
		log.Fatalln("Error creating release:", err)
	}
	if err := client.DeployAppRelease(app.Name, release.ID); err != nil {
		log.Fatalln("Error deploying app release:", err)
	}

	fmt.Println("=====> Application deployed")

	if needsDefaultScale(app.ID, prevRelease.ID, procs, client) {
		formation := &ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: map[string]int{"web": 1},
		}

		watcher, err := client.WatchJobEvents(app.ID, release.ID)
		if err != nil {
			log.Fatalln("Error streaming job events", err)
			return
		}
		defer watcher.Close()

		if err := client.PutFormation(formation); err != nil {
			log.Fatalln("Error putting formation:", err)
		}
		fmt.Println("=====> Waiting for web job to start...")

		err = watcher.WaitFor(ct.JobEvents{"web": ct.JobUpEvents(1)}, scaleTimeout, func(e *ct.Job) error {
			switch e.State {
			case ct.JobStateUp:
				fmt.Println("=====> Default web formation scaled to 1")
			case ct.JobStateDown:
				return fmt.Errorf("Failed to scale web process type")
			}
			return nil
		})
		if err != nil {
			log.Fatalln(err.Error())
		}
	}
}
Ejemplo n.º 8
0
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)
	for _, typ := range processTypes {
		num := d.Processes[typ]
		// don't scale processes which no longer exist in the new release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			num = 0
		}
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}

		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}
			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down), returning
	// ErrSkipRollback if an error occurs (rolling back doesn't make a ton
	// of sense because it involves stopping the new working jobs).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		if count > 0 {
			diff[typ] = ct.JobDownEvents(count)
		}
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than potentially
	// waiting for the jobs to actually stop) as we can trust that the
	// scheduler will actually kill the jobs, so no need to delay the
	// deployment.
	log.Info("finished one-by-one deployment")
	return nil
}
Ejemplo n.º 9
0
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	newProcs := make(map[string]int, len(d.Processes))
	for typ, n := range d.Processes {
		// ignore processes which no longer exist in the new
		// release
		if _, ok := d.newRelease.Processes[typ]; !ok {
			continue
		}
		newProcs[typ] = n
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", newProcs)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: newProcs,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		if existing := d.oldReleaseState[typ]; existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		// the new jobs have now started and they are up, so return
		// ErrSkipRollback (rolling back doesn't make a ton of sense
		// because it involves stopping the new working jobs).
		log.Error("error scaling old formation to zero", "err", err)
		return ErrSkipRollback{err.Error()}
	}

	// treat the deployment as finished now (rather than waiting for the
	// jobs to actually stop) as we can trust that the scheduler will
	// actually kill the jobs, so no need to delay the deployment.
	log.Info("finished all-at-once deployment")
	return nil
}
Ejemplo n.º 10
0
func (d *DeployJob) deployOneByOneWithWaitFn(waitJobs WaitJobsFn) error {
	log := d.logger.New("fn", "deployOneByOne")
	log.Info("starting one-by-one deployment")

	oldScale := make(map[string]int, len(d.oldReleaseState))
	for typ, count := range d.oldReleaseState {
		oldScale[typ] = count
		if d.isOmni(typ) {
			oldScale[typ] /= d.hostCount
		}
	}

	newScale := make(map[string]int, len(d.newReleaseState))
	for typ, count := range d.newReleaseState {
		newScale[typ] = count
		if d.isOmni(typ) {
			newScale[typ] /= d.hostCount
		}
	}

	processTypes := make([]string, 0, len(d.Processes))
	for typ := range d.Processes {
		processTypes = append(processTypes, typ)
	}
	sort.Sort(sort.StringSlice(processTypes))

	olog := log.New("release_id", d.OldReleaseID)
	nlog := log.New("release_id", d.NewReleaseID)
	for _, typ := range processTypes {
		num := d.Processes[typ]
		diff := 1
		if d.isOmni(typ) {
			diff = d.hostCount
		}

		for i := newScale[typ]; i < num; i++ {
			nlog.Info("scaling new formation up by one", "type", typ)
			newScale[typ]++
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.NewReleaseID,
				Processes: newScale,
			}); err != nil {
				nlog.Error("error scaling new formation up by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.NewReleaseID,
					JobState:  ct.JobStateStarting,
					JobType:   typ,
				}
			}
			nlog.Info(fmt.Sprintf("waiting for %d job up event(s)", diff), "type", typ)
			if err := waitJobs(d.NewReleaseID, ct.JobEvents{typ: ct.JobUpEvents(diff)}, nlog); err != nil {
				nlog.Error("error waiting for job up events", "err", err)
				return err
			}

			olog.Info("scaling old formation down by one", "type", typ)
			oldScale[typ]--
			if err := d.client.PutFormation(&ct.Formation{
				AppID:     d.AppID,
				ReleaseID: d.OldReleaseID,
				Processes: oldScale,
			}); err != nil {
				olog.Error("error scaling old formation down by one", "type", typ, "err", err)
				return err
			}
			for i := 0; i < diff; i++ {
				d.deployEvents <- ct.DeploymentEvent{
					ReleaseID: d.OldReleaseID,
					JobState:  ct.JobStateStopping,
					JobType:   typ,
				}
			}

			olog.Info(fmt.Sprintf("waiting for %d job down event(s)", diff), "type", typ)
			if err := waitJobs(d.OldReleaseID, ct.JobEvents{typ: ct.JobDownEvents(diff)}, olog); err != nil {
				olog.Error("error waiting for job down events", "err", err)
				return err
			}
		}
	}

	// ensure any old leftover jobs are stopped (this can happen when new
	// workers continue deployments from old workers and still see the
	// old worker running even though it has been scaled down).
	log.Info("ensuring old formation is scaled down to zero")
	diff := make(ct.JobEvents, len(oldScale))
	for typ, count := range oldScale {
		diff[typ] = ct.JobDownEvents(count)
	}
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation down to zero", "err", err)
		return err
	}
	if diff.Count() > 0 {
		log.Info(fmt.Sprintf("waiting for %d job down event(s)", diff.Count()))
		if err := d.waitForJobEvents(d.OldReleaseID, diff, log); err != nil {
			log.Error("error waiting for job down events", "err", err)
			return err
		}
	}

	log.Info("finished one-by-one deployment")
	return nil
}
Ejemplo n.º 11
0
func (d *DeployJob) deployAllAtOnce() error {
	log := d.logger.New("fn", "deployAllAtOnce")
	log.Info("starting all-at-once deployment")

	expected := make(ct.JobEvents)
	for typ, n := range d.Processes {
		total := n
		if d.isOmni(typ) {
			total *= d.hostCount
		}
		existing := d.newReleaseState[typ]
		for i := existing; i < total; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.NewReleaseID,
				JobState:  ct.JobStateStarting,
				JobType:   typ,
			}
		}
		if total > existing {
			expected[typ] = ct.JobUpEvents(total - existing)
		}
	}
	if expected.Count() > 0 {
		log := log.New("release_id", d.NewReleaseID)
		log.Info("creating new formation", "processes", d.Processes)
		if err := d.client.PutFormation(&ct.Formation{
			AppID:     d.AppID,
			ReleaseID: d.NewReleaseID,
			Processes: d.Processes,
		}); err != nil {
			log.Error("error creating new formation", "err", err)
			return err
		}

		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.NewReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			return err
		}
	}

	expected = make(ct.JobEvents)
	for typ := range d.Processes {
		existing := d.oldReleaseState[typ]
		for i := 0; i < existing; i++ {
			d.deployEvents <- ct.DeploymentEvent{
				ReleaseID: d.OldReleaseID,
				JobState:  ct.JobStateStopping,
				JobType:   typ,
			}
		}
		if existing > 0 {
			expected[typ] = ct.JobDownEvents(existing)
		}
	}

	log = log.New("release_id", d.OldReleaseID)
	log.Info("scaling old formation to zero")
	if err := d.client.PutFormation(&ct.Formation{
		AppID:     d.AppID,
		ReleaseID: d.OldReleaseID,
	}); err != nil {
		log.Error("error scaling old formation to zero", "err", err)
		return err
	}

	if expected.Count() > 0 {
		log.Info("waiting for job events", "expected", expected)
		if err := d.waitForJobEvents(d.OldReleaseID, expected, log); err != nil {
			log.Error("error waiting for job events", "err", err)
			// we have started the new jobs (and they are up) and requested that the old jobs stop. at this point
			// there's not much more we can do. Rolling back doesn't make a ton of sense because it involves
			// stopping the new (working) jobs.
			return ErrSkipRollback{err.Error()}
		}
	}

	log.Info("finished all-at-once deployment")
	return nil
}
Ejemplo n.º 12
0
func (s *SchedulerSuite) TestGracefulShutdown(t *c.C) {
	app, release := s.createApp(t)
	client := s.controllerClient(t)

	debug(t, "scaling to blocker=1")
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 1},
	}), c.IsNil)
	var jobID string
	err = watcher.WaitFor(ct.JobEvents{"blocker": ct.JobUpEvents(1)}, scaleTimeout, func(job *ct.Job) error {
		jobID = job.ID
		return nil
	})
	t.Assert(err, c.IsNil)
	jobs, err := s.discoverdClient(t).Instances("test-http-blocker", 10*time.Second)
	t.Assert(err, c.IsNil)
	t.Assert(jobs, c.HasLen, 1)
	jobAddr := jobs[0].Addr

	debug(t, "subscribing to backend events from all routers")
	routers, err := s.discoverdClient(t).Instances("router-api", 10*time.Second)
	t.Assert(err, c.IsNil)
	routerEvents := make(chan *router.StreamEvent)
	for _, r := range routers {
		events := make(chan *router.StreamEvent)
		stream, err := routerc.NewWithAddr(r.Addr).StreamEvents(&router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDown,
				router.EventTypeBackendDrained,
			},
		}, events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		go func(router *discoverd.Instance) {
			for event := range events {
				if event.Backend != nil && event.Backend.JobID == jobID {
					debugf(t, "got %s router event from %s", event.Event, router.Host())
					routerEvents <- event
				}
			}
		}(r)
	}

	debug(t, "adding HTTP route with backend drain enabled")
	route := &router.HTTPRoute{
		Domain:        random.String(32) + ".com",
		Service:       "test-http-blocker",
		DrainBackends: true,
	}
	t.Assert(client.CreateRoute(app.ID, route.ToRoute()), c.IsNil)

	waitForRouterEvents := func(typ router.EventType) {
		debugf(t, "waiting for %d router %s events", len(routers), typ)
		count := 0
		for {
			select {
			case event := <-routerEvents:
				if event.Event != typ {
					t.Fatal("expected %s router event, got %s", typ, event.Event)
				}
				count++
				if count == len(routers) {
					return
				}
			case <-time.After(30 * time.Second):
				t.Fatalf("timed out waiting for router %s events", typ)
			}
		}
	}
	waitForRouterEvents(router.EventTypeBackendUp)

	debug(t, "making blocked HTTP request through each router")
	reqErrs := make(chan error)
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/block", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		go func() {
			defer res.Body.Close()
			data, err := ioutil.ReadAll(res.Body)
			if err == nil && !bytes.Equal(data, []byte("done")) {
				err = fmt.Errorf("unexpected response: %q", data)
			}
			reqErrs <- err
		}()
	}

	debug(t, "scaling to blocker=0")
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"blocker": 0},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": {ct.JobStateStopping: 1}}, scaleTimeout, nil), c.IsNil)
	waitForRouterEvents(router.EventTypeBackendDown)

	debug(t, "checking new HTTP requests return 503")
	for _, router := range routers {
		req, err := http.NewRequest("GET", "http://"+router.Host()+"/ping", nil)
		t.Assert(err, c.IsNil)
		req.Host = route.Domain
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusServiceUnavailable)
	}

	debug(t, "checking blocked HTTP requests are still blocked")
	select {
	case err := <-reqErrs:
		t.Fatal(err)
	default:
	}

	debug(t, "unblocking HTTP requests")
	res, err := http.Get("http://" + jobAddr + "/unblock")
	t.Assert(err, c.IsNil)
	t.Assert(res.StatusCode, c.Equals, http.StatusOK)

	debug(t, "checking the blocked HTTP requests completed without error")
	for range routers {
		if err := <-reqErrs; err != nil {
			t.Fatal(err)
		}
	}
	waitForRouterEvents(router.EventTypeBackendDrained)

	debug(t, "waiting for the job to exit")
	t.Assert(watcher.WaitFor(ct.JobEvents{"blocker": ct.JobDownEvents(1)}, scaleTimeout, nil), c.IsNil)
}