Example #1
0
func (s *SchedulerSuite) TestOmniJobs(t *c.C) {
	if args.ClusterAPI == "" {
		t.Skip("cannot boot new hosts")
	}

	app, release := s.createApp(t)

	stream, err := s.controllerClient(t).StreamJobEvents(app.ID, 0)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: make(map[string]int),
	}

	current := make(map[string]int)
	updates := []map[string]int{
		{"printer": 2},
		{"printer": 3, "omni": 2},
		{"printer": 1, "omni": 1},
	}

	for _, procs := range updates {
		debugf(t, "scaling formation to %v", procs)
		formation.Processes = procs
		t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)

		expected := make(jobEvents)
		for typ, count := range procs {
			diff := count - current[typ]
			if typ == "omni" {
				diff *= testCluster.Size()
			}
			if diff > 0 {
				expected[typ] = map[string]int{"up": diff}
			} else {
				expected[typ] = map[string]int{"down": -diff}
			}
		}
		for typ, count := range current {
			if _, ok := procs[typ]; !ok {
				diff := count
				if typ == "omni" {
					diff *= testCluster.Size()
				}
				expected[typ] = map[string]int{"down": diff}
			}
		}
		waitForJobEvents(t, stream.Events, expected)

		current = procs
	}

	// Check that new hosts get omni jobs
	newHosts := s.addHosts(t, 2)
	defer s.removeHosts(t, newHosts)
	waitForJobEvents(t, stream.Events, jobEvents{"omni": {"up": 2}})
}
Example #2
0
func (s *VolumeSuite) TestInterhostVolumeTransmitAPI(t *c.C) {
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) < 2 {
		t.Skip("need multiple hosts for this test")
	}
	s.doVolumeTransmitAPI(hosts[0], hosts[1], t)
}
Example #3
0
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	// To run this test on local, set BACKOFF_PERIOD on the flynn host machine
	var backoffPeriod time.Duration
	var err error
	if testCluster == nil {
		backoffPeriod, err = time.ParseDuration(os.Getenv("BACKOFF_PERIOD"))
		if err != nil {
			t.Skip("cannot determine backoff period")
		}
	} else {
		backoffPeriod = testCluster.BackoffPeriod()
	}
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	var id string
	var assignId = func(j *ct.Job) error {
		debugf(t, "got job event: %s %s", j.ID, j.State)
		id = j.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, scaleTimeout, assignId)
	t.Assert(err, c.IsNil)

	waitForRestart := func(duration time.Duration) {
		start := time.Now()
		s.stopJob(t, id)
		debugf(t, "expecting new job to start in %s", duration)
		err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, duration+startTimeout, assignId)
		t.Assert(err, c.IsNil)
		actual := time.Now().Sub(start)
		if actual < duration {
			t.Fatalf("expected new job to start after %s but started after %s", duration, actual)
		}
	}

	waitForRestart(0)
	waitForRestart(backoffPeriod)
	waitForRestart(2 * backoffPeriod)
	debug(t, "waiting for backoff period to expire")
	time.Sleep(backoffPeriod)
	waitForRestart(0)
}
Example #4
0
func (s *ZDiscoverdSuite) TestPromoteDemote(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}
	// ensure we have 3 node cluster, TODO(jpg): Support running test on anything larger than 2 node cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) != 3 {
		t.Skip("promotion and demotion tests require a 3 node cluster")
	}

	// Check the original number of peers is correct
	initialPeers, err := s.discoverdClient(t).RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(initialPeers), c.Equals, 3)

	// Add a new host to the cluster, initially it will join as a proxy
	newHost := s.addHost(t, "discoverd")
	defer s.removeHost(t, newHost, "discoverd")

	// Sleep just a little to give discoverd time to get started
	time.Sleep(2 * time.Second)

	// Promote the new node to a Raft member
	url := "http://" + newHost.IP + ":1111"
	dd := discoverd.NewClientWithURL(url)
	err = dd.Promote(url)
	t.Assert(err, c.IsNil)

	// Check that we now have one additional peer, also ensure our new peer is in the list
	newPeers, err := s.discoverdClient(t).RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(newPeers), c.Equals, 4)
	t.Assert(peerPresent(newHost, newPeers), c.Equals, true)

	// Now demote the newly promoted node
	err = dd.Demote(url)
	t.Assert(err, c.IsNil)

	//XXX(jpg): Better way to wait for leadership?
	time.Sleep(2 * time.Second)

	// We are going to ask the leader for the list of peers as it's definitely canonical
	leader, err := s.discoverdClient(t).RaftLeader()
	t.Assert(err, c.IsNil)
	dd = discoverd.NewClientWithURL(leader.Host)

	// There should now be only the original peers, additionally make sure our host isn't one of them
	finalPeers, err := dd.RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(finalPeers), c.Equals, 3)
	t.Assert(peerPresent(newHost, finalPeers), c.Equals, false)
}
Example #5
0
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine scheduler backoff period")
	}
	backoffPeriod := testCluster.BackoffPeriod()
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	var id string
	var assignId = func(e *ct.Job) error {
		id = e.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, scaleTimeout, assignId)

	// First restart: scheduled immediately
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, startTimeout, assignId)
	t.Assert(err, c.IsNil)

	// Second restart after 1 * backoffPeriod
	start := time.Now()
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, backoffPeriod+startTimeout, assignId)
	t.Assert(err, c.IsNil)
	t.Assert(time.Now().Sub(start) > backoffPeriod, c.Equals, true)

	// Third restart after 2 * backoffPeriod
	start = time.Now()
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, 2*backoffPeriod+startTimeout, assignId)
	t.Assert(err, c.IsNil)
	t.Assert(time.Now().Sub(start) > 2*backoffPeriod, c.Equals, true)

	// After backoffPeriod has elapsed: scheduled immediately
	time.Sleep(backoffPeriod)
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, startTimeout, assignId)
	t.Assert(err, c.IsNil)
}
Example #6
0
func (s *HostSuite) TestVolumePersistence(t *c.C) {
	t.Skip("test intermittently fails due to host bind mount leaks, see https://github.com/flynn/flynn/issues/1125")

	// most of the volume tests (snapshotting, quotas, etc) are unit tests under their own package.
	// these tests exist to cover the last mile where volumes are bind-mounted into containers.

	cluster := s.clusterClient(t)
	h := s.anyHostClient(t)

	// create a volume!
	vol, err := h.CreateVolume("default")
	t.Assert(err, c.IsNil)
	defer func() {
		t.Assert(h.DestroyVolume(vol.ID), c.IsNil)
	}()

	// create first job
	cmd, service, err := makeIshApp(cluster, h, s.discoverdClient(t), host.ContainerConfig{
		Volumes: []host.VolumeBinding{{
			Target:    "/vol",
			VolumeID:  vol.ID,
			Writeable: true,
		}},
	})
	t.Assert(err, c.IsNil)
	defer cmd.Kill()
	// add data to the volume
	resp, err := runIshCommand(service, "echo 'testcontent' > /vol/alpha ; echo $?")
	t.Assert(err, c.IsNil)
	t.Assert(resp, c.Equals, "0\n")

	// start another one that mounts the same volume
	cmd, service, err = makeIshApp(cluster, h, s.discoverdClient(t), host.ContainerConfig{
		Volumes: []host.VolumeBinding{{
			Target:    "/vol",
			VolumeID:  vol.ID,
			Writeable: false,
		}},
	})
	t.Assert(err, c.IsNil)
	defer cmd.Kill()
	// read data back from the volume
	resp, err = runIshCommand(service, "cat /vol/alpha")
	t.Assert(err, c.IsNil)
	t.Assert(resp, c.Equals, "testcontent\n")
}
Example #7
0
func (s *ZDiscoverdSuite) TestDeploy(t *c.C) {
	// ensure we have enough hosts in the cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("cannot deploy discoverd in a single node cluster")
	}

	client := s.controllerClient(t)
	app, err := client.GetApp("discoverd")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			if event.Status == "complete" {
				debugf(t, "got deployment event: %s", event.Status)
				break loop
			}
			if event.Status == "failed" {
				t.Fatal("the deployment failed")
			}
			debugf(t, "got deployment event: %s %s", event.JobType, event.JobState)
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for deployment event")
		}
	}
}
Example #8
0
func (s *SchedulerSuite) TestOmniJobs(t *c.C) {
	t.Skip("skipping due to router startup hang: https://github.com/flynn/flynn/issues/1855")
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: make(map[string]int),
	}

	current := make(map[string]int)
	updates := []map[string]int{
		{"printer": 2},
		{"printer": 3, "omni": 2},
		{"printer": 1, "omni": 1},
	}

	for _, procs := range updates {
		debugf(t, "scaling formation to %v", procs)
		formation.Processes = procs
		t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)

		expected := s.controllerClient(t).ExpectedScalingEvents(current, procs, release.Processes, testCluster.Size())
		err = watcher.WaitFor(expected, scaleTimeout, nil)
		t.Assert(err, c.IsNil)

		current = procs
	}

	// Check that new hosts get omni jobs
	newHosts := s.addHosts(t, 2, false)
	defer s.removeHosts(t, newHosts)
	err = watcher.WaitFor(ct.JobEvents{"omni": {"up": 2}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)
}
Example #9
0
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine scheduler backoff period")
	}
	backoffPeriod := testCluster.BackoffPeriod()
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	events := make(chan *ct.JobEvent)
	stream, err := s.controllerClient(t).StreamJobEvents(app.ID, 0, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	_, id := waitForJobEvents(t, stream, events, jobEvents{"printer": {"up": 1}})

	// First restart: scheduled immediately
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", startTimeout)

	// Second restart after 1 * backoffPeriod
	start := time.Now()
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", backoffPeriod+startTimeout)
	t.Assert(time.Now().Sub(start) > backoffPeriod, c.Equals, true)

	// Third restart after 2 * backoffPeriod
	start = time.Now()
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", 2*backoffPeriod+startTimeout)
	t.Assert(time.Now().Sub(start) > 2*backoffPeriod, c.Equals, true)

	// After backoffPeriod has elapsed: scheduled immediately
	time.Sleep(backoffPeriod)
	s.stopJob(t, id)
	waitForJobRestart(t, stream, events, "printer", startTimeout)
}
Example #10
0
func (s *HostUpdateSuite) TestUpdateLogs(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	instance := s.addHost(t)
	defer s.removeHost(t, instance)
	httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}}
	client := cluster.NewHost(instance.ID, fmt.Sprintf("http://%s:1113", instance.IP), httpClient)

	// start partial logger job
	cmd := exec.JobUsingHost(
		client,
		exec.DockerImage(imageURIs["test-apps"]),
		&host.Job{
			Config: host.ContainerConfig{Cmd: []string{"/bin/partial-logger"}},
			Metadata: map[string]string{
				"flynn-controller.app": "partial-logger",
			},
		},
	)
	t.Assert(cmd.Start(), c.IsNil)
	defer cmd.Kill()

	// wait for partial line
	_, err := s.discoverdClient(t).Instances("partial-logger", 10*time.Second)
	t.Assert(err, c.IsNil)

	// update flynn-host
	pid, err := client.Update("/usr/local/bin/flynn-host", "daemon", "--id", cmd.HostID)
	t.Assert(err, c.IsNil)
	// update the pid file so removeHost works
	t.Assert(instance.Run(fmt.Sprintf("echo -n %d | sudo tee /var/run/flynn-host.pid", pid), nil), c.IsNil)

	// finish logging
	t.Assert(client.SignalJob(cmd.Job.ID, int(syscall.SIGUSR1)), c.IsNil)

	// check we get a single log line
	logc, err := logaggc.New("")
	t.Assert(err, c.IsNil)
	log, err := logc.GetLog("partial-logger", &logaggc.LogOpts{Follow: true})
	t.Assert(err, c.IsNil)
	defer log.Close()
	msgs := make(chan *logaggc.Message)
	go func() {
		defer close(msgs)
		dec := json.NewDecoder(log)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				debugf(t, "error decoding message: %s", err)
				return
			}
			msgs <- &msg
		}
	}()
	for {
		select {
		case msg, ok := <-msgs:
			if !ok {
				t.Fatal("error getting log")
			}
			if msg.Stream == "stdout" {
				t.Assert(msg.Msg, c.Equals, "hello world")
				return
			}
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for log")
		}
	}
}
Example #11
0
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tagged based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
Example #12
0
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new version.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var versionJSON bytes.Buffer
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: releaseScript, Stdout: &versionJSON, Stderr: logWriter}), c.IsNil)
	var versions map[string]string
	t.Assert(json.Unmarshal(versionJSON.Bytes(), &versions), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstore := struct{ Blobstore string }{buildHost.IP + ":8080"}
	installHost := releaseCluster.Instances[3]
	var script bytes.Buffer
	installScript.Execute(&script, blobstore)
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20150131.0-test")

	// check rebuilt images were downloaded
	for name, id := range versions {
		expected := fmt.Sprintf("%s image %s downloaded", name, id)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to download %s %s`, name, id)
		}
	}

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script = bytes.Buffer{}
	updateScript.Execute(&script, blobstore)
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range versions {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulled image" host=%s name=%s`, host.ID, name)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	// create a controller client for the new cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		image := "flynn/" + app
		if app == "gitreceive" {
			image = "flynn/receiver"
		}
		debugf(t, "checking new %s release is using image %s", app, versions[image])
		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app)
		}
		release, err := client.GetAppRelease(app)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app, release.ID)
		artifact, err := client.GetArtifact(release.ArtifactID)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app, artifact)
		uri, err := url.Parse(artifact.URI)
		t.Assert(err, c.IsNil)
		t.Assert(uri.Query().Get("id"), c.Equals, versions[image])
	}
}
Example #13
0
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"worker":    2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}
Example #14
0
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new version.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var versionJSON bytes.Buffer
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: releaseScript, Stdout: &versionJSON, Stderr: logWriter}), c.IsNil)
	var versions map[string]string
	t.Assert(json.Unmarshal(versionJSON.Bytes(), &versions), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstore := struct{ Blobstore string }{buildHost.IP + ":8080"}
	installHost := releaseCluster.Instances[3]
	var script bytes.Buffer
	installScript.Execute(&script, blobstore)
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20150131.0-test")

	// check rebuilt images were downloaded
	for name, id := range versions {
		expected := fmt.Sprintf("%s image %s downloaded", name, id)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to download %s %s`, name, id)
		}
	}

	// installing on an instance with Flynn running should not fail
	script.Reset()
	installScript.Execute(&script, blobstore)
	t.Assert(buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: logWriter, Stderr: logWriter}), c.IsNil)

	// create a controller client for the release cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// deploy a slug based app
	slugApp := &ct.App{}
	t.Assert(client.CreateApp(slugApp), c.IsNil)
	gitreceive, err := client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: gitreceive.Env["SLUGRUNNER_IMAGE_URI"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)
	slugArtifact := &ct.Artifact{Type: host.ArtifactTypeFile, URI: fmt.Sprintf("http://%s:8080/slug.tgz", buildHost.IP)}
	t.Assert(client.CreateArtifact(slugArtifact), c.IsNil)
	release := &ct.Release{
		ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID},
		Processes:   map[string]ct.ProcessType{"web": {Cmd: []string{"bin/http"}}},
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil)
	watcher, err := client.WatchJobEvents(slugApp.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     slugApp.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script.Reset()
	updateScript.Execute(&script, blobstore)
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range versions {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulled image" host=%s name=%s`, host.ID, name)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	assertImage := func(uri, image string) {
		u, err := url.Parse(uri)
		t.Assert(err, c.IsNil)
		t.Assert(u.Query().Get("id"), c.Equals, versions[image])
	}

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		if app.ImageOnly {
			continue // we don't deploy ImageOnly updates
		}
		if app.Image == "" {
			app.Image = "flynn/" + app.Name
		}
		debugf(t, "checking new %s release is using image %s", app.Name, versions[app.Image])
		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app.Name)
		}
		release, err := client.GetAppRelease(app.Name)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app.Name, release.ID)
		artifact, err := client.GetArtifact(release.ImageArtifactID())
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app.Name, artifact)
		assertImage(artifact.URI, app.Image)
	}

	// check gitreceive has the correct slug env vars
	gitreceive, err = client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	assertImage(gitreceive.Env["SLUGBUILDER_IMAGE_URI"], "flynn/slugbuilder")
	assertImage(gitreceive.Env["SLUGRUNNER_IMAGE_URI"], "flynn/slugrunner")

	// check slug based app was deployed correctly
	release, err = client.GetAppRelease(slugApp.Name)
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ImageArtifactID())
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "flynn/slugrunner")
}
Example #15
0
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			event := &ct.DeploymentEvent{
				ReleaseID: releaseID,
				JobType:   "omni",
				JobState:  state,
				Status:    "running",
			}
			expected = append(expected, event)
		}
	}
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)
}
Example #16
0
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// use a function to create the event stream as a new stream will be needed
	// after deploying the controller
	var events chan *ct.DeploymentEvent
	var eventStream stream.Stream
	connectStream := func() {
		events = make(chan *ct.DeploymentEvent)
		err := attempt.Strategy{
			Total: 10 * time.Second,
			Delay: 500 * time.Millisecond,
		}.Run(func() (err error) {
			eventStream, err = client.StreamDeployment(deployment.ID, events)
			return
		})
		t.Assert(err, c.IsNil)
	}
	connectStream()
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				// reconnect the stream as it may of been closed
				// due to the controller being deployed
				debug(t, "reconnecting deployment event stream")
				connectStream()
				continue
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).ListHosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, host := range hosts {
		for _, job := range host.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"deployer":  2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}