func (s *SchedulerSuite) TestOmniJobs(t *c.C) {
	if args.ClusterAPI == "" {
		t.Skip("cannot boot new hosts")
	}

	app, release := s.createApp(t)

	stream, err := s.controllerClient(t).StreamJobEvents(app.ID, 0)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: make(map[string]int),
	}

	current := make(map[string]int)
	updates := []map[string]int{
		{"printer": 2},
		{"printer": 3, "omni": 2},
		{"printer": 1, "omni": 1},
	}

	for _, procs := range updates {
		debugf(t, "scaling formation to %v", procs)
		formation.Processes = procs
		t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)

		expected := make(jobEvents)
		for typ, count := range procs {
			diff := count - current[typ]
			if typ == "omni" {
				diff *= testCluster.Size()
			}
			if diff > 0 {
				expected[typ] = map[string]int{"up": diff}
			} else {
				expected[typ] = map[string]int{"down": -diff}
			}
		}
		for typ, count := range current {
			if _, ok := procs[typ]; !ok {
				diff := count
				if typ == "omni" {
					diff *= testCluster.Size()
				}
				expected[typ] = map[string]int{"down": diff}
			}
		}
		waitForJobEvents(t, stream.Events, expected)

		current = procs
	}

	// Check that new hosts get omni jobs
	newHosts := s.addHosts(t, 2)
	defer s.removeHosts(t, newHosts)
	waitForJobEvents(t, stream.Events, jobEvents{"omni": {"up": 2}})
}
func (s *VolumeSuite) TestInterhostVolumeTransmitAPI(t *c.C) {
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) < 2 {
		t.Skip("need multiple hosts for this test")
	}
	s.doVolumeTransmitAPI(hosts[0], hosts[1], t)
}
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	// To run this test locally, set BACKOFF_PERIOD on the flynn host machine
	var backoffPeriod time.Duration
	var err error
	if testCluster == nil {
		backoffPeriod, err = time.ParseDuration(os.Getenv("BACKOFF_PERIOD"))
		if err != nil {
			t.Skip("cannot determine backoff period")
		}
	} else {
		backoffPeriod = testCluster.BackoffPeriod()
	}
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)

	var id string
	var assignId = func(j *ct.Job) error {
		debugf(t, "got job event: %s %s", j.ID, j.State)
		id = j.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, scaleTimeout, assignId)
	t.Assert(err, c.IsNil)

	waitForRestart := func(duration time.Duration) {
		start := time.Now()
		s.stopJob(t, id)
		debugf(t, "expecting new job to start in %s", duration)
		err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, duration+startTimeout, assignId)
		t.Assert(err, c.IsNil)
		actual := time.Now().Sub(start)
		if actual < duration {
			t.Fatalf("expected new job to start after %s but started after %s", duration, actual)
		}
	}

	waitForRestart(0)
	waitForRestart(backoffPeriod)
	waitForRestart(2 * backoffPeriod)
	debug(t, "waiting for backoff period to expire")
	time.Sleep(backoffPeriod)
	waitForRestart(0)
}
func (s *ZDiscoverdSuite) TestPromoteDemote(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	// ensure we have a 3 node cluster, TODO(jpg): Support running test on anything larger than 2 node cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) != 3 {
		t.Skip("promotion and demotion tests require a 3 node cluster")
	}

	// Check the original number of peers is correct
	initialPeers, err := s.discoverdClient(t).RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(initialPeers), c.Equals, 3)

	// Add a new host to the cluster, initially it will join as a proxy
	newHost := s.addHost(t, "discoverd")
	defer s.removeHost(t, newHost, "discoverd")

	// Sleep just a little to give discoverd time to get started
	time.Sleep(2 * time.Second)

	// Promote the new node to a Raft member
	url := "http://" + newHost.IP + ":1111"
	dd := discoverd.NewClientWithURL(url)
	err = dd.Promote(url)
	t.Assert(err, c.IsNil)

	// Check that we now have one additional peer, also ensure our new peer is in the list
	newPeers, err := s.discoverdClient(t).RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(newPeers), c.Equals, 4)
	t.Assert(peerPresent(newHost, newPeers), c.Equals, true)

	// Now demote the newly promoted node
	err = dd.Demote(url)
	t.Assert(err, c.IsNil)

	// XXX(jpg): Better way to wait for leadership?
	time.Sleep(2 * time.Second)

	// We are going to ask the leader for the list of peers as it's definitely canonical
	leader, err := s.discoverdClient(t).RaftLeader()
	t.Assert(err, c.IsNil)
	dd = discoverd.NewClientWithURL(leader.Host)

	// There should now be only the original peers, additionally make sure our host isn't one of them
	finalPeers, err := dd.RaftPeers()
	t.Assert(err, c.IsNil)
	t.Assert(len(finalPeers), c.Equals, 3)
	t.Assert(peerPresent(newHost, finalPeers), c.Equals, false)
}
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine scheduler backoff period")
	}
	backoffPeriod := testCluster.BackoffPeriod()
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)

	var id string
	var assignId = func(e *ct.Job) error {
		id = e.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, scaleTimeout, assignId)
	t.Assert(err, c.IsNil)

	// First restart: scheduled immediately
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, startTimeout, assignId)
	t.Assert(err, c.IsNil)

	// Second restart after 1 * backoffPeriod
	start := time.Now()
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, backoffPeriod+startTimeout, assignId)
	t.Assert(err, c.IsNil)
	t.Assert(time.Now().Sub(start) > backoffPeriod, c.Equals, true)

	// Third restart after 2 * backoffPeriod
	start = time.Now()
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, 2*backoffPeriod+startTimeout, assignId)
	t.Assert(err, c.IsNil)
	t.Assert(time.Now().Sub(start) > 2*backoffPeriod, c.Equals, true)

	// After backoffPeriod has elapsed: scheduled immediately
	time.Sleep(backoffPeriod)
	s.stopJob(t, id)
	err = watcher.WaitFor(ct.JobEvents{"printer": {"up": 1}}, startTimeout, assignId)
	t.Assert(err, c.IsNil)
}
func (s *HostSuite) TestVolumePersistence(t *c.C) {
	t.Skip("test intermittently fails due to host bind mount leaks, see https://github.com/flynn/flynn/issues/1125")

	// most of the volume tests (snapshotting, quotas, etc) are unit tests under their own package.
	// these tests exist to cover the last mile where volumes are bind-mounted into containers.

	cluster := s.clusterClient(t)
	h := s.anyHostClient(t)

	// create a volume!
	vol, err := h.CreateVolume("default")
	t.Assert(err, c.IsNil)
	defer func() {
		t.Assert(h.DestroyVolume(vol.ID), c.IsNil)
	}()

	// create first job
	cmd, service, err := makeIshApp(cluster, h, s.discoverdClient(t), host.ContainerConfig{
		Volumes: []host.VolumeBinding{{
			Target:    "/vol",
			VolumeID:  vol.ID,
			Writeable: true,
		}},
	})
	t.Assert(err, c.IsNil)
	defer cmd.Kill()
	// add data to the volume
	resp, err := runIshCommand(service, "echo 'testcontent' > /vol/alpha ; echo $?")
	t.Assert(err, c.IsNil)
	t.Assert(resp, c.Equals, "0\n")

	// start another one that mounts the same volume
	cmd, service, err = makeIshApp(cluster, h, s.discoverdClient(t), host.ContainerConfig{
		Volumes: []host.VolumeBinding{{
			Target:    "/vol",
			VolumeID:  vol.ID,
			Writeable: false,
		}},
	})
	t.Assert(err, c.IsNil)
	defer cmd.Kill()
	// read data back from the volume
	resp, err = runIshCommand(service, "cat /vol/alpha")
	t.Assert(err, c.IsNil)
	t.Assert(resp, c.Equals, "testcontent\n")
}
func (s *ZDiscoverdSuite) TestDeploy(t *c.C) {
	// ensure we have enough hosts in the cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("cannot deploy discoverd in a single node cluster")
	}

	client := s.controllerClient(t)
	app, err := client.GetApp("discoverd")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			if event.Status == "complete" {
				debugf(t, "got deployment event: %s", event.Status)
				break loop
			}
			if event.Status == "failed" {
				t.Fatal("the deployment failed")
			}
			debugf(t, "got deployment event: %s %s", event.JobType, event.JobState)
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for deployment event")
		}
	}
}
func (s *SchedulerSuite) TestOmniJobs(t *c.C) {
	t.Skip("skipping due to router startup hang: https://github.com/flynn/flynn/issues/1855")

	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: make(map[string]int),
	}

	current := make(map[string]int)
	updates := []map[string]int{
		{"printer": 2},
		{"printer": 3, "omni": 2},
		{"printer": 1, "omni": 1},
	}

	for _, procs := range updates {
		debugf(t, "scaling formation to %v", procs)
		formation.Processes = procs
		t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)
		expected := s.controllerClient(t).ExpectedScalingEvents(current, procs, release.Processes, testCluster.Size())
		err = watcher.WaitFor(expected, scaleTimeout, nil)
		t.Assert(err, c.IsNil)
		current = procs
	}

	// Check that new hosts get omni jobs
	newHosts := s.addHosts(t, 2, false)
	defer s.removeHosts(t, newHosts)
	err = watcher.WaitFor(ct.JobEvents{"omni": {"up": 2}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)
}
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine scheduler backoff period")
	}
	backoffPeriod := testCluster.BackoffPeriod()
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	events := make(chan *ct.JobEvent)
	stream, err := s.controllerClient(t).StreamJobEvents(app.ID, 0, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	_, id := waitForJobEvents(t, stream, events, jobEvents{"printer": {"up": 1}})

	// First restart: scheduled immediately
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", startTimeout)

	// Second restart after 1 * backoffPeriod
	start := time.Now()
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", backoffPeriod+startTimeout)
	t.Assert(time.Now().Sub(start) > backoffPeriod, c.Equals, true)

	// Third restart after 2 * backoffPeriod
	start = time.Now()
	s.stopJob(t, id)
	id = waitForJobRestart(t, stream, events, "printer", 2*backoffPeriod+startTimeout)
	t.Assert(time.Now().Sub(start) > 2*backoffPeriod, c.Equals, true)

	// After backoffPeriod has elapsed: scheduled immediately
	time.Sleep(backoffPeriod)
	s.stopJob(t, id)
	waitForJobRestart(t, stream, events, "printer", startTimeout)
}
func (s *HostUpdateSuite) TestUpdateLogs(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	instance := s.addHost(t)
	defer s.removeHost(t, instance)
	httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}}
	client := cluster.NewHost(instance.ID, fmt.Sprintf("http://%s:1113", instance.IP), httpClient)

	// start partial logger job
	cmd := exec.JobUsingHost(
		client,
		exec.DockerImage(imageURIs["test-apps"]),
		&host.Job{
			Config: host.ContainerConfig{Cmd: []string{"/bin/partial-logger"}},
			Metadata: map[string]string{
				"flynn-controller.app": "partial-logger",
			},
		},
	)
	t.Assert(cmd.Start(), c.IsNil)
	defer cmd.Kill()

	// wait for partial line
	_, err := s.discoverdClient(t).Instances("partial-logger", 10*time.Second)
	t.Assert(err, c.IsNil)

	// update flynn-host
	pid, err := client.Update("/usr/local/bin/flynn-host", "daemon", "--id", cmd.HostID)
	t.Assert(err, c.IsNil)
	// update the pid file so removeHost works
	t.Assert(instance.Run(fmt.Sprintf("echo -n %d | sudo tee /var/run/flynn-host.pid", pid), nil), c.IsNil)

	// finish logging
	t.Assert(client.SignalJob(cmd.Job.ID, int(syscall.SIGUSR1)), c.IsNil)

	// check we get a single log line
	logc, err := logaggc.New("")
	t.Assert(err, c.IsNil)
	log, err := logc.GetLog("partial-logger", &logaggc.LogOpts{Follow: true})
	t.Assert(err, c.IsNil)
	defer log.Close()
	msgs := make(chan *logaggc.Message)
	go func() {
		defer close(msgs)
		dec := json.NewDecoder(log)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				debugf(t, "error decoding message: %s", err)
				return
			}
			msgs <- &msg
		}
	}()
	for {
		select {
		case msg, ok := <-msgs:
			if !ok {
				t.Fatal("error getting log")
			}
			if msg.Stream == "stdout" {
				t.Assert(msg.Msg, c.Equals, "hello world")
				return
			}
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for log")
		}
	}
}
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tag-based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new version.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var versionJSON bytes.Buffer
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: releaseScript, Stdout: &versionJSON, Stderr: logWriter}), c.IsNil)
	var versions map[string]string
	t.Assert(json.Unmarshal(versionJSON.Bytes(), &versions), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstore := struct{ Blobstore string }{buildHost.IP + ":8080"}
	installHost := releaseCluster.Instances[3]
	var script bytes.Buffer
	installScript.Execute(&script, blobstore)
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20150131.0-test")

	// check rebuilt images were downloaded
	for name, id := range versions {
		expected := fmt.Sprintf("%s image %s downloaded", name, id)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to download %s %s`, name, id)
		}
	}

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script = bytes.Buffer{}
	updateScript.Execute(&script, blobstore)
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range versions {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulled image" host=%s name=%s`, host.ID, name)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	// create a controller client for the new cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		image := "flynn/" + app
		if app == "gitreceive" {
			image = "flynn/receiver"
		}
		debugf(t, "checking new %s release is using image %s", app, versions[image])

		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app)
		}

		release, err := client.GetAppRelease(app)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app, release.ID)

		artifact, err := client.GetArtifact(release.ArtifactID)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app, artifact)

		uri, err := url.Parse(artifact.URI)
		t.Assert(err, c.IsNil)
		t.Assert(uri.Query().Get("id"), c.Equals, versions[image])
	}
}
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"worker":    2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new version.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var versionJSON bytes.Buffer
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: releaseScript, Stdout: &versionJSON, Stderr: logWriter}), c.IsNil)
	var versions map[string]string
	t.Assert(json.Unmarshal(versionJSON.Bytes(), &versions), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstore := struct{ Blobstore string }{buildHost.IP + ":8080"}
	installHost := releaseCluster.Instances[3]
	var script bytes.Buffer
	installScript.Execute(&script, blobstore)
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20150131.0-test")

	// check rebuilt images were downloaded
	for name, id := range versions {
		expected := fmt.Sprintf("%s image %s downloaded", name, id)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to download %s %s`, name, id)
		}
	}

	// installing on an instance with Flynn running should not fail
	script.Reset()
	installScript.Execute(&script, blobstore)
	t.Assert(buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: logWriter, Stderr: logWriter}), c.IsNil)

	// create a controller client for the release cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// deploy a slug based app
	slugApp := &ct.App{}
	t.Assert(client.CreateApp(slugApp), c.IsNil)
	gitreceive, err := client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: gitreceive.Env["SLUGRUNNER_IMAGE_URI"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)
	slugArtifact := &ct.Artifact{Type: host.ArtifactTypeFile, URI: fmt.Sprintf("http://%s:8080/slug.tgz", buildHost.IP)}
	t.Assert(client.CreateArtifact(slugArtifact), c.IsNil)
	release := &ct.Release{
		ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID},
		Processes:   map[string]ct.ProcessType{"web": {Cmd: []string{"bin/http"}}},
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil)
	watcher, err := client.WatchJobEvents(slugApp.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     slugApp.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script.Reset()
	updateScript.Execute(&script, blobstore)
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range versions {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulled image" host=%s name=%s`, host.ID, name)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	assertImage := func(uri, image string) {
		u, err := url.Parse(uri)
		t.Assert(err, c.IsNil)
		t.Assert(u.Query().Get("id"), c.Equals, versions[image])
	}

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		if app.ImageOnly {
			continue // we don't deploy ImageOnly updates
		}
		if app.Image == "" {
			app.Image = "flynn/" + app.Name
		}
		debugf(t, "checking new %s release is using image %s", app.Name, versions[app.Image])

		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app.Name)
		}

		release, err := client.GetAppRelease(app.Name)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app.Name, release.ID)

		artifact, err := client.GetArtifact(release.ImageArtifactID())
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app.Name, artifact)

		assertImage(artifact.URI, app.Image)
	}

	// check gitreceive has the correct slug env vars
	gitreceive, err = client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	assertImage(gitreceive.Env["SLUGBUILDER_IMAGE_URI"], "flynn/slugbuilder")
	assertImage(gitreceive.Env["SLUGRUNNER_IMAGE_URI"], "flynn/slugrunner")

	// check slug based app was deployed correctly
	release, err = client.GetAppRelease(slugApp.Name)
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ImageArtifactID())
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "flynn/slugrunner")
}
func (s *DeployerSuite) TestOmniProcess(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// create and scale an omni release
	omniScale := 2
	totalJobs := omniScale * testCluster.Size()
	client := s.controllerClient(t)
	app, release := s.createApp(t)

	watcher, err := client.WatchJobEvents(app.Name, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"omni": omniScale},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"omni": {ct.JobStateUp: totalJobs}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// deploy using all-at-once and check we get the correct events
	app.Strategy = "all-at-once"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	expected := make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	appendEvents := func(releaseID string, state ct.JobState, count int) {
		for i := 0; i < count; i++ {
			event := &ct.DeploymentEvent{
				ReleaseID: releaseID,
				JobType:   "omni",
				JobState:  state,
				Status:    "running",
			}
			expected = append(expected, event)
		}
	}
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, totalJobs)
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, totalJobs)
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, totalJobs)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)

	// deploy using one-by-one and check we get the correct events
	app.Strategy = "one-by-one"
	t.Assert(client.UpdateApp(app), c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err = client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	events = make(chan *ct.DeploymentEvent)
	stream, err = client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)

	expected = make([]*ct.DeploymentEvent, 0, 4*totalJobs+1)
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "pending"})
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateStarting, testCluster.Size())
	appendEvents(deployment.NewReleaseID, ct.JobStateUp, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateStopping, testCluster.Size())
	appendEvents(deployment.OldReleaseID, ct.JobStateDown, testCluster.Size())
	expected = append(expected, &ct.DeploymentEvent{ReleaseID: deployment.NewReleaseID, Status: "complete"})
	waitForDeploymentEvents(t, events, expected)
}
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// use a function to create the event stream as a new stream will be needed
	// after deploying the controller
	var events chan *ct.DeploymentEvent
	var eventStream stream.Stream
	connectStream := func() {
		events = make(chan *ct.DeploymentEvent)
		err := attempt.Strategy{
			Total: 10 * time.Second,
			Delay: 500 * time.Millisecond,
		}.Run(func() (err error) {
			eventStream, err = client.StreamDeployment(deployment.ID, events)
			return
		})
		t.Assert(err, c.IsNil)
	}
	connectStream()
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				// reconnect the stream as it may have been closed
				// due to the controller being deployed
				debug(t, "reconnecting deployment event stream")
				connectStream()
				continue
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).ListHosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, host := range hosts {
		for _, job := range host.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"deployer":  2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}