func waitForDeploymentEvents(t *c.C, stream chan *ct.DeploymentEvent, expected []*ct.DeploymentEvent) {
	debugf(t, "waiting for %d deployment events", len(expected))
	actual := make([]*ct.DeploymentEvent, 0, len(expected))
loop:
	for {
		select {
		case e, ok := <-stream:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			actual = append(actual, e)
			if e.Status == "complete" || e.Status == "failed" {
				debugf(t, "got deployment event: %s", e.Status)
				break loop
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for deployment event")
		}
	}
	compare := func(t *c.C, i *ct.DeploymentEvent, j *ct.DeploymentEvent) {
		t.Assert(i.ReleaseID, c.Equals, j.ReleaseID)
		t.Assert(i.JobType, c.Equals, j.JobType)
		t.Assert(i.JobState, c.Equals, j.JobState)
		t.Assert(i.Status, c.Equals, j.Status)
		t.Assert(i.Error, c.Equals, j.Error)
	}
	for i, e := range expected {
		compare(t, actual[i], e)
	}
}
func (s *HostSuite) TestAttachFinishedInteractiveJob(t *c.C) {
	cluster := s.clusterClient(t)

	// run a quick interactive job
	cmd := exec.CommandUsingCluster(cluster, exec.DockerImage(imageURIs["test-apps"]), "/bin/true")
	cmd.TTY = true
	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()
	select {
	case err := <-runErr:
		t.Assert(err, c.IsNil)
	case <-time.After(30 * time.Second):
		t.Fatal("timed out waiting for interactive job")
	}

	h, err := cluster.Host(cmd.HostID)
	t.Assert(err, c.IsNil)

	// Getting the logs for the job should fail, as it has none because it was
	// interactive
	attachErr := make(chan error)
	go func() {
		_, err = h.Attach(&host.AttachReq{JobID: cmd.Job.ID, Flags: host.AttachFlagLogs}, false)
		attachErr <- err
	}()
	select {
	case err := <-attachErr:
		t.Assert(err, c.NotNil)
	case <-time.After(time.Second):
		t.Error("timed out waiting for attach")
	}
}
func (s *SchedulerSuite) TestTCPApp(t *c.C) {
	app, _ := s.createApp(t)

	t.Assert(flynn(t, "/", "-a", app.Name, "scale", "echoer=1"), Succeeds)

	newRoute := flynn(t, "/", "-a", app.Name, "route", "add", "tcp", "-s", "echo-service")
	t.Assert(newRoute, Succeeds)
	t.Assert(newRoute.Output, Matches, `.+ on port \d+`)
	str := strings.Split(strings.TrimSpace(string(newRoute.Output)), " ")
	port := str[len(str)-1]

	// use Attempts to give the processes time to start
	if err := Attempts.Run(func() error {
		servAddr := routerIP + ":" + port
		conn, err := net.Dial("tcp", servAddr)
		if err != nil {
			return err
		}
		defer conn.Close()
		msg := []byte("hello there!\n")
		_, err = conn.Write(msg)
		if err != nil {
			return err
		}
		reply := make([]byte, len(msg))
		_, err = conn.Read(reply)
		if err != nil {
			return err
		}
		t.Assert(reply, c.DeepEquals, msg)
		return nil
	}); err != nil {
		t.Fatal(err)
	}
}
func (s *HostSuite) TestResourceLimits(t *c.C) {
	cmd := exec.JobUsingCluster(
		s.clusterClient(t),
		exec.DockerImage(imageURIs["test-apps"]),
		&host.Job{
			Config:    host.ContainerConfig{Args: []string{"sh", "-c", resourceCmd}},
			Resources: testResources(),
		},
	)
	var out bytes.Buffer
	cmd.Stdout = &out

	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()
	select {
	case err := <-runErr:
		t.Assert(err, c.IsNil)
	case <-time.After(30 * time.Second):
		t.Fatal("timed out waiting for resource limits job")
	}

	assertResourceLimits(t, out.String())
}
func (s *HostSuite) TestDevStdout(t *c.C) {
	cmd := exec.CommandUsingCluster(
		s.clusterClient(t),
		s.createArtifact(t, "test-apps"),
		"sh",
	)
	cmd.Stdin = strings.NewReader(`
echo foo > /dev/stdout
echo bar > /dev/stderr
echo "SUBSHELL: $(echo baz > /dev/stdout)"
echo "SUBSHELL: $(echo qux 2>&1 > /dev/stderr)" >&2`)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()
	select {
	case err := <-runErr:
		t.Assert(err, c.IsNil)
	case <-time.After(30 * time.Second):
		t.Fatal("timed out waiting for /dev/stdout job")
	}

	t.Assert(stdout.String(), c.Equals, "foo\nSUBSHELL: baz\n")
	t.Assert(stderr.String(), c.Equals, "bar\nSUBSHELL: qux\n")
}
func (s *DeployerSuite) waitForJobEvents(t *c.C, jobType string, events chan *ct.Job, expected []*ct.Job) {
	debugf(t, "waiting for %d job events", len(expected))
	actual := make([]*ct.Job, 0, len(expected))
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of job event stream")
			}
			// only track up and down events as we can't always
			// predict the order of pending / starting / stopping
			// events when scaling multiple jobs
			if e.State != ct.JobStateUp && e.State != ct.JobStateDown {
				continue
			}
			actual = append(actual, e)
			if len(actual) == len(expected) {
				break loop
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for job events")
		}
	}
	for i, event := range expected {
		t.Assert(actual[i].ReleaseID, c.Equals, event.ReleaseID)
		t.Assert(actual[i].State, c.Equals, event.State)
		t.Assert(actual[i].Type, c.Equals, jobType)
	}
}
func (s *HostSuite) TestVolumeDeleteOnStop(t *c.C) {
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	h := hosts[0]

	// stream job events so we can wait for cleanup events
	events := make(chan *host.Event)
	stream, err := h.StreamEvents("all", events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitCleanup := func(jobID string) {
		timeout := time.After(30 * time.Second)
		for {
			select {
			case event := <-events:
				if event.JobID == jobID && event.Event == host.JobEventCleanup {
					return
				}
			case <-timeout:
				t.Fatal("timed out waiting for cleanup event")
			}
		}
	}

	for _, deleteOnStop := range []bool{true, false} {
		job := &host.Job{
			Config: host.ContainerConfig{
				Args:       []string{"sh", "-c", "ls -d /foo"},
				DisableLog: true,
			},
		}

		// provision a volume
		req := &ct.VolumeReq{Path: "/foo", DeleteOnStop: deleteOnStop}
		vol, err := utils.ProvisionVolume(req, h, job)
		t.Assert(err, c.IsNil)
		defer h.DestroyVolume(vol.ID)

		// run the job
		cmd := exec.JobUsingCluster(s.clusterClient(t), s.createArtifact(t, "test-apps"), job)
		cmd.HostID = h.ID()
		out, err := cmd.CombinedOutput()
		t.Assert(err, c.IsNil)
		t.Assert(string(out), c.Equals, "/foo\n")

		// wait for a cleanup event
		waitCleanup(job.ID)

		// check if the volume was deleted or not
		vol, err = h.GetVolume(vol.ID)
		if deleteOnStop {
			t.Assert(hh.IsObjectNotFoundError(err), c.Equals, true)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
}
func (s *GitDeploySuite) TestCancel(t *c.C) {
	r := s.newGitRepo(t, "cancel-hang")
	t.Assert(r.flynn("create", "cancel-hang"), Succeeds)
	t.Assert(r.flynn("env", "set", "FOO=bar", "BUILDPACK_URL=https://github.com/kr/heroku-buildpack-inline"), Succeeds)

	// start watching for slugbuilder events
	watcher, err := s.controllerClient(t).WatchJobEvents("cancel-hang", "")
	t.Assert(err, c.IsNil)

	// start push
	cmd := exec.Command("git", "push", "flynn", "master")
	// put the command in its own process group (to emulate the way shells handle Ctrl-C)
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	cmd.Dir = r.dir
	var stdout io.Reader
	stdout, _ = cmd.StdoutPipe()
	cmd.Stderr = cmd.Stdout
	out := &bytes.Buffer{}
	stdout = io.TeeReader(stdout, out)
	err = cmd.Start()
	t.Assert(err, c.IsNil)

	done := make(chan struct{})
	go func() {
		select {
		case <-done:
		case <-time.After(30 * time.Second):
			cmd.Process.Signal(syscall.SIGTERM)
			cmd.Wait()
			t.Fatal("git push timed out")
		}
	}()

	// wait for sentinel
	sc := bufio.NewScanner(stdout)
	found := false
	for sc.Scan() {
		if strings.Contains(sc.Text(), "hanging...") {
			found = true
			break
		}
	}
	t.Log(out.String())
	t.Assert(found, c.Equals, true)

	// send Ctrl-C to git process group
	t.Assert(syscall.Kill(-cmd.Process.Pid, syscall.SIGINT), c.IsNil)
	go io.Copy(ioutil.Discard, stdout)
	cmd.Wait()
	close(done)

	// check that slugbuilder exits immediately
	err = watcher.WaitFor(ct.JobEvents{"slugbuilder": {ct.JobStateUp: 1, ct.JobStateDown: 1}}, 10*time.Second, nil)
	t.Assert(err, c.IsNil)
}
// TestAppEvents checks that streaming events for an app only receives events
// for that particular app.
func (s *ControllerSuite) TestAppEvents(t *c.C) {
	client := s.controllerClient(t)
	app1, release1 := s.createApp(t)
	app2, release2 := s.createApp(t)

	// stream events for app1
	events := make(chan *ct.Job)
	stream, err := client.StreamJobEvents(app1.ID, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	runJob := func(appID, releaseID string) {
		rwc, err := client.RunJobAttached(appID, &ct.NewJob{
			ReleaseID:  releaseID,
			Args:       []string{"/bin/true"},
			DisableLog: true,
		})
		t.Assert(err, c.IsNil)
		rwc.Close()
	}

	// generate events for app2 and wait for them
	watcher, err := client.WatchJobEvents(app2.ID, release2.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	runJob(app2.ID, release2.ID)
	t.Assert(watcher.WaitFor(
		ct.JobEvents{"": {ct.JobStateUp: 1, ct.JobStateDown: 1}},
		10*time.Second,
		func(e *ct.Job) error {
			debugf(t, "got %s job event for app2", e.State)
			return nil
		},
	), c.IsNil)

	// generate events for app1
	runJob(app1.ID, release1.ID)

	// check the stream only gets events for app1
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of job event stream")
			}
			t.Assert(e.AppID, c.Equals, app1.ID)
			debugf(t, "got %s job event for app1", e.State)
			if e.State == ct.JobStateDown {
				return
			}
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for job events for app1")
		}
	}
}
func (s *CLISuite) TestDockerPush(t *c.C) {
	// build image with ENV and CMD
	repo := "cli-test-push"
	s.buildDockerImage(t, repo,
		`ENV FOO=BAR`,
		`CMD ["/bin/pingserv"]`,
	)

	// create app
	client := s.controllerClient(t)
	app := &ct.App{Name: "cli-test-docker-push"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// flynn docker push image
	t.Assert(flynn(t, "/", "-a", app.Name, "docker", "push", repo), Succeeds)

	// check app was released with correct env, meta and process type
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(release.Env["FOO"], c.Equals, "BAR")
	t.Assert(release.Meta["docker-receive"], c.Equals, "true")
	t.Assert(release.Processes, c.HasLen, 1)
	proc, ok := release.Processes["app"]
	if !ok {
		t.Fatal(`release missing "app" process type`)
	}
	t.Assert(proc.Args, c.DeepEquals, []string{"/bin/pingserv"})

	// check updated env vars are not overwritten
	//
	// need to remove the tag before pushing as we are using Docker 1.9
	// which does not overwrite tags.
	// TODO: remove this when upgrading Docker > 1.9
	u, err := url.Parse(s.clusterConf(t).DockerPushURL)
	t.Assert(err, c.IsNil)
	tag := fmt.Sprintf("%s/%s:latest", u.Host, app.Name)
	t.Assert(run(t, exec.Command("docker", "rmi", tag)), Succeeds)
	t.Assert(flynn(t, "/", "-a", app.Name, "env", "set", "FOO=BAZ"), Succeeds)
	t.Assert(flynn(t, "/", "-a", app.Name, "docker", "push", repo), Succeeds)
	t.Assert(flynn(t, "/", "-a", app.Name, "env", "get", "FOO"), Outputs, "BAZ\n")

	// check the release can be scaled up
	t.Assert(flynn(t, "/", "-a", app.Name, "scale", "app=1"), Succeeds)

	// check the job is reachable with the app's name in discoverd
	instances, err := s.discoverdClient(t).Instances(app.Name+"-web", 10*time.Second)
	t.Assert(err, c.IsNil)
	res, err := hh.RetryClient.Get("http://" + instances[0].Addr)
	t.Assert(err, c.IsNil)
	defer res.Body.Close()
	body, err := ioutil.ReadAll(res.Body)
	t.Assert(err, c.IsNil)
	t.Assert(string(body), c.Equals, "OK")
}
func (s *HostSuite) TestAddFailingJob(t *c.C) {
	// get a host and watch events
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	h := hosts[0]
	jobID := random.UUID()
	events := make(chan *host.Event)
	stream, err := h.StreamEvents(jobID, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// add a job with a non existent partition
	job := &host.Job{
		ID: jobID,
		ImageArtifact: &host.Artifact{
			Type: host.ArtifactTypeDocker,
			URI:  "http://example.com?name=foo&id=bar",
		},
		Partition: "nonexistent",
	}
	t.Assert(h.AddJob(job), c.IsNil)

	// check we get a create then error event
	actual := make(map[string]*host.Event, 2)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed unexpectedly: %s", stream.Err())
			}
			if _, ok := actual[e.Event]; ok {
				t.Fatalf("unexpected event: %v", e)
			}
			actual[e.Event] = e
			if len(actual) >= 2 {
				break loop
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for job event")
		}
	}
	t.Assert(actual[host.JobEventCreate], c.NotNil)
	e := actual[host.JobEventError]
	t.Assert(e, c.NotNil)
	t.Assert(e.Job, c.NotNil)
	t.Assert(e.Job.Error, c.NotNil)
	t.Assert(*e.Job.Error, c.Equals, `host: invalid job partition "nonexistent"`)
}
func (s *HostSuite) TestUpdateTags(t *c.C) {
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	nextEvent := func() *discoverd.Event {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of discoverd stream")
			}
			return e
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for discoverd event")
		}
		return nil
	}

	var client *cluster.Host
	for {
		e := nextEvent()
		if e.Kind == discoverd.EventKindUp && client == nil {
			client = cluster.NewHost(e.Instance.Meta["id"], e.Instance.Addr, nil, nil)
		}
		if e.Kind == discoverd.EventKindCurrent {
			break
		}
	}
	if client == nil {
		t.Fatal("did not initialize flynn-host client")
	}

	t.Assert(client.UpdateTags(map[string]string{"foo": "bar"}), c.IsNil)

	var meta map[string]string
	for {
		e := nextEvent()
		if e.Kind == discoverd.EventKindUpdate && e.Instance.Meta["id"] == client.ID() {
			meta = e.Instance.Meta
			break
		}
	}
	t.Assert(meta["tag:foo"], c.Equals, "bar")

	// setting to empty string should delete the tag
	t.Assert(client.UpdateTags(map[string]string{"foo": ""}), c.IsNil)

	for {
		e := nextEvent()
		if e.Kind == discoverd.EventKindUpdate && e.Instance.Meta["id"] == client.ID() {
			meta = e.Instance.Meta
			break
		}
	}
	if _, ok := meta["tag:foo"]; ok {
		t.Fatal("expected tag to be deleted but is still present")
	}
}
func (s *DockerReceiveSuite) TestPushImage(t *c.C) {
	// build a Docker image
	repo := "docker-receive-test-push"
	s.buildDockerImage(t, repo, "RUN echo foo > /foo.txt")

	// subscribe to artifact events
	client := s.controllerClient(t)
	events := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		ObjectTypes: []ct.EventType{ct.EventTypeArtifact},
	}, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// push the Docker image to docker-receive
	u, err := url.Parse(s.clusterConf(t).DockerPushURL)
	t.Assert(err, c.IsNil)
	tag := fmt.Sprintf("%s/%s:latest", u.Host, repo)
	t.Assert(run(t, exec.Command("docker", "tag", "--force", repo, tag)), Succeeds)
	t.Assert(run(t, exec.Command("docker", "push", tag)), Succeeds)

	// wait for an artifact to be created
	var artifact ct.Artifact
loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
			}
			t.Assert(json.Unmarshal(event.Data, &artifact), c.IsNil)
			if artifact.Meta["docker-receive.repository"] == repo {
				break loop
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for artifact")
		}
	}

	// create a release with the Docker artifact
	app := &ct.App{}
	t.Assert(client.CreateApp(app), c.IsNil)
	release := &ct.Release{ArtifactIDs: []string{artifact.ID}}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)

	// check running a job uses the image
	t.Assert(flynn(t, "/", "-a", app.ID, "run", "cat", "/foo.txt"), SuccessfulOutputContains, "foo")
}
func (s *ZZBackupSuite) TestClusterBackups(t *c.C) {
	if args.BootConfig.BackupsDir == "" {
		t.Skip("--backups-dir not set")
	}

	backups, err := ioutil.ReadDir(args.BootConfig.BackupsDir)
	t.Assert(err, c.IsNil)
	if len(backups) == 0 {
		t.Fatal("backups dir is empty")
	}

	for i, backup := range backups {
		s.testClusterBackup(t, i, filepath.Join(args.BootConfig.BackupsDir, backup.Name()))
	}
}
func (s *CLISuite) TestRun(t *c.C) {
	app := s.newCliTestApp(t)
	defer app.cleanup()

	// this shouldn't be logged
	t.Assert(app.sh("echo foo"), Outputs, "foo\n")
	// drain the events
	app.waitFor(ct.JobEvents{"": {ct.JobStateUp: 1, ct.JobStateDown: 1}})

	// this should be logged due to the --enable-log flag
	t.Assert(app.flynn("run", "--enable-log", "echo", "hello"), Outputs, "hello\n")
	app.waitFor(ct.JobEvents{"": {ct.JobStateUp: 1, ct.JobStateDown: 1}})

	detached := app.flynn("run", "-d", "echo", "world")
	t.Assert(detached, Succeeds)
	t.Assert(detached, c.Not(Outputs), "world\n")

	id := strings.TrimSpace(detached.Output)
	jobID := app.waitFor(ct.JobEvents{"": {ct.JobStateUp: 1, ct.JobStateDown: 1}})
	t.Assert(jobID, c.Equals, id)
	t.Assert(app.flynn("log", "--raw-output"), Outputs, "hello\nworld\n")

	// test stdin and stderr
	streams := app.flynnCmd("run", "sh", "-c", "cat 1>&2")
	stdin, err := streams.StdinPipe()
	t.Assert(err, c.IsNil)
	go func() {
		stdin.Write([]byte("goto stderr"))
		stdin.Close()
	}()
	var stderr bytes.Buffer
	var stdout bytes.Buffer
	streams.Stderr = &stderr
	streams.Stdout = &stdout
	t.Assert(streams.Run(), c.IsNil)
	t.Assert(stderr.String(), c.Equals, "goto stderr")
	t.Assert(stdout.String(), c.Equals, "")

	// test exit code
	exit := app.sh("exit 42")
	t.Assert(exit, c.Not(Succeeds))
	if msg, ok := exit.Err.(*exec.ExitError); ok { // there is error code
		code := msg.Sys().(syscall.WaitStatus).ExitStatus()
		t.Assert(code, c.Equals, 42)
	} else {
		t.Fatal("There was no error code!")
	}
}
func (s *HostSuite) TestSignalJob(t *c.C) {
	cluster := s.clusterClient(t)

	// pick a host to run the job on
	hosts, err := cluster.Hosts()
	t.Assert(err, c.IsNil)
	client := schedutil.PickHost(hosts)

	// start a signal-service job
	cmd := exec.JobUsingCluster(cluster, exec.DockerImage(imageURIs["test-apps"]), &host.Job{
		Config: host.ContainerConfig{
			Args:       []string{"/bin/signal"},
			DisableLog: true,
		},
	})
	cmd.HostID = client.ID()
	var out bytes.Buffer
	cmd.Stdout = &out
	t.Assert(cmd.Start(), c.IsNil)
	_, err = s.discoverdClient(t).Instances("signal-service", 10*time.Second)
	t.Assert(err, c.IsNil)

	// send the job a signal
	t.Assert(client.SignalJob(cmd.Job.ID, int(syscall.SIGTERM)), c.IsNil)

	// wait for the job to exit
	done := make(chan error)
	go func() {
		done <- cmd.Wait()
	}()
	select {
	case err := <-done:
		t.Assert(err, c.IsNil)
	case <-time.After(12 * time.Second):
		t.Fatal("timed out waiting for job to stop")
	}

	// check the output
	t.Assert(out.String(), c.Equals, "got signal: terminated")
}
func (s *CLISuite) TestLimits(t *c.C) {
	app := s.newCliTestApp(t)
	defer app.cleanup()
	t.Assert(app.flynn("limit", "set", "resources", "memory=512MB", "max_fd=12k", "cpu=2000"), Succeeds)

	release, err := s.controller.GetAppRelease(app.name)
	t.Assert(err, c.IsNil)
	proc, ok := release.Processes["resources"]
	if !ok {
		t.Fatal("missing resources process type")
	}
	r := proc.Resources
	t.Assert(*r[resource.TypeMemory].Limit, c.Equals, int64(536870912))
	t.Assert(*r[resource.TypeCPU].Limit, c.Equals, int64(2000))
	t.Assert(*r[resource.TypeMaxFD].Limit, c.Equals, int64(12000))

	cmd := app.flynn("limit", "-t", "resources")
	t.Assert(cmd, Succeeds)
	t.Assert(cmd, OutputContains, "memory=512MB")
	t.Assert(cmd, OutputContains, "cpu=2000")
	t.Assert(cmd, OutputContains, "max_fd=12000")
}
func (s *HostSuite) TestDevSHM(t *c.C) {
	cmd := exec.CommandUsingCluster(
		s.clusterClient(t),
		exec.DockerImage(imageURIs["test-apps"]),
		"sh", "-c", "df -h /dev/shm && echo foo > /dev/shm/asdf",
	)
	var out bytes.Buffer
	cmd.Stdout = &out
	cmd.Stderr = &out

	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()
	select {
	case err := <-runErr:
		t.Assert(err, c.IsNil)
	case <-time.After(30 * time.Second):
		t.Fatal("timed out waiting for /dev/shm job")
	}

	t.Assert(out.String(), c.Equals, "Filesystem Size Used Available Use% Mounted on\nshm 64.0M 0 64.0M 0% /dev/shm\n")
}
func (s *HostSuite) TestDevStdout(t *c.C) {
	cmd := exec.CommandUsingCluster(
		s.clusterClient(t),
		exec.DockerImage(imageURIs["test-apps"]),
		"sh", "-c", "echo foo > /dev/stdout; echo bar > /dev/stderr",
	)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()
	select {
	case err := <-runErr:
		t.Assert(err, c.IsNil)
	case <-time.After(30 * time.Second):
		t.Fatal("timed out waiting for /dev/stdout job")
	}

	t.Assert(stdout.String(), c.Equals, "foo\n")
	t.Assert(stderr.String(), c.Equals, "bar\n")
}
func (h *Helper) addHosts(t *c.C, count int, vanilla bool, service string) []*tc.Instance {
	debugf(t, "adding %d hosts", count)

	// wait for the router-api to start on the host (rather than using
	// StreamHostEvents) as we wait for router-api when removing the
	// host (so that could fail if the router-api never starts).
	events := make(chan *discoverd.Event)
	stream, err := h.discoverdClient(t).Service(service).Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// wait for the current state
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("event stream closed unexpectedly")
			}
			if e.Kind == discoverd.EventKindCurrent {
				break loop
			}
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for current service state")
		}
	}

	hosts := make([]*tc.Instance, count)
	for i := 0; i < count; i++ {
		host, err := testCluster.AddHost(events, vanilla)
		t.Assert(err, c.IsNil)
		debugf(t, "host added: %s", host.ID)
		hosts[i] = host
	}
	return hosts
}
func (s *ZDiscoverdSuite) TestDeploy(t *c.C) {
	// ensure we have enough hosts in the cluster
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("cannot deploy discoverd in a single node cluster")
	}

	client := s.controllerClient(t)
	app, err := client.GetApp("discoverd")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	stream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

loop:
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			if event.Status == "complete" {
				debugf(t, "got deployment event: %s", event.Status)
				break loop
			}
			if event.Status == "failed" {
				t.Fatal("the deployment failed")
			}
			debugf(t, "got deployment event: %s %s", event.JobType, event.JobState)
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for deployment event")
		}
	}
}
func (s *HostSuite) TestUpdate(t *c.C) {
	dir := t.MkDir()
	flynnHost := filepath.Join(dir, "flynn-host")
	run(t, osexec.Command("cp", args.FlynnHost, flynnHost))

	// start flynn-host
	id := random.String(8)
	var out bytes.Buffer
	cmd := osexec.Command(
		flynnHost,
		"daemon",
		"--http-port", "11113",
		"--state", filepath.Join(dir, "host-state.bolt"),
		"--id", id,
		"--backend", "mock",
		"--vol-provider", "mock",
		"--volpath", filepath.Join(dir, "volumes"),
		"--log-dir", filepath.Join(dir, "logs"),
	)
	cmd.Stdout = &out
	cmd.Stderr = &out
	defer func() {
		debug(t, "*** flynn-host output ***")
		debug(t, out.String())
		debug(t, "*************************")
	}()
	t.Assert(cmd.Start(), c.IsNil)
	defer cmd.Process.Kill()

	httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}}
	client := cluster.NewHost(id, "http://127.0.0.1:11113", httpClient, nil)

	// exec a program which exits straight away
	_, err := client.Update("/bin/true")
	t.Assert(err, c.NotNil)
	status, err := client.GetStatus()
	t.Assert(err, c.IsNil)
	t.Assert(status.ID, c.Equals, id)
	t.Assert(status.PID, c.Equals, cmd.Process.Pid)

	// exec a program which reads the control socket but then exits
	_, err = client.Update("/bin/bash", "-c", "<&4; exit")
	t.Assert(err, c.NotNil)
	status, err = client.GetStatus()
	t.Assert(err, c.IsNil)
	t.Assert(status.ID, c.Equals, id)
	t.Assert(status.PID, c.Equals, cmd.Process.Pid)

	// exec flynn-host and check we get the status from the new daemon
	pid, err := client.Update(
		flynnHost,
		"daemon",
		"--http-port", "11113",
		"--state", filepath.Join(dir, "host-state.bolt"),
		"--id", id,
		"--backend", "mock",
		"--vol-provider", "mock",
		"--volpath", filepath.Join(dir, "volumes"),
		"--log-dir", filepath.Join(dir, "logs"),
	)
	t.Assert(err, c.IsNil)
	defer syscall.Kill(pid, syscall.SIGKILL)

	done := make(chan struct{})
	go func() {
		cmd.Process.Signal(syscall.SIGTERM)
		syscall.Wait4(cmd.Process.Pid, nil, 0, nil)
		close(done)
	}()
	select {
	case <-done:
	case <-time.After(15 * time.Second):
		t.Fatal("timed out waiting for flynn-host daemon to exit")
	}

	// client.GetStatus intermittently returns io.EOF right after the update. We
	// don't currently understand why (likely due to the way the listener is
	// passed around), so for now just retry the request.
	//
	// TODO(lmars): figure out why and remove this loop.
	delay := 100 * time.Millisecond
	for start := time.Now(); time.Since(start) < 10*time.Second; time.Sleep(delay) {
		status, err = client.GetStatus()
		if e, ok := err.(*url.Error); ok && strings.Contains(e.Err.Error(), "EOF") {
			debugf(t, "got io.EOF from flynn-host, trying again in %s", delay)
			continue
		}
		break
	}
	t.Assert(err, c.IsNil)
	t.Assert(status.ID, c.Equals, id)
	t.Assert(status.PID, c.Equals, pid)
}
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: imageURIs["test-apps"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)

	// create 5 slug artifacts
	var slug bytes.Buffer
	gz := gzip.NewWriter(&slug)
	t.Assert(tar.NewWriter(gz).Close(), c.IsNil)
	t.Assert(gz.Close(), c.IsNil)
	slugs := []string{
		"http://blobstore.discoverd/1/slug.tgz",
		"http://blobstore.discoverd/2/slug.tgz",
		"http://blobstore.discoverd/3/slug.tgz",
		"http://blobstore.discoverd/4/slug.tgz",
		"http://blobstore.discoverd/5/slug.tgz",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	for i, uri := range slugs {
		req, err := http.NewRequest("PUT", uri, bytes.NewReader(slug.Bytes()))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		artifact := &ct.Artifact{
			Type: host.ArtifactTypeFile,
			URI:  uri,
			Meta: map[string]string{"blobstore": "true"},
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (so prevents the slug being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slug releases (so 5 in total, only 3 are
	// inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		files := release.FileArtifactIDs()
		t.Assert(files, c.HasLen, 1)
		distinctSlugs[files[0]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
func (s *HostSuite) TestNotifyOOM(t *c.C) {
	appID := random.UUID()

	// subscribe to init log messages from the logaggregator
	client, err := logaggc.New("")
	t.Assert(err, c.IsNil)
	opts := logagg.LogOpts{
		Follow:      true,
		StreamTypes: []logagg.StreamType{logagg.StreamTypeInit},
	}
	rc, err := client.GetLog(appID, &opts)
	t.Assert(err, c.IsNil)
	defer rc.Close()

	msgs := make(chan *logaggc.Message)
	stream := stream.New()
	defer stream.Close()
	go func() {
		defer close(msgs)
		dec := json.NewDecoder(rc)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				stream.Error = err
				return
			}
			select {
			case msgs <- &msg:
			case <-stream.StopCh:
				return
			}
		}
	}()

	// run the OOM job
	cmd := exec.CommandUsingCluster(
		s.clusterClient(t),
		s.createArtifact(t, "test-apps"),
		"/bin/oom",
	)
	cmd.Meta = map[string]string{"flynn-controller.app": appID}
	runErr := make(chan error)
	go func() {
		runErr <- cmd.Run()
	}()

	// wait for the OOM notification
	for {
		select {
		case err := <-runErr:
			t.Assert(err, c.IsNil)
		case msg, ok := <-msgs:
			if !ok {
				t.Fatalf("message stream closed unexpectedly: %s", stream.Err())
			}
			t.Log(msg.Msg)
			if strings.Contains(msg.Msg, "FATAL: a container process was killed due to lack of available memory") {
				return
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for OOM notification")
		}
	}
}
func testSireniaDeploy(client controller.Client, disc *discoverd.Client, t *c.C, d *sireniaDeploy) {
	// create app
	app := &ct.App{Name: d.name, Strategy: "sirenia"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default app
	release, err := client.GetAppRelease(d.db.appName)
	t.Assert(err, c.IsNil)
	release.ID = ""
	release.Env[d.db.hostKey] = fmt.Sprintf("leader.%s.discoverd", d.name)
	release.Env[d.db.serviceKey] = d.name
	procName := release.Env["SIRENIA_PROCESS"]
	proc := release.Processes[procName]
	delete(proc.Env, "SINGLETON")
	proc.Service = d.name
	release.Processes[procName] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discService := disc.Service(d.name)
	discStream, err := discService.Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{procName: d.sireniaJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got cluster state: index=%d primary=%s sync=%s async=%s", event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var sireniaState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if sireniaState.Primary == nil {
			return false
		}
		if d.sireniaJobs > 1 && sireniaState.Sync == nil {
			return false
		}
		if d.sireniaJobs > 2 && len(sireniaState.Async) != d.sireniaJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			sireniaState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for formation")
		}
	}

	// wait for the primary to indicate downstream replication sync
	debug(t, "waiting for primary to indicate downstream replication sync")
	sireniaClient := sc.NewClient(sireniaState.Primary.Addr)
	t.Assert(sireniaClient.WaitForReplSync(sireniaState.Sync, 1*time.Minute), c.IsNil)

	// connect to the db and run any initialisation required to later test writes
	debug(t, "initialising db")
	if d.db.initDb != nil {
		d.db.initDb(t, release, d)
	}

	// check currently writeable
	d.db.assertWriteable(t, release, d)

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is in the remaining states
	// that were expected, so handles the fact that some states don't happen, but the
	// states that do happen are expected and in-order.
	assertNextState := func(remaining []expectedSireniaState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with less asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		logf := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				logf("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				logf("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				logf("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				logf("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				logf("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					logf("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected state")
		return -1
	}
	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != ct.JobStateUp && e.JobState != ct.JobStateDown {
				continue
			}
			if e.JobType == procName {
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			}
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("unexpected close of job event stream: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp && e.ReleaseID == newRelease {
				newWebJobs++
			}
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now deploy is complete
	d.db.assertWriteable(t, release, d)
}
func (s *BlobstoreSuite) testBlobstoreBackend(t *c.C, name, redirectPattern string, env ...string) {
	r := s.newGitRepo(t, "http")
	t.Assert(r.flynn("create", "blobstore-backend-test-"+name), Succeeds)
	t.Assert(r.git("push", "flynn", "master"), Succeeds)

	// set default backend to external backend without printing secrets
	cmd := exec.Command("sh", "-c", fmt.Sprintf("%s -a blobstore env set %s DEFAULT_BACKEND=%s", args.CLI, strings.Join(env, " "), name))
	cmd.Env = flynnEnv(flynnrc)
	cmd.Dir = "/"
	t.Assert(cmd.Run(), c.IsNil)

	// test that downloading blob from postgres still works
	t.Assert(r.flynn("run", "echo", "1"), Succeeds)

	// get slug artifact details
	release, err := s.controllerClient(t).GetAppRelease("blobstore-backend-test-" + name)
	t.Assert(err, c.IsNil)
	artifact, err := s.controllerClient(t).GetArtifact(release.ArtifactIDs[1])
	t.Assert(err, c.IsNil)
	t.Assert(artifact.Type, c.Equals, ct.ArtifactTypeFlynn)

	// migrate slug to external backend
	layer := artifact.Manifest().Rootfs[0].Layers[0]
	u, err := url.Parse(artifact.LayerURL(layer))
	t.Assert(err, c.IsNil)
	migration := flynn(t, "/", "-a", "blobstore", "run", "-e", "/bin/flynn-blobstore", "migrate", "--delete", "--prefix", u.Path)
	t.Assert(migration, Succeeds)
	t.Assert(migration, OutputContains, "Moving "+u.Path)
	t.Assert(migration, OutputContains, "from postgres to "+name)

	// check that slug is now stored in external backend
	noRedirectsClient := &http.Client{
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return errors.New("no redirects")
		},
	}
	res, err := noRedirectsClient.Get(u.String())
	if res == nil {
		t.Fatal(err)
	}
	t.Assert(res.StatusCode, c.Equals, 302)
	t.Assert(res.Header.Get("Location"), c.Matches, redirectPattern)

	// test that downloading blob from external backend works
	t.Assert(r.flynn("run", "echo", "1"), Succeeds)

	// test that deploying still works
	t.Assert(r.git("commit", "--allow-empty", "-m", "foo"), Succeeds)
	t.Assert(r.git("push", "flynn", "master"), Succeeds)

	// test that build caching still works
	s.testBuildCaching(t)

	// test that exporting the app works
	t.Assert(r.flynn("export", "--file", "/dev/null"), Succeeds)

	// change default backend back to postgres
	t.Assert(flynn(t, "/", "-a", "blobstore", "env", "set", "DEFAULT_BACKEND=postgres"), Succeeds)

	// test that downloading blob from the external backend still works
	t.Assert(r.flynn("run", "echo", "1"), Succeeds)

	// test a docker push
	repo := "s3-test"
	s.buildDockerImage(t, repo, "RUN echo foo > /foo.txt")
	u, err = url.Parse(s.clusterConf(t).DockerPushURL)
	t.Assert(err, c.IsNil)
	tag := fmt.Sprintf("%s/%s:latest", u.Host, repo)
	t.Assert(run(t, exec.Command("docker", "tag", "--force", repo, tag)), Succeeds)
	t.Assert(run(t, exec.Command("docker", "push", tag)), Succeeds)

	// migrate blobs back to postgres
	migration = flynn(t, "/", "-a", "blobstore", "run", "-e", "/bin/flynn-blobstore", "migrate", "--delete")
	t.Assert(migration, Succeeds)
	t.Assert(migration, OutputContains, fmt.Sprintf("from %s to postgres", name))

	// test that downloading blob from postgres still works
	t.Assert(r.flynn("run", "echo", "1"), Succeeds)

	// check that all blobs are in postgres
	t.Assert(flynn(t, "/", "-a", "blobstore", "pg", "psql", "--", "-c", fmt.Sprintf("SELECT count(*) FROM files WHERE backend = '%s' AND deleted_at IS NULL", name)), OutputContains, "0")
}
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := s.createArtifact(t, "test-apps")

	// create 5 slug artifacts
	tmp, err := ioutil.TempFile("", "squashfs-")
	t.Assert(err, c.IsNil)
	defer os.Remove(tmp.Name())
	defer tmp.Close()
	t.Assert(exec.Command("mksquashfs", t.MkDir(), tmp.Name(), "-noappend").Run(), c.IsNil)
	slug, err := ioutil.ReadAll(tmp)
	t.Assert(err, c.IsNil)
	slugHash := sha512.Sum512(slug)
	slugs := []string{
		"http://blobstore.discoverd/layer/1.squashfs",
		"http://blobstore.discoverd/layer/2.squashfs",
		"http://blobstore.discoverd/layer/3.squashfs",
		"http://blobstore.discoverd/layer/4.squashfs",
		"http://blobstore.discoverd/layer/5.squashfs",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	put := func(url string, data []byte) {
		req, err := http.NewRequest("PUT", url, bytes.NewReader(data))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
	}
	for i, layerURL := range slugs {
		manifest := &ct.ImageManifest{
			Type: ct.ImageManifestTypeV1,
			Rootfs: []*ct.ImageRootfs{{
				Layers: []*ct.ImageLayer{{
					ID:     strconv.Itoa(i + 1),
					Type:   ct.ImageLayerTypeSquashfs,
					Length: int64(len(slug)),
					Hashes: map[string]string{"sha512": hex.EncodeToString(slugHash[:])},
				}},
			}},
		}
		data := manifest.RawManifest()
		url := fmt.Sprintf("http://blobstore.discoverd/image/%s.json", manifest.ID())
		put(url, data)
		put(layerURL, slug)
		artifact := &ct.Artifact{
			Type:             ct.ArtifactTypeFlynn,
			URI:              url,
			Meta:             map[string]string{"blobstore": "true"},
			RawManifest:      data,
			Hashes:           manifest.Hashes(),
			Size:             int64(len(data)),
			LayerURLTemplate: "http://blobstore.discoverd/layer/{id}.squashfs",
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (so prevents the slug being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Args: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
			Meta: map[string]string{"git": "true"},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	timeoutCh := make(chan struct{})
	time.AfterFunc(5*time.Minute, func() { close(timeoutCh) })
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID, timeoutCh), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slug releases (so 5 in total, only 3 are
	// inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		t.Assert(release.ArtifactIDs, c.HasLen, 2)
		distinctSlugs[release.ArtifactIDs[1]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new images.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var imagesJSON bytes.Buffer
	var script bytes.Buffer
	slugImageID := random.UUID()
	releaseScript.Execute(&script, struct{ ControllerKey, SlugImageID string }{releaseCluster.ControllerKey, slugImageID})
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: &imagesJSON, Stderr: logWriter}), c.IsNil)
	var images map[string]*ct.Artifact
	t.Assert(json.Unmarshal(imagesJSON.Bytes(), &images), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstoreAddr := buildHost.IP + ":8080"
	installHost := releaseCluster.Instances[3]
	script.Reset()
	installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr})
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20161108.0-test")

	// check rebuilt images were downloaded
	assertInstallOutput := func(format string, v ...interface{}) {
		expected := fmt.Sprintf(format, v...)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to output %q`, expected)
		}
	}
	for name, image := range images {
		assertInstallOutput("pulling %s image", name)
		for _, layer := range image.Manifest().Rootfs[0].Layers {
			assertInstallOutput("pulling %s layer %s", name, layer.ID)
		}
	}

	// installing on an instance with Flynn running should fail
	script.Reset()
	installScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr})
	installOutput.Reset()
	err := buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out})
	if err == nil || !strings.Contains(installOutput.String(), "ERROR: Flynn is already installed.") {
		t.Fatal("expected Flynn install to fail but it didn't")
	}

	// create a controller client for the release cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// deploy a slug based app + Redis resource
	slugApp := &ct.App{}
	t.Assert(client.CreateApp(slugApp), c.IsNil)
	gitreceive, err := client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	imageArtifact, err := client.GetArtifact(gitreceive.Env["SLUGRUNNER_IMAGE_ID"])
	t.Assert(err, c.IsNil)
	slugArtifact, err := client.GetArtifact(slugImageID)
	t.Assert(err, c.IsNil)
	resource, err := client.ProvisionResource(&ct.ResourceReq{ProviderID: "redis", Apps: []string{slugApp.ID}})
	t.Assert(err, c.IsNil)
	release := &ct.Release{
		ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID},
		Processes:   map[string]ct.ProcessType{"web": {Args: []string{"/runner/init", "bin/http"}}},
		Meta:        map[string]string{"git": "true"},
		Env:         resource.Env,
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil)
	watcher, err := client.WatchJobEvents(slugApp.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     slugApp.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script.Reset()
	updateScript.Execute(&script, map[string]string{"Blobstore": blobstoreAddr, "Discoverd": updateHost.IP + ":1111"})
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range images {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulling %s image" host=%s`, name, host.ID)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	assertImage := func(uri, image string) {
		t.Assert(uri, c.Equals, images[image].URI)
	}

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		if app.ImageOnly {
			continue // we don't deploy ImageOnly updates
		}
		debugf(t, "checking new %s release is using image %s", app.Name, images[app.Name].URI)
		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app.Name)
		}
		release, err := client.GetAppRelease(app.Name)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app.Name, release.ID)
		artifact, err := client.GetArtifact(release.ArtifactIDs[0])
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app.Name, artifact)
		assertImage(artifact.URI, app.Name)
	}

	// check gitreceive has the correct slug env vars
	gitreceive, err = client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	for _, name := range []string{"slugbuilder", "slugrunner"} {
		artifact, err := client.GetArtifact(gitreceive.Env[strings.ToUpper(name)+"_IMAGE_ID"])
		t.Assert(err, c.IsNil)
		assertImage(artifact.URI, name)
	}

	// check slug based app was deployed correctly
	release, err = client.GetAppRelease(slugApp.Name)
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0])
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "slugrunner")

	// check Redis app was deployed correctly
	release, err = client.GetAppRelease(resource.Env["FLYNN_REDIS"])
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ArtifactIDs[0])
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "redis")
}
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// get the current controller formation
	formation, err := client.GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	events := make(chan *ct.DeploymentEvent)
	eventStream, err := client.StreamDeployment(deployment, events)
	t.Assert(err, c.IsNil)
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// due to the fact that when the deployer deploys itself, some events will
	// not get sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(time.Duration(app.DeployTimeout) * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	actual := make(map[string]map[string]int)
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		t.Assert(err, c.IsNil)
		for _, job := range jobs {
			if job.Status != host.StatusRunning {
				continue
			}
			appID := job.Job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       formation.Processes["web"],
		"worker":    formation.Processes["worker"],
		"scheduler": len(hosts),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}
func (s *HostUpdateSuite) TestUpdateLogs(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot new hosts")
	}

	instance := s.addHost(t, "router-api")
	defer s.removeHost(t, instance, "router-api")
	httpClient := &http.Client{Transport: &http.Transport{Dial: dialer.Retry.Dial}}
	client := cluster.NewHost(instance.ID, fmt.Sprintf("http://%s:1113", instance.IP), httpClient, nil)

	// start partial logger job
	cmd := exec.JobUsingHost(
		client,
		exec.DockerImage(imageURIs["test-apps"]),
		&host.Job{
			Config: host.ContainerConfig{Args: []string{"/bin/partial-logger"}},
			Metadata: map[string]string{
				"flynn-controller.app": "partial-logger",
			},
		},
	)
	t.Assert(cmd.Start(), c.IsNil)
	defer cmd.Kill()

	// wait for partial line
	_, err := s.discoverdClient(t).Instances("partial-logger", 10*time.Second)
	t.Assert(err, c.IsNil)

	// update flynn-host
	pid, err := client.Update("/usr/local/bin/flynn-host", "daemon", "--id", cmd.HostID)
	t.Assert(err, c.IsNil)
	// update the pid file so removeHost works
	t.Assert(instance.Run(fmt.Sprintf("echo -n %d | sudo tee /var/run/flynn-host.pid", pid), nil), c.IsNil)

	// stream the log from the logaggregator
	logc, err := logaggc.New("")
	t.Assert(err, c.IsNil)
	log, err := logc.GetLog("partial-logger", &logaggc.LogOpts{Follow: true})
	t.Assert(err, c.IsNil)
	defer log.Close()
	msgs := make(chan *logaggc.Message)
	go func() {
		defer close(msgs)
		dec := json.NewDecoder(log)
		for {
			var msg logaggc.Message
			if err := dec.Decode(&msg); err != nil {
				debugf(t, "error decoding message: %s", err)
				return
			}
			msgs <- &msg
		}
	}()

	// finish logging
	t.Assert(client.SignalJob(cmd.Job.ID, int(syscall.SIGUSR1)), c.IsNil)

	// check we get a single log line
	for {
		select {
		case msg, ok := <-msgs:
			if !ok {
				t.Fatal("error getting log")
			}
			if msg.Stream == "stdout" {
				t.Assert(msg.Msg, c.Equals, "hello world")
				return
			}
		case <-time.After(10 * time.Second):
			t.Fatal("timed out waiting for log")
		}
	}
}