func waitForJobEvents(t *c.C, stream stream.Stream, events chan *ct.JobEvent, expected jobEvents) (lastID int64, jobID string) {
	debugf(t, "waiting for job events: %v", expected)
	actual := make(jobEvents)
	for {
	inner:
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed: %s", stream.Err())
			}
			debugf(t, "got job event: %s %s %s", event.Type, event.JobID, event.State)
			lastID = event.ID
			jobID = event.JobID
			if _, ok := actual[event.Type]; !ok {
				actual[event.Type] = make(map[string]int)
			}
			switch event.State {
			case "starting", "up", "down":
				actual[event.Type][event.State] += 1
			case "crashed":
				actual[event.Type]["down"] += 1
			default:
				break inner
			}
			if jobEventsEqual(expected, actual) {
				return
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for job events: ", expected)
		}
	}
}
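// The jobEvents type and jobEventsEqual helper used above are not defined in this
// excerpt. A minimal sketch of what waitForJobEvents assumes (event counts keyed by
// process type and then by state, with the expected counts satisfied by the actual
// ones) might look like this; the real definitions may differ:
type jobEvents map[string]map[string]int

func jobEventsEqual(expected, actual jobEvents) bool {
	for typ, states := range expected {
		gotStates, ok := actual[typ]
		if !ok {
			return false
		}
		for state, count := range states {
			if gotStates[state] != count {
				return false
			}
		}
	}
	return true
}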
func (s *SchedulerSuite) TestJobRestartBackoffPolicy(t *c.C) {
	// To run this test locally, set BACKOFF_PERIOD on the flynn host machine
	var backoffPeriod time.Duration
	var err error
	if testCluster == nil {
		backoffPeriod, err = time.ParseDuration(os.Getenv("BACKOFF_PERIOD"))
		if err != nil {
			t.Skip("cannot determine backoff period")
		}
	} else {
		backoffPeriod = testCluster.BackoffPeriod()
	}
	startTimeout := 20 * time.Second
	debugf(t, "job restart backoff period: %s", backoffPeriod)

	app, release := s.createApp(t)

	watcher, err := s.controllerClient(t).WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"printer": 1},
	}), c.IsNil)
	var id string
	var assignId = func(j *ct.Job) error {
		debugf(t, "got job event: %s %s", j.ID, j.State)
		id = j.ID
		return nil
	}
	err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, scaleTimeout, assignId)
	t.Assert(err, c.IsNil)

	waitForRestart := func(duration time.Duration) {
		start := time.Now()
		s.stopJob(t, id)
		debugf(t, "expecting new job to start in %s", duration)
		err = watcher.WaitFor(ct.JobEvents{"printer": {ct.JobStateUp: 1}}, duration+startTimeout, assignId)
		t.Assert(err, c.IsNil)
		actual := time.Now().Sub(start)
		if actual < duration {
			t.Fatalf("expected new job to start after %s but started after %s", duration, actual)
		}
	}

	waitForRestart(0)
	waitForRestart(backoffPeriod)
	waitForRestart(2 * backoffPeriod)
	debug(t, "waiting for backoff period to expire")
	time.Sleep(backoffPeriod)
	waitForRestart(0)
}
func (s *HostSuite) TestAddFailingJob(t *c.C) {
	// get a host and watch events
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	t.Assert(hosts, c.Not(c.HasLen), 0)
	h := hosts[0]
	jobID := random.UUID()
	events := make(chan *host.Event)
	stream, err := h.StreamEvents(jobID, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// add a job with a nonexistent partition
	job := &host.Job{
		ID: jobID,
		ImageArtifact: &host.Artifact{
			Type: host.ArtifactTypeDocker,
			URI:  "http://example.com?name=foo&id=bar",
		},
		Partition: "nonexistent",
	}
	t.Assert(h.AddJob(job), c.IsNil)

	// check we get a create then error event
	actual := make([]*host.Event, 0, 2)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed unexpectedly: %s", stream.Err())
			}
			actual = append(actual, e)
			if len(actual) >= 2 {
				break loop
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for job event")
		}
	}
	t.Assert(actual, c.HasLen, 2)
	t.Assert(actual[0].Event, c.Equals, host.JobEventCreate)
	t.Assert(actual[1].Event, c.Equals, host.JobEventError)
	jobErr := actual[1].Job.Error
	t.Assert(jobErr, c.NotNil)
	t.Assert(*jobErr, c.Equals, `host: invalid job partition "nonexistent"`)
}
func waitForJobRestart(t *c.C, stream stream.Stream, events chan *ct.JobEvent, typ string, timeout time.Duration) string {
	debug(t, "waiting for job restart")
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed: %s", stream.Err())
			}
			debug(t, "got job event: ", event.Type, event.JobID, event.State)
			if event.Type == typ && event.State == "up" {
				return event.JobID
			}
		case <-time.After(timeout):
			t.Fatal("timed out waiting for job restart")
		}
	}
}
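// Both helpers above repeat the same receive-or-timeout select. As a purely
// illustrative, stdlib-only sketch (not part of the suite), that step could be
// factored into a hypothetical helper like this and the callers left to decide
// how to react to the returned error:
func recvJobEvent(events chan *ct.JobEvent, timeout time.Duration) (*ct.JobEvent, error) {
	select {
	case event, ok := <-events:
		if !ok {
			return nil, errors.New("job event stream closed")
		}
		return event, nil
	case <-time.After(timeout):
		return nil, errors.New("timed out waiting for job event")
	}
}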
func (s *DeployerSuite) createDeployment(t *c.C, process, strategy, service string) *ct.Deployment {
	app, release := s.createRelease(t, process, strategy)

	if service != "" {
		debugf(t, "waiting for 2 %s services", service)
		events := make(chan *discoverd.Event)
		stream, err := s.discoverdClient(t).Service(service).Watch(events)
		t.Assert(err, c.IsNil)
		defer stream.Close()
		count := 0
	loop:
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service discovery stream closed unexpectedly")
				}
				if event.Kind == discoverd.EventKindUp {
					if id, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]; !ok || id != release.ID {
						continue
					}
					debugf(t, "got %s service up event", service)
					count++
				}
				if count == 2 {
					// although the services are up, give them a few more seconds
					// to make sure the deployer will also see them as up.
					time.Sleep(5 * time.Second)
					break loop
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for %s service to come up", service)
			}
		}
	}

	// create a new release for the deployment
	release.ID = ""
	t.Assert(s.controllerClient(t).CreateRelease(release), c.IsNil)

	deployment, err := s.controllerClient(t).CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	return deployment
}
func testSireniaDeploy(client controller.Client, disc *discoverd.Client, t *c.C, d *sireniaDeploy) {
	// create app
	app := &ct.App{Name: d.name, Strategy: "sirenia"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default app
	release, err := client.GetAppRelease(d.db.appName)
	t.Assert(err, c.IsNil)
	release.ID = ""
	release.Env[d.db.hostKey] = fmt.Sprintf("leader.%s.discoverd", d.name)
	release.Env[d.db.serviceKey] = d.name
	procName := release.Env["SIRENIA_PROCESS"]
	proc := release.Processes[procName]
	delete(proc.Env, "SINGLETON")
	proc.Service = d.name
	release.Processes[procName] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discService := disc.Service(d.name)
	discStream, err := discService.Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{procName: d.sireniaJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got cluster state: index=%d primary=%s sync=%s async=%s",
				event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var sireniaState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if sireniaState.Primary == nil {
			return false
		}
		if d.sireniaJobs > 1 && sireniaState.Sync == nil {
			return false
		}
		if d.sireniaJobs > 2 && len(sireniaState.Async) != d.sireniaJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			sireniaState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == ct.JobStateUp {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for formation")
		}
	}

	// wait for the primary to indicate downstream replication sync
	debug(t, "waiting for primary to indicate downstream replication sync")
	sireniaClient := sc.NewClient(sireniaState.Primary.Addr)
	t.Assert(sireniaClient.WaitForReplSync(sireniaState.Sync, 1*time.Minute), c.IsNil)

	// connect to the db so we can test writes
	d.db.initDb(t, release, d)

	// check currently writeable
	d.db.assertWriteable(t, release, d)

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is among the remaining
	// expected states, which handles the fact that some expected states may not
	// occur, while the states that do occur must appear in order.
	assertNextState := func(remaining []expectedSireniaState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with fewer asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		log := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				log("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				log("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				log("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				log("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				log("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					log("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected state")
		return -1
	}

	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != ct.JobStateUp && e.JobState != ct.JobStateDown {
				continue
			}
			switch e.JobType {
			case procName:
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			case "web":
				if e.JobState == ct.JobStateUp && e.ReleaseID == newRelease {
					newWebJobs++
				}
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for deployment")
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now the deploy is complete
	d.db.assertWriteable(t, release, d)
}
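// expectedSireniaState is referenced above but not defined in this excerpt. Based
// on how assertNextState reads it (release IDs for the primary and sync nodes plus
// an ordered list of release IDs for the asyncs), a minimal sketch could look like
// the following; the real definition may carry additional fields:
type expectedSireniaState struct {
	Primary string   // release ID expected on the primary
	Sync    string   // release ID expected on the sync ("" if no sync is expected)
	Async   []string // release IDs expected on the asyncs, in order
}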
func (s *DomainMigrationSuite) migrateDomain(t *c.C, dm *ct.DomainMigration) {
	debugf(t, "migrating domain from %s to %s", dm.OldDomain, dm.Domain)
	client := s.controllerClient(t)

	events := make(chan *ct.Event)
	stream, err := client.StreamEvents(controller.StreamEventsOptions{
		ObjectTypes: []ct.EventType{ct.EventTypeDomainMigration},
	}, events)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	prevRouterRelease, err := client.GetAppRelease("router")
	t.Assert(err, c.IsNil)

	err = client.PutDomain(dm)
	t.Assert(err, c.IsNil)

	waitEvent := func(typ string, timeout time.Duration) (event ct.DomainMigrationEvent) {
		debugf(t, "waiting for %s domain migration event", typ)
		var e *ct.Event
		var ok bool
		select {
		case e, ok = <-events:
			if !ok {
				t.Fatal("event stream closed unexpectedly")
			}
			debugf(t, "got %s domain migration event", typ)
		case <-time.After(timeout):
			t.Fatalf("timed out waiting for %s domain migration event", typ)
		}
		t.Assert(e.Data, c.NotNil)
		t.Assert(json.Unmarshal(e.Data, &event), c.IsNil)
		return
	}

	// created
	event := waitEvent("initial", 2*time.Minute)
	t.Assert(event.Error, c.Equals, "")
	t.Assert(event.DomainMigration, c.NotNil)
	t.Assert(event.DomainMigration.ID, c.Equals, dm.ID)
	t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain)
	t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain)
	t.Assert(event.DomainMigration.TLSCert, c.IsNil)
	t.Assert(event.DomainMigration.OldTLSCert, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true)
	t.Assert(event.DomainMigration.FinishedAt, c.IsNil)

	// complete
	event = waitEvent("final", 3*time.Minute)
	t.Assert(event.Error, c.Equals, "")
	t.Assert(event.DomainMigration, c.NotNil)
	t.Assert(event.DomainMigration.ID, c.Equals, dm.ID)
	t.Assert(event.DomainMigration.OldDomain, c.Equals, dm.OldDomain)
	t.Assert(event.DomainMigration.Domain, c.Equals, dm.Domain)
	t.Assert(event.DomainMigration.TLSCert, c.NotNil)
	t.Assert(event.DomainMigration.OldTLSCert, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt, c.NotNil)
	t.Assert(event.DomainMigration.CreatedAt.Equal(*dm.CreatedAt), c.Equals, true)
	t.Assert(event.DomainMigration.FinishedAt, c.NotNil)
	cert := event.DomainMigration.TLSCert

	controllerRelease, err := client.GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	t.Assert(controllerRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain)
	t.Assert(controllerRelease.Env["CA_CERT"], c.Equals, cert.CACert)

	routerRelease, err := client.GetAppRelease("router")
	t.Assert(err, c.IsNil)
	t.Assert(routerRelease.Env["TLSCERT"], c.Equals, cert.Cert)
	t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), "")
	t.Assert(routerRelease.Env["TLSKEY"], c.Not(c.Equals), prevRouterRelease.Env["TLSKEY"])

	dashboardRelease, err := client.GetAppRelease("dashboard")
	t.Assert(err, c.IsNil)
	t.Assert(dashboardRelease.Env["DEFAULT_ROUTE_DOMAIN"], c.Equals, dm.Domain)
	t.Assert(dashboardRelease.Env["CONTROLLER_DOMAIN"], c.Equals, fmt.Sprintf("controller.%s", dm.Domain))
	t.Assert(dashboardRelease.Env["URL"], c.Equals, fmt.Sprintf("dashboard.%s", dm.Domain))
	t.Assert(dashboardRelease.Env["CA_CERT"], c.Equals, cert.CACert)

	var doPing func(string, int)
	doPing = func(component string, retriesRemaining int) {
		url := fmt.Sprintf("http://%s.%s/ping", component, dm.Domain)
		res, err := (&http.Client{}).Get(url)
		if (err != nil || res.StatusCode != 200) && retriesRemaining > 0 {
			time.Sleep(100 * time.Millisecond)
			doPing(component, retriesRemaining-1)
			return
		}
		t.Assert(err, c.IsNil)
		t.Assert(res.StatusCode, c.Equals, 200, c.Commentf("failed to ping %s", component))
	}
	doPing("controller", 3)
	doPing("dashboard", 3)
}
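// The recursive doPing above retries a fixed number of times with a flat 100ms
// pause between attempts. A purely illustrative, stdlib-only sketch of the same
// idea written as a loop (pingWithRetry is a hypothetical helper, not part of
// the suite):
func pingWithRetry(url string, retries int, pause time.Duration) error {
	var lastErr error
	for attempt := 0; attempt <= retries; attempt++ {
		res, err := http.Get(url)
		if err == nil {
			res.Body.Close()
			if res.StatusCode == http.StatusOK {
				return nil
			}
			lastErr = fmt.Errorf("unexpected status %d from %s", res.StatusCode, url)
		} else {
			lastErr = err
		}
		time.Sleep(pause)
	}
	return lastErr
}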
func (s *SchedulerSuite) TestScaleTags(t *c.C) {
	// ensure we have more than 1 host to test with
	hosts, err := s.clusterClient(t).Hosts()
	t.Assert(err, c.IsNil)
	if len(hosts) <= 1 {
		t.Skip("not enough hosts to test tag-based scheduling")
	}

	// watch service events so we can wait for tag changes
	events := make(chan *discoverd.Event)
	stream, err := s.discoverdClient(t).Service("flynn-host").Watch(events)
	t.Assert(err, c.IsNil)
	defer stream.Close()
	waitServiceEvent := func(kind discoverd.EventKind) *discoverd.Event {
		for {
			select {
			case event, ok := <-events:
				if !ok {
					t.Fatalf("service event stream closed unexpectedly: %s", stream.Err())
				}
				if event.Kind == kind {
					return event
				}
			case <-time.After(10 * time.Second):
				t.Fatalf("timed out waiting for service %s event", kind)
			}
		}
	}

	// wait for the watch to be current before changing tags
	waitServiceEvent(discoverd.EventKindCurrent)

	updateTags := func(host *cluster.Host, tags map[string]string) {
		debugf(t, "setting host tags: %s => %v", host.ID(), tags)
		t.Assert(host.UpdateTags(tags), c.IsNil)
		event := waitServiceEvent(discoverd.EventKindUpdate)
		t.Assert(event.Instance.Meta["id"], c.Equals, host.ID())
		for key, val := range tags {
			t.Assert(event.Instance.Meta["tag:"+key], c.Equals, val)
		}
	}

	// create an app with a tagged process and watch job events
	app, release := s.createApp(t)
	formation := &ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Tags:      map[string]map[string]string{"printer": {"active": "true"}},
	}
	client := s.controllerClient(t)
	watcher, err := client.WatchJobEvents(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()

	// add tag to host 1
	host1 := hosts[0]
	updateTags(host1, map[string]string{"active": "true"})

	// start jobs
	debug(t, "scaling printer=2")
	formation.Processes = map[string]int{"printer": 2}
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	assertHostJobCounts := func(expected map[string]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[string]int)
		for _, job := range jobs {
			if job.State == ct.JobStateUp {
				actual[job.HostID]++
			}
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check all jobs on host 1
	assertHostJobCounts(map[string]int{host1.ID(): 2})

	// add tag to host 2
	host2 := hosts[1]
	updateTags(host2, map[string]string{"active": "true"})

	// scale up
	debug(t, "scaling printer=4")
	formation.Processes["printer"] = 4
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(2)}, scaleTimeout, nil), c.IsNil)

	// check jobs distributed across hosts 1 and 2
	assertHostJobCounts(map[string]int{host1.ID(): 2, host2.ID(): 2})

	// remove tag from host 2
	updateTags(host2, map[string]string{"active": ""})

	// check jobs are moved to host1
	jobEvents := ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 2,
		ct.JobStateUp:   2,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})

	// remove tag from host 1
	updateTags(host1, map[string]string{"active": ""})

	assertStateCounts := func(expected map[ct.JobState]int) {
		jobs, err := client.JobList(app.ID)
		t.Assert(err, c.IsNil)
		actual := make(map[ct.JobState]int)
		for _, job := range jobs {
			actual[job.State]++
		}
		t.Assert(actual, c.DeepEquals, expected)
	}

	// check 4 pending jobs, the rest are stopped
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStatePending: 4, ct.JobStateDown: 6})

	// re-add tag to host 1
	updateTags(host1, map[string]string{"active": "true"})

	// check pending jobs are started on host 1
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobUpEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host1.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 6})

	// add a different tag to host 2
	updateTags(host2, map[string]string{"disk": "ssd"})

	// update formation tags, check jobs are moved to host 2
	debug(t, "updating formation tags to disk=ssd")
	formation.Tags["printer"] = map[string]string{"disk": "ssd"}
	t.Assert(client.PutFormation(formation), c.IsNil)
	jobEvents = ct.JobEvents{"printer": map[ct.JobState]int{
		ct.JobStateDown: 4,
		ct.JobStateUp:   4,
	}}
	t.Assert(watcher.WaitFor(jobEvents, scaleTimeout, nil), c.IsNil)
	assertHostJobCounts(map[string]int{host2.ID(): 4})
	assertStateCounts(map[ct.JobState]int{ct.JobStateUp: 4, ct.JobStateDown: 10})

	// scale down stops the jobs
	debug(t, "scaling printer=0")
	formation.Processes = nil
	t.Assert(client.PutFormation(formation), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"printer": ct.JobDownEvents(4)}, scaleTimeout, nil), c.IsNil)
	assertStateCounts(map[ct.JobState]int{ct.JobStateDown: 14})
}
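// The tag assertions above read host tags back out of discoverd instance metadata
// keys carrying a "tag:" prefix. A purely illustrative helper for extracting all
// such tags from an instance (hostTags is a hypothetical name, not part of the
// suite):
func hostTags(inst *discoverd.Instance) map[string]string {
	tags := make(map[string]string)
	for key, val := range inst.Meta {
		if strings.HasPrefix(key, "tag:") {
			tags[strings.TrimPrefix(key, "tag:")] = val
		}
	}
	return tags
}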
func (s *ReleaseSuite) TestReleaseImages(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot boot release cluster")
	}

	// stream script output to t.Log
	logReader, logWriter := io.Pipe()
	defer logWriter.Close()
	go func() {
		buf := bufio.NewReader(logReader)
		for {
			line, err := buf.ReadString('\n')
			if err != nil {
				return
			}
			debug(t, line[0:len(line)-1])
		}
	}()

	// boot the release cluster, release components to a blobstore and output the new version.json
	releaseCluster := s.addReleaseHosts(t)
	buildHost := releaseCluster.Instances[0]
	var versionJSON bytes.Buffer
	t.Assert(buildHost.Run("bash -ex", &tc.Streams{Stdin: releaseScript, Stdout: &versionJSON, Stderr: logWriter}), c.IsNil)
	var versions map[string]string
	t.Assert(json.Unmarshal(versionJSON.Bytes(), &versions), c.IsNil)

	// install Flynn from the blobstore on the vanilla host
	blobstore := struct{ Blobstore string }{buildHost.IP + ":8080"}
	installHost := releaseCluster.Instances[3]
	var script bytes.Buffer
	installScript.Execute(&script, blobstore)
	var installOutput bytes.Buffer
	out := io.MultiWriter(logWriter, &installOutput)
	t.Assert(installHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check the flynn-host version is correct
	var hostVersion bytes.Buffer
	t.Assert(installHost.Run("flynn-host version", &tc.Streams{Stdout: &hostVersion}), c.IsNil)
	t.Assert(strings.TrimSpace(hostVersion.String()), c.Equals, "v20150131.0-test")

	// check rebuilt images were downloaded
	for name, id := range versions {
		expected := fmt.Sprintf("%s image %s downloaded", name, id)
		if !strings.Contains(installOutput.String(), expected) {
			t.Fatalf(`expected install to download %s %s`, name, id)
		}
	}

	// installing on an instance with Flynn running should not fail
	script.Reset()
	installScript.Execute(&script, blobstore)
	t.Assert(buildHost.Run("sudo bash -ex", &tc.Streams{Stdin: &script, Stdout: logWriter, Stderr: logWriter}), c.IsNil)

	// create a controller client for the release cluster
	pin, err := base64.StdEncoding.DecodeString(releaseCluster.ControllerPin)
	t.Assert(err, c.IsNil)
	client, err := controller.NewClientWithConfig(
		"https://"+buildHost.IP,
		releaseCluster.ControllerKey,
		controller.Config{Pin: pin, Domain: releaseCluster.ControllerDomain},
	)
	t.Assert(err, c.IsNil)

	// deploy a slug based app
	slugApp := &ct.App{}
	t.Assert(client.CreateApp(slugApp), c.IsNil)
	gitreceive, err := client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: gitreceive.Env["SLUGRUNNER_IMAGE_URI"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)
	slugArtifact := &ct.Artifact{Type: host.ArtifactTypeFile, URI: fmt.Sprintf("http://%s:8080/slug.tgz", buildHost.IP)}
	t.Assert(client.CreateArtifact(slugArtifact), c.IsNil)
	release := &ct.Release{
		ArtifactIDs: []string{imageArtifact.ID, slugArtifact.ID},
		Processes:   map[string]ct.ProcessType{"web": {Cmd: []string{"bin/http"}}},
	}
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(slugApp.ID, release.ID), c.IsNil)
	watcher, err := client.WatchJobEvents(slugApp.ID, release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     slugApp.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1},
	}), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {ct.JobStateUp: 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// run a cluster update from the blobstore
	updateHost := releaseCluster.Instances[1]
	script.Reset()
	updateScript.Execute(&script, blobstore)
	var updateOutput bytes.Buffer
	out = io.MultiWriter(logWriter, &updateOutput)
	t.Assert(updateHost.Run("bash -ex", &tc.Streams{Stdin: &script, Stdout: out, Stderr: out}), c.IsNil)

	// check rebuilt images were downloaded
	for name := range versions {
		for _, host := range releaseCluster.Instances[0:2] {
			expected := fmt.Sprintf(`"pulled image" host=%s name=%s`, host.ID, name)
			if !strings.Contains(updateOutput.String(), expected) {
				t.Fatalf(`expected update to download %s on host %s`, name, host.ID)
			}
		}
	}

	assertImage := func(uri, image string) {
		u, err := url.Parse(uri)
		t.Assert(err, c.IsNil)
		t.Assert(u.Query().Get("id"), c.Equals, versions[image])
	}

	// check system apps were deployed correctly
	for _, app := range updater.SystemApps {
		if app.ImageOnly {
			continue // we don't deploy ImageOnly updates
		}
		if app.Image == "" {
			app.Image = "flynn/" + app.Name
		}
		debugf(t, "checking new %s release is using image %s", app.Name, versions[app.Image])
		expected := fmt.Sprintf(`"finished deploy of system app" name=%s`, app.Name)
		if !strings.Contains(updateOutput.String(), expected) {
			t.Fatalf(`expected update to deploy %s`, app.Name)
		}
		release, err := client.GetAppRelease(app.Name)
		t.Assert(err, c.IsNil)
		debugf(t, "new %s release ID: %s", app.Name, release.ID)
		artifact, err := client.GetArtifact(release.ImageArtifactID())
		t.Assert(err, c.IsNil)
		debugf(t, "new %s artifact: %+v", app.Name, artifact)
		assertImage(artifact.URI, app.Image)
	}

	// check gitreceive has the correct slug env vars
	gitreceive, err = client.GetAppRelease("gitreceive")
	t.Assert(err, c.IsNil)
	assertImage(gitreceive.Env["SLUGBUILDER_IMAGE_URI"], "flynn/slugbuilder")
	assertImage(gitreceive.Env["SLUGRUNNER_IMAGE_URI"], "flynn/slugrunner")

	// check slug based app was deployed correctly
	release, err = client.GetAppRelease(slugApp.Name)
	t.Assert(err, c.IsNil)
	imageArtifact, err = client.GetArtifact(release.ImageArtifactID())
	t.Assert(err, c.IsNil)
	assertImage(imageArtifact.URI, "flynn/slugrunner")
}
func (s *PostgresSuite) testDeploy(t *c.C, d *pgDeploy) {
	// create postgres app
	client := s.controllerClient(t)
	app := &ct.App{Name: d.name, Strategy: "postgres"}
	t.Assert(client.CreateApp(app), c.IsNil)

	// copy release from default postgres app
	release, err := client.GetAppRelease("postgres")
	t.Assert(err, c.IsNil)
	release.ID = ""
	proc := release.Processes["postgres"]
	delete(proc.Env, "SINGLETON")
	proc.Env["FLYNN_POSTGRES"] = d.name
	proc.Service = d.name
	release.Processes["postgres"] = proc
	t.Assert(client.CreateRelease(release), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, release.ID), c.IsNil)
	oldRelease := release.ID

	// create formation
	discEvents := make(chan *discoverd.Event)
	discStream, err := s.discoverdClient(t).Service(d.name).Watch(discEvents)
	t.Assert(err, c.IsNil)
	defer discStream.Close()
	jobEvents := make(chan *ct.Job)
	jobStream, err := client.StreamJobEvents(d.name, jobEvents)
	t.Assert(err, c.IsNil)
	defer jobStream.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"postgres": d.pgJobs, "web": d.webJobs},
	}), c.IsNil)

	// watch cluster state changes
	type stateChange struct {
		state *state.State
		err   error
	}
	stateCh := make(chan stateChange)
	go func() {
		for event := range discEvents {
			if event.Kind != discoverd.EventKindServiceMeta {
				continue
			}
			var state state.State
			if err := json.Unmarshal(event.ServiceMeta.Data, &state); err != nil {
				stateCh <- stateChange{err: err}
				return
			}
			primary := ""
			if state.Primary != nil {
				primary = state.Primary.Addr
			}
			sync := ""
			if state.Sync != nil {
				sync = state.Sync.Addr
			}
			var async []string
			for _, a := range state.Async {
				async = append(async, a.Addr)
			}
			debugf(t, "got pg cluster state: index=%d primary=%s sync=%s async=%s",
				event.ServiceMeta.Index, primary, sync, strings.Join(async, ","))
			stateCh <- stateChange{state: &state}
		}
	}()

	// wait for correct cluster state and number of web processes
	var pgState state.State
	var webJobs int
	ready := func() bool {
		if webJobs != d.webJobs {
			return false
		}
		if pgState.Primary == nil {
			return false
		}
		if d.pgJobs > 1 && pgState.Sync == nil {
			return false
		}
		if d.pgJobs > 2 && len(pgState.Async) != d.pgJobs-2 {
			return false
		}
		return true
	}
	for {
		if ready() {
			break
		}
		select {
		case s := <-stateCh:
			t.Assert(s.err, c.IsNil)
			pgState = *s.state
		case e, ok := <-jobEvents:
			if !ok {
				t.Fatalf("job event stream closed: %s", jobStream.Err())
			}
			debugf(t, "got job event: %s %s %s", e.Type, e.ID, e.State)
			if e.Type == "web" && e.State == "up" {
				webJobs++
			}
		case <-time.After(30 * time.Second):
			t.Fatal("timed out waiting for postgres formation")
		}
	}

	// connect to the db so we can test writes
	db := postgres.Wait(d.name, fmt.Sprintf("dbname=postgres user=flynn password=%s", release.Env["PGPASSWORD"]))
	dbname := "deploy-test"
	t.Assert(db.Exec(fmt.Sprintf(`CREATE DATABASE "%s" WITH OWNER = "flynn"`, dbname)), c.IsNil)
	db.Close()
	db, err = postgres.Open(d.name, fmt.Sprintf("dbname=%s user=flynn password=%s", dbname, release.Env["PGPASSWORD"]))
	t.Assert(err, c.IsNil)
	defer db.Close()
	t.Assert(db.Exec(`CREATE TABLE deploy_test ( data text)`), c.IsNil)
	assertWriteable := func() {
		debug(t, "writing to postgres database")
		t.Assert(db.Exec(`INSERT INTO deploy_test (data) VALUES ('data')`), c.IsNil)
	}

	// check currently writeable
	assertWriteable()

	// check a deploy completes with expected cluster state changes
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	newRelease := release.ID
	deployment, err := client.CreateDeployment(app.ID, newRelease)
	t.Assert(err, c.IsNil)
	deployEvents := make(chan *ct.DeploymentEvent)
	deployStream, err := client.StreamDeployment(deployment, deployEvents)
	t.Assert(err, c.IsNil)
	defer deployStream.Close()

	// assertNextState checks that the next state received is among the remaining
	// expected states, which handles the fact that some expected states may not
	// occur, while the states that do occur must appear in order.
	assertNextState := func(remaining []expectedPgState) int {
		var state state.State
	loop:
		for {
			select {
			case s := <-stateCh:
				t.Assert(s.err, c.IsNil)
				if len(s.state.Async) < d.expectedAsyncs() {
					// we shouldn't usually receive states with fewer asyncs than
					// expected, but they can occur as an intermediate state between
					// two expected states (e.g. when a sync does a takeover at the
					// same time as a new async is started) so just ignore them.
					debug(t, "ignoring state with too few asyncs")
					continue
				}
				state = *s.state
				break loop
			case <-time.After(60 * time.Second):
				t.Fatal("timed out waiting for postgres cluster state")
			}
		}
		if state.Primary == nil {
			t.Fatal("no primary configured")
		}
		log := func(format string, v ...interface{}) {
			debugf(t, "skipping expected state: %s", fmt.Sprintf(format, v...))
		}
	outer:
		for i, expected := range remaining {
			if state.Primary.Meta["FLYNN_RELEASE_ID"] != expected.Primary {
				log("primary has incorrect release")
				continue
			}
			if state.Sync == nil {
				if expected.Sync == "" {
					return i
				}
				log("state has no sync node")
				continue
			}
			if state.Sync.Meta["FLYNN_RELEASE_ID"] != expected.Sync {
				log("sync has incorrect release")
				continue
			}
			if state.Async == nil {
				if expected.Async == nil {
					return i
				}
				log("state has no async nodes")
				continue
			}
			if len(state.Async) != len(expected.Async) {
				log("expected %d asyncs, got %d", len(expected.Async), len(state.Async))
				continue
			}
			for i, release := range expected.Async {
				if state.Async[i].Meta["FLYNN_RELEASE_ID"] != release {
					log("async[%d] has incorrect release", i)
					continue outer
				}
			}
			return i
		}
		t.Fatal("unexpected pg state")
		return -1
	}

	expected := d.expected(oldRelease, newRelease)
	var expectedIndex, newWebJobs int
loop:
	for {
		select {
		case e, ok := <-deployEvents:
			if !ok {
				t.Fatal("unexpected close of deployment event stream")
			}
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatalf("deployment failed: %s", e.Error)
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			if e.JobState != "up" && e.JobState != "down" {
				continue
			}
			switch e.JobType {
			case "postgres":
				// move on if we have seen all the expected events
				if expectedIndex >= len(expected) {
					continue
				}
				skipped := assertNextState(expected[expectedIndex:])
				expectedIndex += 1 + skipped
			case "web":
				if e.JobState == "up" && e.ReleaseID == newRelease {
					newWebJobs++
				}
			}
		case <-time.After(2 * time.Minute):
			t.Fatal("timed out waiting for deployment")
		}
	}

	// check we have the correct number of new web jobs
	t.Assert(newWebJobs, c.Equals, d.webJobs)

	// check writeable now the deploy is complete
	assertWriteable()
}
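// expectedPgState is referenced above but not defined in this excerpt. Based on how
// assertNextState reads it, a minimal sketch could look like the following; the
// real definition may differ:
type expectedPgState struct {
	Primary string   // release ID expected on the primary
	Sync    string   // release ID expected on the sync ("" if no sync is expected)
	Async   []string // release IDs expected on the asyncs, in order
}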
func (s *CLISuite) TestSlugReleaseGarbageCollection(t *c.C) {
	client := s.controllerClient(t)

	// create app with gc.max_inactive_slug_releases=3
	maxInactiveSlugReleases := 3
	app := &ct.App{Meta: map[string]string{"gc.max_inactive_slug_releases": strconv.Itoa(maxInactiveSlugReleases)}}
	t.Assert(client.CreateApp(app), c.IsNil)

	// create an image artifact
	imageArtifact := &ct.Artifact{Type: host.ArtifactTypeDocker, URI: imageURIs["test-apps"]}
	t.Assert(client.CreateArtifact(imageArtifact), c.IsNil)

	// create 5 slug artifacts
	var slug bytes.Buffer
	gz := gzip.NewWriter(&slug)
	t.Assert(tar.NewWriter(gz).Close(), c.IsNil)
	t.Assert(gz.Close(), c.IsNil)
	slugs := []string{
		"http://blobstore.discoverd/1/slug.tgz",
		"http://blobstore.discoverd/2/slug.tgz",
		"http://blobstore.discoverd/3/slug.tgz",
		"http://blobstore.discoverd/4/slug.tgz",
		"http://blobstore.discoverd/5/slug.tgz",
	}
	slugArtifacts := make([]*ct.Artifact, len(slugs))
	for i, uri := range slugs {
		req, err := http.NewRequest("PUT", uri, bytes.NewReader(slug.Bytes()))
		t.Assert(err, c.IsNil)
		res, err := http.DefaultClient.Do(req)
		t.Assert(err, c.IsNil)
		res.Body.Close()
		t.Assert(res.StatusCode, c.Equals, http.StatusOK)
		artifact := &ct.Artifact{
			Type: host.ArtifactTypeFile,
			URI:  uri,
			Meta: map[string]string{"blobstore": "true"},
		}
		t.Assert(client.CreateArtifact(artifact), c.IsNil)
		slugArtifacts[i] = artifact
	}

	// create 6 releases, the second being scaled up and having the
	// same slug as the third (which prevents that slug being deleted)
	releases := make([]*ct.Release, 6)
	for i, r := range []struct {
		slug   *ct.Artifact
		active bool
	}{
		{slugArtifacts[0], false},
		{slugArtifacts[1], true},
		{slugArtifacts[1], false},
		{slugArtifacts[2], false},
		{slugArtifacts[3], false},
		{slugArtifacts[4], false},
	} {
		release := &ct.Release{
			ArtifactIDs: []string{imageArtifact.ID, r.slug.ID},
			Processes: map[string]ct.ProcessType{
				"app": {Cmd: []string{"/bin/pingserv"}, Ports: []ct.Port{{Proto: "tcp"}}},
			},
		}
		t.Assert(client.CreateRelease(release), c.IsNil)
		procs := map[string]int{"app": 0}
		if r.active {
			procs["app"] = 1
		}
		t.Assert(client.PutFormation(&ct.Formation{
			AppID:     app.ID,
			ReleaseID: release.ID,
			Processes: procs,
		}), c.IsNil)
		releases[i] = release
	}

	// scale the last release so we can deploy it
	lastRelease := releases[len(releases)-1]
	watcher, err := client.WatchJobEvents(app.ID, lastRelease.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	t.Assert(client.PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: lastRelease.ID,
		Processes: map[string]int{"app": 1},
	}), c.IsNil)
	t.Assert(watcher.WaitFor(ct.JobEvents{"app": ct.JobUpEvents(1)}, scaleTimeout, nil), c.IsNil)
	t.Assert(client.SetAppRelease(app.ID, lastRelease.ID), c.IsNil)

	// subscribe to garbage collection events
	gcEvents := make(chan *ct.Event)
	stream, err := client.StreamEvents(ct.StreamEventsOptions{
		AppID:       app.ID,
		ObjectTypes: []ct.EventType{ct.EventTypeAppGarbageCollection},
	}, gcEvents)
	t.Assert(err, c.IsNil)
	defer stream.Close()

	// deploy a new release with the same slug as the last release
	newRelease := *lastRelease
	newRelease.ID = ""
	t.Assert(client.CreateRelease(&newRelease), c.IsNil)
	t.Assert(client.DeployAppRelease(app.ID, newRelease.ID), c.IsNil)

	// wait for garbage collection
	select {
	case event, ok := <-gcEvents:
		if !ok {
			t.Fatalf("event stream closed unexpectedly: %s", stream.Err())
		}
		var e ct.AppGarbageCollectionEvent
		t.Assert(json.Unmarshal(event.Data, &e), c.IsNil)
		if e.Error != "" {
			t.Fatalf("garbage collection failed: %s", e.Error)
		}
	case <-time.After(60 * time.Second):
		t.Fatal("timed out waiting for garbage collection")
	}

	// check we have 4 distinct slug releases (so 5 in total, only 3 of
	// which are inactive)
	list, err := client.AppReleaseList(app.ID)
	t.Assert(err, c.IsNil)
	t.Assert(list, c.HasLen, maxInactiveSlugReleases+2)
	distinctSlugs := make(map[string]struct{}, len(list))
	for _, release := range list {
		files := release.FileArtifactIDs()
		t.Assert(files, c.HasLen, 1)
		distinctSlugs[files[0]] = struct{}{}
	}
	t.Assert(distinctSlugs, c.HasLen, maxInactiveSlugReleases+1)

	// check the first and third releases got deleted, but the rest remain
	assertDeleted := func(release *ct.Release, deleted bool) {
		_, err := client.GetRelease(release.ID)
		if deleted {
			t.Assert(err, c.Equals, controller.ErrNotFound)
		} else {
			t.Assert(err, c.IsNil)
		}
	}
	assertDeleted(releases[0], true)
	assertDeleted(releases[1], false)
	assertDeleted(releases[2], true)
	assertDeleted(releases[3], false)
	assertDeleted(releases[4], false)
	assertDeleted(releases[5], false)
	assertDeleted(&newRelease, false)

	// check the first slug got deleted, but the rest remain
	s.assertURI(t, slugs[0], http.StatusNotFound)
	for i := 1; i < len(slugs); i++ {
		s.assertURI(t, slugs[i], http.StatusOK)
	}
}