func main() { grohl.AddContext("app", "controller-scheduler") grohl.Log(grohl.Data{"at": "start"}) cc, err := controller.NewClient("", os.Getenv("AUTH_KEY")) if err != nil { log.Fatal(err) } cl, err := cluster.NewClient() if err != nil { log.Fatal(err) } c := newContext(cc, cl) grohl.Log(grohl.Data{"at": "leaderwait"}) leaderWait, err := discoverd.RegisterAndStandby("flynn-controller-scheduler", ":"+os.Getenv("PORT"), nil) if err != nil { log.Fatal(err) } <-leaderWait grohl.Log(grohl.Data{"at": "leader"}) // TODO: periodic full cluster sync for anti-entropy c.watchFormations(nil, nil) }
func main() { port := os.Getenv("PORT") if port == "" { port = "3000" } addr := ":" + port db, err := postgres.Open("", "") if err != nil { log.Fatal(err) } if err := migrateDB(db.DB); err != nil { log.Fatal(err) } cc, err := cluster.NewClient() if err != nil { log.Fatal(err) } sc, err := strowgerc.New() if err != nil { log.Fatal(err) } if err := discoverd.Register("flynn-controller", addr); err != nil { log.Fatal(err) } handler, _ := appHandler(handlerConfig{db: db, cc: cc, sc: sc, dc: discoverd.DefaultClient, key: os.Getenv("AUTH_KEY")}) log.Fatal(http.ListenAndServe(addr, handler)) }
func Run(name string, args []string) error {
	argv := make([]string, 1, 1+len(args))
	argv[0] = name
	argv = append(argv, args...)

	cmd, ok := commands[name]
	if !ok {
		return ErrInvalidCommand
	}
	parsedArgs, err := docopt.Parse(cmd.usage, argv, true, "", strings.Contains(cmd.usage, "[--]"))
	if err != nil {
		return err
	}

	switch f := cmd.f.(type) {
	case func(*docopt.Args, *cluster.Client) error:
		return f(parsedArgs, cluster.NewClient())
	case func(*docopt.Args):
		f(parsedArgs)
		return nil
	case func(*docopt.Args) error:
		return f(parsedArgs)
	case func() error:
		return f()
	case func():
		f()
		return nil
	}
	return fmt.Errorf("unexpected command type %T", cmd.f)
}

func main() { grohl.AddContext("app", "controller-scheduler") grohl.Log(grohl.Data{"at": "start"}) if period := os.Getenv("BACKOFF_PERIOD"); period != "" { var err error backoffPeriod, err = time.ParseDuration(period) if err != nil { log.Fatal(err) } grohl.Log(grohl.Data{"at": "backoff_period", "period": backoffPeriod.String()}) } cc, err := controller.NewClient("", os.Getenv("AUTH_KEY")) if err != nil { log.Fatal(err) } cl, err := cluster.NewClient() if err != nil { log.Fatal(err) } c := newContext(cc, cl) grohl.Log(grohl.Data{"at": "leaderwait"}) leaderWait, err := discoverd.RegisterAndStandby("flynn-controller-scheduler", ":"+os.Getenv("PORT"), nil) if err != nil { log.Fatal(err) } <-leaderWait grohl.Log(grohl.Data{"at": "leader"}) // TODO: periodic full cluster sync for anti-entropy c.watchFormations() }
func runListHosts(args *docopt.Args) error { clusterClient := cluster.NewClient() hosts, err := clusterClient.Hosts() if err != nil { return err } if len(hosts) == 0 { return errors.New("no hosts found") } peers, _ := discoverd.DefaultClient.RaftPeers() leader, _ := discoverd.DefaultClient.RaftLeader() w := tabwriter.NewWriter(os.Stdout, 1, 2, 2, ' ', 0) defer w.Flush() listRec(w, "ID", "ADDR", "RAFT STATUS") for _, h := range hosts { // If we have the list of raft peers augument the output // with each hosts raft proxy/peer status. raftStatus := "" if len(peers) > 0 { raftStatus = hostRaftStatus(h, peers, leader.Host) } listRec(w, h.ID(), h.Addr(), raftStatus) } return nil }
func captureJobs(gist *Gist, env bool) error {
	client := cluster.NewClient()

	jobs, err := jobList(client, true)
	if err != nil {
		return err
	}

	var buf bytes.Buffer
	printJobs(jobs, &buf)
	gist.AddFile("1-jobs.log", buf.String())

	for _, job := range jobs {
		var name string
		if app, ok := job.Job.Metadata["flynn-controller.app_name"]; ok {
			name += app + "-"
		}
		if typ, ok := job.Job.Metadata["flynn-controller.type"]; ok {
			name += typ + "-"
		}
		name += job.Job.ID + ".log"

		var content bytes.Buffer
		printJobDesc(&job, &content, env)
		fmt.Fprint(&content, "\n\n***** ***** ***** ***** ***** ***** ***** ***** ***** *****\n\n")
		getLog(job.HostID, job.Job.ID, client, false, true, &content, &content)

		gist.AddFile(name, content.String())
	}

	return nil
}

func init() {
	log.SetFlags(0)

	var err error
	clusterc, err = cluster.NewClient()
	if err != nil {
		log.Fatalln("Error connecting to cluster leader:", err)
	}
}

func (s *State) ClusterClient() (*cluster.Client, error) {
	if s.clusterc == nil {
		cc, err := cluster.NewClient()
		if err != nil {
			return nil, err
		}
		s.clusterc = cc
	}
	return s.clusterc, nil
}

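// Usage sketch (hypothetical caller, not from the source): ClusterClient memoizes
// the client on State, so repeated calls during a single run share one connection
// instead of re-dialing the cluster leader.
//
//	cc, err := s.ClusterClient()
//	if err != nil {
//		return err
//	}
//	// cc is the same instance any later ClusterClient() call on s will return
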
func main() { defer shutdown.Exit() grohl.AddContext("app", "controller-scheduler") grohl.Log(grohl.Data{"at": "start"}) go startHTTPServer() if period := os.Getenv("BACKOFF_PERIOD"); period != "" { var err error backoffPeriod, err = time.ParseDuration(period) if err != nil { shutdown.Fatal(err) } grohl.Log(grohl.Data{"at": "backoff_period", "period": backoffPeriod.String()}) } cc, err := controller.NewClient("", os.Getenv("AUTH_KEY")) if err != nil { shutdown.Fatal(err) } c := newContext(cc, cluster.NewClient()) c.watchHosts() grohl.Log(grohl.Data{"at": "leaderwait"}) hb, err := discoverd.AddServiceAndRegister("controller-scheduler", ":"+os.Getenv("PORT")) if err != nil { shutdown.Fatal(err) } shutdown.BeforeExit(func() { hb.Close() }) leaders := make(chan *discoverd.Instance) stream, err := discoverd.NewService("controller-scheduler").Leaders(leaders) if err != nil { shutdown.Fatal(err) } for leader := range leaders { if leader.Addr == hb.Addr() { break } } if err := stream.Err(); err != nil { // TODO: handle discoverd errors shutdown.Fatal(err) } stream.Close() // TODO: handle demotion grohl.Log(grohl.Data{"at": "leader"}) // TODO: periodic full cluster sync for anti-entropy c.watchFormations() }
func (h *Host) ServeHTTP() { r := httprouter.New() r.POST("/attach", (&attachHandler{state: h.state, backend: h.backend}).ServeHTTP) jobAPI := &jobAPI{host: h} jobAPI.RegisterRoutes(r) volAPI := volumeapi.NewHTTPAPI(cluster.NewClient(), h.vman) volAPI.RegisterRoutes(r) go http.Serve(h.listener, httphelper.ContextInjector("host", httphelper.NewRequestLogger(r))) }
func (h *Host) ServeHTTP() { r := httprouter.New() r.POST("/attach", newAttachHandler(h.state, h.backend, h.log).ServeHTTP) jobAPI := &jobAPI{ host: h, addJobRateLimitBucket: NewRateLimitBucket(h.maxJobConcurrency), } jobAPI.RegisterRoutes(r) volAPI := volumeapi.NewHTTPAPI(cluster.NewClient(), h.vman) volAPI.RegisterRoutes(r) go http.Serve(h.listener, httphelper.ContextInjector("host", httphelper.NewRequestLogger(r))) }
func main() { port := os.Getenv("PORT") if port == "" { port = "3000" } addr := ":" + port if seed := os.Getenv("NAME_SEED"); seed != "" { s, err := hex.DecodeString(seed) if err != nil { log.Fatalln("error decoding NAME_SEED:", err) } name.SetSeed(s) } db, err := postgres.Open("", "") if err != nil { log.Fatal(err) } if err := migrateDB(db.DB); err != nil { log.Fatal(err) } cc, err := cluster.NewClient() if err != nil { log.Fatal(err) } sc, err := routerc.New() if err != nil { log.Fatal(err) } if err := discoverd.Register("flynn-controller", addr); err != nil { log.Fatal(err) } shutdown.BeforeExit(func() { discoverd.Unregister("flynn-controller", addr) }) handler, _ := appHandler(handlerConfig{db: db, cc: cc, sc: sc, dc: discoverd.DefaultClient, key: os.Getenv("AUTH_KEY")}) log.Fatal(http.ListenAndServe(addr, handler)) }
func (m *Monitor) Run() {
	log := monitorLogger.New("fn", "Run")
	log.Info("waiting for discoverd")
	m.waitDiscoverd()
	log.Info("waiting for raft leader")
	m.waitRaftLeader()

	// we can connect the leader election wrapper now
	m.discoverd = newDiscoverdWrapper(m.addr+":1113", m.logger)

	// connect cluster client now that discoverd is up.
	m.c = cluster.NewClient()
	m.monitorSvc = discoverd.NewService("cluster-monitor")

	log.Info("waiting for monitor service to be enabled for this cluster")
	m.waitEnabled()
	log.Info("registering cluster-monitor")
	m.waitRegister()

	leaderCh := m.discoverd.LeaderCh()
	ticker := time.NewTicker(checkInterval)

	log.Info("starting monitor loop")
	for {
		var isLeader bool
		select {
		case <-m.shutdownCh:
			log.Info("shutting down monitor")
			return
		case isLeader = <-leaderCh:
			m.isLeader = isLeader
			continue
		default:
		}

		select {
		case <-ticker.C:
			if m.isLeader {
				m.checkCluster()
			}
		}
	}
}

func runListHosts(args *docopt.Args) error { clusterClient := cluster.NewClient() hosts, err := clusterClient.Hosts() if err != nil { return err } if len(hosts) == 0 { return errors.New("no hosts found") } w := tabwriter.NewWriter(os.Stdout, 1, 2, 2, ' ', 0) defer w.Flush() listRec(w, "ID", "ADDR") for _, h := range hosts { listRec(w, h.ID(), h.Addr()) } return nil }
func Run(name string, args []string) error {
	argv := make([]string, 1, 1+len(args))
	argv[0] = name
	argv = append(argv, args...)

	cmd, ok := commands[name]
	if !ok {
		return fmt.Errorf("%s is not a valid command", name)
	}
	parsedArgs, err := docopt.Parse(cmd.usage, argv, true, "", false)
	if err != nil {
		return err
	}

	switch f := cmd.f.(type) {
	case func(*docopt.Args, *cluster.Client) error:
		client, err := cluster.NewClient()
		if err != nil {
			return err
		}
		defer client.Close()
		return f(parsedArgs, client)
	case func(*docopt.Args):
		f(parsedArgs)
		return nil
	case func(*docopt.Args) error:
		return f(parsedArgs)
	case func() error:
		return f()
	case func():
		f()
		return nil
	}
	return fmt.Errorf("unexpected command type %T", cmd.f)
}

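// Hypothetical registration sketch (the struct name and usage text below are
// illustrative, not taken from the source): Run dispatches on the concrete type of
// cmd.f, so registering a command only requires storing usage text plus a function
// matching one of the signatures in the type switch, e.g.
// func(*docopt.Args, *cluster.Client) error to receive the cluster client that Run
// dials and closes.
func init() {
	commands["ps"] = &command{
		usage: "usage: flynn-host ps",
		f: func(args *docopt.Args, client *cluster.Client) error {
			// inspect the cluster via the connected client here
			return nil
		},
	}
}
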
func (c *Cmd) Start() error { if c.started { return errors.New("exec: already started") } c.started = true if c.cluster == nil { var err error c.cluster, err = cluster.NewClient() if err != nil { return err } c.closeCluster = true } hosts, err := c.cluster.ListHosts() if err != nil { return err } if c.HostID == "" { // TODO: check if this is actually random for c.HostID = range hosts { break } } if c.JobID == "" { c.JobID = cluster.RandomJobID("") } job := &host.Job{ ID: c.JobID, Config: &docker.Config{ Image: c.Image, Cmd: c.Cmd, Tty: c.TTY, Env: formatEnv(c.Env), }, Attributes: c.Attrs, } if c.Stdout != nil || c.stdoutPipe != nil { job.Config.AttachStdout = true } if c.Stderr != nil || c.stderrPipe != nil { job.Config.AttachStderr = true } if c.Stdin != nil || c.stdinPipe != nil { job.Config.AttachStdin = true job.Config.OpenStdin = true job.Config.StdinOnce = true } c.host, err = c.cluster.DialHost(c.HostID) if err != nil { return err } // subscribe to host events ch := make(chan *host.Event) stream := c.host.StreamEvents(job.ID, ch) go func() { for event := range ch { if event.Event == "stop" || event.Event == "error" { close(c.done) return } } c.streamErr = stream.Err() close(c.done) // TODO: handle disconnections }() var rwc cluster.ReadWriteCloser var attachWait func() error if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil || c.stdoutPipe != nil || c.stderrPipe != nil || c.stdinPipe != nil { req := &host.AttachReq{ JobID: job.ID, Height: c.TermHeight, Width: c.TermWidth, Flags: host.AttachFlagStream, } if job.Config.AttachStdout { req.Flags |= host.AttachFlagStdout } if job.Config.AttachStderr { req.Flags |= host.AttachFlagStderr } if job.Config.AttachStdin { req.Flags |= host.AttachFlagStdin } rwc, attachWait, err = c.host.Attach(req, true) if err != nil { c.close() return err } } goroutines := make([]func() error, 0, 4) c.attachConn = rwc if attachWait != nil { goroutines = append(goroutines, attachWait) } if c.stdinPipe != nil { c.stdinPipe.set(writeCloseCloser{rwc}) } else if c.Stdin != nil { goroutines = append(goroutines, func() error { _, err := io.Copy(rwc, c.Stdin) rwc.CloseWrite() return err }) } if !c.TTY { if c.stdoutPipe != nil || c.stderrPipe != nil { stdout, stderr := demultiplex.Streams(rwc) if c.stdoutPipe != nil { c.stdoutPipe.set(stdout) } else if c.Stdout != nil { goroutines = append(goroutines, cpFunc(c.Stdout, stdout)) } if c.stderrPipe != nil { c.stderrPipe.set(stderr) } else if c.Stderr != nil { goroutines = append(goroutines, cpFunc(c.Stderr, stderr)) } } else if c.Stdout != nil || c.Stderr != nil { goroutines = append(goroutines, func() error { return demultiplex.Copy(c.Stdout, c.Stderr, rwc) }) } } else if c.stdoutPipe != nil { c.stdoutPipe.set(rwc) } else if c.Stdout != nil { goroutines = append(goroutines, cpFunc(c.Stdout, rwc)) } c.errCh = make(chan error, len(goroutines)) for _, fn := range goroutines { go func(fn func() error) { c.errCh <- fn() }(fn) } _, err = c.cluster.AddJobs(&host.AddJobsReq{HostJobs: map[string][]*host.Job{c.HostID: {job}}}) return err }
func runUpdate(args *docopt.Args) error { log := log15.New() // create and update a TUF client log.Info("initializing TUF client") local, err := tuf.FileLocalStore(args.String["--tuf-db"]) if err != nil { log.Error("error creating local TUF client", "err", err) return err } remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("updater")) if err != nil { log.Error("error creating remote TUF client", "err", err) return err } client := tuf.NewClient(local, remote) log.Info("updating TUF data") if _, err := client.Update(); err != nil && !tuf.IsLatestSnapshot(err) { log.Error("error updating TUF client", "err", err) return err } // read the TUF db so we can pass it to hosts log.Info("reading TUF database") tufDB, err := ioutil.ReadFile(args.String["--tuf-db"]) if err != nil { log.Error("error reading the TUF database", "err", err) return err } log.Info("getting host list") clusterClient := cluster.NewClient() hosts, err := clusterClient.Hosts() if err != nil { log.Error("error getting host list", "err", err) return err } if len(hosts) == 0 { return errors.New("no hosts found") } log.Info("pulling images on all hosts") images := make(map[string]string) var imageMtx sync.Mutex hostErrs := make(chan error) for _, h := range hosts { go func(host *cluster.Host) { log := log.New("host", host.ID()) log.Info("connecting to host") log.Info("pulling images") ch := make(chan *layer.PullInfo) stream, err := host.PullImages( args.String["--repository"], args.String["--driver"], args.String["--root"], bytes.NewReader(tufDB), ch, ) if err != nil { log.Error("error pulling images", "err", err) hostErrs <- err return } defer stream.Close() for info := range ch { if info.Type == layer.TypeLayer { continue } log.Info("pulled image", "name", info.Repo) imageURI := fmt.Sprintf("%s?name=%s&id=%s", args.String["--repository"], info.Repo, info.ID) imageMtx.Lock() images[info.Repo] = imageURI imageMtx.Unlock() } hostErrs <- stream.Err() }(h) } var hostErr error for _, h := range hosts { if err := <-hostErrs; err != nil { log.Error("error pulling images", "host", h.ID(), "err", err) hostErr = err continue } log.Info("images pulled successfully", "host", h.ID()) } if hostErr != nil { return hostErr } updaterImage, ok := images["flynn/updater"] if !ok { e := "missing flynn/updater image" log.Error(e) return errors.New(e) } imageJSON, err := json.Marshal(images) if err != nil { log.Error("error encoding images", "err", err) return err } // use a flag to determine whether to use a TTY log formatter because actually // assigning a TTY to the job causes reading images via stdin to fail. cmd := exec.Command(exec.DockerImage(updaterImage), fmt.Sprintf("--tty=%t", term.IsTerminal(os.Stdout.Fd()))) cmd.Stdin = bytes.NewReader(imageJSON) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { return err } log.Info("update complete") return nil }
func (c *Cmd) Start() error { if c.started { return errors.New("exec: already started") } c.started = true if c.cluster == nil { var err error c.cluster, err = cluster.NewClient() if err != nil { return err } c.closeCluster = true } hosts, err := c.cluster.ListHosts() if err != nil { return err } if c.HostID == "" { // TODO: check if this is actually random for c.HostID = range hosts { break } } if c.JobID == "" { c.JobID = cluster.RandomJobID("") } job := &host.Job{ ID: c.JobID, Artifact: c.Artifact, Config: host.ContainerConfig{ Entrypoint: c.Entrypoint, Cmd: c.Cmd, TTY: c.TTY, Env: c.Env, Stdin: c.Stdin != nil || c.stdinPipe != nil, }, Metadata: c.Meta, } c.host, err = c.cluster.DialHost(c.HostID) if err != nil { return err } if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil || c.stdinPipe != nil { req := &host.AttachReq{ JobID: job.ID, Height: c.TermHeight, Width: c.TermWidth, Flags: host.AttachFlagStream, } if c.Stdout != nil { req.Flags |= host.AttachFlagStdout } if c.Stderr != nil { req.Flags |= host.AttachFlagStderr } if job.Config.Stdin { req.Flags |= host.AttachFlagStdin } c.attachClient, err = c.host.Attach(req, true) if err != nil { c.close() return err } } if c.stdinPipe != nil { c.stdinPipe.set(writeCloseCloser{c.attachClient}) } else if c.Stdin != nil { go func() { io.Copy(c.attachClient, c.Stdin) c.attachClient.CloseWrite() }() } go func() { c.exitStatus, c.streamErr = c.attachClient.Receive(c.Stdout, c.Stderr) close(c.done) }() _, err = c.cluster.AddJobs(&host.AddJobsReq{HostJobs: map[string][]*host.Job{c.HostID: {job}}}) return err }
func (c *Cmd) Start() error { if c.started { return errors.New("exec: already started") } c.done = make(chan struct{}) c.started = true if c.host == nil && c.cluster == nil { var err error c.cluster = cluster.NewClient() if err != nil { return err } c.closeCluster = true } if c.HostID == "" { hosts, err := c.cluster.Hosts() if err != nil { return err } if len(hosts) == 0 { return errors.New("exec: no hosts found") } host := schedutil.PickHost(hosts) c.HostID = host.ID() c.host = host } // Use the pre-defined host.Job configuration if provided; // otherwise generate one from the fields on exec.Cmd that mirror stdlib's os.exec. if c.Job == nil { c.Job = &host.Job{ ImageArtifact: &c.ImageArtifact, Config: host.ContainerConfig{ Args: c.Args, TTY: c.TTY, Env: c.Env, Stdin: c.Stdin != nil || c.stdinPipe != nil, }, Metadata: c.Meta, } // if attaching to stdout / stderr, avoid round tripping the // streams via on-disk log files. if c.Stdout != nil || c.Stderr != nil { c.Job.Config.DisableLog = true } } else { c.Job.ImageArtifact = &c.ImageArtifact } if c.Job.ID == "" { c.Job.ID = cluster.GenerateJobID(c.HostID, "") } if c.host == nil { var err error c.host, err = c.cluster.Host(c.HostID) if err != nil { return err } } if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil || c.stdinPipe != nil { req := &host.AttachReq{ JobID: c.Job.ID, Height: c.TermHeight, Width: c.TermWidth, Flags: host.AttachFlagStream, } if c.Stdout != nil { req.Flags |= host.AttachFlagStdout } if c.Stderr != nil { req.Flags |= host.AttachFlagStderr } if c.Job.Config.Stdin { req.Flags |= host.AttachFlagStdin } var err error c.attachClient, err = c.host.Attach(req, true) if err != nil { c.close() return err } } if c.stdinPipe != nil { c.stdinPipe.set(writeCloseCloser{c.attachClient}) } else if c.Stdin != nil { go func() { io.Copy(c.attachClient, c.Stdin) c.attachClient.CloseWrite() }() } if c.attachClient == nil { c.eventChan = make(chan *host.Event) var err error c.eventStream, err = c.host.StreamEvents(c.Job.ID, c.eventChan) if err != nil { return err } } go func() { defer close(c.done) if c.attachClient != nil { c.exitStatus, c.streamErr = c.attachClient.Receive(c.Stdout, c.Stderr) } else { outer: for e := range c.eventChan { switch e.Event { case "stop": c.exitStatus = *e.Job.ExitStatus break outer case "error": c.streamErr = errors.New(*e.Job.Error) break outer } } c.eventStream.Close() if c.streamErr == nil { c.streamErr = c.eventStream.Err() } } }() return c.host.AddJob(c.Job) }
func (d *DeployJob) Perform() error { log := d.logger.New("fn", "Perform", "deployment_id", d.ID, "app_id", d.AppID) log.Info("validating deployment strategy") var deployFunc func() error switch d.Strategy { case "one-by-one": deployFunc = d.deployOneByOne case "all-at-once": deployFunc = d.deployAllAtOnce case "sirenia": deployFunc = d.deploySirenia case "discoverd-meta": deployFunc = d.deployDiscoverdMeta default: err := UnknownStrategyError{d.Strategy} log.Error("error validating deployment strategy", "err", err) return err } log.Info("determining cluster size") hosts, err := cluster.NewClient().Hosts() if err != nil { log.Error("error listing cluster hosts", "err", err) return err } d.hostCount = len(hosts) log.Info("determining current release state") oldRelease, err := d.client.GetRelease(d.OldReleaseID) if err != nil { log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err) return err } d.oldRelease = oldRelease log.Info("determining release services and deployment state") release, err := d.client.GetRelease(d.NewReleaseID) if err != nil { log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err) return err } d.newRelease = release for typ, proc := range release.Processes { if proc.Omni { d.omni[typ] = struct{}{} } if proc.Service == "" { log.Info(fmt.Sprintf("using job events for %s process type, no service defined", typ)) d.useJobEvents[typ] = struct{}{} continue } d.serviceNames[typ] = proc.Service log.Info(fmt.Sprintf("using service discovery for %s process type", typ), "service", proc.Service) events := make(chan *discoverd.Event) stream, err := discoverd.NewService(proc.Service).Watch(events) if err != nil { log.Error("error creating service discovery watcher", "service", proc.Service, "err", err) return err } defer stream.Close() outer: for { select { case <-d.stop: return worker.ErrStopped case event, ok := <-events: if !ok { log.Error("error creating service discovery watcher, channel closed", "service", proc.Service) return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service) } switch event.Kind { case discoverd.EventKindCurrent: break outer case discoverd.EventKindServiceMeta: d.serviceMeta = event.ServiceMeta case discoverd.EventKindUp: releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"] if !ok { continue } switch releaseID { case d.OldReleaseID: d.oldReleaseState[typ]++ case d.NewReleaseID: d.newReleaseState[typ]++ } } case <-time.After(5 * time.Second): log.Error("error creating service discovery watcher, timeout reached", "service", proc.Service) return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service) } } go func() { for { event, ok := <-events if !ok { // this usually means deferred cleanup is in progress, but send an error // in case the deploy is still waiting for an event which will now not come. 
d.JobEventErr(errors.New("unexpected close of service event stream")) return } if event.Instance == nil { continue } if id, ok := event.Instance.Meta["FLYNN_APP_ID"]; !ok || id != d.AppID { continue } releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"] if !ok { continue } d.ReleaseJobEvents(releaseID) <- &JobEvent{ Type: JobEventTypeDiscoverd, DiscoverdEvent: event, } } }() } log.Info("getting job event stream") jobEvents := make(chan *ct.Job) stream, err := d.client.StreamJobEvents(d.AppID, jobEvents) if err != nil { log.Error("error getting job event stream", "err", err) return err } defer stream.Close() go func() { for { event, ok := <-jobEvents if !ok { d.JobEventErr(errors.New("unexpected close of job event stream")) return } d.ReleaseJobEvents(event.ReleaseID) <- &JobEvent{ Type: JobEventTypeController, JobEvent: event, } } }() log.Info("getting current jobs") jobs, err := d.client.JobList(d.AppID) if err != nil { log.Error("error getting current jobs", "err", err) return err } for _, job := range jobs { if job.State != ct.JobStateUp { continue } if _, ok := d.useJobEvents[job.Type]; !ok { continue } // track the jobs so we can drop any events received between // connecting the job stream and getting the list of jobs d.knownJobStates[jobIDState{job.ID, ct.JobStateUp}] = struct{}{} switch job.ReleaseID { case d.OldReleaseID: d.oldReleaseState[job.Type]++ case d.NewReleaseID: d.newReleaseState[job.Type]++ } } log.Info( "determined deployment state", "original", d.Processes, "old_release", d.oldReleaseState, "new_release", d.newReleaseState, ) return deployFunc() }
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	externalIP := args.String["--external-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	nsumount := args.String["--nsumount"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	grohl.AddContext("app", "host")
	grohl.Log(grohl.Data{"at": "start"})
	g := grohl.NewContext(grohl.Data{"fn": "main"})

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			shutdown.Fatal(err)
		}
	}

	publishAddr := net.JoinHostPort(externalIP, "1113")
	if discoveryToken != "" {
		// TODO: retry
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			g.Log(grohl.Data{"at": "register_discovery", "status": "error", "err": err.Error()})
			shutdown.Fatal(err)
		}
		g.Log(grohl.Data{"at": "register_discovery", "id": discoveryID})
	}

	state := NewState(hostID, stateFile)
	var backend Backend
	var err error

	// create volume manager
	vman, err := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined.
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			g.Log(grohl.Data{"at": "zpool_size", "size": size})

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		},
	)
	if err != nil {
		shutdown.Fatal(err)
	}

	mux := logmux.New(1000)
	shutdown.BeforeExit(func() { mux.Close() })

	switch backendName {
	case "libvirt-lxc":
		backend, err = NewLibvirtLXCBackend(state, vman, logDir, flynnInit, nsumount, mux)
	default:
		log.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)

	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:      hostID,
		url:     publishURL,
		state:   state,
		backend: backend,
		status:  &host.HostStatus{ID: hostID, URL: publishURL},
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		if err := backend.Cleanup(except); err != nil {
			return err
		}
		for _, id := range except {
			if e := backend.Stop(id); e != nil {
				err = e
			}
		}
		return
	}

	resurrect, err := state.Restore(backend)
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		discoverdManager.Close()
		stopJobs()
	})
	shutdown.BeforeExit(func() {
		if err := state.MarkForResurrection(); err != nil {
			log.Print("error marking for resurrection", err)
		}
	})

	if err := serveHTTP(
		host,
		&attachHandler{state: state, backend: backend},
		cluster.NewClient(),
		vman,
		discoverdManager.ConnectLocal,
	); err != nil {
		shutdown.Fatal(err)
	}

	if force {
		if err := stopJobs(); err != nil {
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
	}

	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		// No peers have working discoverd, so resurrect any available jobs
		resurrect()
	}

	<-make(chan struct{})
}

func runUpdate(args *docopt.Args) error { log := log15.New() // create and update a TUF client log.Info("initializing TUF client") local, err := tuf.FileLocalStore(args.String["--tuf-db"]) if err != nil { log.Error("error creating local TUF client", "err", err) return err } remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("updater")) if err != nil { log.Error("error creating remote TUF client", "err", err) return err } client := tuf.NewClient(local, remote) if !args.Bool["--is-latest"] { return updateAndExecLatest(args.String["--config-dir"], client, log) } // unlink the current binary if it is a temp file if args.Bool["--is-tempfile"] { os.Remove(os.Args[0]) } // read the TUF db so we can pass it to hosts log.Info("reading TUF database") tufDB, err := ioutil.ReadFile(args.String["--tuf-db"]) if err != nil { log.Error("error reading the TUF database", "err", err) return err } log.Info("getting host list") clusterClient := cluster.NewClient() hosts, err := clusterClient.Hosts() if err != nil { log.Error("error getting host list", "err", err) return err } if len(hosts) == 0 { return errors.New("no hosts found") } log.Info(fmt.Sprintf("updating %d hosts", len(hosts))) // eachHost invokes the given function in a goroutine for each host, // returning an error if any of the functions returns an error. eachHost := func(f func(*cluster.Host, log15.Logger) error) (err error) { errs := make(chan error) for _, h := range hosts { go func(host *cluster.Host) { log := log.New("host", host.ID()) errs <- f(host, log) }(h) } for range hosts { if e := <-errs; e != nil { err = e } } return } var mtx sync.Mutex images := make(map[string]string) log.Info("pulling latest images on all hosts") if err := eachHost(func(host *cluster.Host, log log15.Logger) error { log.Info("pulling images") ch := make(chan *layer.PullInfo) stream, err := host.PullImages( args.String["--repository"], args.String["--driver"], args.String["--root"], version.String(), bytes.NewReader(tufDB), ch, ) if err != nil { log.Error("error pulling images", "err", err) return err } defer stream.Close() for info := range ch { if info.Type == layer.TypeLayer { continue } log.Info("pulled image", "name", info.Repo) imageURI := fmt.Sprintf("%s?name=%s&id=%s", args.String["--repository"], info.Repo, info.ID) mtx.Lock() images[info.Repo] = imageURI mtx.Unlock() } if err := stream.Err(); err != nil { log.Error("error pulling images", "err", err) return err } return nil }); err != nil { return err } var binaries map[string]string log.Info("pulling latest binaries and config on all hosts") if err := eachHost(func(host *cluster.Host, log log15.Logger) error { log.Info("pulling binaries and config") paths, err := host.PullBinariesAndConfig( args.String["--repository"], args.String["--bin-dir"], args.String["--config-dir"], version.String(), bytes.NewReader(tufDB), ) if err != nil { log.Error("error pulling binaries and config", "err", err) return err } mtx.Lock() binaries = paths mtx.Unlock() log.Info("binaries and config pulled successfully") return nil }); err != nil { return err } log.Info("validating binaries") flynnHost, ok := binaries["flynn-host"] if !ok { return fmt.Errorf("missing flynn-host binary") } flynnInit, ok := binaries["flynn-init"] if !ok { return fmt.Errorf("missing flynn-init binary") } flynnNSUmount, ok := binaries["flynn-nsumount"] if !ok { return fmt.Errorf("missing flynn-nsumount binary") } log.Info("updating flynn-host daemon on all hosts") if err := eachHost(func(host *cluster.Host, log log15.Logger) 
error { // TODO(lmars): handle daemons using custom flags (e.g. --state=/foo) _, err := host.Update( flynnHost, "daemon", "--id", host.ID(), "--flynn-init", flynnInit, "--nsumount", flynnNSUmount, ) if err != nil { log.Error("error updating binaries", "err", err) return err } log.Info("flynn-host updated successfully") return nil }); err != nil { return err } updaterImage, ok := images["flynn/updater"] if !ok { e := "missing flynn/updater image" log.Error(e) return errors.New(e) } imageJSON, err := json.Marshal(images) if err != nil { log.Error("error encoding images", "err", err) return err } // use a flag to determine whether to use a TTY log formatter because actually // assigning a TTY to the job causes reading images via stdin to fail. cmd := exec.Command(exec.DockerImage(updaterImage), fmt.Sprintf("--tty=%t", term.IsTerminal(os.Stdout.Fd()))) cmd.Stdin = bytes.NewReader(imageJSON) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { return err } log.Info("update complete") return nil }
func (d *DeployJob) Perform() error { log := d.logger.New("fn", "Perform", "deployment_id", d.ID, "app_id", d.AppID) log.Info("validating deployment strategy") var deployFunc func() error switch d.Strategy { case "one-by-one": deployFunc = d.deployOneByOne case "all-at-once": deployFunc = d.deployAllAtOnce case "postgres": deployFunc = d.deployPostgres default: err := UnknownStrategyError{d.Strategy} log.Error("error validating deployment strategy", "err", err) return err } log.Info("determining cluster size") hosts, err := cluster.NewClient().Hosts() if err != nil { log.Error("error listing cluster hosts", "err", err) return err } d.hostCount = len(hosts) log.Info("determining release services and deployment state") release, err := d.client.GetRelease(d.NewReleaseID) if err != nil { log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err) return err } for typ, proc := range release.Processes { if proc.Omni { d.omni[typ] = struct{}{} } if proc.Service == "" { log.Info(fmt.Sprintf("using job events for %s process type, no service defined", typ)) d.useJobEvents[typ] = struct{}{} continue } log.Info(fmt.Sprintf("using service discovery for %s process type", typ), "service", proc.Service) events := make(chan *discoverd.Event) stream, err := discoverd.NewService(proc.Service).Watch(events) if err != nil { log.Error("error creating service discovery watcher", "service", proc.Service, "err", err) return err } defer stream.Close() outer: for { select { case <-d.stop: return ErrStopped case event, ok := <-events: if !ok { log.Error("error creating service discovery watcher, channel closed", "service", proc.Service) return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service) } switch event.Kind { case discoverd.EventKindCurrent: break outer case discoverd.EventKindServiceMeta: d.serviceMeta = event.ServiceMeta case discoverd.EventKindUp: releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"] if !ok { continue } switch releaseID { case d.OldReleaseID: d.oldReleaseState[typ]++ case d.NewReleaseID: d.newReleaseState[typ]++ } } case <-time.After(5 * time.Second): log.Error("error creating service discovery watcher, timeout reached", "service", proc.Service) return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service) } } go func() { for { event, ok := <-events if !ok { // if this happens, it means defer cleanup is in progress // TODO: this could also happen if the stream connection // dropped. handle that case return } d.serviceEvents <- event } }() } log.Info("getting job event stream") d.jobEvents = make(chan *ct.Job) stream, err := d.client.StreamJobEvents(d.AppID, d.jobEvents) if err != nil { log.Error("error getting job event stream", "err", err) return err } defer stream.Close() log.Info("getting current jobs") jobs, err := d.client.JobList(d.AppID) if err != nil { log.Error("error getting current jobs", "err", err) return err } for _, job := range jobs { if job.State != "up" { continue } if _, ok := d.useJobEvents[job.Type]; !ok { continue } // track the jobs so we can drop any events received between // connecting the job stream and getting the list of jobs d.knownJobStates[jobIDState{job.ID, "up"}] = struct{}{} switch job.ReleaseID { case d.OldReleaseID: d.oldReleaseState[job.Type]++ case d.NewReleaseID: d.newReleaseState[job.Type]++ } } log.Info( "determined deployment state", "original", d.Processes, "old_release", d.oldReleaseState, "new_release", d.newReleaseState, ) return deployFunc() }
func main() { defer shutdown.Exit() port := os.Getenv("PORT") if port == "" { port = "3000" } addr := ":" + port if seed := os.Getenv("NAME_SEED"); seed != "" { s, err := hex.DecodeString(seed) if err != nil { log.Fatalln("error decoding NAME_SEED:", err) } name.SetSeed(s) } db := postgres.Wait("", "") if err := migrateDB(db.DB); err != nil { shutdown.Fatal(err) } pgxcfg, err := pgx.ParseURI(fmt.Sprintf("http://%s:%s@%s/%s", os.Getenv("PGUSER"), os.Getenv("PGPASSWORD"), db.Addr(), os.Getenv("PGDATABASE"))) if err != nil { log.Fatal(err) } pgxcfg.Dial = dialer.Retry.Dial pgxpool, err := pgx.NewConnPool(pgx.ConnPoolConfig{ ConnConfig: pgxcfg, AfterConnect: que.PrepareStatements, }) if err != nil { log.Fatal(err) } shutdown.BeforeExit(func() { pgxpool.Close() }) lc, err := logaggc.New("") if err != nil { shutdown.Fatal(err) } rc := routerc.New() hb, err := discoverd.DefaultClient.AddServiceAndRegisterInstance("flynn-controller", &discoverd.Instance{ Addr: addr, Proto: "http", Meta: map[string]string{ "AUTH_KEY": os.Getenv("AUTH_KEY"), }, }) if err != nil { shutdown.Fatal(err) } shutdown.BeforeExit(func() { hb.Close() }) handler := appHandler(handlerConfig{ db: db, cc: clusterClientWrapper{cluster.NewClient()}, lc: lc, rc: rc, pgxpool: pgxpool, keys: strings.Split(os.Getenv("AUTH_KEY"), ","), }) shutdown.Fatal(http.ListenAndServe(addr, handler)) }
"log" "os" "path" "regexp" "strings" "time" "github.com/flynn/flynn/controller/client" ct "github.com/flynn/flynn/controller/types" "github.com/flynn/flynn/host/types" "github.com/flynn/flynn/pkg/cluster" "github.com/flynn/flynn/pkg/exec" "github.com/flynn/flynn/pkg/random" ) var clusterc = cluster.NewClient() func init() { log.SetFlags(0) } var typesPattern = regexp.MustCompile("types.* -> (.+)\n") const blobstoreURL = "http://blobstore.discoverd" const scaleTimeout = 20 * time.Second func main() { client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY")) if err != nil { log.Fatalln("Unable to connect to controller:", err) }