Example #1
func main() {
	grohl.AddContext("app", "controller-scheduler")
	grohl.Log(grohl.Data{"at": "start"})

	cc, err := controller.NewClient("", os.Getenv("AUTH_KEY"))
	if err != nil {
		log.Fatal(err)
	}
	cl, err := cluster.NewClient()
	if err != nil {
		log.Fatal(err)
	}
	c := newContext(cc, cl)

	grohl.Log(grohl.Data{"at": "leaderwait"})
	leaderWait, err := discoverd.RegisterAndStandby("flynn-controller-scheduler", ":"+os.Getenv("PORT"), nil)
	if err != nil {
		log.Fatal(err)
	}
	<-leaderWait
	grohl.Log(grohl.Data{"at": "leader"})

	// TODO: periodic full cluster sync for anti-entropy
	c.watchFormations(nil, nil)
}
Example #2
func main() {
	port := os.Getenv("PORT")
	if port == "" {
		port = "3000"
	}
	addr := ":" + port

	db, err := postgres.Open("", "")
	if err != nil {
		log.Fatal(err)
	}

	if err := migrateDB(db.DB); err != nil {
		log.Fatal(err)
	}

	cc, err := cluster.NewClient()
	if err != nil {
		log.Fatal(err)
	}

	sc, err := strowgerc.New()
	if err != nil {
		log.Fatal(err)
	}

	if err := discoverd.Register("flynn-controller", addr); err != nil {
		log.Fatal(err)
	}

	handler, _ := appHandler(handlerConfig{db: db, cc: cc, sc: sc, dc: discoverd.DefaultClient, key: os.Getenv("AUTH_KEY")})
	log.Fatal(http.ListenAndServe(addr, handler))
}
Example #3
func Run(name string, args []string) error {
	argv := make([]string, 1, 1+len(args))
	argv[0] = name
	argv = append(argv, args...)

	cmd, ok := commands[name]
	if !ok {
		return ErrInvalidCommand
	}
	parsedArgs, err := docopt.Parse(cmd.usage, argv, true, "", strings.Contains(cmd.usage, "[--]"))
	if err != nil {
		return err
	}

	switch f := cmd.f.(type) {
	case func(*docopt.Args, *cluster.Client) error:
		return f(parsedArgs, cluster.NewClient())
	case func(*docopt.Args):
		f(parsedArgs)
		return nil
	case func(*docopt.Args) error:
		return f(parsedArgs)
	case func() error:
		return f()
	case func():
		f()
		return nil
	}

	return fmt.Errorf("unexpected command type %T", cmd.f)
}
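Run dispatches by looking the name up in a package-level commands map and switching on the handler's type; the registry itself does not appear in these examples. A minimal sketch of what it could look like, with a hypothetical command struct and Register helper inferred from the cmd.usage and cmd.f fields used above:

type command struct {
	usage string
	f     interface{} // one of the handler signatures switched on in Run
}

var commands = make(map[string]*command)

// Register adds a command under the given name; its handler must match one
// of the signatures that Run switches on.
func Register(name, usage string, f interface{}) {
	commands[name] = &command{usage: usage, f: f}
}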
Example #4
func main() {
	grohl.AddContext("app", "controller-scheduler")
	grohl.Log(grohl.Data{"at": "start"})

	if period := os.Getenv("BACKOFF_PERIOD"); period != "" {
		var err error
		backoffPeriod, err = time.ParseDuration(period)
		if err != nil {
			log.Fatal(err)
		}
		grohl.Log(grohl.Data{"at": "backoff_period", "period": backoffPeriod.String()})
	}

	cc, err := controller.NewClient("", os.Getenv("AUTH_KEY"))
	if err != nil {
		log.Fatal(err)
	}
	cl, err := cluster.NewClient()
	if err != nil {
		log.Fatal(err)
	}
	c := newContext(cc, cl)

	grohl.Log(grohl.Data{"at": "leaderwait"})
	leaderWait, err := discoverd.RegisterAndStandby("flynn-controller-scheduler", ":"+os.Getenv("PORT"), nil)
	if err != nil {
		log.Fatal(err)
	}
	<-leaderWait
	grohl.Log(grohl.Data{"at": "leader"})

	// TODO: periodic full cluster sync for anti-entropy
	c.watchFormations()
}
Example #5
func runListHosts(args *docopt.Args) error {
	clusterClient := cluster.NewClient()
	hosts, err := clusterClient.Hosts()
	if err != nil {
		return err
	}
	if len(hosts) == 0 {
		return errors.New("no hosts found")
	}

	peers, _ := discoverd.DefaultClient.RaftPeers()
	leader, _ := discoverd.DefaultClient.RaftLeader()

	w := tabwriter.NewWriter(os.Stdout, 1, 2, 2, ' ', 0)
	defer w.Flush()
	listRec(w, "ID", "ADDR", "RAFT STATUS")
	for _, h := range hosts {
		// If we have the list of raft peers, augment the output
		// with each host's raft proxy/peer status.
		raftStatus := ""
		if len(peers) > 0 {
			raftStatus = hostRaftStatus(h, peers, leader.Host)
		}
		listRec(w, h.ID(), h.Addr(), raftStatus)
	}
	return nil
}
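This example and Example #14 print their rows through a listRec helper that is not shown here. A minimal sketch of a tabwriter-friendly version (hypothetical; the project's own helper may differ, and it assumes fmt and io are imported):

// listRec writes one tab-separated row; the enclosing tabwriter aligns the
// columns when it is flushed.
func listRec(w io.Writer, fields ...interface{}) {
	for i, f := range fields {
		fmt.Fprint(w, f)
		if i == len(fields)-1 {
			fmt.Fprint(w, "\n")
		} else {
			fmt.Fprint(w, "\t")
		}
	}
}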
Example #6
func captureJobs(gist *Gist, env bool) error {
	client := cluster.NewClient()

	jobs, err := jobList(client, true)
	if err != nil {
		return err
	}

	var buf bytes.Buffer
	printJobs(jobs, &buf)
	gist.AddFile("1-jobs.log", buf.String())

	for _, job := range jobs {
		var name string
		if app, ok := job.Job.Metadata["flynn-controller.app_name"]; ok {
			name += app + "-"
		}
		if typ, ok := job.Job.Metadata["flynn-controller.type"]; ok {
			name += typ + "-"
		}
		name += job.Job.ID + ".log"

		var content bytes.Buffer
		printJobDesc(&job, &content, env)
		fmt.Fprint(&content, "\n\n***** ***** ***** ***** ***** ***** ***** ***** ***** *****\n\n")
		getLog(job.HostID, job.Job.ID, client, false, true, &content, &content)

		gist.AddFile(name, content.String())
	}

	return nil
}
Example #7
func init() {
	log.SetFlags(0)

	var err error
	clusterc, err = cluster.NewClient()
	if err != nil {
		log.Fatalln("Error connecting to cluster leader:", err)
	}
}
Example #8
func (s *State) ClusterClient() (*cluster.Client, error) {
	if s.clusterc == nil {
		cc, err := cluster.NewClient()
		if err != nil {
			return nil, err
		}
		s.clusterc = cc
	}
	return s.clusterc, nil
}
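The accessor above creates the cluster client lazily on first use and caches it on State. A minimal sketch of how a caller might thread it through (withCluster is purely illustrative and not part of the project):

// withCluster resolves the cached client, creating it if necessary, and
// passes it to the supplied function.
func (s *State) withCluster(fn func(*cluster.Client) error) error {
	cc, err := s.ClusterClient()
	if err != nil {
		return err
	}
	return fn(cc)
}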
Example #9
func main() {
	defer shutdown.Exit()

	grohl.AddContext("app", "controller-scheduler")
	grohl.Log(grohl.Data{"at": "start"})

	go startHTTPServer()

	if period := os.Getenv("BACKOFF_PERIOD"); period != "" {
		var err error
		backoffPeriod, err = time.ParseDuration(period)
		if err != nil {
			shutdown.Fatal(err)
		}
		grohl.Log(grohl.Data{"at": "backoff_period", "period": backoffPeriod.String()})
	}

	cc, err := controller.NewClient("", os.Getenv("AUTH_KEY"))
	if err != nil {
		shutdown.Fatal(err)
	}
	c := newContext(cc, cluster.NewClient())

	c.watchHosts()

	grohl.Log(grohl.Data{"at": "leaderwait"})
	hb, err := discoverd.AddServiceAndRegister("controller-scheduler", ":"+os.Getenv("PORT"))
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() { hb.Close() })

	leaders := make(chan *discoverd.Instance)
	stream, err := discoverd.NewService("controller-scheduler").Leaders(leaders)
	if err != nil {
		shutdown.Fatal(err)
	}
	for leader := range leaders {
		if leader.Addr == hb.Addr() {
			break
		}
	}
	if err := stream.Err(); err != nil {
		// TODO: handle discoverd errors
		shutdown.Fatal(err)
	}
	stream.Close()
	// TODO: handle demotion

	grohl.Log(grohl.Data{"at": "leader"})

	// TODO: periodic full cluster sync for anti-entropy
	c.watchFormations()
}
Example #10
File: http.go Project: devick/flynn
func (h *Host) ServeHTTP() {
	r := httprouter.New()

	r.POST("/attach", (&attachHandler{state: h.state, backend: h.backend}).ServeHTTP)

	jobAPI := &jobAPI{host: h}
	jobAPI.RegisterRoutes(r)

	volAPI := volumeapi.NewHTTPAPI(cluster.NewClient(), h.vman)
	volAPI.RegisterRoutes(r)

	go http.Serve(h.listener, httphelper.ContextInjector("host", httphelper.NewRequestLogger(r)))
}
Example #11
func (h *Host) ServeHTTP() {
	r := httprouter.New()

	r.POST("/attach", newAttachHandler(h.state, h.backend, h.log).ServeHTTP)

	jobAPI := &jobAPI{
		host: h,
		addJobRateLimitBucket: NewRateLimitBucket(h.maxJobConcurrency),
	}
	jobAPI.RegisterRoutes(r)

	volAPI := volumeapi.NewHTTPAPI(cluster.NewClient(), h.vman)
	volAPI.RegisterRoutes(r)

	go http.Serve(h.listener, httphelper.ContextInjector("host", httphelper.NewRequestLogger(r)))
}
Example #12
func main() {
	port := os.Getenv("PORT")
	if port == "" {
		port = "3000"
	}
	addr := ":" + port

	if seed := os.Getenv("NAME_SEED"); seed != "" {
		s, err := hex.DecodeString(seed)
		if err != nil {
			log.Fatalln("error decoding NAME_SEED:", err)
		}
		name.SetSeed(s)
	}

	db, err := postgres.Open("", "")
	if err != nil {
		log.Fatal(err)
	}

	if err := migrateDB(db.DB); err != nil {
		log.Fatal(err)
	}

	cc, err := cluster.NewClient()
	if err != nil {
		log.Fatal(err)
	}

	sc, err := routerc.New()
	if err != nil {
		log.Fatal(err)
	}

	if err := discoverd.Register("flynn-controller", addr); err != nil {
		log.Fatal(err)
	}

	shutdown.BeforeExit(func() {
		discoverd.Unregister("flynn-controller", addr)
	})

	handler, _ := appHandler(handlerConfig{db: db, cc: cc, sc: sc, dc: discoverd.DefaultClient, key: os.Getenv("AUTH_KEY")})
	log.Fatal(http.ListenAndServe(addr, handler))
}
Example #13
func (m *Monitor) Run() {
	log := monitorLogger.New("fn", "Run")
	log.Info("waiting for discoverd")
	m.waitDiscoverd()

	log.Info("waiting for raft leader")
	m.waitRaftLeader()

	// we can connect the leader election wrapper now
	m.discoverd = newDiscoverdWrapper(m.addr+":1113", m.logger)
	// connect cluster client now that discoverd is up.
	m.c = cluster.NewClient()

	m.monitorSvc = discoverd.NewService("cluster-monitor")

	log.Info("waiting for monitor service to be enabled for this cluster")
	m.waitEnabled()

	log.Info("registering cluster-monitor")
	m.waitRegister()

	leaderCh := m.discoverd.LeaderCh()
	ticker := time.NewTicker(checkInterval)

	log.Info("starting monitor loop")
	for {
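		// non-blocking: handle shutdown requests and leadership changes
		// before waiting on the ticker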
		var isLeader bool
		select {
		case <-m.shutdownCh:
			log.Info("shutting down monitor")
			return
		case isLeader = <-leaderCh:
			m.isLeader = isLeader
			continue
		default:
		}

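		// block until the next tick; only the leader performs the cluster check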
		select {
		case <-ticker.C:
			if m.isLeader {
				m.checkCluster()
			}
		}
	}
}
Example #14
File: list.go Project: devick/flynn
func runListHosts(args *docopt.Args) error {
	clusterClient := cluster.NewClient()
	hosts, err := clusterClient.Hosts()
	if err != nil {
		return err
	}
	if len(hosts) == 0 {
		return errors.New("no hosts found")
	}

	w := tabwriter.NewWriter(os.Stdout, 1, 2, 2, ' ', 0)
	defer w.Flush()
	listRec(w, "ID", "ADDR")
	for _, h := range hosts {
		listRec(w, h.ID(), h.Addr())
	}
	return nil
}
Example #15
func Run(name string, args []string) error {
	argv := make([]string, 1, 1+len(args))
	argv[0] = name
	argv = append(argv, args...)

	cmd, ok := commands[name]
	if !ok {
		return fmt.Errorf("%s is not a valid command", name)
	}
	parsedArgs, err := docopt.Parse(cmd.usage, argv, true, "", false)
	if err != nil {
		return err
	}

	switch f := cmd.f.(type) {
	case func(*docopt.Args, *cluster.Client) error:
		client, err := cluster.NewClient()
		if err != nil {
			return err
		}
		defer client.Close()
		return f(parsedArgs, client)
	case func(*docopt.Args):
		f(parsedArgs)
		return nil
	case func(*docopt.Args) error:
		return f(parsedArgs)
	case func() error:
		return f()
	case func():
		f()
		return nil
	}

	return fmt.Errorf("unexpected command type %T", cmd.f)
}
Example #16
func (c *Cmd) Start() error {
	if c.started {
		return errors.New("exec: already started")
	}
	c.started = true
	if c.cluster == nil {
		var err error
		c.cluster, err = cluster.NewClient()
		if err != nil {
			return err
		}
		c.closeCluster = true
	}

	hosts, err := c.cluster.ListHosts()
	if err != nil {
		return err
	}
	if c.HostID == "" {
		// TODO: check if this is actually random
		for c.HostID = range hosts {
			break
		}
	}
	if c.JobID == "" {
		c.JobID = cluster.RandomJobID("")
	}

	job := &host.Job{
		ID: c.JobID,
		Config: &docker.Config{
			Image: c.Image,
			Cmd:   c.Cmd,
			Tty:   c.TTY,
			Env:   formatEnv(c.Env),
		},
		Attributes: c.Attrs,
	}
	if c.Stdout != nil || c.stdoutPipe != nil {
		job.Config.AttachStdout = true
	}
	if c.Stderr != nil || c.stderrPipe != nil {
		job.Config.AttachStderr = true
	}
	if c.Stdin != nil || c.stdinPipe != nil {
		job.Config.AttachStdin = true
		job.Config.OpenStdin = true
		job.Config.StdinOnce = true
	}

	c.host, err = c.cluster.DialHost(c.HostID)
	if err != nil {
		return err
	}

	// subscribe to host events
	ch := make(chan *host.Event)
	stream := c.host.StreamEvents(job.ID, ch)
	go func() {
		for event := range ch {
			if event.Event == "stop" || event.Event == "error" {
				close(c.done)
				return
			}
		}
		c.streamErr = stream.Err()
		close(c.done)
		// TODO: handle disconnections
	}()

	var rwc cluster.ReadWriteCloser
	var attachWait func() error

	if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil ||
		c.stdoutPipe != nil || c.stderrPipe != nil || c.stdinPipe != nil {
		req := &host.AttachReq{
			JobID:  job.ID,
			Height: c.TermHeight,
			Width:  c.TermWidth,
			Flags:  host.AttachFlagStream,
		}
		if job.Config.AttachStdout {
			req.Flags |= host.AttachFlagStdout
		}
		if job.Config.AttachStderr {
			req.Flags |= host.AttachFlagStderr
		}
		if job.Config.AttachStdin {
			req.Flags |= host.AttachFlagStdin
		}
		rwc, attachWait, err = c.host.Attach(req, true)
		if err != nil {
			c.close()
			return err
		}
	}

	goroutines := make([]func() error, 0, 4)

	c.attachConn = rwc
	if attachWait != nil {
		goroutines = append(goroutines, attachWait)
	}

	if c.stdinPipe != nil {
		c.stdinPipe.set(writeCloseCloser{rwc})
	} else if c.Stdin != nil {
		goroutines = append(goroutines, func() error {
			_, err := io.Copy(rwc, c.Stdin)
			rwc.CloseWrite()
			return err
		})
	}
	if !c.TTY {
		if c.stdoutPipe != nil || c.stderrPipe != nil {
			stdout, stderr := demultiplex.Streams(rwc)
			if c.stdoutPipe != nil {
				c.stdoutPipe.set(stdout)
			} else if c.Stdout != nil {
				goroutines = append(goroutines, cpFunc(c.Stdout, stdout))
			}
			if c.stderrPipe != nil {
				c.stderrPipe.set(stderr)
			} else if c.Stderr != nil {
				goroutines = append(goroutines, cpFunc(c.Stderr, stderr))
			}
		} else if c.Stdout != nil || c.Stderr != nil {
			goroutines = append(goroutines, func() error {
				return demultiplex.Copy(c.Stdout, c.Stderr, rwc)
			})
		}
	} else if c.stdoutPipe != nil {
		c.stdoutPipe.set(rwc)
	} else if c.Stdout != nil {
		goroutines = append(goroutines, cpFunc(c.Stdout, rwc))
	}

	c.errCh = make(chan error, len(goroutines))
	for _, fn := range goroutines {
		go func(fn func() error) {
			c.errCh <- fn()
		}(fn)
	}

	_, err = c.cluster.AddJobs(&host.AddJobsReq{HostJobs: map[string][]*host.Job{c.HostID: {job}}})
	return err
}
Example #17
func runUpdate(args *docopt.Args) error {
	log := log15.New()

	// create and update a TUF client
	log.Info("initializing TUF client")
	local, err := tuf.FileLocalStore(args.String["--tuf-db"])
	if err != nil {
		log.Error("error creating local TUF client", "err", err)
		return err
	}
	remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("updater"))
	if err != nil {
		log.Error("error creating remote TUF client", "err", err)
		return err
	}
	client := tuf.NewClient(local, remote)

	log.Info("updating TUF data")
	if _, err := client.Update(); err != nil && !tuf.IsLatestSnapshot(err) {
		log.Error("error updating TUF client", "err", err)
		return err
	}

	// read the TUF db so we can pass it to hosts
	log.Info("reading TUF database")
	tufDB, err := ioutil.ReadFile(args.String["--tuf-db"])
	if err != nil {
		log.Error("error reading the TUF database", "err", err)
		return err
	}

	log.Info("getting host list")
	clusterClient := cluster.NewClient()
	hosts, err := clusterClient.Hosts()
	if err != nil {
		log.Error("error getting host list", "err", err)
		return err
	}
	if len(hosts) == 0 {
		return errors.New("no hosts found")
	}

	log.Info("pulling images on all hosts")
	images := make(map[string]string)
	var imageMtx sync.Mutex
	hostErrs := make(chan error)
	for _, h := range hosts {
		go func(host *cluster.Host) {
			log := log.New("host", host.ID())

			log.Info("connecting to host")

			log.Info("pulling images")
			ch := make(chan *layer.PullInfo)
			stream, err := host.PullImages(
				args.String["--repository"],
				args.String["--driver"],
				args.String["--root"],
				bytes.NewReader(tufDB),
				ch,
			)
			if err != nil {
				log.Error("error pulling images", "err", err)
				hostErrs <- err
				return
			}
			defer stream.Close()
			for info := range ch {
				if info.Type == layer.TypeLayer {
					continue
				}
				log.Info("pulled image", "name", info.Repo)
				imageURI := fmt.Sprintf("%s?name=%s&id=%s", args.String["--repository"], info.Repo, info.ID)
				imageMtx.Lock()
				images[info.Repo] = imageURI
				imageMtx.Unlock()
			}
			hostErrs <- stream.Err()
		}(h)
	}
	var hostErr error
	for _, h := range hosts {
		if err := <-hostErrs; err != nil {
			log.Error("error pulling images", "host", h.ID(), "err", err)
			hostErr = err
			continue
		}
		log.Info("images pulled successfully", "host", h.ID())
	}
	if hostErr != nil {
		return hostErr
	}

	updaterImage, ok := images["flynn/updater"]
	if !ok {
		e := "missing flynn/updater image"
		log.Error(e)
		return errors.New(e)
	}
	imageJSON, err := json.Marshal(images)
	if err != nil {
		log.Error("error encoding images", "err", err)
		return err
	}

	// use a flag to determine whether to use a TTY log formatter because actually
	// assigning a TTY to the job causes reading images via stdin to fail.
	cmd := exec.Command(exec.DockerImage(updaterImage), fmt.Sprintf("--tty=%t", term.IsTerminal(os.Stdout.Fd())))
	cmd.Stdin = bytes.NewReader(imageJSON)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return err
	}
	log.Info("update complete")
	return nil
}
Example #18
func (c *Cmd) Start() error {
	if c.started {
		return errors.New("exec: already started")
	}
	c.started = true
	if c.cluster == nil {
		var err error
		c.cluster, err = cluster.NewClient()
		if err != nil {
			return err
		}
		c.closeCluster = true
	}

	hosts, err := c.cluster.ListHosts()
	if err != nil {
		return err
	}
	if c.HostID == "" {
		// TODO: check if this is actually random
		for c.HostID = range hosts {
			break
		}
	}
	if c.JobID == "" {
		c.JobID = cluster.RandomJobID("")
	}

	job := &host.Job{
		ID:       c.JobID,
		Artifact: c.Artifact,
		Config: host.ContainerConfig{
			Entrypoint: c.Entrypoint,
			Cmd:        c.Cmd,
			TTY:        c.TTY,
			Env:        c.Env,
			Stdin:      c.Stdin != nil || c.stdinPipe != nil,
		},
		Metadata: c.Meta,
	}

	c.host, err = c.cluster.DialHost(c.HostID)
	if err != nil {
		return err
	}

	if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil || c.stdinPipe != nil {
		req := &host.AttachReq{
			JobID:  job.ID,
			Height: c.TermHeight,
			Width:  c.TermWidth,
			Flags:  host.AttachFlagStream,
		}
		if c.Stdout != nil {
			req.Flags |= host.AttachFlagStdout
		}
		if c.Stderr != nil {
			req.Flags |= host.AttachFlagStderr
		}
		if job.Config.Stdin {
			req.Flags |= host.AttachFlagStdin
		}
		c.attachClient, err = c.host.Attach(req, true)
		if err != nil {
			c.close()
			return err
		}
	}

	if c.stdinPipe != nil {
		c.stdinPipe.set(writeCloseCloser{c.attachClient})
	} else if c.Stdin != nil {
		go func() {
			io.Copy(c.attachClient, c.Stdin)
			c.attachClient.CloseWrite()
		}()
	}
	go func() {
		c.exitStatus, c.streamErr = c.attachClient.Receive(c.Stdout, c.Stderr)
		close(c.done)
	}()

	_, err = c.cluster.AddJobs(&host.AddJobsReq{HostJobs: map[string][]*host.Job{c.HostID: {job}}})
	return err
}
Example #19
func (c *Cmd) Start() error {
	if c.started {
		return errors.New("exec: already started")
	}
	c.done = make(chan struct{})
	c.started = true
	if c.host == nil && c.cluster == nil {
		c.cluster = cluster.NewClient()
		c.closeCluster = true
	}

	if c.HostID == "" {
		hosts, err := c.cluster.Hosts()
		if err != nil {
			return err
		}
		if len(hosts) == 0 {
			return errors.New("exec: no hosts found")
		}
		host := schedutil.PickHost(hosts)
		c.HostID = host.ID()
		c.host = host
	}

	// Use the pre-defined host.Job configuration if provided; otherwise
	// generate one from the fields on exec.Cmd that mirror os/exec.
	if c.Job == nil {
		c.Job = &host.Job{
			ImageArtifact: &c.ImageArtifact,
			Config: host.ContainerConfig{
				Args:  c.Args,
				TTY:   c.TTY,
				Env:   c.Env,
				Stdin: c.Stdin != nil || c.stdinPipe != nil,
			},
			Metadata: c.Meta,
		}
		// if attaching to stdout / stderr, avoid round tripping the
		// streams via on-disk log files.
		if c.Stdout != nil || c.Stderr != nil {
			c.Job.Config.DisableLog = true
		}
	} else {
		c.Job.ImageArtifact = &c.ImageArtifact
	}
	if c.Job.ID == "" {
		c.Job.ID = cluster.GenerateJobID(c.HostID, "")
	}

	if c.host == nil {
		var err error
		c.host, err = c.cluster.Host(c.HostID)
		if err != nil {
			return err
		}
	}

	if c.Stdout != nil || c.Stderr != nil || c.Stdin != nil || c.stdinPipe != nil {
		req := &host.AttachReq{
			JobID:  c.Job.ID,
			Height: c.TermHeight,
			Width:  c.TermWidth,
			Flags:  host.AttachFlagStream,
		}
		if c.Stdout != nil {
			req.Flags |= host.AttachFlagStdout
		}
		if c.Stderr != nil {
			req.Flags |= host.AttachFlagStderr
		}
		if c.Job.Config.Stdin {
			req.Flags |= host.AttachFlagStdin
		}
		var err error
		c.attachClient, err = c.host.Attach(req, true)
		if err != nil {
			c.close()
			return err
		}
	}

	if c.stdinPipe != nil {
		c.stdinPipe.set(writeCloseCloser{c.attachClient})
	} else if c.Stdin != nil {
		go func() {
			io.Copy(c.attachClient, c.Stdin)
			c.attachClient.CloseWrite()
		}()
	}

	if c.attachClient == nil {
		c.eventChan = make(chan *host.Event)
		var err error
		c.eventStream, err = c.host.StreamEvents(c.Job.ID, c.eventChan)
		if err != nil {
			return err
		}
	}

	go func() {
		defer close(c.done)
		if c.attachClient != nil {
			c.exitStatus, c.streamErr = c.attachClient.Receive(c.Stdout, c.Stderr)
		} else {
		outer:
			for e := range c.eventChan {
				switch e.Event {
				case "stop":
					c.exitStatus = *e.Job.ExitStatus
					break outer
				case "error":
					c.streamErr = errors.New(*e.Job.Error)
					break outer
				}
			}
			c.eventStream.Close()
			if c.streamErr == nil {
				c.streamErr = c.eventStream.Err()
			}
		}
	}()

	return c.host.AddJob(c.Job)
}
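Once started, a Cmd is driven much like os/exec; Examples #17 and #22 build one with exec.Command and exec.DockerImage and call Run on it. A minimal, hypothetical sketch along those lines (runOneOff and the placeholder image URI are illustrative only, and exec.Command is assumed to be variadic over its string arguments, as the calls in those examples suggest):

// runOneOff starts a one-off job on the cluster and streams its output to
// the local stdout/stderr. The image URI is a placeholder.
func runOneOff(imageURI string, args ...string) error {
	cmd := exec.Command(exec.DockerImage(imageURI), args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}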
Example #20
func (d *DeployJob) Perform() error {
	log := d.logger.New("fn", "Perform", "deployment_id", d.ID, "app_id", d.AppID)

	log.Info("validating deployment strategy")
	var deployFunc func() error
	switch d.Strategy {
	case "one-by-one":
		deployFunc = d.deployOneByOne
	case "all-at-once":
		deployFunc = d.deployAllAtOnce
	case "sirenia":
		deployFunc = d.deploySirenia
	case "discoverd-meta":
		deployFunc = d.deployDiscoverdMeta
	default:
		err := UnknownStrategyError{d.Strategy}
		log.Error("error validating deployment strategy", "err", err)
		return err
	}

	log.Info("determining cluster size")
	hosts, err := cluster.NewClient().Hosts()
	if err != nil {
		log.Error("error listing cluster hosts", "err", err)
		return err
	}
	d.hostCount = len(hosts)

	log.Info("determining current release state")
	oldRelease, err := d.client.GetRelease(d.OldReleaseID)
	if err != nil {
		log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err)
		return err
	}
	d.oldRelease = oldRelease

	log.Info("determining release services and deployment state")
	release, err := d.client.GetRelease(d.NewReleaseID)
	if err != nil {
		log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err)
		return err
	}
	d.newRelease = release
	for typ, proc := range release.Processes {
		if proc.Omni {
			d.omni[typ] = struct{}{}
		}
		if proc.Service == "" {
			log.Info(fmt.Sprintf("using job events for %s process type, no service defined", typ))
			d.useJobEvents[typ] = struct{}{}
			continue
		}

		d.serviceNames[typ] = proc.Service

		log.Info(fmt.Sprintf("using service discovery for %s process type", typ), "service", proc.Service)
		events := make(chan *discoverd.Event)
		stream, err := discoverd.NewService(proc.Service).Watch(events)
		if err != nil {
			log.Error("error creating service discovery watcher", "service", proc.Service, "err", err)
			return err
		}
		defer stream.Close()

	outer:
		for {
			select {
			case <-d.stop:
				return worker.ErrStopped
			case event, ok := <-events:
				if !ok {
					log.Error("error creating service discovery watcher, channel closed", "service", proc.Service)
					return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service)
				}
				switch event.Kind {
				case discoverd.EventKindCurrent:
					break outer
				case discoverd.EventKindServiceMeta:
					d.serviceMeta = event.ServiceMeta
				case discoverd.EventKindUp:
					releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]
					if !ok {
						continue
					}
					switch releaseID {
					case d.OldReleaseID:
						d.oldReleaseState[typ]++
					case d.NewReleaseID:
						d.newReleaseState[typ]++
					}
				}
			case <-time.After(5 * time.Second):
				log.Error("error creating service discovery watcher, timeout reached", "service", proc.Service)
				return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service)
			}
		}
		go func() {
			for {
				event, ok := <-events
				if !ok {
					// this usually means deferred cleanup is in progress, but send an error
					// in case the deploy is still waiting for an event which will now not come.
					d.JobEventErr(errors.New("unexpected close of service event stream"))
					return
				}
				if event.Instance == nil {
					continue
				}
				if id, ok := event.Instance.Meta["FLYNN_APP_ID"]; !ok || id != d.AppID {
					continue
				}
				releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]
				if !ok {
					continue
				}
				d.ReleaseJobEvents(releaseID) <- &JobEvent{
					Type:           JobEventTypeDiscoverd,
					DiscoverdEvent: event,
				}
			}
		}()
	}

	log.Info("getting job event stream")
	jobEvents := make(chan *ct.Job)
	stream, err := d.client.StreamJobEvents(d.AppID, jobEvents)
	if err != nil {
		log.Error("error getting job event stream", "err", err)
		return err
	}
	defer stream.Close()
	go func() {
		for {
			event, ok := <-jobEvents
			if !ok {
				d.JobEventErr(errors.New("unexpected close of job event stream"))
				return
			}
			d.ReleaseJobEvents(event.ReleaseID) <- &JobEvent{
				Type:     JobEventTypeController,
				JobEvent: event,
			}
		}
	}()

	log.Info("getting current jobs")
	jobs, err := d.client.JobList(d.AppID)
	if err != nil {
		log.Error("error getting current jobs", "err", err)
		return err
	}
	for _, job := range jobs {
		if job.State != ct.JobStateUp {
			continue
		}
		if _, ok := d.useJobEvents[job.Type]; !ok {
			continue
		}

		// track the jobs so we can drop any events received between
		// connecting the job stream and getting the list of jobs
		d.knownJobStates[jobIDState{job.ID, ct.JobStateUp}] = struct{}{}

		switch job.ReleaseID {
		case d.OldReleaseID:
			d.oldReleaseState[job.Type]++
		case d.NewReleaseID:
			d.newReleaseState[job.Type]++
		}
	}

	log.Info(
		"determined deployment state",
		"original", d.Processes,
		"old_release", d.oldReleaseState,
		"new_release", d.newReleaseState,
	)
	return deployFunc()
}
Example #21
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	externalIP := args.String["--external-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	nsumount := args.String["--nsumount"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	grohl.AddContext("app", "host")
	grohl.Log(grohl.Data{"at": "start"})
	g := grohl.NewContext(grohl.Data{"fn": "main"})

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			shutdown.Fatal(err)
		}
	}

	publishAddr := net.JoinHostPort(externalIP, "1113")
	if discoveryToken != "" {
		// TODO: retry
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			g.Log(grohl.Data{"at": "register_discovery", "status": "error", "err": err.Error()})
			shutdown.Fatal(err)
		}
		g.Log(grohl.Data{"at": "register_discovery", "id": discoveryID})
	}

	state := NewState(hostID, stateFile)
	var backend Backend
	var err error

	// create volume manager
	vman, err := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined.
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			g.Log(grohl.Data{"at": "zpool_size", "size": size})

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		},
	)
	if err != nil {
		shutdown.Fatal(err)
	}

	mux := logmux.New(1000)
	shutdown.BeforeExit(func() { mux.Close() })

	switch backendName {
	case "libvirt-lxc":
		backend, err = NewLibvirtLXCBackend(state, vman, logDir, flynnInit, nsumount, mux)
	default:
		log.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)

	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:      hostID,
		url:     publishURL,
		state:   state,
		backend: backend,
		status:  &host.HostStatus{ID: hostID, URL: publishURL},
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		if err := backend.Cleanup(except); err != nil {
			return err
		}
		for _, id := range except {
			if e := backend.Stop(id); e != nil {
				err = e
			}
		}
		return
	}

	resurrect, err := state.Restore(backend)
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		discoverdManager.Close()
		stopJobs()
	})
	shutdown.BeforeExit(func() {
		if err := state.MarkForResurrection(); err != nil {
			log.Print("error marking for resurrection", err)
		}
	})

	if err := serveHTTP(
		host,
		&attachHandler{state: state, backend: backend},
		cluster.NewClient(),
		vman,
		discoverdManager.ConnectLocal,
	); err != nil {
		shutdown.Fatal(err)
	}

	if force {
		if err := stopJobs(); err != nil {
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
	}
	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		// No peers have working discoverd, so resurrect any available jobs
		resurrect()
	}

	<-make(chan struct{})
}
Example #22
func runUpdate(args *docopt.Args) error {
	log := log15.New()

	// create and update a TUF client
	log.Info("initializing TUF client")
	local, err := tuf.FileLocalStore(args.String["--tuf-db"])
	if err != nil {
		log.Error("error creating local TUF client", "err", err)
		return err
	}
	remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("updater"))
	if err != nil {
		log.Error("error creating remote TUF client", "err", err)
		return err
	}
	client := tuf.NewClient(local, remote)

	if !args.Bool["--is-latest"] {
		return updateAndExecLatest(args.String["--config-dir"], client, log)
	}

	// unlink the current binary if it is a temp file
	if args.Bool["--is-tempfile"] {
		os.Remove(os.Args[0])
	}

	// read the TUF db so we can pass it to hosts
	log.Info("reading TUF database")
	tufDB, err := ioutil.ReadFile(args.String["--tuf-db"])
	if err != nil {
		log.Error("error reading the TUF database", "err", err)
		return err
	}

	log.Info("getting host list")
	clusterClient := cluster.NewClient()
	hosts, err := clusterClient.Hosts()
	if err != nil {
		log.Error("error getting host list", "err", err)
		return err
	}
	if len(hosts) == 0 {
		return errors.New("no hosts found")
	}

	log.Info(fmt.Sprintf("updating %d hosts", len(hosts)))

	// eachHost invokes the given function in a goroutine for each host,
	// returning an error if any of the functions returns an error.
	eachHost := func(f func(*cluster.Host, log15.Logger) error) (err error) {
		errs := make(chan error)
		for _, h := range hosts {
			go func(host *cluster.Host) {
				log := log.New("host", host.ID())
				errs <- f(host, log)
			}(h)
		}
		for range hosts {
			if e := <-errs; e != nil {
				err = e
			}
		}
		return
	}

	var mtx sync.Mutex
	images := make(map[string]string)
	log.Info("pulling latest images on all hosts")
	if err := eachHost(func(host *cluster.Host, log log15.Logger) error {
		log.Info("pulling images")
		ch := make(chan *layer.PullInfo)
		stream, err := host.PullImages(
			args.String["--repository"],
			args.String["--driver"],
			args.String["--root"],
			version.String(),
			bytes.NewReader(tufDB),
			ch,
		)
		if err != nil {
			log.Error("error pulling images", "err", err)
			return err
		}
		defer stream.Close()
		for info := range ch {
			if info.Type == layer.TypeLayer {
				continue
			}
			log.Info("pulled image", "name", info.Repo)
			imageURI := fmt.Sprintf("%s?name=%s&id=%s", args.String["--repository"], info.Repo, info.ID)
			mtx.Lock()
			images[info.Repo] = imageURI
			mtx.Unlock()
		}
		if err := stream.Err(); err != nil {
			log.Error("error pulling images", "err", err)
			return err
		}
		return nil
	}); err != nil {
		return err
	}

	var binaries map[string]string
	log.Info("pulling latest binaries and config on all hosts")
	if err := eachHost(func(host *cluster.Host, log log15.Logger) error {
		log.Info("pulling binaries and config")
		paths, err := host.PullBinariesAndConfig(
			args.String["--repository"],
			args.String["--bin-dir"],
			args.String["--config-dir"],
			version.String(),
			bytes.NewReader(tufDB),
		)
		if err != nil {
			log.Error("error pulling binaries and config", "err", err)
			return err
		}
		mtx.Lock()
		binaries = paths
		mtx.Unlock()
		log.Info("binaries and config pulled successfully")
		return nil
	}); err != nil {
		return err
	}

	log.Info("validating binaries")
	flynnHost, ok := binaries["flynn-host"]
	if !ok {
		return fmt.Errorf("missing flynn-host binary")
	}
	flynnInit, ok := binaries["flynn-init"]
	if !ok {
		return fmt.Errorf("missing flynn-init binary")
	}
	flynnNSUmount, ok := binaries["flynn-nsumount"]
	if !ok {
		return fmt.Errorf("missing flynn-nsumount binary")
	}

	log.Info("updating flynn-host daemon on all hosts")
	if err := eachHost(func(host *cluster.Host, log log15.Logger) error {
		// TODO(lmars): handle daemons using custom flags (e.g. --state=/foo)
		_, err := host.Update(
			flynnHost,
			"daemon",
			"--id", host.ID(),
			"--flynn-init", flynnInit,
			"--nsumount", flynnNSUmount,
		)
		if err != nil {
			log.Error("error updating binaries", "err", err)
			return err
		}
		log.Info("flynn-host updated successfully")
		return nil
	}); err != nil {
		return err
	}

	updaterImage, ok := images["flynn/updater"]
	if !ok {
		e := "missing flynn/updater image"
		log.Error(e)
		return errors.New(e)
	}
	imageJSON, err := json.Marshal(images)
	if err != nil {
		log.Error("error encoding images", "err", err)
		return err
	}

	// use a flag to determine whether to use a TTY log formatter because actually
	// assigning a TTY to the job causes reading images via stdin to fail.
	cmd := exec.Command(exec.DockerImage(updaterImage), fmt.Sprintf("--tty=%t", term.IsTerminal(os.Stdout.Fd())))
	cmd.Stdin = bytes.NewReader(imageJSON)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		return err
	}
	log.Info("update complete")
	return nil
}
Example #23
File: job.go Project: BobbWu/flynn
func (d *DeployJob) Perform() error {
	log := d.logger.New("fn", "Perform", "deployment_id", d.ID, "app_id", d.AppID)

	log.Info("validating deployment strategy")
	var deployFunc func() error
	switch d.Strategy {
	case "one-by-one":
		deployFunc = d.deployOneByOne
	case "all-at-once":
		deployFunc = d.deployAllAtOnce
	case "postgres":
		deployFunc = d.deployPostgres
	default:
		err := UnknownStrategyError{d.Strategy}
		log.Error("error validating deployment strategy", "err", err)
		return err
	}

	log.Info("determining cluster size")
	hosts, err := cluster.NewClient().Hosts()
	if err != nil {
		log.Error("error listing cluster hosts", "err", err)
		return err
	}
	d.hostCount = len(hosts)

	log.Info("determining release services and deployment state")
	release, err := d.client.GetRelease(d.NewReleaseID)
	if err != nil {
		log.Error("error getting new release", "release_id", d.NewReleaseID, "err", err)
		return err
	}
	for typ, proc := range release.Processes {
		if proc.Omni {
			d.omni[typ] = struct{}{}
		}
		if proc.Service == "" {
			log.Info(fmt.Sprintf("using job events for %s process type, no service defined", typ))
			d.useJobEvents[typ] = struct{}{}
			continue
		}

		log.Info(fmt.Sprintf("using service discovery for %s process type", typ), "service", proc.Service)
		events := make(chan *discoverd.Event)
		stream, err := discoverd.NewService(proc.Service).Watch(events)
		if err != nil {
			log.Error("error creating service discovery watcher", "service", proc.Service, "err", err)
			return err
		}
		defer stream.Close()

	outer:
		for {
			select {
			case <-d.stop:
				return ErrStopped
			case event, ok := <-events:
				if !ok {
					log.Error("error creating service discovery watcher, channel closed", "service", proc.Service)
					return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service)
				}
				switch event.Kind {
				case discoverd.EventKindCurrent:
					break outer
				case discoverd.EventKindServiceMeta:
					d.serviceMeta = event.ServiceMeta
				case discoverd.EventKindUp:
					releaseID, ok := event.Instance.Meta["FLYNN_RELEASE_ID"]
					if !ok {
						continue
					}
					switch releaseID {
					case d.OldReleaseID:
						d.oldReleaseState[typ]++
					case d.NewReleaseID:
						d.newReleaseState[typ]++
					}
				}
			case <-time.After(5 * time.Second):
				log.Error("error creating service discovery watcher, timeout reached", "service", proc.Service)
				return fmt.Errorf("deployer: could not create watcher for service: %s", proc.Service)
			}
		}
		go func() {
			for {
				event, ok := <-events
				if !ok {
					// this usually means deferred cleanup is in progress
					// TODO: this could also happen if the stream connection
					// dropped; handle that case
					return
				}
				d.serviceEvents <- event
			}
		}()
	}

	log.Info("getting job event stream")
	d.jobEvents = make(chan *ct.Job)
	stream, err := d.client.StreamJobEvents(d.AppID, d.jobEvents)
	if err != nil {
		log.Error("error getting job event stream", "err", err)
		return err
	}
	defer stream.Close()

	log.Info("getting current jobs")
	jobs, err := d.client.JobList(d.AppID)
	if err != nil {
		log.Error("error getting current jobs", "err", err)
		return err
	}
	for _, job := range jobs {
		if job.State != "up" {
			continue
		}
		if _, ok := d.useJobEvents[job.Type]; !ok {
			continue
		}

		// track the jobs so we can drop any events received between
		// connecting the job stream and getting the list of jobs
		d.knownJobStates[jobIDState{job.ID, "up"}] = struct{}{}

		switch job.ReleaseID {
		case d.OldReleaseID:
			d.oldReleaseState[job.Type]++
		case d.NewReleaseID:
			d.newReleaseState[job.Type]++
		}
	}

	log.Info(
		"determined deployment state",
		"original", d.Processes,
		"old_release", d.oldReleaseState,
		"new_release", d.newReleaseState,
	)
	return deployFunc()
}
Example #24
0
func main() {
	defer shutdown.Exit()

	port := os.Getenv("PORT")
	if port == "" {
		port = "3000"
	}
	addr := ":" + port

	if seed := os.Getenv("NAME_SEED"); seed != "" {
		s, err := hex.DecodeString(seed)
		if err != nil {
			log.Fatalln("error decoding NAME_SEED:", err)
		}
		name.SetSeed(s)
	}

	db := postgres.Wait("", "")

	if err := migrateDB(db.DB); err != nil {
		shutdown.Fatal(err)
	}

	pgxcfg, err := pgx.ParseURI(fmt.Sprintf("http://%s:%s@%s/%s", os.Getenv("PGUSER"), os.Getenv("PGPASSWORD"), db.Addr(), os.Getenv("PGDATABASE")))
	if err != nil {
		log.Fatal(err)
	}
	pgxcfg.Dial = dialer.Retry.Dial

	pgxpool, err := pgx.NewConnPool(pgx.ConnPoolConfig{
		ConnConfig:   pgxcfg,
		AfterConnect: que.PrepareStatements,
	})
	if err != nil {
		log.Fatal(err)
	}
	shutdown.BeforeExit(func() { pgxpool.Close() })

	lc, err := logaggc.New("")
	if err != nil {
		shutdown.Fatal(err)
	}
	rc := routerc.New()

	hb, err := discoverd.DefaultClient.AddServiceAndRegisterInstance("flynn-controller", &discoverd.Instance{
		Addr:  addr,
		Proto: "http",
		Meta: map[string]string{
			"AUTH_KEY": os.Getenv("AUTH_KEY"),
		},
	})
	if err != nil {
		shutdown.Fatal(err)
	}

	shutdown.BeforeExit(func() {
		hb.Close()
	})

	handler := appHandler(handlerConfig{
		db:      db,
		cc:      clusterClientWrapper{cluster.NewClient()},
		lc:      lc,
		rc:      rc,
		pgxpool: pgxpool,
		keys:    strings.Split(os.Getenv("AUTH_KEY"), ","),
	})
	shutdown.Fatal(http.ListenAndServe(addr, handler))
}
Example #25
	"log"
	"os"
	"path"
	"regexp"
	"strings"
	"time"

	"github.com/flynn/flynn/controller/client"
	ct "github.com/flynn/flynn/controller/types"
	"github.com/flynn/flynn/host/types"
	"github.com/flynn/flynn/pkg/cluster"
	"github.com/flynn/flynn/pkg/exec"
	"github.com/flynn/flynn/pkg/random"
)

var clusterc = cluster.NewClient()

func init() {
	log.SetFlags(0)
}

var typesPattern = regexp.MustCompile("types.* -> (.+)\n")

const blobstoreURL = "http://blobstore.discoverd"
const scaleTimeout = 20 * time.Second

func main() {
	client, err := controller.NewClient("", os.Getenv("CONTROLLER_KEY"))
	if err != nil {
		log.Fatalln("Unable to connect to controller:", err)
	}