Example #1
func (c *context) watchFormations(events chan<- *FormationEvent, hostEvents chan<- *host.Event) {
	g := grohl.NewContext(grohl.Data{"fn": "watchFormations"})

	c.syncCluster(hostEvents)
	if events != nil {
		events <- &FormationEvent{}
	}

	var attempts int
	var lastUpdatedAt time.Time
	for {
		// wait a second if we've tried more than once
		attempts++
		if attempts > 1 {
			time.Sleep(time.Second)
		}

		g.Log(grohl.Data{"at": "connect", "attempt": attempts})
		updates, err := c.StreamFormations(&lastUpdatedAt)
		for ef := range updates.Chan {
			// we are now connected so reset attempts
			attempts = 0

			if ef.App == nil {
				// sentinel
				continue
			}
			lastUpdatedAt = ef.UpdatedAt
			f := c.formations.Get(ef.App.ID, ef.Release.ID)
			if f != nil {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "update"})
				f.SetProcesses(ef.Processes)
			} else {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "new"})
				f = NewFormation(c, ef)
				c.formations.Add(f)
			}
			// check for omnipresence
			for _, proctype := range f.Release.Processes {
				if proctype.Omni {
					c.omniMtx.Lock()
					c.omni[f] = struct{}{}
					c.omniMtx.Unlock()
					break
				}
			}
			go func() {
				f.Rectify()
				if events != nil {
					events <- &FormationEvent{Formation: f}
				}
			}()
		}
		if *err != nil {
			g.Log(grohl.Data{"at": "error", "error": *err})
		}
		g.Log(grohl.Data{"at": "disconnect"})
		updates.Close()
	}
}
Example #2
func (f *Formation) remove(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "remove", "app.id": f.AppID, "release.id": f.Release.ID})

	i := 0
	sj := make(sortJobs, 0, len(f.jobs[name]))
	for _, job := range f.jobs[name] {
		sj = append(sj, job)
	}
	sj.Sort()
	for _, job := range sj {
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
		if hostID != "" && job.HostID != hostID { // remove from a specific host
			continue
		}
		// TODO: robust host handling
		if err := f.c.hosts.Get(job.HostID).StopJob(job.ID); err != nil {
			g.Log(grohl.Data{"at": "error", "err": err.Error()})
			// TODO: handle error
		}
		f.jobs.Remove(job)
		if i++; i == n {
			break
		}
	}
}
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	if err := pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, p := range c.job.Config.Ports {
		if err := c.l.forwarder.Remove(&net.TCPAddr{IP: c.IP, Port: p.Port}, p.RangeEnd, p.Proto); err != nil {
			g.Log(grohl.Data{"at": "iptables", "status": "error", "err": err, "port": p.Port})
		}
		c.l.ports[p.Proto].Put(uint16(p.Port))
	}
	ipallocator.ReleaseIP(defaultNet, &c.IP)
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
Example #4
func (m *LogMux) drainTo(w io.Writer) {
	defer close(m.donec)

	g := grohl.NewContext(grohl.Data{"at": "logmux_drain"})

	for {
		msg, ok := <-m.logc
		if !ok {
			return // shutdown
		}

		_, err := w.Write(rfc6587.Bytes(msg))
		if err != nil {
			g.Log(grohl.Data{"status": "error", "err": err.Error()})

			// write logs to local logger when the writer fails
			g.Log(grohl.Data{"msg": msg.String()})
			for msg := range m.logc {
				g.Log(grohl.Data{"msg": msg.String()})
			}

			return // shutdown
		}
	}
}
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	if err := c.l.pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, v := range c.job.Config.Volumes {
		if err := syscall.Unmount(filepath.Join(c.RootPath, v.Target), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
		}
	}
	if !c.job.Config.HostNetwork && c.l.bridgeNet != nil {
		c.l.ipalloc.ReleaseIP(c.l.bridgeNet, c.IP)
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (l *LibvirtLXCBackend) Cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "Cleanup"})
	l.containersMtx.Lock()
	ids := make([]string, 0, len(l.containers))
	for id := range l.containers {
		ids = append(ids, id)
	}
	l.containersMtx.Unlock()
	g.Log(grohl.Data{"at": "start", "count": len(ids)})
	errs := make(chan error)
	for _, id := range ids {
		go func(id string) {
			g.Log(grohl.Data{"at": "stop", "job.id": id})
			err := l.Stop(id)
			if err != nil {
				g.Log(grohl.Data{"at": "error", "job.id": id, "err": err.Error()})
			}
			errs <- err
		}(id)
	}
	var err error
	for i := 0; i < len(ids); i++ {
		stopErr := <-errs
		if stopErr != nil {
			err = stopErr
		}
	}
	g.Log(grohl.Data{"at": "finish"})
	return err
}
// waitExit waits for the libvirt domain to be marked as done, or for five
// seconds to elapse
func (c *libvirtContainer) waitExit() {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "waitExit", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})
	domain, err := c.l.libvirt.LookupDomainByName(c.job.ID)
	if err != nil {
		g.Log(grohl.Data{"at": "domain_error", "err": err.Error()})
		return
	}
	defer domain.Free()

	maxWait := time.After(5 * time.Second)
	for {
		state, err := domain.GetState()
		if err != nil {
			g.Log(grohl.Data{"at": "state_error", "err": err.Error()})
			return
		}
		if state[0] != libvirt.VIR_DOMAIN_RUNNING && state[0] != libvirt.VIR_DOMAIN_SHUTDOWN {
			g.Log(grohl.Data{"at": "done"})
			return
		}
		select {
		case <-maxWait:
			g.Log(grohl.Data{"at": "maxWait"})
			return
		default:
			time.Sleep(100 * time.Millisecond)
		}
	}
}
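The waitExit example above polls the domain state every 100ms but gives up after five seconds by checking a single time.After channel inside a select. The same bounded-polling shape, reduced to a runnable sketch (pollUntil and its parameters are illustrative, not part of Flynn):

package main

import (
	"fmt"
	"time"
)

// pollUntil calls check every interval until it returns true or the
// deadline passes; it reports whether check succeeded before the deadline.
func pollUntil(check func() bool, interval, deadline time.Duration) bool {
	timeout := time.After(deadline)
	for {
		if check() {
			return true
		}
		select {
		case <-timeout:
			return false
		default:
			time.Sleep(interval)
		}
	}
}

func main() {
	start := time.Now()
	ok := pollUntil(func() bool { return time.Since(start) > 30*time.Millisecond },
		10*time.Millisecond, 5*time.Second)
	fmt.Println("done:", ok)
}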
Example #8
func (h *attachHandler) attach(req *host.AttachReq, conn io.ReadWriteCloser) {
	defer conn.Close()

	g := grohl.NewContext(grohl.Data{"fn": "attach", "job.id": req.JobID})
	g.Log(grohl.Data{"at": "start"})
	attachWait := make(chan struct{})
	job := h.state.AddAttacher(req.JobID, attachWait)
	if job == nil {
		defer h.state.RemoveAttacher(req.JobID, attachWait)
		if _, err := conn.Write([]byte{host.AttachWaiting}); err != nil {
			return
		}
		// TODO: add timeout
		<-attachWait
		job = h.state.GetJob(req.JobID)
	}

	success := make(chan struct{})
	failed := make(chan struct{})
	opts := &AttachRequest{
		Job:        job,
		Logs:       req.Flags&host.AttachFlagLogs != 0,
		Stream:     req.Flags&host.AttachFlagStream != 0,
		Height:     req.Height,
		Width:      req.Width,
		Attached:   success,
		ReadWriter: conn,
		Streams:    make([]string, 0, 3),
	}
	if req.Flags&host.AttachFlagStdin != 0 {
		opts.Streams = append(opts.Streams, "stdin")
	}
	if req.Flags&host.AttachFlagStdout != 0 {
		opts.Streams = append(opts.Streams, "stdout")
	}
	if req.Flags&host.AttachFlagStderr != 0 {
		opts.Streams = append(opts.Streams, "stderr")
	}

	go func() {
		select {
		case <-success:
			conn.Write([]byte{host.AttachSuccess})
			close(success)
		case <-failed:
		}
		close(attachWait)
	}()
	if err := h.backend.Attach(opts); err != nil {
		select {
		case <-success:
		default:
			close(failed)
			conn.Write(append([]byte{host.AttachError}, err.Error()...))
		}
		g.Log(grohl.Data{"status": "error", "err": err})
		return
	}
	g.Log(grohl.Data{"at": "finish"})
}
Example #9
func (c *context) watchFormations() {
	g := grohl.NewContext(grohl.Data{"fn": "watchFormations"})

	c.syncCluster()

	var attempts int
	var lastUpdatedAt time.Time
	for {
		// wait a second if we've tried more than once
		attempts++
		if attempts > 1 {
			time.Sleep(time.Second)
		}

		g.Log(grohl.Data{"at": "connect", "attempt": attempts})
		updates := make(chan *ct.ExpandedFormation)
		streamCtrl, err := c.StreamFormations(&lastUpdatedAt, updates)
		if err != nil {
			g.Log(grohl.Data{"at": "error", "error": err})
			continue
		}
		for ef := range updates {
			// we are now connected so reset attempts
			attempts = 0

			if ef.App == nil {
				// sentinel
				continue
			}
			lastUpdatedAt = ef.UpdatedAt
			f := c.formations.Get(ef.App.ID, ef.Release.ID)
			if f != nil {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "update"})
				f.SetProcesses(ef.Processes)
			} else {
				g.Log(grohl.Data{"app.id": ef.App.ID, "release.id": ef.Release.ID, "at": "new"})
				f = NewFormation(c, ef)
				c.formations.Add(f)
			}
			// check for omnipresence
			for _, proctype := range f.Release.Processes {
				if proctype.Omni {
					c.omniMtx.Lock()
					c.omni[f] = struct{}{}
					c.omniMtx.Unlock()
					break
				}
			}
			go f.Rectify()
		}
		if streamCtrl.Err() != nil {
			g.Log(grohl.Data{"at": "disconnect", "err": streamCtrl.Err()})
		}
		g.Log(grohl.Data{"at": "disconnect"})
	}
}
Example #10
func (f *Formation) add(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "add", "app.id": f.AppID, "release.id": f.Release.ID})
	for i := 0; i < n; i++ {
		job, err := f.start(name, hostID)
		if err != nil {
			// TODO: log/handle error
			continue
		}
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
	}
}
func (l *LibvirtLXCBackend) OpenLogs(buffers host.LogBuffers) error {
	l.containersMtx.RLock()
	defer l.containersMtx.RUnlock()
	for id, c := range l.containers {
		g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "OpenLogs", "job.id": id})
		if err := c.followLogs(g, buffers[id]); err != nil {
			return err
		}
	}
	return nil
}
Example #12
func (f *Formation) rectify() {
	g := grohl.NewContext(grohl.Data{"fn": "rectify", "app.id": f.AppID, "release.id": f.Release.ID})

	var hosts []*cluster.Host
	if _, ok := f.c.omni[f]; ok {
		hosts = f.c.hosts.List()
	}
	// update job counts
	for t, expected := range f.Processes {
		if f.Release.Processes[t].Omni {
			// get job counts per host
			hostCounts := make(map[string]int, len(hosts))
			for _, h := range hosts {
				hostCounts[h.ID()] = 0
			}
			for k := range f.jobs[t] {
				hostCounts[k.hostID]++
			}
			// update per host
			for hostID, actual := range hostCounts {
				diff := expected - actual
				g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
				if diff > 0 {
					f.add(diff, t, hostID)
				} else if diff < 0 {
					f.remove(-diff, t, hostID)
				}
			}
		} else {
			actual := len(f.jobs[t])
			diff := expected - actual
			g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
			if diff > 0 {
				f.add(diff, t, "")
			} else if diff < 0 {
				f.remove(-diff, t, "")
			}
		}
	}

	// remove process types
	for t, jobs := range f.jobs {
		// ignore one-off jobs which have no type
		if t == "" {
			continue
		}
		if _, exists := f.Processes[t]; !exists {
			g.Log(grohl.Data{"at": "cleanup", "type": t, "count": len(jobs)})
			f.remove(len(jobs), t, "")
		}
	}
}
func (c *libvirtContainer) cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "cleanup", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	c.unbindMounts()
	if err := c.l.pinkerton.Cleanup(c.job.ID); err != nil {
		g.Log(grohl.Data{"at": "pinkerton", "status": "error", "err": err})
	}
	if !c.job.Config.HostNetwork && c.l.bridgeNet != nil {
		c.l.ipalloc.ReleaseIP(c.l.bridgeNet, c.IP)
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
func (l *LibvirtLXCBackend) CloseLogs() (host.LogBuffers, error) {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "CloseLogs"})
	l.logStreamMtx.Lock()
	defer l.logStreamMtx.Unlock()
	buffers := make(host.LogBuffers, len(l.logStreams))
	for id, streams := range l.logStreams {
		g.Log(grohl.Data{"job.id": id})
		buffer := make(host.LogBuffer, len(streams))
		for fd, stream := range streams {
			buffer[fd] = stream.Close()
		}
		buffers[id] = buffer
		delete(l.logStreams, id)
	}
	return buffers, nil
}
Example #15
func (f *Formation) restart(stoppedJob *Job) error {
	g := grohl.NewContext(grohl.Data{"fn": "restart", "app.id": f.AppID, "release.id": f.Release.ID})
	g.Log(grohl.Data{"old.host.id": stoppedJob.HostID, "old.job.id": stoppedJob.ID})

	f.jobs.Remove(stoppedJob)

	var hostID string
	if f.Release.Processes[stoppedJob.Type].Omni {
		hostID = stoppedJob.HostID
	}
	newJob, err := f.start(stoppedJob.Type, hostID)
	if err != nil {
		return err
	}
	newJob.restarts = stoppedJob.restarts + 1
	g.Log(grohl.Data{"new.host.id": newJob.HostID, "new.job.id": newJob.ID})
	return nil
}
Example #16
func (f *Formation) remove(n int, name string, hostID string) {
	g := grohl.NewContext(grohl.Data{"fn": "remove", "app.id": f.AppID, "release.id": f.Release.ID})

	i := 0
	for _, job := range f.jobs[name] {
		g.Log(grohl.Data{"host.id": job.HostID, "job.id": job.ID})
		if hostID != "" && job.HostID != hostID { // remove from a specific host
			continue
		}
		// TODO: robust host handling
		if err := f.c.hosts.Get(job.HostID).StopJob(job.ID); err != nil {
			// TODO: log/handle error
		}
		f.jobs.Remove(job)
		if i++; i == n {
			break
		}
	}
}
Example #17
func (m *LogMux) follow(r io.Reader, hdr *rfc5424.Header) {
	defer m.producerwg.Done()

	g := grohl.NewContext(grohl.Data{"at": "logmux_follow"})
	s := bufio.NewScanner(r)

	for s.Scan() {
		msg := rfc5424.NewMessage(hdr, s.Bytes())

		select {
		case m.logc <- msg:
		default:
			// throw away msg if logc buffer is full
		}
	}

	if s.Err() != nil {
		g.Log(grohl.Data{"status": "error", "err": s.Err()})
	}
}
func (c *libvirtContainer) unbindMounts() {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "unbind_mounts", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	if err := syscall.Unmount(filepath.Join(c.RootPath, ".containerinit"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": ".containerinit", "status": "error", "err": err})
	}
	if err := syscall.Unmount(filepath.Join(c.RootPath, "etc/resolv.conf"), 0); err != nil {
		g.Log(grohl.Data{"at": "unmount", "file": "resolv.conf", "status": "error", "err": err})
	}
	for _, m := range c.job.Config.Mounts {
		if err := syscall.Unmount(filepath.Join(c.RootPath, m.Location), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "location": m.Location, "status": "error", "err": err})
		}
	}
	for _, v := range c.job.Config.Volumes {
		if err := syscall.Unmount(filepath.Join(c.RootPath, v.Target), 0); err != nil {
			g.Log(grohl.Data{"at": "unmount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
		}
	}

	g.Log(grohl.Data{"at": "finish"})
}
Example #19
func (d *DockerBackend) Cleanup() error {
	g := grohl.NewContext(grohl.Data{"backend": "docker", "fn": "cleanup"})
	g.Log(grohl.Data{"at": "start"})
	containers, err := d.docker.ListContainers(docker.ListContainersOptions{})
	if err != nil {
		g.Log(grohl.Data{"at": "list", "status": "error", "err": err})
		return err
	}
outer:
	for _, c := range containers {
		for _, name := range c.Names {
			if strings.HasPrefix(name, "/flynn-") {
				g.Log(grohl.Data{"at": "kill", "container.id": c.ID, "container.name": name})
				if err := d.docker.KillContainer(docker.KillContainerOptions{ID: c.ID}); err != nil {
					g.Log(grohl.Data{"at": "kill", "container.id": c.ID, "container.name": name, "status": "error", "err": err})
				}
				continue outer
			}
		}
	}
	g.Log(grohl.Data{"at": "finish"})
	return nil
}
Example #20
func (c *context) syncJobStates() error {
	g := grohl.NewContext(grohl.Data{"fn": "syncJobStates"})
	g.Log(grohl.Data{"at": "appList"})
	apps, err := c.AppList()
	if err != nil {
		g.Log(grohl.Data{"at": "appList", "status": "error", "err": err})
		return err
	}
	for _, app := range apps {
		g.Log(grohl.Data{"at": "jobList", "app.id": app.ID})
		jobs, err := c.JobList(app.ID)
		if err != nil {
			g.Log(grohl.Data{"at": "jobList", "app.id": app.ID, "status": "error", "err": err})
			continue
		}
		for _, job := range jobs {
			gg := g.New(grohl.Data{"job.id": job.ID, "app.id": app.ID, "state": job.State})
			gg.Log(grohl.Data{"at": "checkState"})
			if job.State != "up" {
				continue
			}
			hostID, err := cluster.ExtractHostID(job.ID)
			if err != nil {
				gg.Log(grohl.Data{"at": "jobHostID", "status": "error", "err": err})
				continue
			}
			if j := c.jobs.Get(hostID, job.ID); j != nil {
				continue
			}
			job.State = "down"
			gg.Log(grohl.Data{"at": "putJob", "state": "down"})
			go c.PutJob(job)
		}
	}
	return nil
}
Example #21
func (m *manifestRunner) runManifest(r io.Reader) (map[string]*ManifestData, error) {
	g := grohl.NewContext(grohl.Data{"fn": "run_manifest"})
	var services []*manifestService
	if err := json.NewDecoder(r).Decode(&services); err != nil {
		return nil, err
	}

	serviceData := make(map[string]*ManifestData, len(services))

	m.state.mtx.Lock()
	for _, job := range m.state.jobs {
		if job.ManifestID == "" || job.Status != host.StatusRunning {
			continue
		}
		var service *manifestService
		for _, service = range services {
			if service.ID == job.ManifestID {
				break
			}
		}
		if service == nil {
			continue
		}
		g.Log(grohl.Data{"at": "restore", "service": service.ID, "job.id": job.Job.ID})

		data := &ManifestData{
			ExternalIP: m.externalAddr,
			InternalIP: job.InternalIP,
			Env:        job.Job.Config.Env,
			Services:   serviceData,
			ports:      m.ports["tcp"],
			readonly:   true,
		}
		data.TCPPorts = make([]int, 0, len(job.Job.Config.Ports))
		for _, p := range job.Job.Config.Ports {
			if p.Proto != "tcp" {
				continue
			}
			data.TCPPorts = append(data.TCPPorts, p.Port)
		}
		serviceData[service.ID] = data
	}
	m.state.mtx.Unlock()

	for _, service := range services {
		if _, exists := serviceData[service.ID]; exists {
			continue
		}

		data := &ManifestData{
			Env:        parseEnviron(),
			Services:   serviceData,
			ExternalIP: m.externalAddr,
			ports:      m.ports["tcp"],
		}

		// Add explicit tcp ports to data.TCPPorts
		for _, port := range service.TCPPorts {
			port, err := strconv.Atoi(port)
			if err != nil {
				return nil, err
			}
			data.TCPPorts = append(data.TCPPorts, port)
		}

		var buf bytes.Buffer

		interp := func(s string) (string, error) {
			t, err := template.New("arg").Parse(s)
			if err != nil {
				return "", err
			}
			if err := t.Execute(&buf, data); err != nil {
				return "", err
			}
			defer buf.Reset()
			return buf.String(), nil
		}

		args := make([]string, 0, len(service.Args))
		for _, arg := range service.Args {
			arg, err := interp(arg)
			if err != nil {
				return nil, err
			}
			if strings.TrimSpace(arg) == "" {
				continue
			}
			args = append(args, arg)
		}
		var err error
		for k, v := range service.Env {
			service.Env[k], err = interp(v)
			if err != nil {
				return nil, err
			}
		}
		data.Env = service.Env

		if service.Image == "" {
			service.Image = "https://registry.hub.docker.com/flynn/" + service.ID
		}
		if service.ImageID != "" {
			service.Image += "?id=" + service.ImageID
		}

		job := &host.Job{
			ID: cluster.RandomJobID("flynn-" + service.ID + "-"),
			Artifact: host.Artifact{
				Type: "docker",
				URI:  service.Image,
			},
			Config: host.ContainerConfig{
				Entrypoint: service.Entrypoint,
				Cmd:        args,
				Env:        data.Env,
			},
		}
		if job.Config.Env == nil {
			job.Config.Env = make(map[string]string)
		}
		job.Config.Env["EXTERNAL_IP"] = m.externalAddr

		job.Config.Ports = make([]host.Port, len(data.TCPPorts))
		for i, port := range data.TCPPorts {
			job.Config.Ports[i] = host.Port{Proto: "tcp", Port: port}
		}
		if len(job.Config.Ports) == 0 {
			job.Config.Ports = []host.Port{{Proto: "tcp"}}
		}

		if err := m.backend.Run(job); err != nil {
			return nil, err
		}

		m.state.SetManifestID(job.ID, service.ID)
		activeJob := m.state.GetJob(job.ID)
		data.InternalIP = activeJob.InternalIP
		data.readonly = true
		serviceData[service.ID] = data
	}

	return serviceData, nil
}
Example #22
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	externalIP := args.String["--external-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	nsumount := args.String["--nsumount"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	grohl.AddContext("app", "host")
	grohl.Log(grohl.Data{"at": "start"})
	g := grohl.NewContext(grohl.Data{"fn": "main"})

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			shutdown.Fatal(err)
		}
	}

	publishAddr := net.JoinHostPort(externalIP, "1113")
	if discoveryToken != "" {
		// TODO: retry
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			g.Log(grohl.Data{"at": "register_discovery", "status": "error", "err": err.Error()})
			shutdown.Fatal(err)
		}
		g.Log(grohl.Data{"at": "register_discovery", "id": discoveryID})
	}

	state := NewState(hostID, stateFile)
	var backend Backend
	var err error

	// create volume manager
	vman, err := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined.
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			g.Log(grohl.Data{"at": "zpool_size", "size": size})

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		},
	)
	if err != nil {
		shutdown.Fatal(err)
	}

	mux := logmux.New(1000)
	shutdown.BeforeExit(func() { mux.Close() })

	switch backendName {
	case "libvirt-lxc":
		backend, err = NewLibvirtLXCBackend(state, vman, logDir, flynnInit, nsumount, mux)
	default:
		log.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)

	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:      hostID,
		url:     publishURL,
		state:   state,
		backend: backend,
		status:  &host.HostStatus{ID: hostID, URL: publishURL},
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		if err := backend.Cleanup(except); err != nil {
			return err
		}
		for _, id := range except {
			if e := backend.Stop(id); e != nil {
				err = e
			}
		}
		return
	}

	resurrect, err := state.Restore(backend)
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		discoverdManager.Close()
		stopJobs()
	})
	shutdown.BeforeExit(func() {
		if err := state.MarkForResurrection(); err != nil {
			log.Print("error marking for resurrection", err)
		}
	})

	if err := serveHTTP(
		host,
		&attachHandler{state: state, backend: backend},
		cluster.NewClient(),
		vman,
		discoverdManager.ConnectLocal,
	); err != nil {
		shutdown.Fatal(err)
	}

	if force {
		if err := stopJobs(); err != nil {
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
	}
	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		// No peers have working discoverd, so resurrect any available jobs
		resurrect()
	}

	<-make(chan struct{})
}
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err.Error()})
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
		if err := d.Destroy(); err != nil {
			g.Log(grohl.Data{"at": "destroy", "status": "error", "err": err.Error()})
		}
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, initLog, err := c.Client.GetStreams()
		if err != nil {
			g.Log(grohl.Data{"at": "get_streams", "status": "error", "err": err.Error()})
			return err
		}

		log := c.l.openLog(c.job.ID)
		defer log.Close()

		muxConfig := logmux.Config{
			AppID:   c.job.Metadata["flynn-controller.app"],
			HostID:  c.l.state.id,
			JobType: c.job.Metadata["flynn-controller.type"],
			JobID:   c.job.ID,
		}

		// TODO(benburkert): remove file logging once attach proto uses logaggregator
		streams := []io.Reader{stdout, stderr}
		for i, stream := range streams {
			bufr, bufw := io.Pipe()
			muxr, muxw := io.Pipe()
			go func(r io.Reader, pw1, pw2 *io.PipeWriter) {
				mw := io.MultiWriter(pw1, pw2)
				_, err := io.Copy(mw, r)
				pw1.CloseWithError(err)
				pw2.CloseWithError(err)
			}(stream, bufw, muxw)

			fd := i + 1
			go log.Follow(fd, bufr)
			go c.l.mux.Follow(muxr, fd, muxConfig)
		}

		go log.Follow(3, initLog)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
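In the watch function above, each container stream is copied through io.MultiWriter into two io.Pipes so the same output feeds both the local log file and the log mux. A minimal runnable sketch of that fan-out, with plain stand-ins for the real consumers:

package main

import (
	"fmt"
	"io"
	"strings"
	"sync"
)

func main() {
	src := strings.NewReader("container output\n")

	fileR, fileW := io.Pipe() // stand-in for the local log file consumer
	muxR, muxW := io.Pipe()   // stand-in for the log mux consumer

	// Copy src into both pipes, then close them with the copy error so
	// both readers see EOF (or the failure) at the same point.
	go func() {
		_, err := io.Copy(io.MultiWriter(fileW, muxW), src)
		fileW.CloseWithError(err)
		muxW.CloseWithError(err)
	}()

	var wg sync.WaitGroup
	for name, r := range map[string]io.Reader{"file": fileR, "mux": muxR} {
		wg.Add(1)
		go func(name string, r io.Reader) {
			defer wg.Done()
			b, _ := io.ReadAll(r)
			fmt.Printf("%s consumer read: %q\n", name, b)
		}(name, r)
	}
	wg.Wait()
}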
func (l *LibvirtLXCBackend) Run(job *host.Job, runConfig *RunConfig) (err error) {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "run", "job.id": job.ID})
	g.Log(grohl.Data{"at": "start", "job.artifact.uri": job.Artifact.URI, "job.cmd": job.Config.Cmd})

	if !job.Config.HostNetwork {
		<-l.networkConfigured
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		<-l.discoverdConfigured
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &libvirtContainer{
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			g.Log(grohl.Data{"at": "request_ip", "status": "error", "err": err})
			return err
		}
	}
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	g.Log(grohl.Data{"at": "pull_image"})
	layers, err := l.pinkertonPull(job.Artifact.URI)
	if err != nil {
		g.Log(grohl.Data{"at": "pull_image", "status": "error", "err": err})
		return err
	}
	imageID, err := pinkerton.ImageID(job.Artifact.URI)
	if err == pinkerton.ErrNoImageID && len(layers) > 0 {
		imageID = layers[len(layers)-1].ID
	} else if err != nil {
		g.Log(grohl.Data{"at": "image_id", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "read_config"})
	imageConfig, err := readDockerImageConfig(imageID)
	if err != nil {
		g.Log(grohl.Data{"at": "read_config", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "checkout"})
	rootPath, err := l.pinkerton.Checkout(job.ID, imageID)
	if err != nil {
		g.Log(grohl.Data{"at": "checkout", "status": "error", "err": err})
		return err
	}
	container.RootPath = rootPath

	g.Log(grohl.Data{"at": "mount"})
	if err := bindMount(l.InitPath, filepath.Join(rootPath, ".containerinit"), false, true); err != nil {
		g.Log(grohl.Data{"at": "mount", "file": ".containerinit", "status": "error", "err": err})
		return err
	}
	if err := os.MkdirAll(filepath.Join(rootPath, "etc"), 0755); err != nil {
		g.Log(grohl.Data{"at": "mkdir", "dir": "etc", "status": "error", "err": err})
		return err
	}

	if err := bindMount(l.resolvConf, filepath.Join(rootPath, "etc/resolv.conf"), false, true); err != nil {
		g.Log(grohl.Data{"at": "mount", "file": "resolv.conf", "status": "error", "err": err})
		return err
	}

	if err := writeHostname(filepath.Join(rootPath, "etc/hosts"), job.ID); err != nil {
		g.Log(grohl.Data{"at": "write_hosts", "status": "error", "err": err})
		return err
	}
	if err := os.MkdirAll(filepath.Join(rootPath, ".container-shared"), 0700); err != nil {
		g.Log(grohl.Data{"at": "mkdir", "dir": ".container-shared", "status": "error", "err": err})
		return err
	}
	for i, m := range job.Config.Mounts {
		if err := os.MkdirAll(filepath.Join(rootPath, m.Location), 0755); err != nil {
			g.Log(grohl.Data{"at": "mkdir_mount", "dir": m.Location, "status": "error", "err": err})
			return err
		}
		if m.Target == "" {
			m.Target = filepath.Join(l.VolPath, cluster.RandomJobID(""))
			job.Config.Mounts[i].Target = m.Target
			if err := os.MkdirAll(m.Target, 0755); err != nil {
				g.Log(grohl.Data{"at": "mkdir_vol", "dir": m.Target, "status": "error", "err": err})
				return err
			}
		}
		if err := bindMount(m.Target, filepath.Join(rootPath, m.Location), m.Writeable, true); err != nil {
			g.Log(grohl.Data{"at": "mount", "target": m.Target, "location": m.Location, "status": "error", "err": err})
			return err
		}
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.vman.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			g.Log(grohl.Data{"at": "volume", "volumeID": v.VolumeID, "status": "error", "err": err})
			return err
		}
		if err := os.MkdirAll(filepath.Join(rootPath, v.Target), 0755); err != nil {
			g.Log(grohl.Data{"at": "volume_mkdir", "dir": v.Target, "status": "error", "err": err})
			return err
		}
		if err != nil {
			g.Log(grohl.Data{"at": "volume_mount", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
			return err
		}
		if err := bindMount(vol.Location(), filepath.Join(rootPath, v.Target), v.Writeable, true); err != nil {
			g.Log(grohl.Data{"at": "volume_mount2", "target": v.Target, "volumeID": v.VolumeID, "status": "error", "err": err})
			return err
		}
	}

	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			return fmt.Errorf("unknown port proto %q", p.Proto)
		}

		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}

	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}

	config := &containerinit.Config{
		TTY:       job.Config.TTY,
		OpenStdin: job.Config.Stdin,
		WorkDir:   job.Config.WorkingDir,
		Resources: job.Resources,
	}
	if !job.Config.HostNetwork {
		config.IP = container.IP.String() + "/24"
		config.Gateway = l.bridgeAddr.String()
	}
	if config.WorkDir == "" {
		config.WorkDir = imageConfig.WorkingDir
	}
	if job.Config.Uid > 0 {
		config.User = strconv.Itoa(job.Config.Uid)
	} else if imageConfig.User != "" {
		// TODO: check and lookup user from image config
	}
	if len(job.Config.Entrypoint) > 0 {
		config.Args = job.Config.Entrypoint
		config.Args = append(config.Args, job.Config.Cmd...)
	} else {
		config.Args = imageConfig.Entrypoint
		if len(job.Config.Cmd) > 0 {
			config.Args = append(config.Args, job.Config.Cmd...)
		} else {
			config.Args = append(config.Args, imageConfig.Cmd...)
		}
	}
	for _, port := range job.Config.Ports {
		config.Ports = append(config.Ports, port)
	}

	g.Log(grohl.Data{"at": "write_config"})
	l.envMtx.RLock()
	err = writeContainerConfig(filepath.Join(rootPath, ".containerconfig"), config,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": job.ID,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		g.Log(grohl.Data{"at": "write_config", "status": "error", "err": err})
		return err
	}

	l.state.AddJob(job, container.IP)
	domain := &lt.Domain{
		Type:   "lxc",
		Name:   job.ID,
		Memory: lt.UnitInt{Value: 1, Unit: "GiB"},
		OS: lt.OS{
			Type: lt.OSType{Value: "exe"},
			Init: "/.containerinit",
		},
		Devices: lt.Devices{
			Filesystems: []lt.Filesystem{{
				Type:   "mount",
				Source: lt.FSRef{Dir: rootPath},
				Target: lt.FSRef{Dir: "/"},
			}},
			Consoles: []lt.Console{{Type: "pty"}},
		},
		OnPoweroff: "preserve",
		OnCrash:    "preserve",
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		domain.Memory = lt.UnitInt{Value: *spec.Limit, Unit: "bytes"}
	}

	if !job.Config.HostNetwork {
		domain.Devices.Interfaces = []lt.Interface{{
			Type:   "network",
			Source: lt.InterfaceSrc{Network: libvirtNetName},
		}}
	}

	// attempt to run libvirt commands multiple times in case the libvirt daemon is
	// temporarily unavailable (e.g. it has restarted, which sometimes happens in CI)
	g.Log(grohl.Data{"at": "define_domain"})
	var vd libvirt.VirDomain
	if err := l.withConnRetries(func() (err error) {
		vd, err = l.libvirt.DomainDefineXML(string(domain.XML()))
		return
	}); err != nil {
		g.Log(grohl.Data{"at": "define_domain", "status": "error", "err": err})
		return err
	}

	g.Log(grohl.Data{"at": "create_domain"})
	if err := l.withConnRetries(vd.Create); err != nil {
		g.Log(grohl.Data{"at": "create_domain", "status": "error", "err": err})
		return err
	}
	uuid, err := vd.GetUUIDString()
	if err != nil {
		g.Log(grohl.Data{"at": "get_domain_uuid", "status": "error", "err": err})
		return err
	}
	g.Log(grohl.Data{"at": "get_uuid", "uuid": uuid})
	l.state.SetContainerID(job.ID, uuid)

	domainXML, err := vd.GetXMLDesc(0)
	if err != nil {
		g.Log(grohl.Data{"at": "get_domain_xml", "status": "error", "err": err})
		return err
	}
	domain = &lt.Domain{}
	if err := xml.Unmarshal([]byte(domainXML), domain); err != nil {
		g.Log(grohl.Data{"at": "unmarshal_domain_xml", "status": "error", "err": err})
		return err
	}

	go container.watch(nil)

	g.Log(grohl.Data{"at": "finish"})
	return nil
}
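The withConnRetries helper called above is not shown in these examples; the comment only states that libvirt calls are retried because the daemon may be briefly unavailable. A generic retry wrapper along those lines might look like the following sketch (the attempt count and delay are assumptions for illustration, not Flynn's values):

package main

import (
	"errors"
	"fmt"
	"time"
)

// withRetries calls fn up to attempts times, sleeping between tries,
// and returns the last error if every attempt fails.
func withRetries(attempts int, delay time.Duration, fn func() error) error {
	var err error
	for i := 0; i < attempts; i++ {
		if i > 0 {
			time.Sleep(delay)
		}
		if err = fn(); err == nil {
			return nil
		}
	}
	return err
}

func main() {
	calls := 0
	err := withRetries(3, 10*time.Millisecond, func() error {
		calls++
		if calls < 3 {
			return errors.New("libvirt daemon unavailable")
		}
		return nil
	})
	fmt.Println("calls:", calls, "err:", err)
}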
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 5*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err})
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, err := c.Client.GetStdout()
		if err != nil {
			g.Log(grohl.Data{"at": "get_stdout", "status": "error", "err": err.Error()})
			return err
		}
		log := c.l.openLog(c.job.ID)
		defer log.Close()
		// TODO: log errors from these
		go log.ReadFrom(1, stdout)
		go log.ReadFrom(2, stderr)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
Example #26
func (c *context) watchHost(id string, events chan<- *host.Event) {
	if !c.hosts.Add(id) {
		return
	}
	defer c.hosts.Remove(id)

	g := grohl.NewContext(grohl.Data{"fn": "watchHost", "host.id": id})

	h, err := c.DialHost(id)
	if err != nil {
		// TODO: log/handle error
	}
	c.hosts.Set(id, h)

	g.Log(grohl.Data{"at": "start"})

	ch := make(chan *host.Event)
	h.StreamEvents("all", ch)

	// Nil event to mark the start of watching a host
	if events != nil {
		events <- nil
	}

	for event := range ch {
		job := c.jobs.Get(id, event.JobID)
		if job == nil {
			continue
		}

		j := &ct.Job{ID: id + "-" + event.JobID, AppID: job.Formation.AppID, ReleaseID: job.Formation.Release.ID, Type: job.Type}
		switch event.Event {
		case "create":
			j.State = "starting"
		case "start":
			j.State = "up"
			job.startedAt = event.Job.StartedAt
		case "stop":
			j.State = "down"
		case "error":
			j.State = "crashed"
		}
		if err = c.PutJob(j); err != nil {
			// TODO: log/handle error
		}

		if event.Event != "error" && event.Event != "stop" {
			if events != nil {
				events <- event
			}
			continue
		}
		g.Log(grohl.Data{"at": "remove", "job.id": event.JobID, "event": event.Event})

		c.jobs.Remove(id, event.JobID)
		go func(event *host.Event) {
			c.mtx.RLock()
			job.Formation.RestartJob(job.Type, id, event.JobID)
			c.mtx.RUnlock()
			if events != nil {
				events <- event
			}
		}(event)
	}
	// TODO: check error/reconnect
}
Example #27
func (c *context) watchHost(h *cluster.Host, ready chan struct{}) {
	if !c.hosts.Add(h.ID()) {
		if ready != nil {
			ready <- struct{}{}
		}
		return
	}
	defer c.hosts.Remove(h.ID())

	g := grohl.NewContext(grohl.Data{"fn": "watchHost", "host.id": h.ID()})

	c.hosts.Set(h.ID(), h)

	g.Log(grohl.Data{"at": "start"})

	ch := make(chan *host.Event)
	h.StreamEvents("all", ch)
	if ready != nil {
		ready <- struct{}{}
	}

	// Call PutJob in a goroutine so we don't block receiving job events whilst potentially
	// making multiple requests to the controller (e.g. if the controller is down).
	//
	// Use a channel (rather than spawning a goroutine per event) so that events are delivered in order.
	jobs := make(chan *ct.Job, 10)
	go func() {
		for job := range jobs {
			putJobAttempts.Run(func() error {
				if err := c.PutJob(job); err != nil {
					g.Log(grohl.Data{"at": "put_job_error", "job.id": job.ID, "state": job.State, "err": err})
					// ignore validation / not found errors
					if httphelper.IsValidationError(err) || err == controller.ErrNotFound {
						return nil
					}
					return err
				}
				g.Log(grohl.Data{"at": "put_job", "job.id": job.ID, "state": job.State})
				return nil
			})
		}
	}()

	for event := range ch {
		meta := event.Job.Job.Metadata
		appID := meta["flynn-controller.app"]
		releaseID := meta["flynn-controller.release"]
		jobType := meta["flynn-controller.type"]

		if appID == "" || releaseID == "" {
			continue
		}

		job := &ct.Job{
			ID:        event.JobID,
			AppID:     appID,
			ReleaseID: releaseID,
			Type:      jobType,
			State:     jobState(event),
			Meta:      jobMetaFromMetadata(meta),
		}
		g.Log(grohl.Data{"at": "event", "job.id": event.JobID, "event": event.Event})
		jobs <- job

		// get a read lock on the mutex to ensure we are not currently
		// syncing with the cluster
		c.mtx.RLock()
		j := c.jobs.Get(h.ID(), event.JobID)
		c.mtx.RUnlock()
		if j == nil {
			continue
		}
		j.startedAt = event.Job.StartedAt

		if event.Event != "error" && event.Event != "stop" {
			continue
		}
		g.Log(grohl.Data{"at": "remove", "job.id": event.JobID, "event": event.Event})

		c.jobs.Remove(h.ID(), event.JobID)
		go func(event *host.Event) {
			c.mtx.RLock()
			j.Formation.RestartJob(jobType, h.ID(), event.JobID)
			c.mtx.RUnlock()
		}(event)
	}
	// TODO: check error/reconnect
}
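The comment in the example above explains why job updates are pushed onto a buffered channel and drained by a single goroutine: sends from the event loop rarely block, yet updates still reach the controller in order. A minimal, self-contained sketch of that pattern (the update type and queue size are illustrative, not Flynn's API):

package main

import "fmt"

// update is an illustrative stand-in for the controller job record.
type update struct{ id, state string }

func main() {
	queue := make(chan update, 10) // buffered so the event loop rarely blocks
	done := make(chan struct{})

	// A single consumer drains the queue, so updates are delivered in the
	// order they were queued even though delivery may be slow or retried.
	go func() {
		defer close(done)
		for u := range queue {
			fmt.Println("delivered", u.id, u.state)
		}
	}()

	queue <- update{"job1", "starting"}
	queue <- update{"job1", "up"}
	close(queue) // no more events
	<-done       // wait for the consumer to finish
}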
Example #28
func (f *Formation) rectify() {
	g := grohl.NewContext(grohl.Data{"fn": "rectify", "app.id": f.AppID, "release.id": f.Release.ID})

	var hosts map[string]host.Host
	if _, ok := f.c.omni[f]; ok {
		var err error
		hosts, err = f.c.ListHosts()
		if err != nil {
			return
		}
		if len(hosts) == 0 {
			// TODO: log/handle error
		}
	}
	// update job counts
	for t, expected := range f.Processes {
		if f.Release.Processes[t].Omni {
			// get job counts per host
			hostCounts := make(map[string]int, len(hosts))
			for _, h := range hosts {
				hostCounts[h.ID] = 0
				for _, job := range h.Jobs {
					if f.jobType(job) != t {
						continue
					}
					hostCounts[h.ID]++
				}
			}
			// update per host
			for hostID, actual := range hostCounts {
				diff := expected - actual
				g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
				if diff > 0 {
					f.add(diff, t, hostID)
				} else if diff < 0 {
					f.remove(-diff, t, hostID)
				}
			}
		} else {
			actual := len(f.jobs[t])
			diff := expected - actual
			g.Log(grohl.Data{"at": "update", "type": t, "expected": expected, "actual": actual, "diff": diff})
			if diff > 0 {
				f.add(diff, t, "")
			} else if diff < 0 {
				f.remove(-diff, t, "")
			}
		}
	}

	// remove process types
	for t, jobs := range f.jobs {
		// ignore one-off jobs which have no type
		if t == "" {
			continue
		}
		if _, exists := f.Processes[t]; !exists {
			g.Log(grohl.Data{"at": "cleanup", "type": t, "count": len(jobs)})
			f.remove(len(jobs), t, "")
		}
	}
}
Example #29
func (c *context) syncCluster() {
	g := grohl.NewContext(grohl.Data{"fn": "syncCluster"})

	artifacts := make(map[string]*ct.Artifact)
	releases := make(map[string]*ct.Release)
	rectify := make(map[*Formation]struct{})

	hosts, err := c.Hosts()
	if err != nil {
		// TODO: log/handle error
	}

	c.mtx.Lock()
	for _, h := range hosts {
		jobs, err := h.ListJobs()
		if err != nil {
			// TODO: log/handle error
			continue
		}
		for _, j := range jobs {
			if j.Status != host.StatusStarting && j.Status != host.StatusRunning {
				continue
			}
			job := j.Job
			appID := job.Metadata["flynn-controller.app"]
			appName := job.Metadata["flynn-controller.app_name"]
			releaseID := job.Metadata["flynn-controller.release"]
			jobType := job.Metadata["flynn-controller.type"]
			gg := g.New(grohl.Data{"host.id": h.ID(), "job.id": job.ID, "app.id": appID, "release.id": releaseID, "type": jobType})

			if appID == "" || releaseID == "" {
				continue
			}
			if job := c.jobs.Get(h.ID(), job.ID); job != nil {
				continue
			}

			f := c.formations.Get(appID, releaseID)
			if f == nil {
				release := releases[releaseID]
				if release == nil {
					release, err = c.GetRelease(releaseID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getRelease", "status": "error", "err": err})
						continue
					}
					releases[release.ID] = release
				}

				artifact := artifacts[release.ArtifactID]
				if artifact == nil {
					artifact, err = c.GetArtifact(release.ArtifactID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getArtifact", "status": "error", "err": err})
						continue
					}
					artifacts[artifact.ID] = artifact
				}

				formation, err := c.GetFormation(appID, releaseID)
				if err != nil {
					gg.Log(grohl.Data{"at": "getFormation", "status": "error", "err": err})
					continue
				}

				f = NewFormation(c, &ct.ExpandedFormation{
					App:       &ct.App{ID: appID, Name: appName},
					Release:   release,
					Artifact:  artifact,
					Processes: formation.Processes,
				})
				gg.Log(grohl.Data{"at": "addFormation"})
				f = c.formations.Add(f)
			}

			gg.Log(grohl.Data{"at": "addJob"})
			go c.PutJob(&ct.Job{
				ID:        job.ID,
				AppID:     appID,
				ReleaseID: releaseID,
				Type:      jobType,
				State:     "up",
				Meta:      jobMetaFromMetadata(job.Metadata),
			})
			j := f.jobs.Add(jobType, h.ID(), job.ID)
			j.Formation = f
			c.jobs.Add(j)
			rectify[f] = struct{}{}
		}
	}
	if err := c.syncJobStates(); err != nil {
		// TODO: handle error
	}
	c.mtx.Unlock()

	for f := range rectify {
		go f.Rectify()
	}
}
Example #30
func (c *context) syncCluster(events chan<- *host.Event) {
	g := grohl.NewContext(grohl.Data{"fn": "syncCluster"})

	artifacts := make(map[string]*ct.Artifact)
	releases := make(map[string]*ct.Release)
	rectify := make(map[*Formation]struct{})

	go c.watchHosts(events)

	hosts, err := c.ListHosts()
	if err != nil {
		// TODO: log/handle error
	}

	c.mtx.Lock()
	for _, h := range hosts {
		for _, job := range h.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			releaseID := job.Metadata["flynn-controller.release"]
			jobType := job.Metadata["flynn-controller.type"]
			gg := g.New(grohl.Data{"host.id": h.ID, "job.id": job.ID, "app.id": appID, "release.id": releaseID, "type": jobType})

			if appID == "" || releaseID == "" {
				continue
			}
			if job := c.jobs.Get(h.ID, job.ID); job != nil {
				continue
			}

			f := c.formations.Get(appID, releaseID)
			if f == nil {
				release := releases[releaseID]
				if release == nil {
					release, err = c.GetRelease(releaseID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getRelease", "status": "error", "err": err})
						continue
					}
					releases[release.ID] = release
				}

				artifact := artifacts[release.ArtifactID]
				if artifact == nil {
					artifact, err = c.GetArtifact(release.ArtifactID)
					if err != nil {
						gg.Log(grohl.Data{"at": "getArtifact", "status": "error", "err": err})
						continue
					}
					artifacts[artifact.ID] = artifact
				}

				formation, err := c.GetFormation(appID, releaseID)
				if err != nil {
					gg.Log(grohl.Data{"at": "getFormation", "status": "error", "err": err})
					continue
				}

				f = NewFormation(c, &ct.ExpandedFormation{
					App:       &ct.App{ID: appID},
					Release:   release,
					Artifact:  artifact,
					Processes: formation.Processes,
				})
				gg.Log(grohl.Data{"at": "addFormation"})
				f = c.formations.Add(f)
			}

			gg.Log(grohl.Data{"at": "addJob"})
			j := f.jobs.Add(jobType, h.ID, job.ID)
			j.Formation = f
			c.jobs.Add(j)
			rectify[f] = struct{}{}
		}
	}
	c.mtx.Unlock()

	for f := range rectify {
		go f.Rectify()
	}
}