func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
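	// Retry connecting to the containerinit RPC socket for up to ten seconds,
	// sleeping a millisecond between attempts.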
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err.Error()})
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
		if err := d.Destroy(); err != nil {
			g.Log(grohl.Data{"at": "destroy", "status": "error", "err": err.Error()})
		}
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, initLog, err := c.Client.GetStreams()
		if err != nil {
			g.Log(grohl.Data{"at": "get_streams", "status": "error", "err": err.Error()})
			return err
		}

		log := c.l.openLog(c.job.ID)
		defer log.Close()

		muxConfig := logmux.Config{
			AppID:   c.job.Metadata["flynn-controller.app"],
			HostID:  c.l.state.id,
			JobType: c.job.Metadata["flynn-controller.type"],
			JobID:   c.job.ID,
		}

		// TODO(benburkert): remove file logging once attach proto uses logaggregator
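		// Tee stdout and stderr into two pipes each: one copy is followed into
		// the local log file, the other into the log mux (fd 1 is stdout,
		// fd 2 is stderr; fd 3 carries the containerinit log).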
		streams := []io.Reader{stdout, stderr}
		for i, stream := range streams {
			bufr, bufw := io.Pipe()
			muxr, muxw := io.Pipe()
			go func(r io.Reader, pw1, pw2 *io.PipeWriter) {
				mw := io.MultiWriter(pw1, pw2)
				_, err := io.Copy(mw, r)
				pw1.CloseWithError(err)
				pw2.CloseWithError(err)
			}(stream, bufw, muxw)

			fd := i + 1
			go log.Follow(fd, bufr)
			go c.l.mux.Follow(muxr, fd, muxConfig)
		}

		go log.Follow(3, initLog)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
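	// Block on state changes streamed from containerinit until the container
	// exits, fails, or the stream closes.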
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
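
A minimal sketch of the tee used above: each container stream is copied into two pipes so that one copy can be followed into the local log file and the other into the log mux. teeStream and its names are illustrative, not part of the original code.

func teeStream(r io.Reader) (forLog, forMux io.Reader) {
	logR, logW := io.Pipe()
	muxR, muxW := io.Pipe()
	go func() {
		// copy the stream into both pipes, then propagate the copy error
		// (or nil on EOF) to both readers
		_, err := io.Copy(io.MultiWriter(logW, muxW), r)
		logW.CloseWithError(err)
		muxW.CloseWithError(err)
	}()
	return logR, muxR
}
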
Example #2
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})

	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 5*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err})
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
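		// GetStdout returns the read ends of the container's stdout and stderr streams.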
		stdout, stderr, err := c.Client.GetStdout()
		if err != nil {
			g.Log(grohl.Data{"at": "get_stdout", "status": "error", "err": err.Error()})
			return err
		}
		log := c.l.openLog(c.job.ID)
		defer log.Close()
		// TODO: log errors from these
		go log.ReadFrom(1, stdout)
		go log.ReadFrom(2, stderr)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
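
A hedged sketch of the UNIX_PATH_MAX workaround used in each example: dial a socket whose path is too long for a unix sockaddr through a short symlink. dialLongSocket is illustrative; the real code connects via containerinit.NewClient.

func dialLongSocket(socketPath, shortLink string) (net.Conn, error) {
	// a link left over from a previous attempt is fine
	if err := os.Symlink(socketPath, shortLink); err != nil && !os.IsExist(err) {
		return nil, err
	}
	// the symlink is only needed while dialing; an established connection
	// survives its removal
	defer os.Remove(shortLink)
	return net.Dial("unix", shortLink)
}
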
Example #3
func (c *Container) watch(ready chan<- error, buffer host.LogBuffer) error {
	log := c.l.logger.New("fn", "watch", "job.id", c.job.ID)
	log.Info("start watching container")

	// readyErr reports the connection result on the ready channel, if the
	// caller provided one.
	readyErr := func(err error) {
		if ready != nil {
			ready <- err
		}
	}

	defer func() {
		c.container.Destroy()
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				log.Error("error symlinking socket", "err", err)
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if err != nil {
		log.Error("error connecting to container", "err", err)
		readyErr(err)
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	readyErr(nil)

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		if err := c.followLogs(log, buffer); err != nil {
			return err
		}
	}

	log.Info("watching for changes")
	for change := range c.Client.StreamState() {
		log.Info("state change", "state", change.State.String())
		if change.Error != "" {
			err := errors.New(change.Error)
			log.Error("error in change state", "err", err)
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			log.Info("waiting for attach")
			c.l.state.WaitAttach(c.job.ID)
			log.Info("resuming")
			c.Client.Resume()
			log.Info("resumed")
		case containerinit.StateRunning:
			log.Info("container running")
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			log.Info("container exited", "status", change.ExitStatus)
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			log.Info("container failed to start")
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	log.Error("unknown failure")
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
Example #4
func (c *libvirtContainer) watch(ready chan<- error, buffer host.LogBuffer) error {
	log := c.l.logger.New("fn", "watch", "job.id", c.job.ID)
	log.Info("start watching container")

	defer func() {
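		// wait for the container to exit before deregistering and cleaning it up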
		c.waitExit()
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				log.Error("error symlinking socket", "err", err)
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		log.Error("error connecting to container", "err", err)
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
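		// release the libvirt domain handle once we're done with it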
		defer d.Free()
		if err := d.Destroy(); err != nil {
			log.Error("error destroying domain", "err", err)
		}
		return err
	}
	defer c.Client.Close()

	go func() {
		// Workaround for mounts leaking into the libvirt_lxc supervisor process,
		// see https://github.com/flynn/flynn/issues/1125 for details. Remove
		// nsumount from the tree when deleting.
		log.Info("cleaning up mounts")
		if err := c.cleanupMounts(c.Domain.ID); err != nil {
			log.Error("error cleaning up mounts", "err", err)
		}

		// The bind mounts are copied when we spin up the container, we don't
		// need them in the root mount namespace any more.
		c.unbindMounts()
		log.Info("finished cleaning up mounts")
	}()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		if err := c.followLogs(log, buffer); err != nil {
			return err
		}
	}

	log.Info("watching for changes")
	for change := range c.Client.StreamState() {
		log.Info("state change", "state", change.State.String())
		if change.Error != "" {
			err := errors.New(change.Error)
			log.Error("error in change state", "err", err)
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			log.Info("waiting for attach")
			c.l.state.WaitAttach(c.job.ID)
			log.Info("resuming")
			c.Client.Resume()
			log.Info("resumed")
		case containerinit.StateRunning:
			log.Info("container running")
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			log.Info("container exited", "status", change.ExitStatus)
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			log.Info("container failed to start")
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	log.Error("unknown failure")
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))

	return nil
}
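
The connect loop in every variant follows the same retry-until-deadline idiom; a generic sketch, with retryFor as an illustrative name:

func retryFor(timeout, interval time.Duration, fn func() error) error {
	var err error
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(interval) {
		if err = fn(); err == nil {
			return nil
		}
	}
	// return the last error seen once the deadline passes
	return err
}

With it, the dial loop above reduces to a single retryFor(10*time.Second, time.Millisecond, ...) call wrapping containerinit.NewClient.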