func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})
	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err.Error()})
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
		if err := d.Destroy(); err != nil {
			g.Log(grohl.Data{"at": "destroy", "status": "error", "err": err.Error()})
		}
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, initLog, err := c.Client.GetStreams()
		if err != nil {
			g.Log(grohl.Data{"at": "get_streams", "status": "error", "err": err.Error()})
			return err
		}

		log := c.l.openLog(c.job.ID)
		defer log.Close()

		muxConfig := logmux.Config{
			AppID:   c.job.Metadata["flynn-controller.app"],
			HostID:  c.l.state.id,
			JobType: c.job.Metadata["flynn-controller.type"],
			JobID:   c.job.ID,
		}

		// TODO(benburkert): remove file logging once attach proto uses logaggregator
		streams := []io.Reader{stdout, stderr}
		for i, stream := range streams {
			bufr, bufw := io.Pipe()
			muxr, muxw := io.Pipe()
			go func(r io.Reader, pw1, pw2 *io.PipeWriter) {
				mw := io.MultiWriter(pw1, pw2)
				_, err := io.Copy(mw, r)
				pw1.CloseWithError(err)
				pw2.CloseWithError(err)
			}(stream, bufw, muxw)

			fd := i + 1
			go log.Follow(fd, bufr)
			go c.l.mux.Follow(muxr, fd, muxConfig)
		}

		go log.Follow(3, initLog)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}

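// Illustrative, self-contained sketch (not part of the original source) of the
// stream fan-out pattern used above: one reader is copied through io.MultiWriter
// into two io.Pipes so that two independent consumers (in the original code, the
// on-disk job log and the log mux) can each follow the same stdout/stderr stream
// and see the same EOF or error. All names below are invented for the example.
package main

import (
	"fmt"
	"io"
	"strings"
	"sync"
)

func main() {
	src := strings.NewReader("line one\nline two\n")

	bufr, bufw := io.Pipe()
	muxr, muxw := io.Pipe()

	// Copy the source into both pipes; closing the writers with the copy error
	// propagates EOF (or the failure) to both consumers.
	go func(r io.Reader, pw1, pw2 *io.PipeWriter) {
		mw := io.MultiWriter(pw1, pw2)
		_, err := io.Copy(mw, r)
		pw1.CloseWithError(err)
		pw2.CloseWithError(err)
	}(src, bufw, muxw)

	// Two consumers read concurrently; each must keep reading or the
	// MultiWriter (which writes to the pipes in sequence) would stall.
	var wg sync.WaitGroup
	consume := func(name string, r io.Reader) {
		defer wg.Done()
		data, _ := io.ReadAll(r)
		fmt.Printf("%s got %d bytes\n", name, len(data))
	}
	wg.Add(2)
	go consume("file log", bufr)
	go consume("log mux", muxr)
	wg.Wait()
}
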
func (c *libvirtContainer) watch(ready chan<- error) error {
	g := grohl.NewContext(grohl.Data{"backend": "libvirt-lxc", "fn": "watch_container", "job.id": c.job.ID})
	g.Log(grohl.Data{"at": "start"})
	defer func() {
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 5*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil {
				g.Log(grohl.Data{"at": "symlink_socket", "status": "error", "err": err, "source": socketPath, "target": symlink})
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		g.Log(grohl.Data{"at": "connect", "status": "error", "err": err})
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.TTY {
		g.Log(grohl.Data{"at": "get_stdout"})
		stdout, stderr, err := c.Client.GetStdout()
		if err != nil {
			g.Log(grohl.Data{"at": "get_stdout", "status": "error", "err": err.Error()})
			return err
		}
		log := c.l.openLog(c.job.ID)
		defer log.Close()
		// TODO: log errors from these
		go log.ReadFrom(1, stdout)
		go log.ReadFrom(2, stderr)
	}

	g.Log(grohl.Data{"at": "watch_changes"})
	for change := range c.Client.StreamState() {
		g.Log(grohl.Data{"at": "change", "state": change.State.String()})
		if change.Error != "" {
			err := errors.New(change.Error)
			g.Log(grohl.Data{"at": "change", "status": "error", "err": err})
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			g.Log(grohl.Data{"at": "wait_attach"})
			c.l.state.WaitAttach(c.job.ID)
			g.Log(grohl.Data{"at": "resume"})
			c.Client.Resume()
		case containerinit.StateRunning:
			g.Log(grohl.Data{"at": "running"})
			c.l.state.SetStatusRunning(c.job.ID)
		case containerinit.StateExited:
			g.Log(grohl.Data{"at": "exited", "status": change.ExitStatus})
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			g.Log(grohl.Data{"at": "failed"})
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	g.Log(grohl.Data{"at": "unknown_failure"})
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}

func (c *Container) watch(ready chan<- error, buffer host.LogBuffer) error {
	log := c.l.logger.New("fn", "watch", "job.id", c.job.ID)
	log.Info("start watching container")

	readyErr := func(err error) {
		if ready != nil {
			ready <- err
		}
	}

	defer func() {
		c.container.Destroy()
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				log.Error("error symlinking socket", "err", err)
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if err != nil {
		log.Error("error connecting to container", "err", err)
		readyErr(err)
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))
		return err
	}
	defer c.Client.Close()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	readyErr(nil)

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		if err := c.followLogs(log, buffer); err != nil {
			return err
		}
	}

	log.Info("watching for changes")
	for change := range c.Client.StreamState() {
		log.Info("state change", "state", change.State.String())
		if change.Error != "" {
			err := errors.New(change.Error)
			log.Error("error in change state", "err", err)
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			log.Info("waiting for attach")
			c.l.state.WaitAttach(c.job.ID)
			log.Info("resuming")
			c.Client.Resume()
			log.Info("resumed")
		case containerinit.StateRunning:
			log.Info("container running")
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			log.Info("container exited", "status", change.ExitStatus)
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			log.Info("container failed to start")
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	log.Error("unknown failure")
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}

func (c *libvirtContainer) watch(ready chan<- error, buffer host.LogBuffer) error {
	log := c.l.logger.New("fn", "watch", "job.id", c.job.ID)
	log.Info("start watching container")

	defer func() {
		c.waitExit()
		// TODO: kill containerinit/domain if it is still running
		c.l.containersMtx.Lock()
		delete(c.l.containers, c.job.ID)
		c.l.containersMtx.Unlock()
		c.cleanup()
		close(c.done)
	}()

	var symlinked bool
	var err error
	symlink := "/tmp/containerinit-rpc." + c.job.ID
	socketPath := path.Join(c.RootPath, containerinit.SocketPath)
	for startTime := time.Now(); time.Since(startTime) < 10*time.Second; time.Sleep(time.Millisecond) {
		if !symlinked {
			// We can't connect to the socket file directly because
			// the path to it is longer than 108 characters (UNIX_PATH_MAX).
			// Create a temporary symlink to connect to.
			if err = os.Symlink(socketPath, symlink); err != nil && !os.IsExist(err) {
				log.Error("error symlinking socket", "err", err)
				continue
			}
			defer os.Remove(symlink)
			symlinked = true
		}

		c.Client, err = containerinit.NewClient(symlink)
		if err == nil {
			break
		}
	}
	if ready != nil {
		ready <- err
	}
	if err != nil {
		log.Error("error connecting to container", "err", err)
		c.l.state.SetStatusFailed(c.job.ID, errors.New("failed to connect to container"))

		d, e := c.l.libvirt.LookupDomainByName(c.job.ID)
		if e != nil {
			return e
		}
		defer d.Free()
		if err := d.Destroy(); err != nil {
			log.Error("error destroying domain", "err", err)
		}
		return err
	}
	defer c.Client.Close()

	go func() {
		// Workaround for mounts leaking into the libvirt_lxc supervisor process,
		// see https://github.com/flynn/flynn/issues/1125 for details. Remove
		// nsumount from the tree when deleting.
		log.Info("cleaning up mounts")
		if err := c.cleanupMounts(c.Domain.ID); err != nil {
			log.Error("error cleaning up mounts", "err", err)
		}

		// The bind mounts are copied when we spin up the container, we don't
		// need them in the root mount namespace any more.
		c.unbindMounts()
		log.Info("finished cleaning up mounts")
	}()

	c.l.containersMtx.Lock()
	c.l.containers[c.job.ID] = c
	c.l.containersMtx.Unlock()

	if !c.job.Config.DisableLog && !c.job.Config.TTY {
		if err := c.followLogs(log, buffer); err != nil {
			return err
		}
	}

	log.Info("watching for changes")
	for change := range c.Client.StreamState() {
		log.Info("state change", "state", change.State.String())
		if change.Error != "" {
			err := errors.New(change.Error)
			log.Error("error in change state", "err", err)
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, err)
			return err
		}
		switch change.State {
		case containerinit.StateInitial:
			log.Info("waiting for attach")
			c.l.state.WaitAttach(c.job.ID)
			log.Info("resuming")
			c.Client.Resume()
			log.Info("resumed")
		case containerinit.StateRunning:
			log.Info("container running")
			c.l.state.SetStatusRunning(c.job.ID)

			// if the job was stopped before it started, exit
			if c.l.state.GetJob(c.job.ID).ForceStop {
				c.Stop()
			}
		case containerinit.StateExited:
			log.Info("container exited", "status", change.ExitStatus)
			c.Client.Resume()
			c.l.state.SetStatusDone(c.job.ID, change.ExitStatus)
			return nil
		case containerinit.StateFailed:
			log.Info("container failed to start")
			c.Client.Resume()
			c.l.state.SetStatusFailed(c.job.ID, errors.New("container failed to start"))
			return nil
		}
	}
	log.Error("unknown failure")
	c.l.state.SetStatusFailed(c.job.ID, errors.New("unknown failure"))
	return nil
}

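// Illustrative, self-contained sketch (not part of the original source) of the
// UNIX_PATH_MAX workaround used in the watch functions above: a unix socket whose
// absolute path exceeds the 108-byte sun_path limit cannot be dialed directly,
// but dialing it through a short symlink works because the kernel resolves the
// symlink during path lookup. All paths and names below are invented for the
// example.
package main

import (
	"fmt"
	"net"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	// Build a directory deep enough that the socket's absolute path is well
	// over 108 bytes.
	base, err := os.MkdirTemp("", "unixpathmax")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(base)
	deep := filepath.Join(base, strings.Repeat("d", 60), strings.Repeat("e", 60))
	if err := os.MkdirAll(deep, 0755); err != nil {
		panic(err)
	}
	sockPath := filepath.Join(deep, "rpc.sock")

	// Bind using a short relative path from inside the deep directory, the same
	// way the containerinit socket has a short path inside the container but a
	// long one when reached via the host's RootPath.
	wd, _ := os.Getwd()
	if err := os.Chdir(deep); err != nil {
		panic(err)
	}
	ln, err := net.Listen("unix", "rpc.sock")
	os.Chdir(wd)
	if err != nil {
		panic(err)
	}
	defer ln.Close()
	go func() {
		if conn, err := ln.Accept(); err == nil {
			conn.Close()
		}
	}()

	// Dialing the full path fails because it does not fit in sun_path...
	if _, err := net.Dial("unix", sockPath); err != nil {
		fmt.Println("direct dial failed:", err)
	}

	// ...but dialing through a short symlink to the same socket succeeds.
	symlink := filepath.Join(os.TempDir(), "containerinit-rpc.demo")
	os.Remove(symlink)
	if err := os.Symlink(sockPath, symlink); err != nil {
		panic(err)
	}
	defer os.Remove(symlink)
	conn, err := net.Dial("unix", symlink)
	if err != nil {
		panic(err)
	}
	conn.Close()
	fmt.Println("dial via symlink succeeded")
}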