func (s *Splicer) splice(direction string, inFD, outFD int, wg *sync.WaitGroup) { // Signal to the caller that we're done defer func() { // If a reliable delivery socket has data associated with it when a close takes place, the system continues to attempt data transfer. if err := syscall.Shutdown(inFD, SHUT_RDWR); err != nil { log.Printf("Shutdown err %v", err) } if err := syscall.Shutdown(outFD, SHUT_RDWR); err != nil { log.Printf("Shutdown err %v", err) } if wg != nil { wg.Done() } }() pipe := make([]int, 2) if err := syscall.Pipe(pipe); err != nil { log.Fatal(err) } defer func() { syscall.Close(pipe[0]) syscall.Close(pipe[1]) }() var netWrittenBytes, netReadBytes int64 log.Printf("[%v] Splicing pipe %+v, tcpfds %+v", direction, s.pipe, s.tcpFD) for { // SPLICE_F_NONBLOCK: don't block if TCP buffer is empty // SPLICE_F_MOVE: directly move pages into splice buffer in kernel memory // SPLICE_F_MORE: just makes mysql connections slow log.Printf("[input (%v)] Entering input", direction) inBytes, err := syscall.Splice(inFD, nil, pipe[1], nil, s.bufferSize, SPLICE_F_NONBLOCK) if err := s.checkSpliceErr(inBytes, err, fmt.Sprintf("input (%v)", direction)); err != nil { log.Printf("ERROR [input (%v)] error: %v", direction, err) return } netReadBytes += inBytes log.Printf("[input (%v)] %d bytes read", direction, inBytes) log.Printf("[input (%v)] Entering output", direction) outBytes, err := syscall.Splice(pipe[0], nil, outFD, nil, s.bufferSize, SPLICE_F_NONBLOCK) if err := s.checkSpliceErr(inBytes, err, fmt.Sprintf("output (%v)", direction)); err != nil { log.Printf("ERROR [output (%v)] error: %v", direction, err) return } log.Printf("[output (%v)] %d bytes written, out of given input %d", direction, outBytes, inBytes) netWrittenBytes += outBytes } log.Printf("[%v] Spliced %d bytes read %d bytes written", direction, netWrittenBytes, netReadBytes) }
// createFDProxies creates pipes at /dev/stdout and /dev/stderr and copies data
// written to them to the job's stdout and stderr streams respectively.
//
// This is necessary (rather than just symlinking those paths to /proc/self/fd/{1,2})
// because the standard streams are sockets, and calling open(2) on a socket
// leads to an ENXIO error (see http://marc.info/?l=ast-users&m=120978595414993).
//
// cmd.Stdout and cmd.Stderr must both be *os.File values. If either is not,
// an error is returned before anything on the filesystem is touched. Errors
// from creating or opening a FIFO are returned as-is; a copier goroutine that
// was already started for the other path keeps running in that case.
func createFDProxies(cmd *exec.Cmd) error {
	// Validate both streams up front so a bad stream never leaves /dev
	// half-modified and never panics on the type assertion.
	stdout, ok := cmd.Stdout.(*os.File)
	if !ok {
		return fmt.Errorf("stdout is %T, not an *os.File", cmd.Stdout)
	}
	stderr, ok := cmd.Stderr.(*os.File)
	if !ok {
		return fmt.Errorf("stderr is %T, not an *os.File", cmd.Stderr)
	}

	for path, dst := range map[string]*os.File{
		"/dev/stdout": stdout,
		"/dev/stderr": stderr,
	} {
		// Best effort: the path may not exist, and Mkfifo reports any
		// problem that actually matters.
		os.Remove(path)
		if err := syscall.Mkfifo(path, 0666); err != nil {
			return err
		}
		pipe, err := os.OpenFile(path, os.O_RDWR, os.ModeNamedPipe)
		if err != nil {
			return err
		}
		// Both files are passed as arguments so each goroutine owns its own
		// pair regardless of how loop variables are scoped.
		go func(pipe, dst *os.File) {
			defer pipe.Close()
			for {
				// copy data from the pipe to dst using splice(2) (rather than io.Copy)
				// to avoid a needless copy through user space
				n, err := syscall.Splice(int(pipe.Fd()), nil, int(dst.Fd()), nil, 65535, 0)
				if err != nil || n == 0 {
					return
				}
			}
		}(pipe, dst)
	}
	return nil
}
func (p *Pair) WriteTo(fd uintptr, n int) (int, error) { m, err := syscall.Splice(int(p.r.Fd()), nil, int(fd), nil, int(n), 0) if err != nil { err = os.NewSyscallError("Splice write", err) } return int(m), err }
func main() { log.Printf("Input file %v", *inFile) in, err := os.Open(*inFile) if err != nil { log.Fatal(err) } defer in.Close() inFD := int(in.Fd()) log.Printf("Output file %v", *outFile) out, err := os.Create(*outFile) if err != nil { log.Fatal(err) } defer out.Close() outFD := int(out.Fd()) pipe := make([]int, 2) if err := syscall.Pipe(pipe); err != nil { log.Fatal(err) } defer syscall.Close(pipe[0]) defer syscall.Close(pipe[1]) flags := SPLICE_F_NONBLOCK blockSize := 1 inFileSize := fileSize(in) var netRead int64 for { inBytes, err := syscall.Splice(inFD, nil, pipe[1], nil, blockSize, flags) checkSpliceErr(inBytes, err, "input") netRead += inBytes log.Printf("[input] %d bytes read, %d remaining", inBytes, inFileSize-netRead) if err := syscall.SetNonblock(inFD, true); err != nil { log.Fatalf("Unable to close in fd") } outBytes, err := syscall.Splice(pipe[0], nil, outFD, nil, blockSize, flags) checkSpliceErr(outBytes, err, "output") log.Printf("[output] %d bytes written, out of given input %d", outBytes, inBytes) } }
func (p *Pair) LoadFrom(fd uintptr, sz int) (int, error) { if sz > p.size { return 0, fmt.Errorf("LoadFrom: not enough space %d, %d", sz, p.size) } n, err := syscall.Splice(int(fd), nil, int(p.w.Fd()), nil, sz, 0) if err != nil { err = os.NewSyscallError("Splice load from", err) } return int(n), err }
// splices all contents out to the specified file, updates outOff func (p *pipe) spliceOut(out *os.File, outOff *int64) error { toSplice := p.numInBuffer for toSplice > 0 { n, err := syscall.Splice(int(p.r.Fd()), nil, int(out.Fd()), outOff, toSplice, 0) if err != nil { return err } toSplice -= int(n) if outOff != nil { *outOff -= n } } p.numInBuffer = 0 return nil }
func (p *pipe) spliceIn(in *os.File, inOff *int64, length int) (int64, error) { if p.numInBuffer+length > PIPELEN { length = PIPELEN - p.numInBuffer } spliced := int64(0) for spliced < int64(length) { w, err := syscall.Splice(int(in.Fd()), inOff, int(p.w.Fd()), nil, length, 0) if err != nil { return spliced, err } spliced += w if inOff != nil { *inOff += w } } return spliced, nil }
func (p *Pair) LoadFromAt(fd uintptr, sz int, off int64) (int, error) { n, err := syscall.Splice(int(fd), &off, int(p.w.Fd()), nil, sz, 0) return int(n), err }
// containerInitApp runs as pid 1 and monitors the contained process to return
// its exit code.
//
// It registers the container init object as an RPC server, builds the
// exec.Cmd for the job from c (path, args, working directory, environment,
// session and credentials), wires the job's standard streams to either a PTY
// or to pipes, and then waits on init.resume before starting the job. Once
// the job is running it sets up monitoring for every port that has a Service
// and hands control to babySit, whose result is used as the exit code.
//
// Errors while setting up the RPC server or the console are returned. Errors
// resolving or starting the command are reported via init.changeState and
// init.exit(1) instead.
//
// NOTE(review): init.mtx is unlocked explicitly before the final
// init.exit(exitCode) while a deferred Unlock is still pending, so the
// trailing `return nil` presumably is never reached because init.exit does
// not return — confirm before changing the locking here.
func containerInitApp(c *Config, logFile *os.File) error {
	log := logger.New()

	init := newContainerInit(c, logFile)
	log.Debug("registering RPC server")
	if err := rpcplus.Register(init); err != nil {
		log.Error("error registering RPC server", "err", err)
		return err
	}
	// Hold the lock during setup; the deferred Unlock covers the early error
	// returns below, all of which happen while the lock is held.
	init.mtx.Lock()
	defer init.mtx.Unlock()

	// Prepare the cmd based on the given args
	// If this fails we report that below
	cmdPath, cmdErr := getCmdPath(c)
	cmd := exec.Command(cmdPath, c.Args[1:]...)
	cmd.Dir = c.WorkDir

	// Flatten the environment map into the KEY=VALUE form exec.Cmd expects.
	cmd.Env = make([]string, 0, len(c.Env))
	for k, v := range c.Env {
		cmd.Env = append(cmd.Env, k+"="+v)
	}

	// App runs in its own session
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}

	// Set credentials when either id is configured; an id that is not
	// configured keeps the zero value of syscall.Credential.
	if c.Uid != nil || c.Gid != nil {
		cmd.SysProcAttr.Credential = &syscall.Credential{}
		if c.Uid != nil {
			cmd.SysProcAttr.Credential.Uid = *c.Uid
		}
		if c.Gid != nil {
			cmd.SysProcAttr.Credential.Gid = *c.Gid
		}
	}

	// Console setup. Hook up the container app's stdin/stdout/stderr to
	// either a pty or pipes. The FDs for the controlling side of the
	// pty/pipes will be passed to flynn-host later via a UNIX socket.
	if c.TTY {
		log.Debug("creating PTY")
		ptyMaster, ptySlave, err := pty.Open()
		if err != nil {
			log.Error("error creating PTY", "err", err)
			return err
		}
		init.ptyMaster = ptyMaster
		cmd.Stdout = ptySlave
		cmd.Stderr = ptySlave
		if c.OpenStdin {
			log.Debug("attaching stdin to PTY")
			cmd.Stdin = ptySlave
			cmd.SysProcAttr.Setctty = true
		}
		// Ownership is only changed when both a uid and a gid are configured.
		if c.Uid != nil && c.Gid != nil {
			if err := syscall.Fchown(int(ptySlave.Fd()), int(*c.Uid), int(*c.Gid)); err != nil {
				log.Error("error changing PTY ownership", "err", err)
				return err
			}
		}
	} else {
		// We copy through a socketpair (rather than using cmd.StdoutPipe directly) to make
		// it easier for flynn-host to do non-blocking I/O (via net.FileConn) so that no
		// read(2) calls can succeed after closing the logs during an update.
		//
		// We also don't assign the socketpair directly to fd 1 because that prevents jobs
		// using /dev/stdout (calling open(2) on a socket leads to an ENXIO error, see
		// http://marc.info/?l=ast-users&m=120978595414993).
		//
		// newPipe obtains a pipe from pipeFn, optionally chowns it to the
		// configured uid/gid, creates a socketpair named name, starts a
		// goroutine copying pipe -> socket, and returns the read side of the
		// socketpair.
		newPipe := func(pipeFn func() (io.ReadCloser, error), name string) (*os.File, error) {
			pipe, err := pipeFn()
			if err != nil {
				return nil, err
			}
			if c.Uid != nil && c.Gid != nil {
				// The assertion to *os.File panics if pipeFn returns anything
				// else; the callers below pass cmd.StdoutPipe and
				// cmd.StderrPipe.
				if err := syscall.Fchown(int(pipe.(*os.File).Fd()), int(*c.Uid), int(*c.Gid)); err != nil {
					return nil, err
				}
			}
			sockR, sockW, err := newSocketPair(name)
			if err != nil {
				return nil, err
			}
			go func() {
				// Closing the write side when the copy loop ends lets the
				// reader of sockR observe end of stream.
				defer sockW.Close()
				for {
					// copy data from the pipe to the socket using splice(2)
					// (rather than io.Copy) to avoid a needless copy through
					// user space
					n, err := syscall.Splice(int(pipe.(*os.File).Fd()), nil, int(sockW.Fd()), nil, 65535, 0)
					// Stop on any error or when splice reports zero bytes.
					if err != nil || n == 0 {
						return
					}
				}
			}()
			return sockR, nil
		}

		log.Debug("creating stdout pipe")
		var err error
		init.stdout, err = newPipe(cmd.StdoutPipe, "stdout")
		if err != nil {
			log.Error("error creating stdout pipe", "err", err)
			return err
		}

		log.Debug("creating stderr pipe")
		init.stderr, err = newPipe(cmd.StderrPipe, "stderr")
		if err != nil {
			log.Error("error creating stderr pipe", "err", err)
			return err
		}

		if c.OpenStdin {
			// Can't use cmd.StdinPipe() here, since in Go 1.2 it
			// returns an io.WriteCloser with the underlying object
			// being an *exec.closeOnce, neither of which provides
			// a way to convert to an FD.
			log.Debug("creating stdin pipe")
			pipeRead, pipeWrite, err := os.Pipe()
			if err != nil {
				log.Error("creating stdin pipe", "err", err)
				return err
			}
			// The job reads from the pipe; init keeps the write end.
			cmd.Stdin = pipeRead
			init.stdin = pipeWrite
		}
	}

	go runRPCServer()

	// Wait for flynn-host to tell us to start
	init.mtx.Unlock() // Allow calls
	log.Debug("waiting to be resumed")
	<-init.resume
	log.Debug("resuming")
	init.mtx.Lock()

	log.Info("starting the job", "args", cmd.Args)
	// Report a command-path lookup failure deferred from the setup above.
	if cmdErr != nil {
		log.Error("error starting the job", "err", cmdErr)
		init.changeState(StateFailed, cmdErr.Error(), -1)
		init.exit(1)
	}
	if err := cmd.Start(); err != nil {
		log.Error("error starting the job", "err", err)
		init.changeState(StateFailed, err.Error(), -1)
		init.exit(1)
	}
	log.Debug("setting state to running")
	init.process = cmd.Process
	init.changeState(StateRunning, "", -1)

	init.mtx.Unlock() // Allow calls
	// monitor services
	hbs := make([]discoverd.Heartbeater, 0, len(c.Ports))
	for _, port := range c.Ports {
		// Ports without a service definition are not monitored.
		if port.Service == nil {
			continue
		}
		// Shadow log with a child logger carrying the service's details.
		log := log.New("name", port.Service.Name, "port", port.Port, "proto", port.Proto)
		log.Info("monitoring service")
		hb, err := monitor(port, init, c.Env, log)
		if err != nil {
			log.Error("error monitoring service", "err", err)
			os.Exit(70)
		}
		hbs = append(hbs, hb)
	}
	exitCode := babySit(init, hbs)
	log.Info("job exited", "status", exitCode)
	// Re-take the lock only for the state change.
	init.mtx.Lock()
	init.changeState(StateExited, "", exitCode)
	init.mtx.Unlock() // Allow calls

	log.Info("exiting")
	init.exit(exitCode)
	return nil
}