// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *LinuxFactory) StartInitialization() (err error) { var pipefd, rootfd int for _, pair := range []struct { k string v *int }{ {"_LIBCONTAINER_INITPIPE", &pipefd}, {"_LIBCONTAINER_STATEDIR", &rootfd}, } { s := os.Getenv(pair.k) i, err := strconv.Atoi(s) if err != nil { return fmt.Errorf("unable to convert %s=%s to int", pair.k, s) } *pair.v = i } var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) ) // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() var i initer defer func() { // We have an error during the initialization of the container's init, // send it back to the parent process in the form of an initError. // If container's init successed, syscall.Exec will not return, hence // this defer function will never be called. if _, ok := i.(*linuxStandardInit); ok { // Synchronisation only necessary for standard init. if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { fmt.Fprintln(os.Stderr, err) return } } if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil { fmt.Fprintln(os.Stderr, err) return } // ensure that this pipe is always closed pipe.Close() }() defer func() { if e := recover(); e != nil { err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack())) } }() i, err = newContainerInit(it, pipe, rootfd) if err != nil { return err } return i.Init() }
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *LinuxFactory) StartInitialization() (err error) { fdStr := os.Getenv("_LIBCONTAINER_INITPIPE") pipefd, err := strconv.Atoi(fdStr) if err != nil { return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err) } var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) ) // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. if err != nil { if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil { panic(err) } } // ensure that this pipe is always closed pipe.Close() }() i, err := newContainerInit(it, pipe) if err != nil { return err } return i.Init() }
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *LinuxFactory) StartInitialization() (err error) { // start the signal handler as soon as we can s := make(chan os.Signal, 1) signal.Notify(s, InitContinueSignal) fdStr := os.Getenv("_LIBCONTAINER_INITPIPE") pipefd, err := strconv.Atoi(fdStr) if err != nil { return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err) } var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) ) // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() var i initer defer func() { // We have an error during the initialization of the container's init, // send it back to the parent process in the form of an initError. // If container's init successed, syscall.Exec will not return, hence // this defer function will never be called. if _, ok := i.(*linuxStandardInit); ok { // Synchronisation only necessary for standard init. if err := utils.WriteJSON(pipe, syncT{procError}); err != nil { panic(err) } } if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil { panic(err) } // ensure that this pipe is always closed pipe.Close() }() defer func() { if e := recover(); e != nil { err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack())) } }() i, err = newContainerInit(it, pipe) if err != nil { return err } return i.Init(s) }
func (p *initProcess) sendConfig() error { // send the state to the container's init process then shutdown writes for the parent if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return err } // shutdown writes for the parent side of the pipe return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR) }
func (l *LinuxFactory) sendError(i initer, pipe *os.File, err error) { // We have an error during the initialization of the container's init, // send it back to the parent process in the form of an initError. // If container's init successed, syscall.Exec will not return, hence // this defer function will never be called. if i != nil { if _, ok := i.(*linuxStandardInit); ok { // Synchronisation only necessary for standard init. if err := utils.WriteJSON(pipe, syncT{procError}); err != nil { panic(err) } } } if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil { panic(err) } }
func (c *linuxContainer) saveState(s *State) error { f, err := os.Create(filepath.Join(c.root, stateFilename)) if err != nil { return err } defer f.Close() return utils.WriteJSON(f, s) }
func marshal(path string, v interface{}) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() return utils.WriteJSON(f, v) }
func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.childPipe.Close() p.rootDir.Close() if err != nil { return newSystemErrorWithCause(err, "starting setns process") } if p.bootstrapData != nil { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemErrorWithCause(err, "copying bootstrap data to pipe") } } if err = p.execSetns(); err != nil { return newSystemErrorWithCause(err, "executing setns process") } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting rlimits for process") } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemErrorWithCause(err, "writing config to pipe") } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "calling shutdown on init pipe") } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemErrorWithCause(err, "decoding init error from pipe") } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return ierr } return nil }
func (c *linuxContainer) updateState(process parentProcess) error { c.initProcess = process state, err := c.currentState() if err != nil { return err } f, err := os.Create(filepath.Join(c.root, stateFilename)) if err != nil { return err } defer f.Close() os.Remove(filepath.Join(c.root, "checkpoint")) return utils.WriteJSON(f, state) }
// syncParentHooks sends to the given pipe a JSON payload which indicates that // the parent should execute pre-start hooks. It then waits for the parent to // indicate that it is cleared to resume. func syncParentHooks(pipe io.ReadWriter) error { // Tell parent. if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil { return err } // Wait for parent to give the all-clear. var procSync syncT if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { if err == io.EOF { return fmt.Errorf("parent closed synchronisation channel") } if procSync.Type != procResume { return fmt.Errorf("invalid synchronisation flag from parent") } } return nil }
func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.childPipe.Close() if err != nil { return newSystemError(err) } if p.bootstrapData != nil { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemError(err) } } if err = p.execSetns(); err != nil { return newSystemError(err) } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemError(err) } } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemError(err) } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemError(err) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return newSystemError(ierr) } return nil }
func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.childPipe.Close() p.rootDir.Close() if err != nil { return newSystemErrorWithCause(err, "starting setns process") } if p.bootstrapData != nil { if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemErrorWithCause(err, "copying bootstrap data to pipe") } } if err = p.execSetns(); err != nil { return newSystemErrorWithCause(err, "executing setns process") } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting rlimits for process") } if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { return newSystemErrorWithCause(err, "writing config to pipe") } ierr := parseSync(p.parentPipe, func(sync *syncT) error { switch sync.Type { case procConsole: if err := writeSync(p.parentPipe, procConsoleReq); err != nil { return newSystemErrorWithCause(err, "writing syncT 'request fd'") } masterFile, err := utils.RecvFd(p.parentPipe) if err != nil { return newSystemErrorWithCause(err, "getting master pty from child pipe") } if p.process.consoleChan == nil { // TODO: Don't panic here, do something more sane. panic("consoleChan is nil") } p.process.consoleChan <- masterFile if err := writeSync(p.parentPipe, procConsoleAck); err != nil { return newSystemErrorWithCause(err, "writing syncT 'ack fd'") } case procReady: // This shouldn't happen. panic("unexpected procReady in setns") case procHooks: // This shouldn't happen. panic("unexpected procHooks in setns") default: return newSystemError(fmt.Errorf("invalid JSON payload from child")) } return nil }) if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "calling shutdown on init pipe") } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return ierr } return nil }
func (p *initProcess) sendConfig() error { // send the config to the container's init process, we don't use JSON Encode // here because there might be a problem in JSON decoder in some cases, see: // https://github.com/docker/docker/issues/14203#issuecomment-174177790 return utils.WriteJSON(p.parentPipe, p.config) }
func (p *initProcess) start() (err error) { defer p.parentPipe.Close() err = p.cmd.Start() p.process.ops = p p.childPipe.Close() if err != nil { p.process.ops = nil return newSystemError(err) } // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. fds, err := getPipeFds(p.pid()) if err != nil { return newSystemError(err) } p.setExternalDescriptors(fds) // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { return newSystemError(err) } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } }() if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, } for _, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemError(err) } } } if err := p.createNetworkInterfaces(); err != nil { return newSystemError(err) } if err := p.sendConfig(); err != nil { return newSystemError(err) } var ( procSync syncT sentRun bool ierr *genericError ) loop: for { if err := json.NewDecoder(p.parentPipe).Decode(&procSync); err != nil { if err == io.EOF { break loop } return newSystemError(err) } switch procSync.Type { case procStart: break loop case procReady: if err := p.manager.Set(p.config.Config); err != nil { return newSystemError(err) } // Sync with child. if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { return newSystemError(err) } sentRun = true case procError: // wait for the child process to fully complete and receive an error message // if one was encoutered if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } if ierr != nil { break loop } // Programmer error. panic("No error following JSON procError payload.") default: return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child")) } } if !sentRun { return newSystemError(fmt.Errorf("could not synchronise with container process")) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemError(err) } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return newSystemError(ierr) } return nil }
func (p *initProcess) start() error { defer p.parentPipe.Close() err := p.cmd.Start() p.process.ops = p p.childPipe.Close() p.rootDir.Close() if err != nil { p.process.ops = nil return newSystemErrorWithCause(err, "starting init process command") } if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return err } if err := p.execSetns(); err != nil { return newSystemErrorWithCause(err, "running exec setns process for init") } // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. fds, err := getPipeFds(p.pid()) if err != nil { return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) } p.setExternalDescriptors(fds) // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { return newSystemErrorWithCause(err, "applying cgroup configuration for process") } defer func() { if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } }() if err := p.createNetworkInterfaces(); err != nil { return newSystemErrorWithCause(err, "creating nework interfaces") } if err := p.sendConfig(); err != nil { return newSystemErrorWithCause(err, "sending config to init process") } var ( procSync syncT sentRun bool sentResume bool ierr *genericError ) dec := json.NewDecoder(p.parentPipe) loop: for { if err := dec.Decode(&procSync); err != nil { if err == io.EOF { break loop } return newSystemErrorWithCause(err, "decoding sync type from init pipe") } switch procSync.Type { case procReady: if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score for ready process") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting rlimits for ready process") } // call prestart hooks if !p.config.Config.Namespaces.Contains(configs.NEWNS) { if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, } for i, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemErrorWithCausef(err, "running prestart hook %d", i) } } } } // Sync with child. if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { return newSystemErrorWithCause(err, "reading syncT run type") } sentRun = true case procHooks: if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, ID: p.container.id, Pid: p.pid(), Root: p.config.Config.Rootfs, BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"), } for i, hook := range p.config.Config.Hooks.Prestart { if err := hook.Run(s); err != nil { return newSystemErrorWithCausef(err, "running prestart hook %d", i) } } } // Sync with child. if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil { return newSystemErrorWithCause(err, "reading syncT resume type") } sentResume = true case procError: // wait for the child process to fully complete and receive an error message // if one was encoutered if err := dec.Decode(&ierr); err != nil && err != io.EOF { return newSystemErrorWithCause(err, "decoding proc error from init") } if ierr != nil { break loop } // Programmer error. panic("No error following JSON procError payload.") default: return newSystemError(fmt.Errorf("invalid JSON payload from child")) } } if !sentRun { return newSystemErrorWithCause(ierr, "container init") } if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process")) } if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { return newSystemErrorWithCause(err, "shutting down init pipe") } // Must be done after Shutdown so the child will exit and we can wait for it. if ierr != nil { p.wait() return ierr } return nil }
// writeSync is used to write to a synchronisation pipe. An error is returned // if there was a problem writing the payload. func writeSync(pipe io.Writer, sync syncType) error { if err := utils.WriteJSON(pipe, syncT{sync}); err != nil { return err } return nil }
func (p *initProcess) sendConfig() error { // send the state to the container's init process then shutdown writes for the parent return utils.WriteJSON(p.parentPipe, p.config) }