Example #1
0
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state.
// This is a low level implementation detail of the reexec and should not be consumed externally.
//
// The pipe and state-directory file descriptors are read from the
// _LIBCONTAINER_INITPIPE and _LIBCONTAINER_STATEDIR environment variables; an
// error is returned immediately if either cannot be parsed as an integer. Any
// later failure (including a recovered panic) is reported to the parent over
// the pipe before the function returns.
func (l *LinuxFactory) StartInitialization() (err error) {
	var pipefd, rootfd int
	for _, pair := range []struct {
		k string
		v *int
	}{
		{"_LIBCONTAINER_INITPIPE", &pipefd},
		{"_LIBCONTAINER_STATEDIR", &rootfd},
	} {
		s := os.Getenv(pair.k)
		fd, convErr := strconv.Atoi(s)
		if convErr != nil {
			return fmt.Errorf("unable to convert %s=%s to int", pair.k, s)
		}
		*pair.v = fd
	}
	var (
		pipe = os.NewFile(uintptr(pipefd), "pipe")
		it   = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
	)
	// clear the current process's environment to clean any libcontainer
	// specific env vars.
	os.Clearenv()

	var i initer
	defer func() {
		// ensure that this pipe is always closed, including when reporting
		// the error to the parent fails part-way through.
		defer pipe.Close()

		// We have an error during the initialization of the container's init,
		// send it back to the parent process in the form of an initError.
		// If container's init succeeded, syscall.Exec will not return, hence
		// this defer function will never be called.
		if _, ok := i.(*linuxStandardInit); ok {
			// Synchronisation only necessary for standard init.
			if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
				// The parent cannot be told; surface both the original
				// failure and the reason it could not be delivered.
				fmt.Fprintln(os.Stderr, err)
				fmt.Fprintln(os.Stderr, werr)
				return
			}
		}
		if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
			fmt.Fprintln(os.Stderr, err)
			fmt.Fprintln(os.Stderr, werr)
			return
		}
	}()
	// Registered after the reporting defer so that it runs first: a panic is
	// converted into err before the error is sent to the parent.
	defer func() {
		if e := recover(); e != nil {
			err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
		}
	}()
	i, err = newContainerInit(it, pipe, rootfd)
	if err != nil {
		return err
	}
	return i.Init()
}
Example #2
0
// StartInitialization is the reexec entry point for a container's init. It
// takes the init pipe file descriptor from _LIBCONTAINER_INITPIPE and the init
// type from _LIBCONTAINER_INITTYPE, wipes the process environment, and then
// builds and runs the initializer.
// It is a low level implementation detail and should not be consumed externally.
func (l *LinuxFactory) StartInitialization() (err error) {
	rawFd := os.Getenv("_LIBCONTAINER_INITPIPE")
	fd, convErr := strconv.Atoi(rawFd)
	if convErr != nil {
		return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", rawFd, convErr)
	}
	initPipe := os.NewFile(uintptr(fd), "pipe")
	kind := initType(os.Getenv("_LIBCONTAINER_INITTYPE"))

	// Drop every libcontainer-specific variable from the environment.
	os.Clearenv()

	defer func() {
		// A failed initialization is reported to the parent process through
		// the pipe; being unable to report it is fatal.
		if err != nil {
			werr := utils.WriteJSON(initPipe, newSystemError(err))
			if werr != nil {
				panic(werr)
			}
		}
		// The pipe is closed whether or not an error was sent.
		initPipe.Close()
	}()

	ci, err := newContainerInit(kind, initPipe)
	if err != nil {
		return err
	}
	return ci.Init()
}
Example #3
0
// StartInitialization is the reexec entry point for a container's init. It
// subscribes to InitContinueSignal, takes the init pipe file descriptor from
// _LIBCONTAINER_INITPIPE and the init type from _LIBCONTAINER_INITTYPE, wipes
// the process environment, and then builds and runs the initializer.
// It is a low level implementation detail and should not be consumed externally.
func (l *LinuxFactory) StartInitialization() (err error) {
	// Subscribe to the continue signal before doing anything else.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, InitContinueSignal)

	rawFd := os.Getenv("_LIBCONTAINER_INITPIPE")
	fd, convErr := strconv.Atoi(rawFd)
	if convErr != nil {
		return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", rawFd, convErr)
	}
	initPipe := os.NewFile(uintptr(fd), "pipe")
	kind := initType(os.Getenv("_LIBCONTAINER_INITTYPE"))

	// Drop every libcontainer-specific variable from the environment.
	os.Clearenv()

	var ci initer
	defer func() {
		// Reaching this point means initialization failed: on success
		// syscall.Exec does not return and this function never runs. The
		// failure is forwarded to the parent process over the pipe.
		_, isStandard := ci.(*linuxStandardInit)
		if isStandard {
			// Only the standard init needs the extra synchronisation message.
			werr := utils.WriteJSON(initPipe, syncT{procError})
			if werr != nil {
				panic(werr)
			}
		}
		werr := utils.WriteJSON(initPipe, newSystemError(err))
		if werr != nil {
			panic(werr)
		}
		// Close the pipe once the error has been delivered.
		initPipe.Close()
	}()

	// Registered last so it runs first: a panic becomes err before the
	// reporting defer above sends it.
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic from initialization: %v, %v", r, string(debug.Stack()))
		}
	}()

	ci, err = newContainerInit(kind, initPipe)
	if err != nil {
		return err
	}
	return ci.Init(sigCh)
}
Example #4
0
// sendConfig writes p.config as JSON to the parent end of the pipe and then
// shuts down the write direction of that pipe. The first error encountered is
// returned.
func (p *initProcess) sendConfig() error {
	writeErr := utils.WriteJSON(p.parentPipe, p.config)
	if writeErr != nil {
		return writeErr
	}
	// Nothing more is written by the parent after the config.
	fd := int(p.parentPipe.Fd())
	return syscall.Shutdown(fd, syscall.SHUT_WR)
}
Example #5
0
// sendError reports an initialization failure to the parent process by writing
// it to pipe as a system error. When i is a *linuxStandardInit, a procError
// synchronisation message is written first. A failure to write either message
// panics.
//
// On successful container init syscall.Exec does not return, so this is only
// reached on the error path.
func (l *LinuxFactory) sendError(i initer, pipe *os.File, err error) {
	// A type assertion on a nil interface reports false, so a nil i simply
	// skips the synchronisation message.
	if _, isStandard := i.(*linuxStandardInit); isStandard {
		// Synchronisation only necessary for standard init.
		werr := utils.WriteJSON(pipe, syncT{procError})
		if werr != nil {
			panic(werr)
		}
	}
	werr := utils.WriteJSON(pipe, newSystemError(err))
	if werr != nil {
		panic(werr)
	}
}
Example #6
0
// saveState writes s as JSON to the state file (stateFilename) inside c.root,
// creating the file or truncating an existing one.
//
// It returns the error from creating or writing the file. If both succeed but
// closing the file fails, the close error is returned, so a state file that
// may not have reached disk is not reported as saved.
func (c *linuxContainer) saveState(s *State) (err error) {
	f, err := os.Create(filepath.Join(c.root, stateFilename))
	if err != nil {
		return err
	}
	defer func() {
		// Keep the write error if there is one; otherwise surface the close error.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	return utils.WriteJSON(f, s)
}
Example #7
0
// marshal writes v as JSON to the file at path, creating the file or
// truncating an existing one.
//
// It returns the error from creating or writing the file. If both succeed but
// closing the file fails, the close error is returned instead of being
// silently dropped.
func marshal(path string, v interface{}) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the write error if there is one; otherwise surface the close error.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	return utils.WriteJSON(f, v)
}
Example #8
0
// start launches the setns command and drives its setup from the parent side.
// In order, it: starts the command, copies any bootstrap data to the parent
// pipe, calls execSetns, adds the process to the configured cgroup paths, sets
// the OOM score adjustment and rlimits, writes the config to the pipe as JSON,
// and shuts down the write side of the pipe. It then reads an optional error
// reported by the child; if one is received the child is waited on and that
// error is returned. Every other failure is wrapped as a system error
// describing the step that failed.
func (p *setnsProcess) start() (err error) {
	// The parent end of the pipe is closed on every return path.
	defer p.parentPipe.Close()
	err = p.cmd.Start()
	// The child pipe end and root dir handle are closed in the parent whether
	// or not Start succeeded; the Start error is checked afterwards.
	p.childPipe.Close()
	p.rootDir.Close()
	if err != nil {
		return newSystemErrorWithCause(err, "starting setns process")
	}
	if p.bootstrapData != nil {
		if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
			return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
		}
	}
	if err = p.execSetns(); err != nil {
		return newSystemErrorWithCause(err, "executing setns process")
	}
	if len(p.cgroupPaths) > 0 {
		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
		}
	}
	// set oom_score_adj
	if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
		return newSystemErrorWithCause(err, "setting oom score")
	}
	// set rlimits, this has to be done here because we lose permissions
	// to raise the limits once we enter a user-namespace
	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
		return newSystemErrorWithCause(err, "setting rlimits for process")
	}
	if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
		return newSystemErrorWithCause(err, "writing config to pipe")
	}

	// Nothing more is written by the parent after the config.
	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
		return newSystemErrorWithCause(err, "calling shutdown on init pipe")
	}
	// wait for the child process to fully complete and receive an error message
	// if one was encountered; io.EOF means the child sent nothing.
	var ierr *genericError
	if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
		return newSystemErrorWithCause(err, "decoding init error from pipe")
	}
	// Must be done after Shutdown so the child will exit and we can wait for it.
	if ierr != nil {
		p.wait()
		return ierr
	}
	return nil
}
Example #9
0
// updateState records process as the container's init process, computes the
// current state via c.currentState, and writes it as JSON to the state file
// (stateFilename) inside c.root. The "checkpoint" entry under c.root is
// removed on a best-effort basis; the result of that removal is ignored.
//
// It returns the error from computing the state, or from creating or writing
// the file. If those succeed but closing the file fails, the close error is
// returned instead of being silently dropped.
func (c *linuxContainer) updateState(process parentProcess) (err error) {
	c.initProcess = process
	state, err := c.currentState()
	if err != nil {
		return err
	}
	f, err := os.Create(filepath.Join(c.root, stateFilename))
	if err != nil {
		return err
	}
	defer func() {
		// Keep the write error if there is one; otherwise surface the close error.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	os.Remove(filepath.Join(c.root, "checkpoint"))
	return utils.WriteJSON(f, state)
}
Example #10
0
// syncParentHooks sends to the given pipe a JSON payload which indicates that
// the parent should execute pre-start hooks. It then waits for the parent to
// indicate that it is cleared to resume.
//
// An error is returned if the request cannot be written, if the parent closes
// the pipe before answering (io.EOF), if the answer cannot be decoded, or if
// the decoded answer is anything other than procResume.
func syncParentHooks(pipe io.ReadWriter) error {
	// Tell parent.
	if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
		return err
	}
	// Wait for parent to give the all-clear.
	var procSync syncT
	if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
		if err == io.EOF {
			return fmt.Errorf("parent closed synchronisation channel")
		}
		// Any other decode failure is reported as-is rather than being
		// swallowed or mislabelled.
		return err
	}
	// The flag is only meaningful after a successful decode, so it is
	// validated here rather than inside the error branch.
	if procSync.Type != procResume {
		return fmt.Errorf("invalid synchronisation flag from parent")
	}
	return nil
}
Example #11
0
// start launches the setns command and drives its setup from the parent side.
// In order, it: starts the command, copies any bootstrap data to the parent
// pipe, calls execSetns, adds the process to the configured cgroup paths,
// writes the config to the pipe as JSON, sets the OOM score adjustment, and
// shuts down the write side of the pipe. It then reads an optional error
// reported by the child; if one is received the child is waited on and the
// error is returned. All failures are wrapped with newSystemError.
func (p *setnsProcess) start() (err error) {
	// The parent end of the pipe is closed on every return path.
	defer p.parentPipe.Close()
	err = p.cmd.Start()
	// The child pipe end is closed in the parent whether or not Start
	// succeeded; the Start error is checked afterwards.
	p.childPipe.Close()
	if err != nil {
		return newSystemError(err)
	}
	if p.bootstrapData != nil {
		if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
			return newSystemError(err)
		}
	}
	if err = p.execSetns(); err != nil {
		return newSystemError(err)
	}
	if len(p.cgroupPaths) > 0 {
		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
			return newSystemError(err)
		}
	}
	if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
		return newSystemError(err)
	}
	// set oom_score_adj
	if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
		return newSystemError(err)
	}

	// Nothing more is written by the parent after the config.
	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
		return newSystemError(err)
	}
	// wait for the child process to fully complete and receive an error message
	// if one was encountered; io.EOF means the child sent nothing.
	var ierr *genericError
	if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
		return newSystemError(err)
	}
	// Must be done after Shutdown so the child will exit and we can wait for it.
	if ierr != nil {
		p.wait()
		return newSystemError(ierr)
	}
	return nil
}
Example #12
0
// start launches the setns command and drives its setup from the parent side.
// In order, it: starts the command, copies any bootstrap data to the parent
// pipe, calls execSetns, adds the process to the configured cgroup paths, sets
// the OOM score adjustment and rlimits, and writes the config to the pipe as
// JSON. It then handles sync messages from the child via parseSync, shuts down
// the write side of the pipe, and, if parseSync reported an error, waits on
// the child and returns that error.
func (p *setnsProcess) start() (err error) {
	// The parent end of the pipe is closed on every return path.
	defer p.parentPipe.Close()
	err = p.cmd.Start()
	// The child pipe end and root dir handle are closed in the parent whether
	// or not Start succeeded; the Start error is checked afterwards.
	p.childPipe.Close()
	p.rootDir.Close()
	if err != nil {
		return newSystemErrorWithCause(err, "starting setns process")
	}
	if p.bootstrapData != nil {
		if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
			return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
		}
	}
	if err = p.execSetns(); err != nil {
		return newSystemErrorWithCause(err, "executing setns process")
	}
	if len(p.cgroupPaths) > 0 {
		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
		}
	}
	// set oom_score_adj
	if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
		return newSystemErrorWithCause(err, "setting oom score")
	}
	// set rlimits, this has to be done here because we lose permissions
	// to raise the limits once we enter a user-namespace
	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
		return newSystemErrorWithCause(err, "setting rlimits for process")
	}
	if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
		return newSystemErrorWithCause(err, "writing config to pipe")
	}

	// NOTE(review): the callback is presumably invoked once per sync message
	// read from the pipe, with ierr carrying any error reported by the child
	// or returned by the callback — confirm against parseSync.
	ierr := parseSync(p.parentPipe, func(sync *syncT) error {
		switch sync.Type {
		case procConsole:
			// Console handshake: request the fd, receive it, hand it to the
			// process's consoleChan, then acknowledge.
			if err := writeSync(p.parentPipe, procConsoleReq); err != nil {
				return newSystemErrorWithCause(err, "writing syncT 'request fd'")
			}

			masterFile, err := utils.RecvFd(p.parentPipe)
			if err != nil {
				return newSystemErrorWithCause(err, "getting master pty from child pipe")
			}

			if p.process.consoleChan == nil {
				// TODO: Don't panic here, do something more sane.
				panic("consoleChan is nil")
			}
			p.process.consoleChan <- masterFile

			if err := writeSync(p.parentPipe, procConsoleAck); err != nil {
				return newSystemErrorWithCause(err, "writing syncT 'ack fd'")
			}
		case procReady:
			// This shouldn't happen.
			panic("unexpected procReady in setns")
		case procHooks:
			// This shouldn't happen.
			panic("unexpected procHooks in setns")
		default:
			return newSystemError(fmt.Errorf("invalid JSON payload from child"))
		}
		return nil
	})

	// A Shutdown failure is returned in preference to ierr.
	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
		return newSystemErrorWithCause(err, "calling shutdown on init pipe")
	}
	// Must be done after Shutdown so the child will exit and we can wait for it.
	if ierr != nil {
		p.wait()
		return ierr
	}
	return nil
}
Example #13
0
// sendConfig writes p.config to the parent end of the pipe for the container's
// init process and returns any write error.
//
// utils.WriteJSON is used instead of JSON Encode because there might be a
// problem in the JSON decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
func (p *initProcess) sendConfig() error {
	if writeErr := utils.WriteJSON(p.parentPipe, p.config); writeErr != nil {
		return writeErr
	}
	return nil
}
Example #14
0
// start launches the container's init command and drives its setup from the
// parent side. In order, it: starts the command, records the process's pipe
// file descriptors, applies the cgroup manager to the pid, runs any prestart
// hooks, creates the network interfaces, and sends the config over the parent
// pipe. It then processes synchronisation messages from the child until
// procStart, an error payload, or EOF, and finally shuts down the write side
// of the pipe.
//
// An error is returned if any step fails, if the child never reached the
// procReady/procRun exchange, or if the child reported an error (in which case
// the child is waited on first). Once the cgroup manager has been applied, any
// error return also destroys the cgroup via the deferred cleanup.
func (p *initProcess) start() (err error) {
	// The parent end of the pipe is closed on every return path.
	defer p.parentPipe.Close()
	err = p.cmd.Start()
	p.process.ops = p
	p.childPipe.Close()
	if err != nil {
		p.process.ops = nil
		return newSystemError(err)
	}
	// Save the standard descriptor names before the container process
	// can potentially move them (e.g., via dup2()).  If we don't do this now,
	// we won't know at checkpoint time which file descriptor to look up.
	fds, err := getPipeFds(p.pid())
	if err != nil {
		return newSystemError(err)
	}
	p.setExternalDescriptors(fds)
	// Do this before syncing with child so that no children
	// can escape the cgroup
	if err := p.manager.Apply(p.pid()); err != nil {
		return newSystemError(err)
	}
	defer func() {
		// err is the named result, so this observes every error return below.
		if err != nil {
			// TODO: should not be the responsibility to call here
			p.manager.Destroy()
		}
	}()
	if p.config.Config.Hooks != nil {
		s := configs.HookState{
			Version: p.container.config.Version,
			ID:      p.container.id,
			Pid:     p.pid(),
			Root:    p.config.Config.Rootfs,
		}
		for _, hook := range p.config.Config.Hooks.Prestart {
			if err := hook.Run(s); err != nil {
				return newSystemError(err)
			}
		}
	}
	if err := p.createNetworkInterfaces(); err != nil {
		return newSystemError(err)
	}
	if err := p.sendConfig(); err != nil {
		return newSystemError(err)
	}
	var (
		procSync syncT
		sentRun  bool
		ierr     *genericError
	)

	// A json.Decoder may read ahead of the value it returns and keeps the
	// surplus in its own buffer. Use one decoder for the whole exchange so
	// that buffered bytes are not thrown away between messages.
	dec := json.NewDecoder(p.parentPipe)
loop:
	for {
		if err := dec.Decode(&procSync); err != nil {
			if err == io.EOF {
				break loop
			}
			return newSystemError(err)
		}
		switch procSync.Type {
		case procStart:
			break loop
		case procReady:
			if err := p.manager.Set(p.config.Config); err != nil {
				return newSystemError(err)
			}
			// Sync with child.
			if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
				return newSystemError(err)
			}
			sentRun = true
		case procError:
			// wait for the child process to fully complete and receive an error message
			// if one was encountered
			if err := dec.Decode(&ierr); err != nil && err != io.EOF {
				return newSystemError(err)
			}
			if ierr != nil {
				break loop
			}
			// Programmer error.
			panic("No error following JSON procError payload.")
		default:
			return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
		}
	}
	if !sentRun {
		return newSystemError(fmt.Errorf("could not synchronise with container process"))
	}
	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
		return newSystemError(err)
	}
	// Must be done after Shutdown so the child will exit and we can wait for it.
	if ierr != nil {
		p.wait()
		return newSystemError(ierr)
	}
	return nil
}
Example #15
0
// start launches the container's init command and drives its setup from the
// parent side. In order, it: starts the command, copies the bootstrap data to
// the parent pipe, calls execSetns, records the process's pipe file
// descriptors, applies the cgroup manager to the pid, creates the network
// interfaces, and sends the config over the parent pipe. It then processes
// synchronisation messages from the child:
//
//   - procReady: set cgroup config, OOM score and rlimits; run prestart hooks
//     when no new mount namespace is configured; reply with procRun.
//   - procHooks: run prestart hooks; reply with procResume.
//   - procError: read the error payload that follows and stop.
//
// Finally the write side of the pipe is shut down. An error is returned if any
// step fails, if procRun was never sent, if a new mount namespace is
// configured but procResume was never sent, or if the child reported an error
// (in which case the child is waited on first).
//
// The result is named so that the deferred cleanup can observe it: once the
// cgroup manager has been applied, any error return destroys the cgroup.
func (p *initProcess) start() (retErr error) {
	// The parent end of the pipe is closed on every return path.
	defer p.parentPipe.Close()
	err := p.cmd.Start()
	p.process.ops = p
	p.childPipe.Close()
	p.rootDir.Close()
	if err != nil {
		p.process.ops = nil
		return newSystemErrorWithCause(err, "starting init process command")
	}
	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
	}
	if err := p.execSetns(); err != nil {
		return newSystemErrorWithCause(err, "running exec setns process for init")
	}
	// Save the standard descriptor names before the container process
	// can potentially move them (e.g., via dup2()).  If we don't do this now,
	// we won't know at checkpoint time which file descriptor to look up.
	fds, err := getPipeFds(p.pid())
	if err != nil {
		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
	}
	p.setExternalDescriptors(fds)
	// Do this before syncing with child so that no children
	// can escape the cgroup
	if err := p.manager.Apply(p.pid()); err != nil {
		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
	}
	defer func() {
		// Check the named result, not the local err: every failure below is
		// returned from a narrower scope and never assigned to the local.
		if retErr != nil {
			// TODO: should not be the responsibility to call here
			p.manager.Destroy()
		}
	}()
	if err := p.createNetworkInterfaces(); err != nil {
		return newSystemErrorWithCause(err, "creating nework interfaces")
	}
	if err := p.sendConfig(); err != nil {
		return newSystemErrorWithCause(err, "sending config to init process")
	}
	var (
		procSync   syncT
		sentRun    bool
		sentResume bool
		ierr       *genericError
	)

	// One decoder is shared by the whole exchange so that any bytes it has
	// buffered ahead are not lost between messages.
	dec := json.NewDecoder(p.parentPipe)
loop:
	for {
		if err := dec.Decode(&procSync); err != nil {
			if err == io.EOF {
				break loop
			}
			return newSystemErrorWithCause(err, "decoding sync type from init pipe")
		}
		switch procSync.Type {
		case procReady:
			if err := p.manager.Set(p.config.Config); err != nil {
				return newSystemErrorWithCause(err, "setting cgroup config for ready process")
			}
			// set oom_score_adj
			if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
				return newSystemErrorWithCause(err, "setting oom score for ready process")
			}
			// set rlimits, this has to be done here because we lose permissions
			// to raise the limits once we enter a user-namespace
			if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
				return newSystemErrorWithCause(err, "setting rlimits for ready process")
			}
			// call prestart hooks
			if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
				if p.config.Config.Hooks != nil {
					s := configs.HookState{
						Version: p.container.config.Version,
						ID:      p.container.id,
						Pid:     p.pid(),
						Root:    p.config.Config.Rootfs,
					}
					for i, hook := range p.config.Config.Hooks.Prestart {
						if err := hook.Run(s); err != nil {
							return newSystemErrorWithCausef(err, "running prestart hook %d", i)
						}
					}
				}
			}
			// Sync with child.
			if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
				return newSystemErrorWithCause(err, "writing syncT run type")
			}
			sentRun = true
		case procHooks:
			if p.config.Config.Hooks != nil {
				s := configs.HookState{
					Version:    p.container.config.Version,
					ID:         p.container.id,
					Pid:        p.pid(),
					Root:       p.config.Config.Rootfs,
					BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"),
				}
				for i, hook := range p.config.Config.Hooks.Prestart {
					if err := hook.Run(s); err != nil {
						return newSystemErrorWithCausef(err, "running prestart hook %d", i)
					}
				}
			}
			// Sync with child.
			if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
				return newSystemErrorWithCause(err, "writing syncT resume type")
			}
			sentResume = true
		case procError:
			// wait for the child process to fully complete and receive an error message
			// if one was encountered
			if err := dec.Decode(&ierr); err != nil && err != io.EOF {
				return newSystemErrorWithCause(err, "decoding proc error from init")
			}
			if ierr != nil {
				break loop
			}
			// Programmer error.
			panic("No error following JSON procError payload.")
		default:
			return newSystemError(fmt.Errorf("invalid JSON payload from child"))
		}
	}
	if !sentRun {
		// NOTE(review): ierr may be a nil *genericError here (EOF before any
		// procError); confirm newSystemErrorWithCause tolerates that.
		return newSystemErrorWithCause(ierr, "container init")
	}
	if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
		return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
	}
	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
		return newSystemErrorWithCause(err, "shutting down init pipe")
	}
	// Must be done after Shutdown so the child will exit and we can wait for it.
	if ierr != nil {
		p.wait()
		return ierr
	}
	return nil
}
Example #16
0
// writeSync sends a single synchronisation message of the given type down
// pipe, encoded as JSON. The error from the write, if any, is returned
// unchanged.
func writeSync(pipe io.Writer, sync syncType) error {
	msg := syncT{sync}
	return utils.WriteJSON(pipe, msg)
}
Example #17
0
// sendConfig writes p.config as JSON to the parent end of the pipe for the
// container's init process and returns any write error. It does not shut down
// the pipe.
func (p *initProcess) sendConfig() error {
	if writeErr := utils.WriteJSON(p.parentPipe, p.config); writeErr != nil {
		return writeErr
	}
	return nil
}