// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. func Exec(container *libcontainer.Config, term Terminal, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { var ( master *os.File console string err error ) // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child syncPipe, err := syncpipe.NewSyncPipe() if err != nil { return -1, err } defer syncPipe.Close() if container.Tty { master, console, err = system.CreateMasterAndConsole() if err != nil { return -1, err } term.SetMaster(master) } command := createCommand(container, console, rootfs, dataPath, os.Args[0], syncPipe.Child(), args) if err := term.Attach(command); err != nil { return -1, err } defer term.Close() if err := command.Start(); err != nil { return -1, err } // Now we passed the pipe to the child, close our side syncPipe.CloseChild() started, err := system.GetProcessStartTime(command.Process.Pid) if err != nil { return -1, err } // Do this before syncing with child so that no children // can escape the cgroup cleaner, err := SetupCgroups(container, command.Process.Pid) if err != nil { command.Process.Kill() command.Wait() return -1, err } if cleaner != nil { defer cleaner.Cleanup() } var networkState network.NetworkState if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil { command.Process.Kill() command.Wait() return -1, err } state := &libcontainer.State{ InitPid: command.Process.Pid, InitStartTime: started, NetworkState: networkState, } if err := libcontainer.SaveState(dataPath, state); err != nil { command.Process.Kill() command.Wait() return -1, err } defer libcontainer.DeleteState(dataPath) // 
Sync with child if err := syncPipe.ReadFromChild(); err != nil { command.Process.Kill() command.Wait() return -1, err } if startCallback != nil { startCallback() } if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { return -1, err } } return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil }
func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) { var ( term execdriver.Terminal err error dataPath = d.containerDir(c.ID) ) if c.ProcessConfig.Tty { term, err = NewTtyConsole(&c.ProcessConfig, pipes) } else { term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) } c.ProcessConfig.Terminal = term container, err := d.createContainer(c) if err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } d.Lock() d.activeContainers[c.ID] = &activeContainer{ container: container, cmd: &c.ProcessConfig.Cmd, } d.Unlock() c.Mounts = append(c.Mounts, execdriver.Mount{ Source: d.initPath, Destination: c.InitPath, Writable: false, Private: true, }) if err := d.generateEnvConfig(c); err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } configPath, err := d.generateLXCConfig(c) if err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } params := []string{ "lxc-start", "-n", c.ID, "-f", configPath, } if c.Network.ContainerID != "" { params = append(params, "--share-net", c.Network.ContainerID, ) } if c.Ipc != nil { if c.Ipc.ContainerID != "" { params = append(params, "--share-ipc", c.Ipc.ContainerID, ) } else if c.Ipc.HostIpc { params = append(params, "--share-ipc", "1", ) } } params = append(params, "--", c.InitPath, ) if c.Network.Interface != nil { params = append(params, "-g", c.Network.Interface.Gateway, "-i", fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), ) } params = append(params, "-mtu", strconv.Itoa(c.Network.Mtu), ) if c.ProcessConfig.User != "" { params = append(params, "-u", c.ProcessConfig.User) } if c.ProcessConfig.Privileged { if d.apparmor { params[0] = path.Join(d.root, "lxc-start-unconfined") } params = append(params, "-privileged") } if c.WorkingDir != "" { params = append(params, "-w", c.WorkingDir) } params = append(params, "--", c.ProcessConfig.Entrypoint) params = append(params, 
c.ProcessConfig.Arguments...) if d.sharedRoot { // lxc-start really needs / to be non-shared, or all kinds of stuff break // when lxc-start unmount things and those unmounts propagate to the main // mount namespace. // What we really want is to clone into a new namespace and then // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork // without exec in go we have to do this horrible shell hack... shellString := "mount --make-rslave /; exec " + utils.ShellQuoteArguments(params) params = []string{ "unshare", "-m", "--", "/bin/sh", "-c", shellString, } } log.Debugf("lxc params %s", params) var ( name = params[0] arg = params[1:] ) aname, err := exec.LookPath(name) if err != nil { aname = name } c.ProcessConfig.Path = aname c.ProcessConfig.Args = append([]string{name}, arg...) if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } if err := c.ProcessConfig.Start(); err != nil { return execdriver.ExitStatus{ExitCode: -1}, err } var ( waitErr error waitLock = make(chan struct{}) ) go func() { if err := c.ProcessConfig.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0 waitErr = err } } close(waitLock) }() terminate := func(terr error) (execdriver.ExitStatus, error) { if c.ProcessConfig.Process != nil { c.ProcessConfig.Process.Kill() c.ProcessConfig.Wait() } return execdriver.ExitStatus{ExitCode: -1}, terr } // Poll lxc for RUNNING status pid, err := d.waitForStart(c, waitLock) if err != nil { return terminate(err) } cgroupPaths, err := cgroupPaths(c.ID) if err != nil { return terminate(err) } state := &libcontainer.State{ InitPid: pid, CgroupPaths: cgroupPaths, } if err := libcontainer.SaveState(dataPath, state); err != nil { return terminate(err) } c.ContainerPid = pid if startCallback != nil { log.Debugf("Invoking startCallback") startCallback(&c.ProcessConfig, pid) } oomKill := false oomKillNotification, err 
:= libcontainer.NotifyOnOOM(state) if err == nil { _, oomKill = <-oomKillNotification log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr) } else { log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err) } <-waitLock // check oom error exitCode := getExitCode(c) if oomKill { exitCode = 137 } return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr }
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console string, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { var ( err error ) // create a pipe so that we can syncronize with the namespaced process and // pass the veth name to the child syncPipe, err := syncpipe.NewSyncPipe() if err != nil { return -1, err } defer syncPipe.Close() command := createCommand(container, console, rootfs, dataPath, os.Args[0], syncPipe.Child(), args) // Note: these are only used in non-tty mode // if there is a tty for the container it will be opened within the namespace and the // fds will be duped to stdin, stdiout, and stderr command.Stdin = stdin command.Stdout = stdout command.Stderr = stderr if err := command.Start(); err != nil { return -1, err } // Now we passed the pipe to the child, close our side syncPipe.CloseChild() started, err := system.GetProcessStartTime(command.Process.Pid) if err != nil { return -1, err } // Do this before syncing with child so that no children // can escape the cgroup cgroupRef, err := SetupCgroups(container, command.Process.Pid) if err != nil { command.Process.Kill() command.Wait() return -1, err } defer cgroupRef.Cleanup() cgroupPaths, err := cgroupRef.Paths() if err != nil { command.Process.Kill() command.Wait() return -1, err } var networkState network.NetworkState if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil { command.Process.Kill() command.Wait() return -1, err } state := &libcontainer.State{ InitPid: command.Process.Pid, InitStartTime: started, NetworkState: networkState, CgroupPaths: cgroupPaths, 
} if err := libcontainer.SaveState(dataPath, state); err != nil { command.Process.Kill() command.Wait() return -1, err } defer libcontainer.DeleteState(dataPath) // Sync with child if err := syncPipe.ReadFromChild(); err != nil { command.Process.Kill() command.Wait() return -1, err } if startCallback != nil { startCallback() } if err := command.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { return -1, err } } return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil }
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work.
// Move this to libcontainer package.
// Exec performs setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
//
// It returns the container init process's exit status (offset by
// EXIT_SIGNAL_OFFSET when signal-killed), or -1 with a non-nil error
// if setup or supervision failed.
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
	var err error

	// create a pipe so that we can syncronize with the namespaced process and
	// pass the state and configuration to the child process
	parent, child, err := newInitPipe()
	if err != nil {
		return -1, err
	}
	defer parent.Close()

	command := createCommand(container, console, dataPath, os.Args[0], child, args)
	// Note: these are only used in non-tty mode
	// if there is a tty for the container it will be opened within the namespace and the
	// fds will be duped to stdin, stdiout, and stderr
	command.Stdin = stdin
	command.Stdout = stdout
	command.Stderr = stderr

	if err := command.Start(); err != nil {
		child.Close()
		return -1, err
	}
	// The child end of the pipe now lives in the init process; close our
	// duplicate so only `parent` remains open on this side.
	child.Close()

	// wait reaps the init process and, when the container shares the host
	// PID namespace, also kills any remaining processes in its cgroup.
	wait := func() (*os.ProcessState, error) {
		ps, err := command.Process.Wait()
		// we should kill all processes in cgroup when init is died if we use
		// host PID namespace
		if !container.Namespaces.Contains(libcontainer.NEWPID) {
			killAllPids(container)
		}
		return ps, err
	}

	// terminate kills and reaps the child before surfacing terr, so error
	// paths below never leak a running init process.
	terminate := func(terr error) (int, error) {
		// TODO: log the errors for kill and wait
		command.Process.Kill()
		wait()
		return -1, terr
	}

	// Recorded so the saved state can distinguish this init process from a
	// later process that reuses the same pid.
	started, err := system.GetProcessStartTime(command.Process.Pid)
	if err != nil {
		return terminate(err)
	}

	// Do this before syncing with child so that no children
	// can escape the cgroup
	cgroupPaths, err := SetupCgroups(container, command.Process.Pid)
	if err != nil {
		return terminate(err)
	}
	defer cgroups.RemovePaths(cgroupPaths)

	var networkState network.NetworkState
	if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
		return terminate(err)
	}

	// send the state to the container's init process then shutdown writes for the parent
	if err := json.NewEncoder(parent).Encode(networkState); err != nil {
		return terminate(err)
	}
	// shutdown writes for the parent side of the pipe
	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
		return terminate(err)
	}

	state := &libcontainer.State{
		InitPid:       command.Process.Pid,
		InitStartTime: started,
		NetworkState:  networkState,
		CgroupPaths:   cgroupPaths,
	}
	if err := libcontainer.SaveState(dataPath, state); err != nil {
		return terminate(err)
	}
	defer libcontainer.DeleteState(dataPath)

	// wait for the child process to fully complete and receive an error message
	// if one was encoutered
	var ierr *initError
	// io.EOF here means the child closed the pipe without reporting an
	// error, i.e. a successful startup.
	if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
		return terminate(err)
	}
	if ierr != nil {
		return terminate(ierr)
	}

	if startCallback != nil {
		startCallback()
	}

	ps, err := wait()
	if err != nil {
		// An *exec.ExitError simply means a non-zero exit status, which we
		// report through the return value below rather than as an error.
		if _, ok := err.(*exec.ExitError); !ok {
			return -1, err
		}
	}
	// waiting for pipe flushing
	command.Wait()

	waitStatus := ps.Sys().(syscall.WaitStatus)
	if waitStatus.Signaled() {
		// Encode "killed by signal N" as EXIT_SIGNAL_OFFSET + N.
		return EXIT_SIGNAL_OFFSET + int(waitStatus.Signal()), nil
	}
	return waitStatus.ExitStatus(), nil
}