// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // We always read this as it is a way to sync with the parent as well ns.logger.Printf("reading from sync pipe fd %d\n", syncPipe.child.Fd()) context, err := syncPipe.ReadFromParent() if err != nil { syncPipe.Close() return err } ns.logger.Println("received context from parent") syncPipe.Close() if consolePath != "" { ns.logger.Printf("setting up %s as console\n", consolePath) if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } label.Init() ns.logger.Println("setup mount namespace") if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } if err := finalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } if profile := container.Context["apparmor_profile"]; profile != "" { ns.logger.Printf("setting apparmor profile %s\n", profile) if err := apparmor.ApplyProfile(os.Getpid(), profile); err != nil { return err } } runtime.LockOSThread() if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { return fmt.Errorf("SetProcessLabel label %s", err) } ns.logger.Printf("execing %s\n", args[0]) return system.Execv(args[0], args[0:], container.Env) }
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // We always read this as it is a way to sync with the parent as well context, err := syncPipe.ReadFromParent() if err != nil { syncPipe.Close() return err } syncPipe.Close() if console != "" { slave, err := system.OpenTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) } if err := dupSlave(slave); err != nil { return fmt.Errorf("dup2 slave %s", err) } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if console != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } // this is our best effort to let the process know that the parent has died and that it // should it should act on it how it sees fit if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil { return fmt.Errorf("parent death signal %s", err) } if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } if err := finalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil { return err } return system.Execv(args[0], args[0:], container.Env) }
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // We always read this as it is a way to sync with the parent as well context, err := syncPipe.ReadFromParent() if err != nil { syncPipe.Close() return err } syncPipe.Close() if console != "" { // close pipes so that we can replace it with the pty closeStdPipes() slave, err := system.OpenTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) } if err := dupSlave(slave); err != nil { return fmt.Errorf("dup2 slave %s", err) } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if console != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } /* if err := system.ParentDeathSignal(); err != nil { return fmt.Errorf("parent death signal %s", err) } */ if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } if err := finalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } return system.Execv(args[0], args[0:], container.Env) }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Config, state *libcontainer.State, args []string) error { // TODO(vmarmol): If this gets too long, send it over a pipe to the child. // Marshall the container into JSON since it won't be available in the namespace. containerJson, err := json.Marshal(container) if err != nil { return err } // Enter the namespace and then finish setup finalArgs := []string{os.Args[0], "nsenter", "--nspid", strconv.Itoa(state.InitPid), "--containerjson", string(containerJson), "--"} finalArgs = append(finalArgs, args...) if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { return err } panic("unreachable") }
// NsEnter is run after entering the namespace. func NsEnter(container *libcontainer.Container, processLabel string, nspid int, args []string) error { // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } if err := FinalizeNamespace(container); err != nil { return err } if err := label.SetProcessLabel(processLabel); err != nil { return err } if err := system.Execv(args[0], args[0:], container.Env); err != nil { return err } panic("unreachable") }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Container, nspid int, args []string) error { // TODO(vmarmol): If this gets too long, send it over a pipe to the child. // Marshall the container into JSON since it won't be available in the namespace. containerJson, err := json.Marshal(container) if err != nil { return err } // TODO(vmarmol): Move this to the container JSON. processLabel, err := label.GetPidCon(nspid) if err != nil { return err } // Enter the namespace and then finish setup finalArgs := []string{os.Args[0], "nsenter", strconv.Itoa(nspid), processLabel, string(containerJson)} finalArgs = append(finalArgs, args...) if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { return err } panic("unreachable") }
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, console string, syncPipe *SyncPipe, args []string) error { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // We always read this as it is a way to sync with the parent as well ns.logger.Printf("reading from sync pipe fd %d\n", syncPipe.child.Fd()) context, err := syncPipe.ReadFromParent() if err != nil { syncPipe.Close() return err } ns.logger.Println("received context from parent") syncPipe.Close() if console != "" { ns.logger.Printf("setting up %s as console\n", console) slave, err := system.OpenTerminal(console, syscall.O_RDWR) if err != nil { return fmt.Errorf("open terminal %s", err) } if err := dupSlave(slave); err != nil { return fmt.Errorf("dup2 slave %s", err) } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if console != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } // this is our best effort to let the process know that the parent has died and that it // should it should act on it how it sees fit if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil { return fmt.Errorf("parent death signal %s", err) } ns.logger.Println("setup mount namespace") if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } if err := finalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } if profile := container.Context["apparmor_profile"]; profile != "" { ns.logger.Printf("setting apparmor profile %s\n", profile) if err := apparmor.ApplyProfile(os.Getpid(), profile); err != nil { return err } } runtime.LockOSThread() if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { return fmt.Errorf("SetProcessLabel label %s", err) } ns.logger.Printf("execing %s\n", args[0]) return system.Execv(args[0], args[0:], container.Env) }
// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, syncPipe *SyncPipe, args []string) error { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well context, err := syncPipe.ReadFromParent() if err != nil { syncPipe.Close() return err } syncPipe.Close() if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } label.Init() if err := mount.InitializeMountNamespace(rootfs, consolePath, container); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } } runtime.LockOSThread() if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err) } if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { return fmt.Errorf("set process label %s", err) } if container.Context["restrictions"] != "" { if err := restrict.Restrict("proc", "sys"); err != nil { return err } } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } return system.Execv(args[0], args[0:], container.Env) }
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) { defer func() { if err != nil { syncPipe.ReportChildError(err) } }() rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well networkState, err := syncPipe.ReadFromParent() if err != nil { return err } if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := system.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if err := setupNetwork(container, networkState); err != nil { return fmt.Errorf("setup networking %s", err) } if err := setupRoute(container); err != nil { return fmt.Errorf("setup route %s", err) } label.Init() if err := mount.InitializeMountNamespace(rootfs, consolePath, (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } } runtime.LockOSThread() if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return fmt.Errorf("set process label %s", err) } // TODO: (crosbymichael) make this configurable at the Config level if container.RestrictSys { if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus", "sys"); err != nil { return err } } pdeathSignal, err := system.GetParentDeathSignal() if err != nil { return fmt.Errorf("get parent death signal %s", err) } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } // FinalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := RestoreParentDeathSignal(pdeathSignal); err != nil { return fmt.Errorf("restore parent death signal %s", err) } return system.Execv(args[0], args[0:], container.Env) }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return -1, err } for key, enabled := range container.Namespaces { // skip the PID namespace on unshare because it it not supported if enabled && key != "NEWPID" { if ns := libcontainer.GetNamespace(key); ns != nil { if err := system.Unshare(ns.Value); err != nil { return -1, err } } } } fds, err := getNsFds(nspid, container) closeFds := func() { for _, f := range fds { system.Closefd(f) } } if err != nil { closeFds() return -1, err } processLabel, err := label.GetPidCon(nspid) if err != nil { closeFds() return -1, err } // foreach namespace fd, use setns to join an existing container's namespaces for _, fd := range fds { if fd > 0 { if err := system.Setns(fd, 0); err != nil { closeFds() return -1, fmt.Errorf("setns %s", err) } } system.Closefd(fd) } // if the container has a new pid and mount namespace we need to // remount proc and sys to pick up the changes if container.Namespaces["NEWNS"] && container.Namespaces["NEWPID"] { pid, err := system.Fork() if err != nil { return -1, err } if pid == 0 { // TODO: make all raw syscalls to be fork safe if err := system.Unshare(syscall.CLONE_NEWNS); err != nil { return -1, err } if err := mount.RemountProc(); err != nil { return -1, fmt.Errorf("remount proc %s", err) } if err := mount.RemountSys(); err != nil { return -1, fmt.Errorf("remount sys %s", err) } goto dropAndExec } proc, err := os.FindProcess(pid) if err != nil { return -1, err } state, err := proc.Wait() if err != nil { return -1, err } os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) } dropAndExec: if err := FinalizeNamespace(container); err != nil { return -1, err } err = label.SetProcessLabel(processLabel) if err != nil { return -1, err } if err := system.Execv(args[0], args[0:], container.Env); err != nil { return -1, err } panic("unreachable") }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func (ns *linuxNs) ExecIn(container *libcontainer.Container, nspid int, args []string) (int, error) { for _, ns := range container.Namespaces { if err := system.Unshare(ns.Value); err != nil { return -1, err } } fds, err := ns.getNsFds(nspid, container) closeFds := func() { for _, f := range fds { system.Closefd(f) } } if err != nil { closeFds() return -1, err } // foreach namespace fd, use setns to join an existing container's namespaces for _, fd := range fds { if fd > 0 { if err := system.Setns(fd, 0); err != nil { closeFds() return -1, fmt.Errorf("setns %s", err) } } system.Closefd(fd) } // if the container has a new pid and mount namespace we need to // remount proc and sys to pick up the changes if container.Namespaces.Contains("NEWNS") && container.Namespaces.Contains("NEWPID") { pid, err := system.Fork() if err != nil { return -1, err } if pid == 0 { // TODO: make all raw syscalls to be fork safe if err := system.Unshare(syscall.CLONE_NEWNS); err != nil { return -1, err } if err := remountProc(); err != nil { return -1, fmt.Errorf("remount proc %s", err) } if err := remountSys(); err != nil { return -1, fmt.Errorf("remount sys %s", err) } goto dropAndExec } proc, err := os.FindProcess(pid) if err != nil { return -1, err } state, err := proc.Wait() if err != nil { return -1, err } os.Exit(state.Sys().(syscall.WaitStatus).ExitStatus()) } dropAndExec: if err := finalizeNamespace(container); err != nil { return -1, err } if err := system.Execv(args[0], args[0:], container.Env); err != nil { return -1, err } panic("unreachable") }