// Finalize expects that the setns calls have been setup and that is has joined an // existing namespace func FinalizeSetns(container *libcontainer.Config, args []string) error { // clear the current processes env and replace it with the environment defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } if err := FinalizeNamespace(container); err != nil { return err } if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if container.ProcessLabel != "" { if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return err } } if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { return err } panic("unreachable") }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Config, state *libcontainer.State, args []string) error { // Enter the namespace and then finish setup args, err := getNsEnterCommand(strconv.Itoa(state.InitPid), container, "", args) if err != nil { return err } finalArgs := append([]string{os.Args[0]}, args...) if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { return err } panic("unreachable") }
// ExecIn uses an existing pid and joins the pid's namespaces with the new command. func ExecIn(container *libcontainer.Config, state *libcontainer.State, args []string) error { // TODO(vmarmol): If this gets too long, send it over a pipe to the child. // Marshall the container into JSON since it won't be available in the namespace. containerJson, err := json.Marshal(container) if err != nil { return err } // Enter the namespace and then finish setup finalArgs := []string{os.Args[0], "nsenter", "--nspid", strconv.Itoa(state.InitPid), "--containerjson", string(containerJson), "--"} finalArgs = append(finalArgs, args...) if err := system.Execv(finalArgs[0], finalArgs[0:], os.Environ()); err != nil { return err } panic("unreachable") }
func (l *linuxSetnsInit) Init() error { if err := setupRlimits(l.config.Config); err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if l.config.Config.ProcessLabel != "" { if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
// Run a command in a container after entering the namespace. func NsEnter(container *libcontainer.Config, args []string) error { // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } if err := FinalizeNamespace(container); err != nil { return err } if container.ProcessLabel != "" { if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return err } } if err := system.Execv(args[0], args[0:], container.Env); err != nil { return err } panic("unreachable") }
func (l *linuxStandardInit) Init() error { // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } if err := setupRlimits(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // Signal self if parent is already dead. Does nothing if running in a new // PID namespace, as Getppid will always return 0. if syscall.Getppid() == 1 { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) { defer func() { if err != nil { syncPipe.ReportChildError(err) } }() rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState if err := syncPipe.ReadFromParent(&networkState); err != nil { return err } if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if err := ipc.Initialize(container.IpcNsPath); err != nil { return fmt.Errorf("setup IPC %s", err) } if err := setupNetwork(container, networkState); err != nil { return fmt.Errorf("setup networking %s", err) } if err := setupRoute(container); err != nil { return fmt.Errorf("setup route %s", err) } label.Init() if err := mount.InitializeMountNamespace(rootfs, consolePath, container.RestrictSys, (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { return fmt.Errorf("sethostname %s", err) } } if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return fmt.Errorf("set process label %s", err) } // TODO: (crosbymichael) make this configurable at the Config level if container.RestrictSys { if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } } pdeathSignal, err := system.GetParentDeathSignal() if err != nil { return fmt.Errorf("get parent death signal %s", err) } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } // FinalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := RestoreParentDeathSignal(pdeathSignal); err != nil { return fmt.Errorf("restore parent death signal %s", err) } return system.Execv(args[0], args[0:], os.Environ()) }
// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. if err != nil { // ensure that any data sent from the parent is consumed so it doesn't // receive ECONNRESET when the child writes to the pipe. ioutil.ReadAll(pipe) if err := json.NewEncoder(pipe).Encode(initError{ Message: err.Error(), }); err != nil { panic(err) } } // ensure that this pipe is always closed pipe.Close() }() rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState if err := json.NewDecoder(pipe).Decode(&networkState); err != nil { return err } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(container.Namespaces); err != nil { return err } if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if err := setupNetwork(container, networkState); err != nil { return fmt.Errorf("setup networking %s", err) } if err := setupRoute(container); err != nil { return fmt.Errorf("setup route %s", err) } if err := setupRlimits(container); err != nil { return fmt.Errorf("setup rlimits %s", err) } label.Init() if err := mount.InitializeMountNamespace(rootfs, consolePath, container.RestrictSys, (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) } } if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return fmt.Errorf("set process label %s", err) } // TODO: (crosbymichael) make this configurable at the Config level if container.RestrictSys { if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } } pdeathSignal, err := system.GetParentDeathSignal() if err != nil { return fmt.Errorf("get parent death signal %s", err) } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } // FinalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := RestoreParentDeathSignal(pdeathSignal); err != nil { return fmt.Errorf("restore parent death signal %s", err) } return system.Execv(args[0], args[0:], os.Environ()) }
func (l *linuxStandardInit) Init() error { // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } if err := setupRlimits(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the inital start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparened to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
func initDefault(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err } // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState if err := json.NewDecoder(pipe).Decode(&networkState); err != nil { return err } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(container.Namespaces); err != nil { return err } if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } cloneFlags := GetNamespaceFlags(container.Namespaces) if (cloneFlags & syscall.CLONE_NEWNET) == 0 { if len(container.Networks) != 0 || len(container.Routes) != 0 { return fmt.Errorf("unable to apply network parameters without network namespace") } } else { if err := setupNetwork(container, networkState); err != nil { return fmt.Errorf("setup networking %s", err) } if err := setupRoute(container); err != nil { return fmt.Errorf("setup route %s", err) } } if err := setupRlimits(container); err != nil { return fmt.Errorf("setup rlimits %s", err) } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if (cloneFlags & syscall.CLONE_NEWNS) == 0 { if container.MountConfig != nil { return fmt.Errorf("mount config is set without mount namespace") } } else if err := mount.InitializeMountNamespace(rootfs, consolePath, container.RestrictSys, 0, // Default Root Uid 0, // Default Root Gid (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { return fmt.Errorf("unable to set the hostname without UTS namespace") } if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) } } if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return fmt.Errorf("set process label %s", err) } // TODO: (crosbymichael) make this configurable at the Config level if container.RestrictSys { if (cloneFlags & syscall.CLONE_NEWNS) == 0 { return fmt.Errorf("unable to restrict access to kernel files without mount namespace") } if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } } pdeathSignal, err := system.GetParentDeathSignal() if err != nil { return fmt.Errorf("get parent death signal %s", err) } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } // FinalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := RestoreParentDeathSignal(pdeathSignal); err != nil { return fmt.Errorf("restore parent death signal %s", err) } return system.Execv(args[0], args[0:], os.Environ()) }
func initUserNs(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { return err } // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState if err := json.NewDecoder(pipe).Decode(&networkState); err != nil { return err } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(container.Namespaces); err != nil { return err } if consolePath != "" { if err := console.OpenAndDup("/dev/console"); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } if consolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } } if container.WorkingDir == "" { container.WorkingDir = "/" } if err := setupRlimits(container); err != nil { return fmt.Errorf("setup rlimits %s", err) } cloneFlags := GetNamespaceFlags(container.Namespaces) if container.Hostname != "" { if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { return fmt.Errorf("unable to set the hostname without UTS namespace") } if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) } } if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) } if err := label.SetProcessLabel(container.ProcessLabel); err != nil { return fmt.Errorf("set process label %s", err) } if container.RestrictSys { if (cloneFlags & syscall.CLONE_NEWNS) == 0 { return fmt.Errorf("unable to restrict access to kernel files without mount namespace") } if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } } pdeathSignal, err := system.GetParentDeathSignal() if err != nil { return fmt.Errorf("get parent death signal %s", err) } if err := FinalizeNamespace(container); err != nil { return fmt.Errorf("finalize namespace %s", err) } // FinalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := RestoreParentDeathSignal(pdeathSignal); err != nil { return fmt.Errorf("restore parent death signal %s", err) } return system.Execv(args[0], args[0:], os.Environ()) }