func (l *linuxStandardInit) Init() error { // do not inherit the parent's session keyring sessKeyId, err := keyctl.JoinSessionKeyring("") if err != nil { return err } // make session keyring searcheable // without user ns we need 'UID' search permissions // with user ns we need 'other' search permissions if err := keyctl.ModKeyringPerm(sessKeyId, 0xffffffff, 0x080008); err != nil { return err } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } if err := setupRlimits(l.config.Config); err != nil { return err } if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } for key, value := range l.config.Config.Sysctl { if err := writeSystemProperty(key, value); err != nil { return err } } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if l.config.Config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err } } // Tell our parent that we're ready to Execv. This must be done before the // Seccomp rules have been applied, because we need to be able to read and // write to a socket. if err := syncParentReady(l.pipe); err != nil { return err } if l.config.Config.Seccomp != nil { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the inital start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparened to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
func (l *linuxStandardInit) Init() error { // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } if err := setupRlimits(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } for key, value := range l.config.Config.Sysctl { if err := writeSystemProperty(key, value); err != nil { return err } } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if l.config.Config.Seccomp != nil { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the inital start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparened to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
func (l *linuxStandardInit) Init() error { ringname, keepperms, newperms := l.getSessionRingParams() // do not inherit the parent's session keyring sessKeyId, err := keyctl.JoinSessionKeyring(ringname) if err != nil { return err } // make session keyring searcheable if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { return err } var console *linuxConsole if l.config.Console != "" { console = newConsoleFromPath(l.config.Console) if err := console.dupStdio(); err != nil { return err } } if console != nil { if err := system.Setctty(); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config, console, l.pipe); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { return err } for key, value := range l.config.Config.Sysctl { if err := writeSystemProperty(key, value); err != nil { return err } } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err } } // Tell our parent that we're ready to Execv. This must be done before the // Seccomp rules have been applied, because we need to be able to read and // write to a socket. if err := syncParentReady(l.pipe); err != nil { return err } // Without NoNewPrivileges seccomp is a privileged operation, so we need to // do this before dropping capabilities; otherwise do it as late as possible // just before execve so as few syscalls take place after it as possible. if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the inital start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparened to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) }
func (l *linuxStandardInit) Init() error { if !l.config.Config.NoNewKeyring { ringname, keepperms, newperms := l.getSessionRingParams() // do not inherit the parent's session keyring sessKeyId, err := keys.JoinSessionKeyring(ringname) if err != nil { return err } // make session keyring searcheable if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { return err } } if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { return err } label.Init() // prepareRootfs() can be executed only for a new mount namespace. if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := prepareRootfs(l.pipe, l.config.Config); err != nil { return err } } // Set up the console. This has to be done *before* we finalize the rootfs, // but *after* we've given the user the chance to set up all of the mounts // they wanted. if l.config.CreateConsole { if err := setupConsole(l.pipe, l.config, true); err != nil { return err } if err := system.Setctty(); err != nil { return err } } // Finish the rootfs setup. if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := finalizeRootfs(l.config.Config); err != nil { return err } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { return err } if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { return err } for key, value := range l.config.Config.Sysctl { if err := writeSystemProperty(key, value); err != nil { return err } } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { return err } } for _, path := range l.config.Config.MaskPaths { if err := maskPath(path); err != nil { return err } } pdeath, err := system.GetParentDeathSignal() if err != nil { return err } if l.config.NoNewPrivileges { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return err } } // Tell our parent that we're ready to Execv. This must be done before the // Seccomp rules have been applied, because we need to be able to read and // write to a socket. if err := syncParentReady(l.pipe); err != nil { return err } // Without NoNewPrivileges seccomp is a privileged operation, so we need to // do this before dropping capabilities; otherwise do it as late as possible // just before execve so as few syscalls take place after it as possible. if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } } if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { return err } // compare the parent from the initial start of the init process and make sure that it did not change. // if the parent changes that means it died and we were reparented to something else so we should // just kill ourself and not cause problems for someone else. if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } // check for the arg before waiting to make sure it exists and it is returned // as a create time error. name, err := exec.LookPath(l.config.Args[0]) if err != nil { return err } // close the pipe to signal that we have completed our init. l.pipe.Close() // wait for the fifo to be opened on the other side before // exec'ing the users process. fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0) if err != nil { return newSystemErrorWithCause(err, "openat exec fifo") } if _, err := syscall.Write(fd, []byte("0")); err != nil { return newSystemErrorWithCause(err, "write 0 exec fifo") } if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return newSystemErrorWithCause(err, "init seccomp") } } if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { return newSystemErrorWithCause(err, "exec user process") } return nil }