// setUser switches the process to the account named by username by calling
// setresgid(2) and then setresuid(2) with that account's primary GID and UID.
//
// It returns an error when username is empty, when the account cannot be
// looked up, when its Uid or Gid is not a decimal integer, when the account's
// UID is 0, or when either system call fails. No credentials are changed
// unless every preceding check has passed.
//
// NOTE(review): supplementary groups are not touched here; confirm whether
// the caller needs them cleared as well.
func setUser(username string) error {
	if username == "" {
		return errors.New("-username argument missing")
	}
	newUser, err := user.Lookup(username)
	if err != nil {
		return err
	}
	uid, err := strconv.Atoi(newUser.Uid)
	if err != nil {
		return err
	}
	gid, err := strconv.Atoi(newUser.Gid)
	if err != nil {
		return err
	}
	if uid == 0 {
		// Switching "to" root would not drop any privilege, so refuse.
		return errors.New("Do not run the Dominator as root")
	}
	// Change the group IDs before the user IDs: the GID change is attempted
	// while the process still holds its original user identity.
	if err := syscall.Setresgid(gid, gid, gid); err != nil {
		return err
	}
	return syscall.Setresuid(uid, uid, uid)
}
// DropRootTo calls setresgid(2) and setresuid(2) to permanently
// revoke root privileges. The USER, LOGNAME and HOME environment
// variables are updated to match.
//
// u.Gid and u.Uid must be decimal integers; if either fails to parse, the
// parse error is returned and no credentials are changed. The group IDs are
// set before the user IDs. USER and LOGNAME are set to the login name
// (u.Username) and HOME to u.HomeDir. An error from any system call or from
// os.Setenv is returned as-is.
//
// NOTE(review): supplementary groups are not modified here; confirm whether
// callers rely on that.
func DropRootTo(u *user.User) (err error) {
	var uid, gid int
	if gid, err = strconv.Atoi(u.Gid); err != nil {
		return err
	}
	if uid, err = strconv.Atoi(u.Uid); err != nil {
		return err
	}
	if err = syscall.Setresgid(gid, gid, gid); err != nil {
		return err
	}
	if err = syscall.Setresuid(uid, uid, uid); err != nil {
		return err
	}
	// based on what Avahi does.
	// USER and LOGNAME carry the login name; u.Name is the display name and
	// may be empty or contain spaces.
	if err = os.Setenv("USER", u.Username); err != nil {
		return err
	}
	if err = os.Setenv("LOGNAME", u.Username); err != nil {
		return err
	}
	if err = os.Setenv("HOME", u.HomeDir); err != nil {
		return err
	}
	return nil
}
// Setresuid forwards its arguments to syscall.Setresuid, which requests the
// given real (ruid), effective (euid) and saved (suid) user IDs, and returns
// that call's error unchanged.
func Setresuid(ruid, euid, suid int) error {
	err := syscall.Setresuid(ruid, euid, suid)
	return err
}
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("can't load pod", err) return 1 } // Sanity checks if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 1 } ra := p.Manifest.Apps[0] imgName := p.AppNameToImageName(ra.Name) args := ra.App.Exec if len(args) == 0 { log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return 1 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 1 } workDir := "/" if ra.App.WorkingDirectory != "" { workDir = ra.App.WorkingDirectory } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range ra.App.Environment { env = append(env, e.Name+"="+e.Value) } rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs") if err := copyResolv(p); err != nil { log.PrintE("can't copy /etc/resolv.conf", err) return 1 } argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 1 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) for _, mount := range effectiveMounts { var ( err error hostPathInfo os.FileInfo 
targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err) return 1 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err) return 1 } switch { case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 1 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 1 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 1 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 1 } } if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil { log.Error(err) return 1 } var uidResolver, gidResolver user.Resolver var uid, gid int uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil) } if err != nil { // 
give up log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err) return 1 } if uid, _, err = uidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err) return 1 } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err) return 1 } if _, gid, err = gidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err) return 1 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 1 } if err := os.Chdir(workDir); err != nil { log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err) return 1 } // lock the current goroutine to its current OS thread. // This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid, // see https://github.com/golang/go/issues/1435#issuecomment-66054163. runtime.LockOSThread() diag.Printf("setting uid %d gid %d", uid, gid) if err := syscall.Setresgid(gid, gid, gid); err != nil { log.PrintE(fmt.Sprintf("can't set gid %d", gid), err) return 1 } if err := syscall.Setresuid(uid, uid, uid); err != nil { log.PrintE(fmt.Sprintf("can't set uid %d", uid), err) return 1 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 1 } return 0 }
func stage1(rp *stage1commontypes.RuntimePod) int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 254 } root := "." p, err := stage1commontypes.LoadPod(root, uuid, rp) if err != nil { log.PrintE("can't load pod", err) return 254 } if err := p.SaveRuntime(); err != nil { log.FatalE("failed to save runtime parameters", err) } // Sanity checks if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 254 } ra := p.Manifest.Apps[0] imgName := p.AppNameToImageName(ra.Name) args := ra.App.Exec if len(args) == 0 { log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return 254 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 254 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 254 } workDir := "/" if ra.App.WorkingDirectory != "" { workDir = ra.App.WorkingDirectory } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range ra.App.Environment { env = append(env, e.Name+"="+e.Value) } rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs") argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 254 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) /* Process DNS config files * * 
/etc/resolv.conf: four modes * 'host' - bind-mount host's file * 'stage0' - bind-mount the file created by stage0 * 'default' - do nothing (we would respect CNI if fly had networking) * 'none' - do nothing */ switch p.ResolvConfMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/resolv.conf", rfs, "/etc/resolv.conf", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": if err := copyResolv(p); err != nil { log.PrintE("can't copy /etc/resolv.conf", err) return 254 } } /* * /etc/hosts: three modes: * 'host' - bind-mount hosts's file * 'stage0' - bind mount the file created by stage1 * 'default' - create a stub /etc/hosts if needed */ switch p.EtcHostsMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/hosts", rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": effectiveMounts = append(effectiveMounts, flyMount{ filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-hosts"), rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "default": stage2HostsPath := filepath.Join(common.AppRootfsPath(p.Root, ra.Name), "etc", "hosts") if _, err := os.Stat(stage2HostsPath); err != nil && os.IsNotExist(err) { fallbackHosts := []byte("127.0.0.1 localhost localdomain\n") ioutil.WriteFile(stage2HostsPath, fallbackHosts, 0644) } } for _, mount := range effectiveMounts { diag.Printf("Processing %+v", mount) var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err) return 254 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err) return 254 } switch { case (mount.Flags & syscall.MS_REMOUNT) != 
0: { diag.Printf("don't attempt to create files for remount of %q", absTargetPath) } case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 254 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 254 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 254 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 254 } } if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil { log.Error(err) return 254 } var uidResolver, gidResolver user.Resolver var uid, gid int uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err) return 254 } if uid, _, err = uidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err) return 254 } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil) } if err != nil { // give up 
log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err) return 254 } if _, gid, err = gidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err) return 254 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 254 } if err := os.Chdir(workDir); err != nil { log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err) return 254 } // lock the current goroutine to its current OS thread. // This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid, // see https://github.com/golang/go/issues/1435#issuecomment-66054163. runtime.LockOSThread() diag.Printf("setting uid %d gid %d", uid, gid) if err := syscall.Setresgid(gid, gid, gid); err != nil { log.PrintE(fmt.Sprintf("can't set gid %d", gid), err) return 254 } if err := syscall.Setresuid(uid, uid, uid); err != nil { log.PrintE(fmt.Sprintf("can't set uid %d", uid), err) return 254 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 254 } return 0 }
func doChild(rootdir string, progfile string, plan plan, stdin_fd, stdout_fd, stderr_fd [2]int) (error, string) { if os.Getpid() != 1 { return errors.New("not cloned?"), "" } if err := syscall.Dup2(stdin_fd[0], 0); err != nil { return err, "dup2 failed" } syscall.Close(stdin_fd[0]) syscall.Close(stdin_fd[1]) if err := syscall.Dup2(stdout_fd[1], 1); err != nil { return err, "dup2 failed" } syscall.Close(stdout_fd[0]) syscall.Close(stdout_fd[1]) if err := syscall.Dup2(stderr_fd[1], 2); err != nil { return err, "dup2 failed" } syscall.Close(stderr_fd[0]) syscall.Close(stderr_fd[1]) if _, _, err := syscall.Syscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0); err != 0 { return error(err), "PR_SET_PDEATHSIG failed" } if err := syscall.Sethostname([]byte("poe-sandbox")); err != nil { return err, "sethostname failed" } mounts := []mountPoint{ {"none", "/", "", syscall.MS_PRIVATE | syscall.MS_REC, ""}, {rootdir, rootdir, "bind", syscall.MS_BIND | syscall.MS_REC, ""}, // { "none", rootdir + "/proc", "proc", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV, "" }, // { "none", rootdir + "/dev", "devtmpfs", syscall.MS_NOSUID | syscall.MS_NOEXEC, "" }, // { "none", rootdir + "/dev/shm", "tmpfs", syscall.MS_NOSUID | syscall.MS_NODEV, "" }, } for _, point := range mounts { if err := syscall.Mount(point.source, point.target, point.fstype, point.flags, point.data); err != nil { return err, fmt.Sprintf("mount '%s' on '%s' (%s) failed", point.source, point.target, point.fstype) } } if err := syscall.Chroot(rootdir); err != nil { return err, "chroot failed" } if _, err := syscall.Setsid(); err != nil { return err, "setsid failed" } pw, err := user.Lookup(username) if err != nil { return err, "getpwnam failed" } uid, err := strconv.Atoi(pw.Uid) if err != nil { return err, "atoi error" } gid, err := strconv.Atoi(pw.Gid) if err != nil { return err, "atoi error" } // TODO: initgroups if err := syscall.Setresgid(gid, gid, gid); err != nil { return err, 
"setresgid failed" } if err := syscall.Setresuid(uid, uid, uid); err != nil { return err, "setresuid failed" } if err := syscall.Chdir("/tmp"); err != nil { return err, "chdir failed" } // stop os.Stdin.Read(make([]byte, 1)) // be traced if _, _, err := syscall.Syscall(syscall.SYS_PRCTL, PR_SET_NO_NEW_PRIVS, 1, 0); err != 0 { return error(err), "PR_SET_NO_NEW_PRIVS failed" } if err := SetupSeccomp(); err != nil { return err, "seccomp fail" } envp := []string{ "PATH=/opt/bin:/usr/bin", "USER="******"LOGNAME=" + username, } cmdl := make([]string, 0, len(plan.Compiler.Command)+len(plan.Extra)-1) for _, arg := range plan.Compiler.Command { if arg == "PROGRAM" { cmdl = append(cmdl, progfile) } else if arg == "EXTRA" { cmdl = append(cmdl, plan.Extra...) } else { cmdl = append(cmdl, arg) } } if err := syscall.Exec(cmdl[0], cmdl, envp); err != nil { return err, "execve failed" } return errors.New("unreachable"), "" }
func main() { runtime.GOMAXPROCS(8) dec := json.NewDecoder(os.Stdin) var plan plan if err := dec.Decode(&plan); err != nil { panic(fmt.Sprintf("failed to parse plan: %s", err)) } //fmt.Fprintf(os.Stderr, "plan: %+v\n", plan) runtime.LockOSThread() if err := syscall.Setresuid(0, 0, 0); err != nil { poePanic(err, "setuid failed") } if err := InitializeSystemdBus(); err != nil { poePanic(err, "failed to connect to systemd") } runtime.UnlockOSThread() rootdir, errx := PlaygroundCreate(plan.Base, plan.Compiler.Overlay) if errx != nil { poePanic(errx, "playground_create failed") } progfile, errx := PlaygroundCopy(rootdir, plan.Source) if errx != nil { poePanic(errx, "playground_copy failed") } var stdin_fd, stdout_fd, stderr_fd [2]int if err := syscall.Pipe2(stdin_fd[:], 0); err != nil { poePanic(err, "pipe2 failed") } if err := syscall.Pipe2(stdout_fd[:], syscall.O_DIRECT); err != nil { poePanic(err, "pipe2 failed") } if err := syscall.Pipe2(stderr_fd[:], syscall.O_DIRECT); err != nil { poePanic(err, "pipe2 failed") } pid_, _, err := syscall.Syscall(syscall.SYS_CLONE, uintptr(syscall.SIGCHLD|syscall.CLONE_NEWIPC|syscall.CLONE_NEWNS|syscall.CLONE_NEWPID|syscall.CLONE_NEWUTS|syscall.CLONE_NEWNET), 0, 0) pid := int(pid_) if err != 0 { poePanic(error(err), "clone failed") } else if pid == 0 { runtime.LockOSThread() if err, msg := doChild(rootdir, progfile, plan, stdin_fd, stdout_fd, stderr_fd); err != nil { fmt.Fprintf(os.Stderr, "%s (%s)", msg, err.Error()) os.Exit(127) } // unreachable } else { res := doParent(pid, stdin_fd, stdout_fd, stderr_fd) cleanup() var buf bytes.Buffer binary.Write(&buf, binary.LittleEndian, int32(res.result)) binary.Write(&buf, binary.LittleEndian, int32(res.status)) if _, err := os.Stderr.Write(buf.Bytes()); err != nil { poePanic(err, "stderr write failed") } if _, err := os.Stderr.Write([]byte(res.msg)); err != nil { poePanic(err, "stderr write failed") } os.Exit(0) // unreachable } }