// findBinPath takes a binary path and returns a the absolute path of the // binary relative to the app rootfs. This can be passed to ExecStart on the // app's systemd service file directly. func findBinPath(p *stage1commontypes.Pod, appName types.ACName, app types.App, workDir string, bin string) (string, error) { var binPath string switch { // absolute path, just use it case filepath.IsAbs(bin): binPath = bin // non-absolute path containing a slash, look in the working dir case strings.Contains(bin, "/"): binPath = filepath.Join(workDir, bin) // filename, search in the app's $PATH default: absRoot, err := filepath.Abs(p.Root) if err != nil { return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } appRootfs := common.AppRootfsPath(absRoot, appName) appPathDirs := appSearchPaths(p, workDir, app) appPath := strings.Join(appPathDirs, ":") binPath, err = lookupPathInsideApp(bin, appPath, appRootfs, workDir) if err != nil { return "", errwrap.Wrap(fmt.Errorf("error looking up %q", bin), err) } } return binPath, nil }
func appHasMountpoints(podPath string, appName types.ACName) (bool, error) { appRootfs := common.AppRootfsPath(podPath, appName) // add a filepath separator so we don't match the appRootfs path appRootfs += string(filepath.Separator) mi, err := os.Open("/proc/self/mountinfo") if err != nil { return false, err } defer mi.Close() sc := bufio.NewScanner(mi) for sc.Scan() { line := sc.Text() lineResult := strings.Split(line, " ") if len(lineResult) < 7 { return false, fmt.Errorf("not enough fields from line %q: %+v", line, lineResult) } mp := lineResult[4] if strings.HasPrefix(mp, appRootfs) { return true, nil } } if err := sc.Err(); err != nil { return false, err } return false, nil }
// FindBinPath takes a binary path and returns a the absolute path of the // binary relative to the app rootfs. This can be passed to ExecStart on the // app's systemd service file directly. func FindBinPath(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (string, error) { if len(ra.App.Exec) == 0 { return "", errors.New("app has no executable") } bin := ra.App.Exec[0] var binPath string switch { // absolute path, just use it case filepath.IsAbs(bin): binPath = bin // non-absolute path containing a slash, look in the working dir case strings.Contains(bin, "/"): binPath = filepath.Join(ra.App.WorkingDirectory, bin) // filename, search in the app's $PATH default: absRoot, err := filepath.Abs(p.Root) if err != nil { return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } appRootfs := common.AppRootfsPath(absRoot, ra.Name) appPathDirs := appSearchPaths(p, ra.App.WorkingDirectory, *ra.App) appPath := strings.Join(appPathDirs, ":") binPath, err = lookupPathInsideApp(bin, appPath, appRootfs, ra.App.WorkingDirectory) if err != nil { return "", errwrap.Wrap(fmt.Errorf("error looking up %q", bin), err) } } return binPath, nil }
func copyResolv(p *stage1commontypes.Pod) error { ra := p.Manifest.Apps[0] stage1Rootfs := common.Stage1RootfsPath(p.Root) resolvPath := filepath.Join(stage1Rootfs, "etc", "rkt-resolv.conf") appRootfs := common.AppRootfsPath(p.Root, ra.Name) targetEtc := filepath.Join(appRootfs, "etc") targetResolvPath := filepath.Join(targetEtc, "resolv.conf") _, err := os.Stat(resolvPath) switch { case os.IsNotExist(err): return nil case err != nil: return err } _, err = os.Stat(targetResolvPath) if err != nil && !os.IsNotExist(err) { return err } return fileutil.CopyRegularFile(resolvPath, targetResolvPath) }
func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.RuntimeApp) error { appName := ra.Name sharedVolPath := common.SharedVolumesPath(root) if err := os.MkdirAll(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil { return errwrap.Wrap(errors.New("could not create shared volumes directory"), err) } if err := os.Chmod(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil { return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) } imageManifest := p.Images[appName.String()] mounts, err := stage1initcommon.GenerateMounts(ra, p.Manifest.Volumes, stage1initcommon.ConvertedFromDocker(imageManifest)) if err != nil { return err } for _, m := range mounts { absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } absAppRootfs := common.AppRootfsPath(absRoot, appName) if err != nil { return fmt.Errorf(`could not evaluate absolute path for application rootfs in app: %v`, appName) } mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Mount.Path) if err != nil { return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } absDestination := filepath.Join(absAppRootfs, mntPath) shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &m.Volume, m.DockerImplicit); err != nil { return err } var source string switch m.Volume.Kind { case "host": source = m.Volume.Source case "empty": source = filepath.Join(common.SharedVolumesPath(root), m.Volume.Name.String()) default: return fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind) } if cleanedSource, err := filepath.EvalSymlinks(source); err != nil { return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err) } else if err := ensureDestinationExists(cleanedSource, absDestination); err != nil { return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err) } else if err := doBindMount(cleanedSource, absDestination, m.ReadOnly, m.Volume.Recursive); err != nil { return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Mount.Path, source, absDestination), err) } } return nil }
func AppAddMounts(p *stage1commontypes.Pod, ra *schema.RuntimeApp, enterCmd []string) error { sharedVolPath, err := common.CreateSharedVolumesPath(p.Root) if err != nil { return err } vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } imageManifest := p.Images[ra.Name.String()] mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) if err != nil { log.FatalE("Could not generate mounts", err) os.Exit(254) } absRoot, err := filepath.Abs(p.Root) if err != nil { log.FatalE("could not determine pod's absolute path", err) } appRootfs := common.AppRootfsPath(absRoot, ra.Name) // This logic is mostly copied from appToNspawnArgs // TODO(cdc): deduplicate for _, m := range mounts { shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) // Evaluate symlinks within the app's rootfs - otherwise absolute // symlinks will be wrong. mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) if err != nil { log.Fatalf("Could not evaluate path %v: %v", m.Mount.Path, err) } mntAbsPath := filepath.Join(appRootfs, mntPath) // Create the stage1 destination if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil { log.FatalE("could not prepare mountpoint", err) } err = AppAddOneMount(p, ra, m.Source(absRoot), m.Mount.Path, m.ReadOnly, enterCmd) if err != nil { log.FatalE("Unable to setup app mounts", err) } } return nil }
func mountSharedVolumes(p *stage1commontypes.Pod, ra *schema.RuntimeApp) error { appName := ra.Name sharedVolPath, err := common.CreateSharedVolumesPath(p.Root) if err != nil { return err } imageManifest := p.Images[appName.String()] mounts, err := stage1initcommon.GenerateMounts(ra, p.Manifest.Volumes, stage1initcommon.ConvertedFromDocker(imageManifest)) if err != nil { return err } for _, m := range mounts { absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } absAppRootfs := common.AppRootfsPath(absRoot, appName) if err != nil { return fmt.Errorf(`could not evaluate absolute path for application rootfs in app: %v`, appName) } mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Mount.Path) if err != nil { return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } absDestination := filepath.Join(absAppRootfs, mntPath) shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &m.Volume, m.DockerImplicit); err != nil { return err } source := m.Source(p.Root) if cleanedSource, err := filepath.EvalSymlinks(source); err != nil { return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err) } else if err := ensureDestinationExists(cleanedSource, absDestination); err != nil { return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err) } else if err := doBindMount(cleanedSource, absDestination, m.ReadOnly, m.Volume.Recursive); err != nil { return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Mount.Path, source, absDestination), err) } } return nil }
// parseUserGroup parses the User and Group fields of an App and returns its // UID and GID. // The User and Group fields accept several formats: // 1. the hardcoded string "root" // 2. a path // 3. a number // 4. a name in reference to /etc/{group,passwd} in the image // See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema func parseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (int, int, error) { var uidResolver, gidResolver user.Resolver var uid, gid int var err error root := common.AppRootfsPath(p.Root, ra.Name) uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(root, ra.App.User, &p.UidRange) } if err != nil { uidResolver, err = user.IDsFromEtc(root, ra.App.User, "") } if err != nil { // give up return -1, -1, errwrap.Wrap(fmt.Errorf("invalid user %q", ra.App.User), err) } if uid, _, err = uidResolver.IDs(); err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure user %q", ra.App.User), err) } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(root, ra.App.Group, &p.UidRange) } if err != nil { gidResolver, err = user.IDsFromEtc(root, "", ra.App.Group) } if err != nil { // give up return -1, -1, errwrap.Wrap(fmt.Errorf("invalid group %q", ra.App.Group), err) } if _, gid, err = gidResolver.IDs(); err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure group %q", ra.App.Group), err) } return uid, gid, nil }
// appToNspawnArgs transforms the given app manifest, with the given associated // app name, into a subset of applicable systemd-nspawn argument func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { var args []string appName := ra.Name app := ra.App sharedVolPath := common.SharedVolumesPath(p.Root) if err := os.MkdirAll(sharedVolPath, sharedVolPerm); err != nil { return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err) } if err := os.Chmod(sharedVolPath, sharedVolPerm); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) } vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } mounts := GenerateMounts(ra, vols) for _, m := range mounts { vol := vols[m.Volume] if vol.Kind == "empty" { p := filepath.Join(sharedVolPath, vol.Name.String()) if err := os.MkdirAll(p, sharedVolPerm); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not create shared volume %q", vol.Name), err) } if err := os.Chown(p, *vol.UID, *vol.GID); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not change owner of %q", p), err) } mod, err := strconv.ParseUint(*vol.Mode, 8, 32) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err) } if err := os.Chmod(p, os.FileMode(mod)); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", p), err) } } opt := make([]string, 4) if IsMountReadOnly(vol, app.MountPoints) { opt[0] = "--bind-ro=" } else { opt[0] = "--bind=" } absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } switch vol.Kind { case "host": opt[1] = vol.Source case "empty": opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String()) default: return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) } opt[2] = ":" appRootfs := common.AppRootfsPath(absRoot, appName) mntPath, err := evaluateAppMountPath(appRootfs, m.Path) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err) } opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) args = append(args, strings.Join(opt, "")) } for _, i := range app.Isolators { switch v := i.Value().(type) { case types.LinuxCapabilitiesSet: var caps []string // TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily. for _, c := range v.Set() { caps = append(caps, string(c)) } if i.Name == types.LinuxCapabilitiesRetainSetName { capList := strings.Join(caps, ",") args = append(args, "--capability="+capList) } } } return args, nil }
func stage1(rp *stage1commontypes.RuntimePod) int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 254 } root := "." p, err := stage1commontypes.LoadPod(root, uuid, rp) if err != nil { log.PrintE("can't load pod", err) return 254 } if err := p.SaveRuntime(); err != nil { log.FatalE("failed to save runtime parameters", err) } // Sanity checks if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 254 } ra := p.Manifest.Apps[0] imgName := p.AppNameToImageName(ra.Name) args := ra.App.Exec if len(args) == 0 { log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return 254 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 254 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 254 } workDir := "/" if ra.App.WorkingDirectory != "" { workDir = ra.App.WorkingDirectory } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range ra.App.Environment { env = append(env, e.Name+"="+e.Value) } rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs") argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 254 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) /* Process DNS config files * * /etc/resolv.conf: four modes * 'host' - bind-mount host's file * 'stage0' - bind-mount the file created by stage0 * 'default' - do nothing (we would respect CNI if fly had networking) * 'none' - do nothing */ switch p.ResolvConfMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/resolv.conf", rfs, "/etc/resolv.conf", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": if err := copyResolv(p); err != nil { log.PrintE("can't copy /etc/resolv.conf", err) return 254 } } /* * /etc/hosts: three modes: * 'host' - bind-mount hosts's file * 'stage0' - bind mount the file created by stage1 * 'default' - create a stub /etc/hosts if needed */ switch p.EtcHostsMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/hosts", rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": effectiveMounts = append(effectiveMounts, flyMount{ filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-hosts"), rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "default": stage2HostsPath := filepath.Join(common.AppRootfsPath(p.Root, ra.Name), "etc", "hosts") if _, err := os.Stat(stage2HostsPath); err != nil && os.IsNotExist(err) { fallbackHosts := []byte("127.0.0.1 localhost localdomain\n") ioutil.WriteFile(stage2HostsPath, fallbackHosts, 0644) } } for _, mount := range effectiveMounts { diag.Printf("Processing %+v", mount) var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err) return 254 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err) return 254 } switch { case (mount.Flags & syscall.MS_REMOUNT) != 0: { diag.Printf("don't attempt to create files for remount of %q", absTargetPath) } case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 254 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 254 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 254 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 254 } } if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil { log.Error(err) return 254 } var uidResolver, gidResolver user.Resolver var uid, gid int uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err) return 254 } if uid, _, err = uidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err) return 254 } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err) return 254 } if _, gid, err = gidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err) return 254 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 254 } if err := os.Chdir(workDir); err != nil { log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err) return 254 } // lock the current goroutine to its current OS thread. // This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid, // see https://github.com/golang/go/issues/1435#issuecomment-66054163. runtime.LockOSThread() diag.Printf("setting uid %d gid %d", uid, gid) if err := syscall.Setresgid(gid, gid, gid); err != nil { log.PrintE(fmt.Sprintf("can't set gid %d", gid), err) return 254 } if err := syscall.Setresuid(uid, uid, uid); err != nil { log.PrintE(fmt.Sprintf("can't set uid %d", uid), err) return 254 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 254 } return 0 }
func (uw *UnitWriter) AppUnit( ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions, opts ...*unit.UnitOption, ) { if uw.err != nil { return } flavor, systemdVersion, err := GetFlavor(uw.p) if err != nil { uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) return } app := ra.App appName := ra.Name imgName := uw.p.AppNameToImageName(appName) if len(app.Exec) == 0 { uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if uw.p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL) } envFilePath := EnvFilePath(uw.p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { uw.err = err return } if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) return } u, g, err := parseUserGroup(uw.p, ra, uidRange) if err != nil { uw.err = err return } if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil { uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err) return } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { uw.err = err return } execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts = append(opts, []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), }...) if len(supplementaryGroups) > 0 { opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups) } if supportsNotify(uw.p, appName.String()) { opts = append(opts, unit.NewUnitOption("Service", "Type", "notify")) } if !insecureOptions.DisableCapabilities { opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " "))) } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) // Apply seccomp isolator, if any and not opt-ing out; // see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter= if !insecureOptions.DisableSeccomp { var forceNoNewPrivileges bool unprivileged := (u != 0) opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators) if err != nil { uw.err = err return } // Seccomp filters require NoNewPrivileges for unprivileged apps, that may override // manifest annotation. if forceNoNewPrivileges { noNewPrivileges = true } } opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges))) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range uw.p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs. if err != nil { uw.err = err return } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := uw.p.Images[appName.String()] mounts := GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { uw.err = err return } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } if len(rwDirs) > 0 { opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs) } // Restrict access to sensitive paths (eg. procfs and sysfs entries). if !insecureOptions.DisablePaths { opts = protectKernelTunables(opts, appName, systemdVersion) } // Generate default device policy for the app, as well as the list of allowed devices. // For kvm flavor, devices are VM-specific and restricting them is not strictly needed. if !insecureOptions.DisablePaths && flavor != "kvm" { opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed")) deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange) if err != nil { uw.err = err return } for _, dev := range deviceAllows { opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev)) } } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name) return } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } doWithIsolator := func(isolator string, f func() error) bool { ok, err := cgroup.IsIsolatorSupported(isolator) if err != nil { uw.err = err return true } if !ok { fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator) } if err := f(); err != nil { uw.err = err return true } return false } exit := false for _, i := range app.Isolators { if exit { return } switch v := i.Value().(type) { case *types.ResourceMemory: exit = doWithIsolator("memory", func() error { if v.Limit() == nil { return nil } opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value())))) return nil }) case *types.ResourceCPU: exit = doWithIsolator("cpu", func() error { if v.Limit() == nil { return nil } if v.Limit().Value() > resource.MaxMilliValue { return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String()) } quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%" opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota)) return nil }) } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol) return } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*uw.p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err) return } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err) return } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err) return } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...) uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName)) }
// TODO(iaguis): RmConfig? func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName, podPID int) error { p, err := stage1types.LoadPod(dir, uuid) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } pm := p.Manifest var mutable bool ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable") if ok { mutable, err = strconv.ParseBool(ms) if err != nil { return errwrap.Wrap(errors.New("error parsing mutable annotation"), err) } } if !mutable { return errors.New("immutable pod: cannot remove application") } app := pm.Apps.Get(*appName) if app == nil { return fmt.Errorf("error: nonexistent app %q", *appName) } treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(dir, *appName)) if err != nil { return err } eep, err := getStage1Entrypoint(dir, enterEntrypoint) if err != nil { return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) } args := []string{ uuid.String(), appName.String(), filepath.Join(common.Stage1RootfsPath(dir), eep), strconv.Itoa(podPID), } if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil { return err } if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil { return err } appInfoDir := common.AppInfoPath(dir, *appName) if err := os.RemoveAll(appInfoDir); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } if usesOverlay { appRootfs := common.AppRootfsPath(dir, *appName) if err := syscall.Unmount(appRootfs, 0); err != nil { return err } ts := filepath.Join(dir, "overlay", string(treeStoreID)) if err := os.RemoveAll(ts); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } } if err := os.RemoveAll(common.AppPath(dir, *appName)); err != nil { return err } appStatusPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "status", appName.String()) if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) { return err } envPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String()) if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) { return err } removeAppFromPodManifest(pm, appName) if err := updatePodManifest(dir, pm); err != nil { return err } return nil }
// appToNspawnArgs transforms the given app manifest, with the given associated // app name, into a subset of applicable systemd-nspawn argument func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOptions Stage1InsecureOptions) ([]string, error) { var args []string appName := ra.Name app := ra.App sharedVolPath := common.SharedVolumesPath(p.Root) if err := os.MkdirAll(sharedVolPath, SharedVolPerm); err != nil { return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err) } if err := os.Chmod(sharedVolPath, SharedVolPerm); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) } vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } imageManifest := p.Images[appName.String()] mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err) } for _, m := range mounts { shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } appRootfs := common.AppRootfsPath(absRoot, appName) // TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well. // Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd // version is bumped. mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } mntAbsPath := filepath.Join(appRootfs, mntPath) if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil { return nil, err } opt := make([]string, 6) if m.ReadOnly { opt[0] = "--bind-ro=" } else { opt[0] = "--bind=" } switch m.Volume.Kind { case "host": opt[1] = m.Volume.Source case "empty": opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), m.Volume.Name.String()) default: return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind) } opt[2] = ":" opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) opt[4] = ":" // If Recursive is not set, default to recursive. recursive := true if m.Volume.Recursive != nil { recursive = *m.Volume.Recursive } // rbind/norbind options exist since systemd-nspawn v226 if recursive { opt[5] = "rbind" } else { opt[5] = "norbind" } args = append(args, strings.Join(opt, "")) } if !insecureOptions.DisableCapabilities { capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return nil, err } capList := strings.Join(capabilitiesStr, ",") args = append(args, "--capability="+capList) } return args, nil }
func RmApp(cfg RmConfig) error { pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String()) if err != nil { return errwrap.Wrap(errors.New("error loading pod"), err) } defer pod.Close() debug("locking sandbox manifest") if err := pod.ExclusiveLockManifest(); err != nil { return errwrap.Wrap(errors.New("failed to lock sandbox manifest"), err) } defer pod.UnlockManifest() pm, err := pod.SandboxManifest() if err != nil { return errwrap.Wrap(errors.New("cannot remove application, sandbox validation failed"), err) } app := pm.Apps.Get(*cfg.AppName) if app == nil { return fmt.Errorf("error: nonexistent app %q", *cfg.AppName) } if cfg.PodPID > 0 { // Call app-stop and app-rm entrypoint only if the pod is still running. // Otherwise, there's not much we can do about it except unmounting/removing // the file system. args := []string{ fmt.Sprintf("--debug=%t", cfg.Debug), fmt.Sprintf("--app=%s", cfg.AppName), } ce := CrossingEntrypoint{ PodPath: cfg.PodPath, PodPID: cfg.PodPID, AppName: cfg.AppName.String(), EntrypointName: appStopEntrypoint, EntrypointArgs: args, Interactive: false, } if err := ce.Run(); err != nil { status, err := common.GetExitStatus(err) // ignore nonexistent units failing to stop. Exit status 5 // comes from systemctl and means the unit doesn't exist if err != nil { return err } else if status != 5 { return fmt.Errorf("exit status %d", status) } } ce.EntrypointName = appRmEntrypoint if err := ce.Run(); err != nil { return err } } if cfg.UsesOverlay { treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cfg.PodPath, *cfg.AppName)) if err != nil { return err } appRootfs := common.AppRootfsPath(cfg.PodPath, *cfg.AppName) if err := syscall.Unmount(appRootfs, 0); err != nil { return err } ts := filepath.Join(cfg.PodPath, "overlay", string(treeStoreID)) if err := os.RemoveAll(ts); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } } appInfoDir := common.AppInfoPath(cfg.PodPath, *cfg.AppName) if err := os.RemoveAll(appInfoDir); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } if err := os.RemoveAll(common.AppPath(cfg.PodPath, *cfg.AppName)); err != nil { return err } appStatusPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "status", cfg.AppName.String()) if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) { return err } envPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", cfg.AppName.String()) if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) { return err } for i, app := range pm.Apps { if app.Name == *cfg.AppName { pm.Apps = append(pm.Apps[:i], pm.Apps[i+1:]...) break } } return pod.UpdateManifest(pm, cfg.PodPath) }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } envFilePath := EnvFilePath(p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil { return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) } u, g, err := parseUserGroup(p, ra, uidRange) if err != nil { return err } if err := generateSysusers(p, ra, u, g, uidRange); err != nil { return errwrap.Wrap(errors.New("unable to generate sysusers"), err) } binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0]) if err != nil { return err } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return err } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", workDir), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")), unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")), unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), } // Restrict access to sensitive paths (eg. procfs) opts = protectSystemFiles(opts, appName) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return err } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := p.Images[appName.String()] for _, m := range GenerateMounts(ra, vols, imageManifest) { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { return err } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " "))) if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
func RmApp(cfg RmConfig) error { pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String()) if err != nil { return errwrap.Wrap(errors.New("error loading pod"), err) } defer pod.Close() debug("locking pod manifest") if err := pod.ExclusiveManifestLock(); err != nil { return errwrap.Wrap(errors.New("failed to lock pod manifest"), err) } defer pod.ManifestUnlock() _, pm, err := pod.PodManifest() if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } var mutable bool ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable") if ok { mutable, err = strconv.ParseBool(ms) if err != nil { return errwrap.Wrap(errors.New("error parsing mutable annotation"), err) } } if !mutable { return errors.New("immutable pod: cannot remove application") } app := pm.Apps.Get(*cfg.AppName) if app == nil { return fmt.Errorf("error: nonexistent app %q", *cfg.AppName) } if cfg.PodPID > 0 { // Call app-stop and app-rm entrypoint only if the pod is still running. // Otherwise, there's not much we can do about it except unmounting/removing // the file system. args := []string{ fmt.Sprintf("--app=%s", cfg.AppName), } ce := CrossingEntrypoint{ PodPath: cfg.PodPath, PodPID: cfg.PodPID, AppName: cfg.AppName.String(), EntrypointName: appStopEntrypoint, EntrypointArgs: args, Interactive: false, } if err := ce.Run(); err != nil { status, err := common.GetExitStatus(err) // ignore nonexistent units failing to stop. Exit status 5 // comes from systemctl and means the unit doesn't exist if err != nil { return err } else if status != 5 { return fmt.Errorf("exit status %d", status) } } ce.EntrypointName = appRmEntrypoint if err := ce.Run(); err != nil { return err } } if cfg.UsesOverlay { treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cfg.PodPath, *cfg.AppName)) if err != nil { return err } appRootfs := common.AppRootfsPath(cfg.PodPath, *cfg.AppName) if err := syscall.Unmount(appRootfs, 0); err != nil { return err } ts := filepath.Join(cfg.PodPath, "overlay", string(treeStoreID)) if err := os.RemoveAll(ts); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } } appInfoDir := common.AppInfoPath(cfg.PodPath, *cfg.AppName) if err := os.RemoveAll(appInfoDir); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } if err := os.RemoveAll(common.AppPath(cfg.PodPath, *cfg.AppName)); err != nil { return err } appStatusPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "status", cfg.AppName.String()) if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) { return err } envPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", cfg.AppName.String()) if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) { return err } removeAppFromPodManifest(pm, cfg.AppName) if err := updatePodManifest(cfg.PodPath, pm); err != nil { return err } return nil }
func runExport(cmd *cobra.Command, args []string) (exit int) { if len(args) != 2 { cmd.Usage() return 254 } outACI := args[1] ext := filepath.Ext(outACI) if ext != schema.ACIExtension { stderr.Printf("extension must be %s (given %s)", schema.ACIExtension, outACI) return 254 } p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0]) if err != nil { stderr.PrintE("problem retrieving pod", err) return 254 } defer p.Close() state := p.State() if state != pkgPod.Exited && state != pkgPod.ExitedGarbage { stderr.Print("pod is not exited. Only exited pods can be exported") return 254 } app, err := getApp(p) if err != nil { stderr.PrintE("unable to find app", err) return 254 } root := common.AppPath(p.Path(), app.Name) manifestPath := filepath.Join(common.AppInfoPath(p.Path(), app.Name), aci.ManifestFile) if p.UsesOverlay() { tmpDir := filepath.Join(getDataDir(), "tmp") if err := os.MkdirAll(tmpDir, common.DefaultRegularDirPerm); err != nil { stderr.PrintE("unable to create temp directory", err) return 254 } podDir, err := ioutil.TempDir(tmpDir, fmt.Sprintf("rkt-export-%s", p.UUID)) if err != nil { stderr.PrintE("unable to create export temp directory", err) return 254 } defer func() { if err := os.RemoveAll(podDir); err != nil { stderr.PrintE("problem removing temp directory", err) exit = 1 } }() mntDir := filepath.Join(podDir, "rootfs") if err := os.Mkdir(mntDir, common.DefaultRegularDirPerm); err != nil { stderr.PrintE("unable to create rootfs directory inside temp directory", err) return 254 } if err := mountOverlay(p, app, mntDir); err != nil { stderr.PrintE(fmt.Sprintf("couldn't mount directory at %s", mntDir), err) return 254 } defer func() { if err := syscall.Unmount(mntDir, 0); err != nil { stderr.PrintE(fmt.Sprintf("error unmounting directory %s", mntDir), err) exit = 1 } }() root = podDir } else { // trailing filepath separator so we don't match the appRootfs path appRootfs := common.AppRootfsPath(p.Path(), app.Name) + string(filepath.Separator) mnts, err := mountinfo.ParseMounts(0) if err != nil { stderr.PrintE("error parsing mountpoints", err) return 254 } mnts = mnts.Filter(mountinfo.HasPrefix(appRootfs)) if len(mnts) > 0 { stderr.Printf("pod has remaining mountpoints. Only pods using overlayfs or with no mountpoints can be exported") return 254 } } // Check for user namespace (--private-user), if in use get uidRange var uidRange *user.UidRange privUserFile := filepath.Join(p.Path(), common.PrivateUsersPreparedFilename) privUserContent, err := ioutil.ReadFile(privUserFile) if err == nil { uidRange = user.NewBlankUidRange() // The file was found, save uid & gid shift and count if err := uidRange.Deserialize(privUserContent); err != nil { stderr.PrintE(fmt.Sprintf("problem deserializing the content of %s", common.PrivateUsersPreparedFilename), err) return 254 } } if err = buildAci(root, manifestPath, outACI, uidRange); err != nil { stderr.PrintE("error building aci", err) return 254 } return 0 }
// appToNspawnArgs transforms the given app manifest, with the given associated // app name, into a subset of applicable systemd-nspawn argument func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { var args []string appName := ra.Name app := ra.App sharedVolPath, err := common.CreateSharedVolumesPath(p.Root) if err != nil { return nil, err } vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } imageManifest := p.Images[appName.String()] mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err) } for _, m := range mounts { shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } appRootfs := common.AppRootfsPath(absRoot, appName) // Evaluate symlinks within the app's rootfs. This is needed because symlinks // within the container can be absolute, which will, of course, be wrong in our ns. // Systemd also gets this wrong, see https://github.com/systemd/systemd/issues/2860 // When the above issue is fixed, we can pass the un-evaluated path to --bind instead. mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } mntAbsPath := filepath.Join(appRootfs, mntPath) if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil { return nil, err } opt := make([]string, 6) if m.ReadOnly { opt[0] = "--bind-ro=" } else { opt[0] = "--bind=" } opt[1] = m.Source(absRoot) opt[2] = ":" opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) opt[4] = ":" // If Recursive is not set, default to recursive. recursive := true if m.Volume.Recursive != nil { recursive = *m.Volume.Recursive } // rbind/norbind options exist since systemd-nspawn v226 if recursive { opt[5] = "rbind" } else { opt[5] = "norbind" } args = append(args, strings.Join(opt, "")) } if !p.InsecureOptions.DisableCapabilities { capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return nil, err } capList := strings.Join(capabilitiesStr, ",") args = append(args, "--capability="+capList) } return args, nil }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return errwrap.Wrap(errors.New("unable to write environment file"), err) } var _uid, gid int var err error uidRange := uid.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return errwrap.Wrap(errors.New("unable to deserialize uid range"), err) } if strings.HasPrefix(app.User, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.User), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get uid from file %q", app.User), err) } uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0) if err != nil { return errwrap.Wrap(errors.New("unable to determine real uid"), err) } _uid = int(uidReal) } else { _uid, err = strconv.Atoi(app.User) if err != nil { _uid, err = passwd.LookupUidFromFile(app.User, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err) } } } if strings.HasPrefix(app.Group, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.Group), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get gid from file %q", app.Group), err) } _, gidReal, err := uidRange.UnshiftRange(0, stat.Gid) if err != nil { return errwrap.Wrap(errors.New("unable to determine real gid"), err) } gid = int(gidReal) } else { gid, err = strconv.Atoi(app.Group) if err != nil { gid, err = group.LookupGidFromFile(app.Group, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err) } } } execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(_uid), generateGidArg(gid, app.SupplementaryGIDs), "--"} execStart := quoteExec(append(execWrap, app.Exec...)) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStart), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), } if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(append(execWrap, eh.Exec...)) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return errwrap.Wrap(errors.New("failed to prepare mount units"), err) } } if err = writeAppReaper(p, appName.String()); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
// appToNspawnArgs transforms the given app manifest, with the given associated // app name, into a subset of applicable systemd-nspawn argument func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { var args []string appName := ra.Name app := ra.App sharedVolPath := common.SharedVolumesPath(p.Root) if err := os.MkdirAll(sharedVolPath, SharedVolPerm); err != nil { return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err) } if err := os.Chmod(sharedVolPath, SharedVolPerm); err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) } vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } imageManifest := p.Images[appName.String()] mounts := GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { vol := vols[m.Volume] shPath := filepath.Join(sharedVolPath, vol.Name.String()) absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) } appRootfs := common.AppRootfsPath(absRoot, appName) // TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well. // Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd // version is bumped. mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err) } mntAbsPath := filepath.Join(appRootfs, mntPath) if err := PrepareMountpoints(shPath, mntAbsPath, &vol, m.DockerImplicit); err != nil { return nil, err } opt := make([]string, 4) if IsMountReadOnly(vol, app.MountPoints) { opt[0] = "--bind-ro=" } else { opt[0] = "--bind=" } switch vol.Kind { case "host": opt[1] = vol.Source case "empty": opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String()) default: return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) } opt[2] = ":" opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) args = append(args, strings.Join(opt, "")) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return nil, err } capList := strings.Join(capabilitiesStr, ",") args = append(args, "--capability="+capList) return args, nil }
func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.RuntimeApp) error { app := ra.App appName := ra.Name volumes := p.Manifest.Volumes vols := make(map[types.ACName]types.Volume) for _, v := range volumes { vols[v.Name] = v } sharedVolPath := common.SharedVolumesPath(root) if err := os.MkdirAll(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil { return errwrap.Wrap(errors.New("could not create shared volumes directory"), err) } if err := os.Chmod(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil { return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) } imageManifest := p.Images[appName.String()] mounts := stage1initcommon.GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { vol := vols[m.Volume] if vol.Kind == "empty" { p := filepath.Join(sharedVolPath, vol.Name.String()) if err := os.MkdirAll(p, stage1initcommon.SharedVolPerm); err != nil { return errwrap.Wrap(fmt.Errorf("could not create shared volume %q", vol.Name), err) } if err := os.Chown(p, *vol.UID, *vol.GID); err != nil { return errwrap.Wrap(fmt.Errorf("could not change owner of %q", p), err) } mod, err := strconv.ParseUint(*vol.Mode, 8, 32) if err != nil { return errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err) } if err := os.Chmod(p, os.FileMode(mod)); err != nil { return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", p), err) } } readOnly := stage1initcommon.IsMountReadOnly(vol, app.MountPoints) var source string switch vol.Kind { case "host": source = vol.Source case "empty": source = filepath.Join(common.SharedVolumesPath(root), vol.Name.String()) default: return fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) } absAppRootfs, err := filepath.Abs(common.AppRootfsPath(root, appName)) if err != nil { return fmt.Errorf(`could not evaluate absolute path for application rootfs in app: %v`, appName) } absDestination, err := filepath.Abs(filepath.Join(absAppRootfs, m.Path)) if err != nil { return fmt.Errorf(`could not evaluate absolute path for application volume path %q in: %v`, m.Path, appName) } if !strings.HasPrefix(absDestination, absAppRootfs) { return fmt.Errorf("path escapes app's root: %v", absDestination) } if cleanedSource, err := filepath.EvalSymlinks(source); err != nil { return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err) } else if err := ensureDestinationExists(cleanedSource, absDestination); err != nil { return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err) } else if err := doBindMount(cleanedSource, absDestination, readOnly); err != nil { return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Path, source, absDestination), err) } } return nil }
// parseUserGroup parses the User and Group fields of an App and returns its // UID and GID. // The User and Group fields accept several formats: // 1. the hardcoded string "root" // 2. a path // 3. a number // 4. a name in reference to /etc/{group,passwod} in the image // See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema func parseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp, privateUsers string) (int, int, error) { app := ra.App appName := ra.Name var uid_, gid_ int var err error uidRange := uid.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return -1, -1, errwrap.Wrap(errors.New("unable to deserialize uid range"), err) } switch { case app.User == "root": uid_ = 0 case strings.HasPrefix(app.User, "/"): var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.User), &stat); err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("unable to get uid from file %q", app.User), err) } uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0) if err != nil { return -1, -1, errwrap.Wrap(errors.New("unable to determine real uid"), err) } uid_ = int(uidReal) default: uid_, err = strconv.Atoi(app.User) if err != nil { uid_, err = passwd.LookupUidFromFile(app.User, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd")) if err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err) } } } switch { case app.Group == "root": gid_ = 0 case strings.HasPrefix(app.Group, "/"): var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.Group), &stat); err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("unable to get gid from file %q", app.Group), err) } _, gidReal, err := uidRange.UnshiftRange(0, stat.Gid) if err != nil { return -1, -1, errwrap.Wrap(errors.New("unable to determine real gid"), err) } gid_ = int(gidReal) default: gid_, err = strconv.Atoi(app.Group) if err != nil { gid_, err = group.LookupGidFromFile(app.Group, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group")) if err != nil { return -1, -1, errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err) } } } return uid_, gid_, nil }