func TestCopyTree(t *testing.T) { td, err := ioutil.TempDir("", tstprefix) if err != nil { t.Fatal(err) } defer os.RemoveAll(td) src := filepath.Join(td, "src") dst := filepath.Join(td, "dst") if err := os.MkdirAll(filepath.Join(td, "src"), 0755); err != nil { panic(err) } tr := []tree{ { path: "dir1", dir: true, }, { path: "dir2", dir: true, }, { path: "dir1/foo", dir: false, }, { path: "dir1/bar", dir: false, }, } createTree(t, src, tr) // absolute paths if err := CopyTree(src, dst, user.NewBlankUidRange()); err != nil { t.Fatal(err) } checkTree(t, dst, tr) // relative paths if err := os.Chdir(td); err != nil { t.Fatal(err) } dst = "dst-rel1" if err := CopyTree("././src/", dst, user.NewBlankUidRange()); err != nil { t.Fatal(err) } checkTree(t, dst, tr) dst = "./dst-rel2" if err := CopyTree("./src", dst, user.NewBlankUidRange()); err != nil { t.Fatal(err) } checkTree(t, dst, tr) }
// ExtractImage will extract the contents of the image at path to the directory // at dst. If fileMap is set, only files in it will be extracted. func ExtractImage(path, dst string, fileMap map[string]struct{}) error { dst, err := filepath.Abs(dst) if err != nil { return err } file, err := os.Open(path) if err != nil { return err } defer file.Close() dr, err := aci.NewCompressedReader(file) if err != nil { return fmt.Errorf("error decompressing image: %v", err) } defer dr.Close() uidRange := user.NewBlankUidRange() if os.Geteuid() == 0 { return rkttar.ExtractTar(dr, dst, true, uidRange, fileMap) } editor, err := rkttar.NewUidShiftingFilePermEditor(uidRange) if err != nil { return fmt.Errorf("error determining current user: %v", err) } return rkttar.ExtractTarInsecure(tar.NewReader(dr), dst, true, fileMap, editor) }
// CopyToDir will copy all elements specified in the froms slice into the // directory inside the current ACI specified by the to string. func (a *ACBuild) CopyToDir(froms []string, to string) (err error) { if err = a.lock(); err != nil { return err } defer func() { if err1 := a.unlock(); err == nil { err = err1 } }() target := path.Join(a.CurrentACIPath, aci.RootfsDir, to) targetInfo, err := os.Stat(target) switch { case os.IsNotExist(err): err := os.MkdirAll(target, 0755) if err != nil { return err } case err != nil: return err case !targetInfo.IsDir(): return fmt.Errorf("target %q is not a directory", to) } for _, from := range froms { _, file := path.Split(from) tmptarget := path.Join(target, file) err := fileutil.CopyTree(from, tmptarget, user.NewBlankUidRange()) if err != nil { return err } } return nil }
func extractTarInsecureHelperPWL(rdr io.Reader, target string, pwl PathWhitelistMap) error { editor, err := NewUidShiftingFilePermEditor(user.NewBlankUidRange()) if err != nil { return err } return ExtractTarInsecure(tar.NewReader(rdr), target, true, pwl, editor) }
// LoadPod loads a Pod Manifest (as prepared by stage0), the runtime data, and // its associated Application Manifests, under $root/stage1/opt/stage1/$apphash func LoadPod(root string, uuid *types.UUID, rp *RuntimePod) (*Pod, error) { p := &Pod{ Root: root, UUID: *uuid, Images: make(map[string]*schema.ImageManifest), UidRange: *user.NewBlankUidRange(), } // Unserialize runtime parameters if rp != nil { p.RuntimePod = *rp } else { buf, err := ioutil.ReadFile(filepath.Join(p.Root, RuntimeConfigPath)) if err != nil { return nil, errwrap.Wrap(errors.New("failed reading runtime params"), err) } if err := json.Unmarshal(buf, &p.RuntimePod); err != nil { return nil, errwrap.Wrap(errors.New("failed unmarshalling runtime params"), err) } } buf, err := ioutil.ReadFile(common.PodManifestPath(p.Root)) if err != nil { return nil, errwrap.Wrap(errors.New("failed reading pod manifest"), err) } pm := &schema.PodManifest{} if err := json.Unmarshal(buf, pm); err != nil { return nil, errwrap.Wrap(errors.New("failed unmarshalling pod manifest"), err) } p.Manifest = pm for i, app := range p.Manifest.Apps { impath := common.ImageManifestPath(p.Root, app.Name) buf, err := ioutil.ReadFile(impath) if err != nil { return nil, errwrap.Wrap(fmt.Errorf("failed reading image manifest %q", impath), err) } im := &schema.ImageManifest{} if err = json.Unmarshal(buf, im); err != nil { return nil, errwrap.Wrap(fmt.Errorf("failed unmarshalling image manifest %q", impath), err) } if _, ok := p.Images[app.Name.String()]; ok { return nil, fmt.Errorf("got multiple definitions for app: %v", app.Name) } if app.App == nil { p.Manifest.Apps[i].App = im.App } p.Images[app.Name.String()] = im } if err := p.UidRange.Deserialize([]byte(p.PrivateUsers)); err != nil { return nil, err } return p, nil }
func (e Engine) Run(command string, args []string, environment types.Environment, chroot, workingDir string) error { resolvConfFile := filepath.Join(chroot, "/etc/resolv.conf") _, err := os.Stat(resolvConfFile) switch { case os.IsNotExist(err): err := os.MkdirAll(filepath.Dir(resolvConfFile), 0755) if err != nil { return err } err = fileutil.CopyTree("/etc/resolv.conf", resolvConfFile, user.NewBlankUidRange()) if err != nil { return err } defer os.RemoveAll(resolvConfFile) case err != nil: return err } var serializedArgs string for _, arg := range args { if serializedArgs != "" { serializedArgs += "," } serializedArgs += arg } var serializedEnv string for _, envvar := range environment { if serializedEnv != "" { serializedEnv += "," } serializedEnv += envvar.Name + "=" + envvar.Value } path := "PATH=" for _, p := range engine.Pathlist { if path != "PATH=" { path += ":" } path += p } chrootArgs := []string{ "--cmd", command, "--chroot", chroot, "--working-dir", workingDir, } if len(serializedArgs) > 0 { chrootArgs = append(chrootArgs, "--args", serializedArgs) } if len(serializedEnv) > 0 { chrootArgs = append(chrootArgs, "--env", serializedEnv) } cmd := exec.Command("acbuild-chroot", chrootArgs...) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Env = []string{path} return cmd.Run() }
func (a *ACBuild) beginFromLocalDirectory(start string) error { err := os.MkdirAll(a.CurrentACIPath, 0755) if err != nil { return err } err = fileutil.CopyTree(start, path.Join(a.CurrentACIPath, aci.RootfsDir), user.NewBlankUidRange()) if err != nil { return err } return a.writeEmptyManifest() }
// PrepareMountpoints creates and sets permissions for empty volumes. // If the mountpoint comes from a Docker image and it is an implicit empty // volume, we copy files from the image to the volume, see // https://docs.docker.com/engine/userguide/containers/dockervolumes/#data-volumes func PrepareMountpoints(volPath string, targetPath string, vol *types.Volume, dockerImplicit bool) error { if vol.Kind != "empty" { return nil } diag.Printf("creating an empty volume folder for sharing: %q", volPath) m, err := strconv.ParseUint(*vol.Mode, 8, 32) if err != nil { return errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err) } mode := os.FileMode(m) Uid := *vol.UID Gid := *vol.GID if dockerImplicit { fi, err := os.Stat(targetPath) if err == nil { // the directory exists in the image, let's set the same // permissions and copy files from there to the empty volume mode = fi.Mode() Uid = int(fi.Sys().(*syscall.Stat_t).Uid) Gid = int(fi.Sys().(*syscall.Stat_t).Gid) if err := fileutil.CopyTree(targetPath, volPath, user.NewBlankUidRange()); err != nil { return errwrap.Wrap(fmt.Errorf("error copying image files to empty volume %q", volPath), err) } } } if err := os.MkdirAll(volPath, 0770); err != nil { return errwrap.Wrap(fmt.Errorf("error creating %q", volPath), err) } if err := os.Chown(volPath, Uid, Gid); err != nil { return errwrap.Wrap(fmt.Errorf("could not change owner of %q", volPath), err) } if err := os.Chmod(volPath, mode); err != nil { return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", volPath), err) } return nil }
// CreateBackup backs a directory up in a given directory. It basically // copies this directory into a given backups directory. The backups // directory has a simple structure - a directory inside named "0" is // the most recent backup. A directory name for oldest backup is // deduced from a given limit. For instance, for limit being 5 the // name for the oldest backup would be "4". If a backups number // exceeds the given limit then only newest ones are kept and the rest // is removed. func CreateBackup(dir, backupsDir string, limit int) error { tmpBackupDir := filepath.Join(backupsDir, "tmp") if err := os.MkdirAll(backupsDir, 0750); err != nil { return err } if err := fileutil.CopyTree(dir, tmpBackupDir, user.NewBlankUidRange()); err != nil { return err } defer os.RemoveAll(tmpBackupDir) // prune backups if err := pruneOldBackups(backupsDir, limit-1); err != nil { return err } if err := shiftBackups(backupsDir, limit-2); err != nil { return err } if err := os.Rename(tmpBackupDir, filepath.Join(backupsDir, "0")); err != nil { return err } return nil }
// CopyToTarget will copy a single file/directory from the from string to the // path specified by the to string inside the current ACI. func (a *ACBuild) CopyToTarget(from string, to string) (err error) { if err = a.lock(); err != nil { return err } defer func() { if err1 := a.unlock(); err == nil { err = err1 } }() target := path.Join(a.CurrentACIPath, aci.RootfsDir, to) dir, _ := path.Split(target) if dir != "" { err := os.MkdirAll(dir, 0755) if err != nil { return err } } return fileutil.CopyTree(from, target, user.NewBlankUidRange()) }
func AddApp(cfg AddConfig) error { // there should be only one app in the config app := cfg.Apps.Last() if app == nil { return errors.New("no image specified") } am, err := cfg.Store.GetImageManifest(cfg.Image.String()) if err != nil { return err } var appName *types.ACName if app.Name != "" { appName, err = types.NewACName(app.Name) if err != nil { return err } } else { appName, err = imageNameToAppName(am.Name) if err != nil { return err } } pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String()) if err != nil { return errwrap.Wrap(errors.New("error loading pod"), err) } defer pod.Close() debug("locking pod manifest") if err := pod.ExclusiveLockManifest(); err != nil { return errwrap.Wrap(errors.New("failed to lock pod manifest"), err) } defer pod.UnlockManifest() pm, err := pod.SandboxManifest() if err != nil { return errwrap.Wrap(errors.New("cannot add application"), err) } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } if am.App == nil && app.Exec == "" { return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", cfg.Image) } appInfoDir := common.AppInfoPath(cfg.PodPath, *appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } pcfg := PrepareConfig{ CommonConfig: cfg.CommonConfig, PrivateUsers: user.NewBlankUidRange(), } if cfg.UsesOverlay { privateUsers, err := preparedWithPrivateUsers(cfg.PodPath) if err != nil { log.FatalE("error reading user namespace information", err) } if err := pcfg.PrivateUsers.Deserialize([]byte(privateUsers)); err != nil { return err } } treeStoreID, err := prepareAppImage(pcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay) if err != nil { return errwrap.Wrap(fmt.Errorf("error preparing image %s", cfg.Image), err) } rcfg := RunConfig{ CommonConfig: cfg.CommonConfig, UseOverlay: cfg.UsesOverlay, RktGid: cfg.RktGid, } if err 
:= setupAppImage(rcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay); err != nil { return fmt.Errorf("error setting up app image: %v", err) } if cfg.UsesOverlay { imgDir := filepath.Join(cfg.PodPath, "overlay", treeStoreID) if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil { return err } } ra := schema.RuntimeApp{ Name: *appName, App: am.App, Image: schema.RuntimeImage{ Name: &am.Name, ID: cfg.Image, Labels: am.Labels, }, Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), ReadOnlyRootFS: app.ReadOnlyRootFS, } if app.Exec != "" { // Create a minimal App section if not present if am.App == nil { ra.App = &types.App{ User: strconv.Itoa(os.Getuid()), Group: strconv.Itoa(os.Getgid()), } } ra.App.Exec = []string{app.Exec} } if app.Args != nil { ra.App.Exec = append(ra.App.Exec, app.Args...) } if app.WorkingDir != "" { ra.App.WorkingDirectory = app.WorkingDir } if err := prepareIsolators(app, ra.App); err != nil { return err } if app.User != "" { ra.App.User = app.User } if app.Group != "" { ra.App.Group = app.Group } if app.SupplementaryGIDs != nil { ra.App.SupplementaryGIDs = app.SupplementaryGIDs } if app.UserAnnotations != nil { ra.App.UserAnnotations = app.UserAnnotations } if app.UserLabels != nil { ra.App.UserLabels = app.UserLabels } if app.Environments != nil { envs := make([]string, 0, len(app.Environments)) for name, value := range app.Environments { envs = append(envs, fmt.Sprintf("%s=%s", name, value)) } // Let the app level environment override the environment variables. 
mergeEnvs(&ra.App.Environment, envs, true) } env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) envFilePath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", appName.String()) if err := common.WriteEnvFile(env, pcfg.PrivateUsers, envFilePath); err != nil { return err } debug("adding app to sandbox") pm.Apps = append(pm.Apps, ra) if err := pod.UpdateManifest(pm, cfg.PodPath); err != nil { return err } args := []string{ fmt.Sprintf("--debug=%t", cfg.Debug), fmt.Sprintf("--uuid=%s", cfg.UUID), fmt.Sprintf("--app=%s", appName), } if _, err := os.Create(common.AppCreatedPath(pod.Path(), appName.String())); err != nil { return err } ce := CrossingEntrypoint{ PodPath: cfg.PodPath, PodPID: cfg.PodPID, AppName: appName.String(), EntrypointName: appAddEntrypoint, EntrypointArgs: args, Interactive: false, } if err := ce.Run(); err != nil { return err } return nil }
// runAppSandbox is the cobra handler that creates a new, mutable pod and runs
// it. It opens the image store and tree store, resolves the stage1 image,
// creates the pod (optionally saving its UUID to flagUUIDFileSave),
// initialises the SELinux labels, prepares the pod under the shared prepare
// lock, moves the pod to the run state and finally hands over to stage0.Run.
//
// Every failure is reported on stderr and yields exit code 1. The final
// "return 1" is only reached if stage0.Run returns, which per the comment on
// that call is not expected.
func runAppSandbox(cmd *cobra.Command, args []string) int {
	s, err := imagestore.NewStore(storeDir())
	if err != nil {
		stderr.PrintE("cannot open store", err)
		return 1
	}

	ts, err := treestore.NewStore(treeStoreDir(), s)
	if err != nil {
		stderr.PrintE("cannot open treestore", err)
		return 1
	}

	config, err := getConfig()
	if err != nil {
		stderr.PrintE("cannot get configuration", err)
		return 1
	}

	s1img, err := getStage1Hash(s, ts, config)
	if err != nil {
		stderr.Error(err)
		return 1
	}

	p, err := pod.NewPod(getDataDir())
	if err != nil {
		stderr.PrintE("error creating new pod", err)
		return 1
	}

	// Optionally record the pod UUID in a file as soon as the pod exists.
	if flagUUIDFileSave != "" {
		if err := pod.WriteUUIDToFile(p.UUID, flagUUIDFileSave); err != nil {
			stderr.PrintE("error saving pod UUID to file", err)
			return 1
		}
	}

	processLabel, mountLabel, err := label.InitLabels("/var/run/rkt/mcs", []string{})
	if err != nil {
		stderr.PrintE("error initialising SELinux", err)
		return 1
	}

	p.MountLabel = mountLabel

	cfg := stage0.CommonConfig{
		DataDir:      getDataDir(),
		MountLabel:   mountLabel,
		ProcessLabel: processLabel,
		Store:        s,
		TreeStore:    ts,
		Stage1Image:  *s1img,
		UUID:         p.UUID,
		Debug:        globalFlags.Debug,
		Mutable:      true,
	}

	// An "overlay unsupported" error only disables overlay; any other error
	// from the probe is fatal.
	ovlOk := true
	if err := common.PathSupportsOverlay(getDataDir()); err != nil {
		if oerr, ok := err.(common.ErrOverlayUnsupported); ok {
			stderr.Printf("disabling overlay support: %q", oerr.Error())
			ovlOk = false
		} else {
			stderr.PrintE("error determining overlay support", err)
			return 1
		}
	}

	useOverlay := !flagNoOverlay && ovlOk

	pcfg := stage0.PrepareConfig{
		CommonConfig:       &cfg,
		UseOverlay:         useOverlay,
		PrivateUsers:       user.NewBlankUidRange(),
		SkipTreeStoreCheck: globalFlags.InsecureFlags.SkipOnDiskCheck(),
		Apps:               &rktApps,
		Ports:              []types.ExposedPort(flagAppPorts),
		UserAnnotations:    parseAnnotations(&flagAnnotations),
		UserLabels:         parseLabels(&flagLabels),
	}

	if globalFlags.Debug {
		stage0.InitDebug()
	}

	// The shared prepare lock is held only while stage0.Prepare runs and is
	// released on both the success and the failure path.
	keyLock, err := lock.SharedKeyLock(lockDir(), common.PrepareLock)
	if err != nil {
		stderr.PrintE("cannot get shared prepare lock", err)
		return 1
	}

	err = stage0.Prepare(pcfg, p.Path(), p.UUID)
	if err != nil {
		stderr.PrintE("error setting up stage0", err)
		keyLock.Close()
		return 1
	}
	keyLock.Close()

	// get the lock fd for run
	lfd, err := p.Fd()
	if err != nil {
		stderr.PrintE("error getting pod lock fd", err)
		return 1
	}

	// skip prepared by jumping directly to run, we own this pod
	if err := p.ToRun(); err != nil {
		stderr.PrintE("unable to transition to run", err)
		return 1
	}

	// A missing rkt group is not fatal: -1 is used as the gid instead.
	rktgid, err := common.LookupGid(common.RktGroup)
	if err != nil {
		stderr.Printf("group %q not found, will use default gid when rendering images", common.RktGroup)
		rktgid = -1
	}

	DNSConfMode, DNSConfig, HostsEntries, err := parseDNSFlags(flagHostsEntries, flagDNS, flagDNSSearch, flagDNSOpt, flagDNSDomain)
	if err != nil {
		stderr.PrintE("error with dns flags", err)
		return 1
	}

	rcfg := stage0.RunConfig{
		CommonConfig:         &cfg,
		Net:                  flagNet,
		LockFd:               lfd,
		Interactive:          true,
		DNSConfMode:          DNSConfMode,
		DNSConfig:            DNSConfig,
		MDSRegister:          false,
		LocalConfig:          globalFlags.LocalConfigDir,
		RktGid:               rktgid,
		Hostname:             flagHostname,
		InsecureCapabilities: globalFlags.InsecureFlags.SkipCapabilities(),
		InsecurePaths:        globalFlags.InsecureFlags.SkipPaths(),
		InsecureSeccomp:      globalFlags.InsecureFlags.SkipSeccomp(),
		UseOverlay:           useOverlay,
		HostsEntries:         *HostsEntries,
	}

	_, manifest, err := p.PodManifest()
	if err != nil {
		stderr.PrintE("cannot get the pod manifest", err)
		return 1
	}
	rcfg.Apps = manifest.Apps
	stage0.Run(rcfg, p.Path(), getDataDir()) // execs, never returns

	return 1
}
// runPrepare is the cobra handler that prepares a pod without running it. It
// validates the flag combination, finds the requested images, creates a new
// pod, runs stage0.Prepare under the shared prepare lock, syncs the pod data,
// moves the pod to the prepared state and prints the pod UUID on stdout.
//
// It returns 0 on success and 254 on any failure; failures are reported on
// stderr.
func runPrepare(cmd *cobra.Command, args []string) (exit int) {
	var err error
	origStdout := os.Stdout
	privateUsers := user.NewBlankUidRange()
	// With --quiet, os.Stdout is pointed at /dev/null until just before the
	// UUID is printed at the end.
	// NOTE(review): os.Open opens the file read-only, and the handle is
	// neither closed nor is os.Stdout restored on the error paths below;
	// confirm this is acceptable.
	if flagQuiet {
		if os.Stdout, err = os.Open("/dev/null"); err != nil {
			stderr.PrintE("unable to open /dev/null", err)
			return 254
		}
	}

	if flagStoreOnly && flagNoStore {
		stderr.Print("both --store-only and --no-store specified")
		return 254
	}

	if flagPrivateUsers {
		if !common.SupportsUserNS() {
			stderr.Print("--private-users is not supported, kernel compiled without user namespace support")
			return 254
		}
		privateUsers.SetRandomUidRange(user.DefaultRangeCount)
	}

	if err = parseApps(&rktApps, args, cmd.Flags(), true); err != nil {
		stderr.PrintE("error parsing app image arguments", err)
		return 254
	}

	// --pod-manifest cannot be combined with any of the per-app or store
	// related flags checked here.
	if len(flagPodManifest) > 0 && (rktApps.Count() > 0 ||
		(*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" ||
		len(flagPorts) > 0 || flagStoreOnly || flagNoStore ||
		flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty()) {
		stderr.Print("conflicting flags set with --pod-manifest (see --help)")
		return 254
	}

	if rktApps.Count() < 1 && len(flagPodManifest) == 0 {
		stderr.Print("must provide at least one image or specify the pod manifest")
		return 254
	}

	s, err := imagestore.NewStore(storeDir())
	if err != nil {
		stderr.PrintE("cannot open store", err)
		return 254
	}

	ts, err := treestore.NewStore(treeStoreDir(), s)
	if err != nil {
		stderr.PrintE("cannot open treestore", err)
		return 254
	}

	config, err := getConfig()
	if err != nil {
		stderr.PrintE("cannot get configuration", err)
		return 254
	}

	s1img, err := getStage1Hash(s, ts, config)
	if err != nil {
		stderr.Error(err)
		return 254
	}

	fn := &image.Finder{
		S:                  s,
		Ts:                 ts,
		Ks:                 getKeystore(),
		Headers:            config.AuthPerHost,
		DockerAuth:         config.DockerCredentialsPerRegistry,
		InsecureFlags:      globalFlags.InsecureFlags,
		Debug:              globalFlags.Debug,
		TrustKeysFromHTTPS: globalFlags.TrustKeysFromHTTPS,

		StoreOnly: flagStoreOnly,
		NoStore:   flagNoStore,
		WithDeps:  true,
	}
	if err := fn.FindImages(&rktApps); err != nil {
		stderr.PrintE("error finding images", err)
		return 254
	}

	p, err := pkgPod.NewPod(getDataDir())
	if err != nil {
		stderr.PrintE("error creating new pod", err)
		return 254
	}

	cfg := stage0.CommonConfig{
		DataDir:     getDataDir(),
		Store:       s,
		TreeStore:   ts,
		Stage1Image: *s1img,
		UUID:        p.UUID,
		Debug:       globalFlags.Debug,
	}

	// An "overlay unsupported" error only disables overlay; any other error
	// from the probe is fatal.
	ovlOk := true
	if err := common.PathSupportsOverlay(getDataDir()); err != nil {
		if oerr, ok := err.(common.ErrOverlayUnsupported); ok {
			stderr.Printf("disabling overlay support: %q", oerr.Error())
			ovlOk = false
		} else {
			stderr.PrintE("error determining overlay support", err)
			return 254
		}
	}

	pcfg := stage0.PrepareConfig{
		CommonConfig:       &cfg,
		UseOverlay:         !flagNoOverlay && ovlOk,
		PrivateUsers:       privateUsers,
		SkipTreeStoreCheck: globalFlags.InsecureFlags.SkipOnDiskCheck(),
	}

	// Either a pod manifest or the command-line app description is used,
	// never both.
	if len(flagPodManifest) > 0 {
		pcfg.PodManifest = flagPodManifest
	} else {
		pcfg.Ports = []types.ExposedPort(flagPorts)
		pcfg.InheritEnv = flagInheritEnv
		pcfg.ExplicitEnv = flagExplicitEnv.Strings()
		pcfg.EnvFromFile = flagEnvFromFile.Strings()
		pcfg.Apps = &rktApps
	}

	if globalFlags.Debug {
		stage0.InitDebug()
	}

	// The shared prepare lock is held only while stage0.Prepare runs and is
	// released on both the success and the failure path.
	keyLock, err := lock.SharedKeyLock(lockDir(), common.PrepareLock)
	if err != nil {
		stderr.PrintE("cannot get shared prepare lock", err)
		return 254
	}
	if err = stage0.Prepare(pcfg, p.Path(), p.UUID); err != nil {
		stderr.PrintE("error setting up stage0", err)
		keyLock.Close()
		return 254
	}
	keyLock.Close()

	if err := p.Sync(); err != nil {
		stderr.PrintE("error syncing pod data", err)
		return 254
	}

	if err := p.ToPrepared(); err != nil {
		stderr.PrintE("error transitioning to prepared", err)
		return 254
	}

	os.Stdout = origStdout // restore output in case of --quiet
	stdout.Printf("%s", p.UUID.String())

	return 0
}
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } envFilePath := EnvFilePath(p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil { return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) } u, g, err := parseUserGroup(p, ra, uidRange) if err != nil { return err } if err := generateSysusers(p, ra, u, g, uidRange); err != nil { return errwrap.Wrap(errors.New("unable to generate sysusers"), err) } binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0]) if err != nil { return err } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return err } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) execStart := append([]string{binPath}, app.Exec[1:]...) 
execStartString := quoteExec(execStart) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", workDir), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")), unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")), unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), } // Restrict access to sensitive paths (eg. procfs) opts = protectSystemFiles(opts, appName) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. 
if err != nil { return err } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := p.Images[appName.String()] for _, m := range GenerateMounts(ra, vols, imageManifest) { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { return err } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " "))) if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := 
[]*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = 
append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
// runRun is the cobra handler that prepares a new pod and runs it
// immediately. It validates the flag combination, finds the requested images,
// creates the pod (optionally saving its UUID to flagUUIDFileSave),
// initialises the SELinux labels, runs stage0.Prepare under the shared
// prepare lock, moves the pod to the run state and hands over to stage0.Run.
//
// Every failure is reported on stderr and yields exit code 1. The final
// "return 1" is only reached if stage0.Run returns, which per the comment on
// that call is not expected.
func runRun(cmd *cobra.Command, args []string) (exit int) {
	privateUsers := user.NewBlankUidRange()
	err := parseApps(&rktApps, args, cmd.Flags(), true)
	if err != nil {
		stderr.PrintE("error parsing app image arguments", err)
		return 1
	}

	if flagStoreOnly && flagNoStore {
		stderr.Print("both --store-only and --no-store specified")
		return 1
	}

	if flagPrivateUsers {
		if !common.SupportsUserNS() {
			stderr.Print("--private-users is not supported, kernel compiled without user namespace support")
			return 1
		}
		privateUsers.SetRandomUidRange(user.DefaultRangeCount)
	}

	// Port forwarding is rejected for the 'none' and 'host' network modes.
	if len(flagPorts) > 0 && flagNet.None() {
		stderr.Print("--port flag does not work with 'none' networking")
		return 1
	}
	if len(flagPorts) > 0 && flagNet.Host() {
		stderr.Print("--port flag does not work with 'host' networking")
		return 1
	}

	if flagMDSRegister && flagNet.None() {
		stderr.Print("--mds-register flag does not work with --net=none. Please use 'host', 'default' or an equivalent network")
		return 1
	}

	// --pod-manifest cannot be combined with any of the per-app or store
	// related flags checked here.
	if len(flagPodManifest) > 0 && (len(flagPorts) > 0 || rktApps.Count() > 0 || flagStoreOnly || flagNoStore ||
		flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty() ||
		(*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" || (*appExec)(&rktApps).String() != "" ||
		(*appUser)(&rktApps).String() != "" || (*appGroup)(&rktApps).String() != "" ||
		(*appCapsRetain)(&rktApps).String() != "" || (*appCapsRemove)(&rktApps).String() != "") {
		stderr.Print("conflicting flags set with --pod-manifest (see --help)")
		return 1
	}

	if flagInteractive && rktApps.Count() > 1 {
		stderr.Print("interactive option only supports one image")
		return 1
	}

	if rktApps.Count() < 1 && len(flagPodManifest) == 0 {
		stderr.Print("must provide at least one image or specify the pod manifest")
		return 1
	}

	s, err := imagestore.NewStore(storeDir())
	if err != nil {
		stderr.PrintE("cannot open store", err)
		return 1
	}

	ts, err := treestore.NewStore(treeStoreDir(), s)
	if err != nil {
		stderr.PrintE("cannot open treestore", err)
		return 1
	}

	config, err := getConfig()
	if err != nil {
		stderr.PrintE("cannot get configuration", err)
		return 1
	}

	s1img, err := getStage1Hash(s, ts, config)
	if err != nil {
		stderr.Error(err)
		return 1
	}

	fn := &image.Finder{
		S:                  s,
		Ts:                 ts,
		Ks:                 getKeystore(),
		Headers:            config.AuthPerHost,
		DockerAuth:         config.DockerCredentialsPerRegistry,
		InsecureFlags:      globalFlags.InsecureFlags,
		Debug:              globalFlags.Debug,
		TrustKeysFromHTTPS: globalFlags.TrustKeysFromHTTPS,

		StoreOnly: flagStoreOnly,
		NoStore:   flagNoStore,
		WithDeps:  true,
	}
	if err := fn.FindImages(&rktApps); err != nil {
		stderr.Error(err)
		return 1
	}

	p, err := newPod()
	if err != nil {
		stderr.PrintE("error creating new pod", err)
		return 1
	}

	// if requested, write out pod UUID early so "rkt rm" can
	// clean it up even if something goes wrong
	if flagUUIDFileSave != "" {
		if err := writeUUIDToFile(p.uuid, flagUUIDFileSave); err != nil {
			stderr.PrintE("error saving pod UUID to file", err)
			return 1
		}
	}

	processLabel, mountLabel, err := label.InitLabels([]string{"mcsdir:/var/run/rkt/mcs"})
	if err != nil {
		stderr.PrintE("error initialising SELinux", err)
		return 1
	}

	p.mountLabel = mountLabel

	cfg := stage0.CommonConfig{
		MountLabel:   mountLabel,
		ProcessLabel: processLabel,
		Store:        s,
		TreeStore:    ts,
		Stage1Image:  *s1img,
		UUID:         p.uuid,
		Debug:        globalFlags.Debug,
	}

	pcfg := stage0.PrepareConfig{
		CommonConfig:       &cfg,
		UseOverlay:         !flagNoOverlay && common.SupportsOverlay() && common.FSSupportsOverlay(getDataDir()),
		PrivateUsers:       privateUsers,
		SkipTreeStoreCheck: globalFlags.InsecureFlags.SkipOnDiskCheck(),
	}

	// Either a pod manifest or the command-line app description is used,
	// never both.
	if len(flagPodManifest) > 0 {
		pcfg.PodManifest = flagPodManifest
	} else {
		pcfg.Ports = []types.ExposedPort(flagPorts)
		pcfg.InheritEnv = flagInheritEnv
		pcfg.ExplicitEnv = flagExplicitEnv.Strings()
		pcfg.EnvFromFile = flagEnvFromFile.Strings()
		pcfg.Apps = &rktApps
	}

	if globalFlags.Debug {
		stage0.InitDebug()
	}

	// The shared prepare lock is held only while stage0.Prepare runs and is
	// released on both the success and the failure path.
	keyLock, err := lock.SharedKeyLock(lockDir(), common.PrepareLock)
	if err != nil {
		stderr.PrintE("cannot get shared prepare lock", err)
		return 1
	}
	err = stage0.Prepare(pcfg, p.path(), p.uuid)
	if err != nil {
		stderr.PrintE("error setting up stage0", err)
		keyLock.Close()
		return 1
	}
	keyLock.Close()

	// get the lock fd for run
	lfd, err := p.Fd()
	if err != nil {
		stderr.PrintE("error getting pod lock fd", err)
		return 1
	}

	// skip prepared by jumping directly to run, we own this pod
	if err := p.xToRun(); err != nil {
		stderr.PrintE("unable to transition to run", err)
		return 1
	}

	// A missing rkt group is not fatal: -1 is used as the gid instead.
	rktgid, err := common.LookupGid(common.RktGroup)
	if err != nil {
		stderr.Printf("group %q not found, will use default gid when rendering images", common.RktGroup)
		rktgid = -1
	}

	rcfg := stage0.RunConfig{
		CommonConfig: &cfg,
		Net:          flagNet,
		LockFd:       lfd,
		Interactive:  flagInteractive,
		DNS:          flagDNS,
		DNSSearch:    flagDNSSearch,
		DNSOpt:       flagDNSOpt,
		MDSRegister:  flagMDSRegister,
		LocalConfig:  globalFlags.LocalConfigDir,
		RktGid:       rktgid,
		Hostname:     flagHostname,
	}

	apps, err := p.getApps()
	if err != nil {
		stderr.PrintE("cannot get the appList in the pod manifest", err)
		return 1
	}
	rcfg.Apps = apps
	stage0.Run(rcfg, p.path(), getDataDir()) // execs, never returns

	return 1
}
func TestStat(t *testing.T) { tmp, err := ioutil.TempFile("", "rkt-TestStat-") if err != nil { panic(err) } defer os.Remove(tmp.Name()) rng := user.NewBlankUidRange() rng.SetRandomUidRange(100) u, err := osuser.Current() if err != nil { panic(err) } procUid, err := strconv.Atoi(u.Uid) if err != nil { panic(err) } procGid, err := strconv.Atoi(u.Gid) if err != nil { panic(err) } for i, tt := range []struct { root, path string // expected errIDs, err bool uid, gid int }{ { root: "", path: "", err: true, }, { root: "unknown", path: "", err: true, }, { root: "", path: "unknown", err: true, }, { root: "", path: tmp.Name(), uid: procUid, gid: procGid, }, { root: "/", path: tmp.Name(), uid: procUid, gid: procGid, }, { root: "unknown", path: tmp.Name(), errIDs: true, uid: -1, gid: -1, }, { root: filepath.Dir(tmp.Name()), path: "", err: true, }, { root: filepath.Dir(tmp.Name()), path: "/" + filepath.Base(tmp.Name()), uid: procUid, gid: procGid, }, { root: filepath.Dir(tmp.Name()), path: "/unknown", errIDs: true, uid: -1, gid: -1, }, { root: filepath.Dir(tmp.Name()), path: "unknown", err: true, }, } { gen, err := user.IDsFromStat(tt.root, tt.path, nil) if err == nil && tt.err { t.Errorf("test %d: expected error but got one", i) } if err != nil { continue } uid, gid, err := gen.IDs() if err == nil && tt.errIDs { t.Errorf("test %d: expected err but got none", i) } if uid != tt.uid { t.Errorf("test %d: expected uid %d but got %d", i, tt.uid, uid) } if gid != tt.gid { t.Errorf("test %d: expected gid %d but got %d", i, tt.gid, gid) } } }
func extractTarOverwriteHelper(rdr io.Reader, target string) error { return ExtractTar(rdr, target, true, user.NewBlankUidRange(), nil) }
// extractTarHelperPWL extracts the tar stream rdr into target by delegating
// to ExtractTar with a blank uid range and the path whitelist pwl. The third
// ExtractTar argument is passed as false.
func extractTarHelperPWL(rdr io.Reader, target string, pwl PathWhitelistMap) error {
	blankRange := user.NewBlankUidRange()
	err := ExtractTar(rdr, target, false, blankRange, pwl)
	return err
}
func runImageRender(cmd *cobra.Command, args []string) (exit int) { if len(args) != 2 { cmd.Usage() return 254 } outputDir := args[1] s, err := imagestore.NewStore(storeDir()) if err != nil { stderr.PrintE("cannot open store", err) return 254 } ts, err := treestore.NewStore(treeStoreDir(), s) if err != nil { stderr.PrintE("cannot open store", err) return } key, err := getStoreKeyFromAppOrHash(s, args[0]) if err != nil { stderr.Error(err) return 254 } id, _, err := ts.Render(key, false) if err != nil { stderr.PrintE("error rendering ACI", err) return 254 } if _, err := ts.Check(id); err != nil { stderr.Print("warning: tree cache is in a bad state. Rebuilding...") var err error if id, _, err = ts.Render(key, true); err != nil { stderr.PrintE("error rendering ACI", err) return 254 } } if _, err := os.Stat(outputDir); err == nil { if !flagRenderOverwrite { stderr.Print("output directory exists (try --overwrite)") return 254 } // don't allow the user to delete the root filesystem by mistake if outputDir == "/" { stderr.Print("this would delete your root filesystem. 
Refusing.") return 254 } if err := os.RemoveAll(outputDir); err != nil { stderr.PrintE("error removing existing output dir", err) return 254 } } rootfsOutDir := outputDir if !flagRenderRootfsOnly { if err := os.MkdirAll(outputDir, 0755); err != nil { stderr.PrintE("error creating output directory", err) return 254 } rootfsOutDir = filepath.Join(rootfsOutDir, "rootfs") manifest, err := s.GetImageManifest(key) if err != nil { stderr.PrintE("error getting manifest", err) return 254 } mb, err := json.Marshal(manifest) if err != nil { stderr.PrintE("error marshalling image manifest", err) return 254 } if err := ioutil.WriteFile(filepath.Join(outputDir, "manifest"), mb, 0700); err != nil { stderr.PrintE("error writing image manifest", err) return 254 } } cachedTreePath := ts.GetRootFS(id) if err := fileutil.CopyTree(cachedTreePath, rootfsOutDir, user.NewBlankUidRange()); err != nil { stderr.PrintE("error copying ACI rootfs", err) return 254 } return 0 }
// render renders the ACI with the provided key in the treestore. id references // that specific tree store rendered image. // render, to avoid having a rendered ACI with old stale files, requires that // the destination directory doesn't exist (usually remove should be called // before render) func (ts *Store) render(id string, key string) (string, error) { treepath := ts.GetPath(id) fi, _ := os.Stat(treepath) if fi != nil { return "", fmt.Errorf("path %s already exists", treepath) } imageID, err := types.NewHash(key) if err != nil { return "", errwrap.Wrap(errors.New("cannot convert key to imageID"), err) } if err := os.MkdirAll(treepath, 0755); err != nil { return "", errwrap.Wrap(fmt.Errorf("cannot create treestore directory %s", treepath), err) } err = aci.RenderACIWithImageID(*imageID, treepath, ts.store, user.NewBlankUidRange()) if err != nil { return "", errwrap.Wrap(errors.New("cannot render aci"), err) } hash, err := ts.Hash(id) if err != nil { return "", errwrap.Wrap(errors.New("cannot calculate tree hash"), err) } err = ioutil.WriteFile(filepath.Join(treepath, hashfilename), []byte(hash), 0644) if err != nil { return "", errwrap.Wrap(errors.New("cannot write hash file"), err) } // before creating the "rendered" flag file we need to ensure that all data is fsynced dfd, err := syscall.Open(treepath, syscall.O_RDONLY, 0) if err != nil { return "", err } defer syscall.Close(dfd) if err := sys.Syncfs(dfd); err != nil { return "", errwrap.Wrap(errors.New("failed to sync data"), err) } // Create rendered file f, err := os.Create(filepath.Join(treepath, renderedfilename)) if err != nil { return "", errwrap.Wrap(errors.New("failed to write rendered file"), err) } f.Close() // Write the hash of the image that will use this tree store err = ioutil.WriteFile(filepath.Join(treepath, imagefilename), []byte(key), 0644) if err != nil { return "", errwrap.Wrap(errors.New("cannot write image file"), err) } if err := syscall.Fsync(dfd); err != nil { return "", 
errwrap.Wrap(errors.New("failed to sync tree store directory"), err) } // TODO(sgotti) this is wrong for various reasons: // * Doesn't consider that can there can be multiple treestore per ACI // (and fixing this adding/subtracting sizes is bad since cannot be // atomic and could bring to duplicated/missing subtractions causing // wrong sizes) // * ImageStore and TreeStore are decoupled (TreeStore should just use acirenderer.ACIRegistry interface) treeSize, err := ts.Size(id) if err != nil { return "", err } if err := ts.store.UpdateTreeStoreSize(key, treeSize); err != nil { return "", err } return string(hash), nil }
func runImageExtract(cmd *cobra.Command, args []string) (exit int) { if len(args) != 2 { cmd.Usage() return 254 } outputDir := args[1] s, err := imagestore.NewStore(storeDir()) if err != nil { stderr.PrintE("cannot open store", err) return 254 } key, err := getStoreKeyFromAppOrHash(s, args[0]) if err != nil { stderr.Error(err) return 254 } aci, err := s.ReadStream(key) if err != nil { stderr.PrintE("error reading ACI from the store", err) return 254 } // ExtractTar needs an absolute path absOutputDir, err := filepath.Abs(outputDir) if err != nil { stderr.PrintE("error converting output to an absolute path", err) return 254 } if _, err := os.Stat(absOutputDir); err == nil { if !flagExtractOverwrite { stderr.Print("output directory exists (try --overwrite)") return 254 } // don't allow the user to delete the root filesystem by mistake if absOutputDir == "/" { stderr.Print("this would delete your root filesystem. Refusing.") return 254 } if err := os.RemoveAll(absOutputDir); err != nil { stderr.PrintE("error removing existing output dir", err) return 254 } } // if the user only asks for the rootfs we extract the image to a temporary // directory and then move/copy the rootfs to the output directory, if not // we just extract the image to the output directory extractDir := absOutputDir if flagExtractRootfsOnly { rktTmpDir, err := s.TmpDir() if err != nil { stderr.PrintE("error creating rkt temporary directory", err) return 254 } tmpDir, err := ioutil.TempDir(rktTmpDir, "rkt-image-extract-") if err != nil { stderr.PrintE("error creating temporary directory", err) return 254 } defer os.RemoveAll(tmpDir) extractDir = tmpDir } else { if err := os.MkdirAll(absOutputDir, 0755); err != nil { stderr.PrintE("error creating output directory", err) return 254 } } if err := tar.ExtractTar(aci, extractDir, false, user.NewBlankUidRange(), nil); err != nil { stderr.PrintE("error extracting ACI", err) return 254 } if flagExtractRootfsOnly { rootfsDir := filepath.Join(extractDir, 
"rootfs") if err := os.Rename(rootfsDir, absOutputDir); err != nil { if e, ok := err.(*os.LinkError); ok && e.Err == syscall.EXDEV { // it's on a different device, fall back to copying if err := fileutil.CopyTree(rootfsDir, absOutputDir, user.NewBlankUidRange()); err != nil { stderr.PrintE("error copying ACI rootfs", err) return 254 } } else { stderr.PrintE("error moving ACI rootfs", err) return 254 } } } return 0 }
func runExport(cmd *cobra.Command, args []string) (exit int) { if len(args) != 2 { cmd.Usage() return 1 } outACI := args[1] ext := filepath.Ext(outACI) if ext != schema.ACIExtension { stderr.Printf("extension must be %s (given %s)", schema.ACIExtension, outACI) return 1 } p, err := getPodFromUUIDString(args[0]) if err != nil { stderr.PrintE("problem retrieving pod", err) return 1 } defer p.Close() if !p.isExited { stderr.Print("pod is not exited. Only exited pods can be exported") return 1 } app, err := getApp(p) if err != nil { stderr.PrintE("unable to find app", err) return 1 } root := common.AppPath(p.path(), app.Name) manifestPath := filepath.Join(common.AppInfoPath(p.path(), app.Name), aci.ManifestFile) if p.usesOverlay() { tmpDir := filepath.Join(getDataDir(), "tmp") if err := os.MkdirAll(tmpDir, common.DefaultRegularDirPerm); err != nil { stderr.PrintE("unable to create temp directory", err) return 1 } podDir, err := ioutil.TempDir(tmpDir, fmt.Sprintf("rkt-export-%s", p.uuid)) if err != nil { stderr.PrintE("unable to create export temp directory", err) return 1 } defer func() { if err := os.RemoveAll(podDir); err != nil { stderr.PrintE("problem removing temp directory", err) exit = 1 } }() mntDir := filepath.Join(podDir, "rootfs") if err := os.Mkdir(mntDir, common.DefaultRegularDirPerm); err != nil { stderr.PrintE("unable to create rootfs directory inside temp directory", err) return 1 } if err := mountOverlay(p, app, mntDir); err != nil { stderr.PrintE(fmt.Sprintf("couldn't mount directory at %s", mntDir), err) return 1 } defer func() { if err := syscall.Unmount(mntDir, 0); err != nil { stderr.PrintE(fmt.Sprintf("error unmounting directory %s", mntDir), err) exit = 1 } }() root = podDir } else { hasMPs, err := appHasMountpoints(p.path(), app.Name) if err != nil { stderr.PrintE("error parsing mountpoints", err) return 1 } if hasMPs { stderr.Printf("pod has remaining mountpoints. 
Only pods using overlayfs or with no mountpoints can be exported") return 1 } } // Check for user namespace (--private-user), if in use get uidRange var uidRange *user.UidRange privUserFile := filepath.Join(p.path(), common.PrivateUsersPreparedFilename) privUserContent, err := ioutil.ReadFile(privUserFile) if err == nil { uidRange = user.NewBlankUidRange() // The file was found, save uid & gid shift and count if err := uidRange.Deserialize(privUserContent); err != nil { stderr.PrintE(fmt.Sprintf("problem deserializing the content of %s", common.PrivateUsersPreparedFilename), err) return 1 } } if err = buildAci(root, manifestPath, outACI, uidRange); err != nil { stderr.PrintE("error building aci", err) return 1 } return 0 }
// TODO(iaguis): add override options for Exec, Environment (à la patch-manifest) func AddApp(cfg RunConfig, dir string, img *types.Hash) error { im, err := cfg.Store.GetImageManifest(img.String()) if err != nil { return err } appName, err := imageNameToAppName(im.Name) if err != nil { return err } p, err := stage1types.LoadPod(dir, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } pm := p.Manifest var mutable bool ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable") if ok { mutable, err = strconv.ParseBool(ms) if err != nil { return errwrap.Wrap(errors.New("error parsing mutable annotation"), err) } } if !mutable { return errors.New("immutable pod: cannot add application") } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } if im.App == nil { return fmt.Errorf("error: image %s has no app section)", img) } appInfoDir := common.AppInfoPath(dir, *appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } uidRange := user.NewBlankUidRange() // TODO(iaguis): DRY: refactor this var treeStoreID string if cfg.UseOverlay { treeStoreID, _, err := cfg.TreeStore.Render(img.String(), false) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } hash, err := cfg.TreeStore.Check(treeStoreID) if err != nil { log.PrintE("warning: tree cache is in a bad state. 
Rebuilding...", err) var err error treeStoreID, hash, err = cfg.TreeStore.Render(img.String(), true) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } } cfg.RootHash = hash if err := ioutil.WriteFile(common.AppTreeStoreIDPath(dir, *appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { return errwrap.Wrap(errors.New("error writing app treeStoreID"), err) } } else { ad := common.AppPath(dir, *appName) err := os.MkdirAll(ad, common.DefaultRegularDirPerm) if err != nil { return errwrap.Wrap(errors.New("error creating image directory"), err) } privateUsers, err := preparedWithPrivateUsers(dir) if err != nil { log.FatalE("error reading user namespace information", err) } if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } shiftedUid, shiftedGid, err := uidRange.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid())) if err != nil { return errwrap.Wrap(errors.New("error getting uid, gid"), err) } if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil { return errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", *appName), err) } if err := aci.RenderACIWithImageID(*img, ad, cfg.Store, uidRange); err != nil { return errwrap.Wrap(errors.New("error rendering ACI"), err) } } if err := writeManifest(*cfg.CommonConfig, *img, appInfoDir); err != nil { return errwrap.Wrap(errors.New("error writing manifest"), err) } if err := setupAppImage(cfg, *appName, *img, dir, cfg.UseOverlay); err != nil { return fmt.Errorf("error setting up app image: %v", err) } if cfg.UseOverlay { imgDir := filepath.Join(dir, "overlay", treeStoreID) if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil { return err } } ra := schema.RuntimeApp{ Name: *appName, App: im.App, Image: schema.RuntimeImage{ Name: &im.Name, ID: *img, Labels: im.Labels, }, // TODO(iaguis): default isolators } env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) envFilePath := 
filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String()) if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { return err } apps := append(p.Manifest.Apps, ra) p.Manifest.Apps = apps if err := updatePodManifest(dir, p.Manifest); err != nil { return err } if _, err := os.Create(common.AppCreatedPath(p.Root, appName.String())); err != nil { return err } return nil }
func (uw *UnitWriter) AppUnit( ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions, opts ...*unit.UnitOption, ) { if uw.err != nil { return } flavor, systemdVersion, err := GetFlavor(uw.p) if err != nil { uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) return } app := ra.App appName := ra.Name imgName := uw.p.AppNameToImageName(appName) if len(app.Exec) == 0 { uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if uw.p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL) } envFilePath := EnvFilePath(uw.p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { uw.err = err return } if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) return } u, g, err := parseUserGroup(uw.p, ra, uidRange) if err != nil { uw.err = err return } if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil { uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err) return } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { uw.err = err return } execStart := append([]string{binPath}, app.Exec[1:]...) 
execStartString := quoteExec(execStart) opts = append(opts, []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), }...) 
if len(supplementaryGroups) > 0 { opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups) } if supportsNotify(uw.p, appName.String()) { opts = append(opts, unit.NewUnitOption("Service", "Type", "notify")) } if !insecureOptions.DisableCapabilities { opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " "))) } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) // Apply seccomp isolator, if any and not opt-ing out; // see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter= if !insecureOptions.DisableSeccomp { var forceNoNewPrivileges bool unprivileged := (u != 0) opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators) if err != nil { uw.err = err return } // Seccomp filters require NoNewPrivileges for unprivileged apps, that may override // manifest annotation. if forceNoNewPrivileges { noNewPrivileges = true } } opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges))) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range uw.p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs. 
if err != nil { uw.err = err return } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := uw.p.Images[appName.String()] mounts := GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { uw.err = err return } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } if len(rwDirs) > 0 { opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs) } // Restrict access to sensitive paths (eg. procfs and sysfs entries). if !insecureOptions.DisablePaths { opts = protectKernelTunables(opts, appName, systemdVersion) } // Generate default device policy for the app, as well as the list of allowed devices. // For kvm flavor, devices are VM-specific and restricting them is not strictly needed. if !insecureOptions.DisablePaths && flavor != "kvm" { opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed")) deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange) if err != nil { uw.err = err return } for _, dev := range deviceAllows { opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev)) } } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name) return } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = 
append(saPorts, p) } } doWithIsolator := func(isolator string, f func() error) bool { ok, err := cgroup.IsIsolatorSupported(isolator) if err != nil { uw.err = err return true } if !ok { fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator) } if err := f(); err != nil { uw.err = err return true } return false } exit := false for _, i := range app.Isolators { if exit { return } switch v := i.Value().(type) { case *types.ResourceMemory: exit = doWithIsolator("memory", func() error { if v.Limit() == nil { return nil } opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value())))) return nil }) case *types.ResourceCPU: exit = doWithIsolator("cpu", func() error { if v.Limit() == nil { return nil } if v.Limit().Value() > resource.MaxMilliValue { return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String()) } quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%" opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota)) return nil }) } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol) return } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. 
port := findHostPort(*uw.p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err) return } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err) return } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err) return } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...) uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName)) }