func AddApp(cfg AddConfig) error { // there should be only one app in the config app := cfg.Apps.Last() if app == nil { return errors.New("no image specified") } am, err := cfg.Store.GetImageManifest(cfg.Image.String()) if err != nil { return err } var appName *types.ACName if app.Name != "" { appName, err = types.NewACName(app.Name) if err != nil { return err } } else { appName, err = imageNameToAppName(am.Name) if err != nil { return err } } pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String()) if err != nil { return errwrap.Wrap(errors.New("error loading pod"), err) } defer pod.Close() debug("locking pod manifest") if err := pod.ExclusiveLockManifest(); err != nil { return errwrap.Wrap(errors.New("failed to lock pod manifest"), err) } defer pod.UnlockManifest() pm, err := pod.SandboxManifest() if err != nil { return errwrap.Wrap(errors.New("cannot add application"), err) } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } if am.App == nil && app.Exec == "" { return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", cfg.Image) } appInfoDir := common.AppInfoPath(cfg.PodPath, *appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } pcfg := PrepareConfig{ CommonConfig: cfg.CommonConfig, PrivateUsers: user.NewBlankUidRange(), } if cfg.UsesOverlay { privateUsers, err := preparedWithPrivateUsers(cfg.PodPath) if err != nil { log.FatalE("error reading user namespace information", err) } if err := pcfg.PrivateUsers.Deserialize([]byte(privateUsers)); err != nil { return err } } treeStoreID, err := prepareAppImage(pcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay) if err != nil { return errwrap.Wrap(fmt.Errorf("error preparing image %s", cfg.Image), err) } rcfg := RunConfig{ CommonConfig: cfg.CommonConfig, UseOverlay: cfg.UsesOverlay, RktGid: cfg.RktGid, } if err := setupAppImage(rcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay); err != nil { return fmt.Errorf("error setting up app image: %v", err) } if cfg.UsesOverlay { imgDir := filepath.Join(cfg.PodPath, "overlay", treeStoreID) if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil { return err } } ra := schema.RuntimeApp{ Name: *appName, App: am.App, Image: schema.RuntimeImage{ Name: &am.Name, ID: cfg.Image, Labels: am.Labels, }, Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), ReadOnlyRootFS: app.ReadOnlyRootFS, } if app.Exec != "" { // Create a minimal App section if not present if am.App == nil { ra.App = &types.App{ User: strconv.Itoa(os.Getuid()), Group: strconv.Itoa(os.Getgid()), } } ra.App.Exec = []string{app.Exec} } if app.Args != nil { ra.App.Exec = append(ra.App.Exec, app.Args...) } if app.WorkingDir != "" { ra.App.WorkingDirectory = app.WorkingDir } if err := prepareIsolators(app, ra.App); err != nil { return err } if app.User != "" { ra.App.User = app.User } if app.Group != "" { ra.App.Group = app.Group } if app.SupplementaryGIDs != nil { ra.App.SupplementaryGIDs = app.SupplementaryGIDs } if app.UserAnnotations != nil { ra.App.UserAnnotations = app.UserAnnotations } if app.UserLabels != nil { ra.App.UserLabels = app.UserLabels } if app.Environments != nil { envs := make([]string, 0, len(app.Environments)) for name, value := range app.Environments { envs = append(envs, fmt.Sprintf("%s=%s", name, value)) } // Let the app level environment override the environment variables. mergeEnvs(&ra.App.Environment, envs, true) } env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) envFilePath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", appName.String()) if err := common.WriteEnvFile(env, pcfg.PrivateUsers, envFilePath); err != nil { return err } debug("adding app to sandbox") pm.Apps = append(pm.Apps, ra) if err := pod.UpdateManifest(pm, cfg.PodPath); err != nil { return err } args := []string{ fmt.Sprintf("--debug=%t", cfg.Debug), fmt.Sprintf("--uuid=%s", cfg.UUID), fmt.Sprintf("--app=%s", appName), } if _, err := os.Create(common.AppCreatedPath(pod.Path(), appName.String())); err != nil { return err } ce := CrossingEntrypoint{ PodPath: cfg.PodPath, PodPID: cfg.PodPID, AppName: appName.String(), EntrypointName: appAddEntrypoint, EntrypointArgs: args, Interactive: false, } if err := ce.Run(); err != nil { return err } return nil }
// TODO(iaguis): add override options for Exec, Environment (à la patch-manifest) func AddApp(cfg RunConfig, dir string, img *types.Hash) error { im, err := cfg.Store.GetImageManifest(img.String()) if err != nil { return err } appName, err := imageNameToAppName(im.Name) if err != nil { return err } p, err := stage1types.LoadPod(dir, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } pm := p.Manifest var mutable bool ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable") if ok { mutable, err = strconv.ParseBool(ms) if err != nil { return errwrap.Wrap(errors.New("error parsing mutable annotation"), err) } } if !mutable { return errors.New("immutable pod: cannot add application") } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } if im.App == nil { return fmt.Errorf("error: image %s has no app section)", img) } appInfoDir := common.AppInfoPath(dir, *appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } uidRange := user.NewBlankUidRange() // TODO(iaguis): DRY: refactor this var treeStoreID string if cfg.UseOverlay { treeStoreID, _, err := cfg.TreeStore.Render(img.String(), false) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } hash, err := cfg.TreeStore.Check(treeStoreID) if err != nil { log.PrintE("warning: tree cache is in a bad state. Rebuilding...", err) var err error treeStoreID, hash, err = cfg.TreeStore.Render(img.String(), true) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } } cfg.RootHash = hash if err := ioutil.WriteFile(common.AppTreeStoreIDPath(dir, *appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { return errwrap.Wrap(errors.New("error writing app treeStoreID"), err) } } else { ad := common.AppPath(dir, *appName) err := os.MkdirAll(ad, common.DefaultRegularDirPerm) if err != nil { return errwrap.Wrap(errors.New("error creating image directory"), err) } privateUsers, err := preparedWithPrivateUsers(dir) if err != nil { log.FatalE("error reading user namespace information", err) } if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } shiftedUid, shiftedGid, err := uidRange.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid())) if err != nil { return errwrap.Wrap(errors.New("error getting uid, gid"), err) } if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil { return errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", *appName), err) } if err := aci.RenderACIWithImageID(*img, ad, cfg.Store, uidRange); err != nil { return errwrap.Wrap(errors.New("error rendering ACI"), err) } } if err := writeManifest(*cfg.CommonConfig, *img, appInfoDir); err != nil { return errwrap.Wrap(errors.New("error writing manifest"), err) } if err := setupAppImage(cfg, *appName, *img, dir, cfg.UseOverlay); err != nil { return fmt.Errorf("error setting up app image: %v", err) } if cfg.UseOverlay { imgDir := filepath.Join(dir, "overlay", treeStoreID) if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil { return err } } ra := schema.RuntimeApp{ Name: *appName, App: im.App, Image: schema.RuntimeImage{ Name: &im.Name, ID: *img, Labels: im.Labels, }, // TODO(iaguis): default isolators } env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) envFilePath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String()) if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { return err } apps := append(p.Manifest.Apps, ra) p.Manifest.Apps = apps if err := updatePodManifest(dir, p.Manifest); err != nil { return err } if _, err := os.Create(common.AppCreatedPath(p.Root, appName.String())); err != nil { return err } return nil }
func (uw *UnitWriter) AppUnit( ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions, opts ...*unit.UnitOption, ) { if uw.err != nil { return } flavor, systemdVersion, err := GetFlavor(uw.p) if err != nil { uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) return } app := ra.App appName := ra.Name imgName := uw.p.AppNameToImageName(appName) if len(app.Exec) == 0 { uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if uw.p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL) } envFilePath := EnvFilePath(uw.p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { uw.err = err return } if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) return } u, g, err := parseUserGroup(uw.p, ra, uidRange) if err != nil { uw.err = err return } if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil { uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err) return } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { uw.err = err return } execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts = append(opts, []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), }...) if len(supplementaryGroups) > 0 { opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups) } if supportsNotify(uw.p, appName.String()) { opts = append(opts, unit.NewUnitOption("Service", "Type", "notify")) } if !insecureOptions.DisableCapabilities { opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " "))) } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) // Apply seccomp isolator, if any and not opt-ing out; // see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter= if !insecureOptions.DisableSeccomp { var forceNoNewPrivileges bool unprivileged := (u != 0) opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators) if err != nil { uw.err = err return } // Seccomp filters require NoNewPrivileges for unprivileged apps, that may override // manifest annotation. if forceNoNewPrivileges { noNewPrivileges = true } } opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges))) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range uw.p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs. if err != nil { uw.err = err return } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := uw.p.Images[appName.String()] mounts := GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { uw.err = err return } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } if len(rwDirs) > 0 { opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs) } // Restrict access to sensitive paths (eg. procfs and sysfs entries). if !insecureOptions.DisablePaths { opts = protectKernelTunables(opts, appName, systemdVersion) } // Generate default device policy for the app, as well as the list of allowed devices. // For kvm flavor, devices are VM-specific and restricting them is not strictly needed. if !insecureOptions.DisablePaths && flavor != "kvm" { opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed")) deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange) if err != nil { uw.err = err return } for _, dev := range deviceAllows { opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev)) } } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name) return } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } doWithIsolator := func(isolator string, f func() error) bool { ok, err := cgroup.IsIsolatorSupported(isolator) if err != nil { uw.err = err return true } if !ok { fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator) } if err := f(); err != nil { uw.err = err return true } return false } exit := false for _, i := range app.Isolators { if exit { return } switch v := i.Value().(type) { case *types.ResourceMemory: exit = doWithIsolator("memory", func() error { if v.Limit() == nil { return nil } opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value())))) return nil }) case *types.ResourceCPU: exit = doWithIsolator("cpu", func() error { if v.Limit() == nil { return nil } if v.Limit().Value() > resource.MaxMilliValue { return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String()) } quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%" opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota)) return nil }) } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol) return } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*uw.p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err) return } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err) return } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err) return } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...) uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName)) }