func writeAppReaper(p *stage1commontypes.Pod, appName string) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), unit.NewUnitOption("Unit", "Wants", "shutdown.service"), unit.NewUnitOption("Unit", "After", "shutdown.service"), unit.NewUnitOption("Unit", "Conflicts", "exit.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"), unit.NewUnitOption("Service", "RemainAfterExit", "yes"), unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/reaper.sh %s", appName)), } unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, fmt.Sprintf("reaper-%s.service", appName)), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } return nil }
// WriteDefaultTarget writes the default.target unit file // which is responsible for bringing up the applications func WriteDefaultTarget(p *stage1commontypes.Pod) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "rkt apps target"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), } for i := range p.Manifest.Apps { ra := &p.Manifest.Apps[i] serviceName := ServiceUnitName(ra.Name) opts = append(opts, unit.NewUnitOption("Unit", "After", serviceName)) opts = append(opts, unit.NewUnitOption("Unit", "Wants", serviceName)) } unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, "default.target"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { return err } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return err } return nil }
func GenerateNetworkInterfaceUnits(unitsPath string, netDescriptions []netDescriber) error { for i, netDescription := range netDescriptions { ifName := fmt.Sprintf(networking.IfNamePattern, i) netAddress := net.IPNet{ IP: netDescription.GuestIP(), Mask: net.IPMask(netDescription.Mask()), } address := netAddress.String() mac, err := generateMacAddress() if err != nil { return err } opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Network configuration for device: %v", ifName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Service", "Type", "oneshot"), unit.NewUnitOption("Service", "RemainAfterExit", "true"), unit.NewUnitOption("Service", "ExecStartPre", downInterfaceCommand(ifName)), unit.NewUnitOption("Service", "ExecStartPre", setMacCommand(ifName, mac.String())), unit.NewUnitOption("Service", "ExecStartPre", upInterfaceCommand(ifName)), unit.NewUnitOption("Service", "ExecStart", addAddressCommand(address, ifName)), unit.NewUnitOption("Install", "RequiredBy", "default.target"), } for _, route := range netDescription.Routes() { gw := route.GW if gw == nil { gw = netDescription.Gateway() } opts = append( opts, unit.NewUnitOption( "Service", "ExecStartPost", addRouteCommand(route.Dst.String(), gw.String()), ), ) } unitName := fmt.Sprintf("interface-%s", ifName) + ".service" unitBytes, err := ioutil.ReadAll(unit.Serialize(opts)) if err != nil { return errwrap.Wrap(fmt.Errorf("failed to serialize network unit file to bytes %q", unitName), err) } err = ioutil.WriteFile(filepath.Join(unitsPath, unitName), unitBytes, 0644) if err != nil { return errwrap.Wrap(fmt.Errorf("failed to create network unit file %q", unitName), err) } log.Printf("network unit created: %q in %q (iface=%q, addr=%q)", unitName, unitsPath, ifName, address) } return nil }
// AppReaperUnit writes an app reaper service unit for the given app in the given path using the given unit options. func (uw *UnitWriter) AppReaperUnit(appName types.ACName, binPath string, opts ...*unit.UnitOption) { if uw.err != nil { return } opts = append(opts, []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "exit.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"), unit.NewUnitOption("Service", "RemainAfterExit", "yes"), unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf( "/reaper.sh \"%s\" \"%s\" \"%s\"", appName, common.RelAppRootfsPath(appName), binPath, )), }...) uw.WriteUnit( ServiceUnitPath(uw.p.Root, types.ACName(fmt.Sprintf("reaper-%s", appName))), fmt.Sprintf("failed to write app %q reaper service", appName), opts..., ) }
// protectKernelTunables restricts access to some security-sensitive paths under // /proc and /sys. Entries are either hidden or just made read-only to app. // This protection is enabled by default. func protectKernelTunables(opts []*unit.UnitOption, appName types.ACName, systemdVersion int) []*unit.UnitOption { roPaths := []string{ "/proc/bus/", "/proc/sys/kernel/core_pattern", "/proc/sys/kernel/modprobe", "/proc/sys/vm/panic_on_oom", "/proc/sysrq-trigger", "/sys/block/", "/sys/bus/", "/sys/class/", "/sys/dev/", "/sys/devices/", "/sys/kernel/", } hiddenDirs := []string{ "/sys/firmware/", "/sys/fs/", "/sys/hypervisor/", "/sys/module/", "/sys/power/", } hiddenPaths := []string{ "/proc/config.gz", "/proc/kallsyms", "/proc/sched_debug", "/proc/kcore", "/proc/kmem", "/proc/mem", } // Paths prefixed with "-" are ignored if they do not exist: // https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ReadWriteDirectories= for _, p := range roPaths { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p)))) } for _, p := range hiddenDirs { opts = append(opts, unit.NewUnitOption("Service", "InaccessibleDirectories", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p)))) } if systemdVersion >= 231 { for _, p := range hiddenPaths { opts = append(opts, unit.NewUnitOption("Service", "InaccessiblePaths", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p)))) } } if systemdVersion >= 233 { opts = append(opts, unit.NewUnitOption("Service", "ProtectKernelTunables", "true")) } return opts }
// writeShutdownService writes a shutdown.service unit with the given unit options // if no previous error occured. // exec specifies how systemctl should be invoked, i.e. ExecStart, or ExecStop. func (uw *UnitWriter) writeShutdownService(exec string, opts ...*unit.UnitOption) { if uw.err != nil { return } flavor, systemdVersion, err := GetFlavor(uw.p) if err != nil { uw.err = errwrap.Wrap(errors.New("failed to create shutdown service"), err) return } opts = append(opts, []*unit.UnitOption{ // The default stdout is /dev/console (the tty created by nspawn). // But the tty might be destroyed if rkt is executed via ssh and // the user terminates the ssh session. We still want // shutdown.service to succeed in that case, so don't use // /dev/console. unit.NewUnitOption("Service", "StandardInput", "null"), unit.NewUnitOption("Service", "StandardOutput", "null"), unit.NewUnitOption("Service", "StandardError", "null"), }...) shutdownVerb := "exit" // systemd <v227 doesn't allow the "exit" verb when running as PID 1, so // use "halt". // If systemdVersion is 0 it means it couldn't be guessed, assume it's new // enough for "systemctl exit". // This can happen, for example, when building rkt with: // // ./configure --with-stage1-flavors=src --with-stage1-systemd-version=master // // The patches for the "exit" verb are backported to the "coreos" flavor, so // don't rely on the systemd version on the "coreos" flavor. if flavor != "coreos" && systemdVersion != 0 && systemdVersion < 227 { shutdownVerb = "halt" } opts = append( opts, unit.NewUnitOption("Service", exec, fmt.Sprintf("/usr/bin/systemctl --force %s", shutdownVerb)), ) uw.WriteUnit( ServiceUnitPath(uw.p.Root, "shutdown"), "failed to create shutdown service", opts..., ) }
func addCpuLimit(opts []*unit.UnitOption, limit *resource.Quantity) ([]*unit.UnitOption, error) { if limit.Value() > resource.MaxMilliValue { return nil, fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", limit.String()) } quota := strconv.Itoa(int(limit.MilliValue()/10)) + "%" opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota)) return opts, nil }
// installNewMountUnit creates and installs a new mount unit in the default // systemd location (/usr/lib/systemd/system) inside the pod stage1 filesystem. // root is pod's absolute stage1 path (from Pod.Root). // beforeAndrequiredBy creates a systemd unit dependency (can be space separated // for multi). // It returns the name of the generated unit. func installNewMountUnit(root, what, where, fsType, options, beforeAndrequiredBy, unitsDir string) (string, error) { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Mount unit for %s", where)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Before", beforeAndrequiredBy), unit.NewUnitOption("Mount", "What", what), unit.NewUnitOption("Mount", "Where", where), unit.NewUnitOption("Mount", "Type", fsType), unit.NewUnitOption("Mount", "Options", options), unit.NewUnitOption("Install", "RequiredBy", beforeAndrequiredBy), } unitsPath := filepath.Join(root, unitsDir) unitName := unit.UnitNamePathEscape(where + ".mount") if err := writeUnit(opts, filepath.Join(unitsPath, unitName)); err != nil { return "", err } log.Printf("mount unit created: %q in %q (what=%q, where=%q)", unitName, unitsPath, what, where) return unitName, nil }
func writeShutdownService(p *stage1commontypes.Pod) error { flavor, systemdVersion, err := GetFlavor(p) if err != nil { return err } opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "Pod shutdown"), unit.NewUnitOption("Unit", "AllowIsolate", "true"), unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Service", "RemainAfterExit", "yes"), } shutdownVerb := "exit" // systemd <v227 doesn't allow the "exit" verb when running as PID 1, so // use "halt". // If systemdVersion is 0 it means it couldn't be guessed, assume it's new // enough for "systemctl exit". // This can happen, for example, when building rkt with: // // ./configure --with-stage1-flavors=src --with-stage1-systemd-version=master // // The patches for the "exit" verb are backported to the "coreos" flavor, so // don't rely on the systemd version on the "coreos" flavor. if flavor != "coreos" && systemdVersion != 0 && systemdVersion < 227 { shutdownVerb = "halt" } opts = append(opts, unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/usr/bin/systemctl --force %s", shutdownVerb))) unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, "shutdown.service"), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write unit file"), err) } return nil }
func promptSystemd() { if !systemd.IsRunningSystemd() { log.Debugf("not running systemd") return } log.Debug("connecting to systemd") conn, err := sddbus.New() if err != nil { log.Errore(err, "connect to systemd") return } defer conn.Close() log.Debug("connected") props, err := conn.GetUnitProperties("acmetool-redirector.service") if err != nil { log.Errore(err, "systemd GetUnitProperties") return } if props["LoadState"].(string) != "not-found" { log.Info("acmetool-redirector.service unit already installed, skipping") return } r, err := interaction.Auto.Prompt(&interaction.Challenge{ Title: "Install Redirector as systemd Service?", Body: `Would you like acmetool to automatically install the redirector as a systemd service? The service name will be acmetool-redirector.`, ResponseType: interaction.RTYesNo, UniqueID: "acmetool-quickstart-install-redirector-systemd", }) log.Fatale(err, "interaction") if r.Cancelled { return } username, err := determineAppropriateUsername() if err != nil { log.Errore(err, "determine appropriate username") return } f, err := os.OpenFile("/etc/systemd/system/acmetool-redirector.service", os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644) if err != nil { log.Errore(err, "acmetool-redirector.service unit file already exists?") return } defer f.Close() rdr := sdunit.Serialize([]*sdunit.UnitOption{ sdunit.NewUnitOption("Unit", "Description", "acmetool HTTP redirector"), sdunit.NewUnitOption("Service", "Type", "notify"), sdunit.NewUnitOption("Service", "ExecStart", exepath.Abs+` redirector --service.uid=`+username), sdunit.NewUnitOption("Service", "Restart", "always"), sdunit.NewUnitOption("Service", "RestartSec", "30"), sdunit.NewUnitOption("Install", "WantedBy", "multi-user.target"), }) _, err = io.Copy(f, rdr) if err != nil { log.Errore(err, "cannot write unit file") return } f.Close() err = conn.Reload() // softfail log.Warne(err, "systemctl daemon-reload failed") _, _, err = conn.EnableUnitFiles([]string{"acmetool-redirector.service"}, false, false) log.Errore(err, "failed to enable unit acmetool-redirector.service") _, err = conn.StartUnit("acmetool-redirector.service", "replace", nil) log.Errore(err, "failed to start acmetool-redirector") resultStr := "The acmetool-redirector service was successfully started." if err != nil { resultStr = "The acmetool-redirector service WAS NOT successfully started. You may have a web server listening on port 80. You will need to troubleshoot this yourself." } _, err = interaction.Auto.Prompt(&interaction.Challenge{ Title: "systemd Service Installation Complete", Body: fmt.Sprintf(`acmetool-redirector has been installed as a systemd service. %s`, resultStr), UniqueID: "acmetool-quickstart-complete", }) log.Errore(err, "interaction") }
// AppToSystemdMountUnits prepare bind mount unit for empty or host kind mounting // between stage1 rootfs and chrooted filesystem for application func AppToSystemdMountUnits(root string, appName types.ACName, volumes []types.Volume, ra *schema.RuntimeApp, unitsDir string) error { app := ra.App vols := make(map[types.ACName]types.Volume) for _, v := range volumes { vols[v.Name] = v } mounts := GenerateMounts(ra, vols) for _, m := range mounts { vol := vols[m.Volume] // source relative to stage1 rootfs to relative pod root whatPath := filepath.Join(stage1MntDir, vol.Name.String()) whatFullPath := filepath.Join(root, whatPath) if vol.Kind == "empty" { log.Printf("creating an empty volume folder for sharing: %q", whatFullPath) err := os.MkdirAll(whatFullPath, 0700) if err != nil { return err } } // destination relative to stage1 rootfs and relative to pod root wherePath := filepath.Join(common.RelAppRootfsPath(appName), m.Path) whereFullPath := filepath.Join(root, wherePath) // assertion to make sure that "what" exists (created earlier by PodToSystemdHostMountUnits) log.Printf("checking required source path: %q", whatFullPath) if _, err := os.Stat(whatFullPath); os.IsNotExist(err) { return fmt.Errorf("bug: missing source for volume %v", vol.Name) } // optionally prepare app directory log.Printf("optionally preparing destination path: %q", whereFullPath) err := os.MkdirAll(whereFullPath, 0700) if err != nil { return errwrap.Wrap(fmt.Errorf("failed to prepare dir for mount %v", m.Volume), err) } // install new mount unit for bind mount /mnt/volumeName -> /opt/stage2/{app-id}/rootfs/{{mountPoint.Path}} mu, err := installNewMountUnit( root, // where put a mount unit whatPath, // what - stage1 rootfs /mnt/VolumeName wherePath, // where - inside chroot app filesystem "bind", // fstype "bind", // options serviceUnitName(appName), unitsDir, ) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot install new mount unit for app %q", appName.String()), err) } // TODO(iaguis) when we update util-linux to 2.27, this code can go // away and we can bind-mount RO with one unit file. // http://ftp.kernel.org/pub/linux/utils/util-linux/v2.27/v2.27-ReleaseNotes if IsMountReadOnly(vol, app.MountPoints) { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Remount read-only unit for %s", wherePath)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "After", mu), unit.NewUnitOption("Unit", "Wants", mu), unit.NewUnitOption("Service", "ExecStart", fmt.Sprintf("/usr/bin/mount -o remount,ro %s", wherePath)), unit.NewUnitOption("Install", "RequiredBy", mu), } remountUnitPath := filepath.Join(root, unitsDir, unit.UnitNamePathEscape(wherePath+"-remount.service")) if err := writeUnit(opts, remountUnitPath); err != nil { return err } } } return nil }
// TODO use named flags instead of positional func main() { flag.Parse() stage1initcommon.InitDebug(debug) log, diag, _ = rktlog.NewLogSet("stage1", debug) if !debug { diag.SetOutput(ioutil.Discard) } uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.PrintE("UUID is missing or malformed", err) os.Exit(254) } appName, err := types.NewACName(flag.Arg(1)) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } enterEP := flag.Arg(2) root := "." p, err := stage1types.LoadPod(root, uuid) if err != nil { log.PrintE("failed to load pod", err) os.Exit(254) } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } ra := p.Manifest.Apps.Get(*appName) if ra == nil { log.Printf("failed to get app") os.Exit(254) } if ra.App.WorkingDirectory == "" { ra.App.WorkingDirectory = "/" } binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { log.PrintE("failed to find bin path", err) os.Exit(254) } w := stage1initcommon.NewUnitWriter(p) w.AppUnit(ra, binPath, privateUsers, insecureOptions, unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Service", "StandardOutput", "journal+console"), unit.NewUnitOption("Service", "StandardError", "journal+console"), ) w.AppReaperUnit(ra.Name, binPath) if err := w.Error(); err != nil { log.PrintE("error generating app units", err) os.Exit(254) } args := []string{enterEP} args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") cmd := exec.Cmd{ Path: args[0], Args: args, } if err := cmd.Run(); err != nil { log.PrintE("error executing daemon-reload", err) os.Exit(254) } args = []string{enterEP} args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) args = append(args, "/usr/bin/systemctl") args = append(args, "start") args = append(args, appName.String()) cmd = exec.Cmd{ Path: args[0], Args: args, } if err := cmd.Run(); err != nil { log.PrintE(fmt.Sprintf("error starting app %q", appName.String()), err) os.Exit(254) } // TODO unmount all the volumes os.Exit(0) }
func MutableEnv(p *stage1commontypes.Pod) error { w := NewUnitWriter(p) w.WriteUnit( TargetUnitPath(p.Root, "default"), "failed to write default.target", unit.NewUnitOption("Unit", "Description", "rkt apps target"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Requires", "systemd-journald.service"), unit.NewUnitOption("Unit", "After", "systemd-journald.service"), unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), ) w.WriteUnit( ServiceUnitPath(p.Root, "prepare-app@"), "failed to write prepare-app service template", unit.NewUnitOption("Unit", "Description", "Prepare minimum environment for chrooted applications"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "OnFailureJobMode", "fail"), unit.NewUnitOption("Service", "Type", "oneshot"), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", "/prepare-app %I"), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), unit.NewUnitOption("Service", "CapabilityBoundingSet", "CAP_SYS_ADMIN CAP_DAC_OVERRIDE"), ) w.WriteUnit( TargetUnitPath(p.Root, "halt"), "failed to write halt target", unit.NewUnitOption("Unit", "Description", "Halt"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "AllowIsolate", "true"), unit.NewUnitOption("Unit", "Requires", "shutdown.service"), unit.NewUnitOption("Unit", "After", "shutdown.service"), ) w.writeShutdownService( "ExecStart", unit.NewUnitOption("Unit", "Description", "Pod shutdown"), unit.NewUnitOption("Unit", "AllowIsolate", "true"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Service", "RemainAfterExit", "yes"), ) return w.Error() }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name image, ok := p.Images[appName.String()] if !ok { // This is impossible as we have updated the map in LoadPod(). panic(fmt.Sprintf("No images for app %q", ra.Name.String())) } imgName := image.Name if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return errwrap.Wrap(errors.New("unable to write environment file"), err) } // This is a partial implementation for app.User and app.Group: // For now, only numeric ids (and the string "root") are supported. var uid, gid int var err error if app.User == "root" { uid = 0 } else { uid, err = strconv.Atoi(app.User) if err != nil { return fmt.Errorf("non-numerical user id not supported yet") } } if app.Group == "root" { gid = 0 } else { gid, err = strconv.Atoi(app.Group) if err != nil { return fmt.Errorf("non-numerical group id not supported yet") } } execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(uid), generateGidArg(gid, app.SupplementaryGIDs)} execStart := quoteExec(append(execWrap, app.Exec...)) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStart), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), } if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(append(execWrap, eh.Exec...)) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", sap.Port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return errwrap.Wrap(errors.New("failed to prepare mount units"), err) } } if err = writeAppReaper(p, appName.String()); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
// appendOptionsList updates an existing unit options list appending // an array of new properties, one entry at a time. // This is the preferred method to avoid hitting line length limits // in unit files. Target property must support multi-line entries. func appendOptionsList(opts []*unit.UnitOption, section string, property string, prefix string, vals []string) []*unit.UnitOption { for _, v := range vals { opts = append(opts, unit.NewUnitOption(section, property, fmt.Sprintf("%s%s", prefix, v))) } return opts }
// WritePrepareAppTemplate writes service unit files for preparing the pod's applications func WritePrepareAppTemplate(p *stage1commontypes.Pod) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "Prepare minimum environment for chrooted applications"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "OnFailureJobMode", "fail"), unit.NewUnitOption("Unit", "Requires", "systemd-journald.service"), unit.NewUnitOption("Unit", "After", "systemd-journald.service"), unit.NewUnitOption("Service", "Type", "oneshot"), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", "/prepare-app %I"), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), unit.NewUnitOption("Service", "CapabilityBoundingSet", "CAP_SYS_ADMIN CAP_DAC_OVERRIDE"), } unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, "[email protected]"), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } return nil }
func addMemoryLimit(opts []*unit.UnitOption, limit *resource.Quantity) ([]*unit.UnitOption, error) { opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(limit.Value())))) return opts, nil }
func ImmutableEnv(p *stage1commontypes.Pod, interactive bool, privateUsers string, insecureOptions Stage1InsecureOptions) error { w := NewUnitWriter(p) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "rkt apps target"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), } for i := range p.Manifest.Apps { ra := &p.Manifest.Apps[i] serviceName := ServiceUnitName(ra.Name) opts = append(opts, unit.NewUnitOption("Unit", "After", serviceName)) opts = append(opts, unit.NewUnitOption("Unit", "Wants", serviceName)) } w.WriteUnit( TargetUnitPath(p.Root, "default"), "failed to write default.target", opts..., ) w.WriteUnit( ServiceUnitPath(p.Root, "prepare-app@"), "failed to write prepare-app service template", unit.NewUnitOption("Unit", "Description", "Prepare minimum environment for chrooted applications"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "OnFailureJobMode", "fail"), unit.NewUnitOption("Unit", "Requires", "systemd-journald.service"), unit.NewUnitOption("Unit", "After", "systemd-journald.service"), unit.NewUnitOption("Service", "Type", "oneshot"), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", "/prepare-app %I"), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), unit.NewUnitOption("Service", "CapabilityBoundingSet", "CAP_SYS_ADMIN CAP_DAC_OVERRIDE"), ) w.WriteUnit( TargetUnitPath(p.Root, "halt"), "failed to write halt target", unit.NewUnitOption("Unit", "Description", "Halt"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "AllowIsolate", "true"), ) w.writeShutdownService( "ExecStop", unit.NewUnitOption("Unit", "Description", "Pod shutdown"), unit.NewUnitOption("Unit", "AllowIsolate", "true"), unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Service", "RemainAfterExit", "yes"), ) if err := w.Error(); err != nil { return err } for i := range p.Manifest.Apps { ra := &p.Manifest.Apps[i] if ra.App.WorkingDirectory == "" { ra.App.WorkingDirectory = "/" } binPath, err := FindBinPath(p, ra) if err != nil { return err } var opts []*unit.UnitOption if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) } w.AppUnit(ra, binPath, privateUsers, insecureOptions, opts...) w.AppReaperUnit(ra.Name, binPath, unit.NewUnitOption("Unit", "Wants", "shutdown.service"), unit.NewUnitOption("Unit", "After", "shutdown.service"), ) } return w.Error() }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } envFilePath := EnvFilePath(p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil { return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) } u, g, err := parseUserGroup(p, ra, uidRange) if err != nil { return err } if err := generateSysusers(p, ra, u, g, uidRange); err != nil { return errwrap.Wrap(errors.New("unable to generate sysusers"), err) } binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0]) if err != nil { return err } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return err } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", workDir), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")), unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")), unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), } // Restrict access to sensitive paths (eg. procfs) opts = protectSystemFiles(opts, appName) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return err } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := p.Images[appName.String()] for _, m := range GenerateMounts(ra, vols, imageManifest) { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { return err } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " "))) if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
func main() { flag.Parse() stage1initcommon.InitDebug(debug) log, diag, _ = rktlog.NewLogSet("app-add", debug) if !debug { diag.SetOutput(ioutil.Discard) } uuid, err := types.NewUUID(flagUUID) if err != nil { log.FatalE("UUID is missing or malformed", err) } appName, err := types.NewACName(flagApp) if err != nil { log.FatalE("invalid app name", err) } root := "." p, err := stage1types.LoadPod(root, uuid) if err != nil { log.FatalE("failed to load pod", err) } flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.FatalE("failed to get stage1 flavor", err) } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } ra := p.Manifest.Apps.Get(*appName) if ra == nil { log.Fatalf("failed to find app %q", *appName) } binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { log.FatalE("failed to find bin path", err) } if ra.App.WorkingDirectory == "" { ra.App.WorkingDirectory = "/" } enterCmd := stage1common.PrepareEnterCmd(false) stage1initcommon.AppAddMounts(p, ra, enterCmd) // when using host cgroups, make the subgroup writable by pod systemd if flavor != "kvm" { err = prepareAppCgroups(p, ra, enterCmd) if err != nil { log.FatalE("error preparing cgroups", err) } } // write service files w := stage1initcommon.NewUnitWriter(p) w.AppUnit(ra, binPath, privateUsers, insecureOptions, unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Service", "StandardOutput", "journal+console"), unit.NewUnitOption("Service", "StandardError", "journal+console"), ) w.AppReaperUnit(ra.Name, binPath) if err := w.Error(); err != nil { log.FatalE("error generating app units", err) } // stage2 environment is ready at this point, but systemd does not know // about the new application yet args := enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") cmd := exec.Cmd{ Path: args[0], Args: args, } if out, err := cmd.CombinedOutput(); err != nil { log.Fatalf("%q failed at daemon-reload:\n%s", appName, out) } os.Exit(0) }
func (uw *UnitWriter) AppUnit( ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions, opts ...*unit.UnitOption, ) { if uw.err != nil { return } flavor, systemdVersion, err := GetFlavor(uw.p) if err != nil { uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) return } app := ra.App appName := ra.Name imgName := uw.p.AppNameToImageName(appName) if len(app.Exec) == 0 { uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if uw.p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL) } envFilePath := EnvFilePath(uw.p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { uw.err = err return } if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) return } u, g, err := parseUserGroup(uw.p, ra, uidRange) if err != nil { uw.err = err return } if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil { uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err) return } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { uw.err = err return } execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts = append(opts, []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), }...) if len(supplementaryGroups) > 0 { opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups) } if supportsNotify(uw.p, appName.String()) { opts = append(opts, unit.NewUnitOption("Service", "Type", "notify")) } if !insecureOptions.DisableCapabilities { opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " "))) } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) // Apply seccomp isolator, if any and not opt-ing out; // see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter= if !insecureOptions.DisableSeccomp { var forceNoNewPrivileges bool unprivileged := (u != 0) opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators) if err != nil { uw.err = err return } // Seccomp filters require NoNewPrivileges for unprivileged apps, that may override // manifest annotation. if forceNoNewPrivileges { noNewPrivileges = true } } opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges))) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range uw.p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs. if err != nil { uw.err = err return } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := uw.p.Images[appName.String()] mounts := GenerateMounts(ra, vols, imageManifest) for _, m := range mounts { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { uw.err = err return } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } if len(rwDirs) > 0 { opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs) } // Restrict access to sensitive paths (eg. procfs and sysfs entries). if !insecureOptions.DisablePaths { opts = protectKernelTunables(opts, appName, systemdVersion) } // Generate default device policy for the app, as well as the list of allowed devices. // For kvm flavor, devices are VM-specific and restricting them is not strictly needed. if !insecureOptions.DisablePaths && flavor != "kvm" { opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed")) deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange) if err != nil { uw.err = err return } for _, dev := range deviceAllows { opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev)) } } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name) return } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } doWithIsolator := func(isolator string, f func() error) bool { ok, err := cgroup.IsIsolatorSupported(isolator) if err != nil { uw.err = err return true } if !ok { fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator) } if err := f(); err != nil { uw.err = err return true } return false } exit := false for _, i := range app.Isolators { if exit { return } switch v := i.Value().(type) { case *types.ResourceMemory: exit = doWithIsolator("memory", func() error { if v.Limit() == nil { return nil } opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value())))) return nil }) case *types.ResourceCPU: exit = doWithIsolator("cpu", func() error { if v.Limit() == nil { return nil } if v.Limit().Value() > resource.MaxMilliValue { return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String()) } quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%" opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota)) return nil }) } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol) return } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*uw.p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err) return } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err) return } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil { uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err) return } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...) uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName)) }
// getSeccompFilter gets an appc seccomp set and an optional error mode, // returning those values in a format suitable for systemd consumption. func getSeccompFilter(opts []*unit.UnitOption, p *stage1commontypes.Pod, unprivileged bool, isolators types.Isolators) ([]*unit.UnitOption, bool, error) { var filterMode string flag := "" seccompIsolators := 0 seccompErrno := "" noNewPrivs := false var err error var seccompSet []string for _, i := range isolators { if seccomp, ok := i.Value().(types.LinuxSeccompSet); ok { seccompIsolators++ // By appc spec, only one seccomp isolator per app is allowed if seccompIsolators > 1 { return nil, false, ErrTooManySeccompIsolators } switch i.Name { case types.LinuxSeccompRemoveSetName: filterMode = sdBlacklistMode seccompSet, flag, err = parseLinuxSeccompSet(p, seccomp) if err != nil { return nil, false, err } if flag == "empty" { // Opt-in to rkt default whitelist seccompSet = nil break } case types.LinuxSeccompRetainSetName: filterMode = sdWhitelistMode seccompSet, flag, err = parseLinuxSeccompSet(p, seccomp) if err != nil { return nil, false, err } if flag == "all" { // Opt-out seccomp filtering return opts, false, nil } } seccompErrno = string(seccomp.Errno()) } } // If unset, use rkt default whitelist if len(seccompSet) == 0 { filterMode = sdWhitelistMode seccompSet = RktDefaultSeccompWhitelist } // Append computed options if seccompErrno != "" { opts = append(opts, unit.NewUnitOption("Service", "SystemCallErrorNumber", seccompErrno)) } // SystemCallFilter options are written down one entry per line, because // filtering sets may be quite large and overlong lines break unit serialization. opts = appendOptionsList(opts, "Service", "SystemCallFilter", filterMode, seccompSet...) // In order to install seccomp filters, unprivileged process must first set no-news-privs. if unprivileged { noNewPrivs = true } return opts, noNewPrivs, nil }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return errwrap.Wrap(errors.New("unable to write environment file"), err) } var _uid, gid int var err error uidRange := uid.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return errwrap.Wrap(errors.New("unable to deserialize uid range"), err) } if strings.HasPrefix(app.User, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.User), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get uid from file %q", app.User), err) } uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0) if err != nil { return errwrap.Wrap(errors.New("unable to determine real uid"), err) } _uid = int(uidReal) } else { _uid, err = strconv.Atoi(app.User) if err != nil { _uid, err = passwd.LookupUidFromFile(app.User, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err) } } } if strings.HasPrefix(app.Group, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.Group), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get gid from file %q", app.Group), err) } _, gidReal, err := uidRange.UnshiftRange(0, stat.Gid) if err != nil { return errwrap.Wrap(errors.New("unable to determine real gid"), err) } gid = int(gidReal) } else { gid, err = strconv.Atoi(app.Group) if err != nil { gid, err = group.LookupGidFromFile(app.Group, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err) } } } execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(_uid), generateGidArg(gid, app.SupplementaryGIDs), "--"} execStart := quoteExec(append(execWrap, app.Exec...)) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStart), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), } if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(append(execWrap, eh.Exec...)) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return errwrap.Wrap(errors.New("failed to prepare mount units"), err) } } if err = writeAppReaper(p, appName.String()); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }