// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return errwrap.Wrap(errors.New("unable to write environment file"), err) } var _uid, gid int var err error uidRange := uid.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return errwrap.Wrap(errors.New("unable to deserialize uid range"), err) } if strings.HasPrefix(app.User, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.User), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get uid from file %q", app.User), err) } uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0) if err != nil { return errwrap.Wrap(errors.New("unable to determine real uid"), err) } _uid = int(uidReal) } else { _uid, err = strconv.Atoi(app.User) if err != nil { _uid, err = passwd.LookupUidFromFile(app.User, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err) } } } if strings.HasPrefix(app.Group, "/") { var stat syscall.Stat_t if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName), app.Group), &stat); err != nil { return errwrap.Wrap(fmt.Errorf("unable to get gid from file %q", app.Group), err) } _, gidReal, err := uidRange.UnshiftRange(0, stat.Gid) if err != nil { return errwrap.Wrap(errors.New("unable to determine real gid"), err) } gid = int(gidReal) } else { gid, err = strconv.Atoi(app.Group) if err != nil { gid, err = group.LookupGidFromFile(app.Group, filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group")) if err != nil { return errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err) } } } execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(_uid), generateGidArg(gid, app.SupplementaryGIDs), "--"} execStart := quoteExec(append(execWrap, app.Exec...)) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStart), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), } if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(append(execWrap, eh.Exec...)) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return errwrap.Wrap(errors.New("failed to prepare mount units"), err) } } if err = writeAppReaper(p, appName.String()); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor // as the first two return values respectively. func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister bool, debug bool, n *networking.Networking) ([]string, []string, error) { var args []string env := os.Environ() // We store the pod's flavor so we can later garbage collect it correctly if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err) } // set hostname inside pod // According to systemd manual (https://www.freedesktop.org/software/systemd/man/hostname.html) : // "The /etc/hostname file configures the name of the local system that is set // during boot using the sethostname system call" if p.Hostname == "" { p.Hostname = stage1initcommon.GetMachineID(p) } hostnamePath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc/hostname") if err := ioutil.WriteFile(hostnamePath, []byte(p.Hostname), 0644); err != nil { return nil, nil, fmt.Errorf("error writing %s, %s", hostnamePath, err) } // systemd-nspawn needs /etc/machine-id to link the container's journal // to the host. Since systemd-v230, /etc/machine-id is mandatory, see // https://github.com/systemd/systemd/commit/e01ff70a77e781734e1e73a2238af2e9bf7967a8 mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id") machineID := strings.Replace(p.UUID.String(), "-", "", -1) switch flavor { case "kvm": if p.PrivateUsers != "" { return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") } // kernel and hypervisor binaries are located relative to the working directory // of init (/var/lib/rkt/..../uuid) // TODO: move to path.go kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage") netDescriptions := kvm.GetNetworkDescriptions(n) cpu, mem := kvm.GetAppsResources(p.Manifest.Apps) // Parse hypervisor hv, err := KvmCheckHypervisor(common.Stage1RootfsPath(p.Root)) if err != nil { return nil, nil, err } // Set start command for hypervisor StartCmd := hvlkvm.StartCmd switch hv { case "lkvm": StartCmd = hvlkvm.StartCmd case "qemu": StartCmd = hvqemu.StartCmd default: return nil, nil, fmt.Errorf("unrecognized hypervisor") } hvStartCmd := StartCmd( common.Stage1RootfsPath(p.Root), p.UUID.String(), kernelPath, netDescriptions, cpu, mem, debug, ) if hvStartCmd == nil { return nil, nil, fmt.Errorf("no hypervisor") } args = append(args, hvStartCmd...) // lkvm requires $HOME to be defined, // see https://github.com/coreos/rkt/issues/1393 if os.Getenv("HOME") == "" { env = append(env, "HOME=/root") } if err := linkJournal(common.Stage1RootfsPath(p.Root), machineID); err != nil { return nil, nil, errwrap.Wrap(errors.New("error linking pod's journal"), err) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) return args, env, nil case "coreos": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod args = append(args, "--notify-ready=yes") // From systemd v231 if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) case "src": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod args = append(args, "--notify-ready=yes") // From systemd v231 if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) case "host": hostNspawnBin, err := common.LookupPath("systemd-nspawn", os.Getenv("PATH")) if err != nil { return nil, nil, err } // Check dynamically which version is installed on the host // Support version >= 220 versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput() if err != nil { return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err) } versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0] var version int n, err := fmt.Sscanf(versionStr, "systemd %d", &version) if err != nil { return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr) } if n != 1 || version < 220 { return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr) } // Copy systemd, bash, etc. in stage1 at run-time if err := installAssets(); err != nil { return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err) } args = append(args, hostNspawnBin) args = append(args, "--boot") // Launch systemd in the pod args = append(args, fmt.Sprintf("--register=true")) if version >= 231 { args = append(args, "--notify-ready=yes") // From systemd v231 } if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } default: return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor) } machineIDBytes := append([]byte(machineID), '\n') if err := ioutil.WriteFile(mPath, machineIDBytes, 0644); err != nil { log.FatalE("error writing /etc/machine-id", err) } // link journal only if the host is running systemd if util.IsRunningSystemd() { args = append(args, "--link-journal=try-guest") keepUnit, err := util.RunningFromSystemService() if err != nil { if err == dlopen.ErrSoNotFound { log.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.") } else { return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err) } } if keepUnit { args = append(args, "--keep-unit") } } else { args = append(args, "--link-journal=no") } if !debug { args = append(args, "--quiet") // silence most nspawn output (log_warning is currently not covered by this) env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too } env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt") // TODO (alepuccetti) remove this line when rkt will use cgroup namespace // If the kernel has the cgroup namespace enabled, systemd v232 will use it by default. // This was introduced by https://github.com/systemd/systemd/pull/3809 and it will cause // problems in rkt when cgns is enabled and cgroup-v1 is used. For more information see // https://github.com/systemd/systemd/pull/3589#discussion_r70277625. // The following line tells systemd-nspawn not to use cgroup namespace using the environment variable // introduced by https://github.com/systemd/systemd/pull/3809. env = append(env, "SYSTEMD_NSPAWN_USE_CGNS=no") if p.InsecureOptions.DisablePaths { env = append(env, "SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes") } if len(p.PrivateUsers) > 0 { args = append(args, "--private-users="+p.PrivateUsers) } nsargs, err := stage1initcommon.PodToNspawnArgs(p) if err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err) } args = append(args, nsargs...) // Arguments to systemd args = append(args, "--") args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty if !debug { args = append(args, "--log-target=null") // silence systemd output inside pod args = append(args, "--show-status=0") // silence systemd initialization status output } return args, env, nil }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } envFilePath := EnvFilePath(p.Root, appName) uidRange := user.NewBlankUidRange() if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { return err } if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil { return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err) } u, g, err := parseUserGroup(p, ra, uidRange) if err != nil { return err } if err := generateSysusers(p, ra, u, g, uidRange); err != nil { return errwrap.Wrap(errors.New("unable to generate sysusers"), err) } binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0]) if err != nil { return err } var supplementaryGroups []string for _, g := range app.SupplementaryGIDs { supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g)) } capabilitiesStr, err := getAppCapabilities(app.Isolators) if err != nil { return err } noNewPrivileges := getAppNoNewPrivileges(app.Isolators) execStart := append([]string{binPath}, app.Exec[1:]...) execStartString := quoteExec(execStart) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStartString), unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)), // MountFlags=shared creates a new mount namespace and (as unintuitive // as it might seem) makes sure the mount is slave+shared. unit.NewUnitOption("Service", "MountFlags", "shared"), unit.NewUnitOption("Service", "WorkingDirectory", workDir), unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)), unit.NewUnitOption("Service", "User", strconv.Itoa(u)), unit.NewUnitOption("Service", "Group", strconv.Itoa(g)), unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")), unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")), unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)), // This helps working around a race // (https://github.com/systemd/systemd/issues/2913) that causes the // systemd unit name not getting written to the journal if the unit is // short-lived and runs as non-root. unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()), } // Restrict access to sensitive paths (eg. procfs) opts = protectSystemFiles(opts, appName) if ra.ReadOnlyRootFS { opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } // TODO(tmrts): Extract this logic into a utility function. vols := make(map[types.ACName]types.Volume) for _, v := range p.Manifest.Volumes { vols[v.Name] = v } absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { return err } appRootfs := common.AppRootfsPath(absRoot, appName) rwDirs := []string{} imageManifest := p.Images[appName.String()] for _, m := range GenerateMounts(ra, vols, imageManifest) { mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) if err != nil { return err } if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " "))) if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(eh.Exec) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } // We find the host port for the pod's port and use that in the // socket unit file. // This is so because systemd inside the pod will match based on // the socket port number, and since the socket was created on the // host, it will have the host port number. port := findHostPort(*p.Manifest, sap.Name) if port == 0 { log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port) port = sap.Port } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service")) opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service")) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name imgName := p.AppNameToImageName(appName) if len(app.Exec) == 0 { return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) } workDir := "/" if app.WorkingDirectory != "" { workDir = app.WorkingDirectory } env := app.Environment env.Set("AC_APP_NAME", appName.String()) if p.MetadataServiceURL != "" { env.Set("AC_METADATA_URL", p.MetadataServiceURL) } if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return errwrap.Wrap(errors.New("unable to write environment file"), err) } // This is a partial implementation for app.User and app.Group: // For now, only numeric ids (and the string "root") are supported. var uid, gid int var err error if app.User == "root" { uid = 0 } else { uid, err = strconv.Atoi(app.User) if err != nil { return fmt.Errorf("non-numerical user id not supported yet") } } if app.Group == "root" { gid = 0 } else { gid, err = strconv.Atoi(app.Group) if err != nil { return fmt.Errorf("non-numerical group id not supported yet") } } execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(uid), generateGidArg(gid, app.SupplementaryGIDs)} execStart := quoteExec(append(execWrap, app.Exec...)) opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), unit.NewUnitOption("Service", "Restart", "no"), unit.NewUnitOption("Service", "ExecStart", execStart), unit.NewUnitOption("Service", "User", "0"), unit.NewUnitOption("Service", "Group", "0"), } if interactive { opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) } else { opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) } // When an app fails, we shut down the pod opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) for _, eh := range app.EventHandlers { var typ string switch eh.Name { case "pre-start": typ = "ExecStartPre" case "post-stop": typ = "ExecStopPost" default: return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) } exec := quoteExec(append(execWrap, eh.Exec...)) opts = append(opts, unit.NewUnitOption("Service", typ, exec)) } // Some pre-start jobs take a long time, set the timeout to 0 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) var saPorts []types.Port for _, p := range app.Ports { if p.SocketActivated { saPorts = append(saPorts, p) } } for _, i := range app.Isolators { switch v := i.Value().(type) { case *types.ResourceMemory: opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) if err != nil { return err } case *types.ResourceCPU: opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) if err != nil { return err } } } if len(saPorts) > 0 { sockopts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), unit.NewUnitOption("Socket", "BindIPv6Only", "both"), unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), } for _, sap := range saPorts { var proto string switch sap.Protocol { case "tcp": proto = "ListenStream" case "udp": proto = "ListenDatagram" default: return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) } sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", sap.Port))) } file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create socket file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { return errwrap.Wrap(errors.New("failed to write socket unit file"), err) } if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link socket want"), err) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) } opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return errwrap.Wrap(errors.New("failed to create service unit file"), err) } defer file.Close() if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { return errwrap.Wrap(errors.New("failed to write service unit file"), err) } if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { return errwrap.Wrap(errors.New("failed to link service want"), err) } if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return errwrap.Wrap(errors.New("failed to prepare mount units"), err) } } if err = writeAppReaper(p, appName.String()); err != nil { return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) } return nil }