Example #1
0
// InstantiatedPrepareAppUnitName returns the systemd service unit name for prepare-app
// instantiated for the given root
func InstantiatedPrepareAppUnitName(imageID types.Hash) string {
	// Naming respecting escaping rules, see systemd.unit(5) and systemd-escape(1)
	escaped_root := common.RelAppRootfsPath(imageID)
	escaped_root = strings.Replace(escaped_root, "-", "\\x2d", -1)
	escaped_root = strings.Replace(escaped_root, "/", "-", -1)
	return "prepare-app@" + escaped_root + ".service"
}
Example #2
0
File: units.go Project: nhlfr/rkt
// AppReaperUnit writes an app reaper service unit for the given app in the given path using the given unit options.
func (uw *UnitWriter) AppReaperUnit(appName types.ACName, binPath string, opts ...*unit.UnitOption) {
	if uw.err != nil {
		return
	}

	opts = append(opts, []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"),
		unit.NewUnitOption("Unit", "Before", "halt.target"),
		unit.NewUnitOption("Unit", "Conflicts", "exit.target"),
		unit.NewUnitOption("Unit", "Conflicts", "halt.target"),
		unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"),
		unit.NewUnitOption("Service", "RemainAfterExit", "yes"),
		unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf(
			"/reaper.sh \"%s\" \"%s\" \"%s\"",
			appName,
			common.RelAppRootfsPath(appName),
			binPath,
		)),
	}...)

	uw.WriteUnit(
		ServiceUnitPath(uw.p.Root, types.ACName(fmt.Sprintf("reaper-%s", appName))),
		fmt.Sprintf("failed to write app %q reaper service", appName),
		opts...,
	)
}
Example #3
0
File: mount.go Project: NeilW/rkt
// AppToSystemdMountUnits prepare bind mount unit for empty or host kind mounting
// between stage1 rootfs and chrooted filesystem for application
func AppToSystemdMountUnits(root string, appName types.ACName, mountPoints []types.MountPoint, unitsDir string) error {

	for _, mountPoint := range mountPoints {

		name := mountPoint.Name.String()
		// source relative to stage1 rootfs to relative pod root
		whatPath := filepath.Join(stage1MntDir, name)
		whatFullPath := filepath.Join(root, whatPath)

		// destination relative to stage1 rootfs and relative to pod root
		wherePath := filepath.Join(common.RelAppRootfsPath(appName), mountPoint.Path)
		whereFullPath := filepath.Join(root, wherePath)

		// readOnly
		mountOptions := "bind"
		if mountPoint.ReadOnly {
			mountOptions += ",ro"
		}

		// assertion to make sure that "what" exists (created earlier by podToSystemdHostMountUnits)
		log.Printf("checking required source path: %q", whatFullPath)
		if _, err := os.Stat(whatFullPath); os.IsNotExist(err) {
			return fmt.Errorf("app requires a volume that is not defined in Pod (try adding --volume=%s,kind=empty)!", name)
		}

		// optionally prepare app directory
		log.Printf("optionally preparing destination path: %q", whereFullPath)
		err := os.MkdirAll(whereFullPath, 0700)
		if err != nil {
			return fmt.Errorf("failed to prepare dir for mountPoint %v: %v", mountPoint.Name, err)
		}

		// install new mount unit for bind mount /mnt/volumeName -> /opt/stage2/{app-id}/rootfs/{{mountPoint.Path}}
		err = installNewMountUnit(
			root,      // where put a mount unit
			whatPath,  // what - stage1 rootfs /mnt/VolumeName
			wherePath, // where - inside chroot app filesystem
			"bind",    // fstype
			mountOptions,
			serviceUnitName(appName),
			unitsDir,
		)
		if err != nil {
			return fmt.Errorf("cannot install new mount unit for app %q: %v", appName.String(), err)
		}

	}
	return nil
}
Example #4
0
File: pod.go Project: nhlfr/rkt
// generateDeviceAllows generates a DeviceAllow= line for an app.
// To make it work, the path needs to start with "/dev" but the device won't
// exist inside the container. So for a given mount, if the volume is a device
// node, we create a symlink to its target in "/rkt/volumes". Later,
// prepare-app will copy those to "/dev/.rkt/" so that's what we use in the
// DeviceAllow= line.
func generateDeviceAllows(root string, appName types.ACName, mountPoints []types.MountPoint, mounts []mountWrapper, vols map[types.ACName]types.Volume, uidRange *user.UidRange) ([]string, error) {
	var devAllow []string

	rktVolumeLinksPath := filepath.Join(root, "rkt", "volumes")
	if err := os.MkdirAll(rktVolumeLinksPath, 0600); err != nil {
		return nil, err
	}
	if err := user.ShiftFiles([]string{rktVolumeLinksPath}, uidRange); err != nil {
		return nil, err
	}

	for _, m := range mounts {
		v := vols[m.Volume]
		if v.Kind != "host" {
			continue
		}
		if fileutil.IsDeviceNode(v.Source) {
			mode := "r"
			if !IsMountReadOnly(v, mountPoints) {
				mode += "w"
			}

			tgt := filepath.Join(common.RelAppRootfsPath(appName), m.Path)
			// the DeviceAllow= line needs the link path in /dev/.rkt/
			linkRel := filepath.Join("/dev/.rkt", v.Name.String())
			// the real link should be in /rkt/volumes for now
			link := filepath.Join(rktVolumeLinksPath, v.Name.String())

			err := os.Symlink(tgt, link)
			// if the link already exists, we don't need to do anything
			if err != nil && !os.IsExist(err) {
				return nil, err
			}

			devAllow = append(devAllow, linkRel+" "+mode)
		}
	}

	return devAllow, nil
}
Example #5
0
File: pod.go Project: runyontr/rkt
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units
func (p *Pod) appToSystemd(ra *schema.RuntimeApp, interactive bool) error {
	name := ra.Name.String()
	id := ra.Image.ID
	app := ra.App

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment
	env.Set("AC_APP_NAME", name)
	env.Set("AC_METADATA_URL", p.MetadataServiceURL)

	if err := p.writeEnvFile(env, id); err != nil {
		return fmt.Errorf("unable to write environment file: %v", err)
	}

	// This is a partial implementation for app.User and app.Group:
	// For now, only numeric ids (and the string "root") are supported.
	var uid, gid int
	var err error
	if app.User == "root" {
		uid = 0
	} else {
		uid, err = strconv.Atoi(app.User)
		if err != nil {
			return fmt.Errorf("non-numerical user id not supported yet")
		}
	}
	if app.Group == "root" {
		gid = 0
	} else {
		gid, err = strconv.Atoi(app.Group)
		if err != nil {
			return fmt.Errorf("non-numerical group id not supported yet")
		}
	}

	execWrap := []string{"/diagexec", common.RelAppRootfsPath(id), workDir, RelEnvFilePath(id), strconv.Itoa(uid), strconv.Itoa(gid)}
	execStart := quoteExec(append(execWrap, app.Exec...))
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", name),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "OnFailure", "reaper.service"),
		unit.NewUnitOption("Unit", "Wants", "exit-watcher.service"),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStart),
		unit.NewUnitOption("Service", "User", "0"),
		unit.NewUnitOption("Service", "Group", "0"),
	}

	_, systemdStage1Version, err := p.getFlavor()
	if err != nil {
		return fmt.Errorf("Failed to get stage1 flavor: %v\n", err)
	}

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else if systemdSupportsJournalLinking(systemdStage1Version) {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0])))
	}

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(append(execWrap, eh.Exec...))
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	saPorts := []types.Port{}
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			limit := v.Limit().String()
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", limit)
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			limit := v.Limit().String()
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", limit)
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", name+" socket-activated ports"),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(id)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", sap.Port)))
		}

		file, err := os.OpenFile(SocketUnitPath(p.Root, id), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			return fmt.Errorf("failed to create socket file: %v", err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return fmt.Errorf("failed to write socket unit file: %v", err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(id)), SocketWantPath(p.Root, id)); err != nil {
			return fmt.Errorf("failed to link socket want: %v", err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(id)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(id)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(id)))

	file, err := os.OpenFile(ServiceUnitPath(p.Root, id), os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		return fmt.Errorf("failed to create service unit file: %v", err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return fmt.Errorf("failed to write service unit file: %v", err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(id)), ServiceWantPath(p.Root, id)); err != nil {
		return fmt.Errorf("failed to link service want: %v", err)
	}

	return nil
}
Example #6
0
// protectKernelTunables restricts access to some security-sensitive paths under
// /proc and /sys. Entries are either hidden or just made read-only to app.
// This protection is enabled by default.
func protectKernelTunables(opts []*unit.UnitOption, appName types.ACName, systemdVersion int) []*unit.UnitOption {
	roPaths := []string{
		"/proc/bus/",
		"/proc/sys/kernel/core_pattern",
		"/proc/sys/kernel/modprobe",
		"/proc/sys/vm/panic_on_oom",
		"/proc/sysrq-trigger",
		"/sys/block/",
		"/sys/bus/",
		"/sys/class/",
		"/sys/dev/",
		"/sys/devices/",
		"/sys/kernel/",
	}
	hiddenDirs := []string{
		"/sys/firmware/",
		"/sys/fs/",
		"/sys/hypervisor/",
		"/sys/module/",
		"/sys/power/",
	}
	hiddenPaths := []string{
		"/proc/config.gz",
		"/proc/kallsyms",
		"/proc/sched_debug",
		"/proc/kcore",
		"/proc/kmem",
		"/proc/mem",
	}

	// Paths prefixed with "-" are ignored if they do not exist:
	// https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ReadWriteDirectories=
	for _, p := range roPaths {
		opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p))))
	}
	for _, p := range hiddenDirs {
		opts = append(opts, unit.NewUnitOption("Service", "InaccessibleDirectories", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p))))
	}
	if systemdVersion >= 231 {
		for _, p := range hiddenPaths {
			opts = append(opts, unit.NewUnitOption("Service", "InaccessiblePaths", fmt.Sprintf("-%s", filepath.Join(common.RelAppRootfsPath(appName), p))))
		}
	}
	if systemdVersion >= 233 {
		opts = append(opts, unit.NewUnitOption("Service", "ProtectKernelTunables", "true"))
	}

	return opts
}
Example #7
0
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath, err := common.CreateSharedVolumesPath(p.Root)
	if err != nil {
		return nil, err
	}

	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
	if err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err)
	}
	for _, m := range mounts {
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())

		absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}

		appRootfs := common.AppRootfsPath(absRoot, appName)

		// Evaluate symlinks within the app's rootfs. This is needed because symlinks
		// within the container can be absolute, which will, of course, be wrong in our ns.
		// Systemd also gets this wrong, see https://github.com/systemd/systemd/issues/2860
		// When the above issue is fixed, we can pass the un-evaluated path to --bind instead.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)

		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
			return nil, err
		}

		opt := make([]string, 6)

		if m.ReadOnly {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		opt[1] = m.Source(absRoot)
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath)
		opt[4] = ":"

		// If Recursive is not set, default to recursive.
		recursive := true
		if m.Volume.Recursive != nil {
			recursive = *m.Volume.Recursive
		}

		// rbind/norbind options exist since systemd-nspawn v226
		if recursive {
			opt[5] = "rbind"
		} else {
			opt[5] = "norbind"
		}
		args = append(args, strings.Join(opt, ""))
	}

	if !p.InsecureOptions.DisableCapabilities {
		capabilitiesStr, err := getAppCapabilities(app.Isolators)
		if err != nil {
			return nil, err
		}
		capList := strings.Join(capabilitiesStr, ",")
		args = append(args, "--capability="+capList)
	}

	return args, nil
}
Example #8
0
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	id := ra.Image.ID
	app := ra.App

	vols := make(map[types.ACName]types.Volume)

	// TODO(philips): this is implicitly creating a mapping from MountPoint
	// to volumes. This is a nice convenience for users but we will need to
	// introduce a --mount flag so they can control which mountPoint maps to
	// which volume.

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not create shared volumes directory: %v", err)
	}
	if err := os.Chmod(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not change permissions of %q: %v", sharedVolPath, err)
	}
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
		if v.Kind == "empty" {
			if err := os.MkdirAll(filepath.Join(sharedVolPath, v.Name.String()), sharedVolPerm); err != nil {
				return nil, fmt.Errorf("could not create shared volume %q: %v", v.Name, err)
			}
		}
	}

	for _, mp := range app.MountPoints {
		key := mp.Name
		vol, ok := vols[key]
		if !ok {
			catCmd := fmt.Sprintf("sudo rkt image cat-manifest --pretty-print %v", id)
			volumeCmd := ""
			for _, mp := range app.MountPoints {
				volumeCmd += fmt.Sprintf("--volume %s,kind=host,source=/some/path ", mp.Name)
			}

			return nil, fmt.Errorf("no volume for mountpoint %q in app %q.\n"+
				"You can inspect the volumes with:\n\t%v\n"+
				"App %q requires the following volumes:\n\t%v",
				key, appName, catCmd, appName, volumeCmd)
		}
		opt := make([]string, 4)

		// If the readonly flag in the pod manifest is not nil,
		// then use it to override the readonly flag in the image manifest.
		readOnly := mp.ReadOnly
		if vol.ReadOnly != nil {
			readOnly = *vol.ReadOnly
		}

		if readOnly {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		switch vol.Kind {
		case "host":
			opt[1] = vol.Source
		case "empty":
			absRoot, err := filepath.Abs(p.Root)
			if err != nil {
				return nil, fmt.Errorf("cannot get pod's root absolute path: %v\n", err)
			}
			opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty".`, vol.Kind)
		}
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mp.Path)

		args = append(args, strings.Join(opt, ""))
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case types.LinuxCapabilitiesSet:
			var caps []string
			// TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily.
			for _, c := range v.Set() {
				caps = append(caps, string(c))
			}
			if i.Name == types.LinuxCapabilitiesRetainSetName {
				capList := strings.Join(caps, ",")
				args = append(args, "--capability="+capList)
			}
		}
	}

	return args, nil
}
Example #9
0
File: pod.go Project: matomesc/rkt
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not create shared volumes directory: %v", err)
	}
	if err := os.Chmod(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not change permissions of %q: %v", sharedVolPath, err)
	}

	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	mounts := GenerateMounts(ra, vols)
	for _, m := range mounts {
		vol := vols[m.Volume]

		if vol.Kind == "empty" {
			p := filepath.Join(sharedVolPath, vol.Name.String())
			if err := os.MkdirAll(p, sharedVolPerm); err != nil {
				return nil, fmt.Errorf("could not create shared volume %q: %v", vol.Name, err)
			}
			if err := os.Chown(p, *vol.UID, *vol.GID); err != nil {
				return nil, fmt.Errorf("could not change owner of %q: %v", p, err)
			}
			mod, err := strconv.ParseUint(*vol.Mode, 8, 32)
			if err != nil {
				return nil, fmt.Errorf("invalid mode %q for volume %q: %v", *vol.Mode, vol.Name, err)
			}
			if err := os.Chmod(p, os.FileMode(mod)); err != nil {
				return nil, fmt.Errorf("could not change permissions of %q: %v", p, err)
			}
		}

		opt := make([]string, 4)

		if IsMountReadOnly(vol, app.MountPoints) {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		switch vol.Kind {
		case "host":
			opt[1] = vol.Source
		case "empty":
			absRoot, err := filepath.Abs(p.Root)
			if err != nil {
				return nil, fmt.Errorf("cannot get pod's root absolute path: %v\n", err)
			}
			opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind)
		}
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), m.Path)

		args = append(args, strings.Join(opt, ""))
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case types.LinuxCapabilitiesSet:
			var caps []string
			// TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily.
			for _, c := range v.Set() {
				caps = append(caps, string(c))
			}
			if i.Name == types.LinuxCapabilitiesRetainSetName {
				capList := strings.Join(caps, ",")
				args = append(args, "--capability="+capList)
			}
		}
	}

	return args, nil
}
Example #10
0
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units
func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error {
	app := ra.App
	appName := ra.Name
	imgName := p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
	}

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", p.MetadataServiceURL)
	}

	if err := writeEnvFile(p, env, appName, privateUsers); err != nil {
		return errwrap.Wrap(errors.New("unable to write environment file"), err)
	}

	var _uid, gid int
	var err error

	uidRange := uid.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		return errwrap.Wrap(errors.New("unable to deserialize uid range"), err)
	}

	if strings.HasPrefix(app.User, "/") {
		var stat syscall.Stat_t
		if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName),
			app.User), &stat); err != nil {
			return errwrap.Wrap(fmt.Errorf("unable to get uid from file %q",
				app.User), err)
		}
		uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0)
		if err != nil {
			return errwrap.Wrap(errors.New("unable to determine real uid"), err)
		}
		_uid = int(uidReal)
	} else {
		_uid, err = strconv.Atoi(app.User)
		if err != nil {
			_uid, err = passwd.LookupUidFromFile(app.User,
				filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd"))
			if err != nil {
				return errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err)
			}
		}
	}

	if strings.HasPrefix(app.Group, "/") {
		var stat syscall.Stat_t
		if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName),
			app.Group), &stat); err != nil {
			return errwrap.Wrap(fmt.Errorf("unable to get gid from file %q",
				app.Group), err)
		}
		_, gidReal, err := uidRange.UnshiftRange(0, stat.Gid)
		if err != nil {
			return errwrap.Wrap(errors.New("unable to determine real gid"), err)
		}
		gid = int(gidReal)
	} else {
		gid, err = strconv.Atoi(app.Group)
		if err != nil {
			gid, err = group.LookupGidFromFile(app.Group,
				filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group"))
			if err != nil {
				return errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err)
			}
		}
	}

	execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName),
		strconv.Itoa(_uid), generateGidArg(gid, app.SupplementaryGIDs), "--"}
	execStart := quoteExec(append(execWrap, app.Exec...))
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStart),
		unit.NewUnitOption("Service", "User", "0"),
		unit.NewUnitOption("Service", "Group", "0"),
	}

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0])))
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(append(execWrap, eh.Exec...))
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	var saPorts []types.Port
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit())
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit())
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to create socket file"), err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return errwrap.Wrap(errors.New("failed to write socket unit file"), err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil {
			return errwrap.Wrap(errors.New("failed to link socket want"), err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))

	file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		return errwrap.Wrap(errors.New("failed to create service unit file"), err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return errwrap.Wrap(errors.New("failed to write service unit file"), err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil {
		return errwrap.Wrap(errors.New("failed to link service want"), err)
	}

	if flavor == "kvm" {
		// bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn)
		err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to prepare mount units"), err)
		}

	}

	if err = writeAppReaper(p, appName.String()); err != nil {
		return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err)
	}

	return nil
}
Example #11
0
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	if err := os.Chmod(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err)
	}

	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	imageManifest := p.Images[appName.String()]
	mounts := GenerateMounts(ra, vols, imageManifest)
	for _, m := range mounts {
		vol := vols[m.Volume]

		shPath := filepath.Join(sharedVolPath, vol.Name.String())

		absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}

		appRootfs := common.AppRootfsPath(absRoot, appName)

		// TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well.
		// Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd
		// version is bumped.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)

		if err := PrepareMountpoints(shPath, mntAbsPath, &vol, m.DockerImplicit); err != nil {
			return nil, err
		}

		opt := make([]string, 4)

		if IsMountReadOnly(vol, app.MountPoints) {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		switch vol.Kind {
		case "host":
			opt[1] = vol.Source
		case "empty":
			opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind)
		}
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath)
		args = append(args, strings.Join(opt, ""))
	}

	capabilitiesStr, err := getAppCapabilities(app.Isolators)
	if err != nil {
		return nil, err
	}
	capList := strings.Join(capabilitiesStr, ",")
	args = append(args, "--capability="+capList)

	return args, nil
}
Example #12
0
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units
func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error {
	app := ra.App
	appName := ra.Name
	imgName := p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
	}

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", p.MetadataServiceURL)
	}

	envFilePath := EnvFilePath(p.Root, appName)

	uidRange := user.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		return err
	}

	if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil {
		return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err)
	}

	u, g, err := parseUserGroup(p, ra, uidRange)
	if err != nil {
		return err
	}

	if err := generateSysusers(p, ra, u, g, uidRange); err != nil {
		return errwrap.Wrap(errors.New("unable to generate sysusers"), err)
	}

	binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0])
	if err != nil {
		return err
	}

	var supplementaryGroups []string
	for _, g := range app.SupplementaryGIDs {
		supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g))
	}

	capabilitiesStr, err := getAppCapabilities(app.Isolators)
	if err != nil {
		return err
	}

	noNewPrivileges := getAppNoNewPrivileges(app.Isolators)

	execStart := append([]string{binPath}, app.Exec[1:]...)
	execStartString := quoteExec(execStart)
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStartString),
		unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)),
		// MountFlags=shared creates a new mount namespace and (as unintuitive
		// as it might seem) makes sure the mount is slave+shared.
		unit.NewUnitOption("Service", "MountFlags", "shared"),
		unit.NewUnitOption("Service", "WorkingDirectory", workDir),
		unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)),
		unit.NewUnitOption("Service", "User", strconv.Itoa(u)),
		unit.NewUnitOption("Service", "Group", strconv.Itoa(g)),
		unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")),
		unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")),
		unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)),
		// This helps working around a race
		// (https://github.com/systemd/systemd/issues/2913) that causes the
		// systemd unit name not getting written to the journal if the unit is
		// short-lived and runs as non-root.
		unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()),
	}

	// Restrict access to sensitive paths (eg. procfs)
	opts = protectSystemFiles(opts, appName)

	if ra.ReadOnlyRootFS {
		opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName)))
	}

	// TODO(tmrts): Extract this logic into a utility function.
	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		return err
	}
	appRootfs := common.AppRootfsPath(absRoot, appName)

	rwDirs := []string{}
	imageManifest := p.Images[appName.String()]
	for _, m := range GenerateMounts(ra, vols, imageManifest) {
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path)
		if err != nil {
			return err
		}

		if !IsMountReadOnly(vols[m.Volume], app.MountPoints) {
			rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath))
		}
	}

	opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " ")))

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(eh.Exec)
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	var saPorts []types.Port
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit())
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit())
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to create socket file"), err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return errwrap.Wrap(errors.New("failed to write socket unit file"), err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil {
			return errwrap.Wrap(errors.New("failed to link socket want"), err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service"))
	opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service"))

	file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		return errwrap.Wrap(errors.New("failed to create service unit file"), err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return errwrap.Wrap(errors.New("failed to write service unit file"), err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil {
		return errwrap.Wrap(errors.New("failed to link service want"), err)
	}

	if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil {
		return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err)
	}

	return nil
}
Example #13
0
File: units.go Project: nhlfr/rkt
func (uw *UnitWriter) AppUnit(
	ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions,
	opts ...*unit.UnitOption,
) {
	if uw.err != nil {
		return
	}

	flavor, systemdVersion, err := GetFlavor(uw.p)
	if err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err)
		return
	}

	app := ra.App
	appName := ra.Name
	imgName := uw.p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
		return
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if uw.p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL)
	}

	envFilePath := EnvFilePath(uw.p.Root, appName)

	uidRange := user.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		uw.err = err
		return
	}

	if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err)
		return
	}

	u, g, err := parseUserGroup(uw.p, ra, uidRange)
	if err != nil {
		uw.err = err
		return
	}

	if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err)
		return
	}

	var supplementaryGroups []string
	for _, g := range app.SupplementaryGIDs {
		supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g))
	}

	capabilitiesStr, err := getAppCapabilities(app.Isolators)
	if err != nil {
		uw.err = err
		return
	}

	execStart := append([]string{binPath}, app.Exec[1:]...)
	execStartString := quoteExec(execStart)
	opts = append(opts, []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStartString),
		unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)),
		// MountFlags=shared creates a new mount namespace and (as unintuitive
		// as it might seem) makes sure the mount is slave+shared.
		unit.NewUnitOption("Service", "MountFlags", "shared"),
		unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory),
		unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)),
		unit.NewUnitOption("Service", "User", strconv.Itoa(u)),
		unit.NewUnitOption("Service", "Group", strconv.Itoa(g)),

		// This helps working around a race
		// (https://github.com/systemd/systemd/issues/2913) that causes the
		// systemd unit name not getting written to the journal if the unit is
		// short-lived and runs as non-root.
		unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()),
	}...)

	if len(supplementaryGroups) > 0 {
		opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups)
	}

	if supportsNotify(uw.p, appName.String()) {
		opts = append(opts, unit.NewUnitOption("Service", "Type", "notify"))
	}

	if !insecureOptions.DisableCapabilities {
		opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")))
	}

	noNewPrivileges := getAppNoNewPrivileges(app.Isolators)

	// Apply seccomp isolator, if any and not opt-ing out;
	// see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=
	if !insecureOptions.DisableSeccomp {
		var forceNoNewPrivileges bool

		unprivileged := (u != 0)
		opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators)
		if err != nil {
			uw.err = err
			return
		}

		// Seccomp filters require NoNewPrivileges for unprivileged apps, that may override
		// manifest annotation.
		if forceNoNewPrivileges {
			noNewPrivileges = true
		}
	}

	opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)))

	if ra.ReadOnlyRootFS {
		opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName)))
	}

	// TODO(tmrts): Extract this logic into a utility function.
	vols := make(map[types.ACName]types.Volume)
	for _, v := range uw.p.Manifest.Volumes {
		vols[v.Name] = v
	}

	absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		uw.err = err
		return
	}
	appRootfs := common.AppRootfsPath(absRoot, appName)

	rwDirs := []string{}
	imageManifest := uw.p.Images[appName.String()]
	mounts := GenerateMounts(ra, vols, imageManifest)
	for _, m := range mounts {
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path)
		if err != nil {
			uw.err = err
			return
		}

		if !IsMountReadOnly(vols[m.Volume], app.MountPoints) {
			rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath))
		}
	}
	if len(rwDirs) > 0 {
		opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs)
	}

	// Restrict access to sensitive paths (eg. procfs and sysfs entries).
	if !insecureOptions.DisablePaths {
		opts = protectKernelTunables(opts, appName, systemdVersion)
	}

	// Generate default device policy for the app, as well as the list of allowed devices.
	// For kvm flavor, devices are VM-specific and restricting them is not strictly needed.
	if !insecureOptions.DisablePaths && flavor != "kvm" {
		opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed"))
		deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange)
		if err != nil {
			uw.err = err
			return
		}
		for _, dev := range deviceAllows {
			opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev))
		}
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
			return
		}
		exec := quoteExec(eh.Exec)
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	var saPorts []types.Port
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	doWithIsolator := func(isolator string, f func() error) bool {
		ok, err := cgroup.IsIsolatorSupported(isolator)
		if err != nil {
			uw.err = err
			return true
		}

		if !ok {
			fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator)
		}

		if err := f(); err != nil {
			uw.err = err
			return true
		}

		return false
	}

	exit := false
	for _, i := range app.Isolators {
		if exit {
			return
		}

		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			exit = doWithIsolator("memory", func() error {
				if v.Limit() == nil {
					return nil
				}

				opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value()))))
				return nil
			})
		case *types.ResourceCPU:
			exit = doWithIsolator("cpu", func() error {
				if v.Limit() == nil {
					return nil
				}

				if v.Limit().Value() > resource.MaxMilliValue {
					return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String())
				}

				quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%"
				opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota))

				return nil
			})
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
				return
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*uw.p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err)
			return
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err)
			return
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err)
			return
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service"))
	opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service"))

	uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...)
	uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName))
}
Example #14
0
File: pod.go Project: runyontr/rkt
// appToNspawnArgs transforms the given app manifest, with the given associated
// app image id, into a subset of applicable systemd-nspawn argument
func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) {
	args := []string{}
	name := ra.Name.String()
	id := ra.Image.ID
	app := ra.App

	vols := make(map[types.ACName]types.Volume)

	// TODO(philips): this is implicitly creating a mapping from MountPoint
	// to volumes. This is a nice convenience for users but we will need to
	// introduce a --mount flag so they can control which mountPoint maps to
	// which volume.

	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	for _, mp := range app.MountPoints {
		key := mp.Name
		vol, ok := vols[key]
		if !ok {
			catCmd := fmt.Sprintf("sudo rkt image cat-manifest --pretty-print %v", id)
			volumeCmd := ""
			for _, mp := range app.MountPoints {
				volumeCmd += fmt.Sprintf("--volume %s,kind=host,source=/some/path ", mp.Name)
			}

			return nil, fmt.Errorf("no volume for mountpoint %q in app %q.\n"+
				"You can inspect the volumes with:\n\t%v\n"+
				"App %q requires the following volumes:\n\t%v",
				key, name, catCmd, name, volumeCmd)
		}
		opt := make([]string, 4)

		// If the readonly flag in the pod manifest is not nil,
		// then use it to override the readonly flag in the image manifest.
		readOnly := mp.ReadOnly
		if vol.ReadOnly != nil {
			readOnly = *vol.ReadOnly
		}

		if readOnly {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		opt[1] = vol.Source
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(id), mp.Path)

		args = append(args, strings.Join(opt, ""))
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case types.LinuxCapabilitiesSet:
			var caps []string
			// TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily.
			for _, c := range v.Set() {
				caps = append(caps, string(c))
			}
			if i.Name == types.LinuxCapabilitiesRetainSetName {
				capList := strings.Join(caps, ",")
				args = append(args, "--capability="+capList)
			}
		}
	}

	return args, nil
}
Example #15
0
File: path.go Project: matomesc/rkt
// InstantiatedPrepareAppUnitName returns the systemd service unit name for prepare-app
// instantiated for the given root.
func InstantiatedPrepareAppUnitName(appName types.ACName) string {
	// Naming respecting escaping rules, see systemd.unit(5) and systemd-escape(1)
	escapedRoot := unit.UnitNamePathEscape(common.RelAppRootfsPath(appName))
	return "prepare-app@" + escapedRoot + ".service"
}
Example #16
0
File: pod.go Project: matomesc/rkt
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units
func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error {
	app := ra.App
	appName := ra.Name
	image, ok := p.Images[appName.String()]
	if !ok {
		// This is impossible as we have updated the map in LoadPod().
		panic(fmt.Sprintf("No images for app %q", ra.Name.String()))
	}
	imgName := image.Name

	if len(app.Exec) == 0 {
		return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
	}

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", p.MetadataServiceURL)
	}

	if err := writeEnvFile(p, env, appName, privateUsers); err != nil {
		return fmt.Errorf("unable to write environment file: %v", err)
	}

	// This is a partial implementation for app.User and app.Group:
	// For now, only numeric ids (and the string "root") are supported.
	var uid, gid int
	var err error
	if app.User == "root" {
		uid = 0
	} else {
		uid, err = strconv.Atoi(app.User)
		if err != nil {
			return fmt.Errorf("non-numerical user id not supported yet")
		}
	}
	if app.Group == "root" {
		gid = 0
	} else {
		gid, err = strconv.Atoi(app.Group)
		if err != nil {
			return fmt.Errorf("non-numerical group id not supported yet")
		}
	}

	execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(uid), generateGidArg(gid, app.SupplementaryGIDs)}
	execStart := quoteExec(append(execWrap, app.Exec...))
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStart),
		unit.NewUnitOption("Service", "User", "0"),
		unit.NewUnitOption("Service", "Group", "0"),
	}

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0])))
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(append(execWrap, eh.Exec...))
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	var saPorts []types.Port
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit())
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit())
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", sap.Port)))
		}

		file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			return fmt.Errorf("failed to create socket file: %v", err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return fmt.Errorf("failed to write socket unit file: %v", err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil {
			return fmt.Errorf("failed to link socket want: %v", err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))

	file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		return fmt.Errorf("failed to create service unit file: %v", err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return fmt.Errorf("failed to write service unit file: %v", err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil {
		return fmt.Errorf("failed to link service want: %v", err)
	}

	if flavor == "kvm" {
		// bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn)
		err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir)
		if err != nil {
			return fmt.Errorf("failed to prepare mount units: %v", err)
		}

	}

	if err = writeAppReaper(p, appName.String()); err != nil {
		return fmt.Errorf("Failed to write app %q reaper service: %v\n", appName, err)
	}

	return nil
}
Example #17
0
File: pod.go Project: joshix/rkt
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOptions Stage1InsecureOptions) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	if err := os.Chmod(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err)
	}

	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
	if err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err)
	}
	for _, m := range mounts {

		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())

		absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}

		appRootfs := common.AppRootfsPath(absRoot, appName)

		// TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well.
		// Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd
		// version is bumped.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)

		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
			return nil, err
		}

		opt := make([]string, 6)

		if m.ReadOnly {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		switch m.Volume.Kind {
		case "host":
			opt[1] = m.Volume.Source
		case "empty":
			opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), m.Volume.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind)
		}
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath)
		opt[4] = ":"

		// If Recursive is not set, default to recursive.
		recursive := true
		if m.Volume.Recursive != nil {
			recursive = *m.Volume.Recursive
		}

		// rbind/norbind options exist since systemd-nspawn v226
		if recursive {
			opt[5] = "rbind"
		} else {
			opt[5] = "norbind"
		}
		args = append(args, strings.Join(opt, ""))
	}

	if !insecureOptions.DisableCapabilities {
		capabilitiesStr, err := getAppCapabilities(app.Isolators)
		if err != nil {
			return nil, err
		}
		capList := strings.Join(capabilitiesStr, ",")
		args = append(args, "--capability="+capList)
	}

	return args, nil
}
Example #18
0
// AppToSystemdMountUnits prepare bind mount unit for empty or host kind mounting
// between stage1 rootfs and chrooted filesystem for application
func AppToSystemdMountUnits(root string, appName types.ACName, volumes []types.Volume, ra *schema.RuntimeApp, unitsDir string) error {
	app := ra.App

	vols := make(map[types.ACName]types.Volume)
	for _, v := range volumes {
		vols[v.Name] = v
	}

	mounts := GenerateMounts(ra, vols)
	for _, m := range mounts {
		vol := vols[m.Volume]

		// source relative to stage1 rootfs to relative pod root
		whatPath := filepath.Join(stage1MntDir, vol.Name.String())
		whatFullPath := filepath.Join(root, whatPath)

		if vol.Kind == "empty" {
			log.Printf("creating an empty volume folder for sharing: %q", whatFullPath)
			err := os.MkdirAll(whatFullPath, 0700)
			if err != nil {
				return err
			}
		}

		// destination relative to stage1 rootfs and relative to pod root
		wherePath := filepath.Join(common.RelAppRootfsPath(appName), m.Path)
		whereFullPath := filepath.Join(root, wherePath)

		// assertion to make sure that "what" exists (created earlier by PodToSystemdHostMountUnits)
		log.Printf("checking required source path: %q", whatFullPath)
		if _, err := os.Stat(whatFullPath); os.IsNotExist(err) {
			return fmt.Errorf("bug: missing source for volume %v", vol.Name)
		}

		// optionally prepare app directory
		log.Printf("optionally preparing destination path: %q", whereFullPath)
		err := os.MkdirAll(whereFullPath, 0700)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("failed to prepare dir for mount %v", m.Volume), err)
		}

		// install new mount unit for bind mount /mnt/volumeName -> /opt/stage2/{app-id}/rootfs/{{mountPoint.Path}}
		mu, err := installNewMountUnit(
			root,      // where put a mount unit
			whatPath,  // what - stage1 rootfs /mnt/VolumeName
			wherePath, // where - inside chroot app filesystem
			"bind",    // fstype
			"bind",    // options
			serviceUnitName(appName),
			unitsDir,
		)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("cannot install new mount unit for app %q", appName.String()), err)
		}

		// TODO(iaguis) when we update util-linux to 2.27, this code can go
		// away and we can bind-mount RO with one unit file.
		// http://ftp.kernel.org/pub/linux/utils/util-linux/v2.27/v2.27-ReleaseNotes
		if IsMountReadOnly(vol, app.MountPoints) {
			opts := []*unit.UnitOption{
				unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Remount read-only unit for %s", wherePath)),
				unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
				unit.NewUnitOption("Unit", "After", mu),
				unit.NewUnitOption("Unit", "Wants", mu),
				unit.NewUnitOption("Service", "ExecStart", fmt.Sprintf("/usr/bin/mount -o remount,ro %s", wherePath)),
				unit.NewUnitOption("Install", "RequiredBy", mu),
			}

			remountUnitPath := filepath.Join(root, unitsDir, unit.UnitNamePathEscape(wherePath+"-remount.service"))
			if err := writeUnit(opts, remountUnitPath); err != nil {
				return err
			}
		}
	}
	return nil
}
Example #19
0
// appToNspawnArgs transforms the given app manifest, with the given associated
// app name, into a subset of applicable systemd-nspawn argument
func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	id := ra.Image.ID
	app := ra.App

	vols := make(map[types.ACName]types.Volume)
	mounts := make(map[string]schema.Mount)
	for _, m := range ra.Mounts {
		mounts[m.Path] = m
	}

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not create shared volumes directory: %v", err)
	}
	if err := os.Chmod(sharedVolPath, sharedVolPerm); err != nil {
		return nil, fmt.Errorf("could not change permissions of %q: %v", sharedVolPath, err)
	}
	// Here we bind the volumes to the mountpoints via runtime mounts (--mount)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
		if v.Kind == "empty" {
			if err := os.MkdirAll(filepath.Join(sharedVolPath, v.Name.String()), sharedVolPerm); err != nil {
				return nil, fmt.Errorf("could not create shared volume %q: %v", v.Name, err)
			}
		}
	}

	for _, mp := range app.MountPoints {
		// there's already an injected mount for this target path, skip
		if _, ok := mounts[mp.Path]; ok {
			continue
		}
		vol, ok := vols[mp.Name]
		if !ok {
			catCmd := fmt.Sprintf("sudo rkt image cat-manifest --pretty-print %v", id)
			volumeCmd := ""
			for _, mp := range app.MountPoints {
				volumeCmd += fmt.Sprintf("--volume %s,kind=host,source=/some/path ", mp.Name)
			}

			return nil, fmt.Errorf("no volume for mountpoint %q:%q in app %q.\n"+
				"You can inspect the volumes with:\n\t%v\n"+
				"App %q requires the following volumes:\n\t%v",
				mp.Name, mp.Path, appName, catCmd, appName, volumeCmd)
		}
		ra.Mounts = append(ra.Mounts, schema.Mount{Volume: vol.Name, Path: mp.Path})
	}

	for _, m := range ra.Mounts {
		vol := vols[m.Volume]

		opt := make([]string, 4)

		// If the readonly flag in the pod manifest is not nil,
		// then use it to override the readonly flag in the image manifest.
		readOnly := isMPReadOnly(app.MountPoints, vol.Name)
		if vol.ReadOnly != nil {
			readOnly = *vol.ReadOnly
		}

		if readOnly {
			opt[0] = "--bind-ro="
		} else {
			opt[0] = "--bind="
		}

		switch vol.Kind {
		case "host":
			opt[1] = vol.Source
		case "empty":
			absRoot, err := filepath.Abs(p.Root)
			if err != nil {
				return nil, fmt.Errorf("cannot get pod's root absolute path: %v\n", err)
			}
			opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty".`, vol.Kind)
		}
		opt[2] = ":"
		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), m.Path)

		args = append(args, strings.Join(opt, ""))
	}

	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case types.LinuxCapabilitiesSet:
			var caps []string
			// TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily.
			for _, c := range v.Set() {
				caps = append(caps, string(c))
			}
			if i.Name == types.LinuxCapabilitiesRetainSetName {
				capList := strings.Join(caps, ",")
				args = append(args, "--capability="+capList)
			}
		}
	}

	return args, nil
}