Example #1
0
// findBinPath resolves bin to the path of the executable to run for the app.
// The result can be passed to ExecStart in the app's systemd service file.
//
// Resolution rules:
//   - an absolute bin is returned unchanged;
//   - a relative bin containing a slash is joined onto workDir;
//   - a bare file name is looked up with lookupPathInsideApp, using the
//     directories returned by appSearchPaths and the app's rootfs.
//
// An error is returned when the pod root cannot be made absolute or when the
// lookup inside the app fails.
func findBinPath(p *stage1commontypes.Pod, appName types.ACName, app types.App, workDir string, bin string) (string, error) {
	// Absolute path: nothing to resolve.
	if filepath.IsAbs(bin) {
		return bin, nil
	}

	// Relative path with a directory component: anchor it at the working dir.
	if strings.Contains(bin, "/") {
		return filepath.Join(workDir, bin), nil
	}

	// Bare file name: search the app's path list inside its rootfs.
	podRoot, err := filepath.Abs(p.Root)
	if err != nil {
		return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
	}
	rootfs := common.AppRootfsPath(podRoot, appName)
	searchPath := strings.Join(appSearchPaths(p, workDir, app), ":")

	resolved, err := lookupPathInsideApp(bin, searchPath, rootfs, workDir)
	if err != nil {
		return "", errwrap.Wrap(fmt.Errorf("error looking up %q", bin), err)
	}
	return resolved, nil
}
Example #2
0
File: export.go Project: nak3/rkt
// appHasMountpoints reports whether /proc/self/mountinfo lists any mount point
// located underneath the rootfs of the app appName in the pod at podPath.
//
// Each line of mountinfo is split on single spaces and must have at least
// seven fields, otherwise an error is returned; the field at index 4 is
// compared against the app rootfs path.
func appHasMountpoints(podPath string, appName types.ACName) (bool, error) {
	// The trailing separator restricts matches to paths strictly below the
	// rootfs, so the rootfs mount itself is not counted.
	prefix := common.AppRootfsPath(podPath, appName) + string(filepath.Separator)

	f, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return false, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		entry := scanner.Text()
		fields := strings.Split(entry, " ")
		if len(fields) < 7 {
			return false, fmt.Errorf("not enough fields from line %q: %+v", entry, fields)
		}

		if strings.HasPrefix(fields[4], prefix) {
			return true, nil
		}
	}

	// No match; surface a read error if scanning stopped early.
	return false, scanner.Err()
}
Example #3
0
// FindBinPath resolves the first element of the app's Exec to the path of the
// executable to run. The result can be passed to ExecStart in the app's
// systemd service file.
//
// Resolution rules:
//   - an absolute path is returned unchanged;
//   - a relative path containing a slash is joined onto the app's working
//     directory;
//   - a bare file name is looked up with lookupPathInsideApp, using the
//     directories returned by appSearchPaths and the app's rootfs.
//
// An error is returned when Exec is empty, when the pod root cannot be made
// absolute, or when the lookup inside the app fails.
func FindBinPath(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (string, error) {
	if len(ra.App.Exec) == 0 {
		return "", errors.New("app has no executable")
	}

	exe := ra.App.Exec[0]
	workDir := ra.App.WorkingDirectory

	// Absolute path: nothing to resolve.
	if filepath.IsAbs(exe) {
		return exe, nil
	}

	// Relative path with a directory component: anchor it at the working dir.
	if strings.Contains(exe, "/") {
		return filepath.Join(workDir, exe), nil
	}

	// Bare file name: search the app's path list inside its rootfs.
	podRoot, err := filepath.Abs(p.Root)
	if err != nil {
		return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
	}
	rootfs := common.AppRootfsPath(podRoot, ra.Name)
	searchPath := strings.Join(appSearchPaths(p, workDir, *ra.App), ":")

	resolved, err := lookupPathInsideApp(exe, searchPath, rootfs, workDir)
	if err != nil {
		return "", errwrap.Wrap(fmt.Errorf("error looking up %q", exe), err)
	}
	return resolved, nil
}
Example #4
0
File: main.go Project: nak3/rkt
// copyResolv copies etc/rkt-resolv.conf from the stage1 rootfs to
// etc/resolv.conf inside the rootfs of the pod's first app.
//
// It returns nil without copying when the stage1 file does not exist. Any
// other stat failure on either path is returned; a missing target file is
// not an error.
func copyResolv(p *stage1commontypes.Pod) error {
	app := p.Manifest.Apps[0]

	src := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-resolv.conf")
	dst := filepath.Join(common.AppRootfsPath(p.Root, app.Name), "etc", "resolv.conf")

	if _, err := os.Stat(src); err != nil {
		if os.IsNotExist(err) {
			// Nothing to copy.
			return nil
		}
		return err
	}

	if _, err := os.Stat(dst); err != nil && !os.IsNotExist(err) {
		return err
	}

	return fileutil.CopyRegularFile(src, dst)
}
Example #5
0
File: kvm.go Project: joshix/rkt
// mountSharedVolumes bind-mounts every mount generated for the app ra into the
// app's rootfs.
//
// root is the directory under which the shared volumes directory is created
// (with stage1initcommon.SharedVolPerm permissions); p.Root is the pod root
// used to locate the app's rootfs. For each mount the destination is resolved
// inside the app rootfs, the mountpoint is prepared, and the source ("host"
// volumes use the volume's Source, "empty" volumes use the per-volume
// directory under the shared volumes path) is bind-mounted onto it.
//
// The first failure aborts the function and is returned.
func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.RuntimeApp) error {
	appName := ra.Name

	sharedVolPath := common.SharedVolumesPath(root)
	if err := os.MkdirAll(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil {
		return errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	// MkdirAll leaves an existing directory's mode untouched, so set it
	// explicitly.
	if err := os.Chmod(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil {
		return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err)
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := stage1initcommon.GenerateMounts(ra, p.Manifest.Volumes, stage1initcommon.ConvertedFromDocker(imageManifest))
	if err != nil {
		return err
	}
	if len(mounts) == 0 {
		// Nothing to mount; skip resolving the pod root, as before.
		return nil
	}

	// The pod root and the app rootfs do not depend on the mount, so they are
	// resolved once for all mounts.
	absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		return errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
	}
	absAppRootfs := common.AppRootfsPath(absRoot, appName)

	for _, m := range mounts {
		mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Mount.Path)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		absDestination := filepath.Join(absAppRootfs, mntPath)
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())
		if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &m.Volume, m.DockerImplicit); err != nil {
			return err
		}

		var source string
		switch m.Volume.Kind {
		case "host":
			source = m.Volume.Source
		case "empty":
			// Same directory that was handed to PrepareMountpoints above.
			source = shPath
		default:
			return fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind)
		}

		cleanedSource, err := filepath.EvalSymlinks(source)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err)
		}
		if err := ensureDestinationExists(cleanedSource, absDestination); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err)
		}
		if err := doBindMount(cleanedSource, absDestination, m.ReadOnly, m.Volume.Recursive); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Mount.Path, source, absDestination), err)
		}
	}
	return nil
}
Example #6
0
// AppAddMounts prepares and adds every mount generated for the app ra in
// pod p.
//
// For each mount the destination is resolved inside the app's rootfs, the
// mountpoint is prepared with PrepareMountpoints, and the mount is added via
// AppAddOneMount; enterCmd is passed through to AppAddOneMount unchanged.
//
// Only a failure to create the shared volumes path is returned as an error.
// All other failures are reported through log.FatalE / log.Fatalf.
// NOTE(review): those log helpers presumably terminate the process — confirm
// against the log package.
func AppAddMounts(p *stage1commontypes.Pod, ra *schema.RuntimeApp, enterCmd []string) error {
	sharedVolPath, err := common.CreateSharedVolumesPath(p.Root)
	if err != nil {
		return err
	}

	imageManifest := p.Images[ra.Name.String()]

	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
	if err != nil {
		log.FatalE("Could not generate mounts", err)
		os.Exit(254)
	}

	absRoot, err := filepath.Abs(p.Root)
	if err != nil {
		log.FatalE("could not determine pod's absolute path", err)
	}

	appRootfs := common.AppRootfsPath(absRoot, ra.Name)

	// This logic is mostly copied from appToNspawnArgs
	// TODO(cdc): deduplicate
	for _, m := range mounts {
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())

		// Evaluate symlinks within the app's rootfs - otherwise absolute
		// symlinks will be wrong.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
		if err != nil {
			log.Fatalf("Could not evaluate path %v: %v", m.Mount.Path, err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)
		// Create the stage1 destination
		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
			log.FatalE("could not prepare mountpoint", err)
		}
		err = AppAddOneMount(p, ra, m.Source(absRoot), m.Mount.Path, m.ReadOnly, enterCmd)
		if err != nil {
			log.FatalE("Unable to setup app mounts", err)
		}
	}
	return nil
}
Example #7
0
// mountSharedVolumes bind-mounts every mount generated for the app ra into the
// app's rootfs inside pod p.
//
// The shared volumes path is created first. For each mount the destination is
// resolved inside the app rootfs, the mountpoint is prepared, and the mount's
// source (as reported by m.Source(p.Root), with symlinks resolved) is
// bind-mounted onto the destination.
//
// The first failure aborts the function and is returned.
func mountSharedVolumes(p *stage1commontypes.Pod, ra *schema.RuntimeApp) error {
	appName := ra.Name

	sharedVolPath, err := common.CreateSharedVolumesPath(p.Root)
	if err != nil {
		return err
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := stage1initcommon.GenerateMounts(ra, p.Manifest.Volumes, stage1initcommon.ConvertedFromDocker(imageManifest))
	if err != nil {
		return err
	}
	if len(mounts) == 0 {
		// Nothing to mount; skip resolving the pod root, as before.
		return nil
	}

	// The pod root and the app rootfs do not depend on the mount, so they are
	// resolved once for all mounts.
	absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		return errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
	}
	absAppRootfs := common.AppRootfsPath(absRoot, appName)

	for _, m := range mounts {
		mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Mount.Path)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		absDestination := filepath.Join(absAppRootfs, mntPath)
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())
		if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &m.Volume, m.DockerImplicit); err != nil {
			return err
		}

		source := m.Source(p.Root)
		cleanedSource, err := filepath.EvalSymlinks(source)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err)
		}
		if err := ensureDestinationExists(cleanedSource, absDestination); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err)
		}
		if err := doBindMount(cleanedSource, absDestination, m.ReadOnly, m.Volume.Recursive); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Mount.Path, source, absDestination), err)
		}
	}
	return nil
}
Example #8
0
// parseUserGroup resolves the User and Group fields of the app ra to a
// numeric UID and GID.
//
// Each field is tried against three resolvers in order, stopping at the first
// one that accepts it:
//   1. user.NumericIDs  - the field itself
//   2. user.IDsFromStat - the field together with the app rootfs and the
//      pod's UID range
//   3. user.IDsFromEtc  - the field as a user (or group) name together with
//      the app rootfs
//
// On any failure the function returns -1, -1 and a wrapped error.
// See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema
func parseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (int, int, error) {
	var uidResolver, gidResolver user.Resolver
	var uid, gid int
	var err error

	rootfs := common.AppRootfsPath(p.Root, ra.Name)
	userSpec := ra.App.User
	groupSpec := ra.App.Group

	// Resolve the user, falling through the resolvers until one succeeds.
	if uidResolver, err = user.NumericIDs(userSpec); err != nil {
		if uidResolver, err = user.IDsFromStat(rootfs, userSpec, &p.UidRange); err != nil {
			uidResolver, err = user.IDsFromEtc(rootfs, userSpec, "")
		}
	}
	if err != nil { // every resolver rejected the user
		return -1, -1, errwrap.Wrap(fmt.Errorf("invalid user %q", userSpec), err)
	}
	if uid, _, err = uidResolver.IDs(); err != nil {
		return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure user %q", userSpec), err)
	}

	// Resolve the group the same way.
	if gidResolver, err = user.NumericIDs(groupSpec); err != nil {
		if gidResolver, err = user.IDsFromStat(rootfs, groupSpec, &p.UidRange); err != nil {
			gidResolver, err = user.IDsFromEtc(rootfs, "", groupSpec)
		}
	}
	if err != nil { // every resolver rejected the group
		return -1, -1, errwrap.Wrap(fmt.Errorf("invalid group %q", groupSpec), err)
	}
	if _, gid, err = gidResolver.IDs(); err != nil {
		return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure group %q", groupSpec), err)
	}

	return uid, gid, nil
}
Example #9
0
// appToNspawnArgs builds the systemd-nspawn arguments that apply to the app
// ra of pod p.
//
// It produces one "--bind=" or "--bind-ro=" argument per generated mount
// (creating and configuring the backing directory for "empty" volumes along
// the way) and one "--capability=" argument for each capabilities isolator
// whose name is types.LinuxCapabilitiesRetainSetName.
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	var nspawnArgs []string
	name := ra.Name
	app := ra.App

	volsDir := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(volsDir, sharedVolPerm); err != nil {
		return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	if err := os.Chmod(volsDir, sharedVolPerm); err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", volsDir), err)
	}

	// Index the pod's volumes by name so mounts can look theirs up.
	volumes := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		volumes[v.Name] = v
	}

	for _, m := range GenerateMounts(ra, volumes) {
		vol := volumes[m.Volume]

		// "empty" volumes are backed by a directory under the shared volumes
		// path, owned and permissioned as the volume requests.
		if vol.Kind == "empty" {
			volDir := filepath.Join(volsDir, vol.Name.String())
			if err := os.MkdirAll(volDir, sharedVolPerm); err != nil {
				return nil, errwrap.Wrap(fmt.Errorf("could not create shared volume %q", vol.Name), err)
			}
			if err := os.Chown(volDir, *vol.UID, *vol.GID); err != nil {
				return nil, errwrap.Wrap(fmt.Errorf("could not change owner of %q", volDir), err)
			}
			// The mode is an octal string.
			mod, err := strconv.ParseUint(*vol.Mode, 8, 32)
			if err != nil {
				return nil, errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err)
			}
			if err := os.Chmod(volDir, os.FileMode(mod)); err != nil {
				return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", volDir), err)
			}
		}

		bindFlag := "--bind="
		if IsMountReadOnly(vol, app.MountPoints) {
			bindFlag = "--bind-ro="
		}

		absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}

		var source string
		switch vol.Kind {
		case "host":
			source = vol.Source
		case "empty":
			source = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind)
		}

		rootfs := common.AppRootfsPath(absRoot, name)
		mntPath, err := evaluateAppMountPath(rootfs, m.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err)
		}
		target := filepath.Join(common.RelAppRootfsPath(name), mntPath)

		nspawnArgs = append(nspawnArgs, bindFlag+source+":"+target)
	}

	for _, iso := range app.Isolators {
		capSet, ok := iso.Value().(types.LinuxCapabilitiesSet)
		if !ok {
			continue
		}

		var caps []string
		// TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily.
		for _, c := range capSet.Set() {
			caps = append(caps, string(c))
		}
		if iso.Name == types.LinuxCapabilitiesRetainSetName {
			nspawnArgs = append(nspawnArgs, "--capability="+strings.Join(caps, ","))
		}
	}

	return nspawnArgs, nil
}
Example #10
0
// stage1 runs the pod's single application in a chroot and returns the
// process exit code.
//
// The pod UUID is read from the first command-line argument and the pod is
// loaded from the current directory. The function then:
//   - checks that the pod has exactly one app with a non-empty exec;
//   - obtains the rkt lock fd and marks it close-on-exec;
//   - assembles the list of mounts (/dev, /proc, /sys, /tmp, the mounts
//     returned by evaluateMounts, plus /etc/resolv.conf and /etc/hosts
//     handling selected by p.ResolvConfMode and p.EtcHostsMode) and performs
//     them, creating missing mount targets first;
//   - writes the pid file and resolves the app's user and group;
//   - chroots into the app rootfs, changes to the working directory, drops
//     to the resolved gid/uid and execs the app.
//
// Every handled failure is logged and reported as 254. 0 is returned only if
// the exec wrapper returns without error.
func stage1(rp *stage1commontypes.RuntimePod) int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.Print("UUID is missing or malformed\n")
		return 254
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid, rp)
	if err != nil {
		log.PrintE("can't load pod", err)
		return 254
	}

	// NOTE(review): unlike the other failures this one goes through
	// log.FatalE rather than returning 254 — confirm FatalE exits the process.
	if err := p.SaveRuntime(); err != nil {
		log.FatalE("failed to save runtime parameters", err)
	}

	// Sanity checks
	if len(p.Manifest.Apps) != 1 {
		log.Printf("flavor %q only supports 1 application per Pod for now", flavor)
		return 254
	}

	ra := p.Manifest.Apps[0]

	imgName := p.AppNameToImageName(ra.Name)
	args := ra.App.Exec
	if len(args) == 0 {
		log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
		return 254
	}

	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("can't get rkt lock fd", err)
		return 254
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished
	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("can't set FD_CLOEXEC on rkt lock", err)
		return 254
	}

	// The working directory defaults to "/" when the app does not set one.
	workDir := "/"
	if ra.App.WorkingDirectory != "" {
		workDir = ra.App.WorkingDirectory
	}

	// Start from a default PATH and append the app's own environment after it.
	env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}
	for _, e := range ra.App.Environment {
		env = append(env, e.Name+"="+e.Value)
	}

	rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs")

	argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p)
	if err != nil {
		log.PrintE("can't evaluate mounts", err)
		return 254
	}

	// For /dev, /proc and /sys: first a mount call with MS_REC|MS_SHARED on
	// the host path, then a recursive bind into the app rootfs. The
	// app-specific mounts come after these built-in ones.
	effectiveMounts := append(
		[]flyMount{
			{"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC},

			{"tmpfs", rfs, "/tmp", "tmpfs", 0},
		},
		argFlyMounts...,
	)

	/* Process DNS config files
	 *
	 * /etc/resolv.conf: four modes
	 * 'host' - bind-mount host's file
	 * 'stage0' - bind-mount the file created by stage0
	 * 'default' - do nothing (we would respect CNI if fly had networking)
	 * 'none' - do nothing
	 */
	switch p.ResolvConfMode {
	case "host":
		effectiveMounts = append(effectiveMounts,
			flyMount{"/etc/resolv.conf", rfs, "/etc/resolv.conf", "none", syscall.MS_BIND | syscall.MS_RDONLY})
	case "stage0":
		if err := copyResolv(p); err != nil {
			log.PrintE("can't copy /etc/resolv.conf", err)
			return 254
		}
	}

	/*
	 * /etc/hosts: three modes:
	 * 'host' - bind-mount hosts's file
	 * 'stage0' - bind mount the file created by stage1
	 * 'default' - create a stub /etc/hosts if needed
	 */

	switch p.EtcHostsMode {
	case "host":
		effectiveMounts = append(effectiveMounts,
			flyMount{"/etc/hosts", rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY})
	case "stage0":
		effectiveMounts = append(effectiveMounts, flyMount{
			filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-hosts"),
			rfs,
			"/etc/hosts",
			"none",
			syscall.MS_BIND | syscall.MS_RDONLY})
	case "default":
		stage2HostsPath := filepath.Join(common.AppRootfsPath(p.Root, ra.Name), "etc", "hosts")
		if _, err := os.Stat(stage2HostsPath); err != nil && os.IsNotExist(err) {
			fallbackHosts := []byte("127.0.0.1 localhost localdomain\n")
			// The stub is best-effort: a failure is reported but does not
			// stop the app from starting.
			if err := ioutil.WriteFile(stage2HostsPath, fallbackHosts, 0644); err != nil {
				diag.Printf("can't write fallback hosts file %q: %v", stage2HostsPath, err)
			}
		}
	}

	for _, mount := range effectiveMounts {
		diag.Printf("Processing %+v", mount)

		var (
			err            error
			hostPathInfo   os.FileInfo
			targetPathInfo os.FileInfo
		)

		// Only absolute host paths are stat'ed; other sources (e.g. "tmpfs"
		// or the empty string) have no file info.
		if strings.HasPrefix(mount.HostPath, "/") {
			if hostPathInfo, err = os.Stat(mount.HostPath); err != nil {
				log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err)
				return 254
			}
		} else {
			hostPathInfo = nil
		}

		// A missing target is fine here (it is created below); any other
		// stat failure is fatal.
		absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath)
		if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) {
			log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err)
			return 254
		}

		switch {
		case (mount.Flags & syscall.MS_REMOUNT) != 0:
			{
				diag.Printf("don't attempt to create files for remount of %q", absTargetPath)
			}
		case targetPathInfo == nil:
			// The target does not exist: create its parent directory, then a
			// directory or an empty file matching the kind of the host path.
			absTargetPathParent, _ := filepath.Split(absTargetPath)
			if err := os.MkdirAll(absTargetPathParent, 0755); err != nil {
				log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPathParent), err)
				return 254
			}
			switch {
			case hostPathInfo == nil || hostPathInfo.IsDir():
				if err := os.Mkdir(absTargetPath, 0755); err != nil {
					log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
					return 254
				}
			case !hostPathInfo.IsDir():
				file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700)
				if err != nil {
					log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err)
					return 254
				}
				file.Close()
			}
		case hostPathInfo != nil:
			// Both ends exist: refuse to mount a directory on a file or a
			// file on a directory.
			switch {
			case hostPathInfo.IsDir() && !targetPathInfo.IsDir():
				log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath)
				return 254
			case !hostPathInfo.IsDir() && targetPathInfo.IsDir():
				log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath)
				return 254
			}
		}

		if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil {
			log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err)
			return 254
		}
	}

	if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil {
		log.Error(err)
		return 254
	}

	var uidResolver, gidResolver user.Resolver
	var uid, gid int

	// Resolve the user: first via user.NumericIDs, then via user.IDsFromStat
	// against the app rootfs.
	uidResolver, err = user.NumericIDs(ra.App.User)
	if err != nil {
		uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err)
		return 254
	}

	if uid, _, err = uidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err)
		return 254
	}

	// Resolve the group the same way.
	gidResolver, err = user.NumericIDs(ra.App.Group)
	if err != nil {
		gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err)
		return 254
	}

	if _, gid, err = gidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err)
		return 254
	}

	diag.Printf("chroot to %q", rfs)
	if err := syscall.Chroot(rfs); err != nil {
		log.PrintE("can't chroot", err)
		return 254
	}

	if err := os.Chdir(workDir); err != nil {
		log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err)
		return 254
	}

	// lock the current goroutine to its current OS thread.
	// This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid,
	// see https://github.com/golang/go/issues/1435#issuecomment-66054163.
	runtime.LockOSThread()

	diag.Printf("setting uid %d gid %d", uid, gid)

	// The gid is changed before the uid.
	if err := syscall.Setresgid(gid, gid, gid); err != nil {
		log.PrintE(fmt.Sprintf("can't set gid %d", gid), err)
		return 254
	}

	if err := syscall.Setresuid(uid, uid, uid); err != nil {
		log.PrintE(fmt.Sprintf("can't set uid %d", uid), err)
		return 254
	}

	diag.Printf("execing %q in %q", args, rfs)
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err)
		return 254
	}

	return 0
}
Example #11
0
File: units.go Project: nhlfr/rkt
// AppUnit writes and activates the systemd service unit for the app ra and,
// when the app has socket-activated ports, the matching socket unit.
//
// binPath replaces the first element of the app's Exec in ExecStart.
// privateUsers is deserialized into a UID range that is used when writing the
// environment file, resolving the app's user and group, generating sysusers
// and computing device allowances. insecureOptions disables the capability
// bounding set, the seccomp filter, and the path/device restrictions. opts
// are caller-supplied unit options; the options generated here are appended
// after them.
//
// The method is a no-op when uw.err is already set. On failure it records the
// error in uw.err and returns without writing the service unit.
func (uw *UnitWriter) AppUnit(
	ra *schema.RuntimeApp, binPath, privateUsers string, insecureOptions Stage1InsecureOptions,
	opts ...*unit.UnitOption,
) {
	if uw.err != nil {
		return
	}

	flavor, systemdVersion, err := GetFlavor(uw.p)
	if err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err)
		return
	}

	app := ra.App
	appName := ra.Name
	imgName := uw.p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		uw.err = fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
		return
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if uw.p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", uw.p.MetadataServiceURL)
	}

	envFilePath := EnvFilePath(uw.p.Root, appName)

	uidRange := user.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		uw.err = err
		return
	}

	if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to write environment file for systemd"), err)
		return
	}

	u, g, err := parseUserGroup(uw.p, ra, uidRange)
	if err != nil {
		uw.err = err
		return
	}

	if err := generateSysusers(uw.p, ra, u, g, uidRange); err != nil {
		uw.err = errwrap.Wrap(errors.New("unable to generate sysusers"), err)
		return
	}

	var supplementaryGroups []string
	for _, g := range app.SupplementaryGIDs {
		supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g))
	}

	capabilitiesStr, err := getAppCapabilities(app.Isolators)
	if err != nil {
		uw.err = err
		return
	}

	// ExecStart runs the resolved binary with the app's remaining arguments.
	execStart := append([]string{binPath}, app.Exec[1:]...)
	execStartString := quoteExec(execStart)
	opts = append(opts, []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStartString),
		unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)),
		// MountFlags=shared creates a new mount namespace and (as unintuitive
		// as it might seem) makes sure the mount is slave+shared.
		unit.NewUnitOption("Service", "MountFlags", "shared"),
		unit.NewUnitOption("Service", "WorkingDirectory", app.WorkingDirectory),
		unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)),
		unit.NewUnitOption("Service", "User", strconv.Itoa(u)),
		unit.NewUnitOption("Service", "Group", strconv.Itoa(g)),

		// This helps working around a race
		// (https://github.com/systemd/systemd/issues/2913) that causes the
		// systemd unit name not getting written to the journal if the unit is
		// short-lived and runs as non-root.
		unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()),
	}...)

	if len(supplementaryGroups) > 0 {
		opts = appendOptionsList(opts, "Service", "SupplementaryGroups", "", supplementaryGroups)
	}

	if supportsNotify(uw.p, appName.String()) {
		opts = append(opts, unit.NewUnitOption("Service", "Type", "notify"))
	}

	if !insecureOptions.DisableCapabilities {
		opts = append(opts, unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")))
	}

	noNewPrivileges := getAppNoNewPrivileges(app.Isolators)

	// Apply seccomp isolator, if any and not opt-ing out;
	// see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=
	if !insecureOptions.DisableSeccomp {
		var forceNoNewPrivileges bool

		unprivileged := (u != 0)
		opts, forceNoNewPrivileges, err = getSeccompFilter(opts, uw.p, unprivileged, app.Isolators)
		if err != nil {
			uw.err = err
			return
		}

		// Seccomp filters require NoNewPrivileges for unprivileged apps, that may override
		// manifest annotation.
		if forceNoNewPrivileges {
			noNewPrivileges = true
		}
	}

	opts = append(opts, unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)))

	if ra.ReadOnlyRootFS {
		opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName)))
	}

	// TODO(tmrts): Extract this logic into a utility function.
	vols := make(map[types.ACName]types.Volume)
	for _, v := range uw.p.Manifest.Volumes {
		vols[v.Name] = v
	}

	absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		uw.err = err
		return
	}
	appRootfs := common.AppRootfsPath(absRoot, appName)

	// Collect the mount paths that are not read-only so they can be listed
	// in ReadWriteDirectories.
	rwDirs := []string{}
	imageManifest := uw.p.Images[appName.String()]
	mounts := GenerateMounts(ra, vols, imageManifest)
	for _, m := range mounts {
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path)
		if err != nil {
			uw.err = err
			return
		}

		if !IsMountReadOnly(vols[m.Volume], app.MountPoints) {
			rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath))
		}
	}
	if len(rwDirs) > 0 {
		opts = appendOptionsList(opts, "Service", "ReadWriteDirectories", "", rwDirs)
	}

	// Restrict access to sensitive paths (eg. procfs and sysfs entries).
	if !insecureOptions.DisablePaths {
		opts = protectKernelTunables(opts, appName, systemdVersion)
	}

	// Generate default device policy for the app, as well as the list of allowed devices.
	// For kvm flavor, devices are VM-specific and restricting them is not strictly needed.
	if !insecureOptions.DisablePaths && flavor != "kvm" {
		opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed"))
		deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange)
		if err != nil {
			uw.err = err
			return
		}
		for _, dev := range deviceAllows {
			opts = append(opts, unit.NewUnitOption("Service", "DeviceAllow", dev))
		}
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			uw.err = fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
			return
		}
		exec := quoteExec(eh.Exec)
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	var saPorts []types.Port
	for _, p := range app.Ports {
		if p.SocketActivated {
			saPorts = append(saPorts, p)
		}
	}

	// doWithIsolator runs f when the named isolator is supported. It returns
	// true when an error was recorded in uw.err and AppUnit must stop; an
	// unsupported isolator is skipped with a warning and is not an error.
	doWithIsolator := func(isolator string, f func() error) bool {
		ok, err := cgroup.IsIsolatorSupported(isolator)
		if err != nil {
			uw.err = err
			return true
		}

		if !ok {
			fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", isolator)
			// Actually skip it, as the warning says.
			return false
		}

		if err := f(); err != nil {
			uw.err = err
			return true
		}

		return false
	}

	for _, i := range app.Isolators {
		var failed bool

		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			failed = doWithIsolator("memory", func() error {
				if v.Limit() == nil {
					return nil
				}

				opts = append(opts, unit.NewUnitOption("Service", "MemoryLimit", strconv.Itoa(int(v.Limit().Value()))))
				return nil
			})
		case *types.ResourceCPU:
			failed = doWithIsolator("cpu", func() error {
				if v.Limit() == nil {
					return nil
				}

				if v.Limit().Value() > resource.MaxMilliValue {
					return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String())
				}

				// Millivalue / 10 gives a percentage (1000m == 100%).
				quota := strconv.Itoa(int(v.Limit().MilliValue()/10)) + "%"
				opts = append(opts, unit.NewUnitOption("Service", "CPUQuota", quota))

				return nil
			})
		}

		// Stop right away on failure, including when the failing isolator is
		// the last one, so no unit files are written with uw.err set.
		if failed {
			return
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				uw.err = fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
				return
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*uw.p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		file, err := os.OpenFile(SocketUnitPath(uw.p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to create socket file"), err)
			return
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to write socket unit file"), err)
			return
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(uw.p.Root, appName)); err != nil {
			uw.err = errwrap.Wrap(errors.New("failed to link socket want"), err)
			return
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	// The service requires, and is ordered after, the app's prepare unit and
	// sysusers.service.
	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service"))
	opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service"))

	uw.WriteUnit(ServiceUnitPath(uw.p.Root, appName), "failed to create service unit file", opts...)
	uw.Activate(ServiceUnitName(appName), ServiceWantPath(uw.p.Root, appName))
}
Example #12
0
File: app.go Project: nhlfr/rkt
// RmApp removes the app appName from the pod uuid stored under dir.
//
// The pod manifest must carry a "coreos.com/rkt/stage1/mutable" annotation
// that parses as true, and appName must be listed in the manifest. The stage1
// appStopEntrypoint and appRmEntrypoint entrypoints are called with the pod
// UUID, the app name, the path of the 'enter' entrypoint and podPID. After
// that the app's info directory, its overlay tree store (only when
// usesOverlay is set), its directory, its status file and its env file are
// removed, and the app is dropped from the pod manifest.
//
// The first failing step aborts the removal and its error is returned; steps
// that already ran are not rolled back.
//
// TODO(iaguis): RmConfig?
func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName, podPID int) error {
	p, err := stage1types.LoadPod(dir, uuid)
	if err != nil {
		return errwrap.Wrap(errors.New("error loading pod manifest"), err)
	}

	pm := p.Manifest

	// A missing annotation leaves mutable at its zero value (false).
	var mutable bool
	ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable")
	if ok {
		mutable, err = strconv.ParseBool(ms)
		if err != nil {
			return errwrap.Wrap(errors.New("error parsing mutable annotation"), err)
		}
	}

	if !mutable {
		return errors.New("immutable pod: cannot remove application")
	}

	app := pm.Apps.Get(*appName)
	if app == nil {
		return fmt.Errorf("error: nonexistent app %q", *appName)
	}

	// Read the tree store ID up front, before any of the app's files are
	// removed below.
	treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(dir, *appName))
	if err != nil {
		return err
	}

	eep, err := getStage1Entrypoint(dir, enterEntrypoint)
	if err != nil {
		return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err)
	}

	args := []string{
		uuid.String(),
		appName.String(),
		filepath.Join(common.Stage1RootfsPath(dir), eep),
		strconv.Itoa(podPID),
	}

	if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil {
		return err
	}

	if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil {
		return err
	}

	appInfoDir := common.AppInfoPath(dir, *appName)
	if err := os.RemoveAll(appInfoDir); err != nil {
		return errwrap.Wrap(errors.New("error removing app info directory"), err)
	}

	if usesOverlay {
		appRootfs := common.AppRootfsPath(dir, *appName)
		if err := syscall.Unmount(appRootfs, 0); err != nil {
			return err
		}

		ts := filepath.Join(dir, "overlay", string(treeStoreID))
		if err := os.RemoveAll(ts); err != nil {
			// This is the overlay tree store, not the app info directory.
			return errwrap.Wrap(errors.New("error removing app overlay tree store directory"), err)
		}
	}

	if err := os.RemoveAll(common.AppPath(dir, *appName)); err != nil {
		return err
	}

	// The status and env files may legitimately be absent.
	appStatusPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "status", appName.String())
	if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	envPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String())
	if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	removeAppFromPodManifest(pm, appName)

	return updatePodManifest(dir, pm)
}
Example #13
0
File: pod.go Project: joshix/rkt
// appToNspawnArgs transforms the given runtime app into the subset of
// systemd-nspawn arguments that apply to it: one --bind/--bind-ro option for
// every mount generated for the app and, unless insecureOptions disables
// capabilities, a --capability option listing the app's capabilities.
//
// As a side effect it creates the pod's shared volumes directory with mode
// SharedVolPerm and prepares every mountpoint inside the app's rootfs.
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOptions Stage1InsecureOptions) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	// Chmod also applies the mode when the directory already existed.
	if err := os.Chmod(sharedVolPath, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err)
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
	if err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err)
	}

	// The pod root and the app rootfs are the same for every mount, so they
	// are resolved once. This is only done when there is something to mount,
	// so an app without mounts never fails on the path lookup.
	var absRoot, appRootfs string
	if len(mounts) > 0 {
		absRoot, err = filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}
		appRootfs = common.AppRootfsPath(absRoot, appName)
	}

	for _, m := range mounts {
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())

		// TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well.
		// Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd
		// version is bumped.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)

		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
			return nil, err
		}

		bindOpt := "--bind="
		if m.ReadOnly {
			bindOpt = "--bind-ro="
		}

		var src string
		switch m.Volume.Kind {
		case "host":
			src = m.Volume.Source
		case "empty":
			src = filepath.Join(common.SharedVolumesPath(absRoot), m.Volume.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind)
		}
		dst := filepath.Join(common.RelAppRootfsPath(appName), mntPath)

		// If Recursive is not set, default to recursive.
		// rbind/norbind options exist since systemd-nspawn v226
		bindMode := "rbind"
		if m.Volume.Recursive != nil && !*m.Volume.Recursive {
			bindMode = "norbind"
		}

		args = append(args, bindOpt+src+":"+dst+":"+bindMode)
	}

	if !insecureOptions.DisableCapabilities {
		capabilitiesStr, err := getAppCapabilities(app.Isolators)
		if err != nil {
			return nil, err
		}
		capList := strings.Join(capabilitiesStr, ",")
		args = append(args, "--capability="+capList)
	}

	return args, nil
}
Example #14
0
File: app.go Project: kinvolk/rkt
// RmApp removes the app cfg.AppName from the pod cfg.UUID found under
// cfg.DataDir.
//
// The sandbox manifest is locked exclusively for the duration of the call.
// When the pod is still running (cfg.PodPID > 0) the appStopEntrypoint and
// appRmEntrypoint stage1 entrypoints are run first. After that the app's
// overlay tree store (only when cfg.UsesOverlay is set), its info directory,
// its directory, its status file and its env file are removed, and the app
// is dropped from the manifest, which is then written back.
//
// The first failing step aborts the removal and its error is returned; steps
// that already ran are not rolled back.
func RmApp(cfg RmConfig) error {
	pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String())
	if err != nil {
		return errwrap.Wrap(errors.New("error loading pod"), err)
	}
	defer pod.Close()

	debug("locking sandbox manifest")
	if err := pod.ExclusiveLockManifest(); err != nil {
		return errwrap.Wrap(errors.New("failed to lock sandbox manifest"), err)
	}
	defer pod.UnlockManifest()

	pm, err := pod.SandboxManifest()
	if err != nil {
		return errwrap.Wrap(errors.New("cannot remove application, sandbox validation failed"), err)
	}

	app := pm.Apps.Get(*cfg.AppName)
	if app == nil {
		return fmt.Errorf("error: nonexistent app %q", *cfg.AppName)
	}

	if cfg.PodPID > 0 {
		// Call app-stop and app-rm entrypoint only if the pod is still running.
		// Otherwise, there's not much we can do about it except unmounting/removing
		// the file system.
		args := []string{
			fmt.Sprintf("--debug=%t", cfg.Debug),
			fmt.Sprintf("--app=%s", cfg.AppName),
		}

		ce := CrossingEntrypoint{
			PodPath:        cfg.PodPath,
			PodPID:         cfg.PodPID,
			AppName:        cfg.AppName.String(),
			EntrypointName: appStopEntrypoint,
			EntrypointArgs: args,
			Interactive:    false,
		}
		if runErr := ce.Run(); runErr != nil {
			status, err := common.GetExitStatus(runErr)
			// ignore nonexistent units failing to stop. Exit status 5
			// comes from systemctl and means the unit doesn't exist
			if err != nil {
				return err
			} else if status != 5 {
				return fmt.Errorf("exit status %d", status)
			}
		}

		// The same entrypoint description is reused for app-rm.
		ce.EntrypointName = appRmEntrypoint
		if err := ce.Run(); err != nil {
			return err
		}
	}

	if cfg.UsesOverlay {
		treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cfg.PodPath, *cfg.AppName))
		if err != nil {
			return err
		}

		appRootfs := common.AppRootfsPath(cfg.PodPath, *cfg.AppName)
		if err := syscall.Unmount(appRootfs, 0); err != nil {
			return err
		}

		ts := filepath.Join(cfg.PodPath, "overlay", string(treeStoreID))
		if err := os.RemoveAll(ts); err != nil {
			// This is the overlay tree store, not the app info directory.
			return errwrap.Wrap(errors.New("error removing app overlay tree store directory"), err)
		}
	}

	appInfoDir := common.AppInfoPath(cfg.PodPath, *cfg.AppName)
	if err := os.RemoveAll(appInfoDir); err != nil {
		return errwrap.Wrap(errors.New("error removing app info directory"), err)
	}

	if err := os.RemoveAll(common.AppPath(cfg.PodPath, *cfg.AppName)); err != nil {
		return err
	}

	// The status and env files may legitimately be absent.
	appStatusPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "status", cfg.AppName.String())
	if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	envPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", cfg.AppName.String())
	if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	// Drop the first manifest entry carrying the removed app's name.
	for i, a := range pm.Apps {
		if a.Name == *cfg.AppName {
			pm.Apps = append(pm.Apps[:i], pm.Apps[i+1:]...)
			break
		}
	}

	return pod.UpdateManifest(pm, cfg.PodPath)
}
Example #15
0
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd
// units written below p.Root: the app's service unit, a socket unit when the
// app declares socket-activated ports, the matching "wants" symlinks, the
// app's environment file and its reaper service.
//
// interactive selects tty instead of journal+console for the service's
// standard streams. privateUsers is the serialized uid range that is
// deserialized and used for the environment file, the user/group lookup and
// sysusers generation. flavor is not used by this implementation.
func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error {
	app := ra.App
	appName := ra.Name
	imgName := p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
	}

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", p.MetadataServiceURL)
	}

	envFilePath := EnvFilePath(p.Root, appName)

	uidRange := user.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		return err
	}

	if err := writeEnvFile(p, env, appName, uidRange, '\n', envFilePath); err != nil {
		return errwrap.Wrap(errors.New("unable to write environment file for systemd"), err)
	}

	u, g, err := parseUserGroup(p, ra, uidRange)
	if err != nil {
		return err
	}

	if err := generateSysusers(p, ra, u, g, uidRange); err != nil {
		return errwrap.Wrap(errors.New("unable to generate sysusers"), err)
	}

	binPath, err := findBinPath(p, appName, *app, workDir, app.Exec[0])
	if err != nil {
		return err
	}

	var supplementaryGroups []string
	for _, g := range app.SupplementaryGIDs {
		supplementaryGroups = append(supplementaryGroups, strconv.Itoa(g))
	}

	capabilitiesStr, err := getAppCapabilities(app.Isolators)
	if err != nil {
		return err
	}

	noNewPrivileges := getAppNoNewPrivileges(app.Isolators)

	// ExecStart runs the resolved binary path followed by the remaining
	// arguments of the app's exec line.
	execStart := append([]string{binPath}, app.Exec[1:]...)
	execStartString := quoteExec(execStart)
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStartString),
		unit.NewUnitOption("Service", "RootDirectory", common.RelAppRootfsPath(appName)),
		// MountFlags=shared creates a new mount namespace and (as unintuitive
		// as it might seem) makes sure the mount is slave+shared.
		unit.NewUnitOption("Service", "MountFlags", "shared"),
		unit.NewUnitOption("Service", "WorkingDirectory", workDir),
		unit.NewUnitOption("Service", "EnvironmentFile", RelEnvFilePath(appName)),
		unit.NewUnitOption("Service", "User", strconv.Itoa(u)),
		unit.NewUnitOption("Service", "Group", strconv.Itoa(g)),
		unit.NewUnitOption("Service", "SupplementaryGroups", strings.Join(supplementaryGroups, " ")),
		unit.NewUnitOption("Service", "CapabilityBoundingSet", strings.Join(capabilitiesStr, " ")),
		unit.NewUnitOption("Service", "NoNewPrivileges", strconv.FormatBool(noNewPrivileges)),
		// This helps working around a race
		// (https://github.com/systemd/systemd/issues/2913) that causes the
		// systemd unit name not getting written to the journal if the unit is
		// short-lived and runs as non-root.
		unit.NewUnitOption("Service", "SyslogIdentifier", appName.String()),
	}

	// Restrict access to sensitive paths (eg. procfs)
	opts = protectSystemFiles(opts, appName)

	if ra.ReadOnlyRootFS {
		opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName)))
	}

	// TODO(tmrts): Extract this logic into a utility function.
	vols := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		vols[v.Name] = v
	}

	absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
	if err != nil {
		return err
	}
	appRootfs := common.AppRootfsPath(absRoot, appName)

	// Collect every mount that is not read-only; these are listed as
	// ReadWriteDirectories of the service.
	rwDirs := []string{}
	imageManifest := p.Images[appName.String()]
	for _, m := range GenerateMounts(ra, vols, imageManifest) {
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path)
		if err != nil {
			return err
		}

		if !IsMountReadOnly(vols[m.Volume], app.MountPoints) {
			rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath))
		}
	}

	opts = append(opts, unit.NewUnitOption("Service", "ReadWriteDirectories", strings.Join(rwDirs, " ")))

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(eh.Exec)
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	// The loop variable is deliberately not called p, which would shadow the
	// pod parameter.
	var saPorts []types.Port
	for _, appPort := range app.Ports {
		if appPort.SocketActivated {
			saPorts = append(saPorts, appPort)
		}
	}

	// Only memory and CPU resource isolators are handed to the cgroup
	// helper; other isolator types are skipped here.
	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit())
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit())
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		// O_TRUNC: without it a longer pre-existing unit file would keep
		// stale bytes after the newly written content.
		file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to create socket file"), err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return errwrap.Wrap(errors.New("failed to write socket unit file"), err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil {
			return errwrap.Wrap(errors.New("failed to link socket want"), err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", "sysusers.service"))
	opts = append(opts, unit.NewUnitOption("Unit", "After", "sysusers.service"))

	// O_TRUNC for the same reason as the socket unit file above.
	file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return errwrap.Wrap(errors.New("failed to create service unit file"), err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return errwrap.Wrap(errors.New("failed to write service unit file"), err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil {
		return errwrap.Wrap(errors.New("failed to link service want"), err)
	}

	if err = writeAppReaper(p, appName.String(), common.RelAppRootfsPath(appName), binPath); err != nil {
		return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err)
	}

	return nil
}
Example #16
0
File: app.go Project: joshix/rkt
// RmApp removes the app cfg.AppName from the pod cfg.UUID found under
// cfg.DataDir.
//
// The pod manifest is locked exclusively for the duration of the call. The
// manifest must carry a "coreos.com/rkt/stage1/mutable" annotation that
// parses as true, and the app must be listed in it. When the pod is still
// running (cfg.PodPID > 0) the appStopEntrypoint and appRmEntrypoint stage1
// entrypoints are run first. After that the app's overlay tree store (only
// when cfg.UsesOverlay is set), its info directory, its directory, its status
// file and its env file are removed, and the app is dropped from the pod
// manifest.
//
// The first failing step aborts the removal and its error is returned; steps
// that already ran are not rolled back.
func RmApp(cfg RmConfig) error {
	pod, err := pkgPod.PodFromUUIDString(cfg.DataDir, cfg.UUID.String())
	if err != nil {
		return errwrap.Wrap(errors.New("error loading pod"), err)
	}
	defer pod.Close()

	debug("locking pod manifest")
	if err := pod.ExclusiveManifestLock(); err != nil {
		return errwrap.Wrap(errors.New("failed to lock pod manifest"), err)
	}
	defer pod.ManifestUnlock()

	_, pm, err := pod.PodManifest()
	if err != nil {
		return errwrap.Wrap(errors.New("error loading pod manifest"), err)
	}

	// A missing annotation leaves mutable at its zero value (false).
	var mutable bool
	ms, ok := pm.Annotations.Get("coreos.com/rkt/stage1/mutable")
	if ok {
		mutable, err = strconv.ParseBool(ms)
		if err != nil {
			return errwrap.Wrap(errors.New("error parsing mutable annotation"), err)
		}
	}

	if !mutable {
		return errors.New("immutable pod: cannot remove application")
	}

	app := pm.Apps.Get(*cfg.AppName)
	if app == nil {
		return fmt.Errorf("error: nonexistent app %q", *cfg.AppName)
	}

	if cfg.PodPID > 0 {
		// Call app-stop and app-rm entrypoint only if the pod is still running.
		// Otherwise, there's not much we can do about it except unmounting/removing
		// the file system.
		args := []string{
			fmt.Sprintf("--app=%s", cfg.AppName),
		}

		ce := CrossingEntrypoint{
			PodPath:        cfg.PodPath,
			PodPID:         cfg.PodPID,
			AppName:        cfg.AppName.String(),
			EntrypointName: appStopEntrypoint,
			EntrypointArgs: args,
			Interactive:    false,
		}
		if runErr := ce.Run(); runErr != nil {
			status, err := common.GetExitStatus(runErr)
			// ignore nonexistent units failing to stop. Exit status 5
			// comes from systemctl and means the unit doesn't exist
			if err != nil {
				return err
			} else if status != 5 {
				return fmt.Errorf("exit status %d", status)
			}
		}

		// The same entrypoint description is reused for app-rm.
		ce.EntrypointName = appRmEntrypoint
		if err := ce.Run(); err != nil {
			return err
		}
	}

	if cfg.UsesOverlay {
		treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cfg.PodPath, *cfg.AppName))
		if err != nil {
			return err
		}

		appRootfs := common.AppRootfsPath(cfg.PodPath, *cfg.AppName)
		if err := syscall.Unmount(appRootfs, 0); err != nil {
			return err
		}

		ts := filepath.Join(cfg.PodPath, "overlay", string(treeStoreID))
		if err := os.RemoveAll(ts); err != nil {
			// This is the overlay tree store, not the app info directory.
			return errwrap.Wrap(errors.New("error removing app overlay tree store directory"), err)
		}
	}

	appInfoDir := common.AppInfoPath(cfg.PodPath, *cfg.AppName)
	if err := os.RemoveAll(appInfoDir); err != nil {
		return errwrap.Wrap(errors.New("error removing app info directory"), err)
	}

	if err := os.RemoveAll(common.AppPath(cfg.PodPath, *cfg.AppName)); err != nil {
		return err
	}

	// The status and env files may legitimately be absent.
	appStatusPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "status", cfg.AppName.String())
	if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	envPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", cfg.AppName.String())
	if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) {
		return err
	}

	removeAppFromPodManifest(pm, cfg.AppName)

	return updatePodManifest(cfg.PodPath, pm)
}
Example #17
0
// runExport implements the export command: args[0] is the UUID of an exited
// pod and args[1] the output ACI path, which must end in
// schema.ACIExtension. It returns the process exit code: 0 on success, 254
// on failure, and 1 when the export itself got through but the temporary
// overlay mount or directory could not be cleaned up.
//
// For pods using overlayfs the app is mounted into a temporary directory
// below the data dir and exported from there. Other pods are exported in
// place, which is refused while mountpoints remain below the app's rootfs.
func runExport(cmd *cobra.Command, args []string) (exit int) {
	if len(args) != 2 {
		cmd.Usage()
		return 254
	}

	outACI := args[1]
	ext := filepath.Ext(outACI)
	if ext != schema.ACIExtension {
		stderr.Printf("extension must be %s (given %s)", schema.ACIExtension, outACI)
		return 254
	}

	p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0])
	if err != nil {
		stderr.PrintE("problem retrieving pod", err)
		return 254
	}
	defer p.Close()

	state := p.State()
	if state != pkgPod.Exited && state != pkgPod.ExitedGarbage {
		stderr.Print("pod is not exited. Only exited pods can be exported")
		return 254
	}

	app, err := getApp(p)
	if err != nil {
		stderr.PrintE("unable to find app", err)
		return 254
	}

	root := common.AppPath(p.Path(), app.Name)
	manifestPath := filepath.Join(common.AppInfoPath(p.Path(), app.Name), aci.ManifestFile)
	if p.UsesOverlay() {
		tmpDir := filepath.Join(getDataDir(), "tmp")
		if err := os.MkdirAll(tmpDir, common.DefaultRegularDirPerm); err != nil {
			stderr.PrintE("unable to create temp directory", err)
			return 254
		}
		podDir, err := ioutil.TempDir(tmpDir, fmt.Sprintf("rkt-export-%s", p.UUID))
		if err != nil {
			stderr.PrintE("unable to create export temp directory", err)
			return 254
		}
		// Deferred calls run in reverse order, so the unmount registered
		// further down happens before this removal.
		defer func() {
			if err := os.RemoveAll(podDir); err != nil {
				stderr.PrintE("problem removing temp directory", err)
				exit = 1
			}
		}()
		mntDir := filepath.Join(podDir, "rootfs")
		if err := os.Mkdir(mntDir, common.DefaultRegularDirPerm); err != nil {
			stderr.PrintE("unable to create rootfs directory inside temp directory", err)
			return 254
		}

		if err := mountOverlay(p, app, mntDir); err != nil {
			stderr.PrintE(fmt.Sprintf("couldn't mount directory at %s", mntDir), err)
			return 254
		}
		defer func() {
			if err := syscall.Unmount(mntDir, 0); err != nil {
				stderr.PrintE(fmt.Sprintf("error unmounting directory %s", mntDir), err)
				exit = 1
			}
		}()
		root = podDir
	} else {
		// trailing filepath separator so we don't match the appRootfs path
		appRootfs := common.AppRootfsPath(p.Path(), app.Name) + string(filepath.Separator)
		mnts, err := mountinfo.ParseMounts(0)
		if err != nil {
			stderr.PrintE("error parsing mountpoints", err)
			return 254
		}
		mnts = mnts.Filter(mountinfo.HasPrefix(appRootfs))
		if len(mnts) > 0 {
			stderr.Printf("pod has remaining mountpoints. Only pods using overlayfs or with no mountpoints can be exported")
			return 254
		}
	}

	// Check for user namespace (--private-user), if in use get uidRange
	var uidRange *user.UidRange
	privUserFile := filepath.Join(p.Path(), common.PrivateUsersPreparedFilename)
	privUserContent, err := ioutil.ReadFile(privUserFile)
	switch {
	case err == nil:
		uidRange = user.NewBlankUidRange()
		// The file was found, save uid & gid shift and count
		if err := uidRange.Deserialize(privUserContent); err != nil {
			stderr.PrintE(fmt.Sprintf("problem deserializing the content of %s", common.PrivateUsersPreparedFilename), err)
			return 254
		}
	case !os.IsNotExist(err):
		// Only a missing file means "no user namespace". Any other read
		// failure would otherwise silently export with a nil uid range.
		stderr.PrintE(fmt.Sprintf("problem reading %s", common.PrivateUsersPreparedFilename), err)
		return 254
	}

	if err = buildAci(root, manifestPath, outACI, uidRange); err != nil {
		stderr.PrintE("error building aci", err)
		return 254
	}
	return 0
}
Example #18
0
// appToNspawnArgs transforms the given runtime app into the subset of
// systemd-nspawn arguments that apply to it: one --bind/--bind-ro option for
// every mount generated for the app and, unless the pod's insecure options
// disable capabilities, a --capability option listing the app's capabilities.
//
// As a side effect it creates the pod's shared volumes path and prepares
// every mountpoint inside the app's rootfs.
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	var args []string
	appName := ra.Name
	app := ra.App

	sharedVolPath, err := common.CreateSharedVolumesPath(p.Root)
	if err != nil {
		return nil, err
	}

	imageManifest := p.Images[appName.String()]
	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
	if err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err)
	}

	// The pod root and the app rootfs are the same for every mount, so they
	// are resolved once. This is only done when there is something to mount,
	// so an app without mounts never fails on the path lookup.
	var absRoot, appRootfs string
	if len(mounts) > 0 {
		absRoot, err = filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}
		appRootfs = common.AppRootfsPath(absRoot, appName)
	}

	for _, m := range mounts {
		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())

		// Evaluate symlinks within the app's rootfs. This is needed because symlinks
		// within the container can be absolute, which will, of course, be wrong in our ns.
		// Systemd also gets this wrong, see https://github.com/systemd/systemd/issues/2860
		// When the above issue is fixed, we can pass the un-evaluated path to --bind instead.
		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
		}
		mntAbsPath := filepath.Join(appRootfs, mntPath)

		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
			return nil, err
		}

		bindOpt := "--bind="
		if m.ReadOnly {
			bindOpt = "--bind-ro="
		}

		src := m.Source(absRoot)
		dst := filepath.Join(common.RelAppRootfsPath(appName), mntPath)

		// If Recursive is not set, default to recursive.
		// rbind/norbind options exist since systemd-nspawn v226
		bindMode := "rbind"
		if m.Volume.Recursive != nil && !*m.Volume.Recursive {
			bindMode = "norbind"
		}

		args = append(args, bindOpt+src+":"+dst+":"+bindMode)
	}

	if !p.InsecureOptions.DisableCapabilities {
		capabilitiesStr, err := getAppCapabilities(app.Isolators)
		if err != nil {
			return nil, err
		}
		capList := strings.Join(capabilitiesStr, ",")
		args = append(args, "--capability="+capList)
	}

	return args, nil
}
Example #19
0
// appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd
// units written below p.Root: the app's service unit, a socket unit when the
// app declares socket-activated ports, the matching "wants" symlinks, the
// app's environment file and its reaper service.
//
// The service always runs as user and group 0 and starts the app through
// /appexec, which receives the resolved numeric uid and the gid argument.
// interactive selects tty instead of journal+console for the standard
// streams. flavor "kvm" additionally generates mount units for the pod's
// volumes. privateUsers is the serialized uid range used to map a uid/gid
// taken from a file's ownership back to its real value.
func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error {
	app := ra.App
	appName := ra.Name
	imgName := p.AppNameToImageName(appName)

	if len(app.Exec) == 0 {
		return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
	}

	workDir := "/"
	if app.WorkingDirectory != "" {
		workDir = app.WorkingDirectory
	}

	env := app.Environment

	env.Set("AC_APP_NAME", appName.String())
	if p.MetadataServiceURL != "" {
		env.Set("AC_METADATA_URL", p.MetadataServiceURL)
	}

	if err := writeEnvFile(p, env, appName, privateUsers); err != nil {
		return errwrap.Wrap(errors.New("unable to write environment file"), err)
	}

	// Named userID rather than uid because uid is the package that provides
	// the uid range.
	var userID, gid int
	var err error

	uidRange := uid.NewBlankUidRange()
	if err := uidRange.Deserialize([]byte(privateUsers)); err != nil {
		return errwrap.Wrap(errors.New("unable to deserialize uid range"), err)
	}

	// app.User is either an absolute path inside the app rootfs whose owner
	// is used, a numeric uid, or a user name looked up in the app's
	// etc/passwd.
	if strings.HasPrefix(app.User, "/") {
		var stat syscall.Stat_t
		if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName),
			app.User), &stat); err != nil {
			return errwrap.Wrap(fmt.Errorf("unable to get uid from file %q",
				app.User), err)
		}
		uidReal, _, err := uidRange.UnshiftRange(stat.Uid, 0)
		if err != nil {
			return errwrap.Wrap(errors.New("unable to determine real uid"), err)
		}
		userID = int(uidReal)
	} else {
		userID, err = strconv.Atoi(app.User)
		if err != nil {
			userID, err = passwd.LookupUidFromFile(app.User,
				filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/passwd"))
			if err != nil {
				return errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err)
			}
		}
	}

	// app.Group follows the same three forms, resolved against etc/group.
	if strings.HasPrefix(app.Group, "/") {
		var stat syscall.Stat_t
		if err = syscall.Lstat(filepath.Join(common.AppRootfsPath(p.Root, appName),
			app.Group), &stat); err != nil {
			return errwrap.Wrap(fmt.Errorf("unable to get gid from file %q",
				app.Group), err)
		}
		_, gidReal, err := uidRange.UnshiftRange(0, stat.Gid)
		if err != nil {
			return errwrap.Wrap(errors.New("unable to determine real gid"), err)
		}
		gid = int(gidReal)
	} else {
		gid, err = strconv.Atoi(app.Group)
		if err != nil {
			gid, err = group.LookupGidFromFile(app.Group,
				filepath.Join(common.AppRootfsPath(p.Root, appName), "etc/group"))
			if err != nil {
				return errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err)
			}
		}
	}

	// Every command of the unit (main exec and event handlers) is prefixed
	// with this /appexec invocation.
	execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName),
		strconv.Itoa(userID), generateGidArg(gid, app.SupplementaryGIDs), "--"}
	execStart := quoteExec(append(execWrap, app.Exec...))
	opts := []*unit.UnitOption{
		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)),
		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
		unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)),
		unit.NewUnitOption("Service", "Restart", "no"),
		unit.NewUnitOption("Service", "ExecStart", execStart),
		unit.NewUnitOption("Service", "User", "0"),
		unit.NewUnitOption("Service", "Group", "0"),
	}

	if interactive {
		opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty"))
	} else {
		opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console"))
		opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0])))
	}

	// When an app fails, we shut down the pod
	opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target"))

	for _, eh := range app.EventHandlers {
		var typ string
		switch eh.Name {
		case "pre-start":
			typ = "ExecStartPre"
		case "post-stop":
			typ = "ExecStopPost"
		default:
			return fmt.Errorf("unrecognized eventHandler: %v", eh.Name)
		}
		exec := quoteExec(append(execWrap, eh.Exec...))
		opts = append(opts, unit.NewUnitOption("Service", typ, exec))
	}

	// Some pre-start jobs take a long time, set the timeout to 0
	opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0"))

	// The loop variable is deliberately not called p, which would shadow the
	// pod parameter.
	var saPorts []types.Port
	for _, appPort := range app.Ports {
		if appPort.SocketActivated {
			saPorts = append(saPorts, appPort)
		}
	}

	// Only memory and CPU resource isolators are handed to the cgroup
	// helper; other isolator types are skipped here.
	for _, i := range app.Isolators {
		switch v := i.Value().(type) {
		case *types.ResourceMemory:
			opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit())
			if err != nil {
				return err
			}
		case *types.ResourceCPU:
			opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit())
			if err != nil {
				return err
			}
		}
	}

	if len(saPorts) > 0 {
		sockopts := []*unit.UnitOption{
			unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")),
			unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
			unit.NewUnitOption("Socket", "BindIPv6Only", "both"),
			unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)),
		}

		for _, sap := range saPorts {
			var proto string
			switch sap.Protocol {
			case "tcp":
				proto = "ListenStream"
			case "udp":
				proto = "ListenDatagram"
			default:
				return fmt.Errorf("unrecognized protocol: %v", sap.Protocol)
			}
			// We find the host port for the pod's port and use that in the
			// socket unit file.
			// This is so because systemd inside the pod will match based on
			// the socket port number, and since the socket was created on the
			// host, it will have the host port number.
			port := findHostPort(*p.Manifest, sap.Name)
			if port == 0 {
				log.Printf("warning: no --port option for socket-activated port %q, assuming port %d as specified in the manifest", sap.Name, sap.Port)
				port = sap.Port
			}
			sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", port)))
		}

		// O_TRUNC: without it a longer pre-existing unit file would keep
		// stale bytes after the newly written content.
		file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to create socket file"), err)
		}
		defer file.Close()

		if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil {
			return errwrap.Wrap(errors.New("failed to write socket unit file"), err)
		}

		if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil {
			return errwrap.Wrap(errors.New("failed to link socket want"), err)
		}

		opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName)))
	}

	opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName)))
	opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName)))

	// O_TRUNC for the same reason as the socket unit file above.
	file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return errwrap.Wrap(errors.New("failed to create service unit file"), err)
	}
	defer file.Close()

	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
		return errwrap.Wrap(errors.New("failed to write service unit file"), err)
	}

	if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil {
		return errwrap.Wrap(errors.New("failed to link service want"), err)
	}

	if flavor == "kvm" {
		// bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn)
		err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir)
		if err != nil {
			return errwrap.Wrap(errors.New("failed to prepare mount units"), err)
		}

	}

	if err = writeAppReaper(p, appName.String()); err != nil {
		return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err)
	}

	return nil
}
Example #20
0
// appToNspawnArgs builds the systemd-nspawn arguments that apply to the app
// described by ra: one --bind=/--bind-ro= option per mount returned by
// GenerateMounts, followed by a single --capability= option listing the
// capabilities derived from the app's isolators.
// It returns a nil slice together with the error as soon as any step fails.
func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
	name, app := ra.Name, ra.App

	// Create the shared volumes directory and then chmod it explicitly
	// (presumably because the mode passed to MkdirAll is subject to the umask
	// and is not applied to a directory that already exists).
	sharedDir := common.SharedVolumesPath(p.Root)
	if err := os.MkdirAll(sharedDir, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	if err := os.Chmod(sharedDir, SharedVolPerm); err != nil {
		return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedDir), err)
	}

	// Index the pod's volumes by name so every mount can find its volume.
	volumesByName := make(map[types.ACName]types.Volume)
	for _, v := range p.Manifest.Volumes {
		volumesByName[v.Name] = v
	}

	var nspawnArgs []string
	for _, mnt := range GenerateMounts(ra, volumesByName, p.Images[name.String()]) {
		volume := volumesByName[mnt.Volume]
		hostSharedPath := filepath.Join(sharedDir, volume.Name.String())

		// Absolute path to the pod's root.
		podRoot, err := filepath.Abs(p.Root)
		if err != nil {
			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
		}
		rootfs := common.AppRootfsPath(podRoot, name)

		// TODO(yifan): Temporary workaround for systemd-nspawn not handling
		// symlink mounts well. It can go away once
		// https://github.com/systemd/systemd/issues/2860 is resolved and the
		// systemd version is bumped.
		resolved, err := EvaluateSymlinksInsideApp(rootfs, mnt.Path)
		if err != nil {
			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", mnt.Path), err)
		}

		if err := PrepareMountpoints(hostSharedPath, filepath.Join(rootfs, resolved), &volume, mnt.DockerImplicit); err != nil {
			return nil, err
		}

		// Read-write bind unless the mount is flagged read-only.
		bindFlag := "--bind="
		if IsMountReadOnly(volume, app.MountPoints) {
			bindFlag = "--bind-ro="
		}

		// Host-side source of the bind mount.
		var src string
		switch volume.Kind {
		case "host":
			src = volume.Source
		case "empty":
			src = filepath.Join(common.SharedVolumesPath(podRoot), volume.Name.String())
		default:
			return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, volume.Kind)
		}

		// The target is built from the app's relative rootfs path.
		dst := filepath.Join(common.RelAppRootfsPath(name), resolved)
		nspawnArgs = append(nspawnArgs, bindFlag+src+":"+dst)
	}

	caps, err := getAppCapabilities(app.Isolators)
	if err != nil {
		return nil, err
	}
	nspawnArgs = append(nspawnArgs, "--capability="+strings.Join(caps, ","))

	return nspawnArgs, nil
}
Example #21
0
// mountSharedVolumes bind mounts every volume used by the app described by ra
// into that app's rootfs below root.
//
// It makes sure the shared volumes directory under root exists with
// SharedVolPerm, then, for each mount returned by GenerateMounts:
//   - for an "empty" volume, creates its backing directory inside the shared
//     volumes directory and applies the volume's UID, GID and (octal) mode;
//   - picks the mount source: the volume's Source for "host" volumes, the
//     backing directory for "empty" volumes; any other kind is an error;
//   - computes the destination as m.Path inside the app rootfs and rejects it
//     if it lexically resolves outside of that rootfs;
//   - resolves symlinks in the source, creates the destination and performs
//     the bind mount, read-only when IsMountReadOnly says so.
//
// The first failure aborts the function and is returned; mounts that were
// already set up are left in place.
func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.RuntimeApp) error {
	app := ra.App
	appName := ra.Name
	volumes := p.Manifest.Volumes
	vols := make(map[types.ACName]types.Volume)
	for _, v := range volumes {
		vols[v.Name] = v
	}

	sharedVolPath := common.SharedVolumesPath(root)
	if err := os.MkdirAll(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil {
		return errwrap.Wrap(errors.New("could not create shared volumes directory"), err)
	}
	if err := os.Chmod(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil {
		return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err)
	}

	imageManifest := p.Images[appName.String()]
	mounts := stage1initcommon.GenerateMounts(ra, vols, imageManifest)
	for _, m := range mounts {
		vol := vols[m.Volume]

		if vol.Kind == "empty" {
			// NOTE(review): assumes vol.UID, vol.GID and vol.Mode are non-nil
			// for "empty" volumes — TODO confirm defaults are applied upstream.
			volPath := filepath.Join(sharedVolPath, vol.Name.String())
			if err := os.MkdirAll(volPath, stage1initcommon.SharedVolPerm); err != nil {
				return errwrap.Wrap(fmt.Errorf("could not create shared volume %q", vol.Name), err)
			}
			if err := os.Chown(volPath, *vol.UID, *vol.GID); err != nil {
				return errwrap.Wrap(fmt.Errorf("could not change owner of %q", volPath), err)
			}
			// The mode is stored as an octal string.
			mod, err := strconv.ParseUint(*vol.Mode, 8, 32)
			if err != nil {
				return errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err)
			}
			if err := os.Chmod(volPath, os.FileMode(mod)); err != nil {
				return errwrap.Wrap(fmt.Errorf("could not change permissions of %q", volPath), err)
			}
		}

		readOnly := stage1initcommon.IsMountReadOnly(vol, app.MountPoints)
		var source string
		switch vol.Kind {
		case "host":
			source = vol.Source
		case "empty":
			source = filepath.Join(sharedVolPath, vol.Name.String())
		default:
			return fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind)
		}

		absAppRootfs, err := filepath.Abs(common.AppRootfsPath(root, appName))
		if err != nil {
			return errwrap.Wrap(fmt.Errorf(`could not evaluate absolute path for application rootfs in app: %v`, appName), err)
		}
		absDestination, err := filepath.Abs(filepath.Join(absAppRootfs, m.Path))
		if err != nil {
			return errwrap.Wrap(fmt.Errorf(`could not evaluate absolute path for application volume path %q in: %v`, m.Path, appName), err)
		}

		// Containment must be checked per path component: a plain string
		// prefix test would also accept a sibling such as "<rootfs>-other/x".
		// NOTE(review): this is a lexical check only; symlinks below the
		// rootfs are not resolved here.
		rel, err := filepath.Rel(absAppRootfs, absDestination)
		if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return fmt.Errorf("path escapes app's root: %v", absDestination)
		}

		cleanedSource, err := filepath.EvalSymlinks(source)
		if err != nil {
			return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err)
		}
		if err := ensureDestinationExists(cleanedSource, absDestination); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err)
		}
		if err := doBindMount(cleanedSource, absDestination, readOnly); err != nil {
			return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Path, source, absDestination), err)
		}
	}
	return nil
}
Example #22
0
// parseUserGroup resolves the User and Group fields of the app described by
// ra into a numeric UID and GID.
// Each field is interpreted according to the first matching form:
//   1. the hardcoded string "root", which yields 0
//   2. a path starting with "/", taken inside the app rootfs: the owner (or
//      group) of that file is used, un-shifted through the uid range
//      deserialized from privateUsers
//   3. a number, used as-is
//   4. a name, looked up in /etc/passwd (or /etc/group) in the image
// On any failure it returns -1 for both IDs together with the error.
// See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema
func parseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp, privateUsers string) (int, int, error) {
	app, name := ra.App, ra.Name

	// inRootfs maps a path inside the app to its location under the pod root.
	inRootfs := func(rel string) string {
		return filepath.Join(common.AppRootfsPath(p.Root, name), rel)
	}

	idRange := uid.NewBlankUidRange()
	if err := idRange.Deserialize([]byte(privateUsers)); err != nil {
		return -1, -1, errwrap.Wrap(errors.New("unable to deserialize uid range"), err)
	}

	var userID, groupID int

	// Resolve the user.
	if app.User == "root" {
		userID = 0
	} else if strings.HasPrefix(app.User, "/") {
		var st syscall.Stat_t
		if err := syscall.Lstat(inRootfs(app.User), &st); err != nil {
			return -1, -1, errwrap.Wrap(fmt.Errorf("unable to get uid from file %q", app.User), err)
		}
		realUID, _, err := idRange.UnshiftRange(st.Uid, 0)
		if err != nil {
			return -1, -1, errwrap.Wrap(errors.New("unable to determine real uid"), err)
		}
		userID = int(realUID)
	} else {
		// Try a plain number first and fall back to a name lookup.
		n, err := strconv.Atoi(app.User)
		if err != nil {
			n, err = passwd.LookupUidFromFile(app.User, inRootfs("etc/passwd"))
			if err != nil {
				return -1, -1, errwrap.Wrap(fmt.Errorf("cannot lookup user %q", app.User), err)
			}
		}
		userID = n
	}

	// Resolve the group, mirroring the user handling above.
	if app.Group == "root" {
		groupID = 0
	} else if strings.HasPrefix(app.Group, "/") {
		var st syscall.Stat_t
		if err := syscall.Lstat(inRootfs(app.Group), &st); err != nil {
			return -1, -1, errwrap.Wrap(fmt.Errorf("unable to get gid from file %q", app.Group), err)
		}
		_, realGID, err := idRange.UnshiftRange(0, st.Gid)
		if err != nil {
			return -1, -1, errwrap.Wrap(errors.New("unable to determine real gid"), err)
		}
		groupID = int(realGID)
	} else {
		n, err := strconv.Atoi(app.Group)
		if err != nil {
			n, err = group.LookupGidFromFile(app.Group, inRootfs("etc/group"))
			if err != nil {
				return -1, -1, errwrap.Wrap(fmt.Errorf("cannot lookup group %q", app.Group), err)
			}
		}
		groupID = n
	}

	return userID, groupID, nil
}