// mountFsRO remounts the given mountPoint using the given flags read-only. func mountFsRO(m fs.Mounter, mountPoint string, flags uintptr) error { flags = flags | syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY if err := m.Mount(mountPoint, mountPoint, "", flags, ""); err != nil { return errwrap.Wrap(fmt.Errorf("error remounting read-only %q", mountPoint), err) } return nil }
// RemountCgroups remounts the v1 cgroup hierarchy under root. // It mounts /sys/fs/cgroup/[controller] read-only, // but leaves needed knobs in the subcgroup for each app read-write, // such that systemd inside stage1 can apply isolators to them. // It leaves /sys read-write, if the given readWrite parameter is true. func RemountCgroups(m fs.Mounter, root string, enabledCgroups map[int][]string, subcgroup string, serviceNames []string, readWrite bool) error { controllers := GetControllerDirs(enabledCgroups) cgroupTmpfs := filepath.Join(root, "/sys/fs/cgroup") sysPath := filepath.Join(root, "/sys") var flags uintptr = syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV // Mount RW knobs we need to make the enabled isolators work for _, c := range controllers { cPath := filepath.Join(cgroupTmpfs, c) subcgroupPath := filepath.Join(cPath, subcgroup, "system.slice") // Workaround for https://github.com/coreos/rkt/issues/1210 if c == "cpuset" { fixCpusetKnobs(cPath) } // Create cgroup directories and mount the files we need over // themselves so they stay read-write for _, serviceName := range serviceNames { appCgroup := filepath.Join(subcgroupPath, serviceName) if err := os.MkdirAll(appCgroup, 0755); err != nil { return err } for _, f := range getControllerRWFiles(c) { cgroupFilePath := filepath.Join(appCgroup, f) // the file may not be there if kernel doesn't support the // feature, skip it in that case if _, err := os.Stat(cgroupFilePath); os.IsNotExist(err) { continue } if err := m.Mount(cgroupFilePath, cgroupFilePath, "", syscall.MS_BIND, ""); err != nil { return errwrap.Wrap(fmt.Errorf("error bind mounting %q", cgroupFilePath), err) } } } // Re-mount controller read-only to prevent the container modifying host controllers if err := mountFsRO(m, cPath, flags); err != nil { return err } } if readWrite { return nil } // Bind-mount sys filesystem read-only return mountFsRO(m, sysPath, flags) }
// mountHostV1Cgroups mounts the host v1 cgroup hierarchy as required by // systemd-nspawn. We need this because some distributions don't have the // "name=systemd" cgroup or don't mount the cgroup controllers in // "/sys/fs/cgroup", and systemd-nspawn needs this. Since this is mounted // inside the rkt mount namespace, it doesn't affect the host. func mountHostV1Cgroups(m fs.Mounter, enabledCgroups map[int][]string) error { systemdControllerPath := "/sys/fs/cgroup/systemd" if !areHostV1CgroupsMounted(enabledCgroups) { mountContext := os.Getenv(common.EnvSELinuxMountContext) if err := v1.CreateCgroups(m, "/", enabledCgroups, mountContext); err != nil { return errwrap.Wrap(errors.New("error creating host cgroups"), err) } } if !v1.IsControllerMounted("systemd") { if err := os.MkdirAll(systemdControllerPath, 0700); err != nil { return err } if err := m.Mount("cgroup", systemdControllerPath, "cgroup", 0, "none,name=systemd"); err != nil { return errwrap.Wrap(fmt.Errorf("error mounting name=systemd hierarchy on %q", systemdControllerPath), err) } } return nil }
// CreateCgroups mounts the v1 cgroup controllers hierarchy in /sys/fs/cgroup // under root func CreateCgroups(m fs.Mounter, root string, enabledCgroups map[int][]string, mountContext string) error { controllers := GetControllerDirs(enabledCgroups) sys := filepath.Join(root, "/sys") if err := os.MkdirAll(sys, 0700); err != nil { return err } var sysfsFlags uintptr = syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV // If we're mounting the host cgroups, /sys is probably mounted so we // ignore EBUSY if err := m.Mount("sysfs", sys, "sysfs", sysfsFlags, ""); err != nil && err != syscall.EBUSY { return errwrap.Wrap(fmt.Errorf("error mounting %q", sys), err) } cgroupTmpfs := filepath.Join(root, "/sys/fs/cgroup") if err := os.MkdirAll(cgroupTmpfs, 0700); err != nil { return err } var cgroupTmpfsFlags uintptr = syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV | syscall.MS_STRICTATIME options := "mode=755" if mountContext != "" { options = fmt.Sprintf("mode=755,context=\"%s\"", mountContext) } if err := m.Mount("tmpfs", cgroupTmpfs, "tmpfs", cgroupTmpfsFlags, options); err != nil { return errwrap.Wrap(fmt.Errorf("error mounting %q", cgroupTmpfs), err) } // Mount controllers for _, c := range controllers { cPath := filepath.Join(root, "/sys/fs/cgroup", c) if err := os.MkdirAll(cPath, 0700); err != nil { return err } var flags uintptr = syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV if err := m.Mount("cgroup", cPath, "cgroup", flags, c); err != nil { return errwrap.Wrap(fmt.Errorf("error mounting %q", cPath), err) } } // Create symlinks for combined controllers symlinks := getControllerSymlinks(enabledCgroups) for ln, tgt := range symlinks { lnPath := filepath.Join(cgroupTmpfs, ln) if err := os.Symlink(tgt, lnPath); err != nil { return errwrap.Wrap(errors.New("error creating symlink"), err) } } systemdControllerPath := filepath.Join(root, "/sys/fs/cgroup/systemd") if err := os.MkdirAll(systemdControllerPath, 0700); err != nil { return err } // Bind-mount cgroup tmpfs filesystem read-only return mountFsRO(m, cgroupTmpfs, cgroupTmpfsFlags) }