Пример #1
0
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
	root, err := getCgroupRoot()
	if err != nil {
		return nil, err
	}

	if (c.Name != "" || c.Parent != "") && c.Path != "" {
		return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
	}

	// XXX: Do not remove this code. Path safety is important! -- cyphar
	cgPath := libcontainerUtils.CleanPath(c.Path)
	cgParent := libcontainerUtils.CleanPath(c.Parent)
	cgName := libcontainerUtils.CleanPath(c.Name)

	innerPath := cgPath
	if innerPath == "" {
		innerPath = filepath.Join(cgParent, cgName)
	}

	return &cgroupData{
		root:      root,
		innerPath: innerPath,
		config:    c,
		pid:       pid,
	}, nil
}
Пример #2
0
// Do the mount operation followed by additional mounts required to take care
// of propagation flags.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
	var (
		dest  = m.Destination
		data  = label.FormatMountLabel(m.Data, mountLabel)
		flags = m.Flags
	)
	if libcontainerUtils.CleanPath(dest) == "/dev" {
		flags &= ^syscall.MS_RDONLY
	}

	copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
	if !(copyUp || strings.HasPrefix(dest, rootfs)) {
		dest = filepath.Join(rootfs, dest)
	}

	if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
		return err
	}

	for _, pflag := range m.PropagationFlags {
		if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
			return err
		}
	}
	return nil
}
Пример #3
0
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
	for _, m := range config.Mounts {
		if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
			return false
		}
	}
	return true
}
Пример #4
0
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
	if libcontainerUtils.CleanPath(rootfs) == libcontainerUtils.CleanPath(dest) {
		return fmt.Errorf("mounting into / is prohibited")
	}
	invalidDestinations := []string{
		"/proc",
	}
	// White list, it should be sub directories of invalid destinations
	validDestinations := []string{
		// These entries can be bind mounted by files emulated by fuse,
		// so commands like top, free displays stats in container.
		"/proc/cpuinfo",
		"/proc/diskstats",
		"/proc/meminfo",
		"/proc/stat",
		"/proc/swaps",
		"/proc/uptime",
		"/proc/net/dev",
	}
	for _, valid := range validDestinations {
		path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
		if err != nil {
			return err
		}
		if path == "." {
			return nil
		}
	}
	for _, invalid := range invalidDestinations {
		path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
		if err != nil {
			return err
		}
		if path == "." || !strings.HasPrefix(path, "..") {
			return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
		}
	}
	return nil
}
Пример #5
0
// ensureParent makes sure that the parent directory of current is created
// and populated with the proper cpus and mems files copied from
// it's parent.
func (s *CpusetGroup) ensureParent(current, root string) error {
	parent := filepath.Dir(current)
	if libcontainerUtils.CleanPath(parent) == root {
		return nil
	}
	// Avoid infinite recursion.
	if parent == current {
		return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
	}
	if err := s.ensureParent(parent, root); err != nil {
		return err
	}
	if err := os.MkdirAll(current, 0755); err != nil {
		return err
	}
	return s.copyIfNeeded(current, parent)
}
// getCgroupPath gets the file path to the "devices" subsystem of the desired cgroup.
// cgroupPath is the path in the cgroup hierarchy.
func getCgroupPath(cgroupPath string) (string, error) {
	cgroupPath = libcontainerutils.CleanPath(cgroupPath)

	mnt, root, err := libcontainercgroups.FindCgroupMountpointAndRoot("devices")
	// If we didn't mount the subsystem, there is no point we make the path.
	if err != nil {
		return "", err
	}

	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
	if filepath.IsAbs(cgroupPath) {
		// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
		return filepath.Join(root, mnt, cgroupPath), nil
	}

	parentPath, err := getCgroupParentPath(mnt, root)
	if err != nil {
		return "", err
	}

	return filepath.Join(parentPath, cgroupPath), nil
}
Пример #7
0
// finalizeRootfs actually switches the root of the process and sets anything
// to ro if necessary. You must call prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
	// remount dev as ro if specified
	for _, m := range config.Mounts {
		if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
			if m.Flags&syscall.MS_RDONLY != 0 {
				if err := remountReadonly(m.Destination); err != nil {
					return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
				}
			}
			break
		}
	}

	// set rootfs ( / ) as readonly
	if config.Readonlyfs {
		if err := setReadonly(); err != nil {
			return newSystemErrorWithCause(err, "setting rootfs as readonly")
		}
	}

	syscall.Umask(0022)
	return nil
}
Пример #8
0
func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) {
	var myCgroupPath string

	c := &configs.Cgroup{
		Resources: &configs.Resources{},
	}

	if spec.Linux != nil && spec.Linux.CgroupsPath != nil {
		myCgroupPath = libcontainerUtils.CleanPath(*spec.Linux.CgroupsPath)
		if useSystemdCgroup {
			myCgroupPath = *spec.Linux.CgroupsPath
		}
	}

	if useSystemdCgroup {
		if myCgroupPath == "" {
			c.Parent = "system.slice"
			c.ScopePrefix = "runc"
			c.Name = name
		} else {
			// Parse the path from expected "slice:prefix:name"
			// for e.g. "system.slice:docker:1234"
			parts := strings.Split(myCgroupPath, ":")
			if len(parts) != 3 {
				return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups")
			}
			c.Parent = parts[0]
			c.ScopePrefix = parts[1]
			c.Name = parts[2]
		}
	} else {
		if myCgroupPath == "" {
			c.Name = name
		}
		c.Path = myCgroupPath
	}

	c.Resources.AllowedDevices = allowedDevices
	if spec.Linux == nil {
		return c, nil
	}
	r := spec.Linux.Resources
	if r == nil {
		return c, nil
	}
	for i, d := range spec.Linux.Resources.Devices {
		var (
			t     = "a"
			major = int64(-1)
			minor = int64(-1)
		)
		if d.Type != nil {
			t = *d.Type
		}
		if d.Major != nil {
			major = *d.Major
		}
		if d.Minor != nil {
			minor = *d.Minor
		}
		if d.Access == nil || *d.Access == "" {
			return nil, fmt.Errorf("device access at %d field cannot be empty", i)
		}
		dt, err := stringToDeviceRune(t)
		if err != nil {
			return nil, err
		}
		dd := &configs.Device{
			Type:        dt,
			Major:       major,
			Minor:       minor,
			Permissions: *d.Access,
			Allow:       d.Allow,
		}
		c.Resources.Devices = append(c.Resources.Devices, dd)
	}
	// append the default allowed devices to the end of the list
	c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
	if r.Memory != nil {
		if r.Memory.Limit != nil {
			c.Resources.Memory = int64(*r.Memory.Limit)
		}
		if r.Memory.Reservation != nil {
			c.Resources.MemoryReservation = int64(*r.Memory.Reservation)
		}
		if r.Memory.Swap != nil {
			c.Resources.MemorySwap = int64(*r.Memory.Swap)
		}
		if r.Memory.Kernel != nil {
			c.Resources.KernelMemory = int64(*r.Memory.Kernel)
		}
		if r.Memory.KernelTCP != nil {
			c.Resources.KernelMemoryTCP = int64(*r.Memory.KernelTCP)
		}
		if r.Memory.Swappiness != nil {
			swappiness := int64(*r.Memory.Swappiness)
			c.Resources.MemorySwappiness = &swappiness
		}
	}
	if r.CPU != nil {
		if r.CPU.Shares != nil {
			c.Resources.CpuShares = int64(*r.CPU.Shares)
		}
		if r.CPU.Quota != nil {
			c.Resources.CpuQuota = int64(*r.CPU.Quota)
		}
		if r.CPU.Period != nil {
			c.Resources.CpuPeriod = int64(*r.CPU.Period)
		}
		if r.CPU.RealtimeRuntime != nil {
			c.Resources.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime)
		}
		if r.CPU.RealtimePeriod != nil {
			c.Resources.CpuRtPeriod = int64(*r.CPU.RealtimePeriod)
		}
		if r.CPU.Cpus != nil {
			c.Resources.CpusetCpus = *r.CPU.Cpus
		}
		if r.CPU.Mems != nil {
			c.Resources.CpusetMems = *r.CPU.Mems
		}
	}
	if r.Pids != nil {
		c.Resources.PidsLimit = *r.Pids.Limit
	}
	if r.BlockIO != nil {
		if r.BlockIO.Weight != nil {
			c.Resources.BlkioWeight = *r.BlockIO.Weight
		}
		if r.BlockIO.LeafWeight != nil {
			c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight
		}
		if r.BlockIO.WeightDevice != nil {
			for _, wd := range r.BlockIO.WeightDevice {
				var weight, leafWeight uint16
				if wd.Weight != nil {
					weight = *wd.Weight
				}
				if wd.LeafWeight != nil {
					leafWeight = *wd.LeafWeight
				}
				weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight)
				c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice)
			}
		}
		if r.BlockIO.ThrottleReadBpsDevice != nil {
			for _, td := range r.BlockIO.ThrottleReadBpsDevice {
				var rate uint64
				if td.Rate != nil {
					rate = *td.Rate
				}
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
				c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleWriteBpsDevice != nil {
			for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
				var rate uint64
				if td.Rate != nil {
					rate = *td.Rate
				}
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
				c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleReadIOPSDevice != nil {
			for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
				var rate uint64
				if td.Rate != nil {
					rate = *td.Rate
				}
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
				c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleWriteIOPSDevice != nil {
			for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
				var rate uint64
				if td.Rate != nil {
					rate = *td.Rate
				}
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
				c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice)
			}
		}
	}
	for _, l := range r.HugepageLimits {
		c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{
			Pagesize: *l.Pagesize,
			Limit:    *l.Limit,
		})
	}
	if r.DisableOOMKiller != nil {
		c.Resources.OomKillDisable = *r.DisableOOMKiller
	}
	if r.Network != nil {
		if r.Network.ClassID != nil {
			c.Resources.NetClsClassid = *r.Network.ClassID
		}
		for _, m := range r.Network.Priorities {
			c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{
				Interface: m.Name,
				Priority:  int64(m.Priority),
			})
		}
	}
	return c, nil
}
Пример #9
0
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
	if err := prepareRoot(config); err != nil {
		return newSystemErrorWithCause(err, "preparing rootfs")
	}

	setupDev := needsSetupDev(config)
	for _, m := range config.Mounts {
		for _, precmd := range m.PremountCmds {
			if err := mountCmd(precmd); err != nil {
				return newSystemErrorWithCause(err, "running premount command")
			}
		}
		if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
			return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
		}

		for _, postcmd := range m.PostmountCmds {
			if err := mountCmd(postcmd); err != nil {
				return newSystemErrorWithCause(err, "running postmount command")
			}
		}
	}
	if setupDev {
		if err := createDevices(config); err != nil {
			return newSystemErrorWithCause(err, "creating device nodes")
		}
		if err := setupPtmx(config, console); err != nil {
			return newSystemErrorWithCause(err, "setting up ptmx")
		}
		if err := setupDevSymlinks(config.Rootfs); err != nil {
			return newSystemErrorWithCause(err, "setting up /dev symlinks")
		}
	}
	// Signal the parent to run the pre-start hooks.
	// The hooks are run after the mounts are setup, but before we switch to the new
	// root, so that the old root is still available in the hooks for any mount
	// manipulations.
	if err := syncParentHooks(pipe); err != nil {
		return err
	}
	if err := syscall.Chdir(config.Rootfs); err != nil {
		return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
	}
	if config.NoPivotRoot {
		err = msMoveRoot(config.Rootfs)
	} else {
		err = pivotRoot(config.Rootfs, config.PivotDir)
	}
	if err != nil {
		return newSystemErrorWithCause(err, "jailing process inside rootfs")
	}
	if setupDev {
		if err := reOpenDevNull(); err != nil {
			return newSystemErrorWithCause(err, "reopening /dev/null inside container")
		}
	}
	// remount dev as ro if specifed
	for _, m := range config.Mounts {
		if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
			if m.Flags&syscall.MS_RDONLY != 0 {
				if err := remountReadonly(m.Destination); err != nil {
					return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
				}
			}
			break
		}
	}
	// set rootfs ( / ) as readonly
	if config.Readonlyfs {
		if err := setReadonly(); err != nil {
			return newSystemErrorWithCause(err, "setting rootfs as readonly")
		}
	}
	syscall.Umask(0022)
	return nil
}
Пример #10
0
func createCgroupConfig(name string, spec *specs.LinuxSpec) (*configs.Cgroup, error) {
	var (
		err          error
		myCgroupPath string
	)

	if spec.Linux.CgroupsPath != nil {
		myCgroupPath = libcontainerUtils.CleanPath(*spec.Linux.CgroupsPath)
	} else {
		myCgroupPath, err = cgroups.GetThisCgroupDir("devices")
		if err != nil {
			return nil, err
		}
	}

	c := &configs.Cgroup{
		Path:      filepath.Join(myCgroupPath, name),
		Resources: &configs.Resources{},
	}
	c.Resources.AllowedDevices = allowedDevices
	r := spec.Linux.Resources
	if r == nil {
		return c, nil
	}
	for i, d := range spec.Linux.Resources.Devices {
		var (
			t     = 'a'
			major = int64(-1)
			minor = int64(-1)
		)
		if d.Type != nil {
			t = *d.Type
		}
		if d.Major != nil {
			major = *d.Major
		}
		if d.Minor != nil {
			minor = *d.Minor
		}
		if d.Access == nil || *d.Access == "" {
			return nil, fmt.Errorf("device access at %d field canot be empty", i)
		}
		dd := &configs.Device{
			Type:        t,
			Major:       major,
			Minor:       minor,
			Permissions: *d.Access,
			Allow:       d.Allow,
		}
		c.Resources.Devices = append(c.Resources.Devices, dd)
	}
	// append the default allowed devices to the end of the list
	c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
	if r.Memory != nil {
		if r.Memory.Limit != nil {
			c.Resources.Memory = int64(*r.Memory.Limit)
		}
		if r.Memory.Reservation != nil {
			c.Resources.MemoryReservation = int64(*r.Memory.Reservation)
		}
		if r.Memory.Swap != nil {
			c.Resources.MemorySwap = int64(*r.Memory.Swap)
		}
		if r.Memory.Kernel != nil {
			c.Resources.KernelMemory = int64(*r.Memory.Kernel)
		}
		if r.Memory.Swappiness != nil {
			c.Resources.MemorySwappiness = int64(*r.Memory.Swappiness)
		}
	}
	if r.CPU != nil {
		if r.CPU.Shares != nil {
			c.Resources.CpuShares = int64(*r.CPU.Shares)
		}
		if r.CPU.Quota != nil {
			c.Resources.CpuQuota = int64(*r.CPU.Quota)
		}
		if r.CPU.Period != nil {
			c.Resources.CpuPeriod = int64(*r.CPU.Period)
		}
		if r.CPU.RealtimeRuntime != nil {
			c.Resources.CpuRtRuntime = int64(*r.CPU.RealtimeRuntime)
		}
		if r.CPU.RealtimePeriod != nil {
			c.Resources.CpuRtPeriod = int64(*r.CPU.RealtimePeriod)
		}
		if r.CPU.Cpus != nil {
			c.Resources.CpusetCpus = *r.CPU.Cpus
		}
		if r.CPU.Mems != nil {
			c.Resources.CpusetMems = *r.CPU.Mems
		}
	}
	if r.Pids != nil {
		c.Resources.PidsLimit = *r.Pids.Limit
	}
	if r.BlockIO != nil {
		if r.BlockIO.Weight != nil {
			c.Resources.BlkioWeight = *r.BlockIO.Weight
		}
		if r.BlockIO.LeafWeight != nil {
			c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight
		}
		if r.BlockIO.WeightDevice != nil {
			for _, wd := range r.BlockIO.WeightDevice {
				weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, *wd.Weight, *wd.LeafWeight)
				c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice)
			}
		}
		if r.BlockIO.ThrottleReadBpsDevice != nil {
			for _, td := range r.BlockIO.ThrottleReadBpsDevice {
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
				c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleWriteBpsDevice != nil {
			for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
				c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleReadIOPSDevice != nil {
			for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
				c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice)
			}
		}
		if r.BlockIO.ThrottleWriteIOPSDevice != nil {
			for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
				throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, *td.Rate)
				c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice)
			}
		}
	}
	for _, l := range r.HugepageLimits {
		c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{
			Pagesize: *l.Pagesize,
			Limit:    *l.Limit,
		})
	}
	if r.DisableOOMKiller != nil {
		c.Resources.OomKillDisable = *r.DisableOOMKiller
	}
	if r.Network != nil {
		if r.Network.ClassID != nil {
			c.Resources.NetClsClassid = string(*r.Network.ClassID)
		}
		for _, m := range r.Network.Priorities {
			c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{
				Interface: m.Name,
				Priority:  int64(m.Priority),
			})
		}
	}
	return c, nil
}