Beispiel #1
0
Datei: init.go Projekt: nhlfr/rkt
func getContainerSubCgroup(machineID string, canMachinedRegister, unified bool) (string, error) {
	var subcgroup string
	fromUnit, err := util.RunningFromSystemService()
	if err != nil {
		return "", errwrap.Wrap(errors.New("could not determine if we're running from a unit file"), err)
	}
	if fromUnit {
		slice, err := util.GetRunningSlice()
		if err != nil {
			return "", errwrap.Wrap(errors.New("could not get slice name"), err)
		}
		slicePath, err := common.SliceToPath(slice)
		if err != nil {
			return "", errwrap.Wrap(errors.New("could not convert slice name to path"), err)
		}
		unit, err := util.CurrentUnitName()
		if err != nil {
			return "", errwrap.Wrap(errors.New("could not get unit name"), err)
		}
		subcgroup = filepath.Join(slicePath, unit)

		if unified {
			subcgroup = filepath.Join(subcgroup, "payload")
		}
	} else {
		escapedmID := strings.Replace(machineID, "-", "\\x2d", -1)
		machineDir := "machine-" + escapedmID + ".scope"
		if canMachinedRegister {
			// we are not in the final cgroup yet: systemd-nspawn will move us
			// to the correct cgroup later during registration so we can't
			// look it up in /proc/self/cgroup
			subcgroup = filepath.Join("machine.slice", machineDir)
		} else {
			if unified {
				var err error
				subcgroup, err = v2.GetOwnCgroupPath()
				if err != nil {
					return "", errwrap.Wrap(errors.New("could not get own v2 cgroup path"), err)
				}
			} else {
				// when registration is disabled the container will be directly
				// under the current cgroup so we can look it up in /proc/self/cgroup
				ownV1CgroupPath, err := v1.GetOwnCgroupPath("name=systemd")
				if err != nil {
					return "", errwrap.Wrap(errors.New("could not get own v1 cgroup path"), err)
				}
				// systemd-nspawn won't work if we are in the root cgroup. In addition,
				// we want all rkt instances to be in distinct cgroups. Create a
				// subcgroup and add ourselves to it.
				subcgroup = filepath.Join(ownV1CgroupPath, machineDir)
				if err := v1.JoinSubcgroup("systemd", subcgroup); err != nil {
					return "", errwrap.Wrap(fmt.Errorf("error joining %s subcgroup", ownV1CgroupPath), err)
				}
			}
		}
	}

	return subcgroup, nil
}
Beispiel #2
0
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor used
func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) {
	var args []string
	env := os.Environ()

	// We store the pod's flavor so we can later garbage collect it correctly
	if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil {
		return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err)
	}

	// set hostname inside pod
	// According to systemd manual (https://www.freedesktop.org/software/systemd/man/hostname.html) :
	// "The /etc/hostname file configures the name of the local system that is set
	// during boot using the sethostname system call"
	if hostname == "" {
		hostname = stage1initcommon.GetMachineID(p)
	}
	hostnamePath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc/hostname")
	if err := ioutil.WriteFile(hostnamePath, []byte(hostname), 0644); err != nil {
		return nil, nil, fmt.Errorf("error writing %s, %s", hostnamePath, err)
	}

	switch flavor {
	case "kvm":
		if privateUsers != "" {
			return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1")
		}

		// kernel and lkvm are relative path, because init has /var/lib/rkt/..../uuid as its working directory
		// TODO: move to path.go
		kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage")
		lkvmPath := filepath.Join(common.Stage1RootfsPath(p.Root), "lkvm")
		netDescriptions := kvm.GetNetworkDescriptions(n)
		lkvmNetArgs, err := kvm.GetKVMNetArgs(netDescriptions)
		if err != nil {
			return nil, nil, err
		}

		cpu, mem := kvm.GetAppsResources(p.Manifest.Apps)

		kernelParams := []string{
			"console=hvc0",
			"init=/usr/lib/systemd/systemd",
			"no_timer_check",
			"noreplace-smp",
			"systemd.default_standard_error=journal+console",
			"systemd.default_standard_output=journal+console",
			// "systemd.default_standard_output=tty",
			"tsc=reliable",
			"MACHINEID=" + p.UUID.String(),
		}

		if debug {
			kernelParams = append(kernelParams, []string{
				"debug",
				"systemd.log_level=debug",
				"systemd.show_status=true",
				// "systemd.confirm_spawn=true",
			}...)
		} else {
			kernelParams = append(kernelParams, "quiet")
		}

		args = append(args, []string{
			"./" + lkvmPath, // relative path
			"run",
			"--name", "rkt-" + p.UUID.String(),
			"--no-dhcp", // speed bootup
			"--cpu", strconv.FormatInt(cpu, 10),
			"--mem", strconv.FormatInt(mem, 10),
			"--console=virtio",
			"--kernel", kernelPath,
			"--disk", "stage1/rootfs", // relative to run/pods/uuid dir this is a place where systemd resides
			// MACHINEID will be available as environment variable
			"--params", strings.Join(kernelParams, " "),
		}...,
		)
		args = append(args, lkvmNetArgs...)

		if debug {
			args = append(args, "--debug")
		}

		// host volume sharing with 9p
		nsargs := stage1initcommon.VolumesToKvmDiskArgs(p.Manifest.Volumes)
		args = append(args, nsargs...)

		// lkvm requires $HOME to be defined,
		// see https://github.com/coreos/rkt/issues/1393
		if os.Getenv("HOME") == "" {
			env = append(env, "HOME=/root")
		}

		return args, env, nil

	case "coreos":
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin))
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin))
		args = append(args, "--boot") // Launch systemd in the pod

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

		if machinedRegister() {
			args = append(args, fmt.Sprintf("--register=true"))
		} else {
			args = append(args, fmt.Sprintf("--register=false"))
		}

		// use only dynamic libraries provided in the image
		env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib"))

	case "src":
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin))
		args = append(args, "--boot") // Launch systemd in the pod

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

		if machinedRegister() {
			args = append(args, fmt.Sprintf("--register=true"))
		} else {
			args = append(args, fmt.Sprintf("--register=false"))
		}

	case "host":
		hostNspawnBin, err := common.LookupPath("systemd-nspawn", os.Getenv("PATH"))
		if err != nil {
			return nil, nil, err
		}

		// Check dynamically which version is installed on the host
		// Support version >= 220
		versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput()
		if err != nil {
			return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err)
		}
		versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0]
		var version int
		n, err := fmt.Sscanf(versionStr, "systemd %d", &version)
		if err != nil {
			return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr)
		}
		if n != 1 || version < 220 {
			return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr)
		}

		// Copy systemd, bash, etc. in stage1 at run-time
		if err := installAssets(); err != nil {
			return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err)
		}

		args = append(args, hostNspawnBin)
		args = append(args, "--boot") // Launch systemd in the pod
		args = append(args, fmt.Sprintf("--register=true"))

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

	default:
		return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor)
	}

	// link journal only if the host is running systemd
	if util.IsRunningSystemd() {
		// we write /etc/machine-id here because systemd-nspawn needs it to link
		// the container's journal to the host
		mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id")
		mID := strings.Replace(p.UUID.String(), "-", "", -1)

		if err := ioutil.WriteFile(mPath, []byte(mID), 0644); err != nil {
			log.FatalE("error writing /etc/machine-id", err)
		}

		args = append(args, "--link-journal=try-guest")

		keepUnit, err := util.RunningFromSystemService()
		if err != nil {
			if err == util.ErrSoNotFound {
				log.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.")
			} else {
				return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err)
			}
		}

		if keepUnit {
			args = append(args, "--keep-unit")
		}
	}

	if !debug {
		args = append(args, "--quiet")             // silence most nspawn output (log_warning is currently not covered by this)
		env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too
	}

	env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt")

	if len(privateUsers) > 0 {
		args = append(args, "--private-users="+privateUsers)
	}

	nsargs, err := stage1initcommon.PodToNspawnArgs(p)
	if err != nil {
		return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err)
	}
	args = append(args, nsargs...)

	// Arguments to systemd
	args = append(args, "--")
	args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty
	if !debug {
		args = append(args, "--log-target=null") // silence systemd output inside pod
		args = append(args, "--show-status=0")   // silence systemd initialization status output
	}

	return args, env, nil
}
Beispiel #3
0
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor
// as the first two return values respectively.
func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister bool, debug bool, n *networking.Networking, insecureOptions stage1initcommon.Stage1InsecureOptions) ([]string, []string, error) {
	var args []string
	env := os.Environ()

	// We store the pod's flavor so we can later garbage collect it correctly
	if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil {
		return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err)
	}

	// set hostname inside pod
	// According to systemd manual (https://www.freedesktop.org/software/systemd/man/hostname.html) :
	// "The /etc/hostname file configures the name of the local system that is set
	// during boot using the sethostname system call"
	if hostname == "" {
		hostname = stage1initcommon.GetMachineID(p)
	}
	hostnamePath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc/hostname")
	if err := ioutil.WriteFile(hostnamePath, []byte(hostname), 0644); err != nil {
		return nil, nil, fmt.Errorf("error writing %s, %s", hostnamePath, err)
	}

	// systemd-nspawn needs /etc/machine-id to link the container's journal
	// to the host. Since systemd-v230, /etc/machine-id is mandatory, see
	// https://github.com/systemd/systemd/commit/e01ff70a77e781734e1e73a2238af2e9bf7967a8
	mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id")
	machineID := strings.Replace(p.UUID.String(), "-", "", -1)

	switch flavor {
	case "kvm":
		if privateUsers != "" {
			return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1")
		}

		// kernel and hypervisor binaries are located relative to the working directory
		// of init (/var/lib/rkt/..../uuid)
		// TODO: move to path.go
		kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage")
		netDescriptions := kvm.GetNetworkDescriptions(n)

		cpu, mem := kvm.GetAppsResources(p.Manifest.Apps)

		// Parse hypervisor
		hv, err := KvmCheckHypervisor(common.Stage1RootfsPath(p.Root))
		if err != nil {
			return nil, nil, err
		}

		// Set start command for hypervisor
		StartCmd := hvlkvm.StartCmd
		switch hv {
		case "lkvm":
			StartCmd = hvlkvm.StartCmd
		case "qemu":
			StartCmd = hvqemu.StartCmd
		default:
			return nil, nil, fmt.Errorf("unrecognized hypervisor")
		}

		hvStartCmd := StartCmd(
			common.Stage1RootfsPath(p.Root),
			p.UUID.String(),
			kernelPath,
			netDescriptions,
			cpu,
			mem,
			debug,
		)

		if hvStartCmd == nil {
			return nil, nil, fmt.Errorf("no hypervisor")
		}

		args = append(args, hvStartCmd...)

		// lkvm requires $HOME to be defined,
		// see https://github.com/coreos/rkt/issues/1393
		if os.Getenv("HOME") == "" {
			env = append(env, "HOME=/root")
		}

		if err := linkJournal(common.Stage1RootfsPath(p.Root), machineID); err != nil {
			return nil, nil, errwrap.Wrap(errors.New("error linking pod's journal"), err)
		}

		// use only dynamic libraries provided in the image
		// from systemd v231 there's a new internal libsystemd-shared-v231.so
		// which is present in /usr/lib/systemd
		env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd"))

		return args, env, nil

	case "coreos":
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin))
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin))
		args = append(args, "--boot")             // Launch systemd in the pod
		args = append(args, "--notify-ready=yes") // From systemd v231

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

		if canMachinedRegister {
			args = append(args, fmt.Sprintf("--register=true"))
		} else {
			args = append(args, fmt.Sprintf("--register=false"))
		}

		// use only dynamic libraries provided in the image
		// from systemd v231 there's a new internal libsystemd-shared-v231.so
		// which is present in /usr/lib/systemd
		env = append(env, "LD_LIBRARY_PATH="+
			filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+
			filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd"))

	case "src":
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin))
		args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin))
		args = append(args, "--boot")             // Launch systemd in the pod
		args = append(args, "--notify-ready=yes") // From systemd v231

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

		if canMachinedRegister {
			args = append(args, fmt.Sprintf("--register=true"))
		} else {
			args = append(args, fmt.Sprintf("--register=false"))
		}

		// use only dynamic libraries provided in the image
		// from systemd v231 there's a new internal libsystemd-shared-v231.so
		// which is present in /usr/lib/systemd
		env = append(env, "LD_LIBRARY_PATH="+
			filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+
			filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd"))

	case "host":
		hostNspawnBin, err := common.LookupPath("systemd-nspawn", os.Getenv("PATH"))
		if err != nil {
			return nil, nil, err
		}

		// Check dynamically which version is installed on the host
		// Support version >= 220
		versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput()
		if err != nil {
			return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err)
		}
		versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0]
		var version int
		n, err := fmt.Sscanf(versionStr, "systemd %d", &version)
		if err != nil {
			return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr)
		}
		if n != 1 || version < 220 {
			return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr)
		}

		// Copy systemd, bash, etc. in stage1 at run-time
		if err := installAssets(); err != nil {
			return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err)
		}

		args = append(args, hostNspawnBin)
		args = append(args, "--boot") // Launch systemd in the pod
		args = append(args, fmt.Sprintf("--register=true"))

		if version >= 231 {
			args = append(args, "--notify-ready=yes") // From systemd v231
		}

		if context := os.Getenv(common.EnvSELinuxContext); context != "" {
			args = append(args, fmt.Sprintf("-Z%s", context))
		}

		if context := os.Getenv(common.EnvSELinuxMountContext); context != "" {
			args = append(args, fmt.Sprintf("-L%s", context))
		}

	default:
		return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor)
	}

	machineIDBytes := append([]byte(machineID), '\n')
	if err := ioutil.WriteFile(mPath, machineIDBytes, 0644); err != nil {
		log.FatalE("error writing /etc/machine-id", err)
	}

	// link journal only if the host is running systemd
	if util.IsRunningSystemd() {
		args = append(args, "--link-journal=try-guest")

		keepUnit, err := util.RunningFromSystemService()
		if err != nil {
			if err == dlopen.ErrSoNotFound {
				log.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.")
			} else {
				return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err)
			}
		}

		if keepUnit {
			args = append(args, "--keep-unit")
		}
	} else {
		args = append(args, "--link-journal=no")
	}

	if !debug {
		args = append(args, "--quiet")             // silence most nspawn output (log_warning is currently not covered by this)
		env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too
	}

	env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt")
	// TODO (alepuccetti) remove this line when rkt will use cgroup namespace
	// If the kernel has the cgroup namespace enabled, systemd v232 will use it by default.
	// This was introduced by https://github.com/systemd/systemd/pull/3809 and it will cause
	// problems in rkt when cgns is enabled and cgroup-v1 is used. For more information see
	// https://github.com/systemd/systemd/pull/3589#discussion_r70277625.
	// The following line tells systemd-nspawn not to use cgroup namespace using the environment variable
	// introduced by https://github.com/systemd/systemd/pull/3809.
	env = append(env, "SYSTEMD_NSPAWN_USE_CGNS=no")

	if insecureOptions.DisablePaths {
		env = append(env, "SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes")
	}

	if len(privateUsers) > 0 {
		args = append(args, "--private-users="+privateUsers)
	}

	nsargs, err := stage1initcommon.PodToNspawnArgs(p, insecureOptions)
	if err != nil {
		return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err)
	}
	args = append(args, nsargs...)

	// Arguments to systemd
	args = append(args, "--")
	args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty
	if !debug {
		args = append(args, "--log-target=null") // silence systemd output inside pod
		args = append(args, "--show-status=0")   // silence systemd initialization status output
	}

	return args, env, nil
}