func getContainerSubCgroup(machineID string) (string, error) { var subcgroup string fromUnit, err := util.RunningFromSystemService() if err != nil { return "", fmt.Errorf("could not determine if we're running from a unit file: %v", err) } if fromUnit { slice, err := util.GetRunningSlice() if err != nil { return "", fmt.Errorf("could not get slice name: %v", err) } slicePath, err := common.SliceToPath(slice) if err != nil { return "", fmt.Errorf("could not convert slice name to path: %v", err) } unit, err := util.CurrentUnitName() if err != nil { return "", fmt.Errorf("could not get unit name: %v", err) } subcgroup = filepath.Join(slicePath, unit, "system.slice") } else { escapedmID := strings.Replace(machineID, "-", "\\x2d", -1) machineDir := "machine-" + escapedmID + ".scope" if machinedRegister() { // we are not in the final cgroup yet: systemd-nspawn will move us // to the correct cgroup later during registration so we can't // look it up in /proc/self/cgroup subcgroup = filepath.Join("machine.slice", machineDir, "system.slice") } else { // when registration is disabled the container will be directly // under the current cgroup so we can look it up in /proc/self/cgroup ownCgroupPath, err := cgroup.GetOwnCgroupPath("name=systemd") if err != nil { return "", fmt.Errorf("could not get own cgroup path: %v", err) } // systemd-nspawn won't work if we are in the root cgroup. In addition, // we want all rkt instances to be in distinct cgroups. Create a // subcgroup and add ourselves to it. ownCgroupPath = filepath.Join(ownCgroupPath, machineDir) if err := cgroup.JoinSubcgroup("systemd", ownCgroupPath); err != nil { return "", fmt.Errorf("error joining %s subcgroup: %v", ownCgroupPath, err) } subcgroup = filepath.Join(ownCgroupPath, "system.slice") } } return subcgroup, nil }
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor used func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) { var args []string env := os.Environ() // We store the pod's flavor so we can later garbage collect it correctly if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { return nil, nil, fmt.Errorf("failed to create flavor symlink: %v", err) } switch flavor { case "kvm": if privateUsers != "" { return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") } // kernel and lkvm are relative path, because init has /var/lib/rkt/..../uuid as its working directory // TODO: move to path.go kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage") lkvmPath := filepath.Join(common.Stage1RootfsPath(p.Root), "lkvm") netDescriptions := kvm.GetNetworkDescriptions(n) lkvmNetArgs, err := kvm.GetKVMNetArgs(netDescriptions) if err != nil { return nil, nil, err } cpu, mem := kvm.GetAppsResources(p.Manifest.Apps) kernelParams := []string{ "console=hvc0", "init=/usr/lib/systemd/systemd", "no_timer_check", "noreplace-smp", "systemd.default_standard_error=journal+console", "systemd.default_standard_output=journal+console", // "systemd.default_standard_output=tty", "tsc=reliable", "MACHINEID=" + p.UUID.String(), } if debug { kernelParams = append(kernelParams, []string{ "debug", "systemd.log_level=debug", "systemd.show_status=true", // "systemd.confirm_spawn=true", }...) } else { kernelParams = append(kernelParams, "quiet") } args = append(args, []string{ "./" + lkvmPath, // relative path "run", "--name", "rkt-" + p.UUID.String(), "--no-dhcp", // speed bootup "--cpu", strconv.FormatInt(cpu, 10), "--mem", strconv.FormatInt(mem, 10), "--console=virtio", "--kernel", kernelPath, "--disk", "stage1/rootfs", // relative to run/pods/uuid dir this is a place where systemd resides // MACHINEID will be available as environment variable "--params", strings.Join(kernelParams, " "), }..., ) args = append(args, lkvmNetArgs...) if debug { args = append(args, "--debug") } // host volume sharing with 9p nsargs := stage1initcommon.VolumesToKvmDiskArgs(p.Manifest.Volumes) args = append(args, nsargs...) // lkvm requires $HOME to be defined, // see https://github.com/coreos/rkt/issues/1393 if os.Getenv("HOME") == "" { env = append(env, "HOME=/root") } return args, env, nil case "coreos": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if machinedRegister() { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")) case "src": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if machinedRegister() { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } case "host": hostNspawnBin, err := lookupPath("systemd-nspawn", os.Getenv("PATH")) if err != nil { return nil, nil, err } // Check dynamically which version is installed on the host // Support version >= 220 versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput() if err != nil { return nil, nil, fmt.Errorf("unable to probe %s version: %v", hostNspawnBin, err) } versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0] var version int n, err := fmt.Sscanf(versionStr, "systemd %d", &version) if err != nil { return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr) } if n != 1 || version < 220 { return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr) } // Copy systemd, bash, etc. in stage1 at run-time if err := installAssets(); err != nil { return nil, nil, fmt.Errorf("cannot install assets from the host: %v", err) } args = append(args, hostNspawnBin) args = append(args, "--boot") // Launch systemd in the pod args = append(args, fmt.Sprintf("--register=true")) if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } default: return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor) } // link journal only if the host is running systemd if util.IsRunningSystemd() { // we write /etc/machine-id here because systemd-nspawn needs it to link // the container's journal to the host mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id") mID := strings.Replace(p.UUID.String(), "-", "", -1) if err := ioutil.WriteFile(mPath, []byte(mID), 0644); err != nil { log.Fatalf("error writing /etc/machine-id: %v\n", err) } args = append(args, "--link-journal=try-guest") keepUnit, err := util.RunningFromSystemService() if err != nil { if err == util.ErrSoNotFound { fmt.Fprintln(os.Stderr, "Warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.") } else { return nil, nil, fmt.Errorf("error determining if we're running from a system service: %v", err) } } if keepUnit { args = append(args, "--keep-unit") } } if !debug { args = append(args, "--quiet") // silence most nspawn output (log_warning is currently not covered by this) env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too } env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt") if len(privateUsers) > 0 { args = append(args, "--private-users="+privateUsers) } nsargs, err := stage1initcommon.PodToNspawnArgs(p) if err != nil { return nil, nil, fmt.Errorf("failed to generate nspawn args: %v", err) } args = append(args, nsargs...) // Arguments to systemd args = append(args, "--") args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty if !debug { args = append(args, "--log-target=null") // silence systemd output inside pod // TODO remove --log-level=warning when we update stage1 to systemd v222 args = append(args, "--log-level=warning") // limit log output (systemd-shutdown ignores --log-target) args = append(args, "--show-status=0") // silence systemd initialization status output } return args, env, nil }