func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.PrintE("UUID is missing or malformed", err) return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("failed to load pod", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking // network plugins lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("failed to get rkt lock fd", err) return 1 } if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("failed to set FD_CLOEXEC on rkt lock", err) return 1 } mirrorLocalZoneInfo(p.Root) flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.PrintE("failed to get stage1 flavor", err) return 3 } var n *networking.Networking if netList.Contained() { fps, err := forwardedPorts(p) if err != nil { log.Error(err) return 6 } n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, debug) if err != nil { log.PrintE("failed to setup network", err) return 6 } if err = n.Save(); err != nil { log.PrintE("failed to save networking state", err) n.Teardown(flavor, debug) return 6 } if len(mdsToken) > 0 { hostIP, err := n.GetDefaultHostIP() if err != nil { log.PrintE("failed to get default Host IP", err) return 6 } p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) } } else { if flavor == "kvm" { log.Print("flavor kvm requires private network configuration (try --net)") return 6 } if len(mdsToken) > 0 { p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken) } } if err = stage1initcommon.WriteDefaultTarget(p); err != nil { log.PrintE("failed to write default.target", err) return 2 } if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil { log.PrintE("failed to write prepare-app service template", err) return 2 } if err := stage1initcommon.SetJournalPermissions(p); err != nil { log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err) } if 
flavor == "kvm" { if err := KvmPodToSystemd(p, n); err != nil { log.PrintE("failed to configure systemd for kvm", err) return 2 } } if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil { log.PrintE("failed to configure systemd", err) return 2 } args, env, err := getArgsEnv(p, flavor, debug, n) if err != nil { log.Error(err) return 3 } // create a separate mount namespace so the cgroup filesystems // are unmounted when exiting the pod if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { log.FatalE("error unsharing", err) } // we recursively make / a "shared and slave" so mount events from the // new namespace don't propagate to the host namespace but mount events // from the host propagate to the new namespace and are forwarded to // its peer group // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { log.FatalE("error making / a slave mount", err) } if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { log.FatalE("error making / a shared and slave mount", err) } enabledCgroups, err := cgroup.GetEnabledCgroups() if err != nil { log.FatalE("error getting cgroups", err) return 5 } // mount host cgroups in the rkt mount namespace if err := mountHostCgroups(enabledCgroups); err != nil { log.FatalE("couldn't mount the host cgroups", err) return 5 } var serviceNames []string for _, app := range p.Manifest.Apps { serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) } s1Root := common.Stage1RootfsPath(p.Root) machineID := stage1initcommon.GetMachineID(p) subcgroup, err := getContainerSubCgroup(machineID) if err == nil { if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { log.PrintE("couldn't mount the container cgroups", err) return 5 } } else { log.PrintE("continuing with per-app isolators disabled", err) } 
if err = stage1common.WritePpid(os.Getpid()); err != nil { log.Error(err) return 4 } err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("failed to execute %q", args[0]), err) return 7 } return 0 }
// stage1 prepares the pod rooted in the current working directory and then
// replaces this process with the flavor-specific launcher built by getArgsEnv.
//
// The pod UUID is taken from the first positional command-line argument. In
// order, the function loads the pod, marks the rkt lock fd close-on-exec,
// optionally sets up networking, initializes the mutable or immutable pod
// environment, moves into a private mount namespace, prepares the cgroup
// hierarchies (cgroup v1 only), records its pid and finally execs args[0].
//
// Almost every failure is reported through log.FatalE/log.Fatal. The only
// explicit non-zero return is 254, used when the networking state cannot be
// saved. syscall.Exec only returns on failure, so the trailing "return 0" is
// not expected to be reached in practice.
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.FatalE("UUID is missing or malformed", err)
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.FatalE("failed to load pod", err)
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.FatalE("failed to get rkt lock fd", err)
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.FatalE("failed to set FD_CLOEXEC on rkt lock", err)
	}

	mirrorLocalZoneInfo(p.Root)

	flavor, _, err := stage1initcommon.GetFlavor(p)
	if err != nil {
		log.FatalE("failed to get stage1 flavor", err)
	}

	// n stays nil unless a network setup is actually performed below.
	var n *networking.Networking
	if netList.Contained() {
		fps, err := commonnet.ForwardedPorts(p.Manifest)
		if err != nil {
			log.FatalE("error initializing forwarding ports", err)
		}

		noDNS := dnsConfMode.Pairs["resolv"] != "default" // force ignore CNI DNS results
		n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, noDNS, debug)
		if err != nil {
			log.FatalE("failed to setup network", err)
		}

		// The network is up but its state could not be saved, so undo the
		// setup before returning. This is the only path that returns a
		// status instead of going through log.FatalE.
		if err = n.Save(); err != nil {
			log.PrintE("failed to save networking state", err)
			n.Teardown(flavor, debug)
			return 254
		}

		if len(mdsToken) > 0 {
			hostIP, err := n.GetForwardableNetHostIP()
			if err != nil {
				log.FatalE("failed to get default Host IP", err)
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		if flavor == "kvm" {
			log.Fatal("flavor kvm requires private network configuration (try --net)")
		}
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	// Bundle the package-level disable* switches so they can be handed to the
	// helpers below as a single value.
	insecureOptions := stage1initcommon.Stage1InsecureOptions{
		DisablePaths:        disablePaths,
		DisableCapabilities: disableCapabilities,
		DisableSeccomp:      disableSeccomp,
	}

	// Mounter built on top of syscall.Mount/syscall.Unmount; it is given
	// diag.Printf, presumably to log each operation (confirm in package fs).
	mnt := fs.NewLoggingMounter(
		fs.MounterFunc(syscall.Mount),
		fs.UnmounterFunc(syscall.Unmount),
		diag.Printf,
	)

	// NOTE(review): whatever UseHostResolv/UseHostHosts return is discarded
	// here — confirm whether they report errors that should be handled.
	if dnsConfMode.Pairs["resolv"] == "host" {
		stage1initcommon.UseHostResolv(mnt, root)
	}

	if dnsConfMode.Pairs["hosts"] == "host" {
		stage1initcommon.UseHostHosts(mnt, root)
	}

	if mutable {
		if err = stage1initcommon.MutableEnv(p); err != nil {
			log.FatalE("cannot initialize mutable environment", err)
		}
	} else {
		if err = stage1initcommon.ImmutableEnv(p, interactive, privateUsers, insecureOptions); err != nil {
			log.FatalE("cannot initialize immutable environment", err)
		}
	}

	// Best effort: a failure here is only reported as a warning.
	if err := stage1initcommon.SetJournalPermissions(p); err != nil {
		log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err)
	}

	if flavor == "kvm" {
		kvm.InitDebug(debug)
		if err := KvmNetworkingToSystemd(p, n); err != nil {
			log.FatalE("failed to configure systemd for kvm", err)
		}
	}

	canMachinedRegister := false
	if flavor != "kvm" {
		// kvm doesn't register with systemd right now, see #2664.
		canMachinedRegister = machinedRegister()
	}
	diag.Printf("canMachinedRegister %t", canMachinedRegister)

	args, env, err := getArgsEnv(p, flavor, canMachinedRegister, debug, n, insecureOptions)
	if err != nil {
		log.FatalE("cannot get environment", err)
	}
	diag.Printf("args %q", args)
	diag.Printf("env %q", env)

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.FatalE("error unsharing", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.FatalE("error making / a slave mount", err)
	}
	if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.FatalE("error making / a shared and slave mount", err)
	}

	unifiedCgroup, err := cgroup.IsCgroupUnified("/")
	if err != nil {
		log.FatalE("error determining cgroup version", err)
	}
	diag.Printf("unifiedCgroup %t", unifiedCgroup)

	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := stage1initcommon.GetMachineID(p)

	subcgroup, err := getContainerSubCgroup(machineID, canMachinedRegister, unifiedCgroup)
	if err != nil {
		log.FatalE("error getting container subcgroup", err)
	}
	diag.Printf("subcgroup %q", subcgroup)

	// Persist the sub-cgroup in a file named "subcgroup" under the pod root.
	// NOTE(review): if subcgroup is already a string the Sprintf("%s") is
	// redundant — confirm its type before simplifying.
	if err := ioutil.WriteFile(filepath.Join(p.Root, "subcgroup"),
		[]byte(fmt.Sprintf("%s", subcgroup)), 0644); err != nil {
		log.FatalE("cannot write subcgroup file", err)
	}

	// The mounts and the explicit sub-cgroup join below are only performed
	// when the host is not on the unified hierarchy.
	if !unifiedCgroup {
		enabledCgroups, err := v1.GetEnabledCgroups()
		if err != nil {
			log.FatalE("error getting v1 cgroups", err)
		}
		diag.Printf("enabledCgroups %q", enabledCgroups)

		if err := mountHostV1Cgroups(mnt, enabledCgroups); err != nil {
			log.FatalE("couldn't mount the host v1 cgroups", err)
		}

		// Join the "systemd" sub-cgroup ourselves only when
		// canMachinedRegister is false.
		if !canMachinedRegister {
			if err := v1.JoinSubcgroup("systemd", subcgroup); err != nil {
				log.FatalE(fmt.Sprintf("error joining subcgroup %q", subcgroup), err)
			}
		}

		// One systemd service unit name per app in the pod manifest.
		var serviceNames []string
		for _, app := range p.Manifest.Apps {
			serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name))
		}
		diag.Printf("serviceNames %q", serviceNames)

		if err := mountContainerV1Cgroups(mnt, s1Root, enabledCgroups, subcgroup, serviceNames, insecureOptions); err != nil {
			log.FatalE("couldn't mount the container v1 cgroups", err)
		}
	}

	// KVM flavor has a bit different logic in handling pid vs ppid, for details look into #2389
	// it doesn't require the existence of a "ppid", instead it registers the current pid (which
	// will be reused by lkvm binary) as a pod process pid used during entering
	// NOTE(review): pid_filename does not follow Go's MixedCaps naming; a
	// rename to pidFilename would be purely cosmetic.
	pid_filename := "ppid"
	if flavor == "kvm" {
		pid_filename = "pid"
	}

	if err = stage1common.WritePid(os.Getpid(), pid_filename); err != nil {
		log.FatalE("error writing pid", err)
	}

	if flavor == "kvm" {
		if err := KvmPrepareMounts(s1Root, p); err != nil {
			log.FatalE("error preparing mounts", err)
		}
	}

	// syscall.Exec replaces the process image and therefore only returns when
	// it fails. WithClearedCloExec presumably clears FD_CLOEXEC on lfd around
	// the callback so the lock fd survives the exec — TODO confirm.
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})

	if err != nil {
		log.FatalE(fmt.Sprintf("failed to execute %q", args[0]), err)
	}

	return 0
}
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor used func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) { var args []string env := os.Environ() // We store the pod's flavor so we can later garbage collect it correctly if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err) } // set hostname inside pod // According to systemd manual (https://www.freedesktop.org/software/systemd/man/hostname.html) : // "The /etc/hostname file configures the name of the local system that is set // during boot using the sethostname system call" if hostname == "" { hostname = stage1initcommon.GetMachineID(p) } hostnamePath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc/hostname") if err := ioutil.WriteFile(hostnamePath, []byte(hostname), 0644); err != nil { return nil, nil, fmt.Errorf("error writing %s, %s", hostnamePath, err) } switch flavor { case "kvm": if privateUsers != "" { return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") } // kernel and lkvm are relative path, because init has /var/lib/rkt/..../uuid as its working directory // TODO: move to path.go kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage") lkvmPath := filepath.Join(common.Stage1RootfsPath(p.Root), "lkvm") netDescriptions := kvm.GetNetworkDescriptions(n) lkvmNetArgs, err := kvm.GetKVMNetArgs(netDescriptions) if err != nil { return nil, nil, err } cpu, mem := kvm.GetAppsResources(p.Manifest.Apps) kernelParams := []string{ "console=hvc0", "init=/usr/lib/systemd/systemd", "no_timer_check", "noreplace-smp", "systemd.default_standard_error=journal+console", "systemd.default_standard_output=journal+console", // "systemd.default_standard_output=tty", "tsc=reliable", "MACHINEID=" + p.UUID.String(), } if debug { kernelParams = 
append(kernelParams, []string{ "debug", "systemd.log_level=debug", "systemd.show_status=true", // "systemd.confirm_spawn=true", }...) } else { kernelParams = append(kernelParams, "quiet") } args = append(args, []string{ "./" + lkvmPath, // relative path "run", "--name", "rkt-" + p.UUID.String(), "--no-dhcp", // speed bootup "--cpu", strconv.FormatInt(cpu, 10), "--mem", strconv.FormatInt(mem, 10), "--console=virtio", "--kernel", kernelPath, "--disk", "stage1/rootfs", // relative to run/pods/uuid dir this is a place where systemd resides // MACHINEID will be available as environment variable "--params", strings.Join(kernelParams, " "), }..., ) args = append(args, lkvmNetArgs...) if debug { args = append(args, "--debug") } // host volume sharing with 9p nsargs := stage1initcommon.VolumesToKvmDiskArgs(p.Manifest.Volumes) args = append(args, nsargs...) // lkvm requires $HOME to be defined, // see https://github.com/coreos/rkt/issues/1393 if os.Getenv("HOME") == "" { env = append(env, "HOME=/root") } return args, env, nil case "coreos": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if machinedRegister() { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")) case "src": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod if context := os.Getenv(common.EnvSELinuxContext); 
context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if machinedRegister() { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } case "host": hostNspawnBin, err := common.LookupPath("systemd-nspawn", os.Getenv("PATH")) if err != nil { return nil, nil, err } // Check dynamically which version is installed on the host // Support version >= 220 versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput() if err != nil { return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err) } versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0] var version int n, err := fmt.Sscanf(versionStr, "systemd %d", &version) if err != nil { return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr) } if n != 1 || version < 220 { return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr) } // Copy systemd, bash, etc. 
in stage1 at run-time if err := installAssets(); err != nil { return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err) } args = append(args, hostNspawnBin) args = append(args, "--boot") // Launch systemd in the pod args = append(args, fmt.Sprintf("--register=true")) if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } default: return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor) } // link journal only if the host is running systemd if util.IsRunningSystemd() { // we write /etc/machine-id here because systemd-nspawn needs it to link // the container's journal to the host mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id") mID := strings.Replace(p.UUID.String(), "-", "", -1) if err := ioutil.WriteFile(mPath, []byte(mID), 0644); err != nil { log.FatalE("error writing /etc/machine-id", err) } args = append(args, "--link-journal=try-guest") keepUnit, err := util.RunningFromSystemService() if err != nil { if err == util.ErrSoNotFound { log.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. 
a systemd service) won't be enforced.") } else { return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err) } } if keepUnit { args = append(args, "--keep-unit") } } if !debug { args = append(args, "--quiet") // silence most nspawn output (log_warning is currently not covered by this) env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too } env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt") if len(privateUsers) > 0 { args = append(args, "--private-users="+privateUsers) } nsargs, err := stage1initcommon.PodToNspawnArgs(p) if err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err) } args = append(args, nsargs...) // Arguments to systemd args = append(args, "--") args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty if !debug { args = append(args, "--log-target=null") // silence systemd output inside pod args = append(args, "--show-status=0") // silence systemd initialization status output } return args, env, nil }
// getArgsEnv returns the nspawn or lkvm args and env according to the flavor // as the first two return values respectively. func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister bool, debug bool, n *networking.Networking, insecureOptions stage1initcommon.Stage1InsecureOptions) ([]string, []string, error) { var args []string env := os.Environ() // We store the pod's flavor so we can later garbage collect it correctly if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err) } // set hostname inside pod // According to systemd manual (https://www.freedesktop.org/software/systemd/man/hostname.html) : // "The /etc/hostname file configures the name of the local system that is set // during boot using the sethostname system call" if hostname == "" { hostname = stage1initcommon.GetMachineID(p) } hostnamePath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc/hostname") if err := ioutil.WriteFile(hostnamePath, []byte(hostname), 0644); err != nil { return nil, nil, fmt.Errorf("error writing %s, %s", hostnamePath, err) } // systemd-nspawn needs /etc/machine-id to link the container's journal // to the host. 
Since systemd-v230, /etc/machine-id is mandatory, see // https://github.com/systemd/systemd/commit/e01ff70a77e781734e1e73a2238af2e9bf7967a8 mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id") machineID := strings.Replace(p.UUID.String(), "-", "", -1) switch flavor { case "kvm": if privateUsers != "" { return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") } // kernel and hypervisor binaries are located relative to the working directory // of init (/var/lib/rkt/..../uuid) // TODO: move to path.go kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage") netDescriptions := kvm.GetNetworkDescriptions(n) cpu, mem := kvm.GetAppsResources(p.Manifest.Apps) // Parse hypervisor hv, err := KvmCheckHypervisor(common.Stage1RootfsPath(p.Root)) if err != nil { return nil, nil, err } // Set start command for hypervisor StartCmd := hvlkvm.StartCmd switch hv { case "lkvm": StartCmd = hvlkvm.StartCmd case "qemu": StartCmd = hvqemu.StartCmd default: return nil, nil, fmt.Errorf("unrecognized hypervisor") } hvStartCmd := StartCmd( common.Stage1RootfsPath(p.Root), p.UUID.String(), kernelPath, netDescriptions, cpu, mem, debug, ) if hvStartCmd == nil { return nil, nil, fmt.Errorf("no hypervisor") } args = append(args, hvStartCmd...) 
// lkvm requires $HOME to be defined, // see https://github.com/coreos/rkt/issues/1393 if os.Getenv("HOME") == "" { env = append(env, "HOME=/root") } if err := linkJournal(common.Stage1RootfsPath(p.Root), machineID); err != nil { return nil, nil, errwrap.Wrap(errors.New("error linking pod's journal"), err) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) return args, env, nil case "coreos": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod args = append(args, "--notify-ready=yes") // From systemd v231 if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) case "src": args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) args = append(args, "--boot") // Launch systemd in the pod args = append(args, "--notify-ready=yes") // From systemd v231 if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, 
fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) } // use only dynamic libraries provided in the image // from systemd v231 there's a new internal libsystemd-shared-v231.so // which is present in /usr/lib/systemd env = append(env, "LD_LIBRARY_PATH="+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")+":"+ filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib/systemd")) case "host": hostNspawnBin, err := common.LookupPath("systemd-nspawn", os.Getenv("PATH")) if err != nil { return nil, nil, err } // Check dynamically which version is installed on the host // Support version >= 220 versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput() if err != nil { return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err) } versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0] var version int n, err := fmt.Sscanf(versionStr, "systemd %d", &version) if err != nil { return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr) } if n != 1 || version < 220 { return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr) } // Copy systemd, bash, etc. 
in stage1 at run-time if err := installAssets(); err != nil { return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err) } args = append(args, hostNspawnBin) args = append(args, "--boot") // Launch systemd in the pod args = append(args, fmt.Sprintf("--register=true")) if version >= 231 { args = append(args, "--notify-ready=yes") // From systemd v231 } if context := os.Getenv(common.EnvSELinuxContext); context != "" { args = append(args, fmt.Sprintf("-Z%s", context)) } if context := os.Getenv(common.EnvSELinuxMountContext); context != "" { args = append(args, fmt.Sprintf("-L%s", context)) } default: return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor) } machineIDBytes := append([]byte(machineID), '\n') if err := ioutil.WriteFile(mPath, machineIDBytes, 0644); err != nil { log.FatalE("error writing /etc/machine-id", err) } // link journal only if the host is running systemd if util.IsRunningSystemd() { args = append(args, "--link-journal=try-guest") keepUnit, err := util.RunningFromSystemService() if err != nil { if err == dlopen.ErrSoNotFound { log.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.") } else { return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err) } } if keepUnit { args = append(args, "--keep-unit") } } else { args = append(args, "--link-journal=no") } if !debug { args = append(args, "--quiet") // silence most nspawn output (log_warning is currently not covered by this) env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too } env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt") // TODO (alepuccetti) remove this line when rkt will use cgroup namespace // If the kernel has the cgroup namespace enabled, systemd v232 will use it by default. 
// This was introduced by https://github.com/systemd/systemd/pull/3809 and it will cause // problems in rkt when cgns is enabled and cgroup-v1 is used. For more information see // https://github.com/systemd/systemd/pull/3589#discussion_r70277625. // The following line tells systemd-nspawn not to use cgroup namespace using the environment variable // introduced by https://github.com/systemd/systemd/pull/3809. env = append(env, "SYSTEMD_NSPAWN_USE_CGNS=no") if insecureOptions.DisablePaths { env = append(env, "SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes") } if len(privateUsers) > 0 { args = append(args, "--private-users="+privateUsers) } nsargs, err := stage1initcommon.PodToNspawnArgs(p, insecureOptions) if err != nil { return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err) } args = append(args, nsargs...) // Arguments to systemd args = append(args, "--") args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty if !debug { args = append(args, "--log-target=null") // silence systemd output inside pod args = append(args, "--show-status=0") // silence systemd initialization status output } return args, env, nil }