// mountContainerCgroups mounts the cgroup controllers hierarchy in the container's // namespace read-only, leaving the needed knobs in the subcgroup for each-app // read-write so systemd inside stage1 can apply isolators to them func mountContainerCgroups(s1Root string, enabledCgroups map[int][]string, subcgroup string, serviceNames []string) error { if err := cgroup.CreateCgroups(s1Root, enabledCgroups); err != nil { return fmt.Errorf("error creating container cgroups: %v\n", err) } if err := cgroup.RemountCgroupsRO(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { return fmt.Errorf("error restricting container cgroups: %v\n", err) } return nil }
// mountContainerCgroups mounts the cgroup controllers hierarchy in the container's // namespace read-only, leaving the needed knobs in the subcgroup for each-app // read-write so systemd inside stage1 can apply isolators to them func mountContainerCgroups(s1Root string, enabledCgroups map[int][]string, subcgroup string, serviceNames []string) error { if err := cgroup.CreateCgroups(s1Root, enabledCgroups); err != nil { return errwrap.Wrap(errors.New("error creating container cgroups"), err) } if err := cgroup.RemountCgroupsRO(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { return errwrap.Wrap(errors.New("error restricting container cgroups"), err) } return nil }
// mountContainerCgroups mounts the cgroup controllers hierarchy in the container's // namespace read-only, leaving the needed knobs in the subcgroup for each-app // read-write so systemd inside stage1 can apply isolators to them func mountContainerCgroups(s1Root string, enabledCgroups map[int][]string, subcgroup string, serviceNames []string) error { mountContext := os.Getenv(common.EnvSELinuxMountContext) if err := cgroup.CreateCgroups(s1Root, enabledCgroups, mountContext); err != nil { return errwrap.Wrap(errors.New("error creating container cgroups"), err) } if err := cgroup.RemountCgroupsRO(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { return errwrap.Wrap(errors.New("error restricting container cgroups"), err) } return nil }
// mountHostCgroups mounts the host cgroup hierarchy as required by // systemd-nspawn. We need this because some distributions don't have the // "name=systemd" cgroup or don't mount the cgroup controllers in // "/sys/fs/cgroup", and systemd-nspawn needs this. Since this is mounted // inside the rkt mount namespace, it doesn't affect the host. func mountHostCgroups(enabledCgroups map[int][]string) error { systemdControllerPath := "/sys/fs/cgroup/systemd" if !areHostCgroupsMounted(enabledCgroups) { if err := cgroup.CreateCgroups("/", enabledCgroups); err != nil { return fmt.Errorf("error creating host cgroups: %v\n", err) } } if !cgroup.IsControllerMounted("systemd") { if err := os.MkdirAll(systemdControllerPath, 0700); err != nil { return err } if err := syscall.Mount("cgroup", systemdControllerPath, "cgroup", 0, "none,name=systemd"); err != nil { return fmt.Errorf("error mounting name=systemd hierarchy on %q: %v", systemdControllerPath, err) } } return nil }
// mountHostCgroups mounts the host cgroup hierarchy as required by // systemd-nspawn. We need this because some distributions don't have the // "name=systemd" cgroup or don't mount the cgroup controllers in // "/sys/fs/cgroup", and systemd-nspawn needs this. Since this is mounted // inside the rkt mount namespace, it doesn't affect the host. func mountHostCgroups(enabledCgroups map[int][]string) error { systemdControllerPath := "/sys/fs/cgroup/systemd" if !areHostCgroupsMounted(enabledCgroups) { mountContext := os.Getenv(common.EnvSELinuxMountContext) if err := cgroup.CreateCgroups("/", enabledCgroups, mountContext); err != nil { return errwrap.Wrap(errors.New("error creating host cgroups"), err) } } if !cgroup.IsControllerMounted("systemd") { if err := os.MkdirAll(systemdControllerPath, 0700); err != nil { return err } if err := syscall.Mount("cgroup", systemdControllerPath, "cgroup", 0, "none,name=systemd"); err != nil { return errwrap.Wrap(fmt.Errorf("error mounting name=systemd hierarchy on %q", systemdControllerPath), err) } } return nil }
// stage1 prepares the pod whose UUID is given as the first command-line
// argument and then executes the command built by getArgsEnv.
//
// The returned int is presumably used as the process exit status (confirm
// against the caller). As written in this function the values mean:
//
//	0 - reached only if withClearedCloExec returns nil
//	1 - missing/malformed UUID, pod load failure, or rkt lock fd failure
//	2 - failure writing the prepare-app template or configuring systemd
//	3 - failure getting the stage1 flavor or building args/env
//	4 - failure writing the ppid
//	5 - failure creating the cgroups
//	6 - networking failure, or flavor "kvm" without private networking
//	7 - failure executing the command
//
// Failures while unsharing the mount namespace or changing mount propagation
// go through log.Fatalf instead of returning a code.
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		fmt.Fprintln(os.Stderr, "UUID is missing or malformed")
		return 1
	}

	// The pod is loaded relative to the current working directory.
	root := "."
	p, err := LoadPod(root, uuid)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	// Only the flavor is needed here; the second return value is discarded.
	flavor, _, err := p.getFlavor()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get stage1 flavor: %v\n", err)
		return 3
	}

	// n stays nil unless private networking is requested; it is later
	// passed to getArgsEnv either way.
	var n *networking.Networking
	if privNet.Any() {
		// err is re-declared here and is local to this branch.
		fps, err := forwardedPorts(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			return 6
		}

		n, err = networking.Setup(root, p.UUID, fps, privNet, localConfig, flavor)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to setup network: %v\n", err)
			return 6
		}

		// This is the only failure path in this function that tears the
		// network down again.
		if err = n.Save(); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to save networking state %v\n", err)
			n.Teardown(flavor)
			return 6
		}

		// The metadata service URL is only set when a token is present.
		if len(mdsToken) > 0 {
			hostIP, err := n.GetDefaultHostIP()
			if err != nil {
				fmt.Fprintf(os.Stderr, "Failed to get default Host IP: %v\n", err)
				return 6
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		// Without private networking the kvm flavor is rejected outright.
		if flavor == "kvm" {
			fmt.Fprintf(os.Stderr, "Flavor kvm requires private network configuration (try --private-net).\n")
			return 6
		}
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	if err = p.WritePrepareAppTemplate(); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to write prepare-app service template: %v\n", err)
		return 2
	}

	if err = p.PodToSystemd(interactive, flavor); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err)
		return 2
	}

	args, env, err := getArgsEnv(p, flavor, debug, n)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return 3
	}

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.Fatalf("error unsharing: %v", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.Fatalf("error making / a slave mount: %v", err)
	}
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.Fatalf("error making / a shared and slave mount: %v", err)
	}

	// Collect one service unit name per app in the pod manifest.
	var serviceNames []string
	for _, app := range p.Manifest.Apps {
		serviceNames = append(serviceNames, ServiceUnitName(app.Name))
	}
	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := p.GetMachineID()
	subcgroup, err := getContainerSubCgroup(machineID)
	if err == nil {
		if err := cgroup.CreateCgroups(s1Root, subcgroup, serviceNames); err != nil {
			fmt.Fprintf(os.Stderr, "Error creating cgroups: %v\n", err)
			return 5
		}
	} else {
		// Not being able to determine the subcgroup is not fatal: a
		// notice is printed and execution continues.
		fmt.Fprintf(os.Stderr, "Continuing with per-app isolators disabled: %v\n", err)
	}

	// The current process's pid is recorded before exec'ing.
	if err = writePpid(os.Getpid()); err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		return 4
	}

	// NOTE(review): syscall.Exec replaces the process image on success, so
	// the code below presumably only runs when exec failed — confirm
	// against withClearedCloExec.
	err = withClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to execute %q: %v\n", args[0], err)
		return 7
	}

	return 0
}
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { fmt.Fprintln(os.Stderr, "UUID is missing or malformed") return 1 } root := "." p, err := LoadPod(root, uuid) if err != nil { fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking // network plugins lfd, err := common.GetRktLockFD() if err != nil { fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err) return 1 } if err := sys.CloseOnExec(lfd, true); err != nil { fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err) return 1 } mirrorLocalZoneInfo(p.Root) if privNet.Any() { fps, err := forwardedPorts(p) if err != nil { fmt.Fprintln(os.Stderr, err.Error()) return 6 } n, err := networking.Setup(root, p.UUID, fps, privNet) if err != nil { fmt.Fprintf(os.Stderr, "Failed to setup network: %v\n", err) return 6 } defer n.Teardown() if err = n.Save(); err != nil { fmt.Fprintf(os.Stderr, "Failed to save networking state %v\n", err) return 6 } hostIP, err := n.GetDefaultHostIP() if err != nil { fmt.Fprintf(os.Stderr, "Failed to get default Host IP: %v\n", err) return 6 } mdsToken, err := generateMDSToken() if err != nil { fmt.Fprintf(os.Stderr, "Failed to generate MDS token: %v", err) return 8 } p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) if err = registerPod(p, mdsToken); err != nil { fmt.Fprintf(os.Stderr, "Failed to register pod: %v\n", err) return 8 } defer unregisterPod(p) } flavor, systemdStage1Version, err := p.getFlavor() if err != nil { fmt.Fprintf(os.Stderr, "Failed to get stage1 flavor: %v\n", err) return 3 } if err = p.WritePrepareAppTemplate(systemdStage1Version); err != nil { fmt.Fprintf(os.Stderr, "Failed to write prepare-app service template: %v\n", err) return 2 } if err = p.PodToSystemd(interactive); err != nil { fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err) return 2 } args, env, err := getArgsEnv(p, flavor, 
systemdStage1Version, debug) if err != nil { fmt.Fprintf(os.Stderr, "%v\n", err) return 3 } appHashes := p.GetAppHashes() s1Root := common.Stage1RootfsPath(p.Root) machineID := p.GetMachineID() subcgroup, err := getContainerSubCgroup(machineID) if err == nil { if err := cgroup.CreateCgroups(s1Root, subcgroup, appHashes); err != nil { fmt.Fprintf(os.Stderr, "Error creating cgroups: %v\n", err) return 5 } } else { fmt.Fprintf(os.Stderr, "Continuing with per-app isolators disabled: %v\n", err) } var execFn func() error if privNet.Any() { cmd := exec.Cmd{ Path: args[0], Args: args, Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, Env: env, } execFn = func() error { err = cmd.Start() if err != nil { return fmt.Errorf("Failed to start nspawn: %v\n", err) } if err = writePpid(cmd.Process.Pid); err != nil { return err } return cmd.Wait() } } else { if err = writePpid(os.Getpid()); err != nil { fmt.Fprintln(os.Stderr, err.Error()) return 4 } execFn = func() error { return syscall.Exec(args[0], args, env) } } err = withClearedCloExec(lfd, execFn) if err != nil { fmt.Fprintf(os.Stderr, "Failed to execute nspawn: %v\n", err) return 7 } return 0 }