func run() int { lfd, err := common.GetRktLockFD() if err != nil { fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err) return 1 } if err := sys.CloseOnExec(lfd, true); err != nil { fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err) return 1 } if err := stage1common.WritePpid(os.Getpid()); err != nil { fmt.Fprintf(os.Stderr, "write ppid: %v", err) return 1 } fmt.Println("success, stub stage1 would at this point switch to stage2") return 0 }
// stage1 prepares the pod identified by the first positional command-line
// argument and then replaces the current process with the command returned by
// getArgsEnv.
//
// The steps, in order: load the pod from the current directory, mark the rkt
// lock fd close-on-exec, optionally set up networking, write the systemd unit
// files, move into a new mount namespace, mount the host and container
// cgroups, record the PID and finally exec.
//
// The returned int identifies the phase that failed:
//
//	1 - UUID parsing, pod loading or rkt lock fd handling
//	2 - writing systemd units/targets
//	3 - determining the flavor or building the exec arguments
//	4 - writing the ppid
//	5 - cgroup discovery/mounting
//	6 - networking
//	7 - the final exec
//
// 0 is only returned if the exec wrapper itself returns without an error.
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.PrintE("UUID is missing or malformed", err)
		return 1
	}

	// The pod is loaded relative to the current working directory.
	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.PrintE("failed to load pod", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("failed to get rkt lock fd", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("failed to set FD_CLOEXEC on rkt lock", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	flavor, _, err := stage1initcommon.GetFlavor(p)
	if err != nil {
		log.PrintE("failed to get stage1 flavor", err)
		return 3
	}

	// n stays nil when no contained network was requested; it is still passed
	// to KvmPodToSystemd and getArgsEnv below.
	var n *networking.Networking
	if netList.Contained() {
		// err is shadowed inside this branch; n is the outer variable.
		fps, err := forwardedPorts(p)
		if err != nil {
			log.Error(err)
			return 6
		}

		n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, debug)
		if err != nil {
			log.PrintE("failed to setup network", err)
			return 6
		}

		// If the networking state cannot be saved, the freshly created
		// network is torn down right away before bailing out.
		if err = n.Save(); err != nil {
			log.PrintE("failed to save networking state", err)
			n.Teardown(flavor, debug)
			return 6
		}

		// With a metadata service token, the service URL is built from the
		// default host IP reported by the network setup.
		if len(mdsToken) > 0 {
			hostIP, err := n.GetDefaultHostIP()
			if err != nil {
				log.PrintE("failed to get default Host IP", err)
				return 6
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		// The kvm flavor is rejected when no contained network is configured.
		if flavor == "kvm" {
			log.Print("flavor kvm requires private network configuration (try --net)")
			return 6
		}
		// Without a contained network the metadata service URL is built from
		// localhostIP.
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	if err = stage1initcommon.WriteDefaultTarget(p); err != nil {
		log.PrintE("failed to write default.target", err)
		return 2
	}

	if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil {
		log.PrintE("failed to write prepare-app service template", err)
		return 2
	}

	// Failing to set the journal permissions is only reported as a warning;
	// execution continues.
	if err := stage1initcommon.SetJournalPermissions(p); err != nil {
		log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err)
	}

	// The kvm flavor gets an extra configuration pass before the generic one.
	if flavor == "kvm" {
		if err := KvmPodToSystemd(p, n); err != nil {
			log.PrintE("failed to configure systemd for kvm", err)
			return 2
		}
	}

	if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil {
		log.PrintE("failed to configure systemd", err)
		return 2
	}

	// args[0] is the program that is exec'd at the end of this function.
	args, env, err := getArgsEnv(p, flavor, debug, n)
	if err != nil {
		log.Error(err)
		return 3
	}

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.FatalE("error unsharing", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.FatalE("error making / a slave mount", err)
	}
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.FatalE("error making / a shared and slave mount", err)
	}

	// NOTE(review): if log.FatalE terminates the process, the `return 5`
	// statements following it below are never reached — confirm against the
	// logger implementation.
	enabledCgroups, err := cgroup.GetEnabledCgroups()
	if err != nil {
		log.FatalE("error getting cgroups", err)
		return 5
	}

	// mount host cgroups in the rkt mount namespace
	if err := mountHostCgroups(enabledCgroups); err != nil {
		log.FatalE("couldn't mount the host cgroups", err)
		return 5
	}

	// Collect one service unit name per app in the pod manifest.
	var serviceNames []string
	for _, app := range p.Manifest.Apps {
		serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name))
	}
	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := stage1initcommon.GetMachineID(p)
	// Failing to determine the container sub-cgroup is not fatal: the
	// container cgroups are simply not mounted and the pod continues with
	// per-app isolators disabled.
	subcgroup, err := getContainerSubCgroup(machineID)
	if err == nil {
		if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil {
			log.PrintE("couldn't mount the container cgroups", err)
			return 5
		}
	} else {
		log.PrintE("continuing with per-app isolators disabled", err)
	}

	if err = stage1common.WritePpid(os.Getpid()); err != nil {
		log.Error(err)
		return 4
	}

	// The exec is run through WithClearedCloExec with the lock fd.
	// NOTE(review): presumably this clears FD_CLOEXEC on lfd for the duration
	// of the callback so the exec'd program inherits the lock — confirm.
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("failed to execute %q", args[0]), err)
		return 7
	}

	return 0
}
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("can't load pod", err) return 1 } if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 1 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 1 } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range p.Manifest.Apps[0].App.Environment { env = append(env, e.Name+"="+e.Value) } args := p.Manifest.Apps[0].App.Exec rfs := filepath.Join(common.AppPath(p.Root, p.Manifest.Apps[0].Name), "rootfs") argFlyMounts, err := evaluateMounts(rfs, string(p.Manifest.Apps[0].Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 1 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) for _, mount := range effectiveMounts { var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host directory %s", mount.HostPath), err) return 1 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, 
mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target directory %s", absTargetPath), err) return 1 } switch { case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0700); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0700); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 1 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 1 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 1 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 1 } } if err = stage1common.WritePpid(os.Getpid()); err != nil { log.Error(err) return 4 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 1 } if err := os.Chdir("/"); err != nil { log.PrintE("can't change to root new directory", err) return 1 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 7 } return 0 }