// Run mounts the right overlay filesystems and actually runs the prepared // pod by exec()ing the stage1 init inside the pod filesystem. func Run(cfg RunConfig, dir string) { useOverlay, err := preparedWithOverlay(dir) if err != nil { log.Fatalf("error: %v", err) } log.Printf("Setting up stage1") if err := setupStage1Image(cfg, cfg.Stage1Image, dir, useOverlay); err != nil { log.Fatalf("error setting up stage1: %v", err) } log.Printf("Wrote filesystem to %s\n", dir) for _, app := range cfg.Apps { if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil { log.Fatalf("error setting up app image: %v", err) } } if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil { log.Fatalf("setting lock fd environment: %v", err) } log.Printf("Pivoting to filesystem %s", dir) if err := os.Chdir(dir); err != nil { log.Fatalf("failed changing to dir: %v", err) } ep, err := getStage1Entrypoint(dir, runEntrypoint) if err != nil { log.Fatalf("error determining init entrypoint: %v", err) } args := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)} log.Printf("Execing %s", ep) if cfg.Debug { args = append(args, "--debug") } if cfg.PrivateNet.Any() { args = append(args, "--private-net="+cfg.PrivateNet.String()) } if cfg.Interactive { args = append(args, "--interactive") } if cfg.MDSRegister { mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps) if err != nil { log.Fatalf("failed to register the pod: %v", err) } args = append(args, "--mds-token="+mdsToken) } if cfg.LocalConfig != "" { args = append(args, "--local-config="+cfg.LocalConfig) } args = append(args, cfg.UUID.String()) // make sure the lock fd stays open across exec if err := sys.CloseOnExec(cfg.LockFd, false); err != nil { log.Fatalf("error clearing FD_CLOEXEC on lock fd") } if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil { log.Fatalf("error setting process SELinux label: %v", err) } if err := syscall.Exec(args[0], args, os.Environ()); err != nil { log.Fatalf("error execing init: %v", err) } }
// Run mounts the right overlay filesystems and actually runs the prepared // pod by exec()ing the stage1 init inside the pod filesystem. func Run(cfg RunConfig, dir string) { useOverlay, err := preparedWithOverlay(dir) if err != nil { log.Fatalf("error: %v", err) } // create a separate mount namespace so the cgroup filesystems and/or // overlay mounts are unmounted when exiting the pod if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { log.Fatalf("error unsharing: %v", err) } // we recursively make / a "shared and slave" so mount events from the // new namespace don't propagate to the host namespace but mount events // from the host propagate to the new namespace and are forwarded to // its peer group // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { log.Fatalf("error making / a slave mount: %v", err) } if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { log.Fatalf("error making / a shared and slave mount: %v", err) } log.Printf("Setting up stage1") if err := setupStage1Image(cfg, cfg.Stage1Image, dir, useOverlay); err != nil { log.Fatalf("error setting up stage1: %v", err) } log.Printf("Wrote filesystem to %s\n", dir) for _, img := range cfg.Images { if err := setupAppImage(cfg, img, dir, useOverlay); err != nil { log.Fatalf("error setting up app image: %v", err) } } if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil { log.Fatalf("setting lock fd environment: %v", err) } log.Printf("Pivoting to filesystem %s", dir) if err := os.Chdir(dir); err != nil { log.Fatalf("failed changing to dir: %v", err) } ep, err := getStage1Entrypoint(dir, runEntrypoint) if err != nil { log.Fatalf("error determining init entrypoint: %v", err) } log.Printf("Execing %s", ep) args := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)} if cfg.Debug { args = append(args, "--debug") } if cfg.PrivateNet.Any() { args = append(args, "--private-net="+cfg.PrivateNet.String()) } if cfg.Interactive { args = append(args, "--interactive") } args = append(args, cfg.UUID.String()) // make sure the lock fd stays open across exec if err := sys.CloseOnExec(cfg.LockFd, false); err != nil { log.Fatalf("error clearing FD_CLOEXEC on lock fd") } if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil { log.Fatalf("error setting process SELinux label: %v", err) } if err := syscall.Exec(args[0], args, os.Environ()); err != nil { log.Fatalf("error execing init: %v", err) } }