func TestRefreshPodIsGone(t *testing.T) { uuid, err := types.NewUUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") if err != nil { panic(err) } tmpDir, err := ioutil.TempDir("", "") if err != nil { panic(err) } defer os.RemoveAll(tmpDir) podPath := filepath.Join(embryoDir(tmpDir), uuid.String()) os.MkdirAll(podPath, 0777) p, err := getPod(tmpDir, uuid) if err != nil { t.Fatalf("unable to get pod: %v", err) } os.RemoveAll(tmpDir) err = p.refreshState() if err != nil { t.Fatalf("error refreshing state: %v", err) } pstate := podToStates(p) expected := states{isGone: true} if !reflect.DeepEqual(expected, pstate) { t.Errorf("expected %+v == %+v after refrshState", expected, pstate) } }
func handlePodVerify(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() uuid, err := types.NewUUID(r.FormValue("uuid")) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "uuid field missing or malformed: %v", err) return } content := r.FormValue("content") if content == "" { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "content field missing") return } sig, err := base64.StdEncoding.DecodeString(r.FormValue("signature")) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "signature field missing or corrupt: %v", err) return } h := hmac.New(sha512.New, hmacKey[:]) h.Write((*uuid)[:]) h.Write([]byte(content)) if hmac.Equal(sig, h.Sum(nil)) { w.WriteHeader(http.StatusOK) } else { w.WriteHeader(http.StatusForbidden) } }
func main() { flag.Parse() log, diag, _ = rktlog.NewLogSet("stage1 gc", debug) if !debug { diag.SetOutput(ioutil.Discard) } podID, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Fatal("UUID is missing or malformed") } diag.Printf("Removing journal link.") if err := removeJournalLink(podID); err != nil { log.PrintE("error removing journal link", err) } diag.Printf("Cleaning up cgroups.") if err := cleanupV1Cgroups(); err != nil { log.PrintE("error cleaning up cgroups", err) } diag.Printf("Tearing down networks.") if err := gcNetworking(podID); err != nil { log.FatalE("", err) } }
func handleRegisterPod(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() uuid, err := types.NewUUID(mux.Vars(r)["uuid"]) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "UUID is missing or malformed: %v", err) return } token := queryValue(r.URL, "token") if token == "" { w.WriteHeader(http.StatusBadRequest) fmt.Fprint(w, "token missing") return } pm := &schema.PodManifest{} if err := json.NewDecoder(r.Body).Decode(pm); err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "JSON-decoding failed: %v", err) return } pods.addPod(uuid, token, pm) w.WriteHeader(http.StatusOK) }
func (s *v1AlphaAPIServer) InspectPod(ctx context.Context, request *v1alpha.InspectPodRequest) (*v1alpha.InspectPodResponse, error) { uuid, err := types.NewUUID(request.Id) if err != nil { stderr.PrintE(fmt.Sprintf("invalid pod id %q", request.Id), err) return nil, err } p, err := getPod(uuid) if err != nil { stderr.PrintE(fmt.Sprintf("failed to get pod %q", request.Id), err) return nil, err } defer p.Close() pod, _, err := getBasicPod(p) if err != nil { return nil, err } // Fill the extra pod info that is not available in ListPods(detail=false). if err := fillAppInfo(s.store, p, pod); err != nil { return nil, err } return &v1alpha.InspectPodResponse{Pod: pod}, nil }
func handleRegisterApp(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() uuid, err := types.NewUUID(mux.Vars(r)["uuid"]) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "UUID is missing or malformed: %v", err) return } an := mux.Vars(r)["app"] if an == "" { w.WriteHeader(http.StatusBadRequest) fmt.Fprint(w, "app missing") return } im := &schema.ImageManifest{} if err := json.NewDecoder(r.Body).Decode(im); err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "JSON-decoding failed: %v", err) return } err = pods.addApp(uuid, an, im) if err != nil { w.WriteHeader(http.StatusNotFound) fmt.Fprint(w, "Pod with given UUID not found") return } w.WriteHeader(http.StatusOK) }
func TestImagePrepareRmIDRun(t *testing.T) { imageFile := patchTestACI(unreferencedACI, fmt.Sprintf("--name=%s", unreferencedApp)) defer os.Remove(imageFile) ctx := testutils.NewRktRunCtx() defer ctx.Cleanup() cmd := fmt.Sprintf("%s --insecure-options=image fetch %s", ctx.Cmd(), imageFile) t.Logf("Fetching %s", imageFile) spawnAndWaitOrFail(t, cmd, 0) // at this point we know that RKT_INSPECT_IMAGE env var is not empty referencedACI := os.Getenv("RKT_INSPECT_IMAGE") cmds := strings.Fields(ctx.Cmd()) prepareCmd := exec.Command(cmds[0], cmds[1:]...) prepareCmd.Args = append(prepareCmd.Args, "--insecure-options=image", "prepare", referencedACI) output, err := prepareCmd.Output() if err != nil { t.Fatalf("Cannot read the output: %v", err) } podIDStr := strings.TrimSpace(string(output)) podID, err := types.NewUUID(podIDStr) if err != nil { t.Fatalf("%q is not a valid UUID: %v", podIDStr, err) } t.Logf("Retrieving stage1 imageID") stage1ImageID, err := getImageID(ctx, stage1App) if err != nil { t.Fatalf("rkt didn't terminate correctly: %v", err) } t.Logf("Retrieving %s image ID", referencedApp) referencedImageID, err := getImageID(ctx, referencedApp) if err != nil { t.Fatalf("rkt didn't terminate correctly: %v", err) } t.Logf("Retrieving %s image ID", unreferencedApp) unreferencedImageID, err := getImageID(ctx, unreferencedApp) if err != nil { t.Fatalf("rkt didn't terminate correctly: %v", err) } t.Logf("Removing stage1 image (should work)") removeImage(t, ctx, stage1ImageID) t.Logf("Removing image for app %s (should work)", referencedApp) removeImage(t, ctx, referencedImageID) t.Logf("Removing image for app %s (should work)", unreferencedApp) removeImage(t, ctx, unreferencedImageID) cmd = fmt.Sprintf("%s run-prepared --mds-register=false %s", ctx.Cmd(), podID.String()) t.Logf("Running %s", referencedACI) spawnAndWaitOrFail(t, cmd, 0) }
func readUUIDFromFile(path string) (*types.UUID, error) { uuid, err := ioutil.ReadFile(path) if err != nil { return nil, err } uuid = bytes.TrimSpace(uuid) return types.NewUUID(string(uuid)) }
func TestPodStoreAddApp(t *testing.T) { ps, _, _, app := setupPodStoreTest(t) uuid2, err := types.NewUUID("fe305d54-75b4-431b-adb2-eb6b9e546013") if err != nil { panic("bad uuid literal") } im := &schema.ImageManifest{} if err = ps.addApp(uuid2, app, im); err != errPodNotFound { t.Errorf("addApp with unknown pod returned: %v", err) } }
func main() { flag.Parse() log := rktlog.New(os.Stderr, "stage1 gc", debug) podID, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Fatal("UUID is missing or malformed") } if err := gcNetworking(podID); err != nil { log.FatalE("", err) } }
func validatePodMetadata(metadataURL string, pm *schema.PodManifest) results { r := results{} uuid, err := metadataGet(metadataURL, "/pod/uuid") if err != nil { return append(r, err) } _, err = types.NewUUID(string(uuid)) if err != nil { return append(r, fmt.Errorf("malformed UUID returned (%v): %v", string(uuid), err)) } return append(r, validatePodAnnotations(metadataURL, pm)...) }
func runRktAndGetUUID(t *testing.T, rktCmd string) string { child := spawnOrFail(t, rktCmd) defer waitOrFail(t, child, 0) result, out, err := expectRegexWithOutput(child, "\n[0-9a-f-]{36}") if err != nil || len(result) != 1 { t.Fatalf("Error: %v\nOutput: %v", err, out) } podIDStr := strings.TrimSpace(result[0]) podID, err := types.NewUUID(podIDStr) if err != nil { t.Fatalf("%q is not a valid UUID: %v", podIDStr, err) } return podID.String() }
func handleUnregisterPod(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() uuid, err := types.NewUUID(mux.Vars(r)["uuid"]) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "UUID is missing or malformed: %v", err) return } if err := pods.remove(uuid); err != nil { w.WriteHeader(http.StatusNotFound) fmt.Fprint(w, err) return } w.WriteHeader(http.StatusOK) }
// walkPods iterates over the included directories calling function f for every pod found. func walkPods(include includeMask, f func(*pod)) error { if err := initPods(); err != nil { return err } ls, err := listPods(include) if err != nil { return errwrap.Wrap(errors.New("failed to get pods"), err) } sort.Strings(ls) for _, uuid := range ls { u, err := types.NewUUID(uuid) if err != nil { stderr.PrintE(fmt.Sprintf("skipping %q", uuid), err) continue } p, err := getPod(u) if err != nil { stderr.PrintE(fmt.Sprintf("skipping %q", uuid), err) continue } // omit pods found in unrequested states // this is to cover a race between listPods finding the uuids and pod states changing // it's preferable to keep these operations lock-free, for example a `rkt gc` shouldn't block `rkt run`. if p.isEmbryo && include&includeEmbryoDir == 0 || p.isExitedGarbage && include&includeExitedGarbageDir == 0 || p.isGarbage && include&includeGarbageDir == 0 || p.isPrepared && include&includePreparedDir == 0 || ((p.isPreparing || p.isAbortedPrepare) && include&includePrepareDir == 0) || p.isRunning() && include&includeRunDir == 0 { p.Close() continue } f(p) p.Close() } return nil }
func (s *v1AlphaAPIServer) InspectPod(ctx context.Context, request *v1alpha.InspectPodRequest) (*v1alpha.InspectPodResponse, error) { uuid, err := types.NewUUID(request.Id) if err != nil { stderr.PrintE(fmt.Sprintf("invalid pod id %q", request.Id), err) return nil, err } p, err := getPod(uuid) if err != nil { stderr.PrintE(fmt.Sprintf("failed to get pod %q", request.Id), err) return nil, err } defer p.Close() pod := s.getBasicPod(p) fillPodDetails(s.store, p, pod) return &v1alpha.InspectPodResponse{Pod: pod}, nil }
func main() { flag.Parse() podID, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Fatal("UUID is missing or malformed") } if err := removeJournalLink(podID); err != nil { log.PrintE("error removing journal link", err) } if err := cleanupV1Cgroups(); err != nil { log.PrintE("error cleaning up cgroups", err) } if err := gcNetworking(podID); err != nil { log.FatalE("", err) } }
func ProcessArgsAndReturnPodUUID() *types.UUID { flag.Parse() if cliDebugFlag { logs.SetLevel(logs.DEBUG) } if lvlStr := os.Getenv(common.EnvLogLevel); lvlStr != "" { lvl, err := logs.ParseLevel(lvlStr) if err != nil { fmt.Printf("Unknown log level : %s", lvlStr) os.Exit(1) } logs.SetLevel(lvl) } arg := flag.Arg(0) uuid, err := types.NewUUID(arg) if err != nil { logs.WithE(err).WithField("content", arg).Fatal("UUID is missing or malformed") } return uuid }
// resolveUUID attempts to resolve the uuid specified as uuid against all pods present. // An unambiguously matched uuid or nil is returned. func resolveUUID(dataDir, uuid string) (*types.UUID, error) { uuid = strings.ToLower(uuid) m, err := matchUUID(dataDir, uuid) if err != nil { return nil, err } if len(m) == 0 { return nil, fmt.Errorf("no matches found for %q", uuid) } if len(m) > 1 { return nil, fmt.Errorf("ambiguous uuid, %d matches", len(m)) } u, err := types.NewUUID(m[0]) if err != nil { return nil, errwrap.Wrap(errors.New("invalid UUID"), err) } return u, nil }
func setupPodStoreTest(t *testing.T) (*podStore, *types.UUID, string, string) { ps := newPodStore() uuid, err := types.NewUUID("de305d54-75b4-431b-adb2-eb6b9e546013") if err != nil { panic("bad uuid literal") } ip := "1.2.3.4" app := "myapp" pm := &schema.PodManifest{} ps.addPod(uuid, ip, pm) im := &schema.ImageManifest{} err = ps.addApp(uuid, app, im) if err != nil { t.Fatalf("addApp failed with %v", err) } return ps, uuid, ip, app }
// NewPod creates a new pod directory in the "preparing" state, allocating a unique uuid for it in the process. // The returned pod is always left in an exclusively locked state (preparing is locked in the prepared directory) // The pod must be closed using pod.Close() func NewPod(dataDir string) (*Pod, error) { if err := initPods(dataDir); err != nil { return nil, err } p := &Pod{ dataDir: dataDir, createdByMe: true, isEmbryo: true, // starts as an embryo, then ToPreparing locks, renames, and sets isPreparing // rest start false. } var err error p.UUID, err = types.NewUUID(uuid.New()) if err != nil { return nil, errwrap.Wrap(errors.New("error creating UUID"), err) } err = os.Mkdir(p.embryoPath(), 0750) if err != nil { return nil, err } p.FileLock, err = lock.NewLock(p.embryoPath(), lock.Dir) if err != nil { os.Remove(p.embryoPath()) return nil, err } err = p.ToPreparing() if err != nil { return nil, err } // At this point we we have: // /var/lib/rkt/pods/prepare/$uuid << exclusively locked to indicate "preparing" return p, nil }
func main() { flag.Parse() stage1initcommon.InitDebug(debug) log, diag, _ = rktlog.NewLogSet("app-add", debug) if !debug { diag.SetOutput(ioutil.Discard) } uuid, err := types.NewUUID(flagUUID) if err != nil { log.FatalE("UUID is missing or malformed", err) } appName, err := types.NewACName(flagApp) if err != nil { log.FatalE("invalid app name", err) } root := "." p, err := stage1types.LoadPod(root, uuid) if err != nil { log.FatalE("failed to load pod", err) } flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.FatalE("failed to get stage1 flavor", err) } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } ra := p.Manifest.Apps.Get(*appName) if ra == nil { log.Fatalf("failed to find app %q", *appName) } binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { log.FatalE("failed to find bin path", err) } if ra.App.WorkingDirectory == "" { ra.App.WorkingDirectory = "/" } enterCmd := stage1common.PrepareEnterCmd(false) stage1initcommon.AppAddMounts(p, ra, enterCmd) // when using host cgroups, make the subgroup writable by pod systemd if flavor != "kvm" { err = prepareAppCgroups(p, ra, enterCmd) if err != nil { log.FatalE("error preparing cgroups", err) } } // write service files w := stage1initcommon.NewUnitWriter(p) w.AppUnit(ra, binPath, privateUsers, insecureOptions, unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Service", "StandardOutput", "journal+console"), unit.NewUnitOption("Service", "StandardError", "journal+console"), ) w.AppReaperUnit(ra.Name, binPath) if err := w.Error(); err != nil { log.FatalE("error generating app units", err) } // stage2 environment is ready at this point, but systemd does not know // about the new application yet args := enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") cmd := exec.Cmd{ Path: args[0], Args: args, } if out, err := cmd.CombinedOutput(); err != nil { log.Fatalf("%q failed at daemon-reload:\n%s", appName, out) } os.Exit(0) }
func stage1(rp *stage1commontypes.RuntimePod) int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 254 } root := "." p, err := stage1commontypes.LoadPod(root, uuid, rp) if err != nil { log.PrintE("can't load pod", err) return 254 } if err := p.SaveRuntime(); err != nil { log.FatalE("failed to save runtime parameters", err) } // Sanity checks if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 254 } ra := p.Manifest.Apps[0] imgName := p.AppNameToImageName(ra.Name) args := ra.App.Exec if len(args) == 0 { log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return 254 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 254 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 254 } workDir := "/" if ra.App.WorkingDirectory != "" { workDir = ra.App.WorkingDirectory } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range ra.App.Environment { env = append(env, e.Name+"="+e.Value) } rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs") argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 254 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) /* Process DNS config files * * /etc/resolv.conf: four modes * 'host' - bind-mount host's file * 'stage0' - bind-mount the file created by stage0 * 'default' - do nothing (we would respect CNI if fly had networking) * 'none' - do nothing */ switch p.ResolvConfMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/resolv.conf", rfs, "/etc/resolv.conf", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": if err := copyResolv(p); err != nil { log.PrintE("can't copy /etc/resolv.conf", err) return 254 } } /* * /etc/hosts: three modes: * 'host' - bind-mount hosts's file * 'stage0' - bind mount the file created by stage1 * 'default' - create a stub /etc/hosts if needed */ switch p.EtcHostsMode { case "host": effectiveMounts = append(effectiveMounts, flyMount{"/etc/hosts", rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "stage0": effectiveMounts = append(effectiveMounts, flyMount{ filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-hosts"), rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY}) case "default": stage2HostsPath := filepath.Join(common.AppRootfsPath(p.Root, ra.Name), "etc", "hosts") if _, err := os.Stat(stage2HostsPath); err != nil && os.IsNotExist(err) { fallbackHosts := []byte("127.0.0.1 localhost localdomain\n") ioutil.WriteFile(stage2HostsPath, fallbackHosts, 0644) } } for _, mount := range effectiveMounts { diag.Printf("Processing %+v", mount) var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err) return 254 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err) return 254 } switch { case (mount.Flags & syscall.MS_REMOUNT) != 0: { diag.Printf("don't attempt to create files for remount of %q", absTargetPath) } case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 254 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 254 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 254 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 254 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 254 } } if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil { log.Error(err) return 254 } var uidResolver, gidResolver user.Resolver var uid, gid int uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err) return 254 } if uid, _, err = uidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err) return 254 } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err) return 254 } if _, gid, err = gidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err) return 254 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 254 } if err := os.Chdir(workDir); err != nil { log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err) return 254 } // lock the current goroutine to its current OS thread. // This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid, // see https://github.com/golang/go/issues/1435#issuecomment-66054163. runtime.LockOSThread() diag.Printf("setting uid %d gid %d", uid, gid) if err := syscall.Setresgid(gid, gid, gid); err != nil { log.PrintE(fmt.Sprintf("can't set gid %d", gid), err) return 254 } if err := syscall.Setresuid(uid, uid, uid); err != nil { log.PrintE(fmt.Sprintf("can't set uid %d", uid), err) return 254 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 254 } return 0 }
func (s *v1AlphaAPIServer) constrainedGetLogs(request *v1alpha.GetLogsRequest, server v1alpha.PublicAPI_GetLogsServer) error { uuid, err := types.NewUUID(request.PodId) if err != nil { return err } pod, err := getPod(uuid) if err != nil { return err } defer pod.Close() stage1Path := "stage1/rootfs" if pod.usesOverlay() { stage1TreeStoreID, err := pod.getStage1TreeStoreID() if err != nil { return err } stage1Path = fmt.Sprintf("/overlay/%s/upper/", stage1TreeStoreID) } path := filepath.Join(getDataDir(), "/pods/run/", request.PodId, stage1Path, "/var/log/journal/") if _, err := os.Stat(path); os.IsNotExist(err) { return fmt.Errorf("%s: logging unsupported", uuid.String()) } jconf := sdjournal.JournalReaderConfig{ Path: path, } if request.AppName != "" { jconf.Matches = []sdjournal.Match{ { Field: sdjournal.SD_JOURNAL_FIELD_SYSLOG_IDENTIFIER, Value: request.AppName, }, } } if request.SinceTime != 0 { t := time.Unix(request.SinceTime, 0) jconf.Since = -time.Since(t) } if request.Lines != 0 { jconf.NumFromTail = uint64(request.Lines) } jr, err := sdjournal.NewJournalReader(jconf) if err != nil { return err } defer jr.Close() if request.Follow { return jr.Follow(nil, LogsStreamWriter{server: server}) } data, err := ioutil.ReadAll(jr) if err != nil { return err } return server.Send(&v1alpha.GetLogsResponse{Lines: common.RemoveEmptyLines(string(data))}) }
func TestGetPodAndRefreshState(t *testing.T) { testCases := []struct { paths []dirFn locks []dirFn expected states }{ { paths: []dirFn{embryoDir}, expected: states{isEmbryo: true}, }, { paths: []dirFn{prepareDir}, locks: []dirFn{prepareDir}, expected: states{isPreparing: true}, }, { paths: []dirFn{prepareDir}, expected: states{isAbortedPrepare: true}, }, { paths: []dirFn{runDir}, locks: []dirFn{runDir}, expected: states{}, }, { paths: []dirFn{runDir}, expected: states{isExited: true}, }, { paths: []dirFn{garbageDir}, expected: states{isGarbage: true}, }, { paths: []dirFn{garbageDir}, locks: []dirFn{garbageDir}, expected: states{isGarbage: true, isDeleting: true}, }, } uuid, err := types.NewUUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") if err != nil { panic(err) } for i, tcase := range testCases { tmpDir, err := ioutil.TempDir("", "") if err != nil { panic(err) } defer os.RemoveAll(tmpDir) for _, pfn := range tcase.paths { podPath := filepath.Join(pfn(tmpDir), uuid.String()) if err := os.MkdirAll(podPath, 0777); err != nil { panic(err) } } for _, lfn := range tcase.locks { podPath := filepath.Join(lfn(tmpDir), uuid.String()) l, err := lock.NewLock(podPath, lock.Dir) if err != nil { t.Fatalf("error taking lock on directory: %v", err) } err = l.ExclusiveLock() if err != nil { t.Fatalf("could not get exclusive lock on directory: %v", err) } defer l.Unlock() } p, err := getPod(tmpDir, uuid) if err != nil { t.Fatalf("%v: unable to get pod: %v", i, err) } pstate := podToStates(p) if !reflect.DeepEqual(tcase.expected, pstate) { t.Errorf("%v: expected %+v == %+v after getPod", i, tcase.expected, pstate) } err = p.refreshState() if err != nil { t.Errorf("error refreshing state: %v", err) continue } pstate = podToStates(p) if !reflect.DeepEqual(tcase.expected, pstate) { t.Errorf("%v: expected %+v == %+v after refrshState", i, tcase.expected, pstate) } } }
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.PrintE("UUID is missing or malformed", err) return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("failed to load pod", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking // network plugins lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("failed to get rkt lock fd", err) return 1 } if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("failed to set FD_CLOEXEC on rkt lock", err) return 1 } mirrorLocalZoneInfo(p.Root) flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.PrintE("failed to get stage1 flavor", err) return 3 } var n *networking.Networking if netList.Contained() { fps, err := forwardedPorts(p) if err != nil { log.Error(err) return 6 } n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, debug) if err != nil { log.PrintE("failed to setup network", err) return 6 } if err = n.Save(); err != nil { log.PrintE("failed to save networking state", err) n.Teardown(flavor, debug) return 6 } if len(mdsToken) > 0 { hostIP, err := n.GetDefaultHostIP() if err != nil { log.PrintE("failed to get default Host IP", err) return 6 } p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) } } else { if flavor == "kvm" { log.Print("flavor kvm requires private network configuration (try --net)") return 6 } if len(mdsToken) > 0 { p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken) } } if err = stage1initcommon.WriteDefaultTarget(p); err != nil { log.PrintE("failed to write default.target", err) return 2 } if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil { log.PrintE("failed to write prepare-app service template", err) return 2 } if err := stage1initcommon.SetJournalPermissions(p); err != nil { log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err) } if flavor == "kvm" { if err := KvmPodToSystemd(p, n); err != nil { log.PrintE("failed to configure systemd for kvm", err) return 2 } } if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil { log.PrintE("failed to configure systemd", err) return 2 } args, env, err := getArgsEnv(p, flavor, debug, n) if err != nil { log.Error(err) return 3 } // create a separate mount namespace so the cgroup filesystems // are unmounted when exiting the pod if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { log.FatalE("error unsharing", err) } // we recursively make / a "shared and slave" so mount events from the // new namespace don't propagate to the host namespace but mount events // from the host propagate to the new namespace and are forwarded to // its peer group // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { log.FatalE("error making / a slave mount", err) } if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { log.FatalE("error making / a shared and slave mount", err) } enabledCgroups, err := cgroup.GetEnabledCgroups() if err != nil { log.FatalE("error getting cgroups", err) return 5 } // mount host cgroups in the rkt mount namespace if err := mountHostCgroups(enabledCgroups); err != nil { log.FatalE("couldn't mount the host cgroups", err) return 5 } var serviceNames []string for _, app := range p.Manifest.Apps { serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) } s1Root := common.Stage1RootfsPath(p.Root) machineID := stage1initcommon.GetMachineID(p) subcgroup, err := getContainerSubCgroup(machineID) if err == nil { if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { log.PrintE("couldn't mount the container cgroups", err) return 5 } } else { log.PrintE("continuing with per-app isolators disabled", err) } if err = stage1common.WritePpid(os.Getpid()); err != nil { log.Error(err) return 4 } err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("failed to execute %q", args[0]), err) return 7 } return 0 }
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.FatalE("UUID is missing or malformed", err) } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.FatalE("failed to load pod", err) } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking // network plugins lfd, err := common.GetRktLockFD() if err != nil { log.FatalE("failed to get rkt lock fd", err) } if err := sys.CloseOnExec(lfd, true); err != nil { log.FatalE("failed to set FD_CLOEXEC on rkt lock", err) } mirrorLocalZoneInfo(p.Root) flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.FatalE("failed to get stage1 flavor", err) } var n *networking.Networking if netList.Contained() { fps, err := commonnet.ForwardedPorts(p.Manifest) if err != nil { log.FatalE("error initializing forwarding ports", err) } noDNS := dnsConfMode.Pairs["resolv"] != "default" // force ignore CNI DNS results n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, noDNS, debug) if err != nil { log.FatalE("failed to setup network", err) } if err = n.Save(); err != nil { log.PrintE("failed to save networking state", err) n.Teardown(flavor, debug) return 254 } if len(mdsToken) > 0 { hostIP, err := n.GetForwardableNetHostIP() if err != nil { log.FatalE("failed to get default Host IP", err) } p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) } } else { if flavor == "kvm" { log.Fatal("flavor kvm requires private network configuration (try --net)") } if len(mdsToken) > 0 { p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken) } } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } mnt := fs.NewLoggingMounter( fs.MounterFunc(syscall.Mount), fs.UnmounterFunc(syscall.Unmount), diag.Printf, ) if dnsConfMode.Pairs["resolv"] == "host" { stage1initcommon.UseHostResolv(mnt, root) } if dnsConfMode.Pairs["hosts"] == "host" { stage1initcommon.UseHostHosts(mnt, root) } if mutable { if err = stage1initcommon.MutableEnv(p); err != nil { log.FatalE("cannot initialize mutable environment", err) } } else { if err = stage1initcommon.ImmutableEnv(p, interactive, privateUsers, insecureOptions); err != nil { log.FatalE("cannot initialize immutable environment", err) } } if err := stage1initcommon.SetJournalPermissions(p); err != nil { log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err) } if flavor == "kvm" { kvm.InitDebug(debug) if err := KvmNetworkingToSystemd(p, n); err != nil { log.FatalE("failed to configure systemd for kvm", err) } } canMachinedRegister := false if flavor != "kvm" { // kvm doesn't register with systemd right now, see #2664. canMachinedRegister = machinedRegister() } diag.Printf("canMachinedRegister %t", canMachinedRegister) args, env, err := getArgsEnv(p, flavor, canMachinedRegister, debug, n, insecureOptions) if err != nil { log.FatalE("cannot get environment", err) } diag.Printf("args %q", args) diag.Printf("env %q", env) // create a separate mount namespace so the cgroup filesystems // are unmounted when exiting the pod if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { log.FatalE("error unsharing", err) } // we recursively make / a "shared and slave" so mount events from the // new namespace don't propagate to the host namespace but mount events // from the host propagate to the new namespace and are forwarded to // its peer group // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { log.FatalE("error making / a slave mount", err) } if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { log.FatalE("error making / a shared and slave mount", err) } unifiedCgroup, err := cgroup.IsCgroupUnified("/") if err != nil { log.FatalE("error determining cgroup version", err) } diag.Printf("unifiedCgroup %t", unifiedCgroup) s1Root := common.Stage1RootfsPath(p.Root) machineID := stage1initcommon.GetMachineID(p) subcgroup, err := getContainerSubCgroup(machineID, canMachinedRegister, unifiedCgroup) if err != nil { log.FatalE("error getting container subcgroup", err) } diag.Printf("subcgroup %q", subcgroup) if err := ioutil.WriteFile(filepath.Join(p.Root, "subcgroup"), []byte(fmt.Sprintf("%s", subcgroup)), 0644); err != nil { log.FatalE("cannot write subcgroup file", err) } if !unifiedCgroup { enabledCgroups, err := v1.GetEnabledCgroups() if err != nil { log.FatalE("error getting v1 cgroups", err) } diag.Printf("enabledCgroups %q", enabledCgroups) if err := mountHostV1Cgroups(mnt, enabledCgroups); err != nil { log.FatalE("couldn't mount the host v1 cgroups", err) } if !canMachinedRegister { if err := v1.JoinSubcgroup("systemd", subcgroup); err != nil { log.FatalE(fmt.Sprintf("error joining subcgroup %q", subcgroup), err) } } var serviceNames []string for _, app := range p.Manifest.Apps { serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) } diag.Printf("serviceNames %q", serviceNames) if err := mountContainerV1Cgroups(mnt, s1Root, enabledCgroups, subcgroup, serviceNames, insecureOptions); err != nil { log.FatalE("couldn't mount the container v1 cgroups", err) } } // KVM flavor has a bit different logic in handling pid vs ppid, for details look into #2389 // it doesn't require the existence of a "ppid", instead it registers the current pid (which // will be reused by lkvm binary) as a pod process pid used during entering pid_filename := "ppid" if flavor == "kvm" { pid_filename = "pid" } if err = stage1common.WritePid(os.Getpid(), pid_filename); err != nil { log.FatalE("error writing pid", err) } if flavor == "kvm" { if err := KvmPrepareMounts(s1Root, p); err != nil { log.FatalE("error preparing mounts", err) } } err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.FatalE(fmt.Sprintf("failed to execute %q", args[0]), err) } return 0 }
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("can't load pod", err) return 1 } // Sanity checks if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 1 } ra := p.Manifest.Apps[0] imgName := p.AppNameToImageName(ra.Name) args := ra.App.Exec if len(args) == 0 { log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) return 1 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 1 } workDir := "/" if ra.App.WorkingDirectory != "" { workDir = ra.App.WorkingDirectory } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range ra.App.Environment { env = append(env, e.Name+"="+e.Value) } rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs") if err := copyResolv(p); err != nil { log.PrintE("can't copy /etc/resolv.conf", err) return 1 } argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 1 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) for _, mount := range effectiveMounts { var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err) return 1 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err) return 1 } switch { case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0755); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 1 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 1 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 1 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 1 } } if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil { log.Error(err) return 1 } var uidResolver, gidResolver user.Resolver var uid, gid int uidResolver, err = user.NumericIDs(ra.App.User) if err != nil { uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err) return 1 } if uid, _, err = uidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err) return 1 } gidResolver, err = user.NumericIDs(ra.App.Group) if err != nil { gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil) } if err != nil { // give up log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err) return 1 } if _, gid, err = gidResolver.IDs(); err != nil { log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err) return 1 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 1 } if err := os.Chdir(workDir); err != nil { log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err) return 1 } // lock the current goroutine to its current OS thread. // This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid, // see https://github.com/golang/go/issues/1435#issuecomment-66054163. runtime.LockOSThread() diag.Printf("setting uid %d gid %d", uid, gid) if err := syscall.Setresgid(gid, gid, gid); err != nil { log.PrintE(fmt.Sprintf("can't set gid %d", gid), err) return 1 } if err := syscall.Setresuid(uid, uid, uid); err != nil { log.PrintE(fmt.Sprintf("can't set uid %d", uid), err) return 1 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 1 } return 0 }
// TODO use named flags instead of positional func main() { flag.Parse() stage1initcommon.InitDebug(debug) log, diag, _ = rktlog.NewLogSet("stage1", debug) if !debug { diag.SetOutput(ioutil.Discard) } uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.PrintE("UUID is missing or malformed", err) os.Exit(254) } appName, err := types.NewACName(flag.Arg(1)) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } enterEP := flag.Arg(2) root := "." p, err := stage1types.LoadPod(root, uuid) if err != nil { log.PrintE("failed to load pod", err) os.Exit(254) } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } ra := p.Manifest.Apps.Get(*appName) if ra == nil { log.Printf("failed to get app") os.Exit(254) } if ra.App.WorkingDirectory == "" { ra.App.WorkingDirectory = "/" } binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { log.PrintE("failed to find bin path", err) os.Exit(254) } w := stage1initcommon.NewUnitWriter(p) w.AppUnit(ra, binPath, privateUsers, insecureOptions, unit.NewUnitOption("Unit", "Before", "halt.target"), unit.NewUnitOption("Unit", "Conflicts", "halt.target"), unit.NewUnitOption("Service", "StandardOutput", "journal+console"), unit.NewUnitOption("Service", "StandardError", "journal+console"), ) w.AppReaperUnit(ra.Name, binPath) if err := w.Error(); err != nil { log.PrintE("error generating app units", err) os.Exit(254) } args := []string{enterEP} args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") cmd := exec.Cmd{ Path: args[0], Args: args, } if err := cmd.Run(); err != nil { log.PrintE("error executing daemon-reload", err) os.Exit(254) } args = []string{enterEP} args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) args = append(args, "/usr/bin/systemctl") args = append(args, "start") args = append(args, appName.String()) cmd = exec.Cmd{ Path: args[0], Args: args, } if err := cmd.Run(); err != nil { log.PrintE(fmt.Sprintf("error starting app %q", appName.String()), err) os.Exit(254) } // TODO unmount all the volumes os.Exit(0) }
// isUUID returns true if the input is a valid rkt UUID, // e.g. "2372bc17-47cb-43fb-8d78-20b31729feda". func isUUID(input string) bool { if _, err := appctypes.NewUUID(input); err != nil { return false } return true }
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.Print("UUID is missing or malformed\n") return 1 } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.PrintE("can't load pod", err) return 1 } if len(p.Manifest.Apps) != 1 { log.Printf("flavor %q only supports 1 application per Pod for now", flavor) return 1 } lfd, err := common.GetRktLockFD() if err != nil { log.PrintE("can't get rkt lock fd", err) return 1 } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished if err := sys.CloseOnExec(lfd, true); err != nil { log.PrintE("can't set FD_CLOEXEC on rkt lock", err) return 1 } env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"} for _, e := range p.Manifest.Apps[0].App.Environment { env = append(env, e.Name+"="+e.Value) } args := p.Manifest.Apps[0].App.Exec rfs := filepath.Join(common.AppPath(p.Root, p.Manifest.Apps[0].Name), "rootfs") argFlyMounts, err := evaluateMounts(rfs, string(p.Manifest.Apps[0].Name), p) if err != nil { log.PrintE("can't evaluate mounts", err) return 1 } effectiveMounts := append( []flyMount{ {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, {"tmpfs", rfs, "/tmp", "tmpfs", 0}, }, argFlyMounts..., ) for _, mount := range effectiveMounts { var ( err error hostPathInfo os.FileInfo targetPathInfo os.FileInfo ) if strings.HasPrefix(mount.HostPath, "/") { if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { log.PrintE(fmt.Sprintf("stat of host directory %s", mount.HostPath), err) return 1 } } else { hostPathInfo = nil } absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath) if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { log.PrintE(fmt.Sprintf("stat of target directory %s", absTargetPath), err) return 1 } switch { case targetPathInfo == nil: absTargetPathParent, _ := filepath.Split(absTargetPath) if err := os.MkdirAll(absTargetPathParent, 0700); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } switch { case hostPathInfo == nil || hostPathInfo.IsDir(): if err := os.Mkdir(absTargetPath, 0700); err != nil { log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err) return 1 } case !hostPathInfo.IsDir(): file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) if err != nil { log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err) return 1 } file.Close() } case hostPathInfo != nil: switch { case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath) return 1 case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath) return 1 } } if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err) return 1 } } if err = stage1common.WritePpid(os.Getpid()); err != nil { log.Error(err) return 4 } diag.Printf("chroot to %q", rfs) if err := syscall.Chroot(rfs); err != nil { log.PrintE("can't chroot", err) return 1 } if err := os.Chdir("/"); err != nil { log.PrintE("can't change to root new directory", err) return 1 } diag.Printf("execing %q in %q", args, rfs) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err) return 7 } return 0 }