// validatePodManifest reads the user-specified pod manifest, prepares the app images // and validates the pod manifest. If the pod manifest passes validation, it returns // the manifest as []byte. // TODO(yifan): More validation in the future. func validatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { pmb, err := ioutil.ReadFile(cfg.PodManifest) if err != nil { return nil, errwrap.Wrap(errors.New("error reading pod manifest"), err) } var pm schema.PodManifest if err := json.Unmarshal(pmb, &pm); err != nil { return nil, errwrap.Wrap(errors.New("error unmarshaling pod manifest"), err) } appNames := make(map[types.ACName]struct{}) for _, ra := range pm.Apps { img := ra.Image if img.ID.Empty() { return nil, fmt.Errorf("no image ID for app %q", ra.Name) } am, err := cfg.Store.GetImageManifest(img.ID.String()) if err != nil { return nil, errwrap.Wrap(errors.New("error getting the image manifest from store"), err) } if _, err := prepareAppImage(cfg, ra.Name, img.ID, dir, cfg.UseOverlay); err != nil { return nil, errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) } if _, ok := appNames[ra.Name]; ok { return nil, fmt.Errorf("multiple apps with same name %s", ra.Name) } appNames[ra.Name] = struct{}{} if ra.App == nil && am.App == nil { return nil, fmt.Errorf("no app section in the pod manifest or the image manifest") } } // Validate forwarded ports if _, err := commonnet.ForwardedPorts(&pm); err != nil { return nil, err } return pmb, nil }
func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { log.FatalE("UUID is missing or malformed", err) } root := "." p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { log.FatalE("failed to load pod", err) } // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking // network plugins lfd, err := common.GetRktLockFD() if err != nil { log.FatalE("failed to get rkt lock fd", err) } if err := sys.CloseOnExec(lfd, true); err != nil { log.FatalE("failed to set FD_CLOEXEC on rkt lock", err) } mirrorLocalZoneInfo(p.Root) flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { log.FatalE("failed to get stage1 flavor", err) } var n *networking.Networking if netList.Contained() { fps, err := commonnet.ForwardedPorts(p.Manifest) if err != nil { log.FatalE("error initializing forwarding ports", err) } noDNS := dnsConfMode.Pairs["resolv"] != "default" // force ignore CNI DNS results n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, noDNS, debug) if err != nil { log.FatalE("failed to setup network", err) } if err = n.Save(); err != nil { log.PrintE("failed to save networking state", err) n.Teardown(flavor, debug) return 254 } if len(mdsToken) > 0 { hostIP, err := n.GetForwardableNetHostIP() if err != nil { log.FatalE("failed to get default Host IP", err) } p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) } } else { if flavor == "kvm" { log.Fatal("flavor kvm requires private network configuration (try --net)") } if len(mdsToken) > 0 { p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken) } } insecureOptions := stage1initcommon.Stage1InsecureOptions{ DisablePaths: disablePaths, DisableCapabilities: disableCapabilities, DisableSeccomp: disableSeccomp, } mnt := fs.NewLoggingMounter( fs.MounterFunc(syscall.Mount), fs.UnmounterFunc(syscall.Unmount), diag.Printf, ) if dnsConfMode.Pairs["resolv"] == "host" { stage1initcommon.UseHostResolv(mnt, root) } if dnsConfMode.Pairs["hosts"] == "host" { stage1initcommon.UseHostHosts(mnt, root) } if mutable { if err = stage1initcommon.MutableEnv(p); err != nil { log.FatalE("cannot initialize mutable environment", err) } } else { if err = stage1initcommon.ImmutableEnv(p, interactive, privateUsers, insecureOptions); err != nil { log.FatalE("cannot initialize immutable environment", err) } } if err := stage1initcommon.SetJournalPermissions(p); err != nil { log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err) } if flavor == "kvm" { kvm.InitDebug(debug) if err := KvmNetworkingToSystemd(p, n); err != nil { log.FatalE("failed to configure systemd for kvm", err) } } canMachinedRegister := false if flavor != "kvm" { // kvm doesn't register with systemd right now, see #2664. canMachinedRegister = machinedRegister() } diag.Printf("canMachinedRegister %t", canMachinedRegister) args, env, err := getArgsEnv(p, flavor, canMachinedRegister, debug, n, insecureOptions) if err != nil { log.FatalE("cannot get environment", err) } diag.Printf("args %q", args) diag.Printf("env %q", env) // create a separate mount namespace so the cgroup filesystems // are unmounted when exiting the pod if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { log.FatalE("error unsharing", err) } // we recursively make / a "shared and slave" so mount events from the // new namespace don't propagate to the host namespace but mount events // from the host propagate to the new namespace and are forwarded to // its peer group // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { log.FatalE("error making / a slave mount", err) } if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { log.FatalE("error making / a shared and slave mount", err) } unifiedCgroup, err := cgroup.IsCgroupUnified("/") if err != nil { log.FatalE("error determining cgroup version", err) } diag.Printf("unifiedCgroup %t", unifiedCgroup) s1Root := common.Stage1RootfsPath(p.Root) machineID := stage1initcommon.GetMachineID(p) subcgroup, err := getContainerSubCgroup(machineID, canMachinedRegister, unifiedCgroup) if err != nil { log.FatalE("error getting container subcgroup", err) } diag.Printf("subcgroup %q", subcgroup) if err := ioutil.WriteFile(filepath.Join(p.Root, "subcgroup"), []byte(fmt.Sprintf("%s", subcgroup)), 0644); err != nil { log.FatalE("cannot write subcgroup file", err) } if !unifiedCgroup { enabledCgroups, err := v1.GetEnabledCgroups() if err != nil { log.FatalE("error getting v1 cgroups", err) } diag.Printf("enabledCgroups %q", enabledCgroups) if err := mountHostV1Cgroups(mnt, enabledCgroups); err != nil { log.FatalE("couldn't mount the host v1 cgroups", err) } if !canMachinedRegister { if err := v1.JoinSubcgroup("systemd", subcgroup); err != nil { log.FatalE(fmt.Sprintf("error joining subcgroup %q", subcgroup), err) } } var serviceNames []string for _, app := range p.Manifest.Apps { serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) } diag.Printf("serviceNames %q", serviceNames) if err := mountContainerV1Cgroups(mnt, s1Root, enabledCgroups, subcgroup, serviceNames, insecureOptions); err != nil { log.FatalE("couldn't mount the container v1 cgroups", err) } } // KVM flavor has a bit different logic in handling pid vs ppid, for details look into #2389 // it doesn't require the existence of a "ppid", instead it registers the current pid (which // will be reused by lkvm binary) as a pod process pid used during entering pid_filename := "ppid" if flavor == "kvm" { pid_filename = "pid" } if err = stage1common.WritePid(os.Getpid(), pid_filename); err != nil { log.FatalE("error writing pid", err) } if flavor == "kvm" { if err := KvmPrepareMounts(s1Root, p); err != nil { log.FatalE("error preparing mounts", err) } } err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { log.FatalE(fmt.Sprintf("failed to execute %q", args[0]), err) } return 0 }
// generatePodManifest creates the pod manifest from the command line input. // It returns the pod manifest as []byte on success. // This is invoked if no pod manifest is specified at the command line. func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { pm := schema.PodManifest{ ACKind: "PodManifest", Apps: make(schema.AppList, 0), } v, err := types.NewSemVer(version.Version) if err != nil { return nil, errwrap.Wrap(errors.New("error creating version"), err) } pm.ACVersion = *v if err := cfg.Apps.Walk(func(app *apps.App) error { img := app.ImageID am, err := cfg.Store.GetImageManifest(img.String()) if err != nil { return errwrap.Wrap(errors.New("error getting the manifest"), err) } var appName *types.ACName if app.Name != "" { appName, err = types.NewACName(app.Name) if err != nil { return errwrap.Wrap(errors.New("invalid app name format"), err) } } else { appName, err = imageNameToAppName(am.Name) if err != nil { return errwrap.Wrap(errors.New("error converting image name to app name"), err) } } if _, err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil { return errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", am.Name) } if am.App == nil && app.Exec == "" { return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", img) } ra := schema.RuntimeApp{ // TODO(vc): leverage RuntimeApp.Name for disambiguating the apps Name: *appName, App: am.App, Image: schema.RuntimeImage{ Name: &am.Name, ID: img, Labels: am.Labels, }, Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), ReadOnlyRootFS: app.ReadOnlyRootFS, } if app.Exec != "" { // Create a minimal App section if not present if am.App == nil { ra.App = &types.App{ User: strconv.Itoa(os.Getuid()), Group: strconv.Itoa(os.Getgid()), } } ra.App.Exec = []string{app.Exec} } if app.Args != nil { ra.App.Exec = append(ra.App.Exec, app.Args...) } if app.WorkingDir != "" { ra.App.WorkingDirectory = app.WorkingDir } if err := prepareIsolators(app, ra.App); err != nil { return err } if app.User != "" { ra.App.User = app.User } if app.Group != "" { ra.App.Group = app.Group } if app.SupplementaryGIDs != nil { ra.App.SupplementaryGIDs = app.SupplementaryGIDs } if app.UserAnnotations != nil { ra.App.UserAnnotations = app.UserAnnotations } if app.UserLabels != nil { ra.App.UserLabels = app.UserLabels } // loading the environment from the lowest priority to highest if cfg.InheritEnv { // Inherit environment does not override app image environment mergeEnvs(&ra.App.Environment, os.Environ(), false) } mergeEnvs(&ra.App.Environment, cfg.EnvFromFile, true) mergeEnvs(&ra.App.Environment, cfg.ExplicitEnv, true) if app.Environments != nil { envs := make([]string, 0, len(app.Environments)) for name, value := range app.Environments { envs = append(envs, fmt.Sprintf("%s=%s", name, value)) } mergeEnvs(&ra.App.Environment, envs, true) } pm.Apps = append(pm.Apps, ra) return nil }); err != nil { return nil, err } // TODO(jonboulle): check that app mountpoint expectations are // satisfied here, rather than waiting for stage1 pm.Volumes = cfg.Apps.Volumes // Check to see if ports have any errors pm.Ports = cfg.Ports if _, err := commonnet.ForwardedPorts(&pm); err != nil { return nil, err } pm.Annotations = append(pm.Annotations, types.Annotation{ Name: "coreos.com/rkt/stage1/mutable", Value: strconv.FormatBool(cfg.Mutable), }) pm.UserAnnotations = cfg.UserAnnotations pm.UserLabels = cfg.UserLabels pmb, err := json.Marshal(pm) if err != nil { return nil, errwrap.Wrap(errors.New("error marshalling pod manifest"), err) } return pmb, nil }