Example #1
func withClearedCloExec(lfd int, f func() error) error {
	err := sys.CloseOnExec(lfd, false)
	if err != nil {
		return err
	}
	defer sys.CloseOnExec(lfd, true)

	return f()
}
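For context: withClearedCloExec temporarily clears FD_CLOEXEC on the lock file descriptor so it is inherited across the exec performed inside f, then restores the flag on the way out. The snippet below is a minimal sketch of what a CloseOnExec-style helper usually looks like on Linux, assuming it is backed by fcntl(F_GETFD/F_SETFD); it is an illustration, not the actual rkt pkg/sys implementation.

// Sketch (assumption): an FD_CLOEXEC toggle in the spirit of sys.CloseOnExec,
// built directly on fcntl(2). Linux-only.
package sysx

import "syscall"

func CloseOnExec(fd int, set bool) error {
	// Read the current descriptor flags.
	flags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFD, 0)
	if errno != 0 {
		return errno
	}
	if set {
		flags |= syscall.FD_CLOEXEC
	} else {
		flags &^= syscall.FD_CLOEXEC
	}
	// Write the flags back with FD_CLOEXEC set or cleared.
	if _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_SETFD, flags); errno != 0 {
		return errno
	}
	return nil
}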
Example #2
File: builder.go Project: blablacar/dgr
func (b *Builder) Build() error {
	logs.WithF(b.fields).Info("Building aci")

	lfd, err := rktcommon.GetRktLockFD()
	if err != nil {
		return errs.WithEF(err, b.fields, "can't get rkt lock fd")
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		return errs.WithEF(err, b.fields, "can't set FD_CLOEXEC on rkt lock")
	}

	if err := b.runBuild(); err != nil {
		return err
	}

	if err := b.writeManifest(); err != nil {
		return err
	}

	if err := b.tarAci(); err != nil {
		return err
	}

	return nil
}
Example #3
File: main.go Project: sinfomicien/rkt
func run() int {
	lfd, err := common.GetRktLockFD()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err)
		return 1
	}

	if err := stage1common.WritePpid(os.Getpid()); err != nil {
		fmt.Fprintf(os.Stderr, "write ppid: %v", err)
		return 1
	}
	fmt.Println("success, stub stage1 would at this point switch to stage2")
	return 0
}
Example #4
func sdListenFDs(unsetEnvironment bool) (int, error) {
	defer func() {
		if unsetEnvironment {
			os.Unsetenv("LISTEN_PID")
			os.Unsetenv("LISTEN_FDS")
		}
	}()

	e := os.Getenv("LISTEN_PID")
	if e == "" {
		return 0, nil
	}

	pid, err := strconv.Atoi(e)
	if err != nil {
		return -1, err
	}

	if os.Getpid() != pid {
		return 0, nil
	}

	e = os.Getenv("LISTEN_FDS")
	if e == "" {
		return 0, nil
	}

	n, err := strconv.Atoi(e)
	if err != nil {
		return -1, err
	}

	for fd := SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START+n; fd++ {
		if err := sys.CloseOnExec(fd, true); err != nil {
			return -1, err
		}
	}

	return n, nil
}
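The count returned by sdListenFDs is normally consumed by wrapping the inherited descriptors (which start at SD_LISTEN_FDS_START, fd 3 in the systemd socket-activation protocol) in net.Listener values. Below is a minimal sketch of that step using only the standard library; the helper name and local constant are illustrative, not part of the code above.

package main

import (
	"fmt"
	"net"
	"os"
)

const sdListenFDsStart = 3 // first inherited fd in the systemd protocol

// activationListeners wraps n inherited descriptors as net.Listener values.
func activationListeners(n int) ([]net.Listener, error) {
	var listeners []net.Listener
	for fd := sdListenFDsStart; fd < sdListenFDsStart+n; fd++ {
		f := os.NewFile(uintptr(fd), fmt.Sprintf("LISTEN_FD_%d", fd))
		l, err := net.FileListener(f)
		// net.FileListener duplicates the descriptor, so the os.File can be closed.
		f.Close()
		if err != nil {
			return nil, err
		}
		listeners = append(listeners, l)
	}
	return listeners, nil
}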
Example #5
File: run.go Project: krieg/rkt
// Run mounts the right overlay filesystems and actually runs the prepared
// pod by exec()ing the stage1 init inside the pod filesystem.
func Run(cfg RunConfig, dir string) {
	useOverlay, err := preparedWithOverlay(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	log.Printf("Setting up stage1")
	if err := setupStage1Image(cfg, cfg.Stage1Image, dir, useOverlay); err != nil {
		log.Fatalf("error setting up stage1: %v", err)
	}
	log.Printf("Wrote filesystem to %s\n", dir)

	for _, app := range cfg.Apps {
		if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil {
			log.Fatalf("error setting up app image: %v", err)
		}
	}

	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
		log.Fatalf("setting lock fd environment: %v", err)
	}

	log.Printf("Pivoting to filesystem %s", dir)
	if err := os.Chdir(dir); err != nil {
		log.Fatalf("failed changing to dir: %v", err)
	}

	ep, err := getStage1Entrypoint(dir, runEntrypoint)
	if err != nil {
		log.Fatalf("error determining init entrypoint: %v", err)
	}
	args := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)}
	log.Printf("Execing %s", ep)

	if cfg.Debug {
		args = append(args, "--debug")
	}
	if cfg.PrivateNet.Any() {
		args = append(args, "--private-net="+cfg.PrivateNet.String())
	}
	if cfg.Interactive {
		args = append(args, "--interactive")
	}
	if cfg.MDSRegister {
		mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps)
		if err != nil {
			log.Fatalf("failed to register the pod: %v", err)
		}

		args = append(args, "--mds-token="+mdsToken)
	}

	if cfg.LocalConfig != "" {
		args = append(args, "--local-config="+cfg.LocalConfig)
	}

	args = append(args, cfg.UUID.String())

	// make sure the lock fd stays open across exec
	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
	}

	if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil {
		log.Fatalf("error setting process SELinux label: %v", err)
	}

	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
		log.Fatalf("error execing init: %v", err)
	}
}
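Both halves of the lock-fd handshake are visible in this example and in the stage1 examples below: stage0 publishes the descriptor number through the common.EnvLockFd environment variable and clears FD_CLOEXEC right before exec, and stage1 later recovers it with common.GetRktLockFD. A minimal sketch of the receiving side, assuming the variable is named RKT_LOCK_FD (the literal name is an assumption; the code above only references it as common.EnvLockFd):

package main

import (
	"fmt"
	"os"
	"strconv"
)

// getLockFD recovers the inherited lock descriptor from the environment.
// The variable name RKT_LOCK_FD is assumed here for illustration.
func getLockFD() (int, error) {
	v := os.Getenv("RKT_LOCK_FD")
	if v == "" {
		return -1, fmt.Errorf("lock fd environment variable not set")
	}
	fd, err := strconv.Atoi(v)
	if err != nil {
		return -1, fmt.Errorf("invalid lock fd %q: %v", v, err)
	}
	return fd, nil
}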
Example #6
File: run.go Project: matomesc/rkt
// Run mounts the right overlay filesystems and actually runs the prepared
// pod by exec()ing the stage1 init inside the pod filesystem.
func Run(cfg RunConfig, dir string, dataDir string) {
	useOverlay, err := preparedWithOverlay(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	privateUsers, err := preparedWithPrivateUsers(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	debug("Setting up stage1")
	if err := setupStage1Image(cfg, dir, useOverlay); err != nil {
		log.Fatalf("error setting up stage1: %v", err)
	}
	debug("Wrote filesystem to %s\n", dir)

	for _, app := range cfg.Apps {
		if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil {
			log.Fatalf("error setting up app image: %v", err)
		}
	}

	destRootfs := common.Stage1RootfsPath(dir)
	flavor, err := os.Readlink(filepath.Join(destRootfs, "flavor"))
	if err != nil {
		log.Printf("error reading flavor: %v\n", err)
	}
	if flavor == "kvm" {
		err := kvmCheckSSHSetup(destRootfs, dataDir)
		if err != nil {
			log.Fatalf("error setting up ssh keys: %v", err)
		}
	}

	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
		log.Fatalf("setting lock fd environment: %v", err)
	}

	if err := os.Setenv(common.EnvSELinuxContext, fmt.Sprintf("%v", cfg.ProcessLabel)); err != nil {
		log.Fatalf("setting SELinux context environment: %v", err)
	}

	debug("Pivoting to filesystem %s", dir)
	if err := os.Chdir(dir); err != nil {
		log.Fatalf("failed changing to dir: %v", err)
	}

	ep, err := getStage1Entrypoint(dir, runEntrypoint)
	if err != nil {
		log.Fatalf("error determining 'run' entrypoint: %v", err)
	}
	args := []string{filepath.Join(destRootfs, ep)}
	debug("Execing %s", ep)

	if cfg.Debug {
		args = append(args, "--debug")
	}

	args = append(args, "--net="+cfg.Net.String())

	if cfg.Interactive {
		args = append(args, "--interactive")
	}
	if len(privateUsers) > 0 {
		args = append(args, "--private-users="+privateUsers)
	}
	if cfg.MDSRegister {
		mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps)
		if err != nil {
			log.Fatalf("failed to register the pod: %v", err)
		}

		args = append(args, "--mds-token="+mdsToken)
	}

	if cfg.LocalConfig != "" {
		args = append(args, "--local-config="+cfg.LocalConfig)
	}

	args = append(args, cfg.UUID.String())

	// make sure the lock fd stays open across exec
	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
	}

	tpmEvent := fmt.Sprintf("rkt: Rootfs: %s Manifest: %s Stage 1 args: %s", cfg.CommonConfig.RootHash, cfg.CommonConfig.ManifestData, strings.Join(args, " "))
	// If there's no TPM available or there's a failure for some other
	// reason, ignore it and continue anyway. Long term we'll want policy
	// that enforces TPM behaviour, but we don't have any infrastructure
	// around that yet.
	_ = tpm.Extend(tpmEvent)
	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
		log.Fatalf("error execing init: %v", err)
	}
}
Example #7
File: init.go Project: joshix/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.FatalE("UUID is missing or malformed", err)
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.FatalE("failed to load pod", err)
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.FatalE("failed to get rkt lock fd", err)
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.FatalE("failed to set FD_CLOEXEC on rkt lock", err)
	}

	mirrorLocalZoneInfo(p.Root)

	flavor, _, err := stage1initcommon.GetFlavor(p)
	if err != nil {
		log.FatalE("failed to get stage1 flavor", err)
	}

	var n *networking.Networking
	if netList.Contained() {
		fps, err := commonnet.ForwardedPorts(p.Manifest)
		if err != nil {
			log.FatalE("error initializing forwarding ports", err)
		}

		noDNS := dnsConfMode.Pairs["resolv"] != "default" // force ignore CNI DNS results
		n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, noDNS, debug)
		if err != nil {
			log.FatalE("failed to setup network", err)
		}

		if err = n.Save(); err != nil {
			log.PrintE("failed to save networking state", err)
			n.Teardown(flavor, debug)
			return 254
		}

		if len(mdsToken) > 0 {
			hostIP, err := n.GetForwardableNetHostIP()
			if err != nil {
				log.FatalE("failed to get default Host IP", err)
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		if flavor == "kvm" {
			log.Fatal("flavor kvm requires private network configuration (try --net)")
		}
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	insecureOptions := stage1initcommon.Stage1InsecureOptions{
		DisablePaths:        disablePaths,
		DisableCapabilities: disableCapabilities,
		DisableSeccomp:      disableSeccomp,
	}

	mnt := fs.NewLoggingMounter(
		fs.MounterFunc(syscall.Mount),
		fs.UnmounterFunc(syscall.Unmount),
		diag.Printf,
	)

	if dnsConfMode.Pairs["resolv"] == "host" {
		stage1initcommon.UseHostResolv(mnt, root)
	}

	if dnsConfMode.Pairs["hosts"] == "host" {
		stage1initcommon.UseHostHosts(mnt, root)
	}

	if mutable {
		if err = stage1initcommon.MutableEnv(p); err != nil {
			log.FatalE("cannot initialize mutable environment", err)
		}
	} else {
		if err = stage1initcommon.ImmutableEnv(p, interactive, privateUsers, insecureOptions); err != nil {
			log.FatalE("cannot initialize immutable environment", err)
		}
	}

	if err := stage1initcommon.SetJournalPermissions(p); err != nil {
		log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err)
	}

	if flavor == "kvm" {
		kvm.InitDebug(debug)
		if err := KvmNetworkingToSystemd(p, n); err != nil {
			log.FatalE("failed to configure systemd for kvm", err)
		}
	}

	canMachinedRegister := false
	if flavor != "kvm" {
		// kvm doesn't register with systemd right now, see #2664.
		canMachinedRegister = machinedRegister()
	}
	diag.Printf("canMachinedRegister %t", canMachinedRegister)

	args, env, err := getArgsEnv(p, flavor, canMachinedRegister, debug, n, insecureOptions)
	if err != nil {
		log.FatalE("cannot get environment", err)
	}
	diag.Printf("args %q", args)
	diag.Printf("env %q", env)

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.FatalE("error unsharing", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.FatalE("error making / a slave mount", err)
	}
	if err := mnt.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.FatalE("error making / a shared and slave mount", err)
	}

	unifiedCgroup, err := cgroup.IsCgroupUnified("/")
	if err != nil {
		log.FatalE("error determining cgroup version", err)
	}
	diag.Printf("unifiedCgroup %t", unifiedCgroup)

	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := stage1initcommon.GetMachineID(p)

	subcgroup, err := getContainerSubCgroup(machineID, canMachinedRegister, unifiedCgroup)
	if err != nil {
		log.FatalE("error getting container subcgroup", err)
	}
	diag.Printf("subcgroup %q", subcgroup)

	if err := ioutil.WriteFile(filepath.Join(p.Root, "subcgroup"),
		[]byte(fmt.Sprintf("%s", subcgroup)), 0644); err != nil {
		log.FatalE("cannot write subcgroup file", err)
	}

	if !unifiedCgroup {
		enabledCgroups, err := v1.GetEnabledCgroups()
		if err != nil {
			log.FatalE("error getting v1 cgroups", err)
		}
		diag.Printf("enabledCgroups %q", enabledCgroups)

		if err := mountHostV1Cgroups(mnt, enabledCgroups); err != nil {
			log.FatalE("couldn't mount the host v1 cgroups", err)
		}

		if !canMachinedRegister {
			if err := v1.JoinSubcgroup("systemd", subcgroup); err != nil {
				log.FatalE(fmt.Sprintf("error joining subcgroup %q", subcgroup), err)
			}
		}

		var serviceNames []string
		for _, app := range p.Manifest.Apps {
			serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name))
		}
		diag.Printf("serviceNames %q", serviceNames)

		if err := mountContainerV1Cgroups(mnt, s1Root, enabledCgroups, subcgroup, serviceNames, insecureOptions); err != nil {
			log.FatalE("couldn't mount the container v1 cgroups", err)
		}

	}

	// KVM flavor has a bit different logic in handling pid vs ppid, for details look into #2389
	// it doesn't require the existence of a "ppid", instead it registers the current pid (which
	// will be reused by lkvm binary) as a pod process pid used during entering
	pid_filename := "ppid"
	if flavor == "kvm" {
		pid_filename = "pid"
	}

	if err = stage1common.WritePid(os.Getpid(), pid_filename); err != nil {
		log.FatalE("error writing pid", err)
	}

	if flavor == "kvm" {
		if err := KvmPrepareMounts(s1Root, p); err != nil {
			log.FatalE("error preparing mounts", err)
		}
	}

	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})

	if err != nil {
		log.FatalE(fmt.Sprintf("failed to execute %q", args[0]), err)
	}

	return 0
}
Example #8
File: main.go Project: nak3/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.Print("UUID is missing or malformed\n")
		return 1
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.PrintE("can't load pod", err)
		return 1
	}

	// Sanity checks
	if len(p.Manifest.Apps) != 1 {
		log.Printf("flavor %q only supports 1 application per Pod for now", flavor)
		return 1
	}

	ra := p.Manifest.Apps[0]

	imgName := p.AppNameToImageName(ra.Name)
	args := ra.App.Exec
	if len(args) == 0 {
		log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
		return 1
	}

	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("can't get rkt lock fd", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished
	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("can't set FD_CLOEXEC on rkt lock", err)
		return 1
	}

	workDir := "/"
	if ra.App.WorkingDirectory != "" {
		workDir = ra.App.WorkingDirectory
	}

	env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}
	for _, e := range ra.App.Environment {
		env = append(env, e.Name+"="+e.Value)
	}

	rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs")

	if err := copyResolv(p); err != nil {
		log.PrintE("can't copy /etc/resolv.conf", err)
		return 1
	}

	argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p)
	if err != nil {
		log.PrintE("can't evaluate mounts", err)
		return 1
	}

	effectiveMounts := append(
		[]flyMount{
			{"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC},

			{"tmpfs", rfs, "/tmp", "tmpfs", 0},
		},
		argFlyMounts...,
	)

	for _, mount := range effectiveMounts {
		var (
			err            error
			hostPathInfo   os.FileInfo
			targetPathInfo os.FileInfo
		)

		if strings.HasPrefix(mount.HostPath, "/") {
			if hostPathInfo, err = os.Stat(mount.HostPath); err != nil {
				log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err)
				return 1
			}
		} else {
			hostPathInfo = nil
		}

		absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath)
		if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) {
			log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err)
			return 1
		}

		switch {
		case targetPathInfo == nil:
			absTargetPathParent, _ := filepath.Split(absTargetPath)
			if err := os.MkdirAll(absTargetPathParent, 0755); err != nil {
				log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
				return 1
			}
			switch {
			case hostPathInfo == nil || hostPathInfo.IsDir():
				if err := os.Mkdir(absTargetPath, 0755); err != nil {
					log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
					return 1
				}
			case !hostPathInfo.IsDir():
				file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700)
				if err != nil {
					log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err)
					return 1
				}
				file.Close()
			}
		case hostPathInfo != nil:
			switch {
			case hostPathInfo.IsDir() && !targetPathInfo.IsDir():
				log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath)
				return 1
			case !hostPathInfo.IsDir() && targetPathInfo.IsDir():
				log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath)
				return 1
			}
		}

		if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil {
			log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err)
			return 1
		}
	}

	if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil {
		log.Error(err)
		return 1
	}

	var uidResolver, gidResolver user.Resolver
	var uid, gid int

	uidResolver, err = user.NumericIDs(ra.App.User)
	if err != nil {
		uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err)
		return 1
	}

	if uid, _, err = uidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err)
		return 1
	}

	gidResolver, err = user.NumericIDs(ra.App.Group)
	if err != nil {
		gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err)
		return 1
	}

	if _, gid, err = gidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err)
		return 1
	}

	diag.Printf("chroot to %q", rfs)
	if err := syscall.Chroot(rfs); err != nil {
		log.PrintE("can't chroot", err)
		return 1
	}

	if err := os.Chdir(workDir); err != nil {
		log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err)
		return 1
	}

	// lock the current goroutine to its current OS thread.
	// This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid,
	// see https://github.com/golang/go/issues/1435#issuecomment-66054163.
	runtime.LockOSThread()

	diag.Printf("setting uid %d gid %d", uid, gid)

	if err := syscall.Setresgid(gid, gid, gid); err != nil {
		log.PrintE(fmt.Sprintf("can't set gid %d", gid), err)
		return 1
	}

	if err := syscall.Setresuid(uid, uid, uid); err != nil {
		log.PrintE(fmt.Sprintf("can't set uid %d", uid), err)
		return 1
	}

	diag.Printf("execing %q in %q", args, rfs)
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err)
		return 1
	}

	return 0
}
Example #9
File: init.go Project: ParthDesai/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		fmt.Fprintln(os.Stderr, "UUID is missing or malformed")
		return 1
	}

	root := "."
	p, err := LoadPod(root, uuid)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	flavor, _, err := p.getFlavor()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get stage1 flavor: %v\n", err)
		return 3
	}

	var n *networking.Networking
	if privNet.Any() {
		fps, err := forwardedPorts(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			return 6
		}

		n, err = networking.Setup(root, p.UUID, fps, privNet, localConfig, flavor)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to setup network: %v\n", err)
			return 6
		}

		if err = n.Save(); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to save networking state %v\n", err)
			n.Teardown(flavor)
			return 6
		}

		if len(mdsToken) > 0 {
			hostIP, err := n.GetDefaultHostIP()
			if err != nil {
				fmt.Fprintf(os.Stderr, "Failed to get default Host IP: %v\n", err)
				return 6
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		if flavor == "kvm" {
			fmt.Fprintf(os.Stderr, "Flavor kvm requires private network configuration (try --private-net).\n")
			return 6
		}
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	if err = p.WritePrepareAppTemplate(); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to write prepare-app service template: %v\n", err)
		return 2
	}

	if err = p.PodToSystemd(interactive, flavor); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err)
		return 2
	}

	args, env, err := getArgsEnv(p, flavor, debug, n)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return 3
	}

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.Fatalf("error unsharing: %v", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.Fatalf("error making / a slave mount: %v", err)
	}
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.Fatalf("error making / a shared and slave mount: %v", err)
	}

	var serviceNames []string
	for _, app := range p.Manifest.Apps {
		serviceNames = append(serviceNames, ServiceUnitName(app.Name))
	}
	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := p.GetMachineID()
	subcgroup, err := getContainerSubCgroup(machineID)
	if err == nil {
		if err := cgroup.CreateCgroups(s1Root, subcgroup, serviceNames); err != nil {
			fmt.Fprintf(os.Stderr, "Error creating cgroups: %v\n", err)
			return 5
		}
	} else {
		fmt.Fprintf(os.Stderr, "Continuing with per-app isolators disabled: %v\n", err)
	}

	if err = writePpid(os.Getpid()); err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		return 4
	}

	err = withClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to execute %q: %v\n", args[0], err)
		return 7
	}

	return 0
}
Example #10
File: run.go Project: danieltaborda/rkt
// Run mounts the right overlay filesystems and actually runs the prepared
// pod by exec()ing the stage1 init inside the pod filesystem.
func Run(cfg RunConfig, dir string) {
	useOverlay, err := preparedWithOverlay(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	// create a separate mount namespace so the cgroup filesystems and/or
	// overlay mounts are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.Fatalf("error unsharing: %v", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.Fatalf("error making / a slave mount: %v", err)
	}
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.Fatalf("error making / a shared and slave mount: %v", err)
	}

	log.Printf("Setting up stage1")
	if err := setupStage1Image(cfg, cfg.Stage1Image, dir, useOverlay); err != nil {
		log.Fatalf("error setting up stage1: %v", err)
	}
	log.Printf("Wrote filesystem to %s\n", dir)

	for _, img := range cfg.Images {
		if err := setupAppImage(cfg, img, dir, useOverlay); err != nil {
			log.Fatalf("error setting up app image: %v", err)
		}
	}

	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
		log.Fatalf("setting lock fd environment: %v", err)
	}

	log.Printf("Pivoting to filesystem %s", dir)
	if err := os.Chdir(dir); err != nil {
		log.Fatalf("failed changing to dir: %v", err)
	}

	ep, err := getStage1Entrypoint(dir, runEntrypoint)
	if err != nil {
		log.Fatalf("error determining init entrypoint: %v", err)
	}
	log.Printf("Execing %s", ep)

	args := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)}
	if cfg.Debug {
		args = append(args, "--debug")
	}
	if cfg.PrivateNet.Any() {
		args = append(args, "--private-net="+cfg.PrivateNet.String())
	}
	if cfg.Interactive {
		args = append(args, "--interactive")
	}
	args = append(args, cfg.UUID.String())

	// make sure the lock fd stays open across exec
	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
	}

	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
		log.Fatalf("error execing init: %v", err)
	}
}
Example #11
File: run.go Project: yanghongkjxy/rkt
// Run mounts the right overlay filesystems and actually runs the prepared
// pod by exec()ing the stage1 init inside the pod filesystem.
func Run(cfg RunConfig, dir string, dataDir string) {
	useOverlay, err := preparedWithOverlay(dir)
	if err != nil {
		log.FatalE("error preparing overlay", err)
	}

	privateUsers, err := preparedWithPrivateUsers(dir)
	if err != nil {
		log.FatalE("error preparing private users", err)
	}

	debug("Setting up stage1")
	if err := setupStage1Image(cfg, dir, useOverlay); err != nil {
		log.FatalE("error setting up stage1", err)
	}
	debug("Wrote filesystem to %s\n", dir)

	for _, app := range cfg.Apps {
		if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil {
			log.FatalE("error setting up app image", err)
		}
	}

	destRootfs := common.Stage1RootfsPath(dir)

	if len(cfg.DNS) > 0 || len(cfg.DNSSearch) > 0 || len(cfg.DNSOpt) > 0 {
		addResolvConf(cfg, destRootfs)
	}

	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
		log.FatalE("setting lock fd environment", err)
	}

	if err := os.Setenv(common.EnvSELinuxContext, fmt.Sprintf("%v", cfg.ProcessLabel)); err != nil {
		log.FatalE("setting SELinux context environment", err)
	}

	if err := os.Setenv(common.EnvSELinuxMountContext, fmt.Sprintf("%v", cfg.MountLabel)); err != nil {
		log.FatalE("setting SELinux mount context enviroment", err)
	}

	debug("Pivoting to filesystem %s", dir)
	if err := os.Chdir(dir); err != nil {
		log.FatalE("failed changing to dir", err)
	}

	ep, err := getStage1Entrypoint(dir, runEntrypoint)
	if err != nil {
		log.FatalE("error determining 'run' entrypoint", err)
	}
	args := []string{filepath.Join(destRootfs, ep)}
	debug("Execing %s", ep)

	if cfg.Debug {
		args = append(args, "--debug")
	}

	args = append(args, "--net="+cfg.Net.String())

	if cfg.Interactive {
		args = append(args, "--interactive")
	}
	if len(privateUsers) > 0 {
		args = append(args, "--private-users="+privateUsers)
	}
	if cfg.MDSRegister {
		mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps)
		if err != nil {
			log.FatalE("failed to register the pod", err)
		}

		args = append(args, "--mds-token="+mdsToken)
	}

	if cfg.LocalConfig != "" {
		args = append(args, "--local-config="+cfg.LocalConfig)
	}

	s1v, err := getStage1InterfaceVersion(dir)
	if err != nil {
		log.FatalE("error determining stage1 interface version", err)
	}

	if cfg.Hostname != "" {
		if interfaceVersionSupportsHostname(s1v) {
			args = append(args, "--hostname="+cfg.Hostname)
		} else {
			log.Printf("warning: --hostname option is not supported by stage1")
		}
	}

	args = append(args, cfg.UUID.String())

	// make sure the lock fd stays open across exec
	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
	}

	tpmEvent := fmt.Sprintf("rkt: Rootfs: %s Manifest: %s Stage1 args: %s", cfg.CommonConfig.RootHash, cfg.CommonConfig.ManifestData, strings.Join(args, " "))
	// If there's no TPM available or there's a failure for some other
	// reason, ignore it and continue anyway. Long term we'll want policy
	// that enforces TPM behaviour, but we don't have any infrastructure
	// around that yet.
	_ = tpm.Extend(tpmEvent)
	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
		log.FatalE("error execing init", err)
	}
}
Example #12
File: run.go Project: liugenping/rkt
// Run mounts the right overlay filesystems and actually runs the prepared
// pod by exec()ing the stage1 init inside the pod filesystem.
func Run(cfg RunConfig, dir string, dataDir string) {
	useOverlay, err := preparedWithOverlay(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	privateUsers, err := preparedWithPrivateUsers(dir)
	if err != nil {
		log.Fatalf("error: %v", err)
	}

	debug("Setting up stage1")
	if err := setupStage1Image(cfg, dir, useOverlay); err != nil {
		log.Fatalf("error setting up stage1: %v", err)
	}
	debug("Wrote filesystem to %s\n", dir)

	for _, app := range cfg.Apps {
		if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil {
			log.Fatalf("error setting up app image: %v", err)
		}
	}

	destRootfs := common.Stage1RootfsPath(dir)
	flavor, err := os.Readlink(filepath.Join(destRootfs, "flavor"))
	if err != nil {
		log.Printf("error reading flavor: %v\n", err)
	}
	if flavor == "kvm" {
		err := kvmCheckSSHSetup(destRootfs, dataDir)
		if err != nil {
			log.Fatalf("error setting up ssh keys: %v", err)
		}
	}

	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
		log.Fatalf("setting lock fd environment: %v", err)
	}

	if err := os.Setenv(common.EnvSELinuxContext, fmt.Sprintf("%v", cfg.ProcessLabel)); err != nil {
		log.Fatalf("setting SELinux context environment: %v", err)
	}

	debug("Pivoting to filesystem %s", dir)
	if err := os.Chdir(dir); err != nil {
		log.Fatalf("failed changing to dir: %v", err)
	}

	ep, err := getStage1Entrypoint(dir, runEntrypoint)
	if err != nil {
		log.Fatalf("error determining 'run' entrypoint: %v", err)
	}
	args := []string{filepath.Join(destRootfs, ep)}
	debug("Execing %s", ep)

	if cfg.Debug {
		args = append(args, "--debug")
	}

	args = append(args, "--net="+cfg.Net.String())

	if cfg.Interactive {
		args = append(args, "--interactive")
	}
	if len(privateUsers) > 0 {
		args = append(args, "--private-users="+privateUsers)
	}
	if cfg.MDSRegister {
		mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps)
		if err != nil {
			log.Fatalf("failed to register the pod: %v", err)
		}

		args = append(args, "--mds-token="+mdsToken)
	}

	if cfg.LocalConfig != "" {
		args = append(args, "--local-config="+cfg.LocalConfig)
	}

	args = append(args, cfg.UUID.String())

	// make sure the lock fd stays open across exec
	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
	}

	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
		log.Fatalf("error execing init: %v", err)
	}
}
Example #13
File: main.go Project: intelsdi-x/rkt
func stage1(rp *stage1commontypes.RuntimePod) int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.Print("UUID is missing or malformed\n")
		return 254
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid, rp)
	if err != nil {
		log.PrintE("can't load pod", err)
		return 254
	}

	if err := p.SaveRuntime(); err != nil {
		log.FatalE("failed to save runtime parameters", err)
	}

	// Sanity checks
	if len(p.Manifest.Apps) != 1 {
		log.Printf("flavor %q only supports 1 application per Pod for now", flavor)
		return 254
	}

	ra := p.Manifest.Apps[0]

	imgName := p.AppNameToImageName(ra.Name)
	args := ra.App.Exec
	if len(args) == 0 {
		log.Printf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName)
		return 254
	}

	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("can't get rkt lock fd", err)
		return 254
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished
	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("can't set FD_CLOEXEC on rkt lock", err)
		return 254
	}

	workDir := "/"
	if ra.App.WorkingDirectory != "" {
		workDir = ra.App.WorkingDirectory
	}

	env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}
	for _, e := range ra.App.Environment {
		env = append(env, e.Name+"="+e.Value)
	}

	rfs := filepath.Join(common.AppPath(p.Root, ra.Name), "rootfs")

	argFlyMounts, err := evaluateMounts(rfs, string(ra.Name), p)
	if err != nil {
		log.PrintE("can't evaluate mounts", err)
		return 254
	}

	effectiveMounts := append(
		[]flyMount{
			{"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC},

			{"tmpfs", rfs, "/tmp", "tmpfs", 0},
		},
		argFlyMounts...,
	)

	/* Process DNS config files
	 *
	 * /etc/resolv.conf: four modes
	 * 'host' - bind-mount host's file
	 * 'stage0' - bind-mount the file created by stage0
	 * 'default' - do nothing (we would respect CNI if fly had networking)
	 * 'none' - do nothing
	 */
	switch p.ResolvConfMode {
	case "host":
		effectiveMounts = append(effectiveMounts,
			flyMount{"/etc/resolv.conf", rfs, "/etc/resolv.conf", "none", syscall.MS_BIND | syscall.MS_RDONLY})
	case "stage0":
		if err := copyResolv(p); err != nil {
			log.PrintE("can't copy /etc/resolv.conf", err)
			return 254
		}
	}

	/*
	 * /etc/hosts: three modes:
	 * 'host' - bind-mount host's file
	 * 'stage0' - bind mount the file created by stage1
	 * 'default' - create a stub /etc/hosts if needed
	 */

	switch p.EtcHostsMode {
	case "host":
		effectiveMounts = append(effectiveMounts,
			flyMount{"/etc/hosts", rfs, "/etc/hosts", "none", syscall.MS_BIND | syscall.MS_RDONLY})
	case "stage0":
		effectiveMounts = append(effectiveMounts, flyMount{
			filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "rkt-hosts"),
			rfs,
			"/etc/hosts",
			"none",
			syscall.MS_BIND | syscall.MS_RDONLY})
	case "default":
		stage2HostsPath := filepath.Join(common.AppRootfsPath(p.Root, ra.Name), "etc", "hosts")
		if _, err := os.Stat(stage2HostsPath); err != nil && os.IsNotExist(err) {
			fallbackHosts := []byte("127.0.0.1 localhost localdomain\n")
			ioutil.WriteFile(stage2HostsPath, fallbackHosts, 0644)
		}
	}

	for _, mount := range effectiveMounts {
		diag.Printf("Processing %+v", mount)

		var (
			err            error
			hostPathInfo   os.FileInfo
			targetPathInfo os.FileInfo
		)

		if strings.HasPrefix(mount.HostPath, "/") {
			if hostPathInfo, err = os.Stat(mount.HostPath); err != nil {
				log.PrintE(fmt.Sprintf("stat of host path %s", mount.HostPath), err)
				return 254
			}
		} else {
			hostPathInfo = nil
		}

		absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath)
		if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) {
			log.PrintE(fmt.Sprintf("stat of target path %s", absTargetPath), err)
			return 254
		}

		switch {
		case (mount.Flags & syscall.MS_REMOUNT) != 0:
			{
				diag.Printf("don't attempt to create files for remount of %q", absTargetPath)
			}
		case targetPathInfo == nil:
			absTargetPathParent, _ := filepath.Split(absTargetPath)
			if err := os.MkdirAll(absTargetPathParent, 0755); err != nil {
				log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
				return 254
			}
			switch {
			case hostPathInfo == nil || hostPathInfo.IsDir():
				if err := os.Mkdir(absTargetPath, 0755); err != nil {
					log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
					return 254
				}
			case !hostPathInfo.IsDir():
				file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700)
				if err != nil {
					log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err)
					return 254
				}
				file.Close()
			}
		case hostPathInfo != nil:
			switch {
			case hostPathInfo.IsDir() && !targetPathInfo.IsDir():
				log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath)
				return 254
			case !hostPathInfo.IsDir() && targetPathInfo.IsDir():
				log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath)
				return 254
			}
		}

		if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil {
			log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err)
			return 254
		}
	}

	if err = stage1common.WritePid(os.Getpid(), "pid"); err != nil {
		log.Error(err)
		return 254
	}

	var uidResolver, gidResolver user.Resolver
	var uid, gid int

	uidResolver, err = user.NumericIDs(ra.App.User)
	if err != nil {
		uidResolver, err = user.IDsFromStat(rfs, ra.App.User, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid user %q", ra.App.User), err)
		return 254
	}

	if uid, _, err = uidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure user %q", ra.App.User), err)
		return 254
	}

	gidResolver, err = user.NumericIDs(ra.App.Group)
	if err != nil {
		gidResolver, err = user.IDsFromStat(rfs, ra.App.Group, nil)
	}

	if err != nil { // give up
		log.PrintE(fmt.Sprintf("invalid group %q", ra.App.Group), err)
		return 254
	}

	if _, gid, err = gidResolver.IDs(); err != nil {
		log.PrintE(fmt.Sprintf("failed to configure group %q", ra.App.Group), err)
		return 254
	}

	diag.Printf("chroot to %q", rfs)
	if err := syscall.Chroot(rfs); err != nil {
		log.PrintE("can't chroot", err)
		return 254
	}

	if err := os.Chdir(workDir); err != nil {
		log.PrintE(fmt.Sprintf("can't change to working directory %q", workDir), err)
		return 254
	}

	// lock the current goroutine to its current OS thread.
	// This will force the subsequent syscalls to be executed in the same OS thread as Setresuid, and Setresgid,
	// see https://github.com/golang/go/issues/1435#issuecomment-66054163.
	runtime.LockOSThread()

	diag.Printf("setting uid %d gid %d", uid, gid)

	if err := syscall.Setresgid(gid, gid, gid); err != nil {
		log.PrintE(fmt.Sprintf("can't set gid %d", gid), err)
		return 254
	}

	if err := syscall.Setresuid(uid, uid, uid); err != nil {
		log.PrintE(fmt.Sprintf("can't set uid %d", uid), err)
		return 254
	}

	diag.Printf("execing %q in %q", args, rfs)
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err)
		return 254
	}

	return 0
}
Example #14
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		fmt.Fprintln(os.Stderr, "UUID is missing or malformed")
		return 1
	}

	root := "."
	p, err := LoadPod(root, uuid)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	if privNet {
		fps, err := forwardedPorts(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			return 6
		}

		n, err := networking.Setup(root, p.UUID, fps)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to setup network: %v\n", err)
			return 6
		}
		defer n.Teardown()

		if err = n.Save(); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to save networking state %v\n", err)
			return 6
		}

		p.MetadataServiceURL = common.MetadataServicePublicURL(n.GetDefaultHostIP())

		if err = registerPod(p, n.GetDefaultIP()); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to register pod: %v\n", err)
			return 6
		}
		defer unregisterPod(p)
	}

	if err = p.PodToSystemd(interactive); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err)
		return 2
	}

	args, env, err := getArgsEnv(p, debug)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get execution parameters: %v\n", err)
		return 3
	}

	var execFn func() error

	if privNet {
		cmd := exec.Cmd{
			Path:   args[0],
			Args:   args,
			Stdin:  os.Stdin,
			Stdout: os.Stdout,
			Stderr: os.Stderr,
			Env:    env,
		}
		execFn = cmd.Run
	} else {
		execFn = func() error {
			return syscall.Exec(args[0], args, env)
		}
	}

	err = withClearedCloExec(lfd, execFn)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to execute nspawn: %v\n", err)
		return 5
	}

	return 0
}
Example #15
File: init.go Project: carriercomm/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.PrintE("UUID is missing or malformed", err)
		return 1
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.PrintE("failed to load pod", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("failed to get rkt lock fd", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("failed to set FD_CLOEXEC on rkt lock", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	flavor, _, err := stage1initcommon.GetFlavor(p)
	if err != nil {
		log.PrintE("failed to get stage1 flavor", err)
		return 3
	}

	var n *networking.Networking
	if netList.Contained() {
		fps, err := forwardedPorts(p)
		if err != nil {
			log.Error(err)
			return 6
		}

		n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, debug)
		if err != nil {
			log.PrintE("failed to setup network", err)
			return 6
		}

		if err = n.Save(); err != nil {
			log.PrintE("failed to save networking state", err)
			n.Teardown(flavor, debug)
			return 6
		}

		if len(mdsToken) > 0 {
			hostIP, err := n.GetDefaultHostIP()
			if err != nil {
				log.PrintE("failed to get default Host IP", err)
				return 6
			}

			p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)
		}
	} else {
		if flavor == "kvm" {
			log.Print("flavor kvm requires private network configuration (try --net)")
			return 6
		}
		if len(mdsToken) > 0 {
			p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken)
		}
	}

	if err = stage1initcommon.WriteDefaultTarget(p); err != nil {
		log.PrintE("failed to write default.target", err)
		return 2
	}

	if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil {
		log.PrintE("failed to write prepare-app service template", err)
		return 2
	}

	if err := stage1initcommon.SetJournalPermissions(p); err != nil {
		log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err)
	}

	if flavor == "kvm" {
		if err := KvmPodToSystemd(p, n); err != nil {
			log.PrintE("failed to configure systemd for kvm", err)
			return 2
		}
	}

	if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil {
		log.PrintE("failed to configure systemd", err)
		return 2
	}

	args, env, err := getArgsEnv(p, flavor, debug, n)
	if err != nil {
		log.Error(err)
		return 3
	}

	// create a separate mount namespace so the cgroup filesystems
	// are unmounted when exiting the pod
	if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil {
		log.FatalE("error unsharing", err)
	}

	// we recursively make / a "shared and slave" so mount events from the
	// new namespace don't propagate to the host namespace but mount events
	// from the host propagate to the new namespace and are forwarded to
	// its peer group
	// See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil {
		log.FatalE("error making / a slave mount", err)
	}
	if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil {
		log.FatalE("error making / a shared and slave mount", err)
	}

	enabledCgroups, err := cgroup.GetEnabledCgroups()
	if err != nil {
		log.FatalE("error getting cgroups", err)
		return 5
	}

	// mount host cgroups in the rkt mount namespace
	if err := mountHostCgroups(enabledCgroups); err != nil {
		log.FatalE("couldn't mount the host cgroups", err)
		return 5
	}

	var serviceNames []string
	for _, app := range p.Manifest.Apps {
		serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name))
	}
	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := stage1initcommon.GetMachineID(p)
	subcgroup, err := getContainerSubCgroup(machineID)
	if err == nil {
		if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil {
			log.PrintE("couldn't mount the container cgroups", err)
			return 5
		}
	} else {
		log.PrintE("continuing with per-app isolators disabled", err)
	}

	if err = stage1common.WritePpid(os.Getpid()); err != nil {
		log.Error(err)
		return 4
	}

	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("failed to execute %q", args[0]), err)
		return 7
	}

	return 0
}
Example #16
File: main.go Project: blixtra/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		log.Print("UUID is missing or malformed\n")
		return 1
	}

	root := "."
	p, err := stage1commontypes.LoadPod(root, uuid)
	if err != nil {
		log.PrintE("can't load pod", err)
		return 1
	}

	if len(p.Manifest.Apps) != 1 {
		log.Printf("flavor %q only supports 1 application per Pod for now", flavor)
		return 1
	}

	lfd, err := common.GetRktLockFD()
	if err != nil {
		log.PrintE("can't get rkt lock fd", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished
	if err := sys.CloseOnExec(lfd, true); err != nil {
		log.PrintE("can't set FD_CLOEXEC on rkt lock", err)
		return 1
	}

	env := []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}
	for _, e := range p.Manifest.Apps[0].App.Environment {
		env = append(env, e.Name+"="+e.Value)
	}

	args := p.Manifest.Apps[0].App.Exec
	rfs := filepath.Join(common.AppPath(p.Root, p.Manifest.Apps[0].Name), "rootfs")

	argFlyMounts, err := evaluateMounts(rfs, string(p.Manifest.Apps[0].Name), p)
	if err != nil {
		log.PrintE("can't evaluate mounts", err)
		return 1
	}

	effectiveMounts := append(
		[]flyMount{
			{"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC},

			{"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED},
			{"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC},

			{"tmpfs", rfs, "/tmp", "tmpfs", 0},
		},
		argFlyMounts...,
	)

	for _, mount := range effectiveMounts {
		var (
			err            error
			hostPathInfo   os.FileInfo
			targetPathInfo os.FileInfo
		)

		if strings.HasPrefix(mount.HostPath, "/") {
			if hostPathInfo, err = os.Stat(mount.HostPath); err != nil {
				log.PrintE(fmt.Sprintf("stat of host directory %s", mount.HostPath), err)
				return 1
			}
		} else {
			hostPathInfo = nil
		}

		absTargetPath := filepath.Join(mount.TargetPrefixPath, mount.RelTargetPath)
		if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) {
			log.PrintE(fmt.Sprintf("stat of target directory %s", absTargetPath), err)
			return 1
		}

		switch {
		case targetPathInfo == nil:
			absTargetPathParent, _ := filepath.Split(absTargetPath)
			if err := os.MkdirAll(absTargetPathParent, 0700); err != nil {
				log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
				return 1
			}
			switch {
			case hostPathInfo == nil || hostPathInfo.IsDir():
				if err := os.Mkdir(absTargetPath, 0700); err != nil {
					log.PrintE(fmt.Sprintf("can't create directory %q", absTargetPath), err)
					return 1
				}
			case !hostPathInfo.IsDir():
				file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700)
				if err != nil {
					log.PrintE(fmt.Sprintf("can't create file %q", absTargetPath), err)
					return 1
				}
				file.Close()
			}
		case hostPathInfo != nil:
			switch {
			case hostPathInfo.IsDir() && !targetPathInfo.IsDir():
				log.Printf("can't mount because %q is a directory while %q is not", mount.HostPath, absTargetPath)
				return 1
			case !hostPathInfo.IsDir() && targetPathInfo.IsDir():
				log.Printf("can't mount because %q is not a directory while %q is", mount.HostPath, absTargetPath)
				return 1
			}
		}

		if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil {
			log.PrintE(fmt.Sprintf("can't mount %q on %q with flags %v", mount.HostPath, absTargetPath, mount.Flags), err)
			return 1
		}
	}

	if err = stage1common.WritePpid(os.Getpid()); err != nil {
		log.Error(err)
		return 4
	}

	diag.Printf("chroot to %q", rfs)
	if err := syscall.Chroot(rfs); err != nil {
		log.PrintE("can't chroot", err)
		return 1
	}

	if err := os.Chdir("/"); err != nil {
		log.PrintE("can't change to root new directory", err)
		return 1
	}

	diag.Printf("execing %q in %q", args, rfs)
	err = stage1common.WithClearedCloExec(lfd, func() error {
		return syscall.Exec(args[0], args, env)
	})
	if err != nil {
		log.PrintE(fmt.Sprintf("can't execute %q", args[0]), err)
		return 7
	}

	return 0
}
Example #17
File: init.go Project: jsarenik/rkt
func stage1() int {
	uuid, err := types.NewUUID(flag.Arg(0))
	if err != nil {
		fmt.Fprintln(os.Stderr, "UUID is missing or malformed")
		return 1
	}

	root := "."
	p, err := LoadPod(root, uuid)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err)
		return 1
	}

	// set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking
	// network plugins
	lfd, err := common.GetRktLockFD()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get rkt lock fd: %v\n", err)
		return 1
	}

	if err := sys.CloseOnExec(lfd, true); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to set FD_CLOEXEC on rkt lock: %v\n", err)
		return 1
	}

	mirrorLocalZoneInfo(p.Root)

	if privNet.Any() {
		fps, err := forwardedPorts(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			return 6
		}

		n, err := networking.Setup(root, p.UUID, fps, privNet)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to setup network: %v\n", err)
			return 6
		}
		defer n.Teardown()

		if err = n.Save(); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to save networking state %v\n", err)
			return 6
		}

		hostIP, err := n.GetDefaultHostIP()
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to get default Host IP: %v\n", err)
			return 6
		}

		mdsToken, err := generateMDSToken()
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to generate MDS token: %v", err)
			return 8
		}

		p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken)

		if err = registerPod(p, mdsToken); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to register pod: %v\n", err)
			return 8
		}

		defer unregisterPod(p)
	}

	flavor, systemdStage1Version, err := p.getFlavor()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get stage1 flavor: %v\n", err)
		return 3
	}

	if err = p.WritePrepareAppTemplate(systemdStage1Version); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to write prepare-app service template: %v\n", err)
		return 2
	}

	if err = p.PodToSystemd(interactive); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err)
		return 2
	}

	args, env, err := getArgsEnv(p, flavor, systemdStage1Version, debug)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return 3
	}

	appHashes := p.GetAppHashes()
	s1Root := common.Stage1RootfsPath(p.Root)
	machineID := p.GetMachineID()
	subcgroup, err := getContainerSubCgroup(machineID)
	if err == nil {
		if err := cgroup.CreateCgroups(s1Root, subcgroup, appHashes); err != nil {
			fmt.Fprintf(os.Stderr, "Error creating cgroups: %v\n", err)
			return 5
		}
	} else {
		fmt.Fprintf(os.Stderr, "Continuing with per-app isolators disabled: %v\n", err)
	}

	var execFn func() error

	if privNet.Any() {
		cmd := exec.Cmd{
			Path:   args[0],
			Args:   args,
			Stdin:  os.Stdin,
			Stdout: os.Stdout,
			Stderr: os.Stderr,
			Env:    env,
		}
		execFn = func() error {
			err = cmd.Start()
			if err != nil {
				return fmt.Errorf("Failed to start nspawn: %v\n", err)
			}
			if err = writePpid(cmd.Process.Pid); err != nil {
				return err
			}
			return cmd.Wait()
		}
	} else {
		if err = writePpid(os.Getpid()); err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			return 4
		}
		execFn = func() error {
			return syscall.Exec(args[0], args, env)
		}
	}

	err = withClearedCloExec(lfd, execFn)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to execute nspawn: %v\n", err)
		return 7
	}

	return 0
}