func installAppArmorProfile() error { if !apparmor.IsEnabled() { return nil } // Make sure /etc/apparmor.d exists if err := os.MkdirAll(path.Dir(apparmorProfilePath), 0755); err != nil { return err } f, err := os.OpenFile(apparmorProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { return err } if err := generateProfile(f); err != nil { f.Close() return err } f.Close() cmd := exec.Command("/sbin/apparmor_parser", "-r", "-W", "docker") // to use the parser directly we have to make sure we are in the correct // dir with the profile cmd.Dir = "/etc/apparmor.d" output, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("Error loading docker apparmor profile: %s (%s)", err, output) } return nil }
func installAppArmorProfile() error { if !apparmor.IsEnabled() { return nil } // Make sure /etc/apparmor.d exists if err := os.MkdirAll(path.Dir(apparmorProfilePath), 0755); err != nil { return err } f, err := os.OpenFile(apparmorProfilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { return err } if err := generateProfile(f); err != nil { f.Close() return err } f.Close() if err := aaparser.LoadProfile(apparmorProfilePath); err != nil { return err } return nil }
func (d *Driver) setPrivileged(container *configs.Config) (err error) { container.Capabilities = execdriver.GetAllCapabilities() container.Cgroups.AllowAllDevices = true hostDevices, err := devices.HostDevices() if err != nil { return err } container.Devices = hostDevices if apparmor.IsEnabled() { container.AppArmorProfile = "unconfined" } return nil }
func installDefaultAppArmorProfile() { if apparmor.IsEnabled() { if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil { apparmorProfiles := []string{defaultApparmorProfile} // Allow daemon to run if loading failed, but are active // (possibly through another run, manually, or via system startup) for _, policy := range apparmorProfiles { if err := aaprofile.IsLoaded(policy); err != nil { logrus.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) } } } } }
func ensureDefaultAppArmorProfile() error { if apparmor.IsEnabled() { loaded, err := aaprofile.IsLoaded(defaultApparmorProfile) if err != nil { return fmt.Errorf("Could not check if %s AppArmor profile was loaded: %s", defaultApparmorProfile, err) } // Nothing to do. if loaded { return nil } // Load the profile. if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil { return fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", defaultApparmorProfile) } } return nil }
// NewDriver returns a new native driver, called from NewDriver of execdriver. func NewDriver(root, initPath string, options []string) (*Driver, error) { meminfo, err := sysinfo.ReadMemInfo() if err != nil { return nil, err } if err := sysinfo.MkdirAll(root, 0700); err != nil { return nil, err } if apparmor.IsEnabled() { if err := installAppArmorProfile(); err != nil { apparmorProfiles := []string{"docker-default"} // Allow daemon to run if loading failed, but are active // (possibly through another run, manually, or via system startup) for _, policy := range apparmorProfiles { if err := hasAppArmorProfileLoaded(policy); err != nil { return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) } } } } // choose cgroup manager // this makes sure there are no breaking changes to people // who upgrade from versions without native.cgroupdriver opt cgm := libcontainer.Cgroupfs if systemd.UseSystemd() { cgm = libcontainer.SystemdCgroups } // parse the options for _, option := range options { key, val, err := parsers.ParseKeyValueOpt(option) if err != nil { return nil, err } key = strings.ToLower(key) switch key { case "native.cgroupdriver": // override the default if they set options switch val { case "systemd": if systemd.UseSystemd() { cgm = libcontainer.SystemdCgroups } else { // warn them that they chose the wrong driver logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead") } case "cgroupfs": cgm = libcontainer.Cgroupfs default: return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val) } default: return nil, fmt.Errorf("Unknown option %s\n", key) } } f, err := libcontainer.New( root, cgm, libcontainer.InitPath(reexec.Self(), DriverName), ) if err != nil { return nil, err } return &Driver{ root: root, initPath: initPath, activeContainers: make(map[string]libcontainer.Container), machineMemory: meminfo.MemTotal, factory: f, }, nil }
// New returns the docker default configuration for libcontainer func New() *configs.Config { container := &configs.Config{ Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: "NEWNS"}, {Type: "NEWUTS"}, {Type: "NEWIPC"}, {Type: "NEWPID"}, {Type: "NEWNET"}, }), Cgroups: &configs.Cgroup{ Parent: "docker", AllowAllDevices: false, MemorySwappiness: -1, }, Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Source: "sysfs", Destination: "/sys", Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, { Source: "cgroup", Destination: "/sys/fs/cgroup", Device: "cgroup", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, MaskPaths: []string{ "/proc/kcore", "/proc/latency_stats", "/proc/timer_stats", }, ReadonlyPaths: []string{ "/proc/asound", "/proc/bus", "/proc/fs", "/proc/irq", "/proc/sys", "/proc/sysrq-trigger", }, } if apparmor.IsEnabled() { container.AppArmorProfile = "docker-default" } return container }
func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) { s := oci.DefaultSpec() if err := daemon.populateCommonSpec(&s, c); err != nil { return nil, err } var cgroupsPath string scopePrefix := "docker" parent := "/docker" useSystemd := UsingSystemd(daemon.configStore) if useSystemd { parent = "system.slice" } if c.HostConfig.CgroupParent != "" { parent = c.HostConfig.CgroupParent } else if daemon.configStore.CgroupParent != "" { parent = daemon.configStore.CgroupParent } if useSystemd { cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath) } else { cgroupsPath = filepath.Join(parent, c.ID) } s.Linux.CgroupsPath = &cgroupsPath if err := setResources(&s, c.HostConfig.Resources); err != nil { return nil, fmt.Errorf("linux runtime spec resources: %v", err) } s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj s.Linux.Sysctl = c.HostConfig.Sysctls if err := setDevices(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec devices: %v", err) } if err := setRlimits(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) } if err := setUser(&s, c); err != nil { return nil, fmt.Errorf("linux spec user: %v", err) } if err := setNamespaces(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux spec namespaces: %v", err) } if err := setCapabilities(&s, c); err != nil { return nil, fmt.Errorf("linux spec capabilities: %v", err) } if err := setSeccomp(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux seccomp: %v", err) } if err := daemon.setupIpcDirs(c); err != nil { return nil, err } ms, err := daemon.setupMounts(c) if err != nil { return nil, err } ms = append(ms, c.IpcMounts()...) ms = append(ms, c.TmpfsMounts()...) sort.Sort(mounts(ms)) if err := setMounts(daemon, &s, c, ms); err != nil { return nil, fmt.Errorf("linux mounts: %v", err) } for _, ns := range s.Linux.Namespaces { if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) if err != nil { return nil, err } s.Hooks = specs.Hooks{ Prestart: []specs.Hook{{ Path: target, // FIXME: cross-platform Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, }}, } } } if apparmor.IsEnabled() { appArmorProfile := "docker-default" if len(c.AppArmorProfile) > 0 { appArmorProfile = c.AppArmorProfile } else if c.HostConfig.Privileged { appArmorProfile = "unconfined" } s.Process.ApparmorProfile = appArmorProfile } s.Process.SelinuxLabel = c.GetProcessLabel() s.Process.NoNewPrivileges = c.NoNewPrivileges s.Linux.MountLabel = c.MountLabel return (*libcontainerd.Spec)(&s), nil }
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { s := oci.DefaultSpec() if err := daemon.populateCommonSpec(&s, c); err != nil { return nil, err } var cgroupsPath string scopePrefix := "docker" parent := "/docker" useSystemd := UsingSystemd(daemon.configStore) if useSystemd { parent = "system.slice" } if c.HostConfig.CgroupParent != "" { parent = c.HostConfig.CgroupParent } else if daemon.configStore.CgroupParent != "" { parent = daemon.configStore.CgroupParent } if useSystemd { cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath) } else { cgroupsPath = filepath.Join(parent, c.ID) } s.Linux.CgroupsPath = &cgroupsPath if err := setResources(&s, c.HostConfig.Resources); err != nil { return nil, fmt.Errorf("linux runtime spec resources: %v", err) } s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj s.Linux.Sysctl = c.HostConfig.Sysctls p := *s.Linux.CgroupsPath if useSystemd { initPath, err := cgroups.GetInitCgroupDir("cpu") if err != nil { return nil, err } p, _ = cgroups.GetThisCgroupDir("cpu") if err != nil { return nil, err } p = filepath.Join(initPath, p) } // Clean path to guard against things like ../../../BAD parentPath := filepath.Dir(p) if !filepath.IsAbs(parentPath) { parentPath = filepath.Clean("/" + parentPath) } if err := daemon.initCgroupsPath(parentPath); err != nil { return nil, fmt.Errorf("linux init cgroups path: %v", err) } if err := setDevices(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec devices: %v", err) } if err := setRlimits(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) } if err := setUser(&s, c); err != nil { return nil, fmt.Errorf("linux spec user: %v", err) } if err := setNamespaces(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux spec namespaces: %v", err) } if err := setCapabilities(&s, c); err != nil { return nil, fmt.Errorf("linux spec capabilities: %v", err) } if err := setSeccomp(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux seccomp: %v", err) } if err := daemon.setupIpcDirs(c); err != nil { return nil, err } if err := daemon.setupSecretDir(c); err != nil { return nil, err } ms, err := daemon.setupMounts(c) if err != nil { return nil, err } ms = append(ms, c.IpcMounts()...) tmpfsMounts, err := c.TmpfsMounts() if err != nil { return nil, err } ms = append(ms, tmpfsMounts...) if m := c.SecretMount(); m != nil { ms = append(ms, *m) } sort.Sort(mounts(ms)) if err := setMounts(daemon, &s, c, ms); err != nil { return nil, fmt.Errorf("linux mounts: %v", err) } for _, ns := range s.Linux.Namespaces { if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) if err != nil { return nil, err } s.Hooks = specs.Hooks{ Prestart: []specs.Hook{{ Path: target, // FIXME: cross-platform Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, }}, } } } if apparmor.IsEnabled() { var appArmorProfile string if c.AppArmorProfile != "" { appArmorProfile = c.AppArmorProfile } else if c.HostConfig.Privileged { appArmorProfile = "unconfined" } else { appArmorProfile = "docker-default" } if appArmorProfile == "docker-default" { // Unattended upgrades and other fun services can unload AppArmor // profiles inadvertently. Since we cannot store our profile in // /etc/apparmor.d, nor can we practically add other ways of // telling the system to keep our profile loaded, in order to make // sure that we keep the default profile enabled we dynamically // reload it if necessary. if err := ensureDefaultAppArmorProfile(); err != nil { return nil, err } } s.Process.ApparmorProfile = appArmorProfile } s.Process.SelinuxLabel = c.GetProcessLabel() s.Process.NoNewPrivileges = c.NoNewPrivileges s.Linux.MountLabel = c.MountLabel return (*specs.Spec)(&s), nil }
//go:generate go run generate.go func main() { if len(os.Args) > 1 && os.Args[1] == "init" { runInit() return } notifySocket := os.Getenv("NOTIFY_SOCKET") if notifySocket != "" { setupSdNotify(spec, notifySocket) } // override the cmd in the spec with any args specified if len(flag.Args()) > 0 { spec.Process.Args = flag.Args() } // setup readonly fs in spec spec.Root.Readonly = readonly // setup tty in spec spec.Process.Terminal = allocateTty // pass in any hooks spec.Hooks = hooks // install the default apparmor profile if apparmor.IsEnabled() { // check if we have the docker-default apparmor profile loaded if err := aaprofile.IsLoaded(defaultApparmorProfile); err != nil { logrus.Warnf("AppArmor enabled on system but the %s profile is not loaded. apparmor_parser needs root to load a profile so we can't do it for you.", defaultApparmorProfile) } else { spec.Process.ApparmorProfile = defaultApparmorProfile } } // set the CgroupsPath as this user u, err := user.CurrentUser() if err != nil { logrus.Fatal(err) } spec.Linux.CgroupsPath = sPtr(u.Name) // setup UID mappings spec.Linux.UIDMappings = []specs.IDMapping{ { HostID: uint32(u.Uid), ContainerID: 0, Size: 1, }, } // setup GID mappings spec.Linux.GIDMappings = []specs.IDMapping{ { HostID: uint32(u.Gid), ContainerID: 0, Size: 1, }, } if err := unpackRootfs(spec); err != nil { logrus.Fatal(err) } status, err := startContainer(spec, containerID, pidFile, detach, useSystemdCgroup) if err != nil { logrus.Fatal(err) } if err := os.RemoveAll(defaultRootfsDir); err != nil { logrus.Warnf("removing rootfs failed: %v", err) } // exit with the container's exit status os.Exit(status) }
//创建容器的namespace以及cgroups等相关。 func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) { s := oci.DefaultSpec() //populateCommand(container, env) 主要是为container的execdriver(最终启动容器的) //设置网络模式、设置namespace(pid,ipc,uts)等、资源(resources)限制等,并且设置 //在容器内执行的Command,Command中含有容器内进程的启动命令; if err := daemon.populateCommonSpec(&s, c); err != nil { return nil, err } var cgroupsPath string scopePrefix := "docker" parent := "/docker" useSystemd := UsingSystemd(daemon.configStore) if useSystemd { parent = "system.slice" } if c.HostConfig.CgroupParent != "" { parent = c.HostConfig.CgroupParent } else if daemon.configStore.CgroupParent != "" { parent = daemon.configStore.CgroupParent } //是否支持systemd? if useSystemd { cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath) } else { cgroupsPath = filepath.Join(parent, c.ID) } s.Linux.CgroupsPath = &cgroupsPath //设置一系列的参数 if err := setResources(&s, c.HostConfig.Resources); err != nil { return nil, fmt.Errorf("linux runtime spec resources: %v", err) } s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj if err := setDevices(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec devices: %v", err) } if err := setRlimits(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) } if err := setUser(&s, c); err != nil { return nil, fmt.Errorf("linux spec user: %v", err) } //设置namespace if err := setNamespaces(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux spec namespaces: %v", err) } //设置cgroup。 if err := setCapabilities(&s, c); err != nil { return nil, fmt.Errorf("linux spec capabilities: %v", err) } //? if err := setSeccomp(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux seccomp: %v", err) } //? if err := daemon.setupIpcDirs(c); err != nil { return nil, err } //container.setupMounts() 返回container的所有挂载点; mounts, err := daemon.setupMounts(c) if err != nil { return nil, err } mounts = append(mounts, c.IpcMounts()...) mounts = append(mounts, c.TmpfsMounts()...) //设置容器的所有挂载点。 if err := setMounts(daemon, &s, c, mounts); err != nil { return nil, fmt.Errorf("linux mounts: %v", err) } //和网络的设置有关,但是这一点究竟是干什么的呢? //设置到进程的钩子,通过进程/proc/XXX/exe的链接文件获取到。 for _, ns := range s.Linux.Namespaces { if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) if err != nil { return nil, err } s.Hooks = specs.Hooks{ //// Hooks are the commands run at various lifecycle events of the container. Prestart: []specs.Hook{{ Path: target, // FIXME: cross-platform Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, }}, } } } //apparmor相关配置 if apparmor.IsEnabled() { appArmorProfile := "docker-default" if len(c.AppArmorProfile) > 0 { appArmorProfile = c.AppArmorProfile } else if c.HostConfig.Privileged { appArmorProfile = "unconfined" } s.Process.ApparmorProfile = appArmorProfile } //设置容器的selinux,privilege,moutlabel等特性。 s.Process.SelinuxLabel = c.GetProcessLabel() s.Process.NoNewPrivileges = c.NoNewPrivileges s.Linux.MountLabel = c.MountLabel return (*libcontainerd.Spec)(&s), nil }