// populateCommand builds c.Command, the execdriver command description used to
// start the container on Linux: network mode, IPC/PID/UTS namespace settings,
// device lists, ulimits/rlimits, cgroup resources, and the process config.
// It mutates c (ShmPath, MqueuePath, Command) and returns an error if any
// referenced container, device path, or ulimit cannot be resolved.
func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
	// Network: only populated when networking is enabled for this container.
	var en *execdriver.Network
	if !c.Config.NetworkDisabled {
		en = &execdriver.Network{}
		// Drivers without hook support (and host networking) join an existing
		// sandbox via its namespace path instead of a prestart hook.
		if !daemon.execDriver.SupportsHooks() || c.HostConfig.NetworkMode.IsHost() {
			en.NamespacePath = c.NetworkSettings.SandboxKey
		}
		// container:<id> mode shares another container's network namespace.
		if c.HostConfig.NetworkMode.IsContainer() {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			en.ContainerID = nc.ID
		}
	}

	// IPC: resolve the default shm/mqueue paths first; they may be overridden
	// below by a shared-IPC container or by host IPC mode.
	ipc := &execdriver.Ipc{}
	var err error
	c.ShmPath, err = c.ShmResourcePath()
	if err != nil {
		return err
	}
	c.MqueuePath, err = c.MqueueResourcePath()
	if err != nil {
		return err
	}
	if c.HostConfig.IpcMode.IsContainer() {
		// Share the IPC namespace (and shm/mqueue mounts) of another container.
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ipc.ContainerID = ic.ID
		c.ShmPath = ic.ShmPath
		c.MqueuePath = ic.MqueuePath
	} else {
		ipc.HostIpc = c.HostConfig.IpcMode.IsHost()
		if ipc.HostIpc {
			// --ipc=host requires the host's /dev/shm and /dev/mqueue to exist,
			// since the container will bind-mount them directly.
			if _, err := os.Stat("/dev/shm"); err != nil {
				return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
			}
			if _, err := os.Stat("/dev/mqueue"); err != nil {
				return fmt.Errorf("/dev/mqueue is not mounted, but must be for --ipc=host")
			}
			c.ShmPath = "/dev/shm"
			c.MqueuePath = "/dev/mqueue"
		}
	}

	pid := &execdriver.Pid{}
	pid.HostPid = c.HostConfig.PidMode.IsHost()
	uts := &execdriver.UTS{
		HostUTS: c.HostConfig.UTSMode.IsHost(),
	}

	// Build lists of devices allowed and created within the container.
	var userSpecifiedDevices []*configs.Device
	for _, deviceMapping := range c.HostConfig.Devices {
		devs, err := getDevicesFromPath(deviceMapping)
		if err != nil {
			return err
		}
		userSpecifiedDevices = append(userSpecifiedDevices, devs...)
	}
	allowedDevices := mergeDevices(configs.DefaultAllowedDevices, userSpecifiedDevices)
	autoCreatedDevices := mergeDevices(configs.DefaultAutoCreatedDevices, userSpecifiedDevices)

	var rlimits []*units.Rlimit
	ulimits := c.HostConfig.Ulimits

	// Merge ulimits with daemon defaults: container-specified ulimits win;
	// daemon defaults are appended only for names the container did not set.
	ulIdx := make(map[string]*units.Ulimit)
	for _, ul := range ulimits {
		ulIdx[ul.Name] = ul
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}

	// Per-device blkio throttling settings from the host config.
	weightDevices, err := getBlkioWeightDevices(c.HostConfig)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioReadBpsDevices(c.HostConfig)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioWriteBpsDevices(c.HostConfig)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioReadIOpsDevices(c.HostConfig)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioWriteIOpsDevices(c.HostConfig)
	if err != nil {
		return err
	}

	// Convert the merged ulimits to kernel rlimit entries.
	for _, limit := range ulimits {
		rl, err := limit.GetRlimit()
		if err != nil {
			return err
		}
		rlimits = append(rlimits, rl)
	}

	resources := &execdriver.Resources{
		CommonResources: execdriver.CommonResources{
			Memory:            c.HostConfig.Memory,
			MemoryReservation: c.HostConfig.MemoryReservation,
			CPUShares:         c.HostConfig.CPUShares,
			BlkioWeight:       c.HostConfig.BlkioWeight,
		},
		MemorySwap:                   c.HostConfig.MemorySwap,
		KernelMemory:                 c.HostConfig.KernelMemory,
		CpusetCpus:                   c.HostConfig.CpusetCpus,
		CpusetMems:                   c.HostConfig.CpusetMems,
		CPUPeriod:                    c.HostConfig.CPUPeriod,
		CPUQuota:                     c.HostConfig.CPUQuota,
		Rlimits:                      rlimits,
		BlkioWeightDevice:            weightDevices,
		BlkioThrottleReadBpsDevice:   readBpsDevice,
		BlkioThrottleWriteBpsDevice:  writeBpsDevice,
		BlkioThrottleReadIOpsDevice:  readIOpsDevice,
		BlkioThrottleWriteIOpsDevice: writeIOpsDevice,
		OomKillDisable:               c.HostConfig.OomKillDisable,
		// -1 means "not set"; overridden below when the user supplied a value.
		MemorySwappiness: -1,
	}
	if c.HostConfig.MemorySwappiness != nil {
		resources.MemorySwappiness = *c.HostConfig.MemorySwappiness
	}

	processConfig := execdriver.ProcessConfig{
		CommonProcessConfig: execdriver.CommonProcessConfig{
			Entrypoint: c.Path,
			Arguments:  c.Args,
			Tty:        c.Config.Tty,
		},
		Privileged: c.HostConfig.Privileged,
		User:       c.Config.User,
	}
	// Setsid gives the container process its own session (detach from the
	// daemon's controlling terminal).
	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
	processConfig.Env = env

	// User-namespace remapping: only populated when the daemon remaps root.
	remappedRoot := &execdriver.User{}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if rootUID != 0 {
		remappedRoot.UID = rootUID
		remappedRoot.GID = rootGID
	}
	uidMap, gidMap := daemon.GetUIDGIDMaps()

	// Default cgroup parent: daemon config wins; otherwise detect the systemd
	// cgroup driver from exec-opts and use "system.slice" in that case.
	defaultCgroupParent := "/docker"
	if daemon.configStore.CgroupParent != "" {
		defaultCgroupParent = daemon.configStore.CgroupParent
	} else {
		for _, option := range daemon.configStore.ExecOptions {
			key, val, err := parsers.ParseKeyValueOpt(option)
			if err != nil || !strings.EqualFold(key, "native.cgroupdriver") {
				continue
			}
			if val == "systemd" {
				defaultCgroupParent = "system.slice"
			}
		}
	}

	c.Command = &execdriver.Command{
		CommonCommand: execdriver.CommonCommand{
			ID:            c.ID,
			InitPath:      "/.dockerinit",
			MountLabel:    c.GetMountLabel(),
			Network:       en,
			ProcessConfig: processConfig,
			ProcessLabel:  c.GetProcessLabel(),
			Rootfs:        c.BaseFS,
			Resources:     resources,
			WorkingDir:    c.Config.WorkingDir,
		},
		AllowedDevices:     allowedDevices,
		AppArmorProfile:    c.AppArmorProfile,
		AutoCreatedDevices: autoCreatedDevices,
		CapAdd:             c.HostConfig.CapAdd.Slice(),
		CapDrop:            c.HostConfig.CapDrop.Slice(),
		CgroupParent:       defaultCgroupParent,
		GIDMapping:         gidMap,
		GroupAdd:           c.HostConfig.GroupAdd,
		Ipc:                ipc,
		OomScoreAdj:        c.HostConfig.OomScoreAdj,
		Pid:                pid,
		ReadonlyRootfs:     c.HostConfig.ReadonlyRootfs,
		RemappedRoot:       remappedRoot,
		SeccompProfile:     c.SeccompProfile,
		UIDMapping:         uidMap,
		UTS:                uts,
	}
	// A per-container cgroup parent overrides the daemon-level default.
	if c.HostConfig.CgroupParent != "" {
		c.Command.CgroupParent = c.HostConfig.CgroupParent
	}
	return nil
}
// createSpec builds the OCI runtime spec for the container: the common spec,
// the cgroups path (systemd "slice:prefix:id" form or a plain filesystem
// path), resources, devices, rlimits, user, namespaces, capabilities,
// seccomp, mounts, the libnetwork prestart hook, and AppArmor/SELinux labels.
func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	// Resolve the cgroup parent: per-container setting wins over the daemon
	// default, which wins over the built-in "/docker" (or "system.slice"
	// when the systemd cgroup driver is in use).
	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}
	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}
	if useSystemd {
		// systemd expects the "slice:prefix:name" form.
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	// Fill in the Linux-specific sections of the spec, bailing on first error.
	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	// IPC directories (shm/mqueue) must exist before the mounts are computed.
	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	// Collect all mount points (volumes, IPC, tmpfs), sorted for stable and
	// correctly-nested mount ordering.
	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}
	ms = append(ms, c.IpcMounts()...)
	ms = append(ms, c.TmpfsMounts()...)
	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	// For a private (non-shared, non-host) network namespace, register a
	// prestart hook that re-execs the daemon binary ("dockerd libnetwork-setkey")
	// so libnetwork can wire up the sandbox before the container starts.
	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}
			s.Hooks = specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	// AppArmor: explicit profile wins; privileged containers run unconfined;
	// everything else gets the default "docker-default" profile.
	if apparmor.IsEnabled() {
		appArmorProfile := "docker-default"
		if len(c.AppArmorProfile) > 0 {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		}
		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*libcontainerd.Spec)(&s), nil
}
// populateCommand builds c.Command for the Windows execdriver: network
// interface settings, CPU shares, the process config, the image layer paths,
// and the isolation mode (Hyper-V partition or not). It mutates c.Command
// and returns an error for an invalid network mode or layer lookup failure.
func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
	en := &execdriver.Network{
		Interface: nil,
	}

	// Only "none" and "default" network modes are supported on Windows;
	// anything else (e.g. container:<id>) is rejected.
	parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
	switch parts[0] {
	case "none":
	case "default", "": // empty string to support existing containers
		if !c.Config.NetworkDisabled {
			en.Interface = &execdriver.NetworkInterface{
				MacAddress:   c.Config.MacAddress,
				Bridge:       daemon.configStore.bridgeConfig.VirtualSwitchName,
				PortBindings: c.HostConfig.PortBindings,

				// TODO Windows. Include IPAddress. There already is a
				// property IPAddress on execDrive.CommonNetworkInterface,
				// but there is no CLI option in docker to pass through
				// an IPAddress on docker run.
			}
		}
	default:
		return derr.ErrorCodeInvalidNetworkMode.WithArgs(c.HostConfig.NetworkMode)
	}

	// TODO Windows. More resource controls to be implemented later.
	resources := &execdriver.Resources{
		CommonResources: execdriver.CommonResources{
			CPUShares: c.HostConfig.CPUShares,
		},
	}

	processConfig := execdriver.ProcessConfig{
		CommonProcessConfig: execdriver.CommonProcessConfig{
			Entrypoint: c.Path,
			Arguments:  c.Args,
			Tty:        c.Config.Tty,
		},
		ConsoleSize: c.HostConfig.ConsoleSize,
	}
	processConfig.Env = env

	// Resolve the on-disk paths of every image layer, parent-most first.
	var layerPaths []string
	img, err := daemon.imageStore.Get(c.ImageID)
	if err != nil {
		return derr.ErrorCodeGetGraph.WithArgs(c.ImageID, err)
	}
	if img.RootFS != nil && img.RootFS.Type == "layers+base" {
		max := len(img.RootFS.DiffIDs)
		for i := 0; i <= max; i++ {
			// NOTE(review): re-slicing DiffIDs to [:i] relies on the slice's
			// capacity to "grow back" each iteration so ChainID() is computed
			// over successively longer prefixes — fragile but intentional.
			img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i]
			path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID())
			if err != nil {
				return derr.ErrorCodeGetLayer.WithArgs(err)
			}
			// Reverse order, expecting parent most first
			layerPaths = append([]string{path}, layerPaths...)
		}
	}

	// The writable layer's folder, from the RW layer metadata.
	m, err := c.RWLayer.Metadata()
	if err != nil {
		return derr.ErrorCodeGetLayerMetadata.WithArgs(err)
	}
	layerFolder := m["dir"]

	var hvPartition bool
	// Work out the isolation (whether it is a hypervisor partition)
	if c.HostConfig.Isolation.IsDefault() {
		// Not specified by caller. Take daemon default
		hvPartition = windows.DefaultIsolation.IsHyperV()
	} else {
		// Take value specified by caller
		hvPartition = c.HostConfig.Isolation.IsHyperV()
	}

	c.Command = &execdriver.Command{
		CommonCommand: execdriver.CommonCommand{
			ID:            c.ID,
			Rootfs:        c.BaseFS,
			WorkingDir:    c.Config.WorkingDir,
			Network:       en,
			MountLabel:    c.GetMountLabel(),
			Resources:     resources,
			ProcessConfig: processConfig,
			ProcessLabel:  c.GetProcessLabel(),
		},
		FirstStart:  !c.HasBeenStartedBefore,
		LayerFolder: layerFolder,
		LayerPaths:  layerPaths,
		Hostname:    c.Config.Hostname,
		Isolation:   string(c.HostConfig.Isolation),
		ArgsEscaped: c.Config.ArgsEscaped,
		HvPartition: hvPartition,
	}

	return nil
}
// populateCommand builds c.Command for the Windows execdriver, collecting the
// HNS endpoint IDs of all libnetwork-attached networks. When no network
// controller is present it falls back to legacy bridge/none network modes.
// It mutates c.Command and returns an error for an invalid network mode or
// a layer lookup failure.
func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
	en := &execdriver.Network{
		Interface: nil,
	}

	var epList []string

	// Connect all the libnetwork allocated networks to the container.
	// Endpoint collection is best-effort: networks or endpoints that cannot
	// be resolved are skipped rather than failing container start.
	if c.NetworkSettings != nil {
		for n := range c.NetworkSettings.Networks {
			sn, err := daemon.FindNetwork(n)
			if err != nil {
				continue
			}

			ep, err := c.GetEndpointInNetwork(sn)
			if err != nil {
				continue
			}

			data, err := ep.DriverInfo()
			if err != nil {
				continue
			}
			if data["hnsid"] != nil {
				epList = append(epList, data["hnsid"].(string))
			}
		}
	}

	// Legacy path: no libnetwork controller, so configure the interface
	// directly from the network mode ("none" / "default" only on Windows).
	if daemon.netController == nil {
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		switch parts[0] {
		case "none":
		case "default", "": // empty string to support existing containers
			if !c.Config.NetworkDisabled {
				en.Interface = &execdriver.NetworkInterface{
					MacAddress:   c.Config.MacAddress,
					Bridge:       daemon.configStore.bridgeConfig.Iface,
					PortBindings: c.HostConfig.PortBindings,

					// TODO Windows. Include IPAddress. There already is a
					// property IPAddress on execDrive.CommonNetworkInterface,
					// but there is no CLI option in docker to pass through
					// an IPAddress on docker run.
				}
			}
		default:
			return fmt.Errorf("invalid network mode: %s", c.HostConfig.NetworkMode)
		}
	}

	// TODO Windows. More resource controls to be implemented later.
	resources := &execdriver.Resources{
		CommonResources: execdriver.CommonResources{
			CPUShares: c.HostConfig.CPUShares,
		},
	}

	processConfig := execdriver.ProcessConfig{
		CommonProcessConfig: execdriver.CommonProcessConfig{
			Entrypoint: c.Path,
			Arguments:  c.Args,
			Tty:        c.Config.Tty,
		},
		ConsoleSize: c.HostConfig.ConsoleSize,
	}
	processConfig.Env = env

	// Resolve the on-disk paths of every image layer, parent-most first.
	var layerPaths []string
	img, err := daemon.imageStore.Get(c.ImageID)
	if err != nil {
		return fmt.Errorf("Failed to graph.Get on ImageID %s - %s", c.ImageID, err)
	}
	if img.RootFS != nil && img.RootFS.Type == "layers+base" {
		max := len(img.RootFS.DiffIDs)
		for i := 0; i <= max; i++ {
			// NOTE(review): re-slicing DiffIDs to [:i] relies on the slice's
			// capacity to "grow back" each iteration so ChainID() is computed
			// over successively longer prefixes — fragile but intentional.
			img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i]
			path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID())
			if err != nil {
				return fmt.Errorf("Failed to get layer path from graphdriver %s for ImageID %s - %s", daemon.layerStore, img.RootFS.ChainID(), err)
			}
			// Reverse order, expecting parent most first
			layerPaths = append([]string{path}, layerPaths...)
		}
	}

	// The writable layer's folder, from the RW layer metadata.
	m, err := c.RWLayer.Metadata()
	if err != nil {
		return fmt.Errorf("Failed to get layer metadata - %s", err)
	}
	layerFolder := m["dir"]

	var hvPartition bool
	// Work out the isolation (whether it is a hypervisor partition)
	if c.HostConfig.Isolation.IsDefault() {
		// Not specified by caller. Take daemon default
		hvPartition = windows.DefaultIsolation.IsHyperV()
	} else {
		// Take value specified by caller
		hvPartition = c.HostConfig.Isolation.IsHyperV()
	}

	c.Command = &execdriver.Command{
		CommonCommand: execdriver.CommonCommand{
			ID:            c.ID,
			Rootfs:        c.BaseFS,
			WorkingDir:    c.Config.WorkingDir,
			Network:       en,
			MountLabel:    c.GetMountLabel(),
			Resources:     resources,
			ProcessConfig: processConfig,
			ProcessLabel:  c.GetProcessLabel(),
		},
		FirstStart:  !c.HasBeenStartedBefore,
		LayerFolder: layerFolder,
		LayerPaths:  layerPaths,
		Hostname:    c.Config.Hostname,
		Isolation:   string(c.HostConfig.Isolation),
		ArgsEscaped: c.Config.ArgsEscaped,
		HvPartition: hvPartition,
		EpList:      epList,
	}

	return nil
}
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { s := oci.DefaultSpec() if err := daemon.populateCommonSpec(&s, c); err != nil { return nil, err } var cgroupsPath string scopePrefix := "docker" parent := "/docker" useSystemd := UsingSystemd(daemon.configStore) if useSystemd { parent = "system.slice" } if c.HostConfig.CgroupParent != "" { parent = c.HostConfig.CgroupParent } else if daemon.configStore.CgroupParent != "" { parent = daemon.configStore.CgroupParent } if useSystemd { cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath) } else { cgroupsPath = filepath.Join(parent, c.ID) } s.Linux.CgroupsPath = &cgroupsPath if err := setResources(&s, c.HostConfig.Resources); err != nil { return nil, fmt.Errorf("linux runtime spec resources: %v", err) } s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj s.Linux.Sysctl = c.HostConfig.Sysctls p := *s.Linux.CgroupsPath if useSystemd { initPath, err := cgroups.GetInitCgroupDir("cpu") if err != nil { return nil, err } p, _ = cgroups.GetThisCgroupDir("cpu") if err != nil { return nil, err } p = filepath.Join(initPath, p) } // Clean path to guard against things like ../../../BAD parentPath := filepath.Dir(p) if !filepath.IsAbs(parentPath) { parentPath = filepath.Clean("/" + parentPath) } if err := daemon.initCgroupsPath(parentPath); err != nil { return nil, fmt.Errorf("linux init cgroups path: %v", err) } if err := setDevices(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec devices: %v", err) } if err := setRlimits(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) } if err := setUser(&s, c); err != nil { return nil, fmt.Errorf("linux spec user: %v", err) } if err := setNamespaces(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux spec namespaces: %v", err) } if err := setCapabilities(&s, c); err != nil { return nil, fmt.Errorf("linux spec capabilities: %v", err) } if err 
:= setSeccomp(daemon, &s, c); err != nil { return nil, fmt.Errorf("linux seccomp: %v", err) } if err := daemon.setupIpcDirs(c); err != nil { return nil, err } if err := daemon.setupSecretDir(c); err != nil { return nil, err } ms, err := daemon.setupMounts(c) if err != nil { return nil, err } ms = append(ms, c.IpcMounts()...) tmpfsMounts, err := c.TmpfsMounts() if err != nil { return nil, err } ms = append(ms, tmpfsMounts...) if m := c.SecretMount(); m != nil { ms = append(ms, *m) } sort.Sort(mounts(ms)) if err := setMounts(daemon, &s, c, ms); err != nil { return nil, fmt.Errorf("linux mounts: %v", err) } for _, ns := range s.Linux.Namespaces { if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) if err != nil { return nil, err } s.Hooks = specs.Hooks{ Prestart: []specs.Hook{{ Path: target, // FIXME: cross-platform Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, }}, } } } if apparmor.IsEnabled() { var appArmorProfile string if c.AppArmorProfile != "" { appArmorProfile = c.AppArmorProfile } else if c.HostConfig.Privileged { appArmorProfile = "unconfined" } else { appArmorProfile = "docker-default" } if appArmorProfile == "docker-default" { // Unattended upgrades and other fun services can unload AppArmor // profiles inadvertently. Since we cannot store our profile in // /etc/apparmor.d, nor can we practically add other ways of // telling the system to keep our profile loaded, in order to make // sure that we keep the default profile enabled we dynamically // reload it if necessary. if err := ensureDefaultAppArmorProfile(); err != nil { return nil, err } } s.Process.ApparmorProfile = appArmorProfile } s.Process.SelinuxLabel = c.GetProcessLabel() s.Process.NoNewPrivileges = c.NoNewPrivileges s.Linux.MountLabel = c.MountLabel return (*specs.Spec)(&s), nil }
// createSpec builds the container's OCI runtime spec: namespaces, cgroups,
// resources, mounts, hooks, and security labels.
func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
	s := oci.DefaultSpec()
	// populateCommonSpec fills in the platform-common parts of the spec
	// (rootfs, process args/env, hostname, etc.).
	// NOTE(review): an earlier comment here described populateCommand —
	// network mode, namespaces (pid/ipc/uts), resource limits, and the
	// in-container startup Command — which no longer matches this call;
	// confirm against populateCommonSpec's actual behavior.
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	// Resolve the cgroup parent: per-container setting wins over the daemon
	// default, which wins over the built-in "/docker" (or "system.slice"
	// when the systemd cgroup driver is in use).
	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	// systemd expects the "slice:prefix:name" form; otherwise join a plain
	// filesystem-style cgroup path.
	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	// Fill in the Linux-specific sections of the spec, bailing on first error.
	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	// Configure the container's namespaces.
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	// Configure the container's capability set.
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	// Configure the seccomp profile.
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	// Prepare the IPC directories (shm/mqueue) before computing mounts.
	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	// setupMounts returns all of the container's mount points; IPC and tmpfs
	// mounts are appended afterwards.
	mounts, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}
	mounts = append(mounts, c.IpcMounts()...)
	mounts = append(mounts, c.TmpfsMounts()...)

	// Register all of the container's mount points in the spec.
	if err := setMounts(daemon, &s, c, mounts); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	// For a private (non-shared, non-host) network namespace, register a
	// prestart hook pointing at this daemon's own binary (resolved through
	// /proc/<pid>/exe) so libnetwork can wire up the sandbox before the
	// container process starts.
	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}
			s.Hooks = specs.Hooks{
				// Hooks are the commands run at various lifecycle events of
				// the container.
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	// AppArmor: explicit profile wins; privileged containers run unconfined;
	// everything else gets the default "docker-default" profile.
	if apparmor.IsEnabled() {
		appArmorProfile := "docker-default"
		if len(c.AppArmorProfile) > 0 {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		}
		s.Process.ApparmorProfile = appArmorProfile
	}

	// Set the SELinux process label, no-new-privileges flag, and mount label.
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*libcontainerd.Spec)(&s), nil
}