func TestValidateUsernamespace(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWUSER}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("expected error to not occur %+v", err) } }
func TestValidateSecurityWithMaskPaths(t *testing.T) { config := &configs.Config{ Rootfs: "/var", MaskPaths: []string{"/proc/kcores"}, Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWNS}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } }
func TestValidateHostname(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Hostname: "runc", Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWUTS}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("Expected error to not occur: %+v", err) } }
func TestValidateUsernamespace(t *testing.T) { if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { t.Skip("userns is unsupported") } config := &configs.Config{ Rootfs: "/var", Namespaces: configs.Namespaces( []configs.Namespace{ {Type: configs.NEWUSER}, }, ), } validator := validate.New() err := validator.Validate(config) if err != nil { t.Errorf("expected error to not occur %+v", err) } }
func TestValidateSysctlWithSameNs(t *testing.T) { config := &configs.Config{ Rootfs: "/var", Sysctl: map[string]string{"net.ctl": "ctl"}, Namespaces: configs.Namespaces( []configs.Namespace{ { Type: configs.NEWNET, Path: "/proc/self/ns/net", }, }, ), } validator := validate.New() err := validator.Validate(config) if err == nil { t.Error("Expected error to occur but it was nil") } }
// New returns the docker default configuration for libcontainer func New() *configs.Config { container := &configs.Config{ Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", "FSETID", "FOWNER", "MKNOD", "NET_RAW", "SETGID", "SETUID", "SETFCAP", "SETPCAP", "NET_BIND_SERVICE", "SYS_CHROOT", "KILL", "AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: "NEWNS"}, {Type: "NEWUTS"}, {Type: "NEWIPC"}, {Type: "NEWPID"}, {Type: "NEWNET"}, }), Cgroups: &configs.Cgroup{ Parent: "docker", AllowAllDevices: false, MemorySwappiness: -1, }, Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Source: "sysfs", Destination: "/sys", Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, { Source: "cgroup", Destination: "/sys/fs/cgroup", Device: "cgroup", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, MaskPaths: []string{ "/proc/kcore", "/proc/latency_stats", "/proc/timer_stats", }, ReadonlyPaths: []string{ "/proc/asound", "/proc/bus", "/proc/fs", "/proc/irq", "/proc/sys", "/proc/sysrq-trigger", }, } if apparmor.IsEnabled() { container.AppArmorProfile = "docker-default" } return container }
// TestContainerState starts a container that shares the host IPC namespace
// (NEWIPC is deliberately omitted from the namespace list) and asserts that
// the container's reported IPC namespace link matches the host's (pid 1).
func TestContainerState(t *testing.T) {
	if testing.Short() {
		return
	}
	root, err := newTestRoot()
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	// l is the host's IPC namespace identity, read from pid 1.
	l, err := os.Readlink("/proc/1/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}

	config := newTemplateConfig(rootfs)
	config.Namespaces = configs.Namespaces([]configs.Namespace{
		{Type: configs.NEWNS},
		{Type: configs.NEWUTS},
		// host for IPC
		//{Type: configs.NEWIPC},
		{Type: configs.NEWPID},
		{Type: configs.NEWNET},
	})

	container, err := factory.Create("test", config)
	if err != nil {
		t.Fatal(err)
	}
	defer container.Destroy()

	// Use `cat` on a pipe so the container process stays alive until we
	// close the write end below.
	stdinR, stdinW, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	p := &libcontainer.Process{
		Args:  []string{"cat"},
		Env:   standardEnvironment,
		Stdin: stdinR,
	}
	err = container.Start(p)
	if err != nil {
		t.Fatal(err)
	}
	stdinR.Close()
	// NOTE(review): stdinW is closed both here (deferred) and explicitly
	// below; the second Close's error is ignored, so this is harmless but
	// redundant.
	defer stdinW.Close()

	st, err := container.State()
	if err != nil {
		t.Fatal(err)
	}

	// The container's IPC namespace path must resolve to the same
	// namespace as the host's.
	l1, err := os.Readlink(st.NamespacePaths[configs.NEWIPC])
	if err != nil {
		t.Fatal(err)
	}
	if l1 != l {
		t.Fatal("Container using non-host ipc namespace")
	}

	// Closing stdin lets `cat` exit so the container can be reaped.
	stdinW.Close()
	waitProcess(p, t)
}
// Run starts the given job in a new libcontainer container.
//
// High-level flow: bail out early if the job was force-stopped, wait for
// network/discoverd readiness, allocate an IP for non host-network jobs,
// pull and check out the image, build the libcontainer config (namespaces,
// cgroups, mounts, rlimits), write the containerinit config, then create
// and start the container. The deferred handlers mark the job failed and
// clean the container up asynchronously on any error (err is a named
// return so they observe every failure path).
func (l *LibcontainerBackend) Run(job *host.Job, runConfig *RunConfig, rateLimitBucket *RateLimitBucket) (err error) {
	log := l.logger.New("fn", "run", "job.id", job.ID)

	// if the job has been stopped, just return
	if l.state.GetJob(job.ID).ForceStop {
		log.Info("skipping start of stopped job")
		return nil
	}

	log.Info("starting job", "job.artifact.uri", job.ImageArtifact.URI, "job.args", job.Config.Args)

	defer func() {
		if err != nil {
			l.state.SetStatusFailed(job.ID, err)
		}
	}()

	if job.Partition == "" {
		job.Partition = defaultPartition
	}
	if _, ok := l.partitionCGroups[job.Partition]; !ok {
		return fmt.Errorf("host: invalid job partition %q", job.Partition)
	}

	// wait blocks on ch; while blocked, the caller's rate-limiter slot is
	// released so other jobs can make progress.
	wait := func(ch chan struct{}) {
		if rateLimitBucket != nil {
			// unblock the rate limiter whilst waiting
			rateLimitBucket.Put()
			defer rateLimitBucket.Wait()
		}
		<-ch
	}
	if !job.Config.HostNetwork {
		wait(l.networkConfigured)
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		wait(l.discoverdConfigured)
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &Container{
		ID:   job.ID,
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			log.Error("error requesting ip", "err", err)
			return err
		}
		log.Info("obtained ip", "network", l.bridgeNet.String(), "ip", container.IP.String())
		l.state.SetContainerIP(job.ID, container.IP)
	}
	// From here on, a failure must also release container resources
	// (allocated IP, checked-out rootfs).
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	log.Info("pulling image")
	artifactURI, err := l.resolveDiscoverdURI(job.ImageArtifact.URI)
	if err != nil {
		log.Error("error resolving artifact URI", "err", err)
		return err
	}
	// TODO(lmars): stream pull progress (maybe to the app log?)
	imageID, err := l.pinkerton.PullDocker(artifactURI, ioutil.Discard)
	if err != nil {
		log.Error("error pulling image", "err", err)
		return err
	}

	log.Info("reading image config")
	imageConfig, err := readDockerImageConfig(imageID)
	if err != nil {
		log.Error("error reading image config", "err", err)
		return err
	}

	log.Info("checking out image")
	var rootPath string
	// creating an AUFS mount can fail intermittently with EINVAL, so try a
	// few times (see https://github.com/flynn/flynn/issues/2044)
	for start := time.Now(); time.Since(start) < time.Second; time.Sleep(50 * time.Millisecond) {
		rootPath, err = l.pinkerton.Checkout(job.ID, imageID)
		if err == nil || !strings.HasSuffix(err.Error(), "invalid argument") {
			break
		}
	}
	if err != nil {
		log.Error("error checking out image", "err", err)
		return err
	}
	container.RootPath = rootPath

	config := &configs.Config{
		Rootfs:       rootPath,
		Capabilities: defaultCapabilities,
		// NEWNET is appended later, only for non host-network jobs.
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
		}),
		Cgroups: &configs.Cgroup{
			Path: filepath.Join("/flynn", job.Partition, job.ID),
			Resources: &configs.Resources{
				AllowedDevices: configs.DefaultAllowedDevices,
				Memory:         defaultMemory,
			},
		},
		// Hide kernel interfaces that leak host information.
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys",
			"/proc/sysrq-trigger",
			"/proc/irq",
			"/proc/bus",
		},
		Devices: configs.DefaultAutoCreatedDevices,
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Destination: "/sys/fs/cgroup",
				Device:      "cgroup",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
	}

	if spec, ok := job.Resources[resource.TypeMaxFD]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max fd limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: syscall.RLIMIT_NOFILE,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}
	if spec, ok := job.Resources[resource.TypeMaxProcs]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max processes limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: RLIMIT_NPROC,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	log.Info("mounting container directories and files")
	// hostname is the job ID with its leading component stripped, truncated
	// to the kernel's 64-byte hostname limit.
	jobIDParts := strings.SplitN(job.ID, "-", 2)
	var hostname string
	if len(jobIDParts) == 1 {
		hostname = jobIDParts[0]
	} else {
		hostname = jobIDParts[1]
	}
	if len(hostname) > 64 {
		hostname = hostname[:64]
	}
	if err := os.MkdirAll(filepath.Join(rootPath, "etc"), 0755); err != nil {
		log.Error("error creating /etc in container root", "err", err)
		return err
	}
	if err := writeHostname(filepath.Join(rootPath, "etc/hosts"), hostname); err != nil {
		log.Error("error writing hosts file", "err", err)
		return err
	}
	// NOTE(review): "createing" below is a typo in the log message.
	if err := os.MkdirAll(filepath.Join(rootPath, ".container-shared"), 0700); err != nil {
		log.Error("error createing .container-shared", "err", err)
		return err
	}
	addBindMount(config, l.InitPath, "/.containerinit", false)
	addBindMount(config, l.resolvConf, "/etc/resolv.conf", false)
	for _, m := range job.Config.Mounts {
		if m.Target == "" {
			return errors.New("host: invalid empty mount target")
		}
		addBindMount(config, m.Target, m.Location, m.Writeable)
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.vman.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			log.Error("missing required volume", "volumeID", v.VolumeID, "err", err)
			return err
		}
		addBindMount(config, vol.Location(), v.Target, v.Writeable)
	}

	// mutating job state, take state write lock
	l.state.mtx.Lock()
	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			err := fmt.Errorf("unknown port proto %q", p.Proto)
			log.Error("error allocating port", "proto", p.Proto, "err", err)
			// NOTE(review): this returns while l.state.mtx is still held
			// (there is no deferred Unlock) — looks like it would leave the
			// state lock locked forever; confirm against the State type.
			return err
		}
		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}
	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}
	// release the write lock, we won't mutate global structures from here on out
	l.state.mtx.Unlock()

	initConfig := &containerinit.Config{
		Args:          job.Config.Args,
		TTY:           job.Config.TTY,
		OpenStdin:     job.Config.Stdin,
		WorkDir:       job.Config.WorkingDir,
		Resources:     job.Resources,
		FileArtifacts: job.FileArtifacts,
	}
	if !job.Config.HostNetwork {
		initConfig.IP = container.IP.String() + "/24"
		initConfig.Gateway = l.bridgeAddr.String()
	}
	// Fall back to the image's working directory / entrypoint when the job
	// doesn't specify them.
	if initConfig.WorkDir == "" {
		initConfig.WorkDir = imageConfig.WorkingDir
	}
	if job.Config.Uid > 0 {
		initConfig.User = strconv.Itoa(job.Config.Uid)
	} else if imageConfig.User != "" {
		// TODO: check and lookup user from image config
	}
	if len(job.Config.Args) == 0 {
		initConfig.Args = append(imageConfig.Entrypoint, imageConfig.Cmd...)
	}
	for _, port := range job.Config.Ports {
		initConfig.Ports = append(initConfig.Ports, port)
	}

	log.Info("writing config")
	// envMtx guards l.defaultEnv; later maps override earlier ones.
	l.envMtx.RLock()
	err = writeContainerConfig(filepath.Join(rootPath, ".containerconfig"), initConfig,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": hostname,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		log.Error("error writing config", "err", err)
		return err
	}

	if job.Config.HostNetwork {
		// allow host network jobs to configure the network
		config.Capabilities = append(config.Capabilities, "CAP_NET_ADMIN")
	} else {
		ifaceName, err := netutils.GenerateIfaceName("veth", 4)
		if err != nil {
			return err
		}
		config.Hostname = hostname
		config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWNET})
		config.Networks = []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
			{
				Type:              "veth",
				Name:              "eth0",
				Bridge:            l.bridgeName,
				Address:           initConfig.IP,
				Gateway:           initConfig.Gateway,
				Mtu:               1500,
				HostInterfaceName: ifaceName,
			},
		}
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		config.Cgroups.Resources.Memory = *spec.Limit
	}
	if spec, ok := job.Resources[resource.TypeCPU]; ok && spec.Limit != nil {
		config.Cgroups.Resources.CpuShares = milliCPUToShares(*spec.Limit)
	}

	c, err := l.factory.Create(job.ID, config)
	if err != nil {
		return err
	}
	process := &libcontainer.Process{
		Args: []string{"/.containerinit", job.ID},
		User: "******",
	}
	if err := c.Run(process); err != nil {
		c.Destroy()
		return err
	}
	// TODO: detach? an update will detach all container anyway
	go process.Wait()
	container.container = c
	// TODO: still necessary?
	l.state.SetContainerID(job.ID, job.ID)
	go container.watch(nil, nil)
	log.Info("job started")
	return nil
}
// newTemplateConfig returns a base template for running a container
//
// it uses a network strategy of just setting a loopback interface
// and the default setup for devices
func newTemplateConfig(rootfs string) *configs.Config {
	return &configs.Config{
		Rootfs: rootfs,
		// Docker-style default capability whitelist (no CAP_ prefix).
		Capabilities: []string{
			"CHOWN",
			"DAC_OVERRIDE",
			"FSETID",
			"FOWNER",
			"MKNOD",
			"NET_RAW",
			"SETGID",
			"SETUID",
			"SETFCAP",
			"SETPCAP",
			"NET_BIND_SERVICE",
			"SYS_CHROOT",
			"KILL",
			"AUDIT_WRITE",
		},
		// Fresh namespaces of every kind except user.
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
			{Type: configs.NEWNET},
		}),
		Cgroups: &configs.Cgroup{
			Name:            "test",
			Parent:          "integration",
			AllowAllDevices: false,
			AllowedDevices:  configs.DefaultAllowedDevices,
		},
		// Hide kernel interfaces that leak host information.
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys",
			"/proc/sysrq-trigger",
			"/proc/irq",
			"/proc/bus",
		},
		Devices:  configs.DefaultAutoCreatedDevices,
		Hostname: "integration",
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "mqueue",
				Destination: "/dev/mqueue",
				Device:      "mqueue",
				Flags:       defaultMountFlags,
			},
			// /sys is read-only: tests must not reconfigure the host kernel.
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
		Networks: []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
		},
		Rlimits: []configs.Rlimit{
			{
				Type: syscall.RLIMIT_NOFILE,
				Hard: uint64(1025),
				Soft: uint64(1025),
			},
		},
	}
}
// Run starts the given job in a new libcontainer container.
//
// High-level flow: bail out early if the job was force-stopped, wait for
// network/discoverd readiness, allocate an IP for non host-network jobs,
// set up an overlay rootfs under /var/lib/flynn/image, build the
// libcontainer config (namespaces, cgroups, mounts, rlimits), write the
// containerinit config, then create and start the container. The deferred
// handlers mark the job failed and clean the container up asynchronously
// on any error (err is a named return so they observe every failure path).
func (l *LibcontainerBackend) Run(job *host.Job, runConfig *RunConfig, rateLimitBucket *RateLimitBucket) (err error) {
	log := l.Logger.New("fn", "run", "job.id", job.ID)

	// if the job has been stopped, just return
	if l.State.GetJob(job.ID).ForceStop {
		log.Info("skipping start of stopped job")
		return nil
	}

	log.Info("starting job", "job.args", job.Config.Args)

	defer func() {
		if err != nil {
			l.State.SetStatusFailed(job.ID, err)
		}
	}()

	if job.Partition == "" {
		job.Partition = defaultPartition
	}
	if _, ok := l.PartitionCGroups[job.Partition]; !ok {
		return fmt.Errorf("host: invalid job partition %q", job.Partition)
	}

	// wait blocks on ch; while blocked, the caller's rate-limiter slot is
	// released so other jobs can make progress.
	wait := func(ch chan struct{}) {
		if rateLimitBucket != nil {
			// unblock the rate limiter whilst waiting
			rateLimitBucket.Put()
			defer rateLimitBucket.Wait()
		}
		<-ch
	}
	if !job.Config.HostNetwork {
		wait(l.networkConfigured)
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		wait(l.discoverdConfigured)
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &Container{
		ID: job.ID,
		// MuxConfig routes the container's logs through the log mux, keyed
		// by the controller metadata attached to the job.
		MuxConfig: &logmux.Config{
			AppID:   job.Metadata["flynn-controller.app"],
			HostID:  l.State.id,
			JobType: job.Metadata["flynn-controller.type"],
			JobID:   job.ID,
		},
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			log.Error("error requesting ip", "err", err)
			return err
		}
		log.Info("obtained ip", "network", l.bridgeNet.String(), "ip", container.IP.String())
		l.State.SetContainerIP(job.ID, container.IP)
	}
	// From here on, a failure must also release container resources
	// (allocated IP, rootfs directories).
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	log.Info("setting up rootfs")
	// rootPath is the overlay mount point; tmpPath holds the writable layer
	// and per-container files injected via bind mounts.
	rootPath := filepath.Join("/var/lib/flynn/image/mnt", job.ID)
	tmpPath := filepath.Join("/var/lib/flynn/image/tmp", job.ID)
	for _, path := range []string{rootPath, tmpPath} {
		if err := os.MkdirAll(path, 0755); err != nil {
			log.Error("error setting up rootfs", "err", err)
			return err
		}
	}
	rootMount, err := l.rootOverlayMount(job)
	if err != nil {
		log.Error("error setting up rootfs", "err", err)
		return err
	}

	container.RootPath = rootPath
	container.TmpPath = tmpPath

	config := &configs.Config{
		Rootfs:       rootPath,
		Capabilities: defaultCapabilities,
		// NEWNET is appended later, only for non host-network jobs.
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
		}),
		Cgroups: &configs.Cgroup{
			Path: filepath.Join("/flynn", job.Partition, job.ID),
			Resources: &configs.Resources{
				AllowedDevices: configs.DefaultAllowedDevices,
				Memory:         defaultMemory,
			},
		},
		// Hide kernel interfaces that leak host information.
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys",
			"/proc/sysrq-trigger",
			"/proc/irq",
			"/proc/bus",
		},
		Devices: configs.DefaultAutoCreatedDevices,
		// The overlay root mount must come first so the rest mount inside it.
		Mounts: append([]*configs.Mount{rootMount}, []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Destination: "/sys/fs/cgroup",
				Device:      "cgroup",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		}...),
	}

	if spec, ok := job.Resources[resource.TypeMaxFD]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max fd limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: syscall.RLIMIT_NOFILE,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}
	if spec, ok := job.Resources[resource.TypeMaxProcs]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max processes limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: RLIMIT_NPROC,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	log.Info("mounting container directories and files")
	// hostname is the job ID with its leading component stripped, truncated
	// to the kernel's 64-byte hostname limit.
	jobIDParts := strings.SplitN(job.ID, "-", 2)
	var hostname string
	if len(jobIDParts) == 1 {
		hostname = jobIDParts[0]
	} else {
		hostname = jobIDParts[1]
	}
	if len(hostname) > 64 {
		hostname = hostname[:64]
	}
	if err := os.MkdirAll(filepath.Join(tmpPath, "etc"), 0755); err != nil {
		log.Error("error creating container /etc", "err", err)
		return err
	}
	etcHosts := filepath.Join(tmpPath, "etc/hosts")
	if err := writeHostname(etcHosts, hostname); err != nil {
		log.Error("error writing hosts file", "err", err)
		return err
	}
	sharedDir := filepath.Join(tmpPath, ".container-shared")
	if err := os.MkdirAll(sharedDir, 0700); err != nil {
		log.Error("error creating .container-shared", "err", err)
		return err
	}
	config.Mounts = append(config.Mounts,
		bindMount(l.InitPath, "/.containerinit", false),
		bindMount(l.resolvConf, "/etc/resolv.conf", false),
		bindMount(etcHosts, "/etc/hosts", true),
		bindMount(sharedDir, "/.container-shared", true),
	)
	for _, m := range job.Config.Mounts {
		if m.Target == "" {
			return errors.New("host: invalid empty mount target")
		}
		config.Mounts = append(config.Mounts, bindMount(m.Target, m.Location, m.Writeable))
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.VolManager.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			log.Error("missing required volume", "volumeID", v.VolumeID, "err", err)
			return err
		}
		config.Mounts = append(config.Mounts, bindMount(vol.Location(), v.Target, v.Writeable))
	}

	// mutating job state, take state write lock
	l.State.mtx.Lock()
	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			err := fmt.Errorf("unknown port proto %q", p.Proto)
			log.Error("error allocating port", "proto", p.Proto, "err", err)
			// NOTE(review): this returns while l.State.mtx is still held
			// (there is no deferred Unlock) — looks like it would leave the
			// state lock locked forever; confirm against the State type.
			return err
		}
		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}
	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}
	// release the write lock, we won't mutate global structures from here on out
	l.State.mtx.Unlock()

	initConfig := &containerinit.Config{
		Args:      job.Config.Args,
		TTY:       job.Config.TTY,
		OpenStdin: job.Config.Stdin,
		WorkDir:   job.Config.WorkingDir,
		Uid:       job.Config.Uid,
		Gid:       job.Config.Gid,
		Resources: job.Resources,
		LogLevel:  l.InitLogLevel,
	}
	if !job.Config.HostNetwork {
		initConfig.IP = container.IP.String() + "/24"
		initConfig.Gateway = l.bridgeAddr.String()
	}
	for _, port := range job.Config.Ports {
		initConfig.Ports = append(initConfig.Ports, port)
	}

	log.Info("writing config")
	configPath := filepath.Join(tmpPath, ".containerconfig")
	// envMtx guards l.defaultEnv; later maps override earlier ones.
	l.envMtx.RLock()
	err = writeContainerConfig(configPath, initConfig,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": hostname,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		log.Error("error writing config", "err", err)
		return err
	}
	config.Mounts = append(config.Mounts, bindMount(configPath, "/.containerconfig", false))

	if job.Config.HostNetwork {
		// allow host network jobs to configure the network
		config.Capabilities = append(config.Capabilities, "CAP_NET_ADMIN")
	} else {
		ifaceName, err := netutils.GenerateIfaceName("veth", 4)
		if err != nil {
			return err
		}
		config.Hostname = hostname
		config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWNET})
		config.Networks = []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
			{
				Type:              "veth",
				Name:              "eth0",
				Bridge:            l.BridgeName,
				Address:           initConfig.IP,
				Gateway:           initConfig.Gateway,
				Mtu:               1500,
				HostInterfaceName: ifaceName,
			},
		}
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		config.Cgroups.Resources.Memory = *spec.Limit
	}
	if spec, ok := job.Resources[resource.TypeCPU]; ok && spec.Limit != nil {
		config.Cgroups.Resources.CpuShares = milliCPUToShares(*spec.Limit)
	}

	c, err := l.factory.Create(job.ID, config)
	if err != nil {
		return err
	}
	process := &libcontainer.Process{
		Args: []string{"/.containerinit", job.ID},
		User: "******",
	}
	if err := c.Run(process); err != nil {
		c.Destroy()
		return err
	}
	go process.Wait()
	container.container = c
	go container.watch(nil, nil)
	log.Info("job started")
	return nil
}
func main() { rootfs := os.Getenv("ROOTFS") factory, err := libcontainer.New(rootfs, libcontainer.Cgroupfs) if err != nil { fmt.Println(err) } defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV config := &configs.Config{ Rootfs: rootfs, Capabilities: []string{ "CAP_CHOWN", "CAP_DAC_OVERRIDE", "CAP_FSETID", "CAP_FOWNER", "CAP_MKNOD", "CAP_NET_RAW", "CAP_SETGID", "CAP_SETUID", "CAP_SETFCAP", "CAP_SETPCAP", "CAP_NET_BIND_SERVICE", "CAP_SYS_CHROOT", "CAP_KILL", "CAP_AUDIT_WRITE", }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, {Type: configs.NEWIPC}, {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), Cgroups: &configs.Cgroup{ Name: "test-container", Parent: "system", Resources: &configs.Resources{ MemorySwappiness: -1, AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, }, MaskPaths: []string{ "/proc/kcore", }, ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, Devices: configs.DefaultAutoCreatedDevices, Hostname: "testing", Mounts: []*configs.Mount{ { Source: "proc", Destination: "/proc", Device: "proc", Flags: defaultMountFlags, }, { Source: "tmpfs", Destination: "/dev", Device: "tmpfs", Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, Data: "mode=755", }, { Source: "devpts", Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, { Device: "tmpfs", Source: "shm", Destination: "/dev/shm", Data: "mode=1777,size=65536k", Flags: defaultMountFlags, }, { Source: "mqueue", Destination: "/dev/mqueue", Device: "mqueue", Flags: defaultMountFlags, }, { Source: "sysfs", Destination: "/sys", Device: "sysfs", Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, Networks: []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: uint64(1025), Soft: 
uint64(1025), }, }, } container, err := factory.Create("abcde", config) if err != nil { fmt.Println(err) } process := &libcontainer.Process{ Args: []string{"/bin/sh"}, Env: []string{"PATH=/bin"}, Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, } err = container.Start(process) if err != nil { fmt.Println(err) } // wait for the process to finish. status, err := process.Wait() if err != nil { fmt.Println(err) } fmt.Println(status) container.Destroy() fmt.Println("done") }