Example #1
func TestValidateUsernamespace(t *testing.T) {
	config := &configs.Config{
		Rootfs: "/var",
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{Type: configs.NEWUSER},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err != nil {
		t.Errorf("expected error to not occur %+v", err)
	}
}
Example #2
func TestValidateSecurityWithMaskPaths(t *testing.T) {
	config := &configs.Config{
		Rootfs:    "/var",
		MaskPaths: []string{"/proc/kcores"},
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{Type: configs.NEWNS},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err != nil {
		t.Errorf("Expected error to not occur: %+v", err)
	}
}
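The inverse case should also hold: masking paths without a private mount namespace is not enforceable, so validation should fail. A companion sketch, assuming the validator requires NEWNS whenever MaskPaths or ReadonlyPaths are set:

func TestValidateSecurityWithoutNEWNS(t *testing.T) {
	config := &configs.Config{
		Rootfs:    "/var",
		MaskPaths: []string{"/proc/kcores"},
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err == nil {
		t.Error("Expected error to occur but it was nil")
	}
}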
Example #3
func TestValidateHostname(t *testing.T) {
	config := &configs.Config{
		Rootfs:   "/var",
		Hostname: "runc",
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{Type: configs.NEWUTS},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err != nil {
		t.Errorf("Expected error to not occur: %+v", err)
	}
}
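Setting a hostname only makes sense with a private UTS namespace, so the negative case should be rejected. A companion sketch under that assumption:

func TestValidateHostnameWithoutUTSNamespace(t *testing.T) {
	config := &configs.Config{
		Rootfs:   "/var",
		Hostname: "runc",
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err == nil {
		t.Error("Expected error to occur but it was nil")
	}
}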
Example #4
func TestValidateUsernamespace(t *testing.T) {
	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
		t.Skip("userns is unsupported")
	}
	config := &configs.Config{
		Rootfs: "/var",
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{Type: configs.NEWUSER},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err != nil {
		t.Errorf("expected error to not occur %+v", err)
	}
}
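The mirror-image failure is declaring UID mappings without enabling the user namespace. A hedged companion sketch, assuming the validator rejects mappings when NEWUSER is absent from the config:

func TestValidateUsernamespaceWithoutUserNS(t *testing.T) {
	config := &configs.Config{
		Rootfs: "/var",
		UidMappings: []configs.IDMap{
			{ContainerID: 123, HostID: 1000, Size: 1},
		},
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err == nil {
		t.Error("Expected error to occur but it was nil")
	}
}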
Example #5
func TestValidateSysctlWithSameNs(t *testing.T) {
	config := &configs.Config{
		Rootfs: "/var",
		Sysctl: map[string]string{"net.ctl": "ctl"},
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{
					Type: configs.NEWNET,
					Path: "/proc/self/ns/net",
				},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err == nil {
		t.Error("Expected error to occur but it was nil")
	}
}
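Example #5 fails because a net.* sysctl is set while the config joins the host's network namespace via Path. Dropping the Path, so the container gets a private network namespace, should make the same sysctl valid; a sketch under that assumption:

func TestValidateSysctlWithPrivateNs(t *testing.T) {
	config := &configs.Config{
		Rootfs: "/var",
		Sysctl: map[string]string{"net.ctl": "ctl"},
		Namespaces: configs.Namespaces(
			[]configs.Namespace{
				{Type: configs.NEWNET},
			},
		),
	}

	validator := validate.New()
	err := validator.Validate(config)
	if err != nil {
		t.Errorf("Expected error to not occur: %+v", err)
	}
}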
Example #6
// New returns the docker default configuration for libcontainer
func New() *configs.Config {
	container := &configs.Config{
		Capabilities: []string{
			"CHOWN",
			"DAC_OVERRIDE",
			"FSETID",
			"FOWNER",
			"MKNOD",
			"NET_RAW",
			"SETGID",
			"SETUID",
			"SETFCAP",
			"SETPCAP",
			"NET_BIND_SERVICE",
			"SYS_CHROOT",
			"KILL",
			"AUDIT_WRITE",
		},
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: "NEWNS"},
			{Type: "NEWUTS"},
			{Type: "NEWIPC"},
			{Type: "NEWPID"},
			{Type: "NEWNET"},
		}),
		Cgroups: &configs.Cgroup{
			Parent:           "docker",
			AllowAllDevices:  false,
			MemorySwappiness: -1,
		},
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
			{
				Source:      "cgroup",
				Destination: "/sys/fs/cgroup",
				Device:      "cgroup",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
		MaskPaths: []string{
			"/proc/kcore",
			"/proc/latency_stats",
			"/proc/timer_stats",
		},
		ReadonlyPaths: []string{
			"/proc/asound",
			"/proc/bus",
			"/proc/fs",
			"/proc/irq",
			"/proc/sys",
			"/proc/sysrq-trigger",
		},
	}

	if apparmor.IsEnabled() {
		container.AppArmorProfile = "docker-default"
	}

	return container
}
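New returns a template rather than a runnable config: the caller still supplies per-container fields such as Rootfs before handing it to a factory. A hypothetical usage sketch (the state directory, rootfs path, and container ID are placeholders, not part of the source above):

func createFromTemplate() (libcontainer.Container, error) {
	config := New()
	config.Rootfs = "/containers/demo/rootfs" // must point at a prepared rootfs
	config.Hostname = "demo"                  // allowed: the template includes NEWUTS

	// "/run/containers" is an example state directory, not a fixed path
	factory, err := libcontainer.New("/run/containers", libcontainer.Cgroupfs)
	if err != nil {
		return nil, err
	}
	return factory.Create("demo", config)
}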
Example #7
func TestContainerState(t *testing.T) {
	if testing.Short() {
		return
	}
	root, err := newTestRoot()
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	l, err := os.Readlink("/proc/1/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}

	config := newTemplateConfig(rootfs)
	config.Namespaces = configs.Namespaces([]configs.Namespace{
		{Type: configs.NEWNS},
		{Type: configs.NEWUTS},
		// host for IPC
		//{Type: configs.NEWIPC},
		{Type: configs.NEWPID},
		{Type: configs.NEWNET},
	})

	container, err := factory.Create("test", config)
	if err != nil {
		t.Fatal(err)
	}
	defer container.Destroy()

	stdinR, stdinW, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	p := &libcontainer.Process{
		Args:  []string{"cat"},
		Env:   standardEnvironment,
		Stdin: stdinR,
	}
	err = container.Start(p)
	if err != nil {
		t.Fatal(err)
	}
	stdinR.Close()
	defer stdinW.Close()

	st, err := container.State()
	if err != nil {
		t.Fatal(err)
	}

	l1, err := os.Readlink(st.NamespacePaths[configs.NEWIPC])
	if err != nil {
		t.Fatal(err)
	}
	if l1 != l {
		t.Fatal("Container using non-host ipc namespace")
	}
	stdinW.Close()
	waitProcess(p, t)
}
Example #8
func (l *LibcontainerBackend) Run(job *host.Job, runConfig *RunConfig, rateLimitBucket *RateLimitBucket) (err error) {
	log := l.logger.New("fn", "run", "job.id", job.ID)

	// if the job has been stopped, just return
	if l.state.GetJob(job.ID).ForceStop {
		log.Info("skipping start of stopped job")
		return nil
	}

	log.Info("starting job", "job.artifact.uri", job.ImageArtifact.URI, "job.args", job.Config.Args)

	defer func() {
		if err != nil {
			l.state.SetStatusFailed(job.ID, err)
		}
	}()

	if job.Partition == "" {
		job.Partition = defaultPartition
	}
	if _, ok := l.partitionCGroups[job.Partition]; !ok {
		return fmt.Errorf("host: invalid job partition %q", job.Partition)
	}

	wait := func(ch chan struct{}) {
		if rateLimitBucket != nil {
			// unblock the rate limiter whilst waiting
			rateLimitBucket.Put()
			defer rateLimitBucket.Wait()
		}
		<-ch
	}
	if !job.Config.HostNetwork {
		wait(l.networkConfigured)
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		wait(l.discoverdConfigured)
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &Container{
		ID:   job.ID,
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			log.Error("error requesting ip", "err", err)
			return err
		}
		log.Info("obtained ip", "network", l.bridgeNet.String(), "ip", container.IP.String())
		l.state.SetContainerIP(job.ID, container.IP)
	}
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	log.Info("pulling image")
	artifactURI, err := l.resolveDiscoverdURI(job.ImageArtifact.URI)
	if err != nil {
		log.Error("error resolving artifact URI", "err", err)
		return err
	}
	// TODO(lmars): stream pull progress (maybe to the app log?)
	imageID, err := l.pinkerton.PullDocker(artifactURI, ioutil.Discard)
	if err != nil {
		log.Error("error pulling image", "err", err)
		return err
	}

	log.Info("reading image config")
	imageConfig, err := readDockerImageConfig(imageID)
	if err != nil {
		log.Error("error reading image config", "err", err)
		return err
	}

	log.Info("checking out image")
	var rootPath string
	// creating an AUFS mount can fail intermittently with EINVAL, so try a
	// few times (see https://github.com/flynn/flynn/issues/2044)
	for start := time.Now(); time.Since(start) < time.Second; time.Sleep(50 * time.Millisecond) {
		rootPath, err = l.pinkerton.Checkout(job.ID, imageID)
		if err == nil || !strings.HasSuffix(err.Error(), "invalid argument") {
			break
		}
	}
	if err != nil {
		log.Error("error checking out image", "err", err)
		return err
	}
	container.RootPath = rootPath

	config := &configs.Config{
		Rootfs:       rootPath,
		Capabilities: defaultCapabilities,
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
		}),
		Cgroups: &configs.Cgroup{
			Path: filepath.Join("/flynn", job.Partition, job.ID),
			Resources: &configs.Resources{
				AllowedDevices: configs.DefaultAllowedDevices,
				Memory:         defaultMemory,
			},
		},
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
		},
		Devices: configs.DefaultAutoCreatedDevices,
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Destination: "/sys/fs/cgroup",
				Device:      "cgroup",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
	}

	if spec, ok := job.Resources[resource.TypeMaxFD]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max fd limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: syscall.RLIMIT_NOFILE,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	if spec, ok := job.Resources[resource.TypeMaxProcs]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max processes limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: RLIMIT_NPROC,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	log.Info("mounting container directories and files")
	jobIDParts := strings.SplitN(job.ID, "-", 2)
	var hostname string
	if len(jobIDParts) == 1 {
		hostname = jobIDParts[0]
	} else {
		hostname = jobIDParts[1]
	}
	if len(hostname) > 64 {
		hostname = hostname[:64]
	}
	if err := os.MkdirAll(filepath.Join(rootPath, "etc"), 0755); err != nil {
		log.Error("error creating /etc in container root", "err", err)
		return err
	}
	if err := writeHostname(filepath.Join(rootPath, "etc/hosts"), hostname); err != nil {
		log.Error("error writing hosts file", "err", err)
		return err
	}
	if err := os.MkdirAll(filepath.Join(rootPath, ".container-shared"), 0700); err != nil {
		log.Error("error createing .container-shared", "err", err)
		return err
	}

	addBindMount(config, l.InitPath, "/.containerinit", false)
	addBindMount(config, l.resolvConf, "/etc/resolv.conf", false)
	for _, m := range job.Config.Mounts {
		if m.Target == "" {
			return errors.New("host: invalid empty mount target")
		}
		addBindMount(config, m.Target, m.Location, m.Writeable)
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.vman.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			log.Error("missing required volume", "volumeID", v.VolumeID, "err", err)
			return err
		}
		addBindMount(config, vol.Location(), v.Target, v.Writeable)
	}

	// mutating job state, take state write lock
	l.state.mtx.Lock()
	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			err := fmt.Errorf("unknown port proto %q", p.Proto)
			log.Error("error allocating port", "proto", p.Proto, "err", err)
			return err
		}

		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}

	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}
	// release the write lock, we won't mutate global structures from here on out
	l.state.mtx.Unlock()

	initConfig := &containerinit.Config{
		Args:          job.Config.Args,
		TTY:           job.Config.TTY,
		OpenStdin:     job.Config.Stdin,
		WorkDir:       job.Config.WorkingDir,
		Resources:     job.Resources,
		FileArtifacts: job.FileArtifacts,
	}
	if !job.Config.HostNetwork {
		initConfig.IP = container.IP.String() + "/24"
		initConfig.Gateway = l.bridgeAddr.String()
	}
	if initConfig.WorkDir == "" {
		initConfig.WorkDir = imageConfig.WorkingDir
	}
	if job.Config.Uid > 0 {
		initConfig.User = strconv.Itoa(job.Config.Uid)
	} else if imageConfig.User != "" {
		// TODO: check and lookup user from image config
	}
	if len(job.Config.Args) == 0 {
		initConfig.Args = append(imageConfig.Entrypoint, imageConfig.Cmd...)
	}
	for _, port := range job.Config.Ports {
		initConfig.Ports = append(initConfig.Ports, port)
	}

	log.Info("writing config")
	l.envMtx.RLock()
	err = writeContainerConfig(filepath.Join(rootPath, ".containerconfig"), initConfig,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": hostname,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		log.Error("error writing config", "err", err)
		return err
	}

	if job.Config.HostNetwork {
		// allow host network jobs to configure the network
		config.Capabilities = append(config.Capabilities, "CAP_NET_ADMIN")
	} else {
		ifaceName, err := netutils.GenerateIfaceName("veth", 4)
		if err != nil {
			return err
		}
		config.Hostname = hostname
		config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWNET})
		config.Networks = []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
			{
				Type:              "veth",
				Name:              "eth0",
				Bridge:            l.bridgeName,
				Address:           initConfig.IP,
				Gateway:           initConfig.Gateway,
				Mtu:               1500,
				HostInterfaceName: ifaceName,
			},
		}
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		config.Cgroups.Resources.Memory = *spec.Limit
	}
	if spec, ok := job.Resources[resource.TypeCPU]; ok && spec.Limit != nil {
		config.Cgroups.Resources.CpuShares = milliCPUToShares(*spec.Limit)
	}

	c, err := l.factory.Create(job.ID, config)
	if err != nil {
		return err
	}

	process := &libcontainer.Process{
		Args: []string{"/.containerinit", job.ID},
		User: "******",
	}
	if err := c.Run(process); err != nil {
		c.Destroy()
		return err
	}
	// TODO: detach? an update will detach all containers anyway
	go process.Wait()

	container.container = c

	// TODO: still necessary?
	l.state.SetContainerID(job.ID, job.ID)

	go container.watch(nil, nil)

	log.Info("job started")
	return nil
}
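addBindMount belongs to the surrounding backend and is not shown here; Example #10 below uses an equivalent bindMount helper that returns a *configs.Mount for config.Mounts. A plausible sketch of such a helper (the exact flag set is an assumption):

func bindMount(source, target string, writeable bool) *configs.Mount {
	flags := syscall.MS_BIND | syscall.MS_REC
	if !writeable {
		flags |= syscall.MS_RDONLY
	}
	return &configs.Mount{
		Source:      source,
		Destination: target,
		Device:      "bind",
		Flags:       flags,
	}
}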
Example #9
// newTemplateConfig returns a base template for running a container
//
// it uses a network strategy of just setting a loopback interface
// and the default setup for devices
func newTemplateConfig(rootfs string) *configs.Config {
	return &configs.Config{
		Rootfs: rootfs,
		Capabilities: []string{
			"CHOWN",
			"DAC_OVERRIDE",
			"FSETID",
			"FOWNER",
			"MKNOD",
			"NET_RAW",
			"SETGID",
			"SETUID",
			"SETFCAP",
			"SETPCAP",
			"NET_BIND_SERVICE",
			"SYS_CHROOT",
			"KILL",
			"AUDIT_WRITE",
		},
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
			{Type: configs.NEWNET},
		}),
		Cgroups: &configs.Cgroup{
			Name:            "test",
			Parent:          "integration",
			AllowAllDevices: false,
			AllowedDevices:  configs.DefaultAllowedDevices,
		},
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
		},
		Devices:  configs.DefaultAutoCreatedDevices,
		Hostname: "integration",
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "mqueue",
				Destination: "/dev/mqueue",
				Device:      "mqueue",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
		Networks: []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
		},
		Rlimits: []configs.Rlimit{
			{
				Type: syscall.RLIMIT_NOFILE,
				Hard: uint64(1025),
				Soft: uint64(1025),
			},
		},
	}
}
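Integration tests take this template, override the field under test, and run a short-lived process in the container, as Example #7 does. A minimal sketch in that style (newRootfs, remove, factory, standardEnvironment, and waitProcess are helpers from the same test harness):

func TestTemplateHostname(t *testing.T) {
	if testing.Short() {
		return
	}
	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	config := newTemplateConfig(rootfs)
	config.Hostname = "template-test"

	container, err := factory.Create("test", config)
	if err != nil {
		t.Fatal(err)
	}
	defer container.Destroy()

	var stdout bytes.Buffer
	p := &libcontainer.Process{
		Args:   []string{"hostname"},
		Env:    standardEnvironment,
		Stdout: &stdout,
	}
	if err := container.Start(p); err != nil {
		t.Fatal(err)
	}
	waitProcess(p, t)

	if !strings.Contains(stdout.String(), "template-test") {
		t.Fatalf("unexpected hostname %q", stdout.String())
	}
}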
Example #10
func (l *LibcontainerBackend) Run(job *host.Job, runConfig *RunConfig, rateLimitBucket *RateLimitBucket) (err error) {
	log := l.Logger.New("fn", "run", "job.id", job.ID)

	// if the job has been stopped, just return
	if l.State.GetJob(job.ID).ForceStop {
		log.Info("skipping start of stopped job")
		return nil
	}

	log.Info("starting job", "job.args", job.Config.Args)

	defer func() {
		if err != nil {
			l.State.SetStatusFailed(job.ID, err)
		}
	}()

	if job.Partition == "" {
		job.Partition = defaultPartition
	}
	if _, ok := l.PartitionCGroups[job.Partition]; !ok {
		return fmt.Errorf("host: invalid job partition %q", job.Partition)
	}

	wait := func(ch chan struct{}) {
		if rateLimitBucket != nil {
			// unblock the rate limiter whilst waiting
			rateLimitBucket.Put()
			defer rateLimitBucket.Wait()
		}
		<-ch
	}
	if !job.Config.HostNetwork {
		wait(l.networkConfigured)
	}
	if _, ok := job.Config.Env["DISCOVERD"]; !ok {
		wait(l.discoverdConfigured)
	}

	if runConfig == nil {
		runConfig = &RunConfig{}
	}
	container := &Container{
		ID: job.ID,
		MuxConfig: &logmux.Config{
			AppID:   job.Metadata["flynn-controller.app"],
			HostID:  l.State.id,
			JobType: job.Metadata["flynn-controller.type"],
			JobID:   job.ID,
		},
		l:    l,
		job:  job,
		done: make(chan struct{}),
	}
	if !job.Config.HostNetwork {
		container.IP, err = l.ipalloc.RequestIP(l.bridgeNet, runConfig.IP)
		if err != nil {
			log.Error("error requesting ip", "err", err)
			return err
		}
		log.Info("obtained ip", "network", l.bridgeNet.String(), "ip", container.IP.String())
		l.State.SetContainerIP(job.ID, container.IP)
	}
	defer func() {
		if err != nil {
			go container.cleanup()
		}
	}()

	log.Info("setting up rootfs")
	rootPath := filepath.Join("/var/lib/flynn/image/mnt", job.ID)
	tmpPath := filepath.Join("/var/lib/flynn/image/tmp", job.ID)
	for _, path := range []string{rootPath, tmpPath} {
		if err := os.MkdirAll(path, 0755); err != nil {
			log.Error("error setting up rootfs", "err", err)
			return err
		}
	}
	rootMount, err := l.rootOverlayMount(job)
	if err != nil {
		log.Error("error setting up rootfs", "err", err)
		return err
	}

	container.RootPath = rootPath
	container.TmpPath = tmpPath

	config := &configs.Config{
		Rootfs:       rootPath,
		Capabilities: defaultCapabilities,
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
		}),
		Cgroups: &configs.Cgroup{
			Path: filepath.Join("/flynn", job.Partition, job.ID),
			Resources: &configs.Resources{
				AllowedDevices: configs.DefaultAllowedDevices,
				Memory:         defaultMemory,
			},
		},
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
		},
		Devices: configs.DefaultAutoCreatedDevices,
		Mounts: append([]*configs.Mount{rootMount}, []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Destination: "/sys/fs/cgroup",
				Device:      "cgroup",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		}...),
	}

	if spec, ok := job.Resources[resource.TypeMaxFD]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max fd limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: syscall.RLIMIT_NOFILE,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	if spec, ok := job.Resources[resource.TypeMaxProcs]; ok && spec.Limit != nil && spec.Request != nil {
		log.Info(fmt.Sprintf("setting max processes limit to %d / %d", *spec.Request, *spec.Limit))
		config.Rlimits = append(config.Rlimits, configs.Rlimit{
			Type: RLIMIT_NPROC,
			Hard: uint64(*spec.Limit),
			Soft: uint64(*spec.Request),
		})
	}

	log.Info("mounting container directories and files")
	jobIDParts := strings.SplitN(job.ID, "-", 2)
	var hostname string
	if len(jobIDParts) == 1 {
		hostname = jobIDParts[0]
	} else {
		hostname = jobIDParts[1]
	}
	if len(hostname) > 64 {
		hostname = hostname[:64]
	}
	if err := os.MkdirAll(filepath.Join(tmpPath, "etc"), 0755); err != nil {
		log.Error("error creating container /etc", "err", err)
		return err
	}
	etcHosts := filepath.Join(tmpPath, "etc/hosts")
	if err := writeHostname(etcHosts, hostname); err != nil {
		log.Error("error writing hosts file", "err", err)
		return err
	}
	sharedDir := filepath.Join(tmpPath, ".container-shared")
	if err := os.MkdirAll(sharedDir, 0700); err != nil {
		log.Error("error creating .container-shared", "err", err)
		return err
	}

	config.Mounts = append(config.Mounts,
		bindMount(l.InitPath, "/.containerinit", false),
		bindMount(l.resolvConf, "/etc/resolv.conf", false),
		bindMount(etcHosts, "/etc/hosts", true),
		bindMount(sharedDir, "/.container-shared", true),
	)
	for _, m := range job.Config.Mounts {
		if m.Target == "" {
			return errors.New("host: invalid empty mount target")
		}
		config.Mounts = append(config.Mounts, bindMount(m.Target, m.Location, m.Writeable))
	}

	// apply volumes
	for _, v := range job.Config.Volumes {
		vol := l.VolManager.GetVolume(v.VolumeID)
		if vol == nil {
			err := fmt.Errorf("job %s required volume %s, but that volume does not exist", job.ID, v.VolumeID)
			log.Error("missing required volume", "volumeID", v.VolumeID, "err", err)
			return err
		}
		config.Mounts = append(config.Mounts, bindMount(vol.Location(), v.Target, v.Writeable))
	}

	// mutating job state, take state write lock
	l.State.mtx.Lock()
	if job.Config.Env == nil {
		job.Config.Env = make(map[string]string)
	}
	for i, p := range job.Config.Ports {
		if p.Proto != "tcp" && p.Proto != "udp" {
			err := fmt.Errorf("unknown port proto %q", p.Proto)
			log.Error("error allocating port", "proto", p.Proto, "err", err)
			return err
		}

		if p.Port == 0 {
			job.Config.Ports[i].Port = 5000 + i
		}
		if i == 0 {
			job.Config.Env["PORT"] = strconv.Itoa(job.Config.Ports[i].Port)
		}
		job.Config.Env[fmt.Sprintf("PORT_%d", i)] = strconv.Itoa(job.Config.Ports[i].Port)
	}

	if !job.Config.HostNetwork {
		job.Config.Env["EXTERNAL_IP"] = container.IP.String()
	}
	// release the write lock, we won't mutate global structures from here on out
	l.State.mtx.Unlock()

	initConfig := &containerinit.Config{
		Args:      job.Config.Args,
		TTY:       job.Config.TTY,
		OpenStdin: job.Config.Stdin,
		WorkDir:   job.Config.WorkingDir,
		Uid:       job.Config.Uid,
		Gid:       job.Config.Gid,
		Resources: job.Resources,
		LogLevel:  l.InitLogLevel,
	}
	if !job.Config.HostNetwork {
		initConfig.IP = container.IP.String() + "/24"
		initConfig.Gateway = l.bridgeAddr.String()
	}
	for _, port := range job.Config.Ports {
		initConfig.Ports = append(initConfig.Ports, port)
	}

	log.Info("writing config")
	configPath := filepath.Join(tmpPath, ".containerconfig")
	l.envMtx.RLock()
	err = writeContainerConfig(configPath, initConfig,
		map[string]string{
			"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
			"TERM": "xterm",
			"HOME": "/",
		},
		l.defaultEnv,
		job.Config.Env,
		map[string]string{
			"HOSTNAME": hostname,
		},
	)
	l.envMtx.RUnlock()
	if err != nil {
		log.Error("error writing config", "err", err)
		return err
	}
	config.Mounts = append(config.Mounts, bindMount(configPath, "/.containerconfig", false))

	if job.Config.HostNetwork {
		// allow host network jobs to configure the network
		config.Capabilities = append(config.Capabilities, "CAP_NET_ADMIN")
	} else {
		ifaceName, err := netutils.GenerateIfaceName("veth", 4)
		if err != nil {
			return err
		}
		config.Hostname = hostname
		config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWNET})
		config.Networks = []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
			{
				Type:              "veth",
				Name:              "eth0",
				Bridge:            l.BridgeName,
				Address:           initConfig.IP,
				Gateway:           initConfig.Gateway,
				Mtu:               1500,
				HostInterfaceName: ifaceName,
			},
		}
	}
	if spec, ok := job.Resources[resource.TypeMemory]; ok && spec.Limit != nil {
		config.Cgroups.Resources.Memory = *spec.Limit
	}
	if spec, ok := job.Resources[resource.TypeCPU]; ok && spec.Limit != nil {
		config.Cgroups.Resources.CpuShares = milliCPUToShares(*spec.Limit)
	}

	c, err := l.factory.Create(job.ID, config)
	if err != nil {
		return err
	}

	process := &libcontainer.Process{
		Args: []string{"/.containerinit", job.ID},
		User: "******",
	}
	if err := c.Run(process); err != nil {
		c.Destroy()
		return err
	}
	go process.Wait()

	container.container = c

	go container.watch(nil, nil)

	log.Info("job started")
	return nil
}
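milliCPUToShares is referenced above but not defined in this example. A plausible sketch of the conversion, using the common 1 CPU = 1024 shares convention (the constants and the minimum are assumptions):

const (
	milliCPUToCPU = 1000
	sharesPerCPU  = 1024
	minShares     = 2
)

// milliCPUToShares converts a milliCPU limit to cgroup cpu.shares.
func milliCPUToShares(milliCPU int64) int64 {
	if milliCPU == 0 {
		// unspecified: let the kernel pick its default
		return 0
	}
	shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	return shares
}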
Example #11
func main() {
	rootfs := os.Getenv("ROOTFS")

	// note: the first argument is the directory where libcontainer stores
	// container state, not the rootfs itself
	factory, err := libcontainer.New(rootfs, libcontainer.Cgroupfs)
	if err != nil {
		fmt.Println(err)
		return
	}

	defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
	config := &configs.Config{
		Rootfs: rootfs,
		Capabilities: []string{
			"CAP_CHOWN",
			"CAP_DAC_OVERRIDE",
			"CAP_FSETID",
			"CAP_FOWNER",
			"CAP_MKNOD",
			"CAP_NET_RAW",
			"CAP_SETGID",
			"CAP_SETUID",
			"CAP_SETFCAP",
			"CAP_SETPCAP",
			"CAP_NET_BIND_SERVICE",
			"CAP_SYS_CHROOT",
			"CAP_KILL",
			"CAP_AUDIT_WRITE",
		},
		Namespaces: configs.Namespaces([]configs.Namespace{
			{Type: configs.NEWNS},
			{Type: configs.NEWUTS},
			{Type: configs.NEWIPC},
			{Type: configs.NEWPID},
			{Type: configs.NEWNET},
		}),
		Cgroups: &configs.Cgroup{
			Name:   "test-container",
			Parent: "system",
			Resources: &configs.Resources{
				MemorySwappiness: -1,
				AllowAllDevices:  false,
				AllowedDevices:   configs.DefaultAllowedDevices,
			},
		},
		MaskPaths: []string{
			"/proc/kcore",
		},
		ReadonlyPaths: []string{
			"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
		},
		Devices:  configs.DefaultAutoCreatedDevices,
		Hostname: "testing",
		Mounts: []*configs.Mount{
			{
				Source:      "proc",
				Destination: "/proc",
				Device:      "proc",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "tmpfs",
				Destination: "/dev",
				Device:      "tmpfs",
				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
				Data:        "mode=755",
			},
			{
				Source:      "devpts",
				Destination: "/dev/pts",
				Device:      "devpts",
				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
			},
			{
				Device:      "tmpfs",
				Source:      "shm",
				Destination: "/dev/shm",
				Data:        "mode=1777,size=65536k",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "mqueue",
				Destination: "/dev/mqueue",
				Device:      "mqueue",
				Flags:       defaultMountFlags,
			},
			{
				Source:      "sysfs",
				Destination: "/sys",
				Device:      "sysfs",
				Flags:       defaultMountFlags | syscall.MS_RDONLY,
			},
		},
		Networks: []*configs.Network{
			{
				Type:    "loopback",
				Address: "127.0.0.1/0",
				Gateway: "localhost",
			},
		},
		Rlimits: []configs.Rlimit{
			{
				Type: syscall.RLIMIT_NOFILE,
				Hard: uint64(1025),
				Soft: uint64(1025),
			},
		},
	}

	container, err := factory.Create("abcde", config)

	if err != nil {
		fmt.Println(err)
		return
	}

	process := &libcontainer.Process{
		Args:   []string{"/bin/sh"},
		Env:    []string{"PATH=/bin"},
		Stdin:  os.Stdin,
		Stdout: os.Stdout,
		Stderr: os.Stderr,
	}

	err = container.Start(process)
	if err != nil {
		fmt.Println(err)
		container.Destroy()
		return
	}

	// wait for the process to finish.
	status, err := process.Wait()

	if err != nil {
		fmt.Println(err)
	}

	fmt.Println(status)

	container.Destroy()

	fmt.Println("done")
}
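One caveat about Example #11: libcontainer re-execs the running binary to act as the container's init process, so a complete program also registers an init hook before main runs and creates the factory with libcontainer.InitArgs(os.Args[0], "init"). A sketch following the libcontainer README pattern:

func init() {
	if len(os.Args) > 1 && os.Args[1] == "init" {
		runtime.GOMAXPROCS(1)
		runtime.LockOSThread()
		factory, _ := libcontainer.New("")
		if err := factory.StartInitialization(); err != nil {
			fmt.Println(err)
			os.Exit(1)
		}
		panic("init returned; this line should never be executed")
	}
}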