func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { if err := execdriver.SetTerminal(c, pipes); err != nil { return -1, err } configPath, err := d.generateLXCConfig(c) if err != nil { return -1, err } params := []string{ "lxc-start", "-n", c.ID, "-f", configPath, "--", c.InitPath, "-driver", DriverName, } if c.Network.Interface != nil { params = append(params, "-g", c.Network.Interface.Gateway, "-i", fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), ) } params = append(params, "-mtu", strconv.Itoa(c.Network.Mtu), ) if c.User != "" { params = append(params, "-u", c.User) } if c.Privileged { if d.apparmor { params[0] = path.Join(d.root, "lxc-start-unconfined") } params = append(params, "-privileged") } if c.WorkingDir != "" { params = append(params, "-w", c.WorkingDir) } params = append(params, "--", c.Entrypoint) params = append(params, c.Arguments...) if d.sharedRoot { // lxc-start really needs / to be non-shared, or all kinds of stuff break // when lxc-start unmount things and those unmounts propagate to the main // mount namespace. // What we really want is to clone into a new namespace and then // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork // without exec in go we have to do this horrible shell hack... shellString := "mount --make-rslave /; exec " + utils.ShellQuoteArguments(params) params = []string{ "unshare", "-m", "--", "/bin/sh", "-c", shellString, } } var ( name = params[0] arg = params[1:] ) aname, err := exec.LookPath(name) if err != nil { aname = name } c.Path = aname c.Args = append([]string{name}, arg...) if err := c.Start(); err != nil { return -1, err } var ( waitErr error waitLock = make(chan struct{}) ) go func() { if err := c.Wait(); err != nil { if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0 waitErr = err } } close(waitLock) }() // Poll lxc for RUNNING status pid, err := d.waitForStart(c, waitLock) if err != nil { if c.Process != nil { c.Process.Kill() } return -1, err } c.ContainerPid = pid if startCallback != nil { startCallback(c) } <-waitLock return getExitCode(c), waitErr }
func (container *Container) Start() (err error) { container.Lock() defer container.Unlock() if container.State.IsRunning() { return fmt.Errorf("The container %s is already running.", container.ID) } defer func() { if err != nil { container.cleanup() } }() if err := container.EnsureMounted(); err != nil { return err } if container.runtime.networkManager.disabled { container.Config.NetworkDisabled = true container.buildHostnameAndHostsFiles("127.0.1.1") } else { if err := container.allocateNetwork(); err != nil { return err } container.buildHostnameAndHostsFiles(container.NetworkSettings.IPAddress) } // Make sure the config is compatible with the current kernel if container.Config.Memory > 0 && !container.runtime.capabilities.MemoryLimit { log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n") container.Config.Memory = 0 } if container.Config.Memory > 0 && !container.runtime.capabilities.SwapLimit { log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n") container.Config.MemorySwap = -1 } if container.runtime.capabilities.IPv4ForwardingDisabled { log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work") } // Create the requested bind mounts binds := make(map[string]BindMap) // Define illegal container destinations illegalDsts := []string{"/", "."} for _, bind := range container.hostConfig.Binds { // FIXME: factorize bind parsing in parseBind var src, dst, mode string arr := strings.Split(bind, ":") if len(arr) == 2 { src = arr[0] dst = arr[1] mode = "rw" } else if len(arr) == 3 { src = arr[0] dst = arr[1] mode = arr[2] } else { return fmt.Errorf("Invalid bind specification: %s", bind) } // Bail if trying to mount to an illegal destination for _, illegal := range illegalDsts { if dst == illegal { return fmt.Errorf("Illegal bind destination: %s", dst) } } bindMap := BindMap{ SrcPath: src, DstPath: dst, Mode: mode, } binds[path.Clean(dst)] = bindMap } if container.Volumes == nil || len(container.Volumes) == 0 { container.Volumes = make(map[string]string) container.VolumesRW = make(map[string]bool) } // Apply volumes from another container if requested if container.Config.VolumesFrom != "" { containerSpecs := strings.Split(container.Config.VolumesFrom, ",") for _, containerSpec := range containerSpecs { mountRW := true specParts := strings.SplitN(containerSpec, ":", 2) switch len(specParts) { case 0: return fmt.Errorf("Malformed volumes-from specification: %s", container.Config.VolumesFrom) case 2: switch specParts[1] { case "ro": mountRW = false case "rw": // mountRW is already true default: return fmt.Errorf("Malformed volumes-from speficication: %s", containerSpec) } } c := container.runtime.Get(specParts[0]) if c == nil { return fmt.Errorf("Container %s not found. Impossible to mount its volumes", container.ID) } for volPath, id := range c.Volumes { if _, exists := container.Volumes[volPath]; exists { continue } if err := os.MkdirAll(path.Join(container.RootfsPath(), volPath), 0755); err != nil { return err } container.Volumes[volPath] = id if isRW, exists := c.VolumesRW[volPath]; exists { container.VolumesRW[volPath] = isRW && mountRW } } } } // Create the requested volumes if they don't exist for volPath := range container.Config.Volumes { volPath = path.Clean(volPath) // Skip existing volumes if _, exists := container.Volumes[volPath]; exists { continue } var srcPath string var isBindMount bool srcRW := false // If an external bind is defined for this volume, use that as a source if bindMap, exists := binds[volPath]; exists { isBindMount = true srcPath = bindMap.SrcPath if strings.ToLower(bindMap.Mode) == "rw" { srcRW = true } // Otherwise create an directory in $ROOT/volumes/ and use that } else { c, err := container.runtime.volumes.Create(nil, container, "", "", nil) if err != nil { return err } srcPath, err = c.layer() if err != nil { return err } srcRW = true // RW by default } container.Volumes[volPath] = srcPath container.VolumesRW[volPath] = srcRW // Create the mountpoint rootVolPath := path.Join(container.RootfsPath(), volPath) if err := os.MkdirAll(rootVolPath, 0755); err != nil { return err } // Do not copy or change permissions if we are mounting from the host if srcRW && !isBindMount { volList, err := ioutil.ReadDir(rootVolPath) if err != nil { return err } if len(volList) > 0 { srcList, err := ioutil.ReadDir(srcPath) if err != nil { return err } if len(srcList) == 0 { // If the source volume is empty copy files from the root into the volume if err := archive.CopyWithTar(rootVolPath, srcPath); err != nil { return err } var stat syscall.Stat_t if err := syscall.Stat(rootVolPath, &stat); err != nil { return err } var srcStat syscall.Stat_t if err := syscall.Stat(srcPath, &srcStat); err != nil { return err } // Change the source volume's ownership if it differs from the root // files that where just copied if stat.Uid != srcStat.Uid || stat.Gid != srcStat.Gid { if err := os.Chown(srcPath, int(stat.Uid), int(stat.Gid)); err != nil { return err } } } } } } if err := container.generateLXCConfig(); err != nil { return err } var lxcStart string = "lxc-start" if container.hostConfig.Privileged && container.runtime.capabilities.AppArmor { lxcStart = path.Join(container.runtime.config.Root, "lxc-start-unconfined") } params := []string{ lxcStart, "-n", container.ID, "-f", container.lxcConfigPath(), "--", "/.dockerinit", } // Networking if !container.Config.NetworkDisabled { params = append(params, "-g", container.network.Gateway.String()) } // User if container.Config.User != "" { params = append(params, "-u", container.Config.User) } // Setup environment env := []string{ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "container=lxc", "HOSTNAME=" + container.Config.Hostname, } if container.Config.Tty { env = append(env, "TERM=xterm") } // Init any links between the parent and children runtime := container.runtime children, err := runtime.Children(container.Name) if err != nil { return err } if len(children) > 0 { container.activeLinks = make(map[string]*Link, len(children)) // If we encounter an error make sure that we rollback any network // config and ip table changes rollback := func() { for _, link := range container.activeLinks { link.Disable() } container.activeLinks = nil } for p, child := range children { link, err := NewLink(container, child, p, runtime.networkManager.bridgeIface) if err != nil { rollback() return err } container.activeLinks[link.Alias()] = link if err := link.Enable(); err != nil { rollback() return err } for _, envVar := range link.ToEnv() { env = append(env, envVar) } } } for _, elem := range container.Config.Env { env = append(env, elem) } if err := container.generateEnvConfig(env); err != nil { return err } if container.Config.WorkingDir != "" { workingDir := path.Clean(container.Config.WorkingDir) utils.Debugf("[working dir] working dir is %s", workingDir) if err := os.MkdirAll(path.Join(container.RootfsPath(), workingDir), 0755); err != nil { return nil } params = append(params, "-w", workingDir, ) } // Program params = append(params, "--", container.Path) params = append(params, container.Args...) if RootIsShared() { // lxc-start really needs / to be non-shared, or all kinds of stuff break // when lxc-start unmount things and those unmounts propagate to the main // mount namespace. // What we really want is to clone into a new namespace and then // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork // without exec in go we have to do this horrible shell hack... shellString := "mount --make-rslave /; exec " + utils.ShellQuoteArguments(params) params = []string{ "unshare", "-m", "--", "/bin/sh", "-c", shellString, } } container.cmd = exec.Command(params[0], params[1:]...) // Setup logging of stdout and stderr to disk if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil { return err } if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil { return err } container.cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} if container.Config.Tty { err = container.startPty() } else { err = container.start() } if err != nil { return err } // FIXME: save state on disk *first*, then converge // this way disk state is used as a journal, eg. we can restore after crash etc. container.State.SetRunning(container.cmd.Process.Pid) // Init the lock container.waitLock = make(chan struct{}) container.ToDisk() go container.monitor() defer utils.Debugf("Container running: %v", container.State.IsRunning()) // We wait for the container to be fully running. // Timeout after 5 seconds. In case of broken pipe, just retry. // Note: The container can run and finish correctly before // the end of this loop for now := time.Now(); time.Since(now) < 5*time.Second; { // If the container dies while waiting for it, just return if !container.State.IsRunning() { return nil } output, err := exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() if err != nil { utils.Debugf("Error with lxc-info: %s (%s)", err, output) output, err = exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() if err != nil { utils.Debugf("Second Error with lxc-info: %s (%s)", err, output) return err } } if strings.Contains(string(output), "RUNNING") { return nil } utils.Debugf("Waiting for the container to start (running: %v): %s", container.State.IsRunning(), bytes.TrimSpace(output)) time.Sleep(50 * time.Millisecond) } if container.State.IsRunning() { return ErrContainerStartTimeout } return ErrContainerStart }
func (container *Container) Start() (err error) { container.Lock() defer container.Unlock() if container.State.IsRunning() { return fmt.Errorf("The container %s is already running.", container.ID) } defer func() { if err != nil { container.cleanup() } }() if err := container.EnsureMounted(); err != nil { return err } if container.runtime.networkManager.disabled { container.Config.NetworkDisabled = true container.buildHostnameAndHostsFiles("127.0.1.1") } else { if err := container.allocateNetwork(); err != nil { return err } container.buildHostnameAndHostsFiles(container.NetworkSettings.IPAddress) } // Make sure the config is compatible with the current kernel if container.Config.Memory > 0 && !container.runtime.capabilities.MemoryLimit { log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n") container.Config.Memory = 0 } if container.Config.Memory > 0 && !container.runtime.capabilities.SwapLimit { log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n") container.Config.MemorySwap = -1 } if container.runtime.capabilities.IPv4ForwardingDisabled { log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work") } if container.Volumes == nil || len(container.Volumes) == 0 { container.Volumes = make(map[string]string) container.VolumesRW = make(map[string]bool) } // Apply volumes from another container if requested if err := container.applyExternalVolumes(); err != nil { return err } if err := container.createVolumes(); err != nil { return err } if err := container.generateLXCConfig(); err != nil { return err } var lxcStart string = "lxc-start" if container.hostConfig.Privileged && container.runtime.capabilities.AppArmor { lxcStart = path.Join(container.runtime.config.Root, "lxc-start-unconfined") } params := []string{ lxcStart, "-n", container.ID, "-f", container.lxcConfigPath(), "--", "/.dockerinit", } // Networking if !container.Config.NetworkDisabled { network := container.NetworkSettings params = append(params, "-g", network.Gateway, "-i", fmt.Sprintf("%s/%d", network.IPAddress, network.IPPrefixLen), "-mtu", strconv.Itoa(container.runtime.config.Mtu), ) } // User if container.Config.User != "" { params = append(params, "-u", container.Config.User) } // Setup environment env := []string{ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "HOSTNAME=" + container.Config.Hostname, } if container.Config.Tty { env = append(env, "TERM=xterm") } if container.hostConfig.Privileged { params = append(params, "-privileged") } // Init any links between the parent and children runtime := container.runtime children, err := runtime.Children(container.Name) if err != nil { return err } if len(children) > 0 { container.activeLinks = make(map[string]*Link, len(children)) // If we encounter an error make sure that we rollback any network // config and ip table changes rollback := func() { for _, link := range container.activeLinks { link.Disable() } container.activeLinks = nil } for p, child := range children { link, err := NewLink(container, child, p, runtime.networkManager.bridgeIface) if err != nil { rollback() return err } container.activeLinks[link.Alias()] = link if err := link.Enable(); err != nil { rollback() return err } for _, envVar := range link.ToEnv() { env = append(env, envVar) } } } for _, elem := range container.Config.Env { env = append(env, elem) } if err := container.generateEnvConfig(env); err != nil { return err } if container.Config.WorkingDir != "" { workingDir := path.Clean(container.Config.WorkingDir) utils.Debugf("[working dir] working dir is %s", workingDir) if err := os.MkdirAll(path.Join(container.RootfsPath(), workingDir), 0755); err != nil { return nil } params = append(params, "-w", workingDir, ) } // Program params = append(params, "--", container.Path) params = append(params, container.Args...) if RootIsShared() { // lxc-start really needs / to be non-shared, or all kinds of stuff break // when lxc-start unmount things and those unmounts propagate to the main // mount namespace. // What we really want is to clone into a new namespace and then // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork // without exec in go we have to do this horrible shell hack... shellString := "mount --make-rslave /; exec " + utils.ShellQuoteArguments(params) params = []string{ "unshare", "-m", "--", "/bin/sh", "-c", shellString, } } root := container.RootfsPath() envPath, err := container.EnvConfigPath() if err != nil { return err } // Mount docker specific files into the containers root fs if err := mount.Mount(runtime.sysInitPath, path.Join(root, "/.dockerinit"), "none", "bind,ro"); err != nil { return err } if err := mount.Mount(envPath, path.Join(root, "/.dockerenv"), "none", "bind,ro"); err != nil { return err } if err := mount.Mount(container.ResolvConfPath, path.Join(root, "/etc/resolv.conf"), "none", "bind,ro"); err != nil { return err } if container.HostnamePath != "" && container.HostsPath != "" { if err := mount.Mount(container.HostnamePath, path.Join(root, "/etc/hostname"), "none", "bind,ro"); err != nil { return err } if err := mount.Mount(container.HostsPath, path.Join(root, "/etc/hosts"), "none", "bind,ro"); err != nil { return err } } // Mount user specified volumes for r, v := range container.Volumes { mountAs := "ro" if container.VolumesRW[v] { mountAs = "rw" } if err := mount.Mount(v, path.Join(root, r), "none", fmt.Sprintf("bind,%s", mountAs)); err != nil { return err } } container.cmd = exec.Command(params[0], params[1:]...) // Setup logging of stdout and stderr to disk if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil { return err } if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil { return err } container.cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} if container.Config.Tty { err = container.startPty() } else { err = container.start() } if err != nil { return err } // FIXME: save state on disk *first*, then converge // this way disk state is used as a journal, eg. we can restore after crash etc. container.State.SetRunning(container.cmd.Process.Pid) // Init the lock container.waitLock = make(chan struct{}) container.ToDisk() go container.monitor() defer utils.Debugf("Container running: %v", container.State.IsRunning()) // We wait for the container to be fully running. // Timeout after 5 seconds. In case of broken pipe, just retry. // Note: The container can run and finish correctly before // the end of this loop for now := time.Now(); time.Since(now) < 5*time.Second; { // If the container dies while waiting for it, just return if !container.State.IsRunning() { return nil } output, err := exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() if err != nil { utils.Debugf("Error with lxc-info: %s (%s)", err, output) output, err = exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() if err != nil { utils.Debugf("Second Error with lxc-info: %s (%s)", err, output) return err } } if strings.Contains(string(output), "RUNNING") { return nil } utils.Debugf("Waiting for the container to start (running: %v): %s", container.State.IsRunning(), bytes.TrimSpace(output)) time.Sleep(50 * time.Millisecond) } if container.State.IsRunning() { return ErrContainerStartTimeout } return ErrContainerStart }