func (d *Daemon) SetupStorageDriver() error {
	var err error

	lvmVgName := daemonConfig["storage.lvm_vg_name"].Get()
	zfsPoolName := daemonConfig["storage.zfs_pool_name"].Get()

	if lvmVgName != "" {
		d.Storage, err = newStorage(d, storageTypeLvm)
		if err != nil {
			shared.LogErrorf("Could not initialize storage type LVM: %s - falling back to dir", err)
		} else {
			return nil
		}
	} else if zfsPoolName != "" {
		d.Storage, err = newStorage(d, storageTypeZfs)
		if err != nil {
			shared.LogErrorf("Could not initialize storage type ZFS: %s - falling back to dir", err)
		} else {
			return nil
		}
	} else if d.BackingFs == "btrfs" {
		d.Storage, err = newStorage(d, storageTypeBtrfs)
		if err != nil {
			shared.LogErrorf("Could not initialize storage type btrfs: %s - falling back to dir", err)
		} else {
			return nil
		}
	}

	d.Storage, err = newStorage(d, storageTypeDir)
	return err
}
func deviceEventListener(d *Daemon) {
	chNetlinkCPU, chNetlinkNetwork, chUSB, err := deviceNetlinkListener()
	if err != nil {
		shared.LogErrorf("scheduler: couldn't setup netlink listener")
		return
	}

	for {
		select {
		case e := <-chNetlinkCPU:
			if len(e) != 2 {
				shared.LogErrorf("Scheduler: received an invalid cpu hotplug event")
				continue
			}

			if !cgCpusetController {
				continue
			}

			shared.LogDebugf("Scheduler: cpu: %s is now %s: re-balancing", e[0], e[1])
			deviceTaskBalance(d)
		case e := <-chNetlinkNetwork:
			if len(e) != 2 {
				shared.LogErrorf("Scheduler: received an invalid network hotplug event")
				continue
			}

			if !cgNetPrioController {
				continue
			}

			shared.LogDebugf("Scheduler: network: %s has been added: updating network priorities", e[0])
			deviceNetworkPriority(d, e[0])
			networkAutoAttach(d, e[0])
		case e := <-chUSB:
			deviceUSBEvent(d, e)
		case e := <-deviceSchedRebalance:
			if len(e) != 3 {
				shared.LogErrorf("Scheduler: received an invalid rebalance event")
				continue
			}

			if !cgCpusetController {
				continue
			}

			shared.LogDebugf("Scheduler: %s %s %s: re-balancing", e[0], e[1], e[2])
			deviceTaskBalance(d)
		}
	}
}
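
// deviceUSBEvent is called for every USB hotplug event. It walks all running
// containers and, for each matching "usb" device entry, creates or removes
// the corresponding unix device node inside the container.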
func deviceUSBEvent(d *Daemon, usb usbDevice) {
	containers, err := dbContainersList(d.db, cTypeRegular)
	if err != nil {
		shared.LogError("problem loading containers list", log.Ctx{"err": err})
		return
	}

	for _, name := range containers {
		containerIf, err := containerLoadByName(d, name)
		if err != nil {
			continue
		}

		c, ok := containerIf.(*containerLXC)
		if !ok {
			shared.LogErrorf("got device event on non-LXC container?")
			return
		}

		if !c.IsRunning() {
			continue
		}

		devices := c.ExpandedDevices()
		for _, name := range devices.DeviceNames() {
			m := devices[name]
			if m["type"] != "usb" {
				continue
			}

			if m["vendorid"] != usb.vendor || (m["productid"] != "" && m["productid"] != usb.product) {
				continue
			}

			if usb.action == "add" {
				err := c.insertUnixDeviceNum(m, usb.major, usb.minor, usb.path)
				if err != nil {
					shared.LogError("failed to create usb device", log.Ctx{"err": err, "usb": usb, "container": c.Name()})
					return
				}
			} else if usb.action == "remove" {
				err := c.removeUnixDeviceNum(m, usb.major, usb.minor, usb.path)
				if err != nil {
					shared.LogError("failed to remove usb device", log.Ctx{"err": err, "usb": usb, "container": c.Name()})
					return
				}
			} else {
				shared.LogError("unknown action for usb device", log.Ctx{"usb": usb})
				continue
			}
		}
	}
}
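
// createCmd registers an API endpoint on the daemon's HTTP mux for the given
// command, enforcing trust checks, optionally capturing the request body for
// debugging, and dispatching to the per-method handlers.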
func (d *Daemon) createCmd(version string, c Command) {
	var uri string
	if c.name == "" {
		uri = fmt.Sprintf("/%s", version)
	} else {
		uri = fmt.Sprintf("/%s/%s", version, c.name)
	}

	d.mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")

		if d.isTrustedClient(r) {
			shared.LogDebug(
				"handling",
				log.Ctx{"method": r.Method, "url": r.URL.RequestURI(), "ip": r.RemoteAddr})
		} else if r.Method == "GET" && c.untrustedGet {
			shared.LogDebug(
				"allowing untrusted GET",
				log.Ctx{"url": r.URL.RequestURI(), "ip": r.RemoteAddr})
		} else if r.Method == "POST" && c.untrustedPost {
			shared.LogDebug(
				"allowing untrusted POST",
				log.Ctx{"url": r.URL.RequestURI(), "ip": r.RemoteAddr})
		} else {
			shared.LogWarn(
				"rejecting request from untrusted client",
				log.Ctx{"ip": r.RemoteAddr})
			Forbidden.Render(w)
			return
		}

		if debug && r.Method != "GET" && isJSONRequest(r) {
			newBody := &bytes.Buffer{}
			captured := &bytes.Buffer{}
			multiW := io.MultiWriter(newBody, captured)
			if _, err := io.Copy(multiW, r.Body); err != nil {
				InternalError(err).Render(w)
				return
			}

			r.Body = shared.BytesReadCloser{Buf: newBody}
			shared.DebugJson(captured)
		}

		var resp Response
		resp = NotImplemented

		switch r.Method {
		case "GET":
			if c.get != nil {
				resp = c.get(d, r)
			}
		case "PUT":
			if c.put != nil {
				resp = c.put(d, r)
			}
		case "POST":
			if c.post != nil {
				resp = c.post(d, r)
			}
		case "DELETE":
			if c.delete != nil {
				resp = c.delete(d, r)
			}
		case "PATCH":
			if c.patch != nil {
				resp = c.patch(d, r)
			}
		default:
			resp = NotFound
		}

		if err := resp.Render(w); err != nil {
			err := InternalError(err).Render(w)
			if err != nil {
				shared.LogErrorf("Failed writing error for error, giving up")
			}
		}

		/*
		 * When we create a new lxc.Container, it adds a finalizer (via
		 * SetFinalizer) that frees the struct. However, it sometimes
		 * takes the go GC a while to actually free the struct,
		 * presumably since it is a small amount of memory.
		 * Unfortunately, the struct also keeps the log fd open, so if
		 * we leave too many of these around, we end up running out of
		 * fds. So, let's explicitly do a GC to collect these at the
		 * end of each request.
		 */
		runtime.GC()
	})
}
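
// Do drives the source side of a container migration: it sends the migration
// header (filesystem type, idmap and snapshot information), negotiates the
// filesystem transfer method, optionally performs a live CRIU dump and
// finally waits for the target to report success or failure.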
func (s *migrationSourceWs) Do(migrateOp *operation) error {
	<-s.allConnected

	criuType := CRIUType_CRIU_RSYNC.Enum()
	if !s.live {
		criuType = nil

		err := s.container.StorageStart()
		if err != nil {
			return err
		}
		defer s.container.StorageStop()
	}

	idmaps := make([]*IDMapType, 0)

	idmapset := s.container.IdmapSet()
	if idmapset != nil {
		for _, ctnIdmap := range idmapset.Idmap {
			idmap := IDMapType{
				Isuid:    proto.Bool(ctnIdmap.Isuid),
				Isgid:    proto.Bool(ctnIdmap.Isgid),
				Hostid:   proto.Int(ctnIdmap.Hostid),
				Nsid:     proto.Int(ctnIdmap.Nsid),
				Maprange: proto.Int(ctnIdmap.Maprange),
			}

			idmaps = append(idmaps, &idmap)
		}
	}

	driver, fsErr := s.container.Storage().MigrationSource(s.container)
	/* the protocol says we have to send a header no matter what, so let's
	 * do that, but then immediately send an error.
	 */
	snapshots := []*Snapshot{}
	snapshotNames := []string{}
	if fsErr == nil {
		fullSnaps := driver.Snapshots()
		for _, snap := range fullSnaps {
			snapshots = append(snapshots, snapshotToProtobuf(snap))
			snapshotNames = append(snapshotNames, shared.ExtractSnapshotName(snap.Name()))
		}
	}

	myType := s.container.Storage().MigrationType()
	header := MigrationHeader{
		Fs:            &myType,
		Criu:          criuType,
		Idmap:         idmaps,
		SnapshotNames: snapshotNames,
		Snapshots:     snapshots,
	}

	if err := s.send(&header); err != nil {
		s.sendControl(err)
		return err
	}

	if fsErr != nil {
		s.sendControl(fsErr)
		return fsErr
	}

	if err := s.recv(&header); err != nil {
		s.sendControl(err)
		return err
	}

	if *header.Fs != myType {
		myType = MigrationFSType_RSYNC
		header.Fs = &myType

		driver, _ = rsyncMigrationSource(s.container)
	}

	// All failure paths need to do a few things to correctly handle errors before returning.
	// Unfortunately, handling errors is not well-suited to defer as the code depends on the
	// status of driver and the error value. The error value is especially tricky due to the
	// common case of creating a new err variable (intentional or not) due to scoping and use
	// of ":=". Capturing err in a closure for use in defer would be fragile, which defeats
	// the purpose of using defer. An abort function reduces the odds of mishandling errors
	// without introducing the fragility of closing over err.
	abort := func(err error) error {
		driver.Cleanup()
		s.sendControl(err)
		return err
	}

	if err := driver.SendWhileRunning(s.fsConn, migrateOp); err != nil {
		return abort(err)
	}

	restoreSuccess := make(chan bool, 1)
	dumpSuccess := make(chan error, 1)
	if s.live {
		if header.Criu == nil {
			return abort(fmt.Errorf("Got no CRIU socket type for live migration"))
		} else if *header.Criu != CRIUType_CRIU_RSYNC {
			return abort(fmt.Errorf("Formats other than criu rsync not understood"))
		}

		checkpointDir, err := ioutil.TempDir("", "lxd_checkpoint_")
		if err != nil {
			return abort(err)
		}

		if lxc.VersionAtLeast(2, 0, 4) {
			/* What happens below is slightly convoluted. Due to various
			 * complications with networking, there's no easy way for criu
			 * to exit and leave the container in a frozen state for us to
			 * somehow resume later.
			 *
			 * Instead, we use what criu calls an "action-script", which is
			 * basically a callback that lets us know when the dump is
			 * done. (Unfortunately, we can't pass arguments, just an
			 * executable path, so we write a custom action script with the
			 * real command we want to run.)
			 *
			 * This script then hangs until the migration operation either
			 * finishes successfully or fails, and exits 1 or 0, which
			 * causes criu to either leave the container running or kill it
			 * as we asked.
			 */
			dumpDone := make(chan bool, 1)
			actionScriptOpSecret, err := shared.RandomCryptoString()
			if err != nil {
				os.RemoveAll(checkpointDir)
				return abort(err)
			}

			actionScriptOp, err := operationCreate(
				operationClassWebsocket,
				nil,
				nil,
				func(op *operation) error {
					result := <-restoreSuccess
					if !result {
						return fmt.Errorf("restore failed, failing CRIU")
					}

					return nil
				},
				nil,
				func(op *operation, r *http.Request, w http.ResponseWriter) error {
					secret := r.FormValue("secret")
					if secret == "" {
						return fmt.Errorf("missing secret")
					}

					if secret != actionScriptOpSecret {
						return os.ErrPermission
					}

					c, err := shared.WebsocketUpgrader.Upgrade(w, r, nil)
					if err != nil {
						return err
					}

					dumpDone <- true

					closeMsg := websocket.FormatCloseMessage(websocket.CloseNormalClosure, "")
					return c.WriteMessage(websocket.CloseMessage, closeMsg)
				},
			)
			if err != nil {
				os.RemoveAll(checkpointDir)
				return abort(err)
			}

			if err := writeActionScript(checkpointDir, actionScriptOp.url, actionScriptOpSecret); err != nil {
				os.RemoveAll(checkpointDir)
				return abort(err)
			}

			_, err = actionScriptOp.Run()
			if err != nil {
				os.RemoveAll(checkpointDir)
				return abort(err)
			}

			go func() {
				dumpSuccess <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true)
				os.RemoveAll(checkpointDir)
			}()

			select {
			/* the checkpoint failed, let's just abort */
			case err = <-dumpSuccess:
				return abort(err)
			/* the dump finished, let's continue on to the restore */
			case <-dumpDone:
				shared.LogDebugf("Dump finished, continuing with restore...")
			}
		} else {
			defer os.RemoveAll(checkpointDir)

			if err := s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, false); err != nil {
				return abort(err)
			}
		}

		/*
		 * We do these serially right now, but there's really no reason for us
		 * to; since we have separate websockets, we can do it in parallel if
		 * we wanted to. However, assuming we're network bound, there's really
		 * no reason to do these in parallel. In the future when we're using
		 * p.haul's protocol, it will make sense to do these in parallel.
		 */
		if err := RsyncSend(shared.AddSlash(checkpointDir), s.criuConn, nil); err != nil {
			return abort(err)
		}

		if err := driver.SendAfterCheckpoint(s.fsConn); err != nil {
			return abort(err)
		}
	}

	driver.Cleanup()

	msg := MigrationControl{}
	if err := s.recv(&msg); err != nil {
		s.disconnect()
		return err
	}

	if s.live {
		restoreSuccess <- *msg.Success
		err := <-dumpSuccess
		if err != nil {
			shared.LogErrorf("dump failed after successful restore?: %q", err)
		}
	}

	if !*msg.Success {
		return fmt.Errorf(*msg.Message)
	}

	return nil
}
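
// Do runs the requested command inside the container, wiring its stdio to the
// client's websockets (a single pty in interactive mode, three pipes
// otherwise) and handling control messages such as window resizes and
// signals.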
func (s *execWs) Do(op *operation) error {
	<-s.allConnected

	var err error
	var ttys []*os.File
	var ptys []*os.File

	var stdin *os.File
	var stdout *os.File
	var stderr *os.File

	if s.interactive {
		ttys = make([]*os.File, 1)
		ptys = make([]*os.File, 1)
		ptys[0], ttys[0], err = shared.OpenPty(s.rootUid, s.rootGid)
		stdin = ttys[0]
		stdout = ttys[0]
		stderr = ttys[0]

		if s.width > 0 && s.height > 0 {
			shared.SetSize(int(ptys[0].Fd()), s.width, s.height)
		}
	} else {
		ttys = make([]*os.File, 3)
		ptys = make([]*os.File, 3)
		for i := 0; i < len(ttys); i++ {
			ptys[i], ttys[i], err = shared.Pipe()
			if err != nil {
				return err
			}
		}

		stdin = ptys[0]
		stdout = ttys[1]
		stderr = ttys[2]
	}

	controlExit := make(chan bool)
	receivePid := make(chan int)
	var wgEOF sync.WaitGroup

	if s.interactive {
		wgEOF.Add(1)
		go func() {
			receivedPid := <-receivePid

			select {
			case <-s.controlConnected:
				break
			case <-controlExit:
				return
			}

			for {
				mt, r, err := s.conns[-1].NextReader()
				if mt == websocket.CloseMessage {
					break
				}

				if err != nil {
					shared.LogDebugf("Got error getting next reader %s", err)
					break
				}

				buf, err := ioutil.ReadAll(r)
				if err != nil {
					shared.LogDebugf("Failed to read message %s", err)
					break
				}

				command := shared.ContainerExecControl{}

				if err := json.Unmarshal(buf, &command); err != nil {
					shared.LogDebugf("Failed to unmarshal control socket command: %s", err)
					continue
				}

				if command.Command == "window-resize" {
					winchWidth, err := strconv.Atoi(command.Args["width"])
					if err != nil {
						shared.LogDebugf("Unable to extract window width: %s", err)
						continue
					}

					winchHeight, err := strconv.Atoi(command.Args["height"])
					if err != nil {
						shared.LogDebugf("Unable to extract window height: %s", err)
						continue
					}

					err = shared.SetSize(int(ptys[0].Fd()), winchWidth, winchHeight)
					if err != nil {
						shared.LogDebugf("Failed to set window size to: %dx%d", winchWidth, winchHeight)
						continue
					}
				} else if command.Command == "signal" {
					if err := syscall.Kill(receivedPid, command.Signal); err != nil {
						shared.LogDebugf("Failed forwarding signal '%s' to PID %d.", command.Signal, receivedPid)
						continue
					}
					shared.LogDebugf("Forwarded signal '%s' to PID %d.", command.Signal, receivedPid)
				}
			}
		}()

		go func() {
			readDone, writeDone := shared.WebsocketMirror(s.conns[0], ptys[0], ptys[0])
			<-readDone
			<-writeDone
			s.conns[0].Close()
			wgEOF.Done()
		}()
	} else {
		wgEOF.Add(len(ttys) - 1)
		for i := 0; i < len(ttys); i++ {
			go func(i int) {
				if i == 0 {
					<-shared.WebsocketRecvStream(ttys[i], s.conns[i])
					ttys[i].Close()
				} else {
					<-shared.WebsocketSendStream(s.conns[i], ptys[i], -1)
					ptys[i].Close()
					wgEOF.Done()
				}
			}(i)
		}
	}

	finisher := func(cmdResult int, cmdErr error) error {
		for _, tty := range ttys {
			tty.Close()
		}

		if s.conns[-1] == nil {
			if s.interactive {
				controlExit <- true
			}
		} else {
			s.conns[-1].Close()
		}

		wgEOF.Wait()

		for _, pty := range ptys {
			pty.Close()
		}

		metadata := shared.Jmap{"return": cmdResult}
		err = op.UpdateMetadata(metadata)
		if err != nil {
			return err
		}

		return cmdErr
	}

	r, w, err := shared.Pipe()
	defer r.Close()
	if err != nil {
		shared.LogErrorf("%s", err)
		return err
	}

	cmd, err := s.container.ExecNoWait(s.command, s.env, stdin, stdout, stderr, w)
	if err != nil {
		w.Close()
		return err
	}

	err = cmd.Start()
	if err != nil {
		w.Close()
		return err
	}
	w.Close()

	attachedPid := -1
	if err := json.NewDecoder(r).Decode(&attachedPid); err != nil {
		shared.LogErrorf("Failed to retrieve PID of executing child process: %s", err)
		return finisher(-1, err)
	}

	if s.interactive {
		receivePid <- attachedPid
	}

	err = cmd.Wait()
	if err != nil {
		exitErr, ok := err.(*exec.ExitError)
		if ok {
			status, ok := exitErr.Sys().(syscall.WaitStatus)
			if ok {
				return finisher(status.ExitStatus(), nil)
			}
		}
	}

	return finisher(0, nil)
}
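
// deviceTaskBalance recomputes CPU pinning for all running containers: CPUs
// requested explicitly via a cpuset-style limits.cpu value are honoured as
// fixed pinning, while containers with a numeric limits.cpu value are spread
// across the least loaded CPUs.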
func deviceTaskBalance(d *Daemon) {
	min := func(x, y int) int {
		if x < y {
			return x
		}
		return y
	}

	// Don't bother running when CGroup support isn't there
	if !cgCpusetController {
		return
	}

	// Get effective cpus list - those are all guaranteed to be online
	effectiveCpus, err := cGroupGet("cpuset", "/", "cpuset.effective_cpus")
	if err != nil {
		// Older kernel - use cpuset.cpus
		effectiveCpus, err = cGroupGet("cpuset", "/", "cpuset.cpus")
		if err != nil {
			shared.LogErrorf("Error reading host's cpuset.cpus")
			return
		}
	}

	err = cGroupSet("cpuset", "/lxc", "cpuset.cpus", effectiveCpus)
	if err != nil && shared.PathExists("/sys/fs/cgroup/cpuset/lxc") {
		shared.LogWarn("Error setting lxd's cpuset.cpus", log.Ctx{"err": err})
	}

	cpus, err := parseCpuset(effectiveCpus)
	if err != nil {
		shared.LogError("Error parsing host's cpu set", log.Ctx{"cpuset": effectiveCpus, "err": err})
		return
	}

	// Iterate through the containers
	containers, err := dbContainersList(d.db, cTypeRegular)
	if err != nil {
		shared.LogError("problem loading containers list", log.Ctx{"err": err})
		return
	}

	fixedContainers := map[int][]container{}
	balancedContainers := map[container]int{}
	for _, name := range containers {
		c, err := containerLoadByName(d, name)
		if err != nil {
			continue
		}

		conf := c.ExpandedConfig()
		cpulimit, ok := conf["limits.cpu"]
		if !ok || cpulimit == "" {
			cpulimit = effectiveCpus
		}

		if !c.IsRunning() {
			continue
		}

		count, err := strconv.Atoi(cpulimit)
		if err == nil {
			// Load-balance
			count = min(count, len(cpus))
			balancedContainers[c] = count
		} else {
			// Pinned
			containerCpus, err := parseCpuset(cpulimit)
			if err != nil {
				return
			}
			for _, nr := range containerCpus {
				if !shared.IntInSlice(nr, cpus) {
					continue
				}

				_, ok := fixedContainers[nr]
				if ok {
					fixedContainers[nr] = append(fixedContainers[nr], c)
				} else {
					fixedContainers[nr] = []container{c}
				}
			}
		}
	}

	// Balance things
	pinning := map[container][]string{}
	usage := map[int]deviceTaskCPU{}

	for _, id := range cpus {
		cpu := deviceTaskCPU{}
		cpu.id = id
		cpu.strId = fmt.Sprintf("%d", id)
		count := 0
		cpu.count = &count

		usage[id] = cpu
	}

	for cpu, ctns := range fixedContainers {
		c, ok := usage[cpu]
		if !ok {
			shared.LogErrorf("Internal error: container using unavailable cpu")
			continue
		}
		id := c.strId
		for _, ctn := range ctns {
			_, ok := pinning[ctn]
			if ok {
				pinning[ctn] = append(pinning[ctn], id)
			} else {
				pinning[ctn] = []string{id}
			}
			*c.count += 1
		}
	}

	sortedUsage := make(deviceTaskCPUs, 0)
	for _, value := range usage {
		sortedUsage = append(sortedUsage, value)
	}

	for ctn, count := range balancedContainers {
		sort.Sort(sortedUsage)
		for _, cpu := range sortedUsage {
			if count == 0 {
				break
			}
			count -= 1

			id := cpu.strId
			_, ok := pinning[ctn]
			if ok {
				pinning[ctn] = append(pinning[ctn], id)
			} else {
				pinning[ctn] = []string{id}
			}
			*cpu.count += 1
		}
	}

	// Set the new pinning
	for ctn, set := range pinning {
		// Confirm the container didn't just stop
		if !ctn.IsRunning() {
			continue
		}

		sort.Strings(set)
		err := ctn.CGroupSet("cpuset.cpus", strings.Join(set, ","))
		if err != nil {
			shared.LogError("balance: Unable to set cpuset", log.Ctx{"name": ctn.Name(), "err": err, "value": strings.Join(set, ",")})
		}
	}
}