func rsyncMigrationSink(live bool, container container, snapshots []container, conn *websocket.Conn) error {
	/* the first object is the actual container */
	if err := RsyncRecv(shared.AddSlash(container.Path()), conn); err != nil {
		return err
	}

	if len(snapshots) > 0 {
		err := os.MkdirAll(shared.VarPath(fmt.Sprintf("snapshots/%s", container.Name())), 0700)
		if err != nil {
			return err
		}
	}

	for _, snap := range snapshots {
		if err := RsyncRecv(shared.AddSlash(snap.Path()), conn); err != nil {
			return err
		}
	}

	if live {
		/* now receive the final sync */
		if err := RsyncRecv(shared.AddSlash(container.Path()), conn); err != nil {
			return err
		}
	}

	return nil
}
func rsyncMigrationSink(container container, snapshots []container, conn *websocket.Conn) error {
	/* the first object is the actual container */
	if err := RsyncRecv(shared.AddSlash(container.Path()), conn); err != nil {
		return err
	}

	for _, snap := range snapshots {
		if err := RsyncRecv(shared.AddSlash(snap.Path()), conn); err != nil {
			return err
		}
	}

	return nil
}
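The rsync fallback driver consumed by these sinks is constructed on the source side via rsyncMigrationSource, which is referenced from migrationSourceWs.Do further down. The following is a minimal sketch of what that constructor and its backing struct presumably look like: the field names are inferred from the s.container and s.snapshots accesses in SendWhileRunning, while the container.Snapshots() helper and the MigrationStorageSourceDriver return type are assumptions.

type rsyncStorageSourceDriver struct {
	container container
	snapshots []container
}

func rsyncMigrationSource(c container) (MigrationStorageSourceDriver, error) {
	/* Assumed helper: collect the container's snapshots so the driver can
	 * stream them alongside the container itself. */
	snapshots, err := c.Snapshots()
	if err != nil {
		return nil, err
	}

	return rsyncStorageSourceDriver{c, snapshots}, nil
}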
func (s rsyncStorageSourceDriver) SendWhileRunning(conn *websocket.Conn) error {
	for _, send := range s.snapshots {
		if err := send.StorageStart(); err != nil {
			return err
		}
		defer send.StorageStop()

		path := send.Path()
		if err := RsyncSend(shared.AddSlash(path), conn); err != nil {
			return err
		}
	}

	return RsyncSend(shared.AddSlash(s.container.Path()), conn)
}
func (s rsyncStorageSourceDriver) SendWhileRunning(conn *websocket.Conn, op *operation) error {
	for _, send := range s.snapshots {
		if err := send.StorageStart(); err != nil {
			return err
		}
		defer send.StorageStop()

		path := send.Path()
		wrapper := StorageProgressReader(op, "fs_progress", send.Name())
		if err := RsyncSend(shared.AddSlash(path), conn, wrapper); err != nil {
			return err
		}
	}

	wrapper := StorageProgressReader(op, "fs_progress", s.container.Name())
	return RsyncSend(shared.AddSlash(s.container.Path()), conn, wrapper)
}
func (s rsyncStorageSourceDriver) SendWhileRunning(conn *websocket.Conn) error {
	toSend := append([]container{s.container}, s.snapshots...)

	for _, send := range toSend {
		path := send.Path()
		if err := RsyncSend(shared.AddSlash(path), conn); err != nil {
			return err
		}
	}

	return nil
}
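Taken together, the SendWhileRunning, SendAfterCheckpoint, Snapshots and Cleanup calls made from migrationSourceWs.Do suggest an interface roughly like the one below. This is a reconstruction for orientation only; the signatures vary between the versions shown here (for example, the newer SendWhileRunning also takes an *operation for progress reporting).

type MigrationStorageSourceDriver interface {
	/* Snapshots returns the snapshots that will be transferred along with
	 * the container itself. */
	Snapshots() []container

	/* SendWhileRunning transfers the filesystem while the container is
	 * still running; for live migration a second, smaller delta is sent
	 * later via SendAfterCheckpoint. */
	SendWhileRunning(conn *websocket.Conn) error
	SendAfterCheckpoint(conn *websocket.Conn) error

	/* Cleanup releases anything the driver set up for the transfer. */
	Cleanup()
}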
// rsyncCopy copies a directory using rsync (with the --devices option).
func (ss *storageShared) rsyncCopy(source string, dest string) (string, error) {
	if err := os.MkdirAll(dest, 0755); err != nil {
		return "", err
	}

	output, err := exec.Command(
		"rsync",
		"-a",
		"--devices",
		shared.AddSlash(source),
		dest).CombinedOutput()

	return string(output), err
}
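The shared.AddSlash calls scattered through these helpers matter because rsync treats a trailing slash specially: "dir/" means "the contents of dir", while "dir" would create a nested dir inside the destination. A trivial equivalent of the helper, for reference only (the real one lives in the shared package):

func addSlash(path string) string {
	if !strings.HasSuffix(path, "/") {
		return path + "/"
	}

	return path
}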
// storageRsyncCopy copies a directory using rsync (with the --devices option).
func storageRsyncCopy(source string, dest string) (string, error) {
	if err := os.MkdirAll(dest, 0755); err != nil {
		return "", err
	}

	rsyncVerbosity := "-q"
	if *debug {
		rsyncVerbosity = "-vi"
	}

	output, err := exec.Command(
		"rsync",
		"-a",
		"-HAX",
		"--devices",
		"--delete",
		rsyncVerbosity,
		shared.AddSlash(source),
		dest).CombinedOutput()

	return string(output), err
}
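A short, hypothetical caller for storageRsyncCopy, mainly to show the intended error handling: the combined rsync output is only interesting when the copy fails.

func copyRootfs(source string, dest string) error {
	output, err := storageRsyncCopy(source, dest)
	if err != nil {
		return fmt.Errorf("rsync failed to copy %s to %s: %v: %s", source, dest, err, output)
	}

	return nil
}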
func (s *migrationSourceWs) Do(op *operation) error { <-s.allConnected criuType := CRIUType_CRIU_RSYNC.Enum() if !s.live { criuType = nil defer s.container.StorageStop() } idmaps := make([]*IDMapType, 0) idmapset := s.container.IdmapSet() if idmapset != nil { for _, ctnIdmap := range idmapset.Idmap { idmap := IDMapType{ Isuid: proto.Bool(ctnIdmap.Isuid), Isgid: proto.Bool(ctnIdmap.Isgid), Hostid: proto.Int(ctnIdmap.Hostid), Nsid: proto.Int(ctnIdmap.Nsid), Maprange: proto.Int(ctnIdmap.Maprange), } idmaps = append(idmaps, &idmap) } } sources, fsErr := s.container.Storage().MigrationSource(s.container) /* the protocol says we have to send a header no matter what, so let's * do that, but then immediately send an error. */ snapshots := []string{} if fsErr == nil { /* A bit of a special case here: doing lxc launch * host2:c1/snap1 host1:container we're sending a snapshot, but * it ends up as the container on the other end. So, we want to * send it as the main container (i.e. ignore its IsSnapshot()). */ if len(sources) > 1 { for _, snap := range sources { if !snap.IsSnapshot() { continue } name := shared.ExtractSnapshotName(snap.Name()) snapshots = append(snapshots, name) } } } myType := s.container.Storage().MigrationType() header := MigrationHeader{ Fs: &myType, Criu: criuType, Idmap: idmaps, Snapshots: snapshots, } if err := s.send(&header); err != nil { s.sendControl(err) return err } if fsErr != nil { s.sendControl(fsErr) return fsErr } if err := s.recv(&header); err != nil { s.sendControl(err) return err } // TODO: actually fall back on rsync. if *header.Fs != myType { err := fmt.Errorf("mismatched storage types not supported yet") s.sendControl(err) return err } if s.live { if header.Criu == nil { err := fmt.Errorf("Got no CRIU socket type for live migration") s.sendControl(err) return err } else if *header.Criu != CRIUType_CRIU_RSYNC { err := fmt.Errorf("Formats other than criu rsync not understood") s.sendControl(err) return err } checkpointDir, err := ioutil.TempDir("", "lxd_checkpoint_") if err != nil { s.sendControl(err) return err } defer os.RemoveAll(checkpointDir) opts := lxc.CheckpointOptions{Stop: true, Directory: checkpointDir, Verbose: true} err = s.container.Checkpoint(opts) if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil { shared.Debugf("Error collecting checkpoint log file %s", err) } if err != nil { log := GetCRIULogErrors(checkpointDir, "dump") err = fmt.Errorf("checkpoint failed:\n%s", log) s.sendControl(err) return err } /* * We do the serially right now, but there's really no reason for us * to; since we have separate websockets, we can do it in parallel if * we wanted to. However, assuming we're network bound, there's really * no reason to do these in parallel. In the future when we're using * p.haul's protocol, it will make sense to do these in parallel. */ if err := RsyncSend(shared.AddSlash(checkpointDir), s.criuConn); err != nil { s.sendControl(err) return err } } for _, source := range sources { shared.Debugf("sending fs object %s", source.Name()) if err := source.Send(s.fsConn); err != nil { s.sendControl(err) return err } } msg := MigrationControl{} if err := s.recv(&msg); err != nil { s.disconnect() return err } // TODO: should we add some config here about automatically restarting // the container migrate failure? What about the failures above? if !*msg.Success { return fmt.Errorf(*msg.Message) } return nil }
func (s *migrationSourceWs) Do(migrateOp *operation) error { <-s.allConnected criuType := CRIUType_CRIU_RSYNC.Enum() if !s.live { criuType = nil err := s.container.StorageStart() if err != nil { return err } defer s.container.StorageStop() } idmaps := make([]*IDMapType, 0) idmapset := s.container.IdmapSet() if idmapset != nil { for _, ctnIdmap := range idmapset.Idmap { idmap := IDMapType{ Isuid: proto.Bool(ctnIdmap.Isuid), Isgid: proto.Bool(ctnIdmap.Isgid), Hostid: proto.Int(ctnIdmap.Hostid), Nsid: proto.Int(ctnIdmap.Nsid), Maprange: proto.Int(ctnIdmap.Maprange), } idmaps = append(idmaps, &idmap) } } driver, fsErr := s.container.Storage().MigrationSource(s.container) /* the protocol says we have to send a header no matter what, so let's * do that, but then immediately send an error. */ snapshots := []*Snapshot{} snapshotNames := []string{} if fsErr == nil { fullSnaps := driver.Snapshots() for _, snap := range fullSnaps { snapshots = append(snapshots, snapshotToProtobuf(snap)) snapshotNames = append(snapshotNames, shared.ExtractSnapshotName(snap.Name())) } } myType := s.container.Storage().MigrationType() header := MigrationHeader{ Fs: &myType, Criu: criuType, Idmap: idmaps, SnapshotNames: snapshotNames, Snapshots: snapshots, } if err := s.send(&header); err != nil { s.sendControl(err) return err } if fsErr != nil { s.sendControl(fsErr) return fsErr } if err := s.recv(&header); err != nil { s.sendControl(err) return err } if *header.Fs != myType { myType = MigrationFSType_RSYNC header.Fs = &myType driver, _ = rsyncMigrationSource(s.container) } // All failure paths need to do a few things to correctly handle errors before returning. // Unfortunately, handling errors is not well-suited to defer as the code depends on the // status of driver and the error value. The error value is especially tricky due to the // common case of creating a new err variable (intentional or not) due to scoping and use // of ":=". Capturing err in a closure for use in defer would be fragile, which defeats // the purpose of using defer. An abort function reduces the odds of mishandling errors // without introducing the fragility of closing on err. abort := func(err error) error { driver.Cleanup() s.sendControl(err) return err } if err := driver.SendWhileRunning(s.fsConn, migrateOp); err != nil { return abort(err) } restoreSuccess := make(chan bool, 1) dumpSuccess := make(chan error, 1) if s.live { if header.Criu == nil { return abort(fmt.Errorf("Got no CRIU socket type for live migration")) } else if *header.Criu != CRIUType_CRIU_RSYNC { return abort(fmt.Errorf("Formats other than criu rsync not understood")) } checkpointDir, err := ioutil.TempDir("", "lxd_checkpoint_") if err != nil { return abort(err) } if lxc.VersionAtLeast(2, 0, 4) { /* What happens below is slightly convoluted. Due to various * complications with networking, there's no easy way for criu * to exit and leave the container in a frozen state for us to * somehow resume later. * * Instead, we use what criu calls an "action-script", which is * basically a callback that lets us know when the dump is * done. (Unfortunately, we can't pass arguments, just an * executable path, so we write a custom action script with the * real command we want to run.) * * This script then hangs until the migration operation either * finishes successfully or fails, and exits 1 or 0, which * causes criu to either leave the container running or kill it * as we asked. 
*/ dumpDone := make(chan bool, 1) actionScriptOpSecret, err := shared.RandomCryptoString() if err != nil { os.RemoveAll(checkpointDir) return abort(err) } actionScriptOp, err := operationCreate( operationClassWebsocket, nil, nil, func(op *operation) error { result := <-restoreSuccess if !result { return fmt.Errorf("restore failed, failing CRIU") } return nil }, nil, func(op *operation, r *http.Request, w http.ResponseWriter) error { secret := r.FormValue("secret") if secret == "" { return fmt.Errorf("missing secret") } if secret != actionScriptOpSecret { return os.ErrPermission } c, err := shared.WebsocketUpgrader.Upgrade(w, r, nil) if err != nil { return err } dumpDone <- true closeMsg := websocket.FormatCloseMessage(websocket.CloseNormalClosure, "") return c.WriteMessage(websocket.CloseMessage, closeMsg) }, ) if err != nil { os.RemoveAll(checkpointDir) return abort(err) } if err := writeActionScript(checkpointDir, actionScriptOp.url, actionScriptOpSecret); err != nil { os.RemoveAll(checkpointDir) return abort(err) } _, err = actionScriptOp.Run() if err != nil { os.RemoveAll(checkpointDir) return abort(err) } go func() { dumpSuccess <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true) os.RemoveAll(checkpointDir) }() select { /* the checkpoint failed, let's just abort */ case err = <-dumpSuccess: return abort(err) /* the dump finished, let's continue on to the restore */ case <-dumpDone: shared.LogDebugf("Dump finished, continuing with restore...") } } else { defer os.RemoveAll(checkpointDir) if err := s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, false); err != nil { return abort(err) } } /* * We do the serially right now, but there's really no reason for us * to; since we have separate websockets, we can do it in parallel if * we wanted to. However, assuming we're network bound, there's really * no reason to do these in parallel. In the future when we're using * p.haul's protocol, it will make sense to do these in parallel. */ if err := RsyncSend(shared.AddSlash(checkpointDir), s.criuConn, nil); err != nil { return abort(err) } if err := driver.SendAfterCheckpoint(s.fsConn); err != nil { return abort(err) } } driver.Cleanup() msg := MigrationControl{} if err := s.recv(&msg); err != nil { s.disconnect() return err } if s.live { restoreSuccess <- *msg.Success err := <-dumpSuccess if err != nil { shared.LogErrorf("dump failed after successful restore?: %q", err) } } if !*msg.Success { return fmt.Errorf(*msg.Message) } return nil }
func (s rsyncStorageSourceDriver) SendAfterCheckpoint(conn *websocket.Conn) error {
	/* resync anything that changed between our first send and the checkpoint */
	return RsyncSend(shared.AddSlash(s.container.Path()), conn)
}
func containerSnapRestore(d *Daemon, name string, snap string) error {
	// normalize snapshot name
	if !shared.IsSnapshot(snap) {
		snap = fmt.Sprintf("%s/%s", name, snap)
	}

	shared.Debugf("RESTORE => Restoring snapshot [%s] on container [%s]", snap, name)

	/*
	 * restore steps:
	 * 1. stop container if already running
	 * 2. overwrite existing config with snapshot config
	 * 3. copy snapshot rootfs to container
	 */
	wasRunning := false
	c, err := newLxdContainer(name, d)
	if err != nil {
		shared.Debugf("RESTORE => Error: newLxdContainer() failed for container: %s", err)
		return err
	}

	// 1. stop container
	// TODO: stateful restore ?
	if c.c.Running() {
		wasRunning = true
		if err = c.Stop(); err != nil {
			shared.Debugf("RESTORE => Error: could not stop container: %s", err)
			return err
		}
		shared.Debugf("RESTORE => Stopped container %s", name)
	}

	// 2. replace config

	// Make sure the source exists.
	source, err := newLxdContainer(snap, d)
	if err != nil {
		shared.Debugf("RESTORE => Error: newLxdContainer() failed for snapshot: %s", err)
		return err
	}

	newConfig := containerConfigReq{}
	newConfig.Config = source.config
	newConfig.Profiles = source.profiles
	newConfig.Devices = source.devices

	tx, err := containerReplaceConfig(d, c, name, newConfig)
	if err != nil {
		shared.Debugf("RESTORE => containerReplaceConfig failed: %s", err)
		return err
	}

	if err := txCommit(tx); err != nil {
		return err
	}

	// 3. copy rootfs
	// TODO: btrfs optimizations

	containerRootPath := shared.VarPath("lxc", name)

	if !shared.IsDir(path.Dir(containerRootPath)) {
		shared.Debugf("RESTORE => containerRoot [%s] directory does not exist", containerRootPath)
		return os.ErrNotExist
	}

	snapshotRootFSPath := shared.AddSlash(snapshotRootfsDir(c, strings.SplitN(snap, "/", 2)[1]))
	containerRootFSPath := shared.AddSlash(fmt.Sprintf("%s/%s", containerRootPath, "rootfs"))
	shared.Debugf("RESTORE => Copying %s to %s", snapshotRootFSPath, containerRootFSPath)

	rsyncVerbosity := "-q"
	if *debug {
		rsyncVerbosity = "-vi"
	}

	output, err := exec.Command("rsync", "-a", "-c", "-HAX", "--devices", "--delete", rsyncVerbosity, snapshotRootFSPath, containerRootFSPath).CombinedOutput()
	shared.Debugf("RESTORE => rsync output\n%s", output)

	if err != nil {
		shared.Debugf("rsync failed:\n%s", output)
		return err
	}

	if !source.isPrivileged() {
		err = setUnprivUserAcl(c, containerRootPath)
		if err != nil {
			shared.Debugf("Error adding acl for container root: falling back to chmod")
			output, err := exec.Command("chmod", "+x", containerRootPath).CombinedOutput()
			if err != nil {
				shared.Debugf("Error chmoding the container root")
				shared.Debugf(string(output))
				return err
			}
		}
	}

	if wasRunning {
		c.Start()
	}

	return nil
}
func (c *migrationSink) do() error { var err error c.controlConn, err = c.connectWithSecret(c.controlSecret) if err != nil { return err } defer c.disconnect() c.fsConn, err = c.connectWithSecret(c.fsSecret) if err != nil { c.sendControl(err) return err } if c.live { c.criuConn, err = c.connectWithSecret(c.criuSecret) if err != nil { c.sendControl(err) return err } } // For now, we just ignore whatever the server sends us. We only // support RSYNC, so that's what we respond with. header := MigrationHeader{} if err := c.recv(&header); err != nil { c.sendControl(err) return err } criuType := CRIUType_CRIU_RSYNC.Enum() if !c.live { criuType = nil } resp := MigrationHeader{Fs: MigrationFSType_RSYNC.Enum(), Criu: criuType} if err := c.send(&resp); err != nil { c.sendControl(err) return err } restore := make(chan error) go func(c *migrationSink) { imagesDir := "" srcIdmap := new(shared.IdmapSet) dstIdmap := c.IdmapSet if dstIdmap == nil { dstIdmap = new(shared.IdmapSet) } if c.live { var err error imagesDir, err = ioutil.TempDir("", "lxd_migration_") if err != nil { os.RemoveAll(imagesDir) c.sendControl(err) return } defer func() { err := CollectCRIULogFile(c.container, imagesDir, "migration", "restore") /* * If the checkpoint fails, we won't have any log to collect, * so don't warn about that. */ if err != nil && !os.IsNotExist(err) { shared.Debugf("Error collectiong migration log file %s", err) } os.RemoveAll(imagesDir) }() if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil { restore <- err os.RemoveAll(imagesDir) c.sendControl(err) return } /* * For unprivileged containers we need to shift the * perms on the images images so that they can be * opened by the process after it is in its user * namespace. */ if dstIdmap != nil { if err := dstIdmap.ShiftRootfs(imagesDir); err != nil { restore <- err os.RemoveAll(imagesDir) c.sendControl(err) return } } } fsDir := c.container.ConfigItem("lxc.rootfs")[0] if err := RsyncRecv(shared.AddSlash(fsDir), c.fsConn); err != nil { restore <- err c.sendControl(err) return } for _, idmap := range header.Idmap { e := shared.IdmapEntry{ Isuid: *idmap.Isuid, Isgid: *idmap.Isgid, Nsid: int(*idmap.Nsid), Hostid: int(*idmap.Hostid), Maprange: int(*idmap.Maprange)} srcIdmap.Idmap = shared.Extend(srcIdmap.Idmap, e) } if !reflect.DeepEqual(srcIdmap, dstIdmap) { if err := srcIdmap.UnshiftRootfs(shared.VarPath("containers", c.container.Name())); err != nil { restore <- err c.sendControl(err) return } if err := dstIdmap.ShiftRootfs(shared.VarPath("containers", c.container.Name())); err != nil { restore <- err c.sendControl(err) return } } if c.live { f, err := ioutil.TempFile("", "lxd_lxc_migrateconfig_") if err != nil { restore <- err return } if err = f.Chmod(0600); err != nil { f.Close() os.Remove(f.Name()) return } f.Close() if err := c.container.SaveConfigFile(f.Name()); err != nil { restore <- err return } cmd := exec.Command( os.Args[0], "forkmigrate", c.container.Name(), c.container.ConfigPath(), f.Name(), imagesDir, ) err = cmd.Run() if err != nil { log := GetCRIULogErrors(imagesDir, "restore") err = fmt.Errorf("restore failed:\n%s", log) } restore <- err } else { restore <- nil } }(c) source := c.controlChannel() for { select { case err = <-restore: c.sendControl(err) return err case msg, ok := <-source: if !ok { c.disconnect() return fmt.Errorf("Got error reading source") } if !*msg.Success { c.disconnect() return fmt.Errorf(*msg.Message) } else { // The source can only tell us it failed (e.g. if // checkpointing failed). 
We have to tell the source // whether or not the restore was successful. shared.Debugf("Unknown message %v from source", msg) } } } }
func (s *migrationSourceWs) Do() shared.OperationResult { <-s.allConnected criuType := CRIUType_CRIU_RSYNC.Enum() if !s.live { criuType = nil } idmaps := make([]*IDMapType, 0) if s.idmapset != nil { for _, ctnIdmap := range s.idmapset.Idmap { idmap := IDMapType{ Isuid: proto.Bool(ctnIdmap.Isuid), Isgid: proto.Bool(ctnIdmap.Isgid), Hostid: proto.Int(ctnIdmap.Hostid), Nsid: proto.Int(ctnIdmap.Nsid), Maprange: proto.Int(ctnIdmap.Maprange), } idmaps = append(idmaps, &idmap) } } header := MigrationHeader{ Fs: MigrationFSType_RSYNC.Enum(), Criu: criuType, Idmap: idmaps, } if err := s.send(&header); err != nil { s.sendControl(err) return shared.OperationError(err) } if err := s.recv(&header); err != nil { s.sendControl(err) return shared.OperationError(err) } if *header.Fs != MigrationFSType_RSYNC { err := fmt.Errorf("Formats other than rsync not understood") s.sendControl(err) return shared.OperationError(err) } if s.live { if header.Criu == nil { err := fmt.Errorf("Got no CRIU socket type for live migration") s.sendControl(err) return shared.OperationError(err) } else if *header.Criu != CRIUType_CRIU_RSYNC { err := fmt.Errorf("Formats other than criu rsync not understood") s.sendControl(err) return shared.OperationError(err) } checkpointDir, err := ioutil.TempDir("", "lxd_migration_") if err != nil { s.sendControl(err) return shared.OperationError(err) } defer os.RemoveAll(checkpointDir) opts := lxc.CheckpointOptions{Stop: true, Directory: checkpointDir, Verbose: true} err = s.container.Checkpoint(opts) if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil { shared.Debugf("Error collecting checkpoint log file %s", err) } if err != nil { log := GetCRIULogErrors(checkpointDir, "dump") err = fmt.Errorf("checkpoint failed:\n%s", log) s.sendControl(err) return shared.OperationError(err) } /* * We do the serially right now, but there's really no reason for us * to; since we have separate websockets, we can do it in parallel if * we wanted to. However, assuming we're network bound, there's really * no reason to do these in parallel. In the future when we're using * p.haul's protocol, it will make sense to do these in parallel. */ if err := RsyncSend(shared.AddSlash(checkpointDir), s.criuConn); err != nil { s.sendControl(err) return shared.OperationError(err) } } fsDir := s.container.ConfigItem("lxc.rootfs")[0] if err := RsyncSend(shared.AddSlash(fsDir), s.fsConn); err != nil { s.sendControl(err) return shared.OperationError(err) } msg := MigrationControl{} if err := s.recv(&msg); err != nil { s.disconnect() return shared.OperationError(err) } // TODO: should we add some config here about automatically restarting // the container migrate failure? What about the failures above? if !*msg.Success { return shared.OperationError(fmt.Errorf(*msg.Message)) } return shared.OperationSuccess }
func containerSnapshotsPost(d *Daemon, r *http.Request) Response {
	name := mux.Vars(r)["name"]

	/*
	 * snapshot is a three step operation:
	 * 1. choose a new name
	 * 2. copy the database info over
	 * 3. copy over the rootfs
	 */
	c, err := newLxdContainer(name, d)
	if err != nil {
		return SmartError(err)
	}

	raw := shared.Jmap{}
	if err := json.NewDecoder(r.Body).Decode(&raw); err != nil {
		return BadRequest(err)
	}

	snapshotName, err := raw.GetString("name")
	if err != nil || snapshotName == "" {
		// come up with a name
		i := nextSnapshot(d, name)
		snapshotName = fmt.Sprintf("snap%d", i)
	}

	stateful, err := raw.GetBool("stateful")
	if err != nil {
		return BadRequest(err)
	}

	fullName := fmt.Sprintf("%s/%s", name, snapshotName)
	snapDir := snapshotDir(c, snapshotName)
	if shared.PathExists(snapDir) {
		shared.Log.Error(
			"Snapshot exists on disk",
			log.Ctx{
				"name": fullName,
				"path": snapDir})
		return Conflict
	}

	err = os.MkdirAll(snapDir, 0700)
	if err != nil {
		return InternalError(err)
	}

	snapshot := func() error {
		StateDir := snapshotStateDir(c, snapshotName)
		err = os.MkdirAll(StateDir, 0700)
		if err != nil {
			os.RemoveAll(snapDir)
			return err
		}

		if stateful {
			// TODO - shouldn't we freeze for the duration of rootfs snapshot below?
			if !c.c.Running() {
				os.RemoveAll(snapDir)
				return fmt.Errorf("Container not running\n")
			}

			opts := lxc.CheckpointOptions{Directory: StateDir, Stop: true, Verbose: true}
			if err := c.c.Checkpoint(opts); err != nil {
				os.RemoveAll(snapDir)
				return err
			}
		}

		/* Create the db info */
		args := containerLXDArgs{
			Ctype:        cTypeSnapshot,
			Config:       c.config,
			Profiles:     c.profiles,
			Ephemeral:    c.ephemeral,
			BaseImage:    c.config["volatile.baseImage"],
			Architecture: c.architecture,
		}

		_, err := dbContainerCreate(d.db, fullName, args)
		if err != nil {
			os.RemoveAll(snapDir)
			return err
		}

		/* Create the directory and rootfs, set perms */
		/* Copy the rootfs */
		oldPath := shared.AddSlash(shared.VarPath("containers", name, "rootfs"))
		newPath := snapshotRootfsDir(c, snapshotName)
		err = exec.Command("rsync", "-a", "--devices", oldPath, newPath).Run()
		if err != nil {
			os.RemoveAll(snapDir)
			dbContainerSnapshotRemove(d.db, name, snapshotName)
		}

		return err
	}

	return AsyncResponse(shared.OperationWrap(snapshot), nil)
}
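For reference, the handler above only reads two optional fields from the request body ("name" via GetString and "stateful" via GetBool). A sketch of a matching payload follows; the route itself (the usual /1.0/containers/<name>/snapshots endpoint) is not shown in the snippet and is assumed here.

func exampleSnapshotRequestBody() ([]byte, error) {
	/* Mirrors the fields decoded into shared.Jmap above; omitting "name"
	 * makes the server pick the next "snapN" automatically. */
	return json.Marshal(map[string]interface{}{
		"name":     "snap0",
		"stateful": false,
	})
}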
func (s *migrationSourceWs) Do(op *operation) error { <-s.allConnected criuType := CRIUType_CRIU_RSYNC.Enum() if !s.live { criuType = nil err := s.container.StorageStart() if err != nil { return err } defer s.container.StorageStop() } idmaps := make([]*IDMapType, 0) idmapset := s.container.IdmapSet() if idmapset != nil { for _, ctnIdmap := range idmapset.Idmap { idmap := IDMapType{ Isuid: proto.Bool(ctnIdmap.Isuid), Isgid: proto.Bool(ctnIdmap.Isgid), Hostid: proto.Int(ctnIdmap.Hostid), Nsid: proto.Int(ctnIdmap.Nsid), Maprange: proto.Int(ctnIdmap.Maprange), } idmaps = append(idmaps, &idmap) } } driver, fsErr := s.container.Storage().MigrationSource(s.container) /* the protocol says we have to send a header no matter what, so let's * do that, but then immediately send an error. */ snapshots := []string{} if fsErr == nil { fullSnaps := driver.Snapshots() for _, snap := range fullSnaps { snapshots = append(snapshots, shared.ExtractSnapshotName(snap.Name())) } } myType := s.container.Storage().MigrationType() header := MigrationHeader{ Fs: &myType, Criu: criuType, Idmap: idmaps, Snapshots: snapshots, } if err := s.send(&header); err != nil { s.sendControl(err) return err } if fsErr != nil { s.sendControl(fsErr) return fsErr } if err := s.recv(&header); err != nil { s.sendControl(err) return err } if *header.Fs != myType { myType = MigrationFSType_RSYNC header.Fs = &myType driver, _ = rsyncMigrationSource(s.container) } defer driver.Cleanup() if err := driver.SendWhileRunning(s.fsConn); err != nil { s.sendControl(err) return err } if s.live { if header.Criu == nil { err := fmt.Errorf("Got no CRIU socket type for live migration") s.sendControl(err) return err } else if *header.Criu != CRIUType_CRIU_RSYNC { err := fmt.Errorf("Formats other than criu rsync not understood") s.sendControl(err) return err } checkpointDir, err := ioutil.TempDir("", "lxd_checkpoint_") if err != nil { s.sendControl(err) return err } defer os.RemoveAll(checkpointDir) opts := lxc.CheckpointOptions{Stop: true, Directory: checkpointDir, Verbose: true} err = s.container.Checkpoint(opts) if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil { shared.Debugf("Error collecting checkpoint log file %s", err) } if err != nil { log, err2 := GetCRIULogErrors(checkpointDir, "dump") /* couldn't find the CRIU log file which means we * didn't even get that far; give back the liblxc * error. */ if err2 != nil { log = err.Error() } err = fmt.Errorf("checkpoint failed:\n%s", log) s.sendControl(err) return err } /* * We do the serially right now, but there's really no reason for us * to; since we have separate websockets, we can do it in parallel if * we wanted to. However, assuming we're network bound, there's really * no reason to do these in parallel. In the future when we're using * p.haul's protocol, it will make sense to do these in parallel. */ if err := RsyncSend(shared.AddSlash(checkpointDir), s.criuConn); err != nil { s.sendControl(err) return err } if err := driver.SendAfterCheckpoint(s.fsConn); err != nil { s.sendControl(err) return err } } msg := MigrationControl{} if err := s.recv(&msg); err != nil { s.disconnect() return err } // TODO: should we add some config here about automatically restarting // the container migrate failure? What about the failures above? if !*msg.Success { return fmt.Errorf(*msg.Message) } return nil }
func (s *rsyncStorageSource) Send(conn *websocket.Conn) error {
	path := s.container.Path()
	return RsyncSend(shared.AddSlash(path), conn)
}
func TestRsyncSendRecv(t *testing.T) {
	source, err := ioutil.TempDir("", "lxd_test_source_")
	if err != nil {
		t.Error(err)
		return
	}
	defer os.RemoveAll(source)

	sink, err := ioutil.TempDir("", "lxd_test_sink_")
	if err != nil {
		t.Error(err)
		return
	}
	defer os.RemoveAll(sink)

	/* now, write something to rsync over */
	f, err := os.Create(path.Join(source, "foo"))
	if err != nil {
		t.Error(err)
		return
	}
	f.Write([]byte(helloWorld))
	f.Close()

	send, sendConn, _, err := rsyncSendSetup(shared.AddSlash(source))
	if err != nil {
		t.Error(err)
		return
	}

	recv := rsyncRecvCmd(sink)

	recvOut, err := recv.StdoutPipe()
	if err != nil {
		t.Error(err)
		return
	}

	recvIn, err := recv.StdinPipe()
	if err != nil {
		t.Error(err)
		return
	}

	if err := recv.Start(); err != nil {
		t.Error(err)
		return
	}

	go func() {
		defer sendConn.Close()
		if _, err := io.Copy(sendConn, recvOut); err != nil {
			t.Error(err)
		}

		if err := recv.Wait(); err != nil {
			t.Error(err)
		}
	}()

	/*
	 * We close the socket in the above gofunc, but go tells us
	 * https://github.com/golang/go/issues/4373 that this is an error
	 * because we were reading from a socket that was closed. Thus, we
	 * ignore it.
	 */
	io.Copy(recvIn, sendConn)

	if err := send.Wait(); err != nil {
		t.Error(err)
		return
	}

	f, err = os.Open(path.Join(sink, "foo"))
	if err != nil {
		t.Error(err)
		return
	}
	defer f.Close()

	buf, err := ioutil.ReadAll(f)
	if err != nil {
		t.Error(err)
		return
	}

	if string(buf) != helloWorld {
		t.Errorf("expected %s got %s", helloWorld, buf)
		return
	}
}
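The test above wires the two rsync processes together directly with io.Copy. Over a real migration, RsyncSend and RsyncRecv have to do the same job across the websocket instead. The sketch below shows roughly what that forwarding looks like (an illustration, not the actual implementation): binary websocket messages feed the local rsync's stdin, and its stdout is chunked back out as binary messages.

func forwardWebsocket(conn *websocket.Conn, stdin io.WriteCloser, stdout io.Reader) error {
	/* websocket -> local rsync stdin */
	go func() {
		defer stdin.Close()
		for {
			mt, r, err := conn.NextReader()
			if err != nil {
				return
			}

			if mt != websocket.BinaryMessage {
				continue
			}

			if _, err := io.Copy(stdin, r); err != nil {
				return
			}
		}
	}()

	/* local rsync stdout -> websocket */
	buf := make([]byte, 1<<16)
	for {
		n, err := stdout.Read(buf)
		if n > 0 {
			if werr := conn.WriteMessage(websocket.BinaryMessage, buf[:n]); werr != nil {
				return werr
			}
		}

		if err == io.EOF {
			return nil
		}

		if err != nil {
			return err
		}
	}
}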
func rsyncMigrationSink(live bool, container container, snapshots []*Snapshot, conn *websocket.Conn, srcIdmap *shared.IdmapSet, op *operation) error {
	isDirBackend := container.Storage().GetStorageType() == storageTypeDir

	if isDirBackend {
		if len(snapshots) > 0 {
			err := os.MkdirAll(shared.VarPath(fmt.Sprintf("snapshots/%s", container.Name())), 0700)
			if err != nil {
				return err
			}
		}

		for _, snap := range snapshots {
			args := snapshotProtobufToContainerArgs(container.Name(), snap)
			s, err := containerCreateEmptySnapshot(container.Daemon(), args)
			if err != nil {
				return err
			}

			wrapper := StorageProgressWriter(op, "fs_progress", s.Name())
			if err := RsyncRecv(shared.AddSlash(s.Path()), conn, wrapper); err != nil {
				return err
			}

			if err := ShiftIfNecessary(container, srcIdmap); err != nil {
				return err
			}
		}

		wrapper := StorageProgressWriter(op, "fs_progress", container.Name())
		if err := RsyncRecv(shared.AddSlash(container.Path()), conn, wrapper); err != nil {
			return err
		}
	} else {
		if err := container.StorageStart(); err != nil {
			return err
		}
		defer container.StorageStop()

		for _, snap := range snapshots {
			args := snapshotProtobufToContainerArgs(container.Name(), snap)

			wrapper := StorageProgressWriter(op, "fs_progress", snap.GetName())
			if err := RsyncRecv(shared.AddSlash(container.Path()), conn, wrapper); err != nil {
				return err
			}

			if err := ShiftIfNecessary(container, srcIdmap); err != nil {
				return err
			}

			_, err := containerCreateAsSnapshot(container.Daemon(), args, container)
			if err != nil {
				return err
			}
		}

		wrapper := StorageProgressWriter(op, "fs_progress", container.Name())
		if err := RsyncRecv(shared.AddSlash(container.Path()), conn, wrapper); err != nil {
			return err
		}
	}

	if live {
		/* now receive the final sync */
		wrapper := StorageProgressWriter(op, "fs_progress", container.Name())
		if err := RsyncRecv(shared.AddSlash(container.Path()), conn, wrapper); err != nil {
			return err
		}
	}

	if err := ShiftIfNecessary(container, srcIdmap); err != nil {
		return err
	}

	return nil
}
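The StorageProgressWriter / StorageProgressReader wrappers used above are not shown in this listing; conceptually they are just byte counters around the transfer stream. A minimal hypothetical version follows (the UpdateMetadata call on *operation and the struct shape are assumptions, shown only to illustrate the idea).

type progressWriter struct {
	io.WriteCloser
	op    *operation
	key   string
	name  string
	total int64
}

func (pw *progressWriter) Write(p []byte) (int, error) {
	n, err := pw.WriteCloser.Write(p)
	pw.total += int64(n)

	if pw.op != nil {
		/* Assumed helper on *operation; the real progress plumbing may
		 * differ. The point is simply to report a running byte count under
		 * a metadata key such as "fs_progress". */
		pw.op.UpdateMetadata(map[string]interface{}{
			pw.key: map[string]string{pw.name: fmt.Sprintf("%d bytes transferred", pw.total)},
		})
	}

	return n, err
}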
func (c *migrationSink) Do(migrateOp *operation) error { var err error if c.push { <-c.allConnected } disconnector := c.src.disconnect if c.push { disconnector = c.dest.disconnect } if c.push { defer disconnector() } else { c.src.controlConn, err = c.connectWithSecret(c.src.controlSecret) if err != nil { return err } defer c.src.disconnect() c.src.fsConn, err = c.connectWithSecret(c.src.fsSecret) if err != nil { c.src.sendControl(err) return err } if c.src.live { c.src.criuConn, err = c.connectWithSecret(c.src.criuSecret) if err != nil { c.src.sendControl(err) return err } } } receiver := c.src.recv if c.push { receiver = c.dest.recv } sender := c.src.send if c.push { sender = c.dest.send } controller := c.src.sendControl if c.push { controller = c.dest.sendControl } header := MigrationHeader{} if err := receiver(&header); err != nil { controller(err) return err } live := c.src.live if c.push { live = c.dest.live } criuType := CRIUType_CRIU_RSYNC.Enum() if !live { criuType = nil } mySink := c.src.container.Storage().MigrationSink myType := c.src.container.Storage().MigrationType() resp := MigrationHeader{ Fs: &myType, Criu: criuType, } // If the storage type the source has doesn't match what we have, then // we have to use rsync. if *header.Fs != *resp.Fs { mySink = rsyncMigrationSink myType = MigrationFSType_RSYNC resp.Fs = &myType } if err := sender(&resp); err != nil { controller(err) return err } restore := make(chan error) go func(c *migrationSink) { imagesDir := "" srcIdmap := new(shared.IdmapSet) for _, idmap := range header.Idmap { e := shared.IdmapEntry{ Isuid: *idmap.Isuid, Isgid: *idmap.Isgid, Nsid: int(*idmap.Nsid), Hostid: int(*idmap.Hostid), Maprange: int(*idmap.Maprange)} srcIdmap.Idmap = shared.Extend(srcIdmap.Idmap, e) } /* We do the fs receive in parallel so we don't have to reason * about when to receive what. The sending side is smart enough * to send the filesystem bits that it can before it seizes the * container to start checkpointing, so the total transfer time * will be minimized even if we're dumb here. */ fsTransfer := make(chan error) go func() { snapshots := []*Snapshot{} /* Legacy: we only sent the snapshot names, so we just * copy the container's config over, same as we used to * do. 
*/ if len(header.SnapshotNames) != len(header.Snapshots) { for _, name := range header.SnapshotNames { base := snapshotToProtobuf(c.src.container) base.Name = &name snapshots = append(snapshots, base) } } else { snapshots = header.Snapshots } var fsConn *websocket.Conn if c.push { fsConn = c.dest.fsConn } else { fsConn = c.src.fsConn } if err := mySink(live, c.src.container, header.Snapshots, fsConn, srcIdmap, migrateOp); err != nil { fsTransfer <- err return } if err := ShiftIfNecessary(c.src.container, srcIdmap); err != nil { fsTransfer <- err return } fsTransfer <- nil }() if live { var err error imagesDir, err = ioutil.TempDir("", "lxd_restore_") if err != nil { restore <- err return } defer os.RemoveAll(imagesDir) var criuConn *websocket.Conn if c.push { criuConn = c.dest.criuConn } else { criuConn = c.src.criuConn } if err := RsyncRecv(shared.AddSlash(imagesDir), criuConn, nil); err != nil { restore <- err return } } err := <-fsTransfer if err != nil { restore <- err return } if live { err = c.src.container.Migrate(lxc.MIGRATE_RESTORE, imagesDir, "migration", false, false) if err != nil { restore <- err return } } restore <- nil }(c) var source <-chan MigrationControl if c.push { source = c.dest.controlChannel() } else { source = c.src.controlChannel() } for { select { case err = <-restore: controller(err) return err case msg, ok := <-source: if !ok { disconnector() return fmt.Errorf("Got error reading source") } if !*msg.Success { disconnector() return fmt.Errorf(*msg.Message) } else { // The source can only tell us it failed (e.g. if // checkpointing failed). We have to tell the source // whether or not the restore was successful. shared.LogDebugf("Unknown message %v from source", msg) } } } }
func (c *migrationSink) do() error { var err error c.controlConn, err = c.connectWithSecret(c.controlSecret) if err != nil { return err } defer c.disconnect() c.fsConn, err = c.connectWithSecret(c.fsSecret) if err != nil { c.sendControl(err) return err } if c.live { c.criuConn, err = c.connectWithSecret(c.criuSecret) if err != nil { c.sendControl(err) return err } } header := MigrationHeader{} if err := c.recv(&header); err != nil { c.sendControl(err) return err } criuType := CRIUType_CRIU_RSYNC.Enum() if !c.live { criuType = nil } mySink := c.container.Storage().MigrationSink myType := c.container.Storage().MigrationType() resp := MigrationHeader{ Fs: &myType, Criu: criuType, } // If the storage type the source has doesn't match what we have, then // we have to use rsync. if *header.Fs != *resp.Fs { mySink = rsyncMigrationSink myType = MigrationFSType_RSYNC resp.Fs = &myType } if err := c.send(&resp); err != nil { c.sendControl(err) return err } restore := make(chan error) go func(c *migrationSink) { imagesDir := "" srcIdmap := new(shared.IdmapSet) snapshots := []container{} for _, snap := range header.Snapshots { // TODO: we need to propagate snapshot configurations // as well. Right now the container configuration is // done through the initial migration post. Should we // post the snapshots and their configs as well, or do // it some other way? name := c.container.Name() + shared.SnapshotDelimiter + snap args := containerArgs{ Ctype: cTypeSnapshot, Config: c.container.LocalConfig(), Profiles: c.container.Profiles(), Ephemeral: c.container.IsEphemeral(), Architecture: c.container.Architecture(), Devices: c.container.LocalDevices(), Name: name, } ct, err := containerCreateEmptySnapshot(c.container.Daemon(), args) if err != nil { restore <- err return } snapshots = append(snapshots, ct) } for _, idmap := range header.Idmap { e := shared.IdmapEntry{ Isuid: *idmap.Isuid, Isgid: *idmap.Isgid, Nsid: int(*idmap.Nsid), Hostid: int(*idmap.Hostid), Maprange: int(*idmap.Maprange)} srcIdmap.Idmap = shared.Extend(srcIdmap.Idmap, e) } /* We do the fs receive in parallel so we don't have to reason * about when to receive what. The sending side is smart enough * to send the filesystem bits that it can before it seizes the * container to start checkpointing, so the total transfer time * will be minimized even if we're dumb here. */ fsTransfer := make(chan error) go func() { if err := mySink(c.live, c.container, snapshots, c.fsConn); err != nil { fsTransfer <- err return } if err := ShiftIfNecessary(c.container, srcIdmap); err != nil { fsTransfer <- err return } fsTransfer <- nil }() if c.live { var err error imagesDir, err = ioutil.TempDir("", "lxd_restore_") if err != nil { os.RemoveAll(imagesDir) return } defer func() { err := CollectCRIULogFile(c.container, imagesDir, "migration", "restore") /* * If the checkpoint fails, we won't have any log to collect, * so don't warn about that. */ if err != nil && !os.IsNotExist(err) { shared.Debugf("Error collectiong migration log file %s", err) } os.RemoveAll(imagesDir) }() if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil { restore <- err return } /* * For unprivileged containers we need to shift the * perms on the images images so that they can be * opened by the process after it is in its user * namespace. 
*/ if !c.container.IsPrivileged() { if err := c.container.IdmapSet().ShiftRootfs(imagesDir); err != nil { restore <- err return } } } err := <-fsTransfer if err != nil { restore <- err return } if c.live { err := c.container.StartFromMigration(imagesDir) if err != nil { log, err2 := GetCRIULogErrors(imagesDir, "restore") /* restore failed before CRIU was invoked, give * back the liblxc error */ if err2 != nil { log = err.Error() } err = fmt.Errorf("restore failed:\n%s", log) restore <- err return } } for _, snap := range snapshots { if err := ShiftIfNecessary(snap, srcIdmap); err != nil { restore <- err return } } restore <- nil }(c) source := c.controlChannel() for { select { case err = <-restore: c.sendControl(err) return err case msg, ok := <-source: if !ok { c.disconnect() return fmt.Errorf("Got error reading source") } if !*msg.Success { c.disconnect() return fmt.Errorf(*msg.Message) } else { // The source can only tell us it failed (e.g. if // checkpointing failed). We have to tell the source // whether or not the restore was successful. shared.Debugf("Unknown message %v from source", msg) } } } }
func (c *migrationSink) do() error { var err error c.controlConn, err = c.connectWithSecret(c.controlSecret) if err != nil { return err } defer c.disconnect() c.fsConn, err = c.connectWithSecret(c.fsSecret) if err != nil { c.sendControl(err) return err } if c.live { c.criuConn, err = c.connectWithSecret(c.criuSecret) if err != nil { c.sendControl(err) return err } } // For now, we just ignore whatever the server sends us. We only // support RSYNC, so that's what we respond with. header := MigrationHeader{} if err := c.recv(&header); err != nil { c.sendControl(err) return err } criuType := CRIUType_CRIU_RSYNC.Enum() if !c.live { criuType = nil } resp := MigrationHeader{Fs: MigrationFSType_RSYNC.Enum(), Criu: criuType} if err := c.send(&resp); err != nil { c.sendControl(err) return err } restore := make(chan error) go func(c *migrationSink) { imagesDir := "" if c.live { var err error imagesDir, err = ioutil.TempDir("", "lxd_migration_") if err != nil { os.RemoveAll(imagesDir) c.sendControl(err) return } defer func() { err := collectMigrationLogFile(c.container, imagesDir, "restore") /* * If the checkpoint fails, we won't have any log to collect, * so don't warn about that. */ if err != nil && !os.IsNotExist(err) { shared.Debugf("error collectiong migration log file %s", err) } os.RemoveAll(imagesDir) }() if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil { restore <- err os.RemoveAll(imagesDir) c.sendControl(err) return } } fsDir := c.container.ConfigItem("lxc.rootfs")[0] if err := RsyncRecv(shared.AddSlash(fsDir), c.fsConn); err != nil { restore <- err c.sendControl(err) return } if c.IdmapSet != nil { if err := c.IdmapSet.ShiftRootfs(shared.VarPath("containers", c.container.Name())); err != nil { restore <- err c.sendControl(err) return } } if c.live { f, err := ioutil.TempFile("", "lxd_lxc_migrateconfig_") if err != nil { restore <- err return } if err = f.Chmod(0600); err != nil { f.Close() os.Remove(f.Name()) return } f.Close() if err := c.container.SaveConfigFile(f.Name()); err != nil { restore <- err return } cmd := exec.Command( os.Args[0], "forkmigrate", c.container.Name(), c.container.ConfigPath(), f.Name(), imagesDir, ) restore <- cmd.Run() } else { restore <- nil } }(c) source := c.controlChannel() for { select { case err = <-restore: c.sendControl(err) return err case msg, ok := <-source: if !ok { c.disconnect() return fmt.Errorf("got error reading source") } if !*msg.Success { c.disconnect() return fmt.Errorf(*msg.Message) } else { // The source can only tell us it failed (e.g. if // checkpointing failed). We have to tell the source // whether or not the restore was successful. shared.Debugf("unknown message %v from source", msg) } } } }
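Both ends of the migration lean heavily on sendControl to report success or failure over the control websocket. A rough sketch of what such a helper presumably does, assuming MigrationControl is the generated protobuf message used in the recv calls above (the real helper and its error handling may differ):

func sendControlMessage(conn *websocket.Conn, err error) {
	message := ""
	if err != nil {
		message = err.Error()
	}

	msg := MigrationControl{
		Success: proto.Bool(err == nil),
		Message: proto.String(message),
	}

	body, marshalErr := proto.Marshal(&msg)
	if marshalErr != nil {
		shared.Debugf("Failed to marshal migration control message: %s", marshalErr)
		return
	}

	conn.WriteMessage(websocket.BinaryMessage, body)
}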