func NewProvider(pspec *volume.ProviderSpec) (provider volume.Provider, err error) {
	switch pspec.Kind {
	case "zfs":
		config := &zfs.ProviderConfig{}
		if err := json.Unmarshal(pspec.Config, config); err != nil {
			return nil, err
		}
		if provider, err = zfs.NewProvider(config); err != nil {
			return
		}
		return
	default:
		return nil, volume.UnknownProviderKind
	}
}
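// Example (illustrative sketch, not part of the original source): how a caller
// might drive NewProvider from a serialized spec. It assumes volume.ProviderSpec
// carries the provider kind plus the raw JSON config bytes that NewProvider
// unmarshals above, and it reuses the zfs.ProviderConfig fields (DatasetName,
// Make, WorkingDir) that appear elsewhere in this code. The function name and
// the literal paths are hypothetical. Going through ProviderSpec rather than
// calling zfs.NewProvider directly keeps the provider kind and its
// configuration serializable, which is what the switch above exists for.
func exampleNewZFSProvider() (volume.Provider, error) {
	rawConfig, err := json.Marshal(&zfs.ProviderConfig{
		DatasetName: "flynn-default",
		Make: &zfs.MakeDev{
			BackingFilename: "/var/lib/flynn/volumes/zfs/vdev/flynn-default-zpool.vdev",
			Size:            100000000000, // ~100GB backing file (hypothetical size)
		},
		WorkingDir: "/var/lib/flynn/volumes/zfs",
	})
	if err != nil {
		return nil, err
	}
	return NewProvider(&volume.ProviderSpec{
		Kind:   "zfs",
		Config: rawConfig,
	})
}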
// covers basic volume persistence and named volume persistence
func (s *PersistenceTests) TestPersistence(c *C) {
	idString := random.String(12)
	vmanDBfilePath := fmt.Sprintf("/tmp/flynn-volumes-%s.bolt", idString)
	zfsDatasetName := fmt.Sprintf("flynn-test-dataset-%s", idString)
	zfsVdevFilePath := fmt.Sprintf("/tmp/flynn-test-zpool-%s.vdev", idString)
	defer os.Remove(vmanDBfilePath)
	defer os.Remove(zfsVdevFilePath)
	defer func() {
		pool, _ := gzfs.GetZpool(zfsDatasetName)
		if pool != nil {
			if datasets, err := pool.Datasets(); err == nil {
				for _, dataset := range datasets {
					dataset.Destroy(gzfs.DestroyRecursive | gzfs.DestroyForceUmount)
					os.Remove(dataset.Mountpoint)
				}
			}
			err := pool.Destroy()
			c.Assert(err, IsNil)
		}
	}()

	// new volume provider with a new backing zfs vdev file
	volProv, err := zfs.NewProvider(&zfs.ProviderConfig{
		DatasetName: zfsDatasetName,
		Make: &zfs.MakeDev{
			BackingFilename: zfsVdevFilePath,
			Size:            int64(math.Pow(2, float64(30))),
		},
	})
	c.Assert(err, IsNil)

	// new volume manager with that shiny new backing zfs vdev file and a new boltdb
	vman := volumemanager.New(
		vmanDBfilePath,
		func() (volume.Provider, error) { return volProv, nil },
	)
	c.Assert(vman.OpenDB(), IsNil)

	// make a volume
	vol1, err := vman.NewVolume()
	c.Assert(err, IsNil)

	// assert existence of filesystems; emplace some data
	f, err := os.Create(filepath.Join(vol1.Location(), "alpha"))
	c.Assert(err, IsNil)
	f.Close()

	// close persistence
	c.Assert(vman.CloseDB(), IsNil)

	// hack zfs export/umounting to emulate host shutdown
	err = exec.Command("zpool", "export", "-f", zfsDatasetName).Run()
	c.Assert(err, IsNil)

	// sanity check: assert the filesystems are gone
	// note that the directories remain present after 'zpool export'
	_, err = os.Stat(filepath.Join(vol1.Location(), "alpha"))
	c.Assert(os.IsNotExist(err), Equals, true)

	// restore
	vman = volumemanager.New(
		vmanDBfilePath,
		func() (volume.Provider, error) {
			c.Fatal("default provider setup should not be called if the previous provider was restored")
			return nil, nil
		},
	)
	c.Assert(vman.OpenDB(), IsNil)

	// assert volumes
	restoredVolumes := vman.Volumes()
	c.Assert(restoredVolumes, HasLen, 2)
	c.Assert(restoredVolumes[vol1.Info().ID], NotNil)

	// switch to the new volume references; do a bunch of smell checks on those
	vol1restored := restoredVolumes[vol1.Info().ID]
	c.Assert(vol1restored.Info(), DeepEquals, vol1.Info())
	c.Assert(vol1restored.Provider(), NotNil)

	// assert existences of filesystems and previous data
	c.Assert(vol1restored.Location(), testutils.DirContains, []string{"alpha"})
}
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	externalIP := args.String["--external-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	nsumount := args.String["--nsumount"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	grohl.AddContext("app", "host")
	grohl.Log(grohl.Data{"at": "start"})
	g := grohl.NewContext(grohl.Data{"fn": "main"})

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			shutdown.Fatal(err)
		}
	}

	publishAddr := net.JoinHostPort(externalIP, "1113")
	if discoveryToken != "" {
		// TODO: retry
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			g.Log(grohl.Data{"at": "register_discovery", "status": "error", "err": err.Error()})
			shutdown.Fatal(err)
		}
		g.Log(grohl.Data{"at": "register_discovery", "id": discoveryID})
	}

	state := NewState(hostID, stateFile)
	var backend Backend
	var err error

	// create volume manager
	vman, err := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined.
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			g.Log(grohl.Data{"at": "zpool_size", "size": size})

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		},
	)
	if err != nil {
		shutdown.Fatal(err)
	}

	mux := logmux.New(1000)
	shutdown.BeforeExit(func() { mux.Close() })

	switch backendName {
	case "libvirt-lxc":
		backend, err = NewLibvirtLXCBackend(state, vman, logDir, flynnInit, nsumount, mux)
	default:
		log.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)

	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:      hostID,
		url:     publishURL,
		state:   state,
		backend: backend,
		status:  &host.HostStatus{ID: hostID, URL: publishURL},
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		if err := backend.Cleanup(except); err != nil {
			return err
		}
		for _, id := range except {
			if e := backend.Stop(id); e != nil {
				err = e
			}
		}
		return
	}

	resurrect, err := state.Restore(backend)
	if err != nil {
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		discoverdManager.Close()
		stopJobs()
	})
	shutdown.BeforeExit(func() {
		if err := state.MarkForResurrection(); err != nil {
			log.Print("error marking for resurrection", err)
		}
	})

	if err := serveHTTP(
		host,
		&attachHandler{state: state, backend: backend},
		cluster.NewClient(),
		vman,
		discoverdManager.ConnectLocal,
	); err != nil {
		shutdown.Fatal(err)
	}

	if force {
		if err := stopJobs(); err != nil {
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
	}

	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		// No peers have working discoverd, so resurrect any available jobs
		resurrect()
	}

	<-make(chan struct{})
}
func runDownload(args *docopt.Args) error {
	log := log15.New()

	log.Info("initializing ZFS volumes")
	volPath := args.String["--volpath"]
	volDB := filepath.Join(volPath, "volumes.bolt")
	volMan := volumemanager.New(volDB, log, func() (volume.Provider, error) {
		return zfs.NewProvider(&zfs.ProviderConfig{
			DatasetName: zfs.DefaultDatasetName,
			Make:        zfs.DefaultMakeDev(volPath, log),
			WorkingDir:  filepath.Join(volPath, "zfs"),
		})
	})
	if err := volMan.OpenDB(); err != nil {
		log.Error("error opening volume database, make sure flynn-host is not running", "err", err)
		return err
	}

	// create a TUF client and update it
	log.Info("initializing TUF client")
	tufDB := args.String["--tuf-db"]
	local, err := tuf.FileLocalStore(tufDB)
	if err != nil {
		log.Error("error creating local TUF client", "err", err)
		return err
	}
	remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("downloader"))
	if err != nil {
		log.Error("error creating remote TUF client", "err", err)
		return err
	}
	client := tuf.NewClient(local, remote)
	if err := updateTUFClient(client); err != nil {
		log.Error("error updating TUF client", "err", err)
		return err
	}

	configDir := args.String["--config-dir"]

	requestedVersion := os.Getenv("FLYNN_VERSION")
	if requestedVersion == "" {
		requestedVersion, err = getChannelVersion(configDir, client, log)
		if err != nil {
			return err
		}
	}
	log.Info(fmt.Sprintf("downloading components with version %s", requestedVersion))

	d := downloader.New(client, volMan, requestedVersion)

	binDir := args.String["--bin-dir"]
	log.Info(fmt.Sprintf("downloading binaries to %s", binDir))
	if _, err := d.DownloadBinaries(binDir); err != nil {
		log.Error("error downloading binaries", "err", err)
		return err
	}

	// use the requested version of flynn-host to download the images as
	// the format changed in v20161106
	if version.String() != requestedVersion {
		log.Info(fmt.Sprintf("executing %s flynn-host binary", requestedVersion))
		binPath := filepath.Join(binDir, "flynn-host")
		argv := append([]string{binPath}, os.Args[1:]...)
		return syscall.Exec(binPath, argv, os.Environ())
	}

	log.Info("downloading images")
	ch := make(chan *ct.ImagePullInfo)
	go func() {
		for info := range ch {
			switch info.Type {
			case ct.ImagePullTypeImage:
				log.Info(fmt.Sprintf("pulling %s image", info.Name))
			case ct.ImagePullTypeLayer:
				log.Info(fmt.Sprintf("pulling %s layer %s (%s)",
					info.Name, info.Layer.ID, units.BytesSize(float64(info.Layer.Length))))
			}
		}
	}()
	if err := d.DownloadImages(configDir, ch); err != nil {
		log.Error("error downloading images", "err", err)
		return err
	}

	log.Info(fmt.Sprintf("downloading config to %s", configDir))
	if _, err := d.DownloadConfig(configDir); err != nil {
		log.Error("error downloading config", "err", err)
		return err
	}

	log.Info("download complete")
	return nil
}
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	httpPort := args.String["--http-port"]
	externalIP := args.String["--external-ip"]
	listenIP := args.String["--listen-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	tags := parseTagArgs(args.String["--tags"])
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	volProvider := args.String["--vol-provider"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]
	bridgeName := args.String["--bridge-name"]

	logger, err := setupLogger(logDir)
	if err != nil {
		shutdown.Fatalf("error setting up logger: %s", err)
	}

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}

	var maxJobConcurrency uint64 = 4
	if m, err := strconv.ParseUint(args.String["--max-job-concurrency"], 10, 64); err == nil {
		maxJobConcurrency = m
	}

	var partitionCGroups = make(map[string]int64) // name -> cpu shares
	for _, p := range strings.Split(args.String["--partitions"], " ") {
		nameShares := strings.Split(p, "=cpu_shares:")
		if len(nameShares) != 2 {
			shutdown.Fatalf("invalid partition specifier: %q", p)
		}
		shares, err := strconv.ParseInt(nameShares[1], 10, 64)
		if err != nil || shares < 2 {
			shutdown.Fatalf("invalid cpu shares specifier: %q", nameShares[1])
		}
		partitionCGroups[nameShares[0]] = shares
	}
	for _, s := range []string{"user", "system", "background"} {
		if _, ok := partitionCGroups[s]; !ok {
			shutdown.Fatalf("missing mandatory resource partition: %s", s)
		}
	}

	log := logger.New("fn", "runDaemon", "host.id", hostID)
	log.Info("starting daemon")

	log.Info("validating host ID")
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}

	if externalIP == "" {
		log.Info("detecting external IP")
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			log.Error("error detecting external IP", "err", err)
			shutdown.Fatal(err)
		}
		log.Info("using external IP " + externalIP)
	}

	publishAddr := net.JoinHostPort(externalIP, httpPort)
	if discoveryToken != "" {
		// TODO: retry
		log.Info("registering with cluster discovery service", "token", discoveryToken, "addr", publishAddr, "name", hostID)
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			log.Error("error registering with cluster discovery service", "err", err)
			shutdown.Fatal(err)
		}
		log.Info("registered with cluster discovery service", "id", discoveryID)
	}

	state := NewState(hostID, stateFile)
	shutdown.BeforeExit(func() { state.CloseDB() })

	log.Info("initializing volume manager", "provider", volProvider)
	var newVolProvider func() (volume.Provider, error)
	switch volProvider {
	case "zfs":
		newVolProvider = func() (volume.Provider, error) {
			// use a zpool backing file size of either 70% of the device on which
			// volumes will reside, or 100GB if that can't be determined.
			log.Info("determining ZFS zpool size")
			var size int64
			var dev syscall.Statfs_t
			if err := syscall.Statfs(volPath, &dev); err == nil {
				size = (dev.Bsize * int64(dev.Blocks) * 7) / 10
			} else {
				size = 100000000000
			}
			log.Info(fmt.Sprintf("using ZFS zpool size %d", size))

			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: "flynn-default",
				Make: &zfsVolume.MakeDev{
					BackingFilename: filepath.Join(volPath, "zfs/vdev/flynn-default-zpool.vdev"),
					Size:            size,
				},
				WorkingDir: filepath.Join(volPath, "zfs"),
			})
		}
	case "mock":
		newVolProvider = func() (volume.Provider, error) { return nil, nil }
	default:
		shutdown.Fatalf("unknown volume provider: %q", volProvider)
	}
	vman := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		newVolProvider,
	)
	shutdown.BeforeExit(func() { vman.CloseDB() })

	mux := logmux.New(hostID, logDir, logger.New("host.id", hostID, "component", "logmux"))

	log.Info("initializing job backend", "type", backendName)
	var backend Backend
	switch backendName {
	case "libcontainer":
		backend, err = NewLibcontainerBackend(state, vman, bridgeName, flynnInit, mux, partitionCGroups, logger.New("host.id", hostID, "component", "backend", "backend", "libcontainer"))
	case "mock":
		backend = MockBackend{}
	default:
		shutdown.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)
	backend.SetDefaultEnv("LISTEN_IP", listenIP)

	var buffers host.LogBuffers
	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr, tags)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:  hostID,
		url: publishURL,
		status: &host.HostStatus{
			ID:      hostID,
			PID:     os.Getpid(),
			URL:     publishURL,
			Tags:    tags,
			Version: version.String(),
		},
		state:             state,
		backend:           backend,
		vman:              vman,
		discMan:           discoverdManager,
		log:               logger.New("host.id", hostID),
		maxJobConcurrency: maxJobConcurrency,
	}
	backend.SetHost(host)

	// restore the host status if set in the environment
	if statusEnv := os.Getenv("FLYNN_HOST_STATUS"); statusEnv != "" {
		log.Info("restoring host status from parent")
		if err := json.Unmarshal([]byte(statusEnv), &host.status); err != nil {
			log.Error("error restoring host status from parent", "err", err)
			shutdown.Fatal(err)
		}
		pid := os.Getpid()
		log.Info("setting status PID", "pid", pid)
		host.status.PID = pid
		// keep the same tags as the parent
		discoverdManager.UpdateTags(host.status.Tags)
	}

	log.Info("creating HTTP listener")
	l, err := newHTTPListener(net.JoinHostPort(listenIP, httpPort))
	if err != nil {
		log.Error("error creating HTTP listener", "err", err)
		shutdown.Fatal(err)
	}
	host.listener = l
	shutdown.BeforeExit(func() { host.Close() })

	// if we have a control socket FD, wait for a "resume" message before
	// opening state DBs and serving requests.
	var controlFD int
	if fdEnv := os.Getenv("FLYNN_CONTROL_FD"); fdEnv != "" {
		log.Info("parsing control socket file descriptor")
		controlFD, err = strconv.Atoi(fdEnv)
		if err != nil {
			log.Error("error parsing control socket file descriptor", "err", err)
			shutdown.Fatal(err)
		}

		log.Info("waiting for resume message from parent")
		msg := make([]byte, len(ControlMsgResume))
		if _, err := syscall.Read(controlFD, msg); err != nil {
			log.Error("error waiting for resume message from parent", "err", err)
			shutdown.Fatal(err)
		}

		log.Info("validating resume message")
		if !bytes.Equal(msg, ControlMsgResume) {
			log.Error(fmt.Sprintf("unexpected resume message from parent: %v", msg))
			shutdown.ExitWithCode(1)
		}

		log.Info("receiving log buffers from parent")
		if err := json.NewDecoder(&controlSock{controlFD}).Decode(&buffers); err != nil {
			log.Error("error receiving log buffers from parent", "err", err)
			shutdown.Fatal(err)
		}
	}

	log.Info("opening state databases")
	if err := host.OpenDBs(); err != nil {
		log.Error("error opening state databases", "err", err)
		shutdown.Fatal(err)
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		log.Info("stopping all jobs except discoverd")
		if err := backend.Cleanup(except); err != nil {
			log.Error("error stopping all jobs except discoverd", "err", err)
			return err
		}
		for _, id := range except {
			log.Info("stopping discoverd")
			if e := backend.Stop(id); e != nil {
				log.Error("error stopping discoverd", "err", e)
				err = e
			}
		}
		return
	}

	log.Info("restoring state")
	resurrect, err := state.Restore(backend, buffers)
	if err != nil {
		log.Error("error restoring state", "err", err)
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		log.Info("unregistering with service discovery")
		if err := discoverdManager.Close(); err != nil {
			log.Error("error unregistering with service discovery", "err", err)
		}
		stopJobs()
	})

	log.Info("serving HTTP requests")
	host.ServeHTTP()

	if controlFD > 0 {
		// now that we are serving requests, send an "ok" message to the parent
		log.Info("sending ok message to parent")
		if _, err := syscall.Write(controlFD, ControlMsgOK); err != nil {
			log.Error("error sending ok message to parent", "err", err)
			shutdown.Fatal(err)
		}
		log.Info("closing control socket")
		if err := syscall.Close(controlFD); err != nil {
			log.Error("error closing control socket", "err", err)
		}
	}

	if force {
		log.Info("forcibly stopping existing jobs")
		if err := stopJobs(); err != nil {
			log.Error("error forcibly stopping existing jobs", "err", err)
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		log.Info("getting cluster peer IPs")
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			log.Error("error getting discovery cluster", "err", err)
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
		log.Info("got cluster peer IPs", "peers", peerIPs)
	}

	log.Info("connecting to cluster peers")
	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		log.Info("no cluster peers available")
	}

	if !args.Bool["--no-resurrect"] {
		log.Info("resurrecting jobs")
		resurrect()
	}

	monitor := NewMonitor(host.discMan, externalIP, logger)
	shutdown.BeforeExit(func() { monitor.Shutdown() })
	go monitor.Run()

	log.Info("blocking main goroutine")
	<-make(chan struct{})
}
func (s *PersistenceTests) TestTransmittedSnapshotPersistence(c *C) {
	idString := random.String(12)
	vmanDBfilePath := fmt.Sprintf("/tmp/flynn-volumes-%s.bolt", idString)
	zfsDatasetName := fmt.Sprintf("flynn-test-dataset-%s", idString)
	zfsVdevFilePath := fmt.Sprintf("/tmp/flynn-test-zpool-%s.vdev", idString)
	defer os.Remove(vmanDBfilePath)
	defer os.Remove(zfsVdevFilePath)
	defer func() {
		pool, _ := gzfs.GetZpool(zfsDatasetName)
		if pool != nil {
			if datasets, err := pool.Datasets(); err == nil {
				for _, dataset := range datasets {
					dataset.Destroy(gzfs.DestroyRecursive | gzfs.DestroyForceUmount)
					os.Remove(dataset.Mountpoint)
				}
			}
			err := pool.Destroy()
			c.Assert(err, IsNil)
		}
	}()

	// new volume provider with a new backing zfs vdev file
	volProv, err := zfs.NewProvider(&zfs.ProviderConfig{
		DatasetName: zfsDatasetName,
		Make: &zfs.MakeDev{
			BackingFilename: zfsVdevFilePath,
			Size:            int64(math.Pow(2, float64(30))),
		},
	})
	c.Assert(err, IsNil)

	// new volume manager with that shiny new backing zfs vdev file and a new boltdb
	vman := volumemanager.New(
		vmanDBfilePath,
		log15.New(),
		func() (volume.Provider, error) { return volProv, nil },
	)
	c.Assert(vman.OpenDB(), IsNil)

	// make a volume
	vol1, err := vman.NewVolume()
	c.Assert(err, IsNil)

	// assert existence of filesystems; emplace some data
	f, err := os.Create(filepath.Join(vol1.Location(), "alpha"))
	c.Assert(err, IsNil)
	f.Close()

	// make a snapshot, make a new volume to receive it, and do the transmit
	snap, err := vman.CreateSnapshot(vol1.Info().ID)
	c.Assert(err, IsNil)
	vol2, err := vman.NewVolume()
	c.Assert(err, IsNil)
	var buf bytes.Buffer
	haves, err := vman.ListHaves(vol2.Info().ID)
	c.Assert(err, IsNil)
	err = vman.SendSnapshot(snap.Info().ID, haves, &buf)
	c.Assert(err, IsNil)
	snapTransmitted, err := vman.ReceiveSnapshot(vol2.Info().ID, &buf)
	c.Assert(err, IsNil)

	// sanity check: snapshot transmission worked
	c.Assert(vol2.Location(), testutils.DirContains, []string{"alpha"})
	c.Assert(snapTransmitted.Location(), testutils.DirContains, []string{"alpha"})

	// close persistence
	c.Assert(vman.CloseDB(), IsNil)

	// hack zfs export/umounting to emulate host shutdown
	err = exec.Command("zpool", "export", "-f", zfsDatasetName).Run()
	c.Assert(err, IsNil)

	// sanity check: assert the filesystems are gone
	// note that the directories remain present after 'zpool export'
	_, err = os.Stat(filepath.Join(snap.Location(), "alpha"))
	c.Assert(os.IsNotExist(err), Equals, true)
	_, err = os.Stat(filepath.Join(snapTransmitted.Location(), "alpha"))
	c.Assert(os.IsNotExist(err), Equals, true)

	// restore
	vman = volumemanager.New(
		vmanDBfilePath,
		log15.New(),
		func() (volume.Provider, error) {
			c.Fatal("default provider setup should not be called if the previous provider was restored")
			return nil, nil
		},
	)
	c.Assert(vman.OpenDB(), IsNil)

	// assert volumes
	restoredVolumes := vman.Volumes()
	c.Assert(restoredVolumes, HasLen, 4)
	c.Assert(restoredVolumes[vol1.Info().ID], NotNil)
	c.Assert(restoredVolumes[snap.Info().ID], NotNil)
	c.Assert(restoredVolumes[vol2.Info().ID], NotNil)
	c.Assert(restoredVolumes[snapTransmitted.Info().ID], NotNil)

	// still look like a snapshot?
	snapRestored := restoredVolumes[snapTransmitted.Info().ID]
	assertInfoEqual(c, snapRestored, snapTransmitted)
	c.Assert(snapRestored.IsSnapshot(), Equals, true)

	// assert existences of filesystems and previous data
	c.Assert(snapRestored.Location(), testutils.DirContains, []string{"alpha"})
}