func runDownload(args *docopt.Args) error { log := log15.New() log.Info("initializing ZFS volumes") volPath := args.String["--volpath"] volDB := filepath.Join(volPath, "volumes.bolt") volMan := volumemanager.New(volDB, log, func() (volume.Provider, error) { return zfs.NewProvider(&zfs.ProviderConfig{ DatasetName: zfs.DefaultDatasetName, Make: zfs.DefaultMakeDev(volPath, log), WorkingDir: filepath.Join(volPath, "zfs"), }) }) if err := volMan.OpenDB(); err != nil { log.Error("error opening volume database, make sure flynn-host is not running", "err", err) return err } // create a TUF client and update it log.Info("initializing TUF client") tufDB := args.String["--tuf-db"] local, err := tuf.FileLocalStore(tufDB) if err != nil { log.Error("error creating local TUF client", "err", err) return err } remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("downloader")) if err != nil { log.Error("error creating remote TUF client", "err", err) return err } client := tuf.NewClient(local, remote) if err := updateTUFClient(client); err != nil { log.Error("error updating TUF client", "err", err) return err } configDir := args.String["--config-dir"] requestedVersion := os.Getenv("FLYNN_VERSION") if requestedVersion == "" { requestedVersion, err = getChannelVersion(configDir, client, log) if err != nil { return err } } log.Info(fmt.Sprintf("downloading components with version %s", requestedVersion)) d := downloader.New(client, volMan, requestedVersion) binDir := args.String["--bin-dir"] log.Info(fmt.Sprintf("downloading binaries to %s", binDir)) if _, err := d.DownloadBinaries(binDir); err != nil { log.Error("error downloading binaries", "err", err) return err } // use the requested version of flynn-host to download the images as // the format changed in v20161106 if version.String() != requestedVersion { log.Info(fmt.Sprintf("executing %s flynn-host binary", requestedVersion)) binPath := filepath.Join(binDir, "flynn-host") argv := append([]string{binPath}, os.Args[1:]...) return syscall.Exec(binPath, argv, os.Environ()) } log.Info("downloading images") ch := make(chan *ct.ImagePullInfo) go func() { for info := range ch { switch info.Type { case ct.ImagePullTypeImage: log.Info(fmt.Sprintf("pulling %s image", info.Name)) case ct.ImagePullTypeLayer: log.Info(fmt.Sprintf("pulling %s layer %s (%s)", info.Name, info.Layer.ID, units.BytesSize(float64(info.Layer.Length)))) } } }() if err := d.DownloadImages(configDir, ch); err != nil { log.Error("error downloading images", "err", err) return err } log.Info(fmt.Sprintf("downloading config to %s", configDir)) if _, err := d.DownloadConfig(configDir); err != nil { log.Error("error downloading config", "err", err) return err } log.Info("download complete") return nil }
func runDaemon(args *docopt.Args) { hostname, _ := os.Hostname() httpPort := args.String["--http-port"] externalIP := args.String["--external-ip"] listenIP := args.String["--listen-ip"] stateFile := args.String["--state"] hostID := args.String["--id"] tags := parseTagArgs(args.String["--tags"]) force := args.Bool["--force"] volPath := args.String["--volpath"] volProvider := args.String["--vol-provider"] backendName := args.String["--backend"] flynnInit := args.String["--flynn-init"] logDir := args.String["--log-dir"] discoveryToken := args.String["--discovery"] bridgeName := args.String["--bridge-name"] logger, err := setupLogger(logDir) if err != nil { shutdown.Fatalf("error setting up logger: %s", err) } initLogLevel, err := log15.LvlFromString(args.String["--init-log-level"]) if err != nil { shutdown.Fatalf("error setting init log level: %s", err) } var peerIPs []string if args.String["--peer-ips"] != "" { peerIPs = strings.Split(args.String["--peer-ips"], ",") } if hostID == "" { hostID = strings.Replace(hostname, "-", "", -1) } var maxJobConcurrency uint64 = 4 if m, err := strconv.ParseUint(args.String["--max-job-concurrency"], 10, 64); err == nil { maxJobConcurrency = m } if path, err := filepath.Abs(flynnInit); err == nil { flynnInit = path } var partitionCGroups = make(map[string]int64) // name -> cpu shares for _, p := range strings.Split(args.String["--partitions"], " ") { nameShares := strings.Split(p, "=cpu_shares:") if len(nameShares) != 2 { shutdown.Fatalf("invalid partition specifier: %q", p) } shares, err := strconv.ParseInt(nameShares[1], 10, 64) if err != nil || shares < 2 { shutdown.Fatalf("invalid cpu shares specifier: %q", shares) } partitionCGroups[nameShares[0]] = shares } for _, s := range []string{"user", "system", "background"} { if _, ok := partitionCGroups[s]; !ok { shutdown.Fatalf("missing mandatory resource partition: %s", s) } } log := logger.New("fn", "runDaemon", "host.id", hostID) log.Info("starting daemon") log.Info("validating host ID") if strings.Contains(hostID, "-") { shutdown.Fatal("host id must not contain dashes") } if externalIP == "" { log.Info("detecting external IP") var err error externalIP, err = config.DefaultExternalIP() if err != nil { log.Error("error detecting external IP", "err", err) shutdown.Fatal(err) } log.Info("using external IP " + externalIP) } publishAddr := net.JoinHostPort(externalIP, httpPort) if discoveryToken != "" { // TODO: retry log.Info("registering with cluster discovery service", "token", discoveryToken, "addr", publishAddr, "name", hostID) discoveryID, err := discovery.RegisterInstance(discovery.Info{ ClusterURL: discoveryToken, InstanceURL: "http://" + publishAddr, Name: hostID, }) if err != nil { log.Error("error registering with cluster discovery service", "err", err) shutdown.Fatal(err) } log.Info("registered with cluster discovery service", "id", discoveryID) } state := NewState(hostID, stateFile) shutdown.BeforeExit(func() { state.CloseDB() }) log.Info("initializing volume manager", "provider", volProvider) var newVolProvider func() (volume.Provider, error) switch volProvider { case "zfs": newVolProvider = func() (volume.Provider, error) { return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{ DatasetName: zfsVolume.DefaultDatasetName, Make: zfsVolume.DefaultMakeDev(volPath, log), WorkingDir: filepath.Join(volPath, "zfs"), }) } case "mock": newVolProvider = func() (volume.Provider, error) { return nil, nil } default: shutdown.Fatalf("unknown volume provider: %q", volProvider) } vman := volumemanager.New( filepath.Join(volPath, "volumes.bolt"), logger.New("component", "volumemanager"), newVolProvider, ) shutdown.BeforeExit(func() { vman.CloseDB() }) mux := logmux.New(hostID, logDir, logger.New("host.id", hostID, "component", "logmux")) log.Info("initializing job backend", "type", backendName) var backend Backend switch backendName { case "libcontainer": backend, err = NewLibcontainerBackend(&LibcontainerConfig{ State: state, VolManager: vman, BridgeName: bridgeName, InitPath: flynnInit, InitLogLevel: initLogLevel, LogMux: mux, PartitionCGroups: partitionCGroups, Logger: logger.New("host.id", hostID, "component", "backend", "backend", "libcontainer"), }) case "mock": backend = MockBackend{} default: shutdown.Fatalf("unknown backend %q", backendName) } if err != nil { shutdown.Fatal(err) } backend.SetDefaultEnv("EXTERNAL_IP", externalIP) backend.SetDefaultEnv("LISTEN_IP", listenIP) var buffers host.LogBuffers discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr, tags) publishURL := "http://" + publishAddr host := &Host{ id: hostID, url: publishURL, status: &host.HostStatus{ ID: hostID, PID: os.Getpid(), URL: publishURL, Tags: tags, Version: version.String(), }, state: state, backend: backend, vman: vman, volAPI: volumeapi.NewHTTPAPI(vman), discMan: discoverdManager, log: logger.New("host.id", hostID), maxJobConcurrency: maxJobConcurrency, } backend.SetHost(host) // restore the host status if set in the environment if statusEnv := os.Getenv("FLYNN_HOST_STATUS"); statusEnv != "" { log.Info("restoring host status from parent") if err := json.Unmarshal([]byte(statusEnv), &host.status); err != nil { log.Error("error restoring host status from parent", "err", err) shutdown.Fatal(err) } pid := os.Getpid() log.Info("setting status PID", "pid", pid) host.status.PID = pid // keep the same tags as the parent discoverdManager.UpdateTags(host.status.Tags) } log.Info("creating HTTP listener") l, err := newHTTPListener(net.JoinHostPort(listenIP, httpPort)) if err != nil { log.Error("error creating HTTP listener", "err", err) shutdown.Fatal(err) } host.listener = l shutdown.BeforeExit(func() { host.Close() }) // if we have a control socket FD, wait for a "resume" message before // opening state DBs and serving requests. var controlFD int if fdEnv := os.Getenv("FLYNN_CONTROL_FD"); fdEnv != "" { log.Info("parsing control socket file descriptor") controlFD, err = strconv.Atoi(fdEnv) if err != nil { log.Error("error parsing control socket file descriptor", "err", err) shutdown.Fatal(err) } log.Info("waiting for resume message from parent") msg := make([]byte, len(ControlMsgResume)) if _, err := syscall.Read(controlFD, msg); err != nil { log.Error("error waiting for resume message from parent", "err", err) shutdown.Fatal(err) } log.Info("validating resume message") if !bytes.Equal(msg, ControlMsgResume) { log.Error(fmt.Sprintf("unexpected resume message from parent: %v", msg)) shutdown.ExitWithCode(1) } log.Info("receiving log buffers from parent") if err := json.NewDecoder(&controlSock{controlFD}).Decode(&buffers); err != nil { log.Error("error receiving log buffers from parent", "err", err) shutdown.Fatal(err) } } log.Info("opening state databases") if err := host.OpenDBs(); err != nil { log.Error("error opening state databases", "err", err) shutdown.Fatal(err) } // stopJobs stops all jobs, leaving discoverd until the end so other // jobs can unregister themselves on shutdown. stopJobs := func() (err error) { var except []string host.statusMtx.RLock() if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" { except = []string{host.status.Discoverd.JobID} } host.statusMtx.RUnlock() log.Info("stopping all jobs except discoverd") if err := backend.Cleanup(except); err != nil { log.Error("error stopping all jobs except discoverd", "err", err) return err } for _, id := range except { log.Info("stopping discoverd") if e := backend.Stop(id); e != nil { log.Error("error stopping discoverd", "err", err) err = e } } return } log.Info("restoring state") resurrect, err := state.Restore(backend, buffers) if err != nil { log.Error("error restoring state", "err", err) shutdown.Fatal(err) } shutdown.BeforeExit(func() { // close discoverd before stopping jobs so we can unregister first log.Info("unregistering with service discovery") if err := discoverdManager.Close(); err != nil { log.Error("error unregistering with service discovery", "err", err) } stopJobs() }) log.Info("serving HTTP requests") host.ServeHTTP() if controlFD > 0 { // now that we are serving requests, send an "ok" message to the parent log.Info("sending ok message to parent") if _, err := syscall.Write(controlFD, ControlMsgOK); err != nil { log.Error("error sending ok message to parent", "err", err) shutdown.Fatal(err) } log.Info("closing control socket") if err := syscall.Close(controlFD); err != nil { log.Error("error closing control socket", "err", err) } } if force { log.Info("forcibly stopping existing jobs") if err := stopJobs(); err != nil { log.Error("error forcibly stopping existing jobs", "err", err) shutdown.Fatal(err) } } if discoveryToken != "" { log.Info("getting cluster peer IPs") instances, err := discovery.GetCluster(discoveryToken) if err != nil { // TODO(titanous): retry? log.Error("error getting discovery cluster", "err", err) shutdown.Fatal(err) } peerIPs = make([]string, 0, len(instances)) for _, inst := range instances { u, err := url.Parse(inst.URL) if err != nil { continue } ip, _, err := net.SplitHostPort(u.Host) if err != nil || ip == externalIP { continue } peerIPs = append(peerIPs, ip) } log.Info("got cluster peer IPs", "peers", peerIPs) } log.Info("connecting to cluster peers") if err := discoverdManager.ConnectPeer(peerIPs); err != nil { log.Info("no cluster peers available") } if !args.Bool["--no-resurrect"] { log.Info("resurrecting jobs") resurrect() } monitor := NewMonitor(host.discMan, externalIP, logger) shutdown.BeforeExit(func() { monitor.Shutdown() }) go monitor.Run() log.Info("blocking main goroutine") <-make(chan struct{}) }