Example #1
func runDownload(args *docopt.Args) error {
	log := log15.New()

	log.Info("initializing ZFS volumes")
	volPath := args.String["--volpath"]
	volDB := filepath.Join(volPath, "volumes.bolt")
	volMan := volumemanager.New(volDB, log, func() (volume.Provider, error) {
		return zfs.NewProvider(&zfs.ProviderConfig{
			DatasetName: zfs.DefaultDatasetName,
			Make:        zfs.DefaultMakeDev(volPath, log),
			WorkingDir:  filepath.Join(volPath, "zfs"),
		})
	})
	if err := volMan.OpenDB(); err != nil {
		log.Error("error opening volume database, make sure flynn-host is not running", "err", err)
		return err
	}

	// create a TUF client and update it
	log.Info("initializing TUF client")
	tufDB := args.String["--tuf-db"]
	local, err := tuf.FileLocalStore(tufDB)
	if err != nil {
		log.Error("error creating local TUF client", "err", err)
		return err
	}
	remote, err := tuf.HTTPRemoteStore(args.String["--repository"], tufHTTPOpts("downloader"))
	if err != nil {
		log.Error("error creating remote TUF client", "err", err)
		return err
	}
	client := tuf.NewClient(local, remote)
	if err := updateTUFClient(client); err != nil {
		log.Error("error updating TUF client", "err", err)
		return err
	}

	configDir := args.String["--config-dir"]

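	// determine the version to download: an explicit FLYNN_VERSION
	// overrides the latest version of the configured release channel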
	requestedVersion := os.Getenv("FLYNN_VERSION")
	if requestedVersion == "" {
		requestedVersion, err = getChannelVersion(configDir, client, log)
		if err != nil {
			return err
		}
	}
	log.Info(fmt.Sprintf("downloading components with version %s", requestedVersion))

	d := downloader.New(client, volMan, requestedVersion)

	binDir := args.String["--bin-dir"]
	log.Info(fmt.Sprintf("downloading binaries to %s", binDir))
	if _, err := d.DownloadBinaries(binDir); err != nil {
		log.Error("error downloading binaries", "err", err)
		return err
	}

	// use the requested version of flynn-host to download the images as
	// the format changed in v20161106
	if version.String() != requestedVersion {
		log.Info(fmt.Sprintf("executing %s flynn-host binary", requestedVersion))
		binPath := filepath.Join(binDir, "flynn-host")
		argv := append([]string{binPath}, os.Args[1:]...)
		return syscall.Exec(binPath, argv, os.Environ())
	}

	log.Info("downloading images")
	ch := make(chan *ct.ImagePullInfo)
	go func() {
		for info := range ch {
			switch info.Type {
			case ct.ImagePullTypeImage:
				log.Info(fmt.Sprintf("pulling %s image", info.Name))
			case ct.ImagePullTypeLayer:
				log.Info(fmt.Sprintf("pulling %s layer %s (%s)",
					info.Name, info.Layer.ID, units.BytesSize(float64(info.Layer.Length))))
			}
		}
	}()
	if err := d.DownloadImages(configDir, ch); err != nil {
		log.Error("error downloading images", "err", err)
		return err
	}

	log.Info(fmt.Sprintf("downloading config to %s", configDir))
	if _, err := d.DownloadConfig(configDir); err != nil {
		log.Error("error downloading config", "err", err)
		return err
	}

	log.Info("download complete")
	return nil
}
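The tail of this function is a re-exec: when the running binary is not the requested version, the freshly downloaded flynn-host takes over via syscall.Exec, which replaces the current process image (no fork, same PID) so the same command line runs under the new version. A minimal standalone sketch of the pattern, with an illustrative binary path (Unix-only, since syscall.Exec is not available on Windows):

package main

import (
	"log"
	"os"
	"path/filepath"
	"syscall"
)

// reExec hands control of this process to the flynn-host binary in binDir.
func reExec(binDir string) error {
	binPath := filepath.Join(binDir, "flynn-host")
	// argv[0] is the program path by convention; the remaining arguments
	// are carried over so the new binary re-runs the same command
	argv := append([]string{binPath}, os.Args[1:]...)
	// Exec replaces the current process image and only returns on error
	return syscall.Exec(binPath, argv, os.Environ())
}

func main() {
	if err := reExec("/usr/local/bin"); err != nil { // illustrative dir
		log.Fatal(err)
	}
}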
Example #2
func runDaemon(args *docopt.Args) {
	hostname, _ := os.Hostname()
	httpPort := args.String["--http-port"]
	externalIP := args.String["--external-ip"]
	listenIP := args.String["--listen-ip"]
	stateFile := args.String["--state"]
	hostID := args.String["--id"]
	tags := parseTagArgs(args.String["--tags"])
	force := args.Bool["--force"]
	volPath := args.String["--volpath"]
	volProvider := args.String["--vol-provider"]
	backendName := args.String["--backend"]
	flynnInit := args.String["--flynn-init"]
	logDir := args.String["--log-dir"]
	discoveryToken := args.String["--discovery"]
	bridgeName := args.String["--bridge-name"]

	logger, err := setupLogger(logDir)
	if err != nil {
		shutdown.Fatalf("error setting up logger: %s", err)
	}

	initLogLevel, err := log15.LvlFromString(args.String["--init-log-level"])
	if err != nil {
		shutdown.Fatalf("error setting init log level: %s", err)
	}

	var peerIPs []string
	if args.String["--peer-ips"] != "" {
		peerIPs = strings.Split(args.String["--peer-ips"], ",")
	}

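	// default the host ID to the hostname with dashes stripped; host IDs
	// must not contain dashes (see the validation below)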
	if hostID == "" {
		hostID = strings.Replace(hostname, "-", "", -1)
	}

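	// limit concurrent job starts, defaulting to 4 when the flag is
	// absent or unparsable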
	var maxJobConcurrency uint64 = 4
	if m, err := strconv.ParseUint(args.String["--max-job-concurrency"], 10, 64); err == nil {
		maxJobConcurrency = m
	}

	if path, err := filepath.Abs(flynnInit); err == nil {
		flynnInit = path
	}

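	// parse resource partition specifiers of the form "name=cpu_shares:N"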
	var partitionCGroups = make(map[string]int64) // name -> cpu shares
	for _, p := range strings.Split(args.String["--partitions"], " ") {
		nameShares := strings.Split(p, "=cpu_shares:")
		if len(nameShares) != 2 {
			shutdown.Fatalf("invalid partition specifier: %q", p)
		}
		shares, err := strconv.ParseInt(nameShares[1], 10, 64)
		if err != nil || shares < 2 {
			shutdown.Fatalf("invalid cpu shares specifier: %q", shares)
		}
		partitionCGroups[nameShares[0]] = shares
	}
	for _, s := range []string{"user", "system", "background"} {
		if _, ok := partitionCGroups[s]; !ok {
			shutdown.Fatalf("missing mandatory resource partition: %s", s)
		}
	}

	log := logger.New("fn", "runDaemon", "host.id", hostID)
	log.Info("starting daemon")

	log.Info("validating host ID")
	if strings.Contains(hostID, "-") {
		shutdown.Fatal("host id must not contain dashes")
	}
	if externalIP == "" {
		log.Info("detecting external IP")
		var err error
		externalIP, err = config.DefaultExternalIP()
		if err != nil {
			log.Error("error detecting external IP", "err", err)
			shutdown.Fatal(err)
		}
		log.Info("using external IP " + externalIP)
	}

	publishAddr := net.JoinHostPort(externalIP, httpPort)
	if discoveryToken != "" {
		// TODO: retry
		log.Info("registering with cluster discovery service", "token", discoveryToken, "addr", publishAddr, "name", hostID)
		discoveryID, err := discovery.RegisterInstance(discovery.Info{
			ClusterURL:  discoveryToken,
			InstanceURL: "http://" + publishAddr,
			Name:        hostID,
		})
		if err != nil {
			log.Error("error registering with cluster discovery service", "err", err)
			shutdown.Fatal(err)
		}
		log.Info("registered with cluster discovery service", "id", discoveryID)
	}

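	// open the persistent job state database, closing it on exit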
	state := NewState(hostID, stateFile)
	shutdown.BeforeExit(func() { state.CloseDB() })

	log.Info("initializing volume manager", "provider", volProvider)
	var newVolProvider func() (volume.Provider, error)
	switch volProvider {
	case "zfs":
		newVolProvider = func() (volume.Provider, error) {
			return zfsVolume.NewProvider(&zfsVolume.ProviderConfig{
				DatasetName: zfsVolume.DefaultDatasetName,
				Make:        zfsVolume.DefaultMakeDev(volPath, log),
				WorkingDir:  filepath.Join(volPath, "zfs"),
			})
		}
	case "mock":
		newVolProvider = func() (volume.Provider, error) { return nil, nil }
	default:
		shutdown.Fatalf("unknown volume provider: %q", volProvider)
	}
	vman := volumemanager.New(
		filepath.Join(volPath, "volumes.bolt"),
		logger.New("component", "volumemanager"),
		newVolProvider,
	)
	shutdown.BeforeExit(func() { vman.CloseDB() })

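	// logmux collects job log streams and persists them under logDir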
	mux := logmux.New(hostID, logDir, logger.New("host.id", hostID, "component", "logmux"))

	log.Info("initializing job backend", "type", backendName)
	var backend Backend
	switch backendName {
	case "libcontainer":
		backend, err = NewLibcontainerBackend(&LibcontainerConfig{
			State:            state,
			VolManager:       vman,
			BridgeName:       bridgeName,
			InitPath:         flynnInit,
			InitLogLevel:     initLogLevel,
			LogMux:           mux,
			PartitionCGroups: partitionCGroups,
			Logger:           logger.New("host.id", hostID, "component", "backend", "backend", "libcontainer"),
		})
	case "mock":
		backend = MockBackend{}
	default:
		shutdown.Fatalf("unknown backend %q", backendName)
	}
	if err != nil {
		shutdown.Fatal(err)
	}
	backend.SetDefaultEnv("EXTERNAL_IP", externalIP)
	backend.SetDefaultEnv("LISTEN_IP", listenIP)

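	// log buffers may be inherited from a parent flynn-host during an
	// in-place update (see the control socket handling below)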
	var buffers host.LogBuffers
	discoverdManager := NewDiscoverdManager(backend, mux, hostID, publishAddr, tags)
	publishURL := "http://" + publishAddr
	host := &Host{
		id:  hostID,
		url: publishURL,
		status: &host.HostStatus{
			ID:      hostID,
			PID:     os.Getpid(),
			URL:     publishURL,
			Tags:    tags,
			Version: version.String(),
		},
		state:   state,
		backend: backend,
		vman:    vman,
		volAPI:  volumeapi.NewHTTPAPI(vman),
		discMan: discoverdManager,
		log:     logger.New("host.id", hostID),

		maxJobConcurrency: maxJobConcurrency,
	}
	backend.SetHost(host)

	// restore the host status if set in the environment
	if statusEnv := os.Getenv("FLYNN_HOST_STATUS"); statusEnv != "" {
		log.Info("restoring host status from parent")
		if err := json.Unmarshal([]byte(statusEnv), &host.status); err != nil {
			log.Error("error restoring host status from parent", "err", err)
			shutdown.Fatal(err)
		}
		pid := os.Getpid()
		log.Info("setting status PID", "pid", pid)
		host.status.PID = pid
		// keep the same tags as the parent
		discoverdManager.UpdateTags(host.status.Tags)
	}

	log.Info("creating HTTP listener")
	l, err := newHTTPListener(net.JoinHostPort(listenIP, httpPort))
	if err != nil {
		log.Error("error creating HTTP listener", "err", err)
		shutdown.Fatal(err)
	}
	host.listener = l
	shutdown.BeforeExit(func() { host.Close() })

	// if we have a control socket FD, wait for a "resume" message before
	// opening state DBs and serving requests.
	var controlFD int
	if fdEnv := os.Getenv("FLYNN_CONTROL_FD"); fdEnv != "" {
		log.Info("parsing control socket file descriptor")
		controlFD, err = strconv.Atoi(fdEnv)
		if err != nil {
			log.Error("error parsing control socket file descriptor", "err", err)
			shutdown.Fatal(err)
		}

		log.Info("waiting for resume message from parent")
		msg := make([]byte, len(ControlMsgResume))
		if _, err := syscall.Read(controlFD, msg); err != nil {
			log.Error("error waiting for resume message from parent", "err", err)
			shutdown.Fatal(err)
		}

		log.Info("validating resume message")
		if !bytes.Equal(msg, ControlMsgResume) {
			log.Error(fmt.Sprintf("unexpected resume message from parent: %v", msg))
			shutdown.ExitWithCode(1)
		}

		log.Info("receiving log buffers from parent")
		if err := json.NewDecoder(&controlSock{controlFD}).Decode(&buffers); err != nil {
			log.Error("error receiving log buffers from parent", "err", err)
			shutdown.Fatal(err)
		}
	}

	log.Info("opening state databases")
	if err := host.OpenDBs(); err != nil {
		log.Error("error opening state databases", "err", err)
		shutdown.Fatal(err)
	}

	// stopJobs stops all jobs, leaving discoverd until the end so other
	// jobs can unregister themselves on shutdown.
	stopJobs := func() (err error) {
		var except []string
		host.statusMtx.RLock()
		if host.status.Discoverd != nil && host.status.Discoverd.JobID != "" {
			except = []string{host.status.Discoverd.JobID}
		}
		host.statusMtx.RUnlock()
		log.Info("stopping all jobs except discoverd")
		if err := backend.Cleanup(except); err != nil {
			log.Error("error stopping all jobs except discoverd", "err", err)
			return err
		}
		for _, id := range except {
			log.Info("stopping discoverd")
			if e := backend.Stop(id); e != nil {
				log.Error("error stopping discoverd", "err", err)
				err = e
			}
		}
		return
	}

	log.Info("restoring state")
	resurrect, err := state.Restore(backend, buffers)
	if err != nil {
		log.Error("error restoring state", "err", err)
		shutdown.Fatal(err)
	}
	shutdown.BeforeExit(func() {
		// close discoverd before stopping jobs so we can unregister first
		log.Info("unregistering with service discovery")
		if err := discoverdManager.Close(); err != nil {
			log.Error("error unregistering with service discovery", "err", err)
		}
		stopJobs()
	})

	log.Info("serving HTTP requests")
	host.ServeHTTP()

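	// controlFD is non-zero only if FLYNN_CONTROL_FD was parsed above
	// (fd 0 is stdin, so it can never be a valid control socket here)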
	if controlFD > 0 {
		// now that we are serving requests, send an "ok" message to the parent
		log.Info("sending ok message to parent")
		if _, err := syscall.Write(controlFD, ControlMsgOK); err != nil {
			log.Error("error sending ok message to parent", "err", err)
			shutdown.Fatal(err)
		}

		log.Info("closing control socket")
		if err := syscall.Close(controlFD); err != nil {
			log.Error("error closing control socket", "err", err)
		}
	}

	if force {
		log.Info("forcibly stopping existing jobs")
		if err := stopJobs(); err != nil {
			log.Error("error forcibly stopping existing jobs", "err", err)
			shutdown.Fatal(err)
		}
	}

	if discoveryToken != "" {
		log.Info("getting cluster peer IPs")
		instances, err := discovery.GetCluster(discoveryToken)
		if err != nil {
			// TODO(titanous): retry?
			log.Error("error getting discovery cluster", "err", err)
			shutdown.Fatal(err)
		}
		peerIPs = make([]string, 0, len(instances))
		for _, inst := range instances {
			u, err := url.Parse(inst.URL)
			if err != nil {
				continue
			}
			ip, _, err := net.SplitHostPort(u.Host)
			if err != nil || ip == externalIP {
				continue
			}
			peerIPs = append(peerIPs, ip)
		}
		log.Info("got cluster peer IPs", "peers", peerIPs)
	}
	log.Info("connecting to cluster peers")
	if err := discoverdManager.ConnectPeer(peerIPs); err != nil {
		log.Info("no cluster peers available")
	}

	if !args.Bool["--no-resurrect"] {
		log.Info("resurrecting jobs")
		resurrect()
	}

	monitor := NewMonitor(host.discMan, externalIP, logger)
	shutdown.BeforeExit(func() { monitor.Shutdown() })
	go monitor.Run()

	log.Info("blocking main goroutine")
	<-make(chan struct{})
}
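The FLYNN_CONTROL_FD sections above are the child's half of a handover handshake used for in-place updates: the child waits for a resume message, receives the parent's log buffers as JSON, then, once it is serving HTTP, replies with an ok message and closes the socket. A minimal sketch of the parent's half, assuming the same ControlMsgResume/ControlMsgOK byte sequences and a Unix socketpair as the control channel (the message values, the buffers type, and the setup here are illustrative, not flynn-host's actual update path):

package main

import (
	"encoding/json"
	"os"
	"syscall"
)

// placeholder message values; the child compares exact byte sequences,
// so both sides must agree on them
var (
	ControlMsgResume = []byte("resume")
	ControlMsgOK     = []byte("ok")
)

// handOff resumes a child process and streams it the given log buffers
// (a stand-in for host.LogBuffers).
func handOff(buffers map[string]string) error {
	// a connected socket pair: the parent keeps fds[0], and fds[1] would
	// be passed to the child (e.g. via ExtraFiles) along with
	// FLYNN_CONTROL_FD set to its descriptor number
	fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
	if err != nil {
		return err
	}
	parent := os.NewFile(uintptr(fds[0]), "control")
	defer parent.Close()

	// ... start the child process with fds[1] here ...

	// tell the child to resume, then stream the log buffers as JSON
	if _, err := parent.Write(ControlMsgResume); err != nil {
		return err
	}
	if err := json.NewEncoder(parent).Encode(buffers); err != nil {
		return err
	}

	// block until the child confirms it is serving requests
	ok := make([]byte, len(ControlMsgOK))
	if _, err := parent.Read(ok); err != nil {
		return err
	}
	return nil
}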