Beispiel #1
0
// Run is the main execution entrypoint to run mgmt. It configures logging,
// picks the hostname and working prefix, starts the converger and the embedded
// etcd, launches the graph loop and the remotes, and then blocks until a value
// arrives on obj.exit. Once that happens it tears everything down again. The
// returned error is the value received on obj.exit, with any errors from the
// teardown appended to it. It returns early with an error if the hostname, the
// working prefix or the embedded etcd can't be set up.
func (obj *Main) Run() error {

	var start = time.Now().UnixNano()

	var flags int
	if obj.DEBUG || true { // TODO: remove || true
		flags = log.LstdFlags | log.Lshortfile
	}
	// clear the date bit instead of subtracting it, so that the result stays
	// a valid flag set even when the bit was never set in the first place
	flags &^= log.Ldate // remove the date for now
	log.SetFlags(flags)

	// un-hijack from capnslog...
	log.SetOutput(os.Stderr)
	if obj.VERBOSE {
		capnslog.SetFormatter(capnslog.NewLogFormatter(os.Stderr, "(etcd) ", flags))
	} else {
		capnslog.SetFormatter(capnslog.NewNilFormatter())
	}

	log.Printf("This is: %s, version: %s", obj.Program, obj.Version)
	log.Printf("Main: Start: %v", start)

	hostname, err := os.Hostname() // a sensible default
	// allow passing in the hostname, instead of using the system setting
	if h := obj.Hostname; h != nil && *h != "" { // override by cli
		hostname = *h
	} else if err != nil {
		// the lookup error only matters when there is no override
		return errwrap.Wrapf(err, "Can't get default hostname!")
	}
	if hostname == "" { // safety check
		return fmt.Errorf("Hostname cannot be empty!")
	}

	var prefix = fmt.Sprintf("/var/lib/%s/", obj.Program) // default prefix
	if p := obj.Prefix; p != nil {
		prefix = *p
	}
	// make sure the working directory prefix exists; if a temporary prefix
	// was requested, the MkdirAll is skipped by the short-circuit
	if obj.TmpPrefix || os.MkdirAll(prefix, 0770) != nil {
		if obj.TmpPrefix || obj.AllowTmpPrefix {
			var err error
			if prefix, err = ioutil.TempDir("", obj.Program+"-"+hostname+"-"); err != nil {
				return fmt.Errorf("Main: Error: Can't create temporary prefix!")
			}
			log.Println("Main: Warning: Working prefix directory is temporary!")

		} else {
			return fmt.Errorf("Main: Error: Can't create prefix!")
		}
	}
	log.Printf("Main: Working prefix is: %s", prefix)

	var wg sync.WaitGroup
	// G is the active graph, oldGraph is what the next GraphSync compares to
	var G, oldGraph *pgraph.Graph

	// exit after `max-runtime` seconds for no reason at all...
	if i := obj.MaxRuntime; i > 0 {
		go func() {
			time.Sleep(time.Duration(i) * time.Second)
			obj.Exit(nil)
		}()
	}

	// setup converger
	converger := converger.NewConverger(
		obj.ConvergedTimeout,
		nil, // stateFn gets added in by EmbdEtcd
	)
	go converger.Loop(true) // main loop for converger, true to start paused

	// embedded etcd
	if len(obj.seeds) == 0 {
		log.Printf("Main: Seeds: No seeds specified!")
	} else {
		log.Printf("Main: Seeds(%d): %v", len(obj.seeds), obj.seeds)
	}
	EmbdEtcd := etcd.NewEmbdEtcd(
		hostname,
		obj.seeds,
		obj.clientURLs,
		obj.serverURLs,
		obj.NoServer,
		obj.idealClusterSize,
		prefix,
		converger,
	)
	if EmbdEtcd == nil {
		// nothing below can work without etcd: the remotes setup and the
		// shutdown path both call methods on it, so fail right away
		// instead of only signalling an exit and carrying on
		return fmt.Errorf("Main: Etcd: Creation failed!")
	}
	if err := EmbdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
		obj.Exit(fmt.Errorf("Main: Etcd: Startup failed: %v", err))
	}
	convergerStateFn := func(b bool) error {
		// exit if we are using the converged timeout and we are the
		// root node. otherwise, if we are a child node in a remote
		// execution hierarchy, we should only notify our converged
		// state and wait for the parent to trigger the exit.
		if t := obj.ConvergedTimeout; obj.Depth == 0 && t >= 0 {
			if b {
				log.Printf("Converged for %d seconds, exiting!", t)
				obj.Exit(nil) // trigger an exit!
			}
			return nil
		}
		// send our individual state into etcd for others to see
		return etcd.EtcdSetHostnameConverged(EmbdEtcd, hostname, b) // TODO: what should happen on error?
	}
	converger.SetStateFn(convergerStateFn) // EmbdEtcd is known to be non-nil here

	var gapiChan chan error // stream events are nil errors
	if obj.GAPI != nil {
		data := gapi.Data{
			Hostname: hostname,
			EmbdEtcd: EmbdEtcd,
			Noop:     obj.Noop,
			NoWatch:  obj.NoWatch,
		}
		if err := obj.GAPI.Init(data); err != nil {
			obj.Exit(fmt.Errorf("Main: GAPI: Init failed: %v", err))
		} else if !obj.NoWatch {
			gapiChan = obj.GAPI.SwitchStream() // stream of graph switch events!
		}
	}

	exitchan := make(chan struct{}) // exit on close
	// graph loop: every event that gets through the select below causes a
	// new graph to be generated, synced against the old one, and started
	go func() {
		startChan := make(chan struct{}) // start signal
		go func() { startChan <- struct{}{} }()

		log.Println("Etcd: Starting...")
		etcdChan := etcd.EtcdWatch(EmbdEtcd)
		first := true // first loop or not
		for {
			log.Println("Main: Waiting...")
			select {
			case <-startChan: // kick the loop once at start
				// pass

			case b := <-etcdChan:
				if !b { // ignore the message
					continue
				}
				// everything else passes through to cause a compile!

			case err, ok := <-gapiChan:
				if !ok { // channel closed
					if obj.DEBUG {
						log.Printf("Main: GAPI exited")
					}
					gapiChan = nil // disable it (a nil channel never fires)
					continue
				}
				if err != nil {
					obj.Exit(err) // trigger exit
					continue
					//return // TODO: return or wait for exitchan?
				}
				if obj.NoWatch { // extra safety for bad GAPI's
					log.Printf("Main: GAPI stream should be quiet with NoWatch!") // fix the GAPI!
					continue                                                      // no stream events should be sent
				}

			case <-exitchan:
				return
			}

			if obj.GAPI == nil {
				log.Printf("Config: GAPI is empty!")
				continue
			}

			// we need the vertices to be paused to work on them, so
			// run graph vertex LOCK...
			if !first { // TODO: we can flatten this check out I think
				converger.Pause() // FIXME: add sync wait?
				G.Pause()         // sync

				//G.UnGroup() // FIXME: implement me if needed!
			}

			// make the graph from yaml, lib, puppet->yaml, or dsl!
			newGraph, err := obj.GAPI.Graph() // generate graph!
			if err != nil {
				log.Printf("Config: Error creating new graph: %v", err)
				// unpause!
				if !first {
					G.Start(&wg, first) // sync
					converger.Start()   // after G.Start()
				}
				continue
			}

			// apply the global noop parameter if requested
			if obj.Noop {
				for _, m := range newGraph.GraphMetas() {
					m.Noop = obj.Noop
				}
			}

			// FIXME: make sure we "UnGroup()" any semi-destructive
			// changes to the resources so our efficient GraphSync
			// will be able to re-use and cmp to the old graph.
			newFullGraph, err := newGraph.GraphSync(oldGraph)
			if err != nil {
				log.Printf("Config: Error running graph sync: %v", err)
				// unpause!
				if !first {
					G.Start(&wg, first) // sync
					converger.Start()   // after G.Start()
				}
				continue
			}
			oldGraph = newFullGraph // save old graph
			G = oldGraph.Copy()     // copy to active graph

			G.AutoEdges() // add autoedges; modifies the graph
			G.AutoGroup() // run autogroup; modifies the graph
			// TODO: do we want to do a transitive reduction?

			log.Printf("Graph: %v", G) // show graph
			if obj.GraphvizFilter != "" {
				if err := G.ExecGraphviz(obj.GraphvizFilter, obj.Graphviz); err != nil {
					log.Printf("Graphviz: %v", err)
				} else {
					log.Printf("Graphviz: Successfully generated graph!")
				}
			}
			G.AssociateData(converger)
			// G.Start(...) needs to be synchronous or wait,
			// because if half of the nodes are started and
			// some are not ready yet and the EtcdWatch
			// loops, we'll cause G.Pause(...) before we
			// even got going, thus causing nil pointer errors
			G.Start(&wg, first) // sync
			converger.Start()   // after G.Start()
			first = false
		}
	}()

	configWatcher := recwatch.NewConfigWatcher()
	events := configWatcher.Events()
	if !obj.NoWatch {
		configWatcher.Add(obj.Remotes...) // add all the files...
	} else {
		events = nil // signal that no-watch is true
	}
	// turn the first config watcher error into an exit request
	go func() {
		select {
		case err := <-configWatcher.Error():
			obj.Exit(err) // trigger an exit!

		case <-exitchan:
			return
		}
	}()

	// initialize the add watcher, which calls the f callback on map changes
	convergerCb := func(f func(map[string]bool) error) (func(), error) {
		return etcd.EtcdAddHostnameConvergedWatcher(EmbdEtcd, f)
	}

	// build remotes struct for remote ssh
	remotes := remote.NewRemotes(
		EmbdEtcd.LocalhostClientURLs().StringSlice(),
		[]string{etcd.DefaultClientURL},
		obj.Noop,
		obj.Remotes, // list of files
		events,      // watch for file changes
		obj.CConns,
		obj.AllowInteractive,
		obj.SSHPrivIDRsa,
		!obj.NoCaching,
		obj.Depth,
		prefix,
		converger,
		convergerCb,
		obj.Program,
	)

	// TODO: is there any benefit to running the remotes above in the loop?
	// wait for etcd to be running before we remote in, which we do above!
	go remotes.Run()

	if obj.GAPI == nil {
		converger.Start() // better start this for empty graphs
	}
	log.Println("Main: Running...")

	reterr := <-obj.exit // wait for exit signal

	log.Println("Destroy...")

	if obj.GAPI != nil {
		if err := obj.GAPI.Close(); err != nil {
			err = errwrap.Wrapf(err, "GAPI closed poorly!")
			reterr = multierr.Append(reterr, err) // list of errors
		}
	}

	configWatcher.Close()                  // stop sending file changes to remotes
	if err := remotes.Exit(); err != nil { // tell all the remote connections to shutdown; waits!
		err = errwrap.Wrapf(err, "Remote exited poorly!")
		reterr = multierr.Append(reterr, err) // list of errors
	}

	// G is still nil if the graph loop never built a graph (eg: when no
	// GAPI was given), so don't call into it in that case
	if G != nil {
		G.Exit() // tell all the children to exit
	}

	// tell inner main loop to exit
	close(exitchan)

	// cleanup etcd main loop last so it can process everything first
	if err := EmbdEtcd.Destroy(); err != nil { // shutdown and cleanup etcd
		err = errwrap.Wrapf(err, "Etcd exited poorly!")
		reterr = multierr.Append(reterr, err) // list of errors
	}

	if obj.DEBUG {
		log.Printf("Main: Graph: %v", G)
	}

	wg.Wait() // wait for primary go routines to exit

	// TODO: wait for each vertex to exit...
	log.Println("Goodbye!")
	return reterr
}
Beispiel #2
0
// run is the main run target. It reads its settings from the cli context c,
// validates them, sets up the working prefix, the converger and the embedded
// etcd, starts the graph loop and the remotes, and then blocks in
// waitForSignal until an exit is requested. After that it shuts everything
// down and returns nil. If validation or setup fails, the problem is logged
// and a cli exit error with code 1 is returned instead.
func run(c *cli.Context) error {
	var start = time.Now().UnixNano()
	log.Printf("This is: %v, version: %v", program, version)
	log.Printf("Main: Start: %v", start)

	hostname, err := os.Hostname() // a sensible default
	// allow passing in the hostname, instead of using --hostname
	if c.IsSet("file") {
		if config := gconfig.ParseConfigFromFile(c.String("file")); config != nil {
			if h := config.Hostname; h != "" {
				hostname = h
			}
		}
	}
	if c.IsSet("hostname") { // override by cli
		if h := c.String("hostname"); h != "" {
			hostname = h
		}
	}
	if hostname == "" { // safety check
		// a failed lookup is only a problem if nothing overrode it
		if err != nil {
			log.Printf("Main: Error: Can't get default hostname: %v", err)
		} else {
			log.Printf("Main: Error: Hostname cannot be empty!")
		}
		return cli.NewExitError("", 1)
	}
	noop := c.Bool("noop")

	seeds, err := etcdtypes.NewURLs(
		util.FlattenListWithSplit(c.StringSlice("seeds"), []string{",", ";", " "}),
	)
	if err != nil && len(c.StringSlice("seeds")) > 0 {
		log.Printf("Main: Error: seeds didn't parse correctly!")
		return cli.NewExitError("", 1)
	}
	clientURLs, err := etcdtypes.NewURLs(
		util.FlattenListWithSplit(c.StringSlice("client-urls"), []string{",", ";", " "}),
	)
	if err != nil && len(c.StringSlice("client-urls")) > 0 {
		log.Printf("Main: Error: clientURLs didn't parse correctly!")
		return cli.NewExitError("", 1)
	}
	serverURLs, err := etcdtypes.NewURLs(
		util.FlattenListWithSplit(c.StringSlice("server-urls"), []string{",", ";", " "}),
	)
	if err != nil && len(c.StringSlice("server-urls")) > 0 {
		log.Printf("Main: Error: serverURLs didn't parse correctly!")
		return cli.NewExitError("", 1)
	}

	// the numeric options are validated as signed ints first: once they are
	// converted to uint16 a negative value wraps around to a large positive
	// one and can no longer be detected
	ics := c.Int("ideal-cluster-size")
	if ics < 1 {
		log.Printf("Main: Error: idealClusterSize should be at least one!")
		return cli.NewExitError("", 1)
	}
	idealClusterSize := uint16(ics)

	if c.IsSet("file") && c.IsSet("puppet") {
		log.Println("Main: Error: the --file and --puppet parameters cannot be used together!")
		return cli.NewExitError("", 1)
	}

	if c.Bool("no-server") && len(c.StringSlice("remote")) > 0 {
		// TODO: in this case, we won't be able to tunnel stuff back to
		// here, so if we're okay with every remote graph running in an
		// isolated mode, then this is okay. Improve on this if there's
		// someone who really wants to be able to do this.
		log.Println("Main: Error: the --no-server and --remote parameters cannot be used together!")
		return cli.NewExitError("", 1)
	}

	cc := c.Int("cconns")
	if cc < 0 {
		log.Printf("Main: Error: --cconns should be at least zero!")
		return cli.NewExitError("", 1)
	}
	cConns := uint16(cc)

	if c.IsSet("converged-timeout") && cConns > 0 && len(c.StringSlice("remote")) > cc {
		log.Printf("Main: Error: combining --converged-timeout with more remotes than available connections will never converge!")
		return cli.NewExitError("", 1)
	}

	d := c.Int("depth")
	if d < 0 { // user should not be using this argument manually
		log.Printf("Main: Error: negative values for --depth are not permitted!")
		return cli.NewExitError("", 1)
	}
	depth := uint16(d)

	if c.IsSet("prefix") && c.Bool("tmp-prefix") {
		log.Println("Main: Error: combining --prefix and the request for a tmp prefix is illogical!")
		return cli.NewExitError("", 1)
	}
	// NOTE(review): prefix is not declared in this function; presumably it
	// is a package-level variable holding the default prefix -- confirm
	if s := c.String("prefix"); c.IsSet("prefix") && s != "" {
		prefix = s
	}

	// make sure the working directory prefix exists; if a temporary prefix
	// was requested, the MkdirAll is skipped by the short-circuit
	if c.Bool("tmp-prefix") || os.MkdirAll(prefix, 0770) != nil {
		if c.Bool("tmp-prefix") || c.Bool("allow-tmp-prefix") {
			if prefix, err = ioutil.TempDir("", program+"-"); err != nil {
				log.Printf("Main: Error: Can't create temporary prefix!")
				return cli.NewExitError("", 1)
			}
			log.Println("Main: Warning: Working prefix directory is temporary!")

		} else {
			log.Printf("Main: Error: Can't create prefix!")
			return cli.NewExitError("", 1)
		}
	}
	log.Printf("Main: Working prefix is: %s", prefix)

	var wg sync.WaitGroup
	exit := make(chan bool) // exit signal (unbuffered: sends block until received)
	// G is the active graph, fullGraph is what the next build is based on
	var G, fullGraph *pgraph.Graph

	// exit after `max-runtime` seconds for no reason at all...
	if i := c.Int("max-runtime"); i > 0 {
		go func() {
			time.Sleep(time.Duration(i) * time.Second)
			exit <- true
		}()
	}

	// setup converger
	converger := converger.NewConverger(
		c.Int("converged-timeout"),
		nil, // stateFn gets added in by EmbdEtcd
	)
	go converger.Loop(true) // main loop for converger, true to start paused

	// embedded etcd
	if len(seeds) == 0 {
		log.Printf("Main: Seeds: No seeds specified!")
	} else {
		log.Printf("Main: Seeds(%v): %v", len(seeds), seeds)
	}
	EmbdEtcd := etcd.NewEmbdEtcd(
		hostname,
		seeds,
		clientURLs,
		serverURLs,
		c.Bool("no-server"),
		idealClusterSize,
		prefix,
		converger,
	)
	if EmbdEtcd == nil {
		// nothing below can work without etcd: the remotes setup and the
		// shutdown path both call methods on it, so fail right away
		log.Printf("Main: Etcd: Creation failed!")
		return cli.NewExitError("", 1)
	}
	if err := EmbdEtcd.Startup(); err != nil { // startup (returns when etcd main loop is running)
		log.Printf("Main: Etcd: Startup failed: %v", err)
		// the only receiver on exit is waitForSignal further down, so a
		// direct send from this goroutine would block forever; hand the
		// send off so that the normal shutdown path still gets to run
		go func() { exit <- true }()
	}
	convergerStateFn := func(b bool) error {
		// exit if we are using the converged-timeout and we are the
		// root node. otherwise, if we are a child node in a remote
		// execution hierarchy, we should only notify our converged
		// state and wait for the parent to trigger the exit.
		if depth == 0 && c.Int("converged-timeout") >= 0 {
			if b {
				log.Printf("Converged for %d seconds, exiting!", c.Int("converged-timeout"))
				exit <- true // trigger an exit!
			}
			return nil
		}
		// send our individual state into etcd for others to see
		return etcd.EtcdSetHostnameConverged(EmbdEtcd, hostname, b) // TODO: what should happen on error?
	}
	converger.SetStateFn(convergerStateFn) // EmbdEtcd is known to be non-nil here

	exitchan := make(chan struct{}) // exit on close
	// graph loop: every event that gets through the select below causes
	// the config to be parsed again and a new graph to be built and started
	go func() {
		startchan := make(chan struct{}) // start signal
		go func() { startchan <- struct{}{} }()
		file := c.String("file")
		// whichever of these two stays nil never fires in the select
		var configchan chan bool
		var puppetchan <-chan time.Time
		if !c.Bool("no-watch") && c.IsSet("file") {
			configchan = ConfigWatch(file)
		} else if c.IsSet("puppet") {
			interval := puppet.PuppetInterval(c.String("puppet-conf"))
			puppetchan = time.Tick(time.Duration(interval) * time.Second)
		}
		log.Println("Etcd: Starting...")
		etcdchan := etcd.EtcdWatch(EmbdEtcd)
		first := true // first loop or not
		for {
			log.Println("Main: Waiting...")
			select {
			case <-startchan: // kick the loop once at start
				// pass

			case b := <-etcdchan:
				if !b { // ignore the message
					continue
				}
				// everything else passes through to cause a compile!

			case <-puppetchan:
				// nothing, just go on

			case msg := <-configchan:
				if c.Bool("no-watch") || !msg {
					continue // not ready to read config
				}
			// XXX: case compile_event: ...
			// ...
			case <-exitchan:
				return
			}

			var config *gconfig.GraphConfig
			if c.IsSet("file") {
				config = gconfig.ParseConfigFromFile(file)
			} else if c.IsSet("puppet") {
				config = puppet.ParseConfigFromPuppet(c.String("puppet"), c.String("puppet-conf"))
			}
			if config == nil {
				log.Printf("Config: Parse failure")
				continue
			}

			if config.Hostname != "" && config.Hostname != hostname {
				log.Printf("Config: Hostname changed, ignoring config!")
				continue
			}
			config.Hostname = hostname // set it in case it was ""

			// run graph vertex LOCK...
			if !first { // TODO: we can flatten this check out I think
				converger.Pause() // FIXME: add sync wait?
				G.Pause()         // sync
			}

			// build graph from yaml file on events (eg: from etcd)
			// we need the vertices to be paused to work on them
			if newFullgraph, err := config.NewGraphFromConfig(fullGraph, EmbdEtcd, noop); err == nil { // keep references to all original elements
				fullGraph = newFullgraph
			} else {
				log.Printf("Config: Error making new graph from config: %v", err)
				// unpause!
				if !first {
					G.Start(&wg, first) // sync
					converger.Start()   // after G.Start()
				}
				continue
			}

			G = fullGraph.Copy() // copy to active graph
			// XXX: do etcd transaction out here...
			G.AutoEdges() // add autoedges; modifies the graph
			G.AutoGroup() // run autogroup; modifies the graph
			// TODO: do we want to do a transitive reduction?

			log.Printf("Graph: %v", G) // show graph
			err := G.ExecGraphviz(c.String("graphviz-filter"), c.String("graphviz"))
			if err != nil {
				log.Printf("Graphviz: %v", err)
			} else {
				log.Printf("Graphviz: Successfully generated graph!")
			}
			G.AssociateData(converger)
			// G.Start(...) needs to be synchronous or wait,
			// because if half of the nodes are started and
			// some are not ready yet and the EtcdWatch
			// loops, we'll cause G.Pause(...) before we
			// even got going, thus causing nil pointer errors
			G.Start(&wg, first) // sync
			converger.Start()   // after G.Start()
			first = false
		}
	}()

	configWatcher := NewConfigWatcher()
	events := configWatcher.Events()
	if !c.Bool("no-watch") {
		configWatcher.Add(c.StringSlice("remote")...) // add all the files...
	} else {
		events = nil // signal that no-watch is true
	}

	// initialize the add watcher, which calls the f callback on map changes
	convergerCb := func(f func(map[string]bool) error) (func(), error) {
		return etcd.EtcdAddHostnameConvergedWatcher(EmbdEtcd, f)
	}

	// build remotes struct for remote ssh
	remotes := remote.NewRemotes(
		EmbdEtcd.LocalhostClientURLs().StringSlice(),
		[]string{etcd.DefaultClientURL},
		noop,
		c.StringSlice("remote"), // list of files
		events,                  // watch for file changes
		cConns,
		c.Bool("allow-interactive"),
		c.String("ssh-priv-id-rsa"),
		!c.Bool("no-caching"),
		depth,
		prefix,
		converger,
		convergerCb,
		program,
	)

	// TODO: is there any benefit to running the remotes above in the loop?
	// wait for etcd to be running before we remote in, which we do above!
	go remotes.Run()

	if !c.IsSet("file") && !c.IsSet("puppet") {
		converger.Start() // better start this for empty graphs
	}
	log.Println("Main: Running...")

	waitForSignal(exit) // pass in exit channel to watch

	log.Println("Destroy...")

	configWatcher.Close() // stop sending file changes to remotes
	remotes.Exit()        // tell all the remote connections to shutdown; waits!

	// G is still nil if the graph loop never built a graph (eg: when
	// neither --file nor --puppet was given, or every parse failed), so
	// don't call into it in that case
	if G != nil {
		G.Exit() // tell all the children to exit
	}

	// tell inner main loop to exit
	close(exitchan)

	// cleanup etcd main loop last so it can process everything first
	if err := EmbdEtcd.Destroy(); err != nil { // shutdown and cleanup etcd
		log.Printf("Etcd exited poorly with: %v", err)
	}

	if global.DEBUG {
		log.Printf("Graph: %v", G)
	}

	wg.Wait() // wait for primary go routines to exit

	// TODO: wait for each vertex to exit...
	log.Println("Goodbye!")
	return nil
}