func main() {
	kingpin.CommandLine.Name = "p2-replicate"
	kingpin.CommandLine.Help = `p2-replicate uses the replication package to schedule deployment of a pod
across multiple nodes. See the replication package's README and godoc for more
information.

Example invocation: p2-replicate --min-nodes 2 helloworld.yaml aws{1,2,3}.example.com

This will take the pod whose manifest is located at helloworld.yaml and deploy
it to the three nodes aws1.example.com, aws2.example.com, and aws3.example.com

Because of --min-nodes 2, the replicator will ensure that at least two healthy
nodes remain up at all times, according to p2's health checks.
`
	kingpin.Version(version.VERSION)
	_, opts := flags.ParseWithConsulOptions()
	client := kp.NewConsulClient(opts)
	store := kp.NewConsulStore(client)
	healthChecker := checker.NewConsulHealthChecker(client)

	manifest, err := pods.ManifestFromURI(*manifestUri)
	if err != nil {
		log.Fatalf("%s", err)
	}

	logger := logging.NewLogger(logrus.Fields{
		"pod": manifest.ID(),
	})
	logger.Logger.Formatter = &logrus.TextFormatter{
		DisableTimestamp: false,
		FullTimestamp:    true,
		TimestampFormat:  "15:04:05.000",
	}

	// create a lock with a meaningful name and set up a renewal loop for it
	thisHost, err := os.Hostname()
	if err != nil {
		log.Fatalf("Could not retrieve hostname: %s", err)
	}
	thisUser, err := user.Current()
	if err != nil {
		log.Fatalf("Could not retrieve user: %s", err)
	}
	lockMessage := fmt.Sprintf("%q from %q at %q", thisUser.Username, thisHost, time.Now())

	repl, err := replication.NewReplicator(
		manifest,
		logger,
		*hosts,
		len(*hosts)-*minNodes,
		store,
		healthChecker,
		health.HealthState(*threshold),
		lockMessage,
	)
	if err != nil {
		log.Fatalf("Could not initialize replicator: %s", err)
	}

	replication, errCh, err := repl.InitializeReplication(*overrideLock)
	if err != nil {
		log.Fatalf("Unable to initialize replication: %s", err)
	}

	// auto-drain this channel
	go func() {
		for range errCh {
		}
	}()

	go func() {
		// clear lock immediately on ctrl-C
		signals := make(chan os.Signal, 1)
		signal.Notify(signals, os.Interrupt)
		<-signals
		replication.Cancel()
		os.Exit(1)
	}()

	replication.Enact()
}
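This main function reads several package-level kingpin flags and arguments (*manifestUri, *hosts, *minNodes, *threshold, *overrideLock) that are declared outside the snippet. Below is a minimal sketch of what those declarations could look like; the names, help strings, and defaults here are illustrative assumptions, not the actual p2 declarations.

// Hypothetical flag/argument declarations assumed by main() above.
var (
	manifestUri  = kingpin.Arg("manifest", "a path or URI to the pod manifest to deploy").Required().String()
	hosts        = kingpin.Arg("hosts", "the nodes to deploy the pod to").Required().Strings()
	minNodes     = kingpin.Flag("min-nodes", "minimum number of healthy nodes to keep up during the deploy").Default("1").Int()
	threshold    = kingpin.Flag("threshold", "health state considered healthy enough to proceed (e.g. passing)").Default("passing").String()
	overrideLock = kingpin.Flag("override-lock", "take over an existing replication lock if one is held").Bool()
)

The dereferences in main() (e.g. len(*hosts)-*minNodes) match this shape: kingpin returns pointers that are populated once flags.ParseWithConsulOptions() has run.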
func (ds *daemonSet) PublishToReplication() error {
	// We must cancel the replication because if we try to call
	// InitializeReplicationWithCheck, we will get an error
	ds.cancelReplication()

	podLocations, err := ds.CurrentPods()
	if err != nil {
		return util.Errorf("Error retrieving pod locations from daemon set: %v", err)
	}
	nodes := podLocations.Nodes()

	ds.logger.Infof("Preparing to publish the following nodes: %v", nodes)

	thisHost, err := os.Hostname()
	if err != nil {
		ds.logger.Errorf("Could not retrieve hostname: %s", err)
		thisHost = ""
	}
	thisUser, err := user.Current()
	if err != nil {
		ds.logger.Errorf("Could not retrieve user: %s", err)
		thisUser = &user.User{}
	}
	lockMessage := fmt.Sprintf("%q from %q at %q", thisUser.Username, thisHost, time.Now())

	repl, err := replication.NewReplicator(
		ds.DaemonSet.Manifest,
		ds.logger,
		nodes,
		len(nodes)-ds.DaemonSet.MinHealth,
		ds.kpStore,
		ds.applicator,
		*ds.healthChecker,
		health.HealthState(health.Passing),
		lockMessage,
		ds.Timeout,
	)
	if err != nil {
		ds.logger.Errorf("Could not initialize replicator: %s", err)
		return err
	}

	ds.logger.Info("New replicator was made")

	// Replication locks are designed to make sure that two replications to
	// the same nodes cannot occur at the same time. The granularity is
	// pod-wide as an optimization for consul performance (only need to
	// lock a single key) with limited downside when human operators are
	// executing deploys, because the likelihood of a lock collision is
	// low. With daemon sets, locking is not necessary because the node
	// sets should not overlap when they are managed properly. Even when
	// there is a node overlap between two daemon sets, a simple mutual
	// exclusion lock around replication will not prevent the pod manifest
	// on an overlapped node from thrashing. Therefore, it makes sense for
	// daemon sets to ignore this locking mechanism and always try to
	// converge nodes to the specified manifest
	replication, errCh, err := repl.InitializeDaemonSetReplication(
		replication.DefaultConcurrentReality,
		ds.rateLimitInterval,
	)
	if err != nil {
		ds.logger.Errorf("Unable to initialize replication: %s", err)
		return err
	}

	ds.logger.Info("Replication initialized")

	// auto-drain this channel
	go func() {
		for err := range errCh {
			ds.logger.Errorf("Error occurred in replication: '%v'", err)
		}
	}()

	// Set a new replication
	ds.currentReplication = replication

	go replication.Enact()

	ds.logger.Info("Replication enacted")

	return nil
}
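PublishToReplication starts by calling ds.cancelReplication() to stop any replication already in flight before a new one is initialized, and later stores the new replication in ds.currentReplication. A minimal sketch of what that helper might look like follows, assuming currentReplication exposes the same Cancel() method used by p2-replicate above; the real daemonSet implementation may differ.

// cancelReplication stops the in-flight replication, if any, so a new one can
// be initialized without conflicting with the old one.
// (Illustrative sketch, not the actual p2 source.)
func (ds *daemonSet) cancelReplication() {
	if ds.currentReplication != nil {
		ds.currentReplication.Cancel()
		ds.currentReplication = nil
		ds.logger.Info("Replication cancelled")
	}
}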