Example #1
0
File: main.go Project: tomzhang/p2
func main() {
	kingpin.CommandLine.Name = "p2-replicate"
	kingpin.CommandLine.Help = `p2-replicate uses the replication package to schedule deployment of a pod across multiple nodes. See the replication package's README and godoc for more information.

	Example invocation: p2-replicate --min-nodes 2 helloworld.yaml aws{1,2,3}.example.com

	This will take the pod whose manifest is located at helloworld.yaml and
	deploy it to the three nodes aws1.example.com, aws2.example.com, and
	aws3.example.com

	Because of --min-nodes 2, the replicator will ensure that at least two healthy
	nodes remain up at all times, according to p2's health checks.
`

	kingpin.Version(version.VERSION)
	_, opts := flags.ParseWithConsulOptions()
	client := kp.NewConsulClient(opts)
	store := kp.NewConsulStore(client)
	healthChecker := checker.NewConsulHealthChecker(client)

	manifest, err := pods.ManifestFromURI(*manifestUri)
	if err != nil {
		log.Fatalf("%s", err)
	}

	logger := logging.NewLogger(logrus.Fields{
		"pod": manifest.ID(),
	})
	logger.Logger.Formatter = &logrus.TextFormatter{
		DisableTimestamp: false,
		FullTimestamp:    true,
		TimestampFormat:  "15:04:05.000",
	}

	// create a lock with a meaningful name and set up a renewal loop for it
	thisHost, err := os.Hostname()
	if err != nil {
		log.Fatalf("Could not retrieve hostname: %s", err)
	}
	thisUser, err := user.Current()
	if err != nil {
		log.Fatalf("Could not retrieve user: %s", err)
	}
	lockMessage := fmt.Sprintf("%q from %q at %q", thisUser.Username, thisHost, time.Now())
	repl, err := replication.NewReplicator(
		manifest,
		logger,
		*hosts,
		len(*hosts)-*minNodes,
		store,
		healthChecker,
		health.HealthState(*threshold),
		lockMessage,
	)
	if err != nil {
		log.Fatalf("Could not initialize replicator: %s", err)
	}

	replication, errCh, err := repl.InitializeReplication(*overrideLock)
	if err != nil {
		log.Fatalf("Unable to initialize replication: %s", err)
	}

	// auto-drain this channel
	go func() {
		for range errCh {
		}
	}()

	go func() {
		// clear lock immediately on ctrl-C
		signals := make(chan os.Signal, 1)
		signal.Notify(signals, os.Interrupt)
		<-signals
		replication.Cancel()
		os.Exit(1)
	}()

	replication.Enact()
}
Example #2
0
func (ds *daemonSet) PublishToReplication() error {
	// We must cancel the replication because if we try to call
	// InitializeReplicationWithCheck, we will get an error
	ds.cancelReplication()

	podLocations, err := ds.CurrentPods()
	if err != nil {
		return util.Errorf("Error retrieving pod locations from daemon set: %v", err)
	}
	nodes := podLocations.Nodes()

	ds.logger.Infof("Preparing to publish the following nodes: %v", nodes)

	thisHost, err := os.Hostname()
	if err != nil {
		ds.logger.Errorf("Could not retrieve hostname: %s", err)
		thisHost = ""
	}
	thisUser, err := user.Current()
	if err != nil {
		ds.logger.Errorf("Could not retrieve user: %s", err)
		thisUser = &user.User{}
	}
	lockMessage := fmt.Sprintf("%q from %q at %q", thisUser.Username, thisHost, time.Now())
	repl, err := replication.NewReplicator(
		ds.DaemonSet.Manifest,
		ds.logger,
		nodes,
		len(nodes)-ds.DaemonSet.MinHealth,
		ds.kpStore,
		ds.applicator,
		*ds.healthChecker,
		health.HealthState(health.Passing),
		lockMessage,
		ds.Timeout,
	)
	if err != nil {
		ds.logger.Errorf("Could not initialize replicator: %s", err)
		return err
	}

	ds.logger.Info("New replicator was made")

	// Replication locks are designed to make sure that two replications to
	// the same nodes cannot occur at the same time. The granularity is
	// pod-wide as an optimization for consul performance (only need to
	// lock a single key) with limited downside when human operators are
	// executing deploys, because the likelihood of a lock collision is
	// low. With daemon sets, locking is not necessary because the node
	// sets should not overlap when they are managed properly. Even when
	// there is a node overlap between two daemon sets, a simple mutual
	// exclusion lock around replication will not prevent the pod manifest
	// on an overlapped node from thrashing. Therefore, it makes sense for
	// daemon sets to ignore this locking mechanism and always try to
	// converge nodes to the specified manifest
	replication, errCh, err := repl.InitializeDaemonSetReplication(
		replication.DefaultConcurrentReality,
		ds.rateLimitInterval,
	)
	if err != nil {
		ds.logger.Errorf("Unable to initialize replication: %s", err)
		return err
	}

	ds.logger.Info("Replication initialized")

	// auto-drain this channel
	go func() {
		for err := range errCh {
			ds.logger.Errorf("Error occurred in replication: '%v'", err)
		}
	}()

	// Set a new replication
	ds.currentReplication = replication

	go replication.Enact()

	ds.logger.Info("Replication enacted")

	return nil
}