예제 #1
// send stamps kab.lastSent with the current time and then issues a single
// keep-alive to Consul from a newly started goroutine, so the caller does not
// wait on the network call.
//
// When kab.isSession is set the session kab.ID is renewed; otherwise the TTL
// check "service:"+kab.ID is marked as passing. Nothing is returned: a failure
// is only reported through the .Error(...) log call.
//
// NOTE(review): this excerpt looks truncated — the expression that opens each
// log statement and the closing braces are not visible here. Confirm against
// the full file before changing the code.
func (kab *KeepAliveBuffer) send() {
	kab.lastSent = time.Now()
	// Send but don't block worker (to ensure fast service of channel).
	go func() {
		consul := config.GetConsulClient()
		if kab.isSession {
			// Session TTL: renew the session; only the error result is used.
			session := consul.Session()
			_, _, err := session.Renew(kab.ID, nil)
			if err != nil {
					"err", err,
					"sessionID", kab.ID,
				).Error("Error trying to send keep alive")
		} else {
			// Service TTL.
			agent := consul.Agent()
			// The check ID is the service ID with a "service:" prefix; the
			// second argument (the note) is left empty.
			err := agent.PassTTL("service:"+kab.ID, "")
			if err != nil {
					"err", err,
					"serviceID", kab.ID,
				).Error("Error trying to send keep alive")
예제 #2
// send issues a single keep-alive to Consul on the calling goroutine.
//
// When ska.isSession is set the session ska.ID is renewed; otherwise the TTL
// check "service:"+ska.ID is marked as passing. Nothing is returned: a failure
// is only reported through the .Error(...) log call.
//
// NOTE(review): this excerpt looks truncated — the expression that opens each
// log statement and the closing braces are not visible here. Confirm against
// the full file before changing the code.
func (ska *SelfKeepAlive) send() {
	consul := config.GetConsulClient()
	if ska.isSession {
		// Session TTL.
		session := consul.Session()
		// Only the error result of the renewal is used.
		_, _, err := session.Renew(ska.ID, nil)
		if err != nil {
				"err", err,
				"sessionID", ska.ID,
			).Error("Error trying to send keep alive")
	} else {
		// Service TTL.
		agent := consul.Agent()
		// The check ID is the service ID with a "service:" prefix; the second
		// argument (the note) is left empty.
		err := agent.PassTTL("service:"+ska.ID, "")
		if err != nil {
				"err", err,
				"serviceID", ska.ID,
			).Error("Error trying to send keep alive")
예제 #3
// WaitResource monitors a resource and blocks until that resource is
// released or there is some other error.
//
// Both service and resource are passed through url.QueryEscape before use.
// The function then polls the Consul KV store in a loop, feeding the index
// returned by the previous query back in as WaitIndex and requiring a
// consistent read.
//
// It returns nil once the KV lookup yields no pair, and returns the error
// from kv.Get when consulapi.IsServerError reports false for it. On a server
// error it sleeps for three seconds instead.
//
// NOTE(review): this excerpt looks truncated — the key passed to kv.Get, the
// opening of the query-options literal, the closing braces and whatever
// follows the sleep (presumably a retry of the loop) are not visible here.
// Confirm against the full file.
func WaitResource(service string, resource string) error {
	service = url.QueryEscape(service)
	resource = url.QueryEscape(resource)

	consul := config.GetConsulClient()
	kv := consul.KV()

	// Index of the last observed state; 0 on the first query.
	lastIndex := uint64(0)
	for {
		pair, qm, err := kv.Get(
				WaitIndex:         lastIndex,
				RequireConsistent: true,
		if err != nil {
			// Anything that is not a server error is handed to the caller.
			if !consulapi.IsServerError(err) {
				return err
			// Consul unresponsive. Wait a bit and try again.
			time.Sleep(3 * time.Second)
		// No pair under the key: treated as "resource released".
		if pair == nil {
			return nil
		// Remember the index so the next query uses it as WaitIndex.
		lastIndex = qm.LastIndex
예제 #4
// RegisterServiceLocal registers the service as running on the current node.
//
// Parameters:
//   - service: the name the service is registered under.
//   - instanceID: the ID of this particular service instance.
//   - target: the instance endpoint, which must split on ":" into exactly two
//     parts, the address and a decimal port.
//   - ttl: the TTL of the health check attached to the registration,
//     rendered with ttl.String().
//
// It returns an "Invalid address" error when target does not split into
// exactly two parts or when the port is not an integer; otherwise it returns
// the result of agent.ServiceRegister. The check is registered with the
// initial status "passing".
//
// NOTE(review): because target is split on every ":", any value containing
// more than one colon (for example an unbracketed IPv6 literal) is rejected.
// NOTE(review): this excerpt looks truncated — closing braces are not visible
// here. Confirm against the full file.
func RegisterServiceLocal(
	service string, instanceID string, target string,
	ttl time.Duration) (err error) {
	consul := config.GetConsulClient()
	agent := consul.Agent()

	// Parse target into address + port.
	addressSplit := strings.Split(target, ":")
	if len(addressSplit) != 2 {
		return fmt.Errorf("Invalid address")
	port, err := strconv.Atoi(addressSplit[1])
	if err != nil {
		// The underlying parse error is replaced by the generic message.
		return fmt.Errorf("Invalid address")
	address := addressSplit[0]
	return agent.ServiceRegister(&consulapi.AgentServiceRegistration{
		Name:    service,
		ID:      instanceID,
		Address: address,
		Port:    port,
		Check: &consulapi.AgentServiceCheck{
			TTL:    ttl.String(),
			Status: "passing",
예제 #5
// scaleDown stops up to delta instances of the serving described by rpcEvent.
//
// It asks Consul's health endpoint, with a consistent read, for the entries
// registered under rpcEvent.ServingID. It caps delta at the number of entries
// returned, shuffles the entries with a random permutation so the victims are
// picked at random, and calls hostman.StopInstance for each of the first
// delta entries.
//
// A failure to stop one instance is logged and does not abort the loop. After
// all attempts the function returns a generic error if any stop failed, and
// nil otherwise.
//
// NOTE(review): this excerpt looks truncated — the opening of each log
// statement, the first argument of hostman.StopInstance, the closing braces
// and whatever happens after the Consul query error is logged (presumably an
// early return) are not visible here. Confirm against the full file.
// NOTE(review): presumably callers pass a non-negative delta; a negative value
// would make shuffled[:delta] panic — verify against callers.
func (tracker *FleetTracker) scaleDown(delta int, rpcEvent *RPCEvent) error {
		"leverEnv", rpcEvent.Environment,
		"leverService", rpcEvent.Service,
		"codeVersion", rpcEvent.CodeVersion,
		"servingID", rpcEvent.ServingID,
		"deltaInstances", delta,
	).Info("Scaling down")

	consulHealth := config.GetConsulClient().Health()
	// Third argument is true — looks like the Consul client's "passing only"
	// filter; confirm against the API documentation.
	entries, _, err := consulHealth.Service(
		rpcEvent.ServingID, "", true, &consulapi.QueryOptions{
			RequireConsistent: true,
	if err != nil {
			"err", err,
			"servingID", rpcEvent.ServingID,
		).Error("Error trying to ask Consul for instances")
	// Never try to remove more instances than Consul reported.
	if len(entries) < delta {
		delta = len(entries)

	// Shuffle the entries so that the instances to stop are chosen at random.
	tmpRand := leverutil.GetRand()
	permutation := tmpRand.Perm(len(entries))
	shuffled := make([]*consulapi.ServiceEntry, len(entries))
	for from, to := range permutation {
		shuffled[to] = entries[from]
	toRemove := shuffled[:delta]

	// Attempt every stop; remember whether any of them failed.
	hadErrors := false
	for _, entry := range toRemove {
		err = hostman.StopInstance(
				Environment: rpcEvent.Environment,
				Service:     rpcEvent.Service,
				InstanceID:  entry.Service.ID,
				ServingID:   rpcEvent.ServingID,
			}, entry.Node.Node)
		if err != nil {
			logger.WithFields("err", err).Error(
				"Error trying to stop instance remotely")
			hadErrors = true
	if hadErrors {
		return fmt.Errorf("There were errors during scale down")
	return nil
예제 #6
// newResouce builds a Resource for the given service/resource pair and
// session ID.
//
// Both service and resource are passed through url.QueryEscape first. The
// escaped values are what end up in the returned struct's service and
// resource fields, and they also form the key:
// consulResourcePrefix + service + "/" + resource. The Consul client is
// obtained from config.GetConsulClient().
//
// NOTE(review): the name looks like a typo of "newResource"; renaming it would
// require updating callers outside this excerpt.
// NOTE(review): this excerpt looks truncated — the closing of the struct
// literal and of the function is not visible here.
func newResouce(
	service string, resource string, sessionID string) (res *Resource) {
	service = url.QueryEscape(service)
	resource = url.QueryEscape(resource)
	return &Resource{
		key:       consulResourcePrefix + service + "/" + resource,
		consul:    config.GetConsulClient(),
		service:   service,
		resource:  resource,
		sessionID: sessionID,
예제 #7
// tick decides by how many instances the serving should be scaled, given the
// latest load statistics.
//
// Parameters:
//   - avgRPCNanos, rpcNanosVariance: mean and variance of the RPC duration,
//     in nanoseconds (the mean is divided by 1e9 below).
//   - avgRate, rateVariance: mean and variance of the RPC rate (presumably
//     RPCs per second, given the nanosecond-to-second conversion — confirm).
//
// The returned deltaInstances is:
//   - 0 when the required and assumed instance counts match, or when a
//     decrease is needed but tracker.decreaseTime has not passed yet;
//   - positive (capped at MaxDeltaFlag.Get()) when instances must be added;
//   - negative when a decrease was already pending and tracker.decreaseTime
//     has passed.
//
// Side effects on tracker: queriedNumInstances and nextInstancesQuery may be
// refreshed from Consul; numInstances, totalDeltaInstances and deltas are
// reset when the expected changes appear to have propagated; and
// decreaseTriggered / decreaseTime track a pending scale-down.
//
// NOTE(review): this excerpt looks truncated — the closing braces, the
// opening of the log statements, the end of the Consul query options and the
// right-hand side of the tracker.nextInstancesQuery assignment are not visible
// here. Confirm against the full file before changing the code.
func (tracker *LoadTracker) tick(
	avgRPCNanos, rpcNanosVariance, avgRate,
	rateVariance float64) (deltaInstances int) {
	// TODO: This model works decently well for real-time, non-streaming RPCs.
	//       It doesn't do so well with streaming or RPCs taking a long time
	//       because it is imprecise and reacts very late (after the
	//       RPC / stream has finished). Need another strategy for those cases.

	// Assume RPC rate and time are the worst possible (max in confidence
	// interval). This gives us a theoretical 97.5% certainty (in reality
	// we are much less sure, due to many other factors not modeled here).
	RPCNanosCI := 1.96 * math.Sqrt(rpcNanosVariance)
	maxRPCNanos := float64(avgRPCNanos) + RPCNanosCI
	rateCI := 1.96 * math.Sqrt(rateVariance)
	maxRate := avgRate + rateCI
	// Load = worst-case rate times worst-case duration converted to seconds.
	totalLoad := maxRate * (maxRPCNanos / float64(1000000000))

	// Work out how many instances are theoretically necessary right now, given
	// the current load.
	requiredNumInstances := int(math.Ceil(totalLoad / tracker.maxInstanceLoad))
	// Never go below the configured minimum.
	requiredNumInstances = max(requiredNumInstances, tracker.minInstances)

	// Work out how many instances are expected to be healthy in the near
	// future.
	if time.Now().After(tracker.nextInstancesQuery) {
		// Refresh number of healthy instances from Consul.
		consulHealth := config.GetConsulClient().Health()
		entries, _, err := consulHealth.Service(
			tracker.servingID, "", true, &consulapi.QueryOptions{
				AllowStale:        true,
				RequireConsistent: false,
		if err != nil {
				"err", err,
				"servingID", tracker.servingID,
			).Error("Error trying to ask Consul for instances")
		} else {
			tracker.queriedNumInstances = len(entries)
		// We don't need this number to be fresh. Don't do this very often.
		tracker.nextInstancesQuery =
	var assumedNumInstances int
	if tracker.queriedNumInstances ==
		tracker.numInstances+tracker.totalDeltaInstances ||
		tracker.numInstances == -1 ||
		tracker.totalDeltaInstances == 0 {
		// All expected changes have been applied or load oscillating or
		// we made no changes but the number of instances changed for reasons
		// external to this algorithm (eg instances expired or died on their
		// own) or the tracker is completely new.
		// TODO: What if there is a widespread crash across instances. We might
		//       want to react quickly to bring up replacements. Maybe.
		//       Maybe not. We would need to avoid bringing up replacement(s)
		//       forever in situations where they eg just crash on startup.
		// Just reset deltas.
		tracker.numInstances = tracker.queriedNumInstances
		tracker.totalDeltaInstances = 0
		tracker.deltas = nil
		assumedNumInstances = tracker.queriedNumInstances
	} else {
		// Not all changes propagated. Use the number of instances we expect.
		assumedNumInstances = tracker.numInstances + tracker.totalDeltaInstances

	// Positive: more instances needed; negative: too many instances.
	delta := requiredNumInstances - assumedNumInstances
		"avgRate", avgRate,
		"rateCI", rateCI,
		"avgRPCNanos", avgRPCNanos,
		"RPCNanosCI", RPCNanosCI,
		"servingID", tracker.servingID,
		"avgRPCNanos", avgRPCNanos,
		"RPCNanosCI", RPCNanosCI,
		"avgRate", avgRate,
		"rateCI", rateCI,
		"totalLoad", totalLoad,
		"requiredNumInstances", requiredNumInstances,
		"assumedNumInstances", assumedNumInstances,
		"delta", delta,
	if delta == 0 {
		// In balance - nothing to do.
		tracker.decreaseTriggered = false
		return 0

	if delta > 0 {
		// Add instances.
		tracker.decreaseTriggered = false
		// Limit how many instances are added in a single tick.
		maxDelta := MaxDeltaFlag.Get()
		if delta > maxDelta {
			delta = maxDelta
		return delta

	// Maybe remove instances (if the load was low for a while).
	if tracker.decreaseTriggered && time.Now().After(tracker.decreaseTime) {
		// Time for a decrease.
		tracker.decreaseTriggered = false
		return delta
	} else if !tracker.decreaseTriggered {
		// Just noticed decrease is necessary. If after ScaleDownAfterFlag
		// we still need to decrease, do it then.
		tracker.decreaseTriggered = true
		tracker.decreaseTime = time.Now().Add(ScaleDownAfterFlag.Get())
		return 0
	} else {
		// Just wait patiently until tracker.decreaseTime.
		return 0

	// Note: It is possible that a future tick may not take place at all
	//       and a decrease does not take effect. In that case, the
	//       instance expiry timer within each instance would take care of
	//       clearing unnecessary instances.
예제 #8
// ServiceKeepAlive maintains the TTL for a service.
//
// It marks the TTL check "service:"+instanceID as passing through the Consul
// agent obtained from config.GetConsulClient(), with an empty note, and
// returns the error from that call unchanged.
//
// NOTE(review): the closing brace is not visible in this excerpt.
func ServiceKeepAlive(instanceID string) error {
	consul := config.GetConsulClient()
	agent := consul.Agent()
	return agent.PassTTL("service:"+instanceID, "")
예제 #9
// DeregisterService deregisters a service from Consul.
//
// instanceID is handed to the Consul agent's ServiceDeregister call as-is;
// the error from that call is returned unchanged.
//
// NOTE(review): the closing brace is not visible in this excerpt.
func DeregisterService(instanceID string) error {
	consul := config.GetConsulClient()
	agent := consul.Agent()
	return agent.ServiceDeregister(instanceID)
예제 #10
// GetOwnNodeName returns the node name of the current node.
//
// The name and error are whatever the Consul agent's NodeName call reports
// for the client obtained from config.GetConsulClient().
//
// NOTE(review): the closing brace is not visible in this excerpt.
func GetOwnNodeName() (string, error) {
	consul := config.GetConsulClient()
	return consul.Agent().NodeName()