Example #1
// resetHeartbeatTimer is used to reset the TTL of a heartbeat.
// This can be used for new heartbeats and existing ones.
func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
	s.heartbeatTimersLock.Lock()
	defer s.heartbeatTimersLock.Unlock()

	// Compute the target TTL value
	n := len(s.heartbeatTimers)
	ttl := lib.RateScaledInterval(s.config.MaxHeartbeatsPerSecond, s.config.MinHeartbeatTTL, n)
	ttl += lib.RandomStagger(ttl)

	// Reset the TTL
	s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
	return ttl, nil
}
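All of the examples in this section lean on the same two helpers, lib.RateScaledInterval and lib.RandomStagger. The snippet below is a minimal, illustrative stand-in for what they presumably compute (not the actual hashicorp library source): the first spreads n actors' timers out so the cluster-wide action rate stays at or below a target rate, with a floor, and the second adds a uniformly random stagger so the timers do not all fire at once. The function names and the values in main are assumptions made for illustration.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// rateScaledInterval is an illustrative stand-in for lib.RateScaledInterval:
// with n actors each acting once per returned interval, the aggregate rate
// stays at or below `rate` actions per second, floored at `min`.
func rateScaledInterval(rate float64, min time.Duration, n int) time.Duration {
	interval := time.Duration(float64(time.Second) * float64(n) / rate)
	if interval < min {
		return min
	}
	return interval
}

// randomStagger is an illustrative stand-in for lib.RandomStagger: a
// uniformly random duration in [0, intv) that de-synchronizes timers.
func randomStagger(intv time.Duration) time.Duration {
	if intv <= 0 {
		return 0
	}
	return time.Duration(rand.Int63n(int64(intv)))
}

func main() {
	// Illustrative values only: 5,000 tracked heartbeats with a target of
	// 50 heartbeats/s give a 100s base TTL, plus up to 100s of stagger.
	ttl := rateScaledInterval(50.0, 10*time.Second, 5000)
	fmt.Println(ttl, ttl+randomStagger(ttl))
}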
Example #2
// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	l := m.getServerList()
	numConsulServers := len(l.servers)
	// Limit this connection's life based on the size (and health) of the
	// cluster.  Never rebalance a connection more frequently than
	// connReuseLowWatermarkDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec := float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWatermarkDuration := clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers := m.clusterInfo.NumNodes()
	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWatermarkDuration, numLANMembers)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}
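For a sense of the numbers Example #2 produces, here is a hedged back-of-the-envelope calculation. The constant values below are assumptions chosen only for illustration, not the library's actual defaults, and the stagger term is omitted.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Assumed values, for illustration only.
	numConsulServers := 3
	connsPerSecPerServer := 64.0 // stand-in for newRebalanceConnsPerSecPerServer
	minReuse := 2 * time.Minute  // stand-in for connReuseLowWatermarkDuration
	numLANMembers := 5000

	// 3 servers * 64 conns/s = 192 cluster-wide rebalances per second.
	clusterWideRate := float64(numConsulServers) * connsPerSecPerServer

	// 5,000 members spread across 192 conns/s is roughly 26s per client,
	// which falls below the two-minute low watermark, so the watermark wins.
	timeout := time.Duration(float64(time.Second) * float64(numLANMembers) / clusterWideRate)
	if timeout < minReuse {
		timeout = minReuse
	}
	fmt.Println(timeout) // 2m0s
}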
Example #3
// sendCoordinate is a long-running loop that periodically sends our coordinate
// to the server. Closing the agent's shutdownChannel will cause this to exit.
func (a *Agent) sendCoordinate() {
	for {
		rate := a.config.SyncCoordinateRateTarget
		min := a.config.SyncCoordinateIntervalMin
		intv := lib.RateScaledInterval(rate, min, len(a.LANMembers()))
		intv = intv + lib.RandomStagger(intv)

		select {
		case <-time.After(intv):
			members := a.LANMembers()
			grok, err := consul.CanServersUnderstandProtocol(members, 3)
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to check servers: %s", err)
				continue
			}
			if !grok {
				a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded")
				continue
			}

			c, err := a.GetCoordinate()
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err)
				continue
			}

			// TODO - Consider adding a distance check so we don't send
			// an update if the position hasn't changed by more than a
			// threshold.
			req := structs.CoordinateUpdateRequest{
				Datacenter:   a.config.Datacenter,
				Node:         a.config.NodeName,
				Coord:        c,
				WriteRequest: structs.WriteRequest{Token: a.config.ACLToken},
			}
			var reply struct{}
			if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
				a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
				continue
			}
		case <-a.shutdownCh:
			return
		}
	}
}
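The surrounding loop in Example #3 is the usual shutdown-aware ticker pattern: the interval is recomputed on every pass so it tracks cluster size, and time.After races the shutdown channel. A minimal sketch of just that skeleton, using hypothetical names rather than the agent's own:

package main

import (
	"fmt"
	"time"
)

// runPeriodic is a hypothetical skeleton of the loop in Example #3: the
// interval is recomputed on every pass, and closing stopCh ends the loop.
func runPeriodic(interval func() time.Duration, work func(), stopCh <-chan struct{}) {
	for {
		select {
		case <-time.After(interval()):
			work()
		case <-stopCh:
			return
		}
	}
}

func main() {
	stopCh := make(chan struct{})
	go runPeriodic(
		func() time.Duration { return 10 * time.Millisecond },
		func() { fmt.Println("tick") },
		stopCh,
	)
	time.Sleep(35 * time.Millisecond)
	close(stopCh)
}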
Example #4
// RPC is used to forward an RPC call to a consul server, or fail if no servers are available
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Check to make sure we haven't spent too much time querying a
	// single server
	now := time.Now()
	if !c.connRebalanceTime.IsZero() && now.After(c.connRebalanceTime) {
		c.logger.Printf("[DEBUG] consul: connection time to server %s exceeded, rotating server connection", c.lastServer.Addr)
		c.lastServer = nil
	}

	// Allocate these vars on the stack before the goto
	var numConsulServers int
	var clusterWideRebalanceConnsPerSec float64
	var connReuseLowWaterMark time.Duration
	var numLANMembers int

	// Check the last RPC time and reuse the cached connection as long
	// as it has not been idle for longer than clientRPCConnMaxIdle
	lastRPCTime := now.Sub(c.lastRPCTime)
	var server *serverParts
	if c.lastServer != nil && lastRPCTime < clientRPCConnMaxIdle {
		server = c.lastServer
		goto TRY_RPC
	}

	// Bail if we can't find any servers
	c.consulLock.RLock()
	numConsulServers = len(c.consuls)
	if numConsulServers == 0 {
		c.consulLock.RUnlock()
		return structs.ErrNoServers
	}

	// Select a random addr
	server = c.consuls[rand.Int31n(int32(numConsulServers))]
	c.consulLock.RUnlock()

	// Limit this connection's life based on the size (and health) of the
	// cluster.  Never rebalance a connection more frequently than
	// connReuseLowWaterMark, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec = float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWaterMark = clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers = len(c.LANMembers())
	c.connRebalanceTime = now.Add(lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWaterMark, numLANMembers))
	c.logger.Printf("[DEBUG] consul: connection to server %s will expire at %v", server.Addr, c.connRebalanceTime)

	// Forward to remote Consul
TRY_RPC:
	if err := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, args, reply); err != nil {
		c.connRebalanceTime = time.Time{}
		c.lastRPCTime = time.Time{}
		c.lastServer = nil
		return err
	}

	// Cache the last server
	c.lastServer = server
	c.lastRPCTime = now
	return nil
}
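Example #4 combines the same rate-scaled rebalance deadline with a cached-connection policy: reuse the last server while the previous RPC is recent and the rebalance deadline has not passed, and on any error drop the cached server so the next call re-selects at random. The sketch below is a hedged distillation of that reuse decision; connState, shouldReuse, and the values in main are hypothetical names and numbers, not Consul's.

package main

import (
	"fmt"
	"time"
)

// connState is an illustrative reduction of the fields Example #4 reads:
// whether a server is cached, when the last successful RPC happened, and
// the deadline after which the connection should be rotated.
type connState struct {
	haveServer    bool
	lastRPCTime   time.Time
	rebalanceTime time.Time
}

// shouldReuse mirrors the decision in Example #4: rotate once the rebalance
// deadline has passed, otherwise reuse the cached server as long as it has
// not been idle longer than maxIdle.
func shouldReuse(s connState, now time.Time, maxIdle time.Duration) bool {
	if !s.rebalanceTime.IsZero() && now.After(s.rebalanceTime) {
		return false
	}
	return s.haveServer && now.Sub(s.lastRPCTime) < maxIdle
}

func main() {
	now := time.Now()
	s := connState{
		haveServer:    true,
		lastRPCTime:   now.Add(-30 * time.Second),
		rebalanceTime: now.Add(5 * time.Minute),
	}
	// true: the last RPC was recent and the rotation deadline has not passed.
	fmt.Println(shouldReuse(s, now, 2*time.Minute))
}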