// resetHeartbeatTimer is used to reset the TTL of a heartbeat.
// This can be used for new heartbeats and existing ones.
func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
	s.heartbeatTimersLock.Lock()
	defer s.heartbeatTimersLock.Unlock()

	// Compute the target TTL value
	n := len(s.heartbeatTimers)
	ttl := lib.RateScaledInterval(s.config.MaxHeartbeatsPerSecond, s.config.MinHeartbeatTTL, n)
	ttl += lib.RandomStagger(ttl)

	// Reset the TTL
	s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
	return ttl, nil
}
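// lib.RateScaledInterval and lib.RandomStagger are not shown in this excerpt.
// Below is a minimal sketch of what they might look like, inferred only from
// how they are called here -- an assumption for illustration, not the
// canonical implementations.

// RateScaledInterval picks an interval such that n nodes, each acting once
// per interval, stay under the aggregate rate (actions/sec). It never
// returns less than min.
func RateScaledInterval(rate float64, min time.Duration, n int) time.Duration {
	if rate <= 0 {
		return min
	}
	interval := time.Duration(float64(time.Second) * float64(n) / rate)
	if interval < min {
		return min
	}
	return interval
}

// RandomStagger returns a random duration in [0, intv) so that timers across
// nodes do not fire in lockstep.
func RandomStagger(intv time.Duration) time.Duration {
	if intv <= 0 {
		return 0
	}
	return time.Duration(uint64(rand.Int63()) % uint64(intv))
}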
// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	l := m.getServerList()
	numConsulServers := len(l.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// connReuseLowWatermarkDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec := float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWatermarkDuration := clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers := m.clusterInfo.NumNodes()
	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWatermarkDuration, numLANMembers)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}
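// To get a concrete feel for the arithmetic above, here is a rough,
// standalone calculation using the sketched helpers. The three constants
// below are illustrative stand-ins assumed for this example; the real values
// live elsewhere in the package and may differ.
func exampleRebalanceTimeout() time.Duration {
	const (
		newRebalanceConnsPerSecPerServer = 64              // assumed
		clientRPCMinReuseDuration        = 5 * time.Minute // assumed
		clientRPCJitterFraction          = 2               // assumed
	)
	numConsulServers := 3
	numLANMembers := 1000

	// 3 servers * 64 conns/sec each = a 192 conns/sec cluster-wide budget.
	rate := float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	lowWatermark := clientRPCMinReuseDuration + RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)

	// 1000 members / 192 conns/sec ≈ 5.2s, far below the ~5-7.5 minute
	// watermark, so the watermark is what actually limits rebalancing here.
	return RateScaledInterval(rate, lowWatermark, numLANMembers)
}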
// sendCoordinate is a long-running loop that periodically sends our coordinate
// to the server. Closing the agent's shutdownChannel will cause this to exit.
func (a *Agent) sendCoordinate() {
	for {
		rate := a.config.SyncCoordinateRateTarget
		min := a.config.SyncCoordinateIntervalMin
		intv := lib.RateScaledInterval(rate, min, len(a.LANMembers()))
		intv = intv + lib.RandomStagger(intv)

		select {
		case <-time.After(intv):
			members := a.LANMembers()
			grok, err := consul.CanServersUnderstandProtocol(members, 3)
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to check servers: %s", err)
				continue
			}
			if !grok {
				a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded")
				continue
			}

			c, err := a.GetCoordinate()
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err)
				continue
			}

			// TODO - Consider adding a distance check so we don't send
			// an update if the position hasn't changed by more than a
			// threshold.
			req := structs.CoordinateUpdateRequest{
				Datacenter:   a.config.Datacenter,
				Node:         a.config.NodeName,
				Coord:        c,
				WriteRequest: structs.WriteRequest{Token: a.config.ACLToken},
			}
			var reply struct{}
			if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
				a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
				continue
			}
		case <-a.shutdownCh:
			return
		}
	}
}
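// The TODO above suggests suppressing updates when the coordinate has barely
// moved. A minimal sketch of such a distance check, assuming the serf
// coordinate package; shouldSendCoordinate and its threshold parameter are
// hypothetical names invented here for illustration.

// shouldSendCoordinate reports whether the new coordinate has drifted far
// enough from the last one we sent to be worth an RPC. DistanceTo estimates
// the round-trip time between two coordinates.
func shouldSendCoordinate(last, next *coordinate.Coordinate, threshold time.Duration) bool {
	if last == nil {
		return true // nothing sent yet, always send the first update
	}
	return last.DistanceTo(next) > threshold
}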
// RPC is used to forward an RPC call to a consul server, or fail if no
// servers are available.
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Check to make sure we haven't spent too much time querying a
	// single server
	now := time.Now()
	if !c.connRebalanceTime.IsZero() && now.After(c.connRebalanceTime) {
		c.logger.Printf("[DEBUG] consul: connection time to server %s exceeded, rotating server connection", c.lastServer.Addr)
		c.lastServer = nil
	}

	// Allocate these vars on the stack before the goto
	var numConsulServers int
	var clusterWideRebalanceConnsPerSec float64
	var connReuseLowWaterMark time.Duration
	var numLANMembers int

	// Check the last RPC time: continue to reuse the cached connection as
	// long as it has been idle for less than clientRPCConnMaxIdle (total
	// reuse is separately capped by connRebalanceTime above).
	lastRPCTime := now.Sub(c.lastRPCTime)
	var server *serverParts
	if c.lastServer != nil && lastRPCTime < clientRPCConnMaxIdle {
		server = c.lastServer
		goto TRY_RPC
	}

	// Bail if we can't find any servers
	c.consulLock.RLock()
	numConsulServers = len(c.consuls)
	if numConsulServers == 0 {
		c.consulLock.RUnlock()
		return structs.ErrNoServers
	}

	// Select a random addr
	server = c.consuls[rand.Int31n(int32(numConsulServers))]
	c.consulLock.RUnlock()

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// connReuseLowWaterMark, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec = float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWaterMark = clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers = len(c.LANMembers())
	c.connRebalanceTime = now.Add(lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWaterMark, numLANMembers))
	c.logger.Printf("[DEBUG] consul: connection to server %s will expire at %v", server.Addr, c.connRebalanceTime)

	// Forward to remote Consul
TRY_RPC:
	if err := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, args, reply); err != nil {
		c.connRebalanceTime = time.Time{}
		c.lastRPCTime = time.Time{}
		c.lastServer = nil
		return err
	}

	// Cache the last server
	c.lastServer = server
	c.lastRPCTime = now
	return nil
}
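// A note on the "Allocate these vars on the stack before the goto" comment
// above: the Go spec forbids a goto that would jump over a variable
// declaration and into its scope, so anything the code after TRY_RPC needs
// must be declared before the first goto. A minimal illustration of the rule
// (this snippet does not compile):
//
//	goto DONE
//	x := 1 // compile error: goto jumps over declaration of x
//	DONE:
//	_ = x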