// antiEntropy is a long running method used to perform anti-entropy
// between local and remote state.
func (l *localState) antiEntropy(shutdownCh chan struct{}) {
SYNC:
	// Sync our state with the servers
	for {
		err := l.setSyncState()
		if err == nil {
			break
		}
		l.logger.Printf("[ERR] agent: failed to sync remote state: %v", err)

		select {
		case <-l.consulCh:
			// Stagger the retry on leader election, avoid a thundering herd
			select {
			case <-time.After(lib.RandomStagger(aeScale(syncStaggerIntv, len(l.iface.LANMembers())))):
			case <-shutdownCh:
				return
			}
		case <-time.After(syncRetryIntv + lib.RandomStagger(aeScale(syncRetryIntv, len(l.iface.LANMembers())))):
		case <-shutdownCh:
			return
		}
	}

	// Force-trigger AE to pick up any changes
	l.changeMade()

	// Schedule the next full sync, with a random stagger
	aeIntv := aeScale(l.config.AEInterval, len(l.iface.LANMembers()))
	aeIntv = aeIntv + lib.RandomStagger(aeIntv)
	aeTimer := time.After(aeIntv)

	// Wait for sync events
	for {
		select {
		case <-aeTimer:
			goto SYNC
		case <-l.triggerCh:
			// Skip the sync if we are paused
			if l.isPaused() {
				continue
			}
			if err := l.syncChanges(); err != nil {
				l.logger.Printf("[ERR] agent: failed to sync changes: %v", err)
			}
		case <-shutdownCh:
			return
		}
	}
}
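// Nearly every snippet in this collection leans on lib.RandomStagger for jitter
// (and a few, like the one above, additionally scale the interval with cluster
// size via helpers such as aeScale, which are not shown here). The helper itself
// is also not shown; below is a minimal sketch of what it presumably does,
// inferred from the call sites: return a uniformly random duration in [0, intv).
// The package name and exact implementation are assumptions, not the actual
// hashicorp/consul/lib source.
package lib

import (
	"math/rand"
	"time"
)

// RandomStagger returns a random duration in [0, intv), used to spread
// periodic work out so that many nodes do not act in lockstep.
func RandomStagger(intv time.Duration) time.Duration {
	if intv <= 0 {
		return 0
	}
	return time.Duration(uint64(rand.Int63()) % uint64(intv))
}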
// registerAndHeartbeat is a long lived goroutine used to register the client
// and then start heartbeating to the server.
func (c *Client) registerAndHeartbeat() {
	// Register the node
	c.retryRegisterNode()

	// Start watching for node changes
	go c.watchNodeUpdates()

	// Setup the heartbeat timer. For the initial registration we want to do
	// this quickly, and extra quickly in development mode.
	var heartbeat <-chan time.Time
	if c.config.DevMode {
		heartbeat = time.After(0)
	} else {
		heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
	}

	for {
		select {
		case <-heartbeat:
			if err := c.updateNodeStatus(); err != nil {
				heartbeat = time.After(c.retryIntv(registerRetryIntv))
			} else {
				c.heartbeatLock.Lock()
				heartbeat = time.After(c.heartbeatTTL)
				c.heartbeatLock.Unlock()
			}
		case <-c.shutdownCh:
			return
		}
	}
}
// Run triggers periodic syncing of services and checks with Consul. This is
// a long lived go-routine which is stopped during shutdown.
func (c *Syncer) Run() {
	sync := time.NewTimer(0)
	for {
		select {
		case <-sync.C:
			d := syncInterval - lib.RandomStagger(syncInterval/syncJitter)
			sync.Reset(d)

			if err := c.SyncServices(); err != nil {
				if c.consulAvailable {
					c.logger.Printf("[DEBUG] consul.syncer: error in syncing: %v", err)
				}
				c.consulAvailable = false
			} else {
				if !c.consulAvailable {
					c.logger.Printf("[DEBUG] consul.syncer: syncs successful")
				}
				c.consulAvailable = true
			}
		case <-c.notifySyncCh:
			sync.Reset(syncInterval)
		case <-c.shutdownCh:
			c.Shutdown()
		case <-c.notifyShutdownCh:
			sync.Stop()
			c.logger.Printf("[INFO] consul.syncer: shutting down syncer")
			return
		}
	}
}
// blockingRPC is used for queries that need to wait for a
// minimum index. This is used to block and wait for changes.
func (s *Server) blockingRPC(opts *blockingOptions) error {
	var timeout *time.Timer
	var notifyCh chan struct{}
	var state *state.StateStore

	// Fast path non-blocking
	if opts.queryOpts.MinQueryIndex == 0 {
		goto RUN_QUERY
	}

	// Restrict the max query time, and ensure there is always one
	if opts.queryOpts.MaxQueryTime > maxQueryTime {
		opts.queryOpts.MaxQueryTime = maxQueryTime
	} else if opts.queryOpts.MaxQueryTime <= 0 {
		opts.queryOpts.MaxQueryTime = defaultQueryTime
	}

	// Apply a small amount of jitter to the request
	opts.queryOpts.MaxQueryTime += lib.RandomStagger(opts.queryOpts.MaxQueryTime / jitterFraction)

	// Setup a query timeout
	timeout = time.NewTimer(opts.queryOpts.MaxQueryTime)

	// Setup the notify channel
	notifyCh = make(chan struct{}, 1)

	// Ensure we tear down any watchers on return
	state = s.fsm.State()
	defer func() {
		timeout.Stop()
		state.StopWatch(opts.watch, notifyCh)
	}()

REGISTER_NOTIFY:
	// Register the notification channel. This may be done
	// multiple times if we have not reached the target wait index.
	state.Watch(opts.watch, notifyCh)

RUN_QUERY:
	// Update the query meta data
	s.setQueryMeta(opts.queryMeta)

	// Run the query function
	metrics.IncrCounter([]string{"nomad", "rpc", "query"}, 1)
	err := opts.run()

	// Check for minimum query time
	if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
		select {
		case <-notifyCh:
			goto REGISTER_NOTIFY
		case <-timeout.C:
		}
	}
	return err
}
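// Both blockingRPC variants in this collection widen MaxQueryTime by
// RandomStagger(MaxQueryTime / jitterFraction), so blocking queries that were
// started together do not all time out on the same tick. The standalone sketch
// below just shows the arithmetic; the jitterFraction value of 16 and the
// randomStagger helper are assumptions mirroring the earlier sketch, not
// constants taken from the surrounding source.
package main

import (
	"fmt"
	"math/rand"
	"time"
)

const jitterFraction = 16 // assumed value; the real constant is defined elsewhere

// randomStagger mirrors the earlier sketch: a random duration in [0, intv).
func randomStagger(intv time.Duration) time.Duration {
	if intv <= 0 {
		return 0
	}
	return time.Duration(uint64(rand.Int63()) % uint64(intv))
}

func main() {
	// With a 5 minute MaxQueryTime the added jitter is at most 5m/16 = 18.75s,
	// so the effective timeout lands somewhere in [5m, 5m18.75s).
	maxQueryTime := 5 * time.Minute
	maxQueryTime += randomStagger(maxQueryTime / jitterFraction)
	fmt.Println("effective blocking query timeout:", maxQueryTime)
}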
// registerAndHeartbeat is a long lived goroutine used to register the client
// and then start heartbeating to the server.
func (c *Client) registerAndHeartbeat() {
	// Register the node
	c.retryRegisterNode()

	// Start watching for node changes
	go c.watchNodeUpdates()

	// Setup the heartbeat timer. For the initial registration we want to do
	// this quickly, and extra quickly in development mode.
	var heartbeat <-chan time.Time
	if c.config.DevMode {
		heartbeat = time.After(0)
	} else {
		heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
	}

	for {
		select {
		case <-heartbeat:
			if err := c.updateNodeStatus(); err != nil {
				// The servers have changed such that this node has not been
				// registered before
				if strings.Contains(err.Error(), "node not found") {
					// Re-register the node
					c.logger.Printf("[INFO] client: re-registering node")
					c.retryRegisterNode()
					heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
				} else {
					c.logger.Printf("[ERR] client: heartbeating failed: %v", err)
					heartbeat = time.After(c.retryIntv(registerRetryIntv))
				}
			} else {
				c.heartbeatLock.Lock()
				heartbeat = time.After(c.heartbeatTTL)
				c.heartbeatLock.Unlock()
			}
		case <-c.shutdownCh:
			return
		}
	}
}
// UpdateCheck is used to update the status of a check
func (l *localState) UpdateCheck(checkID types.CheckID, status, output string) {
	l.Lock()
	defer l.Unlock()

	check, ok := l.checks[checkID]
	if !ok {
		return
	}

	// Update the critical time tracking (this doesn't cause a server update
	// so we can always keep this up to date).
	if status == structs.HealthCritical {
		_, wasCritical := l.checkCriticalTime[checkID]
		if !wasCritical {
			l.checkCriticalTime[checkID] = time.Now()
		}
	} else {
		delete(l.checkCriticalTime, checkID)
	}

	// Do nothing if update is idempotent
	if check.Status == status && check.Output == output {
		return
	}

	// Defer a sync if the output has changed. This is an optimization around
	// frequent updates of output. Instead, we update the output internally,
	// and periodically do a write-back to the servers. If there is a status
	// change we do the write immediately.
	if l.config.CheckUpdateInterval > 0 && check.Status == status {
		check.Output = output
		if _, ok := l.deferCheck[checkID]; !ok {
			intv := time.Duration(uint64(l.config.CheckUpdateInterval)/2) + lib.RandomStagger(l.config.CheckUpdateInterval)
			deferSync := time.AfterFunc(intv, func() {
				l.Lock()
				if _, ok := l.checkStatus[checkID]; ok {
					l.checkStatus[checkID] = syncStatus{inSync: false}
					l.changeMade()
				}
				delete(l.deferCheck, checkID)
				l.Unlock()
			})
			l.deferCheck[checkID] = deferSync
		}
		return
	}

	// Update status and mark out of sync
	check.Status = status
	check.Output = output
	l.checkStatus[checkID] = syncStatus{inSync: false}
	l.changeMade()
}
// resetHeartbeatTimer is used to reset the TTL of a heartbeat.
// This can be used for new heartbeats and existing ones.
func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
	s.heartbeatTimersLock.Lock()
	defer s.heartbeatTimersLock.Unlock()

	// Compute the target TTL value
	n := len(s.heartbeatTimers)
	ttl := lib.RateScaledInterval(s.config.MaxHeartbeatsPerSecond, s.config.MinHeartbeatTTL, n)
	ttl += lib.RandomStagger(ttl)

	// Reset the TTL
	s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
	return ttl, nil
}
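// resetHeartbeatTimer, and several snippets below (refreshServerRebalanceTimer,
// sendCoordinate, Client.RPC), pair RandomStagger with lib.RateScaledInterval,
// which is also not shown in this collection. The sketch below captures the
// behavior implied by those call sites: with n timers each firing once per
// returned interval, the aggregate rate stays near `rate` per second, and the
// interval never drops below `min`. This is an inference, not the actual
// hashicorp/consul/lib implementation.
package lib

import "time"

// RateScaledInterval scales an interval so that n actors, each acting once per
// interval, produce roughly `rate` actions per second in aggregate, clamped to
// a floor of min.
func RateScaledInterval(rate float64, min time.Duration, n int) time.Duration {
	if rate <= 0 {
		// Guard against a zero/negative target rate in this sketch.
		return min
	}
	interval := time.Duration(float64(time.Second) * float64(n) / rate)
	if interval < min {
		return min
	}
	return interval
}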
// forward is used to forward to a remote region or to forward to the local leader.
// Returns a bool indicating whether forwarding was performed, as well as any error.
func (s *Server) forward(method string, info structs.RPCInfo, args interface{}, reply interface{}) (bool, error) {
	var firstCheck time.Time

	region := info.RequestRegion()
	if region == "" {
		return true, fmt.Errorf("missing target RPC")
	}

	// Handle region forwarding
	if region != s.config.Region {
		err := s.forwardRegion(region, method, args, reply)
		return true, err
	}

	// Check if we can allow a stale read
	if info.IsRead() && info.AllowStaleRead() {
		return false, nil
	}

CHECK_LEADER:
	// Find the leader
	isLeader, remoteServer := s.getLeader()

	// Handle the case we are the leader
	if isLeader {
		return false, nil
	}

	// Handle the case of a known leader
	if remoteServer != nil {
		err := s.forwardLeader(remoteServer, method, args, reply)
		return true, err
	}

	// Gate the request until there is a leader
	if firstCheck.IsZero() {
		firstCheck = time.Now()
	}
	if time.Now().Sub(firstCheck) < s.config.RPCHoldTimeout {
		jitter := lib.RandomStagger(s.config.RPCHoldTimeout / jitterFraction)
		select {
		case <-time.After(jitter):
			goto CHECK_LEADER
		case <-s.shutdownCh:
		}
	}

	// No leader found and hold time exceeded
	return true, structs.ErrNoLeader
}
// setupAgent is used to start the agent and various interfaces
func (c *Command) setupAgent(config *Config, logOutput io.Writer) error {
	c.Ui.Output("Starting Nomad agent...")
	agent, err := NewAgent(config, logOutput)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
		return err
	}
	c.agent = agent

	// Enable the SCADA integration
	if err := c.setupSCADA(config); err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting SCADA: %s", err))
		return err
	}

	// Setup the HTTP server
	http, err := NewHTTPServer(agent, config, logOutput)
	if err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
		return err
	}
	c.httpServer = http

	// Setup update checking
	if !config.DisableUpdateCheck {
		version := config.Version
		if config.VersionPrerelease != "" {
			version += fmt.Sprintf("-%s", config.VersionPrerelease)
		}
		updateParams := &checkpoint.CheckParams{
			Product: "nomad",
			Version: version,
		}
		if !config.DisableAnonymousSignature {
			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
		}

		// Schedule a periodic check with expected interval of 24 hours
		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)

		// Do an immediate check within the next 30 seconds
		go func() {
			time.Sleep(lib.RandomStagger(30 * time.Second))
			c.checkpointResults(checkpoint.Check(updateParams))
		}()
	}
	return nil
}
// refreshServerRebalanceTimer is only called once m.rebalanceTimer expires.
func (m *Manager) refreshServerRebalanceTimer() time.Duration {
	l := m.getServerList()
	numConsulServers := len(l.servers)

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// connReuseLowWatermarkDuration, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec := float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWatermarkDuration := clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers := m.clusterInfo.NumNodes()
	connRebalanceTimeout := lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWatermarkDuration, numLANMembers)

	m.rebalanceTimer.Reset(connRebalanceTimeout)
	return connRebalanceTimeout
}
// run is invoked by a goroutine to run until Stop() is called
func (c *CheckDocker) run() {
	// Get the randomized initial pause time
	initialPauseTime := lib.RandomStagger(c.Interval)
	c.Logger.Printf("[DEBUG] agent: pausing %v before first invocation of %s -c %s in container %s", initialPauseTime, c.Shell, c.Script, c.DockerContainerID)
	next := time.After(initialPauseTime)
	for {
		select {
		case <-next:
			c.check()
			next = time.After(c.Interval)
		case <-c.stopCh:
			return
		}
	}
}
// run is invoked by a goroutine to run until Stop() is called
func (c *CheckTCP) run() {
	// Get the randomized initial pause time
	initialPauseTime := lib.RandomStagger(c.Interval)
	c.Logger.Printf("[DEBUG] agent: pausing %v before first socket connection of %s", initialPauseTime, c.TCP)
	next := time.After(initialPauseTime)
	for {
		select {
		case <-next:
			c.check()
			next = time.After(c.Interval)
		case <-c.stopCh:
			return
		}
	}
}
// run is invoked by a goroutine to run until Stop() is called
func (r *CheckRunner) run() {
	// Get the randomized initial pause time
	initialPauseTime := lib.RandomStagger(r.check.Interval())
	r.logger.Printf("[DEBUG] agent: pausing %v before first invocation of %s", initialPauseTime, r.check.ID())
	next := time.NewTimer(initialPauseTime)
	for {
		select {
		case <-next.C:
			r.runCheck(r.check)
			next.Reset(r.check.Interval())
		case <-r.stopCh:
			next.Stop()
			return
		}
	}
}
// sendCoordinate is a long-running loop that periodically sends our coordinate
// to the server. Closing the agent's shutdownChannel will cause this to exit.
func (a *Agent) sendCoordinate() {
	for {
		rate := a.config.SyncCoordinateRateTarget
		min := a.config.SyncCoordinateIntervalMin
		intv := lib.RateScaledInterval(rate, min, len(a.LANMembers()))
		intv = intv + lib.RandomStagger(intv)

		select {
		case <-time.After(intv):
			members := a.LANMembers()
			grok, err := consul.CanServersUnderstandProtocol(members, 3)
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to check servers: %s", err)
				continue
			}
			if !grok {
				a.logger.Printf("[DEBUG] agent: skipping coordinate updates until servers are upgraded")
				continue
			}

			c, err := a.GetCoordinate()
			if err != nil {
				a.logger.Printf("[ERR] agent: failed to get coordinate: %s", err)
				continue
			}

			// TODO - Consider adding a distance check so we don't send
			// an update if the position hasn't changed by more than a
			// threshold.
			req := structs.CoordinateUpdateRequest{
				Datacenter:   a.config.Datacenter,
				Node:         a.config.NodeName,
				Coord:        c,
				WriteRequest: structs.WriteRequest{Token: a.config.ACLToken},
			}
			var reply struct{}
			if err := a.RPC("Coordinate.Update", &req, &reply); err != nil {
				a.logger.Printf("[ERR] agent: coordinate update error: %s", err)
				continue
			}
		case <-a.shutdownCh:
			return
		}
	}
}
func (c *ConsulBackend) runEventDemuxer(shutdownCh ShutdownChannel, advertiseAddr string, activeFunc activeFunction, sealedFunc sealedFunction) {
	// Fire the reconcileTimer immediately upon starting the event demuxer
	reconcileTimer := time.NewTimer(0)
	defer reconcileTimer.Stop()

	// Schedule the first check. Consul TTL checks are passing by
	// default, checkTimer does not need to be run immediately.
	checkTimer := time.NewTimer(c.checkDuration())
	defer checkTimer.Stop()

	// Use a reactor pattern to handle and dispatch events to singleton
	// goroutine handlers for execution. It is not acceptable to drop
	// inbound events from Notify*().
	//
	// goroutines are dispatched if the demuxer can acquire a lock (via
	// an atomic CAS incr) on the handler. Handlers are responsible for
	// deregistering themselves (atomic CAS decr). Handlers and the
	// demuxer share a lock to synchronize information at the beginning
	// and end of a handler's life (or after a handler wakes up from
	// sleeping during a back-off/retry).
	var shutdown bool
	var checkLock int64
	var registeredServiceID string
	var serviceRegLock int64

shutdown:
	for {
		select {
		case <-c.notifyActiveCh:
			// Run reconcile immediately upon active state change notification
			reconcileTimer.Reset(0)
		case <-c.notifySealedCh:
			// Run check timer immediately upon a seal state change notification
			checkTimer.Reset(0)
		case <-reconcileTimer.C:
			// Unconditionally rearm the reconcileTimer
			reconcileTimer.Reset(reconcileTimeout - lib.RandomStagger(reconcileTimeout/checkJitterFactor))

			// Abort if service discovery is disabled or a
			// reconcile handler is already active
			if !c.disableRegistration && atomic.CompareAndSwapInt64(&serviceRegLock, 0, 1) {
				// Enter handler with serviceRegLock held
				go func() {
					defer atomic.CompareAndSwapInt64(&serviceRegLock, 1, 0)
					for !shutdown {
						serviceID, err := c.reconcileConsul(registeredServiceID, activeFunc, sealedFunc)
						if err != nil {
							c.logger.Printf("[WARN]: consul: reconcile unable to talk with Consul backend: %v", err)
							time.Sleep(consulRetryInterval)
							continue
						}

						c.serviceLock.Lock()
						defer c.serviceLock.Unlock()

						registeredServiceID = serviceID
						return
					}
				}()
			}
		case <-checkTimer.C:
			checkTimer.Reset(c.checkDuration())

			// Abort if service discovery is disabled or a
			// check handler is already active
			if !c.disableRegistration && atomic.CompareAndSwapInt64(&checkLock, 0, 1) {
				// Enter handler with checkLock held
				go func() {
					defer atomic.CompareAndSwapInt64(&checkLock, 1, 0)
					for !shutdown {
						sealed := sealedFunc()
						if err := c.runCheck(sealed); err != nil {
							c.logger.Printf("[WARN]: consul: check unable to talk with Consul backend: %v", err)
							time.Sleep(consulRetryInterval)
							continue
						}
						return
					}
				}()
			}
		case <-shutdownCh:
			c.logger.Printf("[INFO]: consul: Shutting down consul backend")
			shutdown = true
			break shutdown
		}
	}

	c.serviceLock.RLock()
	defer c.serviceLock.RUnlock()
	if err := c.client.Agent().ServiceDeregister(registeredServiceID); err != nil {
		c.logger.Printf("[WARN]: consul: service deregistration failed: %v", err)
	}
}
// setupAgent is used to start the agent and various interfaces
func (c *Command) setupAgent(config *Config, logOutput io.Writer, logWriter *logWriter) error {
	c.Ui.Output("Starting Consul agent...")
	agent, err := Create(config, logOutput)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
		return err
	}
	c.agent = agent

	// Setup the RPC listener
	rpcAddr, err := config.ClientListener(config.Addresses.RPC, config.Ports.RPC)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Invalid RPC bind address: %s", err))
		return err
	}

	// Clear the domain socket file if it exists
	socketPath, isSocket := unixSocketAddr(config.Addresses.RPC)
	if isSocket {
		if _, err := os.Stat(socketPath); !os.IsNotExist(err) {
			agent.logger.Printf("[WARN] agent: Replacing socket %q", socketPath)
		}
		if err := os.Remove(socketPath); err != nil && !os.IsNotExist(err) {
			c.Ui.Output(fmt.Sprintf("Error removing socket file: %s", err))
			return err
		}
	}

	rpcListener, err := net.Listen(rpcAddr.Network(), rpcAddr.String())
	if err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting RPC listener: %s", err))
		return err
	}

	// Set up ownership/permission bits on the socket file
	if isSocket {
		if err := setFilePermissions(socketPath, config.UnixSockets); err != nil {
			agent.Shutdown()
			c.Ui.Error(fmt.Sprintf("Error setting up socket: %s", err))
			return err
		}
	}

	// Start the IPC layer
	c.Ui.Output("Starting Consul agent RPC...")
	c.rpcServer = NewAgentRPC(agent, rpcListener, logOutput, logWriter)

	// Enable the SCADA integration
	if err := c.setupScadaConn(config); err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting SCADA connection: %s", err))
		return err
	}

	if config.Ports.HTTP > 0 || config.Ports.HTTPS > 0 {
		servers, err := NewHTTPServers(agent, config, logOutput)
		if err != nil {
			agent.Shutdown()
			c.Ui.Error(fmt.Sprintf("Error starting http servers: %s", err))
			return err
		}
		c.httpServers = servers
	}

	if config.Ports.DNS > 0 {
		dnsAddr, err := config.ClientListener(config.Addresses.DNS, config.Ports.DNS)
		if err != nil {
			agent.Shutdown()
			c.Ui.Error(fmt.Sprintf("Invalid DNS bind address: %s", err))
			return err
		}

		server, err := NewDNSServer(agent, &config.DNSConfig, logOutput,
			config.Domain, dnsAddr.String(), config.DNSRecursors)
		if err != nil {
			agent.Shutdown()
			c.Ui.Error(fmt.Sprintf("Error starting dns server: %s", err))
			return err
		}
		c.dnsServer = server
	}

	// Setup update checking
	if !config.DisableUpdateCheck {
		version := config.Version
		if config.VersionPrerelease != "" {
			version += fmt.Sprintf("-%s", config.VersionPrerelease)
		}
		updateParams := &checkpoint.CheckParams{
			Product: "consul",
			Version: version,
		}
		if !config.DisableAnonymousSignature {
			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
		}

		// Schedule a periodic check with expected interval of 24 hours
		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)

		// Do an immediate check within the next 30 seconds
		go func() {
			time.Sleep(lib.RandomStagger(30 * time.Second))
			c.checkpointResults(checkpoint.Check(updateParams))
		}()
	}
	return nil
}
// runACLReplication is a long-running goroutine that will attempt to replicate
// ACLs while the server is the leader, until the shutdown channel closes.
func (s *Server) runACLReplication() {
	var status structs.ACLReplicationStatus
	status.Enabled = true
	status.SourceDatacenter = s.config.ACLDatacenter
	s.updateACLReplicationStatus(status)

	// Show that it's not running on the way out.
	defer func() {
		status.Running = false
		s.updateACLReplicationStatus(status)
	}()

	// Give each server's replicator a random initial phase for good
	// measure.
	select {
	case <-s.shutdownCh:
		return

	case <-time.After(lib.RandomStagger(s.config.ACLReplicationInterval)):
	}

	// We are fairly conservative with the lastRemoteIndex so that after a
	// leadership change or an error we re-sync everything (we also don't
	// want to block the first time after one of these events so we can
	// show a successful sync in the status endpoint).
	var lastRemoteIndex uint64
	replicate := func() {
		if !status.Running {
			lastRemoteIndex = 0 // Re-sync everything.
			status.Running = true
			s.updateACLReplicationStatus(status)
			s.logger.Printf("[INFO] consul: ACL replication started")
		}

		index, err := s.replicateACLs(lastRemoteIndex)
		if err != nil {
			lastRemoteIndex = 0 // Re-sync everything.
			status.LastError = time.Now()
			s.updateACLReplicationStatus(status)
			s.logger.Printf("[WARN] consul: ACL replication error (will retry if still leader): %v", err)
		} else {
			lastRemoteIndex = index
			status.ReplicatedIndex = index
			status.LastSuccess = time.Now()
			s.updateACLReplicationStatus(status)
			s.logger.Printf("[DEBUG] consul: ACL replication completed through remote index %d", index)
		}
	}
	pause := func() {
		if status.Running {
			lastRemoteIndex = 0 // Re-sync everything.
			status.Running = false
			s.updateACLReplicationStatus(status)
			s.logger.Printf("[INFO] consul: ACL replication stopped (no longer leader)")
		}
	}

	// This will slowly poll to see if replication should be active. Once it
	// is and we've caught up, the replicate() call will begin to block and
	// only wake up when the query timer expires or there are new ACLs to
	// replicate. We've chosen this design so that the ACLReplicationInterval
	// is the lower bound for how quickly we will replicate, no matter how
	// much ACL churn is happening on the remote side.
	//
	// The blocking query inside replicate() respects the shutdown channel,
	// so we won't get stuck in here as things are torn down.
	for {
		select {
		case <-s.shutdownCh:
			return

		case <-time.After(s.config.ACLReplicationInterval):
			if s.IsLeader() {
				replicate()
			} else {
				pause()
			}
		}
	}
}
// retryIntv calculates a retry interval value given the base
func (c *Client) retryIntv(base time.Duration) time.Duration {
	if c.config.DevMode {
		return devModeRetryIntv
	}
	return base + lib.RandomStagger(base)
}
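// retryIntv is the simplest form of the jitter pattern used throughout these
// snippets: base + RandomStagger(base) draws each retry uniformly from
// [base, 2*base), so clients that failed at the same moment do not all retry at
// the same moment. The demo below uses a hypothetical 15 second base; the real
// registerRetryIntv constant is defined elsewhere in the client and is not
// shown in this collection.
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// randomStagger mirrors the earlier sketch: a random duration in [0, intv).
func randomStagger(intv time.Duration) time.Duration {
	if intv <= 0 {
		return 0
	}
	return time.Duration(uint64(rand.Int63()) % uint64(intv))
}

func main() {
	base := 15 * time.Second // hypothetical base retry interval
	for i := 0; i < 3; i++ {
		// Each draw lands in [15s, 30s).
		fmt.Println("retry in:", base+randomStagger(base))
	}
}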
// blockingRPC is used for queries that need to wait for a minimum index. This
// is used to block and wait for changes.
func (s *Server) blockingRPC(queryOpts *structs.QueryOptions, queryMeta *structs.QueryMeta,
	watch state.Watch, run func() error) error {
	var timeout *time.Timer
	var notifyCh chan struct{}

	// Fast path right to the non-blocking query.
	if queryOpts.MinQueryIndex == 0 {
		goto RUN_QUERY
	}

	// Make sure a watch was given if we were asked to block.
	if watch == nil {
		panic("no watch given for blocking query")
	}

	// Restrict the max query time, and ensure there is always one.
	if queryOpts.MaxQueryTime > maxQueryTime {
		queryOpts.MaxQueryTime = maxQueryTime
	} else if queryOpts.MaxQueryTime <= 0 {
		queryOpts.MaxQueryTime = defaultQueryTime
	}

	// Apply a small amount of jitter to the request.
	queryOpts.MaxQueryTime += lib.RandomStagger(queryOpts.MaxQueryTime / jitterFraction)

	// Setup a query timeout.
	timeout = time.NewTimer(queryOpts.MaxQueryTime)

	// Setup the notify channel.
	notifyCh = make(chan struct{}, 1)

	// Ensure we tear down any watches on return.
	defer func() {
		timeout.Stop()
		watch.Clear(notifyCh)
	}()

REGISTER_NOTIFY:
	// Register the notification channel. This may be done multiple times if
	// we haven't reached the target wait index.
	watch.Wait(notifyCh)

RUN_QUERY:
	// Update the query metadata.
	s.setQueryMeta(queryMeta)

	// If the read must be consistent we verify that we are still the leader.
	if queryOpts.RequireConsistent {
		if err := s.consistentRead(); err != nil {
			return err
		}
	}

	// Run the query.
	metrics.IncrCounter([]string{"consul", "rpc", "query"}, 1)
	err := run()

	// Check for minimum query time.
	if err == nil && queryMeta.Index > 0 && queryMeta.Index <= queryOpts.MinQueryIndex {
		select {
		case <-notifyCh:
			goto REGISTER_NOTIFY
		case <-timeout.C:
		}
	}
	return err
}
// setupBootstrapHandler() creates the closure necessary to support a Consul
// fallback handler.
func (s *Server) setupBootstrapHandler() error {
	// peersTimeout is used to indicate to the Consul Syncer that the
	// current Nomad Server has a stale peer set. peersTimeout will time
	// out if the Consul Syncer bootstrapFn has not observed a Raft
	// leader in maxStaleLeadership. If peersTimeout has been triggered,
	// the Consul Syncer will begin querying Consul for other Nomad
	// Servers.
	//
	// NOTE: time.Timer is used vs time.Time in order to handle clock
	// drift because time.Timer is implemented as a monotonic clock.
	var peersTimeout *time.Timer = time.NewTimer(0)

	// consulQueryCount is the number of times the bootstrapFn has been
	// called, regardless of success.
	var consulQueryCount uint64

	// leadershipTimedOut is a helper method that returns true if the
	// peersTimeout timer has expired.
	leadershipTimedOut := func() bool {
		select {
		case <-peersTimeout.C:
			return true
		default:
			return false
		}
	}

	// The bootstrapFn callback handler is used to periodically poll
	// Consul to look up the Nomad Servers in Consul. In the event the
	// server has been brought up without a `retry-join` configuration
	// and this Server is partitioned from the rest of the cluster,
	// periodically poll Consul to reattach this Server to other servers
	// in the same region and automatically reform a quorum (assuming the
	// correct number of servers required for quorum are present).
	bootstrapFn := func() error {
		// If there is a raft leader, do nothing
		if s.raft.Leader() != "" {
			peersTimeout.Reset(maxStaleLeadership)
			return nil
		}

		// (ab)use serf.go's behavior of setting BootstrapExpect to
		// zero once we have bootstrapped.
		bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
		if bootstrapExpect == 0 {
			// This Nomad Server has been bootstrapped. Rely on
			// the peersTimeout firing as a guard to prevent
			// aggressive querying of Consul.
			if !leadershipTimedOut() {
				return nil
			}
		} else {
			if consulQueryCount > 0 && !leadershipTimedOut() {
				return nil
			}

			// This Nomad Server has not been bootstrapped, reach
			// out to Consul if our peer list is less than
			// `bootstrap_expect`.
			raftPeers, err := s.raftPeers.Peers()
			if err != nil {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}

			// The necessary number of Nomad Servers required for
			// quorum has been reached, we do not need to poll
			// Consul. Let the normal timeout-based strategy
			// take over.
			if len(raftPeers) >= int(bootstrapExpect) {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}
		}
		consulQueryCount++

		s.logger.Printf("[DEBUG] server.consul: lost contact with Nomad quorum, falling back to Consul for server list")

		consulCatalog := s.consulSyncer.ConsulClient().Catalog()
		dcs, err := consulCatalog.Datacenters()
		if err != nil {
			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("server.consul: unable to query Consul datacenters: %v", err)
		}
		if len(dcs) > 2 {
			// Query the local DC first, then shuffle the
			// remaining DCs. If additional calls to bootstrapFn
			// are necessary, this Nomad Server will eventually
			// walk all datacenters until it finds enough hosts to
			// form a quorum.
			shuffleStrings(dcs[1:])
			dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)]
		}

		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
		var mErr multierror.Error
		const defaultMaxNumNomadServers = 8
		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
		localNode := s.serf.Memberlist().LocalNode()
		for _, dc := range dcs {
			consulOpts := &consulapi.QueryOptions{
				AllowStale: true,
				Datacenter: dc,
				Near:       "_agent",
				WaitTime:   consul.DefaultQueryWaitDuration,
			}
			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
			if err != nil {
				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
				s.logger.Printf("[WARN] server.consul: %v", err)
				mErr.Errors = append(mErr.Errors, err)
				continue
			}

			for _, cs := range consulServices {
				port := strconv.FormatInt(int64(cs.ServicePort), 10)
				addr := cs.ServiceAddress
				if addr == "" {
					addr = cs.Address
				}
				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
					continue
				}
				serverAddr := net.JoinHostPort(addr, port)
				nomadServerServices = append(nomadServerServices, serverAddr)
			}
		}

		if len(nomadServerServices) == 0 {
			if len(mErr.Errors) > 0 {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return mErr.ErrorOrNil()
			}

			// Log the error and return nil so future handlers
			// can attempt to register the `nomad` service.
			pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
			s.logger.Printf("[TRACE] server.consul: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
			peersTimeout.Reset(pollInterval)
			return nil
		}

		numServersContacted, err := s.Join(nomadServerServices)
		if err != nil {
			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
		}

		peersTimeout.Reset(maxStaleLeadership)
		s.logger.Printf("[INFO] server.consul: successfully contacted %d Nomad Servers", numServersContacted)

		return nil
	}

	s.consulSyncer.AddPeriodicHandler("Nomad Server Fallback Server Handler", bootstrapFn)
	return nil
}
// RPC is used to forward an RPC call to a consul server, or fail if no servers
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Check to make sure we haven't spent too much time querying a
	// single server
	now := time.Now()
	if !c.connRebalanceTime.IsZero() && now.After(c.connRebalanceTime) {
		c.logger.Printf("[DEBUG] consul: connection time to server %s exceeded, rotating server connection", c.lastServer.Addr)
		c.lastServer = nil
	}

	// Allocate these vars on the stack before the goto
	var numConsulServers int
	var clusterWideRebalanceConnsPerSec float64
	var connReuseLowWaterMark time.Duration
	var numLANMembers int

	// Check the last RPC time and continue to reuse the cached connection
	// unless it has been idle for longer than clientRPCConnMaxIdle
	lastRPCTime := now.Sub(c.lastRPCTime)
	var server *serverParts
	if c.lastServer != nil && lastRPCTime < clientRPCConnMaxIdle {
		server = c.lastServer
		goto TRY_RPC
	}

	// Bail if we can't find any servers
	c.consulLock.RLock()
	numConsulServers = len(c.consuls)
	if numConsulServers == 0 {
		c.consulLock.RUnlock()
		return structs.ErrNoServers
	}

	// Select a random addr
	server = c.consuls[rand.Int31n(int32(numConsulServers))]
	c.consulLock.RUnlock()

	// Limit this connection's life based on the size (and health) of the
	// cluster. Never rebalance a connection more frequently than
	// connReuseLowWaterMark, and make sure we never exceed
	// clusterWideRebalanceConnsPerSec operations/s across numLANMembers.
	clusterWideRebalanceConnsPerSec = float64(numConsulServers * newRebalanceConnsPerSecPerServer)
	connReuseLowWaterMark = clientRPCMinReuseDuration + lib.RandomStagger(clientRPCMinReuseDuration/clientRPCJitterFraction)
	numLANMembers = len(c.LANMembers())
	c.connRebalanceTime = now.Add(lib.RateScaledInterval(clusterWideRebalanceConnsPerSec, connReuseLowWaterMark, numLANMembers))
	c.logger.Printf("[DEBUG] consul: connection to server %s will expire at %v", server.Addr, c.connRebalanceTime)

	// Forward to remote Consul
TRY_RPC:
	if err := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, args, reply); err != nil {
		c.connRebalanceTime = time.Time{}
		c.lastRPCTime = time.Time{}
		c.lastServer = nil
		return err
	}

	// Cache the last server
	c.lastServer = server
	c.lastRPCTime = now
	return nil
}