// ContinuousDiscovery starts an asynchronuous infinite discovery process where instances are // periodically investigated and their status captured, and long since unseen instances are // purged and forgotten. func ContinuousDiscovery() { if config.Config.DatabaselessMode__experimental { log.Fatal("Cannot execute continuous mode in databaseless mode") } log.Infof("Starting continuous discovery") inst.LoadHostnameResolveCacheFromDatabase() go handleDiscoveryRequests(nil, nil) discoveryTick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second) caretakingTick := time.Tick(time.Minute) recoveryTick := time.Tick(time.Duration(config.Config.RecoveryPollSeconds) * time.Second) var snapshotTopologiesTick <-chan time.Time if config.Config.SnapshotTopologiesIntervalHours > 0 { snapshotTopologiesTick = time.Tick(time.Duration(config.Config.SnapshotTopologiesIntervalHours) * time.Hour) } go ometrics.InitGraphiteMetrics() go acceptSignals() for { select { case <-discoveryTick: go func() { wasAlreadyElected := isElectedNode if isElectedNode, _ = process.AttemptElection(); isElectedNode { instanceKeys, _ := inst.ReadOutdatedInstanceKeys() log.Debugf("outdated keys: %+v", instanceKeys) for _, instanceKey := range instanceKeys { discoveryInstanceKeys <- instanceKey } if !wasAlreadyElected { // Just turned to be leader! go process.HealthTest() } } else { log.Debugf("Not elected as active node; polling") } }() case <-caretakingTick: // Various periodic internal maintenance tasks go func() { if isElectedNode { go inst.ForgetLongUnseenInstances() go inst.ForgetUnseenInstancesDifferentlyResolved() go inst.ForgetExpiredHostnameResolves() go inst.DeleteInvalidHostnameResolves() go inst.ReviewUnseenInstances() go inst.InjectUnseenMasters() go inst.ResolveUnknownMasterHostnameResolves() go inst.UpdateClusterAliases() go inst.ExpireMaintenance() go inst.ExpireDowntime() go inst.ExpireCandidateInstances() go inst.ExpireHostnameUnresolve() go inst.ExpireClusterDomainName() go inst.ExpireAudit() go inst.ExpireMasterPositionEquivalence() go inst.FlushNontrivialResolveCacheToDatabase() } if !isElectedNode { // Take this opportunity to refresh yourself inst.LoadHostnameResolveCacheFromDatabase() } go inst.ReadClusterAliases() go process.HealthTest() }() case <-recoveryTick: go func() { if isElectedNode { go ClearActiveFailureDetections() go ClearActiveRecoveries() go ExpireBlockedRecoveries() go CheckAndRecover(nil, nil, false) } }() case <-snapshotTopologiesTick: go func() { go inst.SnapshotTopologies() }() } } }
// discoverInstance will attempt discovering an instance (unless it is already up to date) and will // list down its master and slaves (if any) for further discovery. func discoverInstance(instanceKey inst.InstanceKey) { instanceKey.Formalize() if !instanceKey.IsValid() { return } instance, found, err := inst.ReadInstance(&instanceKey) if found && instance.IsUpToDate && instance.IsLastCheckValid { // we've already discovered this one. Skip! goto Cleanup } discoveriesCounter.Inc(1) // First we've ever heard of this instance. Continue investigation: instance, err = inst.ReadTopologyInstance(&instanceKey) // panic can occur (IO stuff). Therefore it may happen // that instance is nil. Check it. if err != nil || instance == nil { failedDiscoveriesCounter.Inc(1) log.Warningf("instance is nil in discoverInstance. key=%+v, error=%+v", instanceKey, err) goto Cleanup } log.Debugf("Discovered host: %+v, master: %+v", instance.Key, instance.MasterKey) if !isElectedNode { // Maybe this node was elected before, but isn't elected anymore. // If not elected, stop drilling down to further investigate slaves. return } // Investigate slaves: for _, slaveKey := range instance.SlaveHosts.GetInstanceKeys() { discoveryInstanceKeys <- slaveKey } // Investigate master: discoveryInstanceKeys <- instance.MasterKey Cleanup: } // Start discovery begins a one time asynchronuous discovery process for the given // instance and all of its topology connected instances. // That is, the instance will be investigated for master and slaves, and the routines will follow on // each and every such found master/slave. // In essense, assuming all slaves in a replication topology are running, and given a single instance // in such topology, this function will detect the entire topology. func StartDiscovery(instanceKey inst.InstanceKey) { log.Infof("Starting discovery at %+v", instanceKey) pendingTokens := make(chan bool, maxConcurrency) completedTokens := make(chan bool, maxConcurrency) accountedDiscoverInstance(instanceKey, pendingTokens, completedTokens) go handleDiscoveryRequests(pendingTokens, completedTokens) // Block until all are complete for { select { case <-pendingTokens: <-completedTokens default: inst.AuditOperation("start-discovery", &instanceKey, "") return } } } // ContinuousDiscovery starts an asynchronuous infinite discovery process where instances are // periodically investigated and their status captured, and long since unseen instances are // purged and forgotten. func ContinuousDiscovery() { log.Infof("Starting continuous discovery") inst.LoadHostnameResolveCacheFromDatabase() go handleDiscoveryRequests(nil, nil) tick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second) forgetUnseenTick := time.Tick(time.Minute) recoverTick := time.Tick(10 * time.Second) var snapshotTopologiesTick <-chan time.Time if config.Config.SnapshotTopologiesIntervalHours > 0 { snapshotTopologiesTick = time.Tick(time.Duration(config.Config.SnapshotTopologiesIntervalHours) * time.Hour) } go ometrics.InitGraphiteMetrics() for { select { case <-tick: go func() { if isElectedNode, _ = process.AttemptElection(); isElectedNode { instanceKeys, _ := inst.ReadOutdatedInstanceKeys() log.Debugf("outdated keys: %+v", instanceKeys) for _, instanceKey := range instanceKeys { discoveryInstanceKeys <- instanceKey } } else { log.Debugf("Not elected as active node; polling") } }() case <-forgetUnseenTick: // See if we should also forget objects (lower frequency) go func() { if isElectedNode { go inst.ForgetLongUnseenInstances() go inst.ForgetUnseenInstancesDifferentlyResolved() go inst.ForgetExpiredHostnameResolves() go inst.DeleteInvalidHostnameResolves() go inst.ReviewUnseenInstances() go inst.InjectUnseenMasters() go inst.ResolveUnknownMasterHostnameResolves() go inst.UpdateClusterAliases() go inst.ExpireMaintenance() go inst.ExpireDowntime() go inst.ExpireCandidateInstances() go inst.ExpireHostnameUnresolve() go inst.ExpireClusterDomainName() go inst.ExpireAudit() go inst.ExpireMasterPositionEquivalence() go inst.FlushNontrivialResolveCacheToDatabase() } if !isElectedNode { // Take this opportunity to refresh yourself inst.LoadHostnameResolveCacheFromDatabase() } go inst.ReadClusterAliases() go process.HealthTest() }() case <-recoverTick: go func() { if isElectedNode { go ClearActiveFailureDetections() go ClearActiveRecoveries() go CheckAndRecover(nil, nil, false, false) } }() case <-snapshotTopologiesTick: go func() { go inst.SnapshotTopologies() }() } } } func pollAgent(hostname string) error { polledAgent, err := agent.GetAgent(hostname) agent.UpdateAgentLastChecked(hostname) if err != nil { return log.Errore(err) } err = agent.UpdateAgentInfo(hostname, polledAgent) if err != nil { return log.Errore(err) } return nil } // ContinuousAgentsPoll starts an asynchronuous infinite process where agents are // periodically investigated and their status captured, and long since unseen agents are // purged and forgotten. func ContinuousAgentsPoll() { log.Infof("Starting continuous agents poll") go discoverSeededAgents() tick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second) forgetUnseenTick := time.Tick(time.Hour) for range tick { agentsHosts, _ := agent.ReadOutdatedAgentsHosts() log.Debugf("outdated agents hosts: %+v", agentsHosts) for _, hostname := range agentsHosts { go pollAgent(hostname) } // See if we should also forget agents (lower frequency) select { case <-forgetUnseenTick: agent.ForgetLongUnseenAgents() agent.FailStaleSeeds() default: } } } func discoverSeededAgents() { for seededAgent := range agent.SeededAgents { instanceKey := inst.InstanceKey{Hostname: seededAgent.Hostname, Port: int(seededAgent.MySQLPort)} go StartDiscovery(instanceKey) } }