// RegisterNode writes down this node in the node_health table func RegisterNode(extraInfo string, command string, firstTime bool) (sql.Result, error) { if firstTime { db.ExecOrchestrator(` insert ignore into node_health_history (hostname, token, first_seen_active, extra_info, command, app_version) values (?, ?, NOW(), ?, ?, ?) `, ThisHostname, ProcessToken.Hash, extraInfo, command, config.RuntimeCLIFlags.ConfiguredVersion, ) } return db.ExecOrchestrator(` insert into node_health (hostname, token, last_seen_active, extra_info, command, app_version) values (?, ?, NOW(), ?, ?, ?) on duplicate key update token=values(token), last_seen_active=values(last_seen_active), extra_info=if(values(extra_info) != '', values(extra_info), extra_info), app_version=values(app_version) `, ThisHostname, ProcessToken.Hash, extraInfo, command, config.RuntimeCLIFlags.ConfiguredVersion, ) }
// WriteHostnameUnresolve upserts an entry in hostname_unresolve func WriteHostnameUnresolve(instanceKey *InstanceKey, unresolvedHostname string) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` insert into hostname_unresolve ( hostname, unresolved_hostname, last_registered) values (?, ?, NOW()) on duplicate key update unresolved_hostname=values(unresolved_hostname), last_registered=now() `, instanceKey.Hostname, unresolvedHostname, ) if err != nil { return log.Errore(err) } _, err = db.ExecOrchestrator(` replace into hostname_unresolve_history ( hostname, unresolved_hostname, last_registered) values (?, ?, NOW()) `, instanceKey.Hostname, unresolvedHostname, ) writeUnresolvedHostnameCounter.Inc(1) return nil } return ExecDBWriteFunc(writeFunc) }
// UpdateClusterAliases writes down the cluster_alias table based on information // gained from database_instance func UpdateClusterAliases() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` replace into cluster_alias (alias, cluster_name, last_registered) select suggested_cluster_alias, substring_index(group_concat( cluster_name order by ((last_checked <= last_seen) is true) desc, read_only asc, num_slave_hosts desc ), ',', 1) as cluster_name, NOW() from database_instance left join database_instance_downtime using (hostname, port) where suggested_cluster_alias!='' /* exclude newly demoted, downtimed masters */ and ifnull( database_instance_downtime.downtime_active = 1 and database_instance_downtime.end_timestamp > now() and database_instance_downtime.reason = ? , false) is false group by suggested_cluster_alias `, DowntimeLostInRecoveryMessage) return log.Errore(err) } if err := ExecDBWriteFunc(writeFunc); err != nil { return err } writeFunc = func() error { // Handling the case where no cluster alias exists: we write a dummy alias in the form of the real cluster name. _, err := db.ExecOrchestrator(` replace into cluster_alias (alias, cluster_name, last_registered) select cluster_name, cluster_name, now() from database_instance group by cluster_name having sum(suggested_cluster_alias = '') = count(*) `) return log.Errore(err) } if err := ExecDBWriteFunc(writeFunc); err != nil { return err } return nil }
// WriteLongRunningProcesses rewrites current state of long running processes for given instance func WriteLongRunningProcesses(instanceKey *InstanceKey, processes []Process) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` delete from database_instance_long_running_queries where hostname = ? and port = ? `, instanceKey.Hostname, instanceKey.Port) if err != nil { return log.Errore(err) } for _, process := range processes { _, merr := db.ExecOrchestrator(` insert ignore into database_instance_long_running_queries ( hostname, port, process_id, process_started_at, process_user, process_host, process_db, process_command, process_time_seconds, process_state, process_info ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, instanceKey.Hostname, instanceKey.Port, process.Id, process.StartedAt, process.User, process.Host, process.Db, process.Command, process.Time, process.State, process.Info, ) if merr != nil { err = merr } } if err != nil { return log.Errore(err) } return nil } return ExecDBWriteFunc(writeFunc) }
// auditInstanceAnalysisInChangelog will write down an instance's analysis in the database_instance_analysis_changelog table. // To not repeat recurring analysis code, the database_instance_last_analysis table is used, so that only changes to // analysis codes are written. func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode AnalysisCode) error { if lastWrittenAnalysis, found := recentInstantAnalysis.Get(instanceKey.DisplayString()); found { if lastWrittenAnalysis == analysisCode { // Surely nothing new. // And let's expand the timeout recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration) return nil } } // Passed the cache; but does database agree that there's a change? Here's a persistent cache; this comes here // to verify no two orchestrator services are doing this without coordinating (namely, one dies, the other taking its place // and has no familiarity of the former's cache) analysisChangeWriteAttemptCounter.Inc(1) sqlResult, err := db.ExecOrchestrator(` insert ignore into database_instance_last_analysis ( hostname, port, analysis_timestamp, analysis ) values ( ?, ?, now(), ? ) on duplicate key update analysis = values(analysis), analysis_timestamp = if(analysis = values(analysis), analysis_timestamp, values(analysis_timestamp)) `, instanceKey.Hostname, instanceKey.Port, string(analysisCode), ) if err != nil { return log.Errore(err) } rows, err := sqlResult.RowsAffected() if err != nil { return log.Errore(err) } recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration) lastAnalysisChanged := (rows > 0) if !lastAnalysisChanged { return nil } _, err = db.ExecOrchestrator(` insert into database_instance_analysis_changelog ( hostname, port, analysis_timestamp, analysis ) values ( ?, ?, now(), ? ) `, instanceKey.Hostname, instanceKey.Port, string(analysisCode), ) if err == nil { analysisChangeWriteCounter.Inc(1) } return log.Errore(err) }
func (s *TestSuite) TestDiscover(c *C) { var err error _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", masterKey.Hostname, masterKey.Port) _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave1Key.Hostname, slave1Key.Port) _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave2Key.Hostname, slave2Key.Port) _, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave3Key.Hostname, slave3Key.Port) _, found, _ := ReadInstance(&masterKey) c.Assert(found, Equals, false) _, _ = ReadTopologyInstance(&slave1Key) _, found, err = ReadInstance(&slave1Key) c.Assert(found, Equals, true) c.Assert(err, IsNil) }
// EnableRecovery ensures recoveries are enabled globally func EnableRecovery() error { _, err := db.ExecOrchestrator(` DELETE FROM global_recovery_disable `, ) return err }
// BeginDowntime will make mark an instance as downtimed (or override existing downtime period) func BeginDowntime(instanceKey *InstanceKey, owner string, reason string, durationSeconds uint) error { if durationSeconds == 0 { durationSeconds = config.Config.MaintenanceExpireMinutes * 60 } _, err := db.ExecOrchestrator(` insert into database_instance_downtime ( hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason ) VALUES ( ?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ? ) on duplicate key update downtime_active=values(downtime_active), begin_timestamp=values(begin_timestamp), end_timestamp=values(end_timestamp), owner=values(owner), reason=values(reason) `, instanceKey.Hostname, instanceKey.Port, durationSeconds, owner, reason, ) if err != nil { return log.Errore(err) } AuditOperation("begin-downtime", instanceKey, fmt.Sprintf("owner: %s, reason: %s", owner, reason)) return nil }
// EndDowntime will remove downtime flag from an instance func EndDowntime(instanceKey *InstanceKey) error { res, err := db.ExecOrchestrator(` update database_instance_downtime set downtime_active = NULL, end_timestamp = NOW() where hostname = ? and port = ? and downtime_active = 1 `, instanceKey.Hostname, instanceKey.Port, ) if err != nil { return log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = fmt.Errorf("Instance is not in downtime mode: %+v", instanceKey) } else { // success AuditOperation("end-downtime", instanceKey, "") } return err }
// deleteHostnameResolves compeltely erases the database cache func deleteHostnameResolves() error { _, err := db.ExecOrchestrator(` delete from hostname_resolve`, ) return err }
// DeleteInvalidHostnameResolves removes invalid resolves. At this time these are: // - infinite loop resolves (A->B and B->A), remove earlier mapping func DeleteInvalidHostnameResolves() error { var invalidHostnames []string query := ` select early.hostname from hostname_resolve as latest join hostname_resolve early on (latest.resolved_hostname = early.hostname and latest.hostname = early.resolved_hostname) where latest.hostname != latest.resolved_hostname and latest.resolved_timestamp > early.resolved_timestamp ` err := db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error { invalidHostnames = append(invalidHostnames, m.GetString("hostname")) return nil }) if err != nil { return err } for _, invalidHostname := range invalidHostnames { _, err = db.ExecOrchestrator(` delete from hostname_resolve where hostname = ?`, invalidHostname, ) log.Errore(err) } return err }
// DisableRecovery ensures recoveries are disabled globally func DisableRecovery() error { _, err := db.ExecOrchestrator(` INSERT IGNORE INTO global_recovery_disable (disable_recovery) VALUES (1) `, ) return err }
// ForgetLongUnseenAgents will remove entries of all agents that have long since been last seen. func ForgetLongUnseenAgents() error { _, err := db.ExecOrchestrator(` delete from host_agent where last_submitted < NOW() - interval ? hour`, config.Config.UnseenAgentForgetHours, ) return err }
// ForgetExpiredHostnameResolves func ForgetExpiredHostnameResolves() error { _, err := db.ExecOrchestrator(` delete from hostname_resolve where resolved_timestamp < NOW() - interval (? * 2) minute`, config.Config.ExpiryHostnameResolvesMinutes, ) return err }
// ExpireClusterDomainName expires cluster_domain_name entries that haven't been updated recently. func ExpireClusterDomainName() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` delete from cluster_domain_name where last_registered < NOW() - INTERVAL ? MINUTE `, config.Config.ExpiryHostnameResolvesMinutes, ) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) }
// ExpireMasterPositionEquivalence expires old master_position_equivalence func ExpireMasterPositionEquivalence() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` delete from master_position_equivalence where last_suggested < NOW() - INTERVAL ? HOUR `, config.Config.UnseenInstanceForgetHours, ) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) }
// DeregisterHostnameUnresolve removes an unresovle entry func DeregisterHostnameUnresolve(instanceKey *InstanceKey) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` delete from hostname_unresolve where hostname=? `, instanceKey.Hostname, ) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) }
// ExpireInstanceAnalysisChangelog removes old-enough analysis entries from the changelog func ExpireInstanceAnalysisChangelog() error { _, err := db.ExecOrchestrator(` delete from database_instance_analysis_changelog where analysis_timestamp < now() - interval ? hour `, config.Config.UnseenInstanceForgetHours, ) return log.Errore(err) }
// ExpireNodesHistory cleans up the nodes history func ExpireNodesHistory() error { _, err := db.ExecOrchestrator(` delete from node_health_history where first_seen_active < now() - interval ? hour `, config.Config.UnseenInstanceForgetHours, ) return log.Errore(err) }
// ExpirePoolInstances cleans up the database_instance_pool table from expired items func ExpirePoolInstances() error { _, err := db.ExecOrchestrator(` delete from database_instance_pool where registered_at < now() - interval ? minute `, config.Config.InstancePoolExpiryMinutes, ) return log.Errore(err) }
// ReplaceAliasClusterName replaces alis mapping of one cluster name onto a new cluster name. // Used in topology recovery func ReplaceAliasClusterName(oldClusterName string, newClusterName string) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` update cluster_alias set cluster_name = ? where cluster_name = ? `, newClusterName, oldClusterName) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) }
// ExpireAccessTokens removes old, known to be uneligible tokens func ExpireAccessTokens() error { _, err := db.ExecOrchestrator(` delete from access_token where generated_at < now() - interval ? minute and is_reentrant = 0 `, config.Config.AccessTokenExpiryMinutes, ) return log.Errore(err) }
// AuditOperation creates and writes a new audit entry by given params func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error { if instanceKey == nil { instanceKey = &InstanceKey{} } clusterName := "" if instanceKey.Hostname != "" { clusterName, _ = GetClusterName(instanceKey) } if config.Config.AuditLogFile != "" { go func() error { f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600) if err != nil { return log.Errore(err) } defer f.Close() text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message) if _, err = f.WriteString(text); err != nil { return log.Errore(err) } return nil }() } _, err := db.ExecOrchestrator(` insert into audit ( audit_timestamp, audit_type, hostname, port, cluster_name, message ) VALUES ( NOW(), ?, ?, ?, ?, ? ) `, auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message, ) if err != nil { return log.Errore(err) } logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message) if syslogWriter != nil { go func() { syslogWriter.Info(logMessage) }() } log.Debugf(logMessage) auditOperationCounter.Inc(1) return err }
// GrabElection forcibly grabs leadership. Use with care!! func GrabElection() error { _, err := db.ExecOrchestrator(` replace into active_node ( anchor, hostname, token, last_seen_active ) values ( 1, ?, ?, NOW() ) `, ThisHostname, ProcessToken.Hash, ) return log.Errore(err) }
// WriteClusterAlias will write (and override) a single cluster name mapping func WriteClusterAlias(clusterName string, alias string) error { writeFunc := func() error { _, err := db.ExecOrchestrator(` replace into cluster_alias (cluster_name, alias) values (?, ?) `, clusterName, alias) return log.Errore(err) } return ExecDBWriteFunc(writeFunc) }
// ExpireDowntime will remove the maintenance flag on old downtimes func ExpireDowntime() error { { res, err := db.ExecOrchestrator(` delete from database_instance_downtime where downtime_active is null and end_timestamp < NOW() - INTERVAL ? DAY `, config.Config.MaintenancePurgeDays, ) if err != nil { return log.Errore(err) } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { AuditOperation("expire-downtime", nil, fmt.Sprintf("Purged %d historical entries", rowsAffected)) } } { res, err := db.ExecOrchestrator(` update database_instance_downtime set downtime_active = NULL where downtime_active = 1 and end_timestamp < NOW() `, ) if err != nil { return log.Errore(err) } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected)) } } return nil }
// expireAvailableNodes is an aggressive purging method to remove node entries who have skipped // their keepalive for two times func expireAvailableNodes() error { if !config.Config.NodeHealthExpiry { return nil } _, err := db.ExecOrchestrator(` delete from node_health where last_seen_active < now() - interval ? second `, registrationPollSeconds*2, ) return log.Errore(err) }
// ExpireAudit removes old rows from the audit table func ExpireAudit() error { writeFunc := func() error { _, err := db.ExecOrchestrator(` delete from audit where audit_timestamp < NOW() - INTERVAL ? DAY `, config.Config.AuditPurgeDays, ) return err } return ExecDBWriteFunc(writeFunc) }
// The test also assumes one backend MySQL server. func (s *TestSuite) SetUpSuite(c *C) { config.Config.MySQLTopologyUser = "******" config.Config.MySQLTopologyPassword = "******" config.Config.MySQLOrchestratorHost = "127.0.0.1" config.Config.MySQLOrchestratorPort = 5622 config.Config.MySQLOrchestratorDatabase = "orchestrator" config.Config.MySQLOrchestratorUser = "******" config.Config.MySQLOrchestratorPassword = "******" config.Config.DiscoverByShowSlaveHosts = true _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", masterKey.Hostname, masterKey.Port) _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave1Key.Hostname, slave1Key.Port) _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave2Key.Hostname, slave2Key.Port) _, _ = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave3Key.Hostname, slave3Key.Port) ExecInstance(&masterKey, "drop database if exists orchestrator_test") ExecInstance(&masterKey, "create database orchestrator_test") ExecInstance(&masterKey, `create table orchestrator_test.test_table( name varchar(128) charset ascii not null primary key, value varchar(128) charset ascii not null )`) rand.Seed(time.Now().UTC().UnixNano()) }
// ExpireAsyncRequests will mark "lost" entries as being completed func ExpireAsyncRequests() error { _, err := db.ExecOrchestrator(` update async_request set end_timestamp = NOW() where end_timestamp IS NULL and begin_timestamp < NOW() - INTERVAL ? MINUTE `, config.Config.MaintenanceExpireMinutes, ) if err != nil { log.Errore(err) } _, err = db.ExecOrchestrator(` delete from async_request where end_timestamp IS NOT NULL and begin_timestamp < NOW() - INTERVAL ? DAY `, config.Config.MaintenancePurgeDays, ) return log.Errore(err) }