Ejemplo n.º 1
0
// RegisterNode records this node in the node_health table and returns the
// result and error of that upsert.
//
// When firstTime is true, a row is first added (insert ignore) to
// node_health_history; the result and error of that statement are not
// inspected. On a duplicate key in node_health, the token, last_seen_active and
// app_version columns are overwritten, extra_info is overwritten only when the
// supplied value is non-empty, and command is left as stored.
func RegisterNode(extraInfo string, command string, firstTime bool) (sql.Result, error) {
	const historyInsert = `
			insert ignore into node_health_history
				(hostname, token, first_seen_active, extra_info, command, app_version)
			values
				(?, ?, NOW(), ?, ?, ?)
			`
	const healthUpsert = `
			insert into node_health
				(hostname, token, last_seen_active, extra_info, command, app_version)
			values
				(?, ?, NOW(), ?, ?, ?)
			on duplicate key update
				token=values(token),
				last_seen_active=values(last_seen_active),
				extra_info=if(values(extra_info) != '', values(extra_info), extra_info),
				app_version=values(app_version)
			`

	if firstTime {
		// The outcome of the history insert is not checked.
		db.ExecOrchestrator(historyInsert,
			ThisHostname, ProcessToken.Hash, extraInfo, command,
			config.RuntimeCLIFlags.ConfiguredVersion,
		)
	}
	return db.ExecOrchestrator(healthUpsert,
		ThisHostname, ProcessToken.Hash, extraInfo, command,
		config.RuntimeCLIFlags.ConfiguredVersion,
	)
}
Ejemplo n.º 2
0
// WriteHostnameUnresolve upserts an entry in hostname_unresolve and records the
// same hostname -> unresolved hostname mapping in hostname_unresolve_history.
//
// Both statements run inside a single function handed to ExecDBWriteFunc. An
// error from either statement is logged via log.Errore and returned;
// writeUnresolvedHostnameCounter is incremented only when both succeed.
func WriteHostnameUnresolve(instanceKey *InstanceKey, unresolvedHostname string) error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
        	insert into hostname_unresolve (
        		hostname,
        		unresolved_hostname,
        		last_registered)
        	values (?, ?, NOW())
        	on duplicate key update
        		unresolved_hostname=values(unresolved_hostname),
        		last_registered=now()
				`, instanceKey.Hostname, unresolvedHostname,
		)
		if err != nil {
			return log.Errore(err)
		}
		_, err = db.ExecOrchestrator(`
	        	replace into hostname_unresolve_history (
        		hostname,
        		unresolved_hostname,
        		last_registered)
        	values (?, ?, NOW())
				`, instanceKey.Hostname, unresolvedHostname,
		)
		if err != nil {
			// A failed history write must be reported rather than masked by a nil return.
			return log.Errore(err)
		}
		writeUnresolvedHostnameCounter.Inc(1)
		return nil
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 3
0
// UpdateClusterAliases rebuilds cluster_alias rows from database_instance.
//
// It runs two statements, each through ExecDBWriteFunc, and stops at the first
// one that fails:
//  1. For every non-empty suggested_cluster_alias, pick a single cluster_name
//     (the first one in the group_concat ordering) while skipping instances
//     that are in an active downtime whose reason is
//     DowntimeLostInRecoveryMessage.
//  2. For clusters where every instance has an empty suggested alias, write the
//     cluster name itself as its alias.
func UpdateClusterAliases() error {
	const aliasesFromSuggestions = `
			replace into
					cluster_alias (alias, cluster_name, last_registered)
				select
				    suggested_cluster_alias,
						substring_index(group_concat(
							cluster_name order by
								((last_checked <= last_seen) is true) desc,
								read_only asc,
								num_slave_hosts desc
							), ',', 1) as cluster_name,
				    NOW()
				  from
				    database_instance
				    left join database_instance_downtime using (hostname, port)
				  where
				    suggested_cluster_alias!=''
						/* exclude newly demoted, downtimed masters */
						and ifnull(
								database_instance_downtime.downtime_active = 1
								and database_instance_downtime.end_timestamp > now()
								and database_instance_downtime.reason = ?
							, false) is false
				  group by
				    suggested_cluster_alias
			`
	// Fallback for clusters without any suggested alias: the alias is the
	// real cluster name.
	const aliasesFromClusterNames = `
			replace into
					cluster_alias (alias, cluster_name, last_registered)
				select
						cluster_name, cluster_name, now()
				  from
				    database_instance
				  group by
				    cluster_name
					having
						sum(suggested_cluster_alias = '') = count(*)
			`

	writeSuggested := func() error {
		_, execErr := db.ExecOrchestrator(aliasesFromSuggestions, DowntimeLostInRecoveryMessage)
		return log.Errore(execErr)
	}
	if err := ExecDBWriteFunc(writeSuggested); err != nil {
		return err
	}

	writeFallback := func() error {
		_, execErr := db.ExecOrchestrator(aliasesFromClusterNames)
		return log.Errore(execErr)
	}
	return ExecDBWriteFunc(writeFallback)
}
Ejemplo n.º 4
0
// WriteLongRunningProcesses replaces the stored long running queries of the
// given instance with the supplied processes.
//
// Existing rows for the instance's hostname and port are deleted first; a
// failure there aborts the write. Each process is then inserted (insert
// ignore). Insert failures do not stop the loop: the last failing insert's
// error is logged and returned once all processes have been attempted.
func WriteLongRunningProcesses(instanceKey *InstanceKey, processes []Process) error {
	const clearExisting = `
			delete from
					database_instance_long_running_queries
				where
					hostname = ?
					and port = ?
			`
	const insertProcess = `
	        	insert ignore into database_instance_long_running_queries (
	        		hostname,
	        		port,
	        		process_id,
	        		process_started_at,
					process_user,
					process_host,
					process_db,
					process_command,
					process_time_seconds,
					process_state,
					process_info
				) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

	return ExecDBWriteFunc(func() error {
		if _, delErr := db.ExecOrchestrator(clearExisting, instanceKey.Hostname, instanceKey.Port); delErr != nil {
			return log.Errore(delErr)
		}

		var lastInsertErr error
		for _, proc := range processes {
			_, insErr := db.ExecOrchestrator(insertProcess,
				instanceKey.Hostname,
				instanceKey.Port,
				proc.Id,
				proc.StartedAt,
				proc.User,
				proc.Host,
				proc.Db,
				proc.Command,
				proc.Time,
				proc.State,
				proc.Info,
			)
			if insErr != nil {
				// Keep going; remember the most recent failure.
				lastInsertErr = insErr
			}
		}
		if lastInsertErr != nil {
			return log.Errore(lastInsertErr)
		}
		return nil
	})
}
Ejemplo n.º 5
0
// auditInstanceAnalysisInChangelog writes an instance's analysis to the
// database_instance_analysis_changelog table, but only when the analysis code
// has changed.
//
// Two layers prevent repeated writes of a recurring analysis:
//   - recentInstantAnalysis, an in-process cache keyed by the instance's display
//     string. A cache hit with the same code refreshes the entry and returns.
//   - database_instance_last_analysis, a persistent record of the last analysis
//     per instance. The upsert reports affected rows only when the row is new or
//     the analysis differs; zero affected rows means nothing changed.
//
// Returns any database error (logged via log.Errore), or nil.
func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode AnalysisCode) error {
	if lastWrittenAnalysis, found := recentInstantAnalysis.Get(instanceKey.DisplayString()); found {
		if lastWrittenAnalysis == analysisCode {
			// Surely nothing new.
			// And let's expand the timeout
			recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
			return nil
		}
	}
	// Passed the cache; but does database agree that there's a change? Here's a persistent cache; this comes here
	// to verify no two orchestrator services are doing this without coordinating (namely, one dies, the other taking its place
	// and has no familiarity of the former's cache)
	analysisChangeWriteAttemptCounter.Inc(1)
	// analysis_timestamp must be assigned BEFORE analysis: MySQL evaluates the
	// assignments left to right, so comparing after analysis has already been
	// overwritten would always find the values equal and never advance the
	// timestamp.
	sqlResult, err := db.ExecOrchestrator(`
			insert ignore into database_instance_last_analysis (
					hostname, port, analysis_timestamp, analysis
				) values (
					?, ?, now(), ?
				) on duplicate key update
					analysis_timestamp = if(analysis = values(analysis), analysis_timestamp, values(analysis_timestamp)),
					analysis = values(analysis)
			`,
		instanceKey.Hostname, instanceKey.Port, string(analysisCode),
	)
	if err != nil {
		return log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		return log.Errore(err)
	}
	recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
	lastAnalysisChanged := (rows > 0)

	if !lastAnalysisChanged {
		return nil
	}

	_, err = db.ExecOrchestrator(`
			insert into database_instance_analysis_changelog (
					hostname, port, analysis_timestamp, analysis
				) values (
					?, ?, now(), ?
				)
			`,
		instanceKey.Hostname, instanceKey.Port, string(analysisCode),
	)
	if err == nil {
		analysisChangeWriteCounter.Inc(1)
	}
	return log.Errore(err)
}
Ejemplo n.º 6
0
// TestDiscover removes the master and the three slave instances from
// database_instance, checks that the master is no longer found, then reads
// slave1's topology and checks that slave1 can subsequently be read back.
func (s *TestSuite) TestDiscover(c *C) {
	var err error
	// Assert each cleanup statement: a failed delete would otherwise surface
	// only as a confusing "found" mismatch further down.
	_, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", masterKey.Hostname, masterKey.Port)
	c.Assert(err, IsNil)
	_, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave1Key.Hostname, slave1Key.Port)
	c.Assert(err, IsNil)
	_, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave2Key.Hostname, slave2Key.Port)
	c.Assert(err, IsNil)
	_, err = db.ExecOrchestrator("delete from database_instance where hostname = ? and port = ?", slave3Key.Hostname, slave3Key.Port)
	c.Assert(err, IsNil)
	_, found, _ := ReadInstance(&masterKey)
	c.Assert(found, Equals, false)
	_, _ = ReadTopologyInstance(&slave1Key)
	_, found, err = ReadInstance(&slave1Key)
	c.Assert(found, Equals, true)
	c.Assert(err, IsNil)
}
Ejemplo n.º 7
0
// EnableRecovery enables recoveries globally by deleting every row from
// global_recovery_disable. It returns the error of that delete, if any.
func EnableRecovery() error {
	const clearDisableFlag = `
		DELETE FROM global_recovery_disable
	`
	if _, execErr := db.ExecOrchestrator(clearDisableFlag); execErr != nil {
		return execErr
	}
	return nil
}
Ejemplo n.º 8
0
// BeginDowntime marks an instance as downtimed, overriding any existing
// downtime entry for the same instance.
//
// A durationSeconds of 0 falls back to config.Config.MaintenanceExpireMinutes
// converted to seconds. A database error is logged and returned. On success
// a "begin-downtime" audit entry is written; the audit call's own result is
// not checked.
func BeginDowntime(instanceKey *InstanceKey, owner string, reason string, durationSeconds uint) error {
	const upsertDowntime = `
			insert 
				into database_instance_downtime (
					hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason
				) VALUES (
					?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ?
				)
				on duplicate key update
					downtime_active=values(downtime_active),
					begin_timestamp=values(begin_timestamp),
					end_timestamp=values(end_timestamp),
					owner=values(owner),
					reason=values(reason)
			`

	seconds := durationSeconds
	if seconds == 0 {
		seconds = config.Config.MaintenanceExpireMinutes * 60
	}

	if _, execErr := db.ExecOrchestrator(upsertDowntime,
		instanceKey.Hostname, instanceKey.Port, seconds, owner, reason,
	); execErr != nil {
		return log.Errore(execErr)
	}

	auditMessage := fmt.Sprintf("owner: %s, reason: %s", owner, reason)
	AuditOperation("begin-downtime", instanceKey, auditMessage)
	return nil
}
Ejemplo n.º 9
0
// EndDowntime clears the downtime flag of an instance and stamps the end of
// the downtime with the current time.
//
// Only a row with downtime_active = 1 is updated. If no row was affected an
// error stating the instance is not in downtime mode is returned (not logged);
// otherwise an "end-downtime" audit entry is written. A database error is
// logged and returned.
func EndDowntime(instanceKey *InstanceKey) error {
	const deactivate = `
			update
				database_instance_downtime
			set  
				downtime_active = NULL,
				end_timestamp = NOW()
			where
				hostname = ? 
				and port = ?
				and downtime_active = 1
			`

	result, execErr := db.ExecOrchestrator(deactivate, instanceKey.Hostname, instanceKey.Port)
	if execErr != nil {
		return log.Errore(execErr)
	}

	// The error of RowsAffected is not inspected; a zero count is treated as
	// "nothing was in downtime".
	updated, _ := result.RowsAffected()
	if updated == 0 {
		return fmt.Errorf("Instance is not in downtime mode: %+v", instanceKey)
	}

	AuditOperation("end-downtime", instanceKey, "")
	return nil
}
Ejemplo n.º 10
0
// deleteHostnameResolves completely erases the database-backed hostname
// resolve cache by deleting every row from hostname_resolve.
func deleteHostnameResolves() error {
	const wipeAll = `
			delete
				from hostname_resolve`
	if _, execErr := db.ExecOrchestrator(wipeAll); execErr != nil {
		return execErr
	}
	return nil
}
Ejemplo n.º 11
0
// DeleteInvalidHostnameResolves removes invalid resolves. At this time these are:
// - infinite loop resolves (A->B and B->A), remove earlier mapping
//
// The offending hostnames are collected first and then deleted one by one.
// A failed delete is logged and does not stop the remaining deletes. The
// function returns the query error if the lookup fails, otherwise the first
// delete error encountered, or nil when every delete succeeded.
func DeleteInvalidHostnameResolves() error {
	var invalidHostnames []string

	query := `
		select
		    early.hostname
		  from
		    hostname_resolve as latest
		    join hostname_resolve early on (latest.resolved_hostname = early.hostname and latest.hostname = early.resolved_hostname)
		  where
		    latest.hostname != latest.resolved_hostname
		    and latest.resolved_timestamp > early.resolved_timestamp
	   	`

	err := db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error {
		invalidHostnames = append(invalidHostnames, m.GetString("hostname"))
		return nil
	})
	if err != nil {
		return err
	}

	// Track the first failure separately so that a later successful delete
	// cannot mask it.
	var firstErr error
	for _, invalidHostname := range invalidHostnames {
		_, err = db.ExecOrchestrator(`
			delete
				from hostname_resolve
			where
				hostname = ?`,
			invalidHostname,
		)
		if err != nil {
			log.Errore(err)
			if firstErr == nil {
				firstErr = err
			}
		}
	}
	return firstErr
}
Ejemplo n.º 12
0
// DisableRecovery disables recoveries globally by inserting (insert ignore) a
// row into global_recovery_disable. It returns the error of that insert, if
// any.
func DisableRecovery() error {
	const setDisableFlag = `
		INSERT IGNORE INTO global_recovery_disable
			(disable_recovery)
		VALUES  (1)
	`
	if _, execErr := db.ExecOrchestrator(setDisableFlag); execErr != nil {
		return execErr
	}
	return nil
}
Ejemplo n.º 13
0
// ForgetLongUnseenAgents deletes host_agent rows whose last_submitted is older
// than config.Config.UnseenAgentForgetHours hours.
func ForgetLongUnseenAgents() error {
	const purgeStaleAgents = `
			delete
				from host_agent
			where
				last_submitted < NOW() - interval ? hour`
	if _, execErr := db.ExecOrchestrator(purgeStaleAgents, config.Config.UnseenAgentForgetHours); execErr != nil {
		return execErr
	}
	return nil
}
Ejemplo n.º 14
0
// ForgetExpiredHostnameResolves deletes hostname_resolve rows whose
// resolved_timestamp is older than twice
// config.Config.ExpiryHostnameResolvesMinutes minutes.
func ForgetExpiredHostnameResolves() error {
	const purgeExpiredResolves = `
			delete
				from hostname_resolve
			where
				resolved_timestamp < NOW() - interval (? * 2) minute`
	if _, execErr := db.ExecOrchestrator(purgeExpiredResolves, config.Config.ExpiryHostnameResolvesMinutes); execErr != nil {
		return execErr
	}
	return nil
}
Ejemplo n.º 15
0
// ExpireClusterDomainName expires cluster_domain_name entries that haven't been updated recently.
func ExpireClusterDomainName() error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
    	delete from cluster_domain_name
				where last_registered < NOW() - INTERVAL ? MINUTE
				`, config.Config.ExpiryHostnameResolvesMinutes,
		)
		return log.Errore(err)
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 16
0
// ExpireMasterPositionEquivalence expires old master_position_equivalence
func ExpireMasterPositionEquivalence() error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
        	delete from master_position_equivalence 
				where last_suggested < NOW() - INTERVAL ? HOUR
				`, config.Config.UnseenInstanceForgetHours,
		)
		return log.Errore(err)
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 17
0
// DeregisterHostnameUnresolve removes the hostname_unresolve entry of the
// given instance's hostname. The delete runs through ExecDBWriteFunc; errors
// are logged.
func DeregisterHostnameUnresolve(instanceKey *InstanceKey) error {
	const removeUnresolve = `
        	delete from hostname_unresolve
				where hostname=?
				`
	return ExecDBWriteFunc(func() error {
		_, execErr := db.ExecOrchestrator(removeUnresolve, instanceKey.Hostname)
		return log.Errore(execErr)
	})
}
Ejemplo n.º 18
0
// ExpireInstanceAnalysisChangelog deletes database_instance_analysis_changelog
// rows whose analysis_timestamp is older than
// config.Config.UnseenInstanceForgetHours hours. Errors are logged and
// returned.
func ExpireInstanceAnalysisChangelog() error {
	const purgeOldChangelog = `
			delete
				from database_instance_analysis_changelog
			where
				analysis_timestamp < now() - interval ? hour
			`
	_, execErr := db.ExecOrchestrator(purgeOldChangelog, config.Config.UnseenInstanceForgetHours)
	return log.Errore(execErr)
}
Ejemplo n.º 19
0
// ExpireNodesHistory deletes node_health_history rows whose first_seen_active
// is older than config.Config.UnseenInstanceForgetHours hours. Errors are
// logged and returned.
func ExpireNodesHistory() error {
	const purgeOldHistory = `
			delete
				from node_health_history
			where
				first_seen_active < now() - interval ? hour
			`
	_, execErr := db.ExecOrchestrator(purgeOldHistory, config.Config.UnseenInstanceForgetHours)
	return log.Errore(execErr)
}
Ejemplo n.º 20
0
// ExpirePoolInstances deletes database_instance_pool rows whose registered_at
// is older than config.Config.InstancePoolExpiryMinutes minutes. Errors are
// logged and returned.
func ExpirePoolInstances() error {
	const purgeExpiredPoolEntries = `
			delete
				from database_instance_pool
			where
				registered_at < now() - interval ? minute
			`
	_, execErr := db.ExecOrchestrator(purgeExpiredPoolEntries, config.Config.InstancePoolExpiryMinutes)
	return log.Errore(execErr)
}
Ejemplo n.º 21
0
// ReplaceAliasClusterName replaces alis mapping of one cluster name onto a new cluster name.
// Used in topology recovery
func ReplaceAliasClusterName(oldClusterName string, newClusterName string) error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
			update cluster_alias
				set cluster_name = ?
				where cluster_name = ?
			`,
			newClusterName, oldClusterName)
		return log.Errore(err)
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 22
0
// ExpireAccessTokens deletes non-reentrant access_token rows (is_reentrant = 0)
// whose generated_at is older than config.Config.AccessTokenExpiryMinutes
// minutes. Errors are logged and returned.
func ExpireAccessTokens() error {
	const purgeExpiredTokens = `
			delete
				from access_token
			where
				generated_at < now() - interval ? minute
				and is_reentrant = 0
			`
	_, execErr := db.ExecOrchestrator(purgeExpiredTokens, config.Config.AccessTokenExpiryMinutes)
	return log.Errore(execErr)
}
Ejemplo n.º 23
0
// AuditOperation creates and writes a new audit entry by given params.
//
// A nil instanceKey is replaced by an empty InstanceKey. The cluster name is
// looked up only when the key has a hostname; a lookup error is ignored and
// the name stays empty. The entry is then written to:
//   - config.Config.AuditLogFile, when configured: appended asynchronously,
//     failures are only logged;
//   - the audit table: a failure here is logged and returned, and the syslog
//     and debug-log steps are skipped;
//   - syslogWriter, when set: sent asynchronously;
//   - the debug log.
//
// Returns the audit table insert error, or nil.
func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error {

	if instanceKey == nil {
		instanceKey = &InstanceKey{}
	}
	clusterName := ""
	if instanceKey.Hostname != "" {
		clusterName, _ = GetClusterName(instanceKey)
	}

	if config.Config.AuditLogFile != "" {
		// Fire-and-forget: nobody can receive a goroutine's return value, so
		// failures are reported through the log only.
		go func() {
			f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600)
			if err != nil {
				log.Errore(err)
				return
			}

			defer f.Close()
			text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message)
			if _, err = f.WriteString(text); err != nil {
				log.Errore(err)
			}
		}()
	}
	_, err := db.ExecOrchestrator(`
			insert 
				into audit (
					audit_timestamp, audit_type, hostname, port, cluster_name, message
				) VALUES (
					NOW(), ?, ?, ?, ?, ?
				)
			`,
		auditType,
		instanceKey.Hostname,
		instanceKey.Port,
		clusterName,
		message,
	)
	if err != nil {
		return log.Errore(err)
	}
	logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message)
	if syslogWriter != nil {
		go func() {
			syslogWriter.Info(logMessage)
		}()
	}
	// Pass the text as an argument, never as the format string: message is
	// caller-supplied and may contain '%'.
	log.Debugf("%s", logMessage)
	auditOperationCounter.Inc(1)

	return nil
}
Ejemplo n.º 24
0
// GrabElection forcibly grabs leadership by replacing the active_node row
// (anchor 1) with this host's name and process token. Use with care!!
// Errors are logged and returned.
func GrabElection() error {
	const takeOverLeadership = `
			replace into active_node (
					anchor, hostname, token, last_seen_active
				) values (
					1, ?, ?, NOW()
				)
			`
	_, execErr := db.ExecOrchestrator(takeOverLeadership, ThisHostname, ProcessToken.Hash)
	return log.Errore(execErr)
}
Ejemplo n.º 25
0
// WriteClusterAlias will write (and override) a single cluster name mapping
func WriteClusterAlias(clusterName string, alias string) error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
			replace into
					cluster_alias (cluster_name, alias)
				values
					(?, ?)
			`,
			clusterName, alias)
		return log.Errore(err)
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 26
0
// ExpireDowntime maintains the database_instance_downtime table in two steps:
//  1. purge inactive entries (downtime_active is null) whose end_timestamp is
//     older than config.Config.MaintenancePurgeDays days;
//  2. deactivate active entries whose end_timestamp has passed.
//
// Each step that affected at least one row writes an "expire-downtime" audit
// entry. A database error is logged and returned immediately; an error in the
// first step prevents the second from running.
func ExpireDowntime() error {
	const purgeHistorical = `
			delete from
				database_instance_downtime
			where
				downtime_active is null
				and end_timestamp < NOW() - INTERVAL ? DAY 
			`
	const deactivateExpired = `
			update
				database_instance_downtime
			set  
				downtime_active = NULL				
			where
				downtime_active = 1
				and end_timestamp < NOW() 
			`

	purgeResult, err := db.ExecOrchestrator(purgeHistorical, config.Config.MaintenancePurgeDays)
	if err != nil {
		return log.Errore(err)
	}
	if purged, _ := purgeResult.RowsAffected(); purged > 0 {
		AuditOperation("expire-downtime", nil, fmt.Sprintf("Purged %d historical entries", purged))
	}

	expireResult, err := db.ExecOrchestrator(deactivateExpired)
	if err != nil {
		return log.Errore(err)
	}
	if expired, _ := expireResult.RowsAffected(); expired > 0 {
		AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", expired))
	}

	return nil
}
Ejemplo n.º 27
0
// expireAvailableNodes aggressively purges node_health entries whose
// last_seen_active is older than twice registrationPollSeconds, i.e. nodes
// that have skipped their keepalive two times. It does nothing when
// config.Config.NodeHealthExpiry is false. Errors are logged and returned.
func expireAvailableNodes() error {
	const purgeSilentNodes = `
			delete
				from node_health
			where
				last_seen_active < now() - interval ? second
			`
	if config.Config.NodeHealthExpiry {
		_, execErr := db.ExecOrchestrator(purgeSilentNodes, registrationPollSeconds*2)
		return log.Errore(execErr)
	}
	return nil
}
Ejemplo n.º 28
0
// ExpireAudit removes old rows from the audit table
func ExpireAudit() error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
 		delete from
				audit
			where
				audit_timestamp < NOW() - INTERVAL ? DAY 
			`,
			config.Config.AuditPurgeDays,
		)
		return err
	}
	return ExecDBWriteFunc(writeFunc)
}
Ejemplo n.º 29
0
// SetUpSuite prepares the environment shared by the suite's tests: it points
// the configuration at a local orchestrator backend, removes the master and
// slave instances from database_instance, recreates the orchestrator_test
// database with its test_table on the master, and seeds math/rand.
// The test also assumes one backend MySQL server.
// Errors from the cleanup and schema statements are not checked.
func (s *TestSuite) SetUpSuite(c *C) {
	config.Config.MySQLTopologyUser = "******"
	config.Config.MySQLTopologyPassword = "******"
	config.Config.MySQLOrchestratorHost = "127.0.0.1"
	config.Config.MySQLOrchestratorPort = 5622
	config.Config.MySQLOrchestratorDatabase = "orchestrator"
	config.Config.MySQLOrchestratorUser = "******"
	config.Config.MySQLOrchestratorPassword = "******"
	config.Config.DiscoverByShowSlaveHosts = true

	const forgetInstance = "delete from database_instance where hostname = ? and port = ?"
	_, _ = db.ExecOrchestrator(forgetInstance, masterKey.Hostname, masterKey.Port)
	_, _ = db.ExecOrchestrator(forgetInstance, slave1Key.Hostname, slave1Key.Port)
	_, _ = db.ExecOrchestrator(forgetInstance, slave2Key.Hostname, slave2Key.Port)
	_, _ = db.ExecOrchestrator(forgetInstance, slave3Key.Hostname, slave3Key.Port)

	// Schema statements, executed on the master in this order.
	schemaStatements := []string{
		"drop database if exists orchestrator_test",
		"create database orchestrator_test",
		`create table orchestrator_test.test_table(
			name    varchar(128) charset ascii not null primary key,
			value   varchar(128) charset ascii not null
		)`,
	}
	for _, statement := range schemaStatements {
		ExecInstance(&masterKey, statement)
	}

	rand.Seed(time.Now().UTC().UnixNano())
}
Ejemplo n.º 30
0
// ExpireAsyncRequests marks "lost" async_request entries as completed and
// purges old completed ones:
//  1. entries without an end_timestamp that began more than
//     config.Config.MaintenanceExpireMinutes minutes ago get end_timestamp set
//     to now; a failure here is logged and the purge still runs;
//  2. entries with an end_timestamp that began more than
//     config.Config.MaintenancePurgeDays days ago are deleted.
//
// Only the error of the purge step (logged) is returned.
func ExpireAsyncRequests() error {
	const closeLostRequests = `
			update
				async_request
			set
				end_timestamp = NOW()
			where
				end_timestamp IS NULL
				and begin_timestamp < NOW() - INTERVAL ? MINUTE
			`
	const purgeOldRequests = `
			delete from
				async_request
			where
				end_timestamp IS NOT NULL
				and begin_timestamp < NOW() - INTERVAL ? DAY
			`

	if _, closeErr := db.ExecOrchestrator(closeLostRequests, config.Config.MaintenanceExpireMinutes); closeErr != nil {
		log.Errore(closeErr)
	}
	_, purgeErr := db.ExecOrchestrator(purgeOldRequests, config.Config.MaintenancePurgeDays)
	return log.Errore(purgeErr)
}