// WriteLongRunningProcesses rewrites current state of long running processes for given instance.
// The previous snapshot for the instance is deleted wholesale and replaced by the given
// process list. All inserts are attempted even if one fails; the last insert error wins.
func WriteLongRunningProcesses(instanceKey *InstanceKey, processes []Process) error {
	writeFunc := func() error {
		db, err := db.OpenOrchestrator()
		if err != nil {
			return log.Errore(err)
		}
		// Clear any previous snapshot for this host:port before writing the new one.
		_, err = sqlutils.Exec(db, `
			delete from
				database_instance_long_running_queries
			where
				hostname = ?
				and port = ?
			`,
			instanceKey.Hostname,
			instanceKey.Port)
		if err != nil {
			return log.Errore(err)
		}
		for _, process := range processes {
			_, merr := sqlutils.Exec(db, `
				insert into database_instance_long_running_queries (
					hostname,
					port,
					process_id,
					process_started_at,
					process_user,
					process_host,
					process_db,
					process_command,
					process_time_seconds,
					process_state,
					process_info
				) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
				instanceKey.Hostname,
				instanceKey.Port,
				process.Id,
				process.StartedAt,
				process.User,
				process.Host,
				process.Db,
				process.Command,
				process.Time,
				process.State,
				process.Info,
			)
			if merr != nil {
				// Remember the failure but keep inserting the remaining processes.
				err = merr
			}
		}
		if err != nil {
			return log.Errore(err)
		}
		return nil
	}
	return ExecDBWriteFunc(writeFunc)
}
// FailStaleSeeds marks as failed seeds where no progress have been seen recently func FailStaleSeeds() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update agent_seed set is_complete=1, is_successful=0 where is_complete=0 and ( select max(state_timestamp) as last_state_timestamp from agent_seed_state where agent_seed.agent_seed_id = agent_seed_state.agent_seed_id ) < now() - interval ? minute`, config.Config.StaleSeedFailMinutes, ) return err }
// EndDowntime will remove downtime flag from an instance func EndDowntime(instanceKey *InstanceKey) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } res, err := sqlutils.Exec(db, ` update database_instance_downtime set downtime_active = NULL, end_timestamp = NOW() where hostname = ? and port = ? and downtime_active = 1 `, instanceKey.Hostname, instanceKey.Port, ) if err != nil { return log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = fmt.Errorf("Instance is not in downtime mode: %+v", instanceKey) } else { // success AuditOperation("end-downtime", instanceKey, "") } return err }
// submitSeedStateEntry submits a seed state: a single step in the overall seed process func submitSeedStateEntry(seedId int64, action string, errorMessage string) (int64, error) { db, err := db.OpenOrchestrator() if err != nil { return 0, log.Errore(err) } res, err := sqlutils.Exec(db, ` insert into agent_seed_state ( agent_seed_id, state_timestamp, state_action, error_message ) VALUES ( ?, NOW(), ?, ? ) `, seedId, action, errorMessage, ) if err != nil { return 0, log.Errore(err) } id, err := res.LastInsertId() return id, err }
// UpdateAgentInfo updates some agent state in backend table func UpdateAgentInfo(hostname string, agent Agent) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update host_agent set last_seen = NOW(), mysql_port = ?, count_mysql_snapshots = ? where hostname = ?`, agent.MySQLPort, len(agent.LogicalVolumes), hostname, ) if err != nil { return log.Errore(err) } return nil }
// WriteChangelog writes a value to the changelog table. // It returns the hint as given, for convenience func (this *Applier) WriteChangelog(hint, value string) (string, error) { explicitId := 0 switch hint { case "heartbeat": explicitId = 1 case "state": explicitId = 2 case "throttle": explicitId = 3 } query := fmt.Sprintf(` insert /* gh-ost */ into %s.%s (id, hint, value) values (NULLIF(?, 0), ?, ?) on duplicate key update last_update=NOW(), value=VALUES(value) `, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.GetChangelogTableName()), ) _, err := sqlutils.Exec(this.db, query, explicitId, hint, value) return hint, err }
// WriteClusterDomainName will write (and override) the domain name of a cluster func WriteClusterDomainName(clusterName string, domainName string) error { writeFunc := func() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` insert into cluster_domain_name (cluster_name, domain_name, last_registered) values (?, ?, NOW()) on duplicate key update domain_name=values(domain_name), last_registered=values(last_registered) `, clusterName, domainName) if err != nil { return log.Errore(err) } return nil } return ExecDBWriteFunc(writeFunc) }
// AttemptElection tries to grab leadership (become active node) func AttemptElection() (bool, error) { db, err := db.OpenOrchestrator() if err != nil { return false, log.Errore(err) } sqlResult, err := sqlutils.Exec(db, ` update active_node set hostname = ?, token = ?, last_seen_active = now() where anchor = 1 and ( last_seen_active < now() - interval ? second or hostname = '' or (hostname = ? and token = ?) ) `, ThisHostname, ProcessToken.Hash, config.Config.ActiveNodeExpireSeconds, ThisHostname, ProcessToken.Hash, ) if err != nil { return false, log.Errore(err) } rows, err := sqlResult.RowsAffected() return (rows > 0), err }
// ApplyIterationInsertQuery issues a chunk-INSERT query on the ghost table. It is where // data actually gets copied from original table. func (this *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected int64, duration time.Duration, err error) { startTime := time.Now() chunkSize = atomic.LoadInt64(&this.migrationContext.ChunkSize) query, explodedArgs, err := sql.BuildRangeInsertPreparedQuery( this.migrationContext.DatabaseName, this.migrationContext.OriginalTableName, this.migrationContext.GetGhostTableName(), this.migrationContext.SharedColumns.Names, this.migrationContext.MappedSharedColumns.Names, this.migrationContext.UniqueKey.Name, this.migrationContext.UniqueKey.Columns.Names, this.migrationContext.MigrationIterationRangeMinValues.AbstractValues(), this.migrationContext.MigrationIterationRangeMaxValues.AbstractValues(), this.migrationContext.GetIteration() == 0, this.migrationContext.IsTransactionalTable(), ) if err != nil { return chunkSize, rowsAffected, duration, err } sqlResult, err := sqlutils.Exec(this.db, query, explodedArgs...) if err != nil { return chunkSize, rowsAffected, duration, err } rowsAffected, _ = sqlResult.RowsAffected() duration = time.Now().Sub(startTime) log.Debugf( "Issued INSERT on range: [%s]..[%s]; iteration: %d; chunk-size: %d", this.migrationContext.MigrationIterationRangeMinValues, this.migrationContext.MigrationIterationRangeMaxValues, this.migrationContext.GetIteration(), chunkSize) return chunkSize, rowsAffected, duration, nil }
// EndMaintenance will terminate an active maintenance via maintenanceToken func EndMaintenance(maintenanceToken int64) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } res, err := sqlutils.Exec(db, ` update database_instance_maintenance set maintenance_active = NULL, end_timestamp = NOW() where database_instance_maintenance_id = ? `, maintenanceToken, ) if err != nil { return log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = fmt.Errorf("Instance is not in maintenance mode; token = %+v", maintenanceToken) } else { // success instanceKey, _ := ReadMaintenanceInstanceKey(maintenanceToken) AuditOperation("end-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d", maintenanceToken)) } return err }
// ExecOrchestrator will execute given query on the orchestrator backend database. func ExecOrchestrator(query string, args ...interface{}) (sql.Result, error) { db, err := OpenOrchestrator() if err != nil { return nil, err } res, err := sqlutils.Exec(db, query, args...) return res, err }
// ExecInstance executes a given query on the given MySQL topology instance func ExecInstance(instanceKey *InstanceKey, query string, args ...interface{}) (sql.Result, error) { db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port) if err != nil { return nil, err } res, err := sqlutils.Exec(db, query, args...) return res, err }
// ExpireDowntime will remove the maintenance flag on old downtimes func ExpireDowntime() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } { res, err := sqlutils.Exec(db, ` delete from database_instance_downtime where downtime_active is null and end_timestamp < NOW() - INTERVAL ? DAY `, config.Config.MaintenancePurgeDays, ) if err != nil { return log.Errore(err) } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { AuditOperation("expire-downtime", nil, fmt.Sprintf("Purged %d historical entries", rowsAffected)) } } { res, err := sqlutils.Exec(db, ` update database_instance_downtime set downtime_active = NULL where downtime_active = 1 and end_timestamp < NOW() `, ) if err != nil { return log.Errore(err) } if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 { AuditOperation("expire-downtime", nil, fmt.Sprintf("Expired %d entries", rowsAffected)) } } return err }
// ExecOrchestrator will execute given query on the orchestrator backend database. func ExecOrchestrator(query string, args ...interface{}) (sql.Result, error) { if config.Config.DatabaselessMode__experimental { return DummySqlResult{}, nil } db, err := OpenOrchestrator() if err != nil { return nil, err } res, err := sqlutils.Exec(db, query, args...) return res, err }
// ApplyDMLEventQuery writes an entry to the ghost table, in response to an intercepted // original-table binlog event func (this *Applier) ApplyDMLEventQuery(dmlEvent *binlog.BinlogDMLEvent) error { query, args, rowDelta, err := this.buildDMLEventQuery(dmlEvent) if err != nil { return err } _, err = sqlutils.Exec(this.db, query, args...) if err == nil { atomic.AddInt64(&this.migrationContext.TotalDMLEventsApplied, 1) } if this.migrationContext.CountTableRows { atomic.AddInt64(&this.migrationContext.RowsEstimate, rowDelta) } return err }
// ForgetLongUnseenAgents will remove entries of all agents that have long since been last seen. func ForgetLongUnseenAgents() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` delete from host_agent where last_submitted < NOW() - interval ? hour`, config.Config.UnseenAgentForgetHours, ) return err }
// AttemptRecoveryRegistration tries to add a recovery entry; if this fails that means recovery is already in place.
// Returns true only when a fresh row was inserted: with "insert ignore", an
// existing in-progress recovery for the same key leaves zero rows affected.
func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis) (bool, error) {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return false, log.Errore(err)
	}
	// NOTE(review): "processcing_node_token" is the actual (misspelled) column
	// name in the topology_recovery schema; do not "fix" the spelling here
	// without a matching schema migration.
	sqlResult, err := sqlutils.Exec(db, `
		insert ignore
			into topology_recovery (
				hostname,
				port,
				in_active_period,
				start_active_period,
				end_active_period_unixtime,
				processing_node_hostname,
				processcing_node_token,
				analysis,
				cluster_name,
				cluster_alias,
				count_affected_slaves,
				slave_hosts
			) values (
				?, ?, 1, NOW(), 0, ?, ?, ?, ?, ?, ?, ?
			)
		`,
		analysisEntry.AnalyzedInstanceKey.Hostname,
		analysisEntry.AnalyzedInstanceKey.Port,
		ThisHostname,
		ProcessToken.Hash,
		string(analysisEntry.Analysis),
		analysisEntry.ClusterDetails.ClusterName,
		analysisEntry.ClusterDetails.ClusterAlias,
		analysisEntry.CountSlaves,
		analysisEntry.GetSlaveHostsAsString(),
	)
	if err != nil {
		return false, log.Errore(err)
	}
	// err here is RowsAffected's error, freshly reassigned above.
	rows, err := sqlResult.RowsAffected()
	return (err == nil && rows > 0), err
}
// AuditOperation creates and writes a new audit entry by given params.
// The entry is appended to the audit log file (when one is configured) and is
// always written to the backend audit table.
func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error {
	if instanceKey == nil {
		// Allow auditing events not tied to a specific instance; an empty key
		// yields empty hostname and zero port in the record.
		instanceKey = &InstanceKey{}
	}
	if config.Config.AuditLogFile != "" {
		f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600)
		if err != nil {
			return log.Errore(err)
		}
		defer f.Close()
		// Tab-separated line: timestamp, type, hostname, port, message.
		text := fmt.Sprintf("%s\t%s\t%s\t%d\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, message)
		if _, err = f.WriteString(text); err != nil {
			return log.Errore(err)
		}
	}
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}
	_, err = sqlutils.Exec(db, `
		insert
			into audit (
				audit_timestamp, audit_type, hostname, port, message
			) VALUES (
				NOW(), ?, ?, ?, ?
			)
		`,
		auditType,
		instanceKey.Hostname,
		instanceKey.Port,
		message,
	)
	if err != nil {
		return log.Errore(err)
	}
	return err
}
// HealthTest attempts to write to the backend database and get a result func HealthTest() (*HealthStatus, error) { health := HealthStatus{Healthy: false, Hostname: ThisHostname, Token: ProcessToken.Hash} db, err := db.OpenOrchestrator() if err != nil { health.Error = err return &health, log.Errore(err) } sqlResult, err := sqlutils.Exec(db, ` insert into node_health (hostname, token, last_seen_active) values (?, ?, NOW()) on duplicate key update token=values(token), last_seen_active=values(last_seen_active) `, ThisHostname, ProcessToken.Hash, ) if err != nil { health.Error = err return &health, log.Errore(err) } rows, err := sqlResult.RowsAffected() if err != nil { health.Error = err return &health, log.Errore(err) } health.Healthy = (rows > 0) activeHostname, activeToken, isActive, err := ElectedNode() if err != nil { health.Error = err return &health, log.Errore(err) } health.ActiveNode = fmt.Sprintf("%s;%s", activeHostname, activeToken) health.IsActiveNode = isActive health.AvailableNodes, err = readAvailableNodes() return &health, nil }
// UpdateAgentLastChecked updates the last_check timestamp in the orchestrator backed database // for a given agent func UpdateAgentLastChecked(hostname string) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update host_agent set last_checked = NOW() where hostname = ?`, hostname, ) if err != nil { return log.Errore(err) } return nil }
// updateSeedStateEntry updates seed step state func updateSeedStateEntry(seedStateId int64, reason error) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update agent_seed_state set error_message = ? where agent_seed_state_id = ? `, reason.Error(), seedStateId, ) if err != nil { return log.Errore(err) } return reason }
func CreateElectionAnchor(force bool) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } statement := `insert ignore` if force { statement = `replace` } _, err = sqlutils.Exec(db, statement+` into active_node (anchor, last_seen_active, hostname) values (1, now() - interval (? + 1) second, '') `, config.Config.ActiveNodeExpireSeconds) if err != nil { return log.Errore(err) } return nil }
// GrabElection forcibly grabs leadership. Use with care!! func GrabElection() (bool, error) { db, err := db.OpenOrchestrator() if err != nil { return false, log.Errore(err) } sqlResult, err := sqlutils.Exec(db, ` update active_node set hostname = ?, token = ?, last_seen_active = now() where anchor = 1 `, ThisHostname, ProcessToken.Hash, ) if err != nil { return false, log.Errore(err) } rows, err := sqlResult.RowsAffected() return (rows > 0), err }
// WriteClusterAlias will write (and override) a single cluster name mapping func WriteClusterAlias(clusterName string, alias string) error { writeFunc := func() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` replace into cluster_alias (cluster_name, alias) values (?, ?) `, clusterName, alias) if err != nil { return log.Errore(err) } return nil } return ExecDBWriteFunc(writeFunc) }
// DeleteInvalidHostnameResolves removes invalid resolves. At this time these are:
// - infinite loop resolves (A->B and B->A), remove earlier mapping
func DeleteInvalidHostnameResolves() error {
	var invalidHostnames []string
	// Find A<->B resolve cycles; the row with the older resolved_timestamp
	// ("early") is the one considered invalid and slated for deletion.
	query := `
		select
			early.hostname
		from
			hostname_resolve as latest
			join hostname_resolve early on (latest.resolved_hostname = early.hostname and latest.hostname = early.resolved_hostname)
		where
			latest.hostname != latest.resolved_hostname
			and latest.resolved_timestamp > early.resolved_timestamp
		`
	db, err := db.OpenOrchestrator()
	if err != nil {
		return err
	}
	err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error {
		invalidHostnames = append(invalidHostnames, m.GetString("hostname"))
		return nil
	})
	if err != nil {
		return err
	}
	for _, invalidHostname := range invalidHostnames {
		_, err = sqlutils.Exec(db, `
			delete
				from hostname_resolve
			where
				hostname = ?`,
			invalidHostname,
		)
		// Each delete failure is logged; note that only the error of the last
		// iteration (if any) is propagated via the final return.
		log.Errore(err)
	}
	return err
}
// BeginDowntime will make mark an instance as downtimed (or override existing downtime period).
// A zero durationSeconds falls back to the configured default expiry window.
func BeginDowntime(instanceKey *InstanceKey, owner string, reason string, durationSeconds uint) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}
	if durationSeconds == 0 {
		// Default downtime window, derived from the maintenance expiry setting.
		durationSeconds = config.Config.MaintenanceExpireMinutes * 60
	}
	// Upsert: repeating begin-downtime for the same host:port overrides the
	// existing period (fresh begin/end timestamps, new owner and reason).
	_, err = sqlutils.Exec(db, `
		insert
			into database_instance_downtime (
				hostname, port, downtime_active, begin_timestamp, end_timestamp, owner, reason
			) VALUES (
				?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ?
			)
			on duplicate key update
				downtime_active=values(downtime_active),
				begin_timestamp=values(begin_timestamp),
				end_timestamp=values(end_timestamp),
				owner=values(owner),
				reason=values(reason)
		`,
		instanceKey.Hostname,
		instanceKey.Port,
		durationSeconds,
		owner,
		reason,
	)
	if err != nil {
		return log.Errore(err)
	}
	AuditOperation("begin-downtime", instanceKey, fmt.Sprintf("owner: %s, reason: %s", owner, reason))
	return nil
}
// BeginBoundedMaintenance will make new maintenance entry for given instanceKey.
// The maintenance window is bounded to durationSeconds (defaulted when zero).
// Returns a token identifying the maintenance entry, for use with EndMaintenance.
func BeginBoundedMaintenance(instanceKey *InstanceKey, owner string, reason string, durationSeconds uint) (int64, error) {
	db, err := db.OpenOrchestrator()
	var maintenanceToken int64 = 0
	if err != nil {
		return maintenanceToken, log.Errore(err)
	}
	if durationSeconds == 0 {
		// Default maintenance window from configuration.
		durationSeconds = config.Config.MaintenanceExpireMinutes * 60
	}
	// insert ignore: if maintenance is already active for this host:port, zero
	// rows are affected and registration fails below.
	res, err := sqlutils.Exec(db, `
		insert ignore
			into database_instance_maintenance (
				hostname, port, maintenance_active, begin_timestamp, end_timestamp, owner, reason
			) VALUES (
				?, ?, 1, NOW(), NOW() + INTERVAL ? SECOND, ?, ?
			)
		`,
		instanceKey.Hostname,
		instanceKey.Port,
		durationSeconds,
		owner,
		reason,
	)
	if err != nil {
		return maintenanceToken, log.Errore(err)
	}
	if affected, _ := res.RowsAffected(); affected == 0 {
		err = fmt.Errorf("Cannot begin maintenance for instance: %+v", instanceKey)
	} else {
		// success: the auto-generated row id doubles as the maintenance token.
		maintenanceToken, _ = res.LastInsertId()
		AuditOperation("begin-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d, owner: %s, reason: %s", maintenanceToken, owner, reason))
	}
	return maintenanceToken, err
}
// SubmitAgent submits a new agent for listing func SubmitAgent(hostname string, port int, token string) (string, error) { db, err := db.OpenOrchestrator() if err != nil { return "", log.Errore(err) } _, err = sqlutils.Exec(db, ` replace into host_agent ( hostname, port, token, last_submitted ) VALUES ( ?, ?, ?, NOW() ) `, hostname, port, token, ) if err != nil { return "", log.Errore(err) } return hostname, err }
// SubmitSeedEntry submits a new seed operation entry, returning its unique ID func SubmitSeedEntry(targetHostname string, sourceHostname string) (int64, error) { db, err := db.OpenOrchestrator() if err != nil { return 0, log.Errore(err) } res, err := sqlutils.Exec(db, ` insert into agent_seed ( target_hostname, source_hostname, start_timestamp ) VALUES ( ?, ?, NOW() ) `, targetHostname, sourceHostname, ) if err != nil { return 0, log.Errore(err) } id, err := res.LastInsertId() return id, err }
// updateSeedComplete updates the seed entry, signing for completion func updateSeedComplete(seedId int64, seedError error) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update agent_seed set end_timestamp = NOW(), is_complete = 1, is_successful = ? where agent_seed_id = ? `, (seedError == nil), seedId, ) if err != nil { return log.Errore(err) } return nil }