// Serve begins listening & serving on whichever device was configured
func (this *Server) Serve() (err error) {
	go func() {
		for {
			conn, err := this.unixListener.Accept()
			if err != nil {
				log.Errore(err)
				// do not attempt to handle a nil connection
				continue
			}
			go this.handleConnection(conn)
		}
	}()
	go func() {
		if this.tcpListener == nil {
			return
		}
		for {
			conn, err := this.tcpListener.Accept()
			if err != nil {
				log.Errore(err)
				// do not attempt to handle a nil connection
				continue
			}
			go this.handleConnection(conn)
		}
	}()

	return nil
}
// ResetSlave resets a slave, breaking replication
func ResetSlave(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if instance.SlaveRunning() {
		return instance, fmt.Errorf("Cannot reset slave on: %+v because slave is running", instanceKey)
	}

	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting reset-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey)
	}

	// MySQL's RESET SLAVE is done correctly; however SHOW SLAVE STATUS still returns the old hostnames etc.,
	// and only resets them after the next restart. This leads to orchestrator still thinking the instance replicates
	// from the old host. We therefore forcibly modify the hostname.
	// The RESET SLAVE ALL command solves this, but only as of 5.6.3.
	_, err = ExecInstanceNoPrepare(instanceKey, `change master to master_host='_'`)
	if err != nil {
		return instance, log.Errore(err)
	}
	_, err = ExecInstanceNoPrepare(instanceKey, `reset slave /*!50603 all */`)
	if err != nil {
		return instance, log.Errore(err)
	}
	log.Infof("Reset slave %+v", instanceKey)

	instance, err = ReadTopologyInstance(instanceKey)
	return instance, err
}
// submitSeedStateEntry submits a seed state: a single step in the overall seed process
func submitSeedStateEntry(seedId int64, action string, errorMessage string) (int64, error) {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return 0, log.Errore(err)
	}

	res, err := sqlutils.Exec(db, `
			insert
				into agent_seed_state (
					agent_seed_id, state_timestamp, state_action, error_message
				) VALUES (
					?, NOW(), ?, ?
				)
			`,
		seedId,
		action,
		errorMessage,
	)
	if err != nil {
		return 0, log.Errore(err)
	}
	id, err := res.LastInsertId()

	return id, err
}
// ExpireBlockedRecoveries clears listing of blocked recoveries that are no longer actually blocked.
func ExpireBlockedRecoveries() error {
	// Older recovery is acknowledged by now, hence blocked recovery should be released.
	// Do NOTE that the data in blocked_topology_recovery is only used for auditing: it is NOT the data
	// based on which we make automated decisions.
	_, err := db.ExecOrchestrator(`
			delete
				from blocked_topology_recovery
				using
					blocked_topology_recovery
					left join topology_recovery on (blocking_recovery_id = topology_recovery.recovery_id and acknowledged = 0)
				where
					acknowledged is null
			`,
	)
	if err != nil {
		return log.Errore(err)
	}
	// Some oversampling: if a problem has not been noticed for some time (e.g. the server came up alive
	// before action was taken), expire it.
	// Recall that RegisterBlockedRecoveries continuously updates the last_blocked_timestamp column.
	_, err = db.ExecOrchestrator(`
			delete
				from blocked_topology_recovery
				where
					last_blocked_timestamp < NOW() - interval ? second
			`, (config.Config.RecoveryPollSeconds * 5),
	)
	if err != nil {
		return log.Errore(err)
	}
	return nil
}
// StopSlave stops replication on a given instance
func StopSlave(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if !instance.IsSlave() {
		return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey)
	}
	_, err = ExecInstanceNoPrepare(instanceKey, `stop slave`)
	if err != nil {
		// Patch; current MaxScale behavior for STOP SLAVE is to throw an error if slave already stopped.
		if instance.isMaxScale() && err.Error() == "Error 1199: Slave connection is not running" {
			err = nil
		}
	}
	if err != nil {
		return instance, log.Errore(err)
	}
	instance, err = ReadTopologyInstance(instanceKey)

	log.Infof("Stopped slave on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates)
	return instance, err
}
// UpdateAgentInfo updates some agent state in backend table
func UpdateAgentInfo(hostname string, agent Agent) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}

	_, err = sqlutils.Exec(db, `
		update host_agent set
			last_seen = NOW(),
			mysql_port = ?,
			count_mysql_snapshots = ?
		where hostname = ?`,
		agent.MySQLPort,
		len(agent.LogicalVolumes),
		hostname,
	)
	if err != nil {
		return log.Errore(err)
	}

	return nil
}
// DetachSlave detaches a slave from replication, forcibly corrupting the binlog coordinates (though in such a way
// that it is reversible)
func DetachSlave(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if instance.SlaveRunning() {
		return instance, fmt.Errorf("Cannot detach slave on: %+v because slave is running", instanceKey)
	}

	isDetached, _, _ := instance.ExecBinlogCoordinates.DetachedCoordinates()

	if isDetached {
		return instance, fmt.Errorf("Cannot (need not) detach slave on: %+v because slave is already detached", instanceKey)
	}

	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting detach-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey)
	}

	detachedCoordinates := BinlogCoordinates{LogFile: fmt.Sprintf("//%s:%d", instance.ExecBinlogCoordinates.LogFile, instance.ExecBinlogCoordinates.LogPos), LogPos: instance.ExecBinlogCoordinates.LogPos}
	// Encode the current coordinates within the log file name, in such a way that replication is broken, but the info can still be resurrected
	_, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change master to master_log_file='%s', master_log_pos=%d`, detachedCoordinates.LogFile, detachedCoordinates.LogPos))
	if err != nil {
		return instance, log.Errore(err)
	}

	log.Infof("Detach slave %+v", instanceKey)

	instance, err = ReadTopologyInstance(instanceKey)
	return instance, err
}
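// The detached-coordinates scheme above encodes the original file and position as "//<file>:<pos>"
// inside master_log_file. The actual decoder is DetachedCoordinates(), defined elsewhere in the
// package; the following is only a hypothetical sketch of such a parser (assumes "strings" is
// imported; the function name is illustrative and not part of the original code).
func parseDetachedCoordinates(logFile string) (isDetached bool, detachedLogFile string, detachedLogPos string) {
	if !strings.HasPrefix(logFile, "//") {
		return false, "", ""
	}
	payload := strings.TrimPrefix(logFile, "//")
	// The encoded position follows the last ':'; binlog file names contain dots, not colons.
	i := strings.LastIndex(payload, ":")
	if i < 0 {
		return false, "", ""
	}
	return true, payload[:i], payload[i+1:]
}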
// EndMaintenance will terminate an active maintenance via maintenanceToken
func EndMaintenance(maintenanceToken int64) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}

	res, err := sqlutils.Exec(db, `
			update
				database_instance_maintenance
			set
				maintenance_active = NULL,
				end_timestamp = NOW()
			where
				database_instance_maintenance_id = ?
			`,
		maintenanceToken,
	)
	if err != nil {
		return log.Errore(err)
	}

	if affected, _ := res.RowsAffected(); affected == 0 {
		err = fmt.Errorf("Instance is not in maintenance mode; token = %+v", maintenanceToken)
	} else {
		// success
		instanceKey, _ := ReadMaintenanceInstanceKey(maintenanceToken)
		AuditOperation("end-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d", maintenanceToken))
	}
	return err
}
// ReattachSlave restores a detached slave back into replication
func ReattachSlave(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if instance.SlaveRunning() {
		return instance, fmt.Errorf("Cannot (need not) reattach slave on: %+v because slave is running", instanceKey)
	}

	isDetached, detachedLogFile, detachedLogPos := instance.ExecBinlogCoordinates.DetachedCoordinates()

	if !isDetached {
		return instance, fmt.Errorf("Cannot reattach slave on: %+v because slave is not detached", instanceKey)
	}

	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting reattach-slave operation on %+v; signalling error but nothing went wrong.", *instanceKey)
	}

	_, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf(`change master to master_log_file='%s', master_log_pos=%s`, detachedLogFile, detachedLogPos))
	if err != nil {
		return instance, log.Errore(err)
	}

	log.Infof("Reattach slave %+v", instanceKey)

	instance, err = ReadTopologyInstance(instanceKey)
	return instance, err
}
// AttemptElection tries to grab leadership (become active node)
func AttemptElection() (bool, error) {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return false, log.Errore(err)
	}

	sqlResult, err := sqlutils.Exec(db, `
			update active_node set
				hostname = ?,
				token = ?,
				last_seen_active = now()
			where
				anchor = 1
				and (
					last_seen_active < now() - interval ? second
					or hostname = ''
					or (hostname = ? and token = ?)
				)
			`,
		ThisHostname, ProcessToken.Hash, config.Config.ActiveNodeExpireSeconds, ThisHostname, ProcessToken.Hash,
	)
	if err != nil {
		return false, log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	return (rows > 0), err
}
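// AttemptElection returns true only when this node's UPDATE actually claimed the single anchor row.
// A hypothetical usage sketch (not part of the original code): a caller would typically retry the
// election on an interval shorter than ActiveNodeExpireSeconds and perform active-node duties only
// while elected. doActiveNodeWork and the 10-second interval below are illustrative assumptions.
func electionLoop() {
	for range time.Tick(10 * time.Second) {
		elected, err := AttemptElection()
		if err != nil {
			log.Errore(err)
			continue
		}
		if elected {
			doActiveNodeWork() // hypothetical: only the elected node performs active duties
		}
	}
}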
// WriteClusterDomainName will write (and override) the domain name of a cluster
func WriteClusterDomainName(clusterName string, domainName string) error {
	writeFunc := func() error {
		db, err := db.OpenOrchestrator()
		if err != nil {
			return log.Errore(err)
		}

		_, err = sqlutils.Exec(db, `
			insert into
					cluster_domain_name (cluster_name, domain_name, last_registered)
				values
					(?, ?, NOW())
				on duplicate key update
					domain_name=values(domain_name),
					last_registered=values(last_registered)
			`,
			clusterName, domainName)
		if err != nil {
			return log.Errore(err)
		}
		return nil
	}
	return ExecDBWriteFunc(writeFunc)
}
// EndDowntime will remove downtime flag from an instance
func EndDowntime(instanceKey *InstanceKey) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}

	res, err := sqlutils.Exec(db, `
			update
				database_instance_downtime
			set
				downtime_active = NULL,
				end_timestamp = NOW()
			where
				hostname = ?
				and port = ?
				and downtime_active = 1
			`,
		instanceKey.Hostname,
		instanceKey.Port,
	)
	if err != nil {
		return log.Errore(err)
	}

	if affected, _ := res.RowsAffected(); affected == 0 {
		err = fmt.Errorf("Instance is not in downtime mode: %+v", instanceKey)
	} else {
		// success
		AuditOperation("end-downtime", instanceKey, "")
	}
	return err
}
// queryResultData returns a raw array of rows for a given query, optionally reading and returning column names
func queryResultData(db *sql.DB, query string, retrieveColumns bool, args ...interface{}) (ResultData, []string, error) {
	var err error
	defer func() {
		if derr := recover(); derr != nil {
			err = errors.New(fmt.Sprintf("QueryRowsMap unexpected error: %+v", derr))
		}
	}()

	columns := []string{}
	rows, err := db.Query(query, args...)
	if err != nil && err != sql.ErrNoRows {
		return EmptyResultData, columns, log.Errore(err)
	}
	defer rows.Close()
	if retrieveColumns {
		// Don't pay if you don't want to
		columns, err = rows.Columns()
		if err != nil {
			return nil, nil, log.Errore(err)
		}
	}
	resultData := ResultData{}
	err = ScanRowsToArrays(rows, func(rowData []CellData) error {
		resultData = append(resultData, rowData)
		return nil
	})
	return resultData, columns, err
}
// ChangeMasterCredentials issues a CHANGE MASTER TO... MASTER_USER=, MASTER_PASSWORD=...
func ChangeMasterCredentials(instanceKey *InstanceKey, masterUser string, masterPassword string) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}
	if masterUser == "" {
		return instance, log.Errorf("Empty user in ChangeMasterCredentials() for %+v", *instanceKey)
	}

	if instance.SlaveRunning() {
		return instance, fmt.Errorf("ChangeMasterTo: Cannot change master on: %+v because slave is running", *instanceKey)
	}
	log.Debugf("ChangeMasterTo: will attempt changing master credentials on %+v", *instanceKey)

	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting CHANGE MASTER TO operation on %+v; signalling error but nothing went wrong.", *instanceKey)
	}
	_, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("change master to master_user='%s', master_password='%s'", masterUser, masterPassword))

	if err != nil {
		return instance, log.Errore(err)
	}
	log.Infof("ChangeMasterTo: Changed master credentials on %+v", *instanceKey)

	instance, err = ReadTopologyInstance(instanceKey)
	return instance, err
}
// SkipQuery skips a single query in a failed replication instance
func SkipQuery(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if !instance.IsSlave() {
		return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey)
	}
	if instance.Slave_SQL_Running {
		return instance, fmt.Errorf("Slave SQL thread is running on %+v", instanceKey)
	}
	if instance.LastSQLError == "" {
		return instance, fmt.Errorf("No SQL error on %+v", instanceKey)
	}

	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting skip-query operation on %+v; signalling error but nothing went wrong.", *instanceKey)
	}

	log.Debugf("Skipping one query on %+v", instanceKey)
	if instance.UsingOracleGTID {
		err = skipQueryOracleGtid(instance)
	} else if instance.UsingMariaDBGTID {
		return instance, log.Errorf("%+v is replicating with MariaDB GTID. To skip a query first disable GTID, then skip, then enable GTID again", *instanceKey)
	} else {
		err = skipQueryClassic(instance)
	}
	if err != nil {
		return instance, log.Errore(err)
	}
	AuditOperation("skip-query", instanceKey, "Skipped one query")
	return StartSlave(instanceKey)
}
// UnresolveHostname returns the unresolved hostname for a given instance key, and verifies that the
// unresolved hostname resolves back to the original one.
func UnresolveHostname(instanceKey *InstanceKey) (InstanceKey, bool, error) {
	unresolvedHostname, err := readUnresolvedHostname(instanceKey.Hostname)
	if err != nil {
		return *instanceKey, false, log.Errore(err)
	}
	if unresolvedHostname == instanceKey.Hostname {
		// unchanged. Nothing to do
		return *instanceKey, false, nil
	}
	// We unresolved to a different hostname. We will now re-resolve to double-check!
	unresolvedKey := &InstanceKey{Hostname: unresolvedHostname, Port: instanceKey.Port}

	instance, err := ReadTopologyInstance(unresolvedKey)
	if err != nil {
		return *instanceKey, false, log.Errore(err)
	}
	if instance.Key.Hostname != instanceKey.Hostname {
		// Resolve(Unresolve(hostname)) != hostname ==> Bad; reject
		if *config.RuntimeCLIFlags.SkipUnresolveCheck {
			return *instanceKey, false, nil
		}
		return *instanceKey, false, log.Errorf("Error unresolving; hostname=%s, unresolved=%s, re-resolved=%s; mismatch. Skip/ignore with --skip-unresolve-check", instanceKey.Hostname, unresolvedKey.Hostname, instance.Key.Hostname)
	}
	return *unresolvedKey, true, nil
}
// acknowledgeRecoveries sets acknowledged* details and clears the in_active_period flags from a set of entries
func acknowledgeRecoveries(owner string, comment string, markEndRecovery bool, whereClause string, args []interface{}) (countAcknowledgedEntries int64, err error) {
	additionalSet := ``
	if markEndRecovery {
		additionalSet = `
				end_recovery=IFNULL(end_recovery, NOW()),
			`
	}
	query := fmt.Sprintf(`
			update topology_recovery set
				in_active_period = 0,
				end_active_period_unixtime = IF(end_active_period_unixtime = 0, UNIX_TIMESTAMP(), end_active_period_unixtime),
				%s
				acknowledged = 1,
				acknowledged_at = NOW(),
				acknowledged_by = ?,
				acknowledge_comment = ?
			where
				acknowledged = 0
				and
				%s
		`, additionalSet, whereClause)
	args = append(sqlutils.Args(owner, comment), args...)
	sqlResult, err := db.ExecOrchestrator(query, args...)
	if err != nil {
		return 0, log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	return rows, log.Errore(err)
}
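// A hypothetical caller of acknowledgeRecoveries (not taken from the original code): the whereClause
// and its bound args are supplied by the caller, while owner and comment are prepended automatically.
// The function name and cluster-name filter below are illustrative only.
func acknowledgeClusterRecoveriesExample() {
	whereClause := `cluster_name = ?`
	args := sqlutils.Args("mycluster")
	count, err := acknowledgeRecoveries("ops-oncall", "handled manually", false, whereClause, args)
	if err != nil {
		log.Errore(err)
		return
	}
	log.Infof("Acknowledged %d recovery entries", count)
}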
// writePoolInstances will write (and override) the list of instances belonging to a given pool
func writePoolInstances(pool string, instanceKeys [](*InstanceKey)) error {
	writeFunc := func() error {
		db, err := db.OpenOrchestrator()
		if err != nil {
			return log.Errore(err)
		}

		tx, err := db.Begin()
		if err != nil {
			return log.Errore(err)
		}
		stmt, err := tx.Prepare(`delete from database_instance_pool where pool = ?`)
		if err != nil {
			tx.Rollback()
			return log.Errore(err)
		}
		_, err = stmt.Exec(pool)
		if err != nil {
			tx.Rollback()
			return log.Errore(err)
		}
		stmt, err = tx.Prepare(`insert into database_instance_pool (hostname, port, pool, registered_at) values (?, ?, ?, now())`)
		if err != nil {
			tx.Rollback()
			return log.Errore(err)
		}
		for _, instanceKey := range instanceKeys {
			_, err := stmt.Exec(instanceKey.Hostname, instanceKey.Port, pool)
			if err != nil {
				tx.Rollback()
				return log.Errore(err)
			}
		}
		tx.Commit()

		return nil
	}
	return ExecDBWriteFunc(writeFunc)
}
// HealthTest attempts to write to the backend database and get a result
func HealthTest() (*HealthStatus, error) {
	health := HealthStatus{Healthy: false, Hostname: ThisHostname, Token: ProcessToken.Hash}

	sqlResult, err := RegisterNode("", "", false)
	if err != nil {
		health.Error = err
		return &health, log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		health.Error = err
		return &health, log.Errore(err)
	}
	health.Healthy = (rows > 0)
	activeHostname, activeToken, isActive, err := ElectedNode()
	if err != nil {
		health.Error = err
		return &health, log.Errore(err)
	}
	health.ActiveNode = fmt.Sprintf("%s;%s", activeHostname, activeToken)
	health.IsActiveNode = isActive

	health.AvailableNodes, err = ReadAvailableNodes(true)

	return &health, nil
}
// WriteLongRunningProcesses rewrites current state of long running processes for given instance
func WriteLongRunningProcesses(instanceKey *InstanceKey, processes []Process) error {
	writeFunc := func() error {
		db, err := db.OpenOrchestrator()
		if err != nil {
			return log.Errore(err)
		}

		_, err = sqlutils.Exec(db, `
			delete from
					database_instance_long_running_queries
				where
					hostname = ?
					and port = ?
			`,
			instanceKey.Hostname,
			instanceKey.Port)
		if err != nil {
			return log.Errore(err)
		}

		for _, process := range processes {
			_, merr := sqlutils.Exec(db, `
				insert into database_instance_long_running_queries (
					hostname,
					port,
					process_id,
					process_started_at,
					process_user,
					process_host,
					process_db,
					process_command,
					process_time_seconds,
					process_state,
					process_info
				) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
				instanceKey.Hostname,
				instanceKey.Port,
				process.Id,
				process.StartedAt,
				process.User,
				process.Host,
				process.Db,
				process.Command,
				process.Time,
				process.State,
				process.Info,
			)
			if merr != nil {
				err = merr
			}
		}
		if err != nil {
			return log.Errore(err)
		}

		return nil
	}
	return ExecDBWriteFunc(writeFunc)
}
// ExpireMaintenance will remove the maintenance flag on old maintenances and on bounded maintenances
func ExpireMaintenance() error {
	{
		res, err := db.ExecOrchestrator(`
			delete from
				database_instance_maintenance
			where
				maintenance_active is null
				and end_timestamp < NOW() - INTERVAL ? DAY
			`,
			config.Config.MaintenancePurgeDays,
		)
		if err != nil {
			return log.Errore(err)
		}
		if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 {
			AuditOperation("expire-maintenance", nil, fmt.Sprintf("Purged historical entries: %d", rowsAffected))
		}
	}
	{
		res, err := db.ExecOrchestrator(`
			update
				database_instance_maintenance
			set
				maintenance_active = NULL
			where
				maintenance_active = 1
				and end_timestamp < NOW()
			`,
		)
		if err != nil {
			return log.Errore(err)
		}
		if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 {
			AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired bounded: %d", rowsAffected))
		}
	}
	{
		res, err := db.ExecOrchestrator(`
			update
				database_instance_maintenance
				left join node_health on (processing_node_hostname = node_health.hostname AND processing_node_token = node_health.token)
			set
				database_instance_maintenance.maintenance_active = NULL
			where
				node_health.last_seen_active IS NULL
				and explicitly_bounded = 0
			`,
		)
		if err != nil {
			return log.Errore(err)
		}
		if rowsAffected, _ := res.RowsAffected(); rowsAffected > 0 {
			AuditOperation("expire-maintenance", nil, fmt.Sprintf("Expired dead: %d", rowsAffected))
		}
	}

	return nil
}
// CommandRun executes a command
func CommandRun(commandText string, arguments ...string) error {
	cmd, tmpFileName, err := execCmd(commandText, arguments...)
	defer os.Remove(tmpFileName)
	if err != nil {
		return log.Errore(err)
	}
	err = cmd.Run()
	return log.Errore(err)
}
// StartSlaveUntilMasterCoordinates issues a START SLAVE UNTIL... statement on given instance
func StartSlaveUntilMasterCoordinates(instanceKey *InstanceKey, masterCoordinates *BinlogCoordinates) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if !instance.IsSlave() {
		return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey)
	}
	if instance.SlaveRunning() {
		return instance, fmt.Errorf("slave already running: %+v", instanceKey)
	}

	log.Infof("Will start slave on %+v until coordinates: %+v", instanceKey, masterCoordinates)

	if instance.SemiSyncEnforced {
		// Send ACK only from promotable instances.
		sendACK := instance.PromotionRule != MustNotPromoteRule
		// Always disable master setting, in case we're converting a former master.
		if err := EnableSemiSync(instanceKey, false, sendACK); err != nil {
			return instance, log.Errore(err)
		}
	}

	// MariaDB has a bug: a CHANGE MASTER TO statement does not work properly with prepared statement... :P
	// See https://mariadb.atlassian.net/browse/MDEV-7640
	// This is the reason for ExecInstanceNoPrepare
	_, err = ExecInstanceNoPrepare(instanceKey, fmt.Sprintf("start slave until master_log_file='%s', master_log_pos=%d",
		masterCoordinates.LogFile, masterCoordinates.LogPos))
	if err != nil {
		return instance, log.Errore(err)
	}

	for upToDate := false; !upToDate; {
		instance, err = ReadTopologyInstance(instanceKey)
		if err != nil {
			return instance, log.Errore(err)
		}

		switch {
		case instance.ExecBinlogCoordinates.SmallerThan(masterCoordinates):
			time.Sleep(sqlThreadPollDuration)
		case instance.ExecBinlogCoordinates.Equals(masterCoordinates):
			upToDate = true
		case masterCoordinates.SmallerThan(&instance.ExecBinlogCoordinates):
			return instance, fmt.Errorf("Start SLAVE UNTIL is past coordinates: %+v", instanceKey)
		}
	}

	instance, err = StopSlave(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	return instance, err
}
// executeWriteFuncs writes data via applier: both the rowcopy and the events backlog.
// This is where the ghost table gets the data. The function fills the data single-threaded.
// Both event backlog and rowcopy events are polled; the backlog events have precedence.
func (this *Migrator) executeWriteFuncs() error {
	if this.migrationContext.Noop {
		log.Debugf("Noop operation; not really executing write funcs")
		return nil
	}
	for {
		if atomic.LoadInt64(&this.inCutOverCriticalActionFlag) == 0 {
			// we don't throttle when cutting over. We _do_ throttle:
			// - during copy phase
			// - just before cut-over
			// - in between cut-over retries
			this.throttle(nil)
			// When cutting over, we need to be aggressive. Cut-over holds table locks.
			// We need to release those asap.
		}
		// We give higher priority to event processing, then secondary priority to
		// rowcopy
		select {
		case applyEventFunc := <-this.applyEventsQueue:
			{
				if err := this.retryOperation(applyEventFunc); err != nil {
					return log.Errore(err)
				}
			}
		default:
			{
				select {
				case copyRowsFunc := <-this.copyRowsQueue:
					{
						copyRowsStartTime := time.Now()
						// Retries are handled within the copyRowsFunc
						if err := copyRowsFunc(); err != nil {
							return log.Errore(err)
						}
						if niceRatio := this.migrationContext.GetNiceRatio(); niceRatio > 0 {
							copyRowsDuration := time.Now().Sub(copyRowsStartTime)
							sleepTimeNanosecondFloat64 := niceRatio * float64(copyRowsDuration.Nanoseconds())
							sleepTime := time.Duration(time.Duration(int64(sleepTimeNanosecondFloat64)) * time.Nanosecond)
							time.Sleep(sleepTime)
						}
					}
				default:
					{
						// Hmmmmm... nothing in the queue; no events, but also no row copy.
						// This is possible upon load. Let's just sleep it over.
						log.Debugf("Getting nothing in the write queue. Sleeping...")
						time.Sleep(time.Second)
					}
				}
			}
		}
	}
	return nil
}
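// The nested select above is the common Go idiom for strict channel priority: the outer select
// drains the high-priority channel, and only its default branch consults the low-priority one.
// A minimal, hypothetical standalone sketch of that idiom follows (function and channel names are
// illustrative, not taken from the migrator; assumes "time" is imported).
func drainWithPriority(highPriority <-chan func() error, lowPriority <-chan func() error) error {
	for {
		select {
		case task := <-highPriority:
			// High-priority work is always consumed first when available.
			if err := task(); err != nil {
				return err
			}
		default:
			select {
			case task := <-lowPriority:
				// Low-priority work runs only when nothing high-priority is pending.
				if err := task(); err != nil {
					return err
				}
			default:
				// Nothing to do; back off briefly.
				time.Sleep(time.Second)
			}
		}
	}
}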
// UpdateClusterAliases writes down the cluster_alias table based on information
// gained from database_instance
func UpdateClusterAliases() error {
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
			replace into
					cluster_alias (alias, cluster_name, last_registered)
				select
					suggested_cluster_alias,
					substring_index(group_concat(
						cluster_name order by
							((last_checked <= last_seen) is true) desc,
							read_only asc,
							num_slave_hosts desc
						), ',', 1) as cluster_name,
					NOW()
				from
					database_instance
					left join database_instance_downtime using (hostname, port)
				where
					suggested_cluster_alias!=''
					/* exclude newly demoted, downtimed masters */
					and ifnull(
							database_instance_downtime.downtime_active = 1
							and database_instance_downtime.end_timestamp > now()
							and database_instance_downtime.reason = ?
						, false) is false
				group by
					suggested_cluster_alias
			`, DowntimeLostInRecoveryMessage)
		return log.Errore(err)
	}
	if err := ExecDBWriteFunc(writeFunc); err != nil {
		return err
	}
	writeFunc = func() error {
		// Handling the case where no cluster alias exists: we write a dummy alias in the form of the real cluster name.
		_, err := db.ExecOrchestrator(`
			replace into
					cluster_alias (alias, cluster_name, last_registered)
				select
					cluster_name, cluster_name, now()
				from
					database_instance
				group by
					cluster_name
				having
					sum(suggested_cluster_alias = '') = count(*)
			`)
		return log.Errore(err)
	}
	if err := ExecDBWriteFunc(writeFunc); err != nil {
		return err
	}
	return nil
}
// AuditOperation creates and writes a new audit entry by given params
func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error {
	if instanceKey == nil {
		instanceKey = &InstanceKey{}
	}
	clusterName := ""
	if instanceKey.Hostname != "" {
		clusterName, _ = GetClusterName(instanceKey)
	}

	if config.Config.AuditLogFile != "" {
		go func() error {
			f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600)
			if err != nil {
				return log.Errore(err)
			}

			defer f.Close()
			text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message)
			if _, err = f.WriteString(text); err != nil {
				return log.Errore(err)
			}
			return nil
		}()
	}
	_, err := db.ExecOrchestrator(`
			insert
				into audit (
					audit_timestamp, audit_type, hostname, port, cluster_name, message
				) VALUES (
					NOW(), ?, ?, ?, ?, ?
				)
			`,
		auditType,
		instanceKey.Hostname,
		instanceKey.Port,
		clusterName,
		message,
	)
	if err != nil {
		return log.Errore(err)
	}
	logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message)
	if syslogWriter != nil {
		go func() {
			syslogWriter.Info(logMessage)
		}()
	}
	log.Debugf(logMessage)
	auditOperationCounter.Inc(1)

	return err
}
// RestartSlave stops & starts replication on a given instance
func RestartSlave(instanceKey *InstanceKey) (instance *Instance, err error) {
	instance, err = StopSlave(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}
	instance, err = StartSlave(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}
	return instance, nil
}
// auditInstanceAnalysisInChangelog will write down an instance's analysis in the database_instance_analysis_changelog table.
// To avoid repeating recurring analysis codes, the database_instance_last_analysis table is used, so that only changes to
// analysis codes are written.
func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode AnalysisCode) error {
	if lastWrittenAnalysis, found := recentInstantAnalysis.Get(instanceKey.DisplayString()); found {
		if lastWrittenAnalysis == analysisCode {
			// Surely nothing new.
			// And let's expand the timeout
			recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
			return nil
		}
	}
	// Passed the cache; but does the database agree that there's a change? Here's a persistent cache; this comes here
	// to verify no two orchestrator services are doing this without coordinating (namely, one dies, the other takes its place
	// and has no familiarity with the former's cache)
	analysisChangeWriteAttemptCounter.Inc(1)
	sqlResult, err := db.ExecOrchestrator(`
			insert ignore into database_instance_last_analysis (
					hostname, port, analysis_timestamp, analysis
				) values (
					?, ?, now(), ?
				) on duplicate key update
					analysis = values(analysis),
					analysis_timestamp = if(analysis = values(analysis), analysis_timestamp, values(analysis_timestamp))
			`,
		instanceKey.Hostname, instanceKey.Port, string(analysisCode),
	)
	if err != nil {
		return log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		return log.Errore(err)
	}
	recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
	lastAnalysisChanged := (rows > 0)

	if !lastAnalysisChanged {
		return nil
	}

	_, err = db.ExecOrchestrator(`
			insert into database_instance_analysis_changelog (
					hostname, port, analysis_timestamp, analysis
				) values (
					?, ?, now(), ?
				)
			`,
		instanceKey.Hostname, instanceKey.Port, string(analysisCode),
	)
	if err == nil {
		analysisChangeWriteCounter.Inc(1)
	}
	return log.Errore(err)
}
// commandOutput executes a command and returns its output bytes
func commandOutput(commandText string) ([]byte, error) {
	cmd, tmpFileName, err := execCmd(commandText)
	if err != nil {
		return nil, log.Errore(err)
	}
	outputBytes, err := cmd.Output()
	if err != nil {
		return nil, log.Errore(err)
	}
	os.Remove(tmpFileName)

	return outputBytes, nil
}
// pollAgent polls a single agent, marking it as checked and updating its info in the backend database
func pollAgent(hostname string) error {
	polledAgent, err := agent.GetAgent(hostname)
	agent.UpdateAgentLastChecked(hostname)

	if err != nil {
		return log.Errore(err)
	}

	err = agent.UpdateAgentInfo(hostname, polledAgent)
	if err != nil {
		return log.Errore(err)
	}

	return nil
}