// ReadClusterNameByMaster will return the cluster name for a given instance by looking at its master // and getting it from there. // It is a non-recursive function and so-called-recursion is performed upon periodic reading of // instances. func ReadClusterNameByMaster(instanceKey *InstanceKey, masterKey *InstanceKey) (string, error) { db, err := db.OpenOrchestrator() if err != nil { return "", log.Errore(err) } var clusterName string err = db.QueryRow(` select if ( cluster_name != '', cluster_name, ifnull(concat(max(hostname), ':', max(port)), '') ) as cluster_name from database_instance where hostname=? and port=?`, masterKey.Hostname, masterKey.Port).Scan( &clusterName, ) if err != nil { return "", log.Errore(err) } if clusterName == "" { return fmt.Sprintf("%s:%d", instanceKey.Hostname, instanceKey.Port), nil } return clusterName, err }
// ReadMaintenanceInstanceKey will return the instanceKey for active maintenance by maintenanceToken func ReadMaintenanceInstanceKey(maintenanceToken int64) (*InstanceKey, error) { var res *InstanceKey query := fmt.Sprintf(` select hostname, port from database_instance_maintenance where database_instance_maintenance_id = %d `, maintenanceToken) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { instanceKey, merr := NewInstanceKeyFromStrings(m.GetString("hostname"), m.GetString("port")) if merr != nil { return merr } res = instanceKey return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// ReadClusterInstances reads all instances of a given cluster func ReadClusterInstances(clusterName string) ([](*Instance), error) { instances := [](*Instance){} db, err := db.OpenOrchestrator() if err != nil { return instances, log.Errore(err) } if strings.Index(clusterName, "'") >= 0 { return instances, log.Errorf("Invalid cluster name: %s", clusterName) } query := fmt.Sprintf(` select *, timestampdiff(second, last_checked, now()) as seconds_since_last_checked, (last_checked <= last_seen) is true as is_last_check_valid, timestampdiff(second, last_seen, now()) as seconds_since_last_seen from database_instance where cluster_name = '%s' order by hostname, port`, clusterName) err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { instance := readInstanceRow(m) instances = append(instances, instance) return nil }) return instances, err }
// ReadOutdatedInstanceKeys reads and returns keys for all instances that are not up to date (i.e. // pre-configured time has passed since they were last cheked) func ReadOutdatedInstanceKeys() ([]InstanceKey, error) { res := []InstanceKey{} query := fmt.Sprintf(` select hostname, port from database_instance where last_checked < now() - interval %d second`, config.Config.InstancePollSeconds) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { instanceKey, merr := NewInstanceKeyFromStrings(m.GetString("hostname"), m.GetString("port")) if merr != nil { log.Errore(merr) } else { res = append(res, *instanceKey) } // We don;t return an error because we want to keep filling the outdated instances list. return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
func ReadOutdatedAgentsHosts() ([]string, error) { res := []string{} query := fmt.Sprintf(` select hostname from host_agent where IFNULL(last_checked < now() - interval %d minute, true) `, config.Config.AgentPollMinutes) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { hostname := m.GetString("hostname") res = append(res, hostname) return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// ReadClustersInfo reads names of all known clusters and some aggregated info func ReadClustersInfo() ([]ClusterInfo, error) { clusters := []ClusterInfo{} db, err := db.OpenOrchestrator() if err != nil { return clusters, log.Errore(err) } query := fmt.Sprintf(` select cluster_name, count(*) as count_instances from database_instance group by cluster_name`) err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { clusterInfo := ClusterInfo{ ClusterName: m.GetString("cluster_name"), CountInstances: m.GetUint("count_instances"), } for pattern, _ := range config.Config.ClusterNameToAlias { if matched, _ := regexp.MatchString(pattern, clusterInfo.ClusterName); matched { clusterInfo.ClusterAlias = config.Config.ClusterNameToAlias[pattern] } } clusters = append(clusters, clusterInfo) return nil }) return clusters, err }
// FailStaleSeeds marks as failed seeds where no progress have been seen recently func FailStaleSeeds() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update agent_seed set is_complete=1, is_successful=0 where is_complete=0 and ( select max(state_timestamp) as last_state_timestamp from agent_seed_state where agent_seed.agent_seed_id = agent_seed_state.agent_seed_id ) < now() - interval ? minute`, config.Config.StaleSeedFailMinutes, ) return err }
// UpdateAgentLastChecked updates the last_check timestamp in the orchestrator backed database // for a given agent func UpdateAgentInfo(hostname string, agent Agent) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update host_agent set last_seen = NOW(), mysql_port = ?, count_mysql_snapshots = ? where hostname = ?`, agent.MySQLPort, len(agent.LogicalVolumes), hostname, ) if err != nil { return log.Errore(err) } return nil }
// EndMaintenance will terminate an active maintenance via maintenanceToken func EndMaintenance(maintenanceToken int64) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } res, err := sqlutils.Exec(db, ` update database_instance_maintenance set maintenance_active = NULL, end_timestamp = NOW() where database_instance_maintenance_id = ? `, maintenanceToken, ) if err != nil { return log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = errors.New(fmt.Sprintf("Instance is not in maintenance mode; token = %+v", maintenanceToken)) } else { // success instanceKey, _ := ReadMaintenanceInstanceKey(maintenanceToken) AuditOperation("end-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d", maintenanceToken)) } return err }
func readAgentBasicInfo(hostname string) (Agent, string, error) { agent := Agent{} token := "" query := fmt.Sprintf(` select hostname, port, token, last_submitted, mysql_port from host_agent where hostname = '%s' `, hostname) db, err := db.OpenOrchestrator() if err != nil { return agent, "", err } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { agent.Hostname = m.GetString("hostname") agent.Port = m.GetInt("port") agent.LastSubmitted = m.GetString("last_submitted") agent.MySQLPort = m.GetInt64("mysql_port") token = m.GetString("token") return nil }) if token == "" { return agent, "", log.Errorf("Cannot get agent/token: %s", hostname) } return agent, token, nil }
// ReadProblemInstances reads all instances with problems func ReadProblemInstances() ([](*Instance), error) { instances := [](*Instance){} db, err := db.OpenOrchestrator() if err != nil { return instances, log.Errore(err) } query := fmt.Sprintf(` select *, timestampdiff(second, last_checked, now()) as seconds_since_last_checked, (last_checked <= last_seen) is true as is_last_check_valid, timestampdiff(second, last_seen, now()) as seconds_since_last_seen from database_instance where (last_seen < last_checked) or (not ifnull(timestampdiff(second, last_checked, now()) <= %d, false)) or (not slave_sql_running) or (not slave_io_running) or (seconds_behind_master > 10) order by hostname, port`, config.Config.InstancePollSeconds) err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { instance := readInstanceRow(m) instances = append(instances, instance) return nil }) return instances, err }
// ReadAgents returns a list of all known agents func ReadCountMySQLSnapshots(hostnames []string) (map[string]int, error) { res := make(map[string]int) query := fmt.Sprintf(` select hostname, count_mysql_snapshots from host_agent where hostname in (%s) order by hostname `, sqlutils.InClauseStringValues(hostnames)) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { res[m.GetString("hostname")] = m.GetInt("count_mysql_snapshots") return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// AuditOperation creates and writes a new audit entry by given params func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } if instanceKey == nil { instanceKey = &InstanceKey{} } _, err = sqlutils.Exec(db, ` insert into audit ( audit_timestamp, audit_type, hostname, port, message ) VALUES ( NOW(), ?, ?, ?, ? ) `, auditType, instanceKey.Hostname, instanceKey.Port, message, ) if err != nil { return log.Errore(err) } return err }
// BeginMaintenance will make new maintenance entry for given instanceKey. func BeginMaintenance(instanceKey *InstanceKey, owner string, reason string) (int64, error) { db, err := db.OpenOrchestrator() var maintenanceToken int64 = 0 if err != nil { return maintenanceToken, log.Errore(err) } res, err := sqlutils.Exec(db, ` insert ignore into database_instance_maintenance ( hostname, port, maintenance_active, begin_timestamp, end_timestamp, owner, reason ) VALUES ( ?, ?, 1, NOW(), NULL, ?, ? ) `, instanceKey.Hostname, instanceKey.Port, owner, reason, ) if err != nil { return maintenanceToken, log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = errors.New(fmt.Sprintf("Cannot begin maintenance for instance: %+v", instanceKey)) } else { // success maintenanceToken, _ = res.LastInsertId() AuditOperation("begin-maintenance", instanceKey, fmt.Sprintf("maintenanceToken: %d, owner: %s, reason: %s", maintenanceToken, owner, reason)) } return maintenanceToken, err }
// submitSeedStateEntry submits a seed state: a single step in the overall seed process func submitSeedStateEntry(seedId int64, action string, errorMessage string) (int64, error) { db, err := db.OpenOrchestrator() if err != nil { return 0, log.Errore(err) } res, err := sqlutils.Exec(db, ` insert into agent_seed_state ( agent_seed_id, state_timestamp, state_action, error_message ) VALUES ( ?, NOW(), ?, ? ) `, seedId, action, errorMessage, ) if err != nil { return 0, log.Errore(err) } id, err := res.LastInsertId() return id, err }
// EndMaintenanceByInstanceKey will terminate an active maintenance using given instanceKey as hint func EndMaintenanceByInstanceKey(instanceKey *InstanceKey) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } res, err := sqlutils.Exec(db, ` update database_instance_maintenance set maintenance_active = NULL, end_timestamp = NOW() where hostname = ? and port = ? and maintenance_active = 1 `, instanceKey.Hostname, instanceKey.Port, ) if err != nil { return log.Errore(err) } if affected, _ := res.RowsAffected(); affected == 0 { err = errors.New(fmt.Sprintf("Instance is not in maintenance mode: %+v", instanceKey)) } else { // success AuditOperation("end-maintenance", instanceKey, "") } return err }
// ForgetLongUnseenInstances will remove entries of all instacnes that have long since been last seen. func ForgetLongUnseenAgents() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` delete from host_agent where last_submitted < NOW() - interval ? hour`, config.Config.UnseenAgentForgetHours, ) return err }
// ForgetLongUnseenInstances will remove entries of all instacnes that have long since been last seen. func ForgetLongUnseenInstances() error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` delete from database_instance where last_seen < NOW() - interval ? hour`, config.Config.UnseenInstanceForgetHours, ) AuditOperation("forget-unseen", nil, "") return err }
// ReadActiveMaintenance returns the list of currently active maintenance entries func ReadActiveMaintenance() ([]Maintenance, error) { res := []Maintenance{} query := fmt.Sprintf(` select database_instance_maintenance_id, hostname, port, begin_timestamp, timestampdiff(second, begin_timestamp, now()) as seconds_elapsed, maintenance_active, owner, reason from database_instance_maintenance where maintenance_active = 1 order by database_instance_maintenance_id `) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { maintenance := Maintenance{} maintenance.MaintenanceId = m.GetUint("database_instance_maintenance_id") maintenance.Key.Hostname = m.GetString("hostname") maintenance.Key.Port = m.GetInt("port") maintenance.BeginTimestamp = m.GetString("begin_timestamp") maintenance.SecondsElapsed = m.GetUint("seconds_elapsed") maintenance.IsActive = m.GetBool("maintenance_active") maintenance.Owner = m.GetString("owner") maintenance.Reason = m.GetString("reason") res = append(res, maintenance) return err }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// ForgetInstance removes an instance entry from the orchestrator backed database. // It may be auto-rediscovered through topology or requested for discovery by multiple means. func ForgetInstance(instanceKey *InstanceKey) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` delete from database_instance where hostname = ? and port = ?`, instanceKey.Hostname, instanceKey.Port, ) AuditOperation("forget", instanceKey, "") return err }
// SearchInstances reads all instances qualifying for some searchString func SearchInstances(searchString string) ([](*Instance), error) { instances := [](*Instance){} db, err := db.OpenOrchestrator() if err != nil { return instances, log.Errore(err) } if strings.Index(searchString, "'") >= 0 { return instances, log.Errorf("Invalid searchString: %s", searchString) } query := fmt.Sprintf(` select *, timestampdiff(second, last_checked, now()) as seconds_since_last_checked, (last_checked <= last_seen) is true as is_last_check_valid, timestampdiff(second, last_seen, now()) as seconds_since_last_seen from database_instance where hostname like '%%%s%%' or cluster_name like '%%%s%%' or server_id = '%s' or version like '%%%s%%' or port = '%s' or concat(hostname, ':', port) like '%%%s%%' order by cluster_name, hostname, port`, searchString, searchString, searchString, searchString, searchString, searchString) err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { instance := readInstanceRow(m) instances = append(instances, instance) return nil }) if err != nil { return instances, log.Errore(err) } err = PopulateInstancesAgents(instances) if err != nil { return instances, log.Errore(err) } return instances, err }
// readSeeds func readSeeds(whereCondition string, limit string) ([]SeedOperation, error) { res := []SeedOperation{} query := fmt.Sprintf(` select agent_seed_id, target_hostname, source_hostname, start_timestamp, end_timestamp, is_complete, is_successful from agent_seed %s order by agent_seed_id desc %s `, whereCondition, limit) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { seedOperation := SeedOperation{} seedOperation.SeedId = m.GetInt64("agent_seed_id") seedOperation.TargetHostname = m.GetString("target_hostname") seedOperation.SourceHostname = m.GetString("source_hostname") seedOperation.StartTimestamp = m.GetString("start_timestamp") seedOperation.EndTimestamp = m.GetString("end_timestamp") seedOperation.IsComplete = m.GetBool("is_complete") seedOperation.IsSuccessful = m.GetBool("is_successful") res = append(res, seedOperation) return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// ReadRecentAudit returns a list of audit entries order chronologically descending, using page number. func ReadRecentAudit(page int) ([]Audit, error) { res := []Audit{} query := fmt.Sprintf(` select audit_id, audit_timestamp, audit_type, hostname, port, message from audit order by audit_timestamp desc limit %d offset %d `, config.Config.AuditPageSize, page*config.Config.AuditPageSize) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { audit := Audit{} audit.AuditId = m.GetInt64("audit_id") audit.AuditTimestamp = m.GetString("audit_timestamp") audit.AuditType = m.GetString("audit_type") audit.AuditInstanceKey.Hostname = m.GetString("hostname") audit.AuditInstanceKey.Port = m.GetInt("port") audit.Message = m.GetString("message") res = append(res, audit) return err }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// UpdateAgentLastChecked updates the last_check timestamp in the orchestrator backed database // for a given agent func UpdateAgentLastChecked(hostname string) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update host_agent set last_checked = NOW() where hostname = ?`, hostname, ) if err != nil { return log.Errore(err) } return nil }
// SeedOperationState reads states for a given seed operation func ReadSeedStates(seedId int64) ([]SeedOperationState, error) { res := []SeedOperationState{} query := fmt.Sprintf(` select agent_seed_state_id, agent_seed_id, state_timestamp, state_action, error_message from agent_seed_state where agent_seed_id = %d order by agent_seed_state_id desc `, seedId) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { seedState := SeedOperationState{} seedState.SeedStateId = m.GetInt64("agent_seed_state_id") seedState.SeedId = m.GetInt64("agent_seed_id") seedState.StateTimestamp = m.GetString("state_timestamp") seedState.Action = m.GetString("state_action") seedState.ErrorMessage = m.GetString("error_message") res = append(res, seedState) return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// updateSeedStateEntry updates seed step state func updateSeedStateEntry(seedStateId int64, reason error) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update agent_seed_state set error_message = ? where agent_seed_state_id = ? `, reason.Error(), seedStateId, ) if err != nil { return log.Errore(err) } return reason }
// ReadAgents returns a list of all known agents func ReadAgents() ([]Agent, error) { res := []Agent{} query := ` select hostname, port, token, last_submitted, mysql_port from host_agent order by hostname ` db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { agent := Agent{} agent.Hostname = m.GetString("hostname") agent.Port = m.GetInt("port") agent.MySQLPort = m.GetInt64("mysql_port") agent.Token = "" agent.LastSubmitted = m.GetString("last_submitted") res = append(res, agent) return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
func getHostAttributesByClause(whereClause string) ([]HostAttributes, error) { res := []HostAttributes{} query := fmt.Sprintf(` select hostname, attribute_name, attribute_value, submit_timestamp , ifnull(expire_timestamp, '') as expire_timestamp from host_attributes %s order by hostname, attribute_name `, whereClause) db, err := db.OpenOrchestrator() if err != nil { goto Cleanup } err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { hostAttributes := HostAttributes{} hostAttributes.Hostname = m.GetString("hostname") hostAttributes.AttributeName = m.GetString("attribute_name") hostAttributes.AttributeValue = m.GetString("attribute_value") hostAttributes.SubmitTimestamp = m.GetString("submit_timestamp") hostAttributes.ExpireTimestamp = m.GetString("expire_timestamp") res = append(res, hostAttributes) return nil }) Cleanup: if err != nil { log.Errore(err) } return res, err }
// ReadClusters reads names of all known clusters func ReadClusters() ([]string, error) { clusterNames := []string{} db, err := db.OpenOrchestrator() if err != nil { return clusterNames, log.Errore(err) } query := fmt.Sprintf(` select cluster_name from database_instance group by cluster_name`) err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error { clusterNames = append(clusterNames, m.GetString("cluster_name")) return nil }) return clusterNames, err }
// UpdateInstanceLastChecked updates the last_check timestamp in the orchestrator backed database // for a given instance func UpdateInstanceLastChecked(instanceKey *InstanceKey) error { db, err := db.OpenOrchestrator() if err != nil { return log.Errore(err) } _, err = sqlutils.Exec(db, ` update database_instance set last_checked = NOW() where hostname = ? and port = ?`, instanceKey.Hostname, instanceKey.Port, ) if err != nil { return log.Errore(err) } return nil }