// AsciiTopology returns a string representation of the topology of given clusterName.
func AsciiTopology(clusterName string) (string, error) {
	instances, err := ReadClusterInstances(clusterName)
	if err != nil {
		return "", err
	}

	instancesMap := make(map[InstanceKey]*Instance)
	for _, instance := range instances {
		log.Debugf("instanceKey: %+v", instance.Key)
		instancesMap[instance.Key] = instance
	}

	replicationMap := make(map[*Instance][]*Instance)
	var masterInstance *Instance
	// Investigate slaves:
	for _, instance := range instances {
		master, ok := instancesMap[instance.MasterKey]
		if ok {
			if _, ok := replicationMap[master]; !ok {
				replicationMap[master] = []*Instance{}
			}
			replicationMap[master] = append(replicationMap[master], instance)
		} else {
			masterInstance = instance
		}
	}
	resultArray := getAsciiTopologyEntry(0, masterInstance, replicationMap)
	result := strings.Join(resultArray, "\n")
	return result, nil
}
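// A minimal usage sketch for AsciiTopology; "mycluster" is a hypothetical
// cluster name assumed to already be known to the backend database.
func exampleAsciiTopology() {
	if topology, err := AsciiTopology("mycluster"); err == nil {
		fmt.Println(topology)
	} else {
		log.Errore(err)
	}
}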
// executeAgentCommand issues a command against the orchestrator-agent service on the
// given host, appending the authentication token and optionally invoking onResponse
// with the raw response body.
func executeAgentCommand(hostname string, command string, onResponse *func([]byte)) (Agent, error) {
	agent, token, err := readAgentBasicInfo(hostname)
	if err != nil {
		return agent, err
	}

	// All seems to be in order. Now make some inquiries from orchestrator-agent service:
	uri := baseAgentUri(agent.Hostname, agent.Port)

	var fullCommand string
	if strings.Contains(command, "?") {
		fullCommand = fmt.Sprintf("%s&token=%s", command, token)
	} else {
		fullCommand = fmt.Sprintf("%s?token=%s", command, token)
	}
	log.Debugf("orchestrator-agent command: %s", fullCommand)
	agentCommandUri := fmt.Sprintf("%s/%s", uri, fullCommand)

	body, err := readResponse(http.Get(agentCommandUri))
	if err != nil {
		return agent, log.Errore(err)
	}
	if onResponse != nil {
		(*onResponse)(body)
	}

	return agent, err
}
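// A minimal sketch showing how executeAgentCommand's onResponse callback can be
// used; "mysql-status" is one of the agent endpoints queried elsewhere in this
// file, and here the raw response body is simply logged.
func exampleAgentCommand(hostname string) (Agent, error) {
	onResponse := func(body []byte) {
		log.Debugf("agent response: %s", string(body))
	}
	return executeAgentCommand(hostname, "mysql-status", &onResponse)
}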
// GetAgent gets a single agent status from the agent service
func GetAgent(hostname string) (Agent, error) {
	agent, token, err := readAgentBasicInfo(hostname)
	if err != nil {
		return agent, log.Errore(err)
	}

	// All seems to be in order. Now make some inquiries from orchestrator-agent service:
	{
		uri := baseAgentUri(agent.Hostname, agent.Port)
		log.Debugf("orchestrator-agent uri: %s", uri)

		{
			availableLocalSnapshotsUri := fmt.Sprintf("%s/available-snapshots-local?token=%s", uri, token)
			body, err := readResponse(http.Get(availableLocalSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.AvailableLocalSnapshots)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			availableSnapshotsUri := fmt.Sprintf("%s/available-snapshots?token=%s", uri, token)
			body, err := readResponse(http.Get(availableSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.AvailableSnapshots)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			lvSnapshotsUri := fmt.Sprintf("%s/lvs-snapshots?token=%s", uri, token)
			body, err := readResponse(http.Get(lvSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.LogicalVolumes)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mountUri := fmt.Sprintf("%s/mount?token=%s", uri, token)
			body, err := readResponse(http.Get(mountUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MountPoint)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mySQLRunningUri := fmt.Sprintf("%s/mysql-status?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLRunningUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLRunning)
			}
			// Actually an error is OK here since "status" returns with non-zero exit code when MySQL not running
		}
		{
			mySQLPortUri := fmt.Sprintf("%s/mysql-port?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLPortUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLPort)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mySQLDiskUsageUri := fmt.Sprintf("%s/mysql-du?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLDiskUsageUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLDiskUsage)
			}
			if err != nil {
				log.Errore(err)
			}
		}
	}
	return agent, err
}
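// A minimal usage sketch for GetAgent; "db-host-01" is a hypothetical hostname
// assumed to be known to the agents backend (i.e. resolvable by readAgentBasicInfo).
func exampleGetAgent() {
	agent, err := GetAgent("db-host-01")
	if err != nil {
		log.Errore(err)
		return
	}
	log.Debugf("agent status: %+v", agent)
}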
func baseAgentUri(agentHostname string, agentPort int) string {
	uri := fmt.Sprintf("http://%s:%d/api", agentHostname, agentPort)
	log.Debugf("orchestrator-agent uri: %s", uri)
	return uri
}
// DiscoverInstance will attempt discovering an instance (unless it is already up to date) and will
// list down its master and slaves (if any) for further discovery.
func DiscoverInstance(instanceKey inst.InstanceKey) {
	instanceKey.Formalize()
	if !instanceKey.IsValid() {
		return
	}

	instance, found, err := inst.ReadInstance(&instanceKey)

	if found && instance.IsUpToDate && instance.IsLastCheckValid {
		// we've already discovered this one. Skip!
		goto Cleanup
	}
	// First we've ever heard of this instance. Continue investigation:
	instance, err = inst.ReadTopologyInstance(&instanceKey)

	// panic can occur (IO stuff). Therefore it may happen
	// that instance is nil. Check it.
	if err != nil || instance == nil {
		goto Cleanup
	}

	fmt.Printf("host: %+v, master: %+v\n", instance.Key, instance.MasterKey)

	// Investigate slaves:
	for _, slaveKey := range instance.SlaveHosts.GetInstanceKeys() {
		discoveryInstanceKeys <- slaveKey
	}
	// Investigate master:
	discoveryInstanceKeys <- instance.MasterKey

Cleanup:
	return
}

// StartDiscovery begins a one-time asynchronous discovery process for the given
// instance and all of its topology connected instances.
// That is, the instance will be investigated for master and slaves, and the routines will follow on
// each and every such found master/slave.
// In essence, assuming all slaves in a replication topology are running, and given a single instance
// in such topology, this function will detect the entire topology.
func StartDiscovery(instanceKey inst.InstanceKey) {
	log.Infof("Starting discovery at %+v", instanceKey)
	pendingTokens := make(chan bool, maxConcurrency)
	completedTokens := make(chan bool, maxConcurrency)

	AccountedDiscoverInstance(instanceKey, pendingTokens, completedTokens)
	go handleDiscoveryRequests(pendingTokens, completedTokens)

	// Block until all are complete
	for {
		select {
		case <-pendingTokens:
			<-completedTokens
		default:
			inst.AuditOperation("start-discovery", &instanceKey, "")
			return
		}
	}
}

// ContinuousDiscovery starts an asynchronous infinite discovery process where instances are
// periodically investigated and their status captured, and long since unseen instances are
// purged and forgotten.
func ContinuousDiscovery() {
	log.Infof("Starting continuous discovery")
	go handleDiscoveryRequests(nil, nil)

	tick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second)
	forgetUnseenTick := time.Tick(time.Hour)
	for range tick {
		instanceKeys, _ := inst.ReadOutdatedInstanceKeys()
		log.Debugf("outdated keys: %+v", instanceKeys)
		for _, instanceKey := range instanceKeys {
			discoveryInstanceKeys <- instanceKey
		}
		// See if we should also forget instances (lower frequency)
		select {
		case <-forgetUnseenTick:
			inst.ForgetLongUnseenInstances()
		default:
		}
	}
}
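// A minimal sketch of the worker pattern behind handleDiscoveryRequests, shown
// here under a hypothetical name: it is assumed the real function similarly
// drains discoveryInstanceKeys and hands each key to DiscoverInstance, optionally
// accounting for pending/completed work via the token channels.
func discoveryWorkerSketch(pendingTokens chan bool, completedTokens chan bool) {
	for instanceKey := range discoveryInstanceKeys {
		if pendingTokens != nil {
			pendingTokens <- true
		}
		DiscoverInstance(instanceKey)
		if completedTokens != nil {
			completedTokens <- true
		}
	}
}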
// WriteInstance stores an instance in the orchestrator backend
func WriteInstance(instance *Instance, lastError error) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}

	_, err = sqlutils.Exec(db, `
		replace into database_instance (
			hostname,
			port,
			last_checked,
			server_id,
			version,
			binlog_format,
			log_bin,
			log_slave_updates,
			binary_log_file,
			binary_log_pos,
			master_host,
			master_port,
			slave_sql_running,
			slave_io_running,
			master_log_file,
			read_master_log_pos,
			relay_master_log_file,
			exec_master_log_pos,
			seconds_behind_master,
			slave_lag_seconds,
			num_slave_hosts,
			slave_hosts,
			cluster_name
		) values (?, ?, NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		instance.Key.Hostname,
		instance.Key.Port,
		instance.ServerID,
		instance.Version,
		instance.Binlog_format,
		instance.LogBinEnabled,
		instance.LogSlaveUpdatesEnabled,
		instance.SelfBinlogCoordinates.LogFile,
		instance.SelfBinlogCoordinates.LogPos,
		instance.MasterKey.Hostname,
		instance.MasterKey.Port,
		instance.Slave_SQL_Running,
		instance.Slave_IO_Running,
		instance.ReadBinlogCoordinates.LogFile,
		instance.ReadBinlogCoordinates.LogPos,
		instance.ExecBinlogCoordinates.LogFile,
		instance.ExecBinlogCoordinates.LogPos,
		instance.SecondsBehindMaster,
		instance.SlaveLagSeconds,
		len(instance.SlaveHosts),
		instance.GetSlaveHostsAsJson(),
		instance.ClusterName,
	)
	if err != nil {
		return log.Errore(err)
	}

	if lastError == nil {
		sqlutils.Exec(db, `
			update database_instance set last_seen = NOW() where hostname=? and port=?
		`, instance.Key.Hostname,
			instance.Key.Port,
		)
	} else {
		log.Debugf("WriteInstance: will not update database_instance due to error: %+v", lastError)
	}
	return nil
}
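// A minimal usage sketch for WriteInstance, assuming an instance has just been
// read from the live topology; ReadTopologyInstance is referenced elsewhere in
// this codebase and is assumed here to return (*Instance, error).
func exampleWriteInstance(instanceKey *InstanceKey) error {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return log.Errore(err)
	}
	// lastError is nil here, so last_seen will also be updated.
	return WriteInstance(instance, err)
}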