Example #1
// AsciiTopology returns a string representation of the topology of the given clusterName.
func AsciiTopology(clusterName string) (string, error) {
	instances, err := ReadClusterInstances(clusterName)
	if err != nil {
		return "", err
	}

	instancesMap := make(map[InstanceKey]*Instance)
	for _, instance := range instances {
		log.Debugf("instanceKey: %+v", instance.Key)
		instancesMap[instance.Key] = instance
	}

	replicationMap := make(map[*Instance][]*Instance)
	var masterInstance *Instance
	// Investigate slaves:
	for _, instance := range instances {
		master, ok := instancesMap[instance.MasterKey]
		if ok {
			if _, ok := replicationMap[master]; !ok {
				replicationMap[master] = []*Instance{}
			}
			replicationMap[master] = append(replicationMap[master], instance)
		} else {
			masterInstance = instance
		}
	}
	resultArray := getAsciiTopologyEntry(0, masterInstance, replicationMap)
	result := strings.Join(resultArray, "\n")
	return result, nil
}
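For context, a minimal usage sketch follows. The helper name and the cluster name are hypothetical; it assumes the call is made from the same package, with the same log and fmt imports the examples rely on.

// printClusterTopology is a hypothetical helper that prints the ASCII topology of a cluster.
func printClusterTopology(clusterName string) {
	topology, err := AsciiTopology(clusterName)
	if err != nil {
		log.Errore(err)
		return
	}
	fmt.Println(topology)
}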
Example #2
// executeAgentCommand sends the given command to the orchestrator-agent service on hostname,
// appending the agent's token, and optionally invokes onResponse with the raw response body.
func executeAgentCommand(hostname string, command string, onResponse *func([]byte)) (Agent, error) {
	agent, token, err := readAgentBasicInfo(hostname)
	if err != nil {
		return agent, err
	}

	// All seems to be in order. Now query the orchestrator-agent service:
	uri := baseAgentUri(agent.Hostname, agent.Port)

	var fullCommand string
	if strings.Contains(command, "?") {
		fullCommand = fmt.Sprintf("%s&token=%s", command, token)
	} else {
		fullCommand = fmt.Sprintf("%s?token=%s", command, token)
	}
	log.Debugf("orchestrator-agent command: %s", fullCommand)
	agentCommandUri := fmt.Sprintf("%s/%s", uri, fullCommand)
	body, err := readResponse(http.Get(agentCommandUri))
	if err != nil {
		return agent, log.Errore(err)
	}
	if onResponse != nil {
		(*onResponse)(body)
	}

	return agent, err
}
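A hedged usage sketch of the callback parameter follows. The wrapper name is hypothetical; "mysql-status" is one of the agent endpoints queried in GetAgent below.

// runAgentStatus is a hypothetical wrapper that logs the raw response body of the mysql-status command.
func runAgentStatus(hostname string) (Agent, error) {
	onResponse := func(body []byte) {
		log.Debugf("agent response: %s", string(body))
	}
	// The callback is passed by pointer, matching the *func([]byte) parameter.
	return executeAgentCommand(hostname, "mysql-status", &onResponse)
}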
Example #3
// GetAgent gets a single agent status from the agent service
func GetAgent(hostname string) (Agent, error) {
	agent, token, err := readAgentBasicInfo(hostname)
	if err != nil {
		return agent, log.Errore(err)
	}

	// All seems to be in order. Now query the orchestrator-agent service:
	{
		uri := baseAgentUri(agent.Hostname, agent.Port)
		log.Debugf("orchestrator-agent uri: %s", uri)

		{
			availableLocalSnapshotsUri := fmt.Sprintf("%s/available-snapshots-local?token=%s", uri, token)
			body, err := readResponse(http.Get(availableLocalSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.AvailableLocalSnapshots)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			availableSnapshotsUri := fmt.Sprintf("%s/available-snapshots?token=%s", uri, token)
			body, err := readResponse(http.Get(availableSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.AvailableSnapshots)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			lvSnapshotsUri := fmt.Sprintf("%s/lvs-snapshots?token=%s", uri, token)
			body, err := readResponse(http.Get(lvSnapshotsUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.LogicalVolumes)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mountUri := fmt.Sprintf("%s/mount?token=%s", uri, token)
			body, err := readResponse(http.Get(mountUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MountPoint)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mySQLRunningUri := fmt.Sprintf("%s/mysql-status?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLRunningUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLRunning)
			}
			// An error is acceptable here, since "status" returns a non-zero exit code when MySQL is not running
		}
		{
			mySQLRunningUri := fmt.Sprintf("%s/mysql-port?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLRunningUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLPort)
			}
			if err != nil {
				log.Errore(err)
			}
		}
		{
			mySQLDiskUsageUri := fmt.Sprintf("%s/mysql-du?token=%s", uri, token)
			body, err := readResponse(http.Get(mySQLDiskUsageUri))
			if err == nil {
				err = json.Unmarshal(body, &agent.MySQLDiskUsage)
			}
			if err != nil {
				log.Errore(err)
			}
		}
	}
	return agent, err
}
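As a rough usage sketch (the caller name and hostname are hypothetical; the printed fields are the ones populated above):

// inspectAgent is a hypothetical caller that prints a few of the fields GetAgent fills in.
func inspectAgent(hostname string) {
	agent, err := GetAgent(hostname)
	if err != nil {
		log.Errore(err)
		return
	}
	fmt.Printf("MySQL running: %+v, port: %+v, disk usage: %+v\n", agent.MySQLRunning, agent.MySQLPort, agent.MySQLDiskUsage)
	fmt.Printf("available snapshots: %+v\n", agent.AvailableSnapshots)
}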
Example #4
// baseAgentUri returns the base API URI of the orchestrator-agent service listening on the given host and port.
func baseAgentUri(agentHostname string, agentPort int) string {
	uri := fmt.Sprintf("http://%s:%d/api", agentHostname, agentPort)
	log.Debugf("orchestrator-agent uri: %s", uri)
	return uri
}
Example #5
// DiscoverInstance will attempt to discover an instance (unless it is already up to date) and will
// enqueue its master and slaves (if any) for further discovery.
func DiscoverInstance(instanceKey inst.InstanceKey) {
	instanceKey.Formalize()
	if !instanceKey.IsValid() {
		return
	}

	instance, found, err := inst.ReadInstance(&instanceKey)

	if found && instance.IsUpToDate && instance.IsLastCheckValid {
		// we've already discovered this one. Skip!
		goto Cleanup
	}
	// This is the first we've heard of this instance, or its data is stale. Continue the investigation:
	instance, err = inst.ReadTopologyInstance(&instanceKey)
	// Errors can occur here (e.g. I/O problems), in which case instance may be nil; check both.
	if err != nil || instance == nil {
		goto Cleanup
	}

	fmt.Printf("host: %+v, master: %+v\n", instance.Key, instance.MasterKey)

	// Investigate slaves:
	for _, slaveKey := range instance.SlaveHosts.GetInstanceKeys() {
		discoveryInstanceKeys <- slaveKey
	}
	// Investigate master:
	discoveryInstanceKeys <- instance.MasterKey

Cleanup:
	return
}

// StartDiscovery begins a one-time asynchronous discovery process for the given
// instance and all of its topology-connected instances.
// That is, the instance will be investigated for its master and slaves, and the routines will
// follow up on each and every such master/slave found.
// In essence, assuming all slaves in a replication topology are running, and given a single instance
// in such a topology, this function will detect the entire topology.
func StartDiscovery(instanceKey inst.InstanceKey) {
	log.Infof("Starting discovery at %+v", instanceKey)
	pendingTokens := make(chan bool, maxConcurrency)
	completedTokens := make(chan bool, maxConcurrency)

	AccountedDiscoverInstance(instanceKey, pendingTokens, completedTokens)
	go handleDiscoveryRequests(pendingTokens, completedTokens)

	// Block until all are complete
	for {
		select {
		case <-pendingTokens:
			<-completedTokens
		default:
			inst.AuditOperation("start-discovery", &instanceKey, "")
			return
		}
	}
}

// ContinuousDiscovery starts an asynchronous, infinite discovery process in which instances are
// periodically investigated and their status captured, and instances unseen for a long time are
// purged and forgotten.
func ContinuousDiscovery() {
	log.Infof("Starting continuous discovery")
	go handleDiscoveryRequests(nil, nil)
	tick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second)
	forgetUnseenTick := time.Tick(time.Hour)
	for range tick {
		instanceKeys, _ := inst.ReadOutdatedInstanceKeys()
		log.Debugf("outdated keys: %+v", instanceKeys)
		for _, instanceKey := range instanceKeys {
			discoveryInstanceKeys <- instanceKey
		}
		// See if we should also forget instances (lower frequency)
		select {
		case <-forgetUnseenTick:
			inst.ForgetLongUnseenInstances()
		default:
		}
	}
}
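For orientation, a minimal hypothetical entry point that seeds discovery from a single host and port; it assumes InstanceKey exposes Hostname and Port fields, as the instance.Key usage elsewhere in these examples suggests.

// discoverSeed is a hypothetical entry point: discover one instance and let discovery fan out from there.
func discoverSeed(hostname string, port int) {
	seedKey := inst.InstanceKey{Hostname: hostname, Port: port}
	StartDiscovery(seedKey)
}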
Example #6
// WriteInstance stores an instance in the orchestrator backend
func WriteInstance(instance *Instance, lastError error) error {
	db, err := db.OpenOrchestrator()
	if err != nil {
		return log.Errore(err)
	}

	_, err = sqlutils.Exec(db, `
		replace into database_instance (
			hostname,
			port,
			last_checked,
			server_id,
			version,
			binlog_format,
			log_bin,
			log_slave_updates,
			binary_log_file,
			binary_log_pos,
			master_host,
			master_port,
			slave_sql_running,
			slave_io_running,
			master_log_file,
			read_master_log_pos,
			relay_master_log_file,
			exec_master_log_pos,
			seconds_behind_master,
			slave_lag_seconds,
			num_slave_hosts,
			slave_hosts,
			cluster_name
		) values (?, ?, NOW(), ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		instance.Key.Hostname,
		instance.Key.Port,
		instance.ServerID,
		instance.Version,
		instance.Binlog_format,
		instance.LogBinEnabled,
		instance.LogSlaveUpdatesEnabled,
		instance.SelfBinlogCoordinates.LogFile,
		instance.SelfBinlogCoordinates.LogPos,
		instance.MasterKey.Hostname,
		instance.MasterKey.Port,
		instance.Slave_SQL_Running,
		instance.Slave_IO_Running,
		instance.ReadBinlogCoordinates.LogFile,
		instance.ReadBinlogCoordinates.LogPos,
		instance.ExecBinlogCoordinates.LogFile,
		instance.ExecBinlogCoordinates.LogPos,
		instance.SecondsBehindMaster,
		instance.SlaveLagSeconds,
		len(instance.SlaveHosts),
		instance.GetSlaveHostsAsJson(),
		instance.ClusterName,
	)
	if err != nil {
		return log.Errore(err)
	}

	if lastError == nil {
		sqlutils.Exec(db, `
			update database_instance set last_seen = NOW() where hostname=? and port=?
			`, instance.Key.Hostname, instance.Key.Port,
		)
	} else {
		log.Debugf("WriteInstance: will not update database_instance due to error: %+v", lastError)
	}
	return nil
}
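Finally, a hedged sketch of how WriteInstance might be fed from topology reads, assuming it lives in the same package as ReadTopologyInstance (the helper name is hypothetical):

// persistInstance is a hypothetical helper: it reads an instance from the live topology and stores
// the result, passing the read error through so WriteInstance can decide whether to update last_seen.
func persistInstance(instanceKey *InstanceKey) error {
	instance, err := ReadTopologyInstance(instanceKey)
	if instance == nil {
		return err
	}
	return WriteInstance(instance, err)
}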