// GetCandidateSiblingOfIntermediateMaster chooses the best sibling of a dead intermediate master
// to whom the IM's slaves can be moved.
func GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance *inst.Instance) (*inst.Instance, error) {
	siblings, err := inst.ReadSlaveInstances(&intermediateMasterInstance.MasterKey)
	if err != nil {
		return nil, err
	}
	if len(siblings) <= 1 {
		return nil, log.Errorf("topology_recovery: no siblings found for %+v", intermediateMasterInstance.Key)
	}
	sort.Sort(sort.Reverse(InstancesByCountSlaves(siblings)))

	// In the next series of steps we attempt to return a good replacement.
	// None of the attempts below is certain to pick a winning server. Perhaps the picked server is not
	// sufficiently up-to-date -- but this is unlikely in the general case, and, well, it's an attempt.
	// It's Plan A, and we have Plans B & C if this fails.

	// First, try to return an "is_candidate" server in the same DC & env
	log.Infof("topology_recovery: searching for the best candidate sibling of dead intermediate master")
	for _, sibling := range siblings {
		sibling := sibling
		if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) &&
			sibling.IsCandidate &&
			sibling.DataCenter == intermediateMasterInstance.DataCenter &&
			sibling.PhysicalEnvironment == intermediateMasterInstance.PhysicalEnvironment {
			log.Infof("topology_recovery: found %+v as the ideal candidate", sibling.Key)
			return sibling, nil
		}
	}
	// Next, go for anything else in the same DC & env
	for _, sibling := range siblings {
		sibling := sibling
		if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) &&
			sibling.DataCenter == intermediateMasterInstance.DataCenter &&
			sibling.PhysicalEnvironment == intermediateMasterInstance.PhysicalEnvironment {
			log.Infof("topology_recovery: found %+v as a replacement in same dc & environment", sibling.Key)
			return sibling, nil
		}
	}
	// Nothing in the same DC & env; settle for any "is_candidate"
	for _, sibling := range siblings {
		sibling := sibling
		if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) && sibling.IsCandidate {
			log.Infof("topology_recovery: found %+v as a good candidate", sibling.Key)
			return sibling, nil
		}
	}
	// Haven't found an "is_candidate". Just take whatever is valid.
	for _, sibling := range siblings {
		sibling := sibling
		if isValidAsCandidateSiblingOfIntermediateMaster(intermediateMasterInstance, sibling) {
			log.Infof("topology_recovery: found %+v as a replacement", sibling.Key)
			return sibling, nil
		}
	}
	return nil, log.Errorf("topology_recovery: cannot find candidate sibling of %+v", intermediateMasterInstance.Key)
}
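// Editor's note: the four descending-priority passes above could be folded into a single
// loop over ordered predicates. A minimal sketch; firstMatchingSibling is a hypothetical
// helper, not part of the codebase:
func firstMatchingSibling(siblings [](*inst.Instance), predicates []func(*inst.Instance) bool) *inst.Instance {
	// Predicates are ordered from most to least preferred; the first sibling to satisfy
	// the highest-priority predicate wins.
	for _, predicate := range predicates {
		for _, sibling := range siblings {
			if predicate(sibling) {
				return sibling
			}
		}
	}
	return nil
}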
// readAgentBasicInfo returns the basic data for an agent directly from the backend table (no agent access)
func readAgentBasicInfo(hostname string) (Agent, string, error) {
	agent := Agent{}
	token := ""
	query := `
		select
			hostname,
			port,
			token,
			last_submitted,
			mysql_port
		from
			host_agent
		where
			hostname = ?
		`
	err := db.QueryOrchestrator(query, sqlutils.Args(hostname), func(m sqlutils.RowMap) error {
		agent.Hostname = m.GetString("hostname")
		agent.Port = m.GetInt("port")
		agent.LastSubmitted = m.GetString("last_submitted")
		agent.MySQLPort = m.GetInt64("mysql_port")
		token = m.GetString("token")
		return nil
	})
	if err != nil {
		return agent, "", err
	}
	if token == "" {
		return agent, "", log.Errorf("Cannot get agent/token: %s", hostname)
	}
	return agent, token, nil
}
// SkipQuery skips a single query on a failed replication instance
func SkipQuery(instanceKey *InstanceKey) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}
	if !instance.IsSlave() {
		return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey)
	}
	if instance.Slave_SQL_Running {
		return instance, fmt.Errorf("Slave SQL thread is running on %+v", instanceKey)
	}
	if instance.LastSQLError == "" {
		return instance, fmt.Errorf("No SQL error on %+v", instanceKey)
	}
	if *config.RuntimeCLIFlags.Noop {
		return instance, fmt.Errorf("noop: aborting skip-query operation on %+v; signalling error but nothing went wrong", *instanceKey)
	}
	log.Debugf("Skipping one query on %+v", instanceKey)
	if instance.UsingOracleGTID {
		err = skipQueryOracleGtid(instance)
	} else if instance.UsingMariaDBGTID {
		return instance, log.Errorf("%+v is replicating with MariaDB GTID. To skip a query first disable GTID, then skip, then enable GTID again", *instanceKey)
	} else {
		err = skipQueryClassic(instance)
	}
	if err != nil {
		return instance, log.Errore(err)
	}
	AuditOperation("skip-query", instanceKey, "Skipped one query")
	return StartSlave(instanceKey)
}
// DiscoverAgentInstance: if a mysql port is available, try to discover against it
func DiscoverAgentInstance(hostname string, port int) error {
	agent, err := GetAgent(hostname)
	if err != nil {
		log.Errorf("Couldn't get agent for %s: %v", hostname, err)
		return err
	}

	instanceKey := agent.GetInstance()
	instance, err := inst.ReadTopologyInstance(instanceKey)
	if err != nil {
		log.Errorf("Failed to read topology for %v", instanceKey)
		return err
	}
	log.Infof("Discovered Agent Instance: %v", instance.Key)
	return nil
}
// InitGraphiteMetrics sets up periodic submission of metrics to graphite, if configured
func InitGraphiteMetrics() error {
	if config.Config.GraphiteAddr == "" {
		return nil
	}
	if config.Config.GraphitePath == "" {
		return log.Errorf("No graphite path provided (see GraphitePath config variable). Will not log to graphite")
	}
	addr, err := net.ResolveTCPAddr("tcp", config.Config.GraphiteAddr)
	if err != nil {
		return log.Errore(err)
	}
	graphitePathHostname := process.ThisHostname
	if config.Config.GraphiteConvertHostnameDotsToUnderscores {
		graphitePathHostname = strings.Replace(graphitePathHostname, ".", "_", -1)
	}
	graphitePath := config.Config.GraphitePath
	graphitePath = strings.Replace(graphitePath, "{hostname}", graphitePathHostname, -1)

	log.Debugf("Will log to graphite on %+v, %+v", config.Config.GraphiteAddr, graphitePath)

	go func() {
		go graphite.Graphite(metrics.DefaultRegistry, 1*time.Minute, graphitePath, addr)
		for range graphiteCallbackTick {
			for _, f := range graphiteTickCallbacks {
				go f()
			}
		}
	}()

	return nil
}
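// Editor's sketch: how a metric registered on metrics.DefaultRegistry gets shipped by the
// emitter above. The metric name is a hypothetical example.
func exampleRegisterGraphiteMetric() {
	counter := metrics.GetOrRegisterCounter("discoveries.attempt", metrics.DefaultRegistry)
	counter.Inc(1)
	// graphite.Graphite (started in InitGraphiteMetrics) flushes every registered metric,
	// e.g. as {GraphitePath}.discoveries.attempt, once per minute.
}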
// UnresolveHostname maps a resolved hostname back to the hostname it was resolved from,
// then re-resolves the result to verify the round trip.
func UnresolveHostname(instanceKey *InstanceKey) (InstanceKey, bool, error) {
	if *config.RuntimeCLIFlags.SkipUnresolve {
		return *instanceKey, false, nil
	}
	unresolvedHostname, err := readUnresolvedHostname(instanceKey.Hostname)
	if err != nil {
		return *instanceKey, false, log.Errore(err)
	}
	if unresolvedHostname == instanceKey.Hostname {
		// unchanged. Nothing to do
		return *instanceKey, false, nil
	}
	// We unresolved to a different hostname. We will now re-resolve to double-check!
	unresolvedKey := &InstanceKey{Hostname: unresolvedHostname, Port: instanceKey.Port}
	instance, err := ReadTopologyInstance(unresolvedKey)
	if err != nil {
		return *instanceKey, false, log.Errore(err)
	}
	if instance.IsBinlogServer() && config.Config.SkipBinlogServerUnresolveCheck {
		// Do nothing. Everything is assumed to be fine.
	} else if instance.Key.Hostname != instanceKey.Hostname {
		// Resolve(Unresolve(hostname)) != hostname ==> Bad; reject
		if *config.RuntimeCLIFlags.SkipUnresolveCheck {
			return *instanceKey, false, nil
		}
		return *instanceKey, false, log.Errorf("Error unresolving; hostname=%s, unresolved=%s, re-resolved=%s; mismatch. Skip/ignore with --skip-unresolve-check", instanceKey.Hostname, unresolvedKey.Hostname, instance.Key.Hostname)
	}
	return *unresolvedKey, true, nil
}
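// Editor's sketch of the invariant UnresolveHostname verifies: Resolve(Unresolve(h)) == h.
// The map and resolver below are stand-ins for the backend unresolve data and DNS/VIP state;
// all names are hypothetical.
func exampleUnresolveRoundTrip() bool {
	unresolved := map[string]string{"db-0042.mycompany.com": "db-vip.mycompany.com"}
	resolve := func(hostname string) string {
		if hostname == "db-vip.mycompany.com" {
			return "db-0042.mycompany.com" // the VIP currently points back at db-0042
		}
		return hostname
	}
	hostname := "db-0042.mycompany.com"
	// It is only safe to report the unresolved name because it re-resolves to the original:
	return resolve(unresolved[hostname]) == hostname
}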
// readBinlogEventsChunk reads (as much as possible of) a chunk of binary log events starting at the given startingCoordinates
func readBinlogEventsChunk(instanceKey *InstanceKey, startingCoordinates BinlogCoordinates) ([]BinlogEvent, error) {
	events := []BinlogEvent{}
	db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port)
	if err != nil {
		return events, err
	}
	commandToken := math.TernaryString(startingCoordinates.Type == BinaryLog, "binlog", "relaylog")
	if startingCoordinates.LogFile == "" {
		return events, log.Errorf("readBinlogEventsChunk: empty binlog file name for %+v", *instanceKey)
	}
	query := fmt.Sprintf("show %s events in '%s' FROM %d LIMIT %d", commandToken, startingCoordinates.LogFile, startingCoordinates.LogPos, config.Config.BinlogEventsChunkSize)
	err = sqlutils.QueryRowsMap(db, query, func(m sqlutils.RowMap) error {
		binlogEvent := BinlogEvent{}
		binlogEvent.Coordinates.LogFile = m.GetString("Log_name")
		binlogEvent.Coordinates.LogPos = m.GetInt64("Pos")
		binlogEvent.Coordinates.Type = startingCoordinates.Type
		binlogEvent.NextEventPos = m.GetInt64("End_log_pos")
		binlogEvent.EventType = m.GetString("Event_type")
		binlogEvent.Info = m.GetString("Info")

		events = append(events, binlogEvent)
		return nil
	})
	return events, err
}
// getLastPseudoGTIDEntryInBinlog tries to find the last position of a Pseudo-GTID query entry in the given binary log,
// and also returns the full text of that entry.
// maxCoordinates is the position beyond which we should not read. This is relevant when reading relay logs; in particular,
// the last relay log. We must be careful not to scan for Pseudo-GTID entries past the position executed by the SQL thread.
// maxCoordinates == nil means no limit.
func getLastPseudoGTIDEntryInBinlog(instanceKey *InstanceKey, binlog string, binlogType BinlogType, maxCoordinates *BinlogCoordinates) (*BinlogCoordinates, string, error) {
	if binlog == "" {
		return nil, "", log.Errorf("getLastPseudoGTIDEntryInBinlog: empty binlog file name for %+v. maxCoordinates = %+v", *instanceKey, maxCoordinates)
	}
	binlogCoordinates := BinlogCoordinates{LogFile: binlog, LogPos: 0, Type: binlogType}
	db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port)
	if err != nil {
		return nil, "", err
	}

	moreRowsExpected := true
	var nextPos int64 = 0
	step := 0
	entryText := ""

	for moreRowsExpected {
		query := ""
		if binlogCoordinates.Type == BinaryLog {
			query = fmt.Sprintf("show binlog events in '%s' FROM %d LIMIT %d", binlog, nextPos, config.Config.BinlogEventsChunkSize)
		} else {
			query = fmt.Sprintf("show relaylog events in '%s' LIMIT %d,%d", binlog, (step * config.Config.BinlogEventsChunkSize), config.Config.BinlogEventsChunkSize)
		}
		moreRowsExpected = false
		queryRowsFunc := sqlutils.QueryRowsMap
		if config.Config.BufferBinlogEvents {
			queryRowsFunc = sqlutils.QueryRowsMapBuffered
		}
		err = queryRowsFunc(db, query, func(m sqlutils.RowMap) error {
			moreRowsExpected = true
			nextPos = m.GetInt64("End_log_pos")
			binlogEntryInfo := m.GetString("Info")
			if matched, _ := regexp.MatchString(config.Config.PseudoGTIDPattern, binlogEntryInfo); matched {
				if maxCoordinates != nil && maxCoordinates.SmallerThan(&BinlogCoordinates{LogFile: binlog, LogPos: m.GetInt64("Pos")}) {
					// past the limit
					moreRowsExpected = false
					return nil
				}
				binlogCoordinates.LogPos = m.GetInt64("Pos")
				entryText = binlogEntryInfo
				// Found a match. But we keep searching: we're interested in the LAST entry, and, alas,
				// we can only search in ASCENDING order...
			}
			return nil
		})
		if err != nil {
			return nil, "", err
		}
		step++
	}

	// Not found? Return nil. An error is reserved for SQL problems.
	if binlogCoordinates.LogPos == 0 {
		return nil, "", nil
	}
	return &binlogCoordinates, entryText, err
}
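// Editor's sketch: the kind of entry the scan above matches. Both the pattern and the
// statement text are illustrative assumptions, not orchestrator defaults.
func examplePseudoGTIDMatch() {
	pattern := `drop view if exists .*?\.pseudo_gtid_hint__` // e.g. a value for config.Config.PseudoGTIDPattern
	info := "drop view if exists meta.pseudo_gtid_hint__asc:56AB00123"
	matched, _ := regexp.MatchString(pattern, info)
	fmt.Println(matched) // true: this event's position would be recorded as the last-seen Pseudo-GTID
}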
// SearchEntryInInstanceBinlogs will search for a specific text entry within the binary logs of a given instance.
func SearchEntryInInstanceBinlogs(instance *Instance, entryText string, monotonicPseudoGTIDEntries bool) (*BinlogCoordinates, error) {
	cacheKey := getInstanceBinlogEntryKey(instance, entryText)
	coords, found := instanceBinlogEntryCache.Get(cacheKey)
	if found {
		// This is wonderful. We can skip the tedious GTID search in the binary log
		log.Debugf("Found instance Pseudo GTID entry coordinates in cache: %+v, %+v, %+v", instance.Key, entryText, coords)
		return coords.(*BinlogCoordinates), nil
	}

	// Look for GTID entry in given instance:
	log.Debugf("Searching for given pseudo gtid entry in %+v. monotonicPseudoGTIDEntries=%+v", instance.Key, monotonicPseudoGTIDEntries)
	currentBinlog := instance.SelfBinlogCoordinates
	var err error = nil
	for {
		log.Debugf("Searching for given pseudo gtid entry in binlog %+v of %+v", currentBinlog.LogFile, instance.Key)
		// One loop iteration per binary log. This might turn out to be a heavyweight operation. We wish to throttle
		// it such that the instance does not suffer. If it is a slave, we will only act as long as it's not lagging too much.
		if instance.SlaveRunning() {
			for {
				log.Debugf("%+v is a replicating slave. Verifying lag", instance.Key)
				instance, err = ReadTopologyInstance(&instance.Key)
				if err != nil {
					break
				}
				if instance.HasReasonableMaintenanceReplicationLag() {
					// good to go!
					break
				}
				log.Debugf("lag is too high on %+v. Throttling the search for pseudo gtid entry", instance.Key)
				time.Sleep(time.Duration(config.Config.ReasonableMaintenanceReplicationLagSeconds) * time.Second)
			}
		}
		var resultCoordinates BinlogCoordinates
		var found bool = false
		resultCoordinates, found, err = SearchEntryInBinlog(&instance.Key, currentBinlog.LogFile, entryText, monotonicPseudoGTIDEntries)
		if err != nil {
			break
		}
		if found {
			log.Debugf("Matched entry in %+v: %+v", instance.Key, resultCoordinates)
			instanceBinlogEntryCache.Set(cacheKey, &resultCoordinates, 0)
			return &resultCoordinates, nil
		}
		// Got here? Not found. Keep looking
		currentBinlog, err = currentBinlog.PreviousFileCoordinates()
		if err != nil {
			break
		}
		log.Debugf("- Will move next to binlog %+v", currentBinlog.LogFile)
	}

	return nil, log.Errorf("Cannot match pseudo GTID entry in binlogs of %+v; err: %+v", instance.Key, err)
}
// FlushBinaryLogsTo attempts to 'FLUSH BINARY LOGS' until the given binary log is reached
func FlushBinaryLogsTo(instanceKey *InstanceKey, logFile string) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	distance := instance.SelfBinlogCoordinates.FileNumberDistance(&BinlogCoordinates{LogFile: logFile})
	if distance < 0 {
		return nil, log.Errorf("FlushBinaryLogsTo: target log file %+v is smaller than current log file %+v", logFile, instance.SelfBinlogCoordinates.LogFile)
	}
	return FlushBinaryLogs(instanceKey, distance)
}
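// Editor's sketch of the file-number arithmetic FlushBinaryLogsTo relies on; a hypothetical
// stand-in for BinlogCoordinates.FileNumberDistance (assumes strconv and strings are imported).
func exampleFileNumberDistance(fromLogFile, toLogFile string) int {
	extractNumber := func(logFile string) int {
		tokens := strings.Split(logFile, ".")
		n, _ := strconv.Atoi(tokens[len(tokens)-1])
		return n
	}
	// exampleFileNumberDistance("mysql-bin.000012", "mysql-bin.000015") == 3:
	// three FLUSH BINARY LOGS statements advance from the former to the latter.
	return extractNumber(toLogFile) - extractNumber(fromLogFile)
}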
// FlushNontrivialResolveCacheToDatabase persists the in-memory hostname resolve cache to the
// backend database, writing only nontrivial entries (where hostname differs from its resolved value)
func FlushNontrivialResolveCacheToDatabase() error {
	if !HostnameResolveMethodIsNone() {
		return log.Errorf("FlushNontrivialResolveCacheToDatabase() called, but HostnameResolveMethod is %+v", config.Config.HostnameResolveMethod)
	}
	items, _ := HostnameResolveCache()
	for hostname := range items {
		resolvedHostname, found := hostnameResolvesLightweightCache.Get(hostname)
		if found && (resolvedHostname.(string) != hostname) {
			WriteResolvedHostname(hostname, resolvedHostname.(string))
		}
	}
	return nil
}
// StopSlaveNicely stops a slave such that SQL_thread and IO_thread are aligned (i.e.
// SQL_thread consumes all relay log entries).
// It will actually START the sql_thread even if the slave is completely stopped.
func StopSlaveNicely(instanceKey *InstanceKey, timeout time.Duration) (*Instance, error) {
	instance, err := ReadTopologyInstance(instanceKey)
	if err != nil {
		return instance, log.Errore(err)
	}

	if !instance.IsSlave() {
		return instance, fmt.Errorf("instance is not a slave: %+v", instanceKey)
	}

	_, err = ExecInstanceNoPrepare(instanceKey, `stop slave io_thread`)
	_, err = ExecInstanceNoPrepare(instanceKey, `start slave sql_thread`)

	if instance.SQLDelay == 0 {
		// Otherwise we don't bother.
		startTime := time.Now()
		for upToDate := false; !upToDate; {
			if timeout > 0 && time.Since(startTime) >= timeout {
				// timeout
				return nil, log.Errorf("StopSlaveNicely timeout on %+v", *instanceKey)
			}
			instance, err = ReadTopologyInstance(instanceKey)
			if err != nil {
				return instance, log.Errore(err)
			}

			if instance.SQLThreadUpToDate() {
				upToDate = true
			} else {
				time.Sleep(sqlThreadPollDuration)
			}
		}
	}
	_, err = ExecInstanceNoPrepare(instanceKey, `stop slave`)
	if err != nil {
		// Patch; current MaxScale behavior for STOP SLAVE is to throw an error if the slave is already stopped.
		if instance.isMaxScale() && err.Error() == "Error 1199: Slave connection is not running" {
			err = nil
		}
	}
	if err != nil {
		return instance, log.Errore(err)
	}
	instance, err = ReadTopologyInstance(instanceKey)
	log.Infof("Stopped slave nicely on %+v, Self:%+v, Exec:%+v", *instanceKey, instance.SelfBinlogCoordinates, instance.ExecBinlogCoordinates)
	return instance, err
}
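// Editor's sketch: a typical call site (assumed, for illustration), bounding the wait for
// the SQL thread to consume its relay logs at one minute before giving up.
func exampleStopSlaveNicely(instanceKey *InstanceKey) error {
	instance, err := StopSlaveNicely(instanceKey, time.Minute)
	if err != nil {
		return err
	}
	log.Infof("slave %+v stopped with relay logs fully consumed", instance.Key)
	return nil
}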
// Seed is the entry point for making a seed
func Seed(targetHostname string, sourceHostname string) (int64, error) {
	if targetHostname == sourceHostname {
		return 0, log.Errorf("Cannot seed %s onto itself", targetHostname)
	}
	seedId, err := SubmitSeedEntry(targetHostname, sourceHostname)
	if err != nil {
		return 0, log.Errore(err)
	}

	go func() {
		err := executeSeed(seedId, targetHostname, sourceHostname)
		updateSeedComplete(seedId, err)
	}()

	return seedId, nil
}
// executeProcesses executes a list of processes
func executeProcesses(processes []string, description string, topologyRecovery *TopologyRecovery, failOnError bool) error {
	var err error
	for _, command := range processes {
		command := replaceCommandPlaceholders(command, topologyRecovery)

		if cmdErr := os.CommandRun(command); cmdErr == nil {
			log.Infof("Executed %s command: %s", description, command)
		} else {
			if err == nil {
				// Note first error
				err = cmdErr
			}
			log.Errorf("Failed to execute %s command: %s", description, command)
			if failOnError {
				return err
			}
		}
	}
	return err
}
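// Editor's sketch of the kind of token substitution replaceCommandPlaceholders performs.
// The placeholder names {failedHost} and {failedPort} are assumptions for illustration;
// consult the documentation for the actual supported tokens.
func examplePlaceholderReplacement(command string, topologyRecovery *TopologyRecovery) string {
	analysisEntry := &topologyRecovery.AnalysisEntry
	replacer := strings.NewReplacer(
		"{failedHost}", analysisEntry.AnalyzedInstanceKey.Hostname,
		"{failedPort}", fmt.Sprintf("%d", analysisEntry.AnalyzedInstanceKey.Port),
	)
	return replacer.Replace(command)
}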
func getLastPseudoGTIDEntryInInstance(instance *Instance, maxBinlogCoordinates *BinlogCoordinates, exhaustiveSearch bool) (*BinlogCoordinates, string, error) {
	// Look for last GTID in instance:
	currentBinlog := instance.SelfBinlogCoordinates

	var err error = nil
	for err == nil {
		log.Debugf("Searching for latest pseudo gtid entry in binlog %+v of %+v", currentBinlog.LogFile, instance.Key)
		// Assign with `=` rather than `:=` so the `for err == nil` condition sees errors from
		// PreviousFileCoordinates() below (a `:=` here would shadow err and loop on a stale value).
		var resultCoordinates *BinlogCoordinates
		var entryInfo string
		resultCoordinates, entryInfo, err = getLastPseudoGTIDEntryInBinlog(&instance.Key, currentBinlog.LogFile, BinaryLog, maxBinlogCoordinates)
		if err != nil {
			return nil, "", err
		}
		if resultCoordinates != nil {
			log.Debugf("Found pseudo gtid entry in %+v, %+v", instance.Key, resultCoordinates)
			return resultCoordinates, entryInfo, err
		}
		if !exhaustiveSearch {
			break
		}
		currentBinlog, err = currentBinlog.PreviousFileCoordinates()
	}
	return nil, "", log.Errorf("Cannot find pseudo GTID entry in binlogs of %+v", instance.Key)
}
func getLastPseudoGTIDEntryInRelayLogs(instance *Instance, recordedInstanceRelayLogCoordinates BinlogCoordinates, exhaustiveSearch bool) (*BinlogCoordinates, string, error) {
	// Look for last GTID in relay logs:
	// Since MySQL does not provide a SHOW RELAY LOGS command, we heuristically start from the current
	// relay log (indicated by Relay_log_file) and walk backwards.
	// Eventually we will hit a relay log name which does not exist.
	currentRelayLog := recordedInstanceRelayLogCoordinates
	var err error = nil
	for err == nil {
		log.Debugf("Searching for latest pseudo gtid entry in relaylog %+v of %+v, up to pos %+v", currentRelayLog.LogFile, instance.Key, recordedInstanceRelayLogCoordinates)
		if resultCoordinates, entryInfo, err := getLastPseudoGTIDEntryInBinlog(&instance.Key, currentRelayLog.LogFile, RelayLog, &recordedInstanceRelayLogCoordinates); err != nil {
			return nil, "", err
		} else if resultCoordinates != nil {
			log.Debugf("Found pseudo gtid entry in %+v, %+v", instance.Key, resultCoordinates)
			return resultCoordinates, entryInfo, err
		}
		if !exhaustiveSearch {
			break
		}
		currentRelayLog, err = currentRelayLog.PreviousFileCoordinates()
	}
	return nil, "", log.Errorf("Cannot find pseudo GTID entry in relay logs of %+v", instance.Key)
}
// WriteAsyncRequest persists an async operation request in the backend database, to be picked up and executed later
func WriteAsyncRequest(asyncRequest *AsyncRequest) error {
	if asyncRequest.OperatedInstanceKey == nil {
		return log.Errorf("WriteAsyncRequest received nil asyncRequest.OperatedInstanceKey for command %+v", asyncRequest.Command)
	}
	destinationKey := asyncRequest.DestinationKey
	if destinationKey == nil {
		destinationKey = &inst.InstanceKey{}
	}
	writeFunc := func() error {
		_, err := db.ExecOrchestrator(`
			insert into async_request (
				command, hostname, port, destination_hostname, destination_port,
				pattern, gtid_hint, story, begin_timestamp, end_timestamp
			) values (
				?, ?, ?, ?, ?, ?, ?, ?, NULL, NULL
			)
			`,
			asyncRequest.Command,
			asyncRequest.OperatedInstanceKey.Hostname,
			asyncRequest.OperatedInstanceKey.Port,
			destinationKey.Hostname,
			destinationKey.Port,
			asyncRequest.Pattern,
			string(asyncRequest.GTIDHint),
			asyncRequest.Story,
		)
		return log.Errore(err)
	}
	return inst.ExecDBWriteFunc(writeFunc)
}
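// Editor's sketch: enqueueing an async request. Field names follow the struct usage above;
// the command string "relocate-slaves" is an assumption for illustration.
func exampleEnqueueAsyncRequest(instanceKey, destinationKey *inst.InstanceKey) error {
	asyncRequest := &AsyncRequest{
		Command:             "relocate-slaves",
		OperatedInstanceKey: instanceKey,
		DestinationKey:      destinationKey,
		Story:               "editor example: queue a relocation for later execution",
	}
	return WriteAsyncRequest(asyncRequest)
}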
// RecoverDeadCoMaster recovers a dead co-master, complete logic inside
func RecoverDeadCoMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) {
	analysisEntry := &topologyRecovery.AnalysisEntry
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
	otherCoMasterKey := &analysisEntry.AnalyzedInstanceMasterKey
	otherCoMaster, found, _ := inst.ReadInstance(otherCoMasterKey)
	if otherCoMaster == nil || !found {
		return nil, lostSlaves, topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not read info for co-master %+v of %+v", *otherCoMasterKey, *failedInstanceKey))
	}
	inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "problem found; will recover")
	if !skipProcesses {
		if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil {
			return nil, lostSlaves, topologyRecovery.AddError(err)
		}
	}

	log.Debugf("topology_recovery: RecoverDeadCoMaster: will recover %+v", *failedInstanceKey)

	var coMasterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID
	if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology {
		coMasterRecoveryType = MasterRecoveryGTID
	}

	log.Debugf("topology_recovery: RecoverDeadCoMaster: coMasterRecoveryType=%+v", coMasterRecoveryType)

	switch coMasterRecoveryType {
	case MasterRecoveryGTID:
		{
			lostSlaves, _, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil)
		}
	case MasterRecoveryPseudoGTID:
		{
			lostSlaves, _, _, promotedSlave, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer)
		}
	}
	topologyRecovery.AddError(err)

	mustPromoteOtherCoMaster := config.Config.CoMasterRecoveryMustPromoteOtherCoMaster
	if !otherCoMaster.ReadOnly {
		log.Debugf("topology_recovery: RecoverDeadCoMaster: other co-master %+v is writeable hence has to be promoted", otherCoMaster.Key)
		mustPromoteOtherCoMaster = true
	}
	log.Debugf("topology_recovery: RecoverDeadCoMaster: mustPromoteOtherCoMaster? %+v", mustPromoteOtherCoMaster)

	if promotedSlave != nil {
		topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key)
		if mustPromoteOtherCoMaster {
			log.Debugf("topology_recovery: mustPromoteOtherCoMaster. Verifying that %+v is/can be promoted", *otherCoMasterKey)
			promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey)
		} else {
			// We are allowed to promote any server
			promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, nil)

			// Note: the trailing `&& false` deliberately disables this branch; it is kept for documentation.
			if promotedSlave.DataCenter == otherCoMaster.DataCenter &&
				promotedSlave.PhysicalEnvironment == otherCoMaster.PhysicalEnvironment && false {
				// and _still_ we prefer to promote the co-master! They're in the same env & DC so no worries about geo issues!
				promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey)
			}
		}
		topologyRecovery.AddError(err)
	}

	if promotedSlave != nil {
		if mustPromoteOtherCoMaster && !promotedSlave.Key.Equals(otherCoMasterKey) {
			topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not manage to promote other-co-master %+v; was only able to promote %+v; CoMasterRecoveryMustPromoteOtherCoMaster is true, therefore failing", *otherCoMasterKey, promotedSlave.Key))
			promotedSlave = nil
		}
	}
	if promotedSlave != nil {
		topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key)
	}

	// OK, we may have someone promoted. Either this was the other co-master or another slave.
	// Note that we DO NOT attempt to set up a new co-master topology. We are content to remain with a single master.
	// I tried solving the "let's promote a slave and create a new co-master setup" but this turns out to be overly
	// complex due to various factors. I see it as risky and not worth the questionable benefit.
	// Maybe future me is a smarter person and finds a simple solution. Unlikely. I'm getting dumber.
	//
	// ...
	// Now that we're convinced, take a look at what we can be left with:
	// Say we started with M1<->M2<-S1, with M2 failing, and we promoted S1.
	// We now have M1->S1 (because S1 is promoted), S1->M2 (because that's what it remembers), M2->M1 (because that's what it remembers)
	// !! This is an evil 3-node circle that must be broken.
	// config.Config.ApplyMySQLPromotionAfterMasterFailover, if true, will cause it to break, because we would RESET SLAVE on S1,
	// but we want to make sure the circle is broken no matter what.
	// So, in the case we promoted not-the-other-co-master, we issue a detach-slave-master-host, which is a reversible operation
	if promotedSlave != nil && !promotedSlave.Key.Equals(otherCoMasterKey) {
		_, err = inst.DetachSlaveMasterHost(&promotedSlave.Key)
		topologyRecovery.AddError(log.Errore(err))
	}

	if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover {
		postponedFunction := func() error {
			log.Debugf("topology_recovery: - RecoverDeadCoMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves))
			for _, slave := range lostSlaves {
				slave := slave
				inst.DetachSlaveOperation(&slave.Key)
			}
			return nil
		}
		topologyRecovery.AddPostponedFunction(postponedFunction)
	}
	if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 {
		postponedFunction := func() error {
			inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), "RecoverDeadCoMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			for _, slave := range lostSlaves {
				slave := slave
				inst.BeginDowntime(&slave.Key, inst.GetMaintenanceOwner(), "RecoverDeadCoMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			}
			return nil
		}
		topologyRecovery.AddPostponedFunction(postponedFunction)
	}

	return promotedSlave, lostSlaves, err
}
// RecoverDeadIntermediateMaster performs intermediate master recovery; complete logic inside
func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (successorInstance *inst.Instance, err error) {
	analysisEntry := &topologyRecovery.AnalysisEntry
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
	recoveryResolved := false

	inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover")
	if !skipProcesses {
		if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil {
			return nil, topologyRecovery.AddError(err)
		}
	}

	intermediateMasterInstance, _, err := inst.ReadInstance(failedInstanceKey)
	if err != nil {
		return nil, topologyRecovery.AddError(err)
	}
	// Find a possible candidate (failure to find one is tolerated; we have fallback plans below)
	candidateSiblingOfIntermediateMaster, err := GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance)
	relocateSlavesToCandidateSibling := func() {
		if candidateSiblingOfIntermediateMaster == nil {
			return
		}
		// We have a candidate
		log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will attempt a candidate intermediate master: %+v", candidateSiblingOfIntermediateMaster.Key)
		relocatedSlaves, candidateSibling, err, errs := inst.RelocateSlaves(failedInstanceKey, &candidateSiblingOfIntermediateMaster.Key, "")
		topologyRecovery.AddErrors(errs)
		topologyRecovery.ParticipatingInstanceKeys.AddKey(candidateSiblingOfIntermediateMaster.Key)

		if len(relocatedSlaves) == 0 {
			log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: failed to move any slave to candidate intermediate master (%+v)", candidateSibling.Key)
			return
		}
		if err != nil || len(errs) > 0 {
			log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) did not complete: %+v", candidateSibling.Key, err)
			return
		}
		if err == nil {
			recoveryResolved = true
			successorInstance = candidateSibling

			inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
		}
	}
	// Plan A: find a replacement intermediate master in the same Data Center
	if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter == intermediateMasterInstance.DataCenter {
		relocateSlavesToCandidateSibling()
	}
	if !recoveryResolved {
		log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt regrouping of slaves")
		// Plan B: regroup (we wish to reduce cross-DC replication streams)
		_, _, _, regroupPromotedSlave, err := inst.RegroupSlaves(failedInstanceKey, true, nil, nil)
		if err != nil {
			topologyRecovery.AddError(err)
			log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: regroup failed on: %+v", err)
		}
		if regroupPromotedSlave != nil {
			topologyRecovery.ParticipatingInstanceKeys.AddKey(regroupPromotedSlave.Key)
		}
		// Plan C: try a replacement intermediate master in another DC...
		if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter != intermediateMasterInstance.DataCenter {
			log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt relocating to another DC server")
			relocateSlavesToCandidateSibling()
		}
	}
	if !recoveryResolved {
		// Do we still have leftovers? Some slaves couldn't move? Couldn't regroup? Only left with regroup's resulting leader?
		// Nothing moved?
		// We don't care much whether the regroup succeeded. We prefer that it did, in which case we only need to relocate up
		// one slave, but the operation is still valid if the regroup partially or completely failed. We just promote anything
		// not regrouped.
		// So, match up all that's left: Plan D
		log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt to relocate up from %+v", *failedInstanceKey)

		var errs []error
		var relocatedSlaves [](*inst.Instance)
		relocatedSlaves, successorInstance, err, errs = inst.RelocateSlaves(failedInstanceKey, &analysisEntry.AnalyzedInstanceMasterKey, "")
		topologyRecovery.AddErrors(errs)
		topologyRecovery.ParticipatingInstanceKeys.AddKey(analysisEntry.AnalyzedInstanceMasterKey)

		if len(relocatedSlaves) > 0 {
			recoveryResolved = true
			inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
		} else {
			err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey)
			topologyRecovery.AddError(err)
		}
	}
	if !recoveryResolved {
		successorInstance = nil
	}
	ResolveRecovery(topologyRecovery, successorInstance)
	return successorInstance, err
}
// GetNextBinlogCoordinatesToMatch is given a twin-coordinates couple for a would-be slave (instance) and another
// instance (other).
// This is part of the match-below process, and is the heart of the operation: matching the binlog events starting at
// the twin-coordinates (where both share the same Pseudo-GTID) until "instance" runs out of entries, hopefully
// before "other" runs out.
// If "other" runs out that means "instance" is more advanced in replication than "other", in which case we can't
// turn it into a slave of "other".
// Otherwise "instance" will point to the *next* binlog entry in "other"
func GetNextBinlogCoordinatesToMatch(instance *Instance, instanceCoordinates BinlogCoordinates, recordedInstanceRelayLogCoordinates BinlogCoordinates, maxBinlogCoordinates *BinlogCoordinates, other *Instance, otherCoordinates BinlogCoordinates) (*BinlogCoordinates, int, error) {
	fetchNextEvents := func(binlogCoordinates BinlogCoordinates) ([]BinlogEvent, error) {
		return getNextBinlogEventsChunk(instance, binlogCoordinates, 0)
	}
	instanceCursor := NewBinlogEventCursor(instanceCoordinates, fetchNextEvents)

	fetchOtherNextEvents := func(binlogCoordinates BinlogCoordinates) ([]BinlogEvent, error) {
		return getNextBinlogEventsChunk(other, binlogCoordinates, 0)
	}
	otherCursor := NewBinlogEventCursor(otherCoordinates, fetchOtherNextEvents)

	var beautifyCoordinatesLength int = 0
	rpad := func(s string, length int) string {
		if len(s) >= length {
			return s
		}
		return fmt.Sprintf("%s%s", s, strings.Repeat(" ", length-len(s)))
	}

	var lastConsumedEventCoordinates BinlogCoordinates
	var countMatchedEvents int = 0
	for {
		// Exhaust binlogs/relaylogs on instance. While iterating them, also iterate the otherInstance binlogs.
		// We expect entries on both to match, sequentially, until instance's binlogs/relaylogs are exhausted.
		var instanceEventInfo string
		var otherEventInfo string
		var eventCoordinates BinlogCoordinates
		var otherEventCoordinates BinlogCoordinates
		{
			// Extract next binlog/relaylog entry from instance:
			event, err := instanceCursor.nextRealEvent()
			if err != nil {
				return nil, 0, log.Errore(err)
			}
			if event != nil {
				lastConsumedEventCoordinates = event.Coordinates
			}

			switch instanceCoordinates.Type {
			case BinaryLog:
				if event == nil {
					// end of binary logs for instance:
					targetMatchCoordinates, err := otherCursor.getNextCoordinates()
					if err != nil {
						return nil, 0, log.Errore(err)
					}
					nextCoordinates, _ := instanceCursor.getNextCoordinates()
					if nextCoordinates.SmallerThan(&instance.SelfBinlogCoordinates) {
						return nil, 0, log.Errorf("Unexpected problem: instance binlog iteration ended before self coordinates. Ended with: %+v, self coordinates: %+v", nextCoordinates, instance.SelfBinlogCoordinates)
					}
					log.Debugf("Reached end of binary logs for instance, at %+v. Other coordinates: %+v", nextCoordinates, targetMatchCoordinates)
					return &targetMatchCoordinates, countMatchedEvents, nil
				}
			case RelayLog:
				// Argghhhh! The SHOW RELAY LOG EVENTS IN '...' statement returns CRAPPY values for End_log_pos:
				// instead of returning the end log pos of the current statement in the *relay log*, it shows
				// the end log pos of the matching statement in the *master's binary log*!
				// Yes, there's logic to this. But it means the next-coordinates are meaningless.
				// As a result, in the case where we exhaust (read to the end of) the relay log, we cannot do our last
				// nice sanity test that we've indeed reached the Relay_log_pos coordinate; we are only at the
				// last statement, which is SMALLER than Relay_log_pos; and there isn't a "Rotate" entry to serve as
				// a placeholder or anything. The log just ends and we can't be absolutely certain that the next
				// statement is indeed (futuristically) at End_log_pos.
				endOfScan := false
				if event == nil {
					// End of relay log...
					endOfScan = true
					log.Debugf("Reached end of relay log at %+v", recordedInstanceRelayLogCoordinates)
				} else if recordedInstanceRelayLogCoordinates.Equals(&event.Coordinates) {
					// We've passed the maxScanInstanceCoordinates (applies for relay logs)
					endOfScan = true
					log.Debugf("Reached slave relay log coordinates at %+v", recordedInstanceRelayLogCoordinates)
				} else if recordedInstanceRelayLogCoordinates.SmallerThan(&event.Coordinates) {
					return nil, 0, log.Errorf("Unexpected problem: relay log scan passed relay log position without hitting it. Ended with: %+v, relay log position: %+v", event.Coordinates, recordedInstanceRelayLogCoordinates)
				}
				if endOfScan {
					// end of scan for instance:
					targetMatchCoordinates, err := otherCursor.getNextCoordinates()
					if err != nil {
						log.Debugf("Cannot otherCursor.getNextCoordinates(). otherCoordinates=%+v, cached events in cursor: %d; index=%d", otherCoordinates, len(otherCursor.cachedEvents), otherCursor.currentEventIndex)
						return nil, 0, log.Errore(err)
					}
					// No further sanity checks (read the above lengthy explanation)
					log.Debugf("Reached limit of relay logs for instance, just after %+v. Other coordinates: %+v", lastConsumedEventCoordinates, targetMatchCoordinates)
					return &targetMatchCoordinates, countMatchedEvents, nil
				}
			}

			instanceEventInfo = event.Info
			eventCoordinates = event.Coordinates
			coordinatesStr := fmt.Sprintf("%+v", event.Coordinates)
			if len(coordinatesStr) > beautifyCoordinatesLength {
				beautifyCoordinatesLength = len(coordinatesStr)
			}
			log.Debugf("> %+v %+v; %+v", rpad(coordinatesStr, beautifyCoordinatesLength), event.EventType, strings.Split(strings.TrimSpace(instanceEventInfo), "\n")[0])
		}
		{
			// Extract next binlog/relaylog entry from otherInstance (intended master):
			event, err := otherCursor.nextRealEvent()
			if err != nil {
				return nil, 0, log.Errore(err)
			}
			if event == nil {
				// end of binary logs for otherInstance: this is unexpected and means instance is more advanced
				// than otherInstance
				return nil, 0, log.Errorf("Unexpected end of binary logs for assumed master (%+v). This means the instance which attempted to be a slave (%+v) was more advanced. Try the other way round", other.Key, instance.Key)
			}

			otherEventInfo = event.Info
			otherEventCoordinates = event.Coordinates
			coordinatesStr := fmt.Sprintf("%+v", event.Coordinates)
			if len(coordinatesStr) > beautifyCoordinatesLength {
				beautifyCoordinatesLength = len(coordinatesStr)
			}
			log.Debugf("< %+v %+v; %+v", rpad(coordinatesStr, beautifyCoordinatesLength), event.EventType, strings.Split(strings.TrimSpace(otherEventInfo), "\n")[0])
		}
		// Verify things are sane (the two extracted entries are identical):
		// (not strictly required by the algorithm but adds such a lovely self-sanity-testing essence)
		if instanceEventInfo != otherEventInfo {
			return nil, 0, log.Errorf("Mismatching entries, aborting: %+v <-> %+v", instanceEventInfo, otherEventInfo)
		}
		countMatchedEvents++
		if maxBinlogCoordinates != nil {
			// We're not searching to the end of binary logs / relay log exec pos; we stop at an instructed position.
			if eventCoordinates.Equals(maxBinlogCoordinates) {
				log.Debugf("maxBinlogCoordinates specified as %+v and reached. Stopping", *maxBinlogCoordinates)
				return &otherEventCoordinates, countMatchedEvents, nil
			} else if maxBinlogCoordinates.SmallerThan(&eventCoordinates) {
				return nil, 0, log.Errorf("maxBinlogCoordinates (%+v) exceeded but not met", *maxBinlogCoordinates)
			}
		}
	}
	// Won't get here
}
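// Editor's sketch of the matching idea above, reduced to string slices: walk two event
// streams in lockstep from a shared Pseudo-GTID anchor; when the first stream is exhausted,
// the position reached on the second stream is the match point. A hypothetical simplification,
// not the real cursor-based implementation.
func exampleMatchEventStreams(instanceEvents, otherEvents []string) (nextIndexInOther int, err error) {
	for i, entry := range instanceEvents {
		if i >= len(otherEvents) {
			return 0, fmt.Errorf("assumed master ran out of events; instance is more advanced")
		}
		if entry != otherEvents[i] {
			return 0, fmt.Errorf("mismatching entries: %s <-> %s", entry, otherEvents[i])
		}
	}
	// instance exhausted: replication should resume from this position on the other instance
	return len(instanceEvents), nil
}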
// AttemptRecoveryRegistration tries to add a recovery entry; if this fails that means recovery is already in place.
func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis, failIfFailedInstanceInActiveRecovery bool, failIfClusterInActiveRecovery bool) (*TopologyRecovery, error) {
	if failIfFailedInstanceInActiveRecovery {
		// Let's check if this instance has just been promoted recently and is still in its active period.
		// If so, we reject recovery registration to avoid flapping.
		recoveries, err := ReadInActivePeriodSuccessorInstanceRecovery(&analysisEntry.AnalyzedInstanceKey)
		if err != nil {
			return nil, log.Errore(err)
		}
		if len(recoveries) > 0 {
			RegisterBlockedRecoveries(analysisEntry, recoveries)
			return nil, log.Errorf("AttemptRecoveryRegistration: instance %+v has recently been promoted (by failover of %+v) and is in active period. It will not be failed over. You may acknowledge the failure on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey)
		}
	}
	if failIfClusterInActiveRecovery {
		// Let's check if this cluster has just experienced a failover and is still in its active period.
		// If so, we reject recovery registration to avoid flapping.
		recoveries, err := ReadInActivePeriodClusterRecovery(analysisEntry.ClusterDetails.ClusterName)
		if err != nil {
			return nil, log.Errore(err)
		}
		if len(recoveries) > 0 {
			RegisterBlockedRecoveries(analysisEntry, recoveries)
			return nil, log.Errorf("AttemptRecoveryRegistration: cluster %+v has recently experienced a failover (of %+v) and is in active period. It will not be failed over again. You may acknowledge the failure on this cluster (-c ack-cluster-recoveries) or on %+v (-c ack-instance-recoveries) to remove this blockage", analysisEntry.ClusterDetails.ClusterName, recoveries[0].AnalysisEntry.AnalyzedInstanceKey, recoveries[0].AnalysisEntry.AnalyzedInstanceKey)
		}
	}
	if !failIfFailedInstanceInActiveRecovery {
		// Implicitly acknowledge this instance's possibly existing active recoveries, provided they are completed.
		AcknowledgeInstanceCompletedRecoveries(&analysisEntry.AnalyzedInstanceKey, "orchestrator", fmt.Sprintf("implicit acknowledge due to user invocation of recovery on same instance: %+v", analysisEntry.AnalyzedInstanceKey))
		// The fact we only acknowledge a completed recovery solves the possible case of two DBAs simultaneously
		// trying to recover the same instance at the same time
	}

	sqlResult, err := db.ExecOrchestrator(`
		insert ignore
			into topology_recovery (
				hostname,
				port,
				in_active_period,
				start_active_period,
				end_active_period_unixtime,
				processing_node_hostname,
				processcing_node_token,
				analysis,
				cluster_name,
				cluster_alias,
				count_affected_slaves,
				slave_hosts,
				last_detection_id
			) values (
				?, ?, 1, NOW(), 0, ?, ?, ?, ?, ?, ?, ?,
				(select ifnull(max(detection_id), 0) from topology_failure_detection where hostname=? and port=?)
			)
		`,
		// Note: "processcing_node_token" above is spelled as-is (sic) to match the column name in the backend schema.
		analysisEntry.AnalyzedInstanceKey.Hostname,
		analysisEntry.AnalyzedInstanceKey.Port,
		process.ThisHostname,
		process.ProcessToken.Hash,
		string(analysisEntry.Analysis),
		analysisEntry.ClusterDetails.ClusterName,
		analysisEntry.ClusterDetails.ClusterAlias,
		analysisEntry.CountSlaves,
		analysisEntry.SlaveHosts.ToCommaDelimitedList(),
		analysisEntry.AnalyzedInstanceKey.Hostname,
		analysisEntry.AnalyzedInstanceKey.Port,
	)
	if err != nil {
		return nil, log.Errore(err)
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		return nil, log.Errore(err)
	}
	if rows == 0 {
		return nil, nil
	}
	// Success
	topologyRecovery := NewTopologyRecovery(*analysisEntry)
	topologyRecovery.Id, _ = sqlResult.LastInsertId()
	return topologyRecovery, nil
}
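// Editor's sketch of the registration pattern above: `insert ignore` against a unique key
// lets concurrent registrants race safely; only the first insert affects a row, and
// RowsAffected() == 0 tells the losers to back off. Assumes "database/sql" is imported;
// table and column names are illustrative.
func exampleFirstWriterWins(dbh *sql.DB, clusterName string) (registered bool, err error) {
	sqlResult, err := dbh.Exec(`insert ignore into recovery_lock (cluster_name) values (?)`, clusterName)
	if err != nil {
		return false, err
	}
	rows, err := sqlResult.RowsAffected()
	if err != nil {
		return false, err
	}
	return rows > 0, nil
}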
// SearchEntryInBinlog searches for a given binlog entry text (query) in the given binary log of a given instance
func SearchEntryInBinlog(instanceKey *InstanceKey, binlog string, entryText string, monotonicPseudoGTIDEntries bool) (BinlogCoordinates, bool, error) {
	binlogCoordinates := BinlogCoordinates{LogFile: binlog, LogPos: 0, Type: BinaryLog}
	if binlog == "" {
		return binlogCoordinates, false, log.Errorf("SearchEntryInBinlog: empty binlog file name for %+v", *instanceKey)
	}

	db, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port)
	if err != nil {
		return binlogCoordinates, false, err
	}

	moreRowsExpected := true
	skipRestOfBinlog := false
	alreadyMatchedAscendingPseudoGTID := false
	var nextPos int64 = 0

	for moreRowsExpected {
		query := fmt.Sprintf("show binlog events in '%s' FROM %d LIMIT %d", binlog, nextPos, config.Config.BinlogEventsChunkSize)

		// We don't know in advance when we will hit the end of the binlog. We will implicitly understand it when our
		// `show binlog events` query does not return any row.
		moreRowsExpected = false
		queryRowsFunc := sqlutils.QueryRowsMap
		if config.Config.BufferBinlogEvents {
			queryRowsFunc = sqlutils.QueryRowsMapBuffered
		}
		err = queryRowsFunc(db, query, func(m sqlutils.RowMap) error {
			if binlogCoordinates.LogPos != 0 {
				// Entry found!
				skipRestOfBinlog = true
				return nil
			}
			if skipRestOfBinlog {
				return nil
			}
			moreRowsExpected = true
			nextPos = m.GetInt64("End_log_pos")
			binlogEntryInfo := m.GetString("Info")
			if binlogEntryInfo == entryText {
				// found it!
				binlogCoordinates.LogPos = m.GetInt64("Pos")
			} else if monotonicPseudoGTIDEntries && !alreadyMatchedAscendingPseudoGTID {
				// This part assumes we're searching for Pseudo-GTID. Typically that is the case, however this function can
				// also be used for generic searches through the binary log.
				// More heavyweight computation here: we need to verify whether the binlog entry we have is a pseudo-gtid entry.
				// We only want to check for ASCENDING once, at the top of the binary log:
				// if we find the first entry to be higher than the searched one, clearly we are done.
				// If not, then by virtue of binary logs we still have to full-scan the entire binlog sequentially; we
				// do not check again for ASCENDING (no point), thereby sparing CPU otherwise wasted on regexp.
				if matched, _ := regexp.MatchString(config.Config.PseudoGTIDPattern, binlogEntryInfo); matched {
					alreadyMatchedAscendingPseudoGTID = true
					log.Debugf("Matched ascending Pseudo-GTID entry in %+v", binlog)
					if binlogEntryInfo > entryText {
						// Entries are ascending, and the current entry is larger than the one we are searching for.
						// There is no need to scan further on. We can skip the entire binlog
						log.Debugf(`Pseudo GTID entries are monotonic and we hit "%+v" > "%+v"; skipping binlog %+v`, m.GetString("Info"), entryText, binlogCoordinates.LogFile)
						skipRestOfBinlog = true
						return nil
					}
				}
			}
			return nil
		})
		if err != nil {
			return binlogCoordinates, (binlogCoordinates.LogPos != 0), err
		}
		if skipRestOfBinlog {
			return binlogCoordinates, (binlogCoordinates.LogPos != 0), err
		}
	}

	return binlogCoordinates, (binlogCoordinates.LogPos != 0), err
}
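// Editor's sketch: the monotonic shortcut above requires entries that sort lexicographically
// in injection order, e.g. zero-padded counters. The entry values here are hypothetical.
func exampleMonotonicComparison() {
	earlier := "drop view if exists meta.pseudo_gtid_hint__asc:00000123"
	later := "drop view if exists meta.pseudo_gtid_hint__asc:00000456"
	// Once a scanned entry compares greater than the searched-for entry, the remainder of
	// the binlog cannot contain the target and can be skipped wholesale:
	fmt.Println(later > earlier) // true
}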