func RecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, skipProcesses bool) (bool, *inst.Instance, error) { failedInstanceKey := &analysisEntry.AnalyzedInstanceKey if ok, err := AttemptRecoveryRegistration(&analysisEntry); !ok { log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMaster.", *failedInstanceKey) return false, nil, err } inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover") if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, true); err != nil { return false, nil, err } } log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey) _, _, _, candidateSlave, err := inst.RegroupSlavesIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil) ResolveRecovery(failedInstanceKey, &candidateSlave.Key) log.Debugf("topology_recovery: - RecoverDeadMaster: candidate slave is %+v", candidateSlave.Key) inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", candidateSlave.Key)) return true, candidateSlave, err }
func RecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) { failedInstanceKey := &analysisEntry.AnalyzedInstanceKey if ok, err := AttemptRecoveryRegistration(&analysisEntry); !ok { log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMaster.", *failedInstanceKey) return nil, lostSlaves, err } inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover") if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, emptySlavesList, true); err != nil { return nil, lostSlaves, err } } log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey) var masterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID if (analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology) && !analysisEntry.PseudoGTIDImmediateTopology { masterRecoveryType = MasterRecoveryGTID } else if analysisEntry.BinlogServerImmediateTopology { masterRecoveryType = MasterRecoveryBinlogServer } log.Debugf("topology_recovery: RecoverDeadMaster: masterRecoveryType=%+v", masterRecoveryType) switch masterRecoveryType { case MasterRecoveryGTID: { lostSlaves, _, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil) } case MasterRecoveryPseudoGTID: { lostSlaves, _, _, promotedSlave, err = inst.RegroupSlavesIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil) } case MasterRecoveryBinlogServer: { promotedSlave, err = inst.RegroupSlavesBinlogServers(failedInstanceKey, true, nil) } } if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover { log.Debugf("topology_recovery: - RecoverDeadMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves)) go func() { for _, slave := range lostSlaves { slave := slave inst.DetachSlaveOperation(&slave.Key) } }() } if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 { inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), "RecoverDeadMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) for _, slave := range lostSlaves { slave := slave inst.BeginDowntime(&slave.Key, inst.GetMaintenanceOwner(), "RecoverDeadMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60) } } if promotedSlave == nil { log.Debugf("topology_recovery: - RecoverDeadMaster: Failure: no slave promoted.") inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.") } else { log.Debugf("topology_recovery: - RecoverDeadMaster: promoted slave is %+v", promotedSlave.Key) inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", promotedSlave.Key)) } return promotedSlave, lostSlaves, err }