func clearSlave(dbName string, swm *swarm.Swarm) error { slaveStatus, err := swm.SlaveShowStatus(dbName) if err != nil { return err } if slaveStatus.MasterIp == "" { return nil } err = swm.SlaveStop(dbName) if err != nil { return err } err = swm.SlaveReset(dbName) if err != nil { return err } return nil }
func changeMaster(swm *swarm.Swarm, fromName, toIp string, toPort int) error { slaveStatus, err := swm.SlaveShowStatus(fromName) if err != nil { return err } log.WithFields(log.Fields{ "DbName": fromName, "SlaveStatus": slaveStatus, }).Debug("change master") if slaveStatus.MasterIp == toIp && slaveStatus.MasterPort == toPort { if slaveStatus.SlaveIoRunning == "No" && slaveStatus.SlaveSqlRunning == "No" { // 1.4 start slave err = swm.SlaveStart(fromName) if err != nil { return err } } return nil } if slaveStatus.SlaveIoRunning == "Yes" || slaveStatus.SlaveSqlRunning == "Yes" { // 1.1 stop slave err = swm.SlaveStop(fromName) if err != nil { return err } } if slaveStatus.MasterIp != "" { // 1.2 reset slave err = swm.SlaveReset(fromName) if err != nil { return err } } // 1.3 get gtid_executed gtidExecuted, err := swm.GetGtidExecuted(fromName) if err != nil { return err } if gtidExecuted != "" { // 1.4 reset master err = swm.MasterReset(fromName) if err != nil { return err } // 1.5 set gtid_purged err = swm.SetGtidPurged(fromName, gtidExecuted) if err != nil { return err } } // 1.6 change master err = swm.ChangeMaster(fromName, toIp, toPort) if err != nil { return err } // 1.7 start slave err = swm.SlaveStart(fromName) if err != nil { return err } return nil }
// no need RLock func RecoverDb(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) { var err error var steps []string defer func() { if err != nil { log.WithFields(log.Fields{ "DbName": dbName, "err": err.Error(), }).Error("recover fail") } else { log.WithFields(log.Fields{ "DbName": dbName, }).Info("recover success") } }() var recoverDbInfo *structs.DatabaseInfo for k, v := range topology.DataNodeGroup["default"] { if k == dbName { recoverDbInfo = v break } } if recoverDbInfo == nil { return errors.New("recover db node not found"), steps } // 1. check m slave status if recoverDbInfo.Type == consts.StandBy || recoverDbInfo.Type == consts.Slave { mName, _ := getMaster(topology) var masterSlStatus structs.SlaveStatus masterSlStatus, err = swm.SlaveShowStatus(mName) if err != nil { steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error())) return err, steps } if masterSlStatus.MasterIp != "" { // 1.1 stop m slave err = swm.SlaveStop(mName) if err != nil { steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error())) return err, steps } // 1.2 reset m slave err = swm.SlaveReset(mName) if err != nil { steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error())) return err, steps } } } steps = append(steps, "Step1. Clear M status success\n") // 2. recover change master if recoverDbInfo.Type == consts.StandBy { mName, mDbInfo := getMaster(topology) err = changeMaster(swm, dbName, mDbInfo.Ip, mDbInfo.Port) if err != nil { log.WithFields(log.Fields{ "Sb": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")", "M": mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")", "err:": err.Error(), }).Error("change master fail, return") steps = append(steps, fmt.Sprintf("Step2. Sb change master to M fail: %s\n", err.Error())) return err, steps } log.WithFields(log.Fields{ "Sb": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")", "M": mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")", }).Debug("change master success") steps = append(steps, "Step2. Sb change master to M success\n") } else if recoverDbInfo.Type == consts.Slave { sbName, sbDbInfo := getStandBy(topology) err = changeMaster(swm, dbName, sbDbInfo.Ip, sbDbInfo.Port) if err != nil { log.WithFields(log.Fields{ "Sl": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")", "Sb": sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")", "err:": err.Error(), }).Error("change master fail, return") steps = append(steps, fmt.Sprintf("Step2. Sl change master to Sb fail: %s\n", err.Error())) return err, steps } log.WithFields(log.Fields{ "Sl": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")", "Sb": sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")", }).Info("change master success") steps = append(steps, "Step2. Sl change master to Sb success\n") } // 3. update topology rwMutex.Lock() dbInfo := getDbInfo(dbName, topology) dbInfo.Status = consts.Normal updateVersionAndNormalCount(topology, 1) rwMutex.Unlock() return nil, steps }
func checkTopo(swarm *swarm.Swarm, topology *structs.Topology, mCheck bool, t *testing.T) error { log.Println("***CheckTopo start***") var mName, mIp, sbName, sbIp string var mPort, sbPort int for k, v := range topology.DataNodeGroup["default"] { if v.Type == consts.Master && v.Status == consts.Normal { mName = k mIp = v.Ip mPort = v.Port } else if v.Type == consts.StandBy && v.Status == consts.Normal { sbName = k sbIp = v.Ip sbPort = v.Port } } if mCheck { // check m mSlaveStatus, err := swarm.SlaveShowStatus(mName) if err != nil { return fmt.Errorf(err.Error()) } if mSlaveStatus.MasterIp != "" || mSlaveStatus.MasterPort != 0 { return fmt.Errorf("master %s init/isolate/recover err, slave status:%#v\n", mName, mSlaveStatus) } } if sbName == "" { log.Println("***CheckTopo end***") return nil } // check sb sbSlaveStatus, err := swarm.SlaveShowStatus(sbName) if err != nil { return fmt.Errorf(err.Error()) } if sbSlaveStatus.MasterIp != mIp || sbSlaveStatus.MasterPort != mPort || sbSlaveStatus.SlaveIoRunning != "Yes" || sbSlaveStatus.SlaveSqlRunning != "Yes" { return fmt.Errorf("standby %s init/isolate/recover err, slave status:%#v\n", sbName, sbSlaveStatus) } // check sl if topology.DataNodeGroupNormalCount["default"] > 2 { for k, v := range topology.DataNodeGroup["default"] { if v.Type == consts.Slave && v.Status == consts.Normal { slSlaveStatus, err := swarm.SlaveShowStatus(k) if err != nil { return fmt.Errorf(err.Error()) } if slSlaveStatus.MasterIp != sbIp || slSlaveStatus.MasterPort != sbPort || slSlaveStatus.SlaveIoRunning != "Yes" || slSlaveStatus.SlaveSqlRunning != "Yes" { return fmt.Errorf("slave %s init/isolate/recover err, slave status:%#v\n", k, sbSlaveStatus) } } } } log.Println("***CheckTopo end***") return nil }