示例#1
0
文件: reset_db.go 项目: yiduoyunQ/sm
// clearSlave removes the replication configuration of the database dbName.
//
// The slave status is read through swm first. When it records no master IP
// there is nothing to clear and nil is returned. Otherwise the slave is
// stopped and then reset. The first error reported by swm is returned
// unchanged.
func clearSlave(dbName string, swm *swarm.Swarm) error {
	status, err := swm.SlaveShowStatus(dbName)
	switch {
	case err != nil:
		return err
	case status.MasterIp == "":
		// No master recorded: nothing to stop or reset.
		return nil
	}

	if err = swm.SlaveStop(dbName); err != nil {
		return err
	}
	return swm.SlaveReset(dbName)
}
示例#2
0
// changeMaster points the database fromName at the master toIp:toPort and
// starts replication on it.
//
// If the current slave status already names toIp:toPort as its master,
// nothing is reconfigured; the slave is only started when both
// SlaveIoRunning and SlaveSqlRunning report "No".
//
// Otherwise the sequence is:
//  1. stop the slave if either replication thread reports "Yes";
//  2. reset the slave if a master IP is currently recorded;
//  3. read gtid_executed and, when it is non-empty, reset the master and
//     set gtid_purged to that value;
//  4. change master to toIp:toPort;
//  5. start the slave.
//
// The first error returned by swm aborts the sequence and is returned as is.
func changeMaster(swm *swarm.Swarm, fromName, toIp string, toPort int) error {
	status, err := swm.SlaveShowStatus(fromName)
	if err != nil {
		return err
	}
	log.WithFields(log.Fields{
		"DbName":      fromName,
		"SlaveStatus": status,
	}).Debug("change master")

	ioRunning, sqlRunning := status.SlaveIoRunning, status.SlaveSqlRunning

	// Already replicating from the requested master: at most a restart is needed.
	if status.MasterIp == toIp && status.MasterPort == toPort {
		// NOTE(review): a slave with only one of the two threads stopped is
		// left untouched here — confirm that this is intended.
		if ioRunning != "No" || sqlRunning != "No" {
			return nil
		}
		return swm.SlaveStart(fromName)
	}

	// Stop replication before touching its configuration.
	if ioRunning == "Yes" || sqlRunning == "Yes" {
		if err = swm.SlaveStop(fromName); err != nil {
			return err
		}
	}

	// Drop the previous master configuration, if there is one.
	if status.MasterIp != "" {
		if err = swm.SlaveReset(fromName); err != nil {
			return err
		}
	}

	// Carry the executed GTID set over into gtid_purged across the master reset.
	gtid, err := swm.GetGtidExecuted(fromName)
	if err != nil {
		return err
	}
	if gtid != "" {
		if err = swm.MasterReset(fromName); err != nil {
			return err
		}
		if err = swm.SetGtidPurged(fromName, gtid); err != nil {
			return err
		}
	}

	// Attach to the new master and resume replication.
	if err = swm.ChangeMaster(fromName, toIp, toPort); err != nil {
		return err
	}
	return swm.SlaveStart(fromName)
}
示例#3
0
文件: recover.go 项目: yiduoyunQ/sm
// RecoverDb re-attaches the database node dbName to the replication topology
// and marks it as normal again.
//
// The node is looked up by name in the "default" data node group of topology.
// Recovery then runs in three steps:
//
//  1. For a StandBy or Slave node, the master's own slave status is read; if
//     it records a master IP, the master's slave is stopped and reset.
//  2. A StandBy node is pointed at the master, a Slave node at the standby
//     (both through changeMaster). Other node types skip this step.
//  3. Under rwMutex's write lock, the node's status is set to consts.Normal
//     and updateVersionAndNormalCount(topology, 1) is called.
//
// It returns the error (nil on success) followed by the human-readable step
// messages collected so far. The outcome is also logged on return.
//
// Per the original author's note, the caller does not need to hold the read
// lock; the write lock is taken here only around the final topology update.
func RecoverDb(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) {
	var err error
	var steps []string
	// The deferred logger reports the final outcome from err, so every failing
	// return path below must assign to err before returning.
	defer func() {
		if err != nil {
			log.WithFields(log.Fields{
				"DbName": dbName,
				"err":    err.Error(),
			}).Error("recover fail")
			return
		}
		log.WithFields(log.Fields{
			"DbName": dbName,
		}).Info("recover success")
	}()

	// A missing key yields the zero value (nil), which is handled just below.
	recoverDbInfo := topology.DataNodeGroup["default"][dbName]
	if recoverDbInfo == nil {
		// Assign to err (instead of returning a fresh error) so that the
		// deferred logger records this as a failure rather than a success.
		err = errors.New("recover db node not found")
		return err, steps
	}

	// 1. check m slave status
	if recoverDbInfo.Type == consts.StandBy || recoverDbInfo.Type == consts.Slave {
		mName, _ := getMaster(topology)
		var masterSlStatus structs.SlaveStatus
		masterSlStatus, err = swm.SlaveShowStatus(mName)
		if err != nil {
			steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
			return err, steps
		}
		if masterSlStatus.MasterIp != "" {
			// 1.1 stop m slave
			if err = swm.SlaveStop(mName); err != nil {
				steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
				return err, steps
			}
			// 1.2 reset m slave
			if err = swm.SlaveReset(mName); err != nil {
				steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
				return err, steps
			}
		}
	}

	steps = append(steps, "Step1. Clear M status success\n")

	// 2. recover change master
	if recoverDbInfo.Type == consts.StandBy {
		// NOTE(review): mDbInfo is dereferenced without a nil check; this
		// assumes getMaster always finds a master — confirm.
		mName, mDbInfo := getMaster(topology)
		err = changeMaster(swm, dbName, mDbInfo.Ip, mDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sb":   dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
				"M":    mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")",
				"err:": err.Error(),
			}).Error("change master fail, return")
			steps = append(steps, fmt.Sprintf("Step2. Sb change master to M fail: %s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sb": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
			"M":  mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")",
		}).Debug("change master success")
		steps = append(steps, "Step2. Sb change master to M success\n")
	} else if recoverDbInfo.Type == consts.Slave {
		// NOTE(review): sbDbInfo is dereferenced without a nil check; this
		// assumes getStandBy always finds a standby — confirm.
		sbName, sbDbInfo := getStandBy(topology)
		err = changeMaster(swm, dbName, sbDbInfo.Ip, sbDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sl":   dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
				"Sb":   sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")",
				"err:": err.Error(),
			}).Error("change master fail, return")
			steps = append(steps, fmt.Sprintf("Step2. Sl change master to Sb fail: %s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sl": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
			"Sb": sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")",
		}).Info("change master success")
		steps = append(steps, "Step2. Sl change master to Sb success\n")
	}

	// 3. update topology
	rwMutex.Lock()
	dbInfo := getDbInfo(dbName, topology)
	dbInfo.Status = consts.Normal
	updateVersionAndNormalCount(topology, 1)
	rwMutex.Unlock()

	return nil, steps
}
示例#4
0
// checkTopo verifies that the replication state reported by swarm matches the
// roles recorded in the "default" data node group of topology.
//
// Checks performed:
//   - When mCheck is true, the normal master must have neither a master IP
//     nor a master port configured.
//   - The normal standby, if any, must replicate from the master's IP and
//     port with SlaveIoRunning and SlaveSqlRunning both "Yes".
//   - When DataNodeGroupNormalCount["default"] is greater than 2, every
//     normal slave must replicate from the standby's IP and port with both
//     threads "Yes".
//
// If no normal standby exists, the standby and slave checks are skipped.
// It returns nil when every check passes, otherwise an error describing the
// first mismatch or the error from the status query. t is currently unused.
func checkTopo(swarm *swarm.Swarm, topology *structs.Topology, mCheck bool, t *testing.T) error {
	log.Println("***CheckTopo start***")
	var mName, mIp, sbName, sbIp string
	var mPort, sbPort int
	// Locate the normal master and the normal standby.
	for k, v := range topology.DataNodeGroup["default"] {
		if v.Type == consts.Master && v.Status == consts.Normal {
			mName = k
			mIp = v.Ip
			mPort = v.Port
		} else if v.Type == consts.StandBy && v.Status == consts.Normal {
			sbName = k
			sbIp = v.Ip
			sbPort = v.Port
		}
	}

	if mCheck {
		// check m
		mSlaveStatus, err := swarm.SlaveShowStatus(mName)
		if err != nil {
			// Return the error itself: using its text as a format string
			// would garble any '%' it contains.
			return err
		}
		if mSlaveStatus.MasterIp != "" ||
			mSlaveStatus.MasterPort != 0 {
			return fmt.Errorf("master %s init/isolate/recover err, slave status:%#v\n", mName, mSlaveStatus)
		}
	}

	if sbName == "" {
		log.Println("***CheckTopo end***")
		return nil
	}

	// check sb
	sbSlaveStatus, err := swarm.SlaveShowStatus(sbName)
	if err != nil {
		return err
	}
	if sbSlaveStatus.MasterIp != mIp ||
		sbSlaveStatus.MasterPort != mPort ||
		sbSlaveStatus.SlaveIoRunning != "Yes" ||
		sbSlaveStatus.SlaveSqlRunning != "Yes" {
		return fmt.Errorf("standby %s init/isolate/recover err, slave status:%#v\n", sbName, sbSlaveStatus)
	}

	// check sl
	if topology.DataNodeGroupNormalCount["default"] > 2 {
		for k, v := range topology.DataNodeGroup["default"] {
			if v.Type == consts.Slave && v.Status == consts.Normal {
				slSlaveStatus, err := swarm.SlaveShowStatus(k)
				if err != nil {
					return err
				}
				if slSlaveStatus.MasterIp != sbIp ||
					slSlaveStatus.MasterPort != sbPort ||
					slSlaveStatus.SlaveIoRunning != "Yes" ||
					slSlaveStatus.SlaveSqlRunning != "Yes" {
					// Report the failing slave's own status, not the standby's.
					return fmt.Errorf("slave %s init/isolate/recover err, slave status:%#v\n", k, slSlaveStatus)
				}
			}
		}
	}
	log.Println("***CheckTopo end***")
	return nil
}