Ejemplo n.º 1
0
// clearDb drops the two fixed test databases (qjrtest1, then qjrtest2) by
// running the mysql client through swarm.Exec against dbName.
//
// It stops at the first Exec failure, logs that failure together with dbName
// and returns the error. After a successful Exec the returned value is closed
// before the next statement is issued. nil is returned once both statements
// have been executed.
func clearDb(dbName string, swarm *swarm.Swarm) error {
	// One entry per statement: the "-e" argument handed to mysql and the
	// message logged if executing it fails.
	drops := []struct {
		stmt    string
		failMsg string
	}{
		{"-e drop database qjrtest1", "drop qjrtest1 fail"},
		{"-e drop database qjrtest2", "drop qjrtest2 fail"},
	}

	for _, d := range drops {
		args := []string{
			"mysql",
			"-u" + "cup_dba",
			"-p" + "111111",
			"-S" + "/DBAASDAT/upsql.sock",
			d.stmt,
		}
		out, err := swarm.Exec(args, dbName)
		if err != nil {
			log.WithFields(log.Fields{
				"DbName": dbName,
				"err":    err.Error(),
			}).Error(d.failMsg)
			return err
		}
		out.Close()
	}

	return nil
}
Ejemplo n.º 2
0
// clearMaster calls swm.MasterReset for dbName and hands any failure back to
// the caller; it returns nil when the reset succeeds.
func clearMaster(dbName string, swm *swarm.Swarm) error {
	if resetErr := swm.MasterReset(dbName); resetErr != nil {
		return resetErr
	}
	return nil
}
Ejemplo n.º 3
0
// voteSl chooses, among the nodes of the "default" data node group that are
// of type Slave and in Normal status, the one whose position reported by
// swm.PositionGet is the most advanced, and returns its name.
//
// Positions are queried concurrently, one goroutine per candidate. They are
// compared as a pair: a higher master log file number wins, and within the
// same file number a higher read position wins. A candidate reporting (0, 0)
// can never win, so it is effectively treated as unavailable (presumably
// PositionGet returns zeros on failure; verify against its implementation).
//
// An error is returned when no candidate reports a position greater than
// (0, 0), including when there are no Normal slaves at all.
func voteSl(swm *swarm.Swarm, topology *structs.Topology) (string, error) {
	type slPosition struct {
		dbName           string
		masterLogFileNum int
		readMasterLogPos int
	}

	nodes := topology.DataNodeGroup["default"]

	// Buffered to the group size so that no goroutine can ever block on its
	// send, whatever happens on the receiving side.
	ch := make(chan slPosition, len(nodes))

	// Number of goroutines actually started. The receive loop below waits for
	// exactly this many results instead of deriving the count from
	// DataNodeGroupNormalCount, which can disagree with the number of Normal
	// slaves and would then deadlock or leak goroutines.
	pending := 0
	for k, v := range nodes {
		if v.Type != consts.Slave || v.Status != consts.Normal {
			continue
		}
		pending++
		go func(dbName string) {
			masterLogFileNum, readMasterLogPos := swm.PositionGet(dbName)
			log.WithFields(log.Fields{
				"SlaveName":        dbName,
				"MasterLogFileNum": masterLogFileNum,
				"ReadMasterLogPos": readMasterLogPos,
			}).Debug("Sl vote")
			ch <- slPosition{
				dbName:           dbName,
				masterLogFileNum: masterLogFileNum,
				readMasterLogPos: readMasterLogPos,
			}
		}(k)
	}

	maxMasterLogFileNum := 0
	maxReadMasterLogPos := 0
	maxDbName := ""
	for ; pending > 0; pending-- {
		pos := <-ch
		// Lexicographic comparison on (file number, read position).
		ahead := pos.masterLogFileNum > maxMasterLogFileNum ||
			(pos.masterLogFileNum == maxMasterLogFileNum &&
				pos.readMasterLogPos > maxReadMasterLogPos)
		if ahead {
			maxMasterLogFileNum = pos.masterLogFileNum
			maxReadMasterLogPos = pos.readMasterLogPos
			maxDbName = pos.dbName
		}
	}

	// No slave reported a usable position.
	if maxDbName == "" {
		return "", errors.New(
			"when isolate M|Sb, all slave are unavilable for vote, isolate failed and return to hope health check enter sl auto isolate")
	}

	maxDbInfo := nodes[maxDbName]
	log.WithFields(log.Fields{
		"winer": maxDbName + "(" + maxDbInfo.Ip + ":" + strconv.Itoa(maxDbInfo.Port) + ")",
	}).Debug("Sl vote")

	return maxDbName, nil
}
Ejemplo n.º 4
0
// createDb creates the database named dbString by running the mysql client
// through swarm.Exec against dbName.
//
// dbString is appended verbatim to the "create database" statement, with no
// quoting or escaping. On Exec failure the error is logged together with
// dbName and dbString and then returned; on success the value returned by
// Exec is closed and nil is returned.
func createDb(dbName, dbString string, swarm *swarm.Swarm) error {
	args := []string{
		"mysql",
		"-u" + "cup_dba",
		"-p" + "111111",
		"-S" + "/DBAASDAT/upsql.sock",
		"-e create database " + dbString,
	}

	out, err := swarm.Exec(args, dbName)
	if err == nil {
		out.Close()
		return nil
	}

	log.WithFields(log.Fields{
		"DbName":   dbName,
		"err":      err.Error(),
		"Database": dbString,
	}).Error("create test database fail")
	return err
}
Ejemplo n.º 5
0
// clearSlave removes the replication configuration of dbName.
//
// It reads the slave status first; when no master IP is recorded there is
// nothing to clear and nil is returned. Otherwise the slave is stopped and
// then reset, in that order. The first error encountered is returned.
func clearSlave(dbName string, swm *swarm.Swarm) error {
	status, err := swm.SlaveShowStatus(dbName)
	switch {
	case err != nil:
		return err
	case status.MasterIp == "":
		// Not configured as a slave of anything.
		return nil
	}

	if stopErr := swm.SlaveStop(dbName); stopErr != nil {
		return stopErr
	}
	if resetErr := swm.SlaveReset(dbName); resetErr != nil {
		return resetErr
	}
	return nil
}
Ejemplo n.º 6
0
// changeMaster makes the node fromName replicate from toIp:toPort.
//
// If the slave status already names toIp:toPort as master, the only action
// taken is to start the slave when both its IO and SQL threads report "No".
// Otherwise the sequence is:
//
//  1. stop the slave if either thread reports "Yes";
//  2. reset the slave if a master IP is currently recorded;
//  3. read gtid_executed and, when it is non-empty, reset the master and set
//     gtid_purged to that value;
//  4. change master to toIp:toPort;
//  5. start the slave.
//
// The first error returned by any swm call aborts the sequence and is
// returned unchanged.
func changeMaster(swm *swarm.Swarm, fromName, toIp string, toPort int) error {
	status, err := swm.SlaveShowStatus(fromName)
	if err != nil {
		return err
	}
	log.WithFields(log.Fields{
		"DbName":      fromName,
		"SlaveStatus": status,
	}).Debug("change master")

	ioState, sqlState := status.SlaveIoRunning, status.SlaveSqlRunning

	// Already pointing at the requested master: at most restart replication.
	if status.MasterIp == toIp && status.MasterPort == toPort {
		if ioState != "No" || sqlState != "No" {
			return nil
		}
		if startErr := swm.SlaveStart(fromName); startErr != nil {
			return startErr
		}
		return nil
	}

	// Stop replication if any thread is still running.
	if ioState == "Yes" || sqlState == "Yes" {
		if stopErr := swm.SlaveStop(fromName); stopErr != nil {
			return stopErr
		}
	}

	// Drop the previous master configuration, if there is one.
	if status.MasterIp != "" {
		if resetErr := swm.SlaveReset(fromName); resetErr != nil {
			return resetErr
		}
	}

	// Read gtid_executed, then reset the master and write the same value
	// back as gtid_purged.
	gtid, err := swm.GetGtidExecuted(fromName)
	if err != nil {
		return err
	}
	if gtid != "" {
		if err = swm.MasterReset(fromName); err != nil {
			return err
		}
		if err = swm.SetGtidPurged(fromName, gtid); err != nil {
			return err
		}
	}

	// Point at the new master and start replicating.
	if err = swm.ChangeMaster(fromName, toIp, toPort); err != nil {
		return err
	}
	if err = swm.SlaveStart(fromName); err != nil {
		return err
	}
	return nil
}
Ejemplo n.º 7
0
// RecoverDb brings the node dbName of the "default" data node group back into
// the replication topology and marks it Normal.
//
// Steps, each recorded as a human-readable line in the returned slice:
//
//  1. When the node is a StandBy or a Slave, make sure the current master is
//     not itself replicating from anyone (stop + reset its slave config).
//  2. Re-point the node: a StandBy replicates from the master, a Slave
//     replicates from the standby. Nodes of any other type are not re-pointed.
//  3. Under rwMutex's write lock, set the node's status to Normal and call
//     updateVersionAndNormalCount(topology, 1).
//
// The function returns (error, steps); on failure steps contains the lines
// accumulated so far, including the failing one. The outcome is also logged
// by a deferred function that inspects the local err variable, so every
// failure path must assign err before returning.
//
// Original note: "no need RLock" — presumably callers do not need to hold
// rwMutex for reading before calling this; TODO confirm with callers.
func RecoverDb(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) {
	var err error
	var steps []string
	defer func() {
		if err != nil {
			log.WithFields(log.Fields{
				"DbName": dbName,
				"err":    err.Error(),
			}).Error("recover fail")
		} else {
			log.WithFields(log.Fields{
				"DbName": dbName,
			}).Info("recover success")
		}
	}()

	// Direct map lookup; a missing key (or a nil entry) yields nil.
	recoverDbInfo := topology.DataNodeGroup["default"][dbName]
	if recoverDbInfo == nil {
		// Assign to err instead of returning a fresh value so the deferred
		// logger reports this as a failure rather than "recover success".
		err = errors.New("recover db node not found")
		return err, steps
	}

	// 1. check m slave status
	if recoverDbInfo.Type == consts.StandBy || recoverDbInfo.Type == consts.Slave {
		mName, _ := getMaster(topology)
		var masterSlStatus structs.SlaveStatus
		masterSlStatus, err = swm.SlaveShowStatus(mName)
		if err != nil {
			steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
			return err, steps
		}
		if masterSlStatus.MasterIp != "" {
			// 1.1 stop m slave
			err = swm.SlaveStop(mName)
			if err != nil {
				steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
				return err, steps
			}
			// 1.2 reset m slave
			err = swm.SlaveReset(mName)
			if err != nil {
				steps = append(steps, fmt.Sprintf("Step1. Clear M status fail: %s\n", err.Error()))
				return err, steps
			}
		}
	}

	steps = append(steps, "Step1. Clear M status success\n")

	// 2. recover change master
	if recoverDbInfo.Type == consts.StandBy {
		// A standby replicates directly from the master.
		mName, mDbInfo := getMaster(topology)
		err = changeMaster(swm, dbName, mDbInfo.Ip, mDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sb":   dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
				"M":    mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")",
				"err:": err.Error(),
			}).Error("change master fail, return")
			steps = append(steps, fmt.Sprintf("Step2. Sb change master to M fail: %s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sb": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
			"M":  mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")",
		}).Debug("change master success")
		steps = append(steps, "Step2. Sb change master to M success\n")
	} else if recoverDbInfo.Type == consts.Slave {
		// A slave replicates from the standby, not from the master.
		sbName, sbDbInfo := getStandBy(topology)
		err = changeMaster(swm, dbName, sbDbInfo.Ip, sbDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sl":   dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
				"Sb":   sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")",
				"err:": err.Error(),
			}).Error("change master fail, return")
			steps = append(steps, fmt.Sprintf("Step2. Sl change master to Sb fail: %s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sl": dbName + "(" + recoverDbInfo.Ip + ":" + strconv.Itoa(recoverDbInfo.Port) + ")",
			"Sb": sbName + "(" + sbDbInfo.Ip + ":" + strconv.Itoa(sbDbInfo.Port) + ")",
		}).Info("change master success")
		steps = append(steps, "Step2. Sl change master to Sb success\n")
	}

	// 3. update topology
	rwMutex.Lock()
	dbInfo := getDbInfo(dbName, topology)
	dbInfo.Status = consts.Normal
	updateVersionAndNormalCount(topology, 1)
	rwMutex.Unlock()

	return nil, steps
}
Ejemplo n.º 8
0
// IsolateDB takes the node dbName of the "default" data node group out of
// service by marking it Abnormal and, when needed, promoting other nodes.
//
// Behaviour by case:
//
//   - Only one Normal node, or the node is a Slave, or the node is the
//     StandBy and only two nodes are Normal: just mark the node Abnormal.
//   - The node is the Master and two nodes are Normal: the standby becomes
//     Master and the old master becomes an Abnormal StandBy.
//   - The node is the Master and Normal slaves exist: a slave is elected with
//     voteSl; the standby becomes Master, the elected slave becomes StandBy,
//     the old master becomes an Abnormal Slave, and the remaining slaves are
//     re-pointed via otherSlChangeMasterParallel.
//   - The node is the StandBy and Normal slaves exist: a slave is elected,
//     re-pointed at the master and made StandBy; the old standby becomes an
//     Abnormal Slave and the remaining slaves are re-pointed.
//
// Every topology mutation is done under rwMutex's write lock and followed by
// updateVersionAndNormalCount(topology, -1). Failures of
// otherSlChangeMasterParallel are recorded in steps but do not make the call
// fail.
//
// The function returns (error, steps), where steps holds human-readable
// progress lines. The outcome is also logged by a deferred function that
// inspects the local err variable, so every failure path must assign err
// before returning.
//
// Original note: "no need RLock" — presumably callers do not need to hold
// rwMutex for reading before calling this; TODO confirm with callers.
func IsolateDB(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) {
	var err error
	// Kept as a non-nil empty slice, as in the original, so callers that
	// serialise or nil-check it see the same value.
	steps := []string{}
	defer func() {
		if err != nil && err.Error() != "" {
			log.WithFields(log.Fields{
				"DbName": dbName,
				"err":    err.Error(),
			}).Error("isolate fail")
		} else {
			log.WithFields(log.Fields{
				"DbName": dbName,
			}).Info("isolate success")
		}
	}()

	// Direct map lookup; a missing key (or a nil entry) leaves the type empty.
	var isolateType string
	if info := topology.DataNodeGroup["default"][dbName]; info != nil {
		isolateType = info.Type
	}

	if isolateType == "" {
		err = errors.New("isolate db node not found")
		return err, steps
	}

	// The marshalled topology is only used for the debug log; a marshal
	// failure just yields an empty string there.
	btopology, _ := json.MarshalIndent(topology, "", "  ")
	log.WithFields(log.Fields{
		"IsolateDbType": isolateType,
		"DbName":        dbName,
		"topology":      string(btopology),
	}).Debug("isolate db")

	// only m normal ||
	// isolate sb && only m sb normal ||
	// isolate sl
	// no need swarm, update topology and return
	if topology.DataNodeGroupNormalCount["default"] == 1 ||
		(isolateType == consts.StandBy && topology.DataNodeGroupNormalCount["default"] == 2) ||
		isolateType == consts.Slave {
		rwMutex.Lock()
		topology.DataNodeGroup["default"][dbName].Status = consts.Abnormal
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()
		steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName))
		return nil, steps
	}

	if isolateType == consts.Master {
		// 1. sb delay check
		sbName, sbDbInfo := getStandBy(topology)

		err = swm.IsDelay(sbName)
		if err != nil {
			// Best effort: a failed delay check is only reported, and err
			// is cleared so the deferred logger does not see it.
			log.WithFields(log.Fields{
				"DbName": sbName,
				"err":    err.Error(),
			}).Warn("Sb delay check fail, continue")
			err = nil
		}
		// 1. end ********************

		// m sb normal
		// update topology and return
		if topology.DataNodeGroupNormalCount["default"] == 2 {
			// 3. update topology
			_, mDbInfo := getMaster(topology)
			rwMutex.Lock()
			sbDbInfo.Type = consts.Master // sb -> m
			mDbInfo.Type = consts.StandBy // m -> sb
			mDbInfo.Status = consts.Abnormal
			updateVersionAndNormalCount(topology, -1)
			steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName))
			rwMutex.Unlock()
			return nil, steps
			// ********************
		}

		// has normal sl
		// 2. vote sb
		var maxGtidDbName string
		maxGtidDbName, err = voteSl(swm, topology)
		if err != nil {
			steps = append(steps, fmt.Sprintf("Step2. Vote new Sb fail: %s\n", err.Error()))
			return err, steps
		}
		steps = append(steps, fmt.Sprintf("Step2. Vote new Sb success:%s\n", maxGtidDbName))
		// 2. end ********************

		// 3. update topology
		rwMutex.Lock()
		mName, mDbInfo := getMaster(topology)
		mDbInfo.Type = consts.Slave // m -> sl
		mDbInfo.Status = consts.Abnormal
		sbDbInfo.Type = consts.Master // sb -> master
		slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName]
		slDbInfo.Type = consts.StandBy // maxGtidSl -> sb
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()
		// 3. end ********************

		// 4. for other sl (except origin m) changeMaster to newSb
		errs := otherSlChangeMasterParallel(topology, mName, maxGtidDbName, slDbInfo, swm)
		if len(errs) != 0 {
			steps = append(steps, fmt.Sprintf("Step3. Other Sl change master to new Sb fail:%v\n", errs))
		} else {
			steps = append(steps, "Step3. Other Sl change master to new Sb success\n")
		}

		// 4. end ********************

	} else if isolateType == consts.StandBy {
		sbName, sbDbInfo := getStandBy(topology)
		// has normal sl
		// 1. vote sb
		var maxGtidDbName string
		maxGtidDbName, err = voteSl(swm, topology)
		if err != nil {
			return err, steps
		}
		// 1. end ********************

		// no need check gtidDiff

		// 2. newsb change master to master
		mName, mDbInfo := getMaster(topology)
		slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName]

		// Log labels in the "name(ip:port)" form; the previous form glued
		// the port directly onto the IP with no separator.
		newSbLabel := maxGtidDbName + "(" + slDbInfo.Ip + ":" + strconv.Itoa(slDbInfo.Port) + ")"
		mLabel := mName + "(" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port) + ")"

		err = changeMaster(swm, maxGtidDbName, mDbInfo.Ip, mDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sb":   newSbLabel,
				"M":    mLabel,
				"err:": err.Error(),
			}).Error("new Sb change master to M fail, return")
			steps = append(steps, fmt.Sprintf("Step1. New Sb change master to M fail:%s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sb":          newSbLabel,
			"M":           mLabel,
			"IsolateType": isolateType,
		}).Debug("new Sb change master to M success")
		steps = append(steps, "Step1. New Sb change master to M success\n")
		// 2. end ********************

		// 3. update topology
		rwMutex.Lock()
		sbDbInfo.Type = consts.Slave // sb -> sl
		sbDbInfo.Status = consts.Abnormal
		slDbInfo.Type = consts.StandBy // maxGtidSl -> sb
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()
		// 3. end ********************

		// 4. for other sl (except origin sb) changeMaster to newSb
		errs := otherSlChangeMasterParallel(topology, sbName, maxGtidDbName, slDbInfo, swm)
		if len(errs) != 0 {
			steps = append(steps, fmt.Sprintf("Step2. Other Sl change master to new Sb fail:%v\n", errs))
		} else {
			steps = append(steps, "Step2. Other Sl change master to new Sb success\n")
		}

		// 4. end ********************
	} else {
		// Assign to err so the deferred logger reports a failure instead of
		// "isolate success".
		err = errors.New("Unhandle isolate logic")
		return err, steps
	}

	return nil, steps
}
Ejemplo n.º 9
0
// checkTopo verifies that the replication state reported by each node of the
// "default" data node group matches the roles recorded in topology.
//
// Checks performed:
//
//   - When mCheck is true, the Normal master must report no master IP and a
//     master port of 0.
//   - The Normal standby, if any, must replicate from the master's IP and
//     port with both IO and SQL threads reporting "Yes". When there is no
//     Normal standby the function returns nil after the master check.
//   - When more than two nodes are Normal, every Normal slave must replicate
//     from the standby's IP and port with both threads reporting "Yes".
//
// The first mismatch, or the first error from swarm.SlaveShowStatus, is
// returned. The t parameter is not used by this function.
func checkTopo(swarm *swarm.Swarm, topology *structs.Topology, mCheck bool, t *testing.T) error {
	log.Println("***CheckTopo start***")
	var mName, mIp, sbName, sbIp string
	var mPort, sbPort int
	for k, v := range topology.DataNodeGroup["default"] {
		if v.Type == consts.Master && v.Status == consts.Normal {
			mName = k
			mIp = v.Ip
			mPort = v.Port
		} else if v.Type == consts.StandBy && v.Status == consts.Normal {
			sbName = k
			sbIp = v.Ip
			sbPort = v.Port
		}
	}

	if mCheck {
		// check m
		mSlaveStatus, err := swarm.SlaveShowStatus(mName)
		if err != nil {
			// Returned as-is: using the message as a format string would
			// mangle any '%' it contains.
			return err
		}
		if mSlaveStatus.MasterIp != "" ||
			mSlaveStatus.MasterPort != 0 {
			return fmt.Errorf("master %s init/isolate/recover err, slave status:%#v\n", mName, mSlaveStatus)
		}
	}

	if sbName == "" {
		log.Println("***CheckTopo end***")
		return nil
	}

	// check sb
	sbSlaveStatus, err := swarm.SlaveShowStatus(sbName)
	if err != nil {
		return err
	}
	if sbSlaveStatus.MasterIp != mIp ||
		sbSlaveStatus.MasterPort != mPort ||
		sbSlaveStatus.SlaveIoRunning != "Yes" ||
		sbSlaveStatus.SlaveSqlRunning != "Yes" {
		return fmt.Errorf("standby %s init/isolate/recover err, slave status:%#v\n", sbName, sbSlaveStatus)
	}

	// check sl
	if topology.DataNodeGroupNormalCount["default"] > 2 {
		for k, v := range topology.DataNodeGroup["default"] {
			if v.Type == consts.Slave && v.Status == consts.Normal {
				slSlaveStatus, err := swarm.SlaveShowStatus(k)
				if err != nil {
					return err
				}
				if slSlaveStatus.MasterIp != sbIp ||
					slSlaveStatus.MasterPort != sbPort ||
					slSlaveStatus.SlaveIoRunning != "Yes" ||
					slSlaveStatus.SlaveSqlRunning != "Yes" {
					// Report the failing slave's own status, not the
					// standby's.
					return fmt.Errorf("slave %s init/isolate/recover err, slave status:%#v\n", k, slSlaveStatus)
				}
			}
		}
	}
	log.Println("***CheckTopo end***")
	return nil
}