Exemplo n.º 1
0
// IsolateDB marks the data node dbName of the "default" data node group as
// abnormal and, when that node is the master or the standby, reassigns the
// master/standby roles among the remaining nodes.
//
// Topology is read without taking rwMutex.RLock (original author's note:
// "no need RLock"); only the topology mutations are wrapped in
// rwMutex.Lock/Unlock. NOTE(review): this presumably relies on the caller
// serializing isolate operations — confirm.
//
// The handled cases are:
//   - only one normal node, or a standby isolated while two nodes are normal,
//     or a slave isolated: the node is just flagged abnormal;
//   - master isolated with two normal nodes: master and standby swap roles and
//     the old master is flagged abnormal;
//   - master isolated with more normal nodes: the standby becomes master, the
//     slave chosen by voteSl becomes standby, and the other slaves are
//     re-pointed through otherSlChangeMasterParallel;
//   - standby isolated with more normal nodes: the slave chosen by voteSl is
//     re-pointed to the master and becomes standby, then the other slaves are
//     re-pointed through otherSlChangeMasterParallel.
//
// The error is returned first (kept for caller compatibility), followed by
// human-readable step messages. Failures reported by
// otherSlChangeMasterParallel are recorded in the steps only and do not make
// the call fail. The final outcome is logged once on return.
func IsolateDB(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) {
	var err error
	// Kept as an empty, non-nil slice as in the original, in case callers
	// serialize it.
	steps := []string{}

	// The outcome log is driven by the value of err at return time, so every
	// failing return path must assign err before returning.
	defer func() {
		if err != nil && err.Error() != "" {
			log.WithFields(log.Fields{
				"DbName": dbName,
				"err":    err.Error(),
			}).Error("isolate fail")
			return
		}
		log.WithFields(log.Fields{
			"DbName": dbName,
		}).Info("isolate success")
	}()

	// Resolve the role (type) of the node being isolated.
	var isolateType string
	if node, ok := topology.DataNodeGroup["default"][dbName]; ok {
		isolateType = node.Type
	}
	if isolateType == "" {
		err = errors.New("isolate db node not found")
		return err, steps
	}

	// The dump is only used for the debug log below; on a marshal failure the
	// field is simply logged empty, so the error is deliberately ignored.
	btopology, _ := json.MarshalIndent(topology, "", "  ")
	log.WithFields(log.Fields{
		"IsolateDbType": isolateType,
		"DbName":        dbName,
		"topology":      string(btopology),
	}).Debug("isolate db")

	// No swarm interaction is needed when:
	//   - only one node is normal, or
	//   - a standby is isolated and only two nodes are normal, or
	//   - a slave is isolated.
	// Flag the node abnormal, update the topology and return.
	if topology.DataNodeGroupNormalCount["default"] == 1 ||
		(isolateType == consts.StandBy && topology.DataNodeGroupNormalCount["default"] == 2) ||
		isolateType == consts.Slave {
		rwMutex.Lock()
		topology.DataNodeGroup["default"][dbName].Status = consts.Abnormal
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()
		steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName))
		return nil, steps
	}

	switch isolateType {
	case consts.Master:
		// 1. Standby delay check. Best-effort: a failure is logged and the
		// isolation continues, so err is reset to keep the outcome log clean.
		sbName, sbDbInfo := getStandBy(topology)
		err = swm.IsDelay(sbName)
		if err != nil {
			log.WithFields(log.Fields{
				"DbName": sbName,
				"err":    err.Error(),
			}).Warn("Sb delay check fail, continue")
			err = nil
		}

		// Two normal nodes: swap master and standby, flag the old master
		// abnormal and return.
		if topology.DataNodeGroupNormalCount["default"] == 2 {
			_, mDbInfo := getMaster(topology)
			rwMutex.Lock()
			sbDbInfo.Type = consts.Master // sb -> m
			mDbInfo.Type = consts.StandBy // m -> sb
			mDbInfo.Status = consts.Abnormal
			updateVersionAndNormalCount(topology, -1)
			rwMutex.Unlock()
			steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName))
			return nil, steps
		}

		// 2. More nodes are normal: choose the slave that becomes the new
		// standby.
		var maxGtidDbName string
		maxGtidDbName, err = voteSl(swm, topology)
		if err != nil {
			steps = append(steps, fmt.Sprintf("Step2. Vote new Sb fail: %s\n", err.Error()))
			return err, steps
		}
		steps = append(steps, fmt.Sprintf("Step2. Vote new Sb success:%s\n", maxGtidDbName))

		// 3. Update topology roles.
		rwMutex.Lock()
		mName, mDbInfo := getMaster(topology)
		mDbInfo.Type = consts.Slave // m -> sl
		mDbInfo.Status = consts.Abnormal
		sbDbInfo.Type = consts.Master // sb -> m
		slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName]
		slDbInfo.Type = consts.StandBy // voted sl -> sb
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()

		// 4. Re-point the other slaves (excluding the original master) to the
		// new standby. Failures are reported in steps only.
		errs := otherSlChangeMasterParallel(topology, mName, maxGtidDbName, slDbInfo, swm)
		if len(errs) != 0 {
			steps = append(steps, fmt.Sprintf("Step3. Other Sl change master to new Sb fail:%v\n", errs))
		} else {
			steps = append(steps, "Step3. Other Sl change master to new Sb success\n")
		}

	case consts.StandBy:
		sbName, sbDbInfo := getStandBy(topology)

		// 1. Choose the slave that becomes the new standby.
		var maxGtidDbName string
		maxGtidDbName, err = voteSl(swm, topology)
		if err != nil {
			return err, steps
		}

		// 2. Point the new standby at the master.
		mName, mDbInfo := getMaster(topology)
		slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName]
		err = changeMaster(swm, maxGtidDbName, mDbInfo.Ip, mDbInfo.Port)
		// name:ip:port labels for the log entries below.
		sbAddr := maxGtidDbName + ":" + slDbInfo.Ip + ":" + strconv.Itoa(slDbInfo.Port)
		mAddr := mName + ":" + mDbInfo.Ip + ":" + strconv.Itoa(mDbInfo.Port)
		if err != nil {
			log.WithFields(log.Fields{
				"Sb":  sbAddr,
				"M":   mAddr,
				"err": err.Error(),
			}).Error("new Sb change master to M fail, return")
			steps = append(steps, fmt.Sprintf("Step1. New Sb change master to M fail:%s\n", err.Error()))
			return err, steps
		}
		log.WithFields(log.Fields{
			"Sb":          sbAddr,
			"M":           mAddr,
			"IsolateType": isolateType,
		}).Debug("new Sb change master to M success")
		steps = append(steps, "Step1. New Sb change master to M success\n")

		// 3. Update topology roles.
		rwMutex.Lock()
		sbDbInfo.Type = consts.Slave // sb -> sl
		sbDbInfo.Status = consts.Abnormal
		slDbInfo.Type = consts.StandBy // voted sl -> sb
		updateVersionAndNormalCount(topology, -1)
		rwMutex.Unlock()

		// 4. Re-point the other slaves (excluding the original standby) to the
		// new standby. Failures are reported in steps only.
		errs := otherSlChangeMasterParallel(topology, sbName, maxGtidDbName, slDbInfo, swm)
		if len(errs) != 0 {
			steps = append(steps, fmt.Sprintf("Step2. Other Sl change master to new Sb fail:%v\n", errs))
		} else {
			steps = append(steps, "Step2. Other Sl change master to new Sb success\n")
		}

	default:
		// Assign err so the deferred outcome log reports the failure instead
		// of "isolate success".
		err = errors.New("Unhandle isolate logic")
		return err, steps
	}

	return nil, steps
}