// no need RLock func IsolateDB(dbName string, rwMutex *sync.RWMutex, topology *structs.Topology, swm *swarm.Swarm) (error, []string) { var err error steps := []string{} defer func() { if err != nil && err.Error() != "" { log.WithFields(log.Fields{ "DbName": dbName, "err": err.Error(), }).Error("isolate fail") } else { log.WithFields(log.Fields{ "DbName": dbName, }).Info("isolate success") } }() var isolateType string for k, v := range topology.DataNodeGroup["default"] { if k == dbName { isolateType = v.Type break } } if isolateType == "" { err = errors.New("isolate db node not found") return err, steps } btopology, _ := json.MarshalIndent(topology, "", " ") log.WithFields(log.Fields{ "IsolateDbType": isolateType, "DbName": dbName, "topology": string(btopology), }).Debug("isolate db") // only m normal || // isolate sb && only m sb normal || // isolate sl // no need swarm, update topology and return if topology.DataNodeGroupNormalCount["default"] == 1 || (isolateType == consts.StandBy && topology.DataNodeGroupNormalCount["default"] == 2) || isolateType == consts.Slave { rwMutex.Lock() topology.DataNodeGroup["default"][dbName].Status = consts.Abnormal updateVersionAndNormalCount(topology, -1) rwMutex.Unlock() steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName)) return nil, steps } if isolateType == consts.Master { // 1.sb checkGtidDiff sbName, sbDbInfo := getStandBy(topology) err = swm.IsDelay(sbName) if err != nil { log.WithFields(log.Fields{ "DbName": sbName, "err": err.Error(), }).Warn("Sb delay check fail, continue") err = nil } // 1. end ******************** // m sb normal // update topology and return if topology.DataNodeGroupNormalCount["default"] == 2 { // 3.update topology _, mDbInfo := getMaster(topology) rwMutex.Lock() sbDbInfo.Type = consts.Master // sb -> m mDbInfo.Type = consts.StandBy // m -> sb mDbInfo.Status = consts.Abnormal updateVersionAndNormalCount(topology, -1) steps = append(steps, fmt.Sprintf("Step1. Isolate %s success\n", dbName)) rwMutex.Unlock() return nil, steps // ******************** } // has normal sl // 2.vote sb var maxGtidDbName string maxGtidDbName, err = voteSl(swm, topology) if err != nil { steps = append(steps, fmt.Sprintf("Step2. Vote new Sb fail: %s\n", err.Error())) return err, steps } steps = append(steps, fmt.Sprintf("Step2. Vote new Sb success:%s\n", maxGtidDbName)) // 2. end ******************** // 3.update topology rwMutex.Lock() mName, mDbInfo := getMaster(topology) mDbInfo.Type = consts.Slave // m -> sl mDbInfo.Status = consts.Abnormal sbDbInfo.Type = consts.Master // sb -> master slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName] slDbInfo.Type = consts.StandBy // maxGtidSl -> sb updateVersionAndNormalCount(topology, -1) rwMutex.Unlock() // 3. end ******************** // 4.for other sl(except origin m) changeMaster to newSb errs := otherSlChangeMasterParallel(topology, mName, maxGtidDbName, slDbInfo, swm) if len(errs) != 0 { steps = append(steps, fmt.Sprintf("Step3. Other Sl change master to new Sb fail:%v\n", errs)) } else { steps = append(steps, "Step3. Other Sl change master to new Sb success\n") } // 4. end ******************** } else if isolateType == consts.StandBy { sbName, sbDbInfo := getStandBy(topology) // has normal sl // 1.vote sb var maxGtidDbName string maxGtidDbName, err = voteSl(swm, topology) if err != nil { return err, steps } // 1. end ******************** // no need check gtidDiff // 2. newsb change master to master mName, mDbInfo := getMaster(topology) slDbInfo := topology.DataNodeGroup["default"][maxGtidDbName] err = changeMaster(swm, maxGtidDbName, mDbInfo.Ip, mDbInfo.Port) if err != nil { log.WithFields(log.Fields{ "Sb": maxGtidDbName + ":" + slDbInfo.Ip + strconv.Itoa(slDbInfo.Port), "M": mName + ":" + mDbInfo.Ip + strconv.Itoa(mDbInfo.Port), "err:": err.Error(), }).Error("new Sb change master to M fail, return") steps = append(steps, fmt.Sprintf("Step1. New Sb change master to M fail:%s\n", err.Error())) return err, steps } log.WithFields(log.Fields{ "Sb": maxGtidDbName + ":" + slDbInfo.Ip + strconv.Itoa(slDbInfo.Port), "M": mName + ":" + mDbInfo.Ip + strconv.Itoa(mDbInfo.Port), "IsolateType": isolateType, }).Debug("new Sb change master to M success") steps = append(steps, "Step1. New Sb change master to M success\n") // 2. end ******************** // 3.update topology rwMutex.Lock() sbDbInfo.Type = consts.Slave // sb -> sl sbDbInfo.Status = consts.Abnormal slDbInfo.Type = consts.StandBy // maxGtidSl -> sb updateVersionAndNormalCount(topology, -1) rwMutex.Unlock() // 3. end ******************** // 4.for other sl(except origin sb) changeMaster to newSb errs := otherSlChangeMasterParallel(topology, sbName, maxGtidDbName, slDbInfo, swm) if len(errs) != 0 { steps = append(steps, fmt.Sprintf("Step2. Other Sl change master to new Sb fail:%v\n", errs)) } else { steps = append(steps, "Step2. Other Sl change master to new Sb success\n") } // 4. end ******************** } else { return errors.New("Unhandle isolate logic"), steps } return nil, steps }