func (self *UpdateRegionCommand) Execute(c *cc.Controller) (cc.Result, error) {
    if len(self.Nodes) == 0 {
        return nil, nil
    }
    // Update the cluster topology.
    cs := c.ClusterState
    cs.UpdateRegionNodes(self.Region, self.Nodes)

    // Update the migration task states first, so that when a failure is
    // detected, the migration tasks are paused before the failure is handled.
    cluster := cs.GetClusterSnapshot()
    if cluster != nil {
        mm := c.MigrateManager
        mm.HandleNodeStateChange(cluster)
    }

    for _, ns := range cs.AllNodeStates() {
        node := ns.Node()
        // Slave: auto enable read?
        if !node.IsMaster() && !node.Fail && !node.Readable && node.MasterLinkStatus == "up" {
            if meta.GetAppConfig().AutoEnableSlaveRead {
                redis.EnableRead(node.Addr(), node.Id)
            }
        }
        // Master: auto enable write?
        if node.IsMaster() && !node.Fail && !node.Writable {
            if meta.GetAppConfig().AutoEnableMasterWrite {
                redis.EnableWrite(node.Addr(), node.Id)
            }
        }
        // Fix chained replication: a slave whose parent is also a slave.
        if meta.LocalRegion() == self.Region && !node.IsMaster() {
            parent := cs.FindNode(node.ParentId)
            // Parent is not a master?
            if parent != nil && !parent.IsMaster() {
                grandpa := cs.FindNode(parent.ParentId)
                if grandpa != nil {
                    _, err := redis.ClusterReplicate(node.Addr(), grandpa.Id)
                    if err == nil {
                        log.Warningf(node.Addr(), "Fix chained replication, (%s->%s->%s)=>(%s->%s)",
                            node, parent, grandpa, node, grandpa)
                    }
                } else {
                    log.Warningf(node.Addr(), "Found chained replication, (%s->%s->nil), cannot fix.",
                        node, parent)
                }
            }
        }
        // Advance the state machine of each node in the region.
        ns.AdvanceFSM(cs, state.CMD_NONE)
    }
    return nil, nil
}
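// Illustrative sketch (not from this codebase): the chained-replication fix
// above walks node -> parent -> grandparent and repoints the slave at its
// grandparent. The standalone snippet below shows the same walk over a plain
// parent-id map; the types and the reparent callback are hypothetical.
package main

import "fmt"

// fixChains repoints any node whose parent is itself a slave (a "slave of a
// slave") at its grandparent, mirroring the ClusterReplicate call above.
func fixChains(parent map[string]string, reparent func(node, newParent string)) {
    for node, p := range parent {
        if p == "" {
            continue // node is a master
        }
        if gp, ok := parent[p]; ok && gp != "" {
            // Parent is itself a slave: node -> p -> gp becomes node -> gp.
            reparent(node, gp)
            parent[node] = gp
        }
    }
}

func main() {
    parent := map[string]string{"A": "", "B": "A", "C": "B"} // C is a slave of slave B
    fixChains(parent, func(n, np string) { fmt.Printf("repoint %s -> %s\n", n, np) })
}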
func Migrate(addr, toIp string, toPort int, key string, timeout int) (string, error) {
    inner := func(addr, toIp string, toPort int, key string, timeout int) (string, error) {
        conn, err := dial(addr)
        if err != nil {
            return "", ErrConnFailed
        }
        defer conn.Close()

        resp, err := redis.String(conn.Do("migrate", toIp, toPort, key, 0, timeout))
        if err != nil && strings.Contains(err.Error(), "BUSYKEY") {
            log.Warningf("Migrate", "Found BUSYKEY '%s', will overwrite it.", key)
            resp, err = redis.String(conn.Do("migrate", toIp, toPort, key, 0, timeout, "replace"))
        }
        if err != nil {
            return "", err
        }
        return resp, nil
    }

    retry := NUM_RETRY
    var err error
    var resp string
    for retry > 0 {
        resp, err = inner(addr, toIp, toPort, key, timeout)
        if err == nil {
            return resp, nil
        }
        retry--
    }
    return "", err
}
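// Illustrative sketch (not from this codebase): Migrate above retries its
// inner closure up to NUM_RETRY times and returns the last error observed.
// The pattern in isolation, with a hypothetical withRetry helper:
package main

import (
    "errors"
    "fmt"
)

// withRetry calls fn up to attempts times, returning the first success or the
// last error, mirroring the retry loop in Migrate above.
func withRetry(attempts int, fn func() (string, error)) (string, error) {
    var err error
    for ; attempts > 0; attempts-- {
        var resp string
        if resp, err = fn(); err == nil {
            return resp, nil
        }
    }
    return "", err
}

func main() {
    n := 0
    resp, err := withRetry(3, func() (string, error) {
        n++
        if n < 3 {
            return "", errors.New("transient")
        }
        return "OK", nil
    })
    fmt.Println(resp, err) // OK <nil>
}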
func (cs *ClusterState) UpdateRegionNodes(region string, nodes []*topo.Node) {
    cs.version++
    now := time.Now()

    log.Verbosef("CLUSTER", "Update region %s %d nodes", region, len(nodes))

    // Add nodes that do not exist yet; stamp every reported node with the new version.
    for _, n := range nodes {
        if n.Region != region {
            continue
        }
        nodeState := cs.nodeStates[n.Id]
        if nodeState == nil {
            nodeState = NewNodeState(n, cs.version)
            cs.nodeStates[n.Id] = nodeState
        } else {
            nodeState.version = cs.version
            if nodeState.node.Fail != n.Fail {
                log.Eventf(n.Addr(), "Fail state changed, %v -> %v", nodeState.node.Fail, n.Fail)
            }
            if nodeState.node.Readable != n.Readable {
                log.Eventf(n.Addr(), "Readable state changed, %v -> %v", nodeState.node.Readable, n.Readable)
            }
            if nodeState.node.Writable != n.Writable {
                log.Eventf(n.Addr(), "Writable state changed, %v -> %v", nodeState.node.Writable, n.Writable)
            }
            nodeState.node = n
        }
        nodeState.updateTime = now
    }

    // Delete nodes that have gone offline (their version stamp was not renewed).
    for id, n := range cs.nodeStates {
        if n.node.Region != region {
            continue
        }
        nodeState := cs.nodeStates[id]
        if nodeState.version != cs.version {
            log.Warningf("CLUSTER", "Delete node %s", nodeState.node)
            delete(cs.nodeStates, id)
        }
    }

    // NB: inefficient?
    cs.BuildClusterSnapshot()
}
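// Illustrative sketch (not from this codebase): UpdateRegionNodes uses the
// bumped version as a mark-and-sweep. Nodes present in the report are
// re-stamped; nodes left with a stale stamp are deleted. A minimal standalone
// version with hypothetical types:
package main

import "fmt"

type entry struct{ version uint64 }

// sweep stamps every seen id with the new version, adds newcomers, and
// deletes entries whose stamp was not renewed.
func sweep(states map[string]*entry, seen []string, version uint64) {
    for _, id := range seen {
        if e, ok := states[id]; ok {
            e.version = version // mark
        } else {
            states[id] = &entry{version: version} // add newcomer
        }
    }
    for id, e := range states {
        if e.version != version { // untouched => absent from this report
            delete(states, id)
        }
    }
}

func main() {
    states := map[string]*entry{"a": {1}, "b": {1}}
    sweep(states, []string{"a", "c"}, 2) // b disappears, c is added
    fmt.Println(len(states))             // 2
}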
// Forget and Reset seem to make sense only when performed together;
// doing just one of them leaves the cluster in an inconsistent state.
func (self *ForgetAndResetNodeCommand) Execute(c *cc.Controller) (cc.Result, error) {
    cs := c.ClusterState
    target := cs.FindNode(self.NodeId)
    if target == nil {
        return nil, ErrNodeNotExist
    }
    if target.Free {
        return nil, ErrNodeIsFree
    }
    if len(target.Ranges) > 0 {
        return nil, ErrNodeNotEmpty
    }

    var err error
    forgetCount := 0
    allForgetDone := true

    // 1. Send FORGET to all nodes.
    for _, ns := range cs.AllNodeStates() {
        if ns.Id() == target.Id {
            continue
        }
        node := ns.Node()
        _, err = redis.ClusterForget(ns.Addr(), target.Id)
        // "ERR Unknown node" means the peer already does not know the target,
        // which counts as done. Retry real errors on live nodes.
        if !node.Fail && err != nil && !strings.HasPrefix(err.Error(), "ERR Unknown node") {
            for try := redis.NUM_RETRY; try >= 0; try-- {
                _, err = redis.ClusterForget(ns.Addr(), target.Id)
                if err == nil {
                    break
                }
            }
            // Still failing after retries.
            if err != nil {
                allForgetDone = false
                log.Warningf(target.Addr(), "Forget node %s(%s) failed after retry, %v",
                    ns.Addr(), ns.Id(), err)
                continue
            }
        }
        log.Eventf(target.Addr(), "Forget by %s(%s).", ns.Addr(), ns.Id())
        forgetCount++
    }
    if !allForgetDone {
        return nil, fmt.Errorf("Not all forget done, only (%d/%d) success",
            forgetCount, len(cs.AllNodeStates())-1)
    }

    // 2. Reset the target.
    if !target.Fail {
        _, err = redis.ClusterReset(target.Addr(), false)
        if err != nil {
            return nil, fmt.Errorf("Reset node %s(%s) failed, %v", target.Id, target.Addr(), err)
        }
        log.Eventf(target.Addr(), "Reset.")
    }

    // Remove the seed in the leader controller.
    meta.RemoveSeed(target.Addr())
    return nil, nil
}
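// Illustrative sketch (not from this codebase): the command above gates the
// reset behind an all-peers-forget pass. A minimal standalone version of that
// gate, with a hypothetical forgetEverywhere helper; unlike the real command
// it does not tolerate "ERR Unknown node" replies or retry.
package main

import (
    "errors"
    "fmt"
)

// forgetEverywhere asks every peer to forget target and reports success only
// if all peers agreed, mirroring the allForgetDone gate above.
func forgetEverywhere(peers []string, target string, forget func(peer string) error) error {
    done := 0
    for _, p := range peers {
        if p == target {
            continue
        }
        if err := forget(p); err != nil {
            return fmt.Errorf("not all forget done, only (%d/%d) success: %w", done, len(peers)-1, err)
        }
        done++
    }
    return nil
}

func main() {
    err := forgetEverywhere([]string{"a", "b", "t"}, "t", func(p string) error {
        if p == "b" {
            return errors.New("connection refused")
        }
        return nil
    })
    fmt.Println(err) // not all forget done, only (1/2) success: connection refused
}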
func (t *MigrateTask) Run() {
    if t.CurrentState() == StateNew {
        t.SetState(StateRunning)
    }
    if t.CurrentState() == StateCancelling {
        t.SetState(StateCancelled)
        return
    }

    prevKey := ""
    timeoutCnt := 0

    for i, r := range t.ranges {
        if r.Left < 0 {
            r.Left = 0
        }
        if r.Right > 16383 {
            r.Right = 16383
        }
        t.currRangeIndex = i
        t.currSlot = r.Left
        t.totalKeysInSlot = 0
        for t.currSlot <= r.Right {
            t.streamPub(true)

            // Prefer to switch state only after a whole slot has been migrated
            // or an error has been hit.
            if t.CurrentState() == StateCancelling {
                t.SetState(StateCancelled)
                t.streamPub(false)
                return
            }
            // Paused: sleep a while, then check again.
            if t.CurrentState() == StatePausing {
                t.SetState(StatePaused)
            }
            if t.CurrentState() == StatePaused {
                time.Sleep(100 * time.Millisecond)
                continue
            }

            // Normal operation.
            app := meta.GetAppConfig()
            nkeys, err, key := t.migrateSlot(t.currSlot, app.MigrateKeysEachTime)
            t.totalKeysInSlot += nkeys

            // Check the remaining keys again.
            seed := t.SourceNode()
            remains, err2 := redis.CountKeysInSlot(seed.Addr(), t.currSlot)
            if err2 != nil {
                remains = -1
            }
            if err != nil || remains > 0 {
                log.Warningf(t.TaskName(), "Migrate slot %d error, %d keys done, total %d keys, remains %d keys, %v",
                    t.currSlot, nkeys, t.totalKeysInSlot, remains, err)
                if err != nil && strings.HasPrefix(err.Error(), "READONLY") {
                    log.Warningf(t.TaskName(), "Migrating across slaves nodes. "+
                        "Maybe a manual failover just happened, "+
                        "if cluster marks down after this point, "+
                        "we need recover it by ourself using cli commands.")
                    t.SetState(StateCancelled)
                    goto quit
                } else if err != nil && strings.HasPrefix(err.Error(), "CLUSTERDOWN") {
                    log.Warningf(t.TaskName(), "The cluster is down, please check it yourself, migrating task cancelled.")
                    t.SetState(StateCancelled)
                    goto quit
                } else if err != nil && strings.HasPrefix(err.Error(), "IOERR") {
                    log.Warningf(t.TaskName(), "Migrating key:%s timeout", key)
                    if timeoutCnt > 10 {
                        log.Warningf(t.TaskName(), "Migrating key:%s timeout too frequently, task cancelled", key)
                        t.SetState(StateCancelled)
                        goto quit
                    }
                    if prevKey == key {
                        timeoutCnt++
                    } else {
                        timeoutCnt = 0
                        prevKey = key
                    }
                }
                time.Sleep(500 * time.Millisecond)
            } else {
                log.Infof(t.TaskName(), "Migrate slot %d done, %d keys done, total %d keys, remains %d keys",
                    t.currSlot, nkeys, t.totalKeysInSlot, remains)
                t.currSlot++
                t.totalKeysInSlot = 0
            }
        }
    }
    t.currSlot--
    t.SetState(StateDone)
quit:
    t.streamPub(false)
}
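// Illustrative sketch (not from this codebase): the Run loop above re-checks
// the task state before each key batch, so cancel and pause take effect only
// at batch boundaries. The polling skeleton in isolation; state names and the
// runSlots helper are stand-ins:
package main

import (
    "fmt"
    "time"
)

type taskState int

const (
    running taskState = iota
    pausing
    paused
    cancelling
    cancelled
)

// runSlots mirrors the cooperative structure of Run above: work proceeds only
// while the observed state is running.
func runSlots(nslots int, current func() taskState, set func(taskState), migrateOne func(int)) {
    for slot := 0; slot < nslots; slot++ {
    wait:
        for {
            switch current() {
            case cancelling:
                set(cancelled)
                return
            case pausing:
                set(paused)
            case paused:
                time.Sleep(10 * time.Millisecond)
            default:
                break wait // running: go do the work
            }
        }
        migrateOne(slot)
    }
}

func main() {
    s := running
    runSlots(3,
        func() taskState { return s },
        func(n taskState) { s = n },
        func(slot int) { fmt.Println("migrated slot", slot) })
}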
/// Slot migration procedure:
/// 1. Mark the target replica set's master as IMPORTING.
/// 2. Mark all nodes of the source replica set as MIGRATING.
/// 3. Pull keys from the source master and migrate them until none remain.
/// 4. Assign the slot to the target on the target's slaves.
/// 5. Assign the slot to the target on the target's master.
/// 6. Assign the slot to the target on all source nodes.
/// Commands:
/// 1. <Target Master> setslot $slot IMPORTING $sourceId
/// 2. <Source Slaves> setslot $slot MIGRATING $targetId
/// 3. <Source Master> setslot $slot MIGRATING $targetId
/// ... migrating all keys
/// 4. <Target Slaves> setslot $slot node $targetId
/// 5. <Target Master> setslot $slot node $targetId
/// 6. <Source Slaves> setslot $slot node $targetId
/// 7. <Source Master> setslot $slot node $targetId
func (t *MigrateTask) migrateSlot(slot int, keysPer int) (int, error, string) {
    rs := t.SourceReplicaSet()
    sourceNode := t.SourceNode()
    targetNode := t.TargetNode()

    err := redis.SetSlot(targetNode.Addr(), slot, redis.SLOT_IMPORTING, sourceNode.Id)
    if err != nil {
        if strings.HasPrefix(err.Error(), "ERR I'm already the owner of hash slot") {
            log.Warningf(t.TaskName(), "%s already the owner of hash slot %d", targetNode.Id[:6], slot)
            // Reaching here means the target already owns the slot while the
            // source is still in MIGRATING state. The migration has in fact
            // finished; we only need to clear the source's MIGRATING state.
            srs := t.SourceReplicaSet()
            err = SetSlotToNode(srs, slot, targetNode.Id)
            if err != nil {
                return 0, err, ""
            }
            err = SetSlotStable(srs, slot)
            if err != nil {
                return 0, err, ""
            }
            trs := t.TargetReplicaSet()
            err = SetSlotToNode(trs, slot, targetNode.Id)
            if err != nil {
                return 0, err, ""
            }
            err = SetSlotStable(trs, slot)
            return 0, err, ""
        }
        return 0, err, ""
    }

    // Mark every node of the source replica set as MIGRATING, to minimise the
    // risk of inconsistent reads from slaves in other regions.
    for _, node := range rs.AllNodes() {
        err := redis.SetSlot(node.Addr(), slot, redis.SLOT_MIGRATING, targetNode.Id)
        if err != nil {
            if strings.HasPrefix(err.Error(), "ERR I'm not the owner of hash slot") {
                log.Warningf(t.TaskName(), "%s is not the owner of hash slot %d", sourceNode.Id, slot)
                srs := t.SourceReplicaSet()
                err = SetSlotStable(srs, slot)
                if err != nil {
                    log.Warningf(t.TaskName(), "Failed to clean MIGRATING state of source server.")
                    return 0, err, ""
                }
                trs := t.TargetReplicaSet()
                err = SetSlotStable(trs, slot)
                if err != nil {
                    log.Warningf(t.TaskName(), "Failed to clean MIGRATING state of target server.")
                    return 0, err, ""
                }
                return 0, fmt.Errorf("mig: %s is not the owner of hash slot %d", sourceNode.Id, slot), ""
            }
            return 0, err, ""
        }
    }

    nkeys := 0
    app := meta.GetAppConfig()
    for {
        keys, err := redis.GetKeysInSlot(sourceNode.Addr(), slot, keysPer)
        if err != nil {
            return nkeys, err, ""
        }
        for _, key := range keys {
            _, err := redis.Migrate(sourceNode.Addr(), targetNode.Ip, targetNode.Port, key, app.MigrateTimeout)
            if err != nil {
                return nkeys, err, key
            }
            nkeys++
        }
        if len(keys) == 0 {
            // Migration done; wait for the source slaves to sync the DELs,
            // i.e. until no key of this slot remains on any source node.
            slaveSyncDone := true
            srs := t.SourceReplicaSet()
            for _, node := range srs.AllNodes() {
                n, err := redis.CountKeysInSlot(node.Addr(), slot)
                if err != nil {
                    return nkeys, err, ""
                }
                if n > 0 {
                    slaveSyncDone = false
                }
            }
            if !slaveSyncDone {
                // FIXME
                // The master is done but a slave still holds keys in the slot;
                // SETSLOT will make the slave clear the data.
                log.Info(t.TaskName(), "source node not empty, setslot will clear")
                //return nkeys, fmt.Errorf("mig: source nodes not all empty, will retry."), ""
            }

            // Assign the slot to the new owner; this automatically clears the
            // IMPORTING and MIGRATING states. When issued against a source
            // node, Redis ensures no key of the slot remains there.
            trs := t.TargetReplicaSet()
            // Set the slaves first, so that the slot information survives even
            // if the master dies before the new ownership has been broadcast.
            for _, node := range trs.Slaves {
                if node.Fail {
                    continue
                }
                err = redis.SetSlot(node.Addr(), slot, redis.SLOT_NODE, targetNode.Id)
                if err != nil {
                    return nkeys, err, ""
                }
            }
            // This bumps the epoch and broadcasts the new ownership.
            err = redis.SetSlot(trs.Master.Addr(), slot, redis.SLOT_NODE, targetNode.Id)
            if err != nil {
                return nkeys, err, ""
            }

            // Update the slot ownership on all other nodes.
            for _, rs := range t.cluster.ReplicaSets() {
                if rs.Master.IsStandbyMaster() {
                    continue
                }
                err = SetSlotToNode(rs, slot, targetNode.Id)
                if err != nil {
                    return nkeys, err, ""
                }
            }
            break
        }
    }
    return nkeys, nil, ""
}
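// Illustrative sketch (not from this codebase): migrateSlot finalizes the
// slot on the target's live slaves before its master, so the new ownership
// survives a master crash that happens before the gossip broadcast; the
// master call then bumps the epoch. The ordering in isolation, with a
// hypothetical node type:
package main

import "fmt"

type node struct {
    addr string
    fail bool
}

// assignSlaveFirst visits live slaves first and the master last, mirroring
// the SLOT_NODE ordering in migrateSlot above.
func assignSlaveFirst(master node, slaves []node, assign func(node) error) error {
    for _, s := range slaves {
        if s.fail {
            continue
        }
        if err := assign(s); err != nil {
            return err
        }
    }
    return assign(master) // master last: bumps the epoch and broadcasts
}

func main() {
    err := assignSlaveFirst(
        node{addr: "10.0.0.1:7000"},
        []node{{addr: "10.0.0.2:7000"}, {addr: "10.0.0.3:7000", fail: true}},
        func(n node) error { fmt.Println("setslot NODE on", n.addr); return nil },
    )
    fmt.Println(err)
}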
func (cs *ClusterState) RunFailoverTask(oldMasterId, newMasterId string) {
    new := cs.FindNodeState(newMasterId)
    old := cs.FindNodeState(oldMasterId)

    if old == nil {
        log.Warningf(oldMasterId, "Can't run failover task, the old dead master lost")
        return
    }
    if new == nil {
        log.Warningf(oldMasterId, "Can't run failover task, new master lost (%s)", newMasterId)
        old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL)
        return
    }

    // Broadcast the message through the new master.
    redis.DisableRead(new.Addr(), old.Id())
    redis.DisableWrite(new.Addr(), old.Id())

    c := make(chan error, 1)

    go func() {
        // Choose between FAILOVER FORCE and TAKEOVER, depending on whether
        // the cluster has an arbiter.
        cluster := cs.cluster
        rs := cluster.FindReplicaSetByNode(old.Id())
        if cluster.HasArbiter() || cluster.IsClusterDown() {
            // Use FAILOVER TAKEOVER.
            c <- redis.SetAsMasterWaitSyncDone(new.Addr(), true, true, rs)
        } else {
            // Use FAILOVER FORCE.
            c <- redis.SetAsMasterWaitSyncDone(new.Addr(), true, false, rs)
        }
    }()

    select {
    case err := <-c:
        if err != nil {
            log.Eventf(old.Addr(), "Failover request done with error(%v).", err)
        } else {
            log.Eventf(old.Addr(), "Failover request done, new master %s(%s).", new.Id(), new.Addr())
        }
    case <-time.After(20 * time.Minute):
        log.Eventf(old.Addr(), "Failover timed out, new master %s(%s)", new.Id(), new.Addr())
    }

    // Re-read the node, because its state may have been updated meanwhile.
    roleChanged := false
    node := cs.FindNode(newMasterId)
    if node == nil {
        // Guard against a nil dereference if the new master vanished from the topology.
        log.Warningf(old.Addr(), "New master %s lost after failover request.", newMasterId)
        old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL)
        return
    }
    if node.IsMaster() {
        roleChanged = true
    } else {
        for i := 0; i < 10; i++ {
            info, err := redis.FetchInfo(node.Addr(), "Replication")
            if err == nil && info.Get("role") == "master" {
                roleChanged = true
                break
            }
            log.Warningf(old.Addr(), "Role of new master %s(%s) has not yet changed, will check 5 seconds later.",
                new.Id(), new.Addr())
            time.Sleep(5 * time.Second)
        }
    }

    if roleChanged {
        log.Eventf(old.Addr(), "New master %s(%s) role change success", node.Id, node.Addr())
        // Handle anomalies left over from migration: the failed old master's
        // slots must be moved to the new master.
        oldNode := cs.FindNode(oldMasterId)
        if oldNode != nil && oldNode.Fail && oldNode.IsMaster() && len(oldNode.Ranges) != 0 {
            log.Warningf(old.Addr(), "Some node carries slots info(%v) about the old master, waiting for MigrateManager to fix it.",
                oldNode.Ranges)
        } else {
            log.Info(old.Addr(), "Good, no slot needs to be fixed after failover.")
        }
    } else {
        log.Warningf(old.Addr(), "Failover failed, please check cluster state.")
        log.Warningf(old.Addr(), "The dead master will goto OFFLINE state and then goto WAIT_FAILOVER_BEGIN state to try failover again.")
    }

    old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL)

    // Enable writes on the new master. Adding the Write flag to a slave has
    // no effect, so even if the failover failed this does no harm.
    redis.EnableWrite(new.Addr(), new.Id())
}
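// Illustrative sketch (not from this codebase): RunFailoverTask runs the
// blocking failover request in a goroutine and selects against a timeout; the
// buffered channel lets the goroutine finish even if the timeout wins. The
// pattern in isolation, with a hypothetical callWithTimeout helper:
package main

import (
    "errors"
    "fmt"
    "time"
)

// callWithTimeout runs fn concurrently and returns its error, or a timeout
// error if fn does not finish within d.
func callWithTimeout(d time.Duration, fn func() error) error {
    c := make(chan error, 1) // buffered: the goroutine never blocks on send
    go func() { c <- fn() }()
    select {
    case err := <-c:
        return err
    case <-time.After(d):
        return errors.New("timed out")
    }
}

func main() {
    err := callWithTimeout(50*time.Millisecond, func() error {
        time.Sleep(10 * time.Millisecond)
        return nil
    })
    fmt.Println(err) // <nil>
}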
func init() {
    RedisNodeStateModel.AddState(RunningState)
    RedisNodeStateModel.AddState(WaitFailoverBeginState)
    RedisNodeStateModel.AddState(WaitFailoverEndState)
    RedisNodeStateModel.AddState(OfflineState)

    /// State: (Running)

    // (a0) The node has been disabled (read and write both off): go to Offline.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateRunning,
        To:         StateOffline,
        Input:      Input{F, F, ANY, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (a1) The node failed while still readable (not disabled).
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateRunning,
        To:         StateWaitFailoverBegin,
        Input:      Input{T, ANY, FAIL, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (a2) The node failed while still writable (not disabled).
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateRunning,
        To:         StateWaitFailoverBegin,
        Input:      Input{ANY, T, FAIL, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (a3) A slave failed, not disabled, and auto failover is allowed.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateRunning,
        To:         StateWaitFailoverEnd,
        Input:      Input{T, ANY, FAIL, S, ANY},
        Priority:   1,
        Constraint: SlaveAutoFailoverConstraint,
        Apply:      SlaveFailoverHandler,
    })

    // (a4) A master failed, not disabled, and auto failover is allowed.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateRunning,
        To:         StateWaitFailoverEnd,
        Input:      Input{T, T, FAIL, M, ANY},
        Priority:   1,
        Constraint: MasterAutoFailoverConstraint,
        Apply:      MasterFailoverHandler,
    })

    /// State: (WaitFailoverBegin)

    // (b0) The node recovered.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverBegin,
        To:         StateRunning,
        Input:      Input{ANY, ANY, FINE, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (b10) Master: auto failover, or failover continued manually.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverBegin,
        To:         StateWaitFailoverEnd,
        Input:      Input{ANY, ANY, FAIL, M, ANY},
        Priority:   0,
        Constraint: MasterAutoFailoverConstraint,
        Apply:      MasterFailoverHandler,
    })

    // (b11) Master: already handled, go to Offline.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverBegin,
        To:         StateOffline,
        Input:      Input{ANY, ANY, FAIL, M, ANY},
        Priority:   0,
        Constraint: MasterGotoOfflineConstraint,
        Apply:      MasterGotoOfflineHandler,
    })

    // (b2) Slave: auto failover, or failover continued manually.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverBegin,
        To:         StateWaitFailoverEnd,
        Input:      Input{ANY, ANY, FAIL, S, ANY},
        Priority:   0,
        Constraint: SlaveAutoFailoverConstraint,
        Apply:      SlaveFailoverHandler,
    })

    // (b3) Slave already disabled: go to Offline.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverBegin,
        To:         StateOffline,
        Input:      Input{F, F, FAIL, S, ANY},
        Priority:   1,
        Constraint: nil,
        Apply:      nil,
    })

    /// State: (WaitFailoverEnd)

    // (c0) Wait for the failover-end signal.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverEnd,
        To:         StateOffline,
        Input:      Input{ANY, ANY, ANY, ANY, CMD_FAILOVER_END_SIGNAL},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (c1) A slave failed and is already disabled.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverEnd,
        To:         StateOffline,
        Input:      Input{F, F, FAIL, S, ANY},
        Priority:   1,
        Constraint: nil,
        Apply:      nil,
    })

    // (c2) The slave recovered during failover: leave the WaitFailoverEnd state.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateWaitFailoverEnd,
        To:         StateOffline,
        Input:      Input{ANY, ANY, FINE, S, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    /// State: (Offline)

    // (d0) The node's read flag was restored.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateOffline,
        To:         StateRunning,
        Input:      Input{T, ANY, ANY, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (d1) The node's write flag was restored.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:       StateOffline,
        To:         StateRunning,
        Input:      Input{ANY, T, ANY, ANY, ANY},
        Priority:   0,
        Constraint: nil,
        Apply:      nil,
    })

    // (d2) A failed master: a failover is still needed.
    RedisNodeStateModel.AddTransition(&fsm.Transition{
        From:     StateOffline,
        To:       StateWaitFailoverBegin,
        Input:    Input{F, F, FAIL, M, ANY},
        Priority: 0,
        Constraint: func(i interface{}) bool {
            // After a failover, the dead node is still marked as a master, so
            // we must check whether it has already been handled. The criterion:
            // the node is in FAIL state and holds no slots (a standby master).
            ctx := i.(StateContext)
            ns := ctx.NodeState

            if ns.node.IsStandbyMaster() {
                return false
            }
            log.Warningf(ns.Addr(), "Found offline non standby master, will try to failover(%v,%v).",
                ns.Role(), ns.Ranges())
            return true
        },
        Apply: nil,
    })
}
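// Illustrative sketch (not from this codebase): the table above matches a
// 5-tuple input (readable, writable, node health, role, command) against
// transitions with ANY wildcards, preferring higher Priority. A minimal
// matcher over a hypothetical 3-field input; this is an assumed reading of
// the fsm package's semantics, not its actual API:
package main

import "fmt"

const ANY = -1

type transition struct {
    from, to string
    input    [3]int // -1 matches anything
    priority int
}

// next returns the destination of the best-matching transition from `from`
// for input `in`, if any.
func next(table []transition, from string, in [3]int) (string, bool) {
    best, found := transition{priority: -1}, false
    for _, t := range table {
        if t.from != from {
            continue
        }
        ok := true
        for i := range in {
            if t.input[i] != ANY && t.input[i] != in[i] {
                ok = false
                break
            }
        }
        if ok && t.priority > best.priority {
            best, found = t, true
        }
    }
    return best.to, found
}

func main() {
    table := []transition{
        {"RUNNING", "OFFLINE", [3]int{0, 0, ANY}, 0},
        {"RUNNING", "WAIT_FAILOVER_BEGIN", [3]int{1, ANY, 0}, 1},
    }
    to, ok := next(table, "RUNNING", [3]int{1, 0, 0})
    fmt.Println(to, ok) // WAIT_FAILOVER_BEGIN true
}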
        ctx := i.(StateContext)
        ns := ctx.NodeState

        record := &meta.FailoverRecord{
            AppName:   meta.AppName(),
            NodeId:    ns.Id(),
            NodeAddr:  ns.Addr(),
            Timestamp: time.Now(),
            Region:    ns.Region(),
            Tag:       ns.Tag(),
            Role:      ns.Role(),
            Ranges:    ns.Ranges(),
        }
        err := meta.AddFailoverRecord(record)
        if err != nil {
            log.Warningf(ns.Addr(), "state: add failover record failed, %v", err)
        }
    },
    OnLeave: func(i interface{}) {
        log.Event(getNodeState(i).Addr(), "Leave WAIT_FAILOVER_END state")

        ctx := i.(StateContext)
        ns := ctx.NodeState
        if ns.Role() == "master" {
            err := meta.UnmarkFailoverDoing()
            if err != nil {
                log.Warningf(ns.Addr(), "state: unmark FAILOVER_DOING status failed, %v", err)
            }
        }
    },
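// Illustrative sketch (not from this codebase): the handlers above run as
// enter/leave hooks of the WAIT_FAILOVER_END state. A minimal hook-bearing
// state type; this is hypothetical, not the fsm package's actual API:
package main

import "fmt"

type hookedState struct {
    name    string
    onEnter func(ctx interface{})
    onLeave func(ctx interface{})
}

// transition fires the leave hook of the old state, then the enter hook of
// the new one, which is the ordering the handlers above rely on.
func transition(from, to *hookedState, ctx interface{}) {
    if from.onLeave != nil {
        from.onLeave(ctx)
    }
    if to.onEnter != nil {
        to.onEnter(ctx)
    }
}

func main() {
    a := &hookedState{name: "WAIT_FAILOVER_END", onLeave: func(interface{}) { fmt.Println("leave WAIT_FAILOVER_END") }}
    b := &hookedState{name: "OFFLINE", onEnter: func(interface{}) { fmt.Println("enter OFFLINE") }}
    transition(a, b, nil)
}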
func (m *MigrateManager) HandleNodeStateChange(cluster *topo.Cluster) {
    // If migration tasks already exist, skip rebuilding and handle the
    // existing tasks when they finish.
    if len(m.tasks) > 0 {
        goto done
    }

    // Rebuild migration tasks recorded on the masters.
    for _, node := range cluster.AllNodes() {
        if node.Fail {
            continue
        }
        // Wait a while.
        if time.Since(m.lastTaskEndTime) < 5*time.Second {
            continue
        }
        for id, slots := range node.Migrating {
            // Build ranges from the slots.
            ranges := []topo.Range{}
            for _, slot := range slots {
                if id == node.Id {
                    // A self-referencing mark is stale; reset it.
                    redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "")
                } else {
                    ranges = append(ranges, topo.Range{Left: slot, Right: slot})
                }
            }
            // Source.
            source := node
            if !node.IsMaster() {
                srs := cluster.FindReplicaSetByNode(node.Id)
                if srs != nil {
                    source = srs.Master
                }
            }
            // Target.
            rs := cluster.FindReplicaSetByNode(id)
            if rs == nil || source.Fail || rs.Master.Fail {
                continue
            }
            _, err := m.CreateTask(source.Id, rs.Master.Id, ranges, cluster)
            if err != nil {
                log.Warningf(node.Addr(), "Can not recover migrate task, %v", err)
            } else {
                log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with MIGRATING info"+
                    ", Task(Source:%s, Target:%s).", node.Id, node.Addr(), source.Addr(), rs.Master.Addr())
                goto done
            }
        }
        for id, slots := range node.Importing {
            // Build ranges from the slots.
            ranges := []topo.Range{}
            for _, slot := range slots {
                if id == node.Id {
                    // A self-referencing mark is stale; reset it.
                    redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "")
                } else {
                    ranges = append(ranges, topo.Range{Left: slot, Right: slot})
                }
            }
            if len(ranges) == 0 {
                continue
            }
            // Target.
            target := node
            if !node.IsMaster() {
                trs := cluster.FindReplicaSetByNode(node.Id)
                if trs != nil {
                    target = trs.Master
                }
            }
            if target.IsStandbyMaster() {
                s := cluster.FindNodeBySlot(ranges[0].Left)
                if s != nil {
                    log.Warningf(node.Addr(), "Reset migrate task target to %s(%s)", s.Id, s.Addr())
                    target = s
                }
            }
            // Source.
            rs := cluster.FindReplicaSetByNode(id)
            if rs == nil || target.Fail || rs.Master.Fail {
                continue
            }
            _, err := m.CreateTask(rs.Master.Id, target.Id, ranges, cluster)
            if err != nil {
                log.Warningf(node.Addr(), "Can not recover migrate task, %v", err)
            } else {
                log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with IMPORTING info"+
                    ", Task(Source:%s,Target:%s).", node.Id, node.Addr(), rs.Master.Addr(), target.Addr())
                goto done
            }
        }
    }

done:
    for _, task := range m.tasks {
        if task.CurrentState() != StateDone {
            m.handleTaskChange(task, cluster)
        }
    }
}
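// Illustrative sketch (not from this codebase): HandleNodeStateChange rebuilds
// per-slot ranges from a node's MIGRATING/IMPORTING maps, clearing stale
// self-references instead of including them. The conversion in isolation,
// with a hypothetical clear callback:
package main

import "fmt"

type Range struct{ Left, Right int }

// slotsToRanges turns each slot marked toward peerId into a single-slot
// range; slots marked toward the node itself are reset via clear.
func slotsToRanges(selfId, peerId string, slots []int, clear func(slot int)) []Range {
    ranges := []Range{}
    for _, slot := range slots {
        if peerId == selfId {
            clear(slot) // a self-referencing mark is stale; reset it
            continue
        }
        ranges = append(ranges, Range{Left: slot, Right: slot})
    }
    return ranges
}

func main() {
    rs := slotsToRanges("n1", "n2", []int{100, 101}, func(int) {})
    fmt.Println(rs) // [{100 100} {101 101}]
}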
// Update the task state machine.
func (m *MigrateManager) handleTaskChange(task *MigrateTask, cluster *topo.Cluster) error {
    fromNode := cluster.FindNode(task.SourceNode().Id)
    toNode := cluster.FindNode(task.TargetNode().Id)
    tname := task.TaskName()

    if fromNode == nil {
        // Use the task's own record here; fromNode is nil and must not be dereferenced.
        log.Infof(tname, "Source node %s(%s) does not exist", task.SourceNode().Addr(), task.SourceNode().Id)
        return ErrNodeNotFound
    }
    if toNode == nil {
        log.Infof(tname, "Target node %s(%s) does not exist", task.TargetNode().Addr(), task.TargetNode().Id)
        return ErrNodeNotFound
    }

    // A role change means the replica set has failed over.
    if !fromNode.IsMaster() || !toNode.IsMaster() {
        log.Warningf(tname, "%s role change, cancel migration task %s\n", fromNode.Id[:6], task.TaskName())
        task.SetState(StateCancelling)
        return ErrSourceNodeFail
    }

    // If the source node died, cancel directly; the task will be rebuilt
    // after the failover.
    if fromNode.Fail {
        log.Infof(tname, "Cancel migration task %s\n", task.TaskName())
        task.SetState(StateCancelling)
        return ErrSourceNodeFail
    }

    // If the target node died, record the current replica set and wait for
    // the failover.
    if toNode.Fail {
        if task.CurrentState() == StateRunning {
            task.SetState(StateTargetNodeFailure)
            task.SetBackupReplicaSet(task.TargetReplicaSet())
            return ErrTargetNodeFail
        }
    } else if task.CurrentState() != StateNew {
        task.SetState(StateRunning)
        task.SetBackupReplicaSet(nil)
    }

    // If the target replica set has failed over (elected a new master), find
    // the new master by resolving a slave remembered in the backup set.
    if toNode.IsStandbyMaster() {
        brs := task.BackupReplicaSet()
        if brs == nil {
            task.SetState(StateCancelling)
            log.Info(tname, "No backup replicaset found, controller maybe restarted after target master failure, can not do recovery.")
            return ErrCanNotRecover
        }
        slaves := brs.Slaves
        if len(slaves) == 0 {
            task.SetState(StateCancelling)
            log.Info(tname, "The dead target master has no slave, cannot do recovery.")
            return ErrCanNotRecover
        }
        rs := cluster.FindReplicaSetByNode(slaves[0].Id)
        if rs == nil {
            task.SetState(StateCancelling)
            log.Info(tname, "No replicaset for slave of dead target master found")
            return ErrCanNotRecover
        }
        task.ReplaceTargetReplicaSet(rs)
        log.Infof(tname, "Recover dead target node to %s(%s)", rs.Master.Id, rs.Master.Addr())
    }
    return nil
}
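// Illustrative sketch (not from this codebase): when the target master dies
// mid-migration, handleTaskChange resolves the failed-over replica set by
// looking up a slave remembered in the backup set. The lookup in isolation,
// with hypothetical types:
package main

import (
    "errors"
    "fmt"
)

type replicaSet struct {
    master string
    slaves []string
}

// recoverTarget maps a dead replica set to its successor by resolving one of
// its former slaves in the current topology.
func recoverTarget(backup *replicaSet, findByNode func(id string) *replicaSet) (*replicaSet, error) {
    if backup == nil {
        return nil, errors.New("no backup replica set recorded")
    }
    if len(backup.slaves) == 0 {
        return nil, errors.New("dead target master had no slave")
    }
    rs := findByNode(backup.slaves[0])
    if rs == nil {
        return nil, errors.New("no replica set found for former slave")
    }
    return rs, nil
}

func main() {
    topo := map[string]*replicaSet{"s1": {master: "s1", slaves: []string{"s2"}}}
    rs, err := recoverTarget(&replicaSet{master: "m-dead", slaves: []string{"s1"}},
        func(id string) *replicaSet { return topo[id] })
    fmt.Println(rs.master, err) // s1 <nil>
}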