func SetSlotToNode(rs *topo.ReplicaSet, slot int, targetId string) error { // 先清理从节点的MIGRATING状态 for _, node := range rs.Slaves { if node.Fail { continue } err := redis.SetSlot(node.Addr(), slot, redis.SLOT_NODE, targetId) if err != nil { return err } } err := redis.SetSlot(rs.Master.Addr(), slot, redis.SLOT_NODE, targetId) if err != nil { return err } return nil }
func SetSlotStable(rs *topo.ReplicaSet, slot int) error { // 先清理从节点的MIGRATING状态 for _, node := range rs.Slaves { if node.Fail { continue } err := redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "") if err != nil { return err } } err := redis.SetSlot(rs.Master.Addr(), slot, redis.SLOT_STABLE, "") if err != nil { return err } return nil }
/// 迁移slot过程: /// 1. 标记Target分片Master为IMPORTING /// 2. 标记所有Source分片节点为MIGRATING /// 3. 从Source分片Master取keys迁移,直到空,数据迁移完成 /// 4. 设置Target的Slave的slot归属到Target /// 5. 设置Target的Master的slot归属到Target /// 6. 设置Source所有节点的slot归属到Target /// 命令: /// 1. <Target Master> setslot $slot IMPORTING $sourceId /// 2. <Source Slaves> setslot $slot MIGRATING $targetId /// 3. <Source Master> setslot $slot MIGRATING $targetId /// ... migrating all keys /// 4. <Target Slaves> setslot $slot node $targetId /// 5. <Target Master> setslot $slot node $targetId /// 6. <Source Slaves> setslot $slot node $targetId /// 7. <Source Master> setslot $slot node $targetId func (t *MigrateTask) migrateSlot(slot int, keysPer int) (int, error, string) { rs := t.SourceReplicaSet() sourceNode := t.SourceNode() targetNode := t.TargetNode() err := redis.SetSlot(targetNode.Addr(), slot, redis.SLOT_IMPORTING, sourceNode.Id) if err != nil { if strings.HasPrefix(err.Error(), "ERR I'm already the owner of hash slot") { log.Warningf(t.TaskName(), "%s already the owner of hash slot %d", targetNode.Id[:6], slot) // 逻辑到此,说明Target已经包含该slot,但是Source处于Migrating状态 // 迁移实际已经完成,需要清理Source的Migrating状态 srs := t.SourceReplicaSet() err = SetSlotToNode(srs, slot, targetNode.Id) if err != nil { return 0, err, "" } err = SetSlotStable(srs, slot) if err != nil { return 0, err, "" } trs := t.TargetReplicaSet() err = SetSlotToNode(trs, slot, targetNode.Id) if err != nil { return 0, err, "" } err = SetSlotStable(trs, slot) return 0, err, "" } return 0, err, "" } // 需要将Source分片的所有节点标记为MIGRATING,最大限度避免从地域的读造成的数据不一致 for _, node := range rs.AllNodes() { err := redis.SetSlot(node.Addr(), slot, redis.SLOT_MIGRATING, targetNode.Id) if err != nil { if strings.HasPrefix(err.Error(), "ERR I'm not the owner of hash slot") { log.Warningf(t.TaskName(), "%s is not the owner of hash slot %d", sourceNode.Id, slot) srs := t.SourceReplicaSet() err = SetSlotStable(srs, slot) if err != nil { log.Warningf(t.TaskName(), "Failed to clean MIGRATING state of source server.") return 0, err, "" } trs := t.TargetReplicaSet() err = SetSlotStable(trs, slot) if err != nil { log.Warningf(t.TaskName(), "Failed to clean MIGRATING state of target server.") return 0, err, "" } return 0, fmt.Errorf("mig: %s is not the owner of hash slot %d", sourceNode.Id, slot), "" } return 0, err, "" } } nkeys := 0 app := meta.GetAppConfig() for { keys, err := redis.GetKeysInSlot(sourceNode.Addr(), slot, keysPer) if err != nil { return nkeys, err, "" } for _, key := range keys { _, err := redis.Migrate(sourceNode.Addr(), targetNode.Ip, targetNode.Port, key, app.MigrateTimeout) if err != nil { return nkeys, err, key } nkeys++ } if len(keys) == 0 { // 迁移完成,需要等SourceSlaves同步(DEL)完成,即SourceSlaves节点中该slot内已无key slaveSyncDone := true srs := t.SourceReplicaSet() for _, node := range srs.AllNodes() { nkeys, err := redis.CountKeysInSlot(node.Addr(), slot) if err != nil { return nkeys, err, "" } if nkeys > 0 { slaveSyncDone = false } } if !slaveSyncDone { // FIXME // master migrate done, slave still have some keys in slot, setslot will ensure slave clear the data log.Info(t.TaskName(), "source node not empty, setslot will clear") //return nkeys, fmt.Errorf("mig: source nodes not all empty, will retry."), "" } // 设置slot归属到新节点,该操作自动清理IMPORTING和MIGRATING状态 // 如果设置的是Source节点,设置slot归属时,Redis会确保该slot中已无剩余的key trs := t.TargetReplicaSet() // 优先设置从节点,保证当主的数据分布还未广播到从节点时主挂掉,slot信息也不会丢失 for _, node := range trs.Slaves { if node.Fail { continue } err = redis.SetSlot(node.Addr(), slot, redis.SLOT_NODE, targetNode.Id) if err != nil { return nkeys, err, "" } } // 该操作增加Epoch并广播出去 err = redis.SetSlot(trs.Master.Addr(), slot, redis.SLOT_NODE, targetNode.Id) if err != nil { return nkeys, err, "" } // 更新节点上slot的归属 for _, rs := range t.cluster.ReplicaSets() { if rs.Master.IsStandbyMaster() { continue } err = SetSlotToNode(rs, slot, targetNode.Id) if err != nil { return nkeys, err, "" } } break } } return nkeys, nil, "" }
func (m *MigrateManager) HandleNodeStateChange(cluster *topo.Cluster) { // 如果存在迁移任务,先跳过,等结束后再处理 if len(m.tasks) > 0 { goto done } // 处理主节点的迁移任务重建 for _, node := range cluster.AllNodes() { if node.Fail { continue } // Wait a while if time.Now().Sub(m.lastTaskEndTime) < 5*time.Second { continue } for id, slots := range node.Migrating { // 根据slot生成ranges ranges := []topo.Range{} for _, slot := range slots { // 如果是自己 if id == node.Id { redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "") } else { ranges = append(ranges, topo.Range{Left: slot, Right: slot}) } } // Source source := node if !node.IsMaster() { srs := cluster.FindReplicaSetByNode(node.Id) if srs != nil { source = srs.Master } } // Target rs := cluster.FindReplicaSetByNode(id) if source.Fail || rs.Master.Fail { continue } _, err := m.CreateTask(source.Id, rs.Master.Id, ranges, cluster) if err != nil { log.Warningf(node.Addr(), "Can not recover migrate task, %v", err) } else { log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with MIGRATING info"+ ", Task(Source:%s, Target:%s).", node.Id, node.Addr(), source.Addr(), rs.Master.Addr()) goto done } } for id, slots := range node.Importing { // 根据slot生成ranges ranges := []topo.Range{} for _, slot := range slots { // 如果是自己 if id == node.Id { redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "") } else { ranges = append(ranges, topo.Range{Left: slot, Right: slot}) } } // Target target := node if !node.IsMaster() { trs := cluster.FindReplicaSetByNode(node.Id) if trs != nil { target = trs.Master } } if target.IsStandbyMaster() { s := cluster.FindNodeBySlot(ranges[0].Left) if s != nil { log.Warningf(node.Addr(), "Reset migrate task target to %s(%s)", s.Id, s.Addr()) target = s } } // Source rs := cluster.FindReplicaSetByNode(id) if target.Fail || rs.Master.Fail { continue } _, err := m.CreateTask(rs.Master.Id, target.Id, ranges, cluster) if err != nil { log.Warningf(node.Addr(), "Can not recover migrate task, %v", err) } else { log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with IMPORTING info"+ ", Task(Source:%s,Target:%s).", node.Id, node.Addr(), rs.Master.Addr(), target.Addr()) goto done } } } done: for _, task := range m.tasks { if task.CurrentState() != StateDone { m.handleTaskChange(task, cluster) } } }