func (m *MigrateManager) CreateTask(sourceId, targetId string, ranges []topo.Range, cluster *topo.Cluster) (*MigrateTask, error) { sourceRS := cluster.FindReplicaSetByNode(sourceId) targetRS := cluster.FindReplicaSetByNode(targetId) if sourceRS == nil || targetRS == nil { return nil, ErrReplicatSetNotFound } task := NewMigrateTask(cluster, sourceRS, targetRS, ranges) err := m.AddTask(task) if err != nil { return nil, err } return task, nil }
func (self *Inspector) IsClusterDamaged(cluster *topo.Cluster, seeds []*topo.Node) bool { // more than half masters dead numFail := 0 for _, node := range cluster.MasterNodes() { if node.Fail { numFail++ } } if numFail >= (cluster.Size()+1)/2 { return true } // more than half nodes dead if len(seeds) > cluster.NumLocalRegionNode()/2 { return false } for _, seed := range seeds { c, err := self.initClusterTopo(seed) if err != nil { return false } for _, node := range c.LocalRegionNodes() { // nodes not in seeds must be pfail if !containsNode(node, seeds) && !node.PFail { return false } } } glog.Info("more than half nodes dead") return true }
func GenerateRebalancePlan(method string, cluster *topo.Cluster, targetIds []string, ratio int) ([]*MigratePlan, error) { rss := cluster.ReplicaSets() regions := meta.AllRegions() ss := []*topo.Node{} // 有slots的Master tm := map[string]*topo.Node{} // 空slots的Master for _, rs := range rss { master := rs.Master // 忽略主挂掉和region覆盖不全的rs if master.Fail || !rs.IsCoverAllRegions(regions) || master.Free { continue } if master.Empty() { tm[master.Id] = master } else { ss = append(ss, master) } } if method == "mergetail" { merger := MergerTable[method] if merger == nil { return nil, fmt.Errorf("Rebalancing method %s not exist.", method) } plans := merger(ss, 0) return plans, nil } else if method == "mergeall" { merger := MergerTable[method] if merger == nil { return nil, fmt.Errorf("Rebalancing method %s not exist.", method) } plans := merger(ss, ratio) return plans, nil } else { var ts []*topo.Node // 如果没传TargetId,则选择所有可以作为迁移目标的rs if len(targetIds) == 0 { for _, node := range tm { ts = append(ts, node) } } else { for _, id := range targetIds { if tm[id] == nil { return nil, fmt.Errorf("Master %s not found.", id) } ts = append(ts, tm[id]) } } if len(ts) == 0 { return nil, fmt.Errorf("No available empty target replicasets.") } rebalancer := RebalancerTable[method] if rebalancer == nil { return nil, fmt.Errorf("Rebalancing method %s not exist.", method) } plans := rebalancer(ss, ts) return plans, nil } return nil, nil }
func (m *MigrateManager) HandleNodeStateChange(cluster *topo.Cluster) { // 如果存在迁移任务,先跳过,等结束后再处理 if len(m.tasks) > 0 { goto done } // 处理主节点的迁移任务重建 for _, node := range cluster.AllNodes() { if node.Fail { continue } // Wait a while if time.Now().Sub(m.lastTaskEndTime) < 5*time.Second { continue } for id, slots := range node.Migrating { // 根据slot生成ranges ranges := []topo.Range{} for _, slot := range slots { // 如果是自己 if id == node.Id { redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "") } else { ranges = append(ranges, topo.Range{Left: slot, Right: slot}) } } // Source source := node if !node.IsMaster() { srs := cluster.FindReplicaSetByNode(node.Id) if srs != nil { source = srs.Master } } // Target rs := cluster.FindReplicaSetByNode(id) if source.Fail || rs.Master.Fail { continue } _, err := m.CreateTask(source.Id, rs.Master.Id, ranges, cluster) if err != nil { log.Warningf(node.Addr(), "Can not recover migrate task, %v", err) } else { log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with MIGRATING info"+ ", Task(Source:%s, Target:%s).", node.Id, node.Addr(), source.Addr(), rs.Master.Addr()) goto done } } for id, slots := range node.Importing { // 根据slot生成ranges ranges := []topo.Range{} for _, slot := range slots { // 如果是自己 if id == node.Id { redis.SetSlot(node.Addr(), slot, redis.SLOT_STABLE, "") } else { ranges = append(ranges, topo.Range{Left: slot, Right: slot}) } } // Target target := node if !node.IsMaster() { trs := cluster.FindReplicaSetByNode(node.Id) if trs != nil { target = trs.Master } } if target.IsStandbyMaster() { s := cluster.FindNodeBySlot(ranges[0].Left) if s != nil { log.Warningf(node.Addr(), "Reset migrate task target to %s(%s)", s.Id, s.Addr()) target = s } } // Source rs := cluster.FindReplicaSetByNode(id) if target.Fail || rs.Master.Fail { continue } _, err := m.CreateTask(rs.Master.Id, target.Id, ranges, cluster) if err != nil { log.Warningf(node.Addr(), "Can not recover migrate task, %v", err) } else { log.Warningf(node.Addr(), "Will recover migrating task for node %s(%s) with IMPORTING info"+ ", Task(Source:%s,Target:%s).", node.Id, node.Addr(), rs.Master.Addr(), target.Addr()) goto done } } } done: for _, task := range m.tasks { if task.CurrentState() != StateDone { m.handleTaskChange(task, cluster) } } }
// 更新任务状态机 func (m *MigrateManager) handleTaskChange(task *MigrateTask, cluster *topo.Cluster) error { fromNode := cluster.FindNode(task.SourceNode().Id) toNode := cluster.FindNode(task.TargetNode().Id) tname := task.TaskName() if fromNode == nil { log.Infof(tname, "Source node %s(%s) not exist", fromNode.Addr(), fromNode.Id) return ErrNodeNotFound } if toNode == nil { log.Infof(tname, "Target node %s(%s) not exist", toNode.Addr(), toNode.Id) return ErrNodeNotFound } // 角色变化说明该分片进行了主从切换 if !fromNode.IsMaster() || !toNode.IsMaster() { log.Warningf(tname, "%s role change, cancel migration task %s\n", fromNode.Id[:6], task.TaskName()) task.SetState(StateCancelling) return ErrSourceNodeFail } // 如果是源节点挂了,直接取消,等待主从切换之后重建任务 if fromNode.Fail { log.Infof(tname, "Cancel migration task %s\n", task.TaskName()) task.SetState(StateCancelling) return ErrSourceNodeFail } // 如果目标节点挂了,需要记录当前的ReplicaSet,观察等待主从切换 if toNode.Fail { if task.CurrentState() == StateRunning { task.SetState(StateTargetNodeFailure) task.SetBackupReplicaSet(task.TargetReplicaSet()) return ErrTargetNodeFail } } else if task.CurrentState() != StateNew { task.SetState(StateRunning) task.SetBackupReplicaSet(nil) } // 如果目标节点已经进行了Failover(重新选主),我们需要找到对应的新主 // 方法是从BackupReplicaSet里取一个从来查找 if toNode.IsStandbyMaster() { brs := task.BackupReplicaSet() if brs == nil { task.SetState(StateCancelling) log.Info(tname, "No backup replicaset found, controller maybe restarted after target master failure, can not do recovery.") return ErrCanNotRecover } slaves := brs.Slaves if len(slaves) == 0 { task.SetState(StateCancelling) log.Info(tname, "The dead target master has no slave, cannot do recovery.") return ErrCanNotRecover } else { rs := cluster.FindReplicaSetByNode(slaves[0].Id) if rs == nil { task.SetState(StateCancelling) log.Info(tname, "No replicaset for slave of dead target master found") return ErrCanNotRecover } task.ReplaceTargetReplicaSet(rs) log.Infof(tname, "Recover dead target node to %s(%s)", rs.Master.Id, rs.Master.Addr()) } } return nil }
func (self *Inspector) checkClusterTopo(seed *topo.Node, cluster *topo.Cluster) error { resp, err := redis.ClusterNodesInRegion(seed.Addr(), self.LocalRegion) if err != nil && strings.HasPrefix(err.Error(), "ERR Wrong CLUSTER subcommand or number of arguments") { //server version do not support 'cluster nodes extra [region]' resp, err = redis.ClusterNodes(seed.Addr()) } //this may lead to BuildClusterTopo update failed for a time //the node is step into this state after check IsAlive if err != nil && strings.HasPrefix(err.Error(), "LOADING") { return nil } if err != nil { return err } var summary topo.SummaryInfo lines := strings.Split(resp, "\n") for _, line := range lines { if strings.HasPrefix(line, "# ") { summary.ReadLine(line) continue } line = strings.TrimSpace(line) if line == "" { continue } s, myself, err := self.buildNode(line) if err == ErrNodeInHandShake || err == ErrNodeNoAddr { continue } // Fix 'cluster nodes extra' & 'cluster nodes extra region' compatiable if s.Region != self.LocalRegion { continue } if err != nil { return err } if s.Ip == "127.0.0.1" { s.Ip = seed.Ip } node := cluster.FindNode(s.Id) if node == nil { if s.PFail { glog.Warningf("forget dead node %s(%s) should be forgoten", s.Id, s.Addr()) //redis.ClusterForget(seed.Addr(), s.Id) } return fmt.Errorf("node not exist %s(%s)", s.Id, s.Addr()) } // 对比节点数据是否相同 if !node.Compare(s) { glog.Infof("%#v vs %#v different", s, node) if s.Tag == "-" && node.Tag != "-" { // 可能存在处于不被Cluster接受的节点,节点可以看见Cluster,但Cluster看不到它。 // 一种复现情况情况:某个节点已经死了,系统将其Forget,但是OP并未被摘除该节点, // 而是恢复了该节点。 glog.Warningf("remeet node %s", seed.Addr()) self.MeetNode(seed) } return ErrNodesInfoNotSame } if len(node.Ranges) == 0 && len(s.Ranges) > 0 { glog.Warningf("Ranges not equal, use nonempty ranges.") node.Ranges = s.Ranges } if myself { info, err := redis.FetchClusterInfo(node.Addr()) if err != nil { return err } node.ClusterInfo = info node.SummaryInfo = summary } if len(s.Migrating) != 0 { node.Migrating = s.Migrating } if len(s.Importing) != 0 { node.Importing = s.Importing } if s.PFail { node.IncrPFailCount() } } return nil }