Beispiel #1
0
// rebalance runs every plan of rbtask to completion and then clears the
// manager's current rebalance task.
//
// It works in two polling phases; between rounds of either phase the current
// rebalance state is published and the goroutine sleeps for five seconds:
//  1. create a migrate task for every plan that does not have one yet,
//     retrying the plans whose creation failed;
//  2. wait until every plan's task reports StateDone or StateCancelled.
//
// Afterwards the end time is stored on m.rebalanceTask, the final state is
// published, and m.rebalanceTask is reset to nil.
func (m *MigrateManager) rebalance(rbtask *RebalanceTask, cluster *topo.Cluster) {
	const pollInterval = 5 * time.Second

	// Phase 1: start all tasks; on failure wait a while and retry.
	for {
		notCreated := 0
		for _, p := range rbtask.Plans {
			if p.task != nil {
				continue
			}
			created, err := m.CreateTask(p.SourceId, p.TargetId, p.Ranges, cluster)
			if err != nil {
				notCreated++
				continue
			}
			log.Infof(created.TaskName(), "Rebalance task created, %v", created)
			p.task = created
		}
		if notCreated == 0 {
			break
		}
		streams.RebalanceStateStream.Pub(*m.rebalanceTask)
		time.Sleep(pollInterval)
	}

	// Phase 2: wait for every task to finish or be cancelled.
	for {
		unfinished := 0
		for _, p := range rbtask.Plans {
			switch p.task.CurrentState() {
			case StateDone, StateCancelled:
				// terminal state, nothing to wait for
			default:
				unfinished++
			}
		}
		if unfinished == 0 {
			break
		}
		streams.RebalanceStateStream.Pub(*m.rebalanceTask)
		time.Sleep(pollInterval)
	}

	finishedAt := time.Now()
	m.rebalanceTask.EndTime = &finishedAt
	streams.RebalanceStateStream.Pub(*m.rebalanceTask)
	m.rebalanceTask = nil
}
Beispiel #2
0
// Run migrates every slot of every range assigned to the task, one slot at a
// time, publishing progress through streamPub as it goes.
//
// State handling:
//   - StateNew is promoted to StateRunning on entry.
//   - StateCancelling, seen on entry or between migration rounds, becomes
//     StateCancelled and Run returns.
//   - StatePausing becomes StatePaused; while paused Run re-checks the state
//     every 100ms.
//   - An error starting with "READONLY" or "CLUSTERDOWN" cancels the task, as
//     does an "IOERR" once the same-key timeout counter has exceeded 10.
//   - Once all ranges have been processed the task is set to StateDone.
//
// Any other migration error, or keys still counted in the slot afterwards,
// makes Run retry the same slot after 500ms.
func (t *MigrateTask) Run() {
	// Highest slot number a range may refer to; larger bounds are clamped.
	const maxSlot = 16383

	if t.CurrentState() == StateNew {
		t.SetState(StateRunning)
	}
	if t.CurrentState() == StateCancelling {
		t.SetState(StateCancelled)
		return
	}

	// cancel marks the task cancelled and publishes the final state.
	cancel := func() {
		t.SetState(StateCancelled)
		t.streamPub(false)
	}

	prevKey := ""   // key reported by the most recent IOERR
	timeoutCnt := 0 // how many further IOERRs hit prevKey
	for i, r := range t.ranges {
		if r.Left < 0 {
			r.Left = 0
		}
		if r.Right > maxSlot {
			r.Right = maxSlot
		}
		t.currRangeIndex = i
		t.currSlot = r.Left
		t.totalKeysInSlot = 0
		for t.currSlot <= r.Right {
			t.streamPub(true)

			// Prefer to switch state only after a whole slot has been
			// migrated or an error has been hit.
			if t.CurrentState() == StateCancelling {
				cancel()
				return
			}

			// Paused: sleep briefly and check again.
			if t.CurrentState() == StatePausing {
				t.SetState(StatePaused)
			}
			if t.CurrentState() == StatePaused {
				time.Sleep(100 * time.Millisecond)
				continue
			}

			// Normal operation: migrate one batch of keys from the slot.
			app := meta.GetAppConfig()
			nkeys, err, key := t.migrateSlot(t.currSlot, app.MigrateKeysEachTime)
			t.totalKeysInSlot += nkeys

			// Ask the source node how many keys are left in the slot.
			// NOTE(review): when the count itself fails remains is -1, so the
			// slot is treated as finished as long as err is nil.
			seed := t.SourceNode()
			remains, err2 := redis.CountKeysInSlot(seed.Addr(), t.currSlot)
			if err2 != nil {
				remains = -1
			}

			if err == nil && remains <= 0 {
				log.Infof(t.TaskName(), "Migrate slot %d done, %d keys done, total %d keys, remains %d keys",
					t.currSlot, nkeys, t.totalKeysInSlot, remains)
				t.currSlot++
				t.totalKeysInSlot = 0
				continue
			}

			log.Warningf(t.TaskName(),
				"Migrate slot %d error, %d keys done, total %d keys, remains %d keys, %v",
				t.currSlot, nkeys, t.totalKeysInSlot, remains, err)
			if err != nil {
				msg := err.Error()
				switch {
				case strings.HasPrefix(msg, "READONLY"):
					log.Warningf(t.TaskName(), "Migrating across slaves nodes. "+
						"Maybe a manual failover just happened, "+
						"if cluster marks down after this point, "+
						"we need recover it by ourself using cli commands.")
					cancel()
					return
				case strings.HasPrefix(msg, "CLUSTERDOWN"):
					log.Warningf(t.TaskName(), "The cluster is down, please check it yourself, migrating task cancelled.")
					cancel()
					return
				case strings.HasPrefix(msg, "IOERR"):
					log.Warningf(t.TaskName(), "Migrating key:%s timeout", key)
					if timeoutCnt > 10 {
						// The format string expects the key; it was missing before.
						log.Warningf(t.TaskName(), "Migrating key:%s timeout too frequently, task cancelled", key)
						cancel()
						return
					}
					if prevKey == key {
						timeoutCnt++
					} else {
						timeoutCnt = 0
						prevKey = key
					}
				}
			}
			// Back off before retrying the same slot.
			time.Sleep(500 * time.Millisecond)
		}
	}

	// The inner loop leaves currSlot one past the last slot it handled.
	t.currSlot--
	t.SetState(StateDone)
	t.streamPub(false)
}
Beispiel #3
0
// Execute repairs a cluster in which some nodes have failed and an equal
// number of free nodes (running masters that own no slot ranges) are available
// to take their place.
//
// Steps:
//  1. Classify the replica sets of the current snapshot into free nodes and
//     failed (non-running) nodes, skipping arbiters.
//  2. Check that the numbers are consistent: as many free nodes as failed
//     ones, and the remaining nodes divide evenly over the replica sets.
//  3. Collect the "defect" masters: running masters whose replica set is
//     smaller than average, or of average size but containing a non-running
//     node.
//  4. Forget the failed nodes, let the free nodes meet the cluster, then
//     disable read on each free node and make it replicate the defect master
//     with the same index.
//
// It returns (nil, nil) when there is no snapshot or nothing to fix, an error
// when a consistency check fails (before the cluster is modified), and
// FixClusterResult{Result: true} otherwise.
func (self *FixClusterCommand) Execute(c *cc.Controller) (cc.Result, error) {
	cs := c.ClusterState
	snapshot := cs.GetClusterSnapshot()
	if snapshot == nil {
		return nil, nil
	}
	snapshot.BuildReplicaSets()

	nss := cs.AllNodeStates()
	nodeStates := make(map[string]string, len(nss))
	for id, n := range nss {
		nodeStates[id] = n.CurrentState()
	}
	running := func(id string) bool {
		return nodeStates[id] == state.StateRunning
	}
	rss := snapshot.ReplicaSets()

	totalNum := 0   // number of nodes in all non-arbiter replica sets
	totalRepli := 0 // non-free replica sets that have more than one node
	var failedNodes, freeNodes, defectMaster []*topo.Node

	// Check failed nodes and free nodes.
	for _, rs := range rss {
		if rs.Master == nil {
			// Without a master the set can be neither classified nor used as
			// a replication target; refuse to touch the cluster.
			return nil, errors.New("cluster fix check error, replica set without master")
		}
		if rs.Master.IsArbiter() {
			continue
		}
		nodes := rs.AllNodes()
		totalNum += len(nodes)
		if len(rs.Master.Ranges) == 0 && running(rs.Master.Id) {
			// Free node.
			freeNodes = append(freeNodes, rs.Master)
			continue
		}
		if len(nodes) > 1 {
			totalRepli++
		}
		for _, node := range nodes {
			if !running(node.Id) {
				failedNodes = append(failedNodes, node)
			}
		}
	}

	log.Infof("CLUSTER", "freeNodes=%d failedNodes=%d", len(freeNodes), len(failedNodes))
	if len(freeNodes) == 0 && len(failedNodes) == 0 {
		return nil, nil
	}

	healthy := totalNum - len(failedNodes)
	// totalRepli is tested first so the modulo below can never divide by zero.
	if totalRepli == 0 || len(freeNodes) != len(failedNodes) || healthy%totalRepli != 0 {
		log.Infof("CLUSTER", "totalNum=%d totalRepli=%d freeNodes=%d failedNodes=%d",
			healthy, totalRepli, len(freeNodes), len(failedNodes))
		return nil, errors.New("cluster fix check error, please check")
	}
	avgReplica := healthy / totalRepli

	replicaBroken := func(rs *topo.ReplicaSet) bool {
		for _, n := range rs.AllNodes() {
			if !running(n.Id) {
				return true
			}
		}
		return false
	}
	for _, rs := range rss {
		// rs.Master is known to be non-nil: the first loop returned otherwise.
		if rs.Master.IsArbiter() || !running(rs.Master.Id) {
			continue
		}
		size := len(rs.AllNodes())
		switch {
		case size < avgReplica && len(rs.Master.Ranges) > 0:
			defectMaster = append(defectMaster, rs.Master)
		case size == avgReplica && replicaBroken(rs):
			defectMaster = append(defectMaster, rs.Master)
		}
	}

	// Every free node needs a defect master to replicate. Verify this before
	// the cluster is modified instead of failing halfway through the fix.
	if len(defectMaster) < len(freeNodes) {
		log.Infof("CLUSTER", "defectMaster=%d freeNodes=%d", len(defectMaster), len(freeNodes))
		return nil, errors.New("cluster fix check error, not enough defect masters for free nodes")
	}

	// NOTE(review): the results of the sub-commands below are discarded, as in
	// the original code; confirm whether their errors should abort the fix.

	// Forget offline nodes.
	for _, node := range failedNodes {
		forgetCmd := ForgetAndResetNodeCommand{
			NodeId: node.Id,
		}
		forgetCmd.Execute(c)
		log.Eventf(node.Addr(), "Forget and reset failed node")
	}

	// Meet.
	for _, node := range freeNodes {
		meetCmd := MeetNodeCommand{
			NodeId: node.Id,
		}
		meetCmd.Execute(c)
		log.Eventf(node.Addr(), "Meet cluster")
		// Give some time to gossip.
		time.Sleep(5 * time.Second)
	}

	// Disable read, then replicate.
	for idx, node := range freeNodes {
		parent := defectMaster[idx]

		disableReadCmd := DisableReadCommand{
			NodeId: node.Id,
		}
		disableReadCmd.Execute(c)
		log.Eventf(node.Addr(), "Disable read flag")

		replicateCmd := ReplicateCommand{
			ChildId:  node.Id,
			ParentId: parent.Id,
		}
		replicateCmd.Execute(c)
		log.Eventf(node.Addr(), "Replicate %s to %s", node.Addr(), parent.Addr())
	}

	result := FixClusterResult{Result: true}
	return result, nil
}
Beispiel #4
0
		// 是否有其他Failover正在进行
		doing, err := meta.IsDoingFailover()
		if err != nil {
			log.Warningf(ns.Addr(), "Fetch failover status failed, %v", err)
			return false
		}
		if doing {
			// get doing failover record, if record last for more than 1min, delete doing record
			record, err := meta.DoingFailoverRecord()
			if err == nil {
				if record.Timestamp.Add(1 * time.Millisecond).Before(time.Now()) {
					err = meta.UnmarkFailoverDoing()
					if err != nil {
						log.Warning(ns.Addr(), "UnmarkFailoverDoing failed last for 1 min, %v", err)
					}
					log.Infof(ns.Addr(), "UnmarkFailoverDoing last for 1 min")
				}
			}

			log.Warning(ns.Addr(), "There is another failover doing")
			return false
		}
		// 最近是否进行过Failover
		lastTime, err := meta.LastFailoverTime()
		if err != nil {
			log.Warningf(ns.Addr(), "Get last failover time failed, %v", err)
			return false
		}
		app := meta.GetAppConfig()
		if lastTime != nil && time.Since(*lastTime) < app.AutoFailoverInterval {
			log.Warningf(ns.Addr(), "Failover too soon, lastTime: %v", *lastTime)
Beispiel #5
0
// handleTaskChange updates the state machine of a migration task according to
// the given cluster topology.
//
// It looks up the task's source and target nodes in cluster and then:
//   - returns ErrNodeNotFound if either node is missing from the cluster;
//   - sets StateCancelling and returns ErrSourceNodeFail if either node is no
//     longer a master, or if the source node is marked failed;
//   - if the target node is marked failed while the task is running, sets
//     StateTargetNodeFailure, remembers the target replica set as backup and
//     returns ErrTargetNodeFail;
//   - if the target node is not failed and the task is not StateNew, sets
//     StateRunning and clears the backup replica set;
//   - if the target node is a standby master, replaces the task's target
//     replica set with the one that now contains the first slave of the
//     backup replica set, or sets StateCancelling and returns
//     ErrCanNotRecover when that is not possible.
//
// It returns nil when no error condition was detected.
func (m *MigrateManager) handleTaskChange(task *MigrateTask, cluster *topo.Cluster) error {
	source := task.SourceNode()
	fromNode := cluster.FindNode(source.Id)
	target := task.TargetNode()
	toNode := cluster.FindNode(target.Id)
	tname := task.TaskName()

	// The looked-up node is nil in these branches, so the log message must be
	// built from the task's own record of the node.
	if fromNode == nil {
		log.Infof(tname, "Source node %s(%s) not exist", source.Addr(), source.Id)
		return ErrNodeNotFound
	}
	if toNode == nil {
		log.Infof(tname, "Target node %s(%s) not exist", target.Addr(), target.Id)
		return ErrNodeNotFound
	}

	// A role change means the shard went through a master/slave switch.
	if !fromNode.IsMaster() || !toNode.IsMaster() {
		log.Warningf(tname, "%s role change, cancel migration task %s\n", fromNode.Id[:6], task.TaskName())
		task.SetState(StateCancelling)
		return ErrSourceNodeFail
	}

	// If the source node is down, cancel right away; the task is rebuilt
	// after the master/slave switch.
	if fromNode.Fail {
		log.Infof(tname, "Cancel migration task %s\n", task.TaskName())
		task.SetState(StateCancelling)
		return ErrSourceNodeFail
	}
	// If the target node is down, record the current replica set and wait
	// for the master/slave switch.
	if toNode.Fail {
		if task.CurrentState() == StateRunning {
			task.SetState(StateTargetNodeFailure)
			task.SetBackupReplicaSet(task.TargetReplicaSet())
			return ErrTargetNodeFail
		}
	} else if task.CurrentState() != StateNew {
		task.SetState(StateRunning)
		task.SetBackupReplicaSet(nil)
	}

	if !toNode.IsStandbyMaster() {
		return nil
	}

	// The target node has been failed over (a new master was elected), so the
	// new master has to be found: take a slave from the backup replica set
	// and look up the replica set it belongs to now.
	brs := task.BackupReplicaSet()
	if brs == nil {
		task.SetState(StateCancelling)
		log.Info(tname, "No backup replicaset found, controller maybe restarted after target master failure, can not do recovery.")
		return ErrCanNotRecover
	}
	slaves := brs.Slaves
	if len(slaves) == 0 {
		task.SetState(StateCancelling)
		log.Info(tname, "The dead target master has no slave, cannot do recovery.")
		return ErrCanNotRecover
	}
	rs := cluster.FindReplicaSetByNode(slaves[0].Id)
	if rs == nil {
		task.SetState(StateCancelling)
		log.Info(tname, "No replicaset for slave of dead target master found")
		return ErrCanNotRecover
	}
	task.ReplaceTargetReplicaSet(rs)
	log.Infof(tname, "Recover dead target node to %s(%s)",
		rs.Master.Id, rs.Master.Addr())
	return nil
}