Beispiel #1
0
// UpdateRegionNodes reconciles the tracked node states of one region with a
// freshly reported node list.
//
// The state version is bumped first. Every reported node that belongs to
// region is then either registered as a new node state or refreshed in place
// (stamped with the new version and the current time), logging an event for
// each Fail/Readable/Writable flag that differs from the previous report.
// Afterwards, every tracked node of that region that was not stamped with the
// new version is deleted, and the cluster snapshot is rebuilt.
//
// Nodes in nodes whose Region differs from region are ignored.
func (cs *ClusterState) UpdateRegionNodes(region string, nodes []*topo.Node) {
	cs.version++
	stamp := time.Now()

	log.Verbosef("CLUSTER", "Update region %s %d nodes", region, len(nodes))

	// Pass 1: register unseen nodes and refresh known ones with the new version.
	for _, reported := range nodes {
		if reported.Region != region {
			continue
		}
		st := cs.nodeStates[reported.Id]
		switch {
		case st == nil:
			st = NewNodeState(reported, cs.version)
			cs.nodeStates[reported.Id] = st
		default:
			prev := st.node
			st.version = cs.version
			if prev.Fail != reported.Fail {
				log.Eventf(reported.Addr(), "Fail state changed, %v -> %v", prev.Fail, reported.Fail)
			}
			if prev.Readable != reported.Readable {
				log.Eventf(reported.Addr(), "Readable state changed, %v -> %v", prev.Readable, reported.Readable)
			}
			if prev.Writable != reported.Writable {
				log.Eventf(reported.Addr(), "Writable state changed, %v -> %v", prev.Writable, reported.Writable)
			}
			st.node = reported
		}
		st.updateTime = stamp
	}

	// Pass 2: drop nodes of this region that were not part of the report,
	// i.e. whose version was not refreshed above.
	for id, st := range cs.nodeStates {
		if st.node.Region == region && st.version != cs.version {
			log.Warningf("CLUSTER", "Delete node %s", st.node)
			delete(cs.nodeStates, id)
		}
	}

	// NB: rebuilding the whole snapshot on every update may be inefficient.
	cs.BuildClusterSnapshot()
}
Beispiel #2
0
// Execute makes the node identified by ChildId replicate the node identified
// by ParentId.
//
// It returns an error when either node is unknown to the cluster state,
// ErrNodeIsDead when either node is flagged as failed, or the error returned
// by redis.ClusterReplicate. On success an event is logged against the child
// and (nil, nil) is returned.
func (cmd *ReplicateCommand) Execute(c *cc.Controller) (cc.Result, error) {
	cluster := c.ClusterState
	replica := cluster.FindNode(cmd.ChildId)
	master := cluster.FindNode(cmd.ParentId)

	switch {
	case replica == nil:
		return nil, fmt.Errorf("Child node not exist %s", cmd.ChildId)
	case master == nil:
		return nil, fmt.Errorf("Parent node not exist %s", cmd.ParentId)
	case master.Fail || replica.Fail:
		return nil, ErrNodeIsDead
	}

	// TODO: more check
	if _, err := redis.ClusterReplicate(replica.Addr(), master.Id); err != nil {
		return nil, err
	}
	log.Eventf(replica.Addr(), "Reparent to %s(%s).", master.Addr(), master.Id)
	return nil, nil
}
Beispiel #3
0
// Execute introduces the node identified by NodeId to the cluster.
//
// The target must exist, must not be flagged as failed and must be free;
// otherwise ErrNodeNotExist, ErrNodeIsDead or ErrNodeNotFree is returned.
// Each known node state is then asked in turn to meet the target, stopping at
// the first one that succeeds. If none succeeds, the error of the last
// attempt is returned (nil when there was no node to ask).
func (cmd *MeetNodeCommand) Execute(c *cc.Controller) (cc.Result, error) {
	cluster := c.ClusterState
	target := cluster.FindNode(cmd.NodeId)

	switch {
	case target == nil:
		return nil, ErrNodeNotExist
	case target.Fail:
		return nil, ErrNodeIsDead
	case !target.Free:
		return nil, ErrNodeNotFree
	}

	var lastErr error
	for _, peer := range cluster.AllNodeStates() {
		if _, lastErr = redis.ClusterMeet(peer.Addr(), target.Ip, target.Port); lastErr != nil {
			// This peer could not perform the meet; try the next one.
			continue
		}
		log.Eventf(target.Addr(), "Meet.")
		return nil, nil
	}
	return nil, lastErr
}
// Execute makes every other node forget the node identified by NodeId and
// then resets that node.
//
// It seems that forgetting and resetting are only meaningful when done
// together; doing just one of them leaves an inconsistent state.
//
// Preconditions: the target must exist (ErrNodeNotExist), must not be free
// (ErrNodeIsFree) and must not own any slot range (ErrNodeNotEmpty).
//
// A forget error reported by a peer that is itself flagged as failed is
// ignored. For a healthy peer, an "ERR Unknown node" error is retried, any
// other error fails that peer immediately. If any healthy peer could not
// forget the target, an error is returned and the reset is skipped. The reset
// itself is only sent when the target is not flagged as failed. Finally the
// target address is removed from the seeds via meta.RemoveSeed.
func (cmd *ForgetAndResetNodeCommand) Execute(c *cc.Controller) (cc.Result, error) {
	cluster := c.ClusterState
	target := cluster.FindNode(cmd.NodeId)
	if target == nil {
		return nil, ErrNodeNotExist
	}
	if target.Free {
		return nil, ErrNodeIsFree
	}
	if len(target.Ranges) > 0 {
		return nil, ErrNodeNotEmpty
	}

	var err error
	forgotten := 0
	complete := true

	// 1. Send Forget to every node except the target itself.
	for _, peer := range cluster.AllNodeStates() {
		if peer.Id() == target.Id {
			continue
		}
		peerNode := peer.Node()
		_, err = redis.ClusterForget(peer.Addr(), target.Id)

		if !peerNode.Fail && err != nil {
			if !strings.HasPrefix(err.Error(), "ERR Unknown node") {
				complete = false
				log.Warningf(target.Addr(), "Forget node %s(%s) failed, %v", peer.Addr(), peer.Id(), err)
				continue
			}
			// "ERR Unknown node": try again until it succeeds or the retry
			// budget is used up.
			for attempt := redis.NUM_RETRY; attempt >= 0 && err != nil; attempt-- {
				_, err = redis.ClusterForget(peer.Addr(), target.Id)
			}
			// Still failing after the retries.
			if err != nil {
				complete = false
				log.Warningf(target.Addr(), "Forget node %s(%s) failed after retry, %v", peer.Addr(), peer.Id(), err)
				continue
			}
		}

		log.Eventf(target.Addr(), "Forget by %s(%s).", peer.Addr(), peer.Id())
		forgotten++
	}
	if !complete {
		return nil, fmt.Errorf("Not all forget done, only (%d/%d) success",
			forgotten, len(cluster.AllNodeStates())-1)
	}

	// 2. Reset the target, unless it is flagged as failed.
	if !target.Fail {
		if _, err = redis.ClusterReset(target.Addr(), false); err != nil {
			return nil, fmt.Errorf("Reset node %s(%s) failed, %v", target.Id, target.Addr(), err)
		}
		log.Eventf(target.Addr(), "Reset.")
	}

	// Remove the seed in the leader controller.
	meta.RemoveSeed(target.Addr())
	return nil, nil
}
Beispiel #5
0
// Execute tries to repair a cluster in which some nodes are not running and
// the same number of free nodes (running masters without slot ranges) is
// available as replacements.
//
// Steps:
//  1. Scan all non-arbiter replica sets and collect free nodes and failed
//     (not running) nodes.
//  2. Sanity-check the topology: the number of free nodes must equal the
//     number of failed nodes, and the remaining nodes must divide evenly
//     over the replica sets that have replicas.
//  3. Collect the "defect" masters: running masters that own slots and have
//     fewer nodes than the average, or have the average size but contain a
//     node that is not running.
//  4. Forget-and-reset every failed node, let every free node meet the
//     cluster, then disable read on each free node and make it replicate
//     one defect master.
//
// It returns (nil, nil) when there is no snapshot or nothing to fix, an error
// when a sanity check fails (before the cluster is modified), and a
// FixClusterResult otherwise. Step 4 is best effort: a failing sub-command is
// logged as a warning and the remaining steps still run.
func (cmd *FixClusterCommand) Execute(c *cc.Controller) (cc.Result, error) {
	cs := c.ClusterState
	snapshot := cs.GetClusterSnapshot()
	if snapshot == nil {
		return nil, nil
	}
	snapshot.BuildReplicaSets()

	// Node id -> current FSM state, captured once for the whole run.
	nss := cs.AllNodeStates()
	nodeStates := make(map[string]string, len(nss))
	for id, n := range nss {
		nodeStates[id] = n.CurrentState()
	}
	running := func(id string) bool {
		return nodeStates[id] == state.StateRunning
	}
	rss := snapshot.ReplicaSets()

	totalNum := 0   // total number of nodes in non-arbiter replica sets
	totalRepli := 0 // number of slot-owning replica sets with more than one node
	var failedNodes []*topo.Node
	var freeNodes []*topo.Node
	var defectMaster []*topo.Node

	for _, rs := range rss {
		// A replica set without a master cannot be classified; skip it
		// instead of dereferencing a nil master below.
		if rs.Master == nil || rs.Master.IsArbiter() {
			continue
		}
		members := rs.AllNodes()
		totalNum += len(members)
		if len(rs.Master.Ranges) == 0 && running(rs.Master.Id) {
			// Free node: a running master that owns no slots.
			freeNodes = append(freeNodes, rs.Master)
			continue
		}
		if len(members) > 1 {
			totalRepli++
		}
		for _, node := range members {
			if !running(node.Id) {
				failedNodes = append(failedNodes, node)
			}
		}
	}

	log.Infof("CLUSTER", "freeNodes=%d failedNodes=%d", len(freeNodes), len(failedNodes))
	if len(freeNodes) == 0 && len(failedNodes) == 0 {
		return nil, nil
	}

	// totalRepli == 0 must be rejected explicitly: it would otherwise make
	// the modulo and the division below panic.
	healthyNum := totalNum - len(failedNodes)
	if len(freeNodes) != len(failedNodes) || totalRepli == 0 ||
		healthyNum%totalRepli != 0 {
		log.Infof("CLUSTER", "totalNum=%d totalRepli=%d freeNodes=%d failedNodes=%d",
			healthyNum, totalRepli, len(freeNodes), len(failedNodes))
		return nil, errors.New("cluster fix check error, please check")
	}
	avgReplica := healthyNum / totalRepli

	replicaBroken := func(rs *topo.ReplicaSet) bool {
		for _, n := range rs.AllNodes() {
			if !running(n.Id) {
				return true
			}
		}
		return false
	}
	for _, rs := range rss {
		if rs.Master == nil || rs.Master.IsArbiter() || !running(rs.Master.Id) {
			continue
		}
		size := len(rs.AllNodes())
		switch {
		case size < avgReplica && len(rs.Master.Ranges) > 0:
			defectMaster = append(defectMaster, rs.Master)
		case size == avgReplica && replicaBroken(rs):
			defectMaster = append(defectMaster, rs.Master)
		}
	}

	// Every free node is paired with one defect master by index below, so
	// make sure there are enough of them before touching the cluster.
	if len(defectMaster) < len(freeNodes) {
		log.Infof("CLUSTER", "defectMaster=%d freeNodes=%d", len(defectMaster), len(freeNodes))
		return nil, errors.New("cluster fix check error, not enough defect masters for free nodes")
	}

	// Forget offline nodes.
	for _, node := range failedNodes {
		forgetCmd := ForgetAndResetNodeCommand{
			NodeId: node.Id,
		}
		if _, err := forgetCmd.Execute(c); err != nil {
			log.Warningf(node.Addr(), "Forget and reset failed node error, %v", err)
			continue
		}
		log.Eventf(node.Addr(), "Forget and reset failed node")
	}

	// Meet: bring the free nodes into the cluster.
	for _, node := range freeNodes {
		meetCmd := MeetNodeCommand{
			NodeId: node.Id,
		}
		if _, err := meetCmd.Execute(c); err != nil {
			log.Warningf(node.Addr(), "Meet cluster error, %v", err)
			continue
		}
		log.Eventf(node.Addr(), "Meet cluster")
		// Give the cluster some time to gossip.
		time.Sleep(5 * time.Second)
	}

	// Replicate: attach each free node to one defect master.
	for idx, node := range freeNodes {
		// Disable read before the node starts syncing.
		disableReadCmd := DisableReadCommand{
			NodeId: node.Id,
		}
		if _, err := disableReadCmd.Execute(c); err != nil {
			log.Warningf(node.Addr(), "Disable read flag error, %v", err)
		} else {
			log.Eventf(node.Addr(), "Disable read flag")
		}

		parent := defectMaster[idx]
		replicateCmd := ReplicateCommand{
			ChildId:  node.Id,
			ParentId: parent.Id,
		}
		if _, err := replicateCmd.Execute(c); err != nil {
			log.Warningf(node.Addr(), "Replicate %s to %s error, %v", node.Addr(), parent.Addr(), err)
			continue
		}
		log.Eventf(node.Addr(), "Replicate %s to %s", node.Addr(), parent.Addr())
	}

	result := FixClusterResult{Result: true}
	return result, nil
}
Beispiel #6
0
// RunFailoverTask promotes the node newMasterId to replace the dead master
// oldMasterId and then advances the old master's state machine with
// CMD_FAILOVER_END_SIGNAL.
//
// If the old master's state is unknown, nothing is done. If the new master's
// state is unknown, only the end signal is sent. Otherwise:
//
//  1. Read and write are disabled for the old master, issued through the new
//     master.
//  2. The promotion is requested in a goroutine (takeover mode when the
//     cluster has an arbiter or is down, force mode otherwise) and awaited
//     for at most 20 minutes.
//  3. The new master's role is verified, first from the cluster state and
//     then by polling its replication info up to 10 times, 5 seconds apart.
//  4. The end signal is sent and write is enabled on the new master,
//     regardless of whether the failover succeeded.
//
// This call blocks for the whole duration of the failover.
func (cs *ClusterState) RunFailoverTask(oldMasterId, newMasterId string) {
	newMaster := cs.FindNodeState(newMasterId)
	old := cs.FindNodeState(oldMasterId)

	if old == nil {
		log.Warningf(oldMasterId, "Can't run failover task, the old dead master lost")
		return
	}
	if newMaster == nil {
		log.Warningf(oldMasterId, "Can't run failover task, new master lost (%s)", newMasterId)
		old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL)
		return
	}

	// Broadcast the flag changes through the new master.
	redis.DisableRead(newMaster.Addr(), old.Id())
	redis.DisableWrite(newMaster.Addr(), old.Id())

	// Buffered so the goroutine can always deliver its result and exit, even
	// when the select below has already given up on the timeout.
	done := make(chan error, 1)

	go func() {
		// Choose "failover takeover" when there is an arbiter or the cluster
		// is down, "failover force" otherwise.
		cluster := cs.cluster
		rs := cluster.FindReplicaSetByNode(old.Id())
		takeover := cluster.HasArbiter() || cluster.IsClusterDown()
		done <- redis.SetAsMasterWaitSyncDone(newMaster.Addr(), true, takeover, rs)
	}()

	timer := time.NewTimer(20 * time.Minute)
	select {
	case err := <-done:
		if err != nil {
			log.Eventf(old.Addr(), "Failover request done with error(%v).", err)
		} else {
			log.Eventf(old.Addr(), "Failover request done, new master %s(%s).", newMaster.Id(), newMaster.Addr())
		}
	case <-timer.C:
		log.Eventf(old.Addr(), "Failover timedout, new master %s(%s)", newMaster.Id(), newMaster.Addr())
	}
	// Release the timer when the failover finished before the deadline.
	timer.Stop()

	// Read the node again, because it may have been updated in the meantime.
	// It may also have been removed from the cluster state while waiting, in
	// which case its role cannot be verified and the failover is treated as
	// failed.
	roleChanged := false
	node := cs.FindNode(newMasterId)
	switch {
	case node == nil:
		log.Warningf(old.Addr(), "New master lost after failover request (%s)", newMasterId)
	case node.IsMaster():
		roleChanged = true
	default:
		for i := 0; i < 10; i++ {
			info, err := redis.FetchInfo(node.Addr(), "Replication")
			if err == nil && info.Get("role") == "master" {
				roleChanged = true
				break
			}
			log.Warningf(old.Addr(),
				"Role of new master %s(%s) has not yet changed, will check 5 seconds later.",
				newMaster.Id(), newMaster.Addr())
			time.Sleep(5 * time.Second)
		}
	}

	if roleChanged {
		log.Eventf(old.Addr(), "New master %s(%s) role change success", node.Id, node.Addr())
		// Handle anomalies left over from the migration: slots still
		// attributed to the failed old master have to move to the new master.
		oldNode := cs.FindNode(oldMasterId)
		if oldNode != nil && oldNode.Fail && oldNode.IsMaster() && len(oldNode.Ranges) != 0 {
			log.Warningf(old.Addr(),
				"Some node carries slots info(%v) about the old master, waiting for MigrateManager to fix it.",
				oldNode.Ranges)
		} else {
			log.Info(old.Addr(), "Good, no slot need to be fix after failover.")
		}
	} else {
		log.Warningf(old.Addr(), "Failover failed, please check cluster state.")
		log.Warningf(old.Addr(), "The dead master will goto OFFLINE state and then goto WAIT_FAILOVER_BEGIN state to try failover again.")
	}

	old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL)

	// Enable write on the new master. Setting the write flag on a slave has
	// no effect, so this is harmless even when the failover failed.
	redis.EnableWrite(newMaster.Addr(), newMaster.Id())
}