func (cs *ClusterState) UpdateRegionNodes(region string, nodes []*topo.Node) { cs.version++ now := time.Now() log.Verbosef("CLUSTER", "Update region %s %d nodes", region, len(nodes)) // 添加不存在的节点,版本号+1 for _, n := range nodes { if n.Region != region { continue } nodeState := cs.nodeStates[n.Id] if nodeState == nil { nodeState = NewNodeState(n, cs.version) cs.nodeStates[n.Id] = nodeState } else { nodeState.version = cs.version if nodeState.node.Fail != n.Fail { log.Eventf(n.Addr(), "Fail state changed, %v -> %v", nodeState.node.Fail, n.Fail) } if nodeState.node.Readable != n.Readable { log.Eventf(n.Addr(), "Readable state changed, %v -> %v", nodeState.node.Readable, n.Readable) } if nodeState.node.Writable != n.Writable { log.Eventf(n.Addr(), "Writable state changed, %v -> %v", nodeState.node.Writable, n.Writable) } nodeState.node = n } nodeState.updateTime = now } // 删除已经下线的节点 for id, n := range cs.nodeStates { if n.node.Region != region { continue } nodeState := cs.nodeStates[id] if nodeState.version != cs.version { log.Warningf("CLUSTER", "Delete node %s", nodeState.node) delete(cs.nodeStates, id) } } // NB:低效? cs.BuildClusterSnapshot() }
func (self *ReplicateCommand) Execute(c *cc.Controller) (cc.Result, error) { cs := c.ClusterState child := cs.FindNode(self.ChildId) parent := cs.FindNode(self.ParentId) if child == nil { return nil, fmt.Errorf("Child node not exist %s", self.ChildId) } if parent == nil { return nil, fmt.Errorf("Parent node not exist %s", self.ParentId) } if parent.Fail || child.Fail { return nil, ErrNodeIsDead } // TODO: more check _, err := redis.ClusterReplicate(child.Addr(), parent.Id) if err != nil { return nil, err } log.Eventf(child.Addr(), "Reparent to %s(%s).", parent.Addr(), parent.Id) return nil, nil }
func (self *MeetNodeCommand) Execute(c *cc.Controller) (cc.Result, error) { cs := c.ClusterState target := cs.FindNode(self.NodeId) if target == nil { return nil, ErrNodeNotExist } if target.Fail { return nil, ErrNodeIsDead } if target.Free == false { return nil, ErrNodeNotFree } var err error for _, ns := range cs.AllNodeStates() { _, err = redis.ClusterMeet(ns.Addr(), target.Ip, target.Port) if err == nil { log.Eventf(target.Addr(), "Meet.") return nil, nil } } return nil, err }
// 似乎,只有同时进行Forget和Reset才有意义,否则都是一个不一致的状态 func (self *ForgetAndResetNodeCommand) Execute(c *cc.Controller) (cc.Result, error) { cs := c.ClusterState target := cs.FindNode(self.NodeId) if target == nil { return nil, ErrNodeNotExist } if !target.Free == false { return nil, ErrNodeIsFree } if len(target.Ranges) > 0 { return nil, ErrNodeNotEmpty } var err error forgetCount := 0 allForgetDone := true // 1. 所有节点发送Forget for _, ns := range cs.AllNodeStates() { if ns.Id() == target.Id { continue } node := ns.Node() _, err = redis.ClusterForget(ns.Addr(), target.Id) if !node.Fail && err != nil && !strings.HasPrefix(err.Error(), "ERR Unknown node") { allForgetDone = false log.Warningf(target.Addr(), "Forget node %s(%s) failed, %v", ns.Addr(), ns.Id(), err) continue } else if !node.Fail && err != nil { //try again for try := redis.NUM_RETRY; try >= 0; try-- { _, err = redis.ClusterForget(ns.Addr(), target.Id) if err == nil { break } } //execute failed after retry if err != nil { allForgetDone = false log.Warningf(target.Addr(), "Forget node %s(%s) failed after retry, %v", ns.Addr(), ns.Id(), err) continue } } log.Eventf(target.Addr(), "Forget by %s(%s).", ns.Addr(), ns.Id()) forgetCount++ } if !allForgetDone { return nil, fmt.Errorf("Not all forget done, only (%d/%d) success", forgetCount, len(cs.AllNodeStates())-1) } // 2. 重置 if !target.Fail { _, err = redis.ClusterReset(target.Addr(), false) if err != nil { return nil, fmt.Errorf("Reset node %s(%s) failed, %v", target.Id, target.Addr(), err) } log.Eventf(target.Addr(), "Reset.") } // remove seed in leader contrller meta.RemoveSeed(target.Addr()) return nil, nil }
func (self *FixClusterCommand) Execute(c *cc.Controller) (cc.Result, error) { cs := c.ClusterState snapshot := cs.GetClusterSnapshot() if snapshot == nil { return nil, nil } snapshot.BuildReplicaSets() nodeStates := map[string]string{} nss := cs.AllNodeStates() for id, n := range nss { nodeStates[id] = n.CurrentState() } rss := snapshot.ReplicaSets() totalNum := 0 //总节点数 totalRepli := 0 failedNodes := []*topo.Node{} freeNodes := []*topo.Node{} defectMaster := []*topo.Node{} for _, rs := range rss { //check failed nodes and free nodes if rs.Master != nil && rs.Master.IsArbiter() { continue } totalNum = totalNum + len(rs.AllNodes()) if len(rs.Master.Ranges) == 0 && nodeStates[rs.Master.Id] == state.StateRunning { //free节点 freeNodes = append(freeNodes, rs.Master) } else { if len(rs.AllNodes()) > 1 { totalRepli = totalRepli + 1 } for _, node := range rs.AllNodes() { if nodeStates[node.Id] != state.StateRunning { failedNodes = append(failedNodes, node) } } } } log.Infof("CLUSTER", "freeNodes=%d failedNodes=%d", len(freeNodes), len(failedNodes)) if len(freeNodes) == 0 && len(failedNodes) == 0 { return nil, nil } if len(freeNodes) != len(failedNodes) || (totalNum-len(failedNodes))%(totalRepli) != 0 { log.Infof("CLUSTER", "totalNum=%d totalRepli=%d freeNodes=%d failedNodes=%d", totalNum-len(failedNodes), totalRepli, len(freeNodes), len(failedNodes)) return nil, errors.New("cluster fix check error, please check") } avgReplica := int((totalNum - len(failedNodes)) / totalRepli) replicaBroken := func(rs *topo.ReplicaSet) bool { for _, n := range rs.AllNodes() { if nodeStates[n.Id] != state.StateRunning { return true } } return false } for _, rs := range rss { if rs.Master != nil && rs.Master.IsArbiter() || nodeStates[rs.Master.Id] != state.StateRunning { continue } if len(rs.AllNodes()) < avgReplica && len(rs.Master.Ranges) > 0 && nodeStates[rs.Master.Id] == state.StateRunning { defectMaster = append(defectMaster, rs.Master) } if len(rs.AllNodes()) == avgReplica && replicaBroken(rs) == true { defectMaster = append(defectMaster, rs.Master) } } // forget offline nodes for _, node := range failedNodes { forgetCmd := ForgetAndResetNodeCommand{ NodeId: node.Id, } forgetCmd.Execute(c) log.Eventf(node.Addr(), "Forget and reset failed node") } //meet & replicate for _, node := range freeNodes { meetCmd := MeetNodeCommand{ NodeId: node.Id, } meetCmd.Execute(c) log.Eventf(node.Addr(), "Meet cluster") // give some time to gossip time.Sleep(5 * time.Second) } for idx, node := range freeNodes { //disable read disableReadCmd := DisableReadCommand{ NodeId: node.Id, } disableReadCmd.Execute(c) log.Eventf(node.Addr(), "Disable read flag") //replicate replicateCmd := ReplicateCommand{ ChildId: node.Id, ParentId: defectMaster[idx].Id, } replicateCmd.Execute(c) log.Eventf(node.Addr(), "Replicate %s to %s", node.Addr(), defectMaster[idx].Addr()) } result := FixClusterResult{Result: true} return result, nil }
func (cs *ClusterState) RunFailoverTask(oldMasterId, newMasterId string) { new := cs.FindNodeState(newMasterId) old := cs.FindNodeState(oldMasterId) if old == nil { log.Warningf(oldMasterId, "Can't run failover task, the old dead master lost") return } if new == nil { log.Warningf(oldMasterId, "Can't run failover task, new master lost (%s)", newMasterId) old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL) return } // 通过新主广播消息 redis.DisableRead(new.Addr(), old.Id()) redis.DisableWrite(new.Addr(), old.Id()) c := make(chan error, 1) go func() { //choose failover force or takeover in case of arbiter cluster := cs.cluster rs := cluster.FindReplicaSetByNode(old.Id()) if cluster.HasArbiter() || cluster.IsClusterDown() { //use failover takeover c <- redis.SetAsMasterWaitSyncDone(new.Addr(), true, true, rs) } else { //use failover force c <- redis.SetAsMasterWaitSyncDone(new.Addr(), true, false, rs) } }() select { case err := <-c: if err != nil { log.Eventf(old.Addr(), "Failover request done with error(%v).", err) } else { log.Eventf(old.Addr(), "Failover request done, new master %s(%s).", new.Id(), new.Addr()) } case <-time.After(20 * time.Minute): log.Eventf(old.Addr(), "Failover timedout, new master %s(%s)", new.Id(), new.Addr()) } // 重新读取一次,因为可能已经更新了 roleChanged := false node := cs.FindNode(newMasterId) if node.IsMaster() { roleChanged = true } else { for i := 0; i < 10; i++ { info, err := redis.FetchInfo(node.Addr(), "Replication") if err == nil && info.Get("role") == "master" { roleChanged = true break } log.Warningf(old.Addr(), "Role of new master %s(%s) has not yet changed, will check 5 seconds later.", new.Id(), new.Addr()) time.Sleep(5 * time.Second) } } if roleChanged { log.Eventf(old.Addr(), "New master %s(%s) role change success", node.Id, node.Addr()) // 处理迁移过程中的异常问题,将故障节点(旧主)的slots转移到新主上 oldNode := cs.FindNode(oldMasterId) if oldNode != nil && oldNode.Fail && oldNode.IsMaster() && len(oldNode.Ranges) != 0 { log.Warningf(old.Addr(), "Some node carries slots info(%v) about the old master, waiting for MigrateManager to fix it.", oldNode.Ranges) } else { log.Info(old.Addr(), "Good, no slot need to be fix after failover.") } } else { log.Warningf(old.Addr(), "Failover failed, please check cluster state.") log.Warningf(old.Addr(), "The dead master will goto OFFLINE state and then goto WAIT_FAILOVER_BEGIN state to try failover again.") } old.AdvanceFSM(cs, CMD_FAILOVER_END_SIGNAL) // 打开新主的写入,因为给slave加Write没有效果 // 所以即便Failover失败,也不会产生错误 redis.EnableWrite(new.Addr(), new.Id()) }