func (self *Inspector) isFreeNode(seed *topo.Node) (bool, *topo.Node) { resp, err := redis.ClusterNodesInRegion(seed.Addr(), self.LocalRegion) if err != nil && strings.HasPrefix(err.Error(), "ERR Wrong CLUSTER subcommand or number of arguments") { //server version do not support 'cluster nodes extra [region]' resp, err = redis.ClusterNodes(seed.Addr()) } if err != nil { return false, nil } numNode := 0 lines := strings.Split(resp, "\n") for _, line := range lines { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "# ") { continue } numNode++ } if numNode != 1 { return false, nil } for _, line := range lines { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "# ") { continue } node, myself, err := self.buildNode(line) if node.Ip == "127.0.0.1" { node.Ip = seed.Ip } // 只看到自己,是主,且没有slots,才认为是FreeNode if !myself { return false, nil } if err != nil || len(node.Ranges) > 0 || !node.IsMaster() { return false, nil } else { return true, node } } return false, nil }
func getFailedNodes(addr string) ([]string, error) { resp, err := redis.ClusterNodes(addr) if err != nil { return nil, err } lines := strings.Split(resp, "\n") failedNodes := []string{} for _, line := range lines { if strings.HasPrefix(line, "# ") { continue } line = strings.TrimSpace(line) if line == "" { continue } node, _ := checkNodeStatus(line) if node.Fail { failedNodes = append(failedNodes, node.Id) } } return failedNodes, nil }
func (self *Inspector) checkClusterTopo(seed *topo.Node, cluster *topo.Cluster) error { resp, err := redis.ClusterNodesInRegion(seed.Addr(), self.LocalRegion) if err != nil && strings.HasPrefix(err.Error(), "ERR Wrong CLUSTER subcommand or number of arguments") { //server version do not support 'cluster nodes extra [region]' resp, err = redis.ClusterNodes(seed.Addr()) } //this may lead to BuildClusterTopo update failed for a time //the node is step into this state after check IsAlive if err != nil && strings.HasPrefix(err.Error(), "LOADING") { return nil } if err != nil { return err } var summary topo.SummaryInfo lines := strings.Split(resp, "\n") for _, line := range lines { if strings.HasPrefix(line, "# ") { summary.ReadLine(line) continue } line = strings.TrimSpace(line) if line == "" { continue } s, myself, err := self.buildNode(line) if err == ErrNodeInHandShake || err == ErrNodeNoAddr { continue } // Fix 'cluster nodes extra' & 'cluster nodes extra region' compatiable if s.Region != self.LocalRegion { continue } if err != nil { return err } if s.Ip == "127.0.0.1" { s.Ip = seed.Ip } node := cluster.FindNode(s.Id) if node == nil { if s.PFail { glog.Warningf("forget dead node %s(%s) should be forgoten", s.Id, s.Addr()) //redis.ClusterForget(seed.Addr(), s.Id) } return fmt.Errorf("node not exist %s(%s)", s.Id, s.Addr()) } // 对比节点数据是否相同 if !node.Compare(s) { glog.Infof("%#v vs %#v different", s, node) if s.Tag == "-" && node.Tag != "-" { // 可能存在处于不被Cluster接受的节点,节点可以看见Cluster,但Cluster看不到它。 // 一种复现情况情况:某个节点已经死了,系统将其Forget,但是OP并未被摘除该节点, // 而是恢复了该节点。 glog.Warningf("remeet node %s", seed.Addr()) self.MeetNode(seed) } return ErrNodesInfoNotSame } if len(node.Ranges) == 0 && len(s.Ranges) > 0 { glog.Warningf("Ranges not equal, use nonempty ranges.") node.Ranges = s.Ranges } if myself { info, err := redis.FetchClusterInfo(node.Addr()) if err != nil { return err } node.ClusterInfo = info node.SummaryInfo = summary } if len(s.Migrating) != 0 { node.Migrating = s.Migrating } if len(s.Importing) != 0 { node.Importing = s.Importing } if s.PFail { node.IncrPFailCount() } } return nil }
func (self *Inspector) initClusterTopo(seed *topo.Node) (*topo.Cluster, error) { resp, err := redis.ClusterNodesInRegion(seed.Addr(), self.LocalRegion) if err != nil && strings.HasPrefix(err.Error(), "ERR Wrong CLUSTER subcommand or number of arguments") { //server version do not support 'cluster nodes extra [region]' resp, err = redis.ClusterNodes(seed.Addr()) } if err != nil { return nil, err } cluster := topo.NewCluster(self.LocalRegion) var summary topo.SummaryInfo var nodeidx *topo.Node var cnt int lines := strings.Split(resp, "\n") cnt = 0 for _, line := range lines { if strings.HasPrefix(line, "# ") { summary.ReadLine(line) continue } line = strings.TrimSpace(line) if line == "" { continue } node, myself, err := self.buildNode(line) if err == ErrNodeInHandShake || err == ErrNodeNoAddr { continue } // Fix 'cluster nodes extra' & 'cluster nodes extra region' compatiable if node.Region != self.LocalRegion { continue } if err != nil { return nil, err } if node.Ip == "127.0.0.1" { node.Ip = seed.Ip } // 遇到myself,读取该节点的ClusterInfo if myself { info, err := redis.FetchClusterInfo(node.Addr()) if err != nil { return nil, err } node.ClusterInfo = info node.SummaryInfo = summary } cluster.AddNode(node) nodeidx = node cnt++ } if cnt == 1 { if nodeidx.IsMaster() && len(nodeidx.Ranges) == 0 { glog.Infof("Node %s is free node", nodeidx.Addr()) nodeidx.SetFree(true) } } return cluster, nil }