Beispiel #1
0
// showNodes fetches the replica sets from the cluster leader and prints one
// row per node via utils.PrintJsonArray.
//
// format is handed to utils.PrintJsonArray unchanged. When arbiter is false,
// replica sets whose master tag contains "Arbiter" are left out of the output.
//
// A nil entry is inserted between consecutive replica sets (presumably rendered
// as a separator row by nodesToInterfaceSlice — confirm there). The separator
// is emitted in front of every shown set except the first, so filtering out
// the last set(s) does not leave a dangling separator at the end of the list.
func showNodes(format string, arbiter bool) {
	addr := context.GetLeaderAddr()
	url := "http://" + addr + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(url, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	var allNodes []*topo.Node
	shown := 0
	for _, rs := range rss.ReplicaSets {
		if !arbiter && rs.Master != nil && strings.Contains(rs.Master.Tag, "Arbiter") {
			continue
		}
		// Separate this set from the previously shown one. Counting shown
		// sets (not raw indices) keeps skipped arbiters from producing a
		// trailing separator.
		if shown > 0 {
			allNodes = append(allNodes, nil)
		}
		shown++

		allNodes = append(allNodes, rs.Master)
		allNodes = append(allNodes, rs.Slaves...)
	}
	utils.PrintJsonArray(format,
		[]string{"State", "Mode", "Fail", "Role", "Id", "Tag", "Addr", "QPS",
			"UsedMemory", "Link", "Repl", "Keys", "NetIn", "NetOut"},
		nodesToInterfaceSlice(allNodes, rss.NodeStates))
}
Beispiel #2
0
// checkReplicaAction fetches the replica sets from the cluster leader and
// prints a warning line for every slave whose IP is already used by the master
// or by an earlier slave of the same replica set.
//
// The warning line carries the slave's region and the master's ip:port.
// "Check done" is printed once all sets have been inspected. The cli context
// is not used.
func checkReplicaAction(c *cli.Context) {
	leader := context.GetLeaderAddr()
	fetchURL := "http://" + leader + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(fetchURL, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	for _, rs := range rss.ReplicaSets {
		master := rs.Master
		// IPs already occupied by a member of this replica set.
		seen := map[string]bool{master.Ip: true}

		for _, slave := range rs.Slaves {
			if seen[slave.Ip] {
				fmt.Printf("[%s] %s:%d %s\n", slave.Region, master.Ip, master.Port, "Replica has nodes in same host")
				continue
			}
			seen[slave.Ip] = true
		}
	}
	fmt.Println("Check done")
}
Beispiel #3
0
// mkreplicaAction builds replica sets out of the cluster's free nodes.
//
// Flags read from c:
//
//	-l  comma separated logic machine rooms (required)
//	-m  comma separated master machine rooms (required)
//	-r  replicas per master; must not be negative
//
// A missing -l/-m or a negative -r terminates the process with exit code -1.
//
// Steps, in order: fetch the replica sets from the leader, collect the free
// nodes (a master without ranges and without slaves), probe them with isAlive,
// run checkAndSetState and validateProcess, let buildCluster compute the
// layout, print a summary and wait for the user to type "yes". After
// confirmation the nodes are introduced to each other with meetEach and,
// following a 10 second pause, the master and replica state is applied per
// master. Every failure is reported on stdout; nothing is returned.
func mkreplicaAction(c *cli.Context) {

	red := color.New(color.FgRed).SprintFunc()
	green := color.New(color.FgGreen).SprintFunc()
	yellow := color.New(color.FgYellow).SprintFunc()
	cyan := color.New(color.FgCyan).SprintFunc()

	l := c.String("l")
	if l == "" {
		fmt.Println(red("-l logic machine room must be assigned"))
		os.Exit(-1)
	}
	m := c.String("m")
	if m == "" {
		fmt.Println(red("-m master machine rooms must be assigned"))
		os.Exit(-1)
	}
	replicas := c.Int("r")
	if replicas < 0 {
		// replicas+1 is used as a divisor below: -1 would panic with a
		// division by zero and any other negative count is meaningless.
		fmt.Println(red("-r replicas must not be negative"))
		os.Exit(-1)
	}
	masterRooms := strings.Split(m, ",")
	rooms := strings.Split(l, ",")

	// fetch and check cluster nodes
	addr := context.GetLeaderAddr()
	url := "http://" + addr + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(url, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}
	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	freeNodes := make([]*Node, 0, len(rss.ReplicaSets))
	for _, rs := range rss.ReplicaSets {
		if rs.Master == nil || len(rs.Master.Ranges) != 0 || len(rs.Slaves) != 0 {
			continue
		}
		// a master that owns no slots and has no slaves is a free node
		freeNodes = append(freeNodes, &Node{
			Ip:      rs.Master.Ip,
			Port:    fmt.Sprintf("%d", rs.Master.Port),
			LogicMR: rs.Master.Zone,
		})
	}
	// every master needs exactly `replicas` slaves, so the free nodes must
	// split evenly into groups of replicas+1
	if replicas != 0 && len(freeNodes)%(replicas+1) != 0 {
		fmt.Printf("%s. Not enough nodes\n", red("ERROR"))
		return
	}

	// check free nodes state; the result is only reported here
	for _, fn := range freeNodes {
		fn.Alive = isAlive(fn)
		fmt.Printf("connecting to %s\t%s\t", fn.Ip, fn.Port)
		if fn.Alive {
			fmt.Printf("%s\n", green("OK"))
		} else {
			fmt.Printf("%s\n", red("FAILED"))
		}
	}

	// check and set state
	fmt.Println("Check and set state...")
	for _, fn := range freeNodes {
		if err := checkAndSetState(fn); err != nil {
			fmt.Println(err)
			return
		}
	}

	// validate
	if !validateProcess(freeNodes) {
		fmt.Println("Not all nodes have the right status")
		return
	}

	// build replicas
	fmt.Println("Build replicas...")
	masterNodes, err := buildCluster(freeNodes, replicas, masterRooms, rooms)
	if err != nil {
		fmt.Println("build cluster failed, ", err)
		return
	}

	// summary
	for _, mn := range masterNodes {
		fmt.Printf("%s %s\t%s\t%s\t%s\n", yellow("M:"), mn.Id, mn.Ip, mn.Port, yellow(mn.SlotsRange))
		for _, slave := range getSlaves(freeNodes, mn) {
			fmt.Printf("%s %s\t%s\t%s\t%s\n", cyan("S:"), slave.Id, slave.Ip, slave.Port, slave.MasterId)
		}
	}

	var cmd string
	fmt.Printf("Type %s to continue: \n", green("yes"))

	// A failed read leaves cmd empty, which is treated like any other answer
	// that is not "yes": stop without touching the cluster.
	fmt.Scanf("%s\n", &cmd)
	if cmd != "yes" {
		os.Exit(0)
	}
	meetEach(freeNodes)

	fmt.Println("Wait for stable state...")
	time.Sleep(10 * time.Second)

	for _, mn := range masterNodes {
		fmt.Printf("Node:%s\n", mn.Id)
		// A chmod failure is reported but does not stop the run.
		if _, err := rwMasterState(mn); err != nil {
			fmt.Printf("%s\n", red("FAILED to chmod, please check"))
		}
		slaves := getSlaves(freeNodes, mn)
		fmt.Printf("%-40s", "setting replicas...")
		if err := rwReplicasState(slaves); err != nil {
			fmt.Printf("%s\n", red("FAILED to chmod, please check"))
		}
		resp, err := setReplicas(slaves)
		if err != nil {
			// stop at the first master whose replicas cannot be set
			fmt.Printf("%s\n", red(err.Error()))
			break
		}
		fmt.Printf("%s\n", green(resp))
	}
}
Beispiel #4
0
// addRegionAction attaches the free nodes of one region to the existing
// masters as replicas.
//
// The region is read from the -r flag of c and is required. Replica sets are
// fetched from the leader and split into:
//   - free nodes: masters without ranges and without slaves whose Region
//     equals the requested region (nodes tagged "Arbiter" are ignored, free
//     nodes of other regions are ignored as well);
//   - master nodes: the master of every other replica set.
//
// The free nodes must divide evenly among the masters. All nodes are then
// introduced to each other with meetEach and every master receives a
// consecutive slice of free nodes as slaves. Problems are reported on stdout;
// nothing is returned.
func addRegionAction(c *cli.Context) {
	region := c.String("r")
	if region == "" {
		fmt.Println("-r region must be assigned")
		return
	}

	// fetch and check cluster nodes
	addr := context.GetLeaderAddr()
	url := "http://" + addr + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(url, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}
	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	freeNodes := []*topo.Node{}
	masterNodes := []*topo.Node{}

	for _, rs := range rss.ReplicaSets {
		if rs.Master == nil {
			// A set without a master can neither receive replicas nor be
			// met; keeping it would dereference nil further down.
			continue
		}
		if len(rs.Master.Ranges) != 0 || len(rs.Slaves) != 0 {
			masterNodes = append(masterNodes, rs.Master)
			continue
		}
		// ignore arbiter
		if strings.Contains(rs.Master.Tag, "Arbiter") {
			continue
		}
		// this is a free node
		if rs.Master.Region == region {
			freeNodes = append(freeNodes, rs.Master)
		}
	}

	// Without this guard the modulo/division below panics on a cluster that
	// has no master sets yet.
	if len(masterNodes) == 0 {
		fmt.Println("No master nodes found, nothing to replicate")
		return
	}
	if len(freeNodes)%len(masterNodes) != 0 {
		fmt.Println("Number of free nodes in region not match")
		return
	}
	replicasPerMaster := len(freeNodes) / len(masterNodes)

	// asNode converts a topology node into the Node form the helpers expect.
	asNode := func(n *topo.Node) *Node {
		return &Node{
			Ip:   n.Ip,
			Port: fmt.Sprintf("%d", n.Port),
		}
	}

	// meet free nodes: masters first, then the free nodes. A fresh slice is
	// built so masterNodes' backing array is never shared.
	clusterNodes := make([]*Node, 0, len(masterNodes)+len(freeNodes))
	for _, n := range masterNodes {
		clusterNodes = append(clusterNodes, asNode(n))
	}
	for _, n := range freeNodes {
		clusterNodes = append(clusterNodes, asNode(n))
	}

	fmt.Println("Check node is alive")
	for _, n := range freeNodes {
		node := asNode(n)
		fmt.Printf("connecting to %s\t%s\t", node.Ip, node.Port)
		if isAlive(node) {
			fmt.Printf("OK\n")
		} else {
			fmt.Printf("Error\n")
		}
	}

	fmt.Println("Begin meet cluster nodes, this will take some times")
	meetEach(clusterNodes)

	if checkClusterInfo(clusterNodes) {
		fmt.Println("All nodes agree the configure, continue")
	} else {
		fmt.Println("Node configure inconsistent or slots incomplete")
	}

	// set replica: master idx takes free nodes
	// [idx*replicasPerMaster, (idx+1)*replicasPerMaster)
	for idx, master := range masterNodes {
		slaves := make([]*Node, 0, replicasPerMaster)
		for i := 0; i < replicasPerMaster; i++ {
			s := freeNodes[idx*replicasPerMaster+i]
			fmt.Printf("%s %s\n", "setting replicas", master.Id)
			slaves = append(slaves, &Node{
				Id:       s.Id,
				Ip:       s.Ip,
				Port:     fmt.Sprintf("%d", s.Port),
				MasterId: master.Id,
			})
		}
		// a failure here is reported but setReplicas is still attempted
		if err := rwReplicasState(slaves); err != nil {
			fmt.Println(err)
		}
		resp, err := setReplicas(slaves)
		if err != nil {
			fmt.Println(err)
			break
		}
		fmt.Println(resp)
	}

	if checkClusterInfo(clusterNodes) {
		fmt.Println("All nodes agree the configure")
	} else {
		fmt.Println("Node configure inconsistent or slots incomplete")
	}
}
Beispiel #5
0
// upgradeSlaves walks the replica sets in sorted order and restarts the slaves
// of each set, resuming after the index stored by a previous run.
//
// For every set that is not an arbiter and whose position is greater than the
// stored index, each slave gets: read disabled, aof/rdb disabled, a shutdown
// command, then a 5 second pause. The function then polls
// checkSlaveRepliStatusOk every 5 seconds until all slaves of the set report
// ok, re-enables aof/rdb and read on them, and stores the set's index with
// saveIdx. Errors of the individual steps are printed and do not abort the
// run. The cli context is not used.
func upgradeSlaves(c *cli.Context) {

	pid := context.GetAppName()
	leader := context.GetLeaderAddr()
	fetchURL := "http://" + leader + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(fetchURL, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	lastIdx, err := getIdx(IdxServerAddr, pid, "slaves")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Get last idx record: %d\n", lastIdx)

	// slavesReady reports whether every node passes the replication check;
	// an error from the check counts as "not ready".
	slavesReady := func(nodes []*topo.Node) bool {
		for _, n := range nodes {
			if ok, err := checkSlaveRepliStatusOk(n); err != nil || !ok {
				return false
			}
		}
		return true
	}

	total := len(rss.ReplicaSets)
	for idx, rs := range rss.ReplicaSets {
		if rs.Master.IsArbiter() {
			continue
		}
		if idx <= lastIdx {
			fmt.Printf("Skipping replica(id:%s) (%d/%d) slaves\n", rs.Master.Id, idx, total)
			continue
		}

		fmt.Printf("Upgrading replica(id:%s) (%d/%d) slaves\n", rs.Master.Id, idx, total)
		for _, slave := range rs.Slaves {
			// take the slave out of read traffic
			if _, err := configRead(slave, false); err != nil {
				fmt.Println(err)
			}
			fmt.Printf("Disable read %s\n", slave.Addr())

			if err := configAofAndRdb(slave, false); err != nil {
				fmt.Println(err)
			}
			fmt.Printf("Disable aof feature %s\n", slave.Addr())

			// send the shutdown command; an error is reported as a restart
			if err := shutdownServer(slave); err != nil {
				fmt.Printf("server %s restart\n", slave.Addr())
			}
			// give the server 5 seconds before moving on
			time.Sleep(5 * time.Second)
		}

		// poll until all slaves pass the replication check
		for attempt := 1; !slavesReady(rs.Slaves); attempt++ {
			fmt.Printf("Checking slaves replication status %d times\n", attempt)
			time.Sleep(5 * time.Second)
		}

		// turn aof/rdb and read back on
		for _, slave := range rs.Slaves {
			if err := configAofAndRdb(slave, true); err != nil {
				fmt.Println(err)
			}
			if _, err := configRead(slave, true); err != nil {
				fmt.Println(err)
			}
			fmt.Printf("Enable slaves %s aof and read flag\n", slave.Addr())
		}

		// set finished: remember its index so a later run can resume here
		if err := saveIdx(IdxServerAddr, pid, "slaves", idx); err != nil {
			fmt.Printf("saveIdx to %d failed\n", idx)
		}
	}
}
Beispiel #6
0
// upgradeMaster walks the replica sets in sorted order and, for every set
// after the stored resume index, moves the master role to a slave in the same
// region and restarts the old master.
//
// Per replica set (arbiters and already processed indices are skipped):
//  1. pick the first slave whose region (getRegion) equals the master's;
//  2. ask the leader to make it master via api.NodeSetAsMasterPath and poll
//     checkMasterRole every 10 seconds until it reports master;
//  3. disable read on the old master and the remaining slaves;
//  4. disable aof/rdb on the old master and shut it down;
//  5. poll checkSlaveRepliStatusOk every 10 seconds until all of those nodes
//     are ok, re-enabling read on each node as it becomes ok;
//  6. re-enable aof/rdb on the old master and store the index with saveIdx.
//
// The chosen new master and the list of nodes to watch are scoped to the
// current replica set. They must not carry over between sets: a stale new
// master would receive the failover request of a set that has no slave in the
// master's region, and a growing node list would disable read on sets that
// were already upgraded.
//
// Any failure other than the shutdown error is printed and aborts the run.
// The cli context is not used.
func upgradeMaster(c *cli.Context) {
	pid := context.GetAppName()
	addr := context.GetLeaderAddr()
	fetchURL := "http://" + addr + api.FetchReplicaSetsPath
	failoverURL := "http://" + addr + api.NodeSetAsMasterPath
	extraHeader := &utils.ExtraHeader{
		User:  context.Config.User,
		Role:  context.Config.Role,
		Token: context.Config.Token,
	}

	resp, err := utils.HttpGet(fetchURL, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	lastIdx, err := getIdx(IdxServerAddr, pid, "master")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Get last idx record: %d\n", lastIdx)

	// enableReady checks every node, turns read back on for the ones whose
	// replication is ok and reports whether all of them were ok. It does not
	// stop at the first failure so that every ready node is re-enabled.
	enableReady := func(nodes []*topo.Node) bool {
		allOk := true
		for _, n := range nodes {
			ok, err := checkSlaveRepliStatusOk(n)
			if err != nil || !ok {
				// a failed check says nothing reliable about the node
				allOk = false
				continue
			}
			// replica status ok: enable the read flag. The result is
			// deliberately ignored (best effort, as before).
			_, _ = configRead(n, true)
		}
		return allOk
	}

	for idx, rs := range rss.ReplicaSets {
		if rs.Master.IsArbiter() {
			continue
		}
		if idx <= lastIdx {
			fmt.Printf("Skipping replica(id:%s) (%d/%d) master\n", rs.Master.Id, idx, len(rss.ReplicaSets))
			continue
		}

		// select a slave in the same IDC
		oldMaster := rs.Master
		oldMasterRegion := getRegion(oldMaster)
		if oldMasterRegion == "" {
			// NOTE(review): nothing is printed here; presumably getRegion
			// reports the problem itself — confirm.
			return
		}

		// Per-set state: the node to promote and the nodes that will be
		// slaves afterwards (old master plus the slaves not promoted).
		var newMaster *topo.Node
		newSlaves := []*topo.Node{oldMaster}

		fmt.Printf("Upgrading replica(id:%s) (%d/%d) master\n", rs.Master.Id, idx, len(rss.ReplicaSets))
		for _, s := range rs.Slaves {
			re := getRegion(s)
			if re == "" {
				return
			}
			if newMaster == nil && re == oldMasterRegion {
				newMaster = s
				continue
			}
			newSlaves = append(newSlaves, s)
		}
		if newMaster == nil {
			fmt.Printf("Select new master failed for master(%s) replica\n", oldMaster.Id)
			return
		}

		// send failover to the new master
		req := api.FailoverTakeoverParams{
			NodeId: newMaster.Id,
		}
		resp, err := utils.HttpPostExtra(failoverURL, req, 10*time.Second, extraHeader)
		if err != nil {
			fmt.Println(err)
			return
		}
		if resp.Errno != 0 {
			fmt.Println(resp.Errmsg)
			return
		}

		// failover request sent: wait until the new master really is master
		for {
			isMaster, err := checkMasterRole(newMaster, true)
			if err != nil {
				fmt.Println(err)
			} else if isMaster {
				break
			}
			// wait for next check
			time.Sleep(10 * time.Second)
		}

		// disable read flag of all new slaves, including the old master
		for _, s := range newSlaves {
			resp, err := configRead(s, false)
			if err != nil {
				fmt.Println(err)
				return
			}
			if resp.Errno != 0 {
				fmt.Println(resp.Errmsg)
				return
			}
		}

		// disable aof and rdb to speed up start
		if err := configAofAndRdb(oldMaster, false); err != nil {
			fmt.Println(err)
			return
		}
		// shutdown server; an error is reported as a restart and is not fatal
		if err := shutdownServer(oldMaster); err != nil {
			fmt.Printf("server %s restart\n", oldMaster.Addr())
		}

		// wait until the old master and the other slaves replicate again
		for cnt := 1; ; cnt++ {
			fmt.Printf("Check slave status %d times\n", cnt)
			if enableReady(newSlaves) {
				break
			}
			// not ok, wait for the next round
			time.Sleep(10 * time.Second)
		}

		// enable aof and rdb
		if err := configAofAndRdb(oldMaster, true); err != nil {
			fmt.Println(err)
			return
		}
		// save the idx of the process so a later run can resume after it
		if err := saveIdx(IdxServerAddr, pid, "master", idx); err != nil {
			fmt.Println(err)
			return
		}
	}
}
Beispiel #7
0
// fixHandshakeAction asks every node of the cluster for its failed nodes
// (getFailedNodes) and then makes every node that reported at least one
// failure forget each distinct failed node via redis.ClusterForget.
//
// Nodes are probed set by set, master first, then its slaves. For each failed
// node the forget commands are sent to all reporting nodes concurrently and
// their results are printed as they arrive before the next failed node is
// handled. The cli context is not used.
func fixHandshakeAction(c *cli.Context) {
	leader := context.GetLeaderAddr()
	fetchURL := "http://" + leader + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(fetchURL, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	reporters := []string{} // nodes that see at least one failed node
	toForget := []string{}  // distinct failed nodes, in discovery order

	// probe records what the node at nodeAddr considers failed; a node that
	// cannot be queried is reported and skipped.
	probe := func(nodeAddr string) {
		failed, err := getFailedNodes(nodeAddr)
		if err != nil {
			fmt.Println(err)
			return
		}
		if len(failed) > 0 {
			reporters = append(reporters, nodeAddr)
		}
		for _, id := range failed {
			if !nodeExists(id, toForget) {
				toForget = append(toForget, id)
			}
		}
	}

	for _, rs := range rss.ReplicaSets {
		probe(rs.Master.Addr())
		for _, slave := range rs.Slaves {
			probe(slave.Addr())
		}
	}

	// One result per reporter and round; the buffer lets every goroutine of
	// a round finish its send without waiting for the reader.
	results := make(chan string, len(reporters))
	for _, target := range toForget {
		for _, reporter := range reporters {
			go func(from, victim string) {
				resp, err := redis.ClusterForget(from, victim)
				results <- fmt.Sprintf("Node %s forget %s result %s %v", from, victim, resp, err)
			}(reporter, target)
		}
		// collect exactly as many results as goroutines were started
		for range reporters {
			fmt.Println(<-results)
		}
	}
}
Beispiel #8
0
// disableDuplicatedAction disables read on the surplus slaves of a region or
// zone.
//
// Flags read from c: -r region, -z zone (exactly one of the two must be set)
// and -l limit (at least 1). Within each replica set the master and the slaves
// located in the selected region/zone are counted in order, master first; once
// limit nodes have been counted, every further matching slave is sent a
// "disable read" request through api.NodePermPath and the response is shown
// with ShowResponse. The first request error is printed and ends the run.
func disableDuplicatedAction(c *cli.Context) {
	region := c.String("r")
	zone := c.String("z")
	switch {
	case region == "" && zone == "":
		fmt.Println("region or zone should be assigned")
		return
	case region != "" && zone != "":
		fmt.Println("region or zone should be choose one")
		return
	}
	limit := c.Int("l")
	if limit < 1 {
		fmt.Println("limit should be >=1 ")
		return
	}

	leader := context.GetLeaderAddr()
	fetchURL := "http://" + leader + api.FetchReplicaSetsPath

	resp, err := utils.HttpGet(fetchURL, nil, 5*time.Second)
	if err != nil {
		fmt.Println(err)
		return
	}

	var rss command.FetchReplicaSetsResult
	if err = utils.InterfaceToStruct(resp.Body, &rss); err != nil {
		fmt.Println(err)
		return
	}
	sort.Sort(topo.ByMasterId(rss.ReplicaSets))
	sort.Sort(topo.ByNodeState(rss.ReplicaSets))

	extraHeader := &utils.ExtraHeader{
		User:  context.Config.User,
		Role:  context.Config.Role,
		Token: context.Config.Token,
	}
	permURL := "http://" + leader + api.NodePermPath

	// Exactly one of region/zone is set here; select the matching node field
	// once instead of branching inside the loops.
	label, wanted := "zone", zone
	locate := func(n *topo.Node) string { return n.Zone }
	if region != "" {
		label, wanted = "region", region
		locate = func(n *topo.Node) string { return n.Region }
	}

	for _, rs := range rss.ReplicaSets {
		// how many more matching nodes may keep serving reads in this set
		remaining := limit
		if locate(rs.Master) == wanted {
			remaining--
		}

		for _, slave := range rs.Slaves {
			where := locate(slave)
			if where != wanted {
				continue
			}
			if remaining <= 0 {
				// over the limit: chmod -r
				fmt.Printf("Disable node addr: %s %s: %s\n", slave.Addr(), label, where)

				req := api.ToggleModeParams{
					NodeId: slave.Id,
					Action: "disable",
					Perm:   "read",
				}
				resp, err := utils.HttpPostExtra(permURL, req, 5*time.Second, extraHeader)
				if err != nil {
					fmt.Println(err)
					return
				}
				ShowResponse(resp)
			}
			remaining--
		}
	}
}