Example #1
0
// doFailover elects a replacement master for the server group that s belongs
// to, promotes it, and re-points every other slave of the group at it.
//
// The group is looked up by s.GroupId; only servers whose type is
// models.SERVER_TYPE_SLAVE are offered to electNewMaster. Any error from
// fetching the group, the election, the promotion, or a SlaveOf call aborts
// the failover immediately and is returned traced, so slaves after the one
// that failed are left untouched.
func (t *haTask) doFailover(s *models.Server) error {
	group := models.NewServerGroup(globalEnv.ProductName(), s.GroupId)

	// Load the current members of the group.
	members, err := group.GetServers(globalConn)
	if err != nil {
		return errors.Trace(err)
	}
	group.Servers = members

	// Collect the slaves (and their addresses, for logging) as candidates.
	candidates := make([]*models.Server, 0, len(members))
	candidateAddrs := make([]string, 0, len(members))
	for _, member := range members {
		if member.Type != models.SERVER_TYPE_SLAVE {
			continue
		}
		candidates = append(candidates, member)
		candidateAddrs = append(candidateAddrs, member.Addr)
	}

	// Elect a new master among the candidates.
	log.Infof("elect a new master in %v", candidateAddrs)
	newMaster, err := t.electNewMaster(candidates)
	if err != nil {
		return errors.Trace(err)
	}

	// Promote the elected server to master.
	log.Infof("promote %s as the new master", newMaster)
	if perr := group.Promote(globalConn, newMaster, globalEnv.StoreAuth()); perr != nil {
		// Open question from the original author: should this be fatal
		// and wait for human intervention instead?
		return errors.Trace(perr)
	}

	// Make every remaining slave replicate from the new master.
	for _, follower := range candidates {
		if follower.Addr != newMaster {
			log.Infof("let %s slaveof new master %s", follower.Addr, newMaster)
			if serr := utils.SlaveOf(follower.Addr, newMaster, globalEnv.StoreAuth()); serr != nil {
				// Open question from the original author: should this be
				// fatal and wait for human intervention instead?
				return errors.Trace(serr)
			}
		}
	}

	return nil
}
Example #2
0
// AddServer registers s as a member of the server group sg and stores it via
// coordConn.
//
// Steps, in order:
//   - s.Type must be master, slave or offline; anything else yields a
//     NotSupported error.
//   - Unless s is offline, the server is probed with utils.SlotsInfo.
//   - s.GroupId is overwritten with sg.Id (this happens even if a later step
//     fails).
//   - Adding a master to a group that already has one fails with
//     ErrNodeExists.
//   - If the group is empty and s is not offline, s is turned into a master.
//   - A master must report the role "master" through utils.GetRole.
//   - s is JSON-encoded and written under the group's path, then sg.Servers
//     is refreshed from the coordinator.
//   - For a master a ACTION_TYPE_SERVER_GROUP_CHANGED action is created; for
//     a slave in a group with a known master, utils.SlaveOf is issued.
//
// auth is passed through to the utils calls that talk to the server. Every
// failure is returned as a traced error.
func (sg *ServerGroup) AddServer(coordConn zkhelper.Conn, s *Server, auth string) error {
	switch s.Type {
	case SERVER_TYPE_MASTER, SERVER_TYPE_SLAVE, SERVER_TYPE_OFFLINE:
	default:
		return errors.NotSupportedf("server type %q", s.Type)
	}
	// if type is offline, the server may be down, so we cannot use store function
	if s.Type != SERVER_TYPE_OFFLINE {
		// we only support reborn-server and qdb-server
		// origin redis has no slot_info command
		// atm, we can use this command to check whether server is alive or not.
		if _, err := utils.SlotsInfo(s.Addr, 0, 0, auth); err != nil {
			return errors.Trace(err)
		}
	}

	s.GroupId = sg.Id

	servers, err := sg.GetServers(coordConn)
	if err != nil {
		return errors.Trace(err)
	}
	// Remember the address of the group's current master, if any.
	var masterAddr string
	for _, server := range servers {
		if server.Type == SERVER_TYPE_MASTER {
			masterAddr = server.Addr
		}
	}

	// make sure there is only one master
	if s.Type == SERVER_TYPE_MASTER && len(masterAddr) > 0 {
		return errors.Trace(ErrNodeExists)
	}

	// if this group has no server.
	// promote this server to master automatically if type is not offline
	if len(servers) == 0 && s.Type != SERVER_TYPE_OFFLINE {
		s.Type = SERVER_TYPE_MASTER
	}

	if s.Type == SERVER_TYPE_MASTER {
		role, err := utils.GetRole(s.Addr, auth)
		if err != nil {
			return errors.Trace(err)
		}
		if role != "master" {
			return errors.Errorf("we need master, but server %s is %s", s.Addr, role)
		}
	}

	val, err := json.Marshal(s)
	if err != nil {
		return errors.Trace(err)
	}

	coordPath := fmt.Sprintf("/zk/reborn/db_%s/servers/group_%d/%s", sg.ProductName, sg.Id, s.Addr)
	// The write must succeed before anything else is done: previously this
	// error was overwritten by the GetServers call below and silently lost.
	if _, err = zkhelper.CreateOrUpdate(coordConn, coordPath, string(val), 0, zkhelper.DefaultFileACLs(), true); err != nil {
		return errors.Trace(err)
	}

	// update servers
	servers, err = sg.GetServers(coordConn)
	if err != nil {
		return errors.Trace(err)
	}
	sg.Servers = servers

	if s.Type == SERVER_TYPE_MASTER {
		err = NewAction(coordConn, sg.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, sg, "", true)
		if err != nil {
			return errors.Trace(err)
		}
	} else if s.Type == SERVER_TYPE_SLAVE && len(masterAddr) > 0 {
		// send command slaveof to slave
		if err := utils.SlaveOf(s.Addr, masterAddr, auth); err != nil {
			return errors.Trace(err)
		}
	}

	return nil
}