func (t *haTask) doFailover(s *models.Server) error { // first get all slaves group := models.NewServerGroup(globalEnv.ProductName(), s.GroupId) var err error group.Servers, err = group.GetServers(globalConn) if err != nil { return errors.Trace(err) } slaves := make([]*models.Server, 0, len(group.Servers)) slaveAddrs := make([]string, 0, len(group.Servers)) for _, s := range group.Servers { if s.Type == models.SERVER_TYPE_SLAVE { slaves = append(slaves, s) slaveAddrs = append(slaveAddrs, s.Addr) } } // elect a new master log.Infof("elect a new master in %v", slaveAddrs) addr, err := t.electNewMaster(slaves) if err != nil { return errors.Trace(err) } // prmote it as new master log.Infof("promote %s as the new master", addr) if err := group.Promote(globalConn, addr, globalEnv.StoreAuth()); err != nil { // should we fatal here and let human intervention ??? return errors.Trace(err) } // let other slaves replicate from the new master for _, slave := range slaves { if slave.Addr == addr { continue } log.Infof("let %s slaveof new master %s", slave.Addr, addr) if err := utils.SlaveOf(slave.Addr, addr, globalEnv.StoreAuth()); err != nil { // should we fatal here and let human intervention ??? return errors.Trace(err) } } return nil }
func (sg *ServerGroup) AddServer(coordConn zkhelper.Conn, s *Server, auth string) error { switch s.Type { case SERVER_TYPE_MASTER, SERVER_TYPE_SLAVE, SERVER_TYPE_OFFLINE: default: return errors.NotSupportedf("server type %q", s.Type) } // if type is offline, the server may be down, so we cannot use store function if s.Type != SERVER_TYPE_OFFLINE { // we only support reborn-server and qdb-server // origin redis has no slot_info command // atm, we can use this command to check whether server is alive or not. if _, err := utils.SlotsInfo(s.Addr, 0, 0, auth); err != nil { return errors.Trace(err) } } s.GroupId = sg.Id servers, err := sg.GetServers(coordConn) if err != nil { return errors.Trace(err) } var masterAddr string for _, server := range servers { if server.Type == SERVER_TYPE_MASTER { masterAddr = server.Addr } } // make sure there is only one master if s.Type == SERVER_TYPE_MASTER && len(masterAddr) > 0 { return errors.Trace(ErrNodeExists) } // if this group has no server. // promote this server to master automatically if type is not offline if len(servers) == 0 && s.Type != SERVER_TYPE_OFFLINE { s.Type = SERVER_TYPE_MASTER } if s.Type == SERVER_TYPE_MASTER { if role, err := utils.GetRole(s.Addr, auth); err != nil { return errors.Trace(err) } else if role != "master" { return errors.Errorf("we need master, but server %s is %s", s.Addr, role) } } val, err := json.Marshal(s) if err != nil { return errors.Trace(err) } coordPath := fmt.Sprintf("/zk/reborn/db_%s/servers/group_%d/%s", sg.ProductName, sg.Id, s.Addr) _, err = zkhelper.CreateOrUpdate(coordConn, coordPath, string(val), 0, zkhelper.DefaultFileACLs(), true) // update servers servers, err = sg.GetServers(coordConn) if err != nil { return errors.Trace(err) } sg.Servers = servers if s.Type == SERVER_TYPE_MASTER { err = NewAction(coordConn, sg.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, sg, "", true) if err != nil { return errors.Trace(err) } } else if s.Type == SERVER_TYPE_SLAVE && len(masterAddr) > 0 { // send command slaveof to slave err := utils.SlaveOf(s.Addr, masterAddr, auth) if err != nil { return errors.Trace(err) } } return nil }