func (s *Slot) Update(coordConn zkhelper.Conn) error { // status validation switch s.State.Status { case SLOT_STATUS_MIGRATE, SLOT_STATUS_OFFLINE, SLOT_STATUS_ONLINE, SLOT_STATUS_PRE_MIGRATE: { // valid status, OK } default: { return errors.Trace(ErrUnknownSlotStatus) } } data, err := json.Marshal(s) if err != nil { return errors.Trace(err) } coordPath := GetSlotPath(s.ProductName, s.Id) _, err = zkhelper.CreateOrUpdate(coordConn, coordPath, string(data), 0, zkhelper.DefaultFileACLs(), true) if err != nil { return errors.Trace(err) } if s.State.Status == SLOT_STATUS_MIGRATE { err = NewAction(coordConn, s.ProductName, ACTION_TYPE_SLOT_MIGRATE, s, "", true) } else { err = NewAction(coordConn, s.ProductName, ACTION_TYPE_SLOT_CHANGED, s, "", true) } return errors.Trace(err) }
func SetSlotRange(zkConn zkhelper.Conn, productName string, fromSlot, toSlot, groupId int, status SlotStatus) error { if status != SLOT_STATUS_OFFLINE && status != SLOT_STATUS_ONLINE { return errors.New("invalid status") } for i := fromSlot; i <= toSlot; i++ { s, err := GetSlot(zkConn, productName, i) if err != nil { return errors.Trace(err) } s.GroupId = groupId s.State.Status = status data, err := json.Marshal(s) if err != nil { return errors.Trace(err) } zkPath := GetSlotPath(productName, i) _, err = zkhelper.CreateOrUpdate(zkConn, zkPath, string(data), 0, zkhelper.DefaultFileACLs(), true) if err != nil { return errors.Trace(err) } } param := SlotMultiSetParam{ From: fromSlot, To: toSlot, GroupId: groupId, Status: status, } err := NewAction(zkConn, productName, ACTION_TYPE_MULTI_SLOT_CHANGED, param, "", true) return errors.Trace(err) }
func TestProxyOfflineInWaitActionReceiver(t *testing.T) { log.Info("test proxy offline when waiting action response") fakeZkConn := zkhelper.NewConn() for i := 1; i <= 4; i++ { CreateProxyInfo(fakeZkConn, productName, &ProxyInfo{ Id: strconv.Itoa(i), State: PROXY_STATE_ONLINE, }) go waitForProxyMarkOffline(fakeZkConn, strconv.Itoa(i)) } lst, _ := ProxyList(fakeZkConn, productName, nil) if len(lst) != 4 { t.Error("create proxy info error") } go func() { time.Sleep(500 * time.Millisecond) actionPath := path.Join(GetActionResponsePath(productName), fakeZkConn.Seq2Str(1)) //create test response for proxy 4, means proxy 1,2,3 are timeout fakeZkConn.Create(path.Join(actionPath, "4"), nil, 0, zkhelper.DefaultFileACLs()) }() err := NewActionWithTimeout(fakeZkConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000) if err != nil && err.Error() != ErrReceiverTimeout.Error() { t.Error(errors.ErrorStack(err)) } for i := 1; i <= 3; i++ { if info, _ := GetProxyInfo(fakeZkConn, productName, strconv.Itoa(i)); info.State != PROXY_STATE_OFFLINE { t.Error("shutdown offline proxy error") } } }
func CreateProxyInfo(zkConn zkhelper.Conn, productName string, pi *ProxyInfo) (string, error) { data, err := json.Marshal(pi) if err != nil { return "", errors.Trace(err) } dir := GetProxyPath(productName) zkhelper.CreateRecursive(zkConn, dir, "", 0, zkhelper.DefaultDirACLs()) return zkConn.Create(path.Join(dir, pi.Id), data, zk.FlagEphemeral, zkhelper.DefaultFileACLs()) }
func (m *MigrateManager) createNode() error { zkhelper.CreateRecursive(m.zkConn, fmt.Sprintf("/zk/codis/db_%s/migrate_tasks", m.productName), "", 0, zkhelper.DefaultDirACLs()) _, err := m.zkConn.Create(getManagerPath(m.productName), []byte(""), zk.FlagEphemeral, zkhelper.DefaultFileACLs()) if err != nil { log.Error("dashboard already exists! err: ", err) } return nil }
func (self *ServerGroup) AddServer(zkConn zkhelper.Conn, s *Server) error { s.GroupId = self.Id servers, err := self.GetServers(zkConn) if err != nil { return errors.Trace(err) } var masterAddr string for _, server := range servers { if server.Type == SERVER_TYPE_MASTER { masterAddr = server.Addr } } // make sure there is only one master if s.Type == SERVER_TYPE_MASTER && len(masterAddr) > 0 { return errors.Trace(ErrNodeExists) } // if this group has no server. auto promote this server to master if len(servers) == 0 { s.Type = SERVER_TYPE_MASTER } val, err := json.Marshal(s) if err != nil { return errors.Trace(err) } zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, s.Addr) _, err = zkhelper.CreateOrUpdate(zkConn, zkPath, string(val), 0, zkhelper.DefaultFileACLs(), true) // update servers servers, err = self.GetServers(zkConn) if err != nil { return errors.Trace(err) } self.Servers = servers if s.Type == SERVER_TYPE_MASTER { err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", true) if err != nil { return errors.Trace(err) } } else if s.Type == SERVER_TYPE_SLAVE && len(masterAddr) > 0 { // send command slaveof to slave err := utils.SlaveOf(s.Addr, masterAddr) if err != nil { return errors.Trace(err) } } return nil }
func addAgent(a *agentInfo) error { basePath := agentPath() zkhelper.CreateRecursive(globalConn, basePath, "", 0, zkhelper.DefaultDirACLs()) contents, err := json.Marshal(a) if err != nil { return errors.Trace(err) } _, err = globalConn.Create(path.Join(basePath, a.ID), contents, zk.FlagEphemeral, zkhelper.DefaultFileACLs()) return errors.Trace(err) }
func (top *Topology) DoResponse(seq int, pi *models.ProxyInfo) error { //create response node actionPath := top.GetActionResponsePath(seq) //log.Debug("actionPath:", actionPath) data, err := json.Marshal(pi) if err != nil { return errors.Trace(err) } _, err = top.coordConn.Create(path.Join(actionPath, pi.ID), data, 0, zkhelper.DefaultFileACLs()) return err }
func (self *ServerGroup) AddServer(zkConn zkhelper.Conn, s *Server) error { s.GroupId = self.Id val, err := json.Marshal(s) if err != nil { return errors.Trace(err) } if s.Type == SERVER_TYPE_MASTER { // make sure there is only one master servers, err := self.GetServers(zkConn) if err != nil { return errors.Trace(err) } for _, server := range servers { if server.Type == SERVER_TYPE_MASTER { return errors.Trace(ErrNodeExists) } } } zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, s.Addr) _, err = zkhelper.CreateOrUpdate(zkConn, zkPath, string(val), 0, zkhelper.DefaultFileACLs(), true) // update servers servers, err := self.GetServers(zkConn) if err != nil { return errors.Trace(err) } self.Servers = servers if s.Type == SERVER_TYPE_MASTER { err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", true) if err != nil { return errors.Trace(err) } } return nil }
func SetSlots(coordConn zkhelper.Conn, productName string, slots []*Slot, groupId int, status SlotStatus) error { if status != SLOT_STATUS_OFFLINE && status != SLOT_STATUS_ONLINE { return errors.New("invalid status") } ok, err := GroupExists(coordConn, productName, groupId) if err != nil { return errors.Trace(err) } if !ok { return errors.NotFoundf("group %d", groupId) } for _, s := range slots { s.GroupId = groupId s.State.Status = status data, err := json.Marshal(s) if err != nil { return errors.Trace(err) } coordPath := GetSlotPath(productName, s.Id) _, err = zkhelper.CreateOrUpdate(coordConn, coordPath, string(data), 0, zkhelper.DefaultFileACLs(), true) if err != nil { return errors.Trace(err) } } param := SlotMultiSetParam{ From: -1, To: -1, GroupId: groupId, Status: status, } err = NewAction(coordConn, productName, ACTION_TYPE_MULTI_SLOT_CHANGED, param, "", true) return errors.Trace(err) }
func createDashboardNode() error { conn := CreateZkConn() defer conn.Close() // make sure root dir is exists rootDir := fmt.Sprintf("/zk/codis/db_%s", globalEnv.ProductName()) zkhelper.CreateRecursive(conn, rootDir, "", 0, zkhelper.DefaultDirACLs()) zkPath := fmt.Sprintf("%s/dashboard", rootDir) // make sure we're the only one dashboard if exists, _, _ := conn.Exists(zkPath); exists { data, _, _ := conn.Get(zkPath) return errors.New("dashboard already exists: " + string(data)) } content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid()) pathCreated, err := conn.Create(zkPath, []byte(content), zk.FlagEphemeral, zkhelper.DefaultFileACLs()) log.Info("dashboard node created:", pathCreated, string(content)) return errors.Trace(err) }
func (s *testModelSuite) TestProxyOfflineInWaitActionReceiver(c *C) { log.Info("[TestProxyOfflineInWaitActionReceiver][start]") fakeCoordConn := zkhelper.NewConn() proxyNum := 4 for i := 1; i <= proxyNum; i++ { CreateProxyInfo(fakeCoordConn, productName, &ProxyInfo{ ID: strconv.Itoa(i), State: PROXY_STATE_ONLINE, }) go waitForProxyMarkOffline(fakeCoordConn, strconv.Itoa(i)) } lst, _ := ProxyList(fakeCoordConn, productName, nil) c.Assert(len(lst), Equals, proxyNum) go func() { time.Sleep(500 * time.Millisecond) actionPath := path.Join(GetActionResponsePath(productName), fakeCoordConn.Seq2Str(1)) // create test response for proxy 4, means proxy 1,2,3 are timeout fakeCoordConn.Create(path.Join(actionPath, "4"), nil, 0, zkhelper.DefaultFileACLs()) }() err := NewActionWithTimeout(fakeCoordConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000) if c.Check(err, NotNil) { c.Assert(err.Error(), Equals, ErrReceiverTimeout.Error()) } for i := 1; i <= proxyNum-1; i++ { info, _ := GetProxyInfo(fakeCoordConn, productName, strconv.Itoa(i)) c.Assert(info.State, Equals, PROXY_STATE_OFFLINE) } fakeCoordConn.Close() log.Info("[TestProxyOfflineInWaitActionReceiver][end]") }
func createDashboardNode(conn zkhelper.Conn) error { // make sure root dir is exists rootDir := fmt.Sprintf("/zk/reborn/db_%s", globalEnv.ProductName()) zkhelper.CreateRecursive(conn, rootDir, "", 0, zkhelper.DefaultDirACLs()) coordPath := fmt.Sprintf("%s/dashboard", rootDir) // make sure we're the only one dashboard timeoutCh := time.After(60 * time.Second) for { if exists, _, ch, _ := conn.ExistsW(coordPath); exists { data, _, _ := conn.Get(coordPath) if checkDashboardAlive(data) { return errors.Errorf("dashboard already exists: %s", string(data)) } else { log.Warningf("dashboard %s exists in zk, wait it removed", data) select { case <-ch: case <-timeoutCh: return errors.Errorf("wait existed dashboard %s removed timeout", string(data)) } } } else { break } } content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid()) pathCreated, err := conn.Create(coordPath, []byte(content), zk.FlagEphemeral, zkhelper.DefaultFileACLs()) log.Infof("dashboard node %s created, data %s, err %v", pathCreated, string(content), err) return errors.Trace(err) }
// CreateProxyFenceNode creates the fence node for pi at
// <GetProxyFencePath(productName)>/<pi.Addr> with empty content and returns
// the values produced by zkhelper.CreateRecursive.
func CreateProxyFenceNode(zkConn zkhelper.Conn, productName string, pi *ProxyInfo) (string, error) {
	fencePath := path.Join(GetProxyFencePath(productName), pi.Addr)
	return zkhelper.CreateRecursive(zkConn, fencePath, "", 0, zkhelper.DefaultFileACLs())
}
func (sg *ServerGroup) AddServer(coordConn zkhelper.Conn, s *Server, auth string) error { switch s.Type { case SERVER_TYPE_MASTER, SERVER_TYPE_SLAVE, SERVER_TYPE_OFFLINE: default: return errors.NotSupportedf("server type %q", s.Type) } // if type is offline, the server may be down, so we cannot use store function if s.Type != SERVER_TYPE_OFFLINE { // we only support reborn-server and qdb-server // origin redis has no slot_info command // atm, we can use this command to check whether server is alive or not. if _, err := utils.SlotsInfo(s.Addr, 0, 0, auth); err != nil { return errors.Trace(err) } } s.GroupId = sg.Id servers, err := sg.GetServers(coordConn) if err != nil { return errors.Trace(err) } var masterAddr string for _, server := range servers { if server.Type == SERVER_TYPE_MASTER { masterAddr = server.Addr } } // make sure there is only one master if s.Type == SERVER_TYPE_MASTER && len(masterAddr) > 0 { return errors.Trace(ErrNodeExists) } // if this group has no server. // promote this server to master automatically if type is not offline if len(servers) == 0 && s.Type != SERVER_TYPE_OFFLINE { s.Type = SERVER_TYPE_MASTER } if s.Type == SERVER_TYPE_MASTER { if role, err := utils.GetRole(s.Addr, auth); err != nil { return errors.Trace(err) } else if role != "master" { return errors.Errorf("we need master, but server %s is %s", s.Addr, role) } } val, err := json.Marshal(s) if err != nil { return errors.Trace(err) } coordPath := fmt.Sprintf("/zk/reborn/db_%s/servers/group_%d/%s", sg.ProductName, sg.Id, s.Addr) _, err = zkhelper.CreateOrUpdate(coordConn, coordPath, string(val), 0, zkhelper.DefaultFileACLs(), true) // update servers servers, err = sg.GetServers(coordConn) if err != nil { return errors.Trace(err) } sg.Servers = servers if s.Type == SERVER_TYPE_MASTER { err = NewAction(coordConn, sg.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, sg, "", true) if err != nil { return errors.Trace(err) } } else if s.Type == SERVER_TYPE_SLAVE && len(masterAddr) > 0 { 
// send command slaveof to slave err := utils.SlaveOf(s.Addr, masterAddr, auth) if err != nil { return errors.Trace(err) } } return nil }
func NewActionWithTimeout(zkConn zkhelper.Conn, productName string, actionType ActionType, target interface{}, desc string, needConfirm bool, timeoutInMs int) error { ts := strconv.FormatInt(time.Now().Unix(), 10) action := &Action{ Type: actionType, Desc: desc, Target: target, Ts: ts, } // set action receivers proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } if needConfirm { // do fencing here, make sure 'offline' proxies are really offline // now we only check whether the proxy lists are match fenceProxies, err := GetFenceProxyMap(zkConn, productName) if err != nil { return errors.Trace(err) } for _, proxy := range proxies { delete(fenceProxies, proxy.Addr) } if len(fenceProxies) > 0 { errMsg := bytes.NewBufferString("Some proxies may not stop cleanly:") for k, _ := range fenceProxies { errMsg.WriteString(" ") errMsg.WriteString(k) } return errors.New(errMsg.String()) } } for _, p := range proxies { buf, err := json.Marshal(p) if err != nil { return errors.Trace(err) } action.Receivers = append(action.Receivers, string(buf)) } b, _ := json.Marshal(action) prefix := GetWatchActionPath(productName) //action root path err = CreateActionRootPath(zkConn, prefix) if err != nil { return errors.Trace(err) } //response path respPath := path.Join(path.Dir(prefix), "ActionResponse") err = CreateActionRootPath(zkConn, respPath) if err != nil { return errors.Trace(err) } //create response node, etcd do not support create in order directory //get path first actionRespPath, err := zkConn.Create(respPath+"/", b, int32(zk.FlagSequence), zkhelper.DefaultFileACLs()) if err != nil { log.Error(err, respPath) return errors.Trace(err) } //remove file then create directory zkConn.Delete(actionRespPath, -1) actionRespPath, err = zkConn.Create(actionRespPath, b, 0, zkhelper.DefaultDirACLs()) if err != nil { log.Error(err, respPath) return errors.Trace(err) } suffix := 
path.Base(actionRespPath) // create action node actionPath := path.Join(prefix, suffix) _, err = zkConn.Create(actionPath, b, 0, zkhelper.DefaultFileACLs()) if err != nil { log.Error(err, actionPath) return errors.Trace(err) } if needConfirm { if err := WaitForReceiverWithTimeout(zkConn, productName, actionRespPath, proxies, timeoutInMs); err != nil { return errors.Trace(err) } } return nil }