func ServerGroups(coordConn zkhelper.Conn, productName string) ([]*ServerGroup, error) { var ret []*ServerGroup root := fmt.Sprintf("/zk/reborn/db_%s/servers", productName) groups, _, err := coordConn.Children(root) // if ErrNoNode, we may return an empty slice like ProxyList if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return nil, errors.Trace(err) } // Buggy :X //zkhelper.ChildrenRecursive(*coordConn, root) for _, group := range groups { // parse group_1 => 1 groupId, err := strconv.Atoi(strings.Split(group, "_")[1]) if err != nil { return nil, errors.Trace(err) } g, err := GetGroup(coordConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } ret = append(ret, g) } return ret, nil }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]struct{}) var offlineProxyIds []string for _, p := range proxies { proxyIds[p.Id] = struct{}{} } checkTimes := timeoutInMs / 500 // check every 500ms for times < checkTimes { if times >= 6 && (times*500)%1000 == 0 { log.Warning("abnormal waiting time for receivers", actionZkPath, offlineProxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } confirmIds := make(map[string]struct{}) for _, node := range nodes { id := path.Base(node) confirmIds[id] = struct{}{} } if len(confirmIds) != 0 { match := true // check if all proxy have responsed var notMatchList []string for id, _ := range proxyIds { // if proxy id not in confirm ids, means someone didn't response if _, ok := confirmIds[id]; !ok { match = false notMatchList = append(notMatchList, id) } } if match { return nil } offlineProxyIds = notMatchList } times += 1 time.Sleep(500 * time.Millisecond) } if len(offlineProxyIds) > 0 { log.Error("proxies didn't responed: ", offlineProxyIds) } // set offline proxies for _, id := range offlineProxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } return errors.Trace(ErrReceiverTimeout) }
func (sg *ServerGroup) RemoveServer(coordConn zkhelper.Conn, addr string) error { coordPath := fmt.Sprintf("/zk/reborn/db_%s/servers/group_%d/%s", sg.ProductName, sg.Id, addr) data, _, err := coordConn.Get(coordPath) if err != nil { return errors.Trace(err) } var s Server err = json.Unmarshal(data, &s) if err != nil { return errors.Trace(err) } log.Info(s) if s.Type == SERVER_TYPE_MASTER { return errors.New("cannot remove master, use promote first") } err = coordConn.Delete(coordPath, -1) if err != nil { return errors.Trace(err) } // update server list for i := 0; i < len(sg.Servers); i++ { if sg.Servers[i].Addr == s.Addr { sg.Servers = append(sg.Servers[:i], sg.Servers[i+1:]...) break } } // remove slave won't need proxy confirm err = NewAction(coordConn, sg.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, sg, "", false) return errors.Trace(err) }
func ServerGroups(zkConn zkhelper.Conn, productName string) ([]ServerGroup, error) { var ret []ServerGroup root := fmt.Sprintf("/zk/codis/db_%s/servers", productName) groups, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } // Buggy :X //zkhelper.ChildrenRecursive(*zkConn, root) for _, group := range groups { // parse group_1 => 1 groupId, err := strconv.Atoi(strings.Split(group, "_")[1]) if err != nil { return nil, errors.Trace(err) } g, err := GetGroup(zkConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } ret = append(ret, *g) } return ret, nil }
func waitForProxyMarkOffline(zkConn zkhelper.Conn, proxyName string) { _, _, c, _ := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName)) <-c info, _ := GetProxyInfo(zkConn, productName, proxyName) if info.State == PROXY_STATE_MARK_OFFLINE { SetProxyStatus(zkConn, productName, proxyName, PROXY_STATE_OFFLINE) } }
func GetActionSeqList(zkConn zkhelper.Conn, productName string) ([]int, error) { nodes, _, err := zkConn.Children(GetWatchActionPath(productName)) if err != nil { return nil, errors.Trace(err) } return ExtraSeqList(nodes) }
func GroupExists(zkConn zkhelper.Conn, productName string, groupId int) (bool, error) { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", productName, groupId) exists, _, err := zkConn.Exists(zkPath) if err != nil { return false, errors.Trace(err) } return exists, nil }
func releaseDashboardNode(conn zkhelper.Conn) { coordPath := fmt.Sprintf("/zk/reborn/db_%s/dashboard", globalEnv.ProductName()) if exists, _, _ := conn.Exists(coordPath); exists { log.Info("removing dashboard node") conn.Delete(coordPath, 0) } }
func CreateProxyInfo(zkConn zkhelper.Conn, productName string, pi *ProxyInfo) (string, error) { data, err := json.Marshal(pi) if err != nil { return "", errors.Trace(err) } dir := GetProxyPath(productName) zkhelper.CreateRecursive(zkConn, dir, "", 0, zkhelper.DefaultDirACLs()) return zkConn.Create(path.Join(dir, pi.Id), data, zk.FlagEphemeral, zkhelper.DefaultFileACLs()) }
func GetServer(zkConn zkhelper.Conn, zkPath string) (*Server, error) { data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } srv := Server{} if err := json.Unmarshal(data, &srv); err != nil { return nil, errors.Trace(err) } return &srv, nil }
func waitForProxyMarkOffline(coordConn zkhelper.Conn, proxyName string) { _, _, c, _ := coordConn.GetW(path.Join(GetProxyPath(productName), proxyName)) <-c // test action need response, if proxy not responsed, then marked offline info, _ := GetProxyInfo(coordConn, productName, proxyName) if info.State == PROXY_STATE_MARK_OFFLINE { SetProxyStatus(coordConn, productName, proxyName, PROXY_STATE_OFFLINE) } }
// GetLeaderAddr gets the leader tso address in zookeeper for outer use. func GetLeader(conn zkhelper.Conn, rootPath string) (string, error) { data, _, err := conn.Get(getLeaderPath(rootPath)) if err != nil { return "", errors.Trace(err) } // if err != checkLeaderExists(conn); err != nil { // return "", errors.Trace(err) // } return getLeader(data) }
func GetSlot(zkConn zkhelper.Conn, productName string, id int) (*Slot, error) { zkPath := GetSlotPath(productName, id) data, _, err := zkConn.Get(zkPath) if err != nil { return nil, err } slot := Slot{} if err := json.Unmarshal(data, &slot); err != nil { return nil, err } return &slot, nil }
func loadTimestamp(conn zkhelper.Conn, rootPath string) (int64, error) { data, _, err := conn.Get(getTimestampPath(rootPath)) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return 0, zk.ErrNoNode } else if err != nil { return 0, errors.Trace(err) } else if len(data) != 8 { return 0, errors.Errorf("invalid timestamp data, must 8 bytes, but %d", len(data)) } return int64(binary.BigEndian.Uint64(data)), nil }
func GetActionObject(zkConn zkhelper.Conn, productName string, seq int64, act interface{}) error { data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), "action_"+fmt.Sprintf("%0.10d", seq))) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(data, act); err != nil { return errors.Trace(err) } return nil }
func GetActionObject(zkConn zkhelper.Conn, productName string, seq int64, act interface{}, provider string) error { data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(data, act); err != nil { return errors.Trace(err) } return nil }
func GetProxyInfo(zkConn zkhelper.Conn, productName string, proxyName string) (*ProxyInfo, error) { var pi ProxyInfo data, _, err := zkConn.Get(path.Join(GetProxyPath(productName), proxyName)) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &pi); err != nil { return nil, errors.Trace(err) } return &pi, nil }
func saveTimestamp(conn zkhelper.Conn, rootPath string, ts int64) error { var buf [8]byte binary.BigEndian.PutUint64(buf[:], uint64(ts)) tsPath := getTimestampPath(rootPath) _, err := conn.Set(tsPath, buf[:], -1) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { _, err = conn.Create(tsPath, buf[:], 0, zk.WorldACL(zkhelper.PERM_FILE)) } return errors.Trace(err) }
func GetActionWithSeq(zkConn zkhelper.Conn, productName string, seq int64) (*Action, error) { var act Action data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), "action_"+fmt.Sprintf("%0.10d", seq))) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &act); err != nil { return nil, errors.Trace(err) } return &act, nil }
func GetActionWithSeq(zkConn zkhelper.Conn, productName string, seq int64, provider string) (*Action, error) { var act Action data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &act); err != nil { return nil, errors.Trace(err) } return &act, nil }
func GetSlot(coordConn zkhelper.Conn, productName string, id int) (*Slot, error) { coordPath := GetSlotPath(productName, id) data, _, err := coordConn.Get(coordPath) if err != nil { return nil, err } var slot Slot if err := json.Unmarshal(data, &slot); err != nil { return nil, err } return &slot, nil }
func WaitForReceiver(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo) error { if len(proxies) == 0 { return nil } times := 0 var proxyIds []string var offlineProxyIds []string for _, p := range proxies { proxyIds = append(proxyIds, p.Id) } sort.Strings(proxyIds) // check every 500ms for times < 60 { if times >= 6 && (times*500)%1000 == 0 { log.Warning("abnormal waiting time for receivers", actionZkPath) } nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } var confirmIds []string for _, node := range nodes { id := path.Base(node) confirmIds = append(confirmIds, id) } if len(confirmIds) != 0 { sort.Strings(confirmIds) if utils.Strings(proxyIds).Eq(confirmIds) { return nil } offlineProxyIds = proxyIds[len(confirmIds)-1:] } times += 1 time.Sleep(500 * time.Millisecond) } if len(offlineProxyIds) > 0 { log.Error("proxies didn't responed: ", offlineProxyIds) } // set offline proxies for _, id := range offlineProxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return err } } return ErrReceiverTimeout }
func GetFenceProxyMap(zkConn zkhelper.Conn, productName string) (map[string]bool, error) { children, _, err := zkConn.Children(GetProxyFencePath(productName)) if err != nil { if err.Error() == zk.ErrNoNode.Error() { return make(map[string]bool), nil } else { return nil, err } } m := make(map[string]bool, len(children)) for _, fenceNode := range children { m[fenceNode] = true } return m, nil }
// GetWatchLeader gets the leader tso address in zookeeper and returns a watcher for leader change. func GetWatchLeader(conn zkhelper.Conn, rootPath string) (string, <-chan zk.Event, error) { data, _, watcher, err := conn.GetW(getLeaderPath(rootPath)) if err != nil { return "", nil, errors.Trace(err) } addr, err := getLeader(data) if err != nil { return "", nil, errors.Trace(err) } // if err != checkLeaderExists(conn); err != nil { // return "", errors.Trace(err) // } return addr, watcher, nil }
func NewAction(zkConn zkhelper.Conn, productName string, actionType ActionType, target interface{}, desc string, needConfirm bool) error { ts := strconv.FormatInt(time.Now().Unix(), 10) action := &Action{ Type: actionType, Desc: desc, Target: target, Ts: ts, } // set action receivers proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } for _, p := range proxies { action.Receivers = append(action.Receivers, p.Id) } b, _ := json.Marshal(action) prefix := GetWatchActionPath(productName) err = CreateActionRootPath(zkConn, prefix) if err != nil { return errors.Trace(err) } // create action node actionCreated, err := zkConn.Create(prefix+"/action_", b, int32(zk.FlagSequence), zkhelper.DefaultDirACLs()) if err != nil { log.Error(err, prefix) return errors.Trace(err) } if needConfirm { if err := WaitForReceiver(zkConn, productName, actionCreated, proxies); err != nil { return errors.Trace(err) } } return nil }
func (sg *ServerGroup) GetServers(coordConn zkhelper.Conn) ([]*Server, error) { var ret []*Server root := fmt.Sprintf("/zk/reborn/db_%s/servers/group_%d", sg.ProductName, sg.Id) nodes, _, err := coordConn.Children(root) if err != nil { return nil, errors.Trace(err) } for _, node := range nodes { nodePath := root + "/" + node s, err := GetServer(coordConn, nodePath) if err != nil { return nil, errors.Trace(err) } ret = append(ret, s) } return ret, nil }
func (self *ServerGroup) GetServers(zkConn zkhelper.Conn) ([]Server, error) { var ret []Server root := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) nodes, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } for _, node := range nodes { nodePath := root + "/" + node s, err := GetServer(zkConn, nodePath) if err != nil { return nil, errors.Trace(err) } ret = append(ret, *s) } return ret, nil }
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error { p, err := GetProxyInfo(zkConn, productName, proxyName) if err != nil { return errors.Trace(err) } if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE { return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status) } p.State = status b, _ := json.Marshal(p) _, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1) if err != nil { return errors.Trace(err) } if status == PROXY_STATE_MARK_OFFLINE { // wait for the proxy down for { _, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName)) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return nil } else if err != nil { return errors.Trace(err) } <-c info, err := GetProxyInfo(zkConn, productName, proxyName) log.Info("mark_offline, check proxy status:", proxyName, info, err) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { log.Info("shutdown proxy successful") return nil } else if err != nil { return errors.Trace(err) } if info.State == PROXY_STATE_OFFLINE { log.Info("proxy:", proxyName, "offline success!") return nil } } } return nil }
func ForceRemoveLock(coordConn zkhelper.Conn, productName string) error { lockPath := fmt.Sprintf("/zk/reborn/db_%s/LOCK", productName) children, _, err := coordConn.Children(lockPath) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return errors.Trace(err) } for _, c := range children { fullPath := path.Join(lockPath, c) log.Info("deleting..", fullPath) err := coordConn.Delete(fullPath, 0) if err != nil { return errors.Trace(err) } } return nil }
func ForceRemoveLock(zkConn zkhelper.Conn, productName string) error { lockPath := fmt.Sprintf("/zk/codis/db_%s/LOCK", productName) children, _, err := zkConn.Children(lockPath) if err != nil { return errors.Trace(err) } for _, c := range children { fullPath := path.Join(lockPath, c) log.Info("deleting..", fullPath) err := zkConn.Delete(fullPath, 0) if err != nil { return errors.Trace(err) } } return nil }