func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr) data, _, err := zkConn.Get(zkPath) if err != nil { return errors.Trace(err) } var s Server err = json.Unmarshal(data, &s) if err != nil { return errors.Trace(err) } log.Info(s) if s.Type == SERVER_TYPE_MASTER { return errors.Errorf("cannot remove master, use promote first") } err = zkConn.Delete(zkPath, -1) if err != nil { return errors.Trace(err) } // update server list for i := 0; i < len(self.Servers); i++ { if self.Servers[i].Addr == s.Addr { self.Servers = append(self.Servers[:i], self.Servers[i+1:]...) break } } // remove slave won't need proxy confirm err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) return errors.Trace(err) }
func GetActionSeqList(zkConn zkhelper.Conn, productName string) ([]int, error) { nodes, _, err := zkConn.Children(GetWatchActionPath(productName)) if err != nil { return nil, errors.Trace(err) } return ExtraSeqList(nodes) }
func ServerGroups(zkConn zkhelper.Conn, productName string) ([]*ServerGroup, error) { var ret []*ServerGroup root := fmt.Sprintf("/zk/codis/db_%s/servers", productName) groups, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } // Buggy :X //zkhelper.ChildrenRecursive(*zkConn, root) for _, group := range groups { // parse group_1 => 1 groupId, err := strconv.Atoi(strings.Split(group, "_")[1]) if err != nil { return nil, errors.Trace(err) } g, err := GetGroup(zkConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } ret = append(ret, g) } return ret, nil }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]struct{}) var offlineProxyIds []string for _, p := range proxies { proxyIds[p.Id] = struct{}{} } checkTimes := timeoutInMs / 500 // check every 500ms for times < checkTimes { if times >= 6 && (times*500)%1000 == 0 { log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, offlineProxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } confirmIds := make(map[string]struct{}) for _, node := range nodes { id := path.Base(node) confirmIds[id] = struct{}{} } if len(confirmIds) != 0 { match := true // check if all proxy have responsed var notMatchList []string for id, _ := range proxyIds { // if proxy id not in confirm ids, means someone didn't response if _, ok := confirmIds[id]; !ok { match = false notMatchList = append(notMatchList, id) } } if match { return nil } offlineProxyIds = notMatchList } times += 1 time.Sleep(500 * time.Millisecond) } if len(offlineProxyIds) > 0 { log.Errorf("proxies didn't responed: %v", offlineProxyIds) } // set offline proxies for _, id := range offlineProxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } return errors.Trace(ErrReceiverTimeout) }
func waitForProxyMarkOffline(zkConn zkhelper.Conn, proxyName string) { _, _, c, _ := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName)) <-c info, _ := GetProxyInfo(zkConn, productName, proxyName) if info.State == PROXY_STATE_MARK_OFFLINE { SetProxyStatus(zkConn, productName, proxyName, PROXY_STATE_OFFLINE) } }
func GroupExists(zkConn zkhelper.Conn, productName string, groupId int) (bool, error) { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", productName, groupId) exists, _, err := zkConn.Exists(zkPath) if err != nil { return false, errors.Trace(err) } return exists, nil }
func CreateProxyInfo(zkConn zkhelper.Conn, productName string, pi *ProxyInfo) (string, error) { data, err := json.Marshal(pi) if err != nil { return "", errors.Trace(err) } dir := GetProxyPath(productName) zkhelper.CreateRecursive(zkConn, dir, "", 0, zkhelper.DefaultDirACLs()) return zkConn.Create(path.Join(dir, pi.Id), data, zk.FlagEphemeral, zkhelper.DefaultFileACLs()) }
func doResponseForTest(conn zkhelper.Conn, seq string, pi *ProxyInfo) error { actionPath := GetActionResponsePath(productName) + "/" + seq data, err := json.Marshal(pi) if err != nil { return errors.Trace(err) } _, err = conn.Create(path.Join(actionPath, pi.Id), data, 0, zkhelper.DefaultFileACLs()) return err }
func GetServer(zkConn zkhelper.Conn, zkPath string) (*Server, error) { data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } srv := Server{} if err := json.Unmarshal(data, &srv); err != nil { return nil, errors.Trace(err) } return &srv, nil }
func GetActionObject(zkConn zkhelper.Conn, productName string, seq int64, act interface{}, provider string) error { data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(data, act); err != nil { return errors.Trace(err) } return nil }
func CreateOrUpdate(zconn zkhelper.Conn, zkPath, value string, flags int, aclv []topo.ACL, recursive bool) (pathCreated string, err error) { if recursive { pathCreated, err = CreateRecursive(zconn, zkPath, value, flags, aclv) } else { pathCreated, err = zconn.Create(zkPath, []byte(value), int32(flags), aclv) } if err != nil && zkhelper.ZkErrorEqual(err, topo.ErrNodeExists) { pathCreated = "" _, err = zconn.Set(zkPath, []byte(value), -1) } return }
func GetActionWithSeq(zkConn zkhelper.Conn, productName string, seq int64, provider string) (*Action, error) { var act Action data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &act); err != nil { return nil, errors.Trace(err) } return &act, nil }
func GetSlot(zkConn zkhelper.Conn, productName string, id int) (*Slot, error) { zkPath := GetSlotPath(productName, id) data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } var slot Slot if err := json.Unmarshal(data, &slot); err != nil { return nil, errors.Trace(err) } return &slot, nil }
func GetProxyInfo(zkConn zkhelper.Conn, productName string, proxyName string) (*ProxyInfo, error) { var pi ProxyInfo data, _, err := zkConn.Get(path.Join(GetProxyPath(productName), proxyName)) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &pi); err != nil { return nil, errors.Trace(err) } return &pi, nil }
func GetFenceProxyMap(zkConn zkhelper.Conn, productName string) (map[string]bool, error) { children, _, err := zkConn.Children(GetProxyFencePath(productName)) if err != nil { if err.Error() == zk.ErrNoNode.Error() { return make(map[string]bool), nil } else { return nil, err } } m := make(map[string]bool, len(children)) for _, fenceNode := range children { m[fenceNode] = true } return m, nil }
func (self *ServerGroup) GetServers(zkConn zkhelper.Conn) ([]*Server, error) { var ret []*Server root := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) nodes, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } for _, node := range nodes { nodePath := root + "/" + node s, err := GetServer(zkConn, nodePath) if err != nil { return nil, errors.Trace(err) } ret = append(ret, s) } return ret, nil }
func ForceRemoveLock(zkConn zkhelper.Conn, productName string) error { lockPath := fmt.Sprintf("/zk/codis/db_%s/LOCK", productName) children, _, err := zkConn.Children(lockPath) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return errors.Trace(err) } for _, c := range children { fullPath := path.Join(lockPath, c) log.Info("deleting..", fullPath) err := zkConn.Delete(fullPath, 0) if err != nil { return errors.Trace(err) } } return nil }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]bool) for _, p := range proxies { proxyIds[p.Id] = true } // check every 500ms for times < timeoutInMs/500 { if times >= 6 && (times*500)%1000 == 0 { log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, proxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } for _, node := range nodes { id := path.Base(node) delete(proxyIds, id) } if len(proxyIds) == 0 { return nil } times++ time.Sleep(500 * time.Millisecond) } log.Warn("proxies didn't responed: ", proxyIds) // set offline proxies /* for id, _ := range proxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } */ return ErrReceiverTimeout }
func ProxyList(zkConn zkhelper.Conn, productName string, filter func(*ProxyInfo) bool) ([]ProxyInfo, error) { ret := make([]ProxyInfo, 0) root := GetProxyPath(productName) proxies, _, err := zkConn.Children(root) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return nil, errors.Trace(err) } for _, proxyName := range proxies { pi, err := GetProxyInfo(zkConn, productName, proxyName) if err != nil { return nil, errors.Trace(err) } if filter == nil || filter(pi) == true { ret = append(ret, *pi) } } return ret, nil }
func Slots(zkConn zkhelper.Conn, productName string) ([]*Slot, error) { zkPath := GetSlotBasePath(productName) children, _, err := zkConn.Children(zkPath) if err != nil { return nil, errors.Trace(err) } var slots []*Slot for _, p := range children { data, _, err := zkConn.Get(path.Join(zkPath, p)) if err != nil { return nil, errors.Trace(err) } slot := &Slot{} if err := json.Unmarshal(data, &slot); err != nil { return nil, errors.Trace(err) } slots = append(slots, slot) } return slots, nil }
// Create a path and any pieces required, think mkdir -p. // Intermediate znodes are always created empty. func CreateRecursive(zconn zkhelper.Conn, zkPath, value string, flags int, aclv []topo.ACL) (pathCreated string, err error) { parts := strings.Split(zkPath, "/") if parts[1] != zkhelper.MagicPrefix { return "", fmt.Errorf("zkutil: non /%v path: %v", zkhelper.MagicPrefix, zkPath) } pathCreated, err = zconn.Create(zkPath, []byte(value), int32(flags), aclv) if zkhelper.ZkErrorEqual(err, topo.ErrNoNode) { // Make sure that nodes are either "file" or "directory" to mirror file system // semantics. dirAclv := make([]topo.ACL, len(aclv)) for i, acl := range aclv { dirAclv[i] = acl dirAclv[i].Perms = zkhelper.PERM_DIRECTORY } _, err = CreateRecursive(zconn, os_path.Dir(zkPath), "", 0, dirAclv) if err != nil && !zkhelper.ZkErrorEqual(err, topo.ErrNodeExists) { return "", err } pathCreated, err = zconn.Create(zkPath, []byte(value), int32(flags), aclv) } return }
func NewActionWithTimeout(zkConn zkhelper.Conn, productName string, actionType ActionType, target interface{}, desc string, needConfirm bool, timeoutInMs int) error { ts := strconv.FormatInt(time.Now().Unix(), 10) action := &Action{ Type: actionType, Desc: desc, Target: target, Ts: ts, } // set action receivers proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } if needConfirm { // do fencing here, make sure 'offline' proxies are really offline // now we only check whether the proxy lists are match fenceProxies, err := GetFenceProxyMap(zkConn, productName) if err != nil { return errors.Trace(err) } for _, proxy := range proxies { delete(fenceProxies, proxy.Addr) } if len(fenceProxies) > 0 { errMsg := bytes.NewBufferString("Some proxies may not stop cleanly:") for k, _ := range fenceProxies { errMsg.WriteString(" ") errMsg.WriteString(k) } return errors.Errorf("%s", errMsg) } } for _, p := range proxies { buf, err := json.Marshal(p) if err != nil { return errors.Trace(err) } action.Receivers = append(action.Receivers, string(buf)) } b, _ := json.Marshal(action) prefix := GetWatchActionPath(productName) //action root path err = CreateActionRootPath(zkConn, prefix) if err != nil { return errors.Trace(err) } //response path respPath := path.Join(path.Dir(prefix), "ActionResponse") err = CreateActionRootPath(zkConn, respPath) if err != nil { return errors.Trace(err) } //create response node, etcd do not support create in order directory //get path first actionRespPath, err := zkConn.Create(respPath+"/", b, int32(zk.FlagSequence), zkhelper.DefaultFileACLs()) if err != nil { log.ErrorErrorf(err, "zk create resp node = %s", respPath) return errors.Trace(err) } //remove file then create directory zkConn.Delete(actionRespPath, -1) actionRespPath, err = zkConn.Create(actionRespPath, b, 0, zkhelper.DefaultDirACLs()) if err != nil { log.ErrorErrorf(err, "zk create resp node = %s", respPath) return errors.Trace(err) } suffix := path.Base(actionRespPath) // create action node actionPath := path.Join(prefix, suffix) _, err = zkConn.Create(actionPath, b, 0, zkhelper.DefaultFileACLs()) if err != nil { log.ErrorErrorf(err, "zk create action path = %s", actionPath) return errors.Trace(err) } if needConfirm { if err := WaitForReceiverWithTimeout(zkConn, productName, actionRespPath, proxies, timeoutInMs); err != nil { return errors.Trace(err) } } return nil }
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error { p, err := GetProxyInfo(zkConn, productName, proxyName) if err != nil { return errors.Trace(err) } if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE { return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status) } // check slot status before setting proxy online if status == PROXY_STATE_ONLINE { slots, err := Slots(zkConn, productName) if err != nil { return errors.Trace(err) } for _, slot := range slots { if slot.State.Status != SLOT_STATUS_ONLINE && slot.State.Status != SLOT_STATUS_MIGRATE { return errors.Errorf("slot %v is not online or migrate", slot) } if slot.GroupId == INVALID_ID { return errors.Errorf("slot %v has invalid group id", slot) } } } p.State = status b, _ := json.Marshal(p) _, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1) if err != nil { return errors.Trace(err) } if status == PROXY_STATE_MARK_OFFLINE { // wait for the proxy down for { _, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName)) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return nil } else if err != nil { return errors.Trace(err) } <-c info, err := GetProxyInfo(zkConn, productName, proxyName) log.Info("mark_offline, check proxy status:", proxyName, info, err) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { log.Info("shutdown proxy successful") return nil } else if err != nil { return errors.Trace(err) } if info.State == PROXY_STATE_OFFLINE { log.Infof("proxy: %s offline success!", proxyName) return nil } } } return nil }
func ActionGC(zkConn zkhelper.Conn, productName string, gcType int, keep int) error { prefix := GetWatchActionPath(productName) respPrefix := GetActionResponsePath(productName) exists, err := zkhelper.NodeExists(zkConn, prefix) if err != nil { return errors.Trace(err) } if !exists { // if action path not exists just return nil return nil } actions, _, err := zkConn.Children(prefix) if err != nil { return errors.Trace(err) } var act Action currentTs := time.Now().Unix() if gcType == GC_TYPE_N { sort.Strings(actions) // keep last 500 actions if len(actions)-500 <= keep { return nil } for _, action := range actions[:len(actions)-keep-500] { if err := zkhelper.DeleteRecursive(zkConn, path.Join(prefix, action), -1); err != nil { return errors.Trace(err) } err := zkhelper.DeleteRecursive(zkConn, path.Join(respPrefix, action), -1) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return errors.Trace(err) } } } else if gcType == GC_TYPE_SEC { secs := keep for _, action := range actions { b, _, err := zkConn.Get(path.Join(prefix, action)) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(b, &act); err != nil { return errors.Trace(err) } log.Infof("action = %s, timestamp = %s", action, act.Ts) ts, _ := strconv.ParseInt(act.Ts, 10, 64) if currentTs-ts > int64(secs) { if err := zkhelper.DeleteRecursive(zkConn, path.Join(prefix, action), -1); err != nil { return errors.Trace(err) } err := zkhelper.DeleteRecursive(zkConn, path.Join(respPrefix, action), -1) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return errors.Trace(err) } } } actionResps, _, err := zkConn.Children(respPrefix) if err != nil { return errors.Trace(err) } for _, action := range actionResps { b, _, err := zkConn.Get(path.Join(respPrefix, action)) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(b, &act); err != nil { return errors.Trace(err) } log.Infof("action = %s, timestamp = %s", action, act.Ts) ts, _ := strconv.ParseInt(act.Ts, 10, 64) if currentTs-ts > int64(secs) { if err := zkhelper.DeleteRecursive(zkConn, path.Join(respPrefix, action), -1); err != nil { return errors.Trace(err) } } } } return nil }