func GetRedisStat(addr, passwd string) (map[string]string, error) { c, err := DialTo(addr, passwd) if err != nil { return nil, err } defer c.Close() ret, err := redis.String(c.Do("INFO")) if err != nil { return nil, errors.Trace(err) } m := make(map[string]string) lines := strings.Split(ret, "\n") for _, line := range lines { kv := strings.SplitN(line, ":", 2) if len(kv) == 2 { k, v := strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1]) m[k] = v } } reply, err := redis.Strings(c.Do("config", "get", "maxmemory")) if err != nil { return nil, errors.Trace(err) } // we got result if len(reply) == 2 { if reply[1] != "0" { m["maxmemory"] = reply[1] } else { m["maxmemory"] = "∞" } } return m, nil }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]struct{}) var offlineProxyIds []string for _, p := range proxies { proxyIds[p.Id] = struct{}{} } checkTimes := timeoutInMs / 500 // check every 500ms for times < checkTimes { if times >= 6 && (times*500)%1000 == 0 { log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, offlineProxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } confirmIds := make(map[string]struct{}) for _, node := range nodes { id := path.Base(node) confirmIds[id] = struct{}{} } if len(confirmIds) != 0 { match := true // check if all proxy have responsed var notMatchList []string for id, _ := range proxyIds { // if proxy id not in confirm ids, means someone didn't response if _, ok := confirmIds[id]; !ok { match = false notMatchList = append(notMatchList, id) } } if match { return nil } offlineProxyIds = notMatchList } times += 1 time.Sleep(500 * time.Millisecond) } if len(offlineProxyIds) > 0 { log.Errorf("proxies didn't responed: %v", offlineProxyIds) } // set offline proxies for _, id := range offlineProxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } return errors.Trace(ErrReceiverTimeout) }
func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr) data, _, err := zkConn.Get(zkPath) if err != nil { return errors.Trace(err) } var s Server err = json.Unmarshal(data, &s) if err != nil { return errors.Trace(err) } log.Info(s) if s.Type == SERVER_TYPE_MASTER { return errors.Errorf("cannot remove master, use promote first") } err = zkConn.Delete(zkPath, -1) if err != nil { return errors.Trace(err) } // update server list for i := 0; i < len(self.Servers); i++ { if self.Servers[i].Addr == s.Addr { self.Servers = append(self.Servers[:i], self.Servers[i+1:]...) break } } // remove slave won't need proxy confirm err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) return errors.Trace(err) }
func (d *Decoder) decodeResp(depth int) (*Resp, error) { b, err := d.ReadByte() if err != nil { return nil, errors.Trace(err) } switch t := RespType(b); t { case TypeString, TypeError, TypeInt: r := &Resp{Type: t} r.Value, err = d.decodeTextBytes() return r, err case TypeBulkBytes: r := &Resp{Type: t} r.Value, err = d.decodeBulkBytes() return r, err case TypeArray: r := &Resp{Type: t} r.Array, err = d.decodeArray(depth) return r, err default: if depth != 0 { return nil, errors.Errorf("bad resp type %s", t) } if err := d.UnreadByte(); err != nil { return nil, errors.Trace(err) } r := &Resp{Type: TypeArray} r.Array, err = d.decodeSingleLineBulkBytesArray() return r, err } }
// experimental simple auto rebalance :) func Rebalance() error { targetQuota, err := getQuotaMap(safeZkConn) if err != nil { return errors.Trace(err) } livingNodes, err := getLivingNodeInfos(safeZkConn) if err != nil { return errors.Trace(err) } log.Infof("start rebalance") for _, node := range livingNodes { for len(node.CurSlots) > targetQuota[node.GroupId] { for _, dest := range livingNodes { if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] { slot := node.CurSlots[len(node.CurSlots)-1] // create a migration task info := &MigrateTaskInfo{ Delay: 0, SlotId: slot, NewGroupId: dest.GroupId, Status: MIGRATE_TASK_PENDING, CreateAt: strconv.FormatInt(time.Now().Unix(), 10), } globalMigrateManager.PostTask(info) node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1] dest.CurSlots = append(dest.CurSlots, slot) } } } } log.Infof("rebalance tasks submit finish") return nil }
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) { var slot *models.Slot var groupServer *models.ServerGroup var err error for { slot, err = models.GetSlot(top.zkConn, top.ProductName, i) if err == nil || top.IsFatalErr(err) { break } else { time.Sleep(ZK_RECONNECT_INTERVAL * time.Second) } if err != nil { log.ErrorErrorf(err, " GetSlot ") } } if err != nil { return nil, nil, errors.Trace(err) } for { groupServer, err = models.GetGroup(top.zkConn, top.ProductName, slot.GroupId) if err == nil || top.IsFatalErr(err) { break } else { time.Sleep(ZK_RECONNECT_INTERVAL * time.Second) } if err != nil { log.ErrorErrorf(err, " GetGroup ") } } if err != nil { return nil, nil, errors.Trace(err) } return slot, groupServer, nil }
func ServerGroups(zkConn zkhelper.Conn, productName string) ([]*ServerGroup, error) { var ret []*ServerGroup root := fmt.Sprintf("/zk/codis/db_%s/servers", productName) groups, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } // Buggy :X //zkhelper.ChildrenRecursive(*zkConn, root) for _, group := range groups { // parse group_1 => 1 groupId, err := strconv.Atoi(strings.Split(group, "_")[1]) if err != nil { return nil, errors.Trace(err) } g, err := GetGroup(zkConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } ret = append(ret, g) } return ret, nil }
func SlotsInfo(addr, passwd string, fromSlot, toSlot int) (map[int]int, error) { c, err := DialTo(addr, passwd) if err != nil { return nil, err } defer c.Close() infos, err := redis.Values(c.Do("SLOTSINFO", fromSlot, toSlot-fromSlot+1)) if err != nil { return nil, errors.Trace(err) } slots := make(map[int]int) if infos != nil { for i := 0; i < len(infos); i++ { info, err := redis.Values(infos[i], nil) if err != nil { return nil, errors.Trace(err) } var slotid, slotsize int if _, err := redis.Scan(info, &slotid, &slotsize); err != nil { return nil, errors.Trace(err) } else { slots[slotid] = slotsize } } } return slots, nil }
func ForceRemoveDeadFence(zkConn zkhelper.Conn, productName string) error { proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } fenceProxies, err := GetFenceProxyMap(zkConn, productName) if err != nil { return errors.Trace(err) } // remove online proxies's fence for _, proxy := range proxies { delete(fenceProxies, proxy.Addr) } // delete dead fence in zookeeper path := GetProxyFencePath(productName) for remainFence, _ := range fenceProxies { fencePath := filepath.Join(path, remainFence) log.Info("removing fence: ", fencePath) if err := zkhelper.DeleteRecursive(zkConn, fencePath, -1); err != nil { return errors.Trace(err) } } return nil }
func (p *ProxyInfo) Ops() (int64, error) { resp, err := http.Get("http://" + p.DebugVarAddr + "/debug/vars") if err != nil { return -1, errors.Trace(err) } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return -1, errors.Trace(err) } m := make(map[string]interface{}) err = json.Unmarshal(body, &m) if err != nil { return -1, errors.Trace(err) } if v, ok := m["router"]; ok { if vv, ok := v.(map[string]interface{})["ops"]; ok { return int64(vv.(float64)), nil } } return 0, nil }
func (s *Slot) Update(zkConn zkhelper.Conn) error { // status validation switch s.State.Status { case SLOT_STATUS_MIGRATE, SLOT_STATUS_OFFLINE, SLOT_STATUS_ONLINE, SLOT_STATUS_PRE_MIGRATE: { // valid status, OK } default: { return errors.Trace(ErrUnknownSlotStatus) } } data, err := json.Marshal(s) if err != nil { return errors.Trace(err) } zkPath := GetSlotPath(s.ProductName, s.Id) _, err = zkhelper.CreateOrUpdate(zkConn, zkPath, string(data), 0, zkhelper.DefaultFileACLs(), true) if err != nil { return errors.Trace(err) } if s.State.Status == SLOT_STATUS_MIGRATE { err = NewAction(zkConn, s.ProductName, ACTION_TYPE_SLOT_MIGRATE, s, "", true) } else { err = NewAction(zkConn, s.ProductName, ACTION_TYPE_SLOT_CHANGED, s, "", true) } return errors.Trace(err) }
func (e *Encoder) encodeTextString(s string) error { if _, err := e.WriteString(s); err != nil { return errors.Trace(err) } if _, err := e.WriteString("\r\n"); err != nil { return errors.Trace(err) } return nil }
func (e *Encoder) encodeTextBytes(b []byte) error { if _, err := e.Write(b); err != nil { return errors.Trace(err) } if _, err := e.WriteString("\r\n"); err != nil { return errors.Trace(err) } return nil }
func (d *Decoder) decodeTextBytes() ([]byte, error) { b, err := d.ReadBytes('\n') if err != nil { return nil, errors.Trace(err) } if n := len(b) - 2; n < 0 || b[n] != '\r' { return nil, errors.Trace(ErrBadRespCRLFEnd) } else { return b[:n], nil } }
func GetServer(zkConn zkhelper.Conn, zkPath string) (*Server, error) { data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } srv := Server{} if err := json.Unmarshal(data, &srv); err != nil { return nil, errors.Trace(err) } return &srv, nil }
func GetActionWithSeq(zkConn zkhelper.Conn, productName string, seq int64, provider string) (*Action, error) { var act Action data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &act); err != nil { return nil, errors.Trace(err) } return &act, nil }
func GetActionObject(zkConn zkhelper.Conn, productName string, seq int64, act interface{}, provider string) error { data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(data, act); err != nil { return errors.Trace(err) } return nil }
func SlaveNoOne(addr string) error { c, err := redis.DialTimeout("tcp", addr, defaultTimeout, defaultTimeout, defaultTimeout) if err != nil { return errors.Trace(err) } defer c.Close() if _, err = c.Do("SLAVEOF", "NO", "ONE"); err != nil { return errors.Trace(err) } return nil }
func DialToTimeout(addr string, passwd string, readTimeout, writeTimeout time.Duration) (redis.Conn, error) { c, err := redis.DialTimeout("tcp", addr, time.Second, readTimeout, writeTimeout) if err != nil { return nil, errors.Trace(err) } if passwd != "" { if _, err := c.Do("AUTH", passwd); err != nil { c.Close() return nil, errors.Trace(err) } } return c, nil }
func GetSlot(zkConn zkhelper.Conn, productName string, id int) (*Slot, error) { zkPath := GetSlotPath(productName, id) data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } var slot Slot if err := json.Unmarshal(data, &slot); err != nil { return nil, errors.Trace(err) } return &slot, nil }
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) { slot, err := models.GetSlot(top.zkConn, top.ProductName, i) if err != nil { return nil, nil, errors.Trace(err) } groupServer, err := models.GetGroup(top.zkConn, top.ProductName, slot.GroupId) if err != nil { return nil, nil, errors.Trace(err) } return slot, groupServer, nil }
func GetProxyInfo(zkConn zkhelper.Conn, productName string, proxyName string) (*ProxyInfo, error) { var pi ProxyInfo data, _, err := zkConn.Get(path.Join(GetProxyPath(productName), proxyName)) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &pi); err != nil { return nil, errors.Trace(err) } return &pi, nil }
func CreateActionRootPath(zkConn zkhelper.Conn, path string) error { // if action dir not exists, create it first exists, err := zkhelper.NodeExists(zkConn, path) if err != nil { return errors.Trace(err) } if !exists { _, err := zkhelper.CreateOrUpdate(zkConn, path, "", 0, zkhelper.DefaultDirACLs(), true) if err != nil { return errors.Trace(err) } } return nil }
func getLivingNodeInfos(zkConn zkhelper.Conn) ([]*NodeInfo, error) { groups, err := models.ServerGroups(zkConn, globalEnv.ProductName()) if err != nil { return nil, errors.Trace(err) } slots, err := models.Slots(zkConn, globalEnv.ProductName()) slotMap := make(map[int][]int) for _, slot := range slots { if slot.State.Status == models.SLOT_STATUS_ONLINE { slotMap[slot.GroupId] = append(slotMap[slot.GroupId], slot.Id) } } var ret []*NodeInfo for _, g := range groups { master, err := g.Master(zkConn) if err != nil { return nil, errors.Trace(err) } if master == nil { return nil, errors.Errorf("group %d has no master", g.Id) } out, err := utils.GetRedisConfig(master.Addr, globalEnv.Password(), "maxmemory") if err != nil { return nil, errors.Trace(err) } maxMem, err := strconv.ParseInt(out, 10, 64) if err != nil { return nil, errors.Trace(err) } if maxMem <= 0 { return nil, errors.Errorf("redis %s should set maxmemory", master.Addr) } node := &NodeInfo{ GroupId: g.Id, CurSlots: slotMap[g.Id], MaxMemory: maxMem, } ret = append(ret, node) } cnt := 0 for _, info := range ret { cnt += len(info.CurSlots) } if cnt != models.DEFAULT_SLOT_NUM { return nil, errors.Errorf("not all slots are online") } return ret, nil }
func GetRedisConfig(addr string, configName string) (string, error) { c, err := redis.DialTimeout("tcp", addr, defaultTimeout, defaultTimeout, defaultTimeout) if err != nil { return "", errors.Trace(err) } defer c.Close() ret, err := redis.Strings(c.Do("config", "get", configName)) if err != nil { return "", errors.Trace(err) } if len(ret) == 2 { return ret[1], nil } return "", nil }
func DialTimeout(addr string, bufsize int, timeout time.Duration) (*Conn, error) { c, err := net.DialTimeout("tcp", addr, timeout) if err != nil { return nil, errors.Trace(err) } return NewConnSize(c, bufsize), nil }
func InitConfigFromFile(filename string) (*cfg.Cfg, error) { ret := cfg.NewCfg(filename) if err := ret.Load(); err != nil { return nil, errors.Trace(err) } return ret, nil }
func (self *ServerGroup) Create(zkConn zkhelper.Conn) error { if self.Id < 0 { return errors.Errorf("invalid server group id %d", self.Id) } zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) _, err := zkhelper.CreateOrUpdate(zkConn, zkPath, "", 0, zkhelper.DefaultDirACLs(), true) if err != nil { return errors.Trace(err) } err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) if err != nil { return errors.Trace(err) } return nil }
func btoi(b []byte) (int64, error) { if len(b) != 0 && len(b) < 10 { var neg, i = false, 0 switch b[0] { case '-': neg = true fallthrough case '+': i++ } if len(b) != i { var n int64 for ; i < len(b) && b[i] >= '0' && b[i] <= '9'; i++ { n = int64(b[i]-'0') + n*10 } if len(b) == i { if neg { n = -n } return n, nil } } } if n, err := strconv.ParseInt(string(b), 10, 64); err != nil { return 0, errors.Trace(err) } else { return n, nil } }
func GetActionSeqList(zkConn zkhelper.Conn, productName string) ([]int, error) { nodes, _, err := zkConn.Children(GetWatchActionPath(productName)) if err != nil { return nil, errors.Trace(err) } return ExtraSeqList(nodes) }