// experimental simple auto rebalance :) func Rebalance() error { targetQuota, err := getQuotaMap(safeZkConn) if err != nil { return errors.Trace(err) } livingNodes, err := getLivingNodeInfos(safeZkConn) if err != nil { return errors.Trace(err) } log.Infof("start rebalance") for _, node := range livingNodes { for len(node.CurSlots) > targetQuota[node.GroupId] { for _, dest := range livingNodes { if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] { slot := node.CurSlots[len(node.CurSlots)-1] // create a migration task info := &MigrateTaskInfo{ Delay: 0, SlotId: slot, NewGroupId: dest.GroupId, Status: MIGRATE_TASK_PENDING, CreateAt: strconv.FormatInt(time.Now().Unix(), 10), } globalMigrateManager.PostTask(info) node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1] dest.CurSlots = append(dest.CurSlots, slot) } } } } log.Infof("rebalance tasks submit finish") return nil }
func (d *Decoder) decodeResp(depth int) (*Resp, error) { b, err := d.ReadByte() if err != nil { return nil, errors.Trace(err) } switch t := RespType(b); t { case TypeString, TypeError, TypeInt: r := &Resp{Type: t} r.Value, err = d.decodeTextBytes() return r, err case TypeBulkBytes: r := &Resp{Type: t} r.Value, err = d.decodeBulkBytes() return r, err case TypeArray: r := &Resp{Type: t} r.Array, err = d.decodeArray(depth) return r, err default: if depth != 0 { return nil, errors.Errorf("bad resp type %s", t) } if err := d.UnreadByte(); err != nil { return nil, errors.Trace(err) } r := &Resp{Type: TypeArray} r.Array, err = d.decodeSingleLineBulkBytesArray() return r, err } }
func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr) data, _, err := zkConn.Get(zkPath) if err != nil { return errors.Trace(err) } var s Server err = json.Unmarshal(data, &s) if err != nil { return errors.Trace(err) } log.Info(s) if s.Type == SERVER_TYPE_MASTER { return errors.Errorf("cannot remove master, use promote first") } err = zkConn.Delete(zkPath, -1) if err != nil { return errors.Trace(err) } // update server list for i := 0; i < len(self.Servers); i++ { if self.Servers[i].Addr == s.Addr { self.Servers = append(self.Servers[:i], self.Servers[i+1:]...) break } } // remove slave won't need proxy confirm err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) return errors.Trace(err) }
func (p *ProxyInfo) Ops() (int64, error) { resp, err := http.Get("http://" + p.DebugVarAddr + "/debug/vars") if err != nil { return -1, errors.Trace(err) } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return -1, errors.Trace(err) } m := make(map[string]interface{}) err = json.Unmarshal(body, &m) if err != nil { return -1, errors.Trace(err) } if v, ok := m["router"]; ok { if vv, ok := v.(map[string]interface{})["ops"]; ok { return int64(vv.(float64)), nil } } return 0, nil }
func ForceRemoveDeadFence(zkConn zkhelper.Conn, productName string) error { proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } fenceProxies, err := GetFenceProxyMap(zkConn, productName) if err != nil { return errors.Trace(err) } // remove online proxies's fence for _, proxy := range proxies { delete(fenceProxies, proxy.Addr) } // delete dead fence in zookeeper path := GetProxyFencePath(productName) for remainFence, _ := range fenceProxies { fencePath := filepath.Join(path, remainFence) log.Info("removing fence: ", fencePath) if err := zkhelper.DeleteRecursive(zkConn, fencePath, -1); err != nil { return errors.Trace(err) } } return nil }
func ServerGroups(zkConn zkhelper.Conn, productName string) ([]*ServerGroup, error) { var ret []*ServerGroup root := fmt.Sprintf("/zk/codis/db_%s/servers", productName) groups, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } // Buggy :X //zkhelper.ChildrenRecursive(*zkConn, root) for _, group := range groups { // parse group_1 => 1 groupId, err := strconv.Atoi(strings.Split(group, "_")[1]) if err != nil { return nil, errors.Trace(err) } g, err := GetGroup(zkConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } ret = append(ret, g) } return ret, nil }
func GetRedisStat(addr, passwd string) (map[string]string, error) { c, err := DialTo(addr, passwd) if err != nil { return nil, err } defer c.Close() ret, err := redis.String(c.Do("INFO")) if err != nil { return nil, errors.Trace(err) } m := make(map[string]string) lines := strings.Split(ret, "\n") for _, line := range lines { kv := strings.SplitN(line, ":", 2) if len(kv) == 2 { k, v := strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1]) m[k] = v } } reply, err := redis.Strings(c.Do("config", "get", "maxmemory")) if err != nil { return nil, errors.Trace(err) } // we got result if len(reply) == 2 { if reply[1] != "0" { m["maxmemory"] = reply[1] } else { m["maxmemory"] = "∞" } } return m, nil }
func SlotsInfo(addr, passwd string, fromSlot, toSlot int) (map[int]int, error) { c, err := DialTo(addr, passwd) if err != nil { return nil, err } defer c.Close() infos, err := redis.Values(c.Do("SLOTSINFO", fromSlot, toSlot-fromSlot+1)) if err != nil { return nil, errors.Trace(err) } slots := make(map[int]int) if infos != nil { for i := 0; i < len(infos); i++ { info, err := redis.Values(infos[i], nil) if err != nil { return nil, errors.Trace(err) } var slotid, slotsize int if _, err := redis.Scan(info, &slotid, &slotsize); err != nil { return nil, errors.Trace(err) } else { slots[slotid] = slotsize } } } return slots, nil }
func (e *Encoder) encodeTextBytes(b []byte) error { if _, err := e.Write(b); err != nil { return errors.Trace(err) } if _, err := e.WriteString("\r\n"); err != nil { return errors.Trace(err) } return nil }
func (e *Encoder) encodeTextString(s string) error { if _, err := e.WriteString(s); err != nil { return errors.Trace(err) } if _, err := e.WriteString("\r\n"); err != nil { return errors.Trace(err) } return nil }
func GetServer(zkConn zkhelper.Conn, zkPath string) (*Server, error) { data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } srv := Server{} if err := json.Unmarshal(data, &srv); err != nil { return nil, errors.Trace(err) } return &srv, nil }
func (d *Decoder) decodeTextBytes() ([]byte, error) { b, err := d.ReadBytes('\n') if err != nil { return nil, errors.Trace(err) } if n := len(b) - 2; n < 0 || b[n] != '\r' { return nil, errors.Trace(ErrBadRespCRLFEnd) } else { return b[:n], nil } }
func GetActionObject(zkConn zkhelper.Conn, productName string, seq int64, act interface{}, provider string) error { data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return errors.Trace(err) } if err := json.Unmarshal(data, act); err != nil { return errors.Trace(err) } return nil }
func GetActionWithSeq(zkConn zkhelper.Conn, productName string, seq int64, provider string) (*Action, error) { var act Action data, _, err := zkConn.Get(path.Join(GetWatchActionPath(productName), zkConn.Seq2Str(seq))) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &act); err != nil { return nil, errors.Trace(err) } return &act, nil }
func DialToTimeout(addr string, passwd string, readTimeout, writeTimeout time.Duration) (redis.Conn, error) { c, err := redis.DialTimeout("tcp", addr, time.Second, readTimeout, writeTimeout) if err != nil { return nil, errors.Trace(err) } if passwd != "" { if _, err := c.Do("AUTH", passwd); err != nil { c.Close() return nil, errors.Trace(err) } } return c, nil }
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) { slot, err := models.GetSlot(top.zkConn, top.ProductName, i) if err != nil { return nil, nil, errors.Trace(err) } groupServer, err := models.GetGroup(top.zkConn, top.ProductName, slot.GroupId) if err != nil { return nil, nil, errors.Trace(err) } return slot, groupServer, nil }
func GetProxyInfo(zkConn zkhelper.Conn, productName string, proxyName string) (*ProxyInfo, error) { var pi ProxyInfo data, _, err := zkConn.Get(path.Join(GetProxyPath(productName), proxyName)) if err != nil { return nil, errors.Trace(err) } if err := json.Unmarshal(data, &pi); err != nil { return nil, errors.Trace(err) } return &pi, nil }
func GetSlot(zkConn zkhelper.Conn, productName string, id int) (*Slot, error) { zkPath := GetSlotPath(productName, id) data, _, err := zkConn.Get(zkPath) if err != nil { return nil, errors.Trace(err) } var slot Slot if err := json.Unmarshal(data, &slot); err != nil { return nil, errors.Trace(err) } return &slot, nil }
func CreateActionRootPath(zkConn zkhelper.Conn, path string) error { // if action dir not exists, create it first exists, err := zkhelper.NodeExists(zkConn, path) if err != nil { return errors.Trace(err) } if !exists { _, err := zkhelper.CreateOrUpdate(zkConn, path, "", 0, zkhelper.DefaultDirACLs(), true) if err != nil { return errors.Trace(err) } } return nil }
func getLivingNodeInfos(zkConn zkhelper.Conn) ([]*NodeInfo, error) { groups, err := models.ServerGroups(zkConn, globalEnv.ProductName()) if err != nil { return nil, errors.Trace(err) } slots, err := models.Slots(zkConn, globalEnv.ProductName()) slotMap := make(map[int][]int) for _, slot := range slots { if slot.State.Status == models.SLOT_STATUS_ONLINE { slotMap[slot.GroupId] = append(slotMap[slot.GroupId], slot.Id) } } var ret []*NodeInfo for _, g := range groups { master, err := g.Master(zkConn) if err != nil { return nil, errors.Trace(err) } if master == nil { return nil, errors.Errorf("group %d has no master", g.Id) } out, err := utils.GetRedisConfig(master.Addr, globalEnv.Password(), "maxmemory") if err != nil { return nil, errors.Trace(err) } maxMem, err := strconv.ParseInt(out, 10, 64) if err != nil { return nil, errors.Trace(err) } if maxMem <= 0 { return nil, errors.Errorf("redis %s should set maxmemory", master.Addr) } node := &NodeInfo{ GroupId: g.Id, CurSlots: slotMap[g.Id], MaxMemory: maxMem, } ret = append(ret, node) } cnt := 0 for _, info := range ret { cnt += len(info.CurSlots) } if cnt != models.DEFAULT_SLOT_NUM { return nil, errors.Errorf("not all slots are online") } return ret, nil }
func DialTimeout(addr string, bufsize int, timeout time.Duration) (*Conn, error) { c, err := net.DialTimeout("tcp", addr, timeout) if err != nil { return nil, errors.Trace(err) } return NewConnSize(c, bufsize), nil }
func GetActionSeqList(zkConn zkhelper.Conn, productName string) ([]int, error) { nodes, _, err := zkConn.Children(GetWatchActionPath(productName)) if err != nil { return nil, errors.Trace(err) } return ExtraSeqList(nodes) }
func (self *ServerGroup) Create(zkConn zkhelper.Conn) error { if self.Id < 0 { return errors.Errorf("invalid server group id %d", self.Id) } zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) _, err := zkhelper.CreateOrUpdate(zkConn, zkPath, "", 0, zkhelper.DefaultDirACLs(), true) if err != nil { return errors.Trace(err) } err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) if err != nil { return errors.Trace(err) } return nil }
func getQuotaMap(zkConn zkhelper.Conn) (map[int]int, error) { nodes, err := getLivingNodeInfos(zkConn) if err != nil { return nil, errors.Trace(err) } ret := make(map[int]int) var totalMem int64 totalQuota := 0 for _, node := range nodes { totalMem += node.MaxMemory } for _, node := range nodes { quota := int(models.DEFAULT_SLOT_NUM * node.MaxMemory * 1.0 / totalMem) ret[node.GroupId] = quota totalQuota += quota } // round up if totalQuota < models.DEFAULT_SLOT_NUM { for k, _ := range ret { ret[k] += models.DEFAULT_SLOT_NUM - totalQuota break } } return ret, nil }
func btoi(b []byte) (int64, error) { if len(b) != 0 && len(b) < 10 { var neg, i = false, 0 switch b[0] { case '-': neg = true fallthrough case '+': i++ } if len(b) != i { var n int64 for ; i < len(b) && b[i] >= '0' && b[i] <= '9'; i++ { n = int64(b[i]-'0') + n*10 } if len(b) == i { if neg { n = -n } return n, nil } } } if n, err := strconv.ParseInt(string(b), 10, 64); err != nil { return 0, errors.Trace(err) } else { return n, nil } }
func (self *ServerGroup) Exists(zkConn zkhelper.Conn) (bool, error) { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) b, err := zkhelper.NodeExists(zkConn, zkPath) if err != nil { return false, errors.Trace(err) } return b, nil }
func (self *ServerGroup) GetServers(zkConn zkhelper.Conn) ([]*Server, error) { var ret []*Server root := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) nodes, _, err := zkConn.Children(root) if err != nil { return nil, errors.Trace(err) } for _, node := range nodes { nodePath := root + "/" + node s, err := GetServer(zkConn, nodePath) if err != nil { return nil, errors.Trace(err) } ret = append(ret, s) } return ret, nil }
func GroupExists(zkConn zkhelper.Conn, productName string, groupId int) (bool, error) { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", productName, groupId) exists, _, err := zkConn.Exists(zkPath) if err != nil { return false, errors.Trace(err) } return exists, nil }
// cmdAction handles the "codis-config action" sub-command. It parses argv
// with docopt against the usage text below and dispatches on the result:
//
//   - remove-lock  calls runRemoveLock
//   - remove-fence calls runRemoveFence
//   - gc -n <num>  parses <num> as an int and calls runGCKeepN
//   - gc -s <seconds> parses <seconds> as an int and calls runGCKeepNSec
//
// A docopt failure or a non-numeric argument is logged and returned. It
// returns nil when gc is selected with neither -n nor -s, or when none of the
// sub-commands is set.
//
// NOTE(review): the <num> parse error is returned unwrapped while the
// <seconds> parse error goes through errors.Trace — confirm whether this
// asymmetry is intentional.
// NOTE(review): the usage text has no description line for remove-fence.
func cmdAction(argv []string) (err error) { usage := `usage: codis-config action (gc [-n <num> | -s <seconds>] | remove-lock | remove-fence) options: gc: gc -n N keep last N actions; gc -s Sec keep last Sec seconds actions; remove-lock force remove zookeeper lock; ` args, err := docopt.Parse(usage, argv, true, "", false) if err != nil { log.ErrorErrorf(err, "parse args failed") return errors.Trace(err) } log.Debugf("parse args = {%+v}", args) if args["remove-lock"].(bool) { return errors.Trace(runRemoveLock()) } if args["remove-fence"].(bool) { return errors.Trace(runRemoveFence()) } if args["gc"].(bool) { if args["-n"].(bool) { n, err := strconv.Atoi(args["<num>"].(string)) if err != nil { log.ErrorErrorf(err, "parse <num> failed") return err } return runGCKeepN(n) } else if args["-s"].(bool) { sec, err := strconv.Atoi(args["<seconds>"].(string)) if err != nil { log.ErrorErrorf(err, "parse <seconds> failed") return errors.Trace(err) } return runGCKeepNSec(sec) } } return nil }
func runSlotInfo(slotId int) error { var v interface{} err := callApi(METHOD_GET, fmt.Sprintf("/api/slot/%d", slotId), nil, &v) if err != nil { return errors.Trace(err) } fmt.Println(jsonify(v)) return nil }