func NewRollingFile(basePath string, maxFileFrag int, maxFragSize int64) (io.WriteCloser, error) { if maxFileFrag <= 0 { return nil, errors.Errorf("invalid max file-frag = %d", maxFileFrag) } if maxFragSize <= 0 { return nil, errors.Errorf("invalid max frag-size = %d", maxFragSize) } if _, file := path.Split(basePath); file == "" { return nil, errors.Errorf("invalid base-path = %s, file name is required", basePath) } var fileFrag = 0 for i := 0; i < maxFileFrag; i++ { _, err := os.Stat(fmt.Sprintf("%s.%d", basePath, i)) if err != nil && os.IsNotExist(err) { fileFrag = i break } } return &rollingFile{ maxFileFrag: maxFileFrag, maxFragSize: maxFragSize, basePath: basePath, fileFrag: fileFrag - 1, }, nil }
func getLivingNodeInfos(zkConn zkhelper.Conn) ([]*NodeInfo, error) { groups, err := models.ServerGroups(zkConn, globalEnv.ProductName()) if err != nil { return nil, errors.Trace(err) } slots, err := models.Slots(zkConn, globalEnv.ProductName()) slotMap := make(map[int][]int) for _, slot := range slots { if slot.State.Status == models.SLOT_STATUS_ONLINE { slotMap[slot.GroupId] = append(slotMap[slot.GroupId], slot.Id) } } var ret []*NodeInfo for _, g := range groups { master, err := g.Master(zkConn) if err != nil { return nil, errors.Trace(err) } if master == nil { return nil, errors.Errorf("group %d has no master", g.Id) } out, err := utils.GetRedisConfig(master.Addr, globalEnv.Password(), "maxmemory") if err != nil { return nil, errors.Trace(err) } maxMem, err := strconv.ParseInt(out, 10, 64) if err != nil { return nil, errors.Trace(err) } if maxMem <= 0 { return nil, errors.Errorf("redis %s should set maxmemory", master.Addr) } node := &NodeInfo{ GroupId: g.Id, CurSlots: slotMap[g.Id], MaxMemory: maxMem, } ret = append(ret, node) } cnt := 0 for _, info := range ret { cnt += len(info.CurSlots) } if cnt != models.DEFAULT_SLOT_NUM { return nil, errors.Errorf("not all slots are online") } return ret, nil }
func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error { zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr) data, _, err := zkConn.Get(zkPath) if err != nil { return errors.Trace(err) } var s Server err = json.Unmarshal(data, &s) if err != nil { return errors.Trace(err) } log.Info(s) if s.Type == SERVER_TYPE_MASTER { return errors.Errorf("cannot remove master, use promote first") } err = zkConn.Delete(zkPath, -1) if err != nil { return errors.Trace(err) } // update server list for i := 0; i < len(self.Servers); i++ { if self.Servers[i].Addr == s.Addr { self.Servers = append(self.Servers[:i], self.Servers[i+1:]...) break } } // remove slave won't need proxy confirm err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) return errors.Trace(err) }
func (d *Decoder) decodeResp(depth int) (*Resp, error) { b, err := d.ReadByte() if err != nil { return nil, errors.Trace(err) } switch t := RespType(b); t { case TypeString, TypeError, TypeInt: r := &Resp{Type: t} r.Value, err = d.decodeTextBytes() return r, err case TypeBulkBytes: r := &Resp{Type: t} r.Value, err = d.decodeBulkBytes() return r, err case TypeArray: r := &Resp{Type: t} r.Array, err = d.decodeArray(depth) return r, err default: if depth != 0 { return nil, errors.Errorf("bad resp type %s", t) } if err := d.UnreadByte(); err != nil { return nil, errors.Trace(err) } r := &Resp{Type: TypeArray} r.Array, err = d.decodeSingleLineBulkBytesArray() return r, err } }
func (t *MigrateTask) preMigrateCheck() error { slots, err := models.GetMigratingSlots(safeZkConn, t.productName) if err != nil { return errors.Trace(err) } // check if there is migrating slot if len(slots) > 1 { return errors.Errorf("more than one slots are migrating, unknown error") } if len(slots) == 1 { slot := slots[0] if t.NewGroupId != slot.State.MigrateStatus.To || t.SlotId != slot.Id { return errors.Errorf("there is a migrating slot %+v, finish it first", slot) } } return nil }
func SetSlotRange(zkConn zkhelper.Conn, productName string, fromSlot, toSlot, groupId int, status SlotStatus) error { if status != SLOT_STATUS_OFFLINE && status != SLOT_STATUS_ONLINE { return errors.Errorf("invalid status") } ok, err := GroupExists(zkConn, productName, groupId) if err != nil { return errors.Trace(err) } if !ok { return errors.Errorf("group %d is not found", groupId) } for i := fromSlot; i <= toSlot; i++ { s, err := GetSlot(zkConn, productName, i) if err != nil { return errors.Trace(err) } if s.State.Status != SLOT_STATUS_OFFLINE { return errors.New(fmt.Sprintf("slot %d is not offline, if you want to change the group for a slot, use migrate", s.Id)) } s.GroupId = groupId s.State.Status = status data, err := json.Marshal(s) if err != nil { return errors.Trace(err) } zkPath := GetSlotPath(productName, i) _, err = zkhelper.CreateOrUpdate(zkConn, zkPath, string(data), 0, zkhelper.DefaultFileACLs(), true) if err != nil { return errors.Trace(err) } } param := SlotMultiSetParam{ From: fromSlot, To: toSlot, GroupId: groupId, Status: status, } err = NewAction(zkConn, productName, ACTION_TYPE_MULTI_SLOT_CHANGED, param, "", true) return errors.Trace(err) }
func (e *CodisEnv) NewZkConn() (zkhelper.Conn, error) { switch e.provider { case "zookeeper": return zkhelper.ConnectToZk(e.zkAddr, 30) case "etcd": addr := strings.TrimSpace(e.zkAddr) if !strings.HasPrefix(addr, "http://") { addr = "http://" + addr } return zkhelper.NewEtcdConn(addr, 30) } return nil, errors.Errorf("need coordinator in config file, %s", e) }
func callApi(method HttpMethod, apiPath string, params interface{}, retVal interface{}) error { if apiPath[0] != '/' { return errors.Errorf("api path must starts with /") } url := "http://" + globalEnv.DashboardAddr() + apiPath client := &http.Client{Transport: http.DefaultTransport} b, err := json.Marshal(params) if err != nil { return errors.Trace(err) } req, err := http.NewRequest(string(method), url, strings.NewReader(string(b))) if err != nil { return errors.Trace(err) } resp, err := client.Do(req) if err != nil { log.Errorf("can't connect to dashboard, please check 'dashboard_addr' is corrent in config file") return errors.Trace(err) } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return errors.Trace(err) } if resp.StatusCode == 200 { err := json.Unmarshal(body, retVal) if err != nil { return errors.Trace(err) } return nil } return errors.Errorf("http status code %d, %s", resp.StatusCode, string(body)) }
func (e *Encoder) encodeResp(r *Resp) error { if err := e.WriteByte(byte(r.Type)); err != nil { return errors.Trace(err) } switch r.Type { default: return errors.Errorf("bad resp type %s", r.Type) case TypeString, TypeError, TypeInt: return e.encodeTextBytes(r.Value) case TypeBulkBytes: return e.encodeBulkBytes(r.Value) case TypeArray: return e.encodeArray(r.Array) } }
func (self *ServerGroup) Remove(zkConn zkhelper.Conn) error { // check if this group is not used by any slot slots, err := Slots(zkConn, self.ProductName) if err != nil { return errors.Trace(err) } for _, slot := range slots { if slot.GroupId == self.Id { return errors.Errorf("group %d is using by slot %d", slot.GroupId, slot.Id) } if (slot.State.Status == SLOT_STATUS_MIGRATE || slot.State.Status == SLOT_STATUS_PRE_MIGRATE) && slot.State.MigrateStatus.From == self.Id { return errors.Errorf("slot %d has residual data remain in group %d", slot.Id, self.Id) } } // do delete zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) err = zkhelper.DeleteRecursive(zkConn, zkPath, -1) // we know that there's no slots affected, so this action doesn't need proxy confirm err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_REMOVE, self, "", false) return errors.Trace(err) }
func (self *ServerGroup) Create(zkConn zkhelper.Conn) error { if self.Id < 0 { return errors.Errorf("invalid server group id %d", self.Id) } zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id) _, err := zkhelper.CreateOrUpdate(zkConn, zkPath, "", 0, zkhelper.DefaultDirACLs(), true) if err != nil { return errors.Trace(err) } err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false) if err != nil { return errors.Trace(err) } return nil }
func runCommand(cmd string, args []string) (err error) { argv := make([]string, 1) argv[0] = cmd argv = append(argv, args...) switch cmd { case "action": return errors.Trace(cmdAction(argv)) case "dashboard": return errors.Trace(cmdDashboard(argv)) case "server": return errors.Trace(cmdServer(argv)) case "proxy": return errors.Trace(cmdProxy(argv)) case "slot": return errors.Trace(cmdSlot(argv)) } return errors.Errorf("%s is not a valid command. See 'codis-config -h'", cmd) }
func (s *Slot) SetMigrateStatus(zkConn zkhelper.Conn, fromGroup, toGroup int) error { if fromGroup < 0 || toGroup < 0 { return errors.Errorf("invalid group id, from %d, to %d", fromGroup, toGroup) } // skip pre_migrate if slot is already migrating if s.State.Status != SLOT_STATUS_MIGRATE { s.State.Status = SLOT_STATUS_PRE_MIGRATE err := s.Update(zkConn) if err != nil { return errors.Trace(err) } } s.State.Status = SLOT_STATUS_MIGRATE s.State.MigrateStatus.From = fromGroup s.State.MigrateStatus.To = toGroup s.GroupId = toGroup return s.Update(zkConn) }
func GetGroup(zkConn zkhelper.Conn, productName string, groupId int) (*ServerGroup, error) { exists, err := GroupExists(zkConn, productName, groupId) if err != nil { return nil, errors.Trace(err) } if !exists { return nil, errors.Errorf("group %d is not found", groupId) } group := &ServerGroup{ ProductName: productName, Id: groupId, } group.Servers, err = group.GetServers(zkConn) if err != nil { return nil, errors.Trace(err) } return group, nil }
func SlaveOf(slave, passwd string, master string) error { if master == slave { return errors.Errorf("can not slave of itself") } c, err := DialToTimeout(slave, passwd, time.Minute*15, time.Second*5) if err != nil { return err } defer c.Close() host, port, err := net.SplitHostPort(master) if err != nil { return errors.Trace(err) } if _, err := c.Do("SLAVEOF", host, port); err != nil { return errors.Trace(err) } return nil }
func (self *ServerGroup) Promote(conn zkhelper.Conn, addr, passwd string) error { var s *Server exists := false for i := 0; i < len(self.Servers); i++ { if self.Servers[i].Addr == addr { s = self.Servers[i] exists = true break } } if !exists { return errors.Errorf("no such addr %s", addr) } err := utils.SlaveNoOne(s.Addr, passwd) if err != nil { return errors.Trace(err) } // set origin master offline master, err := self.Master(conn) if err != nil { return errors.Trace(err) } // old master may be nil if master != nil { master.Type = SERVER_TYPE_OFFLINE err = self.AddServer(conn, master, passwd) if err != nil { return errors.Trace(err) } } // promote new server to master s.Type = SERVER_TYPE_MASTER err = self.AddServer(conn, s, passwd) return errors.Trace(err) }
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error { // set slot status s, err := models.GetSlot(t.zkConn, t.productName, slotId) if err != nil { log.ErrorErrorf(err, "get slot info failed") return err } if s.State.Status == models.SLOT_STATUS_OFFLINE { log.Warnf("status is offline: %+v", s) return nil } from := s.GroupId if s.State.Status == models.SLOT_STATUS_MIGRATE { from = s.State.MigrateStatus.From } // make sure from group & target group exists exists, err := models.GroupExists(t.zkConn, t.productName, from) if err != nil { return errors.Trace(err) } if !exists { log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to) return errors.Errorf("group %d not found", from) } exists, err = models.GroupExists(t.zkConn, t.productName, to) if err != nil { return errors.Trace(err) } if !exists { return errors.Errorf("group %d not found", to) } // cannot migrate to itself, just ignore if from == to { log.Warnf("from == to, ignore: %+v", s) return nil } // modify slot status if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil { log.ErrorErrorf(err, "set migrate status failed") return err } err = t.Migrate(s, from, to, func(p SlotMigrateProgress) { // on migrate slot progress if p.Remain%5000 == 0 { log.Infof("%+v", p) } }) if err != nil { log.ErrorErrorf(err, "migrate slot failed") return err } // migrate done, change slot status back s.State.Status = models.SLOT_STATUS_ONLINE s.State.MigrateStatus.From = models.INVALID_ID s.State.MigrateStatus.To = models.INVALID_ID if err := s.Update(t.zkConn); err != nil { log.ErrorErrorf(err, "update zk status failed, should be: %+v", s) return err } return nil }
func NewActionWithTimeout(zkConn zkhelper.Conn, productName string, actionType ActionType, target interface{}, desc string, needConfirm bool, timeoutInMs int) error { ts := strconv.FormatInt(time.Now().Unix(), 10) action := &Action{ Type: actionType, Desc: desc, Target: target, Ts: ts, } // set action receivers proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool { return p.State == PROXY_STATE_ONLINE }) if err != nil { return errors.Trace(err) } if needConfirm { // do fencing here, make sure 'offline' proxies are really offline // now we only check whether the proxy lists are match fenceProxies, err := GetFenceProxyMap(zkConn, productName) if err != nil { return errors.Trace(err) } for _, proxy := range proxies { delete(fenceProxies, proxy.Addr) } if len(fenceProxies) > 0 { errMsg := bytes.NewBufferString("Some proxies may not stop cleanly:") for k, _ := range fenceProxies { errMsg.WriteString(" ") errMsg.WriteString(k) } return errors.Errorf("%s", errMsg) } } for _, p := range proxies { buf, err := json.Marshal(p) if err != nil { return errors.Trace(err) } action.Receivers = append(action.Receivers, string(buf)) } b, _ := json.Marshal(action) prefix := GetWatchActionPath(productName) //action root path err = CreateActionRootPath(zkConn, prefix) if err != nil { return errors.Trace(err) } //response path respPath := path.Join(path.Dir(prefix), "ActionResponse") err = CreateActionRootPath(zkConn, respPath) if err != nil { return errors.Trace(err) } //create response node, etcd do not support create in order directory //get path first actionRespPath, err := zkConn.Create(respPath+"/", b, int32(zk.FlagSequence), zkhelper.DefaultFileACLs()) if err != nil { log.ErrorErrorf(err, "zk create resp node = %s", respPath) return errors.Trace(err) } //remove file then create directory zkConn.Delete(actionRespPath, -1) actionRespPath, err = zkConn.Create(actionRespPath, b, 0, zkhelper.DefaultDirACLs()) if err != nil { log.ErrorErrorf(err, "zk create resp node = %s", respPath) return errors.Trace(err) } suffix := path.Base(actionRespPath) // create action node actionPath := path.Join(prefix, suffix) _, err = zkConn.Create(actionPath, b, 0, zkhelper.DefaultFileACLs()) if err != nil { log.ErrorErrorf(err, "zk create action path = %s", actionPath) return errors.Trace(err) } if needConfirm { if err := WaitForReceiverWithTimeout(zkConn, productName, actionRespPath, proxies, timeoutInMs); err != nil { return errors.Trace(err) } } return nil }
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error { p, err := GetProxyInfo(zkConn, productName, proxyName) if err != nil { return errors.Trace(err) } if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE { return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status) } // check slot status before setting proxy online if status == PROXY_STATE_ONLINE { slots, err := Slots(zkConn, productName) if err != nil { return errors.Trace(err) } for _, slot := range slots { if slot.State.Status != SLOT_STATUS_ONLINE && slot.State.Status != SLOT_STATUS_MIGRATE { return errors.Errorf("slot %v is not online or migrate", slot) } if slot.GroupId == INVALID_ID { return errors.Errorf("slot %v has invalid group id", slot) } } } p.State = status b, _ := json.Marshal(p) _, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1) if err != nil { return errors.Trace(err) } if status == PROXY_STATE_MARK_OFFLINE { // wait for the proxy down for { _, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName)) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { return nil } else if err != nil { return errors.Trace(err) } <-c info, err := GetProxyInfo(zkConn, productName, proxyName) log.Info("mark_offline, check proxy status:", proxyName, info, err) if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) { log.Info("shutdown proxy successful") return nil } else if err != nil { return errors.Trace(err) } if info.State == PROXY_STATE_OFFLINE { log.Infof("proxy: %s offline success!", proxyName) return nil } } } return nil }