Пример #1
0
// NewRollingFile validates its arguments and returns a rollingFile for
// basePath as an io.WriteCloser.
//
// maxFileFrag and maxFragSize must both be positive, and basePath must end in
// a file name; otherwise an error is returned.
//
// The starting fragment is found by probing "<basePath>.<i>" for i in
// [0, maxFileFrag): the first index whose file does not exist is selected, and
// the stored fileFrag is that index minus one. When no probed file is reported
// as missing, the stored value is -1.
func NewRollingFile(basePath string, maxFileFrag int, maxFragSize int64) (io.WriteCloser, error) {
	switch {
	case maxFileFrag <= 0:
		return nil, errors.Errorf("invalid max file-frag = %d", maxFileFrag)
	case maxFragSize <= 0:
		return nil, errors.Errorf("invalid max frag-size = %d", maxFragSize)
	}
	if _, name := path.Split(basePath); name == "" {
		return nil, errors.Errorf("invalid base-path = %s, file name is required", basePath)
	}

	// Index of the first fragment file that is missing on disk; remains 0 when
	// every probe either succeeds or fails for another reason.
	firstMissing := 0
	for idx := 0; idx < maxFileFrag; idx++ {
		if _, err := os.Stat(fmt.Sprintf("%s.%d", basePath, idx)); os.IsNotExist(err) {
			firstMissing = idx
			break
		}
	}

	rf := &rollingFile{
		maxFileFrag: maxFileFrag,
		maxFragSize: maxFragSize,

		basePath: basePath,
		fileFrag: firstMissing - 1,
	}
	return rf, nil
}
Пример #2
0
// getLivingNodeInfos builds one NodeInfo per server group of the current
// product, holding the group's id, the ids of its online slots and the
// "maxmemory" setting read from the group's master.
//
// It returns an error when the groups or slots cannot be read, when a group
// has no master, when a master's maxmemory cannot be fetched, parsed or is not
// positive, or when the number of online slots across the returned groups
// differs from models.DEFAULT_SLOT_NUM.
func getLivingNodeInfos(zkConn zkhelper.Conn) ([]*NodeInfo, error) {
	groups, err := models.ServerGroups(zkConn, globalEnv.ProductName())
	if err != nil {
		return nil, errors.Trace(err)
	}
	slots, err := models.Slots(zkConn, globalEnv.ProductName())
	if err != nil {
		// Without this check a failed slot read would surface later as the
		// misleading "not all slots are online" error.
		return nil, errors.Trace(err)
	}

	// Online slot ids keyed by the group that owns them.
	slotMap := make(map[int][]int)
	for _, slot := range slots {
		if slot.State.Status == models.SLOT_STATUS_ONLINE {
			slotMap[slot.GroupId] = append(slotMap[slot.GroupId], slot.Id)
		}
	}

	var ret []*NodeInfo
	cnt := 0
	for _, g := range groups {
		master, err := g.Master(zkConn)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if master == nil {
			return nil, errors.Errorf("group %d has no master", g.Id)
		}
		out, err := utils.GetRedisConfig(master.Addr, globalEnv.Password(), "maxmemory")
		if err != nil {
			return nil, errors.Trace(err)
		}
		maxMem, err := strconv.ParseInt(out, 10, 64)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if maxMem <= 0 {
			return nil, errors.Errorf("redis %s should set maxmemory", master.Addr)
		}
		node := &NodeInfo{
			GroupId:   g.Id,
			CurSlots:  slotMap[g.Id],
			MaxMemory: maxMem,
		}
		ret = append(ret, node)
		cnt += len(node.CurSlots)
	}

	// Only online slots that belong to a listed group are counted.
	if cnt != models.DEFAULT_SLOT_NUM {
		return nil, errors.Errorf("not all slots are online")
	}
	return ret, nil
}
Пример #3
0
// decodeResp reads one type byte from the decoder and decodes the value that
// follows according to that type.
//
// For a type byte that matches none of the known RESP types: at depth 0 the
// byte is pushed back and the input is decoded with
// decodeSingleLineBulkBytesArray into an array response; at any other depth
// an error is returned.
//
// Errors from the per-type decoders are returned together with the partially
// filled *Resp, exactly as the decoders report them.
func (d *Decoder) decodeResp(depth int) (*Resp, error) {
	first, err := d.ReadByte()
	if err != nil {
		return nil, errors.Trace(err)
	}

	kind := RespType(first)
	switch kind {
	case TypeString, TypeError, TypeInt:
		resp := &Resp{Type: kind}
		resp.Value, err = d.decodeTextBytes()
		return resp, err
	case TypeBulkBytes:
		resp := &Resp{Type: kind}
		resp.Value, err = d.decodeBulkBytes()
		return resp, err
	case TypeArray:
		resp := &Resp{Type: kind}
		resp.Array, err = d.decodeArray(depth)
		return resp, err
	}

	// Unknown type byte: only tolerated at the top level.
	if depth != 0 {
		return nil, errors.Errorf("bad resp type %s", kind)
	}
	if err := d.UnreadByte(); err != nil {
		return nil, errors.Trace(err)
	}
	resp := &Resp{Type: TypeArray}
	resp.Array, err = d.decodeSingleLineBulkBytesArray()
	return resp, err
}
Пример #4
0
// RemoveServer deletes the server registered under addr from this group.
//
// The server record is read from its coordinator node and decoded; a server
// of type SERVER_TYPE_MASTER is refused. Otherwise the node is deleted, the
// first entry with the same address is dropped from self.Servers, and an
// ACTION_TYPE_SERVER_GROUP_CHANGED action is issued without proxy
// confirmation.
func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error {
	nodePath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr)

	raw, _, err := zkConn.Get(nodePath)
	if err != nil {
		return errors.Trace(err)
	}

	var srv Server
	if err := json.Unmarshal(raw, &srv); err != nil {
		return errors.Trace(err)
	}
	log.Info(srv)
	if srv.Type == SERVER_TYPE_MASTER {
		return errors.Errorf("cannot remove master, use promote first")
	}

	if err := zkConn.Delete(nodePath, -1); err != nil {
		return errors.Trace(err)
	}

	// Keep the in-memory server list in step with the coordinator.
	for idx, member := range self.Servers {
		if member.Addr == srv.Addr {
			self.Servers = append(self.Servers[:idx], self.Servers[idx+1:]...)
			break
		}
	}

	// Removing a slave does not need proxy confirmation.
	err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false)
	return errors.Trace(err)
}
Пример #5
0
// preMigrateCheck verifies that this task does not conflict with a migration
// already in progress.
//
// It passes when no slot is migrating, or when the single migrating slot is
// this task's own slot heading to this task's target group. More than one
// migrating slot is always reported as an error.
func (t *MigrateTask) preMigrateCheck() error {
	migrating, err := models.GetMigratingSlots(safeZkConn, t.productName)
	if err != nil {
		return errors.Trace(err)
	}

	switch len(migrating) {
	case 0:
		return nil
	case 1:
		current := migrating[0]
		sameTask := t.NewGroupId == current.State.MigrateStatus.To && t.SlotId == current.Id
		if !sameTask {
			return errors.Errorf("there is a migrating slot %+v, finish it first", current)
		}
		return nil
	default:
		return errors.Errorf("more than one slots are migrating, unknown error")
	}
}
Пример #6
0
// SetSlotRange assigns every slot in [fromSlot, toSlot] to groupId and sets
// its status.
//
// status must be SLOT_STATUS_OFFLINE or SLOT_STATUS_ONLINE and the group must
// exist. Each slot must currently be offline; the first slot that is not
// aborts the operation with an error, leaving earlier slots already written.
// After all slots are stored, an ACTION_TYPE_MULTI_SLOT_CHANGED action
// describing the range is issued with proxy confirmation.
func SetSlotRange(zkConn zkhelper.Conn, productName string, fromSlot, toSlot, groupId int, status SlotStatus) error {
	switch status {
	case SLOT_STATUS_OFFLINE, SLOT_STATUS_ONLINE:
		// accepted
	default:
		return errors.Errorf("invalid status")
	}

	groupFound, err := GroupExists(zkConn, productName, groupId)
	if err != nil {
		return errors.Trace(err)
	}
	if !groupFound {
		return errors.Errorf("group %d is not found", groupId)
	}

	for slotID := fromSlot; slotID <= toSlot; slotID++ {
		slot, err := GetSlot(zkConn, productName, slotID)
		if err != nil {
			return errors.Trace(err)
		}
		if slot.State.Status != SLOT_STATUS_OFFLINE {
			msg := fmt.Sprintf("slot %d is not offline, if you want to change the group for a slot, use migrate", slot.Id)
			return errors.New(msg)
		}

		slot.GroupId = groupId
		slot.State.Status = status
		encoded, err := json.Marshal(slot)
		if err != nil {
			return errors.Trace(err)
		}

		slotPath := GetSlotPath(productName, slotID)
		if _, err := zkhelper.CreateOrUpdate(zkConn, slotPath, string(encoded), 0, zkhelper.DefaultFileACLs(), true); err != nil {
			return errors.Trace(err)
		}
	}

	// Announce the whole range in a single action.
	change := SlotMultiSetParam{
		From:    fromSlot,
		To:      toSlot,
		GroupId: groupId,
		Status:  status,
	}
	err = NewAction(zkConn, productName, ACTION_TYPE_MULTI_SLOT_CHANGED, change, "", true)
	return errors.Trace(err)
}
Пример #7
0
// NewZkConn opens a coordinator connection for the configured provider.
//
// "zookeeper" connects to e.zkAddr as given. "etcd" trims surrounding
// whitespace from e.zkAddr and prepends "http://" when that prefix is absent.
// Any other provider value yields an error.
func (e *CodisEnv) NewZkConn() (zkhelper.Conn, error) {
	if e.provider == "zookeeper" {
		return zkhelper.ConnectToZk(e.zkAddr, 30)
	}
	if e.provider == "etcd" {
		const scheme = "http://"
		endpoint := strings.TrimSpace(e.zkAddr)
		if !strings.HasPrefix(endpoint, scheme) {
			endpoint = scheme + endpoint
		}
		return zkhelper.NewEtcdConn(endpoint, 30)
	}
	return nil, errors.Errorf("need coordinator in config file, %s", e)
}
Пример #8
0
// callApi sends a JSON request to the dashboard and decodes the JSON reply
// into retVal.
//
// apiPath must start with "/"; it is appended to
// "http://" + globalEnv.DashboardAddr(). params is marshalled with
// encoding/json and sent as the request body using the given method. On HTTP
// status 200 the response body is unmarshalled into retVal; any other status
// produces an error containing the status code and the response body.
func callApi(method HttpMethod, apiPath string, params interface{}, retVal interface{}) error {
	// HasPrefix also rejects the empty string, which indexing apiPath[0]
	// would turn into an index-out-of-range panic.
	if !strings.HasPrefix(apiPath, "/") {
		return errors.Errorf("api path must starts with /")
	}
	url := "http://" + globalEnv.DashboardAddr() + apiPath
	client := &http.Client{Transport: http.DefaultTransport}

	b, err := json.Marshal(params)
	if err != nil {
		return errors.Trace(err)
	}

	req, err := http.NewRequest(string(method), url, strings.NewReader(string(b)))
	if err != nil {
		return errors.Trace(err)
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Errorf("can't connect to dashboard, please check 'dashboard_addr' is corrent in config file")
		return errors.Trace(err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return errors.Trace(err)
	}

	if resp.StatusCode != 200 {
		return errors.Errorf("http status code %d, %s", resp.StatusCode, string(body))
	}
	if err := json.Unmarshal(body, retVal); err != nil {
		return errors.Trace(err)
	}
	return nil
}
Пример #9
0
// Remove deletes this server group from the coordinator.
//
// The group must not be referenced by any slot: removal is refused when a
// slot is assigned to the group, or when a slot in MIGRATE or PRE_MIGRATE
// state is migrating away from it. Once the group's node tree has been
// deleted, an ACTION_TYPE_SERVER_GROUP_REMOVE action is issued without proxy
// confirmation.
func (self *ServerGroup) Remove(zkConn zkhelper.Conn) error {
	// check if this group is not used by any slot
	slots, err := Slots(zkConn, self.ProductName)
	if err != nil {
		return errors.Trace(err)
	}

	for _, slot := range slots {
		if slot.GroupId == self.Id {
			return errors.Errorf("group %d is using by slot %d", slot.GroupId, slot.Id)
		}
		if (slot.State.Status == SLOT_STATUS_MIGRATE || slot.State.Status == SLOT_STATUS_PRE_MIGRATE) && slot.State.MigrateStatus.From == self.Id {
			return errors.Errorf("slot %d has residual data remain in group %d", slot.Id, self.Id)
		}
	}

	// do delete
	zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id)
	if err := zkhelper.DeleteRecursive(zkConn, zkPath, -1); err != nil {
		// Previously this error was overwritten by the NewAction result, so a
		// failed delete was still announced as a successful removal.
		return errors.Trace(err)
	}

	// we know that there's no slots affected, so this action doesn't need proxy confirm
	err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_REMOVE, self, "", false)
	return errors.Trace(err)
}
Пример #10
0
// encodeResp writes r's type byte and then its payload using the encoder that
// matches the type.
//
// The type byte is written before the type is checked, so an unsupported type
// results in an error after that byte has already been emitted.
func (e *Encoder) encodeResp(r *Resp) error {
	if err := e.WriteByte(byte(r.Type)); err != nil {
		return errors.Trace(err)
	}

	switch kind := r.Type; kind {
	case TypeArray:
		return e.encodeArray(r.Array)
	case TypeBulkBytes:
		return e.encodeBulkBytes(r.Value)
	case TypeString, TypeError, TypeInt:
		return e.encodeTextBytes(r.Value)
	}
	return errors.Errorf("bad resp type %s", r.Type)
}
Пример #11
0
// Create registers this server group in the coordinator.
//
// A negative group id is rejected. The group's node is created or updated
// with empty content, after which an ACTION_TYPE_SERVER_GROUP_CHANGED action
// is issued without proxy confirmation.
func (self *ServerGroup) Create(zkConn zkhelper.Conn) error {
	if self.Id < 0 {
		return errors.Errorf("invalid server group id %d", self.Id)
	}

	groupPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d", self.ProductName, self.Id)
	if _, err := zkhelper.CreateOrUpdate(zkConn, groupPath, "", 0, zkhelper.DefaultDirACLs(), true); err != nil {
		return errors.Trace(err)
	}

	if err := NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false); err != nil {
		return errors.Trace(err)
	}
	return nil
}
Пример #12
0
func runCommand(cmd string, args []string) (err error) {
	argv := make([]string, 1)
	argv[0] = cmd
	argv = append(argv, args...)
	switch cmd {
	case "action":
		return errors.Trace(cmdAction(argv))
	case "dashboard":
		return errors.Trace(cmdDashboard(argv))
	case "server":
		return errors.Trace(cmdServer(argv))
	case "proxy":
		return errors.Trace(cmdProxy(argv))
	case "slot":
		return errors.Trace(cmdSlot(argv))
	}
	return errors.Errorf("%s is not a valid command. See 'codis-config -h'", cmd)
}
Пример #13
0
// SetMigrateStatus marks the slot as migrating from fromGroup to toGroup and
// persists it.
//
// Both group ids must be non-negative. Unless the slot is already in
// SLOT_STATUS_MIGRATE, it is first stored with SLOT_STATUS_PRE_MIGRATE. It is
// then stored again with SLOT_STATUS_MIGRATE, the migrate source and target
// filled in, and GroupId set to toGroup.
func (s *Slot) SetMigrateStatus(zkConn zkhelper.Conn, fromGroup, toGroup int) error {
	if fromGroup < 0 || toGroup < 0 {
		return errors.Errorf("invalid group id, from %d, to %d", fromGroup, toGroup)
	}

	// The pre_migrate step is skipped for a slot that is already migrating.
	if alreadyMigrating := s.State.Status == SLOT_STATUS_MIGRATE; !alreadyMigrating {
		s.State.Status = SLOT_STATUS_PRE_MIGRATE
		if err := s.Update(zkConn); err != nil {
			return errors.Trace(err)
		}
	}

	s.GroupId = toGroup
	s.State.MigrateStatus.From = fromGroup
	s.State.MigrateStatus.To = toGroup
	s.State.Status = SLOT_STATUS_MIGRATE
	return s.Update(zkConn)
}
Пример #14
0
// GetGroup loads the server group groupId of productName together with its
// server list.
//
// It returns an error when the existence check fails, when the group does not
// exist, or when its servers cannot be fetched.
func GetGroup(zkConn zkhelper.Conn, productName string, groupId int) (*ServerGroup, error) {
	found, err := GroupExists(zkConn, productName, groupId)
	switch {
	case err != nil:
		return nil, errors.Trace(err)
	case !found:
		return nil, errors.Errorf("group %d is not found", groupId)
	}

	grp := &ServerGroup{ProductName: productName, Id: groupId}
	servers, err := grp.GetServers(zkConn)
	if err != nil {
		return nil, errors.Trace(err)
	}
	grp.Servers = servers
	return grp, nil
}
Пример #15
0
// SlaveOf connects to slave and issues "SLAVEOF <host> <port>" for master.
//
// slave and master must differ. master must be in "host:port" form; it is
// split only after the connection to slave has been established. A dial error
// is returned as-is, while later errors are wrapped with errors.Trace.
func SlaveOf(slave, passwd string, master string) error {
	if slave == master {
		return errors.Errorf("can not slave of itself")
	}

	conn, err := DialToTimeout(slave, passwd, 15*time.Minute, 5*time.Second)
	if err != nil {
		return err
	}
	defer conn.Close()

	masterHost, masterPort, err := net.SplitHostPort(master)
	if err != nil {
		return errors.Trace(err)
	}

	_, err = conn.Do("SLAVEOF", masterHost, masterPort)
	if err != nil {
		return errors.Trace(err)
	}
	return nil
}
Пример #16
0
// Promote makes the server at addr the master of this group.
//
// addr must match one of self.Servers. The chosen server is first passed to
// utils.SlaveNoOne. If the group currently has a master, that server is
// re-added with type SERVER_TYPE_OFFLINE. Finally the chosen server is
// re-added with type SERVER_TYPE_MASTER.
func (self *ServerGroup) Promote(conn zkhelper.Conn, addr, passwd string) error {
	// Locate the promotion candidate; nil means addr is unknown to the group.
	var candidate *Server
	for _, srv := range self.Servers {
		if srv.Addr == addr {
			candidate = srv
			break
		}
	}
	if candidate == nil {
		return errors.Errorf("no such addr %s", addr)
	}

	if err := utils.SlaveNoOne(candidate.Addr, passwd); err != nil {
		return errors.Trace(err)
	}

	// Take the previous master (if any) offline before promoting.
	oldMaster, err := self.Master(conn)
	if err != nil {
		return errors.Trace(err)
	}
	if oldMaster != nil {
		oldMaster.Type = SERVER_TYPE_OFFLINE
		if err := self.AddServer(conn, oldMaster, passwd); err != nil {
			return errors.Trace(err)
		}
	}

	candidate.Type = SERVER_TYPE_MASTER
	err = self.AddServer(conn, candidate, passwd)
	return errors.Trace(err)
}
Пример #17
0
// NewActionWithTimeout publishes an action for the online proxies of
// productName and, when needConfirm is set, waits for them to respond.
//
// The action records actionType, desc, target, the current Unix time as a
// decimal string, and the JSON form of every online proxy as its receivers.
//
// When needConfirm is true the call first fails if the fence proxy map
// contains any address that is not among the online proxies, and after
// publishing it waits via WaitForReceiverWithTimeout with timeoutInMs.
//
// Publishing creates a sequential node under the "ActionResponse" path to
// obtain a sequence name, replaces that node with a directory node, and then
// creates the action node with the same name under the watch-action path.
func NewActionWithTimeout(zkConn zkhelper.Conn, productName string, actionType ActionType, target interface{}, desc string, needConfirm bool, timeoutInMs int) error {
	ts := strconv.FormatInt(time.Now().Unix(), 10)

	action := &Action{
		Type:   actionType,
		Desc:   desc,
		Target: target,
		Ts:     ts,
	}

	// set action receivers
	proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool {
		return p.State == PROXY_STATE_ONLINE
	})
	if err != nil {
		return errors.Trace(err)
	}
	if needConfirm {
		// do fencing here, make sure 'offline' proxies are really offline
		// now we only check whether the proxy lists are match
		fenceProxies, err := GetFenceProxyMap(zkConn, productName)
		if err != nil {
			return errors.Trace(err)
		}
		for _, proxy := range proxies {
			delete(fenceProxies, proxy.Addr)
		}
		if len(fenceProxies) > 0 {
			errMsg := bytes.NewBufferString("Some proxies may not stop cleanly:")
			for k := range fenceProxies {
				errMsg.WriteString(" ")
				errMsg.WriteString(k)
			}
			return errors.Errorf("%s", errMsg)
		}
	}
	for _, p := range proxies {
		buf, err := json.Marshal(p)
		if err != nil {
			return errors.Trace(err)
		}
		action.Receivers = append(action.Receivers, string(buf))
	}

	// target is an arbitrary value supplied by the caller, so marshalling can
	// fail; do not publish an empty payload in that case.
	b, err := json.Marshal(action)
	if err != nil {
		return errors.Trace(err)
	}

	prefix := GetWatchActionPath(productName)
	//action root path
	err = CreateActionRootPath(zkConn, prefix)
	if err != nil {
		return errors.Trace(err)
	}

	//response path
	respPath := path.Join(path.Dir(prefix), "ActionResponse")
	err = CreateActionRootPath(zkConn, respPath)
	if err != nil {
		return errors.Trace(err)
	}

	//create response node, etcd do not support create in order directory
	//get path first
	actionRespPath, err := zkConn.Create(respPath+"/", b, int32(zk.FlagSequence), zkhelper.DefaultFileACLs())
	if err != nil {
		log.ErrorErrorf(err, "zk create resp node = %s", respPath)
		return errors.Trace(err)
	}

	//remove file then create directory
	// NOTE(review): the Delete result is not checked here; presumably a failed
	// delete shows up as an error from the Create below - confirm.
	zkConn.Delete(actionRespPath, -1)
	actionRespPath, err = zkConn.Create(actionRespPath, b, 0, zkhelper.DefaultDirACLs())
	if err != nil {
		log.ErrorErrorf(err, "zk create resp node = %s", respPath)
		return errors.Trace(err)
	}

	suffix := path.Base(actionRespPath)

	// create action node
	actionPath := path.Join(prefix, suffix)
	_, err = zkConn.Create(actionPath, b, 0, zkhelper.DefaultFileACLs())
	if err != nil {
		log.ErrorErrorf(err, "zk create action path = %s", actionPath)
		return errors.Trace(err)
	}

	if needConfirm {
		if err := WaitForReceiverWithTimeout(zkConn, productName, actionRespPath, proxies, timeoutInMs); err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}
Пример #18
0
// SetProxyStatus stores a new state for the proxy proxyName of productName.
//
// status must be PROXY_STATE_ONLINE, PROXY_STATE_MARK_OFFLINE or
// PROXY_STATE_OFFLINE. Before a proxy is set online, every slot must be in
// SLOT_STATUS_ONLINE or SLOT_STATUS_MIGRATE and must have a valid group id.
//
// For PROXY_STATE_MARK_OFFLINE the call blocks after the update, watching the
// proxy node until it disappears or its state becomes PROXY_STATE_OFFLINE.
// No timeout is applied to that wait.
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error {
	p, err := GetProxyInfo(zkConn, productName, proxyName)
	if err != nil {
		return errors.Trace(err)
	}

	if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE {
		return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status)
	}

	// check slot status before setting proxy online
	if status == PROXY_STATE_ONLINE {
		slots, err := Slots(zkConn, productName)
		if err != nil {
			return errors.Trace(err)
		}
		for _, slot := range slots {
			if slot.State.Status != SLOT_STATUS_ONLINE && slot.State.Status != SLOT_STATUS_MIGRATE {
				return errors.Errorf("slot %v is not online or migrate", slot)
			}
			if slot.GroupId == INVALID_ID {
				return errors.Errorf("slot %v has invalid group id", slot)
			}
		}
	}

	p.State = status
	b, err := json.Marshal(p)
	if err != nil {
		// Never write an empty payload over the proxy node.
		return errors.Trace(err)
	}

	proxyPath := path.Join(GetProxyPath(productName), proxyName)
	_, err = zkConn.Set(proxyPath, b, -1)
	if err != nil {
		return errors.Trace(err)
	}

	if status == PROXY_STATE_MARK_OFFLINE {
		// wait for the proxy down
		for {
			_, _, c, err := zkConn.GetW(proxyPath)
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			// Block until the watched node changes, then re-read its state.
			<-c
			info, err := GetProxyInfo(zkConn, productName, proxyName)
			log.Info("mark_offline, check proxy status:", proxyName, info, err)
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				log.Info("shutdown proxy successful")
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			if info.State == PROXY_STATE_OFFLINE {
				log.Infof("proxy: %s offline success!", proxyName)
				return nil
			}
		}
	}

	return nil
}
Пример #19
0
// migrateSingleSlot migrates slot slotId to group to and puts the slot back
// online afterwards.
//
// An offline slot, or a slot whose source group equals to, is skipped with a
// warning and a nil result. The source group is the slot's current group, or
// the recorded migrate source when the slot is already in
// SLOT_STATUS_MIGRATE. Both groups must exist. Progress is logged whenever
// the reported Remain value is a multiple of 5000.
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
	slot, err := models.GetSlot(t.zkConn, t.productName, slotId)
	if err != nil {
		log.ErrorErrorf(err, "get slot info failed")
		return err
	}
	if slot.State.Status == models.SLOT_STATUS_OFFLINE {
		log.Warnf("status is offline: %+v", slot)
		return nil
	}

	// A slot that is already migrating keeps its original source group.
	from := slot.GroupId
	if slot.State.Status == models.SLOT_STATUS_MIGRATE {
		from = slot.State.MigrateStatus.From
	}

	// Both ends of the migration must exist.
	srcFound, err := models.GroupExists(t.zkConn, t.productName, from)
	if err != nil {
		return errors.Trace(err)
	}
	if !srcFound {
		log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
		return errors.Errorf("group %d not found", from)
	}
	dstFound, err := models.GroupExists(t.zkConn, t.productName, to)
	if err != nil {
		return errors.Trace(err)
	}
	if !dstFound {
		return errors.Errorf("group %d not found", to)
	}

	// Migrating a slot onto its own group is a no-op.
	if from == to {
		log.Warnf("from == to, ignore: %+v", slot)
		return nil
	}

	if err := slot.SetMigrateStatus(t.zkConn, from, to); err != nil {
		log.ErrorErrorf(err, "set migrate status failed")
		return err
	}

	onProgress := func(p SlotMigrateProgress) {
		if p.Remain%5000 == 0 {
			log.Infof("%+v", p)
		}
	}
	if err := t.Migrate(slot, from, to, onProgress); err != nil {
		log.ErrorErrorf(err, "migrate slot failed")
		return err
	}

	// Migration finished: clear the migrate markers and bring the slot online.
	slot.State.MigrateStatus.From = models.INVALID_ID
	slot.State.MigrateStatus.To = models.INVALID_ID
	slot.State.Status = models.SLOT_STATUS_ONLINE
	if err := slot.Update(t.zkConn); err != nil {
		log.ErrorErrorf(err, "update zk status failed, should be: %+v", slot)
		return err
	}
	return nil
}