Example #1
// experimental simple auto rebalance :)
func Rebalance() error {
	targetQuota, err := getQuotaMap(safeZkConn)
	if err != nil {
		return errors.Trace(err)
	}
	livingNodes, err := getLivingNodeInfos(safeZkConn)
	if err != nil {
		return errors.Trace(err)
	}
	log.Infof("start rebalance")
	for _, node := range livingNodes {
		// drain any node whose group holds more slots than its quota
		for len(node.CurSlots) > targetQuota[node.GroupId] {
			for _, dest := range livingNodes {
				// donate one slot per pass to each group still below its quota
				if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] {
					slot := node.CurSlots[len(node.CurSlots)-1]
					// create a migration task
					info := &MigrateTaskInfo{
						Delay:      0,
						SlotId:     slot,
						NewGroupId: dest.GroupId,
						Status:     MIGRATE_TASK_PENDING,
						CreateAt:   strconv.FormatInt(time.Now().Unix(), 10),
					}
					globalMigrateManager.PostTask(info)

					node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1]
					dest.CurSlots = append(dest.CurSlots, slot)
				}
			}
		}
	}
	log.Infof("rebalance tasks submit finish")
	return nil
}
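
The quota map drives the loop above, but getQuotaMap itself is not shown. A minimal standalone sketch of the even-split idea, assuming slots are divided uniformly across groups (evenQuota is a hypothetical helper for illustration; the real implementation may weight groups differently):

package main

import "fmt"

// evenQuota splits totalSlots across the given group ids; the first
// totalSlots%len(groupIds) groups absorb the remainder.
func evenQuota(totalSlots int, groupIds []int) map[int]int {
	quota := make(map[int]int, len(groupIds))
	base, rem := totalSlots/len(groupIds), totalSlots%len(groupIds)
	for i, id := range groupIds {
		quota[id] = base
		if i < rem {
			quota[id]++
		}
	}
	return quota
}

func main() {
	fmt.Println(evenQuota(1024, []int{1, 2, 3})) // map[1:342 2:341 3:341]
}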
Example #2
/*
 * Resend the request to the correct backend after it has replied with ASK.
 * r: the request that already received the ASK reply.
 */
func (s *Session) handleAskRequest(r *Request) (*Request, error) {
	// reset response
	r.Response.Resp, r.Response.Err = nil, nil

	usnow := microseconds()
	s.LastOpUnix = usnow / 1e6
	s.Ops++

	// get hash key of the request
	hkey := getHashKey(r.Resp, r.OpStr)
	log.Infof("ASK: use key: %s to choose backend", string(hkey))

	// create an ASKING request to the backend
	nr := &Request{
		OpStr:  "ASKING",
		Start:  usnow,
		Resp:   redis.NewString([]byte("ASKING")),
		Wait:   &sync.WaitGroup{},
		Failed: &s.failed,
	}
	nr, err := s.handleRequestAsking(nr, hkey)
	log.Infof("ASK: after dispatch asking request, waiting. error=%s", err)
	if err != nil {
		return r, err
	}
	s.GetTasks() <- nr

	// resend the original request to the backend
	err = s.GetDispather().Dispatch(r)
	if err == nil {
		s.GetTasks() <- r
	}
	return r, err
}
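
This handler fires when a backend answers a request with a cluster ASK redirect. A hedged sketch of how such a reply can be recognized from its RESP error line (an assumption for illustration; the actual isAskResp check used in Example #22 is not shown in these snippets):

package main

import (
	"fmt"
	"strings"
)

// isAskError reports whether a RESP error line is a cluster ASK redirect,
// e.g. "ASK 3999 127.0.0.1:6381".
func isAskError(errLine string) bool {
	return strings.HasPrefix(errLine, "ASK ")
}

func main() {
	fmt.Println(isAskError("ASK 3999 127.0.0.1:6381"))   // true
	fmt.Println(isAskError("MOVED 3999 127.0.0.1:6381")) // false
}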
Example #3
func (s *Server) waitOnline() bool {
	for {
		info, err := s.topo.GetProxyInfo(s.info.Id)
		if err != nil {
			log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id)
		}
		switch info.State {
		case models.PROXY_STATE_MARK_OFFLINE:
			log.Infof("mark offline, proxy got offline event: %s", s.info.Id)
			s.markOffline()
			return false
		case models.PROXY_STATE_ONLINE:
			s.info.State = info.State
			log.Infof("we are online: %s", s.info.Id)
			s.rewatchProxy()
			return true
		}
		select {
		case <-s.kill:
			log.Infof("mark offline, proxy is killed: %s", s.info.Id)
			s.markOffline()
			return false
		default:
		}
		log.Infof("wait to be online: %s", s.info.Id)
		time.Sleep(3 * time.Second)
	}
}
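
The select with an empty default branch is the standard Go idiom for a non-blocking channel check; it is what lets waitOnline poll ZooKeeper every 3 seconds while still reacting promptly to a kill. A standalone illustration:

package main

import "fmt"

// killed performs a non-blocking receive on the kill channel, the same
// select/default pattern used between polls above.
func killed(kill <-chan struct{}) bool {
	select {
	case <-kill:
		return true
	default:
		return false
	}
}

func main() {
	kill := make(chan struct{})
	fmt.Println(killed(kill)) // false: nothing happened yet
	close(kill)
	fmt.Println(killed(kill)) // true: a closed channel always receives
}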
Example #4
func (s *Server) loopEvents() {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	tick := 0
	for s.info.State == models.PROXY_STATE_ONLINE {
		select {
		case <-s.kill:
			log.Infof("mark offline, proxy is killed: %s", s.info.Id)
			s.markOffline()
		case e := <-s.evtbus:
			evtPath := getEventPath(e)
			log.Infof("got event %s, %v, lastActionSeq %d", s.info.Id, e, s.lastActionSeq)
			if strings.HasPrefix(evtPath, models.GetActionResponsePath(s.conf.productName)) {
				seq, err := strconv.Atoi(path.Base(evtPath))
				if err != nil {
					log.ErrorErrorf(err, "parse action seq failed")
				} else {
					if seq < s.lastActionSeq {
						log.Infof("ignore seq = %d", seq)
						continue
					}
				}
			}
			s.processAction(e)
		case <-ticker.C:
			if maxTick := s.conf.pingPeriod; maxTick != 0 {
				if tick++; tick >= maxTick {
					s.router.KeepAlive()
					tick = 0
				}
			}
		}
	}
}
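
The ticker branch divides the 1-second ticker down to one router.KeepAlive() call every pingPeriod seconds. The same divider pattern in isolation (maxTick stands in for conf.pingPeriod):

package main

import "fmt"

func main() {
	tick, maxTick := 0, 3
	for second := 1; second <= 9; second++ { // pretend each iteration is one ticker fire
		if tick++; tick >= maxTick {
			fmt.Println("keepalive at second", second)
			tick = 0
		}
	}
	// prints: keepalive at second 3, 6, 9
}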
Example #5
func (t *MigrateTask) run() error {
	log.Infof("migration start: %+v", t.MigrateTaskInfo)
	to := t.NewGroupId
	t.UpdateStatus(MIGRATE_TASK_MIGRATING)
	err := t.migrateSingleSlot(t.SlotId, to)
	if err != nil {
		log.ErrorErrorf(err, "migrate single slot failed")
		t.UpdateStatus(MIGRATE_TASK_ERR)
		t.rollbackPremigrate()
		return err
	}
	t.UpdateFinish()
	log.Infof("migration finished: %+v", t.MigrateTaskInfo)
	return nil
}
Example #6
func (s *Session) handleRequestAsking(r *Request, extra []byte) (*Request, error) {
	log.Infof("ASK: handling asking request to backend")
	if err := s.GetDispather().DispatchAsking(r, extra); err != nil {
		return nil, err
	}
	return r, nil
}
Example #7
func TestProxyOfflineInWaitActionReceiver(t *testing.T) {
	log.Infof("test proxy offline when waiting action response")
	fakeZkConn := zkhelper.NewConn()

	for i := 1; i <= 4; i++ {
		CreateProxyInfo(fakeZkConn, productName, &ProxyInfo{
			Id:    strconv.Itoa(i),
			State: PROXY_STATE_ONLINE,
		})
		go waitForProxyMarkOffline(fakeZkConn, strconv.Itoa(i))
	}

	lst, _ := ProxyList(fakeZkConn, productName, nil)
	assert.Must(len(lst) == 4)

	go func() {
		time.Sleep(500 * time.Millisecond)
		actionPath := path.Join(GetActionResponsePath(productName), fakeZkConn.Seq2Str(1))
		// create a test response for proxy 4, meaning proxies 1, 2 and 3 time out
		fakeZkConn.Create(path.Join(actionPath, "4"), nil,
			0, zkhelper.DefaultFileACLs())
	}()

	err := NewActionWithTimeout(fakeZkConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000)
	if err != nil {
		assert.Must(err.Error() == ErrReceiverTimeout.Error())
	}

	for i := 1; i <= 3; i++ {
		info, _ := GetProxyInfo(fakeZkConn, productName, strconv.Itoa(i))
		assert.Must(info.State == PROXY_STATE_OFFLINE)
	}
}
Example #8
func (s *Server) responseAction(seq int64) {
	log.Infof("send response seq = %d", seq)
	err := s.topo.DoResponse(int(seq), &s.info)
	if err != nil {
		log.InfoErrorf(err, "send response seq = %d failed", seq)
	}
}
Example #9
func releaseDashboardNode() {
	zkPath := fmt.Sprintf("/zk/codis/db_%s/dashboard", globalEnv.ProductName())
	if exists, _, _ := safeZkConn.Exists(zkPath); exists {
		log.Infof("removing dashboard node")
		safeZkConn.Delete(zkPath, 0)
	}
}
Example #10
func (s *Server) processAction(e interface{}) {
	if strings.HasPrefix(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) {
		info, err := s.topo.GetProxyInfo(s.info.Id)
		if err != nil {
			log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id)
		}
		switch info.State {
		case models.PROXY_STATE_MARK_OFFLINE:
			log.Infof("mark offline, proxy got offline event: %s", s.info.Id)
			s.markOffline()
		case models.PROXY_STATE_ONLINE:
			s.rewatchProxy()
		default:
			log.Panicf("unknown proxy state %v", info)
		}
		return
	}

	// re-watch
	nodes := s.rewatchNodes()

	seqs, err := models.ExtraSeqList(nodes)
	if err != nil {
		log.PanicErrorf(err, "get seq list failed")
	}

	if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) {
		return
	}

	// get the last position
	index := -1
	for i, seq := range seqs {
		if s.lastActionSeq < seq {
			index = i
			break
		}
	}

	if index < 0 {
		return
	}

	actions := seqs[index:]
	for _, seq := range actions {
		exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id))
		if err != nil {
			log.PanicErrorf(err, "get action failed")
		}
		if exist {
			continue
		}
		if s.checkAndDoTopoChange(seq) {
			s.responseAction(int64(seq))
		}
	}

	s.lastActionSeq = seqs[len(seqs)-1]
}
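
The scan for the first unseen sequence relies on seqs being sorted ascending (the early break would otherwise be wrong). Under that same assumption, a standalone equivalent using the standard library's binary search:

package main

import (
	"fmt"
	"sort"
)

func main() {
	seqs := []int{3, 5, 8, 13}
	lastActionSeq := 5
	// index of the first seq strictly greater than lastActionSeq
	idx := sort.SearchInts(seqs, lastActionSeq+1)
	fmt.Println(seqs[idx:]) // [8 13]; an empty slice means nothing is new
}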
Example #11
func (bc *BackendConn) Run() {
	log.Infof("backend conn [%p] to %s, start service", bc, bc.addr)
	for k := 0; ; k++ {
		err := bc.loopWriter()
		if err == nil {
			break
		}
		// flush the requests buffered at the moment the writer loop failed
		for i := len(bc.input); i != 0; i-- {
			r := <-bc.input
			bc.setResponse(r, nil, err)
		}
		log.WarnErrorf(err, "backend conn [%p] to %s, restart [%d]", bc, bc.addr, k)
		time.Sleep(time.Millisecond * 50)
	}
	log.Infof("backend conn [%p] to %s, stop and exit", bc, bc.addr)
}
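
On a writer failure, Run flushes only the len(bc.input) requests already buffered, which avoids blocking on a channel that producer goroutines may still be writing to. The bounded-drain idiom in isolation:

package main

import "fmt"

func main() {
	ch := make(chan int, 8)
	ch <- 1
	ch <- 2
	ch <- 3
	// drain exactly what is buffered right now; `for r := range ch` would
	// instead block forever while the channel stays open
	for i := len(ch); i != 0; i-- {
		fmt.Println("flush request", <-ch)
	}
}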
Example #12
func (s *Server) getActionObject(seq int, target interface{}) {
	act := &models.Action{Target: target}
	err := s.topo.GetActionWithSeqObject(int64(seq), act)
	if err != nil {
		log.PanicErrorf(err, "get action object failed, seq = %d", seq)
	}
	log.Infof("action %+v", act)
}
Example #13
func (s *Server) onGroupChange(groupId int) {
	log.Infof("group changed %d", groupId)
	for i, g := range s.groups {
		if g == groupId {
			s.fillSlot(i)
		}
	}
}
Example #14
func NewSessionSize(c net.Conn, auth string, bufsize int, timeout int) *Session {
	s := &Session{CreateUnix: time.Now().Unix(), auth: auth}
	s.Conn = redis.NewConnSize(c, bufsize)
	s.Conn.ReaderTimeout = time.Second * time.Duration(timeout)
	s.Conn.WriterTimeout = time.Second * 30
	log.Infof("session [%p] create: %s", s, s)
	return s
}
Example #15
func New(addr string, debugVarAddr string, conf *Config) *Server {
	log.Infof("create proxy with config: %+v", conf)

	proxyHost := strings.Split(addr, ":")[0]
	debugHost := strings.Split(debugVarAddr, ":")[0]

	hostname, err := os.Hostname()
	if err != nil {
		log.PanicErrorf(err, "get host name failed")
	}
	if proxyHost == "0.0.0.0" || strings.HasPrefix(proxyHost, "127.0.0.") || proxyHost == "" {
		proxyHost = hostname
	}
	if debugHost == "0.0.0.0" || strings.HasPrefix(debugHost, "127.0.0.") || debugHost == "" {
		debugHost = hostname
	}

	s := &Server{conf: conf, lastActionSeq: -1, groups: make(map[int]int)}
	s.topo = NewTopo(conf.productName, conf.zkAddr, conf.fact, conf.provider, conf.zkSessionTimeout)
	s.info.Id = conf.proxyId
	s.info.State = models.PROXY_STATE_OFFLINE
	s.info.Addr = proxyHost + ":" + strings.Split(addr, ":")[1]
	s.info.DebugVarAddr = debugHost + ":" + strings.Split(debugVarAddr, ":")[1]
	s.info.Pid = os.Getpid()
	s.info.StartAt = time.Now().String()
	s.kill = make(chan interface{})

	log.Infof("proxy info = %+v", s.info)

	if l, err := net.Listen(conf.proto, addr); err != nil {
		log.PanicErrorf(err, "open listener failed")
	} else {
		s.listener = l
	}
	s.router = router.NewWithAuth(conf.passwd)
	s.evtbus = make(chan interface{}, 1024)

	s.register()

	s.wait.Add(1)
	go func() {
		defer s.wait.Done()
		s.serve()
	}()
	return s
}
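
New applies the same rule to both addr and debugVarAddr: an empty, wildcard, or loopback bind host is replaced by the machine hostname so other cluster members can reach the proxy. That rule extracted into one place (resolveAdvertiseHost is a hypothetical name used only for illustration):

package main

import (
	"fmt"
	"os"
	"strings"
)

func resolveAdvertiseHost(bindHost string) string {
	if bindHost == "" || bindHost == "0.0.0.0" || strings.HasPrefix(bindHost, "127.0.0.") {
		if hn, err := os.Hostname(); err == nil {
			return hn
		}
	}
	return bindHost
}

func main() {
	fmt.Println(resolveAdvertiseHost("0.0.0.0"))   // the machine hostname
	fmt.Println(resolveAdvertiseHost("10.0.0.12")) // 10.0.0.12, kept as-is
}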
Example #16
func (s *Slot) prepareAsk(r *Request) (*SharedBackendConn, error) {
	if s.migrate.bc == nil {
		log.Infof("ASK: slot-%04d is not migrating", s.id)
		return nil, ErrSlotIsNotMigrating
	}
	r.slot = &s.wait
	r.slot.Add(1)
	return s.migrate.bc, nil
}
Example #17
func (t *MigrateTask) rollbackPremigrate() {
	if s, err := models.GetSlot(t.zkConn, t.productName, t.SlotId); err == nil && s.State.Status == models.SLOT_STATUS_PRE_MIGRATE {
		s.State.Status = models.SLOT_STATUS_ONLINE
		err = s.Update(t.zkConn)
		if err != nil {
			log.Warn("rollback premigrate failed", err)
		} else {
			log.Infof("rollback slot %d from premigrate to online\n", s.Id)
		}
	}
}
Example #18
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) {
	log.Infof("slotRangeChange %+v", param)
	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.resetSlot(i)
		case models.SLOT_STATUS_ONLINE:
			s.fillSlot(i)
		default:
			log.Panicf("can not handle status %v", param.Status)
		}
	}
}
Example #19
func (s *Router) fillSlot(i int, addr, from string, lock bool) {
	if !s.isValidSlot(i) {
		return
	}
	slot := s.slots[i]
	slot.blockAndWait()

	s.putBackendConn(slot.backend.bc)
	s.putBackendConn(slot.migrate.bc)
	slot.reset()

	if len(addr) != 0 {
		xx := strings.Split(addr, ":")
		if len(xx) >= 1 {
			slot.backend.host = []byte(xx[0])
		}
		if len(xx) >= 2 {
			slot.backend.port = []byte(xx[1])
		}
		slot.backend.addr = addr
		slot.backend.bc = s.getBackendConn(addr)
	}
	if len(from) != 0 {
		slot.migrate.from = from
		slot.migrate.bc = s.getBackendConn(from)
	}

	if !lock {
		slot.unblock()
	}

	if slot.migrate.bc != nil {
		log.Infof("fill slot %04d, backend.addr = %s, migrate.from = %s",
			i, slot.backend.addr, slot.migrate.from)
	} else {
		log.Infof("fill slot %04d, backend.addr = %s",
			i, slot.backend.addr)
	}
}
Example #20
func (s *Session) Serve(d Dispatcher, maxPipeline int) {
	if d == nil {
		log.Infof("dispatcher is nil")
		return
	}
	// set the dispatcher for this session
	s.SetDispather(d)

	var errlist errors.ErrorList
	defer func() {
		if err := errlist.First(); err != nil {
			log.Infof("session [%p] closed: %s, error = %s", s, s, err)
		} else {
			log.Infof("session [%p] closed: %s, quit", s, s)
		}
	}()

	tasks := make(chan *Request, maxPipeline)
	s.SetTasks(tasks)

	go func() {
		defer func() {
			s.Close()
			for range tasks {
			}
		}()
		if err := s.loopWriter(tasks); err != nil {
			errlist.PushBack(err)
		}
	}()

	defer func(tasks chan<- *Request) {
		close(tasks)
		s.SetTasks(nil)
	}(tasks)
	if err := s.loopReader(tasks); err != nil {
		errlist.PushBack(err)
	}
}
Example #21
func (s *Session) handleRequest(resp *redis.Resp) (*Request, error) {
	opstr, err := getOpStr(resp)
	log.Infof("In handleRequest, opstr is %s", string(opstr))
	if err != nil {
		return nil, err
	}
	if isNotAllowed(opstr) {
		return nil, errors.Errorf("command <%s> is not allowed", opstr)
	}

	usnow := microseconds()
	s.LastOpUnix = usnow / 1e6
	s.Ops++

	r := &Request{
		OpStr:  opstr,
		Start:  usnow,
		Resp:   resp,
		Wait:   &sync.WaitGroup{},
		Failed: &s.failed,
	}

	if opstr == "QUIT" {
		return s.handleQuit(r)
	}
	if opstr == "AUTH" {
		return s.handleAuth(r)
	}

	if !s.authorized {
		if s.auth != "" {
			r.Response.Resp = redis.NewError([]byte("NOAUTH Authentication required."))
			return r, nil
		}
		s.authorized = true
	}

	switch opstr {
	case "SELECT":
		return s.handleSelect(r)
	case "PING":
		return s.handlePing(r)
	case "MGET":
		return s.handleRequestMGet(r)
	case "MSET":
		return s.handleRequestMSet(r)
	case "DEL":
		return s.handleRequestMDel(r)
	}
	return r, s.GetDispather().Dispatch(r)
}
Example #22
func (s *Session) loopWriter(tasks <-chan *Request) error {
	p := &FlushPolicy{
		Encoder:     s.Writer,
		MaxBuffered: 32,
		MaxInterval: 300,
	}
	for r := range tasks {
		resp, err := s.handleResponse(r)

		log.Infof("receive a response %s", string(r.Response.Resp.Value))
		// when the response is ASK, the request must be re-sent to the backend
		if r.isAskResp() {
			log.Infof("ASK: handle -ASK req:response %s:%s", string(r.Resp.Value), string(r.Response.Resp.Value))
			if _, err = s.handleAskRequest(r); err != nil {
				log.Infof("ASK: handleAskRequest error %s", err)
				return err
			}
			// handle the response of the re-sent request
			log.Infof("ASK: receive new response")
			resp, err = s.handleResponse(r)
			log.Infof("ASK: new response %s", string(r.Response.Resp.Value))
		}
		// if the request was issued by the proxy itself, swallow the response;
		// continue rather than return, so one swallowed ASKING reply does not
		// terminate the whole writer loop
		if r.isAskingReq() {
			continue
		}

		if err != nil {
			return err
		}
		if err = p.Encode(resp, len(tasks) == 0); err != nil {
			return err
		}
	}
	return nil
}
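
The second argument to p.Encode, len(tasks) == 0, makes the writer batch responses while more pipelined requests are pending and flush once the queue drains. A sketch of the same rule over a plain bufio.Writer, assuming FlushPolicy wraps a similar buffered encoder:

package main

import (
	"bufio"
	"os"
)

func main() {
	w := bufio.NewWriter(os.Stdout)
	pending := []string{"+OK\r\n", "+PONG\r\n", "+OK\r\n"}
	for i, resp := range pending {
		w.WriteString(resp)
		if i == len(pending)-1 {
			w.Flush() // queue drained: push everything to the client at once
		}
	}
}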
Example #23
func (s *Slot) prepare(r *Request, key []byte) (*SharedBackendConn, error) {
	if s.backend.bc == nil {
		log.Infof("slot-%04d is not ready: key = %s", s.id, key)
		return nil, ErrSlotIsNotReady
	}
	if err := s.slotsmgrt(r, key); err != nil {
		log.Warnf("slot-%04d migrate from = %s to %s failed: key = %s, error = %s",
			s.id, s.migrate.from, s.backend.addr, key, err)
		return nil, err
	}
	r.slot = &s.wait
	r.slot.Add(1)
	return s.backend.bc, nil
}
Example #24
func setLogLevel(level string) {
	var lv = log.LEVEL_INFO
	switch strings.ToLower(level) {
	case "error":
		lv = log.LEVEL_ERROR
	case "warn", "warning":
		lv = log.LEVEL_WARN
	case "debug":
		lv = log.LEVEL_DEBUG
	case "info":
		fallthrough
	default:
		lv = log.LEVEL_INFO
	}
	log.SetLevel(lv)
	log.Infof("set log level to %s", lv)
}
Example #25
func setLogLevel(level string) {
	level = strings.ToLower(level)
	var l = log.LEVEL_INFO
	switch level {
	case "error":
		l = log.LEVEL_ERROR
	case "warn", "warning":
		l = log.LEVEL_WARN
	case "debug":
		l = log.LEVEL_DEBUG
	case "info":
		fallthrough
	default:
		level = "info"
		l = log.LEVEL_INFO
	}
	log.SetLevel(l)
	log.Infof("set log level to <%s>", level)
}
Example #26
func createDashboardNode() error {

	// make sure the root dir exists
	rootDir := fmt.Sprintf("/zk/codis/db_%s", globalEnv.ProductName())
	zkhelper.CreateRecursive(safeZkConn, rootDir, "", 0, zkhelper.DefaultDirACLs())

	zkPath := fmt.Sprintf("%s/dashboard", rootDir)
	// make sure we are the only dashboard
	if exists, _, _ := safeZkConn.Exists(zkPath); exists {
		data, _, _ := safeZkConn.Get(zkPath)
		return errors.New("dashboard already exists: " + string(data))
	}

	content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid())
	pathCreated, err := safeZkConn.Create(zkPath, []byte(content), 0, zkhelper.DefaultFileACLs())
	if err != nil {
		return errors.Trace(err)
	}
	createdDashboardNode = true
	log.Infof("dashboard node created: %v, %s", pathCreated, content)
	log.Warn("********** Attention **********")
	log.Warn("You should use `kill {pid}` rather than `kill -9 {pid}` to stop me,")
	log.Warn("otherwise the node registered on zk will not be cleaned up when I quit and you must remove it manually")
	log.Warn("*******************************")
	return nil
}
Example #27
func runDashboard(addr string, httpLogFile string) {
	log.Infof("dashboard listening on addr: %s", addr)
	m := martini.Classic()
	f, err := os.OpenFile(httpLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		log.PanicErrorf(err, "open http log file failed")
	}
	defer f.Close()

	m.Map(stdlog.New(f, "[martini]", stdlog.LstdFlags))
	binRoot, err := filepath.Abs(filepath.Dir(os.Args[0]))
	if err != nil {
		log.PanicErrorf(err, "get binroot path failed")
	}

	m.Use(martini.Static(filepath.Join(binRoot, "assets/statics")))
	m.Use(render.Renderer(render.Options{
		Directory:  filepath.Join(binRoot, "assets/template"),
		Extensions: []string{".tmpl", ".html"},
		Charset:    "UTF-8",
		IndentJSON: true,
	}))

	m.Use(cors.Allow(&cors.Options{
		AllowOrigins:     []string{"*"},
		AllowMethods:     []string{"POST", "GET", "DELETE", "PUT"},
		AllowHeaders:     []string{"Origin", "x-requested-with", "Content-Type", "Content-Range", "Content-Disposition", "Content-Description"},
		ExposeHeaders:    []string{"Content-Length"},
		AllowCredentials: false,
	}))

	m.Get("/api/server_groups", apiGetServerGroupList)
	m.Get("/api/overview", apiOverview)

	m.Get("/api/redis/:addr/stat", apiRedisStat)
	m.Get("/api/redis/:addr/:id/slotinfo", apiGetRedisSlotInfo)
	m.Get("/api/redis/group/:group_id/:slot_id/slotinfo", apiGetRedisSlotInfoFromGroupId)

	m.Put("/api/server_groups", binding.Json(models.ServerGroup{}), apiAddServerGroup)
	m.Put("/api/server_group/(?P<id>[0-9]+)/addServer", binding.Json(models.Server{}), apiAddServerToGroup)
	m.Delete("/api/server_group/(?P<id>[0-9]+)", apiRemoveServerGroup)

	m.Put("/api/server_group/(?P<id>[0-9]+)/removeServer", binding.Json(models.Server{}), apiRemoveServerFromGroup)
	m.Get("/api/server_group/(?P<id>[0-9]+)", apiGetServerGroup)
	m.Post("/api/server_group/(?P<id>[0-9]+)/promote", binding.Json(models.Server{}), apiPromoteServer)

	m.Get("/api/migrate/status", apiMigrateStatus)
	m.Get("/api/migrate/tasks", apiGetMigrateTasks)
	m.Post("/api/migrate", binding.Json(migrateTaskForm{}), apiDoMigrate)

	m.Post("/api/rebalance", apiRebalance)

	m.Get("/api/slot/list", apiGetSlots)
	m.Get("/api/slot/:id", apiGetSingleSlot)
	m.Post("/api/slots/init", apiInitSlots)
	m.Get("/api/slots", apiGetSlots)
	m.Post("/api/slot", binding.Json(RangeSetTask{}), apiSlotRangeSet)
	m.Get("/api/proxy/list", apiGetProxyList)
	m.Get("/api/proxy/debug/vars", apiGetProxyDebugVars)
	m.Post("/api/proxy", binding.Json(models.ProxyInfo{}), apiSetProxyStatus)

	m.Get("/api/action/gc", apiActionGC)
	m.Get("/api/force_remove_locks", apiForceRemoveLocks)
	m.Get("/api/remove_fence", apiRemoveFence)

	m.Get("/slots", pageSlots)
	m.Get("/", func(r render.Render) {
		r.Redirect("/admin")
	})
	zkBuilder := utils.NewConnBuilder(globalEnv.NewZkConn)
	safeZkConn = zkBuilder.GetSafeConn()
	unsafeZkConn = zkBuilder.GetUnsafeConn()

	// create temp node in ZK
	if err := createDashboardNode(); err != nil {
		log.PanicErrorf(err, "create zk node failed") // do not release dashborad node here
	}

	// create a long-lived migrate manager
	globalMigrateManager = NewMigrateManager(safeZkConn, globalEnv.ProductName())

	go func() {
		tick := time.Tick(time.Second)
		var lastCnt, qps int64
		for range tick {
			cnt := getAllProxyOps()
			if cnt > 0 {
				qps = cnt - lastCnt
				lastCnt = cnt
			} else {
				qps = 0
			}
			atomic.StoreInt64(&proxiesSpeed, qps)
		}
	}()

	m.RunOnAddr(addr)
}
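
The stats goroutine near the end derives proxiesSpeed as the per-second delta of the cumulative op counter returned by getAllProxyOps. The same derivative over canned samples:

package main

import "fmt"

func main() {
	samples := []int64{0, 120, 250, 250, 400} // total ops, one sample per second
	var lastCnt, qps int64
	for sec, cnt := range samples {
		if cnt > 0 {
			qps = cnt - lastCnt
			lastCnt = cnt
		} else {
			qps = 0
		}
		fmt.Printf("t=%ds qps=%d\n", sec, qps)
	}
}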
Example #28
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error {
	p, err := GetProxyInfo(zkConn, productName, proxyName)
	if err != nil {
		return errors.Trace(err)
	}

	if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE {
		return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status)
	}

	// check slot status before setting proxy online
	if status == PROXY_STATE_ONLINE {
		slots, err := Slots(zkConn, productName)
		if err != nil {
			return errors.Trace(err)
		}
		for _, slot := range slots {
			if slot.State.Status != SLOT_STATUS_ONLINE && slot.State.Status != SLOT_STATUS_MIGRATE {
				return errors.Errorf("slot %v is not online or migrate", slot)
			}
			if slot.GroupId == INVALID_ID {
				return errors.Errorf("slot %v has invalid group id", slot)
			}
		}
	}

	p.State = status
	b, _ := json.Marshal(p)

	_, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1)
	if err != nil {
		return errors.Trace(err)
	}

	if status == PROXY_STATE_MARK_OFFLINE {
		// wait for the proxy to go down
		for {
			_, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName))
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			<-c
			info, err := GetProxyInfo(zkConn, productName, proxyName)
			log.Info("mark_offline, check proxy status:", proxyName, info, err)
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				log.Info("shutdown proxy successful")
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			if info.State == PROXY_STATE_OFFLINE {
				log.Infof("proxy: %s offline success!", proxyName)
				return nil
			}
		}
	}

	return nil
}
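
The wait-for-offline loop follows the usual ZooKeeper watch pattern: read with a watch, block on the event channel, then re-read and re-decide; watches fire only once, so the loop re-arms them by calling GetW again. A compilable stand-in for that shape (readState and events are stand-ins for GetProxyInfo and the zk watch channel):

package main

import "fmt"

func watchUntil(readState func() string, events <-chan struct{}, want string) {
	for {
		if readState() == want {
			return
		}
		<-events // block until the node changes, then loop and re-read
	}
}

func main() {
	states := []string{"mark_offline", "offline"}
	i := 0
	readState := func() string { s := states[i]; i++; return s }
	events := make(chan struct{}, 1)
	events <- struct{}{} // pretend the node changed once
	watchUntil(readState, events, "offline")
	fmt.Println("proxy is offline")
}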
Example #29
func main() {
	fmt.Print(banner)

	args, err := docopt.Parse(usage, nil, true, "codis proxy v0.1", true)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// set config file
	if args["-c"] != nil {
		configFile = args["-c"].(string)
	}

	var maxFileFrag = 10
	var maxFragSize int64 = bytesize.GB * 1
	if s, ok := args["--log-filesize"].(string); ok && s != "" {
		v, err := bytesize.Parse(s)
		if err != nil {
			log.PanicErrorf(err, "invalid max log file size = %s", s)
		}
		maxFragSize = v
	}

	// set output log file
	if s, ok := args["-L"].(string); ok && s != "" {
		f, err := log.NewRollingFile(s, maxFileFrag, maxFragSize)
		if err != nil {
			log.PanicErrorf(err, "open rolling log file failed: %s", s)
		} else {
			defer f.Close()
			log.StdLog = log.New(f, "")
		}
	}
	log.SetLevel(log.LEVEL_INFO)
	log.SetFlags(log.Flags() | log.Lshortfile)

	// set log level
	if s, ok := args["--log-level"].(string); ok && s != "" {
		setLogLevel(s)
	}
	cpus = runtime.NumCPU()
	// set cpu
	if args["--cpu"] != nil {
		cpus, err = strconv.Atoi(args["--cpu"].(string))
		if err != nil {
			log.PanicErrorf(err, "parse cpu number failed")
		}
	}

	// set addr
	if args["--addr"] != nil {
		addr = args["--addr"].(string)
	}

	// set http addr
	if args["--http-addr"] != nil {
		httpAddr = args["--http-addr"].(string)
	}

	checkUlimit(1024)
	runtime.GOMAXPROCS(cpus)

	http.HandleFunc("/setloglevel", handleSetLogLevel)
	go func() {
		err := http.ListenAndServe(httpAddr, nil)
		log.PanicError(err, "http debug server quit")
	}()
	log.Info("running on ", addr)
	conf, err := proxy.LoadConf(configFile)
	if err != nil {
		log.PanicErrorf(err, "load config failed")
	}

	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt, syscall.SIGTERM, os.Kill) // note: os.Kill (SIGKILL) cannot actually be trapped on Unix

	s := proxy.New(addr, httpAddr, conf)
	defer s.Close()

	stats.PublishJSONFunc("router", func() string {
		var m = make(map[string]interface{})
		m["ops"] = router.OpCounts()
		m["cmds"] = router.GetAllOpStats()
		m["info"] = s.Info()
		m["build"] = map[string]interface{}{
			"version": utils.Version,
			"compile": utils.Compile,
		}
		b, _ := json.Marshal(m)
		return string(b)
	})

	go func() {
		<-c
		log.Info("ctrl-c or SIGTERM found, bye bye...")
		s.Close()
	}()

	time.Sleep(time.Second)
	if err := s.SetMyselfOnline(); err != nil {
		log.WarnError(err, "failed to mark myself online, you need to mark it online manually via the dashboard")
	}

	s.Join()
	log.Infof("proxy exit!! :(")
}
Example #30
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
	// set slot status
	s, err := models.GetSlot(t.zkConn, t.productName, slotId)
	if err != nil {
		log.ErrorErrorf(err, "get slot info failed")
		return err
	}
	if s.State.Status == models.SLOT_STATUS_OFFLINE {
		log.Warnf("status is offline: %+v", s)
		return nil
	}

	from := s.GroupId
	if s.State.Status == models.SLOT_STATUS_MIGRATE {
		from = s.State.MigrateStatus.From
	}

	// make sure the source and target groups exist
	exists, err := models.GroupExists(t.zkConn, t.productName, from)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
		return errors.Errorf("group %d not found", from)
	}

	exists, err = models.GroupExists(t.zkConn, t.productName, to)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		return errors.Errorf("group %d not found", to)
	}

	// cannot migrate to itself, just ignore
	if from == to {
		log.Warnf("from == to, ignore: %+v", s)
		return nil
	}

	// modify slot status
	if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil {
		log.ErrorErrorf(err, "set migrate status failed")
		return err
	}

	err = t.Migrate(s, from, to, func(p SlotMigrateProgress) {
		// on migrate slot progress
		if p.Remain%5000 == 0 {
			log.Infof("%+v", p)
		}
	})
	if err != nil {
		log.ErrorErrorf(err, "migrate slot failed")
		return err
	}

	// migrate done, change slot status back
	s.State.Status = models.SLOT_STATUS_ONLINE
	s.State.MigrateStatus.From = models.INVALID_ID
	s.State.MigrateStatus.To = models.INVALID_ID
	if err := s.Update(t.zkConn); err != nil {
		log.ErrorErrorf(err, "update zk status failed, should be: %+v", s)
		return err
	}
	return nil
}
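
The progress callback passed to t.Migrate logs only when p.Remain%5000 == 0, throttling output to once per 5000 remaining keys. The modulus throttle in isolation:

package main

import "fmt"

func main() {
	for remain := 20000; remain >= 0; remain -= 2500 {
		if remain%5000 == 0 {
			fmt.Println("migration progress, remain:", remain)
		}
	}
	// prints remain: 20000, 15000, 10000, 5000, 0
}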