// experimental simple auto rebalance :)
func Rebalance() error {
    targetQuota, err := getQuotaMap(safeZkConn)
    if err != nil {
        return errors.Trace(err)
    }
    livingNodes, err := getLivingNodeInfos(safeZkConn)
    if err != nil {
        return errors.Trace(err)
    }
    log.Infof("start rebalance")
    for _, node := range livingNodes {
        for len(node.CurSlots) > targetQuota[node.GroupId] {
            for _, dest := range livingNodes {
                if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] {
                    slot := node.CurSlots[len(node.CurSlots)-1]
                    // create a migration task
                    info := &MigrateTaskInfo{
                        Delay:      0,
                        SlotId:     slot,
                        NewGroupId: dest.GroupId,
                        Status:     MIGRATE_TASK_PENDING,
                        CreateAt:   strconv.FormatInt(time.Now().Unix(), 10),
                    }
                    globalMigrateManager.PostTask(info)

                    node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1]
                    dest.CurSlots = append(dest.CurSlots, slot)
                }
            }
        }
    }
    log.Infof("rebalance tasks submit finish")
    return nil
}
/*
 * Re-send a request to the correct backend after the backend has replied with ASK.
 * r : the request that has already received an ASK response
 */
func (s *Session) handleAskRequest(r *Request) (*Request, error) {
    // reset response
    r.Response.Resp, r.Response.Err = nil, nil

    usnow := microseconds()
    s.LastOpUnix = usnow / 1e6
    s.Ops++

    // get hash key of the request
    hkey := getHashKey(r.Resp, r.OpStr)
    log.Infof("ASK: use key: %s to choose backend", string(hkey))

    // create an ASKING request to the backend
    nr := &Request{
        OpStr:  "ASKING",
        Start:  usnow,
        Resp:   redis.NewString([]byte("ASKING")),
        Wait:   &sync.WaitGroup{},
        Failed: &s.failed,
    }
    nr, err := s.handleRequestAsking(nr, hkey)
    log.Infof("ASK: after dispatch asking request, waiting. error=%s", err)
    if err != nil {
        return r, err
    } else {
        s.GetTasks() <- nr
    }

    // resend r to the backend
    err = s.GetDispather().Dispatch(r)
    if err == nil {
        s.GetTasks() <- r
    }
    return r, err
}
func (s *Server) waitOnline() bool {
    for {
        info, err := s.topo.GetProxyInfo(s.info.Id)
        if err != nil {
            log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id)
        }
        switch info.State {
        case models.PROXY_STATE_MARK_OFFLINE:
            log.Infof("mark offline, proxy got offline event: %s", s.info.Id)
            s.markOffline()
            return false
        case models.PROXY_STATE_ONLINE:
            s.info.State = info.State
            log.Infof("we are online: %s", s.info.Id)
            s.rewatchProxy()
            return true
        }
        select {
        case <-s.kill:
            log.Infof("mark offline, proxy is killed: %s", s.info.Id)
            s.markOffline()
            return false
        default:
        }
        log.Infof("wait to be online: %s", s.info.Id)
        time.Sleep(3 * time.Second)
    }
}
func (s *Server) loopEvents() {
    ticker := time.NewTicker(time.Second)
    defer ticker.Stop()

    var tick int = 0
    for s.info.State == models.PROXY_STATE_ONLINE {
        select {
        case <-s.kill:
            log.Infof("mark offline, proxy is killed: %s", s.info.Id)
            s.markOffline()
        case e := <-s.evtbus:
            evtPath := getEventPath(e)
            log.Infof("got event %s, %v, lastActionSeq %d", s.info.Id, e, s.lastActionSeq)
            if strings.Index(evtPath, models.GetActionResponsePath(s.conf.productName)) == 0 {
                seq, err := strconv.Atoi(path.Base(evtPath))
                if err != nil {
                    log.ErrorErrorf(err, "parse action seq failed")
                } else {
                    if seq < s.lastActionSeq {
                        log.Infof("ignore seq = %d", seq)
                        continue
                    }
                }
            }
            s.processAction(e)
        case <-ticker.C:
            if maxTick := s.conf.pingPeriod; maxTick != 0 {
                if tick++; tick >= maxTick {
                    s.router.KeepAlive()
                    tick = 0
                }
            }
        }
    }
}
func (t *MigrateTask) run() error {
    log.Infof("migration start: %+v", t.MigrateTaskInfo)
    to := t.NewGroupId
    t.UpdateStatus(MIGRATE_TASK_MIGRATING)
    err := t.migrateSingleSlot(t.SlotId, to)
    if err != nil {
        log.ErrorErrorf(err, "migrate single slot failed")
        t.UpdateStatus(MIGRATE_TASK_ERR)
        t.rollbackPremigrate()
        return err
    }
    t.UpdateFinish()
    log.Infof("migration finished: %+v", t.MigrateTaskInfo)
    return nil
}
func (s *Session) handleRequestAsking(r *Request, extra []byte) (*Request, error) {
    log.Infof("ASK: handling asking request to backend")
    if err := s.GetDispather().DispatchAsking(r, extra); err != nil {
        return nil, err
    }
    return r, nil
}
func TestProxyOfflineInWaitActionReceiver(t *testing.T) {
    log.Infof("test proxy offline when waiting action response")
    fakeZkConn := zkhelper.NewConn()

    for i := 1; i <= 4; i++ {
        CreateProxyInfo(fakeZkConn, productName, &ProxyInfo{
            Id:    strconv.Itoa(i),
            State: PROXY_STATE_ONLINE,
        })
        go waitForProxyMarkOffline(fakeZkConn, strconv.Itoa(i))
    }

    lst, _ := ProxyList(fakeZkConn, productName, nil)
    assert.Must(len(lst) == 4)

    go func() {
        time.Sleep(500 * time.Millisecond)
        actionPath := path.Join(GetActionResponsePath(productName), fakeZkConn.Seq2Str(1))
        // create a test response for proxy 4 only, so proxies 1, 2 and 3 time out
        fakeZkConn.Create(path.Join(actionPath, "4"), nil, 0, zkhelper.DefaultFileACLs())
    }()

    err := NewActionWithTimeout(fakeZkConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000)
    if err != nil {
        assert.Must(err.Error() == ErrReceiverTimeout.Error())
    }

    for i := 1; i <= 3; i++ {
        info, _ := GetProxyInfo(fakeZkConn, productName, strconv.Itoa(i))
        assert.Must(info.State == PROXY_STATE_OFFLINE)
    }
}
func (s *Server) responseAction(seq int64) {
    log.Infof("send response seq = %d", seq)
    err := s.topo.DoResponse(int(seq), &s.info)
    if err != nil {
        log.InfoErrorf(err, "send response seq = %d failed", seq)
    }
}
func releaseDashboardNode() {
    zkPath := fmt.Sprintf("/zk/codis/db_%s/dashboard", globalEnv.ProductName())
    if exists, _, _ := safeZkConn.Exists(zkPath); exists {
        log.Infof("removing dashboard node")
        safeZkConn.Delete(zkPath, 0)
    }
}
func (s *Server) processAction(e interface{}) {
    if strings.Index(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) == 0 {
        info, err := s.topo.GetProxyInfo(s.info.Id)
        if err != nil {
            log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id)
        }
        switch info.State {
        case models.PROXY_STATE_MARK_OFFLINE:
            log.Infof("mark offline, proxy got offline event: %s", s.info.Id)
            s.markOffline()
        case models.PROXY_STATE_ONLINE:
            s.rewatchProxy()
        default:
            log.Panicf("unknown proxy state %v", info)
        }
        return
    }

    // re-watch
    nodes := s.rewatchNodes()

    seqs, err := models.ExtraSeqList(nodes)
    if err != nil {
        log.PanicErrorf(err, "get seq list failed")
    }

    if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) {
        return
    }

    // get last pos
    index := -1
    for i, seq := range seqs {
        if s.lastActionSeq < seq {
            index = i
            break
        }
    }

    if index < 0 {
        return
    }

    actions := seqs[index:]
    for _, seq := range actions {
        exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id))
        if err != nil {
            log.PanicErrorf(err, "get action failed")
        }
        if exist {
            continue
        }
        if s.checkAndDoTopoChange(seq) {
            s.responseAction(int64(seq))
        }
    }

    s.lastActionSeq = seqs[len(seqs)-1]
}
func (bc *BackendConn) Run() {
    log.Infof("backend conn [%p] to %s, start service", bc, bc.addr)
    for k := 0; ; k++ {
        err := bc.loopWriter()
        if err == nil {
            break
        } else {
            // fail all queued requests with the error before restarting the writer loop
            for i := len(bc.input); i != 0; i-- {
                r := <-bc.input
                bc.setResponse(r, nil, err)
            }
        }
        log.WarnErrorf(err, "backend conn [%p] to %s, restart [%d]", bc, bc.addr, k)
        time.Sleep(time.Millisecond * 50)
    }
    log.Infof("backend conn [%p] to %s, stop and exit", bc, bc.addr)
}
func (s *Server) getActionObject(seq int, target interface{}) {
    act := &models.Action{Target: target}
    err := s.topo.GetActionWithSeqObject(int64(seq), act)
    if err != nil {
        log.PanicErrorf(err, "get action object failed, seq = %d", seq)
    }
    log.Infof("action %+v", act)
}
func (s *Server) onGroupChange(groupId int) {
    log.Infof("group changed %d", groupId)
    for i, g := range s.groups {
        if g == groupId {
            s.fillSlot(i)
        }
    }
}
func NewSessionSize(c net.Conn, auth string, bufsize int, timeout int) *Session {
    s := &Session{CreateUnix: time.Now().Unix(), auth: auth}
    s.Conn = redis.NewConnSize(c, bufsize)
    s.Conn.ReaderTimeout = time.Second * time.Duration(timeout)
    s.Conn.WriterTimeout = time.Second * 30
    log.Infof("session [%p] create: %s", s, s)
    return s
}
func New(addr string, debugVarAddr string, conf *Config) *Server {
    log.Infof("create proxy with config: %+v", conf)

    proxyHost := strings.Split(addr, ":")[0]
    debugHost := strings.Split(debugVarAddr, ":")[0]

    hostname, err := os.Hostname()
    if err != nil {
        log.PanicErrorf(err, "get host name failed")
    }
    if proxyHost == "0.0.0.0" || strings.HasPrefix(proxyHost, "127.0.0.") || proxyHost == "" {
        proxyHost = hostname
    }
    if debugHost == "0.0.0.0" || strings.HasPrefix(debugHost, "127.0.0.") || debugHost == "" {
        debugHost = hostname
    }

    s := &Server{conf: conf, lastActionSeq: -1, groups: make(map[int]int)}
    s.topo = NewTopo(conf.productName, conf.zkAddr, conf.fact, conf.provider, conf.zkSessionTimeout)
    s.info.Id = conf.proxyId
    s.info.State = models.PROXY_STATE_OFFLINE
    s.info.Addr = proxyHost + ":" + strings.Split(addr, ":")[1]
    s.info.DebugVarAddr = debugHost + ":" + strings.Split(debugVarAddr, ":")[1]
    s.info.Pid = os.Getpid()
    s.info.StartAt = time.Now().String()
    s.kill = make(chan interface{})

    log.Infof("proxy info = %+v", s.info)

    if l, err := net.Listen(conf.proto, addr); err != nil {
        log.PanicErrorf(err, "open listener failed")
    } else {
        s.listener = l
    }
    s.router = router.NewWithAuth(conf.passwd)
    s.evtbus = make(chan interface{}, 1024)

    s.register()

    s.wait.Add(1)
    go func() {
        defer s.wait.Done()
        s.serve()
    }()
    return s
}
func (s *Slot) prepareAsk(r *Request) (*SharedBackendConn, error) {
    if s.migrate.bc == nil {
        log.Infof("ASK: slot-%04d is not migrating", s.id)
        return nil, ErrSlotIsNotMigrating
    }
    r.slot = &s.wait
    r.slot.Add(1)
    return s.migrate.bc, nil
}
func (t *MigrateTask) rollbackPremigrate() {
    if s, err := models.GetSlot(t.zkConn, t.productName, t.SlotId); err == nil && s.State.Status == models.SLOT_STATUS_PRE_MIGRATE {
        s.State.Status = models.SLOT_STATUS_ONLINE
        err = s.Update(t.zkConn)
        if err != nil {
            log.Warn("rollback premigrate failed", err)
        } else {
            log.Infof("rollback slot %d from premigrate to online\n", s.Id)
        }
    }
}
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) {
    log.Infof("slotRangeChange %+v", param)
    for i := param.From; i <= param.To; i++ {
        switch param.Status {
        case models.SLOT_STATUS_OFFLINE:
            s.resetSlot(i)
        case models.SLOT_STATUS_ONLINE:
            s.fillSlot(i)
        default:
            log.Panicf("can not handle status %v", param.Status)
        }
    }
}
func (s *Router) fillSlot(i int, addr, from string, lock bool) {
    if !s.isValidSlot(i) {
        return
    }
    slot := s.slots[i]
    slot.blockAndWait()

    s.putBackendConn(slot.backend.bc)
    s.putBackendConn(slot.migrate.bc)
    slot.reset()

    if len(addr) != 0 {
        xx := strings.Split(addr, ":")
        if len(xx) >= 1 {
            slot.backend.host = []byte(xx[0])
        }
        if len(xx) >= 2 {
            slot.backend.port = []byte(xx[1])
        }
        slot.backend.addr = addr
        slot.backend.bc = s.getBackendConn(addr)
    }
    if len(from) != 0 {
        slot.migrate.from = from
        slot.migrate.bc = s.getBackendConn(from)
    }

    if !lock {
        slot.unblock()
    }

    if slot.migrate.bc != nil {
        log.Infof("fill slot %04d, backend.addr = %s, migrate.from = %s",
            i, slot.backend.addr, slot.migrate.from)
    } else {
        log.Infof("fill slot %04d, backend.addr = %s",
            i, slot.backend.addr)
    }
}
func (s *Session) Serve(d Dispatcher, maxPipeline int) {
    if d == nil {
        log.Infof("dispatcher is nil")
        return
    }
    // set dispatcher for this session
    s.SetDispather(d)

    var errlist errors.ErrorList
    defer func() {
        if err := errlist.First(); err != nil {
            log.Infof("session [%p] closed: %s, error = %s", s, s, err)
        } else {
            log.Infof("session [%p] closed: %s, quit", s, s)
        }
    }()

    tasks := make(chan *Request, maxPipeline)
    s.SetTasks(tasks)
    go func() {
        defer func() {
            s.Close()
            for _ = range tasks {
            }
        }()
        if err := s.loopWriter(tasks); err != nil {
            errlist.PushBack(err)
        }
    }()

    defer func(tasks chan<- *Request) {
        close(tasks)
        s.SetTasks(nil)
    }(tasks)

    if err := s.loopReader(tasks); err != nil {
        errlist.PushBack(err)
    }
}
func (s *Session) handleRequest(resp *redis.Resp) (*Request, error) {
    opstr, err := getOpStr(resp)
    log.Infof("In handleRequest, opstr is %s", string(opstr))
    if err != nil {
        return nil, err
    }
    if isNotAllowed(opstr) {
        return nil, errors.New(fmt.Sprintf("command <%s> is not allowed", opstr))
    }

    usnow := microseconds()
    s.LastOpUnix = usnow / 1e6
    s.Ops++

    r := &Request{
        OpStr:  opstr,
        Start:  usnow,
        Resp:   resp,
        Wait:   &sync.WaitGroup{},
        Failed: &s.failed,
    }

    if opstr == "QUIT" {
        return s.handleQuit(r)
    }
    if opstr == "AUTH" {
        return s.handleAuth(r)
    }

    if !s.authorized {
        if s.auth != "" {
            r.Response.Resp = redis.NewError([]byte("NOAUTH Authentication required."))
            return r, nil
        }
        s.authorized = true
    }

    switch opstr {
    case "SELECT":
        return s.handleSelect(r)
    case "PING":
        return s.handlePing(r)
    case "MGET":
        return s.handleRequestMGet(r)
    case "MSET":
        return s.handleRequestMSet(r)
    case "DEL":
        return s.handleRequestMDel(r)
    }
    return r, s.GetDispather().Dispatch(r)
}
func (s *Session) loopWriter(tasks <-chan *Request) error {
    p := &FlushPolicy{
        Encoder:     s.Writer,
        MaxBuffered: 32,
        MaxInterval: 300,
    }
    for r := range tasks {
        resp, err := s.handleResponse(r)
        log.Infof("receive a response %s", string(r.Response.Resp.Value))
        // the backend replied with ASK, so the request must be re-sent to the right backend
        if r.isAskResp() {
            log.Infof("ASK: handle -ASK req:response %s:%s", string(r.Resp.Value), string(r.Response.Resp.Value))
            if _, err = s.handleAskRequest(r); err != nil {
                log.Infof("ASK: handleAskRequest error %s", err)
                return err
            }
            // handle the response of the re-sent request
            log.Infof("ASK: receive new response")
            resp, err = s.handleResponse(r)
            log.Infof("ASK: new response %s", string(r.Response.Resp.Value))
        }
        // if this request was issued by the proxy itself, swallow its response
        if r.isAskingReq() {
            return nil
        }
        if err != nil {
            return err
        }
        if err = p.Encode(resp, len(tasks) == 0); err != nil {
            return err
        }
    }
    return nil
}
func (s *Slot) prepare(r *Request, key []byte) (*SharedBackendConn, error) {
    if s.backend.bc == nil {
        log.Infof("slot-%04d is not ready: key = %s", s.id, key)
        return nil, ErrSlotIsNotReady
    }
    if err := s.slotsmgrt(r, key); err != nil {
        log.Warnf("slot-%04d migrate from = %s to %s failed: key = %s, error = %s",
            s.id, s.migrate.from, s.backend.addr, key, err)
        return nil, err
    } else {
        r.slot = &s.wait
        r.slot.Add(1)
        return s.backend.bc, nil
    }
}
func setLogLevel(level string) {
    var lv = log.LEVEL_INFO
    switch strings.ToLower(level) {
    case "error":
        lv = log.LEVEL_ERROR
    case "warn", "warning":
        lv = log.LEVEL_WARN
    case "debug":
        lv = log.LEVEL_DEBUG
    case "info":
        fallthrough
    default:
        lv = log.LEVEL_INFO
    }
    log.SetLevel(lv)
    log.Infof("set log level to %s", lv)
}
func setLogLevel(level string) {
    level = strings.ToLower(level)
    var l = log.LEVEL_INFO
    switch level {
    case "error":
        l = log.LEVEL_ERROR
    case "warn", "warning":
        l = log.LEVEL_WARN
    case "debug":
        l = log.LEVEL_DEBUG
    case "info":
        fallthrough
    default:
        level = "info"
        l = log.LEVEL_INFO
    }
    log.SetLevel(l)
    log.Infof("set log level to <%s>", level)
}
func createDashboardNode() error {
    // make sure the root dir exists
    rootDir := fmt.Sprintf("/zk/codis/db_%s", globalEnv.ProductName())
    zkhelper.CreateRecursive(safeZkConn, rootDir, "", 0, zkhelper.DefaultDirACLs())

    zkPath := fmt.Sprintf("%s/dashboard", rootDir)
    // make sure we are the only dashboard
    if exists, _, _ := safeZkConn.Exists(zkPath); exists {
        data, _, _ := safeZkConn.Get(zkPath)
        return errors.New("dashboard already exists: " + string(data))
    }

    content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid())
    pathCreated, err := safeZkConn.Create(zkPath, []byte(content), 0, zkhelper.DefaultFileACLs())
    createdDashboardNode = true
    log.Infof("dashboard node created: %v, %s", pathCreated, string(content))
    log.Warn("********** Attention **********")
    log.Warn("You should use `kill {pid}` rather than `kill -9 {pid}` to stop me,")
    log.Warn("or the node registered on zk will not be cleaned when I quit and you must remove it manually")
    log.Warn("*******************************")
    return errors.Trace(err)
}
func runDashboard(addr string, httpLogFile string) {
    log.Infof("dashboard listening on addr: %s", addr)
    m := martini.Classic()
    f, err := os.OpenFile(httpLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
    if err != nil {
        log.PanicErrorf(err, "open http log file failed")
    }
    defer f.Close()

    m.Map(stdlog.New(f, "[martini]", stdlog.LstdFlags))
    binRoot, err := filepath.Abs(filepath.Dir(os.Args[0]))
    if err != nil {
        log.PanicErrorf(err, "get binroot path failed")
    }

    m.Use(martini.Static(filepath.Join(binRoot, "assets/statics")))
    m.Use(render.Renderer(render.Options{
        Directory:  filepath.Join(binRoot, "assets/template"),
        Extensions: []string{".tmpl", ".html"},
        Charset:    "UTF-8",
        IndentJSON: true,
    }))

    m.Use(cors.Allow(&cors.Options{
        AllowOrigins:     []string{"*"},
        AllowMethods:     []string{"POST", "GET", "DELETE", "PUT"},
        AllowHeaders:     []string{"Origin", "x-requested-with", "Content-Type", "Content-Range", "Content-Disposition", "Content-Description"},
        ExposeHeaders:    []string{"Content-Length"},
        AllowCredentials: false,
    }))

    m.Get("/api/server_groups", apiGetServerGroupList)
    m.Get("/api/overview", apiOverview)

    m.Get("/api/redis/:addr/stat", apiRedisStat)
    m.Get("/api/redis/:addr/:id/slotinfo", apiGetRedisSlotInfo)
    m.Get("/api/redis/group/:group_id/:slot_id/slotinfo", apiGetRedisSlotInfoFromGroupId)

    m.Put("/api/server_groups", binding.Json(models.ServerGroup{}), apiAddServerGroup)
    m.Put("/api/server_group/(?P<id>[0-9]+)/addServer", binding.Json(models.Server{}), apiAddServerToGroup)
    m.Delete("/api/server_group/(?P<id>[0-9]+)", apiRemoveServerGroup)

    m.Put("/api/server_group/(?P<id>[0-9]+)/removeServer", binding.Json(models.Server{}), apiRemoveServerFromGroup)
    m.Get("/api/server_group/(?P<id>[0-9]+)", apiGetServerGroup)
    m.Post("/api/server_group/(?P<id>[0-9]+)/promote", binding.Json(models.Server{}), apiPromoteServer)

    m.Get("/api/migrate/status", apiMigrateStatus)
    m.Get("/api/migrate/tasks", apiGetMigrateTasks)
    m.Post("/api/migrate", binding.Json(migrateTaskForm{}), apiDoMigrate)

    m.Post("/api/rebalance", apiRebalance)

    m.Get("/api/slot/list", apiGetSlots)
    m.Get("/api/slot/:id", apiGetSingleSlot)
    m.Post("/api/slots/init", apiInitSlots)
    m.Get("/api/slots", apiGetSlots)
    m.Post("/api/slot", binding.Json(RangeSetTask{}), apiSlotRangeSet)

    m.Get("/api/proxy/list", apiGetProxyList)
    m.Get("/api/proxy/debug/vars", apiGetProxyDebugVars)
    m.Post("/api/proxy", binding.Json(models.ProxyInfo{}), apiSetProxyStatus)

    m.Get("/api/action/gc", apiActionGC)
    m.Get("/api/force_remove_locks", apiForceRemoveLocks)
    m.Get("/api/remove_fence", apiRemoveFence)

    m.Get("/slots", pageSlots)
    m.Get("/", func(r render.Render) {
        r.Redirect("/admin")
    })

    zkBuilder := utils.NewConnBuilder(globalEnv.NewZkConn)
    safeZkConn = zkBuilder.GetSafeConn()
    unsafeZkConn = zkBuilder.GetUnsafeConn()

    // create temp node in ZK
    if err := createDashboardNode(); err != nil {
        // do not release the dashboard node here
        log.PanicErrorf(err, "create zk node failed")
    }

    // create long-lived migrate manager
    globalMigrateManager = NewMigrateManager(safeZkConn, globalEnv.ProductName())

    go func() {
        tick := time.Tick(time.Second)
        var lastCnt, qps int64
        for _ = range tick {
            cnt := getAllProxyOps()
            if cnt > 0 {
                qps = cnt - lastCnt
                lastCnt = cnt
            } else {
                qps = 0
            }
            atomic.StoreInt64(&proxiesSpeed, qps)
        }
    }()

    m.RunOnAddr(addr)
}
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error {
    p, err := GetProxyInfo(zkConn, productName, proxyName)
    if err != nil {
        return errors.Trace(err)
    }

    if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE {
        return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status)
    }

    // check slot status before setting proxy online
    if status == PROXY_STATE_ONLINE {
        slots, err := Slots(zkConn, productName)
        if err != nil {
            return errors.Trace(err)
        }
        for _, slot := range slots {
            if slot.State.Status != SLOT_STATUS_ONLINE && slot.State.Status != SLOT_STATUS_MIGRATE {
                return errors.Errorf("slot %v is not online or migrate", slot)
            }
            if slot.GroupId == INVALID_ID {
                return errors.Errorf("slot %v has invalid group id", slot)
            }
        }
    }

    p.State = status
    b, _ := json.Marshal(p)

    _, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1)
    if err != nil {
        return errors.Trace(err)
    }

    if status == PROXY_STATE_MARK_OFFLINE {
        // wait for the proxy to go down
        for {
            _, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName))
            if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
                return nil
            } else if err != nil {
                return errors.Trace(err)
            }
            <-c
            info, err := GetProxyInfo(zkConn, productName, proxyName)
            log.Info("mark_offline, check proxy status:", proxyName, info, err)
            if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
                log.Info("shutdown proxy successful")
                return nil
            } else if err != nil {
                return errors.Trace(err)
            }
            if info.State == PROXY_STATE_OFFLINE {
                log.Infof("proxy: %s offline success!", proxyName)
                return nil
            }
        }
    }
    return nil
}
func main() {
    fmt.Print(banner)

    args, err := docopt.Parse(usage, nil, true, "codis proxy v0.1", true)
    if err != nil {
        fmt.Println(err)
        os.Exit(1)
    }

    // set config file
    if args["-c"] != nil {
        configFile = args["-c"].(string)
    }

    var maxFileFrag = 10
    var maxFragSize int64 = bytesize.GB * 1
    if s, ok := args["--log-filesize"].(string); ok && s != "" {
        v, err := bytesize.Parse(s)
        if err != nil {
            log.PanicErrorf(err, "invalid max log file size = %s", s)
        }
        maxFragSize = v
    }

    // set output log file
    if s, ok := args["-L"].(string); ok && s != "" {
        f, err := log.NewRollingFile(s, maxFileFrag, maxFragSize)
        if err != nil {
            log.PanicErrorf(err, "open rolling log file failed: %s", s)
        } else {
            defer f.Close()
            log.StdLog = log.New(f, "")
        }
    }
    log.SetLevel(log.LEVEL_INFO)
    log.SetFlags(log.Flags() | log.Lshortfile)

    // set log level
    if s, ok := args["--log-level"].(string); ok && s != "" {
        setLogLevel(s)
    }
    cpus = runtime.NumCPU()
    // set cpu
    if args["--cpu"] != nil {
        cpus, err = strconv.Atoi(args["--cpu"].(string))
        if err != nil {
            log.PanicErrorf(err, "parse cpu number failed")
        }
    }

    // set addr
    if args["--addr"] != nil {
        addr = args["--addr"].(string)
    }

    // set http addr
    if args["--http-addr"] != nil {
        httpAddr = args["--http-addr"].(string)
    }

    checkUlimit(1024)
    runtime.GOMAXPROCS(cpus)

    http.HandleFunc("/setloglevel", handleSetLogLevel)
    go func() {
        err := http.ListenAndServe(httpAddr, nil)
        log.PanicError(err, "http debug server quit")
    }()
    log.Info("running on ", addr)

    conf, err := proxy.LoadConf(configFile)
    if err != nil {
        log.PanicErrorf(err, "load config failed")
    }

    c := make(chan os.Signal, 1)
    signal.Notify(c, os.Interrupt, syscall.SIGTERM, os.Kill)

    s := proxy.New(addr, httpAddr, conf)
    defer s.Close()

    stats.PublishJSONFunc("router", func() string {
        var m = make(map[string]interface{})
        m["ops"] = router.OpCounts()
        m["cmds"] = router.GetAllOpStats()
        m["info"] = s.Info()
        m["build"] = map[string]interface{}{
            "version": utils.Version,
            "compile": utils.Compile,
        }
        b, _ := json.Marshal(m)
        return string(b)
    })

    go func() {
        <-c
        log.Info("ctrl-c or SIGTERM found, bye bye...")
        s.Close()
    }()

    time.Sleep(time.Second)
    if err := s.SetMyselfOnline(); err != nil {
        log.WarnError(err, "mark myself online failed, you need to mark it online manually via the dashboard")
    }

    s.Join()
    log.Infof("proxy exit!! :(")
}
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
    // set slot status
    s, err := models.GetSlot(t.zkConn, t.productName, slotId)
    if err != nil {
        log.ErrorErrorf(err, "get slot info failed")
        return err
    }
    if s.State.Status == models.SLOT_STATUS_OFFLINE {
        log.Warnf("status is offline: %+v", s)
        return nil
    }

    from := s.GroupId
    if s.State.Status == models.SLOT_STATUS_MIGRATE {
        from = s.State.MigrateStatus.From
    }

    // make sure the source group and the target group exist
    exists, err := models.GroupExists(t.zkConn, t.productName, from)
    if err != nil {
        return errors.Trace(err)
    }
    if !exists {
        log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
        return errors.Errorf("group %d not found", from)
    }

    exists, err = models.GroupExists(t.zkConn, t.productName, to)
    if err != nil {
        return errors.Trace(err)
    }
    if !exists {
        return errors.Errorf("group %d not found", to)
    }

    // cannot migrate to itself, just ignore
    if from == to {
        log.Warnf("from == to, ignore: %+v", s)
        return nil
    }

    // modify slot status
    if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil {
        log.ErrorErrorf(err, "set migrate status failed")
        return err
    }

    err = t.Migrate(s, from, to, func(p SlotMigrateProgress) {
        // on migrate slot progress
        if p.Remain%5000 == 0 {
            log.Infof("%+v", p)
        }
    })
    if err != nil {
        log.ErrorErrorf(err, "migrate slot failed")
        return err
    }

    // migration done, change slot status back
    s.State.Status = models.SLOT_STATUS_ONLINE
    s.State.MigrateStatus.From = models.INVALID_ID
    s.State.MigrateStatus.To = models.INVALID_ID
    if err := s.Update(t.zkConn); err != nil {
        log.ErrorErrorf(err, "update zk status failed, should be: %+v", s)
        return err
    }
    return nil
}