func (s *Server) processAction(e interface{}) { if strings.Index(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) == 0 { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id) } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: log.Infof("mark offline, proxy got offline event: %s", s.info.Id) s.markOffline() case models.PROXY_STATE_ONLINE: s.rewatchProxy() default: log.Panicf("unknown proxy state %v", info) } return } //re-watch nodes := s.rewatchNodes() seqs, err := models.ExtraSeqList(nodes) if err != nil { log.PanicErrorf(err, "get seq list failed") } if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) { return } //get last pos index := -1 for i, seq := range seqs { if s.lastActionSeq < seq { index = i break } } if index < 0 { return } actions := seqs[index:] for _, seq := range actions { exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id)) if err != nil { log.PanicErrorf(err, "get action failed") } if exist { continue } if s.checkAndDoTopoChange(seq) { s.responseAction(int64(seq)) } } s.lastActionSeq = seqs[len(seqs)-1] }
func (s *Server) register() { if _, err := s.topo.CreateProxyInfo(&s.info); err != nil { log.PanicErrorf(err, "create proxy node failed") } if _, err := s.topo.CreateProxyFenceNode(&s.info); err != nil && err != zk.ErrNodeExists { log.PanicErrorf(err, "create fence node failed") } log.Warn("********** Attention **********") log.Warn("You should use `kill {pid}` rather than `kill -9 {pid}` to stop me,") log.Warn("or the node resisted on zk will not be cleaned when I'm quiting and you must remove it manually") log.Warn("*******************************") }
func (s *Server) checkAndDoTopoChange(seq int) bool { act, err := s.topo.GetActionWithSeq(int64(seq)) if err != nil { //todo: error is not "not exist" log.PanicErrorf(err, "action failed, seq = %d", seq) } if !needResponse(act.Receivers, s.info) { //no need to response return false } log.Warnf("action %v receivers %v", seq, act.Receivers) switch act.Type { case models.ACTION_TYPE_SLOT_MIGRATE, models.ACTION_TYPE_SLOT_CHANGED, models.ACTION_TYPE_SLOT_PREMIGRATE: slot := &models.Slot{} s.getActionObject(seq, slot) s.fillSlot(slot.Id) case models.ACTION_TYPE_SERVER_GROUP_CHANGED: serverGroup := &models.ServerGroup{} s.getActionObject(seq, serverGroup) s.onGroupChange(serverGroup.Id) case models.ACTION_TYPE_SERVER_GROUP_REMOVE: //do not care case models.ACTION_TYPE_MULTI_SLOT_CHANGED: param := &models.SlotMultiSetParam{} s.getActionObject(seq, param) s.onSlotRangeChange(param) default: log.Panicf("unknown action %+v", act) } return true }
func (s *Server) waitOnline() bool { for { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id) } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: log.Infof("mark offline, proxy got offline event: %s", s.info.Id) s.markOffline() return false case models.PROXY_STATE_ONLINE: s.info.State = info.State log.Infof("we are online: %s", s.info.Id) s.rewatchProxy() return true } select { case <-s.kill: log.Infof("mark offline, proxy is killed: %s", s.info.Id) s.markOffline() return false default: } log.Infof("wait to be online: %s", s.info.Id) time.Sleep(3 * time.Second) } }
func (s *Server) rewatchNodes() []string { nodes, err := s.topo.WatchChildren(models.GetWatchActionPath(s.topo.ProductName), s.evtbus) if err != nil { log.PanicErrorf(err, "watch children failed") } return nodes }
func (top *Topology) InitZkConn() { var err error top.zkConn, err = top.fact(top.zkAddr, top.zkSessionTimeout) if err != nil { log.PanicErrorf(err, "init failed") } }
func (s *Server) getActionObject(seq int, target interface{}) { act := &models.Action{Target: target} err := s.topo.GetActionWithSeqObject(int64(seq), act) if err != nil { log.PanicErrorf(err, "get action object failed, seq = %d", seq) } log.Infof("action %+v", act) }
func GetExecutorPath() string { filedirectory := filepath.Dir(os.Args[0]) execPath, err := filepath.Abs(filedirectory) if err != nil { log.PanicErrorf(err, "get executor path failed") } return execPath }
func New(addr string, debugVarAddr string, conf *Config) *Server { log.Infof("create proxy with config: %+v", conf) proxyHost := strings.Split(addr, ":")[0] debugHost := strings.Split(debugVarAddr, ":")[0] hostname, err := os.Hostname() if err != nil { log.PanicErrorf(err, "get host name failed") } if proxyHost == "0.0.0.0" || strings.HasPrefix(proxyHost, "127.0.0.") || proxyHost == "" { proxyHost = hostname } if debugHost == "0.0.0.0" || strings.HasPrefix(debugHost, "127.0.0.") || debugHost == "" { debugHost = hostname } s := &Server{conf: conf, lastActionSeq: -1, groups: make(map[int]int)} s.topo = NewTopo(conf.productName, conf.zkAddr, conf.fact, conf.provider, conf.zkSessionTimeout) s.info.Id = conf.proxyId s.info.State = models.PROXY_STATE_OFFLINE s.info.Addr = proxyHost + ":" + strings.Split(addr, ":")[1] s.info.DebugVarAddr = debugHost + ":" + strings.Split(debugVarAddr, ":")[1] s.info.Pid = os.Getpid() s.info.StartAt = time.Now().String() s.kill = make(chan interface{}) log.Infof("proxy info = %+v", s.info) if l, err := net.Listen(conf.proto, addr); err != nil { log.PanicErrorf(err, "open listener failed") } else { s.listener = l } s.router = router.NewWithAuth(conf.passwd) s.evtbus = make(chan interface{}, 1024) s.register() s.wait.Add(1) go func() { defer s.wait.Done() s.serve() }() return s }
func (c *safeConn) Delete(path string, version int32) (err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() err = c.Conn.Delete(path, version) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (c *safeConn) Create(path string, value []byte, flags int32, aclv []zk.ACL) (pathCreated string, err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() pathCreated, err = c.Conn.Create(path, value, flags, aclv) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (c *safeConn) ExistsW(path string) (exist bool, stat zk.Stat, watch <-chan zk.Event, err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() exist, stat, watch, err = c.Conn.ExistsW(path) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (c *safeConn) Children(path string) (children []string, stat zk.Stat, err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() children, stat, err = c.Conn.Children(path) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (c *safeConn) Get(path string) (data []byte, stat zk.Stat, err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() data, stat, err = c.Conn.Get(path) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (c *safeConn) SetACL(path string, aclv []zk.ACL, version int32) (stat zk.Stat, err error) { for i := 0; i <= retryMaxOnOps; i++ { c.builder.lock.RLock() stat, err = c.Conn.SetACL(path, aclv, version) c.builder.lock.RUnlock() if err == nil || !isConnectionError(err) { return } c.builder.resetConnection() } log.PanicErrorf(err, "zk error after retries") return }
func (s *Server) fillSlot(i int) { slotInfo, slotGroup, err := s.topo.GetSlotByIndex(i) if err != nil { log.PanicErrorf(err, "get slot by index failed", i) } var from string var addr = groupMaster(*slotGroup) if slotInfo.State.Status == models.SLOT_STATUS_MIGRATE { fromGroup, err := s.topo.GetGroup(slotInfo.State.MigrateStatus.From) if err != nil { log.PanicErrorf(err, "get migrate from failed") } from = groupMaster(*fromGroup) if from == addr { log.Panicf("set slot %04d migrate from %s to %s", i, from, addr) } } s.groups[i] = slotInfo.GroupId s.router.FillSlot(i, addr, from, slotInfo.State.Status == models.SLOT_STATUS_PRE_MIGRATE) }
func LoadCodisEnv(cfg *cfg.Cfg) Env { if cfg == nil { log.Panicf("config is nil") } productName, err := cfg.ReadString("product", "test") if err != nil { log.PanicErrorf(err, "config: 'product' not found") } zkAddr, err := cfg.ReadString("zk", "localhost:2181") if err != nil { log.PanicErrorf(err, "config: 'zk' not found") } hostname, _ := os.Hostname() dashboardAddr, err := cfg.ReadString("dashboard_addr", hostname+":18087") if err != nil { log.PanicErrorf(err, "config: 'dashboard_addr' not found") } provider, err := cfg.ReadString("coordinator", "zookeeper") if err != nil { log.PanicErrorf(err, "config: 'coordinator' not found") } passwd, _ := cfg.ReadString("password", "") return &CodisEnv{ zkAddr: zkAddr, passwd: passwd, dashboardAddr: dashboardAddr, productName: productName, provider: provider, } }
func LoadConf(configFile string) (*Config, error) { c := cfg.NewCfg(configFile) if err := c.Load(); err != nil { log.PanicErrorf(err, "load config '%s' failed", configFile) } conf := &Config{} conf.productName, _ = c.ReadString("product", "test") if len(conf.productName) == 0 { log.Panicf("invalid config: product entry is missing in %s", configFile) } conf.dashboardAddr, _ = c.ReadString("dashboard_addr", "") if conf.dashboardAddr == "" { log.Panicf("invalid config: dashboard_addr is missing in %s", configFile) } conf.zkAddr, _ = c.ReadString("zk", "") if len(conf.zkAddr) == 0 { log.Panicf("invalid config: need zk entry is missing in %s", configFile) } conf.zkAddr = strings.TrimSpace(conf.zkAddr) conf.passwd, _ = c.ReadString("password", "") conf.proxyId, _ = c.ReadString("proxy_id", "") if len(conf.proxyId) == 0 { log.Panicf("invalid config: need proxy_id entry is missing in %s", configFile) } conf.proto, _ = c.ReadString("proto", "tcp") conf.provider, _ = c.ReadString("coordinator", "zookeeper") loadConfInt := func(entry string, defval int) int { v, _ := c.ReadInt(entry, defval) if v < 0 { log.Panicf("invalid config: read %s = %d", entry, v) } return v } conf.pingPeriod = loadConfInt("backend_ping_period", 5) conf.maxTimeout = loadConfInt("session_max_timeout", 1800) conf.maxBufSize = loadConfInt("session_max_bufsize", 131072) conf.maxPipeline = loadConfInt("session_max_pipeline", 1024) conf.zkSessionTimeout = loadConfInt("zk_session_timeout", 30) return conf, nil }
func main() { fmt.Print(banner) args, err := docopt.Parse(usage, nil, true, "codis proxy v0.1", true) if err != nil { fmt.Println(err) os.Exit(1) } // set config file if args["-c"] != nil { configFile = args["-c"].(string) } var maxFileFrag = 10 var maxFragSize int64 = bytesize.GB * 1 if s, ok := args["--log-filesize"].(string); ok && s != "" { v, err := bytesize.Parse(s) if err != nil { log.PanicErrorf(err, "invalid max log file size = %s", s) } maxFragSize = v } // set output log file if s, ok := args["-L"].(string); ok && s != "" { f, err := log.NewRollingFile(s, maxFileFrag, maxFragSize) if err != nil { log.PanicErrorf(err, "open rolling log file failed: %s", s) } else { defer f.Close() log.StdLog = log.New(f, "") } } log.SetLevel(log.LEVEL_INFO) log.SetFlags(log.Flags() | log.Lshortfile) // set log level if s, ok := args["--log-level"].(string); ok && s != "" { setLogLevel(s) } cpus = runtime.NumCPU() // set cpu if args["--cpu"] != nil { cpus, err = strconv.Atoi(args["--cpu"].(string)) if err != nil { log.PanicErrorf(err, "parse cpu number failed") } } // set addr if args["--addr"] != nil { addr = args["--addr"].(string) } // set http addr if args["--http-addr"] != nil { httpAddr = args["--http-addr"].(string) } checkUlimit(1024) runtime.GOMAXPROCS(cpus) http.HandleFunc("/setloglevel", handleSetLogLevel) go func() { err := http.ListenAndServe(httpAddr, nil) log.PanicError(err, "http debug server quit") }() log.Info("running on ", addr) conf, err := proxy.LoadConf(configFile) if err != nil { log.PanicErrorf(err, "load config failed") } c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt, syscall.SIGTERM, os.Kill) s := proxy.New(addr, httpAddr, conf) defer s.Close() stats.PublishJSONFunc("router", func() string { var m = make(map[string]interface{}) m["ops"] = router.OpCounts() m["cmds"] = router.GetAllOpStats() m["info"] = s.Info() m["build"] = map[string]interface{}{ "version": utils.Version, "compile": utils.Compile, } b, _ := json.Marshal(m) return string(b) }) go func() { <-c log.Info("ctrl-c or SIGTERM found, bye bye...") s.Close() }() time.Sleep(time.Second) if err := s.SetMyselfOnline(); err != nil { log.WarnError(err, "mark myself online fail, you need mark online manually by dashboard") } s.Join() log.Infof("proxy exit!! :(") }
func runDashboard(addr string, httpLogFile string) { log.Infof("dashboard listening on addr: %s", addr) m := martini.Classic() f, err := os.OpenFile(httpLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.PanicErrorf(err, "open http log file failed") } defer f.Close() m.Map(stdlog.New(f, "[martini]", stdlog.LstdFlags)) binRoot, err := filepath.Abs(filepath.Dir(os.Args[0])) if err != nil { log.PanicErrorf(err, "get binroot path failed") } m.Use(martini.Static(filepath.Join(binRoot, "assets/statics"))) m.Use(render.Renderer(render.Options{ Directory: filepath.Join(binRoot, "assets/template"), Extensions: []string{".tmpl", ".html"}, Charset: "UTF-8", IndentJSON: true, })) m.Use(cors.Allow(&cors.Options{ AllowOrigins: []string{"*"}, AllowMethods: []string{"POST", "GET", "DELETE", "PUT"}, AllowHeaders: []string{"Origin", "x-requested-with", "Content-Type", "Content-Range", "Content-Disposition", "Content-Description"}, ExposeHeaders: []string{"Content-Length"}, AllowCredentials: false, })) m.Get("/api/server_groups", apiGetServerGroupList) m.Get("/api/overview", apiOverview) m.Get("/api/redis/:addr/stat", apiRedisStat) m.Get("/api/redis/:addr/:id/slotinfo", apiGetRedisSlotInfo) m.Get("/api/redis/group/:group_id/:slot_id/slotinfo", apiGetRedisSlotInfoFromGroupId) m.Put("/api/server_groups", binding.Json(models.ServerGroup{}), apiAddServerGroup) m.Put("/api/server_group/(?P<id>[0-9]+)/addServer", binding.Json(models.Server{}), apiAddServerToGroup) m.Delete("/api/server_group/(?P<id>[0-9]+)", apiRemoveServerGroup) m.Put("/api/server_group/(?P<id>[0-9]+)/removeServer", binding.Json(models.Server{}), apiRemoveServerFromGroup) m.Get("/api/server_group/(?P<id>[0-9]+)", apiGetServerGroup) m.Post("/api/server_group/(?P<id>[0-9]+)/promote", binding.Json(models.Server{}), apiPromoteServer) m.Get("/api/migrate/status", apiMigrateStatus) m.Get("/api/migrate/tasks", apiGetMigrateTasks) m.Post("/api/migrate", binding.Json(migrateTaskForm{}), apiDoMigrate) m.Post("/api/rebalance", apiRebalance) m.Get("/api/slot/list", apiGetSlots) m.Get("/api/slot/:id", apiGetSingleSlot) m.Post("/api/slots/init", apiInitSlots) m.Get("/api/slots", apiGetSlots) m.Post("/api/slot", binding.Json(RangeSetTask{}), apiSlotRangeSet) m.Get("/api/proxy/list", apiGetProxyList) m.Get("/api/proxy/debug/vars", apiGetProxyDebugVars) m.Post("/api/proxy", binding.Json(models.ProxyInfo{}), apiSetProxyStatus) m.Get("/api/action/gc", apiActionGC) m.Get("/api/force_remove_locks", apiForceRemoveLocks) m.Get("/api/remove_fence", apiRemoveFence) m.Get("/slots", pageSlots) m.Get("/", func(r render.Render) { r.Redirect("/admin") }) zkBuilder := utils.NewConnBuilder(globalEnv.NewZkConn) safeZkConn = zkBuilder.GetSafeConn() unsafeZkConn = zkBuilder.GetUnsafeConn() // create temp node in ZK if err := createDashboardNode(); err != nil { log.PanicErrorf(err, "create zk node failed") // do not release dashborad node here } // create long live migrate manager globalMigrateManager = NewMigrateManager(safeZkConn, globalEnv.ProductName()) go func() { tick := time.Tick(time.Second) var lastCnt, qps int64 for _ = range tick { cnt := getAllProxyOps() if cnt > 0 { qps = cnt - lastCnt lastCnt = cnt } else { qps = 0 } atomic.StoreInt64(&proxiesSpeed, qps) } }() m.RunOnAddr(addr) }
func (s *Server) rewatchProxy() { _, err := s.topo.WatchNode(path.Join(models.GetProxyPath(s.topo.ProductName), s.info.Id), s.evtbus) if err != nil { log.PanicErrorf(err, "watch node failed") } }
func main() { c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt) signal.Notify(c, syscall.SIGTERM) go func() { <-c if createdDashboardNode { releaseDashboardNode() } log.Panicf("ctrl-c or SIGTERM found, exit") }() args, err := docopt.Parse(usage, nil, true, "codis config v0.1", true) if err != nil { fmt.Println(err) os.Exit(1) } // set output log file if s, ok := args["-L"].(string); ok && s != "" { f, err := os.OpenFile(s, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) if err != nil { log.PanicErrorf(err, "open log file failed: %s", s) } else { defer f.Close() log.StdLog = log.New(f, "") } } log.SetLevel(log.LEVEL_INFO) log.SetFlags(log.Flags() | log.Lshortfile) // set log level if s, ok := args["--log-level"].(string); ok && s != "" { setLogLevel(s) } // set config file var configFile string if args["-c"] != nil { configFile = args["-c"].(string) } else { configFile = "config.ini" } config := cfg.NewCfg(configFile) if err := config.Load(); err != nil { log.PanicErrorf(err, "load config file error") } // load global vars globalEnv = LoadCodisEnv(config) cmd := args["<command>"].(string) cmdArgs := args["<args>"].([]string) go http.ListenAndServe(":10086", nil) err = runCommand(cmd, cmdArgs) if err != nil { log.PanicErrorf(err, "run sub-command failed") } }