func (s *Server) waitOnline() bool { for { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id) } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: log.Infof("mark offline, proxy got offline event: %s", s.info.Id) s.markOffline() return false case models.PROXY_STATE_ONLINE: s.info.State = info.State log.Infof("we are online: %s", s.info.Id) s.rewatchProxy() return true } select { case <-s.kill: log.Infof("mark offline, proxy is killed: %s", s.info.Id) s.markOffline() return false default: } log.Infof("wait to be online: %s", s.info.Id) time.Sleep(3 * time.Second) } }
func (s *Server) loopEvents() { ticker := time.NewTicker(time.Second) defer ticker.Stop() var tick int = 0 for s.info.State == models.PROXY_STATE_ONLINE { select { case <-s.kill: log.Infof("mark offline, proxy is killed: %s", s.info.Id) s.markOffline() case e := <-s.evtbus: evtPath := getEventPath(e) log.Infof("got event %s, %v, lastActionSeq %d", s.info.Id, e, s.lastActionSeq) if strings.Index(evtPath, models.GetActionResponsePath(s.conf.productName)) == 0 { seq, err := strconv.Atoi(path.Base(evtPath)) if err != nil { log.ErrorErrorf(err, "parse action seq failed") } else { if seq < s.lastActionSeq { log.Infof("ignore seq = %d", seq) continue } } } s.processAction(e) case <-ticker.C: if maxTick := s.conf.pingPeriod; maxTick != 0 { if tick++; tick >= maxTick { s.router.KeepAlive() tick = 0 } } } } }
func (s *Session) Serve(d Dispatcher, maxPipeline int) { var errlist errors.ErrorList defer func() { if err := errlist.First(); err != nil { log.Infof("session [%p] closed: %s, error = %s", s, s, err) } else { log.Infof("session [%p] closed: %s, quit", s, s) } }() tasks := make(chan *Request, maxPipeline) go func() { defer func() { s.Close() for _ = range tasks { } }() if err := s.loopWriter(tasks); err != nil { errlist.PushBack(err) } }() defer close(tasks) if err := s.loopReader(tasks, d); err != nil { errlist.PushBack(err) } }
func (s *Server) handleTopoEvent() { ticker := time.NewTicker(time.Second * 5) defer ticker.Stop() for { select { case e := <-s.evtbus: switch e.(type) { case *killEvent: s.handleMarkOffline() default: evtPath := getEventPath(e) log.Infof("got event %s, %v, lastActionSeq %d", s.info.Id, e, s.lastActionSeq) if strings.Index(evtPath, models.GetActionResponsePath(s.conf.productName)) == 0 { seq, err := strconv.Atoi(path.Base(evtPath)) if err != nil { log.WarnErrorf(err, "parse action seq failed") } else { if seq < s.lastActionSeq { log.Infof("ignore seq = %d", seq) continue } } } log.Infof("got event %s, %v, lastActionSeq %d", s.info.Id, e, s.lastActionSeq) s.processAction(e) } case <-ticker.C: for _, bc := range s.pool { bc.KeepAlive() } } } }
// experimental simple auto rebalance :) func Rebalance() error { targetQuota, err := getQuotaMap(safeZkConn) if err != nil { return errors.Trace(err) } livingNodes, err := getLivingNodeInfos(safeZkConn) if err != nil { return errors.Trace(err) } log.Infof("start rebalance") for _, node := range livingNodes { for len(node.CurSlots) > targetQuota[node.GroupId] { for _, dest := range livingNodes { if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] { slot := node.CurSlots[len(node.CurSlots)-1] // create a migration task info := &MigrateTaskInfo{ Delay: 0, SlotId: slot, NewGroupId: dest.GroupId, Status: MIGRATE_TASK_PENDING, CreateAt: strconv.FormatInt(time.Now().Unix(), 10), } globalMigrateManager.PostTask(info) node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1] dest.CurSlots = append(dest.CurSlots, slot) } } } } log.Infof("rebalance tasks submit finish") return nil }
func New(addr string, debugVarAddr string, conf *Config) *Server { log.Infof("create proxy with config: %+v", conf) proxyHost := strings.Split(addr, ":")[0] debugHost := strings.Split(debugVarAddr, ":")[0] hostname, err := os.Hostname() if err != nil { log.PanicErrorf(err, "get host name failed") } if proxyHost == "0.0.0.0" || strings.HasPrefix(proxyHost, "127.0.0.") || proxyHost == "" { proxyHost = hostname } if debugHost == "0.0.0.0" || strings.HasPrefix(debugHost, "127.0.0.") || debugHost == "" { debugHost = hostname } s := &Server{conf: conf, lastActionSeq: -1, groups: make(map[int]int)} s.topo = NewTopo(conf.productName, conf.zkAddr, conf.fact, conf.provider, conf.zkSessionTimeout) s.info.Id = conf.proxyId s.info.State = models.PROXY_STATE_OFFLINE s.info.Addr = proxyHost + ":" + strings.Split(addr, ":")[1] s.info.DebugVarAddr = debugHost + ":" + strings.Split(debugVarAddr, ":")[1] s.info.Pid = os.Getpid() s.info.StartAt = time.Now().String() s.kill = make(chan interface{}) log.Infof("proxy info = %+v", s.info) if l, err := net.Listen(conf.proto, addr); err != nil { log.PanicErrorf(err, "open listener failed") } else { s.listener = l } s.router = router.NewWithAuth(conf.passwd) s.evtbus = make(chan interface{}, 1024) s.sessionPool = &sync.Pool{ New: func() interface{} { return nil }, } s.register() s.wait.Add(1) go func() { defer s.wait.Done() s.serve() }() return s }
func (t *MigrateTask) run() error { log.Infof("migration start: %+v", t.MigrateTaskInfo) to := t.NewGroupId t.UpdateStatus(MIGRATE_TASK_MIGRATING) err := t.migrateSingleSlot(t.SlotId, to) if err != nil { log.ErrorErrorf(err, "migrate single slot failed") t.UpdateStatus(MIGRATE_TASK_ERR) return err } t.UpdateFinish() log.Infof("migration finished: %+v", t.MigrateTaskInfo) return nil }
func (s *Server) waitOnline() { for { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.PanicErrorf(err, "get proxy info failed") } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: s.handleMarkOffline() case models.PROXY_STATE_ONLINE: s.info.State = info.State log.Infof("we are online: %s", s.info.Id) _, err := s.topo.WatchNode(path.Join(models.GetProxyPath(s.topo.ProductName), s.info.Id), s.evtbus) if err != nil { log.PanicErrorf(err, "watch node failed") } return } select { case e := <-s.evtbus: switch e.(type) { case *killEvent: s.handleMarkOffline() } default: //otherwise ignore it } log.Warnf("wait to be online: %s", s.info.Id) time.Sleep(3 * time.Second) } }
func (s *Server) processAction(e interface{}) { if strings.Index(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) == 0 { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.PanicErrorf(err, "get proxy info failed: %s", s.info.Id) } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: log.Infof("mark offline, proxy got offline event: %s", s.info.Id) s.markOffline() case models.PROXY_STATE_ONLINE: s.rewatchProxy() default: log.Panicf("unknown proxy state %v", info) } return } //re-watch nodes := s.rewatchNodes() seqs, err := models.ExtraSeqList(nodes) if err != nil { log.PanicErrorf(err, "get seq list failed") } if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) { return } //get last pos index := -1 for i, seq := range seqs { if s.lastActionSeq < seq { index = i //break //only handle latest action } } if index < 0 { return } actions := seqs[index:] for _, seq := range actions { exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id)) if err != nil { log.PanicErrorf(err, "get action failed") } if exist { continue } if s.checkAndDoTopoChange(seq) { s.responseAction(int64(seq)) } } s.lastActionSeq = seqs[len(seqs)-1] }
func releaseDashboardNode() { zkPath := fmt.Sprintf("/zk/codis/db_%s/dashboard", globalEnv.ProductName()) if exists, _, _ := safeZkConn.Exists(zkPath); exists { log.Infof("removing dashboard node") safeZkConn.Delete(zkPath, 0) } }
func (s *Server) responseAction(seq int64) { log.Infof("send response seq = %d", seq) err := s.topo.DoResponse(int(seq), &s.info) if err != nil { log.InfoErrorf(err, "send response seq = %d failed", seq) } }
func addToSessions(s *Session) *Session { sessions.Lock() sessions.PushBack(s) sessions.Unlock() log.Infof("session [%p] created, sid = %d", s, s.Sid) return s }
func TestProxyOfflineInWaitActionReceiver(t *testing.T) { log.Infof("test proxy offline when waiting action response") fakeZkConn := zkhelper.NewConn() for i := 1; i <= 4; i++ { CreateProxyInfo(fakeZkConn, productName, &ProxyInfo{ Id: strconv.Itoa(i), State: PROXY_STATE_ONLINE, }) go waitForProxyMarkOffline(fakeZkConn, strconv.Itoa(i)) } lst, _ := ProxyList(fakeZkConn, productName, nil) assert.Must(len(lst) == 4) go func() { time.Sleep(500 * time.Millisecond) actionPath := path.Join(GetActionResponsePath(productName), fakeZkConn.Seq2Str(1)) //create test response for proxy 4, means proxy 1,2,3 are timeout fakeZkConn.Create(path.Join(actionPath, "4"), nil, 0, zkhelper.DefaultFileACLs()) }() err := NewActionWithTimeout(fakeZkConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000) if err != nil { assert.Must(err.Error() == ErrReceiverTimeout.Error()) } for i := 1; i <= 3; i++ { info, _ := GetProxyInfo(fakeZkConn, productName, strconv.Itoa(i)) assert.Must(info.State == PROXY_STATE_OFFLINE) } }
func (bc *BackendConn) Run() { log.Infof("backend conn [%p] to %s, start service", bc, bc.addr) for k := 0; ; k++ { err := bc.loopWriter() if err == nil { break } else { for i := len(bc.input); i != 0; i-- { r := <-bc.input bc.setResponse(r, nil, err) } } log.WarnErrorf(err, "backend conn [%p] to %s, restart [%d]", bc, bc.addr, k) time.Sleep(time.Millisecond * 50) } log.Infof("backend conn [%p] to %s, stop and exit", bc, bc.addr) }
func (s *Server) getActionObject(seq int, target interface{}) { act := &models.Action{Target: target} err := s.topo.GetActionWithSeqObject(int64(seq), act) if err != nil { log.PanicErrorf(err, "get action object failed, seq = %d", seq) } log.Infof("action %+v", act) }
func (s *Server) onGroupChange(groupId int) { log.Infof("group changed %d", groupId) for i, g := range s.groups { if g == groupId { s.fillSlot(i) } } }
func NewSessionSize(c net.Conn, auth string, bufsize int, timeout int) *Session { s := &Session{CreateUnix: time.Now().Unix(), auth: auth} s.Conn = redis.NewConnSize(c, bufsize) s.Conn.ReaderTimeout = time.Second * time.Duration(timeout) s.Conn.WriterTimeout = time.Second * 30 log.Infof("session [%p] create: %s", s, s) return s }
func NewSession(c net.Conn) *Session { s := &Session{Sid: sessions.sid.Incr(), CreateUnix: time.Now().Unix()} s.Conn = redis.NewConn(c) s.Conn.ReaderTimeout = time.Minute * 30 s.Conn.WriterTimeout = time.Second * 30 log.Infof("session [%p] create: %s", s, s) return addToSessions(s) }
func (s *Server) SetMyselfOnline() error { log.Info("mark myself online") info := models.ProxyInfo{ Id: s.conf.proxyId, State: models.PROXY_STATE_ONLINE, } b, _ := json.Marshal(info) url := "http://" + s.conf.dashboardAddr + "/api/proxy" log.Infof("url = %s", url) res, err := http.Post(url, "application/json", bytes.NewReader(b)) if err != nil { return err } log.Infof("StatusCode = %d", res.StatusCode) if res.StatusCode != 200 { return errors.New("response code is not 200") } return nil }
func (t *MigrateTask) rollbackPremigrate() { if s, err := models.GetSlot(t.zkConn, t.productName, t.SlotId); err == nil && (s.State.Status == models.SLOT_STATUS_PRE_MIGRATE || s.State.Status == models.SLOT_STATUS_MIGRATE) { s.State.Status = models.SLOT_STATUS_ONLINE err = s.Update(t.zkConn) if err != nil { log.Warn("rollback premigrate failed", err) } else { log.Infof("rollback slot %d from premigrate to online\n", s.Id) } } }
func (s *Server) onGroupChange(groupId int) error { log.Infof("group changed %d", groupId) for i, g := range s.groups { if g == groupId { if err := s.fillSlot(i); err != nil { //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return err } } } return nil }
func (s *Router) fillSlot(i int, addr, from string, lock bool) { if !s.isValidSlot(i) { return } slot := s.slots[i] slot.blockAndWait() s.putBackendConn(slot.backend.bc) s.putBackendConn(slot.migrate.bc) slot.reset() if len(addr) != 0 { xx := strings.Split(addr, ":") if len(xx) >= 1 { slot.backend.host = []byte(xx[0]) } if len(xx) >= 2 { slot.backend.port = []byte(xx[1]) } slot.backend.addr = addr slot.backend.bc = s.getBackendConn(addr) } if len(from) != 0 { slot.migrate.from = from slot.migrate.bc = s.getBackendConn(from) } if !lock { slot.unblock() } if slot.migrate.bc != nil { log.Infof("fill slot %04d, backend.addr = %s, migrate.from = %s", i, slot.backend.addr, slot.migrate.from) } else { log.Infof("fill slot %04d, backend.addr = %s", i, slot.backend.addr) } }
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) { log.Infof("slotRangeChange %+v", param) for i := param.From; i <= param.To; i++ { switch param.Status { case models.SLOT_STATUS_OFFLINE: s.resetSlot(i) case models.SLOT_STATUS_ONLINE: s.fillSlot(i) default: log.Panicf("can not handle status %v", param.Status) } } }
func TestRefreshZkConn(t *testing.T) { initReal() time.Sleep(50 * time.Second) if s != nil { //s.top.RefreshZkConn() pi, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.Error(err) } else { log.Infof("get proxy: %+v", pi) } log.Info("begin get") /* time.Sleep(15 * time.Second) content, err := s.top.WatchChildren(models.GetWatchActionPath(s.top.ProductName), s.evtbus) if err != nil { log.Error(fmt.Sprintf(errors.ErrorStack(err)+" zk error %+v", s.top.IsFatalErr(err))) } else { log.Infof("watch action: %+v", content) } e := <-s.evtbus log.Infof("event: %+v", e) */ for { pi, err = s.topo.GetProxyInfo(s.info.Id) if err != nil { log.Error(err) } else { log.Infof("get proxy: %+v", pi) } time.Sleep(5 * time.Second) } } }
func (s *Slot) prepare(r *Request, key []byte) (*SharedBackendConn, error) { if s.backend.bc == nil { log.Infof("slot-%04d is not ready: key = %s", s.Id, key) return nil, ErrSlotIsNotReady } if err := s.slotsmgrt(r, key); err != nil { log.Warnf("slot-%04d migrate from = %s to %s failed: key = %s, error = %s", s.Id, s.migrate.from, s.backend.addr, key, err) return nil, err } else { r.slot = &s.wait r.slot.Add(1) return s.backend.bc, nil } }
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) error { log.Infof("slotRangeChange %+v", param) for i := param.From; i <= param.To; i++ { switch param.Status { case models.SLOT_STATUS_OFFLINE: s.resetSlot(i) case models.SLOT_STATUS_ONLINE: if err := s.fillSlot(i); err != nil { //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return err } default: log.Errorf("can not handle status %v", param.Status) } } return nil }
func cleanupSessions(lastunix int64) { sessions.Lock() for i := sessions.Len(); i != 0; i-- { e := sessions.Front() s := e.Value.(*Session) if s.IsClosed() { sessions.Remove(e) } else if s.IsTimeout(lastunix) { log.Infof("session [%p] killed, due to timeout, sid = %d, ops = %d", s, s.Sid, s.Ops.Get()) s.Close() sessions.Remove(e) } else { sessions.MoveToBack(e) } } sessions.Unlock() }
func setLogLevel(level string) { var lv = log.LEVEL_INFO switch strings.ToLower(level) { case "error": lv = log.LEVEL_ERROR case "warn", "warning": lv = log.LEVEL_WARN case "debug": lv = log.LEVEL_DEBUG case "info": fallthrough default: lv = log.LEVEL_INFO } log.SetLevel(lv) log.Infof("set log level to %s", lv) }
func setLogLevel(level string) { level = strings.ToLower(level) var l = log.LEVEL_INFO switch level { case "error": l = log.LEVEL_ERROR case "warn", "warning": l = log.LEVEL_WARN case "debug": l = log.LEVEL_DEBUG case "info": fallthrough default: level = "info" l = log.LEVEL_INFO } log.SetLevel(l) log.Infof("set log level to <%s>", level) }
func createDashboardNode() error { // make sure root dir is exists rootDir := fmt.Sprintf("/zk/codis/db_%s", globalEnv.ProductName()) zkhelper.CreateRecursive(safeZkConn, rootDir, "", 0, zkhelper.DefaultDirACLs()) zkPath := fmt.Sprintf("%s/dashboard", rootDir) // make sure we're the only one dashboard if exists, _, _ := safeZkConn.Exists(zkPath); exists { data, _, _ := safeZkConn.Get(zkPath) return errors.New("dashboard already exists: " + string(data)) } content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid()) pathCreated, err := safeZkConn.Create(zkPath, []byte(content), 0, zkhelper.DefaultFileACLs()) createdDashboardNode = true log.Infof("dashboard node created: %v, %s", pathCreated, string(content)) return errors.Trace(err) }