/* * return err should be reconnect to zk * */ func (s *Server) fillSlot(i int) error { slotInfo, slotGroup, err := s.topo.GetSlotByIndex(i) if err != nil { log.ErrorErrorf(err, "get slot by index failed", i) return err } var from string var addr = groupMaster(*slotGroup) if slotInfo.State.Status == models.SLOT_STATUS_MIGRATE { fromGroup, err := s.topo.GetGroup(slotInfo.State.MigrateStatus.From) if err != nil { log.ErrorErrorf(err, "get migrate from failed") return err } from = groupMaster(*fromGroup) if from == addr { log.Errorf("set slot %04d migrate from %s to %s", i, from, addr) return nil } if "" == addr { log.Errorf("set slot %04d addr nil", i) return nil } } s.groups[i] = slotInfo.GroupId s.router.FillSlot(i, addr, from, slotInfo.State.Status == models.SLOT_STATUS_PRE_MIGRATE) return err }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]struct{}) var offlineProxyIds []string for _, p := range proxies { proxyIds[p.Id] = struct{}{} } checkTimes := timeoutInMs / 500 // check every 500ms for times < checkTimes { if times >= 6 && (times*500)%1000 == 0 { log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, offlineProxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } confirmIds := make(map[string]struct{}) for _, node := range nodes { id := path.Base(node) confirmIds[id] = struct{}{} } if len(confirmIds) != 0 { match := true // check if all proxy have responsed var notMatchList []string for id, _ := range proxyIds { // if proxy id not in confirm ids, means someone didn't response if _, ok := confirmIds[id]; !ok { match = false notMatchList = append(notMatchList, id) } } if match { return nil } offlineProxyIds = notMatchList } times += 1 time.Sleep(500 * time.Millisecond) } if len(offlineProxyIds) > 0 { log.Errorf("proxies didn't responed: %v", offlineProxyIds) } // set offline proxies for _, id := range offlineProxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } return errors.Trace(ErrReceiverTimeout) }
func groupMaster(groupInfo models.ServerGroup) string { var master string for _, server := range groupInfo.Servers { if server.Type == models.SERVER_TYPE_MASTER { if master != "" { log.Errorf("two master not allowed: %+v", groupInfo) } master = server.Addr } } if master == "" { log.Errorf("master not found: %+v", groupInfo) } return master }
func (s *Server) OnSlotRangeChange(param *models.SlotMultiSetParam) { log.Warnf("slotRangeChange %+v", param) if !s.isValidSlot(param.From) || !s.isValidSlot(param.To) { log.Errorf("invalid slot number, %+v", param) return } for i := param.From; i <= param.To; i++ { switch param.Status { case models.SLOT_STATUS_OFFLINE: s.clearSlot(i) case models.SLOT_STATUS_ONLINE: s.fillSlot(i, true) default: log.Errorf("can not handle status %v", param.Status) } } }
func initReal() { conn = zkhelper.NewConn() conf = &Config{ proxyId: "proxy_test", productName: "test", zkAddr: "192.168.28.191:2181", fact: nil, proto: "tcp4", provider: "zookeeper", zkSessionTimeout: 30, zkReadTimeout: 30, } c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT, os.Kill) go func() { <-c log.Info("ctrl-c or SIGTERM found, bye bye...") s.Close() }() go func() { log.Info(http.ListenAndServe("192.168.28.192:6060", nil)) }() go func() { time.Sleep(10 * time.Second) zkConn, err := zkhelper.ConnectToZk(conf.zkAddr, 20000) if err != nil { log.Errorf("connect to zk: %+v", errors.Trace(err)) } else { err = models.SetProxyStatus(zkConn, conf.productName, conf.proxyId, models.PROXY_STATE_ONLINE) if err != nil { log.Errorf("set proxy error: %+v", errors.Trace(err)) } } zkConn.Close() }() go func() { err := http.ListenAndServe("192.168.28.192:11001", nil) log.PanicError(err, "http debug server quit") }() s = New("192.168.28.192:19001", "192.168.28.192:11001", conf) }
func (top *Topology) Close(proxyName string) { // delete fence znode pi, err := models.GetProxyInfo(top.zkConn, top.ProductName, proxyName) if err != nil { log.Errorf("killing fence error, proxy %s is not exists", proxyName) } else { zkhelper.DeleteRecursive(top.zkConn, path.Join(models.GetProxyFencePath(top.ProductName), pi.Addr), -1) } // delete ephemeral znode zkhelper.DeleteRecursive(top.zkConn, path.Join(models.GetProxyPath(top.ProductName), proxyName), -1) top.zkConn.Close() }
func (b *connBuilder) resetConnection() { b.lock.Lock() defer b.lock.Unlock() if b.builder == nil { log.Errorf("no connection builder") return } if time.Now().Before(b.createdOn.Add(time.Second)) { return } if b.connection != nil { b.connection.Close() } var err error b.connection, err = b.builder() // this is asnyc if err == nil { b.safeConnInstance.Conn = b.connection b.unsafeConnInstance.Conn = b.connection b.createdOn = time.Now() return } log.Errorf("can not build new zk session, exit") }
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) error { log.Infof("slotRangeChange %+v", param) for i := param.From; i <= param.To; i++ { switch param.Status { case models.SLOT_STATUS_OFFLINE: s.resetSlot(i) case models.SLOT_STATUS_ONLINE: if err := s.fillSlot(i); err != nil { //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return err } default: log.Errorf("can not handle status %v", param.Status) } } return nil }
func (s *Server) checkAndDoTopoChange(seq int) bool { act, err := s.topo.GetActionWithSeq(int64(seq)) if err != nil { return false } if !needResponse(act.Receivers, s.info) { //no need to response return false } log.Warnf("action %v receivers %v", seq, act.Receivers) switch act.Type { case models.ACTION_TYPE_SLOT_MIGRATE, models.ACTION_TYPE_SLOT_CHANGED, models.ACTION_TYPE_SLOT_PREMIGRATE: slot := &models.Slot{} if err := s.getActionObject(seq, slot); err != nil { return false } if err := s.fillSlot(slot.Id); err != nil { //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return false } case models.ACTION_TYPE_SERVER_GROUP_CHANGED: serverGroup := &models.ServerGroup{} if err := s.getActionObject(seq, serverGroup); err != nil { return false } if err := s.onGroupChange(serverGroup.Id); err != nil { return false } case models.ACTION_TYPE_SERVER_GROUP_REMOVE: //do not care case models.ACTION_TYPE_MULTI_SLOT_CHANGED: param := &models.SlotMultiSetParam{} if err := s.getActionObject(seq, param); err != nil { return false } if err := s.onSlotRangeChange(param); err != nil { return false } default: log.Errorf("unknown action %+v", act) } return true }
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) { slot, err := models.GetSlot(top.zkConn, top.ProductName, i) if err != nil { return nil, nil, errors.Trace(err) } log.Debugf("get slot %d : %+v", i, slot) if slot.State.Status != models.SLOT_STATUS_ONLINE && slot.State.Status != models.SLOT_STATUS_MIGRATE { log.Errorf("slot not online, %+v", slot) } groupServer, err := models.GetGroup(top.zkConn, top.ProductName, slot.GroupId) if err != nil { return nil, nil, errors.Trace(err) } return slot, groupServer, nil }
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error { if len(proxies) == 0 { return nil } times := 0 proxyIds := make(map[string]bool) for _, p := range proxies { proxyIds[p.Id] = true } // check every 500ms for times < timeoutInMs/500 { if times >= 6 && (times*500)%1000 == 0 { log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, proxyIds) } // get confirm ids nodes, _, err := zkConn.Children(actionZkPath) if err != nil { return errors.Trace(err) } for _, node := range nodes { id := path.Base(node) delete(proxyIds, id) } if len(proxyIds) == 0 { return nil } times++ time.Sleep(500 * time.Millisecond) } log.Warn("proxies didn't responed: ", proxyIds) // set offline proxies for id, _ := range proxyIds { log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id) if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil { return errors.Trace(err) } } return ErrReceiverTimeout }
func callApi(method HttpMethod, apiPath string, params interface{}, retVal interface{}) error { if apiPath[0] != '/' { return errors.Errorf("api path must starts with /") } url := "http://" + globalEnv.DashboardAddr() + apiPath client := &http.Client{Transport: http.DefaultTransport} b, err := json.Marshal(params) if err != nil { return errors.Trace(err) } req, err := http.NewRequest(string(method), url, strings.NewReader(string(b))) if err != nil { return errors.Trace(err) } resp, err := client.Do(req) if err != nil { log.Errorf("can't connect to dashboard, please check 'dashboard_addr' is corrent in config file") return errors.Trace(err) } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return errors.Trace(err) } if resp.StatusCode == 200 { err := json.Unmarshal(body, retVal) if err != nil { return errors.Trace(err) } return nil } return errors.Errorf("http status code %d, %s", resp.StatusCode, string(body)) }
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error { // set slot status s, err := models.GetSlot(t.zkConn, t.productName, slotId) if err != nil { log.ErrorErrorf(err, "get slot info failed") return err } if s.State.Status == models.SLOT_STATUS_OFFLINE { log.Warnf("status is offline: %+v", s) return nil } from := s.GroupId if s.State.Status == models.SLOT_STATUS_MIGRATE { from = s.State.MigrateStatus.From } // make sure from group & target group exists exists, err := models.GroupExists(t.zkConn, t.productName, from) if err != nil { return errors.Trace(err) } if !exists { log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to) return errors.Errorf("group %d not found", from) } exists, err = models.GroupExists(t.zkConn, t.productName, to) if err != nil { return errors.Trace(err) } if !exists { return errors.Errorf("group %d not found", to) } // cannot migrate to itself, just ignore if from == to { log.Warnf("from == to, ignore: %+v", s) return nil } // modify slot status if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil { log.ErrorErrorf(err, "set migrate status failed") return err } err = t.Migrate(s, from, to, func(p SlotMigrateProgress) { // on migrate slot progress if p.Remain%5000 == 0 { log.Infof("%+v", p) } }) if err != nil { log.ErrorErrorf(err, "migrate slot failed") return err } // migrate done, change slot status back s.State.Status = models.SLOT_STATUS_ONLINE s.State.MigrateStatus.From = models.INVALID_ID s.State.MigrateStatus.To = models.INVALID_ID if err := s.Update(t.zkConn); err != nil { log.ErrorErrorf(err, "update zk status failed, should be: %+v", s) return err } return nil }
func (s *Server) processAction(e interface{}) error { if s.topo.IsSessionExpiredEvent(e) { return topo.ErrSessionExpired } if strings.Index(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) == 0 { info, err := s.topo.GetProxyInfo(s.info.Id) if err != nil { log.ErrorErrorf(err, "get proxy info failed: %s", s.info.Id) return err } switch info.State { case models.PROXY_STATE_MARK_OFFLINE: log.Warnf("mark offline, proxy got offline event: %s", s.info.Id) s.markOffline() case models.PROXY_STATE_ONLINE: s.rewatchProxy(false) default: log.Errorf("unknown proxy state %+v", info) } return nil } //re-watch nodes := s.rewatchNodes() seqs, err := models.ExtraSeqList(nodes) if err != nil { log.ErrorErrorf(err, "get seq list failed") //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return err } if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) { return nil } //get last pos index := -1 for i, seq := range seqs { if s.lastActionSeq < seq { index = i break } } if index < 0 { return nil } actions := seqs[index:] for _, seq := range actions { exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id)) if err != nil { log.ErrorErrorf(err, "get action failed") //s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE) return err } if exist { continue } if s.checkAndDoTopoChange(seq) { s.responseAction(int64(seq)) } } s.lastActionSeq = seqs[len(seqs)-1] return nil }