Example #1
0
/*
* return err should be reconnect to zk
*
 */
func (s *Server) fillSlot(i int) error {
	slotInfo, slotGroup, err := s.topo.GetSlotByIndex(i)
	if err != nil {
		log.ErrorErrorf(err, "get slot by index failed", i)
		return err
	}

	var from string
	var addr = groupMaster(*slotGroup)
	if slotInfo.State.Status == models.SLOT_STATUS_MIGRATE {
		fromGroup, err := s.topo.GetGroup(slotInfo.State.MigrateStatus.From)
		if err != nil {
			log.ErrorErrorf(err, "get migrate from failed")
			return err
		}

		from = groupMaster(*fromGroup)
		if from == addr {
			log.Errorf("set slot %04d migrate from %s to %s", i, from, addr)
			return nil
		}

		if "" == addr {
			log.Errorf("set slot %04d addr nil", i)
			return nil
		}
	}

	s.groups[i] = slotInfo.GroupId
	s.router.FillSlot(i, addr, from,
		slotInfo.State.Status == models.SLOT_STATUS_PRE_MIGRATE)
	return err
}
Example #2
0
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error {
	if len(proxies) == 0 {
		return nil
	}

	times := 0
	proxyIds := make(map[string]struct{})
	var offlineProxyIds []string
	for _, p := range proxies {
		proxyIds[p.Id] = struct{}{}
	}

	checkTimes := timeoutInMs / 500
	// check every 500ms
	for times < checkTimes {
		if times >= 6 && (times*500)%1000 == 0 {
			log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, offlineProxyIds)
		}
		// get confirm ids
		nodes, _, err := zkConn.Children(actionZkPath)
		if err != nil {
			return errors.Trace(err)
		}
		confirmIds := make(map[string]struct{})
		for _, node := range nodes {
			id := path.Base(node)
			confirmIds[id] = struct{}{}
		}
		if len(confirmIds) != 0 {
			match := true
			// check if all proxy have responsed
			var notMatchList []string
			for id, _ := range proxyIds {
				// if proxy id not in confirm ids, means someone didn't response
				if _, ok := confirmIds[id]; !ok {
					match = false
					notMatchList = append(notMatchList, id)
				}
			}
			if match {
				return nil
			}
			offlineProxyIds = notMatchList
		}
		times += 1
		time.Sleep(500 * time.Millisecond)
	}
	if len(offlineProxyIds) > 0 {
		log.Errorf("proxies didn't responed: %v", offlineProxyIds)
	}

	// set offline proxies
	for _, id := range offlineProxyIds {
		log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id)
		if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil {
			return errors.Trace(err)
		}
	}
	return errors.Trace(ErrReceiverTimeout)
}
Example #3
0
func groupMaster(groupInfo models.ServerGroup) string {
	var master string
	for _, server := range groupInfo.Servers {
		if server.Type == models.SERVER_TYPE_MASTER {
			if master != "" {
				log.Errorf("two master not allowed: %+v", groupInfo)
			}
			master = server.Addr
		}
	}
	if master == "" {
		log.Errorf("master not found: %+v", groupInfo)
	}
	return master
}
Example #4
0
func (s *Server) OnSlotRangeChange(param *models.SlotMultiSetParam) {
	log.Warnf("slotRangeChange %+v", param)
	if !s.isValidSlot(param.From) || !s.isValidSlot(param.To) {
		log.Errorf("invalid slot number, %+v", param)
		return
	}
	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.clearSlot(i)
		case models.SLOT_STATUS_ONLINE:
			s.fillSlot(i, true)
		default:
			log.Errorf("can not handle status %v", param.Status)
		}
	}
}
Example #5
0
func initReal() {
	conn = zkhelper.NewConn()
	conf = &Config{
		proxyId:          "proxy_test",
		productName:      "test",
		zkAddr:           "192.168.28.191:2181",
		fact:             nil,
		proto:            "tcp4",
		provider:         "zookeeper",
		zkSessionTimeout: 30,
		zkReadTimeout:    30,
	}
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT, os.Kill)
	go func() {
		<-c
		log.Info("ctrl-c or SIGTERM found, bye bye...")
		s.Close()
	}()
	go func() {
		log.Info(http.ListenAndServe("192.168.28.192:6060", nil))
	}()

	go func() {
		time.Sleep(10 * time.Second)
		zkConn, err := zkhelper.ConnectToZk(conf.zkAddr, 20000)
		if err != nil {
			log.Errorf("connect to zk:  %+v", errors.Trace(err))
		} else {
			err = models.SetProxyStatus(zkConn, conf.productName, conf.proxyId, models.PROXY_STATE_ONLINE)
			if err != nil {
				log.Errorf("set proxy error: %+v", errors.Trace(err))
			}
		}
		zkConn.Close()
	}()

	go func() {
		err := http.ListenAndServe("192.168.28.192:11001", nil)
		log.PanicError(err, "http debug server quit")
	}()

	s = New("192.168.28.192:19001", "192.168.28.192:11001", conf)
}
Example #6
0
func (top *Topology) Close(proxyName string) {
	// delete fence znode
	pi, err := models.GetProxyInfo(top.zkConn, top.ProductName, proxyName)
	if err != nil {
		log.Errorf("killing fence error, proxy %s is not exists", proxyName)
	} else {
		zkhelper.DeleteRecursive(top.zkConn, path.Join(models.GetProxyFencePath(top.ProductName), pi.Addr), -1)
	}
	// delete ephemeral znode
	zkhelper.DeleteRecursive(top.zkConn, path.Join(models.GetProxyPath(top.ProductName), proxyName), -1)
	top.zkConn.Close()
}
Example #7
0
File: zk.go Project: cyflhn/codis
func (b *connBuilder) resetConnection() {
	b.lock.Lock()
	defer b.lock.Unlock()
	if b.builder == nil {
		log.Errorf("no connection builder")
		return
	}
	if time.Now().Before(b.createdOn.Add(time.Second)) {
		return
	}
	if b.connection != nil {
		b.connection.Close()
	}
	var err error
	b.connection, err = b.builder() // this is asnyc
	if err == nil {
		b.safeConnInstance.Conn = b.connection
		b.unsafeConnInstance.Conn = b.connection
		b.createdOn = time.Now()
		return
	}
	log.Errorf("can not build new zk session, exit")
}
Example #8
0
func (s *Server) onSlotRangeChange(param *models.SlotMultiSetParam) error {
	log.Infof("slotRangeChange %+v", param)
	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.resetSlot(i)
		case models.SLOT_STATUS_ONLINE:
			if err := s.fillSlot(i); err != nil {
				//s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE)
				return err
			}
		default:
			log.Errorf("can not handle status %v", param.Status)
		}
	}
	return nil
}
Example #9
0
func (s *Server) checkAndDoTopoChange(seq int) bool {
	act, err := s.topo.GetActionWithSeq(int64(seq))
	if err != nil {
		return false
	}
	if !needResponse(act.Receivers, s.info) { //no need to response
		return false
	}

	log.Warnf("action %v receivers %v", seq, act.Receivers)

	switch act.Type {
	case models.ACTION_TYPE_SLOT_MIGRATE, models.ACTION_TYPE_SLOT_CHANGED,
		models.ACTION_TYPE_SLOT_PREMIGRATE:
		slot := &models.Slot{}
		if err := s.getActionObject(seq, slot); err != nil {
			return false
		}
		if err := s.fillSlot(slot.Id); err != nil {
			//s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE)
			return false
		}
	case models.ACTION_TYPE_SERVER_GROUP_CHANGED:
		serverGroup := &models.ServerGroup{}
		if err := s.getActionObject(seq, serverGroup); err != nil {
			return false
		}
		if err := s.onGroupChange(serverGroup.Id); err != nil {
			return false
		}
	case models.ACTION_TYPE_SERVER_GROUP_REMOVE:
	//do not care
	case models.ACTION_TYPE_MULTI_SLOT_CHANGED:
		param := &models.SlotMultiSetParam{}
		if err := s.getActionObject(seq, param); err != nil {
			return false
		}
		if err := s.onSlotRangeChange(param); err != nil {
			return false
		}
	default:
		log.Errorf("unknown action %+v", act)
	}
	return true
}
Example #10
0
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) {
	slot, err := models.GetSlot(top.zkConn, top.ProductName, i)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}

	log.Debugf("get slot %d : %+v", i, slot)
	if slot.State.Status != models.SLOT_STATUS_ONLINE && slot.State.Status != models.SLOT_STATUS_MIGRATE {
		log.Errorf("slot not online, %+v", slot)
	}

	groupServer, err := models.GetGroup(top.zkConn, top.ProductName, slot.GroupId)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}

	return slot, groupServer, nil
}
Example #11
0
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error {
	if len(proxies) == 0 {
		return nil
	}

	times := 0
	proxyIds := make(map[string]bool)
	for _, p := range proxies {
		proxyIds[p.Id] = true
	}
	// check every 500ms
	for times < timeoutInMs/500 {
		if times >= 6 && (times*500)%1000 == 0 {
			log.Warnf("abnormal waiting time for receivers: %s %v", actionZkPath, proxyIds)
		}
		// get confirm ids
		nodes, _, err := zkConn.Children(actionZkPath)
		if err != nil {
			return errors.Trace(err)
		}
		for _, node := range nodes {
			id := path.Base(node)
			delete(proxyIds, id)
		}
		if len(proxyIds) == 0 {
			return nil
		}
		times++
		time.Sleep(500 * time.Millisecond)
	}
	log.Warn("proxies didn't responed: ", proxyIds)
	// set offline proxies
	for id, _ := range proxyIds {
		log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id)
		if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil {
			return errors.Trace(err)
		}
	}
	return ErrReceiverTimeout
}
Example #12
0
func callApi(method HttpMethod, apiPath string, params interface{}, retVal interface{}) error {
	if apiPath[0] != '/' {
		return errors.Errorf("api path must starts with /")
	}
	url := "http://" + globalEnv.DashboardAddr() + apiPath
	client := &http.Client{Transport: http.DefaultTransport}

	b, err := json.Marshal(params)
	if err != nil {
		return errors.Trace(err)
	}

	req, err := http.NewRequest(string(method), url, strings.NewReader(string(b)))
	if err != nil {
		return errors.Trace(err)
	}

	resp, err := client.Do(req)
	if err != nil {
		log.Errorf("can't connect to dashboard, please check 'dashboard_addr' is corrent in config file")
		return errors.Trace(err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return errors.Trace(err)
	}

	if resp.StatusCode == 200 {
		err := json.Unmarshal(body, retVal)
		if err != nil {
			return errors.Trace(err)
		}
		return nil
	}
	return errors.Errorf("http status code %d, %s", resp.StatusCode, string(body))
}
Example #13
0
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
	// set slot status
	s, err := models.GetSlot(t.zkConn, t.productName, slotId)
	if err != nil {
		log.ErrorErrorf(err, "get slot info failed")
		return err
	}
	if s.State.Status == models.SLOT_STATUS_OFFLINE {
		log.Warnf("status is offline: %+v", s)
		return nil
	}

	from := s.GroupId
	if s.State.Status == models.SLOT_STATUS_MIGRATE {
		from = s.State.MigrateStatus.From
	}

	// make sure from group & target group exists
	exists, err := models.GroupExists(t.zkConn, t.productName, from)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
		return errors.Errorf("group %d not found", from)
	}

	exists, err = models.GroupExists(t.zkConn, t.productName, to)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		return errors.Errorf("group %d not found", to)
	}

	// cannot migrate to itself, just ignore
	if from == to {
		log.Warnf("from == to, ignore: %+v", s)
		return nil
	}

	// modify slot status
	if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil {
		log.ErrorErrorf(err, "set migrate status failed")
		return err
	}

	err = t.Migrate(s, from, to, func(p SlotMigrateProgress) {
		// on migrate slot progress
		if p.Remain%5000 == 0 {
			log.Infof("%+v", p)
		}
	})
	if err != nil {
		log.ErrorErrorf(err, "migrate slot failed")
		return err
	}

	// migrate done, change slot status back
	s.State.Status = models.SLOT_STATUS_ONLINE
	s.State.MigrateStatus.From = models.INVALID_ID
	s.State.MigrateStatus.To = models.INVALID_ID
	if err := s.Update(t.zkConn); err != nil {
		log.ErrorErrorf(err, "update zk status failed, should be: %+v", s)
		return err
	}
	return nil
}
Example #14
0
func (s *Server) processAction(e interface{}) error {
	if s.topo.IsSessionExpiredEvent(e) {
		return topo.ErrSessionExpired
	}
	if strings.Index(getEventPath(e), models.GetProxyPath(s.topo.ProductName)) == 0 {
		info, err := s.topo.GetProxyInfo(s.info.Id)
		if err != nil {
			log.ErrorErrorf(err, "get proxy info failed: %s", s.info.Id)
			return err
		}
		switch info.State {
		case models.PROXY_STATE_MARK_OFFLINE:
			log.Warnf("mark offline, proxy got offline event: %s", s.info.Id)
			s.markOffline()
		case models.PROXY_STATE_ONLINE:
			s.rewatchProxy(false)
		default:
			log.Errorf("unknown proxy state %+v", info)
		}
		return nil
	}

	//re-watch
	nodes := s.rewatchNodes()

	seqs, err := models.ExtraSeqList(nodes)
	if err != nil {
		log.ErrorErrorf(err, "get seq list failed")
		//s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE)
		return err
	}

	if len(seqs) == 0 || !s.topo.IsChildrenChangedEvent(e) {
		return nil
	}

	//get last pos
	index := -1
	for i, seq := range seqs {
		if s.lastActionSeq < seq {
			index = i
			break
		}
	}

	if index < 0 {
		return nil
	}

	actions := seqs[index:]
	for _, seq := range actions {
		exist, err := s.topo.Exist(path.Join(s.topo.GetActionResponsePath(seq), s.info.Id))
		if err != nil {
			log.ErrorErrorf(err, "get action failed")
			//s.reRegisterAndFillSlots(models.PROXY_STATE_ONLINE)
			return err
		}
		if exist {
			continue
		}
		if s.checkAndDoTopoChange(seq) {
			s.responseAction(int64(seq))
		}
	}

	s.lastActionSeq = seqs[len(seqs)-1]
	return nil
}