Example No. 1
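// BeforePromote verifies that oldMaster is still registered in one of the product's
// server groups; if it cannot be found, the failover is abandoned.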
func BeforePromote(oldMaster string) error {
	conn, err := zkhelper.ConnectToZk(*zkAddr)
	if err != nil {
		log.Errorf("connect to zk error %v, give up failover", err)
		return failover.ErrGiveupFailover
	}
	defer conn.Close()

	groups, err := models.ServerGroups(conn, *productName)
	if err != nil {
		log.Errorf("get server groups error %v, give up failover", err)
		return failover.ErrGiveupFailover
	}

	found := false
	for _, group := range groups {
		for _, server := range group.Servers {
			if server.Addr == oldMaster {
				found = true
				break
			}
		}
	}

	if !found {
		log.Errorf("can not find %s in any groups, give up failover", oldMaster)
		return failover.ErrGiveupFailover
	}

	return nil
}
Example No. 2
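// Run is the connection's read loop: it reads and dispatches packets until EOF or an
// error, and recovers from panics by logging the last command together with a stack
// trace before closing the connection.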
func (c *Conn) Run() {
	defer func() {
		r := recover()
		if err, ok := r.(error); ok {
			const size = 4096
			buf := make([]byte, size)
			buf = buf[:runtime.Stack(buf, false)]

			log.Errorf("lastCmd %s, %v, %s", c.lastCmd, err, buf)
		}

		c.Close()
	}()

	for {
		c.alloc.Reset()
		data, err := c.readPacket()
		if err != nil {
			if err.Error() != io.EOF.Error() {
				log.Info(err)
			}
			return
		}

		if err := c.dispatch(data); err != nil {
			log.Errorf("dispatch error %s, %s", errors.ErrorStack(err), c)
			if err != mysql.ErrBadConn { //todo: fix this
				c.writeError(err)
			}
		}

		c.pkg.Sequence = 0
	}
}
Example No. 3
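// handleCrashedServer reacts to a crashed server according to its type: for a master it
// asks the API server to promote one of its slaves and refreshes the rest, a crashed
// slave is only logged, and offline servers are ignored.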
func handleCrashedServer(s *models.Server) error {
	switch s.Type {
	case models.SERVER_TYPE_MASTER:
		//get slave and do promote
		slave, err := getSlave(s)
		if err != nil {
			log.Warning(errors.ErrorStack(err))
			return err
		}

		log.Infof("try promote %+v", slave)
		err = callHttp(nil, genUrl(*apiServer, "/api/server_group/", slave.GroupId, "/promote"), "POST", slave)
		if err != nil {
			log.Errorf("do promote %v failed %v", slave, errors.ErrorStack(err))
			return err
		}
		refreshSlave(s) // refresh the remaining slaves of the group
	case models.SERVER_TYPE_SLAVE:
		log.Errorf("slave is down: %+v", s)
	case models.SERVER_TYPE_OFFLINE:
		//no need to handle it
	default:
		log.Fatalf("unknown type %+v", s)
	}

	return nil
}
Example No. 4
// slotsReloadLoop waits on the slot reload channel and reloads the cluster topology
// at most once every slotReloadInterval; it also reloads the topology at a relatively
// long periodic interval.
func (d *Dispatcher) slotsReloadLoop() {
	periodicReloadInterval := 60 * time.Second
	for {
		select {
		case <-time.After(d.slotReloadInterval):
			select {
			case _, ok := <-d.slotReloadChan:
				if !ok {
					log.Infof("exit reload slot table loop")
					return
				}
				log.Infof("request reload triggered")
				if slotInfos, err := d.reloadTopology(); err != nil {
					log.Errorf("reload slot table failed: %v", err)
				} else {
					d.slotInfoChan <- slotInfos
				}
			case <-time.After(periodicReloadInterval):
				log.Infof("periodic reload triggered")
				if slotInfos, err := d.reloadTopology(); err != nil {
					log.Errorf("reload slot table failed: %v", err)
				} else {
					d.slotInfoChan <- slotInfos
				}
			}
		}
	}
}
Example No. 5
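// publishMainStats registers a stats callback for every main memcache metric: string
// metrics are exported verbatim and numeric ones are parsed to int64 when queried.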
func (s *MemcacheStats) publishMainStats() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.main = make(map[string]string)
	for key, isstr := range mainStringMetrics {
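		// rebind key so each closure registered below captures its own copy of the range variable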
		key := key
		if isstr {
			s.main[key] = ""
			stats.Publish(s.cachePool.name+"Memcache"+formatKey(key), stats.StringFunc(func() string {
				s.mu.Lock()
				defer s.mu.Unlock()
				return s.main[key]
			}))
		} else {
			s.main[key] = "0"
			stats.Publish(s.cachePool.name+"Memcache"+formatKey(key), stats.IntFunc(func() int64 {
				s.mu.Lock()
				defer s.mu.Unlock()
				ival, err := strconv.ParseInt(s.main[key], 10, 64)
				if err != nil {
					log.Errorf("value '%v' for key %v is not an int", s.main[key], key)
					internalErrors.Add("MemcacheStats", 1)
					return -1
				}
				return ival
			}))
		}
	}
}
Example No. 6
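// WaitForReceiverWithTimeout polls actionZkPath every 500ms until every proxy in
// proxies has confirmed the action; proxies that never confirm are marked offline and
// ErrReceiverTimeout is returned.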
func WaitForReceiverWithTimeout(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo, timeoutInMs int) error {
	if len(proxies) == 0 {
		return nil
	}

	times := 0
	proxyIds := make(map[string]struct{})
	var offlineProxyIds []string
	for _, p := range proxies {
		proxyIds[p.Id] = struct{}{}
	}

	checkTimes := timeoutInMs / 500
	// check every 500ms
	for times < checkTimes {
		if times >= 6 && (times*500)%1000 == 0 {
			log.Warning("abnormal waiting time for receivers", actionZkPath, offlineProxyIds)
		}
		// get confirm ids
		nodes, _, err := zkConn.Children(actionZkPath)
		if err != nil {
			return errors.Trace(err)
		}
		confirmIds := make(map[string]struct{})
		for _, node := range nodes {
			id := path.Base(node)
			confirmIds[id] = struct{}{}
		}
		if len(confirmIds) != 0 {
			match := true
			// check whether all proxies have responded
			var notMatchList []string
			for id := range proxyIds {
				// a proxy id missing from confirmIds means that proxy has not responded yet
				if _, ok := confirmIds[id]; !ok {
					match = false
					notMatchList = append(notMatchList, id)
				}
			}
			if match {
				return nil
			}
			offlineProxyIds = notMatchList
		}
		times += 1
		time.Sleep(500 * time.Millisecond)
	}
	if len(offlineProxyIds) > 0 {
		log.Error("proxies didn't respond: ", offlineProxyIds)
	}

	// set offline proxies
	for _, id := range offlineProxyIds {
		log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id)
		if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil {
			return errors.Trace(err)
		}
	}
	return errors.Trace(ErrReceiverTimeout)
}
Example No. 7
func (self *ResMan) handleMesosError(t *cmdMesosError) {
	defer func() {
		t.wait <- struct{}{}
	}()

	log.Errorf("%s\n", t.err)
}
Example No. 8
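// Raw converts val into its raw []byte representation according to the MySQL column
// type t: integer and float values are formatted as text, strings and blobs are
// returned as bytes.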
func Raw(t byte, val Value, isUnsigned bool) []byte {
	if val == nil {
		return nil
	}

	var ret []byte
	switch t {
	case MYSQL_TYPE_TINY, MYSQL_TYPE_SHORT, MYSQL_TYPE_INT24, MYSQL_TYPE_LONG,
		MYSQL_TYPE_LONGLONG, MYSQL_TYPE_YEAR:
		if isUnsigned {
			ret = []byte(strconv.FormatUint(val.(uint64), 10))
		} else {
			ret = []byte(strconv.FormatInt(val.(int64), 10))
		}

	case MYSQL_TYPE_FLOAT, MYSQL_TYPE_DOUBLE:
		ret = []byte(strconv.FormatFloat(val.(float64), 'f', 16, 64))
	case MYSQL_TYPE_VARCHAR:
		str, ok := val.(string)
		if ok {
			ret = hack.Slice(str)
			break
		}

		fallthrough
	default:
		var ok bool
		ret, ok = val.([]byte)
		if !ok {
			log.Errorf("%v, %+v, %T", t, val, val)
		}
	}

	return ret
}
Example No. 9
func (s *MemcacheStats) updateItemsStats() {
	if s.items == nil {
		return
	}
	s.readStats("items", func(sKey, sValue string) {
		ival, err := strconv.ParseInt(sValue, 10, 64)
		if err != nil {
			log.Error(err)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		subkey, slabid, err := parseItemKey(sKey)
		if err != nil {
			log.Error(err)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		m, ok := s.items[subkey]
		if !ok {
			log.Errorf("Unknown memcache items stats %v %v: %v", subkey, slabid, ival)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		m[slabid] = ival
	})
}
Example No. 10
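// refreshSlave fetches the master's server group from the API server and re-adds every
// slave in it through the /addServer endpoint.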
func refreshSlave(master *models.Server) {
	var group models.ServerGroup
	err := callHttp(&group, genUrl(*apiServer, "/api/server_group/", master.GroupId), "GET", nil)
	if err == nil {
		for _, s := range group.Servers {
			if s.Type == models.SERVER_TYPE_SLAVE {
				err := callHttp(nil, genUrl(*apiServer, "/api/server_group/", master.GroupId, "/addServer"), "PUT", s)
				if err != nil {
					log.Errorf("slave refresh failed: %v, error: %v", s, errors.ErrorStack(err))
				}
			}
		}
	} else {
		log.Errorf("slave refresh failed: %v", errors.Trace(err))
	}
}
Example No. 11
func (s *Server) OnSlotRangeChange(param *models.SlotMultiSetParam) {
	log.Warningf("slotRangeChange %+v", param)
	if !validSlot(param.From) || !validSlot(param.To) {
		log.Errorf("invalid slot number, %+v", param)
		return
	}

	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.clearSlot(i)
		case models.SLOT_STATUS_ONLINE:
			s.fillSlot(i, true)
		default:
			log.Errorf("can not handle status %v", param.Status)
		}
	}
}
Example No. 12
func (s *Server) Run() error {
	for {
		conn, err := s.listener.Accept()
		if err != nil {
			log.Errorf("accept error %s", err.Error())
			return err
		}

		go s.onConn(conn)
	}
}
Example No. 13
func (s *MemcacheStats) readStats(k string, proc func(key, value string)) {
	defer func() {
		if x := recover(); x != nil {
			log.Errorf("Could not read memcache stats: %v", x)
			internalErrors.Add("MemcacheStats", 1)
		}
	}()
	conn := s.cachePool.Get(0)
	// This is not the same as defer s.cachePool.Put(conn): the closure puts back whatever
	// conn holds at return time (it may have been set to nil after Close below).
	defer func() { s.cachePool.Put(conn) }()

	stats, err := conn.Stats(k)
	if err != nil {
		conn.Close()
		conn = nil
		log.Errorf("Cannot export memcache %v stats: %v", k, err)
		internalErrors.Add("MemcacheStats", 1)
		return
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	st := string(stats)
	lines := strings.Split(st, "\n")
	for _, line := range lines {
		if line == "" {
			continue
		}
		items := strings.Split(line, " ")
		if len(items) < 3 { //liuqi: if using apt-get, memcached info would be:  STAT version 1.4.14 (Ubuntu)
			log.Errorf("Unexpected stats: %v", line)
			internalErrors.Add("MemcacheStats", 1)
			continue
		}
		proc(items[1], items[2])
	}
}
Example No. 14
func WaitForReceiver(zkConn zkhelper.Conn, productName string, actionZkPath string, proxies []ProxyInfo) error {
	if len(proxies) == 0 {
		return nil
	}

	times := 0
	var proxyIds []string
	var offlineProxyIds []string
	for _, p := range proxies {
		proxyIds = append(proxyIds, p.Id)
	}
	sort.Strings(proxyIds)
	// check every 500ms
	for times < 60 {
		if times >= 6 && (times*500)%1000 == 0 {
			log.Warning("abnormal waiting time for receivers", actionZkPath)
		}
		nodes, _, err := zkConn.Children(actionZkPath)
		if err != nil {
			return errors.Trace(err)
		}
		var confirmIds []string
		for _, node := range nodes {
			id := path.Base(node)
			confirmIds = append(confirmIds, id)
		}
		if len(confirmIds) != 0 {
			sort.Strings(confirmIds)
			if utils.Strings(proxyIds).Eq(confirmIds) {
				return nil
			}
			// collect the proxies that have not confirmed yet
			confirmed := make(map[string]struct{}, len(confirmIds))
			for _, id := range confirmIds {
				confirmed[id] = struct{}{}
			}
			offlineProxyIds = offlineProxyIds[:0]
			for _, id := range proxyIds {
				if _, ok := confirmed[id]; !ok {
					offlineProxyIds = append(offlineProxyIds, id)
				}
			}
		}
		times += 1
		time.Sleep(500 * time.Millisecond)
	}
	if len(offlineProxyIds) > 0 {
		log.Error("proxies didn't respond: ", offlineProxyIds)
	}
	// set offline proxies
	for _, id := range offlineProxyIds {
		log.Errorf("mark proxy %s to PROXY_STATE_MARK_OFFLINE", id)
		if err := SetProxyStatus(zkConn, productName, id, PROXY_STATE_MARK_OFFLINE); err != nil {
			return err
		}
	}

	return ErrReceiverTimeout
}
Example No. 15
func (s *MemcacheStats) updateSlabsStats() {
	if s.slabs == nil {
		return
	}
	s.readStats("slabs", func(sKey, sValue string) {
		ival, err := strconv.ParseInt(sValue, 10, 64)
		if err != nil {
			log.Error(err)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		if slabsSingleMetrics[sKey] {
			m, ok := s.slabs[sKey]
			if !ok {
				log.Errorf("Unknown memcache slabs stats %v: %v", sKey, ival)
				internalErrors.Add("MemcacheStats", 1)
				return
			}
			m[""] = ival
			return
		}
		subkey, slabid, err := parseSlabKey(sKey)
		if err != nil {
			log.Error(err)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		m, ok := s.slabs[subkey]
		if !ok {
			log.Errorf("Unknown memcache slabs stats %v %v: %v", subkey, slabid, ival)
			internalErrors.Add("MemcacheStats", 1)
			return
		}
		m[slabid] = ival
	})
}
Example No. 16
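// genUrl builds an "http://"-prefixed URL by concatenating its string and int arguments.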
func genUrl(args ...interface{}) string {
	url := "http://"
	for _, v := range args {
		switch v := v.(type) {
		case string:
			url += v
		case int:
			url += strconv.Itoa(v)
		default:
			log.Errorf("unsupported type %T", v)
		}
	}

	return url
}
Example No. 17
func (self *ShellExecutor) OnKillTask(driver *mesos.ExecutorDriver, tid mesos.TaskID) {
	taskId := tid.GetValue()
	log.Warningf("OnKillTask %s", taskId)
	self.lock.Lock()
	defer self.lock.Unlock()
	if cmd, ok := self.process[taskId]; ok {
		err := cmd.Process.Kill()
		if err != nil {
			log.Errorf("kill taskId %s failed, err:%v", taskId, err)
		}
	}

	log.Error("send kill state")
	self.sendStatusUpdate(tid.GetValue(), mesos.TaskState_TASK_KILLED, "task killed by framework!")
}
Example No. 18
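// override applies the configured table overrides to the loaded schema: it sets primary
// keys where requested and wires each table to its RW or W row cache.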
func (si *SchemaInfo) override() {
	for _, override := range si.overrides {
		table, ok := si.tables[override.Name]
		if !ok {
			log.Warningf("Table not found for override: %v, %v", override, si.tables)
			continue
		}
		if override.PKColumns != nil {
			log.Infof("SetPK Table name %s, pk %v", override.Name, override.PKColumns)
			if err := table.SetPK(override.PKColumns); err != nil {
				log.Errorf("%s: %v", errors.ErrorStack(err), override)
				continue
			}
		}
		if si.cachePool.IsClosed() || override.Cache == nil {
			log.Infof("%+v", override)
			continue
		}

		switch override.Cache.Type {
		case "RW":
			table.CacheType = schema.CACHE_RW
			table.Cache = NewRowCache(table, si.cachePool)
		case "W":
			table.CacheType = schema.CACHE_W
			if len(override.Cache.Table) == 0 {
				log.Warningf("Incomplete cache specs: %v", override)
				continue
			}

			totable, ok := si.tables[override.Cache.Table]
			if !ok {
				log.Warningf("Table not found: %v", override)
				continue
			}

			if totable.Cache == nil {
				log.Warningf("Table has no cache: %v", override)
				continue
			}

			table.Cache = totable.Cache
		default:
			log.Warningf("Ignoring cache override: %+v", override)
		}
	}
}
Example No. 19
File: example.go Project: npk/tidb
func main() {
	logging.SetRotateByHour()
	// logging.SetRotateByDay()

	err := logging.SetOutputByName("example.log")
	checkError(err)

	timer := time.NewTicker(10 * time.Second)
	for {
		select {
		case <-timer.C:
			logging.Debug(time.Now().String())
			logging.Info(time.Now().String())
			logging.Warningf("%s", time.Now().String())
			logging.Errorf("%s", time.Now().String())
		}
	}
}
Example No. 20
func (top *Topology) GetSlotByIndex(i int) (*models.Slot, *models.ServerGroup, error) {
	slot, err := models.GetSlot(top.zkConn, top.ProductName, i)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}

	log.Debugf("get slot %d : %+v", i, slot)
	if slot.State.Status != models.SLOT_STATUS_ONLINE && slot.State.Status != models.SLOT_STATUS_MIGRATE {
		log.Errorf("slot not online, %+v", slot)
	}

	groupServer, err := models.GetGroup(top.zkConn, top.ProductName, slot.GroupId)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}

	return slot, groupServer, nil
}
Example No. 21
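// CoalesceRsp merges the responses of a multi-key command's sub-commands into one
// response: MGET collects an array, MSET returns OK, and DEL sums the integer replies;
// any sub-command error becomes the overall reply.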
func (mc *MultiKeyCmd) CoalesceRsp() *PipelineResponse {
	plRsp := &PipelineResponse{}
	var rsp *resp.Data
	switch mc.CmdType() {
	case MGET:
		rsp = &resp.Data{T: resp.T_Array, Array: make([]*resp.Data, mc.numSubCmds)}
	case MSET:
		rsp = OK_DATA
	case DEL:
		rsp = &resp.Data{T: resp.T_Integer}
	default:
		panic("invalid multi key cmd name")
	}
	for i, subCmdRsp := range mc.subCmdRsps {
		if subCmdRsp.err != nil {
			rsp = &resp.Data{T: resp.T_Error, String: []byte(subCmdRsp.err.Error())}
			break
		}
		reader := bufio.NewReader(bytes.NewReader(subCmdRsp.rsp.Raw()))
		data, err := resp.ReadData(reader)
		if err != nil {
			log.Errorf("re-parse response err=%s", err)
			rsp = &resp.Data{T: resp.T_Error, String: []byte(err.Error())}
			break
		}
		if data.T == resp.T_Error {
			rsp = data
			break
		}
		switch mc.CmdType() {
		case MGET:
			rsp.Array[i] = data
		case MSET:
		case DEL:
			rsp.Integer += data.Integer
		default:
			panic("invalid multi key cmd name")
		}
	}
	plRsp.rsp = resp.NewObjectFromData(rsp)
	return plRsp
}
Example No. 22
File: main.go Project: ngaut/tyrant
func (self *ShellExecutor) OnKillTask(driver *mesos.ExecutorDriver, tid mesos.TaskID) {
	taskId := tid.GetValue()
	log.Warningf("OnKillTask %s", taskId)
	self.lock.Lock()
	defer self.lock.Unlock()
	if contex, ok := self.process[taskId]; ok {
		ret, _ := exec.Command("pgrep", "-P", strconv.Itoa(contex.cmd.Process.Pid)).Output()
		log.Debug("children process", string(ret))
		log.Debug("pid", contex.cmd.Process.Pid)
		ret, err := exec.Command("pkill", "-P", strconv.Itoa(contex.cmd.Process.Pid)).Output()
		if err != nil {
			log.Errorf("kill taskId %s failed, err:%v", taskId, err)
		}
		log.Debugf("kill taskId %s result %v", taskId, ret)
		contex.statusFile.Stop()
	}

	//log.Error("send kill state")
	//self.sendStatusUpdate(tid.GetValue(), mesos.TaskState_TASK_KILLED, "")
}
Example No. 23
func (s *Server) onConn(c net.Conn) {
	conn := s.newConn(c)
	if err := conn.Handshake(); err != nil {
		log.Errorf("handshake error %s", errors.ErrorStack(err))
		c.Close()
		return
	}

	const key = "connections"

	s.IncCounter(key)
	defer func() {
		s.DecCounter(key)
		log.Infof("close %s", conn)
	}()

	s.rwlock.Lock()
	s.clients[conn.connectionId] = conn
	s.rwlock.Unlock()

	conn.Run()
}
Example No. 24
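// decodeArgs splits the packet body buf into the argc arguments expected for cmd: the
// first argc-1 arguments are NUL-separated and the last one takes the rest of the buffer.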
func decodeArgs(cmd uint32, buf []byte) ([][]byte, bool) {
	argc := common.ArgCount(cmd)
	//log.Debug("cmd:", common.CmdDescription(cmd), "details:", buf)
	if argc == 0 {
		return nil, true
	}

	args := make([][]byte, 0, argc)

	if argc == 1 {
		args = append(args, buf)
		return args, true
	}

	endPos := 0
	cnt := 0
	for ; cnt < argc-1 && endPos < len(buf); cnt++ {
		startPos := endPos
		pos := bytes.IndexByte(buf[startPos:], 0x0)
		if pos == -1 {
			log.Warning("invalid protocol")
			return nil, false
		}
		endPos = startPos + pos
		args = append(args, buf[startPos:endPos])
		endPos++
	}

	args = append(args, buf[endPos:]) // the last argument takes the rest of the buffer (opaque data)
	cnt++

	if cnt != argc {
		log.Errorf("argc not match %d-%d", argc, len(args))
		return nil, false
	}

	return args, true
}
Example No. 25
func handleAddServer(s *models.Server) {
	s.Type = models.SERVER_TYPE_SLAVE
	log.Infof("try reusing slave %+v", s)
	err := callHttp(nil, genUrl(*apiServer, "/api/server_group/", s.GroupId, "/addServer"), "PUT", s)
	if err != nil {
		log.Errorf("do reusing slave %v failed %v", s, errors.ErrorStack(err))
	}
}
Example No. 26
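// handleConnection is the per-connection protocol loop: it decodes each incoming
// message and forwards it as an event to the server's event loop, replying to the peer
// through the inbox channel.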
func (self *session) handleConnection(s *Server, conn net.Conn) {
	sessionId := s.allocSessionId()

	inbox := make(chan []byte, 200)
	out := make(chan []byte, 200)
	defer func() {
		if self.w != nil || self.c != nil {
			e := &event{tp: ctrlCloseSession, fromSessionId: sessionId,
				result: createResCh()}
			s.protoEvtCh <- e
			<-e.result
			close(inbox) //notify writer to quit
		}
	}()

	log.Debug("new sessionId", sessionId, "address:", conn.RemoteAddr())

	go queueingWriter(inbox, out)
	go writer(conn, out)

	r := bufio.NewReaderSize(conn, 256*1024)
	//todo: 1. reuse the event's result channel to create less garbage.
	//2. this relies heavily on goroutine switches; sending the reply in EventLoop would be faster,
	//but the logic would not be as clean, so I am not going to change it right now, maybe never.

	for {
		tp, buf, err := ReadMessage(r)
		if err != nil {
			log.Debug(err, "sessionId", sessionId)
			return
		}

		args, ok := decodeArgs(tp, buf)
		if !ok {
			log.Debug("tp:", CmdDescription(tp), "argc not match", "details:", string(buf))
			return
		}

		log.Debug("sessionId", sessionId, "tp:", CmdDescription(tp), "len(args):", len(args), "details:", string(buf))

		switch tp {
		case CAN_DO, CAN_DO_TIMEOUT: //todo: CAN_DO_TIMEOUT timeout support
			self.w = self.getWorker(sessionId, inbox, conn)
			s.protoEvtCh <- &event{tp: tp, args: &Tuple{
				t0: self.w, t1: string(args[0])}}
		case CANT_DO:
			s.protoEvtCh <- &event{tp: tp, fromSessionId: sessionId,
				args: &Tuple{t0: string(args[0])}}
		case ECHO_REQ:
			sendReply(inbox, ECHO_RES, [][]byte{buf})
		case PRE_SLEEP:
			self.w = self.getWorker(sessionId, inbox, conn)
			s.protoEvtCh <- &event{tp: tp, args: &Tuple{t0: self.w}, fromSessionId: sessionId}
		case SET_CLIENT_ID:
			self.w = self.getWorker(sessionId, inbox, conn)
			s.protoEvtCh <- &event{tp: tp, args: &Tuple{t0: self.w, t1: string(args[0])}}
		case GRAB_JOB_UNIQ:
			if self.w == nil {
				log.Errorf("can't perform %s, need send CAN_DO first", CmdDescription(tp))
				return
			}
			e := &event{tp: tp, fromSessionId: sessionId,
				result: createResCh()}
			s.protoEvtCh <- e
			job := (<-e.result).(*Job)
			if job == nil {
				log.Debug("sessionId", sessionId, "no job")
				sendReplyResult(inbox, nojobReply)
				break
			}

			//log.Debugf("%+v", job)
			sendReply(inbox, JOB_ASSIGN_UNIQ, [][]byte{
				[]byte(job.Handle), []byte(job.FuncName), []byte(job.Id), job.Data})
		case SUBMIT_JOB, SUBMIT_JOB_LOW_BG, SUBMIT_JOB_LOW:
			if self.c == nil {
				self.c = &Client{Session: Session{SessionId: sessionId, in: inbox,
					ConnectAt: time.Now()}}
			}
			e := &event{tp: tp,
				args:   &Tuple{t0: self.c, t1: args[0], t2: args[1], t3: args[2]},
				result: createResCh(),
			}
			s.protoEvtCh <- e
			handle := <-e.result
			sendReply(inbox, JOB_CREATED, [][]byte{[]byte(handle.(string))})
		case GET_STATUS:
			e := &event{tp: tp, args: &Tuple{t0: args[0]},
				result: createResCh()}
			s.protoEvtCh <- e

			resp := (<-e.result).(*Tuple)
			sendReply(inbox, STATUS_RES, [][]byte{resp.t0.([]byte),
				bool2bytes(resp.t1), bool2bytes(resp.t2),
				int2bytes(resp.t3),
				int2bytes(resp.t4)})
		case WORK_DATA, WORK_WARNING, WORK_STATUS, WORK_COMPLETE,
			WORK_FAIL, WORK_EXCEPTION:
			if self.w == nil {
				log.Errorf("can't perform %s, need send CAN_DO first", CmdDescription(tp))
				return
			}
			s.protoEvtCh <- &event{tp: tp, args: &Tuple{t0: args},
				fromSessionId: sessionId}
		default:
			log.Warningf("not support type %s", CmdDescription(tp))
		}
	}
}
Example No. 27
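// migrateSingleSlot marks the slot as migrating from its current group to group `to`,
// runs the slot migrator, and sets the slot back to online once the data has been moved.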
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
	// set slot status
	s, err := models.GetSlot(t.zkConn, t.productName, slotId)
	if err != nil {
		log.Error(err)
		return err
	}
	if s.State.Status != models.SLOT_STATUS_ONLINE && s.State.Status != models.SLOT_STATUS_MIGRATE {
		log.Warning("status is not online && migrate", s)
		return nil
	}

	from := s.GroupId
	if s.State.Status == models.SLOT_STATUS_MIGRATE {
		from = s.State.MigrateStatus.From
	}

	// make sure from group & target group exists
	exists, err := models.GroupExists(t.zkConn, t.productName, from)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
		return errors.NotFoundf("group %d", from)
	}

	exists, err = models.GroupExists(t.zkConn, t.productName, to)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		return errors.NotFoundf("group %d", to)
	}

	// cannot migrate to itself, just ignore
	if from == to {
		log.Warning("from == to, ignore", s)
		return nil
	}

	// modify slot status
	if err := s.SetMigrateStatus(t.zkConn, from, to); err != nil {
		log.Error(err)
		return err
	}

	err = t.slotMigrator.Migrate(s, from, to, t, func(p SlotMigrateProgress) {
		// on migrate slot progress
		if p.Remain%500 == 0 {
			log.Info(p)
		}
	})
	if err != nil {
		log.Error(err)
		return err
	}

	// migrate done, change slot status back
	s.State.Status = models.SLOT_STATUS_ONLINE
	s.State.MigrateStatus.From = models.INVALID_ID
	s.State.MigrateStatus.To = models.INVALID_ID
	if err := s.Update(t.zkConn); err != nil {
		log.Error(err)
		return err
	}

	return nil
}
Example No. 28
// doReload queries CLUSTER SLOTS from the given server and uses the CLUSTER NODES
// information to filter out slaves that are marked as failed.
func (d *Dispatcher) doReload(server string) (slotInfos []*SlotInfo, err error) {
	var conn net.Conn
	conn, err = d.connPool.GetConn(server)
	if err != nil {
		log.Error(server, err)
		return
	}
	log.Infof("query cluster slots from %s", server)
	defer func() {
		if err != nil {
			conn.(*pool.PoolConn).MarkUnusable()
		}
		conn.Close()
	}()
	_, err = conn.Write(REDIS_CMD_CLUSTER_SLOTS)
	if err != nil {
		log.Errorf("write cluster slots to %s error: %v", server, err)
		return
	}
	r := bufio.NewReader(conn)
	var data *resp.Data
	data, err = resp.ReadData(r)
	if err != nil {
		log.Error(server, err)
		return
	}
	slotInfos = make([]*SlotInfo, 0, len(data.Array))
	for _, info := range data.Array {
		slotInfos = append(slotInfos, NewSlotInfo(info))
	}

	// filter slot info with cluster nodes information
	_, err = conn.Write(REDIS_CMD_CLUSTER_NODES)
	if err != nil {
		log.Errorf("write cluster nodes to %s error: %v", server, err)
		return
	}
	r = bufio.NewReader(conn)
	data, err = resp.ReadData(r)
	if err != nil {
		log.Error(server, err)
		return
	}
	aliveNodes := make(map[string]bool)
	lines := strings.Split(strings.TrimSpace(string(data.String)), "\n")
	for _, line := range lines {
		// 305fa52a4ed213df3ca97a4399d9e2a6e44371d2 10.4.17.164:7704 master - 0 1440042315188 2 connected 5461-10922
		log.Debug(line)
		elements := strings.SplitN(line, " ", CLUSTER_NODES_FIELD_SPLIT_NUM)
		if !strings.Contains(elements[CLUSTER_NODES_FIELD_NUM_FLAGS], "fail") {
			aliveNodes[elements[CLUSTER_NODES_FIELD_NUM_IP_PORT]] = true
		} else {
			log.Warningf("node fails: %s", elements[1])
		}
	}
	for _, si := range slotInfos {
		if d.readPrefer == READ_PREFER_MASTER {
			si.read = []string{si.write}
		} else if d.readPrefer == READ_PREFER_SLAVE || d.readPrefer == READ_PREFER_SLAVE_IDC {
			localIPPrefix := LocalIP()
			if len(localIPPrefix) > 0 {
				segments := strings.SplitN(localIPPrefix, ".", 3)
				localIPPrefix = strings.Join(segments[:2], ".")
				localIPPrefix += "."
			}
			var readNodes []string
			for _, node := range si.read {
				if !aliveNodes[node] {
					log.Infof("filter %s since it's not alive", node)
					continue
				}
				if d.readPrefer == READ_PREFER_SLAVE_IDC {
					// ips are regarded as in the same idc if they have the same first two segments, eg 10.4.x.x
					if !strings.HasPrefix(node, localIPPrefix) {
						log.Infof("filter %s by read prefer slave idc", node)
						continue
					}
				}
				readNodes = append(readNodes, node)
			}
			if len(readNodes) == 0 {
				readNodes = []string{si.write}
			}
			si.read = readNodes
		}
	}
	return
}
Example No. 29
// RunMigrateTask migrates every slot in [task.FromSlot, task.ToSlot] to the task's new
// group, one slot at a time under a ZooKeeper lock.
func RunMigrateTask(task *MigrateTask) error {
	conn := CreateZkConn()
	defer conn.Close()
	lock := utils.GetZkLock(conn, productName)

	to := task.NewGroupId
	task.Status = MIGRATE_TASK_MIGRATING
	for slotId := task.FromSlot; slotId <= task.ToSlot; slotId++ {
		err := func() error {
			log.Info("start migrate slot:", slotId)

			lock.Lock(fmt.Sprintf("migrate %d", slotId))
			defer func() {
				err := lock.Unlock()
				if err != nil {
					log.Info(err)
				}
			}()
			// set slot status
			s, err := models.GetSlot(conn, productName, slotId)
			if err != nil {
				log.Error(err)
				return err
			}
			if s.State.Status != models.SLOT_STATUS_ONLINE && s.State.Status != models.SLOT_STATUS_MIGRATE {
				log.Warning("status is not online && migrate", s)
				return nil
			}

			from := s.GroupId
			if s.State.Status == models.SLOT_STATUS_MIGRATE {
				from = s.State.MigrateStatus.From
			}

			// make sure from group & target group exists
			exists, err := models.GroupExists(conn, productName, from)
			if err != nil {
				return errors.Trace(err)
			}
			if !exists {
				log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
				return errors.NotFoundf("group %d", from)
			}
			exists, err = models.GroupExists(conn, productName, to)
			if err != nil {
				return errors.Trace(err)
			}
			if !exists {
				return errors.NotFoundf("group %d", to)
			}

			// cannot migrate to itself
			if from == to {
				log.Warning("from == to, ignore", s)
				return nil
			}

			// modify slot status
			if err := s.SetMigrateStatus(conn, from, to); err != nil {
				log.Error(err)
				return err
			}

			// do real migrate
			err = MigrateSingleSlot(conn, slotId, from, to, task.Delay, task.stopChan)
			if err != nil {
				log.Error(err)
				return err
			}

			// migrate done, change slot status back
			s.State.Status = models.SLOT_STATUS_ONLINE
			s.State.MigrateStatus.From = models.INVALID_ID
			s.State.MigrateStatus.To = models.INVALID_ID
			if err := s.Update(conn); err != nil {
				log.Error(err)
				return err
			}
			return nil
		}()
		if err == ErrStopMigrateByUser {
			log.Info("stop migration job by user")
			break
		} else if err != nil {
			log.Error(err)
			task.Status = MIGRATE_TASK_ERR
			return err
		}
		task.Percent = (slotId - task.FromSlot + 1) * 100 / (task.ToSlot - task.FromSlot + 1)
		log.Info("total percent:", task.Percent)
	}
	task.Status = MIGRATE_TASK_FINISHED
	log.Info("migration finished")
	return nil
}