// nextBackendConn returns the next active BackendConnLB in round-robin order,
// or nil when there is no active backend connection.
func (s *BackServiceLB) nextBackendConn() *BackendConnLB {
	s.activeConnsLock.Lock()
	defer s.activeConnsLock.Unlock()

	// TODO: plain round-robin for now; a priority-aware policy could replace it.
	total := len(s.activeConns)
	if total == 0 {
		if s.verbose {
			log.Printf(Cyan("[%s]ActiveConns Len 0"), s.serviceName)
		}
		return nil
	}

	// Wrap the cursor around once it has run past the end of the slice.
	if s.currentConnIndex >= total {
		s.currentConnIndex = 0
	}
	picked := s.activeConns[s.currentConnIndex]
	s.currentConnIndex++

	if s.verbose {
		log.Printf(Cyan("[%s]ActiveConns Len %d, CurrentIndex: %d"), s.serviceName,
			total, s.currentConnIndex)
	}
	return picked
}
// flushRequests fails every request that is still waiting for a response on
// this connection: err is stored as the response error and the request's
// waiter (if any) is released.
func (bc *BackendConn) flushRequests(err error) {
	// Tell the backend service that this connection accepts no new requests.
	bc.MarkConnActiveFalse()

	// Detach the pending map under the lock and process it outside of it.
	bc.Lock()
	pending := bc.seqNum2Request
	bc.seqNum2Request = make(map[int32]*Request, 4096)
	bc.Unlock()

	cutoff := time.Now().Add(-time.Second * 5)
	for _, req := range pending {
		if req.Start <= 0 {
			log.Printf(Red("[%s]Handle Failed Request: %s"), req.Service,
				req.Request.Name)
		} else {
			// Only requests started within the last 5 seconds are logged.
			started := time.Unix(req.Start, 0)
			if started.After(cutoff) {
				// Original author's note: on a laptop, network errors seem to
				// show up after the lid has been closed.
				log.Printf(Red("[%s]Handle Failed Request: %s, Started: %s"),
					req.Service, req.Request.Name, FormatYYYYmmDDHHMMSS(started))
			}
		}

		req.Response.Err = err
		if req.Wait != nil {
			req.Wait.Done()
		}
	}
}
// setResponse pairs a backend reply (or a failure) with its request.
//
// data is one complete thrift frame, or nil to signal a failure for r. When
// data is non-nil, the request is looked up by the frame's sequence id and r
// is ignored. Heartbeat frames only refresh bc.hbLastTime. Returns the decode
// error, an "Invalid Response" error for an unknown sequence id, or err.
func (bc *BackendConn) setResponse(r *Request, data []byte, err error) error {
	hasData := data != nil

	if !hasData {
		// No frame at all: this is a failure report for r.
		log.Printf("[%s]No Data From Server, error: %v", r.Service, err)
		r.Response.Err = err
	} else {
		// Read the message type and sequence id from the frame.
		typeId, seqId, decodeErr := DecodeThriftTypIdSeqId(data)
		if decodeErr != nil {
			// Undecodable frame: report it directly.
			return decodeErr
		}

		// Look up (and remove) the request registered under this sequence id.
		bc.Lock()
		matched, found := bc.seqNum2Request[seqId]
		if found {
			delete(bc.seqNum2Request, seqId)
		}
		bc.Unlock()

		// A heartbeat reply only refreshes the last-heartbeat timestamp.
		if typeId == MESSAGE_TYPE_HEART_BEAT {
			bc.hbLastTime.Set(time.Now().Unix())
			return nil
		}

		if !found {
			return errors.New("Invalid Response")
		}
		if bc.verbose {
			log.Printf("[%s]Data From Server, seqId: %d, Request: %d", matched.Service, seqId, matched.Request.SeqId)
		}
		r = matched
		r.Response.TypeId = typeId
	}

	r.Response.Data = data
	r.Response.Err = err

	// Restore the sequence id of the request now that a frame was matched.
	if hasData {
		r.RestoreSeqId()
	}

	// Update the control handles of the request.
	if err != nil && r.Failed != nil {
		r.Failed.Set(true)
	}
	if r.Wait != nil {
		r.Wait.Done()
	}

	return err
}
// Dispatch logs the request and then, from a background goroutine, copies the
// request payload into the response after a one millisecond delay. It always
// returns nil; completion is signalled through r.Wait.
func (p *fakeServer) Dispatch(r *Request) error {
	log.Printf("Request SeqId: %d, MethodName: %s\n", r.Request.SeqId, r.Request.Name)

	r.Wait.Add(1)
	echo := func() {
		time.Sleep(time.Millisecond)

		// The string round-trip produces a copy of the request bytes.
		payload := string(r.Request.Data)
		r.Response.Data = []byte(payload)

		typeId, seqId, _ := DecodeThriftTypIdSeqId(r.Response.Data)
		log.Printf(Green("TypeId: %d, SeqId: %d\n"), typeId, seqId)
		r.Wait.Done()
	}
	go echo()

	return nil
}
//
// Run keeps (re)establishing the connection to the backend and pumping
// bc.input into it until the connection is marked offline, the connection
// cannot be created, or the writer loop exits without an error.
//
func (bc *BackendConn) Run() {
	for !bc.IsMarkOffline.Get() {
		// 1. Make sure there is a connection to the backend service.
		transport, err := bc.ensureConn()
		if err != nil {
			log.ErrorErrorf(err, "[%s]BackendConn#ensureConn error: %v", bc.service, err)
			return
		}

		framed := NewTBufferedFramedTransport(transport, 100*time.Microsecond, 20)

		// 2. Write the requests from bc.input to the backend RPC server
		//    (synchronous: returns when the writer loop ends).
		err = bc.loopWriter(framed)

		// 3. Stop accepting requests.
		bc.MarkConnActiveFalse()

		// 4. A clean exit ends Run.
		if err == nil {
			log.Printf(Red("[%s]BackendConn#loopWriter normal Exit..."), bc.service)
			return
		}

		// Otherwise every request still queued in bc.input is failed with err.
		for queued := len(bc.input); queued > 0; queued-- {
			bc.setResponse(<-bc.input, nil, err)
		}
	}
}
Exemple #6
0
// flushRequests fails every non-heartbeat request that is still waiting for a
// response on this connection with err, then closes bc.input.
func (bc *BackendConnLB) flushRequests(err error) {
	// Tell the backend service that this connection accepts no new requests.
	bc.MarkConnActiveFalse()

	// Detach the pending map under the lock and process it outside of it.
	bc.Lock()
	pending := bc.seqNum2Request
	bc.seqNum2Request = make(map[int32]*Request)
	bc.Unlock()

	for _, req := range pending {
		// A failed heartbeat is simply skipped.
		if req.Request.TypeId == MESSAGE_TYPE_HEART_BEAT {
			continue
		}

		log.Printf(Red("Handle Failed Request: %s.%s"), req.Service, req.Request.Name)
		req.Response.Err = err
		if req.Wait != nil {
			req.Wait.Done()
		}
	}

	// Close the input channel.
	close(bc.input)
}
//
// Dispatch hands r to the next active backend connection and blocks until
// r.Wait is released. When no backend connection is active, the response data
// is filled with a "worker not found" payload instead. It always returns nil.
//
func (s *BackServiceLB) Dispatch(r *Request) error {
	conn := s.nextBackendConn()
	r.Service = s.serviceName

	if conn != nil {
		conn.PushBack(r)
		r.Wait.Wait()
		return nil
	}

	// No backend service is available.
	if s.verbose {
		log.Printf(Red("[%s]No BackSocket Found: %s"),
			s.serviceName, r.Request.Name)
	}

	// Build the exception payload that is sent back to the caller.
	r.Response.Data = GetWorkerNotFoundData(r, "BackServiceLB")
	return nil
}
Exemple #8
0
// loopWriter drains tasks (proxy session --> client): for each request it
// waits for the response, writes the response data to the client transport and
// flushes it. It returns the first write/flush error, or nil once tasks is
// closed and drained.
func (s *Session) loopWriter(tasks <-chan *Request) error {
	for r := range tasks {
		// 1. Wait for the response that belongs to this request.
		s.handleResponse(r)

		// 2. Write the result back to the client.
		if s.verbose {
			log.Printf("[%s]Session#loopWriter --> client FrameSize: %d",
				r.Service, len(r.Response.Data))
		}

		if _, err := s.TBufferedFramedTransport.Write(r.Response.Data); err != nil {
			log.ErrorErrorf(err, "Write back Data Error: %v", err)
			return err
		}

		// 3. Flush.
		if err := s.TBufferedFramedTransport.FlushBuffer(true); err != nil {
			log.ErrorErrorf(err, "Write back Data Error: %v", err)
			return err
		}

		r.Recycle()
	}
	return nil
}
Exemple #9
0
// setResponse pairs a backend reply (or a failure) with its request.
//
// data is one complete thrift frame, or nil to signal a failure for r. A STOP
// frame marks the connection inactive; a heartbeat frame refreshes
// bc.hbLastTime. Any other frame is matched to its request by sequence id,
// in which case r is ignored. Returns the decode error, an "Invalid Response"
// error for an unknown sequence id, or err.
func (bc *BackendConnLB) setResponse(r *Request, data []byte, err error) error {
	hasData := data != nil

	if !hasData {
		// No frame at all: this is a failure report for r.
		log.Printf("No Data From Server, error: %v\n", err)
		r.Response.Err = err
	} else {
		// Read the message type and sequence id from the frame.
		typeId, seqId, decodeErr := DecodeThriftTypIdSeqId(data)
		if decodeErr != nil {
			// Undecodable frame: report it directly.
			return decodeErr
		}

		// STOP comes straight from the backend and is not a reply to any
		// request: stop accepting new input.
		if typeId == MESSAGE_TYPE_STOP {
			bc.MarkConnActiveFalse()
			return nil
		}

		// Look up (and remove) the request registered under this sequence id.
		bc.Lock()
		matched, found := bc.seqNum2Request[seqId]
		if found {
			delete(bc.seqNum2Request, seqId)
		}
		bc.Unlock()

		// A heartbeat reply only refreshes the last-heartbeat timestamp.
		if typeId == MESSAGE_TYPE_HEART_BEAT {
			bc.hbLastTime.Set(time.Now().Unix())
			return nil
		}

		if !found {
			return errors.New("Invalid Response")
		}

		r = matched
		r.Response.TypeId = typeId
	}

	r.Response.Data = data
	r.Response.Err = err

	// Restore the sequence id of the request now that a frame was matched.
	if hasData {
		r.RestoreSeqId()
	}

	// Update the control handles of the request.
	if err != nil && r.Failed != nil {
		r.Failed.Set(true)
	}
	if r.Wait != nil {
		r.Wait.Done()
	}

	return err
}
Exemple #10
0
//
// ensureConn creates a transport to bc.addr and opens it, retrying the open
// with a 1s, 2s, 4s, 4s, ... back-off until it succeeds or the connection is
// marked offline.
//
// An address containing ":" gets a TCP socket transport; any other address is
// treated as a unix domain socket. It returns (nil, err) when the transport
// cannot be created. Otherwise it returns the transport together with the
// result of the last Open attempt, which is non-nil only when the retry loop
// stopped because the connection was marked offline.
//
func (bc *BackendConn) ensureConn() (transport thrift.TTransport, err error) {
	// 1. Create the transport (err is normally nil as long as the address is
	//    well-formed).
	timeout := time.Second * 5
	if strings.Contains(bc.addr, ":") {
		transport, err = thrift.NewTSocketTimeout(bc.addr, timeout)
	} else {
		transport, err = NewTUnixDomainTimeout(bc.addr, timeout)
	}
	log.Printf(Cyan("[%s]Create Socket To: %s"), bc.service, bc.addr)

	if err != nil {
		// The arguments follow the format order: service, error, address.
		log.ErrorErrorf(err, "[%s]Create Socket Failed: %v, Addr: %s", bc.service, err, bc.addr)
		return nil, err
	}

	// 2. Open the transport; this normally succeeds while the service exists.
	sleepInterval := 1
	err = transport.Open()
	for err != nil && !bc.IsMarkOffline.Get() {
		log.ErrorErrorf(err, "[%s]Socket Open Failed: %v, Addr: %s", bc.service, err, bc.addr)

		// Back off for 1, 2, then 4 seconds between attempts.
		time.Sleep(time.Duration(sleepInterval) * time.Second)

		if sleepInterval < 4 {
			sleepInterval *= 2
		}
		err = transport.Open()
	}
	return transport, err
}
Exemple #11
0
//
// Run listens on p.proxyAddr and serves every accepted connection in its own
// session until Accept fails.
//
// An address without ":" is treated as a unix domain socket path; an existing
// file at that path is removed first. Run returns early (after logging) when
// the server socket cannot be created or cannot start listening.
//
func (p *ProxyServer) Run() {

	var transport thrift.TServerTransport
	var err error

	log.Printf(Magenta("Start Proxy at Address: %s"), p.proxyAddr)

	// Pick the server transport that matches the address type.
	isUnixDomain := false
	if !strings.Contains(p.proxyAddr, ":") {
		if FileExist(p.proxyAddr) {
			os.Remove(p.proxyAddr)
		}
		transport, err = NewTServerUnixDomain(p.proxyAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(p.proxyAddr)
	}
	if err != nil {
		// Without a transport there is nothing to listen on; continuing would
		// dereference a nil transport.
		log.ErrorErrorf(err, "Server Socket Create Failed: %v, Front: %s", err, p.proxyAddr)
		return
	}

	// Start listening.
	if err = transport.Listen(); err != nil {
		log.ErrorErrorf(err, "Server Socket Listen Failed: %v, Front: %s", err, p.proxyAddr)
		return
	}

	ch := make(chan thrift.TTransport, 4096)
	defer close(ch)

	go func() {
		var address string
		for c := range ch {
			// Create one Session per connection.
			socket, ok := c.(SocketAddr)
			if isUnixDomain {
				address = p.proxyAddr
			} else if ok {
				address = socket.Addr().String()
			} else {
				address = "unknow"
			}
			x := NewSession(c, address, p.verbose)
			// Each Session handles its own requests independently.
			go x.Serve(p.router, 1000)
		}
	}()

	// Accept connections until Accept reports an error.
	for {
		c, err := transport.Accept()
		if err != nil {
			log.ErrorErrorf(err, "Accept Error: %v", err)
			break
		}
		ch <- c
	}
}
// StateChanged keeps s.activeConns in sync with conn's active flag.
//
// An active conn that is not yet in the list is appended. An inactive conn
// that is in the list is removed in O(1) by moving the last element into its
// slot. conn.Index always holds the conn's position in s.activeConns, or
// INVALID_ARRAY_INDEX when it is not in the list.
func (s *BackService) StateChanged(conn *BackendConn) {
	// Take the lock before anything reads s.activeConns, including the logs.
	s.activeConnsLock.Lock()
	defer s.activeConnsLock.Unlock()

	log.Printf(Cyan("[%s]StateChanged: %s, Index: %d, Count: %d, IsConnActive: %t"),
		s.serviceName, conn.addr, conn.Index, len(s.activeConns),
		conn.IsConnActive.Get())

	if conn.IsConnActive.Get() {
		log.Printf(Cyan("[%s]MarkConnActiveOK: %s, Index: %d, Count: %d"),
			s.serviceName, conn.addr, conn.Index, len(s.activeConns))

		if conn.Index == INVALID_ARRAY_INDEX {
			conn.Index = len(s.activeConns)
			s.activeConns = append(s.activeConns, conn)

			log.Printf(Green("[%s]Add BackendConn to activeConns: %s, Total Actives: %d"),
				s.serviceName, conn.Addr(), len(s.activeConns))
		}
	} else {
		log.Printf(Red("[%s]Remove BackendConn From activeConns: %s, Index: %d"),
			s.serviceName, conn.Addr(), conn.Index)
		if conn.Index != INVALID_ARRAY_INDEX {
			lastIndex := len(s.activeConns) - 1

			// Move the last element into the slot that is being vacated.
			if lastIndex != conn.Index {
				lastConn := s.activeConns[lastIndex]
				s.activeConns[conn.Index] = lastConn
				lastConn.Index = conn.Index
			}

			// Drop the reference held by the tail slot and detach conn.
			s.activeConns[lastIndex] = nil
			conn.Index = INVALID_ARRAY_INDEX

			// Shrink the slice by one.
			s.activeConns = s.activeConns[0:lastIndex]
			log.Printf(Red("[%s]Remove BackendConn From activeConns: %s, Remains: %d"),
				s.serviceName, conn.Addr(), len(s.activeConns))
		}
	}
}
Exemple #13
0
//
// MarkOffline flags the connection as offline, takes it out of the active set
// and closes bc.input. Calls made after the flag is set are no-ops.
//
// Original author's notes: this happens when
//  1. the backend service is about to go offline and announced it, or
//  2. the backend service died and zk detected it.
//
// The conn itself is not closed here; it is only taken out of the backend
// service proxy. Closing is decided later by heartbeats and read/write errors
// on the conn, so heartbeats must keep running while IsConnActive is false.
//
func (bc *BackendConn) MarkOffline() {
	if bc.IsMarkOffline.Get() {
		return
	}

	log.Printf(Magenta("[%s]BackendConn: %s MarkOffline"), bc.service, bc.addr)
	bc.IsMarkOffline.Set(true)

	// Accept no further input from the backend service proxy.
	bc.MarkConnActiveFalse()

	close(bc.input)
}
Exemple #14
0
// MarkConnActiveFalse switches the connection from active to inactive and
// notifies the delegate, if one is set. It does nothing when the connection
// is already inactive.
func (bc *BackendConn) MarkConnActiveFalse() {
	if !bc.IsConnActive.Get() {
		return
	}

	log.Printf(Red("[%s]MarkConnActiveFalse: %s, %p"), bc.service, bc.addr, bc.delegate)

	// Leave the active state.
	bc.IsConnActive.Set(false)

	// Let the delegate know that the state changed.
	if bc.delegate != nil {
		bc.delegate.StateChanged(bc)
	}
}
Exemple #15
0
// StateChanged removes conn from s.activeConns after it has become inactive.
//
// The only expected transition is Active --> Not Active. A call with a
// still-active conn is logged as unexpected and panics in verbose mode.
// Removal is O(1): the last element is moved into conn's slot, the tail slot
// is cleared, and conn.Index is reset to INVALID_ARRAY_INDEX.
func (s *BackServiceLB) StateChanged(conn *BackendConnLB) {
	s.activeConnsLock.Lock()
	defer s.activeConnsLock.Unlock()

	log.Printf(Green("[%s]StateChanged: %s, Index: %d, Count: %d"), conn.serviceName, conn.addr4Log, conn.Index, len(s.activeConns))
	if conn.IsConnActive.Get() {
		// BackServiceLB knows a single transition: Active --> Not Active.
		log.Printf(Magenta("Unexpected BackendConnLB State"))
		if s.verbose {
			panic("Unexpected BackendConnLB State")
		}
	} else {
		log.Printf(Red("Remove BackendConn From activeConns: %s, Index: %d, Count: %d"),
			conn.Addr4Log(), conn.Index, len(s.activeConns))

		if conn.Index != INVALID_ARRAY_INDEX {
			// 1. Move the last element into the slot that is being vacated.
			lastIndex := len(s.activeConns) - 1
			if lastIndex != conn.Index {
				lastConn := s.activeConns[lastIndex]
				s.activeConns[conn.Index] = lastConn
				lastConn.Index = conn.Index
			}

			// 2. Clear the tail slot and detach conn. This must also happen
			//    when conn itself is the last element, otherwise the slot keeps
			//    a reference and conn keeps a stale index.
			s.activeConns[lastIndex] = nil
			conn.Index = INVALID_ARRAY_INDEX

			log.Printf(Red("Remove BackendConn From activeConns: %s"), conn.Addr4Log())

			// 3. Shrink the slice by one.
			s.activeConns = s.activeConns[0:lastIndex]
		}
	}
}
Exemple #16
0
// Run pumps bc.input into the backend until the writer loop ends, then marks
// the connection inactive. When the writer loop ended with an error, every
// request still queued in bc.input is failed with that error.
func (bc *BackendConnLB) Run() {
	log.Printf(Green("[%s]Add New BackendConnLB: %s\n"), bc.serviceName, bc.addr4Log)

	// 1. Write the requests from bc.input to the backend service.
	writeErr := bc.loopWriter()

	// 2. Leave the active state so that no new work is routed here, whether
	//    the loop failed or exited normally.
	bc.MarkConnActiveFalse()

	log.Printf(Red("[%s]Remove Faild BackendConnLB: %s\n"), bc.serviceName, bc.addr4Log)

	// A nil error means bc.input was closed, so nothing should be left in it.
	if writeErr == nil {
		return
	}

	// On error, fail whatever is currently queued in bc.input.
	for queued := len(bc.input); queued > 0; queued-- {
		bc.setResponse(<-bc.input, nil, writeErr)
	}
}
Exemple #17
0
//
// handleResponse blocks until the response for r is available, converts a
// response error into a thrift exception payload, and records the operation
// statistics for r.
//
func (s *Session) handleResponse(r *Request) {
	// Block until the result shows up.
	r.Wait.Wait()

	// Turn an error into an exception payload.
	if failure := r.Response.Err; failure != nil {
		r.Response.Data = GetThriftException(r, "proxy_session")
		log.Printf(Magenta("---->Convert Error Back to Exception"))
	}

	// Record the elapsed time for this operation.
	elapsed := microseconds() - r.Start
	incrOpStats(r.OpStr, elapsed)
}
Exemple #18
0
// handleRequest builds a Request from one client frame, updates the session
// statistics and hands the request to the dispatcher d. It returns the request
// together with the dispatcher's error.
func (s *Session) handleRequest(request []byte, d Dispatcher) (*Request, error) {
	if s.verbose {
		log.Printf("HandleRequest: %s", string(request))
	}

	// Build the Request.
	req := NewRequest(request, true)

	// Update the statistics.
	s.LastOpUnix = time.Now().Unix()
	s.Ops++

	// Hand it over to the dispatcher (router).
	dispatchErr := d.Dispatch(req)
	return req, dispatchErr
}
Exemple #19
0
//
// PurgeEndpoints drops the sockets at index p.Active and above that were
// marked offline more than 5 seconds ago.
//
func (p *BackSockets) PurgeEndpoints() {
	// Nothing to purge when every socket is active.
	if len(p.Sockets) == p.Active {
		return
	}

	log.Printf(utils.Green("PurgeEndpoints %d vs. %d"), p.Active, len(p.Sockets))

	p.Lock()
	defer p.Unlock()

	now := time.Now().Unix()
	nowStr := time.Now().Format("@2006-01-02 15:04:05")

	idx := p.Active
	for idx < len(p.Sockets) {
		candidate := p.Sockets[idx]
		tail := len(p.Sockets) - 1

		// Not expired yet: move on to the next socket.
		if now-candidate.markedOfflineTime <= 5 {
			idx++
			continue
		}

		// Swap the candidate with the last element
		// (presumably p.swap exchanges their positions — confirm).
		p.swap(candidate, p.Sockets[tail])

		// The socket is not closed here; it maintains its own state.
		log.Println(utils.Red("PurgeEndpoints#Purge Old Socket: "), candidate.Addr, nowStr)

		p.Sockets[tail] = nil
		p.Sockets = p.Sockets[0:tail]

		// idx is left unchanged so the element now at idx is examined next.
	}
}
Exemple #20
0
// NewThriftLoadBalanceServer builds a ThriftLoadBalanceServer from config: it
// copies the addresses and names, creates the zk topology, derives the load
// balancer's service identity from the frontend address, and creates the
// backend service.
func NewThriftLoadBalanceServer(config *utils.Config) *ThriftLoadBalanceServer {
	log.Printf("FrontAddr: %s\n", Magenta(config.FrontendAddr))

	// The frontend side faces rpc_proxy.
	server := new(ThriftLoadBalanceServer)
	server.config = config
	server.zkAddr = config.ZkAddr
	server.productName = config.ProductName
	server.serviceName = config.Service
	server.frontendAddr = config.FrontendAddr
	server.backendAddr = config.BackAddr
	server.verbose = config.Verbose
	server.exitEvt = make(chan bool)

	server.topo = zk.NewTopology(server.productName, server.zkAddr)
	server.lbServiceName = GetServiceIdentity(server.frontendAddr)

	// The backend side faces the rpc servers (python ones, per the original note).
	server.backendService = NewBackServiceLB(server.serviceName, server.backendAddr,
		server.verbose, server.exitEvt)

	return server
}
// Stop flags the service as stopped, posts an event on s.evtbus, and starts a
// goroutine that waits until no request has been seen for more than 10 seconds
// and then marks every active connection offline.
func (s *BackService) Stop() {
	// Raise the stop flag.
	s.stop.Set(true)
	// Post an event (original note: ServiceNodes stop watching afterwards).
	s.evtbus <- true

	go func() {
		// TODO (original): wait until the service has been idle for > 10s.
		for {
			now := time.Now().Unix()
			if now-s.lastRequestTime.Get() > 10 {
				break
			}
			time.Sleep(time.Second)
		}

		// Mark connections offline one at a time until the list is empty.
		// s.activeConns is read under its lock. The lock is released before
		// MarkOffline so that the conn's state-change callback can take it.
		for {
			s.activeConnsLock.Lock()
			if len(s.activeConns) == 0 {
				s.activeConnsLock.Unlock()
				break
			}
			conn := s.activeConns[0]
			s.activeConnsLock.Unlock()

			conn.MarkOffline()
		}

		log.Printf(Red("Mark All Connections Off: %s"), s.serviceName)
	}()
}
// NewBackService creates a BackService, starts watching its backend service
// nodes, and starts a goroutine that logs a status report every 10 seconds
// until the service is stopped.
func NewBackService(productName string, serviceName string, topo *zk.Topology, verbose bool) *BackService {
	svc := &BackService{
		productName: productName,
		serviceName: serviceName,
		activeConns: make([]*BackendConn, 0, 10),
		addr2Conn:   make(map[string]*BackendConn),
		topo:        topo,
		verbose:     verbose,
	}

	svc.WatchBackServiceNodes()

	// Periodic status report.
	report := func() {
		for {
			if svc.stop.Get() {
				return
			}
			log.Printf(Blue("[Report]: %s --> %d backservice, coroutine: %d"),
				svc.serviceName, svc.Active(), runtime.NumGoroutine())
			time.Sleep(time.Second * 10)
		}
	}
	go report()

	return svc
}
Exemple #23
0
// mainBody runs the zeromq load balancer: a ROUTER socket bound to
// frontendAddr for clients and a ROUTER socket bound to backendAddr for
// workers. It registers the endpoint in zk, forwards client messages to the
// next available worker and worker replies back to the frontend, and exchanges
// heartbeats with the workers.
//
// After a termination signal the endpoint is removed from zk and the loop
// exits once no data message has been seen for 3 seconds. mainBody returns
// early (after logging) when a socket cannot be created or bound.
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) {
	// 1. Create the connection to zk.
	topo := zk.NewTopology(productName, zkAddr)

	// 2. Create the sockets. A failed creation must not reach the deferred
	//    Close with a nil socket.
	frontend, err := zmq.NewSocket(zmq.ROUTER)
	if err != nil {
		log.Errorf("Error When Creating Frontend Socket: %v\n", err)
		return
	}
	defer frontend.Close()

	backend, err := zmq.NewSocket(zmq.ROUTER)
	if err != nil {
		log.Errorf("Error When Creating Backend Socket: %v\n", err)
		return
	}
	defer backend.Close()

	// Bind ROUTER/ROUTER to the given addresses.

	// tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555
	lbServiceName := GetServiceIdentity(frontendAddr)

	frontend.SetIdentity(lbServiceName)
	// An endpoint that is not bound must not be registered in zk, so stop here
	// when binding fails.
	if err := frontend.Bind(frontendAddr); err != nil { //  For clients "tcp://*:5555"
		log.Errorf("Error When Binding Frontend %s: %v\n", frontendAddr, err)
		return
	}
	if err := backend.Bind(backendAddr); err != nil { //  For workers "tcp://*:5556"
		log.Errorf("Error When Binding Backend %s: %v\n", backendAddr, err)
		return
	}

	log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr))

	// Queue of backend workers.
	workersQueue := queue.NewPPQueue()

	// Heartbeat ticker.
	heartbeat_at := time.Tick(HEARTBEAT_INTERVAL)

	// Original author's assumptions:
	//   1. once zeromq reports a message as readable, all of its parts are
	//      readable;
	//   2. writes to zeromq are asynchronous, so they do not block (unless the
	//      volume is huge).
	poller := zmq.NewPoller()
	poller.Add(backend, zmq.POLLIN)
	poller.Add(frontend, zmq.POLLIN)

	// 3. Register in zk.
	var endpointInfo map[string]interface{} = make(map[string]interface{})
	endpointInfo["frontend"] = frontendAddr
	endpointInfo["backend"] = backendAddr

	topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)

	isAlive := true
	isAliveLock := &sync.RWMutex{}

	go func() {
		servicePath := topo.ProductServicePath(serviceName)
		evtbus := make(chan interface{})
		for {
			// The watch only serves to observe the session state.
			_, err := topo.WatchNode(servicePath, evtbus)

			if err == nil {
				// Wait for an event.
				e := (<-evtbus).(topozk.Event)
				if e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching {
					// The session expired: the old node belongs to the previous
					// session, so delete it and register again.
					topo.DeleteServiceEndPoint(serviceName, lbServiceName)
					topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)
				}
			} else {
				time.Sleep(time.Second)
			}

			isAliveLock.RLock()
			isAlive1 := isAlive
			isAliveLock.RUnlock()
			if !isAlive1 {
				break
			}
		}
	}()

	ch := make(chan os.Signal, 1)

	// NOTE: SIGKILL cannot be caught by a process, so listing it here has no
	// effect; it is kept to leave the registration unchanged.
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL)

	// Time at which the balancer exits after a termination signal, provided no
	// data message arrives in the meantime.
	var suideTime time.Time

mainLoop:
	for {
		sockets, err := poller.Poll(HEARTBEAT_INTERVAL)
		if err != nil {
			log.Errorf("Error When Pollling: %v\n", err)
			continue
		}

		hasValidMsg := false
		for _, socket := range sockets {
			switch socket.Socket {
			case backend:
				// What a worker sends:
				//              <"", proxy_id, "", client_id, "", rpc_data>
				// What the backend socket reads:
				//   <worker_id, "", proxy_id, "", client_id, "", rpc_data>
				msgs, err := backend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error When RecvMessage from background: %v\n", err)
					continue
				}

				// Message kinds:
				//   <worker_id, "", proxy_id, "", client_id, "", rpc_data>
				//   <worker_id, "", rpc_control_data>
				worker_id, msgs := utils.Unwrap(msgs)

				if len(msgs) == 1 {
					// Control message (PPP_READY / PPP_HEARTBEAT / PPP_STOP).
					controlMsg := msgs[0]

					// Skip an empty control message.
					if len(controlMsg) == 0 {
						continue
					}

					if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT {
						// Remaining concurrency of the worker; defaults to 1
						// when the message does not carry it.
						var concurrency int
						if len(controlMsg) >= 3 {
							concurrency = int(controlMsg[2])
						} else {
							concurrency = 1
						}

						force_update := controlMsg[0] == PPP_READY
						workersQueue.UpdateWorkerStatus(worker_id, concurrency, force_update)
					} else if controlMsg[0] == PPP_STOP {
						// Stop the given backend worker.
						workersQueue.UpdateWorkerStatus(worker_id, -1, true)
					} else {
						log.Errorf("Unexpected Control Message: %d", controlMsg[0])
					}
				} else {
					hasValidMsg = true
					// Forward to the frontend; the message is lost if the
					// frontend peer is gone.
					workersQueue.UpdateWorkerStatus(worker_id, 0, false)
					// msgs: <proxy_id, "", client_id, "", rpc_data>
					frontend.SendMessage(msgs)
				}
			case frontend:
				hasValidMsg = true
				log.Println("----->Message from front: ")
				msgs, err := frontend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error when reading from frontend: %v\n", err)
					continue
				}

				// msgs: <proxy_id, "", client_id, "", rpc_data>
				if config.VERBOSE {
					utils.PrintZeromqMsgs(msgs, "frontend")
				}
				msgs = utils.TrimLeftEmptyMsg(msgs)

				// Hand the message to a backend worker.
				worker := workersQueue.NextWorker()
				if worker != nil {
					if config.VERBOSE {
						log.Println("Send Msg to Backend worker: ", worker.Identity)
					}
					backend.SendMessage(worker.Identity, "", msgs)
				} else {
					// No worker: answer with an error payload in place of the
					// rpc data.
					if config.VERBOSE {
						log.Println("No backend worker found")
					}
					// NOTE(review): the service name "account" is hard-coded
					// here — confirm it should not be serviceName.
					errMsg := proxy.GetWorkerNotFoundData("account", 0)

					// <proxy_id, "", client_id, "", rpc_data>
					frontend.SendMessage(msgs[0:(len(msgs)-1)], errMsg)
				}
			}
		}

		// When an exit has been scheduled, push it back while data messages
		// keep arriving and leave once the deadline has passed.
		isAliveLock.RLock()
		isAlive1 := isAlive
		isAliveLock.RUnlock()

		if !isAlive1 {
			if hasValidMsg {
				suideTime = time.Now().Add(time.Second * 3)
			} else {
				if time.Now().After(suideTime) {
					log.Println(utils.Green("Load Balance Suiside Gracefully"))
					break
				}
			}
		}

		// Heartbeats and signals (non-blocking).
		select {
		case <-heartbeat_at:
			now := time.Now()

			// Send a heartbeat to every worker that has not expired yet.
			for _, worker := range workersQueue.WorkerQueue {
				if worker.Expire.After(now) {
					backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR)
				}
			}

			workersQueue.PurgeExpired()
		case sig := <-ch:
			isAliveLock.Lock()
			isAlive1 := isAlive
			isAlive = false
			isAliveLock.Unlock()

			if isAlive1 {
				// First signal: unregister, then finish the work in flight.
				topo.DeleteServiceEndPoint(serviceName, lbServiceName)

				if sig == syscall.SIGKILL {
					log.Println(utils.Red("Got Kill Signal, Return Directly"))
					// A bare break would only leave the select statement.
					break mainLoop
				} else {
					suideTime = time.Now().Add(time.Second * 3)
					log.Println(utils.Red("Schedule to suicide at: "), suideTime.Format("@2006-01-02 15:04:05"))
				}
			}
		default:
		}
	}
}
Exemple #24
0
// Run registers the service, listens on p.frontendAddr and serves every
// accepted connection in its own non-blocking session until Accept fails.
//
// An address without ":" is treated as a unix domain socket path; an existing
// file at that path is removed first. Run panics when the server socket cannot
// be created or cannot start listening. On a termination signal the service
// endpoint is deleted and, once no request has been seen for more than 5
// seconds, the listening transport is interrupted and closed, which ends the
// accept loop.
func (p *ThriftLoadBalanceServer) Run() {
	exitSignal := make(chan os.Signal, 1)

	// NOTE: SIGKILL cannot be caught by a process; listing it has no effect.
	signal.Notify(exitSignal, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL)

	// Register the service.
	evtExit := make(chan interface{})
	serviceEndpoint := RegisterService(p.serviceName, p.frontendAddr, p.lbServiceName,
		p.topo, evtExit, p.config.WorkDir, p.config.CodeUrlVersion)

	// Pick the server transport that matches the address type.
	var transport thrift.TServerTransport
	var err error

	isUnixDomain := false
	// 127.0.0.1:9999 (":" tells the address types apart)
	if !strings.Contains(p.frontendAddr, ":") {
		if FileExist(p.frontendAddr) {
			os.Remove(p.frontendAddr)
		}
		transport, err = NewTServerUnixDomain(p.frontendAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(p.frontendAddr)
	}

	if err != nil {
		log.ErrorErrorf(err, "Server Socket Create Failed: %v", err)
		panic(fmt.Sprintf("Invalid FrontendAddress: %s", p.frontendAddr))
	}

	err = transport.Listen()
	if err != nil {
		log.ErrorErrorf(err, "Server Socket Create Failed: %v", err)
		panic(fmt.Sprintf("Binding Error FrontendAddress: %s", p.frontendAddr))
	}

	// ch is closed exactly once, by this defer, when Run returns.
	ch := make(chan thrift.TTransport, 4096)
	defer close(ch)

	// Shutdown handling. TODO (original): graceful exit.
	go func() {
		<-exitSignal

		// Tell RegisterService to end its loop.
		evtExit <- true
		log.Info(Green("Receive Exit Signals...."))
		serviceEndpoint.DeleteServiceEndpoint(p.topo)

		start := time.Now().Unix()
		for {
			// Exit once no new request has arrived for more than 5 seconds.
			now := time.Now().Unix()
			if now-p.lastRequestTime.Get() > 5 {
				log.Printf(Red("[%s]Graceful Exit..."), p.serviceName)
				break
			}
			log.Printf(Cyan("[%s]Sleeping %d seconds before Exit...\n"),
				p.serviceName, now-start)
			time.Sleep(time.Second)
		}

		transport.Interrupt()
		transport.Close()
	}()

	go func() {
		var address string
		for c := range ch {
			// Create one Session per connection.
			socket, ok := c.(SocketAddr)

			if ok {
				if isUnixDomain {
					address = p.frontendAddr
				} else {
					address = socket.Addr().String()
				}
			} else {
				address = "unknow"
			}
			x := NewNonBlockSession(c, address, p.verbose, &p.lastRequestTime)
			// Each Session handles its own requests independently.
			go x.Serve(p.backendService, 1000)
		}
	}()

	// Accept connections until Accept reports an error. ch must not be closed
	// here: the deferred close above already does it, and closing a channel
	// twice panics.
	for {
		c, err := transport.Accept()
		if err != nil {
			break
		}
		ch <- c
	}
}
Exemple #25
0
//
// go test github.com/wfxiang08/rpc_proxy/proxy -v -run "TestSession"
//
// TestSession pushes requests through a BackendConn (client side) to a
// Session served by a fakeServer (server side) and checks that every response
// has the same length as its request.
//
func TestSession(t *testing.T) {
	// Act as a server.
	transport, err := thrift.NewTServerSocket("127.0.0.1:0")
	assert.NoError(t, err)
	err = transport.Open() // open the transport
	assert.NoError(t, err)

	defer transport.Close()

	err = transport.Listen() // start listening
	assert.NoError(t, err)

	addr := transport.Addr().String()

	fmt.Println("Addr: ", addr)

	var requestNum int32 = 10
	requests := make([]*Request, 0, requestNum)

	var i int32
	for i = 0; i < requestNum; i++ {
		buf := make([]byte, 100, 100)
		l := fakeData("Hello", thrift.CALL, i+1, buf[0:0])
		buf = buf[0:l]

		req := NewRequest(buf, true)

		// Added up front because the goroutines below may not have run yet
		// when the verification loop at the end starts waiting.
		req.Wait.Add(1)

		assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId是否靠谱")

		requests = append(requests, req)
	}
	go func() {
		// Client side: simulate the requests.
		bc := NewBackendConn(addr, nil, "test", true)
		bc.currentSeqId = 10

		// Send the data.
		var i int32
		for i = 0; i < requestNum; i++ {
			fmt.Println("Sending Request to Backend Conn", i)
			bc.PushBack(requests[i])

			requests[i].Wait.Done()
		}

		// Give the responses time to come back.
		time.Sleep(time.Second * 2)
	}()

	server := &fakeServer{}
	go func() {
		// Server side.
		tran, err := transport.Accept()
		if err != nil {
			log.ErrorErrorf(err, "Error: %v\n", err)
		}
		assert.NoError(t, err)
		if err != nil {
			// Without an accepted transport there is nothing to serve or close.
			return
		}
		// Registered only after the error check so that a nil transport is
		// never closed.
		defer tran.Close()

		session := NewSession(tran, "", true)

		session.Serve(server, 6)

		time.Sleep(time.Second * 2)
	}()

	for i = 0; i < requestNum; i++ {
		fmt.Println("===== Before Wait")
		requests[i].Wait.Wait()
		fmt.Println("===== Before After Wait")

		log.Printf("Request: %d, .....", i)
		assert.Equal(t, len(requests[i].Response.Data), len(requests[i].Request.Data))
	}
}
//
// WatchBackServiceNodes starts a goroutine that follows the children of the
// service path in zk until the service is stopped. On every change it creates
// a BackendConn for each newly listed endpoint and marks offline (and forgets)
// every connection whose endpoint is no longer listed.
//
func (s *BackService) WatchBackServiceNodes() {
	s.evtbus = make(chan interface{}, 2)
	servicePath := s.topo.ProductServicePath(s.serviceName)

	go func() {
		for !s.stop.Get() {
			serviceIds, err := s.topo.WatchChildren(servicePath, s.evtbus)
			if err != nil {
				log.WarnErrorf(err, "zk read failed: %s", servicePath)
				// The read failed: wait 5 seconds before trying again.
				time.Sleep(5 * time.Second)
				continue
			}

			// Collect the frontend addresses of the endpoints currently listed.
			live := make(map[string]bool, len(serviceIds))
			for _, serviceId := range serviceIds {
				log.Printf(Green("---->Find Endpoint: %s for Service: %s"), serviceId, s.serviceName)

				endpointInfo, err := GetServiceEndpoint(s.topo, s.serviceName, serviceId)
				if err != nil {
					log.ErrorErrorf(err, "Service Endpoint Read Error: %v\n", err)
					continue
				}

				log.Printf(Green("---->Add endpoint %s To Service %s"),
					endpointInfo.Frontend, s.serviceName)

				// Addresses containing ":" are always accepted. Unix domain
				// sockets are accepted only for the test product, since they
				// cannot be reached from another machine.
				if strings.Contains(endpointInfo.Frontend, ":") || s.productName == TEST_PRODUCT_NAME {
					live[endpointInfo.Frontend] = true
				}
			}

			// Create a connection for every address that has no usable one.
			// (Original note: it joins s.activeConns once its heartbeat succeeds.)
			for addr := range live {
				if conn, known := s.addr2Conn[addr]; known && !conn.IsMarkOffline.Get() {
					continue
				}
				s.addr2Conn[addr] = NewBackendConn(addr, s, s.serviceName, s.verbose)
			}

			// Retire the connections whose address is no longer listed.
			for addr, conn := range s.addr2Conn {
				if _, listed := live[addr]; listed {
					continue
				}
				conn.MarkOffline()

				// Forget the conn and leave it to wind down on its own.
				delete(s.addr2Conn, addr)
			}

			// Wait for the next event.
			<-s.evtbus
		}
	}()
}
//
// go test github.com/wfxiang08/rpc_proxy/proxy -v -run "TestBackend"
//
// TestBackend sends requests through a BackendConn to an echoing server and
// checks that each echoed frame decodes to the sequence id of its request.
//
func TestBackend(t *testing.T) {
	// Act as a server.
	transport, err := thrift.NewTServerSocket("127.0.0.1:0")
	assert.NoError(t, err)

	err = transport.Open() // open the transport
	assert.NoError(t, err)
	defer transport.Close()

	err = transport.Listen() // start listening
	assert.NoError(t, err)

	addr := transport.Addr().String()
	fmt.Println("Addr: ", addr)

	var requestNum int32 = 10
	requests := make([]*Request, 0, requestNum)

	for n := int32(0); n < requestNum; n++ {
		frame := make([]byte, 100)
		size := fakeData("Hello", thrift.CALL, n+1, frame[0:0])
		frame = frame[0:size]

		req := NewRequest(frame, false)

		// Added up front because the goroutines below may not have run yet
		// when the verification loop at the end starts waiting.
		req.Wait.Add(1)

		assert.Equal(t, n+1, req.Request.SeqId, "Request SeqId是否靠谱")

		requests = append(requests, req)
	}

	// Client side.
	go func() {
		bc := NewBackendConn(addr, nil, "test", true)
		bc.currentSeqId = 10

		// Send the data.
		for n := int32(0); n < requestNum; n++ {
			fmt.Println("Sending Request to Backend Conn", n)
			bc.PushBack(requests[n])

			requests[n].Wait.Done()
		}

		// Give the responses time to come back.
		time.Sleep(time.Second * 2)
	}()

	// Server side.
	go func() {
		tran, err := transport.Accept()
		if err != nil {
			log.ErrorErrorf(err, "Error: %v\n", err)
		}
		assert.NoError(t, err)

		framed := NewTBufferedFramedTransport(tran, time.Microsecond*100, 2)

		// Read each frame from the accepted transport and echo it back.
		for n := int32(0); n < requestNum; n++ {
			request, err := framed.ReadFrame()
			assert.NoError(t, err)

			req := NewRequest(request, false)
			assert.Equal(t, req.Request.SeqId, n+10)
			fmt.Printf("Server Got Request, and SeqNum OK, Id: %d, Frame Size: %d\n", n, len(request))

			// Write the data back.
			framed.Write(request)
			framed.FlushBuffer(true)
		}

		tran.Close()
	}()

	fmt.Println("Requests Len: ", len(requests))
	for idx, original := range requests {
		original.Wait.Wait()

		// Decode the echoed frame and compare it with the original request.
		echoed := NewRequest(original.Response.Data, false)

		log.Printf(Green("SeqMatch[%d]: Orig: %d, Return: %d\n"), idx, echoed.Request.SeqId, original.Request.SeqId)
		assert.Equal(t, echoed.Request.SeqId, original.Request.SeqId)
	}
	log.Println("OK")
}
Exemple #28
0
// RpcMain is the shared entry point of the rpc binaries: it parses the
// command line, configures logging, loads the config file given with -c,
// validates it with configCheck, and runs the server built by serverFactory.
//
// binaryName is substituted into the usage text. serviceDesc, buildDate and
// gitVersion make up the version banner. RpcMain panics (through the log
// package) on an invalid --log-keep-days value, an unopenable log file, an
// unloadable config file, or a nil configCheck.
func RpcMain(binaryName string, serviceDesc string, configCheck ConfigCheck,
	serverFactory ServerFactorory, buildDate string, gitVersion string) {

	// 1. Parse the arguments.
	usage = fmt.Sprintf(usage, binaryName, binaryName)

	version := fmt.Sprintf("Version: %s\nBuildDate: %s\nDesc: %s\nAuthor: [email protected]", gitVersion, buildDate, serviceDesc)
	args, err := docopt.Parse(usage, nil, true, version, true)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	if s, ok := args["-V"].(bool); ok && s {
		fmt.Println(Green(version))
		os.Exit(1)
	}

	// Optional http server on the profile address.
	if s, ok := args["--profile-addr"].(string); ok && len(s) > 0 {
		go func() {
			log.Printf(Red("Profile Address: %s"), s)
			log.Println(http.ListenAndServe(s, nil))
		}()
	}

	// 2. Configure logging.
	log.SetLevel(log.LEVEL_INFO)

	var maxKeepDays int = 3
	if s, ok := args["--log-keep-days"].(string); ok && s != "" {
		v, err := strconv.ParseInt(s, 10, 32)
		if err != nil {
			log.PanicErrorf(err, "invalid max log file keep days = %s", s)
		}
		maxKeepDays = int(v)
	}

	// Send the log output to a rolling file when -L is given.
	if s, ok := args["-L"].(string); ok && s != "" {
		f, err := log.NewRollingFile(s, maxKeepDays)
		if err != nil {
			log.PanicErrorf(err, "open rolling log file failed: %s", s)
		} else {
			defer f.Close()
			log.StdLog = log.New(f, "")
		}
	}
	log.SetLevel(log.LEVEL_INFO)
	log.SetFlags(log.Flags() | log.Lshortfile)

	// Apply the requested log level.
	if s, ok := args["--log-level"].(string); ok && s != "" {
		SetLogLevel(s)
	}

	// Both options may be absent; the work dir falls back to the current
	// working directory.
	workDir, _ := args["--work-dir"].(string)
	codeUrlVersion, _ := args["--code-url-version"].(string)
	if len(workDir) == 0 {
		workDir, _ = os.Getwd()
	}

	// One verb per argument.
	log.Printf("WorkDir: %s, CodeUrl: %s", workDir, codeUrlVersion)

	// 3. Load the config.
	configFile := args["-c"].(string)
	conf, err := utils.LoadConf(configFile)
	if err != nil {
		log.PanicErrorf(err, "load config failed")
	}

	// Extra settings that come from the command line.
	conf.WorkDir = workDir
	conf.CodeUrlVersion = codeUrlVersion

	if configCheck != nil {
		configCheck(conf)
	} else {
		log.Panic("No Config Check Given")
	}
	// Print the version banner on every start.
	log.Infof(Green("-----------------\n%s\n--------------------------------------------------------------------"), version)

	// Start the server.
	server := serverFactory(conf)
	server.Run()
}
Exemple #29
0
// run starts the backend side of the load balancer: it listens on
// s.backendAddr and wraps every accepted connection in a BackendConnLB that is
// added to s.activeConns. It also starts a periodic status report and a
// goroutine that closes the listening transport when s.exitEvt fires.
//
// An address without ":" is treated as a unix domain socket path; an existing
// file at that path is removed first. run panics when the server socket cannot
// be created or cannot start listening; it returns once the goroutines have
// been started.
func (s *BackServiceLB) run() {
	go func() {
		// Report the current state every 10 seconds.
		for {
			log.Printf(Green("[Report]: %s --> %d workers, coroutine: %d"),
				s.serviceName, s.Active(), runtime.NumGoroutine())
			time.Sleep(time.Second * 10)
		}
	}()

	var transport thrift.TServerTransport
	var err error

	// Pick the server transport that matches the address type.
	isUnixDomain := false
	// 127.0.0.1:9999 (":" tells the address types apart)
	if !strings.Contains(s.backendAddr, ":") {
		if FileExist(s.backendAddr) {
			os.Remove(s.backendAddr)
		}
		transport, err = NewTServerUnixDomain(s.backendAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(s.backendAddr)
	}

	if err != nil {
		log.ErrorErrorf(err, "[%s]Server Socket Create Failed: %v", s.serviceName, err)
		panic("BackendAddr Invalid")
	}

	err = transport.Listen()
	if err != nil {
		log.ErrorErrorf(err, "[%s]Server Socket Open Failed: %v", s.serviceName, err)
		panic("Server Socket Open Failed")
	}

	log.Printf(Green("[%s]LB Backend Services listens at: %s"), s.serviceName, s.backendAddr)

	s.ch = make(chan thrift.TTransport, 4096)

	// Shutdown handling. TODO (original): graceful exit.
	go func() {
		<-s.exitEvt
		log.Info(Red("Receive Exit Signals...."))
		transport.Interrupt()
		transport.Close()
	}()

	go func() {
		var backendAddr string
		for trans := range s.ch {
			// Create one BackendConnLB per connection.
			socket, ok := trans.(SocketAddr)
			if ok {
				if isUnixDomain {
					backendAddr = s.backendAddr
				} else {
					backendAddr = socket.Addr().String()
				}

				conn := NewBackendConnLB(trans, s.serviceName, backendAddr, s, s.verbose)

				// The connection was just established, so it is added to the
				// active list right away.
				s.activeConnsLock.Lock()
				conn.Index = len(s.activeConns)
				s.activeConns = append(s.activeConns, conn)
				// Capture the count under the lock: conn.Index is the position
				// (count - 1) and may change once the lock is released.
				workers := len(s.activeConns)
				s.activeConnsLock.Unlock()

				log.Printf(Green("%s --> %d workers"), s.serviceName, workers)
			} else {
				panic("Invalid Socket Type")
			}

		}
	}()

	// Accept connections until Accept reports an error.
	go func() {
		for {
			c, err := transport.Accept()
			if err != nil {
				return
			}
			s.ch <- c
		}
	}()
}
Exemple #30
0
// Close flags the session as closed, logs the event and closes the underlying
// framed transport, returning the transport's close error.
func (s *Session) Close() error {
	s.closed.Set(true)
	log.Printf(Red("Close Proxy Session"))

	closeErr := s.TBufferedFramedTransport.Close()
	return closeErr
}