// // 确保Socket成功连接到后端服务器 // func (bc *BackendConn) ensureConn() (transport thrift.TTransport, err error) { // 1. 创建连接(只要IP没有问题, err一般就是空) timeout := time.Second * 5 if strings.Contains(bc.addr, ":") { transport, err = thrift.NewTSocketTimeout(bc.addr, timeout) } else { transport, err = NewTUnixDomainTimeout(bc.addr, timeout) } log.Printf(Cyan("[%s]Create Socket To: %s"), bc.service, bc.addr) if err != nil { log.ErrorErrorf(err, "[%s]Create Socket Failed: %v, Addr: %s", err, bc.service, bc.addr) // 连接不上,失败 return nil, err } // 2. 只要服务存在,一般不会出现err sleepInterval := 1 err = transport.Open() for err != nil && !bc.IsMarkOffline.Get() { log.ErrorErrorf(err, "[%s]Socket Open Failed: %v, Addr: %s", bc.service, err, bc.addr) // Sleep: 1, 2, 4这几个间隔 time.Sleep(time.Duration(sleepInterval) * time.Second) if sleepInterval < 4 { sleepInterval *= 2 } err = transport.Open() } return transport, err }
func (s *Session) loopWriter(tasks <-chan *Request) error { // Proxy: Session ---> Client for r := range tasks { // 1. 等待Request对应的Response // 出错了如何处理呢? s.handleResponse(r) // 2. 将结果写回给Client if s.verbose { log.Printf("[%s]Session#loopWriter --> client FrameSize: %d", r.Service, len(r.Response.Data)) } // r.Response.Data ---> Client _, err := s.TBufferedFramedTransport.Write(r.Response.Data) if err != nil { log.ErrorErrorf(err, "Write back Data Error: %v", err) return err } // 3. Flush err = s.TBufferedFramedTransport.FlushBuffer(true) // len(tasks) == 0 if err != nil { log.ErrorErrorf(err, "Write back Data Error: %v", err) return err } r.Recycle() } return nil }
// // 两参数是必须的: ProductName, zkAddress, frontAddr可以用来测试 // func (p *ProxyServer) Run() { var transport thrift.TServerTransport var err error log.Printf(Magenta("Start Proxy at Address: %s"), p.proxyAddr) // 读取后端服务的配置 isUnixDomain := false if !strings.Contains(p.proxyAddr, ":") { if FileExist(p.proxyAddr) { os.Remove(p.proxyAddr) } transport, err = NewTServerUnixDomain(p.proxyAddr) isUnixDomain = true } else { transport, err = thrift.NewTServerSocket(p.proxyAddr) } if err != nil { log.ErrorErrorf(err, "Server Socket Create Failed: %v, Front: %s", err, p.proxyAddr) } // 开始监听 // transport.Open() transport.Listen() ch := make(chan thrift.TTransport, 4096) defer close(ch) go func() { var address string for c := range ch { // 为每个Connection建立一个Session socket, ok := c.(SocketAddr) if isUnixDomain { address = p.proxyAddr } else if ok { address = socket.Addr().String() } else { address = "unknow" } x := NewSession(c, address, p.verbose) // Session独立处理自己的请求 go x.Serve(p.router, 1000) } }() // Accept什么时候出错,出错之后如何处理呢? for { c, err := transport.Accept() if err != nil { log.ErrorErrorf(err, "Accept Error: %v", err) break } else { ch <- c } } }
// // 先写入数据,然后再Flush Transport // func (p *TBufferedFramedTransport) FlushBuffer(force bool) error { size := p.Buffer.Len() // 1. 将p.buf的大小以BigEndian模式写入: buf中 buf := p.LenghW[:4] binary.BigEndian.PutUint32(buf, uint32(size)) // log.Printf("----> Frame Size: %d, %v\n", size, buf) // 然后transport中先写入: 长度信息 _, err := p.Writer.Write(buf) if err != nil { return thrift.NewTTransportExceptionFromError(err) } // 2. 然后继续写入p.buf中的数据 if size > 0 { var n int64 if n, err = p.Buffer.WriteTo(p.Writer); err != nil { log.ErrorErrorf(err, "Error Flushing Expect Write: %d, but %d\n", size, n) return thrift.NewTTransportExceptionFromError(err) } } p.nbuffered++ // Buffer重新开始处理数据 p.Buffer.Reset() // Flush Buffer return p.FlushTransport(force) }
// // Client <---> Proxy[BackendConn] <---> RPC Server[包含LB] // BackConn <====> RPC Server // loopReader从RPC Server读取数据,然后根据返回的结果来设置: Client的Request的状态 // // 1. bc.flushRequest // 2. bc.setResponse // func (bc *BackendConn) loopReader(c *TBufferedFramedTransport) { go func() { defer c.Close() for true { // 读取来自后端服务的数据,通过 setResponse 转交给 前端 // client <---> proxy <-----> backend_conn <---> rpc_server // ReadFrame需要有一个度? 如果碰到EOF该如何处理呢? // io.EOF在两种情况下会出现 // resp, err := c.ReadFrame() if err != nil { err1, ok := err.(thrift.TTransportException) if !ok || err1.TypeId() != thrift.END_OF_FILE { log.ErrorErrorf(err, Red("[%s]ReadFrame From Server with Error: %v"), bc.service, err) } bc.flushRequests(err) break } else { bc.setResponse(nil, resp, err) } } }() }
// // 不断建立到后端的逻辑,负责: BackendConn#input到redis的数据的输入和返回 // func (bc *BackendConn) Run() { for k := 0; !bc.IsMarkOffline.Get(); k++ { // 1. 首先BackendConn将当前 input中的数据写到后端服务中 transport, err := bc.ensureConn() if err != nil { log.ErrorErrorf(err, "[%s]BackendConn#ensureConn error: %v", bc.service, err) return } c := NewTBufferedFramedTransport(transport, 100*time.Microsecond, 20) // 2. 将 bc.input 中的请求写入 后端的Rpc Server err = bc.loopWriter(c) // 同步 // 3. 停止接受Request bc.MarkConnActiveFalse() // 4. 将bc.input中剩余的 Request直接出错处理 if err == nil { log.Printf(Red("[%s]BackendConn#loopWriter normal Exit..."), bc.service) break } else { // 对于尚未处理的Request, 直接报错 for i := len(bc.input); i != 0; i-- { r := <-bc.input bc.setResponse(r, nil, err) } } } }
// 从Client读取数据 func (s *Session) loopReader(tasks chan<- *Request, d Dispatcher) error { if d == nil { return errors.New("nil dispatcher") } for !s.quit { // client <--> rpc // 从client读取frames request, err := s.ReadFrame() if err != nil { err1, ok := err.(thrift.TTransportException) if !ok || err1.TypeId() != thrift.END_OF_FILE { // 遇到EOF等错误,就直接结束loopReader // 结束之前需要和后端的back_conn之间处理好关系? log.ErrorErrorf(err, Red("ReadFrame Error: %v"), err) } return err } r, err := s.handleRequest(request, d) if err != nil { return err } else { if s.verbose { log.Info("Succeed Get Result") } // 将请求交给: tasks, 同一个Session中的请求是 tasks <- r } } return nil }
// // 从RPC Backend中读取结果, ReadFrame读取的是一个thrift message // 存在两种情况: // 1. 正常读取thrift message, 然后从frame解码得到seqId, 然后得到request, 结束请求 // 2. 读取错误 // 将现有的requests全部flush回去 // func (bc *BackendConnLB) loopReader(c *TBufferedFramedTransport) { go func() { defer c.Close() for true { // 坚信: EOF只有在连接被关闭的情况下才会发生,其他情况下, Read等操作被会被block住 // EOF有两种情况: // 1. 连接正常关闭,最后数据等完整读取 --> io.EOF // 2. 连接异常关闭,数据不完整 --> io.ErrUnexpectedEOF // // rpc_server ---> backend_conn frame, err := c.ReadFrame() if err != nil { err1, ok := err.(thrift.TTransportException) if !ok || err1.TypeId() != thrift.END_OF_FILE { log.ErrorErrorf(err, Red("ReadFrame From rpc_server with Error: %v\n"), err) } bc.flushRequests(err) break } else { bc.setResponse(nil, frame, err) } } }() }
func (p *ThriftLoadBalanceServer) Run() { // // 1. 创建到zk的连接 // 127.0.0.1:5555 --> 127_0_0_1:5555 exitSignal := make(chan os.Signal, 1) signal.Notify(exitSignal, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL) // syscall.SIGKILL // kill -9 pid // kill -s SIGKILL pid 还是留给运维吧 // // 注册服务 evtExit := make(chan interface{}) serviceEndpoint := RegisterService(p.serviceName, p.frontendAddr, p.lbServiceName, p.topo, evtExit, p.config.WorkDir, p.config.CodeUrlVersion) // var suideTime time.Time // isAlive := true // 3. 读取后端服务的配置 var transport thrift.TServerTransport var err error isUnixDomain := false // 127.0.0.1:9999(以:区分不同的类型) if !strings.Contains(p.frontendAddr, ":") { if FileExist(p.frontendAddr) { os.Remove(p.frontendAddr) } transport, err = NewTServerUnixDomain(p.frontendAddr) isUnixDomain = true } else { transport, err = thrift.NewTServerSocket(p.frontendAddr) } if err != nil { log.ErrorErrorf(err, "Server Socket Create Failed: %v", err) panic(fmt.Sprintf("Invalid FrontendAddress: %s", p.frontendAddr)) } err = transport.Listen() if err != nil { log.ErrorErrorf(err, "Server Socket Create Failed: %v", err) panic(fmt.Sprintf("Binding Error FrontendAddress: %s", p.frontendAddr)) } ch := make(chan thrift.TTransport, 4096) defer close(ch) // 强制退出? TODO: Graceful退出 go func() { <-exitSignal // 通知RegisterService终止循环 evtExit <- true log.Info(Green("Receive Exit Signals....")) serviceEndpoint.DeleteServiceEndpoint(p.topo) start := time.Now().Unix() for true { // 如果5s内没有接受到新的请求了,则退出 now := time.Now().Unix() if now-p.lastRequestTime.Get() > 5 { log.Printf(Red("[%s]Graceful Exit..."), p.serviceName) break } else { log.Printf(Cyan("[%s]Sleeping %d seconds before Exit...\n"), p.serviceName, now-start) time.Sleep(time.Second) } } transport.Interrupt() transport.Close() }() go func() { var address string for c := range ch { // 为每个Connection建立一个Session socket, ok := c.(SocketAddr) if ok { if isUnixDomain { address = p.frontendAddr } else { address = socket.Addr().String() } } else { address = "unknow" } x := NewNonBlockSession(c, address, p.verbose, &p.lastRequestTime) // Session独立处理自己的请求 go x.Serve(p.backendService, 1000) } }() // Accept什么时候出错,出错之后如何处理呢? for { c, err := transport.Accept() if err != nil { close(ch) break } else { ch <- c } } }
// // go test github.com/wfxiang08/rpc_proxy/proxy -v -run "TestSession" // func TestSession(t *testing.T) { // 作为一个Server transport, err := thrift.NewTServerSocket("127.0.0.1:0") assert.NoError(t, err) err = transport.Open() // 打开Transport assert.NoError(t, err) defer transport.Close() err = transport.Listen() // 开始监听 assert.NoError(t, err) addr := transport.Addr().String() fmt.Println("Addr: ", addr) var requestNum int32 = 10 requests := make([]*Request, 0, requestNum) var i int32 for i = 0; i < requestNum; i++ { buf := make([]byte, 100, 100) l := fakeData("Hello", thrift.CALL, i+1, buf[0:0]) buf = buf[0:l] req := NewRequest(buf, true) req.Wait.Add(1) // 因为go routine可能还没有执行,代码就跑到最后面进行校验了 assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId是否靠谱") requests = append(requests, req) } go func() { // 模拟请求: // 客户端代码 bc := NewBackendConn(addr, nil, "test", true) bc.currentSeqId = 10 // 准备发送数据 var i int32 for i = 0; i < requestNum; i++ { fmt.Println("Sending Request to Backend Conn", i) bc.PushBack(requests[i]) requests[i].Wait.Done() } // 需要等待数据返回? time.Sleep(time.Second * 2) }() server := &fakeServer{} go func() { // 服务器端代码 tran, err := transport.Accept() defer tran.Close() if err != nil { log.ErrorErrorf(err, "Error: %v\n", err) } assert.NoError(t, err) session := NewSession(tran, "", true) session.Serve(server, 6) time.Sleep(time.Second * 2) }() for i = 0; i < requestNum; i++ { fmt.Println("===== Before Wait") requests[i].Wait.Wait() fmt.Println("===== Before After Wait") log.Printf("Request: %d, .....", i) assert.Equal(t, len(requests[i].Response.Data), len(requests[i].Request.Data)) } }
// // 如何处理后端服务的变化呢? // func (s *BackService) WatchBackServiceNodes() { s.evtbus = make(chan interface{}, 2) servicePath := s.topo.ProductServicePath(s.serviceName) go func() { for !s.stop.Get() { serviceIds, err := s.topo.WatchChildren(servicePath, s.evtbus) if err == nil { // 如何监听endpoints的变化呢? addressMap := make(map[string]bool, len(serviceIds)) for _, serviceId := range serviceIds { log.Printf(Green("---->Find Endpoint: %s for Service: %s"), serviceId, s.serviceName) endpointInfo, err := GetServiceEndpoint(s.topo, s.serviceName, serviceId) if err != nil { log.ErrorErrorf(err, "Service Endpoint Read Error: %v\n", err) } else { log.Printf(Green("---->Add endpoint %s To Service %s"), endpointInfo.Frontend, s.serviceName) if strings.Contains(endpointInfo.Frontend, ":") { addressMap[endpointInfo.Frontend] = true } else if s.productName == TEST_PRODUCT_NAME { // unix domain socket只在测试的时候可以使用(因为不能实现跨机器访问) addressMap[endpointInfo.Frontend] = true } } } for addr, _ := range addressMap { conn, ok := s.addr2Conn[addr] if ok && !conn.IsMarkOffline.Get() { continue } else { // 创建新的连接(心跳成功之后就自动加入到 s.activeConns 中 s.addr2Conn[addr] = NewBackendConn(addr, s, s.serviceName, s.verbose) } } for addr, conn := range s.addr2Conn { _, ok := addressMap[addr] if !ok { conn.MarkOffline() // 删除: 然后等待Conn自生自灭 delete(s.addr2Conn, addr) } } // 等待事件 <-s.evtbus } else { log.WarnErrorf(err, "zk read failed: %s", servicePath) // 如果读取失败则,则继续等待5s time.Sleep(time.Duration(5) * time.Second) } } }() }
func (s *BackServiceLB) run() { go func() { // 定时汇报当前的状态 for true { log.Printf(Green("[Report]: %s --> %d workers, coroutine: %d"), s.serviceName, s.Active(), runtime.NumGoroutine()) time.Sleep(time.Second * 10) } }() var transport thrift.TServerTransport var err error // 3. 读取后端服务的配置 isUnixDomain := false // 127.0.0.1:9999(以:区分不同的类型) if !strings.Contains(s.backendAddr, ":") { if FileExist(s.backendAddr) { os.Remove(s.backendAddr) } transport, err = NewTServerUnixDomain(s.backendAddr) isUnixDomain = true } else { transport, err = thrift.NewTServerSocket(s.backendAddr) } if err != nil { log.ErrorErrorf(err, "[%s]Server Socket Create Failed: %v", s.serviceName, err) panic("BackendAddr Invalid") } err = transport.Listen() if err != nil { log.ErrorErrorf(err, "[%s]Server Socket Open Failed: %v", s.serviceName, err) panic("Server Socket Open Failed") } // 和transport.open做的事情一样,如果Open没错,则Listen也不会有问题 log.Printf(Green("[%s]LB Backend Services listens at: %s"), s.serviceName, s.backendAddr) s.ch = make(chan thrift.TTransport, 4096) // 强制退出? TODO: Graceful退出 go func() { <-s.exitEvt log.Info(Red("Receive Exit Signals....")) transport.Interrupt() transport.Close() }() go func() { var backendAddr string for trans := range s.ch { // 为每个Connection建立一个Session socket, ok := trans.(SocketAddr) if ok { if isUnixDomain { backendAddr = s.backendAddr } else { backendAddr = socket.Addr().String() } conn := NewBackendConnLB(trans, s.serviceName, backendAddr, s, s.verbose) // 因为连接刚刚建立,可靠性还是挺高的,因此直接加入到列表中 s.activeConnsLock.Lock() conn.Index = len(s.activeConns) s.activeConns = append(s.activeConns, conn) s.activeConnsLock.Unlock() log.Printf(Green("%s --> %d workers"), s.serviceName, conn.Index) } else { panic("Invalid Socket Type") } } }() // Accept什么时候出错,出错之后如何处理呢? go func() { for { c, err := transport.Accept() if err != nil { return } else { s.ch <- c } } }() }
// // go test github.com/wfxiang08/rpc_proxy/proxy -v -run "TestBackend" // func TestBackend(t *testing.T) { // 作为一个Server transport, err := thrift.NewTServerSocket("127.0.0.1:0") assert.NoError(t, err) err = transport.Open() // 打开Transport assert.NoError(t, err) defer transport.Close() err = transport.Listen() // 开始监听 assert.NoError(t, err) addr := transport.Addr().String() fmt.Println("Addr: ", addr) var requestNum int32 = 10 requests := make([]*Request, 0, requestNum) var i int32 for i = 0; i < requestNum; i++ { buf := make([]byte, 100, 100) l := fakeData("Hello", thrift.CALL, i+1, buf[0:0]) buf = buf[0:l] req := NewRequest(buf, false) req.Wait.Add(1) // 因为go routine可能还没有执行,代码就跑到最后面进行校验了 assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId是否靠谱") requests = append(requests, req) } go func() { // 客户端代码 bc := NewBackendConn(addr, nil, "test", true) bc.currentSeqId = 10 // 准备发送数据 var i int32 for i = 0; i < requestNum; i++ { fmt.Println("Sending Request to Backend Conn", i) bc.PushBack(requests[i]) requests[i].Wait.Done() } // 需要等待数据返回? time.Sleep(time.Second * 2) }() go func() { // 服务器端代码 tran, err := transport.Accept() if err != nil { log.ErrorErrorf(err, "Error: %v\n", err) } assert.NoError(t, err) bt := NewTBufferedFramedTransport(tran, time.Microsecond*100, 2) // 在当前的这个t上读写数据 var i int32 for i = 0; i < requestNum; i++ { request, err := bt.ReadFrame() assert.NoError(t, err) req := NewRequest(request, false) assert.Equal(t, req.Request.SeqId, i+10) fmt.Printf("Server Got Request, and SeqNum OK, Id: %d, Frame Size: %d\n", i, len(request)) // 回写数据 bt.Write(request) bt.FlushBuffer(true) } tran.Close() }() fmt.Println("Requests Len: ", len(requests)) for idx, r := range requests { r.Wait.Wait() // r 原始的请求 req := NewRequest(r.Response.Data, false) log.Printf(Green("SeqMatch[%d]: Orig: %d, Return: %d\n"), idx, req.Request.SeqId, r.Request.SeqId) assert.Equal(t, req.Request.SeqId, r.Request.SeqId) } log.Println("OK") }
// // 数据: LB ---> backend services // // 如果input关闭,且loopWriter正常处理完毕之后,返回nil // 其他情况返回error // func (bc *BackendConnLB) loopWriter() error { // 正常情况下, ok总是为True; 除非bc.input的发送者主动关闭了channel, 表示再也没有新的Task过来了 // 参考: https://tour.golang.org/concurrency/4 // 如果input没有关闭,则会block c := NewTBufferedFramedTransport(bc.transport, 100*time.Microsecond, 20) // bc.MarkConnActiveOK() // 准备接受数据 // BackendConnLB 在构造之初就有打开的transport, 并且Active默认为OK bc.hbTicker = time.NewTicker(time.Second) defer func() { bc.hbTicker.Stop() bc.hbStop <- true }() bc.loopReader(c) // 异步 bc.Heartbeat() // 建立连接之后,就启动HB var r *Request var ok bool for true { // 等待输入的Event, 或者 heartbeatTimeout select { case r, ok = <-bc.input: if !ok { return nil } case <-bc.hbTimeout: return errors.New("HB timeout") } // 如果暂时没有数据输入,则p策略可能就有问题了 // 只有写入数据,才有可能产生flush; 如果是最后一个数据必须自己flush, 否则就可能无限期等待 // if r.Request.TypeId == MESSAGE_TYPE_HEART_BEAT { // 过期的HB信号,直接放弃 if time.Now().Unix()-r.Start > 4 { continue } else { // log.Printf(Magenta("Send Heartbeat to %s\n"), bc.Addr4Log()) } } var flush = len(bc.input) == 0 // fmt.Printf("Force flush %t\n", flush) // 1. 替换新的SeqId r.ReplaceSeqId(bc.currentSeqId) // 2. 主动控制Buffer的flush // log.Printf("Request Data Len: %d\n ", len(r.Request.Data)) c.Write(r.Request.Data) err := c.FlushBuffer(flush) if err == nil { bc.IncreaseCurrentSeqId() bc.Lock() bc.seqNum2Request[r.Response.SeqId] = r bc.Unlock() // 继续读取请求, 如果有异常,如何处理呢? } else { log.ErrorErrorf(err, "FlushBuffer Error: %v\n", err) // 进入不可用状态(不可用状态下,通过自我心跳进入可用状态) return bc.setResponse(r, nil, err) } } return nil }