func (p *fakeServer) Dispatch(r *Request) error {
	log.Printf("Request SeqId: %d, MethodName: %s\n", r.Request.SeqId, r.Request.Name)
	r.Wait.Add(1)
	go func() {
		time.Sleep(time.Millisecond)
		r.Response.Data = []byte(string(r.Request.Data))
		typeId, _, seqId, _ := DecodeThriftTypIdSeqId(r.Response.Data)
		log.Printf(Green("TypeId: %d, SeqId: %d\n"), typeId, seqId)
		r.Wait.Done()
	}()
	// r.RestoreSeqId()
	// r.Wait.Done()
	return nil
}
//
// Ensure the socket is connected to the backend server.
//
func (bc *BackendConn) ensureConn() (transport thrift.TTransport, err error) {
	// 1. Create the connection (as long as the address is valid, err is normally nil)
	timeout := time.Second * 5
	if strings.Contains(bc.addr, ":") {
		transport, err = thrift.NewTSocketTimeout(bc.addr, timeout)
	} else {
		transport, err = rpc_utils.NewTUnixDomainTimeout(bc.addr, timeout)
	}
	log.Printf(Cyan("[%s]Create Socket To: %s"), bc.service, bc.addr)

	if err != nil {
		log.ErrorErrorf(err, "[%s]Create Socket Failed: %v, Addr: %s", bc.service, err, bc.addr)
		// Could not connect; give up
		return nil, err
	}

	// 2. As long as the service exists, Open rarely returns an error
	sleepInterval := 1
	err = transport.Open()
	for err != nil && !bc.IsMarkOffline.Get() {
		log.ErrorErrorf(err, "[%s]Socket Open Failed: %v, Addr: %s", bc.service, err, bc.addr)

		// Back off 1s, 2s, then 4s between retries
		time.Sleep(time.Duration(sleepInterval) * time.Second)
		if sleepInterval < 4 {
			sleepInterval *= 2
		}
		err = transport.Open()
	}
	return transport, err
}
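// The retry loop above sleeps 1s, 2s, then 4s between attempts and stays at 4s
// until the connection succeeds or the conn is marked offline. A minimal,
// self-contained sketch of the same capped-backoff pattern; dialWithBackoff and
// its two callbacks are hypothetical stand-ins, not part of the original code:
package main

import (
	"errors"
	"fmt"
	"time"
)

func dialWithBackoff(dial func() error, giveUp func() bool) error {
	sleep := time.Second
	for {
		err := dial()
		if err == nil {
			return nil
		}
		if giveUp() {
			return err
		}
		time.Sleep(sleep)
		if sleep < 4*time.Second {
			sleep *= 2 // 1s -> 2s -> 4s, then stay at 4s
		}
	}
}

func main() {
	attempts := 0
	err := dialWithBackoff(
		func() error { // fake dial: fails twice, then succeeds
			attempts++
			if attempts < 3 {
				return errors.New("connection refused")
			}
			return nil
		},
		func() bool { return false }, // never marked offline in this sketch
	)
	fmt.Println("attempts:", attempts, "err:", err)
}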
// Pair a Request with its response (data, err).
// PARAM: data []byte is one complete thrift frame.
func (bc *BackendConnLB) setResponse(r *Request, data []byte, err error) error {
	// log.Printf("#setResponse: data: %v", data)

	// data == nil means something went wrong
	if data == nil {
		log.Printf("No Data From Server, error: %v\n", err)
		r.Response.Err = err
	} else {
		// Read the basic message info out of the frame
		typeId, method, seqId, err := DecodeThriftTypIdSeqId(data)

		// Decode error: report it directly
		if err != nil {
			log.ErrorErrorf(err, "Decode SeqId Error: %v", err)
			return err
		}

		if typeId == MESSAGE_TYPE_STOP {
			// Stop accepting new input.
			// Sent by the backend service itself (outside the Request/Reply model).
			bc.MarkConnActiveFalse()
			return nil
		}

		// Find the matching Request
		req := bc.seqNumRequestMap.Pop(seqId)

		// A heartbeat response just refreshes the timestamp
		if typeId == MESSAGE_TYPE_HEART_BEAT {
			bc.hbLastTime.Set(time.Now().Unix())
			return nil
		}

		if req == nil {
			log.Errorf("#setResponse not found, seqId: %d", seqId)
			return nil
		} else {
			if req.Response.SeqId != seqId {
				log.Errorf("Data From Server, SeqId not match, Ex: %d, Ret: %d",
					req.Response.SeqId, seqId)
			}
			r = req
			r.Response.TypeId = typeId
			if req.Request.Name != method {
				data = nil
				err = req.NewInvalidResponseError(method, "conn_lb")
			}
		}
	}

	r.Response.Data, r.Response.Err = data, err
	// Restore the client's original SeqId
	if data != nil {
		r.RestoreSeqId()
	}
	r.Wait.Done()
	return err
}
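// setResponse pairs each reply to its in-flight request through a map keyed by
// SeqId. A minimal sketch of that pairing; pendingRequests is a hypothetical
// stand-in for the richer seqNumRequestMap (which also supports expiry/purge):
package main

import (
	"fmt"
	"sync"
)

type pendingRequests struct {
	mu sync.Mutex
	m  map[int32]string // seqId -> request name (stand-in for *Request)
}

func (p *pendingRequests) Add(seqId int32, name string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.m[seqId] = name
}

// Pop removes and returns the request matching seqId, or "" if unknown.
func (p *pendingRequests) Pop(seqId int32) string {
	p.mu.Lock()
	defer p.mu.Unlock()
	name := p.m[seqId]
	delete(p.m, seqId)
	return name
}

func main() {
	pending := &pendingRequests{m: make(map[int32]string)}
	pending.Add(7, "UserService.get")
	fmt.Println(pending.Pop(7)) // UserService.get
	fmt.Println(pending.Pop(7)) // "" -- already paired
}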
func (s *BackService) StateChanged(conn *BackendConn) {
	// log.Printf(Cyan("[%s]StateChanged: %s, Index: %d, Count: %d, IsConnActive: %t"),
	//	s.serviceName, conn.addr, conn.Index, len(s.activeConns),
	//	conn.IsConnActive.Get())
	s.activeConnsLock.Lock()
	defer s.activeConnsLock.Unlock()

	if conn.IsConnActive.Get() {
		// The BackendConn came online
		log.Printf(Cyan("[%s]MarkConnActiveOK: %s, Index: %d, Count: %d"),
			s.serviceName, conn.addr, conn.Index, len(s.activeConns))

		if conn.Index == INVALID_ARRAY_INDEX {
			conn.Index = len(s.activeConns)
			s.activeConns = append(s.activeConns, conn)
			log.Printf(Green("[%s]Add BackendConn to activeConns: %s, Total Actives: %d"),
				s.serviceName, conn.Addr(), len(s.activeConns))
		}
	} else {
		// The BackendConn went offline (remove it quickly)
		connIndex := conn.Index
		if conn.Index != INVALID_ARRAY_INDEX {
			lastIndex := len(s.activeConns) - 1

			// Swap the last element into the vacated slot
			if lastIndex != conn.Index {
				lastConn := s.activeConns[lastIndex]
				s.activeConns[conn.Index] = lastConn
				lastConn.Index = conn.Index
			}

			s.activeConns[lastIndex] = nil
			conn.Index = INVALID_ARRAY_INDEX

			// Truncate the slice
			s.activeConns = s.activeConns[0:lastIndex]
			log.Printf(Red("[%s]Remove BackendConn From activeConns: %s, Remains: %d"),
				s.serviceName, conn.Addr(), len(s.activeConns))
		}
		log.Printf(Red("[%s]Remove BackendConn From activeConns: %s, Index: %d"),
			s.serviceName, conn.Addr(), connIndex)
	}
}
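// The removal above uses the swap-remove idiom: move the last element into the
// vacated slot and truncate, which deletes in O(1) at the cost of element
// order (the original also nils the vacated tail slot so the pointer can be
// collected). The same idiom in isolation, with ints standing in for conns:
package main

import "fmt"

func swapRemove(s []int, i int) []int {
	last := len(s) - 1
	s[i] = s[last]  // move the last element into the vacated slot
	return s[:last] // truncate; order is not preserved
}

func main() {
	conns := []int{10, 20, 30, 40}
	fmt.Println(swapRemove(conns, 1)) // [10 40 30]
}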
//
// How the backend handles a Request.
//
func (s *Router) Dispatch(r *Request) error {
	backService := s.GetBackService(r.Service)
	if backService == nil {
		log.Printf(Cyan("Service Not Found for: %s.%s\n"), r.Service, r.Request.Name)
		r.Response.Data = GetServiceNotFoundData(r)
		return nil
	} else {
		return backService.HandleRequest(r)
	}
}
//
// MarkOffline happens when:
//  1. the backend service is about to go offline and announces it in advance, or
//  2. the backend service has died and zk detects it.
//
// MarkOffline does not close the conn right away; it only removes the conn from
// backend_service_proxy. The actual close is driven by heartbeats and read/write
// errors on the conn, so while IsConnActive = false the heartbeat must not close it.
//
func (bc *BackendConn) MarkOffline() {
	if !bc.IsMarkOffline.Get() {
		log.Printf(Magenta("[%s]BackendConn: %s MarkOffline"), bc.service, bc.addr)
		bc.IsMarkOffline.Set(true)

		// Stop accepting new input (from backend_service_proxy)
		bc.MarkConnActiveFalse()

		close(bc.input)
	}
}
func (bc *BackendConn) MarkConnActiveFalse() {
	if bc.IsConnActive.Get() {
		// Switch from Active to the failed state
		bc.IsConnActive.Set(false)
		if bc.delegate != nil {
			bc.delegate.StateChanged(bc) // notify observers that the state changed
		}
		// Log afterwards, so the control signal takes effect as soon as possible
		log.Printf(Red("[%s]MarkConnActiveFalse: %s, %p"), bc.service, bc.addr, bc.delegate)
	}
}
// When Run starts, the transport has just been established, so the service is fairly reliable.
func (bc *BackendConnLB) Run() {
	log.Printf(Green("[%s]Add New BackendConnLB: %s"), bc.serviceName, bc.address)

	// 1. BackendConn writes the data currently in input to the backend service
	err := bc.loopWriter()

	// 2. Switch from Active to the failed state and stop taking new work from
	//    backend_service_lb. loopWriter may have failed or exited normally;
	//    either way this conn is done.
	bc.MarkConnActiveFalse()

	log.Printf(Red("[%s]Remove Failed BackendConnLB: %s"), bc.serviceName, bc.address)

	if err == nil {
		// bc.input was closed, so there should be no Requests left
	} else {
		// On error, flush everything still in bc.input back with that error
		for i := len(bc.input); i != 0; i-- {
			r := <-bc.input
			bc.setResponse(r, nil, err)
		}
	}
}
func NewThriftRpcServer(config *Config, processor thrift.TProcessor) *ThriftRpcServer {
	log.Printf("FrontAddr: %s\n", Magenta(config.FrontendAddr))

	return &ThriftRpcServer{
		config:       config,
		ZkAddr:       config.ZkAddr,
		ProductName:  config.ProductName,
		ServiceName:  config.Service,
		FrontendAddr: config.FrontendAddr,
		Verbose:      config.Verbose,
		Processor:    processor,
	}
}
func NewNonBlockSessionSize(c thrift.TTransport, address string, verbose bool,
	lastRequestTime *atomic2.Int64, bufsize int, timeout int) *NonBlockSession {
	s := &NonBlockSession{
		RemoteAddress:            address,
		lastRequestTime:          lastRequestTime,
		verbose:                  verbose,
		TBufferedFramedTransport: NewTBufferedFramedTransport(c, time.Microsecond*100, 20),
	}

	// Still reads and writes through c (a net.Conn); only the framed encoding/decoding
	// is layered on top.
	// Reader: handles messages sent by the Client
	// Writer: returns the backend service's data to the Client
	log.Printf(Green("Session From Proxy [%s] created"), address)
	return s
}
//
// Write the data first, then flush the transport.
//
func (p *TBufferedFramedTransport) FlushBuffer(force bool) error {
	size := p.Buffer.Len()

	// Nothing buffered: return immediately
	if size == 0 {
		return nil
	}

	// TODO: optimize
	force = true

	// 1. Write the buffer size into buf in BigEndian order
	buf := p.LenghW[:4]
	binary.BigEndian.PutUint32(buf, uint32(size))
	// log.Printf("----> Frame Size: %d, %v\n", size, buf)

	// Write the length header to the transport first
	_, err := p.Writer.Write(buf)
	if err != nil {
		return thrift.NewTTransportExceptionFromError(err)
	}

	// 2. Then write the payload from p.Buffer
	if size > 0 {
		var (
			n   int64
			err error
		)
		// If err == io.ErrShortWrite, p.Writer is itself buffered, so we need not handle it here
		if n, err = p.Buffer.WriteTo(p.Writer); err != nil {
			log.ErrorErrorf(err, "Error Flushing Expect Write: %d, but %d\n", size, n)
			return thrift.NewTTransportExceptionFromError(err)
		}
		if n < int64(size) {
			log.Printf(Red("Buffer Write Not Finished"))
		}
	}

	p.nbuffered++

	// Reset the buffer for the next frame
	p.Buffer.Reset()

	// Flush the underlying transport
	return p.flushTransport(force)
}
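// FlushBuffer emits a 4-byte big-endian length header followed by the frame
// body, which is the standard thrift framed-transport wire format. A minimal
// encode/decode round trip of that format using only the standard library:
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

func writeFrame(w io.Writer, payload []byte) error {
	var header [4]byte
	binary.BigEndian.PutUint32(header[:], uint32(len(payload)))
	if _, err := w.Write(header[:]); err != nil {
		return err
	}
	_, err := w.Write(payload)
	return err
}

func readFrame(r io.Reader) ([]byte, error) {
	var header [4]byte
	if _, err := io.ReadFull(r, header[:]); err != nil {
		return nil, err
	}
	payload := make([]byte, binary.BigEndian.Uint32(header[:]))
	_, err := io.ReadFull(r, payload)
	return payload, err
}

func main() {
	var buf bytes.Buffer
	writeFrame(&buf, []byte("hello"))
	frame, _ := readFrame(&buf)
	fmt.Printf("%s\n", frame) // hello
}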
// Fail all requests that are still waiting for a response.
func (bc *BackendConnLB) flushRequests(err error) {
	// Tell the BackendService to stop accepting new requests
	bc.MarkConnActiveFalse()

	seqRequest := bc.seqNumRequestMap.Purge()

	for _, request := range seqRequest {
		if request.Request.TypeId == MESSAGE_TYPE_HEART_BEAT {
			// A failed heartbeat is simply skipped
		} else {
			log.Printf(Red("Handle Failed Request: %s.%s"), request.Service, request.Request.Name)
			request.Response.Err = err
			request.Wait.Done()
		}
	}

	// Close the input channel
	close(bc.input)
}
//
// Keep (re)establishing the connection to the backend; responsible for moving
// requests from BackendConn#input to the backend server and handling the replies.
//
func (bc *BackendConn) Run() {
	for k := 0; !bc.IsMarkOffline.Get(); k++ {
		// 1. Make sure we have a live transport to the backend service
		transport, err := bc.ensureConn()
		if err != nil {
			log.ErrorErrorf(err, "[%s]BackendConn#ensureConn error: %v", bc.service, err)
			return
		}

		connOver := &sync.WaitGroup{}
		c := NewTBufferedFramedTransport(transport, 100*time.Microsecond, 20)

		bc.MarkConnActiveOK() // ready to take requests

		connOver.Add(1)
		bc.loopReader(c, connOver) // async: reads the replies coming back from the backend

		// 2. Write the requests in bc.input to the backend Rpc Server
		err = bc.loopWriter(c) // sync

		// 3. Stop accepting Requests
		bc.MarkConnActiveFalse()

		// Wait for the conn to be fully closed
		connOver.Wait()

		// 4. Fail whatever Requests remain in bc.input
		if err == nil {
			log.Printf(Red("[%s]BackendConn#loopWriter normal Exit..."), bc.service)
			break
		} else {
			// Report the error on every unprocessed Request
			for i := len(bc.input); i != 0; i-- {
				r := <-bc.input
				bc.setResponse(r, nil, err)
			}
		}
	}
}
// Handle a request coming from the Client.
func (s *Session) handleRequest(request []byte, d Dispatcher) (*Request, error) {
	// Build the Request
	if s.verbose {
		log.Printf("HandleRequest: %s", string(request))
	}
	r, err := NewRequest(request, true)
	if err != nil {
		return r, err
	}

	// Update the stats
	s.LastOpUnix = time.Now().Unix()
	s.Ops++

	if r.Request.TypeId == MESSAGE_TYPE_HEART_BEAT {
		HandleProxyPingRequest(r) // answered directly by the proxy
		return r, nil
	}

	// Hand it over to the Dispatcher (the Router)
	return r, d.Dispatch(r)
}
func (s *BackService) Stop() {
	// Flag the stop
	s.stop.Set(true)

	// Fire an event (ServiceNodes are no longer watched after this)
	s.evtbus <- true

	go func() {
		// TODO:
		// Wait until the service has been idle for 10s
		for {
			now := time.Now().Unix()
			if now-s.lastRequestTime.Get() > 10 {
				break
			} else {
				time.Sleep(time.Second)
			}
		}
		for len(s.activeConns) > 0 {
			s.activeConns[0].MarkOffline()
		}
		log.Printf(Red("Mark All Connections Off: %s"), s.serviceName)
	}()
}
func NewThriftLoadBalanceServer(config *Config) *ThriftLoadBalanceServer {
	log.Printf("FrontAddr: %s\n", Magenta(config.FrontendAddr))

	// The front end talks to rpc_proxy
	p := &ThriftLoadBalanceServer{
		config:       config,
		zkAddr:       config.ZkAddr,
		productName:  config.ProductName,
		serviceName:  config.Service,
		frontendAddr: config.FrontendAddr,
		backendAddr:  config.BackAddr,
		verbose:      config.Verbose,
		exitEvt:      make(chan bool),
	}

	p.topo = NewTopology(p.productName, p.zkAddr)
	p.lbServiceName = GetServiceIdentity(p.frontendAddr)

	// The back end talks to the various python rpc servers
	p.backendService = NewBackServiceLB(p.serviceName, p.backendAddr, p.verbose,
		p.config.FalconClient, p.exitEvt)
	return p
}
// Create a BackService.
func NewBackService(productName string, serviceName string, topo *Topology, verbose bool) *BackService {
	service := &BackService{
		productName: productName,
		serviceName: serviceName,
		activeConns: make([]*BackendConn, 0, 10),
		addr2Conn:   make(map[string]*BackendConn),
		topo:        topo,
		verbose:     verbose,
	}

	service.WatchBackServiceNodes()

	go func() {
		for !service.stop.Get() {
			log.Printf(Blue("[Report]: %s --> %d backservice, coroutine: %d"),
				service.serviceName, service.Active(), runtime.NumGoroutine())
			time.Sleep(time.Second * 10)
		}
	}()

	return service
}
func RpcMain(binaryName string, serviceDesc string, configCheck ConfigCheck,
	serverFactory ServerFactorory, buildDate string, gitVersion string) {

	// 1. Parse the command line arguments
	usage = fmt.Sprintf(usage, binaryName, binaryName)

	version := fmt.Sprintf("Version: %s\nBuildDate: %s\nDesc: %s\nAuthor: [email protected]",
		gitVersion, buildDate, serviceDesc)
	args, err := docopt.Parse(usage, nil, true, version, true)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	if s, ok := args["-V"].(bool); ok && s {
		fmt.Println(Green(version))
		os.Exit(1)
	}

	// This is why Codis starts an http server: it exposes the profiling endpoints
	if s, ok := args["--profile-addr"].(string); ok && len(s) > 0 {
		go func() {
			log.Printf(Red("Profile Address: %s"), s)
			log.Println(http.ListenAndServe(s, nil))
		}()
	}

	// 2. Parse the log configuration
	log.SetLevel(log.LEVEL_INFO)

	var maxKeepDays int = 3
	if s, ok := args["--log-keep-days"].(string); ok && s != "" {
		v, err := strconv.ParseInt(s, 10, 32)
		if err != nil {
			log.PanicErrorf(err, "invalid max log file keep days = %s", s)
		}
		maxKeepDays = int(v)
	}

	// Set the output log file
	if s, ok := args["-L"].(string); ok && s != "" {
		f, err := log.NewRollingFile(s, maxKeepDays)
		if err != nil {
			log.PanicErrorf(err, "open rolling log file failed: %s", s)
		} else {
			defer f.Close()
			log.StdLog = log.New(f, "")
		}
	}
	log.SetLevel(log.LEVEL_INFO)
	log.SetFlags(log.Flags() | log.Lshortfile)

	// Set the log level
	if s, ok := args["--log-level"].(string); ok && s != "" {
		SetLogLevel(s)
	}

	// These are optional
	workDir, _ := args["--work-dir"].(string)
	codeUrlVersion, _ := args["--code-url-version"].(string)
	if len(workDir) == 0 {
		workDir, _ = os.Getwd()
	}

	log.Printf("WorkDir: %s, CodeUrl: %s", workDir, codeUrlVersion)

	// 3. Parse the Config
	configFile := args["-c"].(string)
	conf, err := LoadConf(configFile)
	if err != nil {
		log.PanicErrorf(err, "load config failed")
	}

	// Extra configuration
	conf.WorkDir = workDir
	conf.CodeUrlVersion = codeUrlVersion

	if configCheck != nil {
		configCheck(conf)
	} else {
		log.Panic("No Config Check Given")
	}

	// Print the version info on every start
	log.Infof(Green("-----------------\n%s\n--------------------------------------------------------------------"), version)

	// Start the server
	server := serverFactory(conf)
	server.Run()
}
func (p *ThriftLoadBalanceServer) Run() {
	// 1. Create the connection to zk
	// 127.0.0.1:5555 --> 127_0_0_1:5555

	exitSignal := make(chan os.Signal, 1)
	signal.Notify(exitSignal, syscall.SIGTERM, syscall.SIGINT)
	// kill -9 pid / kill -s SIGKILL pid cannot be caught; that is left to ops

	// 2. Register the service
	evtExit := make(chan interface{})

	// Initial state: not online yet
	var state atomic2.Bool
	state.Set(false)
	stateChan := make(chan bool)
	serviceEndpoint := RegisterService(p.serviceName, p.frontendAddr, p.lbServiceName,
		p.topo, evtExit, p.config.WorkDir, p.config.CodeUrlVersion, &state, stateChan)

	// var suideTime time.Time
	// isAlive := true

	// 3. Read the backend service configuration
	var transport thrift.TServerTransport
	var err error

	isUnixDomain := false
	// 127.0.0.1:9999 (a ":" marks a TCP address; otherwise it is a unix domain socket path)
	if !strings.Contains(p.frontendAddr, ":") {
		if rpc_utils.FileExist(p.frontendAddr) {
			os.Remove(p.frontendAddr)
		}
		transport, err = rpc_utils.NewTServerUnixDomain(p.frontendAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(p.frontendAddr)
	}

	if err != nil {
		log.ErrorErrorf(err, "Server Socket Create Failed: %v", err)
		panic(fmt.Sprintf("Invalid FrontendAddress: %s", p.frontendAddr))
	}

	err = transport.Listen()
	if err != nil {
		log.ErrorErrorf(err, "Server Socket Listen Failed: %v", err)
		panic(fmt.Sprintf("Binding Error FrontendAddress: %s", p.frontendAddr))
	}

	ch := make(chan thrift.TTransport, 4096)
	defer close(ch)

	// Wait for the backend services to come up
	waitTicker := time.NewTicker(time.Second)

	// Go-online strategy:
	// 1. Only after a valid Worker has registered do we wait another 5s before registering
	//    with zk; this avoids taking requests before any Worker has connected.
	// 2. Once registered, the strategy is dropped, so a service failure does not make the
	//    lb update zk repeatedly and force proxies to re-read zk over and over.
START_WAIT:
	for {
		select {
		case <-waitTicker.C:
			if p.backendService.Active() <= 0 {
				log.Infof("Sleep Waiting for back Service to Start")
				time.Sleep(time.Second)
			} else {
				break START_WAIT
			}
		case <-exitSignal:
			// Exit immediately
			transport.Interrupt()
			transport.Close()
			return
		}
	}
	log.Infof("Stop Waiting")

	// Stop the waitTicker, wait a little longer, then continue
	waitTicker.Stop()
	time.Sleep(time.Second * 5)

	log.Infof("Begin to Reg To Zk...")
	state.Set(true)
	stateChan <- true

	// Forced exit? TODO: graceful exit
	go func() {
		<-exitSignal

		// Tell RegisterService to stop its loop
		evtExit <- true
		log.Info(Green("Receive Exit Signals...."))
		serviceEndpoint.DeleteServiceEndpoint(p.topo)

		start := time.Now().Unix()
		for {
			// Exit once no new request has arrived for 5s
			now := time.Now().Unix()
			if now-p.lastRequestTime.Get() > 5 {
				log.Printf(Red("[%s]Graceful Exit..."), p.serviceName)
				break
			} else {
				log.Printf(Cyan("[%s]Sleeping %d seconds before Exit...\n"),
					p.serviceName, now-start)
				time.Sleep(time.Second)
			}
		}
		transport.Interrupt()
		transport.Close()
	}()

	go func() {
		var address string
		for c := range ch {
			// Create a Session for each Connection
			socket, ok := c.(rpc_utils.SocketAddr)

			if ok {
				if isUnixDomain {
					address = p.frontendAddr
				} else {
					address = socket.Addr().String()
				}
			} else {
				address = "unknown"
			}
			x := NewNonBlockSession(c, address, p.verbose, &p.lastRequestTime)
			// Each Session serves its own requests independently
			go x.Serve(p.backendService, 1000)
		}
	}()

	// When does Accept fail, and what should happen when it does?
	for {
		c, err := transport.Accept()
		if err != nil {
			break // ch is closed by the deferred close above
		} else {
			ch <- c
		}
	}
}
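// Both Run methods use the same graceful-exit recipe: deregister from zk, then
// poll lastRequestTime until the service has been idle for 5s before closing
// the listener. A stripped-down sketch of that idle-drain loop; waitUntilIdle
// is a hypothetical helper, not part of the original code:
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// waitUntilIdle blocks until no request has arrived for idleWindow.
func waitUntilIdle(lastRequestUnix *int64, idleWindow time.Duration) {
	for {
		idle := time.Now().Unix() - atomic.LoadInt64(lastRequestUnix)
		if idle > int64(idleWindow.Seconds()) {
			return
		}
		time.Sleep(time.Second)
	}
}

func main() {
	last := time.Now().Unix() - 10 // pretend the last request arrived 10s ago
	waitUntilIdle(&last, 5*time.Second)
	fmt.Println("idle long enough; safe to close the listener")
}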
// // go test proxy -v -run "TestSession" // func TestSession(t *testing.T) { // 作为一个Server transport, err := thrift.NewTServerSocket("127.0.0.1:0") err = transport.Open() // 打开Transport defer transport.Close() err = transport.Listen() // 开始监听 assert.NoError(t, err) addr := transport.Addr().String() fmt.Println("Addr: ", addr) // 1. Fake Requests var requestNum int32 = 10 requests := make([]*Request, 0, requestNum) var i int32 for i = 0; i < requestNum; i++ { buf := make([]byte, 100, 100) l := fakeData("Hello", thrift.CALL, i+1, buf[0:0]) buf = buf[0:l] req, _ := NewRequest(buf, true) req.Wait.Add(1) // 因为go routine可能还没有执行,代码就跑到最后面进行校验了 assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId是否靠谱") requests = append(requests, req) } // 2. 将请求交给BackendConn go func() { // 模拟请求: // 客户端代码 bc := NewBackendConn(addr, nil, "test", true) bc.currentSeqId = 10 // 上线 BackendConn bc.IsConnActive.Set(true) // 准备发送数据 var i int32 for i = 0; i < requestNum; i++ { fmt.Println("Sending Request to Backend Conn", i) bc.PushBack(requests[i]) requests[i].Wait.Done() } // 需要等待数据返回? time.Sleep(time.Second * 2) }() server := &fakeServer{} go func() { // 服务器端代码 tran, err := transport.Accept() defer tran.Close() if err != nil { log.ErrorErrorf(err, "Error: %v\n", err) } assert.NoError(t, err) // 建立一个长连接, 同上面的: NewBackendConn通信 session := NewSession(tran, "", true) session.Serve(server, 6) time.Sleep(time.Second * 2) }() for i = 0; i < requestNum; i++ { fmt.Println("===== Before Wait") requests[i].Wait.Wait() fmt.Println("===== Before After Wait") log.Printf("Request: %d, .....", i) assert.Equal(t, len(requests[i].Request.Data), len(requests[i].Response.Data)) } }
func (p *ThriftRpcServer) Run() {
	// 1. Create the connection to zk
	p.Topo = NewTopology(p.ProductName, p.ZkAddr)

	// 127.0.0.1:5555 --> 127_0_0_1:5555
	lbServiceName := GetServiceIdentity(p.FrontendAddr)

	exitSignal := make(chan os.Signal, 1)
	signal.Notify(exitSignal, syscall.SIGTERM, syscall.SIGINT)
	// kill -9 pid / kill -s SIGKILL pid cannot be caught; that is left to ops

	StartTicker(p.config.FalconClient, p.ServiceName)

	// Initial state: not online yet
	var state atomic2.Bool
	state.Set(false)
	stateChan := make(chan bool)

	// 2. Register the service
	evtExit := make(chan interface{})
	endpoint := RegisterService(p.ServiceName, p.FrontendAddr, lbServiceName,
		p.Topo, evtExit, p.config.WorkDir, p.config.CodeUrlVersion, &state, stateChan)

	// 3. Read the "frontend" configuration
	var transport thrift.TServerTransport
	var err error
	isUnixDomain := false
	// 127.0.0.1:9999 (a ":" marks a TCP address; otherwise it is a unix domain socket path)
	if !strings.Contains(p.FrontendAddr, ":") {
		if rpc_utils.FileExist(p.FrontendAddr) {
			os.Remove(p.FrontendAddr)
		}
		transport, err = rpc_utils.NewTServerUnixDomain(p.FrontendAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(p.FrontendAddr)
	}
	if err != nil {
		log.ErrorErrorf(err, Red("Server Socket Create Failed: %v"), err)
		panic(fmt.Sprintf("Invalid FrontendAddr: %s", p.FrontendAddr))
	}

	err = transport.Listen()
	if err != nil {
		log.ErrorErrorf(err, Red("Server Socket Open Failed: %v"), err)
		panic(fmt.Sprintf("Server Socket Open Failed: %s", p.FrontendAddr))
	}

	ch := make(chan thrift.TTransport, 4096)
	defer close(ch)

	// Forced exit? TODO: graceful exit
	go func() {
		<-exitSignal
		evtExit <- true
		log.Info(Magenta("Receive Exit Signals...."))
		endpoint.DeleteServiceEndpoint(p.Topo)

		// Drain
		start := time.Now().Unix()
		for {
			// Exit once no new request has arrived for 5s
			now := time.Now().Unix()
			if now-p.lastRequestTime.Get() > 5 {
				log.Info(Red("Graceful Exit..."))
				break
			} else {
				log.Printf(Cyan("Sleeping %d seconds\n"), now-start)
				time.Sleep(time.Second)
			}
		}
		transport.Interrupt()
		transport.Close()
	}()

	go func() {
		var address string
		for c := range ch {
			// Create a Session for each Connection
			socket, ok := c.(rpc_utils.SocketAddr)
			if ok {
				if isUnixDomain {
					address = p.FrontendAddr
				} else {
					address = socket.Addr().String()
				}
			} else {
				address = "unknown"
			}
			x := NewNonBlockSession(c, address, p.Verbose, &p.lastRequestTime)
			// Each Session serves its own requests independently
			go x.Serve(p, 1000)
		}
	}()

	// Ready to go online
	state.Set(true)
	stateChan <- true

	// When does Accept fail, and what should happen when it does?
	for {
		c, err := transport.Accept()
		if err != nil {
			break
		} else {
			ch <- c
		}
	}
}
func (s *Session) Close() error {
	log.Printf(Red("Close Proxy Session"))
	return s.TBufferedFramedTransport.Close()
}
//
// Write the Requests from bc.input to the backend server.
//
func (bc *BackendConn) loopWriter(c *TBufferedFramedTransport) error {
	defer func() {
		// Stop the heartbeat Ticker
		bc.hbTicker.Stop()
		bc.hbTicker = nil
	}()

	var r *Request
	var ok bool

	// Prepare the heartbeat Ticker
	bc.hbTicker = time.NewTicker(time.Second)
	bc.hbLastTime.Set(time.Now().Unix())

	for {
		// Wait for an input event or the heartbeat tick
		select {
		case <-bc.hbTicker.C:
			if time.Now().Unix()-bc.hbLastTime.Get() > HB_TIMEOUT {
				return fmt.Errorf("[%s]HB timeout", bc.service)
			} else {
				// Periodically enqueue a Ping; once marked offline, stop heartbeating
				if !bc.IsMarkOffline.Get() {
					// Send the heartbeat
					r := NewPingRequest()
					bc.PushBack(r)

					// Also sweep expired requests (times in microseconds):
					// a request has expired when
					//   microseconds() - request.Start > REQUEST_EXPIRED_TIME_MICRO,
					// i.e. request.Start < microseconds() - REQUEST_EXPIRED_TIME_MICRO
					expired := microseconds() - REQUEST_EXPIRED_TIME_MICRO
					bc.seqNumRequestMap.RemoveExpired(expired)
				}
			}

		case r, ok = <-bc.input:
			if !ok {
				return nil
			} else {
				// Flushing strategy: only writes can trigger a flush, so the last
				// queued request must force a flush itself, otherwise it could wait
				// indefinitely.
				if r.Request.TypeId == MESSAGE_TYPE_HEART_BEAT {
					// Note heartbeat signals that are already stale
					if time.Now().Unix()-r.Start > 4 {
						log.Printf(Magenta("Expired HB Signal"))
					}
				}

				// Forward the request to the backend Rpc Server
				var flush = len(bc.input) == 0

				// 1. Assign a new SeqId
				r.ReplaceSeqId(bc.currentSeqId)
				bc.IncreaseCurrentSeqId()

				// 2. Record SeqId <--> Request before sending the request.
				//    Otherwise the response could come back before the record exists,
				//    and the Request would be lost.
				bc.seqNumRequestMap.Add(r.Response.SeqId, r)

				// 3. Control the Buffer flush explicitly
				c.Write(r.Request.Data)
				err := c.FlushBuffer(flush)

				if err == nil {
					log.Debugf("--> SeqId: %d vs. %d To Backend",
						r.Request.SeqId, r.Response.SeqId)
				} else {
					bc.seqNumRequestMap.Pop(r.Response.SeqId) // the write failed, so remove the record again

					// Enter the unavailable state (heartbeats bring it back to available)
					return bc.setResponse(r, nil, err)
				}
			}
		}
	}
}
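// loopWriter gives every outgoing request a proxy-local SeqId (ReplaceSeqId),
// so that ids stay unique on the shared backend connection, and the client's
// original id is restored before answering (RestoreSeqId in setResponse). A toy
// version of that rewrite/restore bookkeeping, with hypothetical field names:
package main

import "fmt"

type proxiedRequest struct {
	clientSeqId  int32 // id the client used on its own connection
	backendSeqId int32 // id the proxy used on the shared backend connection
}

func main() {
	nextBackendId := int32(100) // per-backend-conn counter (currentSeqId in the original)
	inFlight := map[int32]*proxiedRequest{}

	// Outbound: rewrite the seq id and record the mapping before sending
	r := &proxiedRequest{clientSeqId: 7}
	r.backendSeqId = nextBackendId
	nextBackendId++
	inFlight[r.backendSeqId] = r

	// Inbound: pair by backend id, then answer with the client's original id
	resp := inFlight[100]
	delete(inFlight, 100)
	fmt.Println("reply to client with SeqId:", resp.clientSeqId) // 7
}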
//
// Two parameters are required: ProductName and zkAddress; frontAddr can be used for testing.
//
func (p *ProxyServer) Run() {
	var transport thrift.TServerTransport
	var err error

	log.Printf(Magenta("Start Proxy at Address: %s"), p.proxyAddr)

	// Read the backend service configuration
	isUnixDomain := false
	if !strings.Contains(p.proxyAddr, ":") {
		if rpc_utils.FileExist(p.proxyAddr) {
			os.Remove(p.proxyAddr)
		}
		transport, err = rpc_utils.NewTServerUnixDomain(p.proxyAddr)
		isUnixDomain = true
	} else {
		transport, err = thrift.NewTServerSocket(p.proxyAddr)
	}
	if err != nil {
		log.ErrorErrorf(err, "Server Socket Create Failed: %v, Front: %s", err, p.proxyAddr)
		return
	}

	// Start listening
	// transport.Open()
	err = transport.Listen()
	if err != nil {
		log.ErrorErrorf(err, "Server Socket Listen Failed: %v, Front: %s", err, p.proxyAddr)
		return
	}

	ch := make(chan thrift.TTransport, 4096)
	defer close(ch)

	defer func() {
		log.Infof(Red("==> Exit rpc_proxy"))
		if err := recover(); err != nil {
			log.Infof("Error rpc_proxy: %s", err)
		}
	}()

	go func() {
		var address string
		for c := range ch {
			// Create a Session for each Connection
			socket, ok := c.(rpc_utils.SocketAddr)

			if isUnixDomain {
				address = p.proxyAddr
			} else if ok {
				address = socket.Addr().String()
			} else {
				address = "unknown"
			}
			x := NewSession(c, address, p.verbose)
			// Each Session serves its own requests independently
			go x.Serve(p.router, 1000)
		}
	}()

	// When does Accept fail, and what should happen when it does?
	for {
		c, err := transport.Accept()
		if err != nil {
			log.ErrorErrorf(err, "Accept Error: %v", err)
			break
		} else {
			ch <- c
		}
	}
}
//
// How do we react to backend service changes?
//
func (s *BackService) WatchBackServiceNodes() {
	s.evtbus = make(chan interface{}, 2)
	servicePath := s.topo.ProductServicePath(s.serviceName)

	go func() {
		for !s.stop.Get() {
			serviceIds, err := s.topo.WatchChildren(servicePath, s.evtbus)

			if err == nil {
				// Which endpoints exist right now?
				addressMap := make(map[string]bool, len(serviceIds))

				for _, serviceId := range serviceIds {
					log.Printf(Green("---->Find Endpoint: %s for Service: %s"), serviceId, s.serviceName)
					endpointInfo, err := GetServiceEndpoint(s.topo, s.serviceName, serviceId)

					if err != nil {
						log.ErrorErrorf(err, "Service Endpoint Read Error: %v\n", err)
					} else {
						log.Printf(Green("---->Add endpoint %s To Service %s"),
							endpointInfo.Frontend, s.serviceName)

						if strings.Contains(endpointInfo.Frontend, ":") {
							addressMap[endpointInfo.Frontend] = true
						} else if s.productName == TEST_PRODUCT_NAME {
							// Unix domain sockets are usable only in tests
							// (they cannot be reached across machines)
							addressMap[endpointInfo.Frontend] = true
						}
					}
				}

				for addr := range addressMap {
					conn, ok := s.addr2Conn[addr]
					if ok && !conn.IsMarkOffline.Get() {
						continue
					} else {
						// Create a new connection (it joins s.activeConns automatically
						// once its heartbeat succeeds)
						s.addr2Conn[addr] = NewBackendConn(addr, s, s.serviceName, s.verbose)
					}
				}

				for addr, conn := range s.addr2Conn {
					_, ok := addressMap[addr]
					if !ok {
						conn.MarkOffline()

						// Delete it and let the Conn wind itself down
						delete(s.addr2Conn, addr)
					}
				}

				// Wait for the next event
				<-s.evtbus
			} else {
				log.WarnErrorf(err, "zk read failed: %s", servicePath)
				// On read failure, wait 5s and retry
				time.Sleep(time.Duration(5) * time.Second)
			}
		}
	}()
}
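// The watcher above re-registers the child watch on every pass and then blocks
// on the event channel; on zk errors it backs off 5s and retries. The bare
// shape of that loop, with hypothetical stop and watchChildren callbacks
// standing in for the Topology API:
package main

import (
	"fmt"
	"time"
)

func watchLoop(stop func() bool, watchChildren func(evt chan interface{}) ([]string, error)) {
	evtbus := make(chan interface{}, 2)
	for !stop() {
		children, err := watchChildren(evtbus) // lists children and re-arms the watch
		if err != nil {
			time.Sleep(5 * time.Second) // zk hiccup: back off and retry
			continue
		}
		fmt.Println("reconcile connections against:", children)
		<-evtbus // block until the watch fires again
	}
}

func main() {
	pass := 0
	watchLoop(
		func() bool { pass++; return pass > 1 }, // run a single pass in this sketch
		func(evt chan interface{}) ([]string, error) {
			evt <- struct{}{} // simulate the watch firing immediately
			return []string{"service_0000000001"}, nil
		},
	)
}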
// Called explicitly at startup.
func StartTicker(falconClient string, service string) {
	// Without a monitoring config there is nothing to do
	if len(falconClient) == 0 {
		return
	}

	log.Printf(Green("Log to falconClient: %s"), falconClient)

	cmdstats.histMaps = make(chan *OpStatsInfo, 5)
	cmdstats.ticker = time.NewTicker(time.Minute)

	var statsInfo *OpStatsInfo
	hostname := utils.Hostname()
	go func() {
		for {
			statsInfo = <-cmdstats.histMaps

			// Prepare the metrics to send (timeouts still need handling)
			metrics := make([]*utils.MetaData, 0, 3)
			t := statsInfo.timestamp.Unix()
			for method, stats := range statsInfo.opmap {
				metricCount := &utils.MetaData{
					Metric:      fmt.Sprintf("%s.%s.calls", service, method),
					Endpoint:    hostname,
					Value:       stats.Calls(),
					CounterType: utils.DATA_TYPE_GAUGE,
					Tags:        EMPTY_STR,
					Timestamp:   t,
					Step:        60, // one sample per minute
				}
				metricAvg := &utils.MetaData{
					Metric:      fmt.Sprintf("%s.%s.avgrt", service, method),
					Endpoint:    hostname,
					Value:       float64(stats.USecsPerCall()) * 0.001, // unit: ms
					CounterType: utils.DATA_TYPE_GAUGE,
					Tags:        EMPTY_STR,
					Timestamp:   t,
					Step:        60, // one sample per minute
				}
				metrics = append(metrics, metricCount, metricAvg)
			}

			// Ship the data to the local agent with a 10s timeout
			log.Printf("Send %d Metrics....", len(metrics))
			if len(metrics) > 0 {
				utils.SendData(metrics, falconClient, time.Second*10)
			}
		}
	}()

	go func() {
		// Endless loop: killed automatically when the process exits
		var t time.Time
		for t = range cmdstats.ticker.C {
			// At each tick, hand the last minute's statistics over for reporting
			cmdstats.rwlck.Lock()
			cmdstats.histMaps <- &OpStatsInfo{
				opmap:     cmdstats.opmap,
				timestamp: t,
			}
			cmdstats.opmap = make(map[string]*OpStats)
			cmdstats.rwlck.Unlock()
		}
	}()
}
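// StartTicker snapshots the per-minute stats by swapping the live opmap for a
// fresh one under the lock and shipping the old map through a channel, so
// counting never blocks on the network send. The swap in miniature;
// statsCollector is a hypothetical stand-in for cmdstats:
package main

import (
	"fmt"
	"sync"
)

type statsCollector struct {
	mu    sync.Mutex
	opmap map[string]int
}

func (s *statsCollector) Incr(method string) {
	s.mu.Lock()
	s.opmap[method]++
	s.mu.Unlock()
}

// snapshot swaps in an empty map and returns the old one for reporting.
func (s *statsCollector) snapshot() map[string]int {
	s.mu.Lock()
	old := s.opmap
	s.opmap = make(map[string]int)
	s.mu.Unlock()
	return old
}

func main() {
	s := &statsCollector{opmap: make(map[string]int)}
	s.Incr("UserService.get")
	s.Incr("UserService.get")
	fmt.Println(s.snapshot()) // map[UserService.get:2]
	fmt.Println(s.snapshot()) // map[] -- counters reset each interval
}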
// // go test proxy -v -run "TestBackend" // func TestBackend(t *testing.T) { // 作为一个Server transport, err := thrift.NewTServerSocket("127.0.0.1:0") assert.NoError(t, err) err = transport.Open() // 打开Transport assert.NoError(t, err) defer transport.Close() err = transport.Listen() // 开始监听 assert.NoError(t, err) addr := transport.Addr().String() fmt.Println("Addr: ", addr) var requestNum int32 = 10 requests := make([]*Request, 0, requestNum) var i int32 for i = 0; i < requestNum; i++ { buf := make([]byte, 100, 100) l := fakeData("Hello", thrift.CALL, i+1, buf[0:0]) buf = buf[0:l] req, _ := NewRequest(buf, false) req.Wait.Add(1) // 因为go routine可能还没有执行,代码就跑到最后面进行校验了 assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId是否靠谱") requests = append(requests, req) } go func() { // 客户端代码 bc := NewBackendConn(addr, nil, "test", true) bc.currentSeqId = 10 // 准备发送数据 var i int32 for i = 0; i < requestNum; i++ { fmt.Println("Sending Request to Backend Conn", i) bc.PushBack(requests[i]) requests[i].Wait.Done() } // 需要等待数据返回? time.Sleep(time.Second * 2) }() go func() { // 服务器端代码 tran, err := transport.Accept() if err != nil { log.ErrorErrorf(err, "Error: %v\n", err) } assert.NoError(t, err) bt := NewTBufferedFramedTransport(tran, time.Microsecond*100, 2) // 在当前的这个t上读写数据 var i int32 for i = 0; i < requestNum; i++ { request, err := bt.ReadFrame() assert.NoError(t, err) req, _ := NewRequest(request, false) assert.Equal(t, req.Request.SeqId, i+10) fmt.Printf("Server Got Request, and SeqNum OK, Id: %d, Frame Size: %d\n", i, len(request)) // 回写数据 bt.Write(request) bt.FlushBuffer(true) } tran.Close() }() fmt.Println("Requests Len: ", len(requests)) for idx, r := range requests { r.Wait.Wait() // r 原始的请求 req, _ := NewRequest(r.Response.Data, false) log.Printf(Green("SeqMatch[%d]: Orig: %d, Return: %d\n"), idx, req.Request.SeqId, r.Request.SeqId) assert.Equal(t, req.Request.SeqId, r.Request.SeqId) } log.Println("OK") }