//
// Fetch the next available Worker
//
func (pq *PriorityQueue) NextWorker() *Worker {
    now := time.Now()
    for pq.Len() > 0 {
        result := (*pq)[0]
        if result.index != INVALID_INDEX && result.Expire.After(now) {
            // As long as the worker is alive it stays in the priority queue, waiting for tasks
            result.priority -= 1 // demote the worker so load spreads across workers
            heap.Fix(pq, result.index)
            return result
        } else if result.index == INVALID_INDEX {
            // A head element with an invalid index means the queue is corrupted;
            // pop it so the loop can make progress
            log.Errorf("Invalid Item index in PriorityQueue#NextWorker")
            heap.Pop(pq)
        } else {
            log.Println("Worker Expired")
            // Only expired workers are removed from the queue
            heap.Remove(pq, result.index)
        }
    }
    log.Println("No Worker Available...")
    return nil
}
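// NextWorker relies on PriorityQueue implementing heap.Interface and on every
// Worker carrying its own slice position in its index field; heap.Fix and
// heap.Remove only behave correctly if Swap/Push/Pop keep that field in sync.
// The following is a minimal sketch of that assumed contract, not the
// project's actual code (PriorityQueue is assumed to be []*Worker):
func (pq PriorityQueue) Len() int { return len(pq) }

func (pq PriorityQueue) Less(i, j int) bool {
    // Workers with higher priority are handed out first
    return pq[i].priority > pq[j].priority
}

func (pq PriorityQueue) Swap(i, j int) {
    pq[i], pq[j] = pq[j], pq[i]
    pq[i].index = i // keep index fields valid for heap.Fix/heap.Remove
    pq[j].index = j
}

func (pq *PriorityQueue) Push(x interface{}) {
    worker := x.(*Worker)
    worker.index = len(*pq)
    *pq = append(*pq, worker)
}

func (pq *PriorityQueue) Pop() interface{} {
    old := *pq
    n := len(old)
    worker := old[n-1]
    worker.index = INVALID_INDEX // mark as no longer in the queue
    *pq = old[0 : n-1]
    return worker
}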
// Create a BackService
func NewBackService(serviceName string, poller *zmq.Poller, topo *zk.Topology) *BackService {
    backSockets := NewBackSockets(poller)

    service := &BackService{
        ServiceName: serviceName,
        backend:     backSockets,
        poller:      poller,
        topo:        topo,
    }

    var evtbus chan interface{} = make(chan interface{}, 2)
    servicePath := topo.ProductServicePath(serviceName)
    endpoints, err := topo.WatchChildren(servicePath, evtbus)
    if err != nil {
        log.Println("Error: ", err)
        panic("Reading Service List Failed")
    }

    go func() {
        for {
            // Rebuild the full endpoint set every time the children change
            addrSet := make(map[string]bool)
            nowStr := time.Now().Format("@2006-01-02 15:04:05")
            for _, endpoint := range endpoints {
                log.Println(utils.Green("---->Find Endpoint: "), endpoint, "For Service: ", serviceName)
                endpointInfo, _ := topo.GetServiceEndPoint(serviceName, endpoint)
                addr, ok := endpointInfo["frontend"]
                if ok {
                    addrStr := addr.(string)
                    log.Println(utils.Green("---->Add endpoint to backend: "), addrStr, nowStr, "For Service: ", serviceName)
                    addrSet[addrStr] = true
                }
            }
            service.backend.UpdateEndpointAddrs(addrSet)

            // Wait for the next event
            <-evtbus
            // Re-read the children and re-arm the watch
            endpoints, err = topo.WatchChildren(servicePath, evtbus)
        }
    }()

    ticker := time.NewTicker(time.Millisecond * 1000)
    go func() {
        for range ticker.C {
            service.backend.PurgeEndpoints()
        }
    }()
    return service
}
//
// Delete a Service Endpoint
//
func (s *ServiceEndpoint) DeleteServiceEndpoint(top *zk.Topology) {
    path := top.ProductServiceEndPointPath(s.Service, s.ServiceId)
    if ok, _ := top.Exist(path); ok {
        zkhelper.DeleteRecursive(top.ZkConn, path, -1)
        log.Println(Red("DeleteServiceEndpoint"), "Path: ", path)
    }
}
// Create the BackServices
func NewBackServices(poller *zmq.Poller, productName string, topo *zk.Topology) *BackServices {
    result := &BackServices{
        Services:        make(map[string]*BackService),
        OfflineServices: make(map[string]*BackService),
        poller:          poller,
        topo:            topo,
    }

    var evtbus chan interface{} = make(chan interface{}, 2)
    servicesPath := topo.ProductServicesPath()
    path, e1 := topo.CreateDir(servicesPath) // make sure the services directory exists, otherwise the watch fails
    fmt.Println("Path: ", path, "error: ", e1)

    services, err := topo.WatchChildren(servicesPath, evtbus)
    if err != nil {
        log.Println("Error: ", err)
        // TODO: this error handling needs improvement
        panic("Reading Service List Failed")
    }

    go func() {
        for {
            result.Lock()
            for _, service := range services {
                log.Println("Service: ", service)
                if _, ok := result.Services[service]; !ok {
                    result.addBackService(service)
                }
            }
            result.Unlock()

            // Wait for the next event
            <-evtbus
            // Re-read the children and re-arm the watch (if the connection expired, simply watch again)
            services, err = topo.WatchChildren(servicesPath, evtbus)
        }
    }()

    log.Println("ProductName: ", result.topo.ProductName)
    return result
}
// Create the given path if it does not exist yet
func (top *Topology) CreateDir(path string) (string, error) {
    dir := top.FullPath(path)
    if ok, _ := top.Exist(dir); ok {
        log.Println("Path Exists")
        return dir, nil
    }
    return zkhelper.CreateRecursive(top.ZkConn, dir, "", 0, zkhelper.DefaultDirACLs())
}
//
// Forward a message to the backend
//
func (s *BackService) HandleRequest(client_id string, msgs []string) (total int, err error, msg *[]byte) {
    backSocket := s.backend.NextSocket()
    if backSocket == nil {
        // No backend available
        if config.VERBOSE {
            log.Println(utils.Red("No BackSocket Found for service:"), s.ServiceName)
        }
        errMsg := GetWorkerNotFoundData(s.ServiceName, 0)
        return 0, nil, &errMsg
    } else {
        if config.VERBOSE {
            log.Println("SendMessage With: ", backSocket.Addr, "For Service: ", s.ServiceName)
        }
        total, err = backSocket.SendMessage("", client_id, "", msgs)
        return total, err, nil
    }
}
// Connect lazily, on first use
func (p *BackSocket) connect() error {
    var err error
    p.Socket, err = zmq.NewSocket(zmq.DEALER)
    if err == nil {
        // XXX: this identity scheme is fragile
        socketSeq++
        p.Socket.SetIdentity(fmt.Sprintf("proxy-%d-%d", os.Getpid(), socketSeq))
        p.Socket.Connect(p.Addr)

        // Only watch for input; output goes through zeromq's async queue,
        // so write timing is not a concern
        p.poller.Add(p.Socket, zmq.POLLIN)
        log.Println("Socket Create Succeed")
        return nil
    } else {
        log.Println("Socket Create Failed: ", err)
        return err
    }
}
func (p *BackSocket) SendMessage(parts ...interface{}) (total int, err error) {
    if p.Socket == nil {
        if err := p.connect(); err != nil {
            log.Println("Socket Connect Failed")
            return 0, err
        }
    }
    return p.Socket.SendMessage(parts...)
}
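// connect() plus SendMessage() give BackSocket a lazy-connect contract: the
// caller never dials explicitly, the first send does. A hedged usage sketch
// (the struct literal below assumes Addr and poller are the only fields that
// must be set before the first send):
func sendViaBackSocket(poller *zmq.Poller) {
    sock := &BackSocket{Addr: "tcp://127.0.0.1:5556", poller: poller}
    // No explicit connect: SendMessage dials the DEALER socket on first use
    if _, err := sock.SendMessage("", "client-1", "", "payload"); err != nil {
        log.Println("send failed: ", err)
    }
}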
//
// Set the RPC Proxy metadata:
// the bound frontend ip/port, e.g. {"rpc_front": "tcp://127.0.0.1:5550"}
//
func (top *Topology) SetRpcProxyData(proxyInfo map[string]interface{}) error {
    path := top.FullPath("/rpc_proxy")
    data, err := json.Marshal(proxyInfo)
    if err != nil {
        return err
    }
    // Not topo.FlagEphemeral: proxyInfo is configured manually and must be persisted
    path, err = CreateOrUpdate(top.ZkConn, path, string(data), 0, zkhelper.DefaultDirACLs(), true)
    log.Println(green("SetRpcProxyData"), "Path: ", path, ", Error: ", err, ", Data: ", string(data))
    return err
}
func (pq *PPQueue) PurgeExpired() {
    now := time.Now()
    expiredWorkers := make([]*Worker, 0)

    // Collect every expired worker in the queue
    for _, worker := range pq.WorkerQueue {
        if worker.Expire.Before(now) {
            fmt.Println("Purge Worker: ", worker.Identity, ", At Index: ", worker.index)
            expiredWorkers = append(expiredWorkers, worker)
        }
    }
    log.Println("expiredWorkers: ", len(expiredWorkers))

    // Remove the expired workers
    for _, worker := range expiredWorkers {
        log.Println("Purge Worker: ", worker.Identity, ", At Index: ", worker.index)
        heap.Remove(&(pq.WorkerQueue), worker.index)
        delete(pq.id2item, worker.Identity)
    }
    log.Println("Available Workers: ", green(fmt.Sprintf("%d", len(pq.WorkerQueue))))
}
//
// Forward the request to the backend
//
func (s *BackService) HandleRequest(req *Request) (err error) {
    backendConn := s.NextBackendConn()
    s.lastRequestTime.Set(time.Now().Unix())

    if backendConn == nil {
        // No backend available
        if s.verbose {
            log.Println(Red("No BackSocket Found for service:"), s.serviceName)
        }
        // Build the error response from errMsg
        errMsg := GetWorkerNotFoundData(req, "BackService")
        req.Response.Data = errMsg
        return nil
    } else {
        if s.verbose {
            log.Println("SendMessage With: ", backendConn.Addr(), "For Service: ", s.serviceName)
        }
        backendConn.PushBack(req)
        return nil
    }
}
func (conf *Config) getFrontendAddr() string {
    var frontendAddr = ""
    // If FrontHost is not specified, select a local IP by IpPrefix,
    // e.g. IpPrefix "10." ends up choosing an internal address such as 10.4.10.2
    if conf.FrontHost == "" {
        log.Println("FrontHost: ", conf.FrontHost, ", Prefix: ", conf.IpPrefix)
        if conf.IpPrefix != "" {
            conf.FrontHost = GetIpWithPrefix(conf.IpPrefix)
        }
    }
    if conf.FrontPort != "" && conf.FrontHost != "" {
        frontendAddr = fmt.Sprintf("tcp://%s:%s", conf.FrontHost, conf.FrontPort)
    }
    return frontendAddr
}
//
// Read the RPC Proxy metadata:
// the bound frontend ip/port, e.g. {"rpc_front": "tcp://127.0.0.1:5550"}
//
func (top *Topology) GetRpcProxyData() (proxyInfo map[string]interface{}, e error) {
    path := top.FullPath("/rpc_proxy")
    data, _, err := top.ZkConn.Get(path)
    log.Println("Data: ", data, ", err: ", err)
    if err != nil {
        return nil, err
    }
    proxyInfo = make(map[string]interface{})
    err = json.Unmarshal(data, &proxyInfo)
    if err != nil {
        return nil, err
    }
    return proxyInfo, nil
}
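// SetRpcProxyData and GetRpcProxyData form a JSON round-trip through the
// persistent /rpc_proxy node. A hedged usage sketch (error handling trimmed;
// top is a *Topology built with zk.NewTopology as elsewhere in this code):
func rpcProxyDataRoundTrip(top *Topology) {
    proxyInfo := map[string]interface{}{"rpc_front": "tcp://127.0.0.1:5550"}
    if err := top.SetRpcProxyData(proxyInfo); err != nil {
        log.Println("SetRpcProxyData failed: ", err)
        return
    }
    if info, err := top.GetRpcProxyData(); err == nil {
        log.Println("rpc_front: ", info["rpc_front"]) // "tcp://127.0.0.1:5550"
    }
}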
//
// Mark every endpoint that is NOT in addrSet as offline
//
func (p *BackSockets) UpdateEndpointAddrs(addrSet map[string]bool) {
    p.Lock()
    defer p.Unlock()

    for addr := range addrSet {
        p.addEndpoint(addr)
    }

    now := time.Now().Format("@2006-01-02 15:04:05")
    for i := 0; i < p.Active; i++ {
        if _, ok := addrSet[p.Sockets[i].Addr]; !ok {
            log.Println(utils.Red("MarkEndpointsOffline#Mark Backend Offline: "), p.Sockets[i].Addr, now)
            p.markOffline(p.Sockets[i])
            i-- // markOffline swaps the socket out of the active range, so re-check index i
        }
    }
}
//
// The parameter dependency enforces the call order: getFrontendAddr can only
// run after Host, IpPrefix and Port have been read.
//
func (conf *Config) getFrontendAddr(frontHost, ipPrefix, frontPort string) string {
    if conf.FrontSock != "" {
        return conf.FrontSock
    }
    var frontendAddr = ""
    // If FrontHost is not specified, select a local IP by IpPrefix,
    // e.g. IpPrefix "10." ends up choosing an internal address such as 10.4.10.2
    if frontHost == "" {
        log.Println("FrontHost: ", frontHost, ", Prefix: ", ipPrefix)
        if ipPrefix != "" {
            frontHost = GetIpWithPrefix(ipPrefix)
        }
    }
    if frontPort != "" && frontHost != "" {
        frontendAddr = fmt.Sprintf("%s:%s", frontHost, frontPort)
    }
    return frontendAddr
}
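// GetIpWithPrefix is assumed to scan the local interface addresses and return
// the first IPv4 address whose string form starts with the prefix. A
// self-contained sketch of that idea (the real utils.GetIpWithPrefix may
// differ; requires the "net" and "strings" imports):
func getIpWithPrefixSketch(prefix string) string {
    addrs, err := net.InterfaceAddrs()
    if err != nil {
        return ""
    }
    for _, a := range addrs {
        ipnet, ok := a.(*net.IPNet)
        if !ok || ipnet.IP.To4() == nil {
            continue
        }
        if ip := ipnet.IP.String(); strings.HasPrefix(ip, prefix) {
            return ip // e.g. prefix "10." matches 10.4.10.2
        }
    }
    return ""
}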
//
// Register a service endpoint
//
func (s *ServiceEndpoint) AddServiceEndpoint(topo *zk.Topology) error {
    path := topo.ProductServiceEndPointPath(s.Service, s.ServiceId)
    data, err := json.Marshal(s)
    if err != nil {
        return err
    }

    // Create the service node (XXX: the service node itself carries no data)
    zk.CreateRecursive(topo.ZkConn, os_path.Dir(path), "", 0, zkhelper.DefaultDirACLs())

    // The endpoint is ephemeral: when the session dies, the service goes offline.
    // See: https://www.box.com/blog/a-gotcha-when-using-zookeeper-ephemeral-nodes/
    // If a node from a previous session still exists, delete it first, then re-create it.
    topo.ZkConn.Delete(path, -1)
    var pathCreated string
    pathCreated, err = topo.ZkConn.Create(path, []byte(data), int32(zookeeper.FlagEphemeral), zkhelper.DefaultFileACLs())

    log.Println(Green("AddServiceEndpoint"), "Path: ", pathCreated, ", Error: ", err)
    return err
}
//
// Register a service endpoint
//
func (top *Topology) AddServiceEndPoint(service string, endpoint string, endpointInfo map[string]interface{}) error {
    path := top.ProductServiceEndPointPath(service, endpoint)
    data, err := json.Marshal(endpointInfo)
    if err != nil {
        return err
    }

    // Create the service node (XXX: the service node itself carries no data)
    CreateRecursive(top.zkConn, os_path.Dir(path), "", 0, zkhelper.DefaultDirACLs())

    // The endpoint is ephemeral: when the session dies, the service goes offline.
    // See: https://www.box.com/blog/a-gotcha-when-using-zookeeper-ephemeral-nodes/
    // If a node from a previous session still exists, delete it first, then re-create it.
    top.zkConn.Delete(path, -1)
    var pathCreated string
    pathCreated, err = top.zkConn.Create(path, []byte(data), int32(topo.FlagEphemeral), zkhelper.DefaultFileACLs())

    log.Println(green("AddServiceEndPoint"), "Path: ", pathCreated, ", Error: ", err)
    return err
}
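// Both registration functions share the delete-then-create-ephemeral dance
// from the Box article linked above: if the ephemeral node of a previous
// session still lingers, Create would fail with "node exists", so the stale
// node is removed first. A condensed sketch of the idiom (conn is assumed to
// follow the zkhelper connection interface used above):
func recreateEphemeral(conn zkhelper.Conn, path string, data []byte) (string, error) {
    conn.Delete(path, -1) // ignore the error: the node may simply not exist
    return conn.Create(path, data, int32(zookeeper.FlagEphemeral), zkhelper.DefaultFileACLs())
}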
//
// Purge expired endpoints
//
func (p *BackSockets) PurgeEndpoints() {
    // Nothing to purge
    if p.Active == len(p.Sockets) {
        return
    }

    log.Printf(utils.Green("PurgeEndpoints %d vs. %d"), p.Active, len(p.Sockets))
    p.Lock()
    defer p.Unlock()

    now := time.Now().Unix()
    nowStr := time.Now().Format("@2006-01-02 15:04:05")
    for i := p.Active; i < len(p.Sockets); i++ {
        // Remove sockets that have been marked offline for more than 5 seconds
        current := p.Sockets[i]
        lastIndex := len(p.Sockets) - 1
        if now-current.markedOfflineTime > 5 {
            // Swap element i with the last element, then truncate the slice
            p.swap(current, p.Sockets[lastIndex])
            log.Println(utils.Red("PurgeEndpoints#Purge Old Socket: "), current.Addr, nowStr)
            // The socket manages its own lifecycle; closing is left to it
            // current.Socket.Close()
            p.Sockets[lastIndex] = nil
            p.Sockets = p.Sockets[0:lastIndex]
            i-- // stay at the same index: a new element was swapped in
        }
    }
}
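// The purge above uses the swap-with-last idiom: O(1) deletion from a slice
// when element order does not matter. In isolation the idiom looks like this
// (a generic sketch, not tied to BackSockets):
func removeSocketAt(s []*BackSocket, i int) []*BackSocket {
    last := len(s) - 1
    s[i] = s[last]
    s[last] = nil // drop the reference so the old socket can be collected
    return s[:last]
}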
//
// Two parameters are required: ProductName and zkAddresses; frontAddr can be used for testing
//
func mainBody(productName string, frontAddr string, zkAddresses string) {
    // 1. Connect to zk
    var topo *zk.Topology
    topo = zk.NewTopology(productName, zkAddresses)

    // 2. Read the backend service configuration
    poller := zmq.NewPoller()
    backServices := proxy.NewBackServices(poller, productName, topo)

    // 3. Create the frontend service
    frontend, _ := zmq.NewSocket(zmq.ROUTER)
    defer frontend.Close()

    // Bind the ROUTER socket to the given port
    log.Println("---->Bind: ", magenta(frontAddr))
    frontend.Bind(frontAddr) // For clients

    // Start listening on the frontend
    poller.Add(frontend, zmq.POLLIN)

    for {
        var sockets []zmq.Polled
        var err error

        sockets, err = poller.Poll(HEARTBEAT_INTERVAL)
        if err != nil {
            log.Println("Encounter Errors, Services Stopped: ", err)
            continue
        }

        for _, socket := range sockets {
            switch socket.Socket {
            case frontend:
                if config.VERBOSE {
                    log.Println("----->Message from front: ")
                }
                msgs, err := frontend.RecvMessage(0)
                if err != nil {
                    continue // Interrupted
                }

                var service string
                var client_id string
                utils.PrintZeromqMsgs(msgs, "ProxyFrontEnd")

                // msg format: <client_id, '', service, '', other_msgs>
                client_id, msgs = utils.Unwrap(msgs)
                service, msgs = utils.Unwrap(msgs)

                backService := backServices.GetBackService(service)
                if backService == nil {
                    log.Println("BackService Not Found...")
                    // The last msg is the Thrift-encoded payload
                    thriftMsg := msgs[len(msgs)-1]
                    // XXX: if the seqId is not needed, a fixed number would also do
                    _, _, seqId, _ := proxy.ParseThriftMsgBegin([]byte(thriftMsg))
                    errMsg := proxy.GetServiceNotFoundData(service, seqId)

                    // <client_id, "", errMsg>
                    if len(msgs) > 1 {
                        frontend.SendMessage(client_id, "", msgs[0:len(msgs)-1], errMsg)
                    } else {
                        frontend.SendMessage(client_id, "", errMsg)
                    }
                } else {
                    // <"", client_id, "", msgs>
                    if config.PROFILE {
                        lastMsg := msgs[len(msgs)-1]
                        msgs = msgs[0 : len(msgs)-1]
                        msgs = append(msgs, fmt.Sprintf("%.4f", float64(time.Now().UnixNano())*1e-9), "", lastMsg)
                        if config.VERBOSE {
                            log.Println(printList(msgs))
                        }
                    }
                    total, err, errMsg := backService.HandleRequest(client_id, msgs)
                    if errMsg != nil {
                        if config.VERBOSE {
                            log.Println("backService Error for service: ", service)
                        }
                        if len(msgs) > 1 {
                            frontend.SendMessage(client_id, "", msgs[0:len(msgs)-1], *errMsg)
                        } else {
                            frontend.SendMessage(client_id, "", *errMsg)
                        }
                    } else if err != nil {
                        log.Println(utils.Red("backService.HandleRequest Error: "), err, ", Total: ", total)
                    }
                }
            default:
                // Everything that is not from the frontend comes from a backend
                msgs, err := socket.Socket.RecvMessage(0)
                if err != nil {
                    log.Println("Encounter Errors When receiving from background")
                    continue // Interrupted
                }
                if config.VERBOSE {
                    utils.PrintZeromqMsgs(msgs, "proxy")
                }
                msgs = utils.TrimLeftEmptyMsg(msgs)

                // msgs format: <client_id, "", rpc_data>
                //              <control_msg_rpc_data>
                if len(msgs) == 1 {
                    // Report that the backend service may be unhealthy
                } else {
                    if config.PROFILE {
                        lastMsg := msgs[len(msgs)-1]
                        msgs = msgs[0 : len(msgs)-1]
                        msgs = append(msgs, fmt.Sprintf("%.4f", float64(time.Now().UnixNano())*1e-9), "", lastMsg)
                    }
                    if config.VERBOSE {
                        log.Println(printList(msgs))
                    }
                    frontend.SendMessage(msgs)
                }
            }
        }
    }
}
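// From a client's point of view, the proxy frontend expects the envelope
// documented above: the ROUTER socket prepends client_id, so a DEALER client
// only sends <"", service, "", thrift_payload>. A hedged client sketch under
// those assumptions (proxyAddr and service are caller-supplied):
func callThroughProxy(proxyAddr string, service string, payload []byte) ([]string, error) {
    sock, err := zmq.NewSocket(zmq.DEALER)
    if err != nil {
        return nil, err
    }
    defer sock.Close()
    if err = sock.Connect(proxyAddr); err != nil {
        return nil, err
    }
    // The proxy's ROUTER adds our identity: it sees <client_id, "", service, "", payload>
    if _, err = sock.SendMessage("", service, "", payload); err != nil {
        return nil, err
    }
    // The reply arrives as <"", rpc_data>: the ROUTER strips client_id on send
    return sock.RecvMessage(0)
}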
//
// How is the Load Balance operated?
// 1. A Load Balance runs on the service provider; it only manages the lb of one given service on that machine
// 2. Under normal circumstances it must not be killed casually
// 3. Graceful stop needs thought: notify all proxies before dying -- but how? TODO
//
func main() {
    args, err := docopt.Parse(usage, nil, true, "Chunyu RPC Load Balance v0.1", true)
    if err != nil {
        log.Println(err)
        os.Exit(1)
    }

    var maxFileFrag = 2
    var maxFragSize int64 = bytesize.GB * 1
    if s, ok := args["--log-filesize"].(string); ok && s != "" {
        v, err := bytesize.Parse(s)
        if err != nil {
            log.PanicErrorf(err, "invalid max log file size = %s", s)
        }
        maxFragSize = v
    }

    // set output log file
    if s, ok := args["-L"].(string); ok && s != "" {
        f, err := log.NewRollingFile(s, maxFileFrag, maxFragSize)
        if err != nil {
            log.PanicErrorf(err, "open rolling log file failed: %s", s)
        } else {
            defer f.Close()
            log.StdLog = log.New(f, "")
        }
    }
    log.SetLevel(log.LEVEL_INFO)
    log.SetFlags(log.Flags() | log.Lshortfile)

    // set log level
    if s, ok := args["--log-level"].(string); ok && s != "" {
        setLogLevel(s)
    }

    var backendAddr, frontendAddr, zkAddr, productName, serviceName string

    // set config file
    if args["-c"] != nil {
        configFile := args["-c"].(string)
        conf, err := utils.LoadConf(configFile)
        if err != nil {
            log.PanicErrorf(err, "load config failed")
        }
        productName = conf.ProductName
        if conf.FrontHost == "" {
            fmt.Println("FrontHost: ", conf.FrontHost, ", Prefix: ", conf.IpPrefix)
            if conf.IpPrefix != "" {
                conf.FrontHost = utils.GetIpWithPrefix(conf.IpPrefix)
            }
        }
        if conf.FrontPort != "" && conf.FrontHost != "" {
            frontendAddr = fmt.Sprintf("tcp://%s:%s", conf.FrontHost, conf.FrontPort)
        }
        backendAddr = conf.BackAddr
        serviceName = conf.Service
        zkAddr = conf.ZkAddr
        config.VERBOSE = conf.Verbose
    } else {
        productName = ""
        zkAddr = ""
    }

    if s, ok := args["--product"].(string); ok && s != "" {
        productName = s
    } else if productName == "" {
        // Neither the config file nor the command line specified it: fail
        log.PanicErrorf(err, "Invalid ProductName: %s", s)
    }
    if s, ok := args["--zk"].(string); ok && s != "" {
        zkAddr = s
    } else if zkAddr == "" {
        log.PanicErrorf(err, "Invalid zookeeper address: %s", s)
    }
    if s, ok := args["--service"].(string); ok && s != "" {
        serviceName = s
    } else if serviceName == "" {
        log.PanicErrorf(err, "Invalid ServiceName: %s", s)
    }
    if s, ok := args["--baddr"].(string); ok && s != "" {
        backendAddr = s
    } else if backendAddr == "" {
        log.PanicErrorf(err, "Invalid backend address: %s", s)
    }
    if s, ok := args["--faddr"].(string); ok && s != "" {
        frontendAddr = s
    } else if frontendAddr == "" {
        // log.PanicErrorf(err, "Invalid frontend address: %s", s)
    }

    // The real service
    mainBody(zkAddr, productName, serviceName, frontendAddr, backendAddr)
}
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) {
    // 1. Connect to zk
    var topo *zk.Topology
    topo = zk.NewTopology(productName, zkAddr)

    // 2. Start the service
    frontend, _ := zmq.NewSocket(zmq.ROUTER)
    backend, _ := zmq.NewSocket(zmq.ROUTER)
    defer frontend.Close()
    defer backend.Close()

    // Bind the ROUTER/ROUTER pair to the given ports
    // tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555
    lbServiceName := GetServiceIdentity(frontendAddr)
    frontend.SetIdentity(lbServiceName)
    frontend.Bind(frontendAddr) // For clients "tcp://*:5555"
    backend.Bind(backendAddr)   // For workers "tcp://*:5556"

    log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr))

    // Queue of backend workers
    workersQueue := queue.NewPPQueue()

    // Heartbeat every second
    heartbeat_at := time.Tick(HEARTBEAT_INTERVAL)

    poller1 := zmq.NewPoller()
    poller1.Add(backend, zmq.POLLIN)

    poller2 := zmq.NewPoller()
    // Assumptions:
    // 1. When zeromq reports a socket as readable, the whole message (all msg parts) is readable
    // 2. Writes to zeromq are asynchronous, so they do not block (unless the volume is huge)
    //
    poller2.Add(backend, zmq.POLLIN)
    poller2.Add(frontend, zmq.POLLIN)

    // 3. Register in zk
    var endpointInfo map[string]interface{} = make(map[string]interface{})
    endpointInfo["frontend"] = frontendAddr
    endpointInfo["backend"] = backendAddr
    topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)

    isAlive := true
    isAliveLock := &sync.RWMutex{}

    go func() {
        servicePath := topo.ProductServicePath(serviceName)
        evtbus := make(chan interface{})
        for {
            // Watch only to track the session state
            _, err := topo.WatchNode(servicePath, evtbus)
            if err == nil {
                // Wait for an event
                e := (<-evtbus).(topozk.Event)
                if e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching {
                    // The session expired: the old endpoint is owned by the dead session,
                    // so delete it and register again
                    topo.DeleteServiceEndPoint(serviceName, lbServiceName)
                    topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)
                }
            } else {
                time.Sleep(time.Second)
            }

            isAliveLock.RLock()
            isAlive1 := isAlive
            isAliveLock.RUnlock()
            if !isAlive1 {
                break
            }
        }
    }()

    ch := make(chan os.Signal, 1)
    signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL)
    // Note: syscall.SIGKILL (kill -9 pid, kill -s SIGKILL pid) cannot actually
    // be caught; that case is left to ops.

    // Conditions for automatic shutdown:
    var suicideTime time.Time

MAIN_LOOP:
    for {
        var sockets []zmq.Polled
        var err error

        sockets, err = poller2.Poll(HEARTBEAT_INTERVAL)
        if err != nil {
            log.Errorf("Error When Polling: %v\n", err)
            continue
        }

        hasValidMsg := false
        for _, socket := range sockets {
            switch socket.Socket {
            case backend:
                // Frame formats:
                //   forwarded to the frontend:    <"", proxy_id, "", client_id, "", rpc_data>
                //   read from the backend socket: <worker_id, "", proxy_id, "", client_id, "", rpc_data>
                msgs, err := backend.RecvMessage(0)
                if err != nil {
                    log.Errorf("Error When RecvMessage from background: %v\n", err)
                    continue
                }
                if config.VERBOSE {
                    // log.Println("Message from backend: ", msgs)
                }

                // Message types:
                // msgs: <worker_id, "", proxy_id, "", client_id, "", rpc_data>
                //       <worker_id, "", rpc_control_data>
                worker_id, msgs := utils.Unwrap(msgs)

                // rpc_control_data carries control information
                // msgs: <rpc_control_data>
                if len(msgs) == 1 {
                    // PPP_READY
                    // PPP_HEARTBEAT
                    controlMsg := msgs[0]

                    // Skip empty (invalid) control messages
                    if len(controlMsg) == 0 {
                        continue
                    }

                    if config.VERBOSE {
                        // log.Println("Got Message From Backend...")
                    }
                    if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT {
                        // Remaining concurrency of the backend worker
                        var concurrency int
                        if len(controlMsg) >= 3 {
                            concurrency = int(controlMsg[2])
                        } else {
                            concurrency = 1
                        }
                        if config.VERBOSE {
                            // utils.PrintZeromqMsgs(msgs, "control msg")
                        }
                        force_update := controlMsg[0] == PPP_READY
                        workersQueue.UpdateWorkerStatus(worker_id, concurrency, force_update)
                    } else if controlMsg[0] == PPP_STOP {
                        // Stop the given backend worker
                        workersQueue.UpdateWorkerStatus(worker_id, -1, true)
                    } else {
                        log.Errorf("Unexpected Control Message: %d", controlMsg[0])
                    }
                } else {
                    hasValidMsg = true
                    // Forward to the frontend; if the frontend is gone the message is simply lost
                    workersQueue.UpdateWorkerStatus(worker_id, 0, false)
                    // msgs: <proxy_id, "", client_id, "", rpc_data>
                    frontend.SendMessage(msgs)
                }
            case frontend:
                hasValidMsg = true
                log.Println("----->Message from front: ")
                msgs, err := frontend.RecvMessage(0)
                if err != nil {
                    log.Errorf("Error when reading from frontend: %v\n", err)
                    continue
                }

                // msgs: <proxy_id, "", client_id, "", rpc_data>
                if config.VERBOSE {
                    utils.PrintZeromqMsgs(msgs, "frontend")
                }
                msgs = utils.TrimLeftEmptyMsg(msgs)

                // Hand the message to a backend worker
                worker := workersQueue.NextWorker()
                if worker != nil {
                    if config.VERBOSE {
                        log.Println("Send Msg to Backend worker: ", worker.Identity)
                    }
                    backend.SendMessage(worker.Identity, "", msgs)
                } else {
                    // No worker available: report the error back to the client
                    if config.VERBOSE {
                        log.Println("No backend worker found")
                    }
                    errMsg := proxy.GetWorkerNotFoundData("account", 0)
                    // <proxy_id, "", client_id, "", rpc_data>
                    frontend.SendMessage(msgs[0:(len(msgs)-1)], errMsg)
                }
            }
        }

        // If a graceful suicide is scheduled, manage the suicide deadline
        isAliveLock.RLock()
        isAlive1 := isAlive
        isAliveLock.RUnlock()
        if !isAlive1 {
            if hasValidMsg {
                suicideTime = time.Now().Add(time.Second * 3)
            } else {
                if time.Now().After(suicideTime) {
                    log.Println(utils.Green("Load Balance Suicide Gracefully"))
                    break
                }
            }
        }

        // Heartbeat handling
        select {
        case <-heartbeat_at:
            now := time.Now()
            // Send a heartbeat to every live worker in the queue
            for _, worker := range workersQueue.WorkerQueue {
                if worker.Expire.After(now) {
                    backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR)
                }
            }
            workersQueue.PurgeExpired()
        case sig := <-ch:
            isAliveLock.Lock()
            isAlive1 := isAlive
            isAlive = false
            isAliveLock.Unlock()
            if isAlive1 {
                // Prepare to exit, but finish the in-flight work first;
                // deregister so no new traffic arrives
                topo.DeleteServiceEndPoint(serviceName, lbServiceName)
                if sig == syscall.SIGKILL {
                    log.Println(utils.Red("Got Kill Signal, Return Directly"))
                    break MAIN_LOOP // a bare break here would only exit the select
                } else {
                    suicideTime = time.Now().Add(time.Second * 3)
                    log.Println(utils.Red("Schedule to suicide at: "), suicideTime.Format("@2006-01-02 15:04:05"))
                }
            }
        default:
        }
    }
}
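// The loop above assumes workers speak a Paranoid-Pirate-style protocol:
// announce readiness once, answer heartbeats, and optionally report remaining
// concurrency in byte 2 of the control frame. A hedged worker-side sketch
// under those assumptions (PPP_READY_STR is assumed by analogy with the
// PPP_HEARTBEAT_STR used above):
func workerLoop(backendAddr string) error {
    worker, err := zmq.NewSocket(zmq.DEALER)
    if err != nil {
        return err
    }
    defer worker.Close()
    worker.SetIdentity(fmt.Sprintf("worker-%d", os.Getpid()))
    if err = worker.Connect(backendAddr); err != nil {
        return err
    }
    worker.SendMessage(PPP_READY_STR) // the LB treats READY as a forced status update

    poller := zmq.NewPoller()
    poller.Add(worker, zmq.POLLIN)
    heartbeat := time.Tick(HEARTBEAT_INTERVAL)
    for {
        polled, err := poller.Poll(HEARTBEAT_INTERVAL)
        if err != nil {
            return err
        }
        if len(polled) > 0 {
            msgs, err := worker.RecvMessage(0)
            if err != nil {
                return err
            }
            if len(msgs) == 1 && len(msgs[0]) > 0 && msgs[0][0] == PPP_HEARTBEAT {
                // Heartbeat from the LB: nothing to do
            } else {
                // Real work: echo the envelope back so the LB can route the reply
                worker.SendMessage(msgs)
            }
        }
        select {
        case <-heartbeat:
            worker.SendMessage(PPP_HEARTBEAT_STR)
        default:
        }
    }
}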
//
// If the Proxy goes down, the whole machine is effectively OVER and should be taken offline.
// The Proxy therefore has to be designed very carefully: it must not be easy to kill, and it must not kill itself.
//
func main() {
    // Parse the command line arguments
    args, err := docopt.Parse(usage, nil, true, "Chunyu RPC Local Proxy v0.1", true)
    if err != nil {
        log.Println(err)
        os.Exit(1)
    }

    var maxFileFrag = 2
    var maxFragSize int64 = bytesize.GB * 1
    if s, ok := args["--log-filesize"].(string); ok && s != "" {
        v, err := bytesize.Parse(s)
        if err != nil {
            log.PanicErrorf(err, "invalid max log file size = %s", s)
        }
        maxFragSize = v
    }

    // set output log file
    if s, ok := args["-L"].(string); ok && s != "" {
        f, err := log.NewRollingFile(s, maxFileFrag, maxFragSize)
        if err != nil {
            log.PanicErrorf(err, "open rolling log file failed: %s", s)
        } else {
            defer f.Close()
            log.StdLog = log.New(f, "")
        }
    }
    log.SetLevel(log.LEVEL_INFO)
    log.SetFlags(log.Flags() | log.Lshortfile)

    // set log level
    if s, ok := args["--log-level"].(string); ok && s != "" {
        setLogLevel(s)
    }

    var zkAddr, frontAddr, productName string

    // Read settings from the config file
    if args["-c"] != nil {
        configFile := args["-c"].(string)
        conf, err := utils.LoadConf(configFile)
        if err != nil {
            log.PanicErrorf(err, "load config failed")
        }
        productName = conf.ProductName
        frontAddr = conf.ProxyAddr
        zkAddr = conf.ZkAddr
        config.VERBOSE = conf.Verbose
        config.PROFILE = conf.Profile
    } else {
        productName = ""
        zkAddr = ""
    }

    if s, ok := args["--product"].(string); ok && s != "" {
        productName = s
    } else if productName == "" {
        log.PanicErrorf(err, "Invalid ProductName: %s", s)
    }
    if s, ok := args["--zk"].(string); ok && s != "" {
        zkAddr = s
    } else if zkAddr == "" {
        log.PanicErrorf(err, "Invalid zookeeper address: %s", s)
    }
    if s, ok := args["--faddr"].(string); ok && s != "" {
        frontAddr = s
    } else if frontAddr == "" {
        log.PanicErrorf(err, "Invalid Proxy address: %s", s)
    }

    // The real service
    mainBody(productName, frontAddr, zkAddr)
}
//
// go test github.com/wfxiang08/rpc_proxy/proxy -v -run "TestBackend"
//
func TestBackend(t *testing.T) {
    // Act as a server
    transport, err := thrift.NewTServerSocket("127.0.0.1:0")
    assert.NoError(t, err)

    err = transport.Open() // Open the transport
    assert.NoError(t, err)
    defer transport.Close()

    err = transport.Listen() // Start listening
    assert.NoError(t, err)

    addr := transport.Addr().String()
    fmt.Println("Addr: ", addr)

    var requestNum int32 = 10
    requests := make([]*Request, 0, requestNum)

    var i int32
    for i = 0; i < requestNum; i++ {
        buf := make([]byte, 100, 100)
        l := fakeData("Hello", thrift.CALL, i+1, buf[0:0])
        buf = buf[0:l]

        req := NewRequest(buf, false)
        // The goroutine may not have run yet by the time the checks at the bottom execute
        req.Wait.Add(1)

        assert.Equal(t, i+1, req.Request.SeqId, "Request SeqId is sane")
        requests = append(requests, req)
    }

    go func() {
        // Client side
        bc := NewBackendConn(addr, nil, "test", true)
        bc.currentSeqId = 10

        // Send the requests
        var i int32
        for i = 0; i < requestNum; i++ {
            fmt.Println("Sending Request to Backend Conn", i)
            bc.PushBack(requests[i])
            requests[i].Wait.Done()
        }
        // Wait for the responses to come back
        time.Sleep(time.Second * 2)
    }()

    go func() {
        // Server side
        tran, err := transport.Accept()
        if err != nil {
            log.ErrorErrorf(err, "Error: %v\n", err)
        }
        assert.NoError(t, err)

        bt := NewTBufferedFramedTransport(tran, time.Microsecond*100, 2)

        // Read and write on the accepted transport
        var i int32
        for i = 0; i < requestNum; i++ {
            request, err := bt.ReadFrame()
            assert.NoError(t, err)

            req := NewRequest(request, false)
            assert.Equal(t, req.Request.SeqId, i+10)
            fmt.Printf("Server Got Request, and SeqNum OK, Id: %d, Frame Size: %d\n", i, len(request))

            // Echo the frame back
            bt.Write(request)
            bt.FlushBuffer(true)
        }
        tran.Close()
    }()

    fmt.Println("Requests Len: ", len(requests))
    for idx, r := range requests {
        r.Wait.Wait() // r is the original request
        req := NewRequest(r.Response.Data, false)
        log.Printf(Green("SeqMatch[%d]: Orig: %d, Return: %d\n"), idx, req.Request.SeqId, r.Request.SeqId)
        assert.Equal(t, req.Request.SeqId, r.Request.SeqId)
    }
    log.Println("OK")
}
func RpcMain(binaryName string, serviceDesc string, configCheck ConfigCheck,
    serverFactory ServerFactorory, buildDate string, gitVersion string) {

    // 1. Prepare and parse the arguments
    usage = fmt.Sprintf(usage, binaryName, binaryName)
    version := fmt.Sprintf("Version: %s\nBuildDate: %s\nDesc: %s\nAuthor: [email protected]", gitVersion, buildDate, serviceDesc)
    args, err := docopt.Parse(usage, nil, true, version, true)
    if err != nil {
        fmt.Println(err)
        os.Exit(1)
    }

    if s, ok := args["-V"].(bool); ok && s {
        fmt.Println(Green(version))
        os.Exit(1)
    }

    // This is why Codis starts a seemingly pointless http server: it exposes profiling
    if s, ok := args["--profile-addr"].(string); ok && len(s) > 0 {
        go func() {
            log.Printf(Red("Profile Address: %s"), s)
            log.Println(http.ListenAndServe(s, nil))
        }()
    }

    // 2. Parse the log configuration
    log.SetLevel(log.LEVEL_INFO)

    var maxKeepDays int = 3
    if s, ok := args["--log-keep-days"].(string); ok && s != "" {
        v, err := strconv.ParseInt(s, 10, 32)
        if err != nil {
            log.PanicErrorf(err, "invalid max log file keep days = %s", s)
        }
        maxKeepDays = int(v)
    }

    // set output log file
    if s, ok := args["-L"].(string); ok && s != "" {
        f, err := log.NewRollingFile(s, maxKeepDays)
        if err != nil {
            log.PanicErrorf(err, "open rolling log file failed: %s", s)
        } else {
            defer f.Close()
            log.StdLog = log.New(f, "")
        }
    }
    log.SetLevel(log.LEVEL_INFO)
    log.SetFlags(log.Flags() | log.Lshortfile)

    // set log level
    if s, ok := args["--log-level"].(string); ok && s != "" {
        SetLogLevel(s)
    }

    // Optional arguments: absence is fine
    workDir, _ := args["--work-dir"].(string)
    codeUrlVersion, _ := args["--code-url-version"].(string)
    if len(workDir) == 0 {
        workDir, _ = os.Getwd()
    }

    log.Printf("WorkDir: %s, CodeUrl: %s", workDir, codeUrlVersion)

    // 3. Parse the config
    configFile := args["-c"].(string)
    conf, err := utils.LoadConf(configFile)
    if err != nil {
        log.PanicErrorf(err, "load config failed")
    }

    // Extra configuration
    conf.WorkDir = workDir
    conf.CodeUrlVersion = codeUrlVersion

    if configCheck != nil {
        configCheck(conf)
    } else {
        log.Panic("No Config Check Given")
    }

    // Print the version info on every start
    log.Infof(Green("-----------------\n%s\n--------------------------------------------------------------------"), version)

    // Start the server
    server := serverFactory(conf)
    server.Run()
}