// // 删除Service Endpoint // func (s *ServiceEndpoint) DeleteServiceEndpoint(top *zk.Topology) { path := top.ProductServiceEndPointPath(s.Service, s.ServiceId) if ok, _ := top.Exist(path); ok { zkhelper.DeleteRecursive(top.ZkConn, path, -1) log.Println(Red("DeleteServiceEndpoint"), "Path: ", path) } }
func GetServiceEndpoint(top *zk.Topology, service string, serviceId string) (endpoint *ServiceEndpoint, err error) { path := top.ProductServiceEndPointPath(service, serviceId) data, _, err := top.ZkConn.Get(path) if err != nil { return nil, err } endpoint = &ServiceEndpoint{} err = json.Unmarshal(data, endpoint) if err != nil { return nil, err } else { return endpoint, nil } }
// 创建一个BackService func NewBackService(serviceName string, poller *zmq.Poller, topo *zk.Topology) *BackService { backSockets := NewBackSockets(poller) service := &BackService{ ServiceName: serviceName, backend: backSockets, poller: poller, topo: topo, } var evtbus chan interface{} = make(chan interface{}, 2) servicePath := topo.ProductServicePath(serviceName) endpoints, err := topo.WatchChildren(servicePath, evtbus) if err != nil { log.Println("Error: ", err) panic("Reading Service List Failed: ") } go func() { for true { // 如何监听endpoints的变化呢? addrSet := make(map[string]bool) nowStr := time.Now().Format("@2006-01-02 15:04:05") for _, endpoint := range endpoints { // 这些endpoint变化该如何处理呢? log.Println(utils.Green("---->Find Endpoint: "), endpoint, "For Service: ", serviceName) endpointInfo, _ := topo.GetServiceEndPoint(serviceName, endpoint) addr, ok := endpointInfo["frontend"] if ok { addrStr := addr.(string) log.Println(utils.Green("---->Add endpoint to backend: "), addrStr, nowStr, "For Service: ", serviceName) addrSet[addrStr] = true } } service.backend.UpdateEndpointAddrs(addrSet) // 等待事件 <-evtbus // 读取数据,继续监听 endpoints, err = topo.WatchChildren(servicePath, evtbus) } }() ticker := time.NewTicker(time.Millisecond * 1000) go func() { for _ = range ticker.C { service.backend.PurgeEndpoints() } }() return service }
// // 注册一个服务的Endpoints // func (s *ServiceEndpoint) AddServiceEndpoint(topo *zk.Topology) error { path := topo.ProductServiceEndPointPath(s.Service, s.ServiceId) data, err := json.Marshal(s) if err != nil { return err } // 创建Service(XXX: Service本身不包含数据) zk.CreateRecursive(topo.ZkConn, os_path.Dir(path), "", 0, zkhelper.DefaultDirACLs()) // 当前的Session挂了,服务就下线 // topo.FlagEphemeral // 参考: https://www.box.com/blog/a-gotcha-when-using-zookeeper-ephemeral-nodes/ // 如果之前的Session信息还存在,则先删除;然后再添加 topo.ZkConn.Delete(path, -1) var pathCreated string pathCreated, err = topo.ZkConn.Create(path, []byte(data), int32(zookeeper.FlagEphemeral), zkhelper.DefaultFileACLs()) log.Println(Green("AddServiceEndpoint"), "Path: ", pathCreated, ", Error: ", err) return err }
func NewBackServices(poller *zmq.Poller, productName string, topo *zk.Topology) *BackServices { // 创建BackServices result := &BackServices{ Services: make(map[string]*BackService), OfflineServices: make(map[string]*BackService), poller: poller, topo: topo, } var evtbus chan interface{} = make(chan interface{}, 2) servicesPath := topo.ProductServicesPath() path, e1 := topo.CreateDir(servicesPath) // 保证Service目录存在,否则会报错 fmt.Println("Path: ", path, "error: ", e1) services, err := topo.WatchChildren(servicesPath, evtbus) if err != nil { log.Println("Error: ", err) // TODO: 这个地方需要优化 panic("Reading Service List Failed") } go func() { for true { result.Lock() for _, service := range services { log.Println("Service: ", service) if _, ok := result.Services[service]; !ok { result.addBackService(service) } } result.Unlock() // 等待事件 <-evtbus // 读取数据,继续监听(连接过期了就过期了,再次Watch即可) services, err = topo.WatchChildren(servicesPath, evtbus) } }() // 读取zk, 等待 log.Println("ProductName: ", result.topo.ProductName) return result }
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) { // 1. 创建到zk的连接 var topo *zk.Topology topo = zk.NewTopology(productName, zkAddr) // 2. 启动服务 frontend, _ := zmq.NewSocket(zmq.ROUTER) backend, _ := zmq.NewSocket(zmq.ROUTER) defer frontend.Close() defer backend.Close() // ROUTER/ROUTER绑定到指定的端口 // tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555 lbServiceName := GetServiceIdentity(frontendAddr) frontend.SetIdentity(lbServiceName) frontend.Bind(frontendAddr) // For clients "tcp://*:5555" backend.Bind(backendAddr) // For workers "tcp://*:5556" log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr)) // 后端的workers queue workersQueue := queue.NewPPQueue() // 心跳间隔1s heartbeat_at := time.Tick(HEARTBEAT_INTERVAL) poller1 := zmq.NewPoller() poller1.Add(backend, zmq.POLLIN) poller2 := zmq.NewPoller() // 前提: // 1. 当zeromq通知消息可读时,那么整个Message(所有的msg parts)都可读 // 2. 往zeromq写数据时,是异步的,因此也不存在block(除非数据量巨大) // poller2.Add(backend, zmq.POLLIN) poller2.Add(frontend, zmq.POLLIN) // 3. 注册zk var endpointInfo map[string]interface{} = make(map[string]interface{}) endpointInfo["frontend"] = frontendAddr endpointInfo["backend"] = backendAddr topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo) isAlive := true isAliveLock := &sync.RWMutex{} go func() { servicePath := topo.ProductServicePath(serviceName) evtbus := make(chan interface{}) for true { // 只是为了监控状态 _, err := topo.WatchNode(servicePath, evtbus) if err == nil { // 等待事件 e := (<-evtbus).(topozk.Event) if e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching { // Session过期了,则需要删除之前的数据,因为这个数据的Owner不是当前的Session topo.DeleteServiceEndPoint(serviceName, lbServiceName) topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo) } } else { time.Sleep(time.Second) } isAliveLock.RLock() isAlive1 := isAlive isAliveLock.RUnlock() if !isAlive1 { break } } }() ch := make(chan os.Signal, 1) signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL) // syscall.SIGKILL // kill -9 pid // kill -s SIGKILL pid 还是留给运维吧 // // 自动退出条件: // var suideTime time.Time for { var sockets []zmq.Polled var err error sockets, err = poller2.Poll(HEARTBEAT_INTERVAL) if err != nil { // break // Interrupted log.Errorf("Error When Pollling: %v\n", err) continue } hasValidMsg := false for _, socket := range sockets { switch socket.Socket { case backend: // 格式: // 后端: // <"", proxy_id, "", client_id, "", rpc_data> // Backend Socket读取到的: // <wokerid, "", proxy_id, "", client_id, "", rpc_data> // msgs, err := backend.RecvMessage(0) if err != nil { log.Errorf("Error When RecvMessage from background: %v\n", err) continue } if config.VERBOSE { // log.Println("Message from backend: ", msgs) } // 消息类型: // msgs: <worker_id, "", proxy_id, "", client_id, "", rpc_data> // <worker_id, "", rpc_control_data> worker_id, msgs := utils.Unwrap(msgs) // rpc_control_data 控制信息 // msgs: <rpc_control_data> if len(msgs) == 1 { // PPP_READY // PPP_HEARTBEAT controlMsg := msgs[0] // 碰到无效的信息,则直接跳过去 if len(controlMsg) == 0 { continue } if config.VERBOSE { // log.Println("Got Message From Backend...") } if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT { // 后端服务剩余的并发能力 var concurrency int if len(controlMsg) >= 3 { concurrency = int(controlMsg[2]) } else { concurrency = 1 } if config.VERBOSE { // utils.PrintZeromqMsgs(msgs, "control msg") } force_update := controlMsg[0] == PPP_READY workersQueue.UpdateWorkerStatus(worker_id, concurrency, force_update) } else if controlMsg[0] == PPP_STOP { // 停止指定的后端服务 workersQueue.UpdateWorkerStatus(worker_id, -1, true) } else { log.Errorf("Unexpected Control Message: %d", controlMsg[0]) } } else { hasValidMsg = true // 将信息发送到前段服务, 如果前端服务挂了,则消息就丢失 // log.Println("Send Message to frontend") workersQueue.UpdateWorkerStatus(worker_id, 0, false) // msgs: <proxy_id, "", client_id, "", rpc_data> frontend.SendMessage(msgs) } case frontend: hasValidMsg = true log.Println("----->Message from front: ") msgs, err := frontend.RecvMessage(0) if err != nil { log.Errorf("Error when reading from frontend: %v\n", err) continue } // msgs: // <proxy_id, "", client_id, "", rpc_data> if config.VERBOSE { utils.PrintZeromqMsgs(msgs, "frontend") } msgs = utils.TrimLeftEmptyMsg(msgs) // 将msgs交给后端服务器 worker := workersQueue.NextWorker() if worker != nil { if config.VERBOSE { log.Println("Send Msg to Backend worker: ", worker.Identity) } backend.SendMessage(worker.Identity, "", msgs) } else { // 怎么返回错误消息呢? if config.VERBOSE { log.Println("No backend worker found") } errMsg := proxy.GetWorkerNotFoundData("account", 0) // <proxy_id, "", client_id, "", rpc_data> frontend.SendMessage(msgs[0:(len(msgs)-1)], errMsg) } } } // 如果安排的suiside, 则需要处理 suiside的时间 isAliveLock.RLock() isAlive1 := isAlive isAliveLock.RUnlock() if !isAlive1 { if hasValidMsg { suideTime = time.Now().Add(time.Second * 3) } else { if time.Now().After(suideTime) { log.Println(utils.Green("Load Balance Suiside Gracefully")) break } } } // 心跳同步 select { case <-heartbeat_at: now := time.Now() // 给workerQueue中的所有的worker发送心跳消息 for _, worker := range workersQueue.WorkerQueue { if worker.Expire.After(now) { // log.Println("Sending Hb to Worker: ", worker.Identity) backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR) } } workersQueue.PurgeExpired() case sig := <-ch: isAliveLock.Lock() isAlive1 := isAlive isAlive = false isAliveLock.Unlock() if isAlive1 { // 准备退出(但是需要处理完毕手上的活) // 需要退出: topo.DeleteServiceEndPoint(serviceName, lbServiceName) if sig == syscall.SIGKILL { log.Println(utils.Red("Got Kill Signal, Return Directly")) break } else { suideTime = time.Now().Add(time.Second * 3) log.Println(utils.Red("Schedule to suicide at: "), suideTime.Format("@2006-01-02 15:04:05")) } } default: } } }