// // 获取下一个可用的Worker // func (pq *PriorityQueue) NextWorker() *Worker { now := time.Now() for pq.Len() > 0 { result := (*pq)[0] if result.index != INVALID_INDEX && result.Expire.After(now) { // 只要活着,就留在优先级队列中,等待分配任务 // log.Println("Find Valid Worker...") result.priority -= 1 // 调整Worker的优先级 heap.Fix(pq, result.index) return result } else { if result.index != INVALID_INDEX { log.Errorf("Invalid Item index in PriorityQueue#NextWorker") } else { log.Println("Worker Expired") // 只有过期的元素才删除 heap.Remove(pq, result.index) } } } log.Println("Has Not Worker...") return nil }
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) { // 1. 创建到zk的连接 var topo *zk.Topology topo = zk.NewTopology(productName, zkAddr) // 2. 启动服务 frontend, _ := zmq.NewSocket(zmq.ROUTER) backend, _ := zmq.NewSocket(zmq.ROUTER) defer frontend.Close() defer backend.Close() // ROUTER/ROUTER绑定到指定的端口 // tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555 lbServiceName := GetServiceIdentity(frontendAddr) frontend.SetIdentity(lbServiceName) frontend.Bind(frontendAddr) // For clients "tcp://*:5555" backend.Bind(backendAddr) // For workers "tcp://*:5556" log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr)) // 后端的workers queue workersQueue := queue.NewPPQueue() // 心跳间隔1s heartbeat_at := time.Tick(HEARTBEAT_INTERVAL) poller1 := zmq.NewPoller() poller1.Add(backend, zmq.POLLIN) poller2 := zmq.NewPoller() // 前提: // 1. 当zeromq通知消息可读时,那么整个Message(所有的msg parts)都可读 // 2. 往zeromq写数据时,是异步的,因此也不存在block(除非数据量巨大) // poller2.Add(backend, zmq.POLLIN) poller2.Add(frontend, zmq.POLLIN) // 3. 注册zk var endpointInfo map[string]interface{} = make(map[string]interface{}) endpointInfo["frontend"] = frontendAddr endpointInfo["backend"] = backendAddr topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo) isAlive := true isAliveLock := &sync.RWMutex{} go func() { servicePath := topo.ProductServicePath(serviceName) evtbus := make(chan interface{}) for true { // 只是为了监控状态 _, err := topo.WatchNode(servicePath, evtbus) if err == nil { // 等待事件 e := (<-evtbus).(topozk.Event) if e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching { // Session过期了,则需要删除之前的数据,因为这个数据的Owner不是当前的Session topo.DeleteServiceEndPoint(serviceName, lbServiceName) topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo) } } else { time.Sleep(time.Second) } isAliveLock.RLock() isAlive1 := isAlive isAliveLock.RUnlock() if !isAlive1 { break } } }() ch := make(chan os.Signal, 1) signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL) // syscall.SIGKILL // kill -9 pid // kill -s SIGKILL pid 还是留给运维吧 // // 自动退出条件: // var suideTime time.Time for { var sockets []zmq.Polled var err error sockets, err = poller2.Poll(HEARTBEAT_INTERVAL) if err != nil { // break // Interrupted log.Errorf("Error When Pollling: %v\n", err) continue } hasValidMsg := false for _, socket := range sockets { switch socket.Socket { case backend: // 格式: // 后端: // <"", proxy_id, "", client_id, "", rpc_data> // Backend Socket读取到的: // <wokerid, "", proxy_id, "", client_id, "", rpc_data> // msgs, err := backend.RecvMessage(0) if err != nil { log.Errorf("Error When RecvMessage from background: %v\n", err) continue } if config.VERBOSE { // log.Println("Message from backend: ", msgs) } // 消息类型: // msgs: <worker_id, "", proxy_id, "", client_id, "", rpc_data> // <worker_id, "", rpc_control_data> worker_id, msgs := utils.Unwrap(msgs) // rpc_control_data 控制信息 // msgs: <rpc_control_data> if len(msgs) == 1 { // PPP_READY // PPP_HEARTBEAT controlMsg := msgs[0] // 碰到无效的信息,则直接跳过去 if len(controlMsg) == 0 { continue } if config.VERBOSE { // log.Println("Got Message From Backend...") } if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT { // 后端服务剩余的并发能力 var concurrency int if len(controlMsg) >= 3 { concurrency = int(controlMsg[2]) } else { concurrency = 1 } if config.VERBOSE { // utils.PrintZeromqMsgs(msgs, "control msg") } force_update := controlMsg[0] == PPP_READY workersQueue.UpdateWorkerStatus(worker_id, concurrency, force_update) } else if controlMsg[0] == PPP_STOP { // 停止指定的后端服务 workersQueue.UpdateWorkerStatus(worker_id, -1, true) } else { log.Errorf("Unexpected Control Message: %d", controlMsg[0]) } } else { hasValidMsg = true // 将信息发送到前段服务, 如果前端服务挂了,则消息就丢失 // log.Println("Send Message to frontend") workersQueue.UpdateWorkerStatus(worker_id, 0, false) // msgs: <proxy_id, "", client_id, "", rpc_data> frontend.SendMessage(msgs) } case frontend: hasValidMsg = true log.Println("----->Message from front: ") msgs, err := frontend.RecvMessage(0) if err != nil { log.Errorf("Error when reading from frontend: %v\n", err) continue } // msgs: // <proxy_id, "", client_id, "", rpc_data> if config.VERBOSE { utils.PrintZeromqMsgs(msgs, "frontend") } msgs = utils.TrimLeftEmptyMsg(msgs) // 将msgs交给后端服务器 worker := workersQueue.NextWorker() if worker != nil { if config.VERBOSE { log.Println("Send Msg to Backend worker: ", worker.Identity) } backend.SendMessage(worker.Identity, "", msgs) } else { // 怎么返回错误消息呢? if config.VERBOSE { log.Println("No backend worker found") } errMsg := proxy.GetWorkerNotFoundData("account", 0) // <proxy_id, "", client_id, "", rpc_data> frontend.SendMessage(msgs[0:(len(msgs)-1)], errMsg) } } } // 如果安排的suiside, 则需要处理 suiside的时间 isAliveLock.RLock() isAlive1 := isAlive isAliveLock.RUnlock() if !isAlive1 { if hasValidMsg { suideTime = time.Now().Add(time.Second * 3) } else { if time.Now().After(suideTime) { log.Println(utils.Green("Load Balance Suiside Gracefully")) break } } } // 心跳同步 select { case <-heartbeat_at: now := time.Now() // 给workerQueue中的所有的worker发送心跳消息 for _, worker := range workersQueue.WorkerQueue { if worker.Expire.After(now) { // log.Println("Sending Hb to Worker: ", worker.Identity) backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR) } } workersQueue.PurgeExpired() case sig := <-ch: isAliveLock.Lock() isAlive1 := isAlive isAlive = false isAliveLock.Unlock() if isAlive1 { // 准备退出(但是需要处理完毕手上的活) // 需要退出: topo.DeleteServiceEndPoint(serviceName, lbServiceName) if sig == syscall.SIGKILL { log.Println(utils.Red("Got Kill Signal, Return Directly")) break } else { suideTime = time.Now().Add(time.Second * 3) log.Println(utils.Red("Schedule to suicide at: "), suideTime.Format("@2006-01-02 15:04:05")) } } default: } } }