예제 #1
0
// NewBackService creates the BackService for serviceName and starts two
// background goroutines:
//
//   - a watcher that rebuilds the set of "frontend" addresses registered under
//     the service's path in topo and hands it to the backend sockets every
//     time the children of that path change;
//   - a ticker that calls PurgeEndpoints on the backend sockets once per
//     second.
//
// It panics if the initial child listing cannot be read. Neither goroutine is
// ever stopped, and the ticker is never released, so both live as long as the
// process does.
func NewBackService(serviceName string, poller *zmq.Poller, topo *zk.Topology) *BackService {
	service := &BackService{
		ServiceName: serviceName,
		backend:     NewBackSockets(poller),
		poller:      poller,
		topo:        topo,
	}

	evtbus := make(chan interface{}, 2)
	servicePath := topo.ProductServicePath(serviceName)
	endpoints, err := topo.WatchChildren(servicePath, evtbus)
	if err != nil {
		log.Println("Error: ", err)
		panic("Reading Service List Failed: ")
	}

	go func() {
		for {
			// Rebuild the complete address set from the current children.
			addrSet := make(map[string]bool)
			nowStr := time.Now().Format("@2006-01-02 15:04:05")
			for _, endpoint := range endpoints {
				log.Println(utils.Green("---->Find Endpoint: "), endpoint, "For Service: ", serviceName)
				endpointInfo, err := topo.GetServiceEndPoint(serviceName, endpoint)
				if err != nil {
					// The endpoint cannot be read, so it contributes no address.
					log.Println("Error: ", err)
					continue
				}

				// Checked assertion: a missing or non-string "frontend" value
				// must not panic the watcher goroutine.
				addrStr, ok := endpointInfo["frontend"].(string)
				if !ok {
					continue
				}
				log.Println(utils.Green("---->Add endpoint to backend: "), addrStr, nowStr, "For Service: ", serviceName)
				addrSet[addrStr] = true
			}

			service.backend.UpdateEndpointAddrs(addrSet)

			// Block until the children of servicePath change.
			<-evtbus

			// Re-read the children and re-arm the watch. Keep retrying on
			// failure: waiting on evtbus without a successful WatchChildren
			// call would rely on a watch that may never have been set.
			for {
				children, err := topo.WatchChildren(servicePath, evtbus)
				if err == nil {
					endpoints = children
					break
				}
				log.Println("Error: ", err)
				time.Sleep(time.Second)
			}
		}
	}()

	ticker := time.NewTicker(time.Millisecond * 1000)
	go func() {
		for range ticker.C {
			service.backend.PurgeEndpoints()
		}
	}()

	return service
}
예제 #2
0
// PurgeEndpoints drops expired sockets from p.Sockets.
//
// Only the entries at index p.Active and above are examined. An entry is
// removed once its markedOfflineTime is more than five seconds in the past.
// The underlying socket is not closed here; per the original author's note,
// each socket is left to manage its own state.
func (p *BackSockets) PurgeEndpoints() {
	// Take the lock before looking at Active/Sockets: both fields are modified
	// under this lock, so reading them first would be a data race.
	p.Lock()
	defer p.Unlock()

	// Nothing beyond the active range, so nothing to purge.
	if p.Active == len(p.Sockets) {
		return
	}

	log.Printf(utils.Green("PurgeEndpoints %d vs. %d"), p.Active, len(p.Sockets))

	// Sample the clock once so the age test and the log line agree.
	current := time.Now()
	now := current.Unix()
	nowStr := current.Format("@2006-01-02 15:04:05")

	for i := p.Active; i < len(p.Sockets); i++ {
		candidate := p.Sockets[i]
		if now-candidate.markedOfflineTime <= 5 {
			continue
		}

		// Exchange the candidate with the last element, then cut the tail off.
		// NOTE(review): this assumes p.swap exchanges the two entries'
		// positions within p.Sockets; confirm against its definition.
		lastIndex := len(p.Sockets) - 1
		p.swap(candidate, p.Sockets[lastIndex])

		log.Println(utils.Red("PurgeEndpoints#Purge Old Socket: "), candidate.Addr, nowStr)

		// Clear the slot so the truncated slice does not keep a reference.
		p.Sockets[lastIndex] = nil
		p.Sockets = p.Sockets[:lastIndex]

		// Stay on the same index: it now holds the element swapped in from
		// the tail, which has not been examined yet.
		i--
	}
}
예제 #3
0
// mainBody runs the load balancer for serviceName until it is asked to stop.
//
// It binds a ROUTER socket on frontendAddr (client side) and another on
// backendAddr (worker side), registers both addresses in ZooKeeper under an
// identity derived from frontendAddr, and then loops:
//
//   - messages from the backend are either worker control messages
//     (ready / heartbeat / stop), which update the worker queue, or replies,
//     which are forwarded to the frontend;
//   - messages from the frontend are forwarded to the next available worker,
//     or answered with a "worker not found" error when there is none;
//   - on every heartbeat tick, heartbeats are sent to the workers and expired
//     workers are purged.
//
// On SIGTERM or SIGINT the ZooKeeper registration is removed and the loop
// keeps serving until three seconds pass without a valid message, then
// returns. SIGKILL cannot be caught by a process, so it is not subscribed to.
//
// If a socket cannot be created, configured or bound, the error is logged and
// the function returns without serving.
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) {
	// 1. Connect to ZooKeeper.
	topo := zk.NewTopology(productName, zkAddr)

	// 2. Create the ROUTER/ROUTER socket pair.
	frontend, err := zmq.NewSocket(zmq.ROUTER)
	if err != nil {
		log.Errorf("Error when creating frontend socket: %v\n", err)
		return
	}
	defer frontend.Close()

	backend, err := zmq.NewSocket(zmq.ROUTER)
	if err != nil {
		log.Errorf("Error when creating backend socket: %v\n", err)
		return
	}
	defer backend.Close()

	// The identity is derived from the frontend address,
	// e.g. tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555
	lbServiceName := GetServiceIdentity(frontendAddr)

	if err := frontend.SetIdentity(lbServiceName); err != nil {
		log.Errorf("Error when setting frontend identity: %v\n", err)
		return
	}
	// For clients, e.g. "tcp://*:5555".
	if err := frontend.Bind(frontendAddr); err != nil {
		log.Errorf("Error when binding frontend %s: %v\n", frontendAddr, err)
		return
	}
	// For workers, e.g. "tcp://*:5556".
	if err := backend.Bind(backendAddr); err != nil {
		log.Errorf("Error when binding backend %s: %v\n", backendAddr, err)
		return
	}

	log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr))

	// Queue of backend workers.
	workersQueue := queue.NewPPQueue()

	// Heartbeat tick.
	heartbeatAt := time.Tick(HEARTBEAT_INTERVAL)

	// Assumptions stated by the original author:
	//   1. When zeromq reports a message as readable, all of its parts are.
	//   2. Writes to zeromq are asynchronous and do not block (unless the
	//      amount of data is huge).
	poller := zmq.NewPoller()
	poller.Add(backend, zmq.POLLIN)
	poller.Add(frontend, zmq.POLLIN)

	// 3. Register this load balancer in ZooKeeper.
	endpointInfo := map[string]interface{}{
		"frontend": frontendAddr,
		"backend":  backendAddr,
	}
	topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)

	isAlive := true
	isAliveLock := &sync.RWMutex{}

	// Watch the service node and re-create the registration when the
	// ZooKeeper session expires.
	go func() {
		servicePath := topo.ProductServicePath(serviceName)
		evtbus := make(chan interface{})
		for {
			// The node content is not needed; the watch only monitors state.
			_, err := topo.WatchNode(servicePath, evtbus)
			if err != nil {
				time.Sleep(time.Second)
			} else {
				// Wait for an event. The assertion is checked so that an
				// unexpected value on the channel cannot crash the process.
				e, ok := (<-evtbus).(topozk.Event)
				if ok && (e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching) {
					// The session expired: the old entry is owned by the
					// previous session, so delete it and register again.
					topo.DeleteServiceEndPoint(serviceName, lbServiceName)
					topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)
				}
			}

			isAliveLock.RLock()
			alive := isAlive
			isAliveLock.RUnlock()
			if !alive {
				return
			}
		}
	}()

	// SIGKILL is deliberately absent: it cannot be trapped, so listing it
	// has no effect.
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT)

	// Earliest time at which the loop may exit once shutdown has been requested.
	var suicideTime time.Time

	for {
		sockets, err := poller.Poll(HEARTBEAT_INTERVAL)
		if err != nil {
			// Do not skip the rest of the iteration: a failing poll must not
			// prevent signals and heartbeats from being handled.
			log.Errorf("Error When Pollling: %v\n", err)
			sockets = nil
		}

		hasValidMsg := false
		for _, socket := range sockets {
			switch socket.Socket {
			case backend:
				// As read from the backend socket:
				//   <worker_id, "", proxy_id, "", client_id, "", rpc_data>
				//   <worker_id, "", rpc_control_data>
				msgs, err := backend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error When RecvMessage from background: %v\n", err)
					continue
				}
				workerID, msgs := utils.Unwrap(msgs)

				if len(msgs) == 1 {
					// Control message: <rpc_control_data>
					controlMsg := msgs[0]

					// Skip invalid (empty) control messages.
					if len(controlMsg) == 0 {
						continue
					}

					if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT {
						// Concurrency reported by the worker (third byte,
						// when present); defaults to 1.
						concurrency := 1
						if len(controlMsg) >= 3 {
							concurrency = int(controlMsg[2])
						}

						forceUpdate := controlMsg[0] == PPP_READY
						workersQueue.UpdateWorkerStatus(workerID, concurrency, forceUpdate)
					} else if controlMsg[0] == PPP_STOP {
						// The worker asked to be stopped.
						workersQueue.UpdateWorkerStatus(workerID, -1, true)
					} else {
						log.Errorf("Unexpected Control Message: %d", controlMsg[0])
					}
				} else {
					hasValidMsg = true
					workersQueue.UpdateWorkerStatus(workerID, 0, false)
					// Forward <proxy_id, "", client_id, "", rpc_data> to the
					// frontend. If the frontend peer is gone the message is lost.
					if _, err := frontend.SendMessage(msgs); err != nil {
						log.Errorf("Error when sending to frontend: %v\n", err)
					}
				}
			case frontend:
				hasValidMsg = true
				log.Println("----->Message from front: ")
				msgs, err := frontend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error when reading from frontend: %v\n", err)
					continue
				}

				// msgs: <proxy_id, "", client_id, "", rpc_data>
				if config.VERBOSE {
					utils.PrintZeromqMsgs(msgs, "frontend")
				}
				msgs = utils.TrimLeftEmptyMsg(msgs)

				// Hand the message to a backend worker.
				worker := workersQueue.NextWorker()
				if worker != nil {
					if config.VERBOSE {
						log.Println("Send Msg to Backend worker: ", worker.Identity)
					}
					if _, err := backend.SendMessage(worker.Identity, "", msgs); err != nil {
						log.Errorf("Error when sending to backend: %v\n", err)
					}
					continue
				}

				// No worker is available: answer with an error payload.
				if config.VERBOSE {
					log.Println("No backend worker found")
				}
				if len(msgs) == 0 {
					// No routing frames to reply with; slicing below would panic.
					continue
				}
				errMsg := proxy.GetWorkerNotFoundData("account", 0)

				// Keep the routing frames, replace the payload:
				// <proxy_id, "", client_id, "", errMsg>
				if _, err := frontend.SendMessage(msgs[:len(msgs)-1], errMsg); err != nil {
					log.Errorf("Error when sending to frontend: %v\n", err)
				}
			}
		}

		// Once shutdown has been requested, every valid message pushes the
		// deadline back; the loop exits when the deadline passes while idle.
		isAliveLock.RLock()
		alive := isAlive
		isAliveLock.RUnlock()

		if !alive {
			if hasValidMsg {
				suicideTime = time.Now().Add(time.Second * 3)
			} else if time.Now().After(suicideTime) {
				log.Println(utils.Green("Load Balance Suiside Gracefully"))
				break
			}
		}

		// Heartbeats and signals, without blocking.
		select {
		case <-heartbeatAt:
			now := time.Now()

			// Send a heartbeat to every worker that has not expired yet.
			for _, worker := range workersQueue.WorkerQueue {
				if worker.Expire.After(now) {
					if _, err := backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR); err != nil {
						log.Errorf("Error when sending heartbeat: %v\n", err)
					}
				}
			}

			workersQueue.PurgeExpired()
		case <-ch:
			isAliveLock.Lock()
			wasAlive := isAlive
			isAlive = false
			isAliveLock.Unlock()

			if wasAlive {
				// First stop request: unregister so no new traffic is routed
				// here, then keep running to finish the work in flight.
				topo.DeleteServiceEndPoint(serviceName, lbServiceName)

				suicideTime = time.Now().Add(time.Second * 3)
				log.Println(utils.Red("Schedule to suicide at: "), suicideTime.Format("@2006-01-02 15:04:05"))
			}
		default:
		}
	}
}