Пример #1
0
//
// 删除Service Endpoint
//
func (s *ServiceEndpoint) DeleteServiceEndpoint(top *zk.Topology) {
	path := top.ProductServiceEndPointPath(s.Service, s.ServiceId)
	if ok, _ := top.Exist(path); ok {
		zkhelper.DeleteRecursive(top.ZkConn, path, -1)
		log.Println(Red("DeleteServiceEndpoint"), "Path: ", path)
	}
}
Пример #2
0
func GetServiceEndpoint(top *zk.Topology, service string, serviceId string) (endpoint *ServiceEndpoint, err error) {

	path := top.ProductServiceEndPointPath(service, serviceId)
	data, _, err := top.ZkConn.Get(path)
	if err != nil {
		return nil, err
	}
	endpoint = &ServiceEndpoint{}
	err = json.Unmarshal(data, endpoint)
	if err != nil {
		return nil, err
	} else {
		return endpoint, nil
	}
}
Пример #3
0
// 创建一个BackService
func NewBackService(serviceName string, poller *zmq.Poller, topo *zk.Topology) *BackService {

	backSockets := NewBackSockets(poller)

	service := &BackService{
		ServiceName: serviceName,
		backend:     backSockets,
		poller:      poller,
		topo:        topo,
	}

	var evtbus chan interface{} = make(chan interface{}, 2)
	servicePath := topo.ProductServicePath(serviceName)
	endpoints, err := topo.WatchChildren(servicePath, evtbus)
	if err != nil {
		log.Println("Error: ", err)
		panic("Reading Service List Failed: ")
	}

	go func() {
		for true {
			// 如何监听endpoints的变化呢?
			addrSet := make(map[string]bool)
			nowStr := time.Now().Format("@2006-01-02 15:04:05")
			for _, endpoint := range endpoints {
				// 这些endpoint变化该如何处理呢?
				log.Println(utils.Green("---->Find Endpoint: "), endpoint, "For Service: ", serviceName)
				endpointInfo, _ := topo.GetServiceEndPoint(serviceName, endpoint)

				addr, ok := endpointInfo["frontend"]
				if ok {
					addrStr := addr.(string)
					log.Println(utils.Green("---->Add endpoint to backend: "), addrStr, nowStr, "For Service: ", serviceName)
					addrSet[addrStr] = true
				}
			}

			service.backend.UpdateEndpointAddrs(addrSet)

			// 等待事件
			<-evtbus
			// 读取数据,继续监听
			endpoints, err = topo.WatchChildren(servicePath, evtbus)
		}
	}()

	ticker := time.NewTicker(time.Millisecond * 1000)
	go func() {
		for _ = range ticker.C {
			service.backend.PurgeEndpoints()
		}
	}()

	return service

}
Пример #4
0
//
// 注册一个服务的Endpoints
//
func (s *ServiceEndpoint) AddServiceEndpoint(topo *zk.Topology) error {
	path := topo.ProductServiceEndPointPath(s.Service, s.ServiceId)
	data, err := json.Marshal(s)
	if err != nil {
		return err
	}

	// 创建Service(XXX: Service本身不包含数据)
	zk.CreateRecursive(topo.ZkConn, os_path.Dir(path), "", 0, zkhelper.DefaultDirACLs())

	// 当前的Session挂了,服务就下线
	// topo.FlagEphemeral

	// 参考: https://www.box.com/blog/a-gotcha-when-using-zookeeper-ephemeral-nodes/
	// 如果之前的Session信息还存在,则先删除;然后再添加
	topo.ZkConn.Delete(path, -1)
	var pathCreated string
	pathCreated, err = topo.ZkConn.Create(path, []byte(data), int32(zookeeper.FlagEphemeral), zkhelper.DefaultFileACLs())

	log.Println(Green("AddServiceEndpoint"), "Path: ", pathCreated, ", Error: ", err)
	return err
}
Пример #5
0
func NewBackServices(poller *zmq.Poller, productName string, topo *zk.Topology) *BackServices {

	// 创建BackServices
	result := &BackServices{
		Services:        make(map[string]*BackService),
		OfflineServices: make(map[string]*BackService),
		poller:          poller,
		topo:            topo,
	}

	var evtbus chan interface{} = make(chan interface{}, 2)
	servicesPath := topo.ProductServicesPath()
	path, e1 := topo.CreateDir(servicesPath) // 保证Service目录存在,否则会报错
	fmt.Println("Path: ", path, "error: ", e1)
	services, err := topo.WatchChildren(servicesPath, evtbus)
	if err != nil {
		log.Println("Error: ", err)
		// TODO: 这个地方需要优化
		panic("Reading Service List Failed")
	}

	go func() {
		for true {

			result.Lock()
			for _, service := range services {
				log.Println("Service: ", service)
				if _, ok := result.Services[service]; !ok {
					result.addBackService(service)
				}
			}
			result.Unlock()

			// 等待事件
			<-evtbus
			// 读取数据,继续监听(连接过期了就过期了,再次Watch即可)
			services, err = topo.WatchChildren(servicesPath, evtbus)
		}
	}()

	// 读取zk, 等待
	log.Println("ProductName: ", result.topo.ProductName)

	return result
}
Пример #6
0
func mainBody(zkAddr string, productName string, serviceName string, frontendAddr string, backendAddr string) {
	// 1. 创建到zk的连接
	var topo *zk.Topology
	topo = zk.NewTopology(productName, zkAddr)

	// 2. 启动服务
	frontend, _ := zmq.NewSocket(zmq.ROUTER)
	backend, _ := zmq.NewSocket(zmq.ROUTER)
	defer frontend.Close()
	defer backend.Close()

	// ROUTER/ROUTER绑定到指定的端口

	// tcp://127.0.0.1:5555 --> tcp://127_0_0_1:5555
	lbServiceName := GetServiceIdentity(frontendAddr)

	frontend.SetIdentity(lbServiceName)
	frontend.Bind(frontendAddr) //  For clients "tcp://*:5555"
	backend.Bind(backendAddr)   //  For workers "tcp://*:5556"

	log.Printf("FrontAddr: %s, BackendAddr: %s\n", magenta(frontendAddr), magenta(backendAddr))

	// 后端的workers queue
	workersQueue := queue.NewPPQueue()

	// 心跳间隔1s
	heartbeat_at := time.Tick(HEARTBEAT_INTERVAL)

	poller1 := zmq.NewPoller()
	poller1.Add(backend, zmq.POLLIN)

	poller2 := zmq.NewPoller()
	// 前提:
	//     1. 当zeromq通知消息可读时,那么整个Message(所有的msg parts)都可读
	//	   2. 往zeromq写数据时,是异步的,因此也不存在block(除非数据量巨大)
	//
	poller2.Add(backend, zmq.POLLIN)
	poller2.Add(frontend, zmq.POLLIN)

	// 3. 注册zk
	var endpointInfo map[string]interface{} = make(map[string]interface{})
	endpointInfo["frontend"] = frontendAddr
	endpointInfo["backend"] = backendAddr

	topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)

	isAlive := true
	isAliveLock := &sync.RWMutex{}

	go func() {
		servicePath := topo.ProductServicePath(serviceName)
		evtbus := make(chan interface{})
		for true {
			// 只是为了监控状态
			_, err := topo.WatchNode(servicePath, evtbus)

			if err == nil {
				// 等待事件
				e := (<-evtbus).(topozk.Event)
				if e.State == topozk.StateExpired || e.Type == topozk.EventNotWatching {
					// Session过期了,则需要删除之前的数据,因为这个数据的Owner不是当前的Session
					topo.DeleteServiceEndPoint(serviceName, lbServiceName)
					topo.AddServiceEndPoint(serviceName, lbServiceName, endpointInfo)
				}
			} else {
				time.Sleep(time.Second)
			}

			isAliveLock.RLock()
			isAlive1 := isAlive
			isAliveLock.RUnlock()
			if !isAlive1 {
				break
			}

		}
	}()

	ch := make(chan os.Signal, 1)

	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT, syscall.SIGKILL)
	// syscall.SIGKILL
	// kill -9 pid
	// kill -s SIGKILL pid 还是留给运维吧
	//

	// 自动退出条件:
	//

	var suideTime time.Time

	for {
		var sockets []zmq.Polled
		var err error

		sockets, err = poller2.Poll(HEARTBEAT_INTERVAL)
		if err != nil {
			//			break //  Interrupted
			log.Errorf("Error When Pollling: %v\n", err)
			continue
		}

		hasValidMsg := false
		for _, socket := range sockets {
			switch socket.Socket {
			case backend:
				// 格式:
				// 后端:
				// 	             <"", proxy_id, "", client_id, "", rpc_data>
				// Backend Socket读取到的:
				//		<wokerid, "", proxy_id, "", client_id, "", rpc_data>
				//
				msgs, err := backend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error When RecvMessage from background: %v\n", err)
					continue
				}
				if config.VERBOSE {
					// log.Println("Message from backend: ", msgs)
				}
				// 消息类型:
				// msgs: <worker_id, "", proxy_id, "", client_id, "", rpc_data>
				//       <worker_id, "", rpc_control_data>
				worker_id, msgs := utils.Unwrap(msgs)

				// rpc_control_data 控制信息
				// msgs: <rpc_control_data>
				if len(msgs) == 1 {
					// PPP_READY
					// PPP_HEARTBEAT
					controlMsg := msgs[0]

					// 碰到无效的信息,则直接跳过去
					if len(controlMsg) == 0 {
						continue
					}
					if config.VERBOSE {
						// log.Println("Got Message From Backend...")
					}

					if controlMsg[0] == PPP_READY || controlMsg[0] == PPP_HEARTBEAT {
						// 后端服务剩余的并发能力
						var concurrency int
						if len(controlMsg) >= 3 {
							concurrency = int(controlMsg[2])
						} else {
							concurrency = 1
						}
						if config.VERBOSE {
							// utils.PrintZeromqMsgs(msgs, "control msg")
						}

						force_update := controlMsg[0] == PPP_READY
						workersQueue.UpdateWorkerStatus(worker_id, concurrency, force_update)
					} else if controlMsg[0] == PPP_STOP {
						// 停止指定的后端服务
						workersQueue.UpdateWorkerStatus(worker_id, -1, true)
					} else {
						log.Errorf("Unexpected Control Message: %d", controlMsg[0])
					}
				} else {
					hasValidMsg = true
					// 将信息发送到前段服务, 如果前端服务挂了,则消息就丢失
					//					log.Println("Send Message to frontend")
					workersQueue.UpdateWorkerStatus(worker_id, 0, false)
					// msgs: <proxy_id, "", client_id, "", rpc_data>
					frontend.SendMessage(msgs)
				}
			case frontend:
				hasValidMsg = true
				log.Println("----->Message from front: ")
				msgs, err := frontend.RecvMessage(0)
				if err != nil {
					log.Errorf("Error when reading from frontend: %v\n", err)
					continue
				}

				// msgs:
				// <proxy_id, "", client_id, "", rpc_data>
				if config.VERBOSE {
					utils.PrintZeromqMsgs(msgs, "frontend")
				}
				msgs = utils.TrimLeftEmptyMsg(msgs)

				// 将msgs交给后端服务器
				worker := workersQueue.NextWorker()
				if worker != nil {
					if config.VERBOSE {
						log.Println("Send Msg to Backend worker: ", worker.Identity)
					}
					backend.SendMessage(worker.Identity, "", msgs)
				} else {
					// 怎么返回错误消息呢?
					if config.VERBOSE {
						log.Println("No backend worker found")
					}
					errMsg := proxy.GetWorkerNotFoundData("account", 0)

					// <proxy_id, "", client_id, "", rpc_data>
					frontend.SendMessage(msgs[0:(len(msgs)-1)], errMsg)
				}
			}
		}

		// 如果安排的suiside, 则需要处理 suiside的时间
		isAliveLock.RLock()
		isAlive1 := isAlive
		isAliveLock.RUnlock()

		if !isAlive1 {
			if hasValidMsg {
				suideTime = time.Now().Add(time.Second * 3)
			} else {
				if time.Now().After(suideTime) {
					log.Println(utils.Green("Load Balance Suiside Gracefully"))
					break
				}
			}
		}

		// 心跳同步
		select {
		case <-heartbeat_at:
			now := time.Now()

			// 给workerQueue中的所有的worker发送心跳消息
			for _, worker := range workersQueue.WorkerQueue {
				if worker.Expire.After(now) {
					//					log.Println("Sending Hb to Worker: ", worker.Identity)
					backend.SendMessage(worker.Identity, "", PPP_HEARTBEAT_STR)
				}
			}

			workersQueue.PurgeExpired()
		case sig := <-ch:
			isAliveLock.Lock()
			isAlive1 := isAlive
			isAlive = false
			isAliveLock.Unlock()

			if isAlive1 {
				// 准备退出(但是需要处理完毕手上的活)

				// 需要退出:
				topo.DeleteServiceEndPoint(serviceName, lbServiceName)

				if sig == syscall.SIGKILL {
					log.Println(utils.Red("Got Kill Signal, Return Directly"))
					break
				} else {
					suideTime = time.Now().Add(time.Second * 3)
					log.Println(utils.Red("Schedule to suicide at: "), suideTime.Format("@2006-01-02 15:04:05"))
				}
			}
		default:
		}
	}
}