// RegisterTemp creates an ephemeral+sequence node under fpath and watches it;
// if the node disappears (or the watch cannot be re-armed) the process kills
// itself via killSelf — per the original author's note, that sends a SIGQUIT.
func RegisterTemp(conn *zk.Conn, fpath string, data []byte) error {
	// path.Join with a single argument only cleans fpath; the trailing "/"
	// makes zookeeper append the sequence number to the created node.
	tpath, err := conn.Create(path.Join(fpath)+"/", data, zk.FlagEphemeral|zk.FlagSequence, zk.WorldACL(zk.PermAll))
	if err != nil {
		log.Error("conn.Create(\"%s\", \"%s\", zk.FlagEphemeral|zk.FlagSequence) error(%v)", fpath, string(data), err)
		return err
	}
	log.Debug("create a zookeeper node:%s", tpath)
	// watch self in the background for the life of the process
	go func() {
		for {
			log.Info("zk path: \"%s\" set a watch", tpath)
			exist, _, watch, err := conn.ExistsW(tpath)
			if err != nil {
				// cannot even arm the watch: treat as fatal
				log.Error("zk.ExistsW(\"%s\") error(%v)", tpath, err)
				log.Warn("zk path: \"%s\" set watch failed, kill itself", tpath)
				killSelf()
				return
			}
			if !exist {
				// our ephemeral node is gone (session expired or deleted)
				log.Warn("zk path: \"%s\" not exist, kill itself", tpath)
				killSelf()
				return
			}
			// block until the node changes, then loop to re-arm the watch
			event := <-watch
			log.Info("zk path: \"%s\" receive a event %v", tpath, event)
		}
	}()
	return nil
}
// main boots the web node: parse flags, load config, set GOMAXPROCS, init
// logging, connect zookeeper, start pprof and the HTTP listeners, drop
// privileges / write pidfile, then block on signal handling until exit.
func main() {
	var err error
	// Parse cmd-line arguments
	flag.Parse()
	log.Info("web ver: \"%s\" start", ver.Version)
	if err = InitConfig(); err != nil {
		panic(err)
	}
	// Set max routine
	runtime.GOMAXPROCS(Conf.MaxProc)
	// init log
	log.LoadConfiguration(Conf.Log)
	defer log.Close()
	// init zookeeper
	zkConn, err := InitZK()
	if err != nil {
		// close the half-open connection before dying
		if zkConn != nil {
			zkConn.Close()
		}
		panic(err)
	}
	// start pprof http
	perf.Init(Conf.PprofBind)
	// start http listen.
	StartHTTP()
	// process init: switch user, chdir, write pidfile
	if err = process.Init(Conf.User, Conf.Dir, Conf.PidFile); err != nil {
		panic(err)
	}
	// init signals, block wait signals
	signalCH := InitSignal()
	HandleSignal(signalCH)
	log.Info("web stop")
}
// Get a user channel from ChannleList. func (l *ChannelList) Get(key string, newOne bool) (Channel, error) { // validate if err := l.validate(key); err != nil { return nil, err } // get a channel bucket b := l.Bucket(key) b.Lock() if c, ok := b.Data[key]; !ok { if !Conf.Auth && newOne { c = NewSeqChannel() b.Data[key] = c b.Unlock() ChStat.IncrCreate() log.Info("user_key:\"%s\" create a new channel", key) return c, nil } else { b.Unlock() log.Warn("user_key:\"%s\" channle not exists", key) return nil, ErrChannelNotExist } } else { b.Unlock() ChStat.IncrAccess() log.Info("user_key:\"%s\" refresh channel bucket expire time", key) return c, nil } }
// StartHTTP start listen http. func StartHTTP() { // external httpServeMux := http.NewServeMux() // 2 httpServeMux.HandleFunc("/2/server/get", GetServer2) // 1.0 httpServeMux.HandleFunc("/1/server/get", GetServer) httpServeMux.HandleFunc("/1/msg/get", GetOfflineMsg) httpServeMux.HandleFunc("/1/time/get", GetTime) // old httpServeMux.HandleFunc("/server/get", GetServer0) httpServeMux.HandleFunc("/msg/get", GetOfflineMsg0) httpServeMux.HandleFunc("/time/get", GetTime0) // internal httpAdminServeMux := http.NewServeMux() // 1.0 httpAdminServeMux.HandleFunc("/1/admin/push/private", PushPrivate) httpAdminServeMux.HandleFunc("/1/admin/push/mprivate", PushMultiPrivate) httpAdminServeMux.HandleFunc("/1/admin/msg/del", DelPrivate) // old httpAdminServeMux.HandleFunc("/admin/push", PushPrivate) httpAdminServeMux.HandleFunc("/admin/msg/clean", DelPrivate) for _, bind := range Conf.HttpBind { log.Info("start http listen addr:\"%s\"", bind) go httpListen(httpServeMux, bind) } for _, bind := range Conf.AdminBind { log.Info("start admin http listen addr:\"%s\"", bind) go httpListen(httpAdminServeMux, bind) } }
// Migrate migrates away the portion of connections which no longer belong to
// this comet node after the node set (nw: node name -> weight) changes.
func (l *ChannelList) Migrate(nw map[string]int) (err error) {
	migrate := false
	// check new/update node: any added node or changed weight forces a migrate
	for k, v := range nw {
		weight, ok := nodeWeightMap[k]
		// not found or weight change
		if !ok || weight != v {
			migrate = true
			break
		}
	}
	// check del node
	if !migrate {
		for k, _ := range nodeWeightMap {
			// node deleted
			if _, ok := nw[k]; !ok {
				migrate = true
				break
			}
		}
	}
	if !migrate {
		return
	}
	// rebuild the ketama consistent-hash ring from the new weights
	ring := ketama.NewRing(ketama.Base)
	for node, weight := range nw {
		ring.AddNode(node, weight)
	}
	ring.Bake()
	// atomic update of the package-level routing state
	// NOTE(review): plain assignments, not sync/atomic — presumably safe on
	// this platform / guarded by the caller; verify concurrent readers.
	nodeWeightMap = nw
	CometRing = ring
	// collect (and detach) every channel whose key now hashes to another node,
	// holding each bucket lock only while scanning that bucket
	channels := []Channel{}
	for i, c := range l.Channels {
		c.Lock()
		for k, v := range c.Data {
			hn := ring.Hash(k)
			if hn != Conf.ZookeeperCometNode {
				channels = append(channels, v)
				delete(c.Data, k)
				log.Debug("migrate delete channel key \"%s\"", k)
			}
		}
		c.Unlock()
		log.Debug("migrate channel bucket:%d finished", i)
	}
	// close all the migrate channels, outside any bucket lock
	log.Info("close all the migrate channels")
	for _, channel := range channels {
		if err := channel.Close(); err != nil {
			log.Error("channel.Close() error(%v)", err)
			continue
		}
	}
	log.Info("close all the migrate channels finished")
	return
}
// watchCometNode watches the children of a named comet node for leader
// selection on failover: children are sorted and the lexically-smallest one
// is registered as the current leader, then the loop blocks on the watch.
func watchCometNode(conn *zk.Conn, node, fpath string, retry, ping time.Duration, ch chan *CometNodeEvent) {
	fpath = path.Join(fpath, node)
	for {
		nodes, watch, err := myzk.GetNodesW(conn, fpath)
		if err == myzk.ErrNodeNotExist {
			// parent node gone: stop watching entirely (see WARN below)
			log.Warn("zk don't have node \"%s\"", fpath)
			break
		} else if err == myzk.ErrNoChild {
			log.Warn("zk don't have any children in \"%s\", retry in %d second", fpath, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		} else if err != nil {
			log.Error("zk path: \"%s\" getNodes error(%v), retry in %d second", fpath, err, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		}
		// leader selection: smallest child name wins
		sort.Strings(nodes)
		if info, err := registerCometNode(conn, nodes[0], fpath, retry, ping, true); err != nil {
			log.Error("zk path: \"%s\" registerCometNode error(%v)", fpath, err)
			time.Sleep(waitNodeDelaySecond)
			continue
		} else {
			// update node info
			ch <- &CometNodeEvent{Event: eventNodeUpdate, Key: node, Value: info}
		}
		// blocking receive event
		event := <-watch
		log.Info("zk path: \"%s\" receive a event: (%v)", fpath, event)
	}
	// WARN, if no persistence node and comet rpc not config
	log.Warn("zk path: \"%s\" never watch again till recreate", fpath)
}
// start stats, called at process start func StartStats() { startTime = time.Now().UnixNano() for _, bind := range Conf.StatBind { log.Info("start stat listen addr:\"%s\"", bind) go statListen(bind) } }
// main boots the comet node: flags, config, GOMAXPROCS, logging, pprof,
// channel list, stats, rpc, comet listeners, zookeeper, process setup,
// then blocks on signal handling until exit.
func main() {
	// parse cmd-line arguments
	flag.Parse()
	log.Info("comet ver: \"%s\" start", ver.Version)
	// init config
	if err := InitConfig(); err != nil {
		panic(err)
	}
	// set max routine
	runtime.GOMAXPROCS(Conf.MaxProc)
	// init log
	log.LoadConfiguration(Conf.Log)
	defer log.Close()
	// start pprof
	perf.Init(Conf.PprofBind)
	// create channel
	// if process exit, close channel
	UserChannel = NewChannelList()
	defer UserChannel.Close()
	// start stats
	StartStats()
	// start rpc
	if err := StartRPC(); err != nil {
		panic(err)
	}
	// start comet
	if err := StartComet(); err != nil {
		panic(err)
	}
	// init zookeeper
	zkConn, err := InitZK()
	if err != nil {
		// close the half-open connection before dying
		if zkConn != nil {
			zkConn.Close()
		}
		panic(err)
	}
	// process init: switch user, chdir, write pidfile
	if err = process.Init(Conf.User, Conf.Dir, Conf.PidFile); err != nil {
		panic(err)
	}
	// init signals, block wait signals
	signalCH := InitSignal()
	HandleSignal(signalCH)
	// exit
	log.Info("comet stop")
}
// StartWebsocket launches one websocket listener goroutine per configured
// bind address; always returns nil.
func StartWebsocket() error {
	for _, bind := range Conf.WebsocketBind {
		log.Info("start websocket listen addr:\"%s\"", bind)
		go websocketListen(bind)
	}
	return nil
}
// StartTCP Start tcp listen. func StartTCP() error { for _, bind := range Conf.TCPBind { log.Info("start tcp listen addr:\"%s\"", bind) go tcpListen(bind) } return nil }
// StartRPC start rpc listen. func StartRPC() error { c := &CometRPC{} rpc.Register(c) for _, bind := range Conf.RPCBind { log.Info("start listen rpc addr: \"%s\"", bind) go rpcListen(bind) } return nil }
// InitRPC start accept rpc call. func InitRPC() error { msg := &MessageRPC{} rpc.Register(msg) for _, bind := range Conf.RPCBind { log.Info("start rpc listen addr: \"%s\"", bind) go rpcListen(bind) } return nil }
// tcpListen binds a TCP listener on bind and accepts connections forever.
// Each accepted connection gets keepalive, socket buffer sizes and a
// first-packet read deadline configured before being handed to
// handleTCPConn on its own goroutine. Panics when the initial bind fails.
func tcpListen(bind string) {
	addr, err := net.ResolveTCPAddr("tcp", bind)
	if err != nil {
		log.Error("net.ResolveTCPAddr(\"tcp\"), %s) error(%v)", bind, err)
		panic(err)
	}
	l, err := net.ListenTCP("tcp", addr)
	if err != nil {
		log.Error("net.ListenTCP(\"tcp4\", \"%s\") error(%v)", bind, err)
		panic(err)
	}
	// free the listener resource
	defer func() {
		log.Info("tcp addr: \"%s\" close", bind)
		if err := l.Close(); err != nil {
			log.Error("listener.Close() error(%v)", err)
		}
	}()
	// init reader buffer instance (reused across connections)
	rb := newtcpBufCache()
	for {
		log.Debug("start accept")
		conn, err := l.AcceptTCP()
		if err != nil {
			log.Error("listener.AcceptTCP() error(%v)", err)
			continue
		}
		if err = conn.SetKeepAlive(Conf.TCPKeepalive); err != nil {
			log.Error("conn.SetKeepAlive() error(%v)", err)
			conn.Close()
			continue
		}
		if err = conn.SetReadBuffer(Conf.RcvbufSize); err != nil {
			log.Error("conn.SetReadBuffer(%d) error(%v)", Conf.RcvbufSize, err)
			conn.Close()
			continue
		}
		if err = conn.SetWriteBuffer(Conf.SndbufSize); err != nil {
			log.Error("conn.SetWriteBuffer(%d) error(%v)", Conf.SndbufSize, err)
			conn.Close()
			continue
		}
		// first packet must sent by client in specified seconds
		if err = conn.SetReadDeadline(time.Now().Add(fitstPacketTimedoutSec)); err != nil {
			log.Error("conn.SetReadDeadLine() error(%v)", err)
			conn.Close()
			continue
		}
		rc := rb.Get()
		// one connection one routine
		go handleTCPConn(conn, rc)
		log.Debug("accept finished")
	}
}
// watchMessageRoot watches the message root path, diffing the children
// against MessageRPC.Clients and emitting add/del events on ch; it blocks
// on the zookeeper watch between iterations and never returns normally.
func watchMessageRoot(conn *zk.Conn, fpath string, ch chan *MessageNodeEvent) error {
	for {
		nodes, watch, err := myzk.GetNodesW(conn, fpath)
		if err == myzk.ErrNodeNotExist {
			log.Warn("zk don't have node \"%s\", retry in %d second", fpath, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		} else if err == myzk.ErrNoChild {
			log.Warn("zk don't have any children in \"%s\", retry in %d second", fpath, waitNodeDelay)
			// all child died, kick all the nodes
			for _, client := range MessageRPC.Clients {
				log.Debug("node: \"%s\" send del node event", client.Addr)
				ch <- &MessageNodeEvent{Event: eventNodeDel, Key: &WeightRpc{Addr: client.Addr, Weight: client.Weight}}
			}
			time.Sleep(waitNodeDelaySecond)
			continue
		} else if err != nil {
			log.Error("getNodes error(%v), retry in %d second", err, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		}
		// addresses seen in this pass, used for the delete diff below
		nodesMap := map[string]bool{}
		// handle new add nodes
		for _, node := range nodes {
			data, _, err := conn.Get(path.Join(fpath, node))
			if err != nil {
				log.Error("zk.Get(\"%s\") error(%v)", path.Join(fpath, node), err)
				continue
			}
			// parse message node info
			nodeInfo := &MessageNodeInfo{}
			if err := json.Unmarshal(data, nodeInfo); err != nil {
				log.Error("json.Unmarshal(\"%s\", nodeInfo) error(%v)", string(data), err)
				continue
			}
			for _, addr := range nodeInfo.Rpc {
				// if not exists in old map then trigger a add event
				if _, ok := MessageRPC.Clients[addr]; !ok {
					ch <- &MessageNodeEvent{Event: eventNodeAdd, Key: &WeightRpc{Addr: addr, Weight: nodeInfo.Weight}}
				}
				nodesMap[addr] = true
			}
		}
		// handle delete nodes: anything known but no longer listed in zk
		for _, client := range MessageRPC.Clients {
			if _, ok := nodesMap[client.Addr]; !ok {
				ch <- &MessageNodeEvent{Event: eventNodeDel, Key: client}
			}
		}
		// blocking wait node changed
		event := <-watch
		log.Info("zk path: \"%s\" receive a event %v", fpath, event)
	}
}
// handleMessageNodeEvent consumes node add/del events from ch and rebuilds
// the MessageRPC load balancer: the client map is copied, the event applied
// to the copy, a new balancer swapped in, and the old one destroyed.
func handleMessageNodeEvent(conn *zk.Conn, retry, ping time.Duration, ch chan *MessageNodeEvent) {
	for {
		ev := <-ch
		// copy map from src so the live map is never mutated in place
		tmpMessageRPCMap := make(map[string]*WeightRpc, len(MessageRPC.Clients))
		for k, v := range MessageRPC.Clients {
			tmpMessageRPCMap[k] = &WeightRpc{Client: v.Client, Addr: v.Addr, Weight: v.Weight}
			// reuse rpc connection: nil out the old entry's Client so the
			// later oldMessageRPC.Destroy() cannot close the moved connection
			v.Client = nil
		}
		// handle event
		if ev.Event == eventNodeAdd {
			log.Info("add message rpc node: \"%s\"", ev.Key.Addr)
			rpcTmp, err := rpc.Dial("tcp", ev.Key.Addr)
			if err != nil {
				log.Error("rpc.Dial(\"tcp\", \"%s\") error(%v)", ev.Key, err)
				log.Warn("discard message rpc node: \"%s\", connect failed", ev.Key)
				continue
			}
			ev.Key.Client = rpcTmp
			tmpMessageRPCMap[ev.Key.Addr] = ev.Key
		} else if ev.Event == eventNodeDel {
			log.Info("del message rpc node: \"%s\"", ev.Key.Addr)
			delete(tmpMessageRPCMap, ev.Key.Addr)
		} else {
			log.Error("unknown node event: %d", ev.Event)
			panic("unknown node event")
		}
		tmpMessageRPC, err := NewRandLB(tmpMessageRPCMap, MessageService, retry, ping, true)
		if err != nil {
			log.Error("NewRandLR() error(%v)", err)
			panic(err)
		}
		oldMessageRPC := MessageRPC
		// atomic update
		MessageRPC = tmpMessageRPC
		// release resource
		oldMessageRPC.Destroy()
		log.Debug("MessageRPC.Client length: %d", len(MessageRPC.Clients))
	}
}
// registerCometNode get infomation of comet node func registerCometNode(conn *zk.Conn, node, fpath string, retry, ping time.Duration, startPing bool) (info *CometNodeInfo, err error) { // get current node info from zookeeper fpath = path.Join(fpath, node) data, _, err := conn.Get(fpath) if err != nil { log.Error("zk.Get(\"%s\") error(%v)", fpath, err) return } info = &CometNodeInfo{} if err = json.Unmarshal(data, info); err != nil { log.Error("json.Unmarshal(\"%s\", nodeData) error(%v)", string(data), err) return } if len(info.RpcAddr) == 0 { log.Error("zk nodes: \"%s\" don't have rpc addr", fpath) err = ErrCometRPC return } // get old node info for finding the old rpc connection oldInfo := cometNodeInfoMap[node] // init comet rpc clients := make(map[string]*WeightRpc, len(info.RpcAddr)) for _, addr := range info.RpcAddr { var ( r *rpc.Client ) if oldInfo != nil && oldInfo.Rpc != nil { if wr, ok := oldInfo.Rpc.Clients[addr]; ok && wr.Client != nil { // reuse the rpc connection must let old client = nil, avoid reclose rpc. oldInfo.Rpc.Clients[addr].Client = nil r = wr.Client } } if r == nil { if r, err = rpc.Dial("tcp", addr); err != nil { log.Error("rpc.Dial(\"%s\") error(%v)", addr, err) return } log.Debug("node:%s addr:%s rpc reconnect", node, addr) } clients[addr] = &WeightRpc{Weight: 1, Addr: addr, Client: r} } // comet rpc use rand load balance lb, err := NewRandLB(clients, cometService, retry, ping, startPing) if err != nil { log.Error("NewRandLR() error(%v)", err) return } info.Rpc = lb log.Info("zk path: \"%s\" register nodes: \"%s\"", fpath, node) return }
// New create a user channle. func (l *ChannelList) New(key string) (Channel, *ChannelBucket, error) { // validate if err := l.validate(key); err != nil { return nil, nil, err } // get a channel bucket b := l.Bucket(key) b.Lock() if c, ok := b.Data[key]; ok { b.Unlock() ChStat.IncrAccess() log.Info("user_key:\"%s\" refresh channel bucket expire time", key) return c, b, nil } else { c = NewSeqChannel() b.Data[key] = c b.Unlock() ChStat.IncrCreate() log.Info("user_key:\"%s\" create a new channel", key) return c, b, nil } }
// RemoveConn implements the Channel RemoveConn method. func (c *SeqChannel) RemoveConn(key string, e *hlist.Element) error { c.mutex.Lock() tmp := c.conn.Remove(e) c.mutex.Unlock() conn, ok := tmp.(*Connection) if !ok { return ErrAssectionConn } close(conn.Buf) ConnStat.IncrRemove() log.Info("user_key:\"%s\" remove conn = %d", key, c.conn.Len()) return nil }
// main boots the message node: flags, config, GOMAXPROCS, logging, pprof,
// storage, rpc, zookeeper, process setup, then blocks on signals.
func main() {
	flag.Parse()
	log.Info("message ver: \"%s\" start", ver.Version)
	if err := InitConfig(); err != nil {
		panic(err)
	}
	// Set max routine
	runtime.GOMAXPROCS(Conf.MaxProc)
	// init log
	log.LoadConfiguration(Conf.Log)
	defer log.Close()
	// start pprof http
	perf.Init(Conf.PprofBind)
	// Initialize redis
	if err := InitStorage(); err != nil {
		panic(err)
	}
	// init rpc service
	if err := InitRPC(); err != nil {
		panic(err)
	}
	// init zookeeper
	zk, err := InitZK()
	if err != nil {
		// close the half-open connection before dying
		if zk != nil {
			zk.Close()
		}
		panic(err)
	}
	// process init: switch user, chdir, write pidfile
	if err = process.Init(Conf.User, Conf.Dir, Conf.PidFile); err != nil {
		panic(err)
	}
	// init signals, block wait signals
	sig := InitSignal()
	HandleSignal(sig)
	// exit
	log.Info("message stop")
}
// getConn get the connection of matching with key using ketama hash func (s *MySQLStorage) getConn(key string) *sql.DB { if len(s.pool) == 0 { return nil } node := s.ring.Hash(key) p, ok := s.pool[node] if !ok { log.Warn("user_key: \"%s\" hit mysql node: \"%s\" not in pool", key, node) return nil } log.Info("user_key: \"%s\" hit mysql node: \"%s\"", key, node) return p }
// clean delete expired messages peroridly. func (s *MySQLStorage) clean() { for { log.Info("clean mysql expired message start") now := time.Now().Unix() affect := int64(0) for _, db := range s.pool { res, err := db.Exec(delExpiredPrivateMsgSQL, now) if err != nil { log.Error("db.Exec(\"%s\", %d) failed (%v)", delExpiredPrivateMsgSQL, now, err) continue } aff, err := res.RowsAffected() if err != nil { log.Error("res.RowsAffected() error(%v)", err) continue } affect += aff } log.Info("clean mysql expired message finish, num: %d", affect) time.Sleep(Conf.MySQLClean) } }
// retPWrite marshal the result and write to client(post). func retPWrite(w http.ResponseWriter, r *http.Request, res map[string]interface{}, body *string, start time.Time) { data, err := json.Marshal(res) if err != nil { log.Error("json.Marshal(\"%v\") error(%v)", res, err) return } dataStr := string(data) if n, err := w.Write([]byte(dataStr)); err != nil { log.Error("w.Write(\"%s\") error(%v)", dataStr, err) } else { log.Debug("w.Write(\"%s\") write %d bytes", dataStr, n) } log.Info("req: \"%s\", post: \"%s\", res:\"%s\", ip:\"%s\", time:\"%fs\"", r.URL.String(), *body, dataStr, r.RemoteAddr, time.Now().Sub(start).Seconds()) }
func rpcListen(bind string) { l, err := net.Listen("tcp", bind) if err != nil { log.Error("net.Listen(\"tcp\", \"%s\") error(%v)", bind, err) panic(err) } // if process exit, then close the rpc bind defer func() { log.Info("rpc addr: \"%s\" close", bind) if err := l.Close(); err != nil { log.Error("listener.Close() error(%v)", err) } }() rpc.Accept(l) }
// HandleSignal fetch signal from chan then do exit or reload. func HandleSignal(c chan os.Signal) { // Block until a signal is received. for { s := <-c log.Info("get a signal %s", s.String()) switch s { case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP, syscall.SIGINT: return case syscall.SIGHUP: // TODO reload //return default: return } } }
// Delete a user channel from ChannleList. func (l *ChannelList) Delete(key string) (Channel, error) { // get a channel bucket b := l.Bucket(key) b.Lock() if c, ok := b.Data[key]; !ok { b.Unlock() log.Warn("user_key:\"%s\" delete channle not exists", key) return nil, ErrChannelNotExist } else { delete(b.Data, key) b.Unlock() ChStat.IncrDelete() log.Info("user_key:\"%s\" delete channel", key) return c, nil } }
// Close close all channel. func (l *ChannelList) Close() { log.Info("channel close") chs := make([]Channel, 0, l.Count()) for _, c := range l.Channels { c.Lock() for _, c := range c.Data { chs = append(chs, c) } c.Unlock() } // close all channels for _, c := range chs { if err := c.Close(); err != nil { log.Error("c.Close() error(%v)", err) } } }
// DelPrivate implements the Storage DelPrivate method. func (s *MySQLStorage) DelPrivate(key string) error { db := s.getConn(key) if db == nil { return ErrNoMySQLConn } res, err := db.Exec(delPrivateMsgSQL, key) if err != nil { log.Error("db.Exec(\"%s\", \"%s\") error(%v)", delPrivateMsgSQL, key, err) return err } rows, err := res.RowsAffected() if err != nil { log.Error("res.RowsAffected() error(%v)", err) return err } log.Info("user_key: \"%s\" clean message num: %d", rows) return nil }
// retWrite marshal the result and write to client(get). func retWrite(w http.ResponseWriter, r *http.Request, res map[string]interface{}, callback string, start time.Time) { data, err := json.Marshal(res) if err != nil { log.Error("json.Marshal(\"%v\") error(%v)", res, err) return } dataStr := "" if callback == "" { // Normal json dataStr = string(data) } else { // Jsonp dataStr = fmt.Sprintf("%s(%s)", callback, string(data)) } if n, err := w.Write([]byte(dataStr)); err != nil { log.Error("w.Write(\"%s\") error(%v)", dataStr, err) } else { log.Debug("w.Write(\"%s\") write %d bytes", dataStr, n) } log.Info("req: \"%s\", res:\"%s\", ip:\"%s\", time:\"%fs\"", r.URL.String(), dataStr, r.RemoteAddr, time.Now().Sub(start).Seconds()) }
// AddConn implements the Channel AddConn method. func (c *SeqChannel) AddConn(key string, conn *Connection) (*hlist.Element, error) { c.mutex.Lock() if c.conn.Len()+1 > Conf.MaxSubscriberPerChannel { c.mutex.Unlock() log.Error("user_key:\"%s\" exceed conn", key) return nil, ErrMaxConn } // send first heartbeat to tell client service is ready for accept heartbeat if _, err := conn.Conn.Write(HeartbeatReply); err != nil { c.mutex.Unlock() log.Error("user_key:\"%s\" write first heartbeat to client error(%v)", key, err) return nil, err } // add conn conn.Buf = make(chan []byte, Conf.MsgBufNum) conn.HandleWrite(key) e := c.conn.PushFront(conn) c.mutex.Unlock() ConnStat.IncrAdd() log.Info("user_key:\"%s\" add conn = %d", key, c.conn.Len()) return e, nil }
// watchCometRoot watches the gopush root node, diffing its children against
// cometNodeInfoMap and emitting add/del events on ch; it blocks on the
// zookeeper watch between iterations and never returns normally.
func watchCometRoot(conn *zk.Conn, fpath string, ch chan *CometNodeEvent) error {
	for {
		nodes, watch, err := myzk.GetNodesW(conn, fpath)
		if err == myzk.ErrNodeNotExist {
			log.Warn("zk don't have node \"%s\", retry in %d second", fpath, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		} else if err == myzk.ErrNoChild {
			// all children gone: emit a delete for every known node
			log.Warn("zk don't have any children in \"%s\", retry in %d second", fpath, waitNodeDelay)
			for node, _ := range cometNodeInfoMap {
				ch <- &CometNodeEvent{Event: eventNodeDel, Key: node}
			}
			time.Sleep(waitNodeDelaySecond)
			continue
		} else if err != nil {
			log.Error("getNodes error(%v), retry in %d second", err, waitNodeDelay)
			time.Sleep(waitNodeDelaySecond)
			continue
		}
		// nodes seen in this pass, used for the delete diff below
		nodesMap := map[string]bool{}
		// handle new add nodes
		for _, node := range nodes {
			if _, ok := cometNodeInfoMap[node]; !ok {
				ch <- &CometNodeEvent{Event: eventNodeAdd, Key: node}
			}
			nodesMap[node] = true
		}
		// handle delete nodes: anything known but no longer listed in zk
		for node, _ := range cometNodeInfoMap {
			if _, ok := nodesMap[node]; !ok {
				ch <- &CometNodeEvent{Event: eventNodeDel, Key: node}
			}
		}
		// blocking wait node changed
		event := <-watch
		log.Info("zk path: \"%s\" receive a event %v", fpath, event)
	}
}