func (c *ClusterManager) getSelf() *api.Node { var node = api.Node{} // Get physical node info. node.Id = c.config.NodeId node.Status = api.StatusOk node.Ip, _ = externalIp() node.Timestamp = time.Now() return &node }
func (c *ClusterManager) updateClusterStatus() { gossipStoreKey := types.StoreKey(heartbeatKey + c.config.ClusterId) for { node := c.getCurrentState() c.nodeCache[node.Id] = *node // Process heartbeats from other nodes... gossipValues := c.gossip.GetStoreKeyValue(gossipStoreKey) numNodes := 0 for id, nodeInfo := range gossipValues { numNodes = numNodes + 1 // Check to make sure we are not exceeding the size of the cluster. if c.size > 0 && numNodes > c.size { dlog.Fatalf("Fatal, number of nodes in the cluster has"+ "exceeded the cluster size: %d > %d", numNodes, c.size) os.Exit(-1) } // Ignore updates from self node. if id == types.NodeId(node.Id) { continue } // Notify node status change if required. newNodeInfo := api.Node{} newNodeInfo.Id = string(id) newNodeInfo.Status = api.Status_STATUS_OK switch { case nodeInfo.Status == types.NODE_STATUS_DOWN: newNodeInfo.Status = api.Status_STATUS_OFFLINE lastStatus, ok := c.nodeStatuses[string(id)] if ok && lastStatus == newNodeInfo.Status { break } // Check if it is a stale update ne := c.getLatestNodeConfig(string(id)) if ne != nil && nodeInfo.GenNumber != 0 && nodeInfo.GenNumber < ne.GenNumber { dlog.Warnln("Detected stale update for node ", id, " going down, ignoring it") c.gossip.MarkNodeHasOldGen(id) break } c.nodeStatuses[string(id)] = newNodeInfo.Status dlog.Warnln("Detected node ", id, " to be offline due to inactivity.") for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() { err := e.Value.(ClusterListener).Update(&newNodeInfo) if err != nil { dlog.Warnln("Failed to notify ", e.Value.(ClusterListener).String()) } } case nodeInfo.Status == types.NODE_STATUS_DOWN_WAITING_FOR_NEW_UPDATE: newNodeInfo.Status = api.Status_STATUS_OFFLINE lastStatus, ok := c.nodeStatuses[string(id)] if ok && lastStatus == newNodeInfo.Status { break } c.nodeStatuses[string(id)] = newNodeInfo.Status dlog.Warnln("Detected node ", newNodeInfo.Id, " to be offline due to inactivity.") for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() { err := e.Value.(ClusterListener).Update(&newNodeInfo) if err != nil { dlog.Warnln("Failed to notify ", e.Value.(ClusterListener).String()) } } case nodeInfo.Status == types.NODE_STATUS_UP: newNodeInfo.Status = api.Status_STATUS_OK lastStatus, ok := c.nodeStatuses[string(id)] if ok && lastStatus == newNodeInfo.Status { break } c.nodeStatuses[string(id)] = newNodeInfo.Status // A node discovered in the cluster. dlog.Warnln("Detected node ", newNodeInfo.Id, " to be in the cluster.") for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() { err := e.Value.(ClusterListener).Add(&newNodeInfo) if err != nil { dlog.Warnln("Failed to notify ", e.Value.(ClusterListener).String()) } } } // Update cache. if nodeInfo.Value != nil { n, ok := nodeInfo.Value.(api.Node) if ok { n.Status = newNodeInfo.Status c.nodeCache[n.Id] = n } else { c.nodeCache[newNodeInfo.Id] = newNodeInfo } } else { newNodeInfo.Status = api.Status_STATUS_OFFLINE c.nodeCache[newNodeInfo.Id] = newNodeInfo } } time.Sleep(2 * time.Second) } }