示例#1
0
func (c *ClusterManager) getSelf() *api.Node {
	var node = api.Node{}

	// Get physical node info.
	node.Id = c.config.NodeId
	node.Status = api.StatusOk
	node.Ip, _ = externalIp()
	node.Timestamp = time.Now()

	return &node
}
示例#2
0
func (c *ClusterManager) updateClusterStatus() {
	gossipStoreKey := types.StoreKey(heartbeatKey + c.config.ClusterId)

	for {
		node := c.getCurrentState()
		c.nodeCache[node.Id] = *node

		// Process heartbeats from other nodes...
		gossipValues := c.gossip.GetStoreKeyValue(gossipStoreKey)

		numNodes := 0
		for id, nodeInfo := range gossipValues {
			numNodes = numNodes + 1

			// Check to make sure we are not exceeding the size of the cluster.
			if c.size > 0 && numNodes > c.size {
				dlog.Fatalf("Fatal, number of nodes in the cluster has"+
					"exceeded the cluster size: %d > %d", numNodes, c.size)
				os.Exit(-1)
			}

			// Ignore updates from self node.
			if id == types.NodeId(node.Id) {
				continue
			}

			// Notify node status change if required.
			newNodeInfo := api.Node{}
			newNodeInfo.Id = string(id)
			newNodeInfo.Status = api.Status_STATUS_OK

			switch {
			case nodeInfo.Status == types.NODE_STATUS_DOWN:
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}

				// Check if it is a stale update
				ne := c.getLatestNodeConfig(string(id))
				if ne != nil && nodeInfo.GenNumber != 0 &&
					nodeInfo.GenNumber < ne.GenNumber {
					dlog.Warnln("Detected stale update for node ", id,
						" going down, ignoring it")
					c.gossip.MarkNodeHasOldGen(id)
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				dlog.Warnln("Detected node ", id,
					" to be offline due to inactivity.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case nodeInfo.Status == types.NODE_STATUS_DOWN_WAITING_FOR_NEW_UPDATE:
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be offline due to inactivity.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case nodeInfo.Status == types.NODE_STATUS_UP:
				newNodeInfo.Status = api.Status_STATUS_OK
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				// A node discovered in the cluster.
				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be in the cluster.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Add(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}
			}

			// Update cache.
			if nodeInfo.Value != nil {
				n, ok := nodeInfo.Value.(api.Node)
				if ok {
					n.Status = newNodeInfo.Status
					c.nodeCache[n.Id] = n
				} else {
					c.nodeCache[newNodeInfo.Id] = newNodeInfo
				}
			} else {
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				c.nodeCache[newNodeInfo.Id] = newNodeInfo
			}
		}

		time.Sleep(2 * time.Second)
	}
}