예제 #1
0
// joinCluster brings this node into the cluster described by db.
//
// When exist is false the node is not yet in the cluster map, so Init is
// invoked on every listener first. Join is then invoked on every listener,
// and finally every other entry in db.NodeEntries is registered with gossip,
// except entries that share this node's management IP.
//
// On the first listener failure self.Status is set to STATUS_ERROR, the
// failure is logged, cleanupInit is run when the node was not already part
// of the cluster, and the listener's error is returned. Returns nil when
// every step succeeds.
func (c *ClusterManager) joinCluster(db *Database, self *api.Node, exist bool) error {
	if !exist {
		// New node: every listener must initialize before the join is announced.
		for elem := c.listeners.Front(); elem != nil; elem = elem.Next() {
			listener := elem.Value.(ClusterListener)
			if err := listener.Init(self, db); err != nil {
				self.Status = api.Status_STATUS_ERROR
				dlog.Warnf("Failed to initialize Init %s: %v",
					listener.String(), err)
				c.cleanupInit(db, self)
				return err
			}
		}
	}

	// Announce the join to every listener.
	for elem := c.listeners.Front(); elem != nil; elem = elem.Next() {
		listener := elem.Value.(ClusterListener)
		if err := listener.Join(self, db); err != nil {
			self.Status = api.Status_STATUS_ERROR
			dlog.Warnf("Failed to initialize Join %s: %v",
				listener.String(), err)

			// Only undo initialization that this call performed.
			if !exist {
				c.cleanupInit(db, self)
			}
			return err
		}
	}

	// Start gossiping with every other node recorded in the database.
	for id, entry := range db.NodeEntries {
		if id == c.config.NodeId {
			continue
		}

		// Another entry carrying our own management IP is treated as stale.
		if entry.MgmtIp == self.MgmtIp {
			dlog.Warnf("Warning, Detected node %s with the same IP %s in the database.  Will not connect to this node.",
				id, entry.MgmtIp)
			continue
		}

		dlog.Infof("Connecting to node %s with IP %s.", id, entry.MgmtIp)
		c.gossip.AddNode(entry.MgmtIp+":9002", types.NodeId(id))
	}

	return nil
}
예제 #2
0
// getSelf builds a fresh api.Node describing this node: its configured ID,
// an OK status, the address reported by externalIp, and the current time.
func (c *ClusterManager) getSelf() *api.Node {
	// The error from externalIp is discarded; Ip is set to whatever value
	// externalIp returned alongside it.
	addr, _ := externalIp()

	return &api.Node{
		Id:        c.config.NodeId,
		Status:    api.StatusOk,
		Ip:        addr,
		Timestamp: time.Now(),
	}
}
예제 #3
0
// initCluster notifies every listener that a new cluster is being
// initialized and then joins this node to it via joinCluster.
//
// If a listener's ClusterInit fails, self.Status is set to STATUS_ERROR, the
// listener is logged, and its error is returned without attempting the join.
// An error from joinCluster is logged and returned as-is. Returns nil when
// both phases succeed.
func (c *ClusterManager) initCluster(db *Database, self *api.Node, exist bool) error {
	// Phase 1: let each listener set up state for the new cluster.
	for elem := c.listeners.Front(); elem != nil; elem = elem.Next() {
		listener := elem.Value.(ClusterListener)
		if initErr := listener.ClusterInit(self, db); initErr != nil {
			self.Status = api.Status_STATUS_ERROR
			dlog.Printf("Failed to initialize %s",
				listener.String())
			return initErr
		}
	}

	// Phase 2: join the cluster that was just initialized.
	if joinErr := c.joinCluster(db, self, exist); joinErr != nil {
		dlog.Printf("Failed to join new cluster")
		return joinErr
	}

	return nil
}
예제 #4
0
// updateClusterStatus loops forever, refreshing the node cache from gossip
// heartbeats and notifying listeners about peer status changes. It sleeps
// two seconds between passes and never returns.
//
// On every pass it:
//   - stores this node's current state in c.nodeCache;
//   - reads the heartbeat values published under the cluster's gossip key;
//   - terminates the process if, with c.size > 0, more heartbeat entries are
//     seen than c.size;
//   - for every peer (entries for this node are skipped), maps the gossip
//     status to an api status; when that differs from the status recorded
//     in c.nodeStatuses (or none is recorded) it records the new status and
//     calls Update (offline) or Add (online) on the listeners while
//     c.gEnabled is true;
//   - writes the peer's entry into c.nodeCache.
func (c *ClusterManager) updateClusterStatus() {
	gossipStoreKey := types.StoreKey(heartbeatKey + c.config.ClusterId)

	for {
		node := c.getCurrentState()
		c.nodeCache[node.Id] = *node

		// Process heartbeats from other nodes...
		gossipValues := c.gossip.GetStoreKeyValue(gossipStoreKey)

		numNodes := 0
		for id, nodeInfo := range gossipValues {
			numNodes++

			// Check to make sure we are not exceeding the size of the cluster.
			// A c.size of zero (or less) disables the check.
			if c.size > 0 && numNodes > c.size {
				dlog.Fatalf("Fatal, number of nodes in the cluster has "+
					"exceeded the cluster size: %d > %d", numNodes, c.size)
				// NOTE(review): presumably dlog.Fatalf already exits; the
				// explicit exit is kept in case it does not — confirm.
				os.Exit(-1)
			}

			// Ignore updates from self node.
			if id == types.NodeId(node.Id) {
				continue
			}

			// Notify node status change if required. The status defaults to
			// OK and is overridden by the cases below.
			newNodeInfo := api.Node{}
			newNodeInfo.Id = string(id)
			newNodeInfo.Status = api.Status_STATUS_OK

			// Inside the switch, a bare break only leaves the switch:
			// the cache update below still runs for this peer.
			switch nodeInfo.Status {
			case types.NODE_STATUS_DOWN:
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					// Already recorded as offline; nothing to announce.
					break
				}

				// Check if it is a stale update: a non-zero generation
				// number older than the latest known node config.
				ne := c.getLatestNodeConfig(string(id))
				if ne != nil && nodeInfo.GenNumber != 0 &&
					nodeInfo.GenNumber < ne.GenNumber {
					dlog.Warnln("Detected stale update for node ", id,
						" going down, ignoring it")
					c.gossip.MarkNodeHasOldGen(id)
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				dlog.Warnln("Detected node ", id,
					" to be offline due to inactivity.")

				// c.gEnabled is re-checked before each listener is notified.
				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case types.NODE_STATUS_DOWN_WAITING_FOR_NEW_UPDATE:
				// Handled like NODE_STATUS_DOWN, but without the stale
				// generation check.
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be offline due to inactivity.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case types.NODE_STATUS_UP:
				newNodeInfo.Status = api.Status_STATUS_OK
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				// A node discovered in the cluster.
				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be in the cluster.")

				// Newly seen online nodes are announced with Add, not Update.
				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Add(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}
			}

			// Update cache.
			if nodeInfo.Value != nil {
				n, ok := nodeInfo.Value.(api.Node)
				if ok {
					// Keep the gossiped node data but overlay the status
					// computed above; it is cached under the payload's own Id.
					n.Status = newNodeInfo.Status
					c.nodeCache[n.Id] = n
				} else {
					// Payload is not an api.Node: cache the minimal entry.
					c.nodeCache[newNodeInfo.Id] = newNodeInfo
				}
			} else {
				// No payload at all: the peer is cached as offline,
				// regardless of the status computed above.
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				c.nodeCache[newNodeInfo.Id] = newNodeInfo
			}
		}

		time.Sleep(2 * time.Second)
	}
}