// Initialize node and alert listeners that we are joining the cluster.
func (c *ClusterManager) joinCluster(db *Database, self *api.Node, exist bool) error {
	var err error

	// If I am already in the cluster map, don't add me again.
	if exist {
		goto found
	}

	// Alert all listeners that we are a new node joining an existing cluster.
	for e := c.listeners.Front(); e != nil; e = e.Next() {
		err = e.Value.(ClusterListener).Init(self, db)
		if err != nil {
			self.Status = api.Status_STATUS_ERROR
			dlog.Warnf("Failed to initialize %s: %v",
				e.Value.(ClusterListener).String(), err)
			c.cleanupInit(db, self)
			goto done
		}
	}

found:
	// Alert all listeners that we are joining the cluster.
	for e := c.listeners.Front(); e != nil; e = e.Next() {
		err = e.Value.(ClusterListener).Join(self, db)
		if err != nil {
			self.Status = api.Status_STATUS_ERROR
			dlog.Warnf("Failed to join %s: %v",
				e.Value.(ClusterListener).String(), err)
			if !exist {
				c.cleanupInit(db, self)
			}
			goto done
		}
	}

	for id, n := range db.NodeEntries {
		if id != c.config.NodeId {
			// Check to see if the IP is the same. If it is, then we have a stale entry.
			if n.MgmtIp == self.MgmtIp {
				dlog.Warnf("Detected node %s with the same IP %s in the database. "+
					"Will not connect to this node.", id, n.MgmtIp)
			} else {
				// Gossip with this node.
				dlog.Infof("Connecting to node %s with IP %s.", id, n.MgmtIp)
				c.gossip.AddNode(n.MgmtIp+":9002", types.NodeId(id))
			}
		}
	}

done:
	return err
}
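// Illustrative sketch (assumption, not part of the original file): the callbacks
// invoked above imply a ClusterListener that reacts to cluster lifecycle events.
// A hypothetical no-op listener covering only the methods used in this file might
// look like the following; the real interface may declare additional methods not
// shown here.
//
//	type loggingListener struct{}
//
//	func (l *loggingListener) String() string { return "loggingListener" }
//	func (l *loggingListener) ClusterInit(self *api.Node, db *Database) error { return nil }
//	func (l *loggingListener) Init(self *api.Node, db *Database) error        { return nil }
//	func (l *loggingListener) Join(self *api.Node, db *Database) error        { return nil }
//	func (l *loggingListener) Add(node *api.Node) error                       { return nil }
//	func (l *loggingListener) Update(node *api.Node) error                    { return nil }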
// getSelf returns a freshly populated api.Node describing this physical node.
func (c *ClusterManager) getSelf() *api.Node {
	node := api.Node{}

	// Get physical node info.
	node.Id = c.config.NodeId
	node.Status = api.Status_STATUS_OK
	node.Ip, _ = externalIp()
	node.Timestamp = time.Now()

	return &node
}
// initCluster notifies all listeners that a new cluster is being initialized,
// then joins it.
func (c *ClusterManager) initCluster(db *Database, self *api.Node, exist bool) error {
	var err error

	// Alert all listeners that we are initializing a new cluster.
	for e := c.listeners.Front(); e != nil; e = e.Next() {
		err = e.Value.(ClusterListener).ClusterInit(self, db)
		if err != nil {
			self.Status = api.Status_STATUS_ERROR
			dlog.Printf("Failed to initialize %s: %v",
				e.Value.(ClusterListener).String(), err)
			goto done
		}
	}

	err = c.joinCluster(db, self, exist)
	if err != nil {
		dlog.Printf("Failed to join new cluster: %v", err)
		goto done
	}

done:
	return err
}
// updateClusterStatus periodically processes gossip heartbeats, notifies
// listeners of node status changes, and keeps the node cache up to date.
func (c *ClusterManager) updateClusterStatus() {
	gossipStoreKey := types.StoreKey(heartbeatKey + c.config.ClusterId)

	for {
		node := c.getCurrentState()
		c.nodeCache[node.Id] = *node

		// Process heartbeats from other nodes...
		gossipValues := c.gossip.GetStoreKeyValue(gossipStoreKey)

		numNodes := 0
		for id, nodeInfo := range gossipValues {
			numNodes = numNodes + 1

			// Check to make sure we are not exceeding the size of the cluster.
			if c.size > 0 && numNodes > c.size {
				dlog.Fatalf("Fatal, number of nodes in the cluster has "+
					"exceeded the cluster size: %d > %d", numNodes, c.size)
				os.Exit(-1)
			}

			// Ignore updates from self node.
			if id == types.NodeId(node.Id) {
				continue
			}

			// Notify node status change if required.
			newNodeInfo := api.Node{}
			newNodeInfo.Id = string(id)
			newNodeInfo.Status = api.Status_STATUS_OK

			switch {
			case nodeInfo.Status == types.NODE_STATUS_DOWN:
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}

				// Check if it is a stale update.
				ne := c.getLatestNodeConfig(string(id))
				if ne != nil && nodeInfo.GenNumber != 0 &&
					nodeInfo.GenNumber < ne.GenNumber {
					dlog.Warnln("Detected stale update for node ", id,
						" going down, ignoring it")
					c.gossip.MarkNodeHasOldGen(id)
					break
				}

				c.nodeStatuses[string(id)] = newNodeInfo.Status
				dlog.Warnln("Detected node ", id,
					" to be offline due to inactivity.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case nodeInfo.Status == types.NODE_STATUS_DOWN_WAITING_FOR_NEW_UPDATE:
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}

				c.nodeStatuses[string(id)] = newNodeInfo.Status
				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be offline due to inactivity.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Update(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}

			case nodeInfo.Status == types.NODE_STATUS_UP:
				newNodeInfo.Status = api.Status_STATUS_OK
				lastStatus, ok := c.nodeStatuses[string(id)]
				if ok && lastStatus == newNodeInfo.Status {
					break
				}
				c.nodeStatuses[string(id)] = newNodeInfo.Status

				// A node discovered in the cluster.
				dlog.Warnln("Detected node ", newNodeInfo.Id,
					" to be in the cluster.")

				for e := c.listeners.Front(); e != nil && c.gEnabled; e = e.Next() {
					err := e.Value.(ClusterListener).Add(&newNodeInfo)
					if err != nil {
						dlog.Warnln("Failed to notify ",
							e.Value.(ClusterListener).String())
					}
				}
			}

			// Update cache.
			if nodeInfo.Value != nil {
				n, ok := nodeInfo.Value.(api.Node)
				if ok {
					n.Status = newNodeInfo.Status
					c.nodeCache[n.Id] = n
				} else {
					c.nodeCache[newNodeInfo.Id] = newNodeInfo
				}
			} else {
				newNodeInfo.Status = api.Status_STATUS_OFFLINE
				c.nodeCache[newNodeInfo.Id] = newNodeInfo
			}
		}

		time.Sleep(2 * time.Second)
	}
}
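// Illustrative sketch (assumption, not part of the original file): because
// updateClusterStatus loops forever with a 2-second sleep, callers are expected
// to run it on its own goroutine once the gossip layer is up. A hypothetical
// start-up helper, for illustration only:
//
//	func (c *ClusterManager) startStatusMonitor() {
//		go c.updateClusterStatus()
//	}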