// connectGossip connects to gossip network and reads cluster ID. If // this node is already part of a cluster, the cluster ID is verified // for a match. If not part of a cluster, the cluster ID is set. The // node's address is gossipped with node ID as the gossip key. func (n *Node) connectGossip() { glog.Infof("connecting to gossip network to verify cluster ID...") <-n.gossip.Connected val, err := n.gossip.GetInfo(gossip.KeyClusterID) if err != nil || val == nil { glog.Fatalf("unable to ascertain cluster ID from gossip network: %v", err) } gossipClusterID := val.(string) if n.ClusterID == "" { n.ClusterID = gossipClusterID } else if n.ClusterID != gossipClusterID { glog.Fatalf("node %d belongs to cluster %q but is attempting to connect to a gossip network for cluster %q", n.Descriptor.NodeID, n.ClusterID, gossipClusterID) } glog.Infof("node connected via gossip and verified as part of cluster %q", gossipClusterID) // Gossip node address keyed by node ID. if n.Descriptor.NodeID != 0 { nodeIDKey := gossip.MakeNodeIDGossipKey(n.Descriptor.NodeID) if err := n.gossip.AddInfo(nodeIDKey, n.Descriptor.Address, ttlNodeIDGossip); err != nil { glog.Errorf("couldn't gossip address for node %d: %v", n.Descriptor.NodeID, err) } } }
// connectGossip connects to gossip network and reads cluster ID. If // this node is already part of a cluster, the cluster ID is verified // for a match. If not part of a cluster, the cluster ID is set. The // node's address is gossiped with node ID as the gossip key. func (n *Node) connectGossip() { log.Infof("connecting to gossip network to verify cluster ID...") // No timeout or stop condition is needed here. Log statements should be // sufficient for diagnosing this type of condition. <-n.gossip.Connected val, err := n.gossip.GetInfo(gossip.KeyClusterID) if err != nil || val == nil { log.Fatalf("unable to ascertain cluster ID from gossip network: %v", err) } gossipClusterID := val.(string) if n.ClusterID == "" { n.ClusterID = gossipClusterID } else if n.ClusterID != gossipClusterID { log.Fatalf("node %d belongs to cluster %q but is attempting to connect to a gossip network for cluster %q", n.Descriptor.NodeID, n.ClusterID, gossipClusterID) } log.Infof("node connected via gossip and verified as part of cluster %q", gossipClusterID) // Gossip node address keyed by node ID. if n.Descriptor.NodeID != 0 { nodeIDKey := gossip.MakeNodeIDGossipKey(n.Descriptor.NodeID) if err := n.gossip.AddInfo(nodeIDKey, n.Descriptor.Address, ttlNodeIDGossip); err != nil { log.Errorf("couldn't gossip address for node %d: %v", n.Descriptor.NodeID, err) } } }
// startGossip loops on a periodic ticker to gossip node-related
// information. Loops until the node is closed and should be
// invoked via goroutine.
//
// NOTE(review): this function reads node identity from n.Attributes
// (NodeID, Address, Datacenter) while the other Node methods in this
// file use n.Descriptor — presumably different revisions; confirm
// which field set is current before relying on either.
func (n *Node) startGossip() {
	// Register gossip groups.
	n.maxAvailPrefix = gossip.KeyMaxAvailCapacityPrefix + n.Attributes.Datacenter
	n.gossip.RegisterGroup(n.maxAvailPrefix, gossipGroupLimit, gossip.MaxGroup)

	// Gossip cluster ID if not yet on network. Multiple nodes may race
	// to gossip, but there's no harm in it, as there's no definitive
	// source. A GetInfo error is taken to mean the key is absent.
	if _, err := n.gossip.GetInfo(gossip.KeyClusterID); err != nil {
		// NOTE(review): AddInfo errors are silently dropped here (and
		// below), unlike connectGossip/bootstrapStores which log them.
		n.gossip.AddInfo(gossip.KeyClusterID, n.ClusterID, ttlClusterIDGossip)
	}

	// Always gossip node ID at startup.
	nodeIDKey := gossip.MakeNodeIDGossipKey(n.Attributes.NodeID)
	n.gossip.AddInfo(nodeIDKey, n.Attributes.Address, ttlNodeIDGossip)

	// Periodically gossip capacity information until the node's closer
	// channel signals shutdown; the ticker is stopped before returning.
	ticker := time.NewTicker(gossipInterval)
	for {
		select {
		case <-ticker.C:
			n.gossipCapacities()
		case <-n.closer:
			ticker.Stop()
			return
		}
	}
}
// nodeIDToAddr uses the gossip network to translate from node ID // to a host:port address pair. func (db *DistDB) nodeIDToAddr(nodeID int32) (net.Addr, error) { nodeIDKey := gossip.MakeNodeIDGossipKey(nodeID) info, err := db.gossip.GetInfo(nodeIDKey) if info == nil || err != nil { return nil, util.Errorf("Unable to lookup address for node: %v. Error: %v", nodeID, err) } return info.(net.Addr), nil }
// TestNodeJoin verifies a new node is able to join a bootstrapped // cluster consisting of one node. func TestNodeJoin(t *testing.T) { e := engine.NewInMem(engine.Attributes{}, 1<<20) localDB, err := BootstrapCluster("cluster-1", e) if err != nil { t.Fatal(err) } localDB.Close() // Set an aggressive gossip interval to make sure information is exchanged tout de suite. *gossip.GossipInterval = 10 * time.Millisecond // Start the bootstrap node. engines1 := []engine.Engine{e} addr1 := util.CreateTestAddr("tcp") server1, node1 := createTestNode(addr1, engines1, addr1, t) defer server1.Close() // Create a new node. engines2 := []engine.Engine{engine.NewInMem(engine.Attributes{}, 1<<20)} server2, node2 := createTestNode(util.CreateTestAddr("tcp"), engines2, server1.Addr(), t) defer server2.Close() // Verify new node is able to bootstrap its store. if err := util.IsTrueWithin(func() bool { return node2.localDB.GetStoreCount() == 1 }, 50*time.Millisecond); err != nil { t.Fatal(err) } // Verify node1 sees node2 via gossip and vice versa. node1Key := gossip.MakeNodeIDGossipKey(node1.Descriptor.NodeID) node2Key := gossip.MakeNodeIDGossipKey(node2.Descriptor.NodeID) if err := util.IsTrueWithin(func() bool { if val, err := node1.gossip.GetInfo(node2Key); err != nil { return false } else if val.(net.Addr).String() != server2.Addr().String() { t.Error("addr2 gossip %s doesn't match addr2 address %s", val.(net.Addr).String(), server2.Addr().String()) } if val, err := node2.gossip.GetInfo(node1Key); err != nil { return false } else if val.(net.Addr).String() != server1.Addr().String() { t.Error("addr1 gossip %s doesn't match addr1 address %s", val.(net.Addr).String(), server1.Addr().String()) } return true }, 50*time.Millisecond); err != nil { t.Error(err) } }
// bootstrapStores bootstraps uninitialized stores once the cluster // and node IDs have been established for this node. Store IDs are // allocated via a sequence id generator stored at a system key per // node. func (n *Node) bootstrapStores(bootstraps *list.List) { glog.Infof("bootstrapping %d store(s)", bootstraps.Len()) // Allocate a new node ID if necessary. if n.Descriptor.NodeID == 0 { var err error n.Descriptor.NodeID, err = allocateNodeID(n.kvDB) glog.Infof("new node allocated ID %d", n.Descriptor.NodeID) if err != nil { glog.Fatal(err) } // Gossip node address keyed by node ID. nodeIDKey := gossip.MakeNodeIDGossipKey(n.Descriptor.NodeID) if err := n.gossip.AddInfo(nodeIDKey, n.Descriptor.Address, ttlNodeIDGossip); err != nil { glog.Errorf("couldn't gossip address for node %d: %v", n.Descriptor.NodeID, err) } } // Bootstrap all waiting stores by allocating a new store id for // each and invoking store.Bootstrap() to persist. inc := int64(bootstraps.Len()) firstID, err := allocateStoreIDs(n.Descriptor.NodeID, inc, n.kvDB) if err != nil { glog.Fatal(err) } sIdent := storage.StoreIdent{ ClusterID: n.ClusterID, NodeID: n.Descriptor.NodeID, StoreID: firstID, } for e := bootstraps.Front(); e != nil; e = e.Next() { s := e.Value.(*storage.Store) s.Bootstrap(sIdent) n.mu.Lock() n.storeMap[s.Ident.StoreID] = s n.mu.Unlock() sIdent.StoreID++ glog.Infof("bootstrapped store %s", s) } }