// New creates an instance of a gossip node.
// The higher level manages the NodeIDContainer instance (which can be shared by
// various server components). The ambient context is expected to already
// contain the node ID.
func New(
	ambient log.AmbientContext,
	nodeID *base.NodeIDContainer,
	rpcContext *rpc.Context,
	grpcServer *grpc.Server,
	resolvers []resolver.Resolver,
	stopper *stop.Stopper,
	registry *metric.Registry,
) *Gossip {
	ambient.SetEventLog("gossip", "gossip")
	g := &Gossip{
		server:            newServer(ambient, nodeID, stopper, registry),
		Connected:         make(chan struct{}),
		rpcContext:        rpcContext,
		outgoing:          makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsOutgoingGauge)),
		bootstrapping:     map[string]struct{}{},
		disconnected:      make(chan *client, 10),
		stalledCh:         make(chan struct{}, 1),
		stallInterval:     defaultStallInterval,
		bootstrapInterval: defaultBootstrapInterval,
		cullInterval:      defaultCullInterval,
		nodeDescs:         map[roachpb.NodeID]*roachpb.NodeDescriptor{},
		resolverAddrs:     map[util.UnresolvedAddr]resolver.Resolver{},
		bootstrapAddrs:    map[util.UnresolvedAddr]roachpb.NodeID{},
	}
	stopper.AddCloser(stop.CloserFn(g.server.AmbientContext.FinishEventLog))
	registry.AddMetric(g.outgoing.gauge)
	g.clientsMu.breakers = map[string]*circuit.Breaker{}

	resolverAddrs := make([]string, len(resolvers))
	for i, resolver := range resolvers {
		resolverAddrs[i] = resolver.Addr()
	}
	ctx := g.AnnotateCtx(context.Background())
	if log.V(1) {
		log.Infof(ctx, "initial resolvers: %v", resolverAddrs)
	}
	g.SetResolvers(resolvers)

	g.mu.Lock()
	// Add ourselves as a SystemConfig watcher.
	g.mu.is.registerCallback(KeySystemConfig, g.updateSystemConfig)
	// Add ourselves as a node descriptor watcher.
	g.mu.is.registerCallback(MakePrefixPattern(KeyNodeIDPrefix), g.updateNodeAddress)
	g.mu.Unlock()

	RegisterGossipServer(grpcServer, g.server)
	return g
}
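
// exampleNewGossip is a minimal wiring sketch, not part of the CockroachDB
// source: it only illustrates how a caller might construct the dependencies
// and call New. The rpc.Context construction, the resolver list, and the step
// that attaches the node ID to the ambient context are elided; the nil values
// used for them are assumptions made for brevity, not how a real server sets
// this up.
func exampleNewGossip() *Gossip {
	stopper := stop.NewStopper()
	registry := metric.NewRegistry()
	grpcServer := grpc.NewServer()

	// The higher level owns the NodeIDContainer and may share it with other
	// server components. Per New's contract, the node ID is expected to
	// already be attached to the ambient context; that step is elided here.
	nodeIDContainer := &base.NodeIDContainer{}
	ambient := log.AmbientContext{}

	// In a real server this comes from the RPC layer; left nil in this sketch.
	var rpcContext *rpc.Context

	return New(ambient, nodeIDContainer, rpcContext, grpcServer,
		nil /* resolvers */, stopper, registry)
}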
// TestGossipOrphanedStallDetection verifies that a node which loses its only
// gossip connection re-establishes it via stall detection once the peer
// restarts at the same address.
func TestGossipOrphanedStallDetection(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	local.SetStallInterval(5 * time.Millisecond)

	// Make sure we have the sentinel to ensure that its absence is not the
	// cause of stall detection.
	if err := local.AddInfo(KeySentinel, nil, time.Hour); err != nil {
		t.Fatal(err)
	}

	peerStopper := stop.NewStopper()
	peer := startGossip(2, peerStopper, t, metric.NewRegistry())
	peerNodeID := peer.NodeID.Get()
	peerAddr := peer.GetNodeAddr()
	peerAddrStr := peerAddr.String()

	// Connect to the peer and wait until both the outgoing connection and the
	// peer's resolver are visible on the local node.
	local.startClient(peerAddr)

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("node %d not yet connected", peerNodeID)
	})

	util.SucceedsSoon(t, func() error {
		for _, resolver := range local.GetResolvers() {
			if resolver.Addr() == peerAddrStr {
				return nil
			}
		}
		return errors.Errorf("node %d descriptor not yet available", peerNodeID)
	})

	// Start the local node's bootstrap and management loops, then stop the
	// peer and wait for the local node to notice the lost connection.
	local.bootstrap()
	local.manage()

	peerStopper.Stop()

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return errors.Errorf("node %d still connected", peerNodeID)
			}
		}
		return nil
	})

	// Restart the peer at the same address and verify that the local node
	// reconnects to it.
	peerStopper = stop.NewStopper()
	defer peerStopper.Stop()
	startGossipAtAddr(peerNodeID, peerAddr, peerStopper, t, metric.NewRegistry())

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("node %d not yet connected", peerNodeID)
	})
}