Example #1
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		conn, err := ctx.GRPCDial(c.addr.String(), grpc.WithBlock())
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}

		// Start gossiping.
		if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
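Every example on this page applies the same idiom: an RPC error is only logged (or escalated) when grpcutil.IsClosedConnection reports that it is not an expected closed-connection error, which keeps graceful shutdowns quiet. The sketch below distills that idiom into a self-contained program; the import path and the logUnlessClosed helper are assumptions chosen for illustration, not code taken from the examples.

// Minimal sketch of the idiom shared by the examples on this page: ignore
// errors that merely report a closed connection (expected during shutdown)
// and log everything else. The import path is an assumption; it has moved
// between CockroachDB releases.
package main

import (
	"log"

	"github.com/cockroachdb/cockroach/util/grpcutil"
)

// logUnlessClosed is a hypothetical helper used only for illustration.
func logUnlessClosed(err error) {
	if err != nil && !grpcutil.IsClosedConnection(err) {
		log.Printf("unexpected RPC error: %v", err)
	}
}

func main() {
	// Both a nil error and a closed-connection error are ignored silently;
	// anything else would be logged.
	logUnlessClosed(nil)
}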
Example #2
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		// Note: avoid using `grpc.WithBlock` here. This code is already
		// asynchronous from the caller's perspective, so the only effect of
		// `WithBlock` here is blocking shutdown - at the time of this writing,
		// that ends up making `kv` tests take twice as long.
		conn, err := ctx.GRPCDial(c.addr.String())
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}

		// Start gossiping.
		if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
Example #3
func (ctx *Context) removeConn(key string, conn *grpc.ClientConn) {
	if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
		if log.V(1) {
			log.Errorf("failed to close client connection: %s", err)
		}
	}
	delete(ctx.conns.cache, key)
}
Example #4
func (ctx *Context) removeConnLocked(key string, meta *connMeta) {
	if log.V(1) {
		log.Infof(ctx.masterCtx, "closing %s", key)
	}
	if conn := meta.conn; conn != nil {
		if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
			if log.V(1) {
				log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err)
			}
		}
	}
	delete(ctx.conns.cache, key)
}
Example #5
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	meta, ok := ctx.conns.cache[target]
	if !ok {
		meta = &connMeta{}
		ctx.conns.cache[target] = meta
	}
	ctx.conns.Unlock()

	meta.Do(func() {
		dialOpt, err := ctx.GRPCDialOption()
		if err != nil {
			meta.err = err
			return
		}

		dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
		dialOpts = append(dialOpts, dialOpt)
		dialOpts = append(dialOpts, opts...)

		if log.V(1) {
			log.Infof(ctx.masterCtx, "dialing %s", target)
		}
		meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...)
		if meta.err == nil {
			if err := ctx.Stopper.RunTask(func() {
				ctx.Stopper.RunWorker(func() {
					err := ctx.runHeartbeat(meta.conn, target)
					if err != nil && !grpcutil.IsClosedConnection(err) {
						log.Error(ctx.masterCtx, err)
					}
					ctx.removeConn(target, meta)
				})
			}); err != nil {
				meta.err = err
				// removeConn and ctx's cleanup worker both lock ctx.conns. However,
				// to avoid racing with meta's initialization, the cleanup worker
				// blocks on meta.Do while holding ctx.conns. Invoke removeConn
				// asynchronously to avoid deadlock.
				go ctx.removeConn(target, meta)
			}
		}
	})

	return meta.conn, meta.err
}
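Example #5 depends on a connMeta type that is not shown on this page. A plausible shape, assumed here for illustration rather than taken from the CockroachDB sources, embeds sync.Once so that the map lock is held only for the cache lookup while the dial itself runs exactly once per target:

// Sketch of a once-per-target connection cache in the style of Example #5.
// connMeta and connCache are assumptions for illustration; the dial is
// stubbed with a string standing in for *grpc.ClientConn.
package main

import (
	"fmt"
	"sync"
)

type connMeta struct {
	sync.Once
	conn string // stand-in for *grpc.ClientConn
	err  error
}

type connCache struct {
	mu    sync.Mutex
	cache map[string]*connMeta
}

func (c *connCache) dial(target string) (string, error) {
	// Hold the map lock only long enough to find or create the entry.
	c.mu.Lock()
	meta, ok := c.cache[target]
	if !ok {
		meta = &connMeta{}
		c.cache[target] = meta
	}
	c.mu.Unlock()

	// The first caller for a target performs the (stubbed) dial; concurrent
	// callers block in Do until it finishes and then share the result.
	meta.Do(func() {
		meta.conn, meta.err = "conn:"+target, nil
	})
	return meta.conn, meta.err
}

func main() {
	c := &connCache{cache: map[string]*connMeta{}}
	conn, err := c.dial("node1:26257")
	fmt.Println(conn, err)
}

Separating the map lock from the dial keeps callers from serializing behind a slow dial to an unrelated target, and it is also why Example #5's comment notes that the cleanup worker can safely block on meta.Do.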
Example #6
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if meta, ok := ctx.conns.cache[target]; ok {
		return meta.conn, nil
	}

	var dialOpt grpc.DialOption
	if ctx.Insecure {
		dialOpt = grpc.WithInsecure()
	} else {
		tlsConfig, err := ctx.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
	dialOpts = append(dialOpts, dialOpt)
	dialOpts = append(dialOpts, opts...)

	conn, err := grpc.Dial(target, dialOpts...)
	if err == nil {
		if ctx.conns.cache == nil {
			ctx.conns.cache = make(map[string]connMeta)
		}
		ctx.conns.cache[target] = connMeta{conn: conn}

		if ctx.Stopper.RunTask(func() {
			ctx.Stopper.RunWorker(func() {
				if err := ctx.runHeartbeat(conn, target); err != nil && !grpcutil.IsClosedConnection(err) {
					log.Error(err)
				}
				ctx.conns.Lock()
				ctx.removeConn(target, conn)
				ctx.conns.Unlock()
			})
		}) != nil {
			ctx.removeConn(target, conn)
		}
	}
	return conn, err
}
Example #7
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	/*
		_, file, ln, _ := runtime.Caller(1)
		_, file2, ln2, _ := runtime.Caller(2)
		fmt.Printf("GRPCDial: %s:%d %s:%d\n", file, ln, file2, ln2)
	*/

	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if conn, ok := ctx.conns.cache[target]; ok {
		return conn, nil
	}

	var dialOpt grpc.DialOption
	if ctx.Insecure {
		dialOpt = grpc.WithInsecure()
	} else {
		tlsConfig, err := ctx.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	conn, err := grpc.Dial(target, append(opts, dialOpt, grpc.WithTimeout(base.NetworkTimeout))...)
	if err == nil {
		if ctx.conns.cache == nil {
			ctx.conns.cache = make(map[string]*grpc.ClientConn)
		}
		ctx.conns.cache[target] = conn

		ctx.Stopper.RunWorker(func() {
			if err := ctx.runHeartbeat(conn, target); err != nil && !grpcutil.IsClosedConnection(err) {
				log.Error(err)
			}
			ctx.conns.Lock()
			ctx.removeConn(target, conn)
			ctx.conns.Unlock()
		})
	}
	return conn, err
}
Example #8
// start dials the remote addr and commences gossip once connected.
// Upon exit, the client is sent on the disconnected channel.
// If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		var dialOpt grpc.DialOption
		if ctx.Insecure {
			dialOpt = grpc.WithInsecure()
		} else {
			tlsConfig, err := ctx.GetClientTLSConfig()
			if err != nil {
				log.Error(err)
				return
			}
			dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
		}

		conn, err := grpc.Dial(c.addr.String(), dialOpt)
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}
		defer func() {
			if err := conn.Close(); err != nil {
				log.Error(err)
			}
		}()
		c.rpcClient = NewGossipClient(conn)

		// Start gossiping.
		if err := c.gossip(g, stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				if c.peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", c.peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
Example #9
// Send a message. Returns false if the message was dropped.
func (rttc *raftTransportTestContext) Send(
	from, to roachpb.ReplicaDescriptor, rangeID roachpb.RangeID, msg raftpb.Message,
) bool {
	msg.To = uint64(to.ReplicaID)
	msg.From = uint64(from.ReplicaID)
	req := &storage.RaftMessageRequest{
		RangeID:     rangeID,
		Message:     msg,
		ToReplica:   to,
		FromReplica: from,
	}
	sender := rttc.transports[from.NodeID].MakeSender(
		func(err error, _ roachpb.ReplicaDescriptor) {
			if err != nil && !grpcutil.IsClosedConnection(err) &&
				!testutils.IsError(err, channelServerBrokenRangeMessage) {
				rttc.t.Fatal(err)
			}
		})
	return sender.SendAsync(req)
}
Example #10
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if meta, ok := ctx.conns.cache[target]; ok {
		return meta.conn, nil
	}

	dialOpt, err := ctx.GRPCDialOption()
	if err != nil {
		return nil, err
	}

	dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
	dialOpts = append(dialOpts, dialOpt)
	dialOpts = append(dialOpts, opts...)

	if log.V(1) {
		log.Infof(context.TODO(), "dialing %s", target)
	}
	conn, err := grpc.Dial(target, dialOpts...)
	if err == nil {
		ctx.conns.cache[target] = connMeta{conn: conn}

		if ctx.Stopper.RunTask(func() {
			ctx.Stopper.RunWorker(func() {
				err := ctx.runHeartbeat(conn, target)
				if err != nil && !grpcutil.IsClosedConnection(err) {
					log.Error(context.TODO(), err)
				}
				ctx.conns.Lock()
				ctx.removeConn(target, conn)
				ctx.conns.Unlock()
			})
		}) != nil {
			ctx.removeConn(target, conn)
		}
	}
	return conn, err
}
Example #11
func (s *server) gossipSender(argsPtr **Request, senderFn func(*Response) error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	reply := new(Response)

	for {
		if !s.stopper.RunTask(func() {
			args := *argsPtr
			delta := s.is.delta(args.Nodes)
			if infoCount := len(delta); infoCount > 0 {
				if log.V(1) {
					log.Infof("node %d returned %d info(s) to node %d", s.is.NodeID, infoCount, args.NodeID)
				}

				*reply = Response{
					NodeID: s.is.NodeID,
					Nodes:  s.is.getNodes(),
					Delta:  delta,
				}

				s.mu.Unlock()
				err := senderFn(reply)
				s.mu.Lock()
				if err != nil {
					if !grpcutil.IsClosedConnection(err) {
						log.Error(err)
					}
					return
				}
				s.sent += infoCount
			}
		}) {
			return
		}

		s.ready.Wait()
	}
}
Example #12
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	rttc := newRaftTransportTestContext(t)
	defer rttc.Stop()

	// Create several servers, each of which has two stores (A raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := []raftpb.MessageType{
		raftpb.MsgSnap,
		raftpb.MsgHeartbeat,
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		transports[nodeID] = rttc.AddNode(nodeID)

		// This channel is normally unbuffered, but it is also normally serviced by
		// the raft goroutine. Since we don't have that goroutine in this test, we
		// must buffer the channel to prevent snapshots from blocking while we
		// iterate through the recipients in an order that may differ from the
		// sending order.
		sendersPerNode := storesPerNode
		recipientsPerSender := numNodes * storesPerNode
		outboundSnapshotsPerNode := sendersPerNode * recipientsPerSender
		transports[nodeID].SnapshotStatusChan = make(chan storage.RaftSnapshotStatus, outboundSnapshotsPerNode)

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channels[storeID] = rttc.ListenStore(nodeID, storeID)
		}
	}

	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one snapshot and one heartbeat to each store, including
	// itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				RangeID: 1,
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for _, messageType := range messageTypes {
				req := baseReq
				req.Message.Type = messageType

				if !transports[fromNodeID].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
					if err != nil && !grpcutil.IsClosedConnection(err) {
						panic(err)
					}
				}).SendAsync(&req) {
					t.Errorf("unable to send %s from %d to %d", req.Message.Type, fromNodeID, toNodeID)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		func() {
			for len(messageTypeCounts[toStoreID]) > 0 {
				req := <-channels[toStoreID].ch
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
				}

				// Each MsgSnap should have a corresponding entry on the
				// sender's SnapshotStatusChan.
				if req.Message.Type == raftpb.MsgSnap {
					st := <-transports[req.FromReplica.NodeID].SnapshotStatusChan
					if st.Err != nil {
						t.Errorf("unexpected error sending snapshot: %s", st.Err)
					}
				}

				if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
					if _, ok := typeCounts[req.Message.Type]; ok {
						typeCounts[req.Message.Type]--
						if typeCounts[req.Message.Type] == 0 {
							delete(typeCounts, req.Message.Type)
						}
					} else {
						t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID)
				}
			}

			delete(messageTypeCounts, toStoreID)
		}()

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3).
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	expReq := &storage.RaftMessageRequest{
		RangeID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if !transports[storeNodes[fromStoreID]].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
		if err != nil && !grpcutil.IsClosedConnection(err) {
			panic(err)
		}
	}).SendAsync(expReq) {
		t.Errorf("unable to send message from %d to %d", fromStoreID, toStoreID)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
Example #13
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)

	grpcServer := rpc.NewServer(nodeRPCContext)
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(2)
	serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage)
	addr := ln.Addr()
	// Have to set gossip.NodeID before calling gossip.AddInfoXXX.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext)

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			RangeID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if !clientTransport.MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
			if err != nil && !grpcutil.IsClosedConnection(err) {
				panic(err)
			}
		}).SendAsync(req) {
			t.Errorf("failed to send message %d", i)
		}
	}

	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
Example #14
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. If an alternate is proposed on response, the client addr
// is modified and the method returns so the caller can forward to it.
func (c *client) gossip(g *Gossip, gossipClient GossipClient, stopper *stop.Stopper) error {
	// For an un-bootstrapped node, g.is.NodeID is 0 when the client starts
	// gossiping, so it's better to get the nodeID from g.is every time.
	g.mu.Lock()
	addr := g.is.NodeAddr
	g.mu.Unlock()

	ctx := grpcutil.NewContextWithStopper(context.Background(), stopper)

	stream, err := gossipClient.Gossip(ctx)
	if err != nil {
		return err
	}
	defer func() {
		if err := stream.CloseSend(); err != nil {
			log.Error(err)
		}
	}()

	if err := c.requestGossip(g, addr, stream); err != nil {
		return err
	}

	sendGossipChan := make(chan struct{}, 1)

	// Register a callback for gossip updates.
	updateCallback := func(_ string, _ roachpb.Value) {
		select {
		case sendGossipChan <- struct{}{}:
		default:
		}
	}
	// Defer calling "undoer" callback returned from registration.
	defer g.RegisterCallback(".*", updateCallback)()

	// Loop in worker, sending updates from the info store.
	stopper.RunWorker(func() {
		for {
			select {
			case <-sendGossipChan:
				if err := c.sendGossip(g, addr, stream); err != nil {
					if !grpcutil.IsClosedConnection(err) {
						log.Error(err)
					}
					return
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})

	// Loop until stopper is signalled, or until either the gossip or RPC clients are closed.
	// The stopper's signal is propagated through the context attached to the stream.
	for {
		reply, err := stream.Recv()
		if err != nil {
			return err
		}
		if err := c.handleResponse(g, reply); err != nil {
			return err
		}
	}
}
Example #15
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(
	g *Gossip,
	disconnected chan *client,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	nodeID roachpb.NodeID,
	breaker *circuit.Breaker,
) {
	stopper.RunWorker(func() {
		ctx, cancel := context.WithCancel(c.ctx)
		var wg sync.WaitGroup
		defer func() {
			// This closes the outgoing stream, causing any attempt to send or
			// receive to return an error.
			//
			// Note: it is still possible for incoming gossip to be processed after
			// this point.
			cancel()

			// The stream is closed, but there may still be some incoming gossip
			// being processed. Wait until that is complete to avoid racing the
			// client's removal against the discovery of its remote's node ID.
			wg.Wait()
			disconnected <- c
		}()

		consecFailures := breaker.ConsecFailures()
		var stream Gossip_GossipClient
		if err := breaker.Call(func() error {
			// Note: avoid using `grpc.WithBlock` here. This code is already
			// asynchronous from the caller's perspective, so the only effect of
			// `WithBlock` here is blocking shutdown - at the time of this writing,
			// that ends up making `kv` tests take twice as long.
			conn, err := rpcCtx.GRPCDial(c.addr.String())
			if err != nil {
				return err
			}
			if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil {
				return err
			}
			return c.requestGossip(g, stream)
		}, 0); err != nil {
			if consecFailures == 0 {
				log.Warningf(ctx, "node %d: failed to start gossip client: %s", nodeID, err)
			}
			return
		}

		// Start gossiping.
		log.Infof(ctx, "node %d: started gossip client to %s", nodeID, c.addr)
		if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof(ctx, "node %d: closing client to node %d (%s): %s", nodeID, peerID, c.addr, err)
				} else {
					log.Infof(ctx, "node %d: closing client to %s: %s", nodeID, c.addr, err)
				}
			}
		}
	})
}