// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		conn, err := ctx.GRPCDial(c.addr.String(), grpc.WithBlock())
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}

		// Start gossiping.
		if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		// Note: avoid using `grpc.WithBlock` here. This code is already
		// asynchronous from the caller's perspective, so the only effect of
		// `WithBlock` here is blocking shutdown - at the time of this writing,
		// that ends up making `kv` tests take twice as long.
		conn, err := ctx.GRPCDial(c.addr.String())
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}

		// Start gossiping.
		if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
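// Every snippet in this section gates its error logging on
// grpcutil.IsClosedConnection so that an orderly shutdown is not reported as
// a failure. The real helper lives in the grpcutil package; the following is
// only a minimal sketch, assuming the helper just needs to recognize the
// "connection closed" errors that gRPC surfaces during teardown. The exact
// checks are an assumption, not the package's actual implementation.
package grpcutil

import (
	"io"
	"strings"

	"google.golang.org/grpc"
)

// IsClosedConnection returns true if err looks like the result of an
// intentionally closed connection (sketch only).
func IsClosedConnection(err error) bool {
	if err == nil {
		return false
	}
	// grpc.ErrClientConnClosing and io.EOF are the typical results of closing
	// a client connection or its stream.
	if err == grpc.ErrClientConnClosing || err == io.EOF {
		return true
	}
	// Fall back to matching the transport's shutdown message.
	return strings.Contains(err.Error(), "is closing")
}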
func (ctx *Context) removeConn(key string, conn *grpc.ClientConn) {
	if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
		if log.V(1) {
			log.Errorf("failed to close client connection: %s", err)
		}
	}
	delete(ctx.conns.cache, key)
}
func (ctx *Context) removeConnLocked(key string, meta *connMeta) {
	if log.V(1) {
		log.Infof(ctx.masterCtx, "closing %s", key)
	}
	if conn := meta.conn; conn != nil {
		if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
			if log.V(1) {
				log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err)
			}
		}
	}
	delete(ctx.conns.cache, key)
}
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	meta, ok := ctx.conns.cache[target]
	if !ok {
		meta = &connMeta{}
		ctx.conns.cache[target] = meta
	}
	ctx.conns.Unlock()

	meta.Do(func() {
		dialOpt, err := ctx.GRPCDialOption()
		if err != nil {
			meta.err = err
			return
		}

		dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
		dialOpts = append(dialOpts, dialOpt)
		dialOpts = append(dialOpts, opts...)

		if log.V(1) {
			log.Infof(ctx.masterCtx, "dialing %s", target)
		}
		meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...)
		if meta.err == nil {
			if err := ctx.Stopper.RunTask(func() {
				ctx.Stopper.RunWorker(func() {
					err := ctx.runHeartbeat(meta.conn, target)
					if err != nil && !grpcutil.IsClosedConnection(err) {
						log.Error(ctx.masterCtx, err)
					}
					ctx.removeConn(target, meta)
				})
			}); err != nil {
				meta.err = err
				// removeConn and ctx's cleanup worker both lock ctx.conns. However,
				// to avoid racing with meta's initialization, the cleanup worker
				// blocks on meta.Do while holding ctx.conns. Invoke removeConn
				// asynchronously to avoid deadlock.
				go ctx.removeConn(target, meta)
			}
		}
	})

	return meta.conn, meta.err
}
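// The GRPCDial variant above keys the connection cache by target and guards
// one-time dialing with meta.Do. A minimal sketch of what that cache entry
// might look like, assuming connMeta simply embeds sync.Once next to the
// dialed connection and any dial error; the real definition in the rpc
// package may carry additional fields.
package rpc

import (
	"sync"

	"google.golang.org/grpc"
)

// connMeta is the cache entry for a dialed target. Embedding sync.Once lets
// concurrent GRPCDial calls for the same target share a single dial attempt:
// only the first caller runs the dial closure, and everyone reads the same
// conn/err afterwards.
type connMeta struct {
	sync.Once
	conn *grpc.ClientConn
	err  error
}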
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if meta, ok := ctx.conns.cache[target]; ok {
		return meta.conn, nil
	}

	var dialOpt grpc.DialOption
	if ctx.Insecure {
		dialOpt = grpc.WithInsecure()
	} else {
		tlsConfig, err := ctx.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
	dialOpts = append(dialOpts, dialOpt)
	dialOpts = append(dialOpts, opts...)

	conn, err := grpc.Dial(target, dialOpts...)
	if err == nil {
		if ctx.conns.cache == nil {
			ctx.conns.cache = make(map[string]connMeta)
		}
		ctx.conns.cache[target] = connMeta{conn: conn}

		if ctx.Stopper.RunTask(func() {
			ctx.Stopper.RunWorker(func() {
				if err := ctx.runHeartbeat(conn, target); err != nil && !grpcutil.IsClosedConnection(err) {
					log.Error(err)
				}
				ctx.conns.Lock()
				ctx.removeConn(target, conn)
				ctx.conns.Unlock()
			})
		}) != nil {
			ctx.removeConn(target, conn)
		}
	}
	return conn, err
}
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	/*
		_, file, ln, _ := runtime.Caller(1)
		_, file2, ln2, _ := runtime.Caller(2)
		fmt.Printf("GRPCDial: %s:%d %s:%d\n", file, ln, file2, ln2)
	*/
	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if conn, ok := ctx.conns.cache[target]; ok {
		return conn, nil
	}

	var dialOpt grpc.DialOption
	if ctx.Insecure {
		dialOpt = grpc.WithInsecure()
	} else {
		tlsConfig, err := ctx.GetClientTLSConfig()
		if err != nil {
			return nil, err
		}
		dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
	}

	conn, err := grpc.Dial(target, append(opts, dialOpt, grpc.WithTimeout(base.NetworkTimeout))...)
	if err == nil {
		if ctx.conns.cache == nil {
			ctx.conns.cache = make(map[string]*grpc.ClientConn)
		}
		ctx.conns.cache[target] = conn

		ctx.Stopper.RunWorker(func() {
			if err := ctx.runHeartbeat(conn, target); err != nil && !grpcutil.IsClosedConnection(err) {
				log.Error(err)
			}
			ctx.conns.Lock()
			ctx.removeConn(target, conn)
			ctx.conns.Unlock()
		})
	}
	return conn, err
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, the client is sent on the disconnected channel.
// If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			disconnected <- c
		}()

		var dialOpt grpc.DialOption
		if ctx.Insecure {
			dialOpt = grpc.WithInsecure()
		} else {
			tlsConfig, err := ctx.GetClientTLSConfig()
			if err != nil {
				log.Error(err)
				return
			}
			dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
		}

		conn, err := grpc.Dial(c.addr.String(), dialOpt)
		if err != nil {
			log.Errorf("failed to dial: %v", err)
			return
		}
		defer func() {
			if err := conn.Close(); err != nil {
				log.Error(err)
			}
		}()
		c.rpcClient = NewGossipClient(conn)

		// Start gossiping.
		if err := c.gossip(g, stopper); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				if c.peerID != 0 {
					log.Infof("closing client to node %d (%s): %s", c.peerID, c.addr, err)
				} else {
					log.Infof("closing client to %s: %s", c.addr, err)
				}
			}
		}
	})
}
// Send a message. Returns false if the message was dropped.
func (rttc *raftTransportTestContext) Send(
	from, to roachpb.ReplicaDescriptor, rangeID roachpb.RangeID, msg raftpb.Message,
) bool {
	msg.To = uint64(to.ReplicaID)
	msg.From = uint64(from.ReplicaID)
	req := &storage.RaftMessageRequest{
		RangeID:     rangeID,
		Message:     msg,
		ToReplica:   to,
		FromReplica: from,
	}
	sender := rttc.transports[from.NodeID].MakeSender(
		func(err error, _ roachpb.ReplicaDescriptor) {
			if err != nil && !grpcutil.IsClosedConnection(err) &&
				!testutils.IsError(err, channelServerBrokenRangeMessage) {
				rttc.t.Fatal(err)
			}
		})
	return sender.SendAsync(req)
}
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	defer ctx.conns.Unlock()

	if meta, ok := ctx.conns.cache[target]; ok {
		return meta.conn, nil
	}

	dialOpt, err := ctx.GRPCDialOption()
	if err != nil {
		return nil, err
	}

	dialOpts := make([]grpc.DialOption, 0, 1+len(opts))
	dialOpts = append(dialOpts, dialOpt)
	dialOpts = append(dialOpts, opts...)

	if log.V(1) {
		log.Infof(context.TODO(), "dialing %s", target)
	}
	conn, err := grpc.Dial(target, dialOpts...)
	if err == nil {
		ctx.conns.cache[target] = connMeta{conn: conn}

		if ctx.Stopper.RunTask(func() {
			ctx.Stopper.RunWorker(func() {
				err := ctx.runHeartbeat(conn, target)
				if err != nil && !grpcutil.IsClosedConnection(err) {
					log.Error(context.TODO(), err)
				}
				ctx.conns.Lock()
				ctx.removeConn(target, conn)
				ctx.conns.Unlock()
			})
		}) != nil {
			ctx.removeConn(target, conn)
		}
	}
	return conn, err
}
func (s *server) gossipSender(argsPtr **Request, senderFn func(*Response) error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	reply := new(Response)

	for {
		if !s.stopper.RunTask(func() {
			args := *argsPtr
			delta := s.is.delta(args.Nodes)
			if infoCount := len(delta); infoCount > 0 {
				if log.V(1) {
					log.Infof("node %d returned %d info(s) to node %d", s.is.NodeID, infoCount, args.NodeID)
				}

				*reply = Response{
					NodeID: s.is.NodeID,
					Nodes:  s.is.getNodes(),
					Delta:  delta,
				}

				// Drop the mutex around the blocking send so concurrent
				// gossip processing is not stalled behind a slow peer.
				s.mu.Unlock()
				err := senderFn(reply)
				s.mu.Lock()
				if err != nil {
					if !grpcutil.IsClosedConnection(err) {
						log.Error(err)
					}
					return
				}
				s.sent += infoCount
			}
		}) {
			return
		}

		s.ready.Wait()
	}
}
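// gossipSender above releases s.mu around the blocking senderFn call and
// parks on s.ready (a sync.Cond) between rounds. A minimal, self-contained
// sketch of that unlock-around-blocking-call pattern follows; the sender
// type, notify, and loop names are hypothetical and do not come from the
// gossip package.
package main

import (
	"fmt"
	"sync"
)

type sender struct {
	mu      sync.Mutex
	ready   *sync.Cond
	pending []string
}

func newSender() *sender {
	s := &sender{}
	s.ready = sync.NewCond(&s.mu)
	return s
}

// notify queues an item and wakes the send loop.
func (s *sender) notify(item string) {
	s.mu.Lock()
	s.pending = append(s.pending, item)
	s.mu.Unlock()
	s.ready.Signal()
}

// loop drains queued items, releasing the mutex around the (possibly
// blocking) sendFn so that notify is never stalled behind a slow send. It
// returns when sendFn reports an error.
func (s *sender) loop(sendFn func(string) error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	for {
		for len(s.pending) > 0 {
			item := s.pending[0]
			s.pending = s.pending[1:]
			s.mu.Unlock()
			err := sendFn(item) // blocking call made without holding the lock
			s.mu.Lock()
			if err != nil {
				return
			}
		}
		s.ready.Wait() // wait for notify to queue more work
	}
}

func main() {
	s := newSender()
	done := make(chan struct{})
	go func() {
		defer close(done)
		s.loop(func(item string) error {
			if item == "stop" {
				return fmt.Errorf("stopping")
			}
			fmt.Println("sent", item)
			return nil
		})
	}()
	s.notify("a")
	s.notify("b")
	s.notify("stop")
	<-done
}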
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	rttc := newRaftTransportTestContext(t)
	defer rttc.Stop()

	// Create several servers, each of which has two stores (a raft node ID
	// addresses a store). Node 1 has stores 1 and 2, node 2 has stores 3 and
	// 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered stores in
	// reverse order to ensure that the various IDs are not equal: replica 1
	// is store 5, replica 2 is store 3, and replica 3 is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := []raftpb.MessageType{
		raftpb.MsgSnap,
		raftpb.MsgHeartbeat,
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		transports[nodeID] = rttc.AddNode(nodeID)

		// This channel is normally unbuffered, but it is also normally serviced by
		// the raft goroutine. Since we don't have that goroutine in this test, we
		// must buffer the channel to prevent snapshots from blocking while we
		// iterate through the recipients in an order that may differ from the
		// sending order.
		sendersPerNode := storesPerNode
		recipientsPerSender := numNodes * storesPerNode
		outboundSnapshotsPerNode := sendersPerNode * recipientsPerSender
		transports[nodeID].SnapshotStatusChan = make(chan storage.RaftSnapshotStatus, outboundSnapshotsPerNode)

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID
			channels[storeID] = rttc.ListenStore(nodeID, storeID)
		}
	}

	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one snapshot and one heartbeat to each store, including
	// itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				RangeID: 1,
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for _, messageType := range messageTypes {
				req := baseReq
				req.Message.Type = messageType

				if !transports[fromNodeID].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
					if err != nil && !grpcutil.IsClosedConnection(err) {
						panic(err)
					}
				}).SendAsync(&req) {
					t.Errorf("unable to send %s from %d to %d", req.Message.Type, fromNodeID, toNodeID)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport does
	// not guarantee in-order delivery between independent transports, so we
	// just verify that the right number of messages end up in each channel.
	for toStoreID := range storeNodes {
		func() {
			for len(messageTypeCounts[toStoreID]) > 0 {
				req := <-channels[toStoreID].ch
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
				}

				// Each MsgSnap should have a corresponding entry on the
				// sender's SnapshotStatusChan.
				if req.Message.Type == raftpb.MsgSnap {
					st := <-transports[req.FromReplica.NodeID].SnapshotStatusChan
					if st.Err != nil {
						t.Errorf("unexpected error sending snapshot: %s", st.Err)
					}
				}

				if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
					if _, ok := typeCounts[req.Message.Type]; ok {
						typeCounts[req.Message.Type]--
						if typeCounts[req.Message.Type] == 0 {
							delete(typeCounts, req.Message.Type)
						}
					} else {
						t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID)
				}
			}

			delete(messageTypeCounts, toStoreID)
		}()

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs. Send a message
	// from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3).
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)

	expReq := &storage.RaftMessageRequest{
		RangeID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if !transports[storeNodes[fromStoreID]].MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
		if err != nil && !grpcutil.IsClosedConnection(err) {
			panic(err)
		}
	}).SendAsync(expReq) {
		t.Errorf("unable to send message from %d to %d", fromStoreID, toStoreID)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)

	grpcServer := rpc.NewServer(nodeRPCContext)
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(2)
	serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage)
	addr := ln.Addr()
	// Have to set gossip.NodeID before calling gossip.AddInfoXXX.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext)

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			RangeID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if !clientTransport.MakeSender(func(err error, _ roachpb.ReplicaDescriptor) {
			if err != nil && !grpcutil.IsClosedConnection(err) {
				panic(err)
			}
		}).SendAsync(req) {
			t.Errorf("failed to send message %d", i)
		}
	}

	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. If an alternate is proposed on response, the client addr
// is modified and the method returns for forwarding by the caller.
func (c *client) gossip(g *Gossip, gossipClient GossipClient, stopper *stop.Stopper) error {
	// For an un-bootstrapped node, g.is.NodeID is 0 when the client starts
	// gossiping, so it's better to read the node address from g.is every time.
	g.mu.Lock()
	addr := g.is.NodeAddr
	g.mu.Unlock()

	ctx := grpcutil.NewContextWithStopper(context.Background(), stopper)

	stream, err := gossipClient.Gossip(ctx)
	if err != nil {
		return err
	}
	defer func() {
		if err := stream.CloseSend(); err != nil {
			log.Error(err)
		}
	}()

	if err := c.requestGossip(g, addr, stream); err != nil {
		return err
	}

	sendGossipChan := make(chan struct{}, 1)

	// Register a callback for gossip updates.
	updateCallback := func(_ string, _ roachpb.Value) {
		select {
		case sendGossipChan <- struct{}{}:
		default:
		}
	}
	// Defer calling the "undoer" callback returned from registration.
	defer g.RegisterCallback(".*", updateCallback)()

	// Loop in a worker, sending updates from the info store.
	stopper.RunWorker(func() {
		for {
			select {
			case <-sendGossipChan:
				if err := c.sendGossip(g, addr, stream); err != nil {
					if !grpcutil.IsClosedConnection(err) {
						log.Error(err)
					}
					return
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})

	// Loop until the stopper is signalled, or until either the gossip or RPC
	// clients are closed. The stopper's signal is propagated through the
	// context attached to the stream.
	for {
		reply, err := stream.Recv()
		if err != nil {
			return err
		}
		if err := c.handleResponse(g, reply); err != nil {
			return err
		}
	}
}
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(
	g *Gossip,
	disconnected chan *client,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	nodeID roachpb.NodeID,
	breaker *circuit.Breaker,
) {
	stopper.RunWorker(func() {
		ctx, cancel := context.WithCancel(c.ctx)
		var wg sync.WaitGroup
		defer func() {
			// This closes the outgoing stream, causing any attempt to send or
			// receive to return an error.
			//
			// Note: it is still possible for incoming gossip to be processed after
			// this point.
			cancel()

			// The stream is closed, but there may still be some incoming gossip
			// being processed. Wait until that is complete to avoid racing the
			// client's removal against the discovery of its remote's node ID.
			wg.Wait()
			disconnected <- c
		}()

		consecFailures := breaker.ConsecFailures()
		var stream Gossip_GossipClient
		if err := breaker.Call(func() error {
			// Note: avoid using `grpc.WithBlock` here. This code is already
			// asynchronous from the caller's perspective, so the only effect of
			// `WithBlock` here is blocking shutdown - at the time of this writing,
			// that ends up making `kv` tests take twice as long.
			conn, err := rpcCtx.GRPCDial(c.addr.String())
			if err != nil {
				return err
			}
			if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil {
				return err
			}
			return c.requestGossip(g, stream)
		}, 0); err != nil {
			if consecFailures == 0 {
				log.Warningf(ctx, "node %d: failed to start gossip client: %s", nodeID, err)
			}
			return
		}

		// Start gossiping.
		log.Infof(ctx, "node %d: started gossip client to %s", nodeID, c.addr)
		if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				peerID := c.peerID
				g.mu.Unlock()
				if peerID != 0 {
					log.Infof(ctx, "node %d: closing client to node %d (%s): %s", nodeID, peerID, c.addr, err)
				} else {
					log.Infof(ctx, "node %d: closing client to %s: %s", nodeID, c.addr, err)
				}
			}
		}
	})
}
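// The deferred block in the latest start above orders shutdown as: cancel the
// stream's context, wait for in-flight receive processing to drain, then
// announce the disconnect. A minimal, self-contained sketch of that ordering
// using only the standard library; the receiver loop and channel names here
// are hypothetical and not taken from the gossip package.
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	var wg sync.WaitGroup
	disconnected := make(chan struct{}, 1)

	// Receiver loop: keeps processing incoming work until the context is
	// canceled, mirroring the gossip receive loop tracked by wg.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			select {
			case <-ctx.Done():
				return
			case <-time.After(10 * time.Millisecond):
				fmt.Println("processed one incoming message")
			}
		}
	}()

	// Shutdown: cancel first so the receiver unblocks, then wait for it to
	// finish, and only then report the disconnect. Reversing the last two
	// steps would race the disconnect notification against in-flight
	// processing, which is exactly what the wg.Wait() in start avoids.
	time.Sleep(35 * time.Millisecond)
	cancel()
	wg.Wait()
	disconnected <- struct{}{}
	<-disconnected
	fmt.Println("client disconnected")
}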