// TestSendRPCRetry verifies that when sendRPC fails on the first address
// but succeeds on the second, the reply from the second address is
// returned successfully.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	g.SetNodeID(1)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{
			NodeID:  roachpb.NodeID(i),
			StoreID: roachpb.StoreID(i),
		})
	}
	// Define our rpcSend stub which returns success on the second address.
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, _ *rpc.Context) ([]proto.Message, error) {
		if method == "Node.Batch" {
			// The reply from the first address fails.
			_ = getReply()
			// The reply from the second address succeeds.
			batchReply := getReply().(*roachpb.BatchResponse)
			reply := &roachpb.ScanResponse{}
			batchReply.Add(reply)
			reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}})
			return []proto.Message{batchReply}, nil
		}
		return nil, util.Errorf("unexpected method %v", method)
	}
	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			return []roachpb.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 1)
	sr, err := client.SendWrapped(ds, nil, scan)
	if err != nil {
		t.Fatal(err)
	}
	if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. If an alternate is proposed on response, the client addr
// is modified and the method returns so the caller can forward to it.
func (c *client) gossip(g *Gossip, stopper *stop.Stopper) error {
	// For an un-bootstrapped node, g.is.NodeID is 0 when the client starts
	// gossiping, so it's better to read the node ID from g.is every time.
	g.mu.Lock()
	addr := util.MakeUnresolvedAddr(g.is.NodeAddr.Network(), g.is.NodeAddr.String())
	g.mu.Unlock()
	lAddr := util.MakeUnresolvedAddr(c.rpcClient.LocalAddr().Network(), c.rpcClient.LocalAddr().String())
	done := make(chan *netrpc.Call, 10)
	c.getGossip(g, addr, lAddr, done)

	// Register a callback for gossip updates.
	updateCallback := func(_ string, _ roachpb.Value) {
		c.sendGossip(g, addr, lAddr, done)
	}
	// Defer calling the "undoer" callback returned from registration.
	defer g.RegisterCallback(".*", updateCallback)()

	// Loop until the stopper is signalled, or until either the gossip or
	// RPC clients are closed. getGossip is a hanging get, returning
	// results only when the remote server has new gossip information to
	// share. sendGossip is sent to the remote server when this node has
	// new gossip information to share with the server.
	//
	// Nodes "pull" gossip in order to guarantee that they're connected
	// to the sentinel and not too distant from other nodes in the
	// network. They also "push" their own gossip, which guarantees that
	// the sentinel node will contain their info, and therefore so will
	// every node connected to the sentinel. Just pushing or just pulling
	// wouldn't guarantee a fully connected network.
	for {
		select {
		case call := <-done:
			if err := c.handleGossip(g, call); err != nil {
				return err
			}
			req := call.Args.(*Request)
			// If this was from a gossip pull request, fetch again.
			if req.Delta == nil {
				c.getGossip(g, addr, lAddr, done)
			} else {
				// Otherwise, it's a gossip push request; set the sendingGossip
				// flag false and maybe send more gossip if there have been
				// additional updates.
				g.mu.Lock()
				c.sendingGossip = false
				g.mu.Unlock()
				c.sendGossip(g, addr, lAddr, done)
			}
		case <-c.rpcClient.Closed:
			return util.Errorf("client closed")
		case <-c.closer:
			return nil
		case <-stopper.ShouldStop():
			return nil
		}
	}
}
// TestSendRPCRetry verifies that when sendRPC fails on the first address
// but succeeds on the second, the reply from the second address is
// returned successfully.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	if err := g.SetNodeDescriptor(&proto.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = proto.RangeDescriptor{
		RangeID:  1,
		StartKey: proto.Key("a"),
		EndKey:   proto.Key("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &proto.NodeDescriptor{
			NodeID:  proto.NodeID(i),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(proto.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, proto.Replica{
			NodeID:  proto.NodeID(i),
			StoreID: proto.StoreID(i),
		})
	}
	// Define our rpcSend stub which returns success on the second address.
	var testFn rpcSendFn = func(_ rpc.Options, method string, addrs []net.Addr, getArgs func(addr net.Addr) gogoproto.Message, getReply func() gogoproto.Message, _ *rpc.Context) ([]gogoproto.Message, error) {
		if method == "Node.Scan" {
			// The reply from the first address fails.
			_ = getReply()
			// The reply from the second address succeeds.
			reply := getReply()
			reply.(*proto.ScanResponse).Rows = append([]proto.KeyValue{}, proto.KeyValue{Key: proto.Key("b"), Value: proto.Value{}})
			return []gogoproto.Message{reply}, nil
		}
		return nil, util.Errorf("unexpected method %v", method)
	}
	ctx := &DistSenderContext{
		rpcSend: testFn,
		rangeDescriptorDB: mockRangeDescriptorDB(func(_ proto.Key, _ lookupOptions) ([]proto.RangeDescriptor, error) {
			return []proto.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	call := proto.ScanCall(proto.Key("a"), proto.Key("d"), 1)
	sr := call.Reply.(*proto.ScanResponse)
	ds.Send(context.Background(), call)
	if err := sr.GoError(); err != nil {
		t.Fatal(err)
	}
	if l := len(sr.Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
func checkUpdateFails(t *testing.T, network, oldAddrString, newAddrString string) {
	oldAddr := util.MakeUnresolvedAddr(network, oldAddrString)
	newAddr := util.MakeUnresolvedAddr(network, newAddrString)

	retAddr, err := updatedAddr(oldAddr, newAddr)
	if err == nil {
		t.Fatalf("updatedAddr(%v, %v) should have failed; instead returned %v", oldAddr, newAddr, retAddr)
	}
}
// Start runs the RPC and HTTP servers, starts the gossip instance (if
// selfBootstrap is true, uses the rpc server's address as the gossip
// bootstrap), and starts the node using the supplied engines slice.
func (s *Server) Start(selfBootstrap bool) error {
	tlsConfig, err := s.ctx.GetServerTLSConfig()
	if err != nil {
		return err
	}

	unresolvedAddr := util.MakeUnresolvedAddr("tcp", s.ctx.Addr)
	ln, err := util.ListenAndServe(s.stopper, s, unresolvedAddr, tlsConfig)
	if err != nil {
		return err
	}
	s.listener = ln

	addr := ln.Addr()
	addrStr := addr.String()

	// Handle self-bootstrapping case for a single node.
	if selfBootstrap {
		selfResolver, err := resolver.NewResolver(&s.ctx.Context, addrStr)
		if err != nil {
			return err
		}
		s.gossip.SetResolvers([]resolver.Resolver{selfResolver})
	}
	s.gossip.Start(s.rpc, addr, s.stopper)

	if err := s.node.start(s.rpc, addr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
		return err
	}

	// Begin recording runtime statistics.
	runtime := status.NewRuntimeStatRecorder(s.node.Descriptor.NodeID, s.clock)
	s.tsDB.PollSource(runtime, s.ctx.MetricsFrequency, ts.Resolution10s, s.stopper)

	// Begin recording time series data collected by the status monitor.
	s.recorder = status.NewNodeStatusRecorder(s.node.status, s.clock)
	s.tsDB.PollSource(s.recorder, s.ctx.MetricsFrequency, ts.Resolution10s, s.stopper)

	// Begin recording status summaries.
	s.startWriteSummaries()

	s.sqlServer.SetNodeID(s.node.Descriptor.NodeID)

	s.status = newStatusServer(s.db, s.gossip, s.metaRegistry, s.ctx)

	log.Infof("starting %s server at %s", s.ctx.HTTPRequestScheme(), addr)
	s.initHTTP()

	// TODO(tamird): pick a port here
	host, _, err := net.SplitHostPort(addrStr)
	if err != nil {
		return err
	}
	return s.pgServer.Start(util.MakeUnresolvedAddr("tcp", net.JoinHostPort(host, "0")))
}
// Start runs the RPC and HTTP servers, starts the gossip instance (if
// selfBootstrap is true, uses the rpc server's address as the gossip
// bootstrap), and starts the node using the supplied engines slice.
func (s *Server) Start(selfBootstrap bool) error {
	tlsConfig, err := s.ctx.GetServerTLSConfig()
	if err != nil {
		return err
	}

	unresolvedAddr := util.MakeUnresolvedAddr("tcp", s.ctx.Addr)
	ln, err := util.ListenAndServe(s.stopper, s, unresolvedAddr, tlsConfig)
	if err != nil {
		return err
	}
	s.listener = ln

	addr := ln.Addr()
	addrStr := addr.String()
	s.rpcContext.SetLocalServer(s.rpc, addrStr)

	// Handle self-bootstrapping case for a single node.
	if selfBootstrap {
		selfResolver, err := resolver.NewResolver(&s.ctx.Context, addrStr)
		if err != nil {
			return err
		}
		s.gossip.SetResolvers([]resolver.Resolver{selfResolver})
	}
	s.gossip.Start(s.rpc, addr)

	if err := s.node.start(s.rpc, addr, s.ctx.Engines, s.ctx.NodeAttributes); err != nil {
		return err
	}

	// Begin recording runtime statistics.
	runtime := status.NewRuntimeStatRecorder(s.node.Descriptor.NodeID, s.clock)
	s.tsDB.PollSource(runtime, s.ctx.MetricsFrequency, ts.Resolution10s, s.stopper)

	// Begin recording time series data collected by the status monitor.
	s.recorder = status.NewNodeStatusRecorder(s.node.status, s.clock)
	s.tsDB.PollSource(s.recorder, s.ctx.MetricsFrequency, ts.Resolution10s, s.stopper)

	// Begin recording status summaries.
	s.startWriteSummaries()

	s.sqlServer.SetNodeID(s.node.Descriptor.NodeID)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	s.schemaChangeManager = sql.NewSchemaChangeManager(*s.db, s.gossip, s.leaseMgr)
	s.schemaChangeManager.Start(s.stopper)

	s.status = newStatusServer(s.db, s.gossip, s.metaRegistry, s.ctx)

	log.Infof("starting %s server at %s", s.ctx.HTTPRequestScheme(), addr)
	s.initHTTP()

	return s.pgServer.Start(util.MakeUnresolvedAddr("tcp", s.ctx.PGAddr))
}
// TestSendRPCRetry verifies that when sendRPC fails on the first address
// but succeeds on the second, the reply from the second address is
// returned successfully.
func TestSendRPCRetry(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	g.SetNodeID(1)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 1}); err != nil {
		t.Fatal(err)
	}
	// Fill RangeDescriptor with 2 replicas.
	var descriptor = roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("z"),
	}
	for i := 1; i <= 2; i++ {
		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
		nd := &roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
			t.Fatal(err)
		}
		descriptor.Replicas = append(descriptor.Replicas, roachpb.ReplicaDescriptor{
			NodeID:  roachpb.NodeID(i),
			StoreID: roachpb.StoreID(i),
		})
	}
	var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, _ *rpc.Context) (proto.Message, error) {
		batchReply := &roachpb.BatchResponse{}
		reply := &roachpb.ScanResponse{}
		batchReply.Add(reply)
		reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}})
		return batchReply, nil
	}
	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			return []roachpb.RangeDescriptor{descriptor}, nil
		}),
	}
	ds := NewDistSender(ctx, g)
	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), 1)
	sr, err := client.SendWrapped(ds, nil, scan)
	if err != nil {
		t.Fatal(err)
	}
	if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 {
		t.Fatalf("expected 1 row; got %d", l)
	}
}
// TestStoresGossipStorageReadLatest verifies that the latest
// bootstrap info from multiple stores is returned on Read.
func TestStoresGossipStorageReadLatest(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual, stores, ls, stopper := createStores(2, t)
	defer stopper.Stop()
	ls.AddStore(stores[0])

	// Set clock to 1.
	manual.Set(1)

	// Add a fake address and write.
	var bi gossip.BootstrapInfo
	bi.Addresses = append(bi.Addresses, util.MakeUnresolvedAddr("tcp", "127.0.0.1:8001"))
	if err := ls.WriteBootstrapInfo(&bi); err != nil {
		t.Fatal(err)
	}

	// Now remove store 0 and add store 1.
	ls.RemoveStore(stores[0])
	ls.AddStore(stores[1])

	// Increment clock, add another address and write.
	manual.Increment(1)
	bi.Addresses = append(bi.Addresses, util.MakeUnresolvedAddr("tcp", "127.0.0.1:8002"))
	if err := ls.WriteBootstrapInfo(&bi); err != nil {
		t.Fatal(err)
	}

	// Create a new stores object to freshly read. Should get latest
	// version from store 1.
	manual.Increment(1)
	ls2 := NewStores(ls.clock)
	ls2.AddStore(stores[0])
	ls2.AddStore(stores[1])
	var verifyBI gossip.BootstrapInfo
	if err := ls2.ReadBootstrapInfo(&verifyBI); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(bi, verifyBI) {
		t.Errorf("bootstrap info %+v not equal to expected %+v", verifyBI, bi)
	}

	// Verify that stores[0], which had old info, was updated with
	// latest bootstrap info during the read.
	ls3 := NewStores(ls.clock)
	ls3.AddStore(stores[0])
	verifyBI.Reset()
	if err := ls3.ReadBootstrapInfo(&verifyBI); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(bi, verifyBI) {
		t.Errorf("bootstrap info %+v not equal to expected %+v", verifyBI, bi)
	}
}
// TestGossipStorageCleanup verifies that bad resolvers are purged
// from the bootstrap info after gossip has successfully connected.
func TestGossipStorageCleanup(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	const numNodes = 3
	network := simulation.NewNetwork(stopper, numNodes, false)

	const notReachableAddr = "localhost:0"
	const invalidAddr = "10.0.0.1000:3333333"
	// Set storage for each of the nodes.
	addresses := make(unresolvedAddrSlice, len(network.Nodes))
	stores := make([]testStorage, len(network.Nodes))
	for i, n := range network.Nodes {
		addresses[i] = util.MakeUnresolvedAddr(n.Addr().Network(), n.Addr().String())
		// Pre-add an invalid address to each gossip storage.
		if err := stores[i].WriteBootstrapInfo(&gossip.BootstrapInfo{
			Addresses: []util.UnresolvedAddr{
				util.MakeUnresolvedAddr("tcp", network.Nodes[(i+1)%numNodes].Addr().String()), // node i+1 address
				util.MakeUnresolvedAddr("tcp", notReachableAddr),                              // unreachable address
				util.MakeUnresolvedAddr("tcp", invalidAddr),                                   // invalid address
			},
		}); err != nil {
			t.Fatal(err)
		}
		if err := n.Gossip.SetStorage(&stores[i]); err != nil {
			t.Fatal(err)
		}
		n.Gossip.SetStallInterval(1 * time.Millisecond)
		n.Gossip.SetBootstrapInterval(1 * time.Millisecond)
	}

	// Wait for the gossip network to connect.
	network.RunUntilFullyConnected()

	// Wait long enough for storage to get the expected number of
	// addresses and no pending cleanups.
	util.SucceedsSoon(t, func() error {
		for i := range stores {
			p := &stores[i]
			if expected, actual := len(network.Nodes)-1 /* -1 is ourself */, p.Len(); expected != actual {
				return errors.Errorf("expected %v, got %v (info: %#v)", expected, actual, p.Info().Addresses)
			}
			for _, addr := range p.Info().Addresses {
				if addr.String() == invalidAddr {
					return errors.Errorf("node %d still needs bootstrap cleanup", i)
				}
			}
		}
		return nil
	})
}
func checkUpdateMatches(t *testing.T, network, oldAddrString, newAddrString, expAddrString string) {
	oldAddr := util.MakeUnresolvedAddr(network, oldAddrString)
	newAddr := util.MakeUnresolvedAddr(network, newAddrString)
	expAddr := util.MakeUnresolvedAddr(network, expAddrString)

	retAddr, err := updatedAddr(oldAddr, newAddr)
	if err != nil {
		t.Fatalf("updatedAddr failed on %v, %v: %v", oldAddr, newAddr, err)
	}

	if retAddr.String() != expAddrString {
		t.Fatalf("updatedAddr(%v, %v) was %s; expected %s", oldAddr, newAddr, retAddr, expAddr)
	}
}
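The two helpers above lend themselves to a small table-style driver. The sketch below is illustrative only: the concrete address pairs, and the assumption that updatedAddr accepts a changed port but rejects a changed host, are not taken from the original test suite.

// Hypothetical driver for checkUpdateMatches/checkUpdateFails. The expected
// outcomes assume updatedAddr allows a port change but rejects a host change;
// that behavior is an assumption made for illustration.
func TestUpdatedAddrExamples(t *testing.T) {
	checkUpdateMatches(t, "tcp", "127.0.0.1:20", "127.0.0.1:21", "127.0.0.1:21")
	checkUpdateFails(t, "tcp", "127.0.0.1:20", "192.168.0.1:20")
}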
// startGossip creates local and remote gossip instances.
// Both remote and local instances launch the gossip service.
func startGossip(t *testing.T) (local, remote *Gossip, stopper *stop.Stopper) {
	stopper = stop.NewStopper()
	lclock := hlc.NewClock(hlc.UnixNano)
	lRPCContext := rpc.NewContext(&base.Context{Insecure: true}, lclock, stopper)

	laddr := util.CreateTestAddr("tcp")
	lserver := rpc.NewServer(lRPCContext)
	lTLSConfig, err := lRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	lln, err := util.ListenAndServe(stopper, lserver, laddr, lTLSConfig)
	if err != nil {
		t.Fatal(err)
	}
	local = New(lRPCContext, TestBootstrap)
	local.SetNodeID(1)
	if err := local.SetNodeDescriptor(&roachpb.NodeDescriptor{
		NodeID:  1,
		Address: util.MakeUnresolvedAddr(laddr.Network(), laddr.String()),
	}); err != nil {
		t.Fatal(err)
	}

	rclock := hlc.NewClock(hlc.UnixNano)
	rRPCContext := rpc.NewContext(&base.Context{Insecure: true}, rclock, stopper)

	raddr := util.CreateTestAddr("tcp")
	rserver := rpc.NewServer(rRPCContext)
	rTLSConfig, err := rRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	rln, err := util.ListenAndServe(stopper, rserver, raddr, rTLSConfig)
	if err != nil {
		t.Fatal(err)
	}
	remote = New(rRPCContext, TestBootstrap)
	remote.SetNodeID(2)
	if err := remote.SetNodeDescriptor(&roachpb.NodeDescriptor{
		NodeID:  2,
		Address: util.MakeUnresolvedAddr(raddr.Network(), raddr.String()),
	}); err != nil {
		t.Fatal(err)
	}
	local.start(lserver, lln.Addr(), stopper)
	remote.start(rserver, rln.Addr(), stopper)
	time.Sleep(time.Millisecond)
	return
}
// start initializes the infostore with the rpc server address and
// then begins processing connecting clients in an infinite select
// loop via goroutine. Periodically, clients connected and awaiting
// the next round of gossip are awoken via the condition variable.
func (s *server) start(addr net.Addr) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.mu.is.NodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())

	broadcast := func() {
		// Close the old ready and open a new one. This will broadcast to all
		// receivers and set up a fresh channel to replace the closed one.
		s.mu.Lock()
		defer s.mu.Unlock()
		ready := make(chan struct{})
		close(s.mu.ready)
		s.mu.ready = ready
	}
	unregister := s.mu.is.registerCallback(".*", func(_ string, _ roachpb.Value) {
		broadcast()
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		s.mu.Lock()
		unregister()
		s.mu.Unlock()
		broadcast()
	})
}
// startFakeServerGossip creates a local gossip instance and a remote,
// faked gossip instance. The remote instance launches its fake gossip
// service only so the client's messages can be checked.
func startFakeServerGossip(t *testing.T) (local *Gossip, remote *fakeGossipServer, stopper *stop.Stopper) {
	lclock := hlc.NewClock(hlc.UnixNano)
	stopper = stop.NewStopper()
	lRPCContext := rpc.NewContext(&base.Context{Insecure: true}, lclock, stopper)

	laddr := util.CreateTestAddr("tcp")
	lserver := rpc.NewServer(laddr, lRPCContext)
	if err := lserver.Start(); err != nil {
		t.Fatal(err)
	}
	local = New(lRPCContext, TestBootstrap)
	local.start(lserver, stopper)

	rclock := hlc.NewClock(hlc.UnixNano)
	raddr := util.CreateTestAddr("tcp")
	rRPCContext := rpc.NewContext(&base.Context{Insecure: true}, rclock, stopper)
	rserver := rpc.NewServer(raddr, rRPCContext)
	if err := rserver.Start(); err != nil {
		t.Fatal(err)
	}

	remote, err := newFakeGossipServer(rserver, stopper)
	if err != nil {
		t.Fatal(err)
	}
	addr := rserver.Addr()
	remote.nodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())
	time.Sleep(time.Millisecond)
	return
}
// start initializes the infostore with the rpc server address and
// then begins processing connecting clients in an infinite select
// loop via goroutine. Periodically, clients connected and awaiting
// the next round of gossip are awoken via the condition variable.
func (s *server) start(rpcServer *rpc.Server, stopper *stop.Stopper) {
	addr := rpcServer.Addr()
	s.is.NodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())
	if err := rpcServer.Register("Gossip.Gossip", s.Gossip, &Request{}); err != nil {
		log.Fatalf("unable to register gossip service with RPC server: %s", err)
	}
	rpcServer.AddCloseCallback(s.onClose)

	updateCallback := func(_ string, _ roachpb.Value) {
		// Wake up all pending clients.
		s.ready.Broadcast()
	}
	unregister := s.is.registerCallback(".*", updateCallback)

	stopper.RunWorker(func() {
		// Periodically wake up blocked client gossip requests.
		for {
			select {
			case <-stopper.ShouldStop():
				s.stop(unregister)
				return
			}
		}
	})
}
func makeTestGossip(t *testing.T) (*gossip.Gossip, func()) {
	n := simulation.NewNetwork(1, "tcp", gossip.TestInterval)
	g := n.Nodes[0].Gossip
	permConfig := &config.PermConfig{
		Read:  []string{""},
		Write: []string{""},
	}

	configMap, err := config.NewPrefixConfigMap([]*config.PrefixConfig{
		{proto.KeyMin, nil, permConfig},
	})
	if err != nil {
		t.Fatalf("failed to make prefix config map, err: %s", err.Error())
	}
	if err := g.AddInfo(gossip.KeySentinel, "cluster1", time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfo(gossip.KeyConfigPermission, configMap, time.Hour); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfo(gossip.KeyFirstRangeDescriptor, testRangeDescriptor, time.Hour); err != nil {
		t.Fatal(err)
	}
	nodeIDKey := gossip.MakeNodeIDKey(1)
	if err := g.AddInfo(nodeIDKey, &proto.NodeDescriptor{
		NodeID:  1,
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
		Attrs:   proto.Attributes{Attrs: []string{"attr1", "attr2"}},
	}, time.Hour); err != nil {
		t.Fatal(err)
	}
	return g, n.Stop
}
// startGossip creates and starts a gossip instance.
func startGossip(nodeID roachpb.NodeID, stopper *stop.Stopper, t *testing.T) *Gossip {
	clock := hlc.NewClock(hlc.UnixNano)
	rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper)

	addr := util.CreateTestAddr("tcp")
	server := rpc.NewServer(rpcContext)
	tlsConfig, err := rpcContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	ln, err := util.ListenAndServe(stopper, server, addr, tlsConfig)
	if err != nil {
		t.Fatal(err)
	}
	g := New(rpcContext, TestBootstrap, stopper)
	g.SetNodeID(nodeID)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{
		NodeID:  nodeID,
		Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
	}); err != nil {
		t.Fatal(err)
	}
	g.start(server, ln.Addr())
	time.Sleep(time.Millisecond)
	return g
}
// start initializes the infostore with the rpc server address and
// then begins processing connecting clients in an infinite select
// loop via goroutine. Periodically, clients connected and awaiting
// the next round of gossip are awoken via the condition variable.
func (s *server) start(grpcServer *grpc.Server, addr net.Addr) {
	s.mu.Lock()
	s.is.NodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())
	s.mu.Unlock()
	RegisterGossipServer(grpcServer, s)

	broadcast := func() {
		ready := make(chan struct{})

		s.mu.Lock()
		close(s.ready)
		s.ready = ready
		s.mu.Unlock()
	}
	unregister := s.is.registerCallback(".*", func(_ string, _ roachpb.Value) {
		broadcast()
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldDrain()

		s.mu.Lock()
		unregister()
		s.mu.Unlock()

		broadcast()
	})
}
// startFakeServerGossips creates a local gossip instance and a remote,
// faked gossip instance. The remote instance launches its fake gossip
// service only so the client's messages can be checked.
func startFakeServerGossips(t *testing.T) (local *Gossip, remote *fakeGossipServer, stopper *stop.Stopper) {
	stopper = stop.NewStopper()
	lRPCContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper)

	lserver := rpc.NewServer(lRPCContext)
	lln, err := util.ListenAndServeGRPC(stopper, lserver, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}
	local = New(lRPCContext, nil, stopper)
	local.start(lserver, lln.Addr())

	rRPCContext := rpc.NewContext(&base.Context{Insecure: true}, nil, stopper)

	rserver := rpc.NewServer(rRPCContext)
	rln, err := util.ListenAndServeGRPC(stopper, rserver, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}
	remote = newFakeGossipServer(rserver, stopper)
	addr := rln.Addr()
	remote.nodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())
	return
}
// getNodeIDAddressLocked looks up the address of the node by ID. The mutex is
// assumed held by the caller. This method is called externally via
// GetNodeIDAddress or internally when looking up a "distant" node address to
// connect directly to.
func (g *Gossip) getNodeIDAddressLocked(nodeID proto.NodeID) (net.Addr, error) {
	nd, err := g.getNodeDescriptorLocked(nodeID)
	if err != nil {
		return nil, err
	}
	return util.MakeUnresolvedAddr(nd.Address.Network, nd.Address.Address), nil
}
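The doc comment refers to an exported GetNodeIDAddress entry point. A plausible sketch of that wrapper, which simply acquires the gossip mutex before delegating to the locked variant, might look as follows; the wrapper itself is an assumption and is not copied from the source (the g.mu field appears in the other snippets here).

// Hypothetical locking wrapper around getNodeIDAddressLocked, sketched from
// the comment above; an illustrative assumption, not the source's code.
func (g *Gossip) GetNodeIDAddress(nodeID proto.NodeID) (net.Addr, error) {
	g.mu.Lock()
	defer g.mu.Unlock()
	return g.getNodeIDAddressLocked(nodeID)
}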
func TestUpdateOffsetOnHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()

	ctx := newNodeTestContext(nil, stopper)
	_, ln := newTestServer(t, ctx, false)

	// Create a client and set its remote offset. On first heartbeat,
	// it will update the server's remote clocks map. We create the
	// client manually here to allow us to set the remote offset
	// before the first heartbeat.
	tlsConfig, err := ctx.GetClientTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	serverAddr := ln.Addr()
	client := &Client{
		Closed:       make(chan struct{}),
		addr:         util.MakeUnresolvedAddr(serverAddr.Network(), serverAddr.String()),
		tlsConfig:    tlsConfig,
		clock:        ctx.localClock,
		remoteClocks: ctx.RemoteClocks,
		remoteOffset: RemoteOffset{
			Offset:      10,
			Uncertainty: 5,
			MeasuredAt:  20,
		},
	}
	if err = client.connect(); err != nil {
		t.Fatal(err)
	}

	ctx.RemoteClocks.mu.Lock()
	remoteAddr := client.RemoteAddr().String()
	o := ctx.RemoteClocks.offsets[remoteAddr]
	ctx.RemoteClocks.mu.Unlock()
	expServerOffset := RemoteOffset{Offset: -10, Uncertainty: 5, MeasuredAt: 20}
	if !proto.Equal(&o, &expServerOffset) {
		t.Errorf("expected updated offset %v, instead %v", expServerOffset, o)
	}
	ln.Close()

	// Remove the offset from RemoteClocks and close the connection from the
	// remote end. A new offset for the server should not be added to the clock
	// monitor.
	ctx.RemoteClocks.mu.Lock()
	delete(ctx.RemoteClocks.offsets, remoteAddr)
	ln.Close()
	ctx.RemoteClocks.mu.Unlock()

	ctx.RemoteClocks.mu.Lock()
	if offset, ok := ctx.RemoteClocks.offsets[remoteAddr]; ok {
		t.Errorf("unexpected updated offset: %v", offset)
	}
	ctx.RemoteClocks.mu.Unlock()
}
func makeReplicas(addrs ...net.Addr) ReplicaSlice {
	replicas := make(ReplicaSlice, len(addrs))
	for i, addr := range addrs {
		replicas[i].NodeDesc = &roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}
	}
	return replicas
}
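Since util.MakeUnresolvedAddr yields a value that satisfies net.Addr (as the resolveAddress snippet below also relies on), unresolved addresses can be passed straight to makeReplicas. A minimal usage sketch with made-up host:port strings:

// Hypothetical caller: build a two-replica slice from fabricated addresses.
func exampleReplicas() ReplicaSlice {
	return makeReplicas(
		util.MakeUnresolvedAddr("tcp", "node1:26257"), // illustrative address
		util.MakeUnresolvedAddr("tcp", "node2:26257"), // illustrative address
	)
}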
// GossipNode gossips the node's address, which is necessary before
// any messages can be sent to it. Normally done automatically by
// AddNode.
func (rttc *raftTransportTestContext) GossipNode(nodeID roachpb.NodeID, addr net.Addr) {
	if err := rttc.gossip.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		rttc.t.Fatal(err)
	}
}
func resolveAddress(network, address string) (net.Addr, error) {
	if network == "tcp" {
		_, err := net.ResolveTCPAddr("tcp", address)
		if err != nil {
			return nil, err
		}
		return util.MakeUnresolvedAddr("tcp", address), nil
	}
	return nil, util.Errorf("unknown address type: %q", network)
}
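A short usage sketch of resolveAddress; the address literal is an assumption, and only the "tcp" branch above is exercised.

// Hypothetical caller: validate a TCP host:port, keeping it in unresolved form.
func exampleResolve() (net.Addr, error) {
	// A malformed host:port would be rejected by net.ResolveTCPAddr inside
	// resolveAddress; "localhost:26257" is an illustrative value only.
	return resolveAddress("tcp", "localhost:26257")
}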
// TestClientCloseBeforeConnect verifies that the client goroutine
// does not leak if the client is closed before connecting.
func TestClientCloseBeforeConnect(t *testing.T) {
	defer leaktest.AfterTest(t)
	c := NewClient(
		util.MakeUnresolvedAddr("tcp", ":1337"),
		&Context{Stopper: stop.NewStopper()},
	)
	c.Close()
}
// NewNetwork creates nodeCount gossip nodes. The networkType should
// be set to either "tcp" or "unix". The gossipInterval should be set
// to a compressed simulation timescale, though large enough to give
// the concurrent goroutines enough time to pass data back and forth
// in order to yield accurate estimates of how old data actually ends
// up being at the various nodes (e.g. DefaultTestGossipInterval).
// TODO: This method should take `stopper` as an argument.
func NewNetwork(nodeCount int, networkType string, gossipInterval time.Duration) *Network {
	clock := hlc.NewClock(hlc.UnixNano)

	log.Infof("simulating gossip network with %d nodes", nodeCount)

	stopper := stop.NewStopper()

	rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper)

	nodes := make([]*Node, nodeCount)
	for i := range nodes {
		server := rpc.NewServer(util.CreateTestAddr(networkType), rpcContext)
		if err := server.Start(); err != nil {
			log.Fatal(err)
		}
		nodes[i] = &Node{Server: server}
	}

	var numResolvers int
	if len(nodes) > 3 {
		numResolvers = 3
	} else {
		numResolvers = len(nodes)
	}

	for i, leftNode := range nodes {
		// Build new resolvers for each instance or we'll get data races.
		var resolvers []resolver.Resolver
		for _, rightNode := range nodes[:numResolvers] {
			resolvers = append(resolvers, resolver.NewResolverFromAddress(rightNode.Server.Addr()))
		}

		gossipNode := gossip.New(rpcContext, gossipInterval, resolvers)
		addr := leftNode.Server.Addr()
		if err := gossipNode.SetNodeDescriptor(&roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i + 1),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}); err != nil {
			log.Fatal(err)
		}

		gossipNode.Start(leftNode.Server, stopper)
		stopper.AddCloser(leftNode.Server)

		leftNode.Gossip = gossipNode
	}

	return &Network{
		Nodes:          nodes,
		NetworkType:    networkType,
		GossipInterval: gossipInterval,
		Stopper:        stopper,
	}
}
// NewNetwork creates nodeCount gossip nodes. The networkType should
// be set to either "tcp" or "unix".
func NewNetwork(nodeCount int, networkType string) *Network {
	clock := hlc.NewClock(hlc.UnixNano)

	log.Infof("simulating gossip network with %d nodes", nodeCount)

	stopper := stop.NewStopper()

	rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper)
	tlsConfig, err := rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(err)
	}

	nodes := make([]*Node, nodeCount)
	for i := range nodes {
		server := rpc.NewServer(rpcContext)
		testAddr := util.CreateTestAddr(networkType)
		ln, err := util.ListenAndServe(stopper, server, testAddr, tlsConfig)
		if err != nil {
			log.Fatal(err)
		}
		nodes[i] = &Node{Server: server, Addr: ln.Addr()}
	}

	for i, leftNode := range nodes {
		// Build new resolvers for each instance or we'll get data races.
		resolvers := []resolver.Resolver{resolver.NewResolverFromAddress(nodes[0].Addr)}

		gossipNode := gossip.New(rpcContext, resolvers)
		addr := leftNode.Addr
		gossipNode.SetNodeID(roachpb.NodeID(i + 1))
		if err := gossipNode.SetNodeDescriptor(&roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i + 1),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}); err != nil {
			log.Fatal(err)
		}
		if err := gossipNode.AddInfo(addr.String(), encoding.EncodeUint64(nil, 0), time.Hour); err != nil {
			log.Fatal(err)
		}
		gossipNode.Start(leftNode.Server, addr, stopper)
		gossipNode.EnableSimulationCycler(true)

		leftNode.Gossip = gossipNode
	}

	return &Network{
		Nodes:       nodes,
		NetworkType: networkType,
		Stopper:     stopper,
	}
}
// NewClient returns a client RPC stub for the specified address
// (usually a TCP host:port, but for testing may be a unix domain
// socket). The process-wide client RPC cache is consulted first; if
// the requested client is not present, it's created and the cache is
// updated. Specify opts to fine tune client connection behavior or
// nil to use defaults (i.e. indefinite retries with exponential
// backoff).
//
// The Closed channel is closed if the client's Close() method is
// invoked.
func NewClient(addr net.Addr, context *Context) *Client {
	clientMu.Lock()
	defer clientMu.Unlock()

	unresolvedAddr := util.MakeUnresolvedAddr(addr.Network(), addr.String())

	key := fmt.Sprintf("%s@%s", context.User, unresolvedAddr)

	if !context.DisableCache {
		if c, ok := clients[key]; ok {
			return c
		}
	}

	tlsConfig, err := context.GetClientTLSConfig()
	if err != nil {
		log.Fatal(err)
	}

	c := &Client{
		closer:            make(chan struct{}),
		Closed:            make(chan struct{}),
		key:               key,
		addr:              unresolvedAddr,
		tlsConfig:         tlsConfig,
		disableReconnects: context.DisableReconnects,
		clock:             context.localClock,
		remoteClocks:      context.RemoteClocks,
	}
	c.healthy.Store(make(chan struct{}))
	c.healthWaitTime = time.Now().Add(context.HealthWait)
	c.healthReceived = make(chan struct{})

	if !context.DisableCache {
		clients[key] = c
	}

	retryOpts := clientRetryOptions
	retryOpts.Closer = context.Stopper.ShouldStop()

	context.Stopper.RunWorker(func() {
		c.runHeartbeat(retryOpts)

		close(c.Closed)

		if conn := c.internalConn(); conn != nil {
			conn.client.Close()
		}
	})

	return c
}
// SetResolvers initializes the set of gossip resolvers used to
// find nodes to bootstrap the gossip network.
func (g *Gossip) SetResolvers(resolvers []resolver.Resolver) {
	g.mu.Lock()
	defer g.mu.Unlock()
	// Start the index at the end because the get-next-address loop logic
	// increments it as its first step.
	g.resolverIdx = len(resolvers) - 1
	g.resolvers = resolvers
	g.resolversTried = map[int]struct{}{}

	// Initialize the bootstrap info with addresses of the specified resolvers.
	for _, r := range resolvers {
		addr := util.MakeUnresolvedAddr(r.Type(), r.Addr())
		g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, addr)
	}
}
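A hedged sketch of how resolvers might be wired up before gossip starts, combining resolver.NewResolverFromAddress (seen in the NewNetwork snippets above) with SetResolvers; the peer addresses are invented for illustration.

// Hypothetical bootstrap wiring: build resolvers from two made-up peer
// addresses and hand them to gossip.
func bootstrapGossip(g *Gossip) {
	var resolvers []resolver.Resolver
	for _, hostPort := range []string{"peer1:26257", "peer2:26257"} { // illustrative peers
		addr := util.MakeUnresolvedAddr("tcp", hostPort)
		resolvers = append(resolvers, resolver.NewResolverFromAddress(addr))
	}
	g.SetResolvers(resolvers)
}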
func TestOwnNodeCertain(t *testing.T) {
	defer leaktest.AfterTest(t)()
	g, s := makeTestGossip(t)
	defer s()
	const expNodeID = 42
	nd := &roachpb.NodeDescriptor{
		NodeID:  expNodeID,
		Address: util.MakeUnresolvedAddr("tcp", "foobar:1234"),
	}
	g.ResetNodeID(nd.NodeID)
	if err := g.SetNodeDescriptor(nd); err != nil {
		t.Fatal(err)
	}
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(expNodeID), nd, time.Hour); err != nil {
		t.Fatal(err)
	}

	act := make(map[roachpb.NodeID]roachpb.Timestamp)
	var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, ba roachpb.BatchRequest, _ *rpc.Context) (*roachpb.BatchResponse, error) {
		for k, v := range ba.Txn.ObservedTimestamps {
			act[k] = v
		}
		return ba.CreateReply(), nil
	}

	ctx := &DistSenderContext{
		RPCSend: testFn,
		RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
			return []roachpb.RangeDescriptor{testRangeDescriptor}, nil
		}),
	}
	expTS := roachpb.ZeroTimestamp.Add(1, 2)
	ds := NewDistSender(ctx, g)
	v := roachpb.MakeValueFromString("value")
	put := roachpb.NewPut(roachpb.Key("a"), v)
	if _, err := client.SendWrappedWith(ds, nil, roachpb.Header{
		// MaxTimestamp is set very high so that all uncertainty updates have
		// effect.
		Txn: &roachpb.Transaction{OrigTimestamp: expTS, MaxTimestamp: roachpb.MaxTimestamp},
	}, put); err != nil {
		t.Fatalf("put encountered error: %s", err)
	}
	exp := map[roachpb.NodeID]roachpb.Timestamp{
		expNodeID: expTS,
	}
	if !reflect.DeepEqual(exp, act) {
		t.Fatalf("wanted %v, got %v", exp, act)
	}
}
// startGossip creates local and remote gossip instances.
// The remote gossip instance launches its gossip service.
func startGossip(t *testing.T) (local, remote *Gossip, stopper *stop.Stopper) {
	lclock := hlc.NewClock(hlc.UnixNano)
	stopper = stop.NewStopper()
	lRPCContext := rpc.NewContext(nodeTestBaseContext, lclock, stopper)

	laddr := util.CreateTestAddr("tcp")
	lserver := rpc.NewServer(laddr, lRPCContext)
	if err := lserver.Start(); err != nil {
		t.Fatal(err)
	}
	local = New(lRPCContext, gossipInterval, TestBootstrap)
	if err := local.SetNodeDescriptor(&proto.NodeDescriptor{
		NodeID:  1,
		Address: util.MakeUnresolvedAddr(laddr.Network(), laddr.String()),
	}); err != nil {
		t.Fatal(err)
	}

	rclock := hlc.NewClock(hlc.UnixNano)
	raddr := util.CreateTestAddr("tcp")
	rRPCContext := rpc.NewContext(nodeTestBaseContext, rclock, stopper)
	rserver := rpc.NewServer(raddr, rRPCContext)
	if err := rserver.Start(); err != nil {
		t.Fatal(err)
	}
	remote = New(rRPCContext, gossipInterval, TestBootstrap)
	if err := remote.SetNodeDescriptor(&proto.NodeDescriptor{
		NodeID:  2,
		Address: util.MakeUnresolvedAddr(raddr.Network(), raddr.String()),
	}); err != nil {
		t.Fatal(err)
	}
	local.start(lserver, stopper)
	remote.start(rserver, stopper)
	time.Sleep(time.Millisecond)
	return
}