// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *rpc.Server, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour rpcServer := rpc.NewServer(addr, nodeRPCContext) if err := rpcServer.Start(); err != nil { t.Fatal(err) } g := gossip.New(nodeRPCContext, testContext.GossipInterval, testContext.GossipBootstrapResolvers) if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS == addr { gossipBS = rpcServer.Addr() } g.SetResolvers([]resolver.Resolver{resolver.NewResolverFromAddress(gossipBS)}) g.Start(rpcServer, stopper) } ctx.Gossip = g sender := kv.NewDistSender(&kv.DistSenderContext{Clock: ctx.Clock}, g) ctx.DB = client.NewDB(sender) // TODO(bdarnell): arrange to have the transport closed. // (or attach LocalRPCTransport.Close to the stopper) ctx.Transport = multiraft.NewLocalRPCTransport(stopper) ctx.EventFeed = util.NewFeed(stopper) node := NewNode(ctx) return rpcServer, ctx.Clock, node, stopper }
// NewNetwork creates nodeCount gossip nodes.
//
// Each node is created, pointed at the first node's address as its
// sole gossip resolver, and started. Fatal errors abort the process
// via log.Fatal since this is test-only simulation code.
func NewNetwork(nodeCount int) *Network {
	clock := hlc.NewClock(hlc.UnixNano)

	log.Infof("simulating gossip network with %d nodes", nodeCount)

	n := &Network{
		Nodes:   []*Node{},
		Stopper: stop.NewStopper(),
	}
	// Insecure context: no TLS certs are needed for the simulated network.
	n.rpcContext = rpc.NewContext(&base.Context{Insecure: true}, clock, n.Stopper)
	var err error
	n.tlsConfig, err = n.rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(err)
	}

	for i := 0; i < nodeCount; i++ {
		node, err := n.CreateNode()
		if err != nil {
			log.Fatal(err)
		}
		// Build a resolver for each instance or we'll get data races.
		// n.Nodes[0] anchors the network: every node bootstraps off the
		// first created node's address (presumably CreateNode appends to
		// n.Nodes — TODO confirm).
		r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr)
		if err != nil {
			log.Fatalf("bad gossip address %s: %s", n.Nodes[0].Addr, err)
		}
		node.Gossip.SetResolvers([]resolver.Resolver{r})
		// Resolvers must be in place before the node starts gossiping.
		if err := n.StartNode(node); err != nil {
			log.Fatal(err)
		}
	}
	return n
}
// NewNetwork creates nodeCount gossip nodes.
//
// When createResolvers is true, every node is given the first node's
// address as its sole gossip resolver. Note that nodes are only
// created here, not started — presumably the caller starts them;
// TODO confirm against callers.
func NewNetwork(nodeCount int, createResolvers bool) *Network {
	log.Infof(context.TODO(), "simulating gossip network with %d nodes", nodeCount)

	n := &Network{
		Nodes:   []*Node{},
		Stopper: stop.NewStopper(),
	}
	// Insecure context; the clock argument is nil in this revision.
	n.rpcContext = rpc.NewContext(&base.Context{Insecure: true}, nil, n.Stopper)
	var err error
	n.tlsConfig, err = n.rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(context.TODO(), err)
	}

	for i := 0; i < nodeCount; i++ {
		node, err := n.CreateNode()
		if err != nil {
			log.Fatal(context.TODO(), err)
		}
		// Build a resolver for each instance or we'll get data races.
		if createResolvers {
			// All nodes bootstrap off the first node's address.
			r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr())
			if err != nil {
				log.Fatalf(context.TODO(), "bad gossip address %s: %s", n.Nodes[0].Addr(), err)
			}
			node.Gossip.SetResolvers([]resolver.Resolver{r})
		}
	}
	return n
}
// NewNetwork creates nodeCount gossip nodes. The networkType should // be set to either "tcp" or "unix". The gossipInterval should be set // to a compressed simulation timescale, though large enough to give // the concurrent goroutines enough time to pass data back and forth // in order to yield accurate estimates of how old data actually ends // up being at the various nodes (e.g. DefaultTestGossipInterval). // TODO: This method should take `stopper` as an argument. func NewNetwork(nodeCount int, networkType string, gossipInterval time.Duration) *Network { clock := hlc.NewClock(hlc.UnixNano) log.Infof("simulating gossip network with %d nodes", nodeCount) stopper := stop.NewStopper() rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper) nodes := make([]*Node, nodeCount) for i := range nodes { server := rpc.NewServer(util.CreateTestAddr(networkType), rpcContext) if err := server.Start(); err != nil { log.Fatal(err) } nodes[i] = &Node{Server: server} } var numResolvers int if len(nodes) > 3 { numResolvers = 3 } else { numResolvers = len(nodes) } for i, leftNode := range nodes { // Build new resolvers for each instance or we'll get data races. var resolvers []resolver.Resolver for _, rightNode := range nodes[:numResolvers] { resolvers = append(resolvers, resolver.NewResolverFromAddress(rightNode.Server.Addr())) } gossipNode := gossip.New(rpcContext, gossipInterval, resolvers) addr := leftNode.Server.Addr() if err := gossipNode.SetNodeDescriptor(&roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(i + 1), Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }); err != nil { log.Fatal(err) } gossipNode.Start(leftNode.Server, stopper) stopper.AddCloser(leftNode.Server) leftNode.Gossip = gossipNode } return &Network{ Nodes: nodes, NetworkType: networkType, GossipInterval: gossipInterval, Stopper: stopper, } }
// NewNetwork creates nodeCount gossip nodes. The networkType should
// be set to either "tcp" or "unix".
func NewNetwork(nodeCount int, networkType string) *Network {
	clock := hlc.NewClock(hlc.UnixNano)

	log.Infof("simulating gossip network with %d nodes", nodeCount)

	stopper := stop.NewStopper()
	rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper)
	tlsConfig, err := rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(err)
	}

	// Bring up one RPC server/listener per simulated node.
	nodes := make([]*Node, nodeCount)
	for i := range nodes {
		server := rpc.NewServer(rpcContext)
		testAddr := util.CreateTestAddr(networkType)
		ln, err := util.ListenAndServe(stopper, server, testAddr, tlsConfig)
		if err != nil {
			log.Fatal(err)
		}
		nodes[i] = &Node{Server: server, Addr: ln.Addr()}
	}

	for i, leftNode := range nodes {
		// Build new resolvers for each instance or we'll get data races.
		// Every node bootstraps off the first node's address.
		resolvers := []resolver.Resolver{resolver.NewResolverFromAddress(nodes[0].Addr)}
		gossipNode := gossip.New(rpcContext, resolvers)
		addr := leftNode.Addr
		// NodeIDs are 1-based.
		gossipNode.SetNodeID(roachpb.NodeID(i + 1))
		if err := gossipNode.SetNodeDescriptor(&roachpb.NodeDescriptor{
			NodeID:  roachpb.NodeID(i + 1),
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		}); err != nil {
			log.Fatal(err)
		}
		// Seed the node with an info keyed by its own address so gossip
		// has something to propagate.
		if err := gossipNode.AddInfo(addr.String(), encoding.EncodeUint64(nil, 0), time.Hour); err != nil {
			log.Fatal(err)
		}
		gossipNode.Start(leftNode.Server, addr, stopper)
		gossipNode.EnableSimulationCycler(true)

		leftNode.Gossip = gossipNode
	}

	return &Network{
		Nodes:       nodes,
		NetworkType: networkType,
		Stopper:     stopper,
	}
}
// start starts the node by registering the storage instance for the
// RPC service "Node" and initializing stores for each specified
// engine. Launches periodic store gossiping in a goroutine.
//
// Returns an error if store initialization or cluster bootstrap
// fails; RPC registration failure is fatal.
func (n *Node) start(rpcServer *rpc.Server, addr net.Addr, engines []engine.Engine, attrs roachpb.Attributes) error {
	n.initDescriptor(addr, attrs)

	// Start status monitor.
	n.status.StartMonitorFeed(n.ctx.EventFeed)

	// Initialize stores, including bootstrapping new ones.
	if err := n.initStores(engines, n.stopper); err != nil {
		if err == errNeedsBootstrap {
			// This node has no initialized stores and no way to connect to
			// an existing cluster, so we bootstrap it.
			clusterID, err := bootstrapCluster(engines)
			if err != nil {
				return err
			}
			log.Infof("**** cluster %s has been created", clusterID)
			log.Infof("**** add additional nodes by specifying --join=%s", addr)
			// Make sure we add the node as a resolver.
			selfResolver, err := resolver.NewResolverFromAddress(addr)
			if err != nil {
				return err
			}
			n.ctx.Gossip.SetResolvers([]resolver.Resolver{selfResolver})
			// After bootstrapping, try again to initialize the stores.
			if err := n.initStores(engines, n.stopper); err != nil {
				return err
			}
		} else {
			return err
		}
	}

	n.startedAt = n.ctx.Clock.Now().WallTime

	// Initialize publisher for Node Events. This requires the NodeID, which is
	// initialized by initStores(); because of this, some Store initialization
	// events will precede the StartNodeEvent on the feed.
	n.feed = status.NewNodeEventFeed(n.Descriptor.NodeID, n.ctx.EventFeed)
	n.feed.StartNode(n.Descriptor, n.startedAt)

	n.startPublishStatuses(n.stopper)
	n.startGossip(n.stopper)

	// Register the RPC methods we support last as doing so allows RPCs to be
	// received which may access state initialized above without locks.
	const method = "Node.Batch"
	if err := rpcServer.Register(method, n.executeCmd, &roachpb.BatchRequest{}); err != nil {
		log.Fatalf("unable to register node service with RPC server: %s", err)
	}

	log.Infoc(n.context(), "Started node with %v engine(s) and attributes %v", engines, attrs.Attrs)
	return nil
}
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	// Long intervals keep background scanners quiet during tests.
	ctx.ScanInterval = 10 * time.Hour
	ctx.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCtx := makeTestContext()
	g := gossip.New(
		context.Background(),
		nodeRPCContext,
		grpcServer,
		serverCtx.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry())
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification: if gossipBS
		// names this server's own address, substitute the actual bound
		// listener address.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(ln.Addr())
	}
	ctx.Gossip = g
	// Tie RPC retries to the stopper so shutdown isn't blocked on retries.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(&kv.DistSenderConfig{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer())
	sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper,
		kv.MakeTxnMetrics())
	ctx.DB = client.NewDB(sender)
	ctx.Transport = storage.NewDummyRaftTransport()
	node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), ctx.Clock, node, stopper
}
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) (
	*rpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	ctx := storage.StoreContext{}

	stopper := stop.NewStopper()
	ctx.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper)
	// A long scan interval keeps the scanner quiet during tests.
	ctx.ScanInterval = 10 * time.Hour
	rpcServer := rpc.NewServer(nodeRPCContext)
	grpcServer := grpc.NewServer()
	tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
	if err != nil {
		t.Fatal(err)
	}
	// Multiplex gRPC and the legacy RPC server on one listener.
	ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), addr, tlsConfig)
	if err != nil {
		t.Fatal(err)
	}
	g := gossip.New(nodeRPCContext, testContext.GossipBootstrapResolvers, stopper)
	if gossipBS != nil {
		// Handle possibility of a :0 port specification: if gossipBS
		// names this server's own address, substitute the actual bound
		// listener address.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		g.SetResolvers([]resolver.Resolver{r})
		g.Start(grpcServer, ln.Addr())
	}
	ctx.Gossip = g
	// Tie RPC retries to the stopper so shutdown isn't blocked on retries.
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	distSender := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           ctx.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, g)
	tracer := tracing.NewTracer()
	sender := kv.NewTxnCoordSender(distSender, ctx.Clock, false, tracer, stopper)
	ctx.DB = client.NewDB(sender)
	// TODO(bdarnell): arrange to have the transport closed.
	// (or attach LocalRPCTransport.Close to the stopper)
	ctx.Transport = storage.NewLocalRPCTransport(stopper)
	ctx.EventFeed = util.NewFeed(stopper)
	ctx.Tracer = tracer
	node := NewNode(ctx, metric.NewRegistry(), stopper, nil)
	return rpcServer, ln.Addr(), ctx.Clock, node, stopper
}
// start starts the node by registering the storage instance for the
// RPC service "Node" and initializing stores for each specified
// engine. Launches periodic store gossiping in a goroutine.
//
// Returns an error if store initialization or cluster bootstrap fails.
func (n *Node) start(addr net.Addr, engines []engine.Engine, attrs roachpb.Attributes) error {
	n.initDescriptor(addr, attrs)

	// Initialize stores, including bootstrapping new ones.
	if err := n.initStores(engines, n.stopper); err != nil {
		if err == errNeedsBootstrap {
			n.initialBoot = true
			// This node has no initialized stores and no way to connect to
			// an existing cluster, so we bootstrap it.
			clusterID, err := bootstrapCluster(engines, n.txnMetrics)
			if err != nil {
				return err
			}
			log.Infof("**** cluster %s has been created", clusterID)
			log.Infof("**** add additional nodes by specifying --join=%s", addr)
			// Make sure we add the node as a resolver.
			selfResolver, err := resolver.NewResolverFromAddress(addr)
			if err != nil {
				return err
			}
			n.ctx.Gossip.SetResolvers([]resolver.Resolver{selfResolver})
			// After bootstrapping, try again to initialize the stores.
			if err := n.initStores(engines, n.stopper); err != nil {
				return err
			}
		} else {
			return err
		}
	}

	n.startedAt = n.ctx.Clock.Now().WallTime

	// Initialize the recorder with the NodeID, which is initialized by initStores().
	n.recorder.NodeStarted(n.Descriptor, n.startedAt)

	n.startComputePeriodicMetrics(n.stopper)
	n.startGossip(n.stopper)

	// Record node started event.
	n.recordJoinEvent()

	log.Infoc(n.context(context.TODO()), "Started node with %v engine(s) and attributes %v", engines, attrs.Attrs)
	return nil
}
// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *rpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour rpcServer := rpc.NewServer(nodeRPCContext) tlsConfig, err := nodeRPCContext.GetServerTLSConfig() if err != nil { t.Fatal(err) } ln, err := util.ListenAndServe(stopper, rpcServer, addr, tlsConfig) if err != nil { t.Fatal(err) } g := gossip.New(nodeRPCContext, testContext.GossipBootstrapResolvers) if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS == addr { gossipBS = ln.Addr() } r, err := resolver.NewResolverFromAddress(gossipBS) if err != nil { t.Fatalf("bad gossip address %s: %s", gossipBS, err) } g.SetResolvers([]resolver.Resolver{r}) g.Start(rpcServer, ln.Addr(), stopper) } ctx.Gossip = g sender := kv.NewDistSender(&kv.DistSenderContext{Clock: ctx.Clock, RPCContext: nodeRPCContext}, g) ctx.DB = client.NewDB(sender) // TODO(bdarnell): arrange to have the transport closed. // (or attach LocalRPCTransport.Close to the stopper) ctx.Transport = storage.NewLocalRPCTransport(stopper) ctx.EventFeed = util.NewFeed(stopper) node := NewNode(ctx, metric.NewRegistry(), stopper) return rpcServer, ln.Addr(), ctx.Clock, node, stopper }
// TestGossipStorage verifies that a gossip node can join the cluster
// using the bootstrap hosts in a gossip.Storage object.
func TestGossipStorage(t *testing.T) {
	defer leaktest.AfterTest(t)()

	network := simulation.NewNetwork(3)
	defer network.Stop()

	// Set storage for each of the nodes.
	addresses := make(unresolvedAddrSlice, len(network.Nodes))
	stores := make([]*testStorage, len(network.Nodes))
	for i, n := range network.Nodes {
		addresses[i] = util.MakeUnresolvedAddr(n.Addr.Network(), n.Addr.String())
		stores[i] = new(testStorage)
		if err := n.Gossip.SetStorage(stores[i]); err != nil {
			t.Fatal(err)
		}
	}

	// Wait for the gossip network to connect.
	network.RunUntilFullyConnected()

	// Wait long enough for storage to get the expected number of addresses.
	// Each node should persist the addresses of the two other nodes.
	util.SucceedsSoon(t, func() error {
		for _, p := range stores {
			if p.Len() != 2 {
				return util.Errorf("incorrect number of addresses: expected 2; got %d", p.Len())
			}
		}
		return nil
	})

	for i, p := range stores {
		if !p.isRead() {
			t.Errorf("%d: expected read from storage", i)
		}
		if !p.isWrite() {
			t.Errorf("%d: expected write from storage", i)
		}

		p.Lock()
		gotAddresses := unresolvedAddrSlice(p.info.Addresses)
		sort.Sort(gotAddresses)
		var expectedAddresses unresolvedAddrSlice
		for j, addr := range addresses {
			if i != j { // skip node's own address
				expectedAddresses = append(expectedAddresses, addr)
			}
		}
		sort.Sort(expectedAddresses)

		// Verify all gossip addresses are written to each persistent store.
		if !reflect.DeepEqual(gotAddresses, expectedAddresses) {
			t.Errorf("%d: expected addresses: %s, got: %s", i, expectedAddresses, gotAddresses)
		}
		p.Unlock()
	}

	// Create an unaffiliated gossip node with only itself as a resolver,
	// leaving it no way to reach the gossip network.
	node, err := network.CreateNode()
	if err != nil {
		t.Fatal(err)
	}
	node.Gossip.SetBootstrapInterval(1 * time.Millisecond)

	r, err := resolver.NewResolverFromAddress(node.Addr)
	if err != nil {
		t.Fatal(err)
	}
	node.Gossip.SetResolvers([]resolver.Resolver{r})
	if err := network.StartNode(node); err != nil {
		t.Fatal(err)
	}

	// Wait for a bit to ensure no connection.
	select {
	case <-time.After(10 * time.Millisecond):
		// expected outcome...
	case <-node.Gossip.Connected:
		t.Fatal("unexpectedly connected to gossip")
	}

	// Give the new node storage with info established from a node
	// in the established network.
	var ts2 testStorage
	if err := stores[0].ReadBootstrapInfo(&ts2.info); err != nil {
		t.Fatal(err)
	}
	if err := node.Gossip.SetStorage(&ts2); err != nil {
		t.Fatal(err)
	}

	// With bootstrap info from the established network, the node should
	// now connect within the cycle budget.
	network.SimulateNetwork(func(cycle int, network *simulation.Network) bool {
		if cycle > 1000 {
			t.Fatal("failed to connect to gossip")
		}
		select {
		case <-node.Gossip.Connected:
			return false
		default:
			return true
		}
	})
}
// TestGossipStorage verifies that a gossip node can join the cluster // using the bootstrap hosts in a gossip.Storage object. func TestGossipStorage(t *testing.T) { defer leaktest.AfterTest(t) const numNodes = 3 network := simulation.NewNetwork(3) defer network.Stop() // Set storage for each of the nodes. stores := []*testStorage{} for _, n := range network.Nodes { tp := &testStorage{} stores = append(stores, tp) if err := n.Gossip.SetStorage(tp); err != nil { t.Fatal(err) } } // Wait for the gossip network to connect. network.RunUntilFullyConnected() for i, p := range stores { if !p.read { t.Errorf("%d: expected read from storage", i) } if !p.write { t.Errorf("%d: expected write from storage", i) } // Verify all gossip addresses are written to each persistent store. if len(p.info.Addresses) != 3 { t.Errorf("%d: expected 3 addresses, have: %s", i, p.info.Addresses) } } // Create an unaffiliated gossip node with only itself as a resolver, // leaving it no way to reach the gossip network. node, err := network.CreateNode() if err != nil { t.Fatal(err) } node.Gossip.SetBootstrapInterval(1 * time.Millisecond) r, err := resolver.NewResolverFromAddress(node.Addr) if err != nil { t.Fatal(err) } node.Gossip.SetResolvers([]resolver.Resolver{r}) if err := network.StartNode(node); err != nil { t.Fatal(err) } // Wait for a bit to ensure no connection. select { case <-time.After(10 * time.Millisecond): // expected outcome... case <-node.Gossip.Connected: t.Fatal("unexpectedly connected to gossip") } // Give the new node storage with info established from a node // in the established network. tp := &testStorage{ info: stores[0].info, } if err := node.Gossip.SetStorage(tp); err != nil { t.Fatal(err) } network.SimulateNetwork(func(cycle int, network *simulation.Network) bool { if cycle > 100 { t.Fatal("failed to connect to gossip") } select { case <-node.Gossip.Connected: return false default: return true } }) }