// AddNodeWithoutGossip registers a node with the cluster. Nodes must
// be added before they can be used in other methods of
// raftTransportTestContext. Unless you are testing the effects of
// delaying gossip, use AddNode instead.
func (rttc *raftTransportTestContext) AddNodeWithoutGossip(
	nodeID roachpb.NodeID,
) (*storage.RaftTransport, net.Addr) {
	grpcServer := rpc.NewServer(rttc.nodeRPCContext)
	ln, err := netutil.ListenAndServeGRPC(rttc.stopper, grpcServer, util.TestAddr)
	if err != nil {
		rttc.t.Fatal(err)
	}
	transport := storage.NewRaftTransport(storage.GossipAddressResolver(rttc.gossip),
		grpcServer, rttc.nodeRPCContext)
	rttc.transports[nodeID] = transport
	return transport, ln.Addr()
}
Beispiel #2
0
// NewServer creates a Server from a server.Context.
func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) {
	if ctx == nil {
		return nil, util.Errorf("ctx must not be null")
	}

	if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil {
		return nil, util.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err)
	}

	if ctx.Insecure {
		log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := ctx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := ctx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		Tracer:  tracing.NewTracer(),
		ctx:     ctx,
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	s.clock.SetMaxOffset(ctx.MaxOffset)

	s.rpcContext = crpc.NewContext(&ctx.Context, s.clock, stopper)
	stopper.RunWorker(func() {
		s.rpcContext.RemoteClocks.MonitorRemoteOffsets(stopper)
	})

	s.rpc = crpc.NewServer(s.rpcContext)

	s.gossip = gossip.New(s.rpcContext, s.ctx.GossipBootstrapResolvers, stopper)
	s.storePool = storage.NewStorePool(s.gossip, s.clock, ctx.TimeUntilStoreDead, stopper)

	feed := util.NewFeed(stopper)

	// A custom RetryOptions is created which uses stopper.ShouldDrain() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := kv.GetDefaultDistSenderRetryOptions()
	retryOpts.Closer = stopper.ShouldDrain()
	ds := kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}, s.gossip)
	txnRegistry := metric.NewRegistry()
	txnMetrics := kv.NewTxnMetrics(txnRegistry)
	sender := kv.NewTxnCoordSender(ds, s.clock, ctx.Linearizable, s.Tracer, s.stopper, txnMetrics)
	s.db = client.NewDB(sender)

	s.grpc = grpc.NewServer()
	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(&s.ctx.Context, sender, stopper)
	if err := s.kvDB.RegisterRPC(s.rpc); err != nil {
		return nil, err
	}

	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)
	sqlRegistry := metric.NewRegistry()
	s.sqlExecutor = sql.NewExecutor(*s.db, s.gossip, s.leaseMgr, s.stopper, sqlRegistry)

	s.pgServer = pgwire.MakeServer(&s.ctx.Context, s.sqlExecutor, sqlRegistry)

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Clock:           s.clock,
		DB:              s.db,
		Gossip:          s.gossip,
		Transport:       s.raftTransport,
		ScanInterval:    s.ctx.ScanInterval,
		ScanMaxIdleTime: s.ctx.ScanMaxIdleTime,
		EventFeed:       feed,
		Tracer:          s.Tracer,
		StorePool:       s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
			Mode:           s.ctx.BalanceMode,
		},
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.recorder.AddNodeRegistry("sql.%s", sqlRegistry)
	s.recorder.AddNodeRegistry("txn.%s", txnRegistry)

	s.node = NewNode(nCtx, s.recorder, s.stopper, txnMetrics)
	s.admin = newAdminServer(s.db, s.stopper, s.sqlExecutor)
	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.NewServer(s.tsDB)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx)

	return s, nil
}
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)
	g.SetNodeID(roachpb.NodeID(1))

	// Create several servers, each of which has two stores (A raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numNodes = 3
	const storesPerNode = 2
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	messageTypes := []raftpb.MessageType{
		raftpb.MsgSnap,
		raftpb.MsgHeartbeat,
	}

	for nodeIndex := 0; nodeIndex < numNodes; nodeIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		grpcServer := rpc.NewServer(nodeRPCContext)
		ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
		if err != nil {
			t.Fatal(err)
		}

		addr := ln.Addr()
		// Have to call g.SetNodeID before call g.AddInfo.
		g.ResetNodeID(roachpb.NodeID(nodeID))
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
			&roachpb.NodeDescriptor{
				Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
			},
			time.Hour); err != nil {
			t.Fatal(err)
		}

		transport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
		transports[nodeID] = transport

		for storeIndex := 0; storeIndex < storesPerNode; storeIndex++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channel := newChannelServer(numNodes*storesPerNode*len(messageTypes), 0)
			transport.Listen(storeID, channel.RaftMessage)
			channels[storeID] = channel
		}
	}

	messageTypeCounts := make(map[roachpb.StoreID]map[raftpb.MessageType]int)

	// Each store sends one snapshot and one heartbeat to each store, including
	// itself.
	for toStoreID, toNodeID := range storeNodes {
		if _, ok := messageTypeCounts[toStoreID]; !ok {
			messageTypeCounts[toStoreID] = make(map[raftpb.MessageType]int)
		}

		for fromStoreID, fromNodeID := range storeNodes {
			baseReq := storage.RaftMessageRequest{
				Message: raftpb.Message{
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:  fromNodeID,
					StoreID: fromStoreID,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:  toNodeID,
					StoreID: toStoreID,
				},
			}

			for _, messageType := range messageTypes {
				req := baseReq
				req.Message.Type = messageType

				if err := transports[fromNodeID].Send(&req); err != nil {
					t.Errorf("unable to send %s from %d to %d: %s", req.Message.Type, fromNodeID, toNodeID, err)
				}
				messageTypeCounts[toStoreID][req.Message.Type]++
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		func() {
			for len(messageTypeCounts[toStoreID]) > 0 {
				req := <-channels[toStoreID].ch
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
				}

				if typeCounts, ok := messageTypeCounts[toStoreID]; ok {
					if _, ok := typeCounts[req.Message.Type]; ok {
						typeCounts[req.Message.Type]--
						if typeCounts[req.Message.Type] == 0 {
							delete(typeCounts, req.Message.Type)
						}
					} else {
						t.Errorf("expected %v to have key %v, but it did not", typeCounts, req.Message.Type)
					}
				} else {
					t.Errorf("expected %v to have key %v, but it did not", messageTypeCounts, toStoreID)
				}
			}

			delete(messageTypeCounts, toStoreID)
		}()

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
		case <-time.After(100 * time.Millisecond):
		}
	}

	if len(messageTypeCounts) > 0 {
		t.Errorf("remaining messages expected: %v", messageTypeCounts)
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3)
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	expReq := &storage.RaftMessageRequest{
		GroupID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if err := transports[storeNodes[fromStoreID]].Send(expReq); err != nil {
		t.Errorf("unable to send message from %d to %d: %s", fromStoreID, toStoreID, err)
	}
	if req := <-channels[toStoreID].ch; !proto.Equal(req, expReq) {
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %v on channel %d", req, toStoreID)
	default:
	}
}
// TestInOrderDelivery verifies that for a given pair of nodes, raft
// messages are delivered in order.
func TestInOrderDelivery(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), nil, stopper)
	g := gossip.New(nodeRPCContext, nil, stopper)

	grpcServer := rpc.NewServer(nodeRPCContext)
	ln, err := util.ListenAndServeGRPC(stopper, grpcServer, util.TestAddr)
	if err != nil {
		t.Fatal(err)
	}

	const numMessages = 100
	nodeID := roachpb.NodeID(roachpb.NodeID(2))
	serverTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
	serverChannel := newChannelServer(numMessages, 10*time.Millisecond)
	serverTransport.Listen(roachpb.StoreID(nodeID), serverChannel.RaftMessage)
	addr := ln.Addr()
	// Have to set gossip.NodeID before call gossip.AddInofXXX.
	g.SetNodeID(nodeID)
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
		&roachpb.NodeDescriptor{
			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
		},
		time.Hour); err != nil {
		t.Fatal(err)
	}

	clientNodeID := roachpb.NodeID(2)
	clientTransport := storage.NewRaftTransport(storage.GossipAddressResolver(g), nil, nodeRPCContext)

	for i := 0; i < numMessages; i++ {
		req := &storage.RaftMessageRequest{
			GroupID: 1,
			Message: raftpb.Message{
				To:     uint64(nodeID),
				From:   uint64(clientNodeID),
				Commit: uint64(i),
			},
			ToReplica: roachpb.ReplicaDescriptor{
				NodeID:    nodeID,
				StoreID:   roachpb.StoreID(nodeID),
				ReplicaID: roachpb.ReplicaID(nodeID),
			},
			FromReplica: roachpb.ReplicaDescriptor{
				NodeID:    clientNodeID,
				StoreID:   roachpb.StoreID(clientNodeID),
				ReplicaID: roachpb.ReplicaID(clientNodeID),
			},
		}
		if err := clientTransport.Send(req); err != nil {
			t.Errorf("failed to send message %d: %s", i, err)
		}
	}

	for i := 0; i < numMessages; i++ {
		req := <-serverChannel.ch
		if req.Message.Commit != uint64(i) {
			t.Errorf("messages out of order: got %d while expecting %d", req.Message.Commit, i)
		}
	}
}
Beispiel #5
0
// NewServer creates a Server from a server.Context.
func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err)
	}

	if srvCtx.Ctx == nil {
		srvCtx.Ctx = context.Background()
	}
	if srvCtx.Ctx.Done() != nil {
		panic("context with cancel or deadline")
	}
	if tracing.TracerFromCtx(srvCtx.Ctx) == nil {
		// TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate
		// context.Context inside Server. We will need to rename server.Context
		// though.
		srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer())
	}

	if srvCtx.Insecure {
		log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := srvCtx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := srvCtx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass the server's Ctx as a base context for the various server
	// components, but we won't know the node ID until we Start(). At that point
	// it's too late to change the contexts in the components (various background
	// processes will have already started using the contexts).
	//
	// The dynamic value allows us to add the log tag to the context now and
	// update the value asynchronously. It's not significantly more expensive than
	// a regular tag since it's just doing an (atomic) load when a log/trace
	// message is constructed.
	s.nodeLogTagVal.Set(log.DynamicIntValueUnknown)
	srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal)
	s.ctx = srvCtx

	s.clock.SetMaxOffset(srvCtx.MaxOffset)

	s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(s.Ctx(), err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry)
	s.storePool = storage.NewStorePool(
		s.gossip,
		s.clock,
		s.rpcContext,
		srvCtx.ReservationsEnabled,
		srvCtx.TimeUntilStoreDead,
		s.stopper,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		Ctx:             s.Ctx(),
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics()
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock, srvCtx.Linearizable,
		s.stopper, txnMetrics)
	s.db = client.NewDB(s.txnCoordSender)

	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if srvCtx.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		Context:    s.Ctx(),
		DB:         s.db,
		RPCContext: s.rpcContext,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		Context:      s.Ctx(),
		DB:           s.db,
		Gossip:       s.gossip,
		LeaseManager: s.leaseMgr,
		Clock:        s.clock,
		DistSQLSrv:   s.distSQLServer,
	}
	if srvCtx.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Ctx:                            s.Ctx(),
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.ctx.RaftTickInterval,
		ScanInterval:                   s.ctx.ScanInterval,
		ScanMaxIdleTime:                s.ctx.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.ctx.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
	}
	if srvCtx.TestingKnobs.Store != nil {
		nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterStoresServer(s.grpc, s.node.storesServer)

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.tsDB)

	s.admin = makeAdminServer(s)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores)
	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
func TestSendAndReceive(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	nodeRPCContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(nodeRPCContext, gossip.TestBootstrap, stopper)
	g.SetNodeID(roachpb.NodeID(1))

	// Create several servers, each of which has two stores (A raft
	// node ID addresses a store). Node 1 has stores 1 and 2, node 2 has
	// stores 3 and 4, etc.
	//
	// We suppose that range 1 is replicated across the odd-numbered
	// stores in reverse order to ensure that the various IDs are not
	// equal: replica 1 is store 5, replica 2 is store 3, and replica 3
	// is store 1.
	const numServers = 3
	const storesPerServer = 2
	const numStores = numServers * storesPerServer
	nextNodeID := roachpb.NodeID(2)
	nextStoreID := roachpb.StoreID(2)

	// Per-node state.
	transports := map[roachpb.NodeID]*storage.RaftTransport{}

	// Per-store state.
	storeNodes := map[roachpb.StoreID]roachpb.NodeID{}
	channels := map[roachpb.StoreID]channelServer{}
	replicaIDs := map[roachpb.StoreID]roachpb.ReplicaID{
		1: 3,
		3: 2,
		5: 1,
	}

	for serverIndex := 0; serverIndex < numServers; serverIndex++ {
		nodeID := nextNodeID
		nextNodeID++
		rpcServer := rpc.NewServer(nodeRPCContext)
		grpcServer := grpc.NewServer()
		tlsConfig, err := nodeRPCContext.GetServerTLSConfig()
		if err != nil {
			t.Fatal(err)
		}
		ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), util.CreateTestAddr("tcp"), tlsConfig)
		if err != nil {
			t.Fatal(err)
		}

		addr := ln.Addr()
		// Have to call g.SetNodeID before call g.AddInfo.
		g.ResetNodeID(roachpb.NodeID(nodeID))
		if err := g.AddInfoProto(gossip.MakeNodeIDKey(nodeID),
			&roachpb.NodeDescriptor{
				Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
			},
			time.Hour); err != nil {
			t.Fatal(err)
		}

		transport := storage.NewRaftTransport(storage.GossipAddressResolver(g), grpcServer, nodeRPCContext)
		transports[nodeID] = transport

		for store := 0; store < storesPerServer; store++ {
			storeID := nextStoreID
			nextStoreID++

			storeNodes[storeID] = nodeID

			channel := newChannelServer(10, 0)
			transport.Listen(storeID, channel.RaftMessage)
			channels[storeID] = channel
		}
	}

	// Heartbeat messages: Each store sends one message to each store.
	for fromStoreID, fromNodeID := range storeNodes {
		for toStoreID, toNodeID := range storeNodes {
			req := &storage.RaftMessageRequest{
				GroupID: 0,
				Message: raftpb.Message{
					Type: raftpb.MsgHeartbeat,
					From: uint64(fromStoreID),
					To:   uint64(toStoreID),
				},
				FromReplica: roachpb.ReplicaDescriptor{
					NodeID:    fromNodeID,
					StoreID:   fromStoreID,
					ReplicaID: 0,
				},
				ToReplica: roachpb.ReplicaDescriptor{
					NodeID:    toNodeID,
					StoreID:   toStoreID,
					ReplicaID: 0,
				},
			}

			if err := transports[fromNodeID].Send(req); err != nil {
				t.Errorf("Unable to send message from %d to %d: %s", fromNodeID, toNodeID, err)
			}
		}
	}

	// Read all the messages from the channels. Note that the transport
	// does not guarantee in-order delivery between independent
	// transports, so we just verify that the right number of messages
	// end up in each channel.
	for toStoreID := range storeNodes {
		for range storeNodes {
			select {
			case req := <-channels[toStoreID].ch:
				if req.Message.To != uint64(toStoreID) {
					t.Errorf("invalid message received on channel %d: %+v",
						toStoreID, req)
				}
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for message")
			}
		}

		select {
		case req := <-channels[toStoreID].ch:
			t.Errorf("got unexpected message %+v on channel %d", req, toStoreID)
		default:
		}
	}

	// Real raft messages have different node/store/replica IDs.
	// Send a message from replica 2 (on store 3, node 2) to replica 1 (on store 5, node 3)
	fromStoreID := roachpb.StoreID(3)
	toStoreID := roachpb.StoreID(5)
	req := &storage.RaftMessageRequest{
		GroupID: 1,
		Message: raftpb.Message{
			Type: raftpb.MsgApp,
			From: uint64(replicaIDs[fromStoreID]),
			To:   uint64(replicaIDs[toStoreID]),
		},
		FromReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[fromStoreID],
			StoreID:   fromStoreID,
			ReplicaID: replicaIDs[fromStoreID],
		},
		ToReplica: roachpb.ReplicaDescriptor{
			NodeID:    storeNodes[toStoreID],
			StoreID:   toStoreID,
			ReplicaID: replicaIDs[toStoreID],
		},
	}
	if err := transports[storeNodes[fromStoreID]].Send(req); err != nil {
		t.Errorf("Unable to send message from %d to %d: %s", fromStoreID, toStoreID, err)
	}
	select {
	case req2 := <-channels[toStoreID].ch:
		if !reflect.DeepEqual(req, req2) {
			t.Errorf("got unexpected message %+v", req2)
		}

	case <-time.After(5 * time.Second):
		t.Fatal("timed out waiting for message")
	}

	select {
	case req := <-channels[toStoreID].ch:
		t.Errorf("got unexpected message %+v on channel %d", req, toStoreID)
	default:
	}
}
Beispiel #7
0
// NewServer creates a Server from a server.Context.
func NewServer(ctx Context, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err)
	}

	if ctx.Insecure {
		log.Warning(context.TODO(), "running in insecure mode, this is strongly discouraged. See --insecure.")
	}
	// Try loading the TLS configs before anything else.
	if _, err := ctx.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := ctx.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		Tracer:  tracing.NewTracer(),
		ctx:     ctx,
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano),
		stopper: stopper,
	}
	s.clock.SetMaxOffset(ctx.MaxOffset)

	s.rpcContext = rpc.NewContext(ctx.Context, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(context.TODO(), err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry)
	s.storePool = storage.NewStorePool(
		s.gossip,
		s.clock,
		s.rpcContext,
		ctx.ReservationsEnabled,
		ctx.TimeUntilStoreDead,
		s.stopper,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	s.distSender = kv.NewDistSender(&kv.DistSenderContext{
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}, s.gossip)
	txnMetrics := kv.NewTxnMetrics(s.registry)
	sender := kv.NewTxnCoordSender(s.distSender, s.clock, ctx.Linearizable, s.Tracer,
		s.stopper, txnMetrics)
	s.db = client.NewDB(sender)

	s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext)

	s.kvDB = kv.NewDBServer(s.ctx.Context, sender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if ctx.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *ctx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCtx := distsql.ServerContext{
		Context:    context.Background(),
		DB:         s.db,
		RPCContext: s.rpcContext,
	}
	s.distSQLServer = distsql.NewServer(distSQLCtx)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up Executor
	eCtx := sql.ExecutorContext{
		Context:      context.Background(),
		DB:           s.db,
		Gossip:       s.gossip,
		LeaseManager: s.leaseMgr,
		Clock:        s.clock,
		DistSQLSrv:   s.distSQLServer,
	}
	if ctx.TestingKnobs.SQLExecutor != nil {
		eCtx.TestingKnobs = ctx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		eCtx.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(eCtx, s.stopper, s.registry)

	s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor, s.registry)

	// TODO(bdarnell): make StoreConfig configurable.
	nCtx := storage.StoreContext{
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.ctx.RaftTickInterval,
		ScanInterval:                   s.ctx.ScanInterval,
		ScanMaxIdleTime:                s.ctx.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.ctx.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure,
		Tracer:    s.Tracer,
		StorePool: s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: true,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
	}
	if ctx.TestingKnobs.Store != nil {
		nCtx.TestingKnobs = *ctx.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.rpcContext.RemoteClocks.RegisterMetrics(s.registry)
	s.runtime = status.MakeRuntimeStatSampler(s.clock, s.registry)

	s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	roachpb.RegisterInternalStoresServer(s.grpc, s.node.InternalStoresServer)

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.tsDB)

	s.admin = makeAdminServer(s)
	s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores)
	for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}