// TestTxnCoordSenderErrorWithIntent validates that if a transactional request // returns an error but also indicates a Writing transaction, the coordinator // tracks it just like a successful request. func TestTxnCoordSenderErrorWithIntent(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { txn := ba.Txn.Clone() txn.Writing = true pErr := roachpb.NewError(roachpb.NewTransactionRetryError()) pErr.SetTxn(&txn) return nil, pErr }), clock, false, tracing.NewTracer(), stopper) defer stopper.Stop() var ba roachpb.BatchRequest key := roachpb.Key("test") ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.EndTransactionRequest{}) ba.Txn = &roachpb.Transaction{Name: "test"} if _, pErr := ts.Send(context.Background(), ba); !testutils.IsPError(pErr, "retry txn") { t.Fatalf("unexpected error: %v", pErr) } defer teardownHeartbeats(ts) ts.Lock() defer ts.Unlock() if len(ts.txns) != 1 { t.Fatalf("expected transaction to be tracked") } }
// TestMultiRangeScanWithMaxResults tests that commands which access multiple // ranges with MaxResults parameter are carried out properly. func TestMultiRangeScanWithMaxResults(t *testing.T) { defer leaktest.AfterTest(t)() testCases := []struct { splitKeys []roachpb.Key keys []roachpb.Key }{ {[]roachpb.Key{roachpb.Key("m")}, []roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}}, {[]roachpb.Key{roachpb.Key("h"), roachpb.Key("q")}, []roachpb.Key{roachpb.Key("b"), roachpb.Key("f"), roachpb.Key("k"), roachpb.Key("r"), roachpb.Key("w"), roachpb.Key("y")}}, } for i, tc := range testCases { s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) defer s.Stopper().Stop() ts := s.(*TestServer) retryOpts := base.DefaultRetryOptions() retryOpts.Closer = ts.stopper.ShouldDrain() ds := kv.NewDistSender(&kv.DistSenderContext{ Clock: s.Clock(), RPCContext: s.RPCContext(), RPCRetryOptions: &retryOpts, }, ts.Gossip()) tds := kv.NewTxnCoordSender(ds, ts.Clock(), ts.Ctx.Linearizable, tracing.NewTracer(), ts.stopper, kv.NewTxnMetrics(metric.NewRegistry())) for _, sk := range tc.splitKeys { if err := ts.node.ctx.DB.AdminSplit(sk); err != nil { t.Fatal(err) } } for _, k := range tc.keys { put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k)) if _, err := client.SendWrapped(tds, nil, put); err != nil { t.Fatal(err) } } // Try every possible ScanRequest startKey. for start := 0; start < len(tc.keys); start++ { // Try every possible maxResults, from 1 to beyond the size of key array. for maxResults := 1; maxResults <= len(tc.keys)-start+1; maxResults++ { scan := roachpb.NewScan(tc.keys[start], tc.keys[len(tc.keys)-1].Next(), int64(maxResults)) reply, err := client.SendWrapped(tds, nil, scan) if err != nil { t.Fatal(err) } rows := reply.(*roachpb.ScanResponse).Rows if start+maxResults <= len(tc.keys) && len(rows) != maxResults { t.Errorf("%d: start=%s: expected %d rows, but got %d", i, tc.keys[start], maxResults, len(rows)) } else if start+maxResults == len(tc.keys)+1 && len(rows) != maxResults-1 { t.Errorf("%d: expected %d rows, but got %d", i, maxResults-1, len(rows)) } } } } }
// TestSendToOneClient verifies that Send correctly sends a request // to one server using the heartbeat RPC. func TestSendToOneClient(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() ctx := newNodeTestContext(nil, stopper) s, ln := newTestServer(t, ctx) registerBatch(t, s, 0) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 1 * time.Second, Timeout: 10 * time.Second, Trace: sp, } reply, err := sendBatch(opts, []net.Addr{ln.Addr()}, ctx) if err != nil { t.Fatal(err) } if reply == nil { t.Errorf("expected reply") } }
// TestTxnCoordSenderSingleRoundtripTxn checks that a batch which completely // holds the writing portion of a Txn (including EndTransaction) does not // launch a heartbeat goroutine at all. func TestTxnCoordSenderSingleRoundtripTxn(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { br := ba.CreateReply() txnClone := ba.Txn.Clone() br.Txn = &txnClone br.Txn.Writing = true return br, nil }), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry())) // Stop the stopper manually, prior to trying the transaction. This has the // effect of returning a NodeUnavailableError for any attempts at launching // a heartbeat goroutine. stopper.Stop() var ba roachpb.BatchRequest key := roachpb.Key("test") ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.EndTransactionRequest{}) ba.Txn = &roachpb.Transaction{Name: "test"} _, pErr := ts.Send(context.Background(), ba) if pErr != nil { t.Fatal(pErr) } }
// NewDistSender returns a batch.Sender instance which connects to the // Cockroach cluster via the supplied gossip instance. Supplying a // DistSenderContext or the fields within is optional. For omitted values, sane // defaults will be used. func NewDistSender(ctx *DistSenderContext, gossip *gossip.Gossip) *DistSender { if ctx == nil { ctx = &DistSenderContext{} } clock := ctx.Clock if clock == nil { clock = hlc.NewClock(hlc.UnixNano) } ds := &DistSender{ clock: clock, gossip: gossip, } if ctx.nodeDescriptor != nil { atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(ctx.nodeDescriptor)) } rcSize := ctx.RangeDescriptorCacheSize if rcSize <= 0 { rcSize = defaultRangeDescriptorCacheSize } rdb := ctx.RangeDescriptorDB if rdb == nil { rdb = ds } ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize)) lcSize := ctx.LeaderCacheSize if lcSize <= 0 { lcSize = defaultLeaderCacheSize } ds.leaderCache = newLeaderCache(int(lcSize)) if ctx.RangeLookupMaxRanges <= 0 { ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges } if ctx.TransportFactory != nil { ds.transportFactory = ctx.TransportFactory } ds.rpcRetryOptions = base.DefaultRetryOptions() if ctx.RPCRetryOptions != nil { ds.rpcRetryOptions = *ctx.RPCRetryOptions } if ctx.RPCContext != nil { ds.rpcContext = ctx.RPCContext if ds.rpcRetryOptions.Closer == nil { ds.rpcRetryOptions.Closer = ds.rpcContext.Stopper.ShouldDrain() } } if ctx.Tracer != nil { ds.Tracer = ctx.Tracer } else { ds.Tracer = tracing.NewTracer() } if ctx.SendNextTimeout != 0 { ds.sendNextTimeout = ctx.SendNextTimeout } else { ds.sendNextTimeout = defaultSendNextTimeout } return ds }
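// Example (sketch, not part of the original source): a minimal construction of
// a DistSender against the DistSenderContext variant above, mirroring the test
// code elsewhere in this section. Only Clock, RPCContext and RPCRetryOptions
// are filled in; every omitted field (descriptor cache sizes, send-next
// timeout, transport factory, ...) falls back to the defaults chosen by
// NewDistSender. The clock, rpcCtx, stopper and g parameters are assumed to be
// supplied by the caller's test fixture.
func newExampleDistSender(
	clock *hlc.Clock, rpcCtx *rpc.Context, stopper *stop.Stopper, g *gossip.Gossip,
) *DistSender {
	retryOpts := base.DefaultRetryOptions()
	// Tie retries to the stopper so that retry loops end on shutdown.
	retryOpts.Closer = stopper.ShouldDrain()
	return NewDistSender(&DistSenderContext{
		Clock:           clock,
		RPCContext:      rpcCtx,
		RPCRetryOptions: &retryOpts,
	}, g)
}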
// TestRetryableError verifies that Send returns a retryable error // when it hits an RPC error. func TestRetryableError(t *testing.T) { defer leaktest.AfterTest(t)() clientStopper := stop.NewStopper() defer clientStopper.Stop() clientContext := newNodeTestContext(nil, clientStopper) serverStopper := stop.NewStopper() serverContext := newNodeTestContext(nil, serverStopper) s, ln := newTestServer(t, serverContext) roachpb.RegisterInternalServer(s, Node(0)) conn, err := clientContext.GRPCDial(ln.Addr().String()) if err != nil { t.Fatal(err) } ctx := context.Background() waitForConnState := func(desiredState grpc.ConnectivityState) { clientState, err := conn.State() for clientState != desiredState { if err != nil { t.Fatal(err) } if clientState == grpc.Shutdown { t.Fatalf("%v has unexpectedly shut down", conn) } clientState, err = conn.WaitForStateChange(ctx, clientState) } } // Wait until the client becomes healthy and shut down the server. waitForConnState(grpc.Ready) serverStopper.Stop() // Wait until the client becomes unhealthy. waitForConnState(grpc.TransientFailure) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 100 * time.Millisecond, Timeout: 100 * time.Millisecond, Trace: sp, } if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil { retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("Unexpected error type: %v", err) } if !retryErr.CanRetry() { t.Errorf("Expected retryable error: %v", retryErr) } } else { t.Fatalf("Unexpected success") } }
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created // LocalTestCluster. Also returns a cleanup function to be executed at the end of the // test. func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) { s, testSender := createTestDB(t) reg := metric.NewRegistry() txnMetrics := NewTxnMetrics(reg) sender := NewTxnCoordSender(testSender.wrapped, s.Clock, false, tracing.NewTracer(), s.Stopper, txnMetrics) return s.Manual, sender, func() { teardownHeartbeats(sender) s.Stop() } }
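// Example (sketch, not part of the original source): typical consumption of
// setupMetricsTest in a metrics test. The returned cleanup closure tears down
// the heartbeat goroutines and stops the LocalTestCluster, so it is deferred
// right away; the manual clock can be advanced when an assertion depends on
// elapsed time. The actual metric assertions are test-specific and omitted.
func TestTxnMetricsExampleUsage(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual, sender, cleanupFn := setupMetricsTest(t)
	defer cleanupFn()

	// Advance the clock before assertions that depend on elapsed time.
	manual.Set(1)

	// Run transactions through `sender` here and then assert on the metrics
	// it recorded.
	_ = sender
}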
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created // LocalTestCluster. Also returns a cleanup function to be executed at the end of the // test. func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) { s, testSender := createTestDB(t) txnMetrics := MakeTxnMetrics() ctx := tracing.WithTracer(context.Background(), tracing.NewTracer()) sender := NewTxnCoordSender(ctx, testSender.wrapped, s.Clock, false, s.Stopper, txnMetrics) return s.Manual, sender, func() { teardownHeartbeats(sender) s.Stop() } }
// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour ctx.ConsistencyCheckInterval = 10 * time.Hour grpcServer := rpc.NewServer(nodeRPCContext) serverCtx := makeTestContext() g := gossip.New( context.Background(), nodeRPCContext, grpcServer, serverCtx.GossipBootstrapResolvers, stopper, metric.NewRegistry()) ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr) if err != nil { t.Fatal(err) } if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() { gossipBS = ln.Addr() } r, err := resolver.NewResolverFromAddress(gossipBS) if err != nil { t.Fatalf("bad gossip address %s: %s", gossipBS, err) } g.SetResolvers([]resolver.Resolver{r}) g.Start(ln.Addr()) } ctx.Gossip = g retryOpts := base.DefaultRetryOptions() retryOpts.Closer = stopper.ShouldQuiesce() distSender := kv.NewDistSender(&kv.DistSenderConfig{ Clock: ctx.Clock, RPCContext: nodeRPCContext, RPCRetryOptions: &retryOpts, }, g) ctx.Ctx = tracing.WithTracer(context.Background(), tracing.NewTracer()) sender := kv.NewTxnCoordSender(ctx.Ctx, distSender, ctx.Clock, false, stopper, kv.MakeTxnMetrics()) ctx.DB = client.NewDB(sender) ctx.Transport = storage.NewDummyRaftTransport() node := NewNode(ctx, status.NewMetricsRecorder(ctx.Clock), metric.NewRegistry(), stopper, kv.MakeTxnMetrics(), sql.MakeEventLogger(nil)) roachpb.RegisterInternalServer(grpcServer, node) return grpcServer, ln.Addr(), ctx.Clock, node, stopper }
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request // returns an error but also indicates a Writing transaction, the coordinator // tracks it just like a successful request. func TestTxnCoordSenderErrorWithIntent(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) testCases := []struct { roachpb.Error errMsg string }{ {*roachpb.NewError(roachpb.NewTransactionRetryError()), "retry txn"}, {*roachpb.NewError(roachpb.NewTransactionPushError(roachpb.Transaction{ TxnMeta: enginepb.TxnMeta{ ID: uuid.NewV4(), }})), "failed to push"}, {*roachpb.NewErrorf("testError"), "testError"}, } for i, test := range testCases { func() { senderFunc := func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { txn := ba.Txn.Clone() txn.Writing = true pErr := &roachpb.Error{} *pErr = test.Error pErr.SetTxn(&txn) return nil, pErr } ctx := tracing.WithTracer(context.Background(), tracing.NewTracer()) ts := NewTxnCoordSender(ctx, senderFn(senderFunc), clock, false, stopper, MakeTxnMetrics()) var ba roachpb.BatchRequest key := roachpb.Key("test") ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}}) ba.Add(&roachpb.EndTransactionRequest{}) ba.Txn = &roachpb.Transaction{Name: "test"} _, pErr := ts.Send(context.Background(), ba) if !testutils.IsPError(pErr, test.errMsg) { t.Errorf("%d: error did not match %s: %v", i, test.errMsg, pErr) } defer teardownHeartbeats(ts) ts.Lock() defer ts.Unlock() if len(ts.txns) != 1 { t.Errorf("%d: expected transaction to be tracked", i) } }() } }
// setupMetricsTest returns a TxnCoordSender and ManualClock pointing to a newly created // LocalTestCluster. Also returns a cleanup function to be executed at the end of the // test. func setupMetricsTest(t *testing.T) (*hlc.ManualClock, *TxnCoordSender, func()) { s := createTestDB(t) reg := metric.NewRegistry() txnMetrics := NewTxnMetrics(reg) manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) sender := NewTxnCoordSender(s.distSender, clock, false, tracing.NewTracer(), s.Stopper, txnMetrics) return manual, sender, func() { teardownHeartbeats(sender) s.Stop() } }
// NewDistSender returns a batch.Sender instance which connects to the // Cockroach cluster via the supplied gossip instance. Supplying a // DistSenderContext or the fields within is optional. For omitted values, sane // defaults will be used. func NewDistSender(ctx *DistSenderContext, gossip *gossip.Gossip) *DistSender { if ctx == nil { ctx = &DistSenderContext{} } clock := ctx.Clock if clock == nil { clock = hlc.NewClock(hlc.UnixNano) } ds := &DistSender{ clock: clock, gossip: gossip, } if ctx.nodeDescriptor != nil { atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(ctx.nodeDescriptor)) } rcSize := ctx.RangeDescriptorCacheSize if rcSize <= 0 { rcSize = defaultRangeDescriptorCacheSize } rdb := ctx.RangeDescriptorDB if rdb == nil { rdb = ds } ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize)) lcSize := ctx.LeaderCacheSize if lcSize <= 0 { lcSize = defaultLeaderCacheSize } ds.leaderCache = newLeaderCache(int(lcSize)) if ctx.RangeLookupMaxRanges <= 0 { ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges } ds.rpcSend = send if ctx.RPCSend != nil { ds.rpcSend = ctx.RPCSend } if ctx.RPCContext != nil { ds.rpcContext = ctx.RPCContext } ds.rpcRetryOptions = defaultRPCRetryOptions if ctx.RPCRetryOptions != nil { ds.rpcRetryOptions = *ctx.RPCRetryOptions } if ctx.Tracer != nil { ds.Tracer = ctx.Tracer } else { ds.Tracer = tracing.NewTracer() } return ds }
// TestRetryableError verifies that Send returns a retryable error // when it hits an RPC error. func TestRetryableError(t *testing.T) { defer leaktest.AfterTest(t)() clientStopper := stop.NewStopper() defer clientStopper.Stop() clientContext := newNodeTestContext(nil, clientStopper) clientContext.HeartbeatTimeout = 10 * clientContext.HeartbeatInterval serverStopper := stop.NewStopper() serverContext := newNodeTestContext(nil, serverStopper) s, ln := newTestServer(t, serverContext) registerBatch(t, s, 0) c := rpc.NewClient(ln.Addr(), clientContext) // Wait until the client becomes healthy and shut down the server. <-c.Healthy() serverStopper.Stop() // Wait until the client becomes unhealthy. func() { for r := retry.Start(retry.Options{}); r.Next(); { select { case <-c.Healthy(): case <-time.After(1 * time.Nanosecond): return } } }() sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 100 * time.Millisecond, Timeout: 100 * time.Millisecond, Trace: sp, } if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil { retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("Unexpected error type: %v", err) } if !retryErr.CanRetry() { t.Errorf("Expected retryable error: %v", retryErr) } } else { t.Fatalf("Unexpected success") } }
// createTestNode creates an rpc server using the specified address, // gossip instance, KV database and a node using the specified slice // of engines. The server, clock and node are returned. If gossipBS is // not nil, the gossip bootstrap address is set to gossipBS. func createTestNode(addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T) ( *rpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) { ctx := storage.StoreContext{} stopper := stop.NewStopper() ctx.Clock = hlc.NewClock(hlc.UnixNano) nodeRPCContext := rpc.NewContext(nodeTestBaseContext, ctx.Clock, stopper) ctx.ScanInterval = 10 * time.Hour rpcServer := rpc.NewServer(nodeRPCContext) grpcServer := grpc.NewServer() tlsConfig, err := nodeRPCContext.GetServerTLSConfig() if err != nil { t.Fatal(err) } ln, err := util.ListenAndServe(stopper, grpcutil.GRPCHandlerFunc(grpcServer, rpcServer), addr, tlsConfig) if err != nil { t.Fatal(err) } g := gossip.New(nodeRPCContext, testContext.GossipBootstrapResolvers, stopper) if gossipBS != nil { // Handle possibility of a :0 port specification. if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() { gossipBS = ln.Addr() } r, err := resolver.NewResolverFromAddress(gossipBS) if err != nil { t.Fatalf("bad gossip address %s: %s", gossipBS, err) } g.SetResolvers([]resolver.Resolver{r}) g.Start(grpcServer, ln.Addr()) } ctx.Gossip = g retryOpts := kv.GetDefaultDistSenderRetryOptions() retryOpts.Closer = stopper.ShouldDrain() distSender := kv.NewDistSender(&kv.DistSenderContext{ Clock: ctx.Clock, RPCContext: nodeRPCContext, RPCRetryOptions: &retryOpts, }, g) tracer := tracing.NewTracer() sender := kv.NewTxnCoordSender(distSender, ctx.Clock, false, tracer, stopper) ctx.DB = client.NewDB(sender) // TODO(bdarnell): arrange to have the transport closed. // (or attach LocalRPCTransport.Close to the stopper) ctx.Transport = storage.NewLocalRPCTransport(stopper) ctx.EventFeed = util.NewFeed(stopper) ctx.Tracer = tracer node := NewNode(ctx, metric.NewRegistry(), stopper, nil) return rpcServer, ln.Addr(), ctx.Clock, node, stopper }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to maximum of 50M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of // TestServer.Addr after Start() for client connections. Use Stop() // to shutdown the server after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} tracer := tracing.NewTracer() ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper) server := rpc.NewServer(rpcContext) // never started ltc.Gossip = gossip.New( context.Background(), rpcContext, server, nil, ltc.Stopper, metric.NewRegistry()) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.Stores = storage.NewStores(ltc.Clock) ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper, ltc.Gossip) if ltc.DBContext == nil { dbCtx := client.DefaultDBContext() ltc.DBContext = &dbCtx } ltc.DB = client.NewDBWithContext(ltc.Sender, *ltc.DBContext) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() if ltc.RangeRetryOptions != nil { ctx.RangeRetryOptions = *ltc.RangeRetryOptions } ctx.Ctx = tracing.WithTracer(context.Background(), tracer) ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(context.Background(), ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
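// Example (sketch, not part of the original source): driving the Start method
// above from a test. makeSender stands in for whatever InitSenderFn the test
// wants to exercise (for instance one that wires up a TxnCoordSender the way
// other snippets in this section do); it is a placeholder, not an API defined
// here. Stop is the shutdown call referenced in the doc comment above.
func runWithLocalTestCluster(t util.Tester, baseCtx *base.Context, makeSender InitSenderFn) {
	ltc := &LocalTestCluster{}
	ltc.Start(t, baseCtx, makeSender)
	defer ltc.Stop()

	// Use ltc.DB (or ltc.Sender directly) to issue KV requests against the
	// single in-memory store that Start bootstrapped.
	_ = ltc.DB
}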
func TestInvalidAddrLength(t *testing.T) { defer leaktest.AfterTest(t)() // The provided replica slice is nil, so its length will always be less than the // specified response number. sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{Trace: sp} ret, err := send(opts, nil, roachpb.BatchRequest{}, nil) // The expected return is nil and a SendError. if _, ok := err.(*roachpb.SendError); !ok || ret != nil { t.Fatalf("Shorter replicas should return nil and SendError.") } }
// NewTxnCoordSender creates a new TxnCoordSender for use from a KV // distributed DB instance. func NewTxnCoordSender(wrapped client.Sender, clock *hlc.Clock, linearizable bool, tracer opentracing.Tracer, stopper *stop.Stopper) *TxnCoordSender { if tracer == nil { tracer = tracing.NewTracer() } tc := &TxnCoordSender{ wrapped: wrapped, clock: clock, heartbeatInterval: storage.DefaultHeartbeatInterval, clientTimeout: defaultClientTimeout, txns: map[uuid.UUID]*txnMetadata{}, linearizable: linearizable, tracer: tracer, stopper: stopper, } tc.stopper.RunWorker(tc.startStats) return tc }
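// Example (sketch, not part of the original source): wiring a TxnCoordSender
// into a client.DB, following the same pattern used by the tests in this
// section. Passing a nil tracer is fine because NewTxnCoordSender substitutes
// tracing.NewTracer() above; the wrapped sender, clock and stopper are assumed
// to be supplied by the caller.
func newExampleTxnDB(wrapped client.Sender, clock *hlc.Clock, stopper *stop.Stopper) *client.DB {
	sender := NewTxnCoordSender(wrapped, clock, false /* !linearizable */, nil /* tracer */, stopper)
	return client.NewDB(sender)
}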
// TestUnretryableError verifies that Send returns an unretryable // error when it hits a critical error. func TestUnretryableError(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() nodeContext := newNodeTestContext(nil, stopper) _, ln := newTestServer(t, nodeContext) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 1 * time.Second, Timeout: 10 * time.Second, Trace: sp, } sendOneFn = func(client *batchClient, timeout time.Duration, context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) { call := netrpc.Call{ Reply: &roachpb.BatchResponse{}, } call.Error = errors.New("unretryable") done <- &call } defer func() { sendOneFn = sendOne }() _, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext) if err == nil { t.Fatalf("Unexpected success") } retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("Unexpected error type: %v", err) } if retryErr.CanRetry() { t.Errorf("Unexpected retryable error: %v", retryErr) } }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to maximum of 50M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of // TestServer.Addr after Start() for client connections. Use Stop() // to shutdown the server after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester, baseCtx *base.Context, initSender InitSenderFn) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} tracer := tracing.NewTracer() ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(baseCtx, ltc.Clock, ltc.Stopper) ltc.Gossip = gossip.New(rpcContext, nil, ltc.Stopper) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.Stores = storage.NewStores(ltc.Clock) ltc.Sender = initSender(nodeDesc, tracer, ltc.Clock, ltc.Latency, ltc.Stores, ltc.Stopper, ltc.Gossip) ltc.DB = client.NewDB(ltc.Sender) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ctx.Tracer = tracer ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
// NewDistSender returns a batch.Sender instance which connects to the // Cockroach cluster via the supplied gossip instance. Supplying a // DistSenderConfig or the fields within is optional. For omitted values, sane // defaults will be used. func NewDistSender(cfg *DistSenderConfig, g *gossip.Gossip) *DistSender { if cfg == nil { cfg = &DistSenderConfig{} } ds := &DistSender{gossip: g} ds.Ctx = cfg.Ctx if ds.Ctx == nil { ds.Ctx = context.Background() } if ds.Ctx.Done() != nil { panic("context with cancel or deadline") } if tracing.TracerFromCtx(ds.Ctx) == nil { ds.Ctx = tracing.WithTracer(ds.Ctx, tracing.NewTracer()) } ds.clock = cfg.Clock if ds.clock == nil { ds.clock = hlc.NewClock(hlc.UnixNano) } if cfg.nodeDescriptor != nil { atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(cfg.nodeDescriptor)) } rcSize := cfg.RangeDescriptorCacheSize if rcSize <= 0 { rcSize = defaultRangeDescriptorCacheSize } rdb := cfg.RangeDescriptorDB if rdb == nil { rdb = ds } ds.rangeCache = newRangeDescriptorCache(rdb, int(rcSize)) lcSize := cfg.LeaseHolderCacheSize if lcSize <= 0 { lcSize = defaultLeaseHolderCacheSize } ds.leaseHolderCache = newLeaseHolderCache(int(lcSize)) if cfg.RangeLookupMaxRanges <= 0 { ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges } if cfg.TransportFactory != nil { ds.transportFactory = cfg.TransportFactory } ds.rpcRetryOptions = base.DefaultRetryOptions() if cfg.RPCRetryOptions != nil { ds.rpcRetryOptions = *cfg.RPCRetryOptions } if cfg.RPCContext != nil { ds.rpcContext = cfg.RPCContext if ds.rpcRetryOptions.Closer == nil { ds.rpcRetryOptions.Closer = ds.rpcContext.Stopper.ShouldQuiesce() } } if cfg.SendNextTimeout != 0 { ds.sendNextTimeout = cfg.SendNextTimeout } else { ds.sendNextTimeout = defaultSendNextTimeout } if g != nil { g.RegisterCallback(gossip.KeyFirstRangeDescriptor, func(_ string, value roachpb.Value) { if log.V(1) { var desc roachpb.RangeDescriptor if err := value.GetProto(&desc); err != nil { log.Errorf(ds.Ctx, "unable to parse gossiped first range descriptor: %s", err) } else { log.Infof(ds.Ctx, "gossiped first range descriptor: %+v", desc.Replicas) } } err := ds.rangeCache.EvictCachedRangeDescriptor(roachpb.RKeyMin, nil, false) if err != nil { log.Warningf(ds.Ctx, "failed to evict first range descriptor: %s", err) } }) } return ds }
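// Example (sketch, not part of the original source): the same construction
// against the later DistSenderConfig variant above, mirroring the server and
// node setup code later in this section. The supplied ctx must not carry a
// cancel or deadline (NewDistSender panics on that above), and it may or may
// not carry a tracer; a new one is attached if it does not. All parameters are
// assumed to come from the caller's environment.
func newExampleDistSenderFromConfig(
	ctx context.Context,
	clock *hlc.Clock,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	g *gossip.Gossip,
) *DistSender {
	retryOpts := base.DefaultRetryOptions()
	// Stop retrying once the node begins to quiesce.
	retryOpts.Closer = stopper.ShouldQuiesce()
	return NewDistSender(&DistSenderConfig{
		Ctx:             ctx,
		Clock:           clock,
		RPCContext:      rpcCtx,
		RPCRetryOptions: &retryOpts,
	}, g)
}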
// TestTxnCoordSenderNoDuplicateIntents verifies that TxnCoordSender does not // generate duplicate intents and that it merges intents for overlapping ranges. func TestTxnCoordSenderNoDuplicateIntents(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() manual := hlc.NewManualClock(0) clock := hlc.NewClock(manual.UnixNano) var expectedIntents []roachpb.Span senderFunc := func(_ context.Context, ba roachpb.BatchRequest) ( *roachpb.BatchResponse, *roachpb.Error) { if rArgs, ok := ba.GetArg(roachpb.EndTransaction); ok { et := rArgs.(*roachpb.EndTransactionRequest) if !reflect.DeepEqual(et.IntentSpans, expectedIntents) { t.Errorf("Invalid intents: %+v; expected %+v", et.IntentSpans, expectedIntents) } } br := ba.CreateReply() txnClone := ba.Txn.Clone() br.Txn = &txnClone br.Txn.Writing = true return br, nil } ts := NewTxnCoordSender(senderFn(senderFunc), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry())) defer stopper.Stop() defer teardownHeartbeats(ts) db := client.NewDB(ts) txn := client.NewTxn(context.Background(), *db) // Write to a, b, u-w before the final batch. pErr := txn.Put(roachpb.Key("a"), []byte("value")) if pErr != nil { t.Fatal(pErr) } pErr = txn.Put(roachpb.Key("b"), []byte("value")) if pErr != nil { t.Fatal(pErr) } pErr = txn.DelRange(roachpb.Key("u"), roachpb.Key("w")) if pErr != nil { t.Fatal(pErr) } // The final batch overwrites key a and overlaps part of the u-w range. b := txn.NewBatch() b.Put(roachpb.Key("b"), []byte("value")) b.Put(roachpb.Key("c"), []byte("value")) b.DelRange(roachpb.Key("v"), roachpb.Key("z"), false) // The expected intents are a, b, c, and u-z. expectedIntents = []roachpb.Span{ {Key: roachpb.Key("a"), EndKey: nil}, {Key: roachpb.Key("b"), EndKey: nil}, {Key: roachpb.Key("c"), EndKey: nil}, {Key: roachpb.Key("u"), EndKey: roachpb.Key("z")}, } pErr = txn.CommitInBatch(b) if pErr != nil { t.Fatal(pErr) } }
// verifyUncertainty writes values to a key in 5ns intervals and then launches // a transaction at each value's timestamp reading that value with // the maximumOffset given, verifying in the process that the correct values // are read (usually after one transaction restart). func verifyUncertainty(concurrency int, maxOffset time.Duration, t *testing.T) { s := createTestDB(t) disableOwnNodeCertain(s) defer s.Stop() key := []byte("key-test") // wgStart waits for all transactions to line up, wgEnd has the main // function wait for them to finish. var wgStart, wgEnd sync.WaitGroup wgStart.Add(concurrency + 1) wgEnd.Add(concurrency) // Initial high offset to allow for future writes. s.Clock.SetMaxOffset(999 * time.Nanosecond) s.Manual.Set(s.Clock.MaxOffset().Nanoseconds() + 1) for i := 0; i < concurrency; i++ { value := []byte(fmt.Sprintf("value-%d", i)) // Values will be written with 5ns spacing. futureTS := s.Clock.Now().Add(5, 0) s.Clock.Update(futureTS) // Expected number of versions skipped. skipCount := int(maxOffset) / 5 if i+skipCount >= concurrency { skipCount = concurrency - i - 1 } readValue := []byte(fmt.Sprintf("value-%d", i+skipCount)) if err := s.DB.Put(key, value); err != nil { t.Errorf("%d: got write error: %s", i, err) } if gr, err := s.DB.Get(key); err != nil { t.Fatalf("%d: expected success reading value: %s", i, err) } else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), value) { t.Fatalf("%d: expected success reading value: %v", i, gr.Value) } go func(i int) { defer wgEnd.Done() wgStart.Done() // Wait until the other goroutines are running. wgStart.Wait() txnManual := hlc.NewManualClock(futureTS.WallTime) txnClock := hlc.NewClock(txnManual.UnixNano) // Make sure to incorporate the logical component if the wall time // hasn't changed (i=0). The logical component will change // internally in a way we can't track, but we want to be just // ahead. txnClock.Update(futureTS.Add(0, 999)) // The written values are spaced out in intervals of 5ns, so // setting <5ns here should make do without any restarts while // higher values require roughly offset/5 restarts. txnClock.SetMaxOffset(maxOffset) sender := NewTxnCoordSender(s.distSender, txnClock, false, tracing.NewTracer(), s.Stopper) txnDB := client.NewDB(sender) if pErr := txnDB.Txn(func(txn *client.Txn) *roachpb.Error { // Read within the transaction. gr, pErr := txn.Get(key) if pErr != nil { if _, ok := pErr.GetDetail().(*roachpb.ReadWithinUncertaintyIntervalError); ok { return pErr } return roachpb.NewErrorf("unexpected read error of type %s: %s", reflect.TypeOf(pErr.GetDetail()), pErr) } if !gr.Exists() { return roachpb.NewErrorf("no value read") } if !bytes.Equal(gr.ValueBytes(), readValue) { return roachpb.NewErrorf("%d: read wrong value %v at %s, wanted %q", i, gr.Value, futureTS, readValue) } return nil }); pErr != nil { t.Error(pErr) } }(i) } // Kick the goroutines loose. wgStart.Done() // Wait for the goroutines to finish. wgEnd.Wait() }
// TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the // response transaction's timestamp and priority as appropriate. func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) { defer leaktest.AfterTest(t)() origTS := makeTS(123, 0) plus10 := origTS.Add(10, 10) plus20 := plus10.Add(10, 0) testCases := []struct { pErr *roachpb.Error expEpoch uint32 expPri int32 expTS, expOrigTS roachpb.Timestamp nodeSeen bool }{ { // No error, so nothing interesting either. pErr: nil, expEpoch: 0, expPri: 1, expTS: origTS, expOrigTS: origTS, }, { // On uncertainty error, new epoch begins and node is seen. // Timestamp moves ahead of the existing write. pErr: func() *roachpb.Error { pErr := roachpb.NewErrorWithTxn( roachpb.NewReadWithinUncertaintyIntervalError(roachpb.ZeroTimestamp, roachpb.ZeroTimestamp), &roachpb.Transaction{}) const nodeID = 1 pErr.GetTxn().UpdateObservedTimestamp(nodeID, plus10) pErr.OriginNode = nodeID return pErr }(), expEpoch: 1, expPri: 1, expTS: plus10, expOrigTS: plus10, nodeSeen: true, }, { // On abort, nothing changes but we get a new priority to use for // the next attempt. pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{}, &roachpb.Transaction{ TxnMeta: roachpb.TxnMeta{Timestamp: plus20, Priority: 10}, }), expPri: 10, }, { // On failed push, new epoch begins just past the pushed timestamp. // Additionally, priority ratchets up to just below the pusher's. pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{ PusheeTxn: roachpb.Transaction{ TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)}, }, }, &roachpb.Transaction{}), expEpoch: 1, expPri: 9, expTS: plus10, expOrigTS: plus10, }, { // On retry, restart with new epoch, timestamp and priority. pErr: roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{}, &roachpb.Transaction{ TxnMeta: roachpb.TxnMeta{Timestamp: plus10, Priority: int32(10)}, }, ), expEpoch: 1, expPri: 10, expTS: plus10, expOrigTS: plus10, }, } for i, test := range testCases { stopper := stop.NewStopper() manual := hlc.NewManualClock(origTS.WallTime) clock := hlc.NewClock(manual.UnixNano) clock.SetMaxOffset(20) ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { var reply *roachpb.BatchResponse if test.pErr == nil { reply = ba.CreateReply() } return reply, test.pErr }), clock, false, tracing.NewTracer(), stopper, NewTxnMetrics(metric.NewRegistry())) db := client.NewDB(ts) txn := client.NewTxn(context.Background(), *db) txn.InternalSetPriority(1) txn.Proto.Name = "test txn" key := roachpb.Key("test-key") _, err := txn.Get(key) teardownHeartbeats(ts) stopper.Stop() if test.pErr != nil && err == nil { t.Fatalf("expected an error") } if txn.Proto.Epoch != test.expEpoch { t.Errorf("%d: expected epoch = %d; got %d", i, test.expEpoch, txn.Proto.Epoch) } if txn.Proto.Priority != test.expPri { t.Errorf("%d: expected priority = %d; got %d", i, test.expPri, txn.Proto.Priority) } if !txn.Proto.Timestamp.Equal(test.expTS) { t.Errorf("%d: expected timestamp to be %s; got %s", i, test.expTS, txn.Proto.Timestamp) } if !txn.Proto.OrigTimestamp.Equal(test.expOrigTS) { t.Errorf("%d: expected orig timestamp to be %s; got %s", i, test.expOrigTS, txn.Proto.OrigTimestamp) } if ns := txn.Proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen { t.Errorf("%d: expected nodeSeen=%t, but list of hosts is %v", i, test.nodeSeen, ns) } } }
// NewServer creates a Server from a server.Context. func NewServer(srvCtx Context, stopper *stop.Stopper) (*Server, error) { if _, err := net.ResolveTCPAddr("tcp", srvCtx.Addr); err != nil { return nil, errors.Errorf("unable to resolve RPC address %q: %v", srvCtx.Addr, err) } if srvCtx.Ctx == nil { srvCtx.Ctx = context.Background() } if srvCtx.Ctx.Done() != nil { panic("context with cancel or deadline") } if tracing.TracerFromCtx(srvCtx.Ctx) == nil { // TODO(radu): instead of modifying srvCtx.Ctx, we should have a separate // context.Context inside Server. We will need to rename server.Context // though. srvCtx.Ctx = tracing.WithTracer(srvCtx.Ctx, tracing.NewTracer()) } if srvCtx.Insecure { log.Warning(srvCtx.Ctx, "running in insecure mode, this is strongly discouraged. See --insecure.") } // Try loading the TLS configs before anything else. if _, err := srvCtx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := srvCtx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } // Add a dynamic log tag value for the node ID. // // We need to pass the server's Ctx as a base context for the various server // components, but we won't know the node ID until we Start(). At that point // it's too late to change the contexts in the components (various background // processes will have already started using the contexts). // // The dynamic value allows us to add the log tag to the context now and // update the value asynchronously. It's not significantly more expensive than // a regular tag since it's just doing an (atomic) load when a log/trace // message is constructed. s.nodeLogTagVal.Set(log.DynamicIntValueUnknown) srvCtx.Ctx = log.WithLogTag(srvCtx.Ctx, "n", &s.nodeLogTagVal) s.ctx = srvCtx s.clock.SetMaxOffset(srvCtx.MaxOffset) s.rpcContext = rpc.NewContext(srvCtx.Context, s.clock, s.stopper) s.rpcContext.HeartbeatCB = func() { if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil { log.Fatal(s.Ctx(), err) } } s.grpc = rpc.NewServer(s.rpcContext) s.registry = metric.NewRegistry() s.gossip = gossip.New( s.Ctx(), s.rpcContext, s.grpc, s.ctx.GossipBootstrapResolvers, s.stopper, s.registry) s.storePool = storage.NewStorePool( s.gossip, s.clock, s.rpcContext, srvCtx.ReservationsEnabled, srvCtx.TimeUntilStoreDead, s.stopper, ) // A custom RetryOptions is created which uses stopper.ShouldQuiesce() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown. // // Such a loop occurs when the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus the // DistSender needs to know that it should not retry in this situation. 
retryOpts := base.DefaultRetryOptions() retryOpts.Closer = s.stopper.ShouldQuiesce() distSenderCfg := kv.DistSenderConfig{ Ctx: s.Ctx(), Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, } s.distSender = kv.NewDistSender(&distSenderCfg, s.gossip) txnMetrics := kv.MakeTxnMetrics() s.registry.AddMetricStruct(txnMetrics) s.txnCoordSender = kv.NewTxnCoordSender(s.Ctx(), s.distSender, s.clock, srvCtx.Linearizable, s.stopper, txnMetrics) s.db = client.NewDB(s.txnCoordSender) s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(s.ctx.Context, s.txnCoordSender, s.stopper) roachpb.RegisterExternalServer(s.grpc, s.kvDB) // Set up Lease Manager var lmKnobs sql.LeaseManagerTestingKnobs if srvCtx.TestingKnobs.SQLLeaseManager != nil { lmKnobs = *srvCtx.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs) } s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock, lmKnobs, s.stopper) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) // Set up the DistSQL server distSQLCfg := distsql.ServerConfig{ Context: s.Ctx(), DB: s.db, RPCContext: s.rpcContext, } s.distSQLServer = distsql.NewServer(distSQLCfg) distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer) // Set up Executor execCfg := sql.ExecutorConfig{ Context: s.Ctx(), DB: s.db, Gossip: s.gossip, LeaseManager: s.leaseMgr, Clock: s.clock, DistSQLSrv: s.distSQLServer, } if srvCtx.TestingKnobs.SQLExecutor != nil { execCfg.TestingKnobs = srvCtx.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs) } else { execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{} } s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper) s.registry.AddMetricStruct(s.sqlExecutor) s.pgServer = pgwire.MakeServer(s.ctx.Context, s.sqlExecutor) s.registry.AddMetricStruct(s.pgServer.Metrics()) // TODO(bdarnell): make StoreConfig configurable. nCtx := storage.StoreContext{ Ctx: s.Ctx(), Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, RaftTickInterval: s.ctx.RaftTickInterval, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, ConsistencyCheckInterval: s.ctx.ConsistencyCheckInterval, ConsistencyCheckPanicOnFailure: s.ctx.ConsistencyCheckPanicOnFailure, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, }, } if srvCtx.TestingKnobs.Store != nil { nCtx.TestingKnobs = *srvCtx.TestingKnobs.Store.(*storage.StoreTestingKnobs) } s.recorder = status.NewMetricsRecorder(s.clock) s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics()) s.runtime = status.MakeRuntimeStatSampler(s.clock) s.registry.AddMetricStruct(s.runtime) s.node = NewNode(nCtx, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr)) roachpb.RegisterInternalServer(s.grpc, s.node) storage.RegisterStoresServer(s.grpc, s.node.storesServer) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.MakeServer(s.tsDB) s.admin = makeAdminServer(s) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx.Context, s.rpcContext, s.node.stores) for _, gw := range []grpcGatewayServer{&s.admin, s.status, &s.tsServer} { gw.RegisterService(s.grpc) } return s, nil }
// bootstrapCluster bootstraps multiple stores using the provided // engines and cluster ID. The first bootstrapped store contains a // single range spanning all keys. Initial range lookup metadata is // populated for the range. Returns the cluster ID. func bootstrapCluster(engines []engine.Engine) (uuid.UUID, error) { clusterID := uuid.MakeV4() stopper := stop.NewStopper() defer stopper.Stop() ctx := storage.StoreContext{} ctx.ScanInterval = 10 * time.Minute ctx.Clock = hlc.NewClock(hlc.UnixNano) ctx.Tracer = tracing.NewTracer() // Create a KV DB with a local sender. stores := storage.NewStores(ctx.Clock) sender := kv.NewTxnCoordSender(stores, ctx.Clock, false, ctx.Tracer, stopper) ctx.DB = client.NewDB(sender) ctx.Transport = storage.NewLocalRPCTransport(stopper) for i, eng := range engines { sIdent := roachpb.StoreIdent{ ClusterID: clusterID, NodeID: 1, StoreID: roachpb.StoreID(i + 1), } // The bootstrapping store will not connect to other nodes so its // StoreContext doesn't really matter. s := storage.NewStore(ctx, eng, &roachpb.NodeDescriptor{NodeID: 1}) // Verify the store isn't already part of a cluster. if s.Ident.ClusterID != *uuid.EmptyUUID { return uuid.UUID{}, util.Errorf("storage engine already belongs to a cluster (%s)", s.Ident.ClusterID) } // Bootstrap store to persist the store ident. if err := s.Bootstrap(sIdent, stopper); err != nil { return uuid.UUID{}, err } // Create first range, writing directly to engine. Note this does // not create the range, just its data. Only do this if this is the // first store. if i == 0 { initialValues := GetBootstrapSchema().GetInitialValues() if err := s.BootstrapRange(initialValues); err != nil { return uuid.UUID{}, err } } if err := s.Start(stopper); err != nil { return uuid.UUID{}, err } stores.AddStore(s) // Initialize node and store ids. Only initialize the node once. if i == 0 { if nodeID, err := allocateNodeID(ctx.DB); nodeID != sIdent.NodeID || err != nil { return uuid.UUID{}, util.Errorf("expected to initialize node id allocator to %d, got %d: %s", sIdent.NodeID, nodeID, err) } } if storeID, err := allocateStoreIDs(sIdent.NodeID, 1, ctx.DB); storeID != sIdent.StoreID || err != nil { return uuid.UUID{}, util.Errorf("expected to initialize store id allocator to %d, got %d: %s", sIdent.StoreID, storeID, err) } } return clusterID, nil }
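// Example (sketch, not part of the original source): bootstrapping a fresh
// cluster on a single in-memory engine, roughly the way a first-time start
// path or a test would invoke bootstrapCluster. The 50MB in-memory engine
// mirrors the LocalTestCluster snippets in this section; the stopper only
// needs to outlive the call.
func bootstrapExampleCluster() (uuid.UUID, error) {
	stopper := stop.NewStopper()
	defer stopper.Stop()
	eng := engine.NewInMem(roachpb.Attributes{}, 50<<20, stopper)
	return bootstrapCluster([]engine.Engine{eng})
}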
// NewServer creates a Server from a server.Context. func NewServer(ctx *Context, stopper *stop.Stopper) (*Server, error) { if ctx == nil { return nil, util.Errorf("ctx must not be null") } if _, err := net.ResolveTCPAddr("tcp", ctx.Addr); err != nil { return nil, util.Errorf("unable to resolve RPC address %q: %v", ctx.Addr, err) } if ctx.Insecure { log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.") } // Try loading the TLS configs before anything else. if _, err := ctx.GetServerTLSConfig(); err != nil { return nil, err } if _, err := ctx.GetClientTLSConfig(); err != nil { return nil, err } s := &Server{ Tracer: tracing.NewTracer(), ctx: ctx, mux: http.NewServeMux(), clock: hlc.NewClock(hlc.UnixNano), stopper: stopper, } s.clock.SetMaxOffset(ctx.MaxOffset) s.rpcContext = crpc.NewContext(&ctx.Context, s.clock, stopper) stopper.RunWorker(func() { s.rpcContext.RemoteClocks.MonitorRemoteOffsets(stopper) }) s.rpc = crpc.NewServer(s.rpcContext) s.gossip = gossip.New(s.rpcContext, s.ctx.GossipBootstrapResolvers, stopper) s.storePool = storage.NewStorePool(s.gossip, s.clock, ctx.TimeUntilStoreDead, stopper) feed := util.NewFeed(stopper) // A custom RetryOptions is created which uses stopper.ShouldDrain() as // the Closer. This prevents infinite retry loops from occurring during // graceful server shutdown. // // Such a loop occurs when the DistSender attempts a connection to the // local server during shutdown, and receives an internal server error (HTTP // Code 5xx). This is the correct error for a server to return when it is // shutting down, and is normally retryable in a cluster environment. // However, on a single-node setup (such as a test), retries will never // succeed because the only server has been shut down; thus the // DistSender needs to know that it should not retry in this situation. retryOpts := kv.GetDefaultDistSenderRetryOptions() retryOpts.Closer = stopper.ShouldDrain() ds := kv.NewDistSender(&kv.DistSenderContext{ Clock: s.clock, RPCContext: s.rpcContext, RPCRetryOptions: &retryOpts, }, s.gossip) txnRegistry := metric.NewRegistry() txnMetrics := kv.NewTxnMetrics(txnRegistry) sender := kv.NewTxnCoordSender(ds, s.clock, ctx.Linearizable, s.Tracer, s.stopper, txnMetrics) s.db = client.NewDB(sender) s.grpc = grpc.NewServer() s.raftTransport = storage.NewRaftTransport(storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext) s.kvDB = kv.NewDBServer(&s.ctx.Context, sender, stopper) if err := s.kvDB.RegisterRPC(s.rpc); err != nil { return nil, err } s.leaseMgr = sql.NewLeaseManager(0, *s.db, s.clock) s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip) sqlRegistry := metric.NewRegistry() s.sqlExecutor = sql.NewExecutor(*s.db, s.gossip, s.leaseMgr, s.stopper, sqlRegistry) s.pgServer = pgwire.MakeServer(&s.ctx.Context, s.sqlExecutor, sqlRegistry) // TODO(bdarnell): make StoreConfig configurable. 
nCtx := storage.StoreContext{ Clock: s.clock, DB: s.db, Gossip: s.gossip, Transport: s.raftTransport, ScanInterval: s.ctx.ScanInterval, ScanMaxIdleTime: s.ctx.ScanMaxIdleTime, EventFeed: feed, Tracer: s.Tracer, StorePool: s.storePool, SQLExecutor: sql.InternalExecutor{ LeaseManager: s.leaseMgr, }, LogRangeEvents: true, AllocatorOptions: storage.AllocatorOptions{ AllowRebalance: true, Mode: s.ctx.BalanceMode, }, } s.recorder = status.NewMetricsRecorder(s.clock) s.recorder.AddNodeRegistry("sql.%s", sqlRegistry) s.recorder.AddNodeRegistry("txn.%s", txnRegistry) s.node = NewNode(nCtx, s.recorder, s.stopper, txnMetrics) s.admin = newAdminServer(s.db, s.stopper, s.sqlExecutor) s.tsDB = ts.NewDB(s.db) s.tsServer = ts.NewServer(s.tsDB) s.status = newStatusServer(s.db, s.gossip, s.recorder, s.ctx) return s, nil }
// Start starts the test cluster by bootstrapping an in-memory store // (defaults to maximum of 50M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of // TestServer.Addr after Start() for client connections. Use Stop() // to shutdown the server after the test completes. func (ltc *LocalTestCluster) Start(t util.Tester) { nodeID := roachpb.NodeID(1) nodeDesc := &roachpb.NodeDescriptor{NodeID: nodeID} ltc.tester = t ltc.Manual = hlc.NewManualClock(0) ltc.Clock = hlc.NewClock(ltc.Manual.UnixNano) ltc.Stopper = stop.NewStopper() rpcContext := rpc.NewContext(testutils.NewNodeTestBaseContext(), ltc.Clock, ltc.Stopper) ltc.Gossip = gossip.New(rpcContext, gossip.TestBootstrap, ltc.Stopper) ltc.Eng = engine.NewInMem(roachpb.Attributes{}, 50<<20, ltc.Stopper) ltc.stores = storage.NewStores(ltc.Clock) tracer := tracing.NewTracer() var rpcSend rpcSendFn = func(_ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, _ *rpc.Context) (proto.Message, error) { if ltc.Latency > 0 { time.Sleep(ltc.Latency) } sp := tracer.StartSpan("node") defer sp.Finish() ctx := opentracing.ContextWithSpan(context.Background(), sp) sp.LogEvent(args.String()) br, pErr := ltc.stores.Send(ctx, args) if br == nil { br = &roachpb.BatchResponse{} } if br.Error != nil { panic(roachpb.ErrorUnexpectedlySet(ltc.stores, br)) } br.Error = pErr if pErr != nil { sp.LogEvent("error: " + pErr.String()) } return br, nil } retryOpts := GetDefaultDistSenderRetryOptions() retryOpts.Closer = ltc.Stopper.ShouldDrain() ltc.distSender = NewDistSender(&DistSenderContext{ Clock: ltc.Clock, RangeDescriptorCacheSize: defaultRangeDescriptorCacheSize, RangeLookupMaxRanges: defaultRangeLookupMaxRanges, LeaderCacheSize: defaultLeaderCacheSize, RPCRetryOptions: &retryOpts, nodeDescriptor: nodeDesc, RPCSend: rpcSend, // defined above RangeDescriptorDB: ltc.stores, // for descriptor lookup }, ltc.Gossip) ltc.Sender = NewTxnCoordSender(ltc.distSender, ltc.Clock, false /* !linearizable */, tracer, ltc.Stopper, NewTxnMetrics(metric.NewRegistry())) ltc.DB = client.NewDB(ltc.Sender) transport := storage.NewDummyRaftTransport() ctx := storage.TestStoreContext() ctx.Clock = ltc.Clock ctx.DB = ltc.DB ctx.Gossip = ltc.Gossip ctx.Transport = transport ctx.Tracer = tracer ltc.Store = storage.NewStore(ctx, ltc.Eng, nodeDesc) if err := ltc.Store.Bootstrap(roachpb.StoreIdent{NodeID: nodeID, StoreID: 1}, ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.stores.AddStore(ltc.Store) if err := ltc.Store.BootstrapRange(nil); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } if err := ltc.Store.Start(ltc.Stopper); err != nil { t.Fatalf("unable to start local test cluster: %s", err) } ltc.Gossip.SetNodeID(nodeDesc.NodeID) if err := ltc.Gossip.SetNodeDescriptor(nodeDesc); err != nil { t.Fatalf("unable to set node descriptor: %s", err) } }
// TestClientNotReady verifies that Send gets an RPC error when a client // does not become ready. func TestClientNotReady(t *testing.T) { defer leaktest.AfterTest(t)() t.Skip("TODO(tamird): #3942") stopper := stop.NewStopper() defer stopper.Stop() nodeContext := newNodeTestContext(nil, stopper) // Construct a server that listens but doesn't do anything. s, ln := newTestServer(t, nodeContext) registerBatch(t, s, 50*time.Millisecond) sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 100 * time.Nanosecond, Timeout: 100 * time.Nanosecond, Trace: sp, } // Send an RPC to the server, whose handler is too slow to respond within the timeouts. if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext); err != nil { retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("Unexpected error type: %v", err) } if !retryErr.CanRetry() { t.Errorf("Expected retryable error: %v", retryErr) } } else { t.Fatalf("Unexpected success") } // Send the RPC again with no timeout. opts.SendNextTimeout = 0 opts.Timeout = 0 c := make(chan error) go func() { if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, nodeContext); err == nil { c <- util.Errorf("expected error when client is closed") } else if !strings.Contains(err.Error(), "failed as client connection was closed") { c <- err } close(c) }() select { case <-c: t.Fatalf("Unexpected end of rpc call") case <-time.After(1 * time.Millisecond): } // Grab the client for the server's address and close it. This will cause the // blocked ping RPC to finish. rpc.NewClient(ln.Addr(), nodeContext).Close() if err := <-c; err != nil { t.Fatal(err) } }
// TestMultiRangeScanDeleteRange tests that commands which access multiple // ranges are carried out properly. func TestMultiRangeScanDeleteRange(t *testing.T) { defer leaktest.AfterTest(t)() s := StartTestServer(t) defer s.Stop() retryOpts := kv.GetDefaultDistSenderRetryOptions() retryOpts.Closer = s.stopper.ShouldDrain() ds := kv.NewDistSender(&kv.DistSenderContext{ Clock: s.Clock(), RPCContext: s.RPCContext(), RPCRetryOptions: &retryOpts, }, s.Gossip()) tds := kv.NewTxnCoordSender(ds, s.Clock(), testContext.Linearizable, tracing.NewTracer(), s.stopper, kv.NewTxnMetrics(metric.NewRegistry())) if err := s.node.ctx.DB.AdminSplit("m"); err != nil { t.Fatal(err) } writes := []roachpb.Key{roachpb.Key("a"), roachpb.Key("z")} get := &roachpb.GetRequest{ Span: roachpb.Span{Key: writes[0]}, } get.EndKey = writes[len(writes)-1] if _, err := client.SendWrapped(tds, nil, get); err == nil { t.Errorf("able to call Get with a key range: %v", get) } var delTS roachpb.Timestamp for i, k := range writes { put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k)) reply, err := client.SendWrapped(tds, nil, put) if err != nil { t.Fatal(err) } scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next(), 0).(*roachpb.ScanRequest) // The Put ts may have been pushed by tsCache, // so make sure we see their values in our Scan. delTS = reply.(*roachpb.PutResponse).Timestamp reply, err = client.SendWrappedWith(tds, nil, roachpb.Header{Timestamp: delTS}, scan) if err != nil { t.Fatal(err) } sr := reply.(*roachpb.ScanResponse) if sr.Txn != nil { // This was the other way around at some point in the past. // Same below for Delete, etc. t.Errorf("expected no transaction in response header") } if rows := sr.Rows; len(rows) != i+1 { t.Fatalf("expected %d rows, but got %d", i+1, len(rows)) } } del := &roachpb.DeleteRangeRequest{ Span: roachpb.Span{ Key: writes[0], EndKey: roachpb.Key(writes[len(writes)-1]).Next(), }, ReturnKeys: true, } reply, err := client.SendWrappedWith(tds, nil, roachpb.Header{Timestamp: delTS}, del) if err != nil { t.Fatal(err) } dr := reply.(*roachpb.DeleteRangeResponse) if dr.Txn != nil { t.Errorf("expected no transaction in response header") } if !reflect.DeepEqual(dr.Keys, writes) { t.Errorf("expected keys %v to be deleted, but got %v instead", writes, dr.Keys) } scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next(), 0).(*roachpb.ScanRequest) txn := &roachpb.Transaction{Name: "MyTxn"} reply, err = client.SendWrappedWith(tds, nil, roachpb.Header{Txn: txn}, scan) if err != nil { t.Fatal(err) } sr := reply.(*roachpb.ScanResponse) if txn := sr.Txn; txn == nil || txn.Name != "MyTxn" { t.Errorf("wanted Txn to persist, but it changed to %v", txn) } if rows := sr.Rows; len(rows) > 0 { t.Fatalf("scan after delete returned rows: %v", rows) } }
// TestComplexScenarios verifies various complex success/failure scenarios by // mocking sendOne. func TestComplexScenarios(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() nodeContext := newNodeTestContext(nil, stopper) testCases := []struct { numServers int numErrors int numRetryableErrors int success bool isRetryableErrorExpected bool }{ // --- Success scenarios --- {1, 0, 0, true, false}, {5, 0, 0, true, false}, // There are some errors, but enough RPCs succeed. {5, 1, 0, true, false}, {5, 4, 0, true, false}, {5, 2, 0, true, false}, // --- Failure scenarios --- // All RPCs fail. {5, 5, 0, false, false}, // All RPCs fail, but some of the errors are retryable. {5, 5, 1, false, true}, {5, 5, 3, false, true}, // Some RPCs fail, but we do have enough remaining clients and recoverable errors. {5, 5, 2, false, true}, } for i, test := range testCases { // Copy the values to avoid data race. sendOneFn might // be called after this test case finishes. numErrors := test.numErrors numRetryableErrors := test.numRetryableErrors var serverAddrs []net.Addr for j := 0; j < test.numServers; j++ { _, ln := newTestServer(t, nodeContext) serverAddrs = append(serverAddrs, ln.Addr()) } sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 1 * time.Second, Timeout: 10 * time.Second, Trace: sp, } // Mock sendOne. sendOneFn = func(client *batchClient, timeout time.Duration, context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) { addr := client.RemoteAddr() addrID := -1 for serverAddrID, serverAddr := range serverAddrs { if serverAddr.String() == addr.String() { addrID = serverAddrID break } } if addrID == -1 { t.Fatalf("%d: %v is not found in serverAddrs: %v", i, addr, serverAddrs) } call := netrpc.Call{ Reply: &roachpb.BatchResponse{}, } if addrID < numErrors { call.Error = roachpb.NewSendError("test", addrID < numRetryableErrors) } done <- &call } defer func() { sendOneFn = sendOne }() reply, err := sendBatch(opts, serverAddrs, nodeContext) if test.success { if reply == nil { t.Errorf("%d: expected reply", i) } continue } retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("%d: Unexpected error type: %v", i, err) } if retryErr.CanRetry() != test.isRetryableErrorExpected { t.Errorf("%d: Unexpected error: %v", i, retryErr) } } }