// sendOne invokes the specified RPC on the supplied client when the // client is ready. On success, the reply is sent on the channel; // otherwise an error is sent. // // Do not call directly, but instead use sendOneFn. Tests mock out this method // via sendOneFn in order to test various error cases. func sendOne(opts SendOptions, rpcContext *rpc.Context, client batchClient, done chan batchCall) { addr := client.remoteAddr if log.V(2) { log.Infof("sending request to %s: %+v", addr, client.args) } if localServer := rpcContext.GetLocalInternalServerForAddr(addr); enableLocalCalls && localServer != nil { ctx, cancel := opts.contextWithTimeout() defer cancel() reply, err := localServer.Batch(ctx, &client.args) done <- batchCall{reply: reply, err: err} return } go func() { ctx, cancel := opts.contextWithTimeout() defer cancel() c := client.conn for state, err := c.State(); state != grpc.Ready; state, err = c.WaitForStateChange(ctx, state) { if err != nil { done <- batchCall{err: roachpb.NewSendError( fmt.Sprintf("rpc to %s failed: %s", addr, err), true)} return } if state == grpc.Shutdown { done <- batchCall{err: roachpb.NewSendError( fmt.Sprintf("rpc to %s failed as client connection was closed", addr), true)} return } } reply, err := client.client.Batch(ctx, &client.args) done <- batchCall{reply: reply, err: err} }() }
func TestEvictCacheOnError(t *testing.T) { defer leaktest.AfterTest(t)() // if rpcError is true, the first attempt gets an RPC error, otherwise // the RPC call succeeds but there is an error in the RequestHeader. // Currently leader and cached range descriptor are treated equally. testCases := []struct{ rpcError, retryable, shouldClearLeader, shouldClearReplica bool }{ {false, false, false, false}, // non-retryable replica error {false, true, false, false}, // retryable replica error {true, false, true, true}, // RPC error aka all nodes dead {true, true, false, false}, // retryable RPC error } for i, tc := range testCases { g, s := makeTestGossip(t) defer s() leader := roachpb.ReplicaDescriptor{ NodeID: 99, StoreID: 999, } first := true var testFn rpcSendFn = func(_ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, _ *rpc.Context) (*roachpb.BatchResponse, error) { if !first { return args.CreateReply(), nil } first = false if tc.rpcError { return nil, roachpb.NewSendError("boom", tc.retryable) } var err error if tc.retryable { err = &roachpb.RangeKeyMismatchError{} } else { err = errors.New("boom") } reply := &roachpb.BatchResponse{} reply.Error = roachpb.NewError(err) return reply, nil } ctx := &DistSenderContext{ RPCSend: testFn, RangeDescriptorDB: mockRangeDescriptorDB(func(_ roachpb.RKey, _, _ bool) ([]roachpb.RangeDescriptor, *roachpb.Error) { return []roachpb.RangeDescriptor{testRangeDescriptor}, nil }), } ds := NewDistSender(ctx, g) ds.updateLeaderCache(1, leader) put := roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value")).(*roachpb.PutRequest) if _, pErr := client.SendWrapped(ds, nil, put); pErr != nil && !testutils.IsPError(pErr, "boom") { t.Errorf("put encountered unexpected error: %s", pErr) } if cur := ds.leaderCache.Lookup(1); reflect.DeepEqual(cur, &roachpb.ReplicaDescriptor{}) && !tc.shouldClearLeader { t.Errorf("%d: leader cache eviction: shouldClearLeader=%t, but value is %v", i, tc.shouldClearLeader, cur) } _, cachedDesc := ds.rangeCache.getCachedRangeDescriptor(roachpb.RKey(put.Key), false /* !inclusive */) if cachedDesc == nil != tc.shouldClearReplica { t.Errorf("%d: unexpected second replica lookup behaviour: wanted=%t", i, tc.shouldClearReplica) } } }
// sendToReplicas sends one or more RPCs to clients specified by the slice of // replicas. On success, Send returns the first successful reply. Otherwise, // Send returns an error if and as soon as the number of failed RPCs exceeds // the available endpoints less the number of required replies. func (ds *DistSender) sendToReplicas( opts SendOptions, rangeID roachpb.RangeID, replicas ReplicaSlice, args roachpb.BatchRequest, rpcContext *rpc.Context, ) (*roachpb.BatchResponse, error) { if len(replicas) < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d", len(replicas), 1)) } done := make(chan BatchCall, len(replicas)) transportFactory := opts.transportFactory if transportFactory == nil { transportFactory = grpcTransportFactory } transport, err := transportFactory(opts, rpcContext, replicas, args) if err != nil { return nil, err } defer transport.Close() if transport.IsExhausted() { return nil, roachpb.NewSendError( fmt.Sprintf("sending to all %d replicas failed", len(replicas))) } // Send the first request. pending := 1 transport.SendNext(done) // Wait for completions. This loop will retry operations that fail // with errors that reflect per-replica state and may succeed on // other replicas. var sendNextTimer timeutil.Timer defer sendNextTimer.Stop() for { sendNextTimer.Reset(opts.SendNextTimeout) select { case <-sendNextTimer.C: sendNextTimer.Read = true // On successive RPC timeouts, send to additional replicas if available. if !transport.IsExhausted() { log.Trace(opts.Context, "timeout, trying next peer") pending++ transport.SendNext(done) } case call := <-done: pending-- err := call.Err if err == nil { if log.V(2) { log.Infof(opts.Context, "RPC reply: %+v", call.Reply) } else if log.V(1) && call.Reply.Error != nil { log.Infof(opts.Context, "application error: %s", call.Reply.Error) } if !ds.handlePerReplicaError(rangeID, call.Reply.Error) { return call.Reply, nil } // Extract the detail so it can be included in the error // message if this is our last replica. // // TODO(bdarnell): The last error is not necessarily the best // one to return; we may want to remember the "best" error // we've seen (for example, a NotLeaseHolderError conveys more // information than a RangeNotFound). err = call.Reply.Error.GoError() } else if log.V(1) { log.Warningf(opts.Context, "RPC error: %s", err) } // Send to additional replicas if available. if !transport.IsExhausted() { log.Tracef(opts.Context, "error, trying next peer: %s", err) pending++ transport.SendNext(done) } if pending == 0 { return nil, roachpb.NewSendError( fmt.Sprintf("sending to all %d replicas failed; last error: %v", len(replicas), err)) } } } }
// TestComplexScenarios verifies various complex success/failure scenarios by // mocking sendOne. func TestComplexScenarios(t *testing.T) { defer leaktest.AfterTest(t)() stopper := stop.NewStopper() defer stopper.Stop() nodeContext := newNodeTestContext(nil, stopper) testCases := []struct { numServers int numErrors int numRetryableErrors int success bool isRetryableErrorExpected bool }{ // --- Success scenarios --- {1, 0, 0, true, false}, {5, 0, 0, true, false}, // There are some errors, but enough RPCs succeed. {5, 1, 0, true, false}, {5, 4, 0, true, false}, {5, 2, 0, true, false}, // --- Failure scenarios --- // All RPCs fail. {5, 5, 0, false, false}, // All RPCs fail, but some of the errors are retryable. {5, 5, 1, false, true}, {5, 5, 3, false, true}, // Some RPCs fail, but we do have enough remaining clients and recoverable errors. {5, 5, 2, false, true}, } for i, test := range testCases { // Copy the values to avoid data race. sendOneFn might // be called after this test case finishes. numErrors := test.numErrors numRetryableErrors := test.numRetryableErrors var serverAddrs []net.Addr for j := 0; j < test.numServers; j++ { _, ln := newTestServer(t, nodeContext) serverAddrs = append(serverAddrs, ln.Addr()) } sp := tracing.NewTracer().StartSpan("node test") defer sp.Finish() opts := SendOptions{ Ordering: orderStable, SendNextTimeout: 1 * time.Second, Timeout: 10 * time.Second, Trace: sp, } // Mock sendOne. sendOneFn = func(client *batchClient, timeout time.Duration, context *rpc.Context, trace opentracing.Span, done chan *netrpc.Call) { addr := client.RemoteAddr() addrID := -1 for serverAddrID, serverAddr := range serverAddrs { if serverAddr.String() == addr.String() { addrID = serverAddrID break } } if addrID == -1 { t.Fatalf("%d: %v is not found in serverAddrs: %v", i, addr, serverAddrs) } call := netrpc.Call{ Reply: &roachpb.BatchResponse{}, } if addrID < numErrors { call.Error = roachpb.NewSendError("test", addrID < numRetryableErrors) } done <- &call } defer func() { sendOneFn = sendOne }() reply, err := sendBatch(opts, serverAddrs, nodeContext) if test.success { if reply == nil { t.Errorf("%d: expected reply", i) } continue } retryErr, ok := err.(retry.Retryable) if !ok { t.Fatalf("%d: Unexpected error type: %v", i, err) } if retryErr.CanRetry() != test.isRetryableErrorExpected { t.Errorf("%d: Unexpected error: %v", i, retryErr) } } }
// Send sends one or more RPCs to clients specified by the slice of // replicas. On success, Send returns the first successful reply. Otherwise, // Send returns an error if and as soon as the number of failed RPCs exceeds // the available endpoints less the number of required replies. // // TODO(pmattis): Get rid of the getArgs function which requires the caller to // maintain a map from address to replica. Instead, pass in the list of // replicas instead of a list of addresses and use that to populate the // requests. func send(opts SendOptions, replicas ReplicaSlice, args roachpb.BatchRequest, context *rpc.Context) (proto.Message, error) { sp := opts.Trace if sp == nil { sp = tracing.NilSpan() } if len(replicas) < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d", len(replicas), 1), false) } done := make(chan *netrpc.Call, len(replicas)) clients := make([]batchClient, 0, len(replicas)) for i, replica := range replicas { clients = append(clients, batchClient{ Client: rpc.NewClient(&replica.NodeDesc.Address, context), replica: &replicas[i], args: args, }) } var orderedClients []batchClient switch opts.Ordering { case orderStable: orderedClients = clients case orderRandom: // Randomly permute order, but keep known-unhealthy clients last. var nHealthy int for i, client := range clients { select { case <-client.Healthy(): clients[i], clients[nHealthy] = clients[nHealthy], clients[i] nHealthy++ default: } } shuffleClients(clients[:nHealthy]) shuffleClients(clients[nHealthy:]) orderedClients = clients } // TODO(spencer): going to need to also sort by affinity; closest // ping time should win. Makes sense to have the rpc client/server // heartbeat measure ping times. With a bit of seasoning, each // node will be able to order the healthy replicas based on latency. // Send the first request. sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done) orderedClients = orderedClients[1:] var errors, retryableErrors int // Wait for completions. var sendNextTimer util.Timer defer sendNextTimer.Stop() for { sendNextTimer.Reset(opts.SendNextTimeout) select { case <-sendNextTimer.C: sendNextTimer.Read = true // On successive RPC timeouts, send to additional replicas if available. if len(orderedClients) > 0 { sp.LogEvent("timeout, trying next peer") sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done) orderedClients = orderedClients[1:] } case call := <-done: if call.Error == nil { // Verify response data integrity if this is a proto response. if req, reqOk := call.Args.(roachpb.Request); reqOk { if resp, respOk := call.Reply.(roachpb.Response); respOk { if err := resp.Verify(req); err != nil { call.Error = err } } else { call.Error = util.Errorf("response to proto request must be a proto") } } } err := call.Error if err == nil { if log.V(2) { log.Infof("successful reply: %+v", call.Reply) } return call.Reply.(proto.Message), nil } // Error handling. if log.V(1) { log.Warningf("error reply: %s", err) } errors++ // Since we have a reconnecting client here, disconnect errors are retryable. disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) { retryableErrors++ } if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("too many errors encountered (%d of %d total): %v", errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1) } // Send to additional replicas if available. if len(orderedClients) > 0 { sp.LogEvent("error, trying next peer") sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done) orderedClients = orderedClients[1:] } } } }
// Send sends one or more RPCs to clients specified by the slice of // replicas. On success, Send returns the first successful reply. Otherwise, // Send returns an error if and as soon as the number of failed RPCs exceeds // the available endpoints less the number of required replies. func send(opts SendOptions, replicas ReplicaSlice, args roachpb.BatchRequest, rpcContext *rpc.Context) (*roachpb.BatchResponse, error) { if len(replicas) < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d", len(replicas), 1), false) } done := make(chan batchCall, len(replicas)) clients := make([]batchClient, 0, len(replicas)) for _, replica := range replicas { conn, err := rpcContext.GRPCDial(replica.NodeDesc.Address.String()) if err != nil { return nil, err } argsCopy := args argsCopy.Replica = replica.ReplicaDescriptor clients = append(clients, batchClient{ remoteAddr: replica.NodeDesc.Address.String(), conn: conn, client: roachpb.NewInternalClient(conn), args: argsCopy, }) } // Put known-unhealthy clients last. nHealthy, err := splitHealthy(clients) if err != nil { return nil, err } var orderedClients []batchClient switch opts.Ordering { case orderStable: orderedClients = clients case orderRandom: // Randomly permute order, but keep known-unhealthy clients last. shuffleClients(clients[:nHealthy]) shuffleClients(clients[nHealthy:]) orderedClients = clients } // TODO(spencer): going to need to also sort by affinity; closest // ping time should win. Makes sense to have the rpc client/server // heartbeat measure ping times. With a bit of seasoning, each // node will be able to order the healthy replicas based on latency. // Send the first request. sendOneFn(opts, rpcContext, orderedClients[0], done) orderedClients = orderedClients[1:] var errors, retryableErrors int // Wait for completions. var sendNextTimer util.Timer defer sendNextTimer.Stop() for { sendNextTimer.Reset(opts.SendNextTimeout) select { case <-sendNextTimer.C: sendNextTimer.Read = true // On successive RPC timeouts, send to additional replicas if available. if len(orderedClients) > 0 { log.Trace(opts.Context, "timeout, trying next peer") sendOneFn(opts, rpcContext, orderedClients[0], done) orderedClients = orderedClients[1:] } case call := <-done: err := call.err if err == nil { if log.V(2) { log.Infof("successful reply: %+v", call.reply) } return call.reply, nil } // Error handling. if log.V(1) { log.Warningf("error reply: %s", err) } errors++ // Since we have a reconnecting client here, disconnect errors are retryable. disconnected := err == io.ErrUnexpectedEOF if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) { retryableErrors++ } if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("too many errors encountered (%d of %d total): %v", errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1) } // Send to additional replicas if available. if len(orderedClients) > 0 { log.Trace(opts.Context, "error, trying next peer") sendOneFn(opts, rpcContext, orderedClients[0], done) orderedClients = orderedClients[1:] } } } }
// Send sends one or more method RPCs to clients specified by the slice of // endpoint addrs. Arguments for methods are obtained using the supplied // getArgs function. Reply structs are obtained through the getReply() // function. On success, Send returns the first successful reply. Otherwise, // Send returns an error if and as soon as the number of failed RPCs exceeds // the available endpoints less the number of required replies. func send(opts SendOptions, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message, getReply func() proto.Message, context *rpc.Context) (proto.Message, error) { trace := opts.Trace // not thread safe! if len(addrs) < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d", len(addrs), 1), false) } done := make(chan *netrpc.Call, len(addrs)) var clients []*rpc.Client for _, addr := range addrs { clients = append(clients, rpc.NewClient(addr, context)) } var orderedClients []*rpc.Client switch opts.Ordering { case orderStable: orderedClients = clients case orderRandom: // Randomly permute order, but keep known-unhealthy clients last. var healthy, unhealthy []*rpc.Client for _, client := range clients { select { case <-client.Healthy(): healthy = append(healthy, client) default: unhealthy = append(unhealthy, client) } } for _, idx := range rand.Perm(len(healthy)) { orderedClients = append(orderedClients, healthy[idx]) } for _, idx := range rand.Perm(len(unhealthy)) { orderedClients = append(orderedClients, unhealthy[idx]) } } // TODO(spencer): going to need to also sort by affinity; closest // ping time should win. Makes sense to have the rpc client/server // heartbeat measure ping times. With a bit of seasoning, each // node will be able to order the healthy replicas based on latency. // Send the first request. sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done) orderedClients = orderedClients[1:] var errors, retryableErrors int // Wait for completions. for { select { case call := <-done: if call.Error == nil { // Verify response data integrity if this is a proto response. if req, reqOk := call.Args.(roachpb.Request); reqOk { if resp, respOk := call.Reply.(roachpb.Response); respOk { if err := resp.Verify(req); err != nil { call.Error = err } } else { call.Error = util.Errorf("response to proto request must be a proto") } } } err := call.Error if err == nil { if log.V(2) { log.Infof("%s: successful reply: %+v", method, call.Reply) } return call.Reply.(proto.Message), nil } // Error handling. if log.V(1) { log.Warningf("%s: error reply: %s", method, err) } errors++ // Since we have a reconnecting client here, disconnect errors are retryable. disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) { retryableErrors++ } if remainingNonErrorRPCs := len(addrs) - errors; remainingNonErrorRPCs < 1 { return nil, roachpb.NewSendError( fmt.Sprintf("too many errors encountered (%d of %d total): %v", errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1) } // Send to additional replicas if available. if len(orderedClients) > 0 { trace.Event("error, trying next peer") sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done) orderedClients = orderedClients[1:] } case <-time.After(opts.SendNextTimeout): // On successive RPC timeouts, send to additional replicas if available. if len(orderedClients) > 0 { trace.Event("timeout, trying next peer") sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done) orderedClients = orderedClients[1:] } } } }