func (db *DistDB) lookupMetadata(metadataKey storage.Key, replicas []storage.Replica) (*storage.RangeLocations, error) {
	replica := storage.ChooseRandomReplica(replicas)
	if replica == nil {
		return nil, util.Errorf("No replica to choose for metadata key: %q", metadataKey)
	}
	addr, err := db.nodeIDToAddr(replica.NodeID)
	if err != nil {
		// TODO(harshit): May be retry a different replica.
		return nil, err
	}
	client := rpc.NewClient(addr)
	arg := &storage.InternalRangeLookupRequest{
		RequestHeader: storage.RequestHeader{
			Replica: *replica,
		},
		Key: metadataKey,
	}
	var reply storage.InternalRangeLookupResponse
	err = client.Call("Node.InternalRangeLookup", arg, &reply)
	if err != nil {
		return nil, err
	}
	if reply.Error != nil {
		return nil, reply.Error
	}
	return &reply.Locations, nil
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var err error
		c.rpcClient = rpc.NewClient(c.addr, context)
		select {
		case <-c.rpcClient.Healthy():
			// Start gossiping and wait for disconnect or error.
			err = c.gossip(g, stopper)
			if context.DisableCache {
				c.rpcClient.Close()
			}
		case <-c.rpcClient.Closed:
			err = util.Errorf("client closed")
		}
		done <- c
		if err != nil {
			if c.peerID != 0 {
				log.Infof("closing client to node %d (%s): %s", c.peerID, c.addr, err)
			} else {
				log.Infof("closing client to %s: %s", c.addr, err)
			}
		}
	})
}
// TestClientNotReady verifies that Send gets an RPC error when a client
// does not become ready.
func TestClientNotReady(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeContext := newNodeTestContext(nil, stopper)

	// Construct a server that listens but doesn't do anything.
	s, ln := newTestServer(t, nodeContext, true)
	if err := s.RegisterPublic("Heartbeat.Ping", (&Heartbeat{}).Ping, &rpc.PingRequest{}); err != nil {
		t.Fatal(err)
	}

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Nanosecond,
		Timeout:         100 * time.Nanosecond,
	}

	// Send RPC to an address where no server is running.
	if _, err := sendPing(opts, []net.Addr{ln.Addr()}, nodeContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}

	// Send the RPC again with no timeout.
	opts.SendNextTimeout = 0
	opts.Timeout = 0
	c := make(chan error)
	go func() {
		if _, err := sendPing(opts, []net.Addr{ln.Addr()}, nodeContext); err == nil {
			c <- util.Errorf("expected error when client is closed")
		} else if !strings.Contains(err.Error(), "failed as client connection was closed") {
			c <- err
		}
		close(c)
	}()

	select {
	case <-c:
		t.Fatalf("Unexpected end of rpc call")
	case <-time.After(1 * time.Millisecond):
	}

	// Grab the client for our invalid address and close it. This will cause the
	// blocked ping RPC to finish.
	rpc.NewClient(ln.Addr(), nodeContext).Close()
	if err := <-c; err != nil {
		t.Fatal(err)
	}
}
// TestRetryableError verifies that Send returns a retryable error
// when it hits an RPC error.
func TestRetryableError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	clientStopper := stop.NewStopper()
	defer clientStopper.Stop()
	clientContext := newNodeTestContext(nil, clientStopper)
	clientContext.HeartbeatTimeout = 10 * clientContext.HeartbeatInterval

	serverStopper := stop.NewStopper()
	serverContext := newNodeTestContext(nil, serverStopper)

	s, ln := newTestServer(t, serverContext)
	registerBatch(t, s, 0)

	c := rpc.NewClient(ln.Addr(), clientContext)
	// Wait until the client becomes healthy and shut down the server.
	<-c.Healthy()
	serverStopper.Stop()
	// Wait until the client becomes unhealthy.
	func() {
		for r := retry.Start(retry.Options{}); r.Next(); {
			select {
			case <-c.Healthy():
			case <-time.After(1 * time.Nanosecond):
				return
			}
		}
	}()

	sp := tracing.NewTracer().StartSpan("node test")
	defer sp.Finish()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Millisecond,
		Timeout:         100 * time.Millisecond,
		Trace:           sp,
	}
	if _, err := sendBatch(opts, []net.Addr{ln.Addr()}, clientContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}
}
// newSender returns a new instance of Sender.
func newSender(server string, context *base.Context, retryOpts retry.Options) (*Sender, error) {
	addr, err := net.ResolveTCPAddr("tcp", server)
	if err != nil {
		return nil, err
	}
	if context.Insecure {
		log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.")
	}
	ctx := roachrpc.NewContext(context, hlc.NewClock(hlc.UnixNano), nil)
	client := roachrpc.NewClient(addr, &retryOpts, ctx)
	return &Sender{
		client:    client,
		retryOpts: retryOpts,
	}, nil
}
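// Illustrative sketch (not part of the original source): one way a caller
// might construct a Sender via newSender. The server address and the insecure
// base.Context shown here are assumptions for demonstration only; default
// retry.Options are used to avoid guessing at field names.
func exampleNewSenderUsage() {
	sender, err := newSender("localhost:8080", &base.Context{Insecure: true}, retry.Options{})
	if err != nil {
		log.Fatalf("could not create sender: %s", err)
	}
	_ = sender // the Sender would subsequently issue RPCs via its reconnecting client
}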
// getNode gets an RPC client to the node where the requested
// key is located. The range cache may be updated. The bi-level range
// metadata for the cluster is consulted in the event that the local
// cache doesn't contain range metadata corresponding to the specified
// key.
func (db *DistDB) getNode(key storage.Key) (*rpc.Client, *storage.Replica, error) {
	meta2Val, err := db.lookupMeta2(key)
	if err != nil {
		return nil, nil, err
	}
	replica := storage.ChooseRandomReplica(meta2Val.Replicas)
	if replica == nil {
		return nil, nil, util.Errorf("No node found for key: %q", key)
	}
	addr, err := db.nodeIDToAddr(replica.NodeID)
	if err != nil {
		// TODO(harshit): May be retry a different replica.
		return nil, nil, err
	}
	return rpc.NewClient(addr), replica, nil
}
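// storage.ChooseRandomReplica is used above but not shown in this excerpt. A
// minimal sketch of what such a helper could look like follows; it is an
// assumption about the behavior (uniform random choice, nil for an empty
// slice), not the actual storage package implementation.
func chooseRandomReplicaSketch(replicas []storage.Replica) *storage.Replica {
	if len(replicas) == 0 {
		return nil
	}
	// math/rand is assumed to be imported as rand.
	return &replicas[rand.Intn(len(replicas))]
}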
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method blocks and should be invoked via goroutine.
func (c *client) start(g *Gossip, done chan *client) {
	c.rpcClient = rpc.NewClient(c.addr)
	select {
	case <-c.rpcClient.Ready:
		// Start gossip; see below.
	case <-time.After(gossipDialTimeout):
		c.err = util.Errorf("timeout connecting to remote server: %v", c.addr)
		done <- c
		return
	}
	// Start gossipping and wait for disconnect or error.
	c.lastFresh = time.Now().UnixNano()
	err := c.gossip(g)
	if err != nil {
		c.err = util.Errorf("gossip client: %s", err)
	}
	done <- c
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method blocks and should be invoked via goroutine.
func (c *client) start(g *Gossip, done chan *client) {
	c.rpcClient = rpc.NewClient(c.addr, nil)
	select {
	case <-c.rpcClient.Ready:
		// Success!
	case <-c.rpcClient.Closed:
		c.err = util.Errorf("gossip client failed to connect")
		done <- c
		return
	}
	// Start gossipping and wait for disconnect or error.
	c.lastFresh = time.Now().UnixNano()
	err := c.gossip(g)
	if err != nil {
		c.err = util.Errorf("gossip client: %s", err)
	}
	done <- c
}
// TestRetryableError verifies that Send returns a retryable error
// when it hits an RPC error.
func TestRetryableError(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()

	nodeContext := newNodeTestContext(nil, stopper)
	nodeContext.HeartbeatTimeout = 10 * nodeContext.HeartbeatInterval

	_, ln := newTestServer(t, nodeContext, false)

	c := rpc.NewClient(ln.Addr(), nodeContext)
	// Wait until the client becomes healthy and shut down the server.
	<-c.Healthy()
	ln.Close()
	// Wait until the client becomes unhealthy.
	func() {
		for r := retry.Start(retry.Options{}); r.Next(); {
			select {
			case <-c.Healthy():
			case <-time.After(1 * time.Nanosecond):
				return
			}
		}
	}()

	opts := SendOptions{
		Ordering:        orderStable,
		SendNextTimeout: 100 * time.Millisecond,
		Timeout:         100 * time.Millisecond,
	}
	if _, err := sendPing(opts, []net.Addr{ln.Addr()}, nodeContext); err != nil {
		retryErr, ok := err.(retry.Retryable)
		if !ok {
			t.Fatalf("Unexpected error type: %v", err)
		}
		if !retryErr.CanRetry() {
			t.Errorf("Expected retryable error: %v", retryErr)
		}
	} else {
		t.Fatalf("Unexpected success")
	}
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		c.rpcClient = rpc.NewClient(c.addr, context)
		select {
		case <-c.rpcClient.Healthy():
			// Success!
		case <-c.rpcClient.Closed:
			c.err = util.Errorf("gossip client was closed")
			done <- c
			return
		}
		// Start gossipping and wait for disconnect or error.
		c.lastFresh = time.Now().UnixNano()
		c.err = c.gossip(g, stopper)
		if context.DisableCache {
			c.rpcClient.Close()
		}
		done <- c
	})
}
// newRPCSender returns a new instance of rpcSender.
func newRPCSender(server string, context *base.Context, retryOpts retry.Options, stopper *stop.Stopper) (*rpcSender, error) {
	addr, err := net.ResolveTCPAddr("tcp", server)
	if err != nil {
		return nil, err
	}

	if context.Insecure {
		log.Warning("running in insecure mode, this is strongly discouraged. See --insecure and --certs.")
	} else {
		if _, err := context.GetClientTLSConfig(); err != nil {
			return nil, err
		}
	}

	ctx := rpc.NewContext(context, hlc.NewClock(hlc.UnixNano), stopper)
	client := rpc.NewClient(addr, ctx)
	return &rpcSender{
		client:    client,
		retryOpts: retryOpts,
	}, nil
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		c.rpcClient = rpc.NewClient(c.addr, nil, context)
		select {
		case <-c.rpcClient.Ready:
			// Success!
		case <-c.rpcClient.Closed:
			c.err = util.Errorf("gossip client failed to connect")
			done <- c
			return
		}
		// Start gossipping and wait for disconnect or error.
		c.lastFresh = time.Now().UnixNano()
		c.err = c.gossip(g, stopper)
		if c.err != nil {
			c.rpcClient.Close()
		}
		done <- c
	})
}
// start dials the remote addr and commences gossip once connected.
// Upon exit, signals client is done by pushing it onto the done
// channel. If the client experienced an error, its err field will
// be set. This method starts client processing in a goroutine and
// returns immediately.
func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var err error
		c.rpcClient = rpc.NewClient(c.addr, context)
		select {
		case <-c.rpcClient.Healthy():
			// Start gossiping and wait for disconnect or error.
			c.lastFresh = time.Now().UnixNano()
			err = c.gossip(g, stopper)
			if context.DisableCache {
				c.rpcClient.Close()
			}
		case <-c.rpcClient.Closed:
			err = util.Errorf("client closed")
		}
		done <- c
		if err != nil {
			log.Infof("gossip client to %s: %s", c.addr, err)
		}
	})
}
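// Sketch (assumption, not from the original source): how a caller might start
// a gossip client and react when it finishes. The newClientSketch constructor
// is hypothetical; the done channel and the start signature match the code
// above.
func startClientSketch(g *Gossip, addr net.Addr, context *rpc.Context, stopper *stop.Stopper) {
	done := make(chan *client, 1)
	c := newClientSketch(addr) // hypothetical constructor that fills in c.addr
	c.start(g, done, context, stopper)
	stopper.RunWorker(func() {
		select {
		case <-stopper.ShouldStop():
		case <-done:
			// The client has terminated; the caller could schedule a
			// reconnect attempt here.
		}
	})
}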
// Send sends one or more method RPCs to clients specified by the slice of
// endpoint addrs. Arguments for methods are obtained using the supplied
// getArgs function. Reply structs are obtained through the getReply()
// function. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
func send(opts SendOptions, method string, addrs []net.Addr, getArgs func(addr net.Addr) proto.Message,
	getReply func() proto.Message, context *rpc.Context) (proto.Message, error) {
	trace := opts.Trace // not thread safe!

	if len(addrs) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(addrs), 1), false)
	}

	done := make(chan *netrpc.Call, len(addrs))

	var clients []*rpc.Client
	for _, addr := range addrs {
		clients = append(clients, rpc.NewClient(addr, context))
	}

	var orderedClients []*rpc.Client
	switch opts.Ordering {
	case orderStable:
		orderedClients = clients
	case orderRandom:
		// Randomly permute order, but keep known-unhealthy clients last.
		var healthy, unhealthy []*rpc.Client
		for _, client := range clients {
			select {
			case <-client.Healthy():
				healthy = append(healthy, client)
			default:
				unhealthy = append(unhealthy, client)
			}
		}
		for _, idx := range rand.Perm(len(healthy)) {
			orderedClients = append(orderedClients, healthy[idx])
		}
		for _, idx := range rand.Perm(len(unhealthy)) {
			orderedClients = append(orderedClients, unhealthy[idx])
		}
	}
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	// Send the first request.
	sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done)
	orderedClients = orderedClients[1:]

	var errors, retryableErrors int

	// Wait for completions.
	for {
		select {
		case call := <-done:
			if call.Error == nil {
				// Verify response data integrity if this is a proto response.
				if req, reqOk := call.Args.(roachpb.Request); reqOk {
					if resp, respOk := call.Reply.(roachpb.Response); respOk {
						if err := resp.Verify(req); err != nil {
							call.Error = err
						}
					} else {
						call.Error = util.Errorf("response to proto request must be a proto")
					}
				}
			}
			err := call.Error
			if err == nil {
				if log.V(2) {
					log.Infof("%s: successful reply: %+v", method, call.Reply)
				}
				return call.Reply.(proto.Message), nil
			}

			// Error handling.
			if log.V(1) {
				log.Warningf("%s: error reply: %s", method, err)
			}

			errors++

			// Since we have a reconnecting client here, disconnect errors are retryable.
			disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF
			if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {
				retryableErrors++
			}

			if remainingNonErrorRPCs := len(addrs) - errors; remainingNonErrorRPCs < 1 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("too many errors encountered (%d of %d total): %v",
						errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1)
			}

			// Send to additional replicas if available.
			if len(orderedClients) > 0 {
				trace.Event("error, trying next peer")
				sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done)
				orderedClients = orderedClients[1:]
			}

		case <-time.After(opts.SendNextTimeout):
			// On successive RPC timeouts, send to additional replicas if available.
			if len(orderedClients) > 0 {
				trace.Event("timeout, trying next peer")
				sendOneFn(orderedClients[0], opts.Timeout, method, getArgs, getReply, context, trace, done)
				orderedClients = orderedClients[1:]
			}
		}
	}
}
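// Sketch (assumption, not from the original source): a caller of send supplies
// getArgs and getReply closures that produce request and reply protos for each
// address. A minimal ping-style wrapper, in the spirit of the sendPing helper
// used by the tests above, could look like this; the PingResponse type and the
// exact request contents are assumptions for illustration.
func sendPingSketch(opts SendOptions, addrs []net.Addr, context *rpc.Context) (proto.Message, error) {
	getArgs := func(addr net.Addr) proto.Message {
		// The same request is sent regardless of the target address.
		return &rpc.PingRequest{}
	}
	getReply := func() proto.Message {
		return &rpc.PingResponse{}
	}
	return send(opts, "Heartbeat.Ping", addrs, getArgs, getReply, context)
}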
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(raftNodeID proto.RaftNodeID) {
	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, raftNodeID)
		t.mu.Unlock()
	}()

	nodeID, _ := proto.DecodeRaftNodeID(raftNodeID)
	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		log.Errorf("could not get address for node %d: %s", nodeID, err)
		return
	}
	client := rpc.NewClient(addr, nil, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d failed to connect", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Ready:
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoReq := &proto.RaftMessageRequest{}
	protoResp := &proto.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}

		// Convert to proto format.
		protoReq.Reset()
		protoReq.GroupID = req.GroupID
		var err error
		if protoReq.Msg, err = req.Message.Marshal(); err != nil {
			log.Errorf("could not marshal message: %s", err)
			continue
		}

		if !client.IsHealthy() {
			log.Warningf("raft client for node %d unhealthy", nodeID)
			return
		}
		client.Go(raftMessageName, protoReq, protoResp, done)

		// TODO(tschottdorf): work around #1176 by wasting just a little
		// bit of time before moving to the next request.
		select {
		case <-done:
		case <-time.After(10 * time.Millisecond):
		}
	}
}
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(nodeID roachpb.NodeID, storeID roachpb.StoreID) {
	t.mu.Lock()
	ch, ok := t.queues[storeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, storeID)
		t.mu.Unlock()
	}()

	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		if log.V(1) {
			log.Errorf("could not get address for node %d: %s", nodeID, err)
		}
		return
	}
	client := rpc.NewClient(addr, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d was closed", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Healthy():
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoResp := &multiraft.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}
		client.Go(raftMessageName, req, protoResp, done)
	}
}
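// Sketch (assumption, not from the original source): the sending side that
// pairs with processQueue above. It lazily creates the per-store queue and
// starts a processQueue worker for it, matching the comment that "a new
// instance of processQueue should be started by the next message to be sent".
// The channel capacity and the sendSketch name are assumptions for
// illustration.
func (t *rpcTransport) sendSketch(nodeID roachpb.NodeID, storeID roachpb.StoreID, req *multiraft.RaftMessageRequest) error {
	t.mu.Lock()
	ch, ok := t.queues[storeID]
	if !ok {
		ch = make(chan *multiraft.RaftMessageRequest, 100)
		t.queues[storeID] = ch
		// Start a worker for the new queue; it deletes the queue entry when it exits.
		t.rpcContext.Stopper.RunWorker(func() {
			t.processQueue(nodeID, storeID)
		})
	}
	t.mu.Unlock()

	select {
	case ch <- req:
		return nil
	default:
		return util.Errorf("queue for store %d is full", storeID)
	}
}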
// Send sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
//
// TODO(pmattis): Get rid of the getArgs function which requires the caller to
// maintain a map from address to replica. Instead, pass in the list of
// replicas instead of a list of addresses and use that to populate the
// requests.
func send(opts SendOptions, replicas ReplicaSlice, args roachpb.BatchRequest,
	context *rpc.Context) (proto.Message, error) {
	sp := opts.Trace
	if sp == nil {
		sp = tracing.NilSpan()
	}

	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1), false)
	}

	done := make(chan *netrpc.Call, len(replicas))

	clients := make([]batchClient, 0, len(replicas))
	for i, replica := range replicas {
		clients = append(clients, batchClient{
			Client:  rpc.NewClient(&replica.NodeDesc.Address, context),
			replica: &replicas[i],
			args:    args,
		})
	}

	var orderedClients []batchClient
	switch opts.Ordering {
	case orderStable:
		orderedClients = clients
	case orderRandom:
		// Randomly permute order, but keep known-unhealthy clients last.
		var nHealthy int
		for i, client := range clients {
			select {
			case <-client.Healthy():
				clients[i], clients[nHealthy] = clients[nHealthy], clients[i]
				nHealthy++
			default:
			}
		}
		shuffleClients(clients[:nHealthy])
		shuffleClients(clients[nHealthy:])
		orderedClients = clients
	}
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	// Send the first request.
	sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
	orderedClients = orderedClients[1:]

	var errors, retryableErrors int

	// Wait for completions.
	var sendNextTimer util.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("timeout, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]
			}

		case call := <-done:
			if call.Error == nil {
				// Verify response data integrity if this is a proto response.
				if req, reqOk := call.Args.(roachpb.Request); reqOk {
					if resp, respOk := call.Reply.(roachpb.Response); respOk {
						if err := resp.Verify(req); err != nil {
							call.Error = err
						}
					} else {
						call.Error = util.Errorf("response to proto request must be a proto")
					}
				}
			}
			err := call.Error
			if err == nil {
				if log.V(2) {
					log.Infof("successful reply: %+v", call.Reply)
				}
				return call.Reply.(proto.Message), nil
			}

			// Error handling.
			if log.V(1) {
				log.Warningf("error reply: %s", err)
			}

			errors++

			// Since we have a reconnecting client here, disconnect errors are retryable.
			disconnected := err == netrpc.ErrShutdown || err == io.ErrUnexpectedEOF
			if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {
				retryableErrors++
			}

			if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("too many errors encountered (%d of %d total): %v",
						errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1)
			}

			// Send to additional replicas if available.
			if len(orderedClients) > 0 {
				sp.LogEvent("error, trying next peer")
				sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
				orderedClients = orderedClients[1:]
			}
		}
	}
}
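// shuffleClients is referenced above but not shown in this excerpt. A minimal
// Fisher-Yates sketch of what such a helper could look like follows; this is
// an assumption about its behavior, not the actual implementation.
func shuffleClientsSketch(clients []batchClient) {
	for i := len(clients) - 1; i > 0; i-- {
		j := rand.Intn(i + 1)
		clients[i], clients[j] = clients[j], clients[i]
	}
}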