func maybeDecorateGRPCError(wrapped cmdFn) cmdFn { return func(cmd *cobra.Command, args []string) error { err := wrapped(cmd, args) { unwrappedErr := errors.Cause(err) if unwrappedErr == nil { return err } _, isSendError := unwrappedErr.(*roachpb.SendError) isGRPCError := grpcutil.IsClosedConnection(unwrappedErr) _, isNetError := unwrappedErr.(*net.OpError) if !(isSendError || isGRPCError || isNetError) { return err // intentionally return original to keep wrapping } } format := `unable to connect or connection lost. Please check the address and credentials such as certificates (if attempting to communicate with a secure cluster). %s` return errors.Errorf(format, err) } }
// GRPCDial calls grpc.Dial with the options appropriate for the context. func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { ctx.conns.Lock() meta, ok := ctx.conns.cache[target] if !ok { meta = &connMeta{} ctx.conns.cache[target] = meta } ctx.conns.Unlock() meta.Do(func() { var dialOpt grpc.DialOption if ctx.Insecure { dialOpt = grpc.WithInsecure() } else { tlsConfig, err := ctx.GetClientTLSConfig() if err != nil { meta.err = err return } dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)) } dialOpts := make([]grpc.DialOption, 0, 2+len(opts)) dialOpts = append(dialOpts, dialOpt) dialOpts = append(dialOpts, grpc.WithBackoffMaxDelay(maxBackoff)) dialOpts = append(dialOpts, opts...) if log.V(1) { log.Infof(ctx.masterCtx, "dialing %s", target) } meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...) if meta.err == nil { if err := ctx.Stopper.RunTask(func() { ctx.Stopper.RunWorker(func() { err := ctx.runHeartbeat(meta.conn, target) if err != nil && !grpcutil.IsClosedConnection(err) { log.Error(ctx.masterCtx, err) } ctx.removeConn(target, meta) }) }); err != nil { meta.err = err // removeConn and ctx's cleanup worker both lock ctx.conns. However, // to avoid racing with meta's initialization, the cleanup worker // blocks on meta.Do while holding ctx.conns. Invoke removeConn // asynchronously to avoid deadlock. go ctx.removeConn(target, meta) } } }) return meta.conn, meta.err }
func (ctx *Context) removeConnLocked(key string, meta *connMeta) { if log.V(1) { log.Infof(ctx.masterCtx, "closing %s", key) } if conn := meta.conn; conn != nil { if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) { if log.V(1) { log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err) } } } delete(ctx.conns.cache, key) }
// start dials the remote addr and commences gossip once connected. Upon exit, // the client is sent on the disconnected channel. This method starts client // processing in a goroutine and returns immediately. func (c *client) start( g *Gossip, disconnected chan *client, rpcCtx *rpc.Context, stopper *stop.Stopper, nodeID roachpb.NodeID, breaker *circuit.Breaker, ) { stopper.RunWorker(func() { ctx, cancel := context.WithCancel(c.AnnotateCtx(context.Background())) var wg sync.WaitGroup defer func() { // This closes the outgoing stream, causing any attempt to send or // receive to return an error. // // Note: it is still possible for incoming gossip to be processed after // this point. cancel() // The stream is closed, but there may still be some incoming gossip // being processed. Wait until that is complete to avoid racing the // client's removal against the discovery of its remote's node ID. wg.Wait() disconnected <- c }() consecFailures := breaker.ConsecFailures() var stream Gossip_GossipClient if err := breaker.Call(func() error { // Note: avoid using `grpc.WithBlock` here. This code is already // asynchronous from the caller's perspective, so the only effect of // `WithBlock` here is blocking shutdown - at the time of this writing, // that ends ups up making `kv` tests take twice as long. conn, err := rpcCtx.GRPCDial(c.addr.String()) if err != nil { return err } if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil { return err } return c.requestGossip(g, stream) }, 0); err != nil { if consecFailures == 0 { log.Warningf(ctx, "node %d: failed to start gossip client: %s", nodeID, err) } return } // Start gossiping. log.Infof(ctx, "node %d: started gossip client to %s", nodeID, c.addr) if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil { if !grpcutil.IsClosedConnection(err) { g.mu.Lock() if c.peerID != 0 { log.Infof(ctx, "node %d: closing client to node %d (%s): %s", nodeID, c.peerID, c.addr, err) } else { log.Infof(ctx, "node %d: closing client to %s: %s", nodeID, c.addr, err) } g.mu.Unlock() } } }) }