Exemplo n.º 1
0
func maybeDecorateGRPCError(wrapped cmdFn) cmdFn {
	return func(cmd *cobra.Command, args []string) error {
		err := wrapped(cmd, args)

		{
			unwrappedErr := errors.Cause(err)
			if unwrappedErr == nil {
				return err
			}
			_, isSendError := unwrappedErr.(*roachpb.SendError)
			isGRPCError := grpcutil.IsClosedConnection(unwrappedErr)
			_, isNetError := unwrappedErr.(*net.OpError)
			if !(isSendError || isGRPCError || isNetError) {
				return err // intentionally return original to keep wrapping
			}
		}

		format := `unable to connect or connection lost.

Please check the address and credentials such as certificates (if attempting to
communicate with a secure cluster).

%s`

		return errors.Errorf(format, err)
	}
}
Exemplo n.º 2
0
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	meta, ok := ctx.conns.cache[target]
	if !ok {
		meta = &connMeta{}
		ctx.conns.cache[target] = meta
	}
	ctx.conns.Unlock()

	meta.Do(func() {
		var dialOpt grpc.DialOption
		if ctx.Insecure {
			dialOpt = grpc.WithInsecure()
		} else {
			tlsConfig, err := ctx.GetClientTLSConfig()
			if err != nil {
				meta.err = err
				return
			}
			dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
		}

		dialOpts := make([]grpc.DialOption, 0, 2+len(opts))
		dialOpts = append(dialOpts, dialOpt)
		dialOpts = append(dialOpts, grpc.WithBackoffMaxDelay(maxBackoff))
		dialOpts = append(dialOpts, opts...)

		if log.V(1) {
			log.Infof(ctx.masterCtx, "dialing %s", target)
		}
		meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...)
		if meta.err == nil {
			if err := ctx.Stopper.RunTask(func() {
				ctx.Stopper.RunWorker(func() {
					err := ctx.runHeartbeat(meta.conn, target)
					if err != nil && !grpcutil.IsClosedConnection(err) {
						log.Error(ctx.masterCtx, err)
					}
					ctx.removeConn(target, meta)
				})
			}); err != nil {
				meta.err = err
				// removeConn and ctx's cleanup worker both lock ctx.conns. However,
				// to avoid racing with meta's initialization, the cleanup worker
				// blocks on meta.Do while holding ctx.conns. Invoke removeConn
				// asynchronously to avoid deadlock.
				go ctx.removeConn(target, meta)
			}
		}
	})

	return meta.conn, meta.err
}
Exemplo n.º 3
0
func (ctx *Context) removeConnLocked(key string, meta *connMeta) {
	if log.V(1) {
		log.Infof(ctx.masterCtx, "closing %s", key)
	}
	if conn := meta.conn; conn != nil {
		if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
			if log.V(1) {
				log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err)
			}
		}
	}
	delete(ctx.conns.cache, key)
}
Exemplo n.º 4
0
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(
	g *Gossip,
	disconnected chan *client,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	nodeID roachpb.NodeID,
	breaker *circuit.Breaker,
) {
	stopper.RunWorker(func() {
		ctx, cancel := context.WithCancel(c.AnnotateCtx(context.Background()))
		var wg sync.WaitGroup
		defer func() {
			// This closes the outgoing stream, causing any attempt to send or
			// receive to return an error.
			//
			// Note: it is still possible for incoming gossip to be processed after
			// this point.
			cancel()

			// The stream is closed, but there may still be some incoming gossip
			// being processed. Wait until that is complete to avoid racing the
			// client's removal against the discovery of its remote's node ID.
			wg.Wait()
			disconnected <- c
		}()

		consecFailures := breaker.ConsecFailures()
		var stream Gossip_GossipClient
		if err := breaker.Call(func() error {
			// Note: avoid using `grpc.WithBlock` here. This code is already
			// asynchronous from the caller's perspective, so the only effect of
			// `WithBlock` here is blocking shutdown - at the time of this writing,
			// that ends ups up making `kv` tests take twice as long.
			conn, err := rpcCtx.GRPCDial(c.addr.String())
			if err != nil {
				return err
			}
			if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil {
				return err
			}
			return c.requestGossip(g, stream)
		}, 0); err != nil {
			if consecFailures == 0 {
				log.Warningf(ctx, "node %d: failed to start gossip client: %s", nodeID, err)
			}
			return
		}

		// Start gossiping.
		log.Infof(ctx, "node %d: started gossip client to %s", nodeID, c.addr)
		if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				if c.peerID != 0 {
					log.Infof(ctx, "node %d: closing client to node %d (%s): %s", nodeID, c.peerID, c.addr, err)
				} else {
					log.Infof(ctx, "node %d: closing client to %s: %s", nodeID, c.addr, err)
				}
				g.mu.Unlock()
			}
		}
	})
}