Example #1
// grpcTransportFactory is the default TransportFactory, using gRPC.
func grpcTransportFactory(
	opts SendOptions,
	rpcContext *rpc.Context,
	replicas ReplicaSlice,
	args roachpb.BatchRequest,
) (Transport, error) {
	clients := make([]batchClient, 0, len(replicas))
	for _, replica := range replicas {
		conn, err := rpcContext.GRPCDial(replica.NodeDesc.Address.String())
		if err != nil {
			return nil, err
		}
		argsCopy := args
		argsCopy.Replica = replica.ReplicaDescriptor
		remoteAddr := replica.NodeDesc.Address.String()
		clients = append(clients, batchClient{
			remoteAddr: remoteAddr,
			conn:       conn,
			client:     roachpb.NewInternalClient(conn),
			args:       argsCopy,
			healthy:    rpcContext.IsConnHealthy(remoteAddr),
		})
	}

	// Put known-unhealthy clients last.
	splitHealthy(clients)

	return &grpcTransport{
		opts:           opts,
		rpcContext:     rpcContext,
		orderedClients: clients,
	}, nil
}
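
The factory above relies on splitHealthy to push known-unhealthy connections to the back of the slice. The sketch below shows what such a helper could look like for the no-result call used here (Example #4 uses a variant that also returns the number of healthy clients); the byHealth type and the use of the standard library's sort package are illustrative assumptions, not the snippet's actual implementation.

// splitHealthy reorders clients in place so that healthy clients come first,
// preserving the relative order within each group.
func splitHealthy(clients []batchClient) {
	sort.Stable(byHealth(clients))
}

// byHealth orders batchClients by health: healthy clients sort before unhealthy ones.
type byHealth []batchClient

func (h byHealth) Len() int           { return len(h) }
func (h byHealth) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
func (h byHealth) Less(i, j int) bool { return h[i].healthy && !h[j].healthy }
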
Example #2
// reserve sends a reservation request RPC to the node and store identified by
// toStoreID. It returns an error if the reservation was not successfully
// booked. When unsuccessful, the store is marked as having a declined
// reservation so it will not be considered for up-replication or rebalancing
// until the configured timeout period has passed.
// TODO(bram): consider moving the nodeID to the store pool during
// NewStorePool.
func (sp *StorePool) reserve(
	curIdent roachpb.StoreIdent,
	toStoreID roachpb.StoreID,
	rangeID roachpb.RangeID,
	rangeSize int64,
) error {
	if !sp.reservationsEnabled {
		return nil
	}
	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail, ok := sp.mu.stores[toStoreID]
	if !ok {
		return fmt.Errorf("store does not exist in the store pool")
	}
	conn, err := sp.rpcContext.GRPCDial(detail.desc.Node.Address.String())
	if err != nil {
		return err
	}

	client := roachpb.NewInternalClient(conn)
	req := &roachpb.ReservationRequest{
		StoreRequestHeader: roachpb.StoreRequestHeader{
			NodeID:  detail.desc.Node.NodeID,
			StoreID: toStoreID,
		},
		FromNodeID:  curIdent.NodeID,
		FromStoreID: curIdent.StoreID,
		RangeSize:   rangeSize,
		RangeID:     rangeID,
	}

	if log.V(2) {
		log.Infof("proposing new reservation:%+v", req)
	}

	ctxWithTimeout, cancel := context.WithTimeout(context.TODO(), sp.reserveRPCTimeout)
	defer cancel()
	resp, err := client.Reserve(ctxWithTimeout, req)

	// If a reservation is declined, be it due to an error or because it was
	// rejected, we mark the store detail as having been rejected so it won't
	// be considered as a candidate for new replicas until after the configured
	// timeout period has passed.
	if err != nil {
		detail.unavailableUntil = sp.clock.Now().GoTime().Add(sp.failedReservationsTimeout)
		if log.V(2) {
			log.Infof("reservation failed, store:%s will be unavailable for %s until %s",
				toStoreID, sp.failedReservationsTimeout, detail.unavailableUntil)
		}
		return fmt.Errorf("reservation failed:%+v due to error:%s", req, err)
	}
	if !resp.Reserved {
		detail.unavailableUntil = sp.clock.Now().GoTime().Add(sp.declinedReservationsTimeout)
		if log.V(2) {
			log.Infof("reservation failed, store:%s will be unavailable for %s until %s",
				toStoreID, sp.declinedReservationsTimeout, detail.unavailableUntil)
		}
		return fmt.Errorf("reservation declined:%+v", req)
	}

	if log.V(2) {
		log.Infof("reservation was approved:%+v", req)
	}
	return nil
}
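
reserve marks a store that fails or declines a reservation by setting unavailableUntil. The hypothetical helper below sketches how that mark could be consulted when filtering candidate stores for new replicas; the method name and its placement on storeDetail are assumptions, not part of the original code.

// isThrottled reports whether the store recently declined or failed a
// reservation and should be skipped as an up-replication or rebalance target.
// (Hypothetical sketch; unavailableUntil is the field set by reserve above.)
func (sd *storeDetail) isThrottled(now time.Time) bool {
	return now.Before(sd.unavailableUntil)
}
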
Example #3
// waitForStoreFrozen polls the given stores until they all report having no
// Replicas in the wrong state (frozen or thawed, depending on wantFrozen), or
// an error or timeout occurs.
func (s *adminServer) waitForStoreFrozen(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	wantFrozen bool,
) error {
	mu := struct {
		sync.Mutex
		oks map[roachpb.StoreID]bool
	}{
		oks: make(map[roachpb.StoreID]bool),
	}

	opts := base.DefaultRetryOptions()
	opts.Closer = s.server.stopper.ShouldDrain()
	opts.MaxRetries = 20
	sem := make(chan struct{}, 256)
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err:
		default:
		}
	}

	numWaiting := len(stores) // loop until this drops to zero
	var err error
	for r := retry.Start(opts); r.Next(); {
		mu.Lock()
		for storeID, nodeID := range stores {
			storeID, nodeID := storeID, nodeID // loop-local copies for goroutine
			var nodeDesc roachpb.NodeDescriptor
			if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil {
				sendErr(err)
				break
			}
			addr := nodeDesc.Address.String()

			if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded {
				continue
			}
			mu.oks[storeID] = false // mark as inflight
			action := func() (err error) {
				var resp *roachpb.PollFrozenResponse
				defer func() {
					message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID)

					if err != nil {
						message += err.Error()
					} else {
						numMismatching := len(resp.Results)
						mu.Lock()
						if numMismatching == 0 {
							// If the Store is in the right state, mark it as such.
							// This means we won't try it again.
							message += "ready"
							mu.oks[storeID] = true
						} else {
							// Otherwise, forget that we tried the Store so that
							// the retry loop picks it up again.
							message += fmt.Sprintf("%d replicas report wrong status", numMismatching)
							if limit := 10; numMismatching > limit {
								message += " [truncated]: "
								resp.Results = resp.Results[:limit]
							} else {
								message += ": "
							}
							message += fmt.Sprintf("%+v", resp.Results)
							delete(mu.oks, storeID)
						}
						mu.Unlock()
						err = stream.Send(&serverpb.ClusterFreezeResponse{
							Message: message,
						})
					}
				}()
				conn, err := s.server.rpcContext.GRPCDial(addr)
				if err != nil {
					return err
				}
				client := roachpb.NewInternalClient(conn)
				resp, err = client.PollFrozen(context.Background(),
					&roachpb.PollFrozenRequest{
						StoreRequestHeader: roachpb.StoreRequestHeader{
							NodeID:  nodeID,
							StoreID: storeID,
						},
						// If we are looking to freeze everything, we want to
						// collect thawed Replicas, and vice versa.
						CollectFrozen: !wantFrozen,
					})
				return err
			}
			// Run a limited, non-blocking task. That means the task simply
			// won't run if the semaphore is full (or the node is draining).
			// Both are handled by the surrounding retry loop.
			if !s.server.stopper.RunLimitedAsyncTask(sem, func() {
				if err := action(); err != nil {
					sendErr(err)
				}
			}) {
				// Node draining.
				sendErr(errors.New("node is shutting down"))
				break
			}
		}

		numWaiting = len(stores)
		for _, ok := range mu.oks {
			if ok {
				// Store has reported that it is in the desired state.
				numWaiting--
				continue
			}
		}
		mu.Unlock()

		select {
		case err = <-errChan:
		default:
		}

		// Keep going unless there's been an error or every store is in the desired state.
		if err != nil || numWaiting == 0 {
			break
		}
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("waiting for %d store%s to apply operation",
				numWaiting, util.Pluralize(int64(numWaiting))),
		}); err != nil {
			return err
		}
	}
	if err != nil {
		return err
	}
	if numWaiting > 0 {
		err = fmt.Errorf("timed out waiting for %d store%s to report freeze",
			numWaiting, util.Pluralize(int64(numWaiting)))
	}
	return err
}
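
waitForStoreFrozen bounds concurrency with a buffered channel used as a semaphore: RunLimitedAsyncTask declines to run a task when the semaphore is full or the node is draining, and the surrounding retry loop picks the store up again later. The standalone sketch below illustrates that pattern using only the standard library; the helper name is an assumption and not the stopper's actual API.

// tryRunLimited starts task in a goroutine only if a slot is free in sem;
// it returns false without running the task when the semaphore is full.
func tryRunLimited(sem chan struct{}, task func()) bool {
	select {
	case sem <- struct{}{}: // acquire a slot without blocking
	default:
		return false // semaphore full; the caller retries later
	}
	go func() {
		defer func() { <-sem }() // release the slot when the task finishes
		task()
	}()
	return true
}
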
Example #4
// send sends one or more RPCs to clients specified by the slice of
// replicas. On success, send returns the first successful reply. Otherwise,
// send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
func send(opts SendOptions, replicas ReplicaSlice,
	args roachpb.BatchRequest, rpcContext *rpc.Context) (*roachpb.BatchResponse, error) {

	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1), false)
	}

	done := make(chan batchCall, len(replicas))

	clients := make([]batchClient, 0, len(replicas))
	for _, replica := range replicas {
		conn, err := rpcContext.GRPCDial(replica.NodeDesc.Address.String())
		if err != nil {
			return nil, err
		}
		argsCopy := args
		argsCopy.Replica = replica.ReplicaDescriptor
		clients = append(clients, batchClient{
			remoteAddr: replica.NodeDesc.Address.String(),
			conn:       conn,
			client:     roachpb.NewInternalClient(conn),
			args:       argsCopy,
		})
	}

	// Put known-unhealthy clients last.
	nHealthy, err := splitHealthy(clients)
	if err != nil {
		return nil, err
	}

	var orderedClients []batchClient
	switch opts.Ordering {
	case orderStable:
		orderedClients = clients
	case orderRandom:
		// Randomly permute order, but keep known-unhealthy clients last.
		shuffleClients(clients[:nHealthy])
		shuffleClients(clients[nHealthy:])

		orderedClients = clients
	}
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	// Send the first request.
	sendOneFn(opts, rpcContext, orderedClients[0], done)
	orderedClients = orderedClients[1:]

	var errors, retryableErrors int

	// Wait for completions.
	var sendNextTimer util.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if len(orderedClients) > 0 {
				log.Trace(opts.Context, "timeout, trying next peer")
				sendOneFn(opts, rpcContext, orderedClients[0], done)
				orderedClients = orderedClients[1:]
			}

		case call := <-done:
			err := call.err
			if err == nil {
				if log.V(2) {
					log.Infof("successful reply: %+v", call.reply)
				}

				return call.reply, nil
			}

			// Error handling.
			if log.V(1) {
				log.Warningf("error reply: %s", err)
			}

			errors++

			// Since we have a reconnecting client here, disconnect errors are retryable.
			disconnected := err == io.ErrUnexpectedEOF
			if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {
				retryableErrors++
			}

			if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("too many errors encountered (%d of %d total): %v",
						errors, len(clients), err), remainingNonErrorRPCs+retryableErrors >= 1)
			}
			// Send to additional replicas if available.
			if len(orderedClients) > 0 {
				log.Trace(opts.Context, "error, trying next peer")
				sendOneFn(opts, rpcContext, orderedClients[0], done)
				orderedClients = orderedClients[1:]
			}
		}
	}
}
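
In the orderRandom case, send shuffles the healthy and unhealthy groups independently via shuffleClients. A minimal sketch, assuming an in-place Fisher-Yates shuffle built on math/rand; the real helper may differ.

// shuffleClients randomly permutes clients in place.
func shuffleClients(clients []batchClient) {
	for i := len(clients) - 1; i > 0; i-- {
		j := rand.Intn(i + 1)
		clients[i], clients[j] = clients[j], clients[i]
	}
}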