Example No. 1
// RangeLookup implements the RangeDescriptorDB interface.
// RangeLookup dispatches a RangeLookup request for the given metadata
// key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(
	ctx context.Context, key roachpb.RKey, desc *roachpb.RangeDescriptor, useReverseScan bool,
) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges: ds.rangeLookupMaxRanges,
		Reverse:   useReverseScan,
	})
	replicas := NewReplicaSlice(ds.gossip, desc)
	shuffle.Shuffle(replicas)
	br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba)
	if err != nil {
		return nil, nil, roachpb.NewError(err)
	}
	if br.Error != nil {
		return nil, nil, br.Error
	}
	resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse)
	return resp.Ranges, resp.PrefetchedRanges, nil
}
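This example (and the ones that follow) hands a slice type to shuffle.Shuffle, which reorders it in place. As a point of reference, here is a minimal, self-contained sketch of the Len/Swap contract such a shuffle relies on, using a plain Fisher-Yates loop; the interface and helper names below are illustrative stand-ins, not the actual util/shuffle package API.

package main

import (
	"fmt"
	"math/rand"
)

// swapShuffler captures the minimal contract a slice needs so it can be
// shuffled in place: a length and an index swap. Illustrative stand-in only.
type swapShuffler interface {
	Len() int
	Swap(i, j int)
}

// shuffleInPlace runs a standard Fisher-Yates shuffle using only Len and Swap.
func shuffleInPlace(s swapShuffler) {
	for i := s.Len() - 1; i > 0; i-- {
		s.Swap(i, rand.Intn(i+1))
	}
}

// replicaIDs is a toy stand-in for a ReplicaSlice.
type replicaIDs []int

func (r replicaIDs) Len() int      { return len(r) }
func (r replicaIDs) Swap(i, j int) { r[i], r[j] = r[j], r[i] }

func main() {
	rs := replicaIDs{1, 2, 3, 4, 5}
	shuffleInPlace(rs)
	fmt.Println(rs) // e.g. [3 1 5 2 4]
}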
Example No. 2
// SortByCommonAttributePrefix rearranges the ReplicaSlice by comparing the
// attributes to the given reference attributes. The basis for the comparison
// is that of the common prefix of replica attributes (i.e. the number of equal
// attributes, starting at the first), with a longer prefix sorting first. The
// number of attributes successfully matched to at least one replica is
// returned (hence, if the return value equals the length of attrs, at least
// one replica matched all attributes).
func (rs ReplicaSlice) SortByCommonAttributePrefix(attrs []string) int {
	if len(rs) < 2 {
		return 0
	}
	topIndex := len(rs) - 1
	for bucket := 0; bucket < len(attrs); bucket++ {
		firstNotOrdered := 0
		for i := 0; i <= topIndex; i++ {
			if bucket < len(rs[i].attrs()) && rs[i].attrs()[bucket] == attrs[bucket] {
				// Move replica which matches this attribute to an earlier
				// place in the array, just behind the last matching replica.
				// This packs all matching replicas together.
				rs.Swap(firstNotOrdered, i)
				firstNotOrdered++
			}
		}
		if topIndex < len(rs)-1 {
			shuffle.Shuffle(rs[firstNotOrdered : topIndex+1])
		}
		if firstNotOrdered == 0 {
			return bucket
		}
		topIndex = firstNotOrdered - 1
	}
	return len(attrs)
}
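The bucketed packing above is easier to see on a plain slice of attribute lists. The following is a hedged, standalone sketch of the same technique (the random shuffle of replicas that tie on a prefix is omitted for brevity); the function and types are illustrative, not the ReplicaSlice method itself.

package main

import "fmt"

// sortByCommonAttributePrefix packs items sharing a longer attribute prefix
// with ref toward the front and returns the number of attributes matched by
// at least one item. Illustrative re-implementation on [][]string.
func sortByCommonAttributePrefix(items [][]string, ref []string) int {
	if len(items) < 2 {
		return 0
	}
	topIndex := len(items) - 1
	for bucket := 0; bucket < len(ref); bucket++ {
		firstNotOrdered := 0
		for i := 0; i <= topIndex; i++ {
			if bucket < len(items[i]) && items[i][bucket] == ref[bucket] {
				// Pack the matching item just behind the previous matches.
				items[firstNotOrdered], items[i] = items[i], items[firstNotOrdered]
				firstNotOrdered++
			}
		}
		if firstNotOrdered == 0 {
			return bucket
		}
		// Only the packed prefix stays in contention for the next attribute.
		topIndex = firstNotOrdered - 1
	}
	return len(ref)
}

func main() {
	items := [][]string{
		{"us-west", "ssd"},
		{"eu-west", "hdd"},
		{"us-west", "hdd"},
	}
	n := sortByCommonAttributePrefix(items, []string{"us-west", "ssd"})
	fmt.Println(n, items) // 2 [[us-west ssd] [us-west hdd] [eu-west hdd]]
}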
Example No. 3
// OptimizeReplicaOrder sorts the replicas in the order in which they're to be
// used for sending RPCs (meaning in the order in which they'll be probed for
// the lease).  "Closer" (matching in more attributes) replicas are ordered
// first. If the current node is a replica, then it'll be the first one.
//
// nodeDesc is the descriptor of the current node. It can be nil, in which case
// information about the current node is not used in optimizing the order.
//
// Note that this method is not concerned with any information the node might
// have about who the lease holder might be. If there is such info (e.g. in a
// LeaseHolderCache), the caller will probably want to further tweak the head of
// the ReplicaSlice.
func (rs ReplicaSlice) OptimizeReplicaOrder(nodeDesc *roachpb.NodeDescriptor) {
	// If we don't know which node we're on, send the RPCs randomly.
	if nodeDesc == nil {
		shuffle.Shuffle(rs)
		return
	}
	// Sort replicas by attribute affinity, which we treat as a stand-in for
	// proximity (for now).
	rs.SortByCommonAttributePrefix(nodeDesc.Attrs.Attrs)

	// If there is a replica on the local node, move it to the front.
	if i := rs.FindReplicaByNodeID(nodeDesc.NodeID); i > 0 {
		rs.MoveToFront(i)
	}
}
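The final step relies on MoveToFront keeping the rest of the slice in its sorted order, so a stable rotation rather than a plain swap is the natural choice. Below is a minimal sketch of that behavior on a plain int slice, written as an assumption about MoveToFront's semantics rather than a copy of its implementation.

package main

import "fmt"

// moveToFront moves the element at index i to position 0 while keeping the
// relative order of the other elements, so a preceding affinity sort is not
// disturbed. Illustrative sketch; the real ReplicaSlice.MoveToFront may differ.
func moveToFront(s []int, i int) {
	front := s[i]
	// Shift everything before i one slot to the right, then place the
	// chosen element at the head.
	copy(s[1:i+1], s[:i])
	s[0] = front
}

func main() {
	replicas := []int{10, 20, 30, 40}
	moveToFront(replicas, 2)
	fmt.Println(replicas) // [30 10 20 40]
}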
Example No. 4
// getStoreList returns a storeList containing all active stores along with
// their associated stats. It also returns the total number of alive and
// throttled stores.
func (sp *StorePool) getStoreList(rangeID roachpb.RangeID) (StoreList, int, int) {
	sp.mu.RLock()
	defer sp.mu.RUnlock()

	var storeIDs roachpb.StoreIDSlice
	for storeID := range sp.mu.storeDetails {
		storeIDs = append(storeIDs, storeID)
	}

	if sp.deterministic {
		sort.Sort(storeIDs)
	} else {
		shuffle.Shuffle(storeIDs)
	}

	var aliveStoreCount int
	var throttledStoreCount int
	var storeDescriptors []roachpb.StoreDescriptor

	now := sp.clock.PhysicalTime()
	for _, storeID := range storeIDs {
		detail := sp.mu.storeDetails[storeID]
		switch s := detail.status(now, sp.timeUntilStoreDead, rangeID, sp.nodeLivenessFn); s {
		case storeStatusThrottled:
			aliveStoreCount++
			throttledStoreCount++
		case storeStatusReplicaCorrupted:
			aliveStoreCount++
		case storeStatusAvailable:
			aliveStoreCount++
			storeDescriptors = append(storeDescriptors, *detail.desc)
		case storeStatusDead, storeStatusUnavailable:
			// Do nothing; this node cannot be used.
		default:
			panic(fmt.Sprintf("unknown store status: %d", s))
		}
	}

	return makeStoreList(storeDescriptors), aliveStoreCount, throttledStoreCount
}
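The ordering choice at the top of getStoreList is the interesting part: a deterministic sort keeps behavior reproducible (for example under tests), while a shuffle keeps the iteration from always favoring the same stores. Here is a hedged, standalone sketch of that pattern on plain ints, with toy types standing in for roachpb.StoreIDSlice.

package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// orderStoreIDs mirrors the ordering choice in getStoreList: a deterministic
// sort for reproducible behavior, or a random shuffle so the iteration order
// does not consistently favor the same stores. Illustrative stand-in only.
func orderStoreIDs(ids []int, deterministic bool) {
	if deterministic {
		sort.Ints(ids)
		return
	}
	rand.Shuffle(len(ids), func(i, j int) { ids[i], ids[j] = ids[j], ids[i] })
}

func main() {
	ids := []int{7, 3, 9, 1}
	orderStoreIDs(ids, true)
	fmt.Println(ids) // [1 3 7 9]
	orderStoreIDs(ids, false)
	fmt.Println(ids) // random permutation, e.g. [9 1 3 7]
}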
Example No. 5
// optimizeReplicaOrder sorts the replicas in the order in which they are to be
// used for sending RPCs (meaning in the order in which they'll be probed for
// the lease). "Closer" replicas (matching in more attributes) are ordered
// first. Replicas matching in the same number of attributes are shuffled
// randomly.
// If the current node is a replica, then it'll be the first one.
func (ds *DistSender) optimizeReplicaOrder(replicas ReplicaSlice) {
	// TODO(spencer): going to need to also sort by affinity; closest
	// ping time should win. Makes sense to have the rpc client/server
	// heartbeat measure ping times. With a bit of seasoning, each
	// node will be able to order the healthy replicas based on latency.

	nodeDesc := ds.getNodeDescriptor()
	// If we don't know which node we're on, don't try to optimize the order;
	// just send the RPCs randomly.
	if nodeDesc == nil {
		shuffle.Shuffle(replicas)
		return
	}
	// Sort replicas by attribute affinity (if any), which we treat as a stand-in
	// for proximity (for now).
	replicas.SortByCommonAttributePrefix(nodeDesc.Attrs.Attrs)

	// If there is a replica on the local node, move it to the front.
	if i := replicas.FindReplicaByNodeID(nodeDesc.NodeID); i > 0 {
		replicas.MoveToFront(i)
	}
}
Example No. 6
// getStoreList returns a storeList containing all active stores along with
// their associated stats. It also returns the total number of alive and
// throttled stores.
func (sp *StorePool) getStoreList(rangeID roachpb.RangeID) (StoreList, int, int) {
	sp.mu.RLock()
	defer sp.mu.RUnlock()

	var storeIDs roachpb.StoreIDSlice
	for storeID := range sp.mu.storeDetails {
		storeIDs = append(storeIDs, storeID)
	}

	if sp.deterministic {
		sort.Sort(storeIDs)
	} else {
		shuffle.Shuffle(storeIDs)
	}

	var aliveStoreCount int
	var throttledStoreCount int
	var storeDescriptors []roachpb.StoreDescriptor

	now := sp.clock.PhysicalTime()
	for _, storeID := range storeIDs {
		detail := sp.mu.storeDetails[storeID]
		switch detail.status(now, rangeID) {
		case storeStatusThrottled:
			aliveStoreCount++
			throttledStoreCount++
		case storeStatusReplicaCorrupted:
			aliveStoreCount++
		case storeStatusAvailable:
			aliveStoreCount++
			storeDescriptors = append(storeDescriptors, *detail.desc)
		}
	}

	return makeStoreList(storeDescriptors), aliveStoreCount, throttledStoreCount
}