Example #1
// GetOverlaps returns a slice of values which overlap the specified
// interval. The slice is only valid until the next call to GetOverlaps.
func (ic *IntervalCache) GetOverlaps(start, end []byte) []*Entry {
	ic.overlapKey.Range = interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	ic.tree.DoMatching(ic.doOverlaps, ic.overlapKey.Range)
	overlaps := ic.overlaps
	ic.overlaps = ic.overlaps[:0]
	return overlaps
}
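A minimal usage sketch, assuming code in the same package and a populated IntervalCache named ic (the keys here are hypothetical). Because the returned slice is reused, results must be copied before the next call:

// Hypothetical caller: fetch entries overlapping ["a", "c") and copy them,
// since the returned slice is invalidated by the next GetOverlaps call.
overlaps := ic.GetOverlaps([]byte("a"), []byte("c"))
saved := make([]*Entry, len(overlaps))
copy(saved, overlaps)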
Example #2
// getOverlaps returns a slice of values which overlap the specified
// interval. The slice is only valid until the next call to getOverlaps.
func (cq *CommandQueue) getOverlaps(start, end []byte) []*cmd {
	rng := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	cq.tree.DoMatching(cq.doOverlaps, rng)
	overlaps := cq.overlaps
	cq.overlaps = cq.overlaps[:0]
	return overlaps
}
Example #3
// MakeKey creates a new interval key defined by start and end values.
func (ic *IntervalCache) MakeKey(start, end []byte) IntervalKey {
	if bytes.Compare(start, end) >= 0 {
		panic(fmt.Sprintf("start key greater than or equal to end key %q >= %q", start, end))
	}
	return IntervalKey{
		Range: interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		},
		id: uintptr(atomic.AddInt64(&intervalAlloc, 1)),
	}
}
Example #4
// add adds commands to the queue which affect the specified key ranges. Ranges
// without an end key affect only the start key. The returned command serves as
// the key for the command queue and must be re-supplied on a subsequent
// invocation of remove().
//
// add should be invoked after waiting on already-executing, overlapping
// commands via the WaitGroup initialized through GetWait().
func (cq *CommandQueue) add(readOnly bool, spans ...roachpb.Span) *cmd {
	prepareSpans(spans...)

	// Compute the min and max key that covers all of the spans.
	minKey, maxKey := spans[0].Key, spans[0].EndKey
	for i := 1; i < len(spans); i++ {
		start, end := spans[i].Key, spans[i].EndKey
		if minKey.Compare(start) > 0 {
			minKey = start
		}
		if maxKey.Compare(end) < 0 {
			maxKey = end
		}
	}

	numCmds := 1
	if len(spans) > 1 {
		numCmds += len(spans)
	}
	cmds := make([]cmd, numCmds)

	// Create the covering entry. Note that this may have an "illegal" key range
	// spanning from range-local to range-global, but that's acceptable here as
	// long as we're careful in the future.
	cmd := &cmds[0]
	cmd.id = cq.nextID()
	cmd.key = interval.Range{
		Start: interval.Comparable(minKey),
		End:   interval.Comparable(maxKey),
	}
	cmd.readOnly = readOnly
	cmd.expanded = false

	if len(spans) > 1 {
		// Populate the covering entry's children.
		cmd.children = cmds[1:]
		for i, span := range spans {
			child := &cmd.children[i]
			child.id = cq.nextID()
			child.key = interval.Range{
				Start: interval.Comparable(span.Key),
				End:   interval.Comparable(span.EndKey),
			}
			child.readOnly = readOnly
			child.expanded = true
		}
	}

	if err := cq.tree.Insert(cmd, false /* !fast */); err != nil {
		panic(err)
	}
	return cmd
}
// addKeyRange adds the specified key range to the range group,
// taking care not to add this range if existing entries already
// completely cover the range.
func addKeyRange(keys interval.RangeGroup, start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the range group requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	keyR := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	keys.Add(keyR)
}
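A small, self-contained sketch of the shared-backing-array trick above. The next helper is illustrative and simply appends a zero byte, analogous to how roachpb.Key.Next produces the key immediately following a byte key:

package main

import "fmt"

// next returns the key immediately following k in lexicographic order by
// appending a zero byte (an illustrative stand-in for roachpb.Key.Next).
func next(k []byte) []byte {
	return append(append([]byte(nil), k...), 0)
}

func main() {
	start := []byte("a")
	// Build the non-empty interval [start, next(start)) for a single key.
	end := next(start)
	start = end[:len(start)] // start and end now share one backing array
	fmt.Printf("start=%q end=%q\n", start, end) // start="a" end="a\x00"
}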
// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp cache; false to update the write timestamp cache.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID *uuid.UUID, readTSCache bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		cache := tc.wCache
		if readTSCache {
			cache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := cache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			cache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to the cache.
		for _, o := range cache.GetOverlaps(r.Start, r.End) {
			cv := o.Value.(*cacheValue)
			sCmp := r.Start.Compare(o.Key.Start)
			eCmp := r.End.Compare(o.Key.End)
			if !timestamp.Less(cv.timestamp) {
				// The existing interval's timestamp is less than or equal to the new timestamp.
				// Compare the interval ranges to determine how to modify the existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					cache.MoveToEnd(o.Entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// Old:
					cache.DelEntry(o.Entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// Old: ----    ----
					oldEnd := o.Key.End
					o.Key.End = r.Start

					key := cache.MakeKey(r.End, oldEnd)
					entry := makeCacheEntry(key, *cv)
					cache.AddEntryAfter(entry, o.Entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// Old: ----              ----
					o.Key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// Old:         ----              ----
					o.Key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			} else {
				// The existing interval's timestamp is greater than the new timestamp.
				// Compare the interval ranges to determine how to modify the new interval
				// before adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split the new range into two around the old one.
					// We can add the left piece immediately because it is guaranteed to be
					// before the rest of the overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: o.Key.Start}
					addRange(lr)

					r.Start = o.Key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// New:         ----              ----
					r.Start = o.Key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// New: ----              ----
					r.End = o.Key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
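A standalone sketch of the sCmp/eCmp case analysis used in Add. interval.Comparable values are byte slices, so plain bytes.Compare on hypothetical keys illustrates the same classification:

package main

import (
	"bytes"
	"fmt"
)

// classify mirrors the first switch above: it compares a new range
// [newStart, newEnd) against an overlapping old range [oldStart, oldEnd)
// and names the case that Add would take.
func classify(newStart, newEnd, oldStart, oldEnd []byte) string {
	sCmp := bytes.Compare(newStart, oldStart)
	eCmp := bytes.Compare(newEnd, oldEnd)
	switch {
	case sCmp == 0 && eCmp == 0:
		return "new and old are equal"
	case sCmp <= 0 && eCmp >= 0:
		return "new contains or equals old"
	case sCmp > 0 && eCmp < 0:
		return "old contains new"
	case eCmp >= 0:
		return "left partial overlap"
	case sCmp <= 0:
		return "right partial overlap"
	}
	return "no overlap"
}

func main() {
	fmt.Println(classify([]byte("c"), []byte("f"), []byte("a"), []byte("d"))) // left partial overlap
	fmt.Println(classify([]byte("b"), []byte("c"), []byte("a"), []byte("z"))) // old contains new
}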
// GetWait initializes the supplied wait group with the number of executing
// commands which overlap the specified key ranges. If an end key is empty, it
// only affects the start key. The caller should call wg.Wait() to wait for
// confirmation that all gating commands have completed or failed, and then
// call add() to add the keys to the command queue. readOnly is true if the
// requester is a read-only command; false for read-write.
func (cq *CommandQueue) GetWait(readOnly bool, wg *sync.WaitGroup, spans ...roachpb.Span) {
	for _, span := range spans {
		// This gives us a memory-efficient end key if end is empty.
		start, end := span.Key, span.EndKey
		if len(end) == 0 {
			end = start.Next()
			start = end[:len(start)]
		}
		newCmdRange := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}
		overlaps := cq.cache.GetOverlaps(newCmdRange.Start, newCmdRange.End)
		if readOnly {
			// If both commands are read-only, there are no dependencies between them,
			// so these can be filtered out of the overlapping commands.
			overlaps = filterReadWrite(overlaps)
		}

		// Sort overlapping commands by command ID and iterate from latest to earliest,
		// adding the commands' ranges to the RangeGroup to determine gating keyspace
		// command dependencies. Because all commands are given WaitGroup dependencies
		// to the most recent commands that they are dependent on, and because of the
		// causality provided by the strictly increasing command ID allocation, this
		// approach will construct a DAG-like dependency graph between WaitGroups with
		// overlapping keys. This comes as an alternative to creating explicit WaitGroup
		// dependencies to all gating commands for each new command, which could result
		// in an exponential dependency explosion.
		//
		// For example, consider the following 5 write commands, each with key ranges
		// represented on the x axis and WaitGroup dependencies represented by vertical
		// lines:
		//
		// cmd 1:   --------------
		//           |      |
		// cmd 2:    |  -------------
		//           |    |    |
		// cmd 3:    -------   |
		//                |    |
		// cmd 4:         -------
		//                   |
		// cmd 5:         -------
		//
		// Instead of having each command establish explicit dependencies on all previous
		// overlapping commands, each command only needs to establish explicit dependencies
		// on the set of overlapping commands closest to the new command that together span
		// the new command's overlapped range. Following this strategy, the other dependencies
		// will be implicitly enforced, which reduces memory utilization and synchronization
		// costs.
		//
		// The exception is existing reads: since reads don't wait for each other, an incoming
		// write must wait for reads even when they are covered by a "later" read (since that
		// "later" read won't wait for the earlier read to complete). However, if that read is
		// covered by a "later" write, we don't need to wait because writes can't be reordered.
		//
		// Two examples of how this logic works are shown below. Notice in the first example how
		// the overlapping reads do not establish dependencies on each other, and can therefore
		// be reordered. Also notice in the second example that once read command 4 overlaps
		// a "later" write, it no longer needs to be a dependency for the new write command 5.
		// However, because read command 3 does not overlap a "later" write, it is still a
		// dependency for the new write, but can be safely reordered before or after command 4.
		//
		// cmd 1 [R]:                -----               ----------
		//                             |                        |
		// cmd 2 [W]:              ========                 ========
		//                          |   |                    |   |
		// cmd 3 [R]:             --+------                --+------
		//                          | |                      | |
		// cmd 4 [R]:          -------+-----        -----------+-----
		//                       |    |              |         |
		// cmd 5 [W]:   =====    |    |          =======       |
		//                |      |    |            |           |
		// cmd 5 [W]:   ====================     ====================
		//
		cq.oHeap.Init(overlaps)
		for enclosed := false; cq.oHeap.Len() > 0 && !enclosed; {
			o := cq.oHeap.PopOverlap()
			keyRange, cmd := o.Key.Range, o.Value.(*cmd)
			if cmd.readOnly {
				// If the current overlap is a read (meaning we're a write, because other reads would
				// have been filtered out if we were a read as well), we only need to wait if the write
				// RangeGroup doesn't already overlap the read. Otherwise, we know that this read is
				// itself a dependent of a command already accounted for in our write RangeGroup. Either
				// way, we need to add this current command to the combined RangeGroup.
				cq.rwRg.Add(keyRange)
				if !cq.wRg.Overlaps(keyRange) {
					cmd.pending = append(cmd.pending, wg)
					wg.Add(1)
				}
			} else {
				// If the current overlap is a write, pick which RangeGroup will be used to determine
				// necessary dependencies based on whether we are a read or a write.
				overlapRg := cq.wRg
				if !readOnly {
					// We only use the combined read-write RangeGroup when the new command is a write,
					// because otherwise all read commands would have been filtered out and a second
					// RangeGroup is unnecessary. Previous reads rely on the distinction between the
					// write-only RangeGroup and the combined RangeGroup so that they can avoid
					// establishing a dependency when they are already dependent on previous writes,
					// while remaining independent from other reads.
					overlapRg = cq.rwRg
				}

				// We only need to establish a dependency when this write command's key range does not
				// overlap any other reads or writes in its future. If it does overlap, a dependency was
				// already established with a dependent of the current overlap, which gives us an implicit
				// transitive dependency on the current overlap.
				if !overlapRg.Overlaps(keyRange) {
					cmd.pending = append(cmd.pending, wg)
					wg.Add(1)
				}

				// The current command is a write, so add it to the write RangeGroup and observe if the group grows.
				if cq.wRg.Add(keyRange) {
					// We can stop dependency creation early in the case that the write RangeGroup fully encloses
					// our new range, which means that no new dependencies are needed. This looks only at the
					// write RangeGroup because even if the combined range group encloses us, there can always be
					// more reads that are necessary dependencies if they themselves don't overlap any writes. We
					// only need to perform this check when the write RangeGroup grows.
					//
					// We check the write RangeGroup's length before checking if it encloses the new command's
					// range because we know (based on the fact that these are all overlapping commands) that the
					// RangeGroup can enclose us only if its length is 1 (meaning all ranges inserted have coalesced).
					// This guarantees that this enclosure check will always be run in constant time.
					if cq.wRg.Len() == 1 && cq.wRg.Encloses(newCmdRange) {
						enclosed = true
					}
				}

				// Make sure the current command's range gets added to the combined RangeGroup if we are using it.
				if overlapRg == cq.rwRg {
					cq.rwRg.Add(keyRange)
				}
			}
		}

		// Clear heap to avoid leaking anything it is currently storing.
		cq.oHeap.Clear()

		// Clear the RangeGroups so that they can be used again. This is an alternative
		// to using local variables that must be allocated in every iteration.
		cq.wRg.Clear()
		cq.rwRg.Clear()
	}
}
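A minimal sketch of the call pattern described in the comments above, assuming code in the same package as CommandQueue; the helper name is hypothetical, and finished commands are assumed to signal the WaitGroups registered in their pending lists:

// waitThenAdd is a hypothetical in-package helper showing the documented flow:
// register against overlapping commands, wait for them, then enter the queue.
func waitThenAdd(cq *CommandQueue, readOnly bool, spans ...roachpb.Span) *cmd {
	var wg sync.WaitGroup
	cq.GetWait(readOnly, &wg, spans...) // wg.Add(1) per gating command
	wg.Wait()                           // block until gating commands signal completion
	return cq.add(readOnly, spans...)   // safe to enter the queue once dependencies clear
}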