예제 #1
0
// getOverlaps returns a slice of values which overlap the specified
// interval. The slice is only valid until the next call to GetOverlaps.
func (cq *CommandQueue) getOverlaps(start, end []byte) []*cmd {
	rng := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	cq.tree.DoMatching(cq.doOverlaps, rng)
	overlaps := cq.overlaps
	cq.overlaps = cq.overlaps[:0]
	return overlaps
}
예제 #2
0
파일: cache.go 프로젝트: knz/cockroach
// GetOverlaps returns a slice of values which overlap the specified
// interval. The slice is only valid until the next call to GetOverlaps.
func (ic *IntervalCache) GetOverlaps(start, end []byte) []*Entry {
	ic.overlapKey.Range = interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	ic.tree.DoMatching(ic.doOverlaps, ic.overlapKey.Range)
	overlaps := ic.overlaps
	ic.overlaps = ic.overlaps[:0]
	return overlaps
}
예제 #3
0
파일: cache.go 프로젝트: knz/cockroach
// MakeKey creates a new interval key defined by start and end values.
func (ic *IntervalCache) MakeKey(start, end []byte) IntervalKey {
	if bytes.Compare(start, end) >= 0 {
		panic(fmt.Sprintf("start key greater than or equal to end key %q >= %q", start, end))
	}
	return IntervalKey{
		Range: interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		},
		id: uintptr(atomic.AddInt64(&intervalAlloc, 1)),
	}
}
예제 #4
0
// add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp; false to update the write timestamp cache.
func (tc *timestampCache) add(
	start, end roachpb.Key, timestamp hlc.Timestamp, txnID *uuid.UUID, readTSCache bool,
) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	tc.latest.Forward(timestamp)
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		tcache := tc.wCache
		if readTSCache {
			tcache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := tcache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			tcache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, entry := range tcache.GetOverlaps(r.Start, r.End) {
			cv := entry.Value.(*cacheValue)
			key := entry.Key.(*cache.IntervalKey)
			sCmp := r.Start.Compare(key.Start)
			eCmp := r.End.Compare(key.End)
			if cv.timestamp.Less(timestamp) {
				// The existing interval has a timestamp less than the new
				// interval. Compare interval ranges to determine how to
				// modify existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					// Old:
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					tcache.MoveToEnd(entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// New: ------------      ------------      ------------
					// Old:
					tcache.DelEntry(entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// New:     ----
					// Old: ----    ----
					oldEnd := key.End
					key.End = r.Start

					newKey := tcache.MakeKey(r.End, oldEnd)
					newEntry := makeCacheEntry(newKey, *cv)
					tcache.AddEntryAfter(newEntry, entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// New:     --------          --------
					// Old: ----              ----
					key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// New: --------          --------
					// Old:         ----              ----
					key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else if timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp greater than the new interval.
				// Compare interval ranges to determine how to modify new interval before
				// adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// Old: -----------      -----------      -----------      -----------
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up old into two. We can add the left piece
					// immediately because it is guaranteed to be before the rest of the
					// overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// Old:    ------
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: key.Start}
					addRange(lr)

					r.Start = key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// Old: --------          --------
					// New:         ----              ----
					r.Start = key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// Old:     --------          --------
					// New: ----              ----
					r.End = key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else if (cv.txnID == nil && txnID == nil) ||
				(cv.txnID != nil && txnID != nil && *cv.txnID == *txnID) {
				// The existing interval has a timestamp equal to the new
				// interval, and the same transaction ID.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// New:    -----     or  -----------  or  --------     or     --------
					// Old: -----------      -----------      -----------      -----------
					//
					// New:
					// Old: -----------      -----------      -----------      -----------
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// New: ------------      ------------      ------------
					// Old:
					tcache.DelEntry(entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// New:     --------          --------
					// Old: ----              ----
					key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// New: --------          --------
					// Old:         ----              ----
					key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else {
				// The existing interval has a timestamp equal to the new
				// interval and a different transaction ID.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal. Segment is no longer owned by any
					// transaction.
					//
					// New: ------------
					// Old: ------------
					//
					// New:
					// Nil: ============
					// Old:
					cv.txnID = nil
					return
				case sCmp == 0 && eCmp > 0:
					// New contains old, left-aligned. Clear ownership of the
					// existing segment and truncate new.
					//
					// New: ------------
					// Old: ----------
					//
					// New:           --
					// Nil: ==========
					// Old:
					cv.txnID = nil
					r.Start = key.End
				case sCmp < 0 && eCmp == 0:
					// New contains old, right-aligned. Clear ownership of the
					// existing segment and truncate new.
					//
					// New: ------------
					// Old:   ----------
					//
					// New: --
					// Nil:   ==========
					// Old:
					cv.txnID = nil
					r.End = key.Start
				case sCmp < 0 && eCmp > 0:
					// New contains old; split into three segments with the
					// overlap owned by no txn.
					//
					// New: ------------
					// Old:   --------
					//
					// New: --        --
					// Nil:   ========
					// Old:
					cv.txnID = nil
					newKey := tcache.MakeKey(r.Start, key.Start)
					newEntry := makeCacheEntry(newKey, cacheValue{timestamp: timestamp, txnID: txnID})
					tcache.AddEntryAfter(newEntry, entry)
					r.Start = key.End
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two. New segment is
					// owned by no txn.
					//
					// New:     ----
					// Old: ------------
					//
					// New:
					// Nil:     ====
					// Old: ----    ----
					txnID = nil
					oldEnd := key.End
					key.End = r.Start

					newKey := tcache.MakeKey(r.End, oldEnd)
					newEntry := makeCacheEntry(newKey, *cv)
					tcache.AddEntryAfter(newEntry, entry)
				case eCmp == 0:
					// Old contains new, right-aligned; truncate old end and clear
					// ownership of new segment.
					//
					// New:     --------
					// Old: ------------
					//
					// New:
					// Nil:     ========
					// Old: ----
					txnID = nil
					key.End = r.Start
				case sCmp == 0:
					// Old contains new, left-aligned; truncate old start and
					// clear ownership of new segment.
					// New: --------
					// Old: ------------
					//
					// New:
					// Nil: ========
					// Old:         ----
					txnID = nil
					key.Start = r.End
				case eCmp > 0:
					// Left partial overlap; truncate old end and split new into
					// segments owned by no txn (the overlap) and the new txn.
					//
					// New:     --------
					// Old: --------
					//
					// New:         ----
					// Nil:     ====
					// Old: ----
					key.End, r.Start = r.Start, key.End
					newKey := tcache.MakeKey(key.End, r.Start)
					newCV := cacheValue{timestamp: cv.timestamp, txnID: nil}
					newEntry := makeCacheEntry(newKey, newCV)
					tcache.AddEntryAfter(newEntry, entry)
				case sCmp < 0:
					// Right partial overlap; truncate old start and split new into
					// segments owned by no txn (the overlap) and the new txn.
					//
					// New: --------
					// Old:     --------
					//
					// New: ----
					// Nil:     ====
					// Old:         ----
					key.Start, r.End = r.End, key.Start
					newKey := tcache.MakeKey(r.End, key.Start)
					newCV := cacheValue{timestamp: cv.timestamp, txnID: nil}
					newEntry := makeCacheEntry(newKey, newCV)
					tcache.AddEntryAfter(newEntry, entry)
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
예제 #5
0
// add adds commands to the queue which affect the specified key ranges. Ranges
// without an end key affect only the start key. The returned interface is the
// key for the command queue and must be re-supplied on subsequent invocation
// of remove().
//
// Either all supplied spans must be range-global or range-local. Failure to
// obey with this restriction results in a fatal error.
//
// Returns a nil `cmd` when no spans are given.
//
// add should be invoked after waiting on already-executing, overlapping
// commands via the WaitGroup initialized through getWait().
func (cq *CommandQueue) add(readOnly bool, spans ...roachpb.Span) *cmd {
	if len(spans) == 0 {
		return nil
	}
	prepareSpans(spans...)

	// Compute the min and max key that covers all of the spans.
	minKey, maxKey := spans[0].Key, spans[0].EndKey
	for i := 1; i < len(spans); i++ {
		start, end := spans[i].Key, spans[i].EndKey
		if minKey.Compare(start) > 0 {
			minKey = start
		}
		if maxKey.Compare(end) < 0 {
			maxKey = end
		}
	}

	if keys.IsLocal(minKey) != keys.IsLocal(maxKey) {
		log.Fatalf(
			context.TODO(),
			"mixed range-global and range-local keys: %s and %s",
			minKey, maxKey,
		)
	}

	numCmds := 1
	if len(spans) > 1 {
		numCmds += len(spans)
	}
	cmds := make([]cmd, numCmds)

	// Create the covering entry.
	cmd := &cmds[0]
	cmd.id = cq.nextID()
	cmd.key = interval.Range{
		Start: interval.Comparable(minKey),
		End:   interval.Comparable(maxKey),
	}
	cmd.readOnly = readOnly
	cmd.expanded = false

	if len(spans) > 1 {
		// Populate the covering entry's children.
		cmd.children = cmds[1:]
		for i, span := range spans {
			child := &cmd.children[i]
			child.id = cq.nextID()
			child.key = interval.Range{
				Start: interval.Comparable(span.Key),
				End:   interval.Comparable(span.EndKey),
			}
			child.readOnly = readOnly
			child.expanded = true
		}
	}

	if cmd.readOnly {
		cq.localMetrics.readCommands += int64(cmd.cmdCount())
	} else {
		cq.localMetrics.writeCommands += int64(cmd.cmdCount())
	}

	if cq.coveringOptimization || len(spans) == 1 {
		if err := cq.tree.Insert(cmd, false /* !fast */); err != nil {
			panic(err)
		}
	} else {
		cq.expand(cmd, false /* !isInserted */)
	}
	return cmd
}
예제 #6
0
// GetWait returns a slice of the pending channels of executing commands which
// overlap the specified key ranges. If an end key is empty, it only affects
// the start key. The caller should call wg.Wait() to wait for confirmation
// that all gating commands have completed or failed, and then call add() to
// add the keys to the command queue. readOnly is true if the requester is a
// read-only command; false for read-write.
func (cq *CommandQueue) getWait(readOnly bool, spans ...roachpb.Span) (chans []<-chan struct{}) {
	prepareSpans(spans...)

	for i := 0; i < len(spans); i++ {
		span := spans[i]
		start, end := span.Key, span.EndKey
		if end == nil {
			panic(fmt.Sprintf("%d: unexpected nil EndKey: %s", i, span))
		}
		newCmdRange := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}
		overlaps := cq.getOverlaps(newCmdRange.Start, newCmdRange.End)
		if readOnly {
			// If both commands are read-only, there are no dependencies between them,
			// so these can be filtered out of the overlapping commands.
			overlaps = filterReadWrite(overlaps)
		}

		// Check to see if any of the overlapping entries are "covering"
		// entries. If we encounter a covering entry, we remove it from the
		// interval tree and add all of its children.
		restart := false
		for _, c := range overlaps {
			// Operand order matters: call cq.expand() for its side effects
			// even if `restart` is already true.
			restart = cq.expand(c, true /* isInserted */) || restart
		}
		if restart {
			i--
			continue
		}
		if overlapCount := int64(len(overlaps)); overlapCount > cq.localMetrics.maxOverlapsSeen {
			cq.localMetrics.maxOverlapsSeen = overlapCount
		}

		// Sort overlapping commands by command ID and iterate from latest to earliest,
		// adding the commands' ranges to the RangeGroup to determine gating keyspace
		// command dependencies. Because all commands are given WaitGroup dependencies
		// to the most recent commands that they are dependent on, and because of the
		// causality provided by the strictly increasing command ID allocation, this
		// approach will construct a DAG-like dependency graph between WaitGroups with
		// overlapping keys. This comes as an alternative to creating explicit WaitGroups
		// dependencies to all gating commands for each new command, which could result
		// in an exponential dependency explosion.
		//
		// For example, consider the following 5 write commands, each with key ranges
		// represented on the x axis and WaitGroup dependencies represented by vertical
		// lines:
		//
		// cmd 1:   --------------
		//           |      |
		// cmd 2:    |  -------------
		//           |    |    |
		// cmd 3:    -------   |
		//                |    |
		// cmd 4:         -------
		//                   |
		// cmd 5:         -------
		//
		// Instead of having each command establish explicit dependencies on all previous
		// overlapping commands, each command only needs to establish explicit dependencies
		// on the set of overlapping commands closest to the new command that together span
		// the new commands overlapped range. Following this strategy, the other dependencies
		// will be implicitly enforced, which reduces memory utilization and synchronization
		// costs.
		//
		// The exception are existing reads: since reads don't wait for each other, an incoming
		// write must wait for reads even when they are covered by a "later" read (since that
		// "later" read won't wait for the earlier read to complete). However, if that read is
		// covered by a "later" write, we don't need to wait because writes can't be reordered.
		//
		// Two example of how this logic works are shown below. Notice in the first example how
		// the overlapping reads do not establish dependencies on each other, and can therefore
		// be reordered. Also notice in the second example that once read command 4 overlaps
		// a "later" write, it no longer needs to be a dependency for the new write command 5.
		// However, because read command 3 does not overlap a "later" write, it is still a
		// dependency for the new write, but can be safely reordered before or after command 4.
		//
		// cmd 1 [R]:                -----               ----------
		//                             |                        |
		// cmd 2 [W]:              ========                 ========
		//                          |   |                    |   |
		// cmd 3 [R]:             --+------                --+------
		//                          | |                      | |
		// cmd 4 [R]:          -------+-----        -----------+-----
		//                       |    |              |         |
		// cmd 5 [W]:   =====    |    |          =======       |
		//                |      |    |            |           |
		// cmd 5 [W]:   ====================     ====================
		//
		cq.oHeap.Init(overlaps)
		for enclosed := false; cq.oHeap.Len() > 0 && !enclosed; {
			cmd := cq.oHeap.PopOverlap()
			keyRange := cmd.key
			if cmd.readOnly {
				// If the current overlap is a read (meaning we're a write because other reads will
				// be filtered out if we're a read as well), we only need to wait if the write RangeGroup
				// doesn't already overlap the read. Otherwise, we know that this current read is a dependent
				// itself to a command already accounted for in out write RangeGroup. Either way, we need to add
				// this current command to the combined RangeGroup.
				cq.rwRg.Add(keyRange)
				if !cq.wRg.Overlaps(keyRange) {
					if cmd.pending == nil {
						cmd.pending = make(chan struct{})
					}
					chans = append(chans, cmd.pending)
				}
			} else {
				// If the current overlap is a write, pick which RangeGroup will be used to determine necessary
				// dependencies based on if we are a read or write.
				overlapRg := cq.wRg
				if !readOnly {
					// We only use the combined read-write RangeGroup when we are a new write command, because
					// otherwise all read commands would have been filtered out so we can avoid using a second
					// RangeGroup. Here, the previous reads rely on a distinction between a write command RangeGroup
					// and an all command RangeGroup. This is so that they can avoid establishing a dependency
					// if they are already dependent on previous writes, but can remain independent from other
					// reads.
					overlapRg = cq.rwRg
				}

				// We only need to establish a dependency when this write command key range is not overlapping
				// any other reads or writes in its future. If it is overlapping, we know there was already a
				// dependency established with a dependent of the current overlap, meaning we already established
				// an implicit transitive dependency to the current overlap.
				if !overlapRg.Overlaps(keyRange) {
					if cmd.pending == nil {
						cmd.pending = make(chan struct{})
					}
					chans = append(chans, cmd.pending)
				}

				// The current command is a write, so add it to the write RangeGroup and observe if the group grows.
				if cq.wRg.Add(keyRange) {
					// We can stop dependency creation early in the case that the write RangeGroup fully encloses
					// our new range, which means that no new dependencies are needed. This looks only at the
					// write RangeGroup because even if the combined range group encloses us, there can always be
					// more reads that are necessary dependencies if they themselves don't overlap any writes. We
					// only need to perform this check when the write RangeGroup grows.
					//
					// We check the write RangeGroup's length before checking if it encloses the new command's
					// range because we know (based on the fact that these are all overlapping commands) that the
					// RangeGroup can enclose us only if its length is 1 (meaning all ranges inserted have coalesced).
					// This guarantees that this enclosure check will always be run in constant time.
					if cq.wRg.Len() == 1 && cq.wRg.Encloses(newCmdRange) {
						enclosed = true
					}
				}

				// Make sure the current command's range gets added to the combined RangeGroup if we are using it.
				if overlapRg == cq.rwRg {
					cq.rwRg.Add(keyRange)
				}
			}
		}

		// Clear heap to avoid leaking anything it is currently storing.
		cq.oHeap.Clear()

		// Clear the RangeGroups so that they can be used again. This is an alternative
		// to using local variables that must be allocated in every iteration.
		cq.wRg.Clear()
		cq.rwRg.Clear()
	}
	return chans
}