// addKeyRange adds the specified key range to the range group,
// taking care not to add this range if existing entries already
// completely cover the range.
func addKeyRange(keys interval.RangeGroup, start, end roachpb.Key) {
	// This gives us a memory-efficient end key if end is empty.
	// The most common case for keys in the intents interval map
	// is for single keys. However, the range group requires
	// a non-empty interval, so we create two key slices which
	// share the same underlying byte array.
	if len(end) == 0 {
		end = start.ShallowNext()
		start = end[:len(start)]
	}
	keyR := interval.Range{
		Start: interval.Comparable(start),
		End:   interval.Comparable(end),
	}
	keys.Add(keyR)
}
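
// exampleAddSingleKey is an illustrative sketch, not part of the original
// file: it shows the aliasing trick addKeyRange relies on. It assumes
// roachpb.Key.ShallowNext returns the immediate successor key (a zero byte
// appended) while sharing the source key's backing array when capacity
// allows, and that interval.NewRangeTree constructs an empty RangeGroup.
func exampleAddSingleKey() {
	keys := interval.NewRangeTree()
	start := roachpb.Key("a")
	// end is empty, so addKeyRange derives end = "a\x00" via ShallowNext
	// and rewrites start as a prefix slice of end; a single allocation
	// backs both endpoints of the stored interval.
	addKeyRange(keys, start, nil)
}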
// getMax returns the maximum timestamp among cache entries overlapping
// the key range from start to end, ignoring entries laid down by txnID's
// own transaction. If end is nil, only the start key is consulted.
// readTSCache selects the read timestamp cache; false selects the write
// timestamp cache. The low water mark serves as the floor.
func (tc *TimestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readTSCache bool) roachpb.Timestamp {
	if len(end) == 0 {
		end = start.ShallowNext()
	}
	max := tc.lowWater
	cache := tc.wCache
	if readTSCache {
		cache = tc.rCache
	}
	for _, o := range cache.GetOverlaps(start, end) {
		ce := o.Value.(*cacheValue)
		if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) {
			if max.Less(ce.timestamp) {
				max = ce.timestamp
			}
		}
	}
	return max
}
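
// exampleForwardWriteTimestamp is an illustrative sketch, not original
// code: it shows the classic use of getMax with readTSCache=true, pushing
// a write's timestamp above the latest read of the same key by another
// transaction. It assumes roachpb.Timestamp has a Next method returning
// the next logical tick; the surrounding locking is omitted.
func exampleForwardWriteTimestamp(tc *TimestampCache, key roachpb.Key, txnID *uuid.UUID, ts roachpb.Timestamp) roachpb.Timestamp {
	// A nil end key makes the lookup cover the single key only.
	if rTS := tc.getMax(key, nil, txnID, true); ts.Less(rTS) {
		// Bump the write just past the most recent conflicting read.
		ts = rTS.Next()
	}
	return ts
}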
// Add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp cache; false to update the write timestamp cache.
func (tc *TimestampCache) Add(start, end roachpb.Key, timestamp roachpb.Timestamp, txnID *uuid.UUID, readTSCache bool) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.ShallowNext()
		start = end[:len(start)]
	}
	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		cache := tc.wCache
		if readTSCache {
			cache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := cache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			cache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, o := range cache.GetOverlaps(r.Start, r.End) {
			cv := o.Value.(*cacheValue)
			sCmp := r.Start.Compare(o.Key.Start)
			eCmp := r.End.Compare(o.Key.End)
			if !timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp less than or equal to
				// the new interval. Compare interval ranges to determine how to
				// modify the existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the
					// need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					cache.MoveToEnd(o.Entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or  ----------    or    ----------
					//
					// Old:
					cache.DelEntry(o.Entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// Old: ----    ----
					oldEnd := o.Key.End
					o.Key.End = r.Start

					key := cache.MakeKey(r.End, oldEnd)
					entry := makeCacheEntry(key, *cv)
					cache.AddEntryAfter(entry, o.Entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// Old: ----              ----
					o.Key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// Old:         ----              ----
					o.Key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			} else {
				// The existing interval has a timestamp greater than the new
				// interval. Compare interval ranges to determine how to modify
				// the new interval before adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up new into two. We can add the
					// left piece immediately because it is guaranteed to be
					// before the rest of the overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: o.Key.Start}
					addRange(lr)

					r.Start = o.Key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// New:         ----              ----
					r.Start = o.Key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// New: ----              ----
					r.End = o.Key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", o.Key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
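
// exampleAddOverlaps is an illustrative sketch, not original code: it
// exercises two of the overlap cases handled above. Timestamps are built
// directly from WallTime values for brevity.
func exampleAddOverlaps(tc *TimestampCache, txn1, txn2 *uuid.UUID) {
	ts1 := roachpb.Timestamp{WallTime: 1}
	ts2 := roachpb.Timestamp{WallTime: 2}
	ts3 := roachpb.Timestamp{WallTime: 3}

	// Seed the read timestamp cache with ["a","z") @ ts2.
	tc.Add(roachpb.Key("a"), roachpb.Key("z"), ts2, txn1, true)

	// ["c","f") @ ts1 is older and fully contained by the existing entry,
	// so Add hits the "old contains or is equal to new" case and returns
	// without inserting anything.
	tc.Add(roachpb.Key("c"), roachpb.Key("f"), ts1, txn2, true)

	// ["c","f") @ ts3 is newer, so the existing entry is split into
	// ["a","c") and ["f","z") (the "old contains new" case) before the
	// new interval is inserted.
	tc.Add(roachpb.Key("c"), roachpb.Key("f"), ts3, txn2, true)
}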
// fetch retrieves spans from the kv layer.
func (f *kvFetcher) fetch() *roachpb.Error {
	// Retrieve all the spans.
	batchSize := int64(kvBatchSize)
	if f.firstBatchLimit != 0 && len(f.kvs) == 0 && f.firstBatchLimit < batchSize {
		batchSize = f.firstBatchLimit
	}

	b := &client.Batch{MaxScanResults: batchSize}

	var resumeKey roachpb.Key
	if len(f.kvs) > 0 {
		resumeKey = f.kvs[len(f.kvs)-1].Key
		// To resume forward scans we will set the (inclusive) scan start to the Next of the last
		// received key. To resume reverse scans we will set the (exclusive) scan end to the last
		// received key.
		if !f.reverse {
			resumeKey = resumeKey.ShallowNext()
		}
	}

	atEnd := true
	if !f.reverse {
		for i := 0; i < len(f.spans); i++ {
			start := f.spans[i].start
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].end) >= 0 {
					// We are resuming from a key after this span.
					continue
				}
				if resumeKey.Compare(start) > 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					start = resumeKey
				}
			}
			atEnd = false
			b.Scan(start, f.spans[i].end, f.spans[i].count)
		}
	} else {
		for i := len(f.spans) - 1; i >= 0; i-- {
			end := f.spans[i].end
			if resumeKey != nil {
				if resumeKey.Compare(f.spans[i].start) <= 0 {
					// We are resuming from a key before this span.
					continue
				}
				if resumeKey.Compare(end) < 0 {
					// We are resuming from a key inside this span.
					// In this case we should technically reduce the max count for the span; but
					// since this count is only an optimization it's not incorrect to retrieve more
					// keys for the span.
					end = resumeKey
				}
			}
			atEnd = false
			b.ReverseScan(f.spans[i].start, end, f.spans[i].count)
		}
	}

	if atEnd {
		// The last scan happened to finish just at the end of the last span.
		f.kvs = nil
		f.fetchEnd = true
		return nil
	}

	if pErr := f.txn.Run(b); pErr != nil {
		return pErr
	}

	if f.kvs == nil {
		numResults := 0
		for _, result := range b.Results {
			numResults += len(result.Rows)
		}
		f.kvs = make([]client.KeyValue, 0, numResults)
	} else {
		f.kvs = f.kvs[:0]
	}
	for _, result := range b.Results {
		f.kvs = append(f.kvs, result.Rows...)
	}
	f.totalFetched += int64(len(f.kvs))
	f.kvIndex = 0

	if int64(len(f.kvs)) < batchSize {
		f.fetchEnd = true
	}

	// TODO(radu): We should fetch the next chunk in the background instead of waiting for the next
	// call to fetch(). We can use a pool of workers to issue the KV ops which will also limit the
	// total number of fetches that happen in parallel (and thus the amount of resources we use).
	return nil
}
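
// exampleDrainFetcher is an illustrative sketch, not original code: it
// shows the calling pattern fetch() is designed for. Each call issues one
// batch of Scans/ReverseScans resuming after the last received key; a
// batch shorter than batchSize (or no remaining spans) sets fetchEnd.
func exampleDrainFetcher(f *kvFetcher) *roachpb.Error {
	for {
		if f.kvIndex >= len(f.kvs) {
			if f.fetchEnd {
				return nil
			}
			if pErr := f.fetch(); pErr != nil {
				return pErr
			}
			continue // the final batch may come back empty
		}
		_ = f.kvs[f.kvIndex] // consume one client.KeyValue
		f.kvIndex++
	}
}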