// loadRangeDescriptor scans the range-local descriptor keys in the given
// engine and returns the range descriptor for the specified RangeID, or an
// error if no such descriptor is found.
func loadRangeDescriptor(
	db engine.Engine, rangeID roachpb.RangeID,
) (roachpb.RangeDescriptor, error) {
	var desc roachpb.RangeDescriptor
	handleKV := func(kv engine.MVCCKeyValue) (bool, error) {
		if kv.Key.Timestamp == hlc.ZeroTimestamp {
			// We only want values, not MVCCMetadata.
			return false, nil
		}
		if err := checkRangeDescriptorKey(kv.Key); err != nil {
			// Range descriptor keys are interleaved with others, so if it
			// doesn't parse as a range descriptor just skip it.
			return false, nil
		}
		if err := getProtoValue(kv.Value, &desc); err != nil {
			return false, err
		}
		return desc.RangeID == rangeID, nil
	}

	// Range descriptors are stored by key, so we have to scan over the
	// range-local data to find the one for this RangeID.
	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)
	if err := db.Iterate(start, end, handleKV); err != nil {
		return roachpb.RangeDescriptor{}, err
	}
	if desc.RangeID == rangeID {
		return desc, nil
	}
	return roachpb.RangeDescriptor{}, fmt.Errorf("range descriptor %d not found", rangeID)
}
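// Illustrative sketch (not from the original source): one plausible way
// loadRangeDescriptor could be wired into a debug helper that prints the
// descriptor for a single range. The function name and use of fmt.Printf are
// assumptions; only loadRangeDescriptor itself is taken from the code above.
func debugPrintRangeDescriptor(db engine.Engine, rangeID roachpb.RangeID) error {
	desc, err := loadRangeDescriptor(db, rangeID)
	if err != nil {
		return err
	}
	fmt.Printf("%+v\n", desc)
	return nil
}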
// CopyFrom copies all the persisted results from the originRangeID
// abort cache into this one. Note that the cache will not be
// locked while copying is in progress. Failures decoding individual
// entries return an error. The copy is done directly using the engine
// instead of interpreting values through MVCC for efficiency.
// On success, returns the number of entries (key-value pairs) copied.
func (sc *AbortCache) CopyFrom(
	ctx context.Context,
	e engine.ReadWriter,
	ms *enginepb.MVCCStats,
	originRangeID roachpb.RangeID,
) (int, error) {
	originMin := engine.MakeMVCCMetadataKey(keys.AbortCacheKey(originRangeID, txnIDMin))
	originMax := engine.MakeMVCCMetadataKey(keys.AbortCacheKey(originRangeID, txnIDMax))
	return copySeqCache(e, ms, originRangeID, sc.rangeID, originMin, originMax)
}
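// Illustrative sketch (not from the original source): one plausible way
// CopyFrom could be driven when folding a subsumed range's abort cache into
// the current one, e.g. during a merge. The batch/ms/subsumedRangeID names
// are placeholders; only the CopyFrom signature is taken from the code above.
func mergeAbortCacheSketch(
	ctx context.Context,
	sc *AbortCache,
	batch engine.ReadWriter,
	ms *enginepb.MVCCStats,
	subsumedRangeID roachpb.RangeID,
) error {
	copied, err := sc.CopyFrom(ctx, batch, ms, subsumedRangeID)
	if err != nil {
		return err
	}
	log.Infof(ctx, "copied %d abort cache entries from r%d", copied, subsumedRangeID)
	return nil
}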
// Calculate the size (MVCCStats.SysBytes) of the {raft,lease} applied index
// keys/values.
func calcAppliedIndexSysBytes(
	rangeID roachpb.RangeID, appliedIndex, leaseAppliedIndex uint64,
) int64 {
	return int64(engine.MakeMVCCMetadataKey(keys.RaftAppliedIndexKey(rangeID)).EncodedSize() +
		engine.MakeMVCCMetadataKey(keys.LeaseAppliedIndexKey(rangeID)).EncodedSize() +
		inlineValueIntEncodedSize(int64(appliedIndex)) +
		inlineValueIntEncodedSize(int64(leaseAppliedIndex)))
}
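// Illustrative sketch (not from the original source): a plausible way the
// helper above could be used to keep SysBytes consistent when the applied
// index values are rewritten. The function and parameter names are
// hypothetical.
func updateAppliedIndexSysBytes(
	ms *enginepb.MVCCStats,
	rangeID roachpb.RangeID,
	oldAppliedIndex, oldLeaseAppliedIndex, newAppliedIndex, newLeaseAppliedIndex uint64,
) {
	// The stats delta is the difference between the new and old encoded sizes.
	ms.SysBytes += calcAppliedIndexSysBytes(rangeID, newAppliedIndex, newLeaseAppliedIndex) -
		calcAppliedIndexSysBytes(rangeID, oldAppliedIndex, oldLeaseAppliedIndex)
}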
// ClearData removes all persisted items stored in the cache.
func (sc *AbortCache) ClearData(e engine.Engine) error {
	b := e.NewBatch()
	defer b.Close()
	_, err := engine.ClearRange(b, engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
	if err != nil {
		return err
	}
	return b.Commit()
}
// ClearData removes all persisted items stored in the cache.
func (sc *AbortCache) ClearData(e engine.Engine) error {
	iter := e.NewIterator(false)
	defer iter.Close()
	b := e.NewWriteOnlyBatch()
	defer b.Close()
	err := b.ClearRange(iter, engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
	if err != nil {
		return err
	}
	return b.Commit()
}
// findTimeSeries searches the supplied engine over the supplied key range,
// identifying time series which have stored data in the range, along with the
// resolutions at which time series data is stored. A unique name/resolution
// pair will only be identified once, even if the range contains keys for that
// name/resolution pair at multiple timestamps or from multiple sources.
//
// An engine snapshot is used, rather than a client, because this function is
// intended to be called by a storage queue which can inspect the local data
// for a single range without the need for expensive network calls.
func findTimeSeries(
	snapshot engine.Reader, startKey, endKey roachpb.RKey, now hlc.Timestamp,
) ([]timeSeriesResolutionInfo, error) {
	var results []timeSeriesResolutionInfo

	iter := snapshot.NewIterator(false)
	defer iter.Close()

	// Set the start boundary for the search, which is the greater of the range
	// start key and the beginning of time series data.
	start := engine.MakeMVCCMetadataKey(startKey.AsRawKey())
	next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix)
	if next.Less(start) {
		next = start
	}

	// Set the end boundary for the search, which is the lesser of the range
	// end key and the end of time series data.
	end := engine.MakeMVCCMetadataKey(endKey.AsRawKey())
	lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd())
	if lastTS.Less(end) {
		end = lastTS
	}

	thresholds := computeThresholds(now.WallTime)

	for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) {
		foundKey := iter.Key().Key

		// Extract the name and resolution from the discovered key.
		name, _, res, tsNanos, err := DecodeDataKey(foundKey)
		if err != nil {
			return nil, err
		}

		// Skip this time series if there's nothing to prune. We check the
		// oldest (first) time series record's timestamp against the
		// pruning threshold.
		if threshold, ok := thresholds[res]; !ok || threshold > tsNanos {
			results = append(results, timeSeriesResolutionInfo{
				Name:       name,
				Resolution: res,
			})
		}

		// Advance 'next' to the next possible time series key which could
		// belong to a previously undiscovered time series.
		next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd())
	}

	return results, nil
}
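// Illustrative sketch (not from the original source): how a pruning pass might
// consume the results of findTimeSeries, logging each series in the replica's
// span that still has prunable data. The function name and logging are
// assumptions; the findTimeSeries signature is taken from the code above.
func logPrunableSeries(
	ctx context.Context,
	snap engine.Reader,
	desc *roachpb.RangeDescriptor,
	now hlc.Timestamp,
) error {
	infos, err := findTimeSeries(snap, desc.StartKey, desc.EndKey, now)
	if err != nil {
		return err
	}
	for _, info := range infos {
		log.Infof(ctx, "series %s has prunable data at resolution %v", info.Name, info.Resolution)
	}
	return nil
}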
func runDebugRangeDescriptors(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	if len(args) != 1 {
		return errors.New("one argument required: dir")
	}

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)

	return db.Iterate(start, end, printRangeDescriptor)
}
// findTimeSeries searches the supplied engine over the supplied key range,
// identifying time series which have stored data in the range, along with the
// resolutions at which time series data is stored. A unique name/resolution
// pair will only be identified once, even if the range contains keys for that
// name/resolution pair at multiple timestamps or from multiple sources.
//
// An engine snapshot is used, rather than a client, because this function is
// intended to be called by a storage queue which can inspect the local data
// for a single range without the need for expensive network calls.
func findTimeSeries(
	snapshot engine.Reader, startKey, endKey roachpb.RKey,
) ([]timeSeriesResolutionInfo, error) {
	var results []timeSeriesResolutionInfo

	iter := snapshot.NewIterator(false)
	defer iter.Close()

	// Set the start boundary for the search, which is the greater of the range
	// start key and the beginning of time series data.
	start := engine.MakeMVCCMetadataKey(startKey.AsRawKey())
	next := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix)
	if next.Less(start) {
		next = start
	}

	// Set the end boundary for the search, which is the lesser of the range
	// end key and the end of time series data.
	end := engine.MakeMVCCMetadataKey(endKey.AsRawKey())
	lastTS := engine.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd())
	if lastTS.Less(end) {
		end = lastTS
	}

	for iter.Seek(next); iter.Valid() && iter.Less(end); iter.Seek(next) {
		foundKey := iter.Key().Key

		// Extract the name and resolution from the discovered key.
		name, _, res, _, err := DecodeDataKey(foundKey)
		if err != nil {
			return nil, err
		}
		results = append(results, timeSeriesResolutionInfo{
			Name:       name,
			Resolution: res,
		})

		// Advance 'next' to the next possible time series key which could
		// belong to a previously undiscovered time series.
		next = engine.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd())
	}

	return results, nil
}
func (k *mvccKey) Set(value string) error {
	var typ keyType
	var keyStr string
	i := strings.IndexByte(value, ':')
	if i == -1 {
		keyStr = value
	} else {
		var err error
		typ, err = parseKeyType(value[:i])
		if err != nil {
			return err
		}
		keyStr = value[i+1:]
	}

	switch typ {
	case raw:
		unquoted, err := unquoteArg(keyStr, false)
		if err != nil {
			return err
		}
		*k = mvccKey(engine.MakeMVCCMetadataKey(roachpb.Key(unquoted)))
	case human:
		key, err := keys.UglyPrint(keyStr)
		if err != nil {
			return err
		}
		*k = mvccKey(engine.MakeMVCCMetadataKey(key))
	case rangeID:
		fromID, err := parseRangeID(keyStr)
		if err != nil {
			return err
		}
		*k = mvccKey(engine.MakeMVCCMetadataKey(keys.MakeRangeIDPrefix(fromID)))
	default:
		return fmt.Errorf("unknown key type %s", typ)
	}

	return nil
}
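// Illustrative sketch (not from the original source): exercising mvccKey.Set.
// The exact type-prefix spellings accepted by parseKeyType are not shown
// above, so the "rangeID:" form used here is an assumption; the unprefixed
// form is simply treated as a raw key.
func exampleSetMVCCKey() error {
	var k mvccKey
	// No type prefix: parsed as a raw key after unquoting.
	if err := k.Set("a"); err != nil {
		return err
	}
	// Hypothetical range-ID form: resolves to the range-ID local key prefix
	// for range 1 (assumes parseKeyType accepts this spelling).
	if err := k.Set("rangeID:1"); err != nil {
		return err
	}
	return nil
}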
// makeReplicaKeyRanges returns the range-ID local, range-local, and user key
// spans that make up a replica's data, using metaFunc to construct the
// range-ID local prefix.
func makeReplicaKeyRanges(
	d *roachpb.RangeDescriptor, metaFunc func(roachpb.RangeID) roachpb.Key,
) []keyRange {
	// The first range in the keyspace starts at KeyMin, which includes the
	// node-local space. We need the original StartKey to find the range
	// metadata, but the actual data starts at LocalMax.
	dataStartKey := d.StartKey.AsRawKey()
	if d.StartKey.Equal(roachpb.RKeyMin) {
		dataStartKey = keys.LocalMax
	}
	sysRangeIDKey := metaFunc(d.RangeID)
	return []keyRange{
		{
			start: engine.MakeMVCCMetadataKey(sysRangeIDKey),
			end:   engine.MakeMVCCMetadataKey(sysRangeIDKey.PrefixEnd()),
		},
		{
			start: engine.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(d.StartKey)),
			end:   engine.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(d.EndKey)),
		},
		{
			start: engine.MakeMVCCMetadataKey(dataStartKey),
			end:   engine.MakeMVCCMetadataKey(d.EndKey.AsRawKey()),
		},
	}
}
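// Illustrative sketch (not from the original source): consuming the key
// ranges returned above to clear all of a replica's data in one batch,
// mirroring the ClearRange/Commit pattern used by AbortCache.ClearData. The
// function name is hypothetical; passing keys.MakeRangeIDPrefix as metaFunc
// is an assumption.
func clearReplicaDataSketch(e engine.Engine, d *roachpb.RangeDescriptor) error {
	b := e.NewBatch()
	defer b.Close()
	for _, kr := range makeReplicaKeyRanges(d, keys.MakeRangeIDPrefix) {
		if _, err := engine.ClearRange(b, kr.start, kr.end); err != nil {
			return err
		}
	}
	return b.Commit()
}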
func runDebugRaftLog(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	if len(args) != 2 {
		return errors.New("two arguments required: dir range_id")
	}

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	rangeID, err := parseRangeID(args[1])
	if err != nil {
		return err
	}

	start := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID))
	end := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID).PrefixEnd())

	return db.Iterate(start, end, printRaftLogEntry)
}
// copySeqCache copies the abort cache entries in the span [keyMin, keyMax)
// into the corresponding keys for dstID, rewriting each value's checksum for
// its new key. It returns the number of entries copied and, if ms is non-nil,
// updates the system stats accordingly.
func copySeqCache(
	e engine.ReadWriter,
	ms *enginepb.MVCCStats,
	srcID, dstID roachpb.RangeID,
	keyMin, keyMax engine.MVCCKey,
) (int, error) {
	var scratch [64]byte
	var count int
	var meta enginepb.MVCCMetadata
	// TODO(spencer): look into making this an MVCCIteration and writing
	// the values using MVCC so we can avoid the ugliness of updating
	// the MVCCStats by hand below.
	err := e.Iterate(keyMin, keyMax, func(kv engine.MVCCKeyValue) (bool, error) {
		// Decode the key, returning an error if it does not parse as an abort
		// cache key. Otherwise, write it to the corresponding key in the new
		// cache.
		txnID, err := decodeAbortCacheMVCCKey(kv.Key, scratch[:0])
		if err != nil {
			return false, errors.Errorf("could not decode an abort cache key %s: %s", kv.Key, err)
		}
		key := keys.AbortCacheKey(dstID, txnID)
		encKey := engine.MakeMVCCMetadataKey(key)
		// Decode the MVCCMetadata value.
		if err := proto.Unmarshal(kv.Value, &meta); err != nil {
			return false, errors.Errorf("could not decode mvcc metadata %s [% x]: %s", kv.Key, kv.Value, err)
		}
		value := engine.MakeValue(meta)
		value.ClearChecksum()
		value.InitChecksum(key)
		meta.RawBytes = value.RawBytes
		keyBytes, valBytes, err := engine.PutProto(e, encKey, &meta)
		if err != nil {
			return false, err
		}
		count++
		if ms != nil {
			ms.SysBytes += keyBytes + valBytes
			ms.SysCount++
		}
		return false, nil
	})
	return count, err
}
func verifyCleanup(key roachpb.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	util.SucceedsSoon(t, func() error {
		coord.Lock()
		l := len(coord.txns)
		coord.Unlock()
		if l != 0 {
			return fmt.Errorf("expected empty transactions map; got %d", l)
		}
		meta := &enginepb.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MakeMVCCMetadataKey(key), meta)
		if err != nil {
			return fmt.Errorf("error getting MVCC metadata: %s", err)
		}
		if ok && meta.Txn != nil {
			return fmt.Errorf("found unexpected write intent: %s", meta)
		}
		return nil
	})
}
// TestGCQueueIntentResolution verifies intent resolution with many
// intents spanning just two transactions.
func TestGCQueueIntentResolution(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	txns := []*roachpb.Transaction{
		newTransaction("txn1", roachpb.Key("0-00000"), 1, enginepb.SERIALIZABLE, tc.clock),
		newTransaction("txn2", roachpb.Key("1-00000"), 1, enginepb.SERIALIZABLE, tc.clock),
	}
	intentResolveTS := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)
	txns[0].OrigTimestamp = intentResolveTS
	txns[0].Timestamp = intentResolveTS
	txns[1].OrigTimestamp = intentResolveTS
	txns[1].Timestamp = intentResolveTS

	// Two transactions.
	for i := 0; i < 2; i++ {
		// 5 puts per transaction.
		// TODO(spencerkimball): benchmark with ~50k.
		for j := 0; j < 5; j++ {
			pArgs := putArgs(roachpb.Key(fmt.Sprintf("%d-%05d", i, j)), []byte("value"))
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Txn: txns[i],
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
			txns[i].Sequence++
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through a scan queue.
	gcQ := newGCQueue(tc.store, tc.gossip)
	if err := gcQ.process(context.Background(), tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	// Iterate through all values to ensure intents have been fully resolved.
	meta := &enginepb.MVCCMetadata{}
	err := tc.store.Engine().Iterate(engine.MakeMVCCMetadataKey(roachpb.KeyMin),
		engine.MakeMVCCMetadataKey(roachpb.KeyMax), func(kv engine.MVCCKeyValue) (bool, error) {
			if !kv.Key.IsValue() {
				if err := proto.Unmarshal(kv.Value, meta); err != nil {
					return false, err
				}
				if meta.Txn != nil {
					return false, errors.Errorf("non-nil Txn after GC for key %s", kv.Key)
				}
			}
			return false, nil
		})
	if err != nil {
		t.Fatal(err)
	}
}
// TestGCQueueProcess creates test data in the range over various time
// scales and verifies that the scan queue process properly GCs test data.
func TestGCQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	ts1 := makeTS(now-2*24*60*60*1E9+1, 0)                     // 2d old (add one nanosecond so we're not using zero timestamp)
	ts2 := makeTS(now-25*60*60*1E9, 0)                         // GC will occur at time=25 hours
	ts2m1 := ts2.Prev()                                        // ts2 - 1 so we have something not right at the GC time
	ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)     // 2h old
	ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
	ts5 := makeTS(now-1E9, 0)                                  // 1s old
	key1 := roachpb.Key("a")
	key2 := roachpb.Key("b")
	key3 := roachpb.Key("c")
	key4 := roachpb.Key("d")
	key5 := roachpb.Key("e")
	key6 := roachpb.Key("f")
	key7 := roachpb.Key("g")
	key8 := roachpb.Key("h")
	key9 := roachpb.Key("i")
	key10 := roachpb.Key("j")
	key11 := roachpb.Key("k")

	data := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
		del bool
		txn bool
	}{
		// For key1, we expect first value to GC.
		{key1, ts1, false, false},
		{key1, ts2, false, false},
		{key1, ts5, false, false},
		// For key2, we expect values to GC, even though most recent is deletion.
		{key2, ts1, false, false},
		{key2, ts2m1, false, false}, // use a value < the GC time to verify it's kept
		{key2, ts5, true, false},
		// For key3, we expect just ts1 to GC, because most recent deletion is intent.
		{key3, ts1, false, false},
		{key3, ts2, false, false},
		{key3, ts5, true, true},
		// For key4, expect oldest value to GC.
		{key4, ts1, false, false},
		{key4, ts2, false, false},
		// For key5, expect all values to GC (most recent value deleted).
		{key5, ts1, false, false},
		{key5, ts2, true, false}, // deleted, so GC
		// For key6, expect no values to GC because most recent value is intent.
		{key6, ts1, false, false},
		{key6, ts5, false, true},
		// For key7, expect no values to GC because intent is exactly 2h old.
		{key7, ts2, false, false},
		{key7, ts4, false, true},
		// For key8, expect most recent value to resolve by aborting, which will clean it up.
		{key8, ts2, false, false},
		{key8, ts3, true, true},
		// For key9, resolve naked intent with no remaining values.
		{key9, ts3, false, true},
		// For key10, GC ts1 because it's a delete but not ts3 because it's above the threshold.
		{key10, ts1, true, false},
		{key10, ts3, true, false},
		{key10, ts4, false, false},
		{key10, ts5, false, false},
		// For key11, we can't GC anything because ts1 isn't a delete.
		{key11, ts1, false, false},
		{key11, ts3, true, false},
		{key11, ts4, true, false},
		{key11, ts5, true, false},
	}

	for i, datum := range data {
		if datum.del {
			dArgs := deleteArgs(datum.key)
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &dArgs); err != nil {
				t.Fatalf("%d: could not delete data: %s", i, err)
			}
		} else {
			pArgs := putArgs(datum.key, []byte("value"))
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through a scan queue.
	gcQ := newGCQueue(tc.store, tc.gossip)
	if err := gcQ.process(context.Background(), tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	expKVs := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
	}{
		{key1, ts5},
		{key1, ts2},
		{key2, ts5},
		{key2, ts2m1},
		{key3, hlc.ZeroTimestamp},
		{key3, ts5},
		{key3, ts2},
		{key4, ts2},
		{key6, hlc.ZeroTimestamp},
		{key6, ts5},
		{key6, ts1},
		{key7, hlc.ZeroTimestamp},
		{key7, ts4},
		{key7, ts2},
		{key8, ts2},
		{key10, ts5},
		{key10, ts4},
		{key10, ts3},
		{key11, ts5},
		{key11, ts4},
		{key11, ts3},
		{key11, ts1},
	}
	// Read data directly from engine to avoid intent errors from MVCC.
	kvs, err := engine.Scan(tc.store.Engine(), engine.MakeMVCCMetadataKey(key1),
		engine.MakeMVCCMetadataKey(keys.MaxKey), 0)
	if err != nil {
		t.Fatal(err)
	}
	for i, kv := range kvs {
		if log.V(1) {
			log.Infof(context.Background(), "%d: %s", i, kv.Key)
		}
	}
	if len(kvs) != len(expKVs) {
		t.Fatalf("expected length %d; got %d", len(expKVs), len(kvs))
	}
	for i, kv := range kvs {
		if !kv.Key.Key.Equal(expKVs[i].key) {
			t.Errorf("%d: expected key %q; got %q", i, expKVs[i].key, kv.Key.Key)
		}
		if !kv.Key.Timestamp.Equal(expKVs[i].ts) {
			t.Errorf("%d: expected ts=%s; got %s", i, expKVs[i].ts, kv.Key.Timestamp)
		}
		if log.V(1) {
			log.Infof(context.Background(), "%d: %s", i, kv.Key)
		}
	}
}
// RunGC runs garbage collection for the specified descriptor on the provided
// Engine (which is not mutated). It uses the provided functions pushTxnFn and
// resolveIntentsFn to clarify the true status of and clean up after
// encountered transactions. It returns a slice of gc'able keys from the data,
// transaction, and abort spans.
func RunGC(
	ctx context.Context,
	desc *roachpb.RangeDescriptor,
	snap engine.Reader,
	now hlc.Timestamp,
	policy config.GCPolicy,
	pushTxnFn pushFunc,
	resolveIntentsFn resolveFunc,
) ([]roachpb.GCRequest_GCKey, GCInfo, error) {
	iter := NewReplicaDataIterator(desc, snap, true /* replicatedOnly */)
	defer iter.Close()

	var infoMu = lockableGCInfo{}
	infoMu.Policy = policy
	infoMu.Now = now

	{
		realResolveIntentsFn := resolveIntentsFn
		resolveIntentsFn = func(intents []roachpb.Intent, poison bool, wait bool) (err error) {
			defer func() {
				infoMu.Lock()
				infoMu.ResolveTotal += len(intents)
				if err == nil {
					infoMu.ResolveSuccess += len(intents)
				}
				infoMu.Unlock()
			}()
			return realResolveIntentsFn(intents, poison, wait)
		}
		realPushTxnFn := pushTxnFn
		pushTxnFn = func(ts hlc.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
			infoMu.Lock()
			infoMu.PushTxn++
			infoMu.Unlock()
			realPushTxnFn(ts, txn, typ)
		}
	}

	// Compute intent expiration (intent age at which we attempt to resolve).
	intentExp := now
	intentExp.WallTime -= intentAgeThreshold.Nanoseconds()
	txnExp := now
	txnExp.WallTime -= txnCleanupThreshold.Nanoseconds()
	abortSpanGCThreshold := now.Add(-int64(abortCacheAgeThreshold), 0)

	gc := engine.MakeGarbageCollector(now, policy)
	infoMu.Threshold = gc.Threshold
	infoMu.TxnSpanGCThreshold = txnExp

	var gcKeys []roachpb.GCRequest_GCKey
	var expBaseKey roachpb.Key
	var keys []engine.MVCCKey
	var vals [][]byte

	// Maps from txn ID to txn and intent key slice.
	txnMap := map[uuid.UUID]*roachpb.Transaction{}
	intentSpanMap := map[uuid.UUID][]roachpb.Span{}

	// processKeysAndValues is invoked with each key and its set of
	// values. Intents older than the intent age threshold are sent for
	// resolution and values after the MVCC metadata, and possible
	// intent, are sent for garbage collection.
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &enginepb.MVCCMetadata{}
			if err := proto.Unmarshal(vals[0], meta); err != nil {
				log.Errorf(ctx, "unable to unmarshal MVCC metadata for key %q: %s", keys[0], err)
			} else {
				// In the event that there's an active intent, send for
				// intent resolution if older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Keep track of intent to resolve if older than the intent
					// expiration threshold.
					if meta.Timestamp.Less(intentExp) {
						txnID := *meta.Txn.ID
						txn := &roachpb.Transaction{
							TxnMeta: *meta.Txn,
						}
						txnMap[txnID] = txn
						infoMu.IntentsConsidered++
						intentSpanMap[txnID] = append(intentSpanMap[txnID], roachpb.Span{Key: expBaseKey})
					}
					// With an active intent, GC ignores MVCC metadata & intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(hlc.ZeroTimestamp) {
					// TODO(spencer): need to split the requests up into
					// multiple requests in the event that more than X keys
					// are added to the request.
					gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
				}
			}
		}
	}

	// Iterate through the keys and values of this replica's range.
	for ; iter.Valid(); iter.Next() {
		iterKey := iter.Key()
		if !iterKey.IsValue() || !iterKey.Key.Equal(expBaseKey) {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = iterKey.Key
			if !iterKey.IsValue() {
				keys = []engine.MVCCKey{iter.Key()}
				vals = [][]byte{iter.Value()}
				continue
			}
			// An implicit metadata.
			keys = []engine.MVCCKey{engine.MakeMVCCMetadataKey(iterKey.Key)}
			// A nil value for the encoded MVCCMetadata. This will unmarshal to an
			// empty MVCCMetadata which is sufficient for processKeysAndValues to
			// determine that there is no intent.
			vals = [][]byte{nil}
		}
		keys = append(keys, iter.Key())
		vals = append(vals, iter.Value())
	}
	if iter.Error() != nil {
		return nil, GCInfo{}, iter.Error()
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()

	infoMu.IntentTxns = len(txnMap)
	infoMu.NumKeysAffected = len(gcKeys)

	txnKeys, err := processTransactionTable(ctx, snap, desc, txnMap, txnExp, &infoMu, resolveIntentsFn)
	if err != nil {
		return nil, GCInfo{}, err
	}

	// From now on, all newly added keys are range-local.
	// TODO(tschottdorf): Might need to use two requests at some point since we
	// hard-coded the full non-local key range in the header, but that does
	// not take into account the range-local keys. It will be OK as long as
	// we send directly to the Replica, though.
	gcKeys = append(gcKeys, txnKeys...)

	// Process push transactions in parallel.
	var wg sync.WaitGroup
	sem := make(chan struct{}, gcTaskLimit)
	for _, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			continue
		}
		wg.Add(1)
		sem <- struct{}{}
		// Avoid passing loop variable into closure.
		txnCopy := txn
		go func() {
			defer func() {
				<-sem
				wg.Done()
			}()
			pushTxnFn(now, txnCopy, roachpb.PUSH_ABORT)
		}()
	}
	wg.Wait()

	// Resolve all intents.
	var intents []roachpb.Intent
	for txnID, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			for _, intent := range intentSpanMap[txnID] {
				intents = append(intents, roachpb.Intent{Span: intent, Status: txn.Status, Txn: txn.TxnMeta})
			}
		}
	}

	if err := resolveIntentsFn(intents, false /* !poison */, true /* wait */); err != nil {
		return nil, GCInfo{}, err
	}

	// Clean up the abort cache.
	gcKeys = append(gcKeys, processAbortCache(
		ctx, snap, desc.RangeID, abortSpanGCThreshold, &infoMu, pushTxnFn)...)

	return gcKeys, infoMu.GCInfo, nil
}
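// Illustrative sketch (not from the original source): a dry run of RunGC with
// no-op push/resolve callbacks, so that only the scan and classification of
// GC'able keys take place. The callback literals match the pushFunc and
// resolveFunc shapes used by RunGC above; the function name is hypothetical,
// and this is not how the GC queue itself drives RunGC.
func dryRunGC(
	ctx context.Context,
	desc *roachpb.RangeDescriptor,
	snap engine.Reader,
	now hlc.Timestamp,
	policy config.GCPolicy,
) ([]roachpb.GCRequest_GCKey, GCInfo, error) {
	// No-op callbacks: do not push transactions or resolve intents.
	noopPush := func(hlc.Timestamp, *roachpb.Transaction, roachpb.PushTxnType) {}
	noopResolve := func([]roachpb.Intent, bool, bool) error { return nil }
	return RunGC(ctx, desc, snap, now, policy, noopPush, noopResolve)
}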
// TestTxnCoordSenderGCWithCancel verifies that the coordinator cleans up extant
// transactions and intents after transaction context is cancelled.
func TestTxnCoordSenderGCWithCancel(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	ctx, cancel := context.WithCancel(context.Background())
	txn := client.NewTxn(ctx, *s.DB)
	key := roachpb.Key("a")
	if pErr := txn.Put(key, []byte("value")); pErr != nil {
		t.Fatal(pErr)
	}

	// Now, advance clock past the default client timeout.
	// Locking the TxnCoordSender to prevent a data race.
	sender.Lock()
	s.Manual.Increment(defaultClientTimeout.Nanoseconds() + 1)
	sender.Unlock()

	txnID := *txn.Proto.ID

	// Verify that the transaction is alive despite the timeout having been
	// exceeded.
	errStillActive := errors.New("transaction is still active")
	// TODO(dan): Figure out how to run the heartbeat manually instead of this.
	if err := util.RetryForDuration(1*time.Second, func() error {
		// Locking the TxnCoordSender to prevent a data race.
		sender.Lock()
		_, ok := sender.txns[txnID]
		sender.Unlock()
		if !ok {
			return nil
		}
		meta := &enginepb.MVCCMetadata{}
		ok, _, _, err := s.Eng.GetProto(engine.MakeMVCCMetadataKey(key), meta)
		if err != nil {
			t.Fatalf("error getting MVCC metadata: %s", err)
		}
		if !ok || meta.Txn == nil {
			return nil
		}
		return errStillActive
	}); err != errStillActive {
		t.Fatalf("expected transaction to be active, got: %v", err)
	}

	// After the context is cancelled, the transaction should be cleaned up.
	cancel()
	util.SucceedsSoon(t, func() error {
		// Locking the TxnCoordSender to prevent a data race.
		sender.Lock()
		_, ok := sender.txns[txnID]
		sender.Unlock()
		if ok {
			return errors.Errorf("expected garbage collection")
		}
		return nil
	})

	verifyCleanup(key, sender, s.Eng, t)
}
// CopyInto copies all the results from this abort cache into the destRangeID
// abort cache. Failures decoding individual cache entries return an error.
// On success, returns the number of entries (key-value pairs) copied.
func (sc *AbortCache) CopyInto(
	e engine.ReadWriter, ms *enginepb.MVCCStats, destRangeID roachpb.RangeID,
) (int, error) {
	return copySeqCache(e, ms, sc.rangeID, destRangeID,
		engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
}