Example #1
func runDebugRaftLog(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	if len(args) != 2 {
		return errors.New("required arguments: dir range_id")
	}

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	rangeID, err := parseRangeID(args[1])
	if err != nil {
		return err
	}

	start := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID))
	end := engine.MakeMVCCMetadataKey(keys.RaftLogPrefix(rangeID).PrefixEnd())

	if err := db.Iterate(start, end, printRaftLogEntry); err != nil {
		return err
	}
	return nil
}
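printRaftLogEntry is not shown in this example. db.Iterate takes a callback of
type func(engine.MVCCKeyValue) (bool, error); returning true (or a non-nil
error) stops the scan. A minimal hypothetical callback of that shape:

func printMVCCKeyValue(kv engine.MVCCKeyValue) (bool, error) {
	fmt.Printf("%s: %d bytes\n", kv.Key, len(kv.Value))
	return false, nil // false: keep iterating
}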
Example #2
func loadRangeDescriptor(
	db engine.Engine, rangeID roachpb.RangeID,
) (roachpb.RangeDescriptor, error) {
	var desc roachpb.RangeDescriptor
	handleKV := func(kv engine.MVCCKeyValue) (bool, error) {
		if kv.Key.Timestamp == hlc.ZeroTimestamp {
			// We only want values, not MVCCMetadata.
			return false, nil
		}
		if err := checkRangeDescriptorKey(kv.Key); err != nil {
			// Range descriptor keys are interleaved with others, so if it
			// doesn't parse as a range descriptor just skip it.
			return false, nil
		}
		if err := getProtoValue(kv.Value, &desc); err != nil {
			return false, err
		}
		return desc.RangeID == rangeID, nil
	}

	// Range descriptors are stored by key, so we have to scan over the
	// range-local data to find the one for this RangeID.
	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)

	if err := db.Iterate(start, end, handleKV); err != nil {
		return roachpb.RangeDescriptor{}, err
	}
	if desc.RangeID == rangeID {
		return desc, nil
	}
	return roachpb.RangeDescriptor{}, fmt.Errorf("range descriptor %d not found", rangeID)
}
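A hypothetical call site, assuming an open engine db and a parsed range ID
(both names are illustrative):

	desc, err := loadRangeDescriptor(db, rangeID)
	if err != nil {
		return err
	}
	fmt.Printf("r%d: [%s, %s)\n", desc.RangeID, desc.StartKey, desc.EndKey)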
Example #3
// CopyFrom copies all the persisted results from the originRangeID
// sequence cache into this one. Note that the cache will not be
// locked while copying is in progress. Failures decoding individual
// entries return an error. The copy is done directly using the engine
// instead of interpreting values through MVCC for efficiency.
func (sc *SequenceCache) CopyFrom(e engine.Engine, originRangeID roachpb.RangeID) error {
	originMin := engine.MakeMVCCMetadataKey(
		keys.SequenceCacheKey(originRangeID, txnIDMin, math.MaxUint32, math.MaxUint32))
	originMax := engine.MakeMVCCMetadataKey(
		keys.SequenceCacheKey(originRangeID, txnIDMax, 0, 0))
	return copySeqCache(e, originRangeID, sc.rangeID, originMin, originMax)
}
Example #4
// CopyInto copies all the results from this abort cache into the destRangeID
// abort cache. Failures decoding individual cache entries return an error.
// On success, returns the number of entries (key-value pairs) copied.
func (sc *AbortCache) CopyInto(
	e engine.Engine,
	ms *engine.MVCCStats,
	destRangeID roachpb.RangeID,
) (int, error) {
	return copySeqCache(e, ms, sc.rangeID, destRangeID,
		engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
}
Example #5
// ClearData removes all persisted items stored in the cache.
func (sc *AbortCache) ClearData(e engine.Engine) error {
	b := e.NewBatch()
	defer b.Close()
	_, err := engine.ClearRange(b, engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
	if err != nil {
		return err
	}
	return b.Commit()
}
Example #6
// CopyFrom copies all the persisted results from the originRangeID
// abort cache into this one. Note that the cache will not be
// locked while copying is in progress. Failures decoding individual
// entries return an error. The copy is done directly using the engine
// instead of interpreting values through MVCC for efficiency.
// On success, returns the number of entries (key-value pairs) copied.
func (sc *AbortCache) CopyFrom(
	ctx context.Context,
	e engine.Engine,
	ms *engine.MVCCStats,
	originRangeID roachpb.RangeID,
) (int, error) {
	originMin := engine.MakeMVCCMetadataKey(keys.AbortCacheKey(originRangeID, txnIDMin))
	originMax := engine.MakeMVCCMetadataKey(keys.AbortCacheKey(originRangeID, txnIDMax))
	return copySeqCache(e, ms, originRangeID, sc.rangeID, originMin, originMax)
}
Example #7
func runDebugKeys(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	if len(args) != 1 {
		return errors.New("one argument is required")
	}

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	d := cliContext.debug

	from := engine.NilKey
	to := engine.MVCCKeyMax
	if d.raw {
		if len(d.startKey) > 0 {
			from = engine.MakeMVCCMetadataKey(roachpb.Key(d.startKey))
		}
		if len(d.endKey) > 0 {
			to = engine.MakeMVCCMetadataKey(roachpb.Key(d.endKey))
		}
	} else {
		if len(d.startKey) > 0 {
			startKey, err := keys.UglyPrint(d.startKey)
			if err != nil {
				return err
			}
			from = engine.MakeMVCCMetadataKey(startKey)
		}
		if len(d.endKey) > 0 {
			endKey, err := keys.UglyPrint(d.endKey)
			if err != nil {
				return err
			}
			to = engine.MakeMVCCMetadataKey(endKey)
		}
	}

	printer := printKey
	if d.values {
		printer = printKeyValue
	}

	if err := db.Iterate(from, to, printer); err != nil {
		return err
	}

	return nil
}
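printKey and printKeyValue are likewise not shown; minimal hypothetical
sketches matching the callback signature db.Iterate expects could be:

func printKey(kv engine.MVCCKeyValue) (bool, error) {
	fmt.Println(kv.Key)
	return false, nil
}

func printKeyValue(kv engine.MVCCKeyValue) (bool, error) {
	fmt.Printf("%s: % x\n", kv.Key, kv.Value)
	return false, nil
}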
Example #8
func copySeqCache(e engine.Engine, srcID, dstID roachpb.RangeID, keyMin, keyMax engine.MVCCKey) error {
	var scratch [64]byte
	return e.Iterate(keyMin, keyMax,
		func(kv engine.MVCCKeyValue) (bool, error) {
			// Decode the key into a cmd, failing on error. Otherwise,
			// write it to the corresponding key in the new cache.
			id, epoch, seq, err := decodeSequenceCacheMVCCKey(kv.Key, scratch[:0])
			if err != nil {
				return false, util.Errorf("could not decode a sequence cache key %s: %s",
					kv.Key, err)
			}
			key := keys.SequenceCacheKey(dstID, id, epoch, seq)
			encKey := engine.MakeMVCCMetadataKey(key)
			// Decode the value, update the checksum and re-encode.
			meta := &engine.MVCCMetadata{}
			if err := proto.Unmarshal(kv.Value, meta); err != nil {
				return false, util.Errorf("could not decode sequence cache value %s [% x]: %s",
					kv.Key, kv.Value, err)
			}
			value := meta.Value()
			value.ClearChecksum()
			value.InitChecksum(key)
			meta.RawBytes = value.RawBytes
			_, _, err = engine.PutProto(e, encKey, meta)
			return false, err
		})
}
Example #9
func runDebugRangeDescriptors(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)

	if err := db.Iterate(start, end, printRangeDescriptor); err != nil {
		return err
	}
	return nil
}
Example #10
func runDebugRangeDescriptors(cmd *cobra.Command, args []string) error {
	stopper := stop.NewStopper()
	defer stopper.Stop()

	if len(args) != 1 {
		return errors.New("one argument required: dir")
	}

	db, err := openStore(cmd, args[0], stopper)
	if err != nil {
		return err
	}

	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)

	return db.Iterate(start, end, printRangeDescriptor)
}
Example #11
func (k *mvccKey) Set(value string) error {
	var typ keyType
	var keyStr string
	i := strings.IndexByte(value, ':')
	if i == -1 {
		keyStr = value
	} else {
		var err error
		typ, err = parseKeyType(value[:i])
		if err != nil {
			return err
		}
		keyStr = value[i+1:]
	}

	switch typ {
	case raw:
		*k = mvccKey(engine.MakeMVCCMetadataKey(roachpb.Key(keyStr)))
	case human:
		key, err := keys.UglyPrint(keyStr)
		if err != nil {
			return err
		}
		*k = mvccKey(engine.MakeMVCCMetadataKey(key))
	case rangeID:
		fromID, err := parseRangeID(keyStr)
		if err != nil {
			return err
		}
		*k = mvccKey(engine.MakeMVCCMetadataKey(keys.MakeRangeIDPrefix(fromID)))
	default:
		return fmt.Errorf("unknown key type %s", typ)
	}

	return nil
}
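A hypothetical use of mvccKey as a flag value, exercising the rangeID form
(parseKeyType and the raw/human/rangeID keyType constants are assumed from
the surrounding file):

	var k mvccKey
	if err := k.Set("rangeID:42"); err != nil {
		return err
	}
	// k now holds the MVCC metadata key at range 42's ID-local key prefix.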
Example #12
func copySeqCache(
	e engine.Engine,
	ms *engine.MVCCStats,
	srcID, dstID roachpb.RangeID,
	keyMin, keyMax engine.MVCCKey,
) (int, error) {
	var scratch [64]byte
	var count int
	var meta engine.MVCCMetadata
	// TODO(spencer): look into making this an MVCCIteration and writing
	// the values using MVCC so we can avoid the ugliness of updating
	// the MVCCStats by hand below.
	err := e.Iterate(keyMin, keyMax,
		func(kv engine.MVCCKeyValue) (bool, error) {
			// Decode the key, failing on error. Otherwise, write it to the
			// corresponding key in the new cache.
			txnID, err := decodeAbortCacheMVCCKey(kv.Key, scratch[:0])
			if err != nil {
				return false, util.Errorf("could not decode an abort cache key %s: %s", kv.Key, err)
			}
			key := keys.AbortCacheKey(dstID, txnID)
			encKey := engine.MakeMVCCMetadataKey(key)
			// Decode the MVCCMetadata value.
			if err := proto.Unmarshal(kv.Value, &meta); err != nil {
				return false, util.Errorf("could not decode mvcc metadata %s [% x]: %s", kv.Key, kv.Value, err)
			}
			value := meta.Value()
			value.ClearChecksum()
			value.InitChecksum(key)
			meta.RawBytes = value.RawBytes

			keyBytes, valBytes, err := engine.PutProto(e, encKey, &meta)
			if err != nil {
				return false, err
			}
			count++
			if ms != nil {
				ms.SysBytes += keyBytes + valBytes
				ms.SysCount++
			}
			return false, nil
		})
	return count, err
}
Example #13
func verifyCleanup(key roachpb.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	util.SucceedsWithin(t, 500*time.Millisecond, func() error {
		coord.Lock()
		l := len(coord.txns)
		coord.Unlock()
		if l != 0 {
			return fmt.Errorf("expected empty transactions map; got %d", l)
		}
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MakeMVCCMetadataKey(key), meta)
		if err != nil {
			return fmt.Errorf("error getting MVCC metadata: %s", err)
		}
		if ok && meta.Txn != nil {
			return fmt.Errorf("found unexpected write intent: %s", meta)
		}
		return nil
	})
}
Example #14
func makeReplicaKeyRanges(d *roachpb.RangeDescriptor) []keyRange {
	// The first range in the keyspace starts at KeyMin, which includes the
	// node-local space. We need the original StartKey to find the range
	// metadata, but the actual data starts at LocalMax.
	dataStartKey := d.StartKey.AsRawKey()
	if d.StartKey.Equal(roachpb.RKeyMin) {
		dataStartKey = keys.LocalMax
	}
	return []keyRange{
		{
			start: engine.MakeMVCCMetadataKey(keys.MakeRangeIDPrefix(d.RangeID)),
			end:   engine.MakeMVCCMetadataKey(keys.MakeRangeIDPrefix(d.RangeID + 1)),
		},
		{
			start: engine.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(d.StartKey)),
			end:   engine.MakeMVCCMetadataKey(keys.MakeRangeKeyPrefix(d.EndKey)),
		},
		{
			start: engine.MakeMVCCMetadataKey(dataStartKey),
			end:   engine.MakeMVCCMetadataKey(d.EndKey.AsRawKey()),
		},
	}
}
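A sketch of consuming these ranges, e.g. to visit every replicated key of a
replica (hypothetical; assumes an engine eng, a descriptor desc, and an
iteration callback fn in scope):

	for _, kr := range makeReplicaKeyRanges(desc) {
		if err := eng.Iterate(kr.start, kr.end, fn); err != nil {
			return err
		}
	}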
Example #15
// TestGCQueueIntentResolution verifies intent resolution with many
// intents spanning just two transactions.
func TestGCQueueIntentResolution(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	txns := []*roachpb.Transaction{
		newTransaction("txn1", roachpb.Key("0-00000"), 1, enginepb.SERIALIZABLE, tc.clock),
		newTransaction("txn2", roachpb.Key("1-00000"), 1, enginepb.SERIALIZABLE, tc.clock),
	}
	intentResolveTS := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)
	txns[0].OrigTimestamp = intentResolveTS
	txns[0].Timestamp = intentResolveTS
	txns[1].OrigTimestamp = intentResolveTS
	txns[1].Timestamp = intentResolveTS

	// Two transactions.
	for i := 0; i < 2; i++ {
		// 5 puts per transaction.
		// TODO(spencerkimball): benchmark with ~50k.
		for j := 0; j < 5; j++ {
			pArgs := putArgs(roachpb.Key(fmt.Sprintf("%d-%05d", i, j)), []byte("value"))
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Txn: txns[i],
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
			txns[i].Sequence++
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through the GC queue.
	gcQ := newGCQueue(tc.gossip)
	if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	// Iterate through all values to ensure intents have been fully resolved.
	meta := &enginepb.MVCCMetadata{}
	err := tc.store.Engine().Iterate(engine.MakeMVCCMetadataKey(roachpb.KeyMin),
		engine.MakeMVCCMetadataKey(roachpb.KeyMax), func(kv engine.MVCCKeyValue) (bool, error) {
			if !kv.Key.IsValue() {
				if err := proto.Unmarshal(kv.Value, meta); err != nil {
					return false, err
				}
				if meta.Txn != nil {
					return false, util.Errorf("non-nil Txn after GC for key %s", kv.Key)
				}
			}
			return false, nil
		})
	if err != nil {
		t.Fatal(err)
	}
}
Example #16
// TestGCQueueProcess creates test data in the range over various time
// scales and verifies that the GC queue properly GCs the test data.
func TestGCQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	ts1 := makeTS(now-2*24*60*60*1E9+1, 0)                     // 2d old (add one nanosecond so we're not using zero timestamp)
	ts2 := makeTS(now-25*60*60*1E9, 0)                         // GC will occur at time=25 hours
	ts2m1 := ts2.Prev()                                        // ts2 - 1 so we have something not right at the GC time
	ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)     // 2h old
	ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
	ts5 := makeTS(now-1E9, 0)                                  // 1s old
	key1 := roachpb.Key("a")
	key2 := roachpb.Key("b")
	key3 := roachpb.Key("c")
	key4 := roachpb.Key("d")
	key5 := roachpb.Key("e")
	key6 := roachpb.Key("f")
	key7 := roachpb.Key("g")
	key8 := roachpb.Key("h")
	key9 := roachpb.Key("i")
	key10 := roachpb.Key("j")
	key11 := roachpb.Key("k")

	data := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
		del bool
		txn bool
	}{
		// For key1, we expect first value to GC.
		{key1, ts1, false, false},
		{key1, ts2, false, false},
		{key1, ts5, false, false},
		// For key2, we expect values to GC, even though most recent is deletion.
		{key2, ts1, false, false},
		{key2, ts2m1, false, false}, // use a value < the GC time to verify it's kept
		{key2, ts5, true, false},
		// For key3, we expect just ts1 to GC, because most recent deletion is intent.
		{key3, ts1, false, false},
		{key3, ts2, false, false},
		{key3, ts5, true, true},
		// For key4, expect oldest value to GC.
		{key4, ts1, false, false},
		{key4, ts2, false, false},
		// For key5, expect all values to GC (most recent value deleted).
		{key5, ts1, false, false},
		{key5, ts2, true, false}, // deleted, so GC
		// For key6, expect no values to GC because most recent value is intent.
		{key6, ts1, false, false},
		{key6, ts5, false, true},
		// For key7, expect no values to GC because intent is exactly 2h old.
		{key7, ts2, false, false},
		{key7, ts4, false, true},
		// For key8, expect most recent value to resolve by aborting, which will clean it up.
		{key8, ts2, false, false},
		{key8, ts3, true, true},
		// For key9, resolve naked intent with no remaining values.
		{key9, ts3, false, true},
		// For key10, GC ts1 because it's a delete but not ts3 because it's above the threshold.
		{key10, ts1, true, false},
		{key10, ts3, true, false},
		{key10, ts4, false, false},
		{key10, ts5, false, false},
		// For key11, we can't GC anything because ts1 isn't a delete.
		{key11, ts1, false, false},
		{key11, ts3, true, false},
		{key11, ts4, true, false},
		{key11, ts5, true, false},
	}

	for i, datum := range data {
		if datum.del {
			dArgs := deleteArgs(datum.key)
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &dArgs); err != nil {
				t.Fatalf("%d: could not delete data: %s", i, err)
			}
		} else {
			pArgs := putArgs(datum.key, []byte("value"))
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through the GC queue.
	gcQ := newGCQueue(tc.gossip)
	if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	expKVs := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
	}{
		{key1, ts5},
		{key1, ts2},
		{key2, ts5},
		{key2, ts2m1},
		{key3, hlc.ZeroTimestamp},
		{key3, ts5},
		{key3, ts2},
		{key4, ts2},
		{key6, hlc.ZeroTimestamp},
		{key6, ts5},
		{key6, ts1},
		{key7, hlc.ZeroTimestamp},
		{key7, ts4},
		{key7, ts2},
		{key8, ts2},
		{key10, ts5},
		{key10, ts4},
		{key10, ts3},
		{key11, ts5},
		{key11, ts4},
		{key11, ts3},
		{key11, ts1},
	}
	// Read data directly from engine to avoid intent errors from MVCC.
	kvs, err := engine.Scan(tc.store.Engine(), engine.MakeMVCCMetadataKey(key1),
		engine.MakeMVCCMetadataKey(keys.MaxKey), 0)
	if err != nil {
		t.Fatal(err)
	}
	for i, kv := range kvs {
		if log.V(1) {
			log.Infof("%d: %s", i, kv.Key)
		}
	}
	if len(kvs) != len(expKVs) {
		t.Fatalf("expected length %d; got %d", len(expKVs), len(kvs))
	}
	for i, kv := range kvs {
		if !kv.Key.Key.Equal(expKVs[i].key) {
			t.Errorf("%d: expected key %q; got %q", i, expKVs[i].key, kv.Key.Key)
		}
		if !kv.Key.Timestamp.Equal(expKVs[i].ts) {
			t.Errorf("%d: expected ts=%s; got %s", i, expKVs[i].ts, kv.Key.Timestamp)
		}
		if log.V(1) {
			log.Infof("%d: %s", i, kv.Key)
		}
	}

	// Verify that the last verification timestamp was updated, as the whole range was scanned.
	if _, err := tc.rng.getLastVerificationTimestamp(); err != nil {
		t.Fatal(err)
	}
}
Example #17
// TestTxnCoordSenderGCWithCancel verifies that the coordinator cleans up extant
// transactions and intents after the transaction context is cancelled.
func TestTxnCoordSenderGCWithCancel(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	ctx, cancel := context.WithCancel(context.Background())
	txn := client.NewTxn(ctx, *s.DB)
	key := roachpb.Key("a")
	if pErr := txn.Put(key, []byte("value")); pErr != nil {
		t.Fatal(pErr)
	}

	// Now, advance clock past the default client timeout.
	// Locking the TxnCoordSender to prevent a data race.
	sender.Lock()
	s.Manual.Set(defaultClientTimeout.Nanoseconds() + 1)
	sender.Unlock()

	txnID := *txn.Proto.ID

	// Verify that the transaction is alive despite the timeout having been
	// exceeded.
	errStillActive := errors.New("transaction is still active")
	// TODO(dan): Figure out how to run the heartbeat manually instead of this.
	if err := util.RetryForDuration(1*time.Second, func() error {
		// Locking the TxnCoordSender to prevent a data race.
		sender.Lock()
		_, ok := sender.txns[txnID]
		sender.Unlock()
		if !ok {
			return nil
		}
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := s.Eng.GetProto(engine.MakeMVCCMetadataKey(key), meta)
		if err != nil {
			t.Fatalf("error getting MVCC metadata: %s", err)
		}
		if !ok || meta.Txn == nil {
			return nil
		}
		return errStillActive
	}); err != errStillActive {
		t.Fatalf("expected transaction to be active, got: %v", err)
	}

	// After the context is cancelled, the transaction should be cleaned up.
	cancel()
	util.SucceedsSoon(t, func() error {
		// Locking the TxnCoordSender to prevent a data race.
		sender.Lock()
		_, ok := sender.txns[txnID]
		sender.Unlock()
		if ok {
			return util.Errorf("expected garbage collection")
		}
		return nil
	})

	verifyCleanup(key, sender, s.Eng, t)
}
Example #18
// ClearData removes all persisted items stored in the cache.
func (sc *AbortCache) ClearData(e engine.Engine) error {
	_, err := engine.ClearRange(e, engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
	return err
}
Example #19
// RunGC runs garbage collection for the specified descriptor on the provided
// Engine (which is not mutated). It uses the provided functions pushTxn and
// resolveIntents to clarify the true status of and clean up after encountered
// transactions. It returns a slice of gc'able keys from the data, transaction,
// and abort spans.
func RunGC(ctx context.Context, desc *roachpb.RangeDescriptor, snap engine.Engine, now roachpb.Timestamp, policy config.GCPolicy,
	pushTxn pushFunc, resolveIntents resolveFunc) ([]roachpb.GCRequest_GCKey, GCInfo, error) {

	iter := newReplicaDataIterator(desc, snap, true /* replicatedOnly */)
	defer iter.Close()

	var infoMu = lockableGCInfo{}
	infoMu.Policy = policy
	infoMu.Now = now

	{
		realResolveIntents := resolveIntents
		resolveIntents = func(intents []roachpb.Intent, poison bool, wait bool) (err error) {
			defer func() {
				infoMu.Lock()
				infoMu.ResolveTotal += len(intents)
				if err == nil {
					infoMu.ResolveSuccess += len(intents)
				}
				infoMu.Unlock()
			}()
			return realResolveIntents(intents, poison, wait)
		}
		realPushTxn := pushTxn
		pushTxn = func(ts roachpb.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
			infoMu.Lock()
			infoMu.PushTxn++
			infoMu.Unlock()
			realPushTxn(ts, txn, typ)
		}
	}

	gc := engine.MakeGarbageCollector(now, policy)

	var gcKeys []roachpb.GCRequest_GCKey

	// Compute intent expiration (intent age at which we attempt to resolve).
	intentExp := now
	intentExp.WallTime -= intentAgeThreshold.Nanoseconds()
	txnExp := now
	txnExp.WallTime -= txnCleanupThreshold.Nanoseconds()

	var expBaseKey roachpb.Key
	var keys []engine.MVCCKey
	var vals [][]byte

	// Maps from txn ID to txn and intent key slice.
	txnMap := map[uuid.UUID]*roachpb.Transaction{}
	intentSpanMap := map[uuid.UUID][]roachpb.Span{}

	// processKeysAndValues is invoked with each key and its set of
	// values. Intents older than the intent age threshold are sent for
	// resolution; the values following the MVCC metadata (and the
	// intent value, if any) are considered for garbage collection.
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &engine.MVCCMetadata{}
			if err := proto.Unmarshal(vals[0], meta); err != nil {
				log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err)
			} else {
				// In the event that there's an active intent, send for
				// intent resolution if older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Keep track of intent to resolve if older than the intent
					// expiration threshold.
					if meta.Timestamp.Less(intentExp) {
						txnID := *meta.Txn.ID
						txn := &roachpb.Transaction{
							TxnMeta: *meta.Txn,
						}
						txnMap[txnID] = txn
						infoMu.IntentsConsidered++
						intentSpanMap[txnID] = append(intentSpanMap[txnID], roachpb.Span{Key: expBaseKey})
					}
					// With an active intent, GC ignores MVCC metadata & intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(roachpb.ZeroTimestamp) {
					// TODO(spencer): need to split the requests up into
					// multiple requests in the event that more than X keys
					// are added to the request.
					gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
				}
			}
		}
	}

	// Iterate through the keys and values of this replica's range.
	for ; iter.Valid(); iter.Next() {
		iterKey := iter.Key()
		if !iterKey.IsValue() || !iterKey.Key.Equal(expBaseKey) {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = iterKey.Key
			if !iterKey.IsValue() {
				keys = []engine.MVCCKey{iter.Key()}
				vals = [][]byte{iter.Value()}
				continue
			}
			// An implicit metadata.
			keys = []engine.MVCCKey{engine.MakeMVCCMetadataKey(iterKey.Key)}
			// A nil value for the encoded MVCCMetadata. This will unmarshal to an
			// empty MVCCMetadata which is sufficient for processKeysAndValues to
			// determine that there is no intent.
			vals = [][]byte{nil}
		}
		keys = append(keys, iter.Key())
		vals = append(vals, iter.Value())
	}
	if iter.Error() != nil {
		return nil, GCInfo{}, iter.Error()
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()

	infoMu.IntentTxns = len(txnMap)
	infoMu.GCKeys = len(gcKeys)

	txnKeys, err := processTransactionTable(ctx, snap, desc, txnMap, txnExp, &infoMu, resolveIntents)
	if err != nil {
		return nil, GCInfo{}, err
	}

	// From now on, all newly added keys are range-local.
	// TODO(tschottdorf): Might need to use two requests at some point since we
	// hard-coded the full non-local key range in the header, but that does
	// not take into account the range-local keys. It will be OK as long as
	// we send directly to the Replica, though.
	gcKeys = append(gcKeys, txnKeys...)

	// Process push transactions in parallel.
	var wg sync.WaitGroup
	sem := make(chan struct{}, gcTaskLimit)
	for _, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			continue
		}
		wg.Add(1)
		sem <- struct{}{}
		// Avoid passing loop variable into closure.
		txnCopy := txn
		go func() {
			defer func() {
				<-sem
				wg.Done()
			}()
			pushTxn(now, txnCopy, roachpb.PUSH_ABORT)
		}()
	}
	wg.Wait()

	// Resolve all intents.
	var intents []roachpb.Intent
	for txnID, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			for _, intent := range intentSpanMap[txnID] {
				intents = append(intents, roachpb.Intent{Span: intent, Status: txn.Status, Txn: txn.TxnMeta})
			}
		}
	}

	if err := resolveIntents(intents, false /* !poison */, true /* wait */); err != nil {
		return nil, GCInfo{}, err
	}

	// Clean up the abort cache.
	gcKeys = append(gcKeys, processAbortCache(ctx, snap, desc.RangeID, now,
		abortCacheAgeThreshold, &infoMu, pushTxn)...)
	return gcKeys, infoMu.GCInfo, nil
}
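The pushFunc and resolveFunc types are not spelled out in this example, but
their shapes can be read off the wrappers near the top of RunGC. A
hypothetical no-op wiring, e.g. for a test that only wants the gc'able keys:

	gcKeys, info, err := RunGC(ctx, desc, snap, now, policy,
		func(ts roachpb.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {},
		func(intents []roachpb.Intent, poison, wait bool) error { return nil },
	)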
Example #20
// process iterates through all keys in a replica's range, calling the garbage
// collector for each key and associated set of values. GC'd keys are batched
// into GC calls. Extant intents are resolved if intents are older than
// intentAgeThreshold. The transaction and sequence cache records are also
// scanned and old entries evicted. During normal operation, both of these
// records are cleaned up when their respective transaction finishes, so the
// amount of work done here is expected to be small.
//
// Some care needs to be taken to avoid cyclic recreation of entries during GC:
// * a Push initiated due to an intent may recreate a transaction entry
// * resolving an intent may write a new sequence cache entry
// * obtaining the transaction for a sequence cache entry requires a Push
//
// The following order is taken below:
// 1) collect all intents with sufficiently old txn record
// 2) collect these intents' transactions
// 3) scan the transaction table, collecting abandoned or completed txns
// 4) push all of these transactions (possibly recreating entries)
// 5) resolve all intents (unless the txn is still PENDING), which will recreate
//    sequence cache entries (but with the txn timestamp; i.e. likely gc'able)
// 6) scan the sequence table for old entries
// 7) push these transactions (again, recreating txn entries).
// 8) send a GCRequest.
func (gcq *gcQueue) process(now roachpb.Timestamp, repl *Replica,
	sysCfg config.SystemConfig) error {

	snap := repl.store.Engine().NewSnapshot()
	desc := repl.Desc()
	iter := newReplicaDataIterator(desc, snap, true /* replicatedOnly */)
	defer iter.Close()
	defer snap.Close()

	// Lookup the GC policy for the zone containing this key range.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return util.Errorf("could not find zone config for range %s: %s", repl, err)
	}

	gc := engine.NewGarbageCollector(now, zone.GC)

	// Compute intent expiration (intent age at which we attempt to resolve).
	intentExp := now
	intentExp.WallTime -= intentAgeThreshold.Nanoseconds()
	txnExp := now
	txnExp.WallTime -= txnCleanupThreshold.Nanoseconds()

	gcArgs := &roachpb.GCRequest{}
	// TODO(tschottdorf): This is one of these instances in which we want
	// to be more careful that the request ends up on the correct Replica,
	// and we might have to worry about mixing range-local and global keys
	// in a batch which might end up spanning Ranges by the time it executes.
	gcArgs.Key = desc.StartKey.AsRawKey()
	gcArgs.EndKey = desc.EndKey.AsRawKey()

	var expBaseKey roachpb.Key
	var keys []engine.MVCCKey
	var vals [][]byte

	// Maps from txn ID to txn and intent key slice.
	txnMap := map[uuid.UUID]*roachpb.Transaction{}
	intentSpanMap := map[uuid.UUID][]roachpb.Span{}

	// processKeysAndValues is invoked with each key and its set of
	// values. Intents older than the intent age threshold are sent for
	// resolution; the values following the MVCC metadata (and the
	// intent value, if any) are considered for garbage collection.
	var intentCount int
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &engine.MVCCMetadata{}
			if err := proto.Unmarshal(vals[0], meta); err != nil {
				log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err)
			} else {
				// In the event that there's an active intent, send for
				// intent resolution if older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Keep track of intent to resolve if older than the intent
					// expiration threshold.
					if meta.Timestamp.Less(intentExp) {
						txnID := *meta.Txn.ID
						txn := &roachpb.Transaction{
							TxnMeta: *meta.Txn,
						}
						txnMap[txnID] = txn
						intentCount++
						intentSpanMap[txnID] = append(intentSpanMap[txnID], roachpb.Span{Key: expBaseKey})
					}
					// With an active intent, GC ignores MVCC metadata & intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(roachpb.ZeroTimestamp) {
					// TODO(spencer): need to split the requests up into
					// multiple requests in the event that more than X keys
					// are added to the request.
					gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
				}
			}
		}
	}

	// Iterate through the keys and values of this replica's range.
	for ; iter.Valid(); iter.Next() {
		iterKey := iter.Key()
		if !iterKey.IsValue() || !iterKey.Key.Equal(expBaseKey) {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = iterKey.Key
			if !iterKey.IsValue() {
				keys = []engine.MVCCKey{iter.Key()}
				vals = [][]byte{iter.Value()}
				continue
			}
			// An implicit metadata.
			keys = []engine.MVCCKey{engine.MakeMVCCMetadataKey(iterKey.Key)}
			// A nil value for the encoded MVCCMetadata. This will unmarshal to an
			// empty MVCCMetadata which is sufficient for processKeysAndValues to
			// determine that there is no intent.
			vals = [][]byte{nil}
		}
		keys = append(keys, iter.Key())
		vals = append(vals, iter.Value())
	}
	if iter.Error() != nil {
		return iter.Error()
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()
	gcq.eventLog.Infof(true, "assembled %d transactions from %d old intents; found %d gc'able keys", len(txnMap), intentCount, len(gcArgs.Keys))

	txnKeys, err := gcq.processTransactionTable(repl, txnMap, txnExp)
	if err != nil {
		return err
	}

	// From now on, all newly added keys are range-local.
	// TODO(tschottdorf): Might need to use two requests at some point since we
	// hard-coded the full non-local key range in the header, but that does
	// not take into account the range-local keys. It will be OK as long as
	// we send directly to the Replica, though.
	gcArgs.Keys = append(gcArgs.Keys, txnKeys...)

	// Process push transactions in parallel.
	var wg sync.WaitGroup
	gcq.eventLog.Infof(true, "pushing %d txns", len(txnMap))
	for _, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			continue
		}
		wg.Add(1)
		go gcq.pushTxn(repl, now, txn, roachpb.PUSH_ABORT, &wg)
	}
	wg.Wait()

	// Resolve all intents.
	var intents []roachpb.Intent
	for txnID, txn := range txnMap {
		if txn.Status != roachpb.PENDING {
			for _, intent := range intentSpanMap[txnID] {
				intents = append(intents, roachpb.Intent{Span: intent, Status: txn.Status, Txn: txn.TxnMeta})
			}
		}
	}
	gcq.eventLog.Infof(true, "resolving %d intents", len(intents))

	if pErr := repl.store.intentResolver.resolveIntents(repl.context(), repl, intents,
		true /* wait */, false /* !poison */); pErr != nil {
		return pErr.GoError()
	}

	// Deal with any leftover sequence cache keys. There shouldn't be many of
	// them.
	leftoverSeqCacheKeys := gcq.processSequenceCache(repl, now, txnExp, txnMap)
	gcq.eventLog.Infof(true, "collected %d leftover sequence cache keys", len(leftoverSeqCacheKeys))
	gcArgs.Keys = append(gcArgs.Keys, leftoverSeqCacheKeys...)
	gcq.eventLog.Infof(true, "sending gc request for %d keys", len(gcArgs.Keys))

	var ba roachpb.BatchRequest
	// Technically not needed since we're talking directly to the Range.
	ba.RangeID = desc.RangeID
	ba.Timestamp = now
	ba.Add(gcArgs)
	if _, pErr := repl.Send(repl.context(), ba); pErr != nil {
		return pErr.GoError()
	}

	return nil
}
Example #21
// ClearData removes all persisted items stored in the cache.
func (sc *SequenceCache) ClearData(e engine.Engine) error {
	_, err := engine.ClearRange(e, engine.MakeMVCCMetadataKey(sc.min), engine.MakeMVCCMetadataKey(sc.max))
	return err
}
Example #22
// CopyInto copies all the results from this sequence cache into the destRangeID
// sequence cache. Failures decoding individual cache entries return an error.
func (sc *SequenceCache) CopyInto(e engine.Engine, destRangeID roachpb.RangeID) error {
	return copySeqCache(e, sc.rangeID, destRangeID,
		engine.MakeMVCCMetadataKey(sc.min), engine.MakeMVCCMetadataKey(sc.max))
}
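A hypothetical call site pairing CopyInto with its CopyFrom counterpart from
Example #3: a newly created replica might seed its cache from an existing
range (newSeqCache and origRangeID are assumed names):

	if err := newSeqCache.CopyFrom(e, origRangeID); err != nil {
		return err
	}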