Beispiel #1
0
// process garbage collects the given replica's range. It walks all keys in
// the range, handing each key and its set of values to the garbage collector,
// and batches the keys found to be collectable into GC calls. Intents older
// than intentAgeThreshold are resolved along the way. The transaction and
// abort cache records are scanned as well and old entries evicted; both kinds
// of record are normally cleaned up when their transaction finishes, so this
// is expected to be a small amount of work.
//
// Care is required so that GC does not recreate entries in a cycle:
// * a Push initiated due to an intent may recreate a transaction entry
// * resolving an intent may write a new abort cache entry
// * obtaining the transaction for an abort cache entry requires a Push
//
// For that reason the work is ordered as follows:
// 1) collect all intents with sufficiently old txn record
// 2) collect these intents' transactions
// 3) scan the transaction table, collecting abandoned or completed txns
// 4) push all of these transactions (possibly recreating entries)
// 5) resolve all intents (unless the txn is still PENDING), which will recreate
//    abort cache entries (but with the txn timestamp; i.e. likely gc'able)
// 6) scan the abort cache table for old entries
// 7) push these transactions (again, recreating txn entries).
// 8) send a GCRequest.
//
// An error is returned if the zone config lookup fails, if RunGC fails, or if
// sending the final GC batch to the replica fails.
func (gcq *gcQueue) process(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) error {
	// RunGC reads from a snapshot; release it when this call returns.
	snap := repl.store.Engine().NewSnapshot()
	defer snap.Close()
	desc := repl.Desc()

	// The GC policy is taken from the zone config looked up by the range's
	// start key.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("could not find zone config for range %s: %s", repl, err)
	}

	// Callback RunGC uses to push a transaction.
	pushFn := func(now hlc.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
		pushTxn(ctx, gcq.store.DB(), now, txn, typ)
	}
	// Callback RunGC uses to resolve intents; the two flags are forwarded to
	// the intent resolver in the order they are received.
	//
	// NOTE(review): other resolveIntents call sites annotate these two
	// arguments as wait first, then poison, whereas the parameters here are
	// named poison first, then wait. Behavior depends only on the order in
	// which RunGC supplies them - confirm against RunGC and align the names.
	resolveFn := func(intents []roachpb.Intent, poison bool, wait bool) error {
		return repl.store.intentResolver.resolveIntents(ctx, intents, poison, wait)
	}

	gcKeys, info, err := RunGC(ctx, desc, snap, now, zone.GC, pushFn, resolveFn)
	if err != nil {
		return err
	}

	log.VEventf(ctx, 1, "completed with stats %+v", info)
	info.updateMetrics(gcq.store.metrics)

	// TODO(tschottdorf): This is one of these instances in which we want
	// to be more careful that the request ends up on the correct Replica,
	// and we might have to worry about mixing range-local and global keys
	// in a batch which might end up spanning Ranges by the time it executes.
	var gcArgs roachpb.GCRequest
	gcArgs.Key = desc.StartKey.AsRawKey()
	gcArgs.EndKey = desc.EndKey.AsRawKey()
	gcArgs.Keys = gcKeys
	gcArgs.Threshold = info.Threshold
	gcArgs.TxnSpanGCThreshold = info.TxnSpanGCThreshold

	var ba roachpb.BatchRequest
	// Technically not needed since we're talking directly to the Range.
	ba.RangeID = desc.RangeID
	ba.Timestamp = now
	ba.Add(&gcArgs)

	_, pErr := repl.Send(ctx, ba)
	if pErr == nil {
		return nil
	}
	log.ErrEvent(ctx, pErr.String())
	return pErr.GoError()
}
Beispiel #2
0
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
//
// One limited async task is started per element of intents. All work is
// best effort: failures are logged as warnings and nothing is returned to
// the caller. If a task cannot be started, the remaining elements are
// skipped.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	now := r.store.Clock().Now()
	ctx := context.TODO()
	stopper := r.store.Stopper()

	for _, item := range intents {
		// Give every task its own copy of the loop variable. The closures
		// below run asynchronously; before Go 1.22 the range variable is
		// shared by all iterations, so without this copy a task could observe
		// a later element's intents (and race with the loop's writes).
		item := item

		if item.args.Method() != roachpb.EndTransaction {
			if err := stopper.RunLimitedAsyncTask(
				ctx, ir.sem, true /* wait */, func(ctx context.Context) {
					// Everything here is best effort; give up rather than waiting
					// too long (helps avoid deadlocks during test shutdown,
					// although this is imperfect due to the use of an
					// uninterruptible WaitGroup.Wait in beginCmds).
					ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
					defer cancel()
					h := roachpb.Header{Timestamp: now}
					resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout,
						item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */)

					// resolveIntents with poison=true because we're resolving
					// intents outside of the context of an EndTransaction.
					//
					// Naively, it doesn't seem like we need to poison the abort
					// cache since we're pushing with PUSH_TOUCH - meaning that
					// the primary way our Push leads to aborting intents is that
					// of the transaction having timed out (and thus presumably no
					// client being around any more, though at the time of writing
					// we don't guarantee that). But there are other paths in which
					// the Push comes back successful while the coordinating client
					// may still be active. Examples of this are when:
					//
					// - the transaction was aborted by someone else, but the
					//   coordinating client may still be running.
					// - the transaction entry wasn't written yet, which at the
					//   time of writing has our push abort it, leading to the
					//   same situation as above.
					//
					// Thus, we must poison.
					//
					// The resolve is attempted before pushErr is inspected, so
					// whatever intents the push did return are still resolved.
					if err := ir.resolveIntents(ctxWithTimeout, resolveIntents,
						true /* wait */, true /* poison */); err != nil {
						log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err)
						return
					}
					if pushErr != nil {
						log.Warningf(ctx, "%s: failed to push during intent resolution: %s", r, pushErr)
						return
					}
				}); err != nil {
				log.Warningf(ctx, "failed to resolve intents: %s", err)
				return
			}
		} else { // EndTransaction
			if err := stopper.RunLimitedAsyncTask(
				ctx, ir.sem, true /* wait */, func(ctx context.Context) {
					ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
					defer cancel()

					// For EndTransaction, we know the transaction is finalized so
					// we can skip the push and go straight to the resolve.
					//
					// This mechanism assumes that when an EndTransaction fails,
					// the client makes no assumptions about the result. For
					// example, an attempt to explicitly rollback the transaction
					// may succeed (triggering this code path), but the result may
					// not make it back to the client.
					if err := ir.resolveIntents(ctxWithTimeout, item.intents,
						true /* wait */, false /* !poison */); err != nil {
						log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err)
						return
					}

					// The transaction is read off the first intent below; with
					// no intents there is no transaction record to identify and
					// indexing the empty slice would panic.
					if len(item.intents) == 0 {
						return
					}

					// We successfully resolved the intents, so we're able to GC from
					// the txn span directly.
					b := &client.Batch{}
					txn := item.intents[0].Txn
					txnKey := keys.TransactionKey(txn.Key, *txn.ID)

					// This is pretty tricky. Transaction keys are range-local and
					// so they are encoded specially. The key range addressed by
					// (txnKey, txnKey.Next()) might be empty (since Next() does
					// not imply monotonicity on the address side). Instead, we
					// send this request to a range determined using the resolved
					// transaction anchor, i.e. if the txn is anchored on
					// /Local/RangeDescriptor/"a"/uuid, the key range below would
					// be ["a", "a\x00"). However, the first range is special again
					// because the above procedure results in KeyMin, but we need
					// at least KeyLocalMax.
					//
					// #7880 will address this by making GCRequest less special and
					// thus obviating the need to cook up an artificial range here.
					var gcArgs roachpb.GCRequest
					{
						key := keys.MustAddr(txn.Key)
						if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) {
							key = localMax
						}
						endKey := key.Next()

						gcArgs.Span = roachpb.Span{
							Key:    key.AsRawKey(),
							EndKey: endKey.AsRawKey(),
						}
					}

					gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{
						Key: txnKey,
					})
					b.AddRawRequest(&gcArgs)
					if err := ir.store.db.Run(ctx, b); err != nil {
						log.Warningf(ctx, "could not GC completed transaction anchored at %s: %s",
							roachpb.Key(txn.Key), err)
						return
					}
				}); err != nil {
				log.Warningf(ctx, "failed to resolve intents: %s", err)
				return
			}
		}
	}
}