// process iterates through all keys in a replica's range, calling the garbage
// collector for each key and associated set of values. GC'd keys are batched
// into GC calls. Extant intents are resolved if intents are older than
// intentAgeThreshold. The transaction and abort cache records are also
// scanned and old entries evicted. During normal operation, both of these
// records are cleaned up when their respective transaction finishes, so the
// amount of work done here is expected to be small.
//
// Some care needs to be taken to avoid cyclic recreation of entries during GC:
// * a Push initiated due to an intent may recreate a transaction entry
// * resolving an intent may write a new abort cache entry
// * obtaining the transaction for an abort cache entry requires a Push
//
// The following order is taken below:
// 1) collect all intents with sufficiently old txn record
// 2) collect these intents' transactions
// 3) scan the transaction table, collecting abandoned or completed txns
// 4) push all of these transactions (possibly recreating entries)
// 5) resolve all intents (unless the txn is still PENDING), which will recreate
//    abort cache entries (but with the txn timestamp; i.e. likely gc'able)
// 6) scan the abort cache table for old entries
// 7) push these transactions (again, recreating txn entries)
// 8) send a GCRequest
func (gcq *gcQueue) process(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) error {
	snap := repl.store.Engine().NewSnapshot()
	desc := repl.Desc()
	defer snap.Close()

	// Look up the GC policy for the zone containing this key range.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("could not find zone config for range %s: %s", repl, err)
	}

	gcKeys, info, err := RunGC(ctx, desc, snap, now, zone.GC,
		func(now hlc.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
			pushTxn(gcq.store.DB(), now, txn, typ)
		},
		func(intents []roachpb.Intent, poison bool, wait bool) error {
			return repl.store.intentResolver.resolveIntents(ctx, intents, poison, wait)
		})
	if err != nil {
		return err
	}

	gcq.eventLog.VInfof(true, "completed with stats %+v", info)

	var ba roachpb.BatchRequest
	var gcArgs roachpb.GCRequest
	// TODO(tschottdorf): This is one of these instances in which we want
	// to be more careful that the request ends up on the correct Replica,
	// and we might have to worry about mixing range-local and global keys
	// in a batch which might end up spanning Ranges by the time it executes.
	gcArgs.Key = desc.StartKey.AsRawKey()
	gcArgs.EndKey = desc.EndKey.AsRawKey()
	gcArgs.Keys = gcKeys
	gcArgs.Threshold = info.Threshold

	// Technically not needed since we're talking directly to the Range.
	ba.RangeID = desc.RangeID
	ba.Timestamp = now
	ba.Add(&gcArgs)
	if _, pErr := repl.Send(ctx, ba); pErr != nil {
		return pErr.GoError()
	}
	return nil
}
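// The request assembled at the end of process carries all of gcKeys in a
// single GCRequest. As a hedged illustration of the "GC'd keys are batched
// into GC calls" note at the top of process, the sketch below shows how the
// keys could instead be split into fixed-size chunks, each proposed as its
// own BatchRequest. The helper name, the chunk size, and the decision to
// chunk at all are assumptions for illustration only; they are not part of
// the excerpt above.
const gcKeyChunkSize = 1000 // hypothetical per-request key limit

func sendGCInChunks(
	ctx context.Context,
	repl *Replica,
	desc *roachpb.RangeDescriptor,
	now hlc.Timestamp,
	threshold hlc.Timestamp,
	gcKeys []roachpb.GCRequest_GCKey,
) error {
	for len(gcKeys) > 0 {
		n := len(gcKeys)
		if n > gcKeyChunkSize {
			n = gcKeyChunkSize
		}
		// Each chunk addresses the same range-wide span but carries only a
		// slice of the keys computed by RunGC.
		var gcArgs roachpb.GCRequest
		gcArgs.Key = desc.StartKey.AsRawKey()
		gcArgs.EndKey = desc.EndKey.AsRawKey()
		gcArgs.Keys = gcKeys[:n]
		gcArgs.Threshold = threshold

		var ba roachpb.BatchRequest
		ba.RangeID = desc.RangeID
		ba.Timestamp = now
		ba.Add(&gcArgs)
		if _, pErr := repl.Send(ctx, ba); pErr != nil {
			return pErr.GoError()
		}
		gcKeys = gcKeys[n:]
	}
	return nil
}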
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	if len(intents) == 0 {
		return
	}
	now := r.store.Clock().Now()
	ctx := r.context(context.TODO())
	stopper := r.store.Stopper()

	for _, item := range intents {
		if item.args.Method() != roachpb.EndTransaction {
			stopper.RunLimitedAsyncTask(ir.sem, func() {
				// Everything here is best effort; give up rather than waiting
				// too long (helps avoid deadlocks during test shutdown,
				// although this is imperfect due to the use of an
				// uninterruptible WaitGroup.Wait in beginCmds).
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()
				h := roachpb.Header{Timestamp: now}
				resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout,
					item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */)

				// resolveIntents with poison=true because we're resolving
				// intents outside of the context of an EndTransaction.
				//
				// Naively, it doesn't seem like we need to poison the abort
				// cache since we're pushing with PUSH_TOUCH - meaning that
				// the primary way our Push leads to aborting intents is that
				// of the transaction having timed out (and thus presumably no
				// client being around any more, though at the time of writing
				// we don't guarantee that). But there's another path in which
				// the Push comes back successful, namely that of the
				// transaction already having been aborted by someone else, in
				// which case the client may still be running. Thus, we must
				// poison.
				if err := ir.resolveIntents(ctxWithTimeout, r, resolveIntents,
					true /* wait */, true /* poison */); err != nil {
					log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", err)
					return
				}
				if pushErr != nil {
					log.Warningc(ctxWithTimeout, "failed to push during intent resolution: %s", pushErr)
					return
				}
			})
		} else { // EndTransaction
			stopper.RunLimitedAsyncTask(ir.sem, func() {
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()

				// For EndTransaction, we know the transaction is finalized so
				// we can skip the push and go straight to the resolve.
				//
				// This mechanism assumes that when an EndTransaction fails,
				// the client makes no assumptions about the result. For
				// example, an attempt to explicitly rollback the transaction
				// may succeed (triggering this code path), but the result may
				// not make it back to the client.
				if err := ir.resolveIntents(ctxWithTimeout, r, item.intents,
					true /* wait */, false /* !poison */); err != nil {
					log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", err)
					return
				}

				// We successfully resolved the intents, so we're able to GC from
				// the txn span directly.
				var ba roachpb.BatchRequest
				ba.Timestamp = now
				txn := item.intents[0].Txn
				gcArgs := roachpb.GCRequest{
					Span: roachpb.Span{
						Key:    r.Desc().StartKey.AsRawKey(),
						EndKey: r.Desc().EndKey.AsRawKey(),
					},
				}
				gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{
					Key: keys.TransactionKey(txn.Key, txn.ID),
				})
				ba.Add(&gcArgs)
				if _, pErr := r.addWriteCmd(ctxWithTimeout, ba, nil /* nil */); pErr != nil {
					log.Warningf("could not GC completed transaction: %s", pErr)
				}
			})
		}
	}
}
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	if len(intents) == 0 {
		return
	}
	now := r.store.Clock().Now()
	ctx := context.TODO()
	stopper := r.store.Stopper()

	for _, item := range intents {
		if item.args.Method() != roachpb.EndTransaction {
			if err := stopper.RunLimitedAsyncTask(ir.sem, func() {
				// Everything here is best effort; give up rather than waiting
				// too long (helps avoid deadlocks during test shutdown,
				// although this is imperfect due to the use of an
				// uninterruptible WaitGroup.Wait in beginCmds).
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()
				h := roachpb.Header{Timestamp: now}
				resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout,
					item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */)

				// resolveIntents with poison=true because we're resolving
				// intents outside of the context of an EndTransaction.
				//
				// Naively, it doesn't seem like we need to poison the abort
				// cache since we're pushing with PUSH_TOUCH - meaning that
				// the primary way our Push leads to aborting intents is that
				// of the transaction having timed out (and thus presumably no
				// client being around any more, though at the time of writing
				// we don't guarantee that). But there's another path in which
				// the Push comes back successful, namely that of the
				// transaction already having been aborted by someone else, in
				// which case the client may still be running. Thus, we must
				// poison.
				if err := ir.resolveIntents(ctxWithTimeout, resolveIntents,
					true /* wait */, true /* poison */); err != nil {
					log.Warningf(context.TODO(), "%s: failed to resolve intents: %s", r, err)
					return
				}
				if pushErr != nil {
					log.Warningf(context.TODO(), "%s: failed to push during intent resolution: %s", r, pushErr)
					return
				}
			}); err != nil {
				log.Warningf(context.TODO(), "failed to resolve intents: %s", err)
				return
			}
		} else { // EndTransaction
			if err := stopper.RunLimitedAsyncTask(ir.sem, func() {
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()

				// For EndTransaction, we know the transaction is finalized so
				// we can skip the push and go straight to the resolve.
				//
				// This mechanism assumes that when an EndTransaction fails,
				// the client makes no assumptions about the result. For
				// example, an attempt to explicitly rollback the transaction
				// may succeed (triggering this code path), but the result may
				// not make it back to the client.
				if err := ir.resolveIntents(ctxWithTimeout, item.intents,
					true /* wait */, false /* !poison */); err != nil {
					log.Warningf(context.TODO(), "%s: failed to resolve intents: %s", r, err)
					return
				}

				// We successfully resolved the intents, so we're able to GC from
				// the txn span directly.
				b := &client.Batch{}
				txn := item.intents[0].Txn
				txnKey := keys.TransactionKey(txn.Key, txn.ID)

				// This is pretty tricky. Transaction keys are range-local and
				// so they are encoded specially. The key range addressed by
				// (txnKey, txnKey.Next()) might be empty (since Next() does
				// not imply monotonicity on the address side). Instead, we
				// send this request to a range determined using the resolved
				// transaction anchor, i.e. if the txn is anchored on
				// /Local/RangeDescriptor/"a"/uuid, the key range below would
				// be ["a", "a\x00"). However, the first range is special again
				// because the above procedure results in KeyMin, but we need
				// at least KeyLocalMax.
				//
				// #7880 will address this by making GCRequest less special and
				// thus obviating the need to cook up an artificial range here.
				var gcArgs roachpb.GCRequest
				{
					key := keys.MustAddr(txn.Key)
					if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) {
						key = localMax
					}
					endKey := key.Next()

					gcArgs.Span = roachpb.Span{
						Key:    key.AsRawKey(),
						EndKey: endKey.AsRawKey(),
					}
				}

				gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{
					Key: txnKey,
				})
				b.AddRawRequest(&gcArgs)
				if err := ir.store.db.Run(b); err != nil {
					log.Warningf(
						context.TODO(),
						"could not GC completed transaction anchored at %s: %s",
						roachpb.Key(txn.Key), err,
					)
					return
				}
			}); err != nil {
				log.Warningf(context.TODO(), "failed to resolve intents: %s", err)
				return
			}
		}
	}
}
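// The span computation for the txn-record GC above is subtle, so the helper
// below factors out the same logic as a worked illustration. It is
// hypothetical and not part of the excerpt; it only restates the clamping
// shown above in an isolated, testable form.
//
// Example: a txn anchored at roachpb.Key("a") yields ["a", "a\x00"); a txn
// anchored below keys.LocalMax (e.g. at roachpb.KeyMin on the first range)
// is clamped so the span starts at keys.LocalMax instead.
func gcSpanForTxnAnchor(anchor roachpb.Key) roachpb.Span {
	key := keys.MustAddr(anchor)
	if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) {
		key = localMax
	}
	return roachpb.Span{
		Key:    key.AsRawKey(),
		EndKey: key.Next().AsRawKey(),
	}
}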
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	if len(intents) == 0 {
		return
	}
	now := r.store.Clock().Now()
	ctx := r.context()
	stopper := r.store.Stopper()

	for _, item := range intents {
		if item.args.Method() != roachpb.EndTransaction {
			stopper.RunLimitedAsyncTask(ir.sem, func() {
				// Everything here is best effort; give up rather than waiting
				// too long (helps avoid deadlocks during test shutdown,
				// although this is imperfect due to the use of an
				// uninterruptible WaitGroup.Wait in beginCmds).
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()
				h := roachpb.Header{Timestamp: now}
				resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout,
					item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */)
				if pErr := ir.resolveIntents(ctxWithTimeout, r, resolveIntents,
					true /* wait */, false /* TODO(tschottdorf): #5088 */); pErr != nil {
					log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", pErr)
					return
				}
				if pushErr != nil {
					log.Warningc(ctxWithTimeout, "failed to push during intent resolution: %s", pushErr)
					return
				}
			})
		} else { // EndTransaction
			stopper.RunLimitedAsyncTask(ir.sem, func() {
				ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
				defer cancel()

				// For EndTransaction, we know the transaction is finalized so
				// we can skip the push and go straight to the resolve.
				if pErr := ir.resolveIntents(ctxWithTimeout, r, item.intents,
					true /* wait */, false /* TODO(tschottdorf): #5088 */); pErr != nil {
					log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", pErr)
					return
				}

				// We successfully resolved the intents, so we're able to GC from
				// the txn span directly. Note that the sequence cache was cleared
				// out synchronously with EndTransaction (see comments within for
				// an explanation of why that is kosher).
				//
				// Note that we poisoned the sequence caches on the external ranges
				// above. This may seem counter-intuitive, but it's actually
				// necessary: Assume a transaction has committed here, with two
				// external intents, and assume that we did not poison. Normally,
				// these two intents would be resolved in the same batch, but that
				// is not guaranteed (for example, if DistSender has a stale
				// descriptor after a Merge). When resolved separately, the first
				// ResolveIntent would clear out the sequence cache; an individual
				// write on the second (still present) intent could then be
				// replayed and would resolve to a real value (at least for a
				// window of time unless we delete the local txn entry). That's not
				// OK for non-idempotent commands such as Increment.
				//
				// TODO(tschottdorf): We should have another side effect on
				// MVCCResolveIntent (on commit/abort): If it were able to remove
				// the txn from its corresponding entries in the timestamp cache,
				// no more replays at the same timestamp would be possible. This
				// appears to be a useful performance optimization; we could then
				// not poison on EndTransaction. In fact, the above mechanism
				// could be an effective alternative to sequence-cache based
				// poisoning (or the whole sequence cache?) itself.
				//
				// TODO(tschottdorf): down the road, can probably unclog the system
				// here by batching up a bunch of those GCRequests before proposing.
				var ba roachpb.BatchRequest
				txn := item.intents[0].Txn
				gcArgs := roachpb.GCRequest{
					Span: roachpb.Span{
						Key:    r.Desc().StartKey.AsRawKey(),
						EndKey: r.Desc().EndKey.AsRawKey(),
					},
				}
				gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{
					Key: keys.TransactionKey(txn.Key, txn.ID),
				})
				ba.Add(&gcArgs)
				if _, pErr := r.addWriteCmd(ctxWithTimeout, ba, nil /* nil */); pErr != nil {
					log.Warningf("could not GC completed transaction: %s", pErr)
				}
			})
		}
	}
}
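// The final TODO above suggests batching several of those single-key
// GCRequests before proposing. A minimal sketch of what that could look like
// follows; the helper name, the idea of collecting already-resolved intents
// into one call, and the dedup-by-transaction-key map are assumptions for
// illustration, not part of the excerpt. It reuses only calls that appear
// above (keys.TransactionKey, GCRequest over the replica's span, addWriteCmd).
func gcTxnRecords(ctx context.Context, r *Replica, resolved []roachpb.Intent) {
	gcArgs := roachpb.GCRequest{
		Span: roachpb.Span{
			Key:    r.Desc().StartKey.AsRawKey(),
			EndKey: r.Desc().EndKey.AsRawKey(),
		},
	}
	// Collect one GC key per distinct transaction record.
	seen := make(map[string]struct{})
	for _, intent := range resolved {
		txnKey := keys.TransactionKey(intent.Txn.Key, intent.Txn.ID)
		if _, ok := seen[string(txnKey)]; ok {
			continue
		}
		seen[string(txnKey)] = struct{}{}
		gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{Key: txnKey})
	}
	if len(gcArgs.Keys) == 0 {
		return
	}
	var ba roachpb.BatchRequest
	ba.Add(&gcArgs)
	if _, pErr := r.addWriteCmd(ctx, ba, nil); pErr != nil {
		log.Warningf("could not GC completed transactions: %s", pErr)
	}
}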