func tryTxn(kv engine.MVCCKeyValue) (string, error) {
	var txn roachpb.Transaction
	if err := maybeUnmarshalInline(kv.Value, &txn); err != nil {
		return "", err
	}
	return txn.String() + "\n", nil
}
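// As a hedged illustration of how a "try" decoder such as tryTxn is typically
// used, the sketch below dispatches over a list of decoders and falls back to
// a raw hex dump. prettyPrintValue and the decoder list are hypothetical and
// assume "fmt" is imported; only tryTxn comes from the code above.
func prettyPrintValue(kv engine.MVCCKeyValue) string {
	decoders := []func(engine.MVCCKeyValue) (string, error){
		tryTxn,
		// Further tryXYZ decoders could be appended here.
	}
	for _, decode := range decoders {
		if out, err := decode(kv); err == nil {
			return out
		}
	}
	// No decoder matched; render the raw value bytes as hex.
	return fmt.Sprintf("%x\n", kv.Value)
}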
// TestGCQueueProcess creates test data in the range over various time
// scales and verifies that the GC queue process properly GCs test data.
func TestGCQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	ts1 := makeTS(now-2*24*60*60*1E9+1, 0)                     // 2d old (add one nanosecond so we're not using zero timestamp)
	ts2 := makeTS(now-25*60*60*1E9, 0)                         // GC will occur at time=25 hours
	ts2m1 := ts2.Prev()                                        // ts2 - 1 so we have something not right at the GC time
	ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)     // 2h old
	ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
	ts5 := makeTS(now-1E9, 0)                                  // 1s old
	key1 := roachpb.Key("a")
	key2 := roachpb.Key("b")
	key3 := roachpb.Key("c")
	key4 := roachpb.Key("d")
	key5 := roachpb.Key("e")
	key6 := roachpb.Key("f")
	key7 := roachpb.Key("g")
	key8 := roachpb.Key("h")
	key9 := roachpb.Key("i")
	key10 := roachpb.Key("j")
	key11 := roachpb.Key("k")

	data := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
		del bool
		txn bool
	}{
		// For key1, we expect first value to GC.
		{key1, ts1, false, false},
		{key1, ts2, false, false},
		{key1, ts5, false, false},
		// For key2, we expect values to GC, even though most recent is deletion.
		{key2, ts1, false, false},
		{key2, ts2m1, false, false}, // use a value < the GC time to verify it's kept
		{key2, ts5, true, false},
		// For key3, we expect just ts1 to GC, because most recent deletion is intent.
		{key3, ts1, false, false},
		{key3, ts2, false, false},
		{key3, ts5, true, true},
		// For key4, expect oldest value to GC.
		{key4, ts1, false, false},
		{key4, ts2, false, false},
		// For key5, expect all values to GC (most recent value deleted).
		{key5, ts1, false, false},
		{key5, ts2, true, false}, // deleted, so GC
		// For key6, expect no values to GC because most recent value is intent.
		{key6, ts1, false, false},
		{key6, ts5, false, true},
		// For key7, expect no values to GC because intent is exactly 2h old.
		{key7, ts2, false, false},
		{key7, ts4, false, true},
		// For key8, expect most recent value to resolve by aborting, which will clean it up.
		{key8, ts2, false, false},
		{key8, ts3, true, true},
		// For key9, resolve naked intent with no remaining values.
		{key9, ts3, false, true},
		// For key10, GC ts1 because it's a delete but not ts3 because it's above the threshold.
		{key10, ts1, true, false},
		{key10, ts3, true, false},
		{key10, ts4, false, false},
		{key10, ts5, false, false},
		// For key11, we can't GC anything because ts1 isn't a delete.
		{key11, ts1, false, false},
		{key11, ts3, true, false},
		{key11, ts4, true, false},
		{key11, ts5, true, false},
	}

	for i, datum := range data {
		if datum.del {
			dArgs := deleteArgs(datum.key)
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &dArgs); err != nil {
				t.Fatalf("%d: could not delete data: %s", i, err)
			}
		} else {
			pArgs := putArgs(datum.key, []byte("value"))
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through a scan queue.
	gcQ := newGCQueue(tc.gossip)
	if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	expKVs := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
	}{
		{key1, ts5},
		{key1, ts2},
		{key2, ts5},
		{key2, ts2m1},
		{key3, hlc.ZeroTimestamp},
		{key3, ts5},
		{key3, ts2},
		{key4, ts2},
		{key6, hlc.ZeroTimestamp},
		{key6, ts5},
		{key6, ts1},
		{key7, hlc.ZeroTimestamp},
		{key7, ts4},
		{key7, ts2},
		{key8, ts2},
		{key10, ts5},
		{key10, ts4},
		{key10, ts3},
		{key11, ts5},
		{key11, ts4},
		{key11, ts3},
		{key11, ts1},
	}
	// Read data directly from engine to avoid intent errors from MVCC.
	kvs, err := engine.Scan(tc.store.Engine(), engine.MakeMVCCMetadataKey(key1),
		engine.MakeMVCCMetadataKey(keys.MaxKey), 0)
	if err != nil {
		t.Fatal(err)
	}
	for i, kv := range kvs {
		if log.V(1) {
			log.Infof("%d: %s", i, kv.Key)
		}
	}
	if len(kvs) != len(expKVs) {
		t.Fatalf("expected length %d; got %d", len(expKVs), len(kvs))
	}
	for i, kv := range kvs {
		if !kv.Key.Key.Equal(expKVs[i].key) {
			t.Errorf("%d: expected key %q; got %q", i, expKVs[i].key, kv.Key.Key)
		}
		if !kv.Key.Timestamp.Equal(expKVs[i].ts) {
			t.Errorf("%d: expected ts=%s; got %s", i, expKVs[i].ts, kv.Key.Timestamp)
		}
		if log.V(1) {
			log.Infof("%d: %s", i, kv.Key)
		}
	}

	// Verify that the last verification timestamp was updated as the whole range was scanned.
	if _, err := tc.rng.getLastVerificationTimestamp(); err != nil {
		t.Fatal(err)
	}
}
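// For reference, makeTS above is assumed to be the usual small test helper
// that builds an hlc.Timestamp from a wall time in nanoseconds and a logical
// tick; a sketch of that assumed implementation:
func makeTS(nanos int64, logical int32) hlc.Timestamp {
	return hlc.Timestamp{
		WallTime: nanos,   // physical component, nanoseconds since the epoch
		Logical:  logical, // logical tick; the test always passes 0
	}
}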
// updateState updates the transaction state in both the success and error
// cases, applying those updates to the corresponding txnMeta object when
// appropriate. It also updates certain errors with the updated transaction
// for use by client restarts.
func (tc *TxnCoordSender) updateState(
	startNS int64,
	ctx context.Context,
	ba roachpb.BatchRequest,
	br *roachpb.BatchResponse,
	pErr *roachpb.Error,
) *roachpb.Error {
	tc.Lock()
	defer tc.Unlock()

	if ba.Txn == nil {
		// Not a transactional request.
		return pErr
	}

	var newTxn roachpb.Transaction
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else if errTxn := pErr.GetTxn(); errTxn != nil {
		newTxn.Update(errTxn)
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		if txn := pErr.GetTxn(); txn != nil {
			defer tc.cleanupTxnLocked(ctx, *txn)
		}
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(errors.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxnLocked(ctx, newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're just ahead
		// of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp)
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		// Increase timestamp so on restart, we're ahead of any timestamp
		// cache entries or newer versions which caused the restart.
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case *roachpb.WriteTooOldError:
		newTxn.Restart(ba.UserPriority, newTxn.Priority, t.ActualTimestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		// Do not clean up the transaction here since the client might still
		// want to continue the transaction. For example, a client might
		// continue its transaction after receiving ConditionFailedError, which
		// can come from a unique index violation.
		//
		// TODO(bdarnell): Is this valid? Unless there is a single CPut in
		// the batch, it is difficult to be able to continue after a
		// ConditionFailedError because it is unclear which parts of the
		// batch had succeeded on other ranges before one range hit the
		// failed condition. It may be better to clean up the transaction here.
	}

	txnID := *newTxn.ID
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	if txnMeta != nil || pErr == nil || newTxn.Writing {
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		var keys []roachpb.Span
		if txnMeta != nil {
			keys = txnMeta.keys
		}
		ba.IntentSpanIterate(func(key, endKey roachpb.Key) {
			keys = append(keys, roachpb.Span{
				Key:    key,
				EndKey: endKey,
			})
		})
		if txnMeta != nil {
			txnMeta.keys = keys
		} else if len(keys) > 0 {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				log.Trace(ctx, "coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              newTxn,
					keys:             keys,
					firstUpdateNanos: startNS,
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if err := tc.stopper.RunAsyncTask(func() {
					tc.heartbeatLoop(ctx, txnID)
				}); err != nil {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(err)
				}
			} else {
				// If this was a successful one phase commit, update stats
				// directly as they won't otherwise be updated on heartbeat
				// loop shutdown.
				etArgs, ok := br.Responses[len(br.Responses)-1].GetInner().(*roachpb.EndTransactionResponse)
				tc.updateStats(tc.clock.PhysicalNow()-startNS, 0, newTxn.Status, ok && etArgs.OnePhaseCommit)
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn.Update(&newTxn)
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
	}

	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back. Note that we make sure not to share data with newTxn
		// (which may have made it into txnMeta).
		if br.Txn != nil {
			br.Txn.Update(&newTxn)
		} else {
			clonedTxn := newTxn.Clone()
			br.Txn = &clonedTxn
		}
	} else if pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and data races can occur.
		pErrShallow := *pErr
		pErrShallow.SetTxn(&newTxn) // SetTxn clones newTxn
		pErr = &pErrShallow
	}

	return pErr
}
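// The error-handling tail of updateState avoids mutating a *roachpb.Error that
// may already have escaped to other goroutines: it shallow-copies the error,
// attaches the updated transaction to the copy, and returns the copy. Below is
// a minimal sketch of the same copy-on-write idea using stand-in types;
// fakeError and withTxnID are hypothetical names, not part of this package.
type fakeError struct {
	msg   string
	txnID string
}

// withTxnID returns a copy of pErr carrying the new transaction ID, leaving
// the (possibly shared) original untouched - the same idea as pErrShallow.
func withTxnID(pErr *fakeError, txnID string) *fakeError {
	if pErr == nil {
		return nil
	}
	cp := *pErr // shallow copy; only fields we own are overwritten below
	cp.txnID = txnID
	return &cp
}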
// TestGCQueueProcess creates test data in the range over various time
// scales and verifies that the GC queue process properly GCs test data.
func TestGCQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	ts1 := makeTS(now-2*24*60*60*1E9+1, 0)                     // 2d old (add one nanosecond so we're not using zero timestamp)
	ts2 := makeTS(now-25*60*60*1E9, 0)                         // GC will occur at time=25 hours
	ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)     // 2h old
	ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
	ts5 := makeTS(now-1E9, 0)                                  // 1s old
	key1 := roachpb.Key("a")
	key2 := roachpb.Key("b")
	key3 := roachpb.Key("c")
	key4 := roachpb.Key("d")
	key5 := roachpb.Key("e")
	key6 := roachpb.Key("f")
	key7 := roachpb.Key("g")
	key8 := roachpb.Key("h")
	key9 := roachpb.Key("i")

	data := []struct {
		key roachpb.Key
		ts  roachpb.Timestamp
		del bool
		txn bool
	}{
		// For key1, we expect first two values to GC.
		{key1, ts1, false, false},
		{key1, ts2, false, false},
		{key1, ts5, false, false},
		// For key2, we expect all values to GC, because most recent is deletion.
		{key2, ts1, false, false},
		{key2, ts2, false, false},
		{key2, ts5, true, false},
		// For key3, we expect just ts1 to GC, because most recent deletion is intent.
		{key3, ts1, false, false},
		{key3, ts2, false, false},
		{key3, ts5, true, true},
		// For key4, expect oldest value to GC.
		{key4, ts1, false, false},
		{key4, ts2, false, false},
		// For key5, expect all values to GC (most recent value deleted).
		{key5, ts1, false, false},
		{key5, ts2, true, false},
		// For key6, expect no values to GC because most recent value is intent.
		{key6, ts1, false, false},
		{key6, ts5, false, true},
		// For key7, expect no values to GC because intent is exactly 2h old.
		{key7, ts2, false, false},
		{key7, ts4, false, true},
		// For key8, expect most recent value to resolve by aborting, which will clean it up.
		{key8, ts2, false, false},
		{key8, ts3, true, true},
		// For key9, resolve naked intent with no remaining values.
		{key9, ts3, true, false},
	}

	for i, datum := range data {
		if datum.del {
			dArgs := deleteArgs(datum.key)
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, roachpb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := client.SendWrappedWith(tc.Sender(), tc.rng.context(), roachpb.BatchRequest_Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &dArgs); err != nil {
				t.Fatalf("%d: could not delete data: %s", i, err)
			}
		} else {
			pArgs := putArgs(datum.key, []byte("value"))
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, roachpb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := client.SendWrappedWith(tc.Sender(), tc.rng.context(), roachpb.BatchRequest_Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
		}
	}

	cfg := tc.gossip.GetSystemConfig()
	if cfg == nil {
		t.Fatal("nil config")
	}

	// Process through a scan queue.
	gcQ := newGCQueue(tc.gossip)
	if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	expKVs := []struct {
		key roachpb.Key
		ts  roachpb.Timestamp
	}{
		{key1, roachpb.ZeroTimestamp},
		{key1, ts5},
		{key3, roachpb.ZeroTimestamp},
		{key3, ts5},
		{key3, ts2},
		{key4, roachpb.ZeroTimestamp},
		{key4, ts2},
		{key6, roachpb.ZeroTimestamp},
		{key6, ts5},
		{key6, ts1},
		{key7, roachpb.ZeroTimestamp},
		{key7, ts4},
		{key7, ts2},
		{key8, roachpb.ZeroTimestamp},
		{key8, ts2},
	}
	// Read data directly from engine to avoid intent errors from MVCC.
	kvs, err := engine.Scan(tc.store.Engine(), engine.MVCCEncodeKey(key1),
		engine.MVCCEncodeKey(keys.TableDataPrefix), 0)
	if err != nil {
		t.Fatal(err)
	}
	for i, kv := range kvs {
		if key, ts, isValue, err := engine.MVCCDecodeKey(kv.Key); isValue {
			if err != nil {
				t.Fatal(err)
			}
			if log.V(1) {
				log.Infof("%d: %q, ts=%s", i, key, ts)
			}
		} else {
			if log.V(1) {
				log.Infof("%d: %q meta", i, key)
			}
		}
	}
	if len(kvs) != len(expKVs) {
		t.Fatalf("expected length %d; got %d", len(expKVs), len(kvs))
	}
	for i, kv := range kvs {
		key, ts, isValue, err := engine.MVCCDecodeKey(kv.Key)
		if err != nil {
			t.Fatal(err)
		}
		if !key.Equal(expKVs[i].key) {
			t.Errorf("%d: expected key %q; got %q", i, expKVs[i].key, key)
		}
		if !ts.Equal(expKVs[i].ts) {
			t.Errorf("%d: expected ts=%s; got %s", i, expKVs[i].ts, ts)
		}
		if isValue {
			if log.V(1) {
				log.Infof("%d: %q, ts=%s", i, key, ts)
			}
		} else {
			if log.V(1) {
				log.Infof("%d: %q meta", i, key)
			}
		}
	}

	// Verify the oldest extant intent age.
	gcMeta, err := tc.rng.GetGCMetadata()
	if err != nil {
		t.Fatal(err)
	}
	if gcMeta.LastScanNanos != now {
		t.Errorf("expected last scan nanos=%d; got %d", now, gcMeta.LastScanNanos)
	}
	if *gcMeta.OldestIntentNanos != ts4.WallTime {
		t.Errorf("expected oldest intent nanos=%d; got %d", ts4.WallTime, gcMeta.OldestIntentNanos)
	}

	// Verify that the last verification timestamp was updated as the whole range was scanned.
	ts, err := tc.rng.GetLastVerificationTimestamp()
	if err != nil {
		t.Fatal(err)
	}
	if gcMeta.LastScanNanos != ts.WallTime {
		t.Errorf("expected walltime nanos %d; got %d", gcMeta.LastScanNanos, ts.WallTime)
	}
}
// processTransactionTable scans the transaction table and updates txnMap with
// those transactions which are old and either PENDING or have intents
// registered. In the first case we want to push the transaction so that it is
// aborted, and in the second case we may have to resolve the intents
// successfully before GCing the entry. The transaction records which can be
// GC'ed are returned separately and are not added to txnMap nor intentSpanMap.
func processTransactionTable(
	ctx context.Context,
	snap engine.Engine,
	desc *roachpb.RangeDescriptor,
	txnMap map[uuid.UUID]*roachpb.Transaction,
	cutoff roachpb.Timestamp,
	infoMu *lockableGCInfo,
	resolveIntents resolveFunc,
) ([]roachpb.GCRequest_GCKey, error) {
	infoMu.Lock()
	defer infoMu.Unlock()

	var gcKeys []roachpb.GCRequest_GCKey
	handleOne := func(kv roachpb.KeyValue) error {
		var txn roachpb.Transaction
		if err := kv.Value.GetProto(&txn); err != nil {
			return err
		}
		infoMu.TransactionSpanTotal++
		if !txn.LastActive().Less(cutoff) {
			return nil
		}

		txnID := *txn.ID

		// The transaction record should be considered for removal.
		switch txn.Status {
		case roachpb.PENDING:
			// Marked as running, so we need to push it to abort it but won't
			// try to GC it in this cycle (for convenience).
			// TODO(tschottdorf): refactor so that we can GC PENDING entries
			// in the same cycle, but keeping the calls to pushTxn in a central
			// location (keeping it easy to batch them up in the future).
			infoMu.TransactionSpanGCPending++
			txnMap[txnID] = &txn
			return nil
		case roachpb.ABORTED:
			// If we remove this transaction, it effectively still counts as
			// ABORTED (by design). So this can be GC'ed even if we can't
			// resolve the intents.
			// Note: Most aborted transactions weren't aborted by their client,
			// but instead by the coordinator - those will not have any intents
			// persisted, though they still might exist in the system.
			infoMu.TransactionSpanGCAborted++
			func() {
				infoMu.Unlock() // intentional
				defer infoMu.Lock()
				if err := resolveIntents(roachpb.AsIntents(txn.Intents, &txn),
					true /* wait */, false /* !poison */); err != nil {
					log.Warningf("failed to resolve intents of aborted txn on gc: %s", err)
				}
			}()
		case roachpb.COMMITTED:
			// It's committed, so it doesn't need a push but we can only
			// GC it after its intents are resolved.
			if err := func() error {
				infoMu.Unlock() // intentional
				defer infoMu.Lock()
				return resolveIntents(roachpb.AsIntents(txn.Intents, &txn),
					true /* wait */, false /* !poison */)
			}(); err != nil {
				log.Warningf("unable to resolve intents of committed txn on gc: %s", err)
				// Returning the error here would abort the whole GC run, and
				// we don't want that. Instead, we simply don't GC this entry.
				return nil
			}
			infoMu.TransactionSpanGCCommitted++
		default:
			panic(fmt.Sprintf("invalid transaction state: %s", txn))
		}
		gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: kv.Key}) // zero timestamp
		return nil
	}

	startKey := keys.TransactionKey(desc.StartKey.AsRawKey(), uuid.EmptyUUID)
	endKey := keys.TransactionKey(desc.EndKey.AsRawKey(), uuid.EmptyUUID)

	_, err := engine.MVCCIterate(ctx, snap, startKey, endKey, roachpb.ZeroTimestamp,
		true /* consistent */, nil /* txn */, false /* !reverse */,
		func(kv roachpb.KeyValue) (bool, error) {
			return false, handleOne(kv)
		})
	return gcKeys, err
}
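// The ABORTED and COMMITTED branches above temporarily drop infoMu around the
// blocking resolveIntents call and re-acquire it before continuing. As a
// hedged illustration (not part of the original code), that unlock-around-a-call
// pattern can be factored into a small helper; withUnlocked is a hypothetical
// name and assumes "sync" is imported.
func withUnlocked(mu *sync.Mutex, fn func() error) error {
	mu.Unlock()     // release the lock for the duration of fn
	defer mu.Lock() // re-acquire before returning to the caller
	return fn()
}

// Example use, mirroring the COMMITTED case (the caller must hold mu):
//
//	if err := withUnlocked(&mu, func() error {
//		return resolveIntents(roachpb.AsIntents(txn.Intents, &txn),
//			true /* wait */, false /* !poison */)
//	}); err != nil {
//		log.Warningf("unable to resolve intents of committed txn on gc: %s", err)
//	}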