func tryTxn(kv engine.MVCCKeyValue) (string, error) {
	var txn roachpb.Transaction
	if err := maybeUnmarshalInline(kv.Value, &txn); err != nil {
		return "", err
	}
	return txn.String() + "\n", nil
}
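// Usage sketch (assumption, not from the original source): tryTxn is the kind
// of decoder a debug pretty-printer would attempt before falling back to other
// interpretations of the value; a hypothetical caller might do:
//
//   if out, err := tryTxn(kv); err == nil {
//       fmt.Print(out)
//   }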
func TestTransactionString(t *testing.T) {
	txnID, err := uuid.FromBytes([]byte("ת\x0f^\xe4-Fؽ\xf7\x16\xe4\xf9\xbe^\xbe"))
	if err != nil {
		t.Fatal(err)
	}
	ts1 := hlc.Timestamp{WallTime: 10, Logical: 11}
	txn := roachpb.Transaction{
		TxnMeta: enginepb.TxnMeta{
			Isolation: enginepb.SERIALIZABLE,
			Key:       roachpb.Key("foo"),
			ID:        &txnID,
			Epoch:     2,
			Timestamp: hlc.Timestamp{WallTime: 20, Logical: 21},
			Priority:  957356782,
		},
		Name:          "name",
		Status:        roachpb.COMMITTED,
		LastHeartbeat: &ts1,
		OrigTimestamp: hlc.Timestamp{WallTime: 30, Logical: 31},
		MaxTimestamp:  hlc.Timestamp{WallTime: 40, Logical: 41},
	}
	expStr := `"name" id=d7aa0f5e key="foo" rw=false pri=44.58039917 iso=SERIALIZABLE stat=COMMITTED ` +
		`epo=2 ts=0.000000020,21 orig=0.000000030,31 max=0.000000040,41 wto=false rop=false`
	if str := txn.String(); str != expStr {
		t.Errorf("expected txn %s; got %s", expStr, str)
	}

	var txnEmpty roachpb.Transaction
	_ = txnEmpty.String() // prevent regression of NPE

	cmd := storagebase.RaftCommand{
		BatchRequest: &roachpb.BatchRequest{},
	}
	cmd.BatchRequest.Txn = &txn
	if actStr, idStr := fmt.Sprintf("%s", &cmd), txnID.String(); !strings.Contains(actStr, idStr) {
		t.Fatalf("expected to find '%s' in '%s'", idStr, actStr)
	}
}
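// TestGCQueueProcess below uses a makeTS helper that is not part of this
// excerpt. A minimal sketch, assuming it simply builds an hlc.Timestamp from a
// wall time and a logical component (the real helper may differ):
func makeTS(nanos int64, logical int32) hlc.Timestamp {
	return hlc.Timestamp{
		WallTime: nanos,
		Logical:  logical,
	}
}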
// TestGCQueueProcess creates test data in the range over various time
// scales and verifies that the GC queue process properly GCs test data.
func TestGCQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	tc.Start(t)
	defer tc.Stop()

	const now int64 = 48 * 60 * 60 * 1E9 // 2d past the epoch
	tc.manualClock.Set(now)

	ts1 := makeTS(now-2*24*60*60*1E9+1, 0)                     // 2d old (add one nanosecond so we're not using zero timestamp)
	ts2 := makeTS(now-25*60*60*1E9, 0)                         // GC will occur at time=25 hours
	ts2m1 := ts2.Prev()                                        // ts2 - 1 so we have something not right at the GC time
	ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0)     // 2h old
	ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
	ts5 := makeTS(now-1E9, 0)                                  // 1s old
	key1 := roachpb.Key("a")
	key2 := roachpb.Key("b")
	key3 := roachpb.Key("c")
	key4 := roachpb.Key("d")
	key5 := roachpb.Key("e")
	key6 := roachpb.Key("f")
	key7 := roachpb.Key("g")
	key8 := roachpb.Key("h")
	key9 := roachpb.Key("i")
	key10 := roachpb.Key("j")
	key11 := roachpb.Key("k")

	data := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
		del bool
		txn bool
	}{
		// For key1, we expect first value to GC.
		{key1, ts1, false, false},
		{key1, ts2, false, false},
		{key1, ts5, false, false},
		// For key2, we expect values to GC, even though most recent is deletion.
		{key2, ts1, false, false},
		{key2, ts2m1, false, false}, // use a value < the GC time to verify it's kept
		{key2, ts5, true, false},
		// For key3, we expect just ts1 to GC, because most recent deletion is intent.
		{key3, ts1, false, false},
		{key3, ts2, false, false},
		{key3, ts5, true, true},
		// For key4, expect oldest value to GC.
		{key4, ts1, false, false},
		{key4, ts2, false, false},
		// For key5, expect all values to GC (most recent value deleted).
		{key5, ts1, false, false},
		{key5, ts2, true, false}, // deleted, so GC
		// For key6, expect no values to GC because most recent value is intent.
		{key6, ts1, false, false},
		{key6, ts5, false, true},
		// For key7, expect no values to GC because intent is exactly 2h old.
		{key7, ts2, false, false},
		{key7, ts4, false, true},
		// For key8, expect most recent value to resolve by aborting, which will clean it up.
		{key8, ts2, false, false},
		{key8, ts3, true, true},
		// For key9, resolve naked intent with no remaining values.
		{key9, ts3, false, true},
		// For key10, GC ts1 because it's a delete but not ts3 because it's above the threshold.
		{key10, ts1, true, false},
		{key10, ts3, true, false},
		{key10, ts4, false, false},
		{key10, ts5, false, false},
		// For key11, we can't GC anything because ts1 isn't a delete.
		{key11, ts1, false, false},
		{key11, ts3, true, false},
		{key11, ts4, true, false},
		{key11, ts5, true, false},
	}

	for i, datum := range data {
		if datum.del {
			dArgs := deleteArgs(datum.key)
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &dArgs); err != nil {
				t.Fatalf("%d: could not delete data: %s", i, err)
			}
		} else {
			pArgs := putArgs(datum.key, []byte("value"))
			var txn *roachpb.Transaction
			if datum.txn {
				txn = newTransaction("test", datum.key, 1, enginepb.SERIALIZABLE, tc.clock)
				txn.OrigTimestamp = datum.ts
				txn.Timestamp = datum.ts
			}
			if _, err := tc.SendWrappedWith(roachpb.Header{
				Timestamp: datum.ts,
				Txn:       txn,
			}, &pArgs); err != nil {
				t.Fatalf("%d: could not put data: %s", i, err)
			}
		}
	}

	cfg, ok := tc.gossip.GetSystemConfig()
	if !ok {
		t.Fatal("config not set")
	}

	// Process through the GC queue.
	gcQ := newGCQueue(tc.store, tc.gossip)
	if err := gcQ.process(context.Background(), tc.clock.Now(), tc.rng, cfg); err != nil {
		t.Fatal(err)
	}

	expKVs := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
	}{
		{key1, ts5},
		{key1, ts2},
		{key2, ts5},
		{key2, ts2m1},
		{key3, hlc.ZeroTimestamp},
		{key3, ts5},
		{key3, ts2},
		{key4, ts2},
		{key6, hlc.ZeroTimestamp},
		{key6, ts5},
		{key6, ts1},
		{key7, hlc.ZeroTimestamp},
		{key7, ts4},
		{key7, ts2},
		{key8, ts2},
		{key10, ts5},
		{key10, ts4},
		{key10, ts3},
		{key11, ts5},
		{key11, ts4},
		{key11, ts3},
		{key11, ts1},
	}
	// Read data directly from engine to avoid intent errors from MVCC.
	kvs, err := engine.Scan(tc.store.Engine(), engine.MakeMVCCMetadataKey(key1),
		engine.MakeMVCCMetadataKey(keys.MaxKey), 0)
	if err != nil {
		t.Fatal(err)
	}
	for i, kv := range kvs {
		if log.V(1) {
			log.Infof(context.Background(), "%d: %s", i, kv.Key)
		}
	}
	if len(kvs) != len(expKVs) {
		t.Fatalf("expected length %d; got %d", len(expKVs), len(kvs))
	}
	for i, kv := range kvs {
		if !kv.Key.Key.Equal(expKVs[i].key) {
			t.Errorf("%d: expected key %q; got %q", i, expKVs[i].key, kv.Key.Key)
		}
		if !kv.Key.Timestamp.Equal(expKVs[i].ts) {
			t.Errorf("%d: expected ts=%s; got %s", i, expKVs[i].ts, kv.Key.Timestamp)
		}
		if log.V(1) {
			log.Infof(context.Background(), "%d: %s", i, kv.Key)
		}
	}
}
// processTransactionTable scans the transaction table and updates txnMap with
// those transactions which are old and either PENDING or with intents
// registered. In the first case we want to push the transaction so that it is
// aborted, and in the second case we may have to resolve the intents
// successfully before GCing the entry. The transaction records which can be
// gc'ed are returned separately and are not added to txnMap nor intentSpanMap.
func processTransactionTable(
	ctx context.Context,
	snap engine.Reader,
	desc *roachpb.RangeDescriptor,
	txnMap map[uuid.UUID]*roachpb.Transaction,
	cutoff hlc.Timestamp,
	infoMu *lockableGCInfo,
	resolveIntents resolveFunc,
) ([]roachpb.GCRequest_GCKey, error) {
	infoMu.Lock()
	defer infoMu.Unlock()

	var gcKeys []roachpb.GCRequest_GCKey
	handleOne := func(kv roachpb.KeyValue) error {
		var txn roachpb.Transaction
		if err := kv.Value.GetProto(&txn); err != nil {
			return err
		}
		infoMu.TransactionSpanTotal++
		if !txn.LastActive().Less(cutoff) {
			return nil
		}

		txnID := *txn.ID

		// The transaction record should be considered for removal.
		switch txn.Status {
		case roachpb.PENDING:
			// Marked as running, so we need to push it to abort it but won't
			// try to GC it in this cycle (for convenience).
			// TODO(tschottdorf): refactor so that we can GC PENDING entries
			// in the same cycle, but keeping the calls to pushTxn in a central
			// location (keeping it easy to batch them up in the future).
			infoMu.TransactionSpanGCPending++
			txnMap[txnID] = &txn
			return nil
		case roachpb.ABORTED:
			// If we remove this transaction, it effectively still counts as
			// ABORTED (by design). So this can be GC'ed even if we can't
			// resolve the intents.
			// Note: Most aborted transactions weren't aborted by their client,
			// but instead by the coordinator - those will not have any intents
			// persisted, though they still might exist in the system.
			infoMu.TransactionSpanGCAborted++
			func() {
				infoMu.Unlock() // intentional
				defer infoMu.Lock()
				if err := resolveIntents(roachpb.AsIntents(txn.Intents, &txn),
					true /* wait */, false /* !poison */); err != nil {
					log.Warningf(ctx, "failed to resolve intents of aborted txn on gc: %s", err)
				}
			}()
		case roachpb.COMMITTED:
			// It's committed, so it doesn't need a push but we can only
			// GC it after its intents are resolved.
			if err := func() error {
				infoMu.Unlock() // intentional
				defer infoMu.Lock()
				return resolveIntents(roachpb.AsIntents(txn.Intents, &txn),
					true /* wait */, false /* !poison */)
			}(); err != nil {
				log.Warningf(ctx, "unable to resolve intents of committed txn on gc: %s", err)
				// Returning the error here would abort the whole GC run, and
				// we don't want that. Instead, we simply don't GC this entry.
				return nil
			}
			infoMu.TransactionSpanGCCommitted++
		default:
			panic(fmt.Sprintf("invalid transaction state: %s", txn))
		}
		gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: kv.Key}) // zero timestamp
		return nil
	}

	startKey := keys.TransactionKey(desc.StartKey.AsRawKey(), uuid.UUID{})
	endKey := keys.TransactionKey(desc.EndKey.AsRawKey(), uuid.UUID{})

	_, err := engine.MVCCIterate(ctx, snap, startKey, endKey,
		hlc.ZeroTimestamp, true /* consistent */, nil /* txn */, false /* !reverse */,
		func(kv roachpb.KeyValue) (bool, error) {
			return false, handleOne(kv)
		})
	return gcKeys, err
}
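// Consumption sketch (assumption, not shown in this excerpt): the GC queue
// would typically fold the returned keys into the GCRequest it sends for the
// range; here gcArgs and txnExp stand in for names used by the caller,
// roughly:
//
//   txnKeys, err := processTransactionTable(ctx, snap, desc, txnMap, txnExp, &infoMu, resolveIntents)
//   if err != nil {
//       return err
//   }
//   gcArgs.Keys = append(gcArgs.Keys, txnKeys...)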
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(
	ctx context.Context,
	startNS int64,
	ba roachpb.BatchRequest,
	br *roachpb.BatchResponse,
	pErr *roachpb.Error,
) *roachpb.Error {
	tc.Lock()
	defer tc.Unlock()

	if ba.Txn == nil {
		// Not a transactional request.
		return pErr
	}

	var newTxn roachpb.Transaction
	newTxn.Update(ba.Txn)
	if pErr == nil {
		newTxn.Update(br.Txn)
	} else if errTxn := pErr.GetTxn(); errTxn != nil {
		newTxn.Update(errTxn)
	}

	switch t := pErr.GetDetail().(type) {
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// If the reader encountered a newer write within the uncertainty
		// interval, we advance the txn's timestamp just past the last observed
		// timestamp from the node.
		restartTS, ok := newTxn.GetObservedTimestamp(pErr.OriginNode)
		if !ok {
			pErr = roachpb.NewError(errors.Errorf("no observed timestamp for node %d found on uncertainty restart", pErr.OriginNode))
		} else {
			newTxn.Timestamp.Forward(restartTS)
			newTxn.Restart(ba.UserPriority, newTxn.Priority, newTxn.Timestamp)
		}
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(pErr.GetTxn().Timestamp)
		newTxn.Priority = pErr.GetTxn().Priority
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxnLocked(ctx, newTxn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp)
		newTxn.Restart(ba.UserPriority, t.PusheeTxn.Priority-1, newTxn.Timestamp)
	case *roachpb.TransactionRetryError:
		// Increase timestamp so on restart, we're ahead of any timestamp
		// cache entries or newer versions which caused the restart.
		newTxn.Restart(ba.UserPriority, pErr.GetTxn().Priority, newTxn.Timestamp)
	case *roachpb.WriteTooOldError:
		newTxn.Restart(ba.UserPriority, newTxn.Priority, t.ActualTimestamp)
	case nil:
		// Nothing to do here, avoid the default case.
	default:
		// Do not clean up the transaction since we're leaving cancellation of
		// the transaction up to the client. For example, on seeing an error,
		// like TransactionStatusError or ConditionFailedError, the client
		// will call Txn.CleanupOnError() which will cleanup the transaction
		// and its intents. Therefore leave the transaction in the PENDING
		// state and do not call cleanupTxnLocked().
	}

	txnID := *newTxn.ID
	txnMeta := tc.txns[txnID]
	// For successful transactional requests, keep the written intents and
	// the updated transaction record to be sent along with the reply.
	// The transaction metadata is created with the first writing operation.
	// A tricky edge case is that of a transaction which "fails" on the
	// first writing request, but actually manages to write some intents
	// (for example, due to being multi-range). In this case, there will
	// be an error, but the transaction will be marked as Writing and the
	// coordinator must track the state, for the client's retry will be
	// performed with a Writing transaction which the coordinator rejects
	// unless it is tracking it (on top of it making sense to track it;
	// after all, it **has** laid down intents and only the coordinator
	// can augment a potential EndTransaction call). See #3303.
	if txnMeta != nil || pErr == nil || newTxn.Writing {
		// Adding the intents even on error reduces the likelihood of dangling
		// intents blocking concurrent writers for extended periods of time.
		// See #3346.
		var keys []roachpb.Span
		if txnMeta != nil {
			keys = txnMeta.keys
		}
		ba.IntentSpanIterate(br, func(key, endKey roachpb.Key) {
			keys = append(keys, roachpb.Span{
				Key:    key,
				EndKey: endKey,
			})
		})
		if txnMeta != nil {
			txnMeta.keys = keys
		} else if len(keys) > 0 {
			if !newTxn.Writing {
				panic("txn with intents marked as non-writing")
			}
			// If the transaction is already over, there's no point in
			// launching a one-off coordinator which will shut down right
			// away. If we ended up here with an error, we'll always start
			// the coordinator - the transaction has laid down intents, so
			// we expect it to be committed/aborted at some point in the
			// future.
			if _, isEnding := ba.GetArg(roachpb.EndTransaction); pErr != nil || !isEnding {
				log.Event(ctx, "coordinator spawns")
				txnMeta = &txnMetadata{
					txn:              newTxn,
					keys:             keys,
					firstUpdateNanos: startNS,
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[txnID] = txnMeta

				if err := tc.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
					tc.heartbeatLoop(ctx, txnID)
				}); err != nil {
					// The system is already draining and we can't start the
					// heartbeat. We refuse new transactions for now because
					// they're likely not going to have all intents committed.
					// In principle, we can relax this as needed though.
					tc.unregisterTxnLocked(txnID)
					return roachpb.NewError(err)
				}
			} else {
				// If this was a successful one phase commit, update stats
				// directly as they won't otherwise be updated on heartbeat
				// loop shutdown.
				etArgs, ok := br.Responses[len(br.Responses)-1].GetInner().(*roachpb.EndTransactionResponse)
				tc.updateStats(tc.clock.PhysicalNow()-startNS, 0, newTxn.Status, ok && etArgs.OnePhaseCommit)
			}
		}
	}

	// Update our record of this transaction, even on error.
	if txnMeta != nil {
		txnMeta.txn.Update(&newTxn)
		if !txnMeta.txn.Writing {
			panic("tracking a non-writing txn")
		}
		txnMeta.setLastUpdate(tc.clock.PhysicalNow())
	}

	if pErr == nil {
		// For successful transactional requests, always send the updated txn
		// record back. Note that we make sure not to share data with newTxn
		// (which may have made it into txnMeta).
		if br.Txn != nil {
			br.Txn.Update(&newTxn)
		} else {
			clonedTxn := newTxn.Clone()
			br.Txn = &clonedTxn
		}
	} else if pErr.GetTxn() != nil {
		// Avoid changing existing errors because sometimes they escape into
		// goroutines and data races can occur.
		pErrShallow := *pErr
		pErrShallow.SetTxn(&newTxn) // SetTxn clones newTxn
		pErr = &pErrShallow
	}

	return pErr
}
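// Call-site sketch (assumption, not shown in this excerpt): updateState is
// expected to run at the tail of the TxnCoordSender request path so that both
// the response and any error are folded back into the tracked transaction
// state; with tc.wrapped standing in for the wrapped sender, roughly:
//
//   br, pErr := tc.wrapped.Send(ctx, ba)
//   return br, tc.updateState(ctx, startNS, ba, br, pErr)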