// TestRocksDBCompaction verifies that a garbage collector can be // installed on a RocksDB engine and will properly compact response // cache and transaction entries. func TestRocksDBCompaction(t *testing.T) { defer leaktest.AfterTest(t) gob.Register(proto.Timestamp{}) rocksdb := newMemRocksDB(proto.Attributes{Attrs: []string{"ssd"}}, testCacheSize) err := rocksdb.Open() if err != nil { t.Fatalf("could not create new in-memory rocksdb db instance: %v", err) } rocksdb.SetGCTimeouts(1, 2) defer rocksdb.Close() cmdID := &proto.ClientCmdID{WallTime: 1, Random: 1} // Write two transaction values and two response cache values such // that exactly one of each should be GC'd based on our GC timeouts. kvs := []proto.KeyValue{ { Key: keys.ResponseCacheKey(1, cmdID), Value: proto.Value{Bytes: encodePutResponse(makeTS(2, 0), t)}, }, { Key: keys.ResponseCacheKey(2, cmdID), Value: proto.Value{Bytes: encodePutResponse(makeTS(3, 0), t)}, }, { Key: keys.TransactionKey(proto.Key("a"), proto.Key(uuid.NewUUID4())), Value: proto.Value{Bytes: encodeTransaction(makeTS(1, 0), t)}, }, { Key: keys.TransactionKey(proto.Key("b"), proto.Key(uuid.NewUUID4())), Value: proto.Value{Bytes: encodeTransaction(makeTS(2, 0), t)}, }, } for _, kv := range kvs { if err := MVCCPut(rocksdb, nil, kv.Key, proto.ZeroTimestamp, kv.Value, nil); err != nil { t.Fatal(err) } } // Compact range and scan remaining values to compare. rocksdb.CompactRange(nil, nil) actualKVs, _, err := MVCCScan(rocksdb, proto.KeyMin, proto.KeyMax, 0, proto.ZeroTimestamp, true, nil) if err != nil { t.Fatalf("could not run scan: %v", err) } var keys []proto.Key for _, kv := range actualKVs { keys = append(keys, kv.Key) } expKeys := []proto.Key{ kvs[1].Key, kvs[3].Key, } if !reflect.DeepEqual(expKeys, keys) { t.Errorf("expected keys %+v, got keys %+v", expKeys, keys) } }
// InternalHeartbeatTxn updates the transaction status and heartbeat // timestamp after receiving transaction heartbeat messages from // coordinator. Returns the updated transaction. func (r *Range) InternalHeartbeatTxn(batch engine.Engine, ms *engine.MVCCStats, args proto.InternalHeartbeatTxnRequest) (proto.InternalHeartbeatTxnResponse, error) { var reply proto.InternalHeartbeatTxnResponse key := keys.TransactionKey(args.Txn.Key, args.Txn.ID) var txn proto.Transaction if ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true, nil, &txn); err != nil { return reply, err } else if !ok { // If no existing transaction record was found, initialize to a // shallow copy of the transaction in the request header. We copy // to avoid mutating the original below. txn = *args.Txn } if txn.Status == proto.PENDING { if txn.LastHeartbeat == nil { txn.LastHeartbeat = &proto.Timestamp{} } if txn.LastHeartbeat.Less(args.Header().Timestamp) { *txn.LastHeartbeat = args.Header().Timestamp } if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, &txn); err != nil { return reply, err } } reply.Txn = &txn return reply, nil }
// InternalHeartbeatTxn updates the transaction status and heartbeat // timestamp after receiving transaction heartbeat messages from // coordinator. Returns the updated transaction. func (r *Range) InternalHeartbeatTxn(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalHeartbeatTxnRequest, reply *proto.InternalHeartbeatTxnResponse) { key := keys.TransactionKey(args.Txn.Key, args.Txn.ID) var txn proto.Transaction ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true, nil, &txn) if err != nil { reply.SetGoError(err) return } // If no existing transaction record was found, initialize // to the transaction in the request header. if !ok { gogoproto.Merge(&txn, args.Txn) } if txn.Status == proto.PENDING { if txn.LastHeartbeat == nil { txn.LastHeartbeat = &proto.Timestamp{} } if txn.LastHeartbeat.Less(args.Header().Timestamp) { *txn.LastHeartbeat = args.Header().Timestamp } if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, &txn); err != nil { reply.SetGoError(err) return } } reply.Txn = &txn }
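// A minimal, self-contained sketch of the heartbeat rule used by InternalHeartbeatTxn
// above: the persisted LastHeartbeat only ever moves forward, and only while the
// transaction is still PENDING. The types here (txnStatus, hlcTimestamp) are simplified
// stand-ins, not the real proto/roachpb definitions.
package main

import "fmt"

type txnStatus int

const (
	statusPending txnStatus = iota
	statusCommitted
	statusAborted
)

// hlcTimestamp is a simplified hybrid-logical-clock timestamp.
type hlcTimestamp struct {
	WallTime int64
	Logical  int32
}

func (t hlcTimestamp) Less(o hlcTimestamp) bool {
	return t.WallTime < o.WallTime || (t.WallTime == o.WallTime && t.Logical < o.Logical)
}

type txnRecord struct {
	Status        txnStatus
	LastHeartbeat hlcTimestamp
}

// heartbeat applies the forward-only update; it is a no-op once the
// transaction has been finalized.
func heartbeat(txn *txnRecord, now hlcTimestamp) {
	if txn.Status != statusPending {
		return
	}
	if txn.LastHeartbeat.Less(now) {
		txn.LastHeartbeat = now
	}
}

func main() {
	txn := &txnRecord{Status: statusPending, LastHeartbeat: hlcTimestamp{WallTime: 5}}
	heartbeat(txn, hlcTimestamp{WallTime: 10})
	heartbeat(txn, hlcTimestamp{WallTime: 7}) // stale heartbeat: ignored
	fmt.Println(txn.LastHeartbeat.WallTime)   // 10
}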
// TestRocksDBCompaction verifies that a garbage collector can be // installed on a RocksDB engine and will properly compact transaction // entries. func TestRocksDBCompaction(t *testing.T) { defer leaktest.AfterTest(t) stopper := stop.NewStopper() defer stopper.Stop() rocksdb := newMemRocksDB(roachpb.Attributes{}, testCacheSize, stopper) err := rocksdb.Open() if err != nil { t.Fatalf("could not create new in-memory rocksdb db instance: %v", err) } rocksdb.SetGCTimeouts(1) // Write two transaction values such that exactly one should be GC'd based // on our GC timeouts. kvs := []roachpb.KeyValue{ { Key: keys.TransactionKey(roachpb.Key("a"), roachpb.Key(uuid.NewUUID4())), Value: roachpb.MakeValueFromBytes(encodeTransaction(makeTS(1, 0), t)), }, { Key: keys.TransactionKey(roachpb.Key("b"), roachpb.Key(uuid.NewUUID4())), Value: roachpb.MakeValueFromBytes(encodeTransaction(makeTS(2, 0), t)), }, } for _, kv := range kvs { if err := MVCCPut(rocksdb, nil, kv.Key, roachpb.ZeroTimestamp, kv.Value, nil); err != nil { t.Fatal(err) } } // Compact range and scan remaining values to compare. rocksdb.CompactRange(nil, nil) actualKVs, _, err := MVCCScan(rocksdb, keyMin, keyMax, 0, roachpb.ZeroTimestamp, true, nil) if err != nil { t.Fatalf("could not run scan: %v", err) } var keys []roachpb.Key for _, kv := range actualKVs { keys = append(keys, kv.Key) } expKeys := []roachpb.Key{ kvs[1].Key, } if !reflect.DeepEqual(expKeys, keys) { t.Errorf("expected keys %+v, got keys %+v", expKeys, keys) } }
// createRangeData creates sample range data in all possible areas of // the key space. Returns a slice of the encoded keys of all created // data. func createRangeData(t *testing.T, r *Replica) []engine.MVCCKey { ts0 := hlc.ZeroTimestamp ts := hlc.Timestamp{WallTime: 1} desc := r.Desc() keyTSs := []struct { key roachpb.Key ts hlc.Timestamp }{ {keys.AbortCacheKey(r.RangeID, testTxnID), ts0}, {keys.AbortCacheKey(r.RangeID, testTxnID2), ts0}, {keys.RangeFrozenStatusKey(r.RangeID), ts0}, {keys.RangeLastGCKey(r.RangeID), ts0}, {keys.RaftAppliedIndexKey(r.RangeID), ts0}, {keys.RaftTruncatedStateKey(r.RangeID), ts0}, {keys.LeaseAppliedIndexKey(r.RangeID), ts0}, {keys.RangeStatsKey(r.RangeID), ts0}, {keys.RaftHardStateKey(r.RangeID), ts0}, {keys.RaftLastIndexKey(r.RangeID), ts0}, {keys.RaftLogKey(r.RangeID, 1), ts0}, {keys.RaftLogKey(r.RangeID, 2), ts0}, {keys.RangeLastReplicaGCTimestampKey(r.RangeID), ts0}, {keys.RangeLastVerificationTimestampKey(r.RangeID), ts0}, {keys.RangeDescriptorKey(desc.StartKey), ts}, {keys.TransactionKey(roachpb.Key(desc.StartKey), uuid.NewV4()), ts0}, {keys.TransactionKey(roachpb.Key(desc.StartKey.Next()), uuid.NewV4()), ts0}, {keys.TransactionKey(fakePrevKey(desc.EndKey), uuid.NewV4()), ts0}, // TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space. // Once we have resolved https://github.com/cockroachdb/cockroach/issues/437, // replace this with something that reliably generates the first valid key in the range. //{r.Desc().StartKey.Next(), ts}, // The following line is similar to StartKey.Next() but adds more to the key to // avoid falling into the system-local space. {append(append([]byte{}, desc.StartKey...), '\x02'), ts}, {fakePrevKey(r.Desc().EndKey), ts}, } keys := []engine.MVCCKey{} for _, keyTS := range keyTSs { if err := engine.MVCCPut(context.Background(), r.store.Engine(), nil, keyTS.key, keyTS.ts, roachpb.MakeValueFromString("value"), nil); err != nil { t.Fatal(err) } keys = append(keys, engine.MVCCKey{Key: keyTS.key, Timestamp: keyTS.ts}) } return keys }
// TestStoreResolveWriteIntent adds a write intent and then verifies // that a put returns success and aborts the intent's txn in the event the // pushee has lower priority. Otherwise, verifies that a // TransactionPushError is returned. func TestStoreResolveWriteIntent(t *testing.T) { defer leaktest.AfterTest(t) store, _, stopper := createTestStore(t) defer stopper.Stop() for i, resolvable := range []bool{true, false} { key := proto.Key(fmt.Sprintf("key-%d", i)) pusher := newTransaction("test", key, 1, proto.SERIALIZABLE, store.ctx.Clock) pushee := newTransaction("test", key, 1, proto.SERIALIZABLE, store.ctx.Clock) if resolvable { pushee.Priority = 1 pusher.Priority = 2 // Pusher will win. } else { pushee.Priority = 2 pusher.Priority = 1 // Pusher will lose. } // First lay down intent using the pushee's txn. pArgs := putArgs(key, []byte("value"), 1, store.StoreID()) pArgs.Timestamp = store.ctx.Clock.Now() pArgs.Txn = pushee if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}); err != nil { t.Fatal(err) } // Now, try a put using the pusher's txn. pArgs.Timestamp = store.ctx.Clock.Now() pArgs.Txn = pusher err := store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}) if resolvable { if err != nil { t.Errorf("expected intent resolved; got unexpected error: %s", err) } txnKey := keys.TransactionKey(pushee.Key, pushee.ID) var txn proto.Transaction ok, err := engine.MVCCGetProto(store.Engine(), txnKey, proto.ZeroTimestamp, true, nil, &txn) if !ok || err != nil { t.Fatalf("not found or err: %s", err) } if txn.Status != proto.ABORTED { t.Errorf("expected pushee to be aborted; got %s", txn.Status) } } else { if rErr, ok := err.(*proto.TransactionPushError); !ok { t.Errorf("expected txn push error; got %s", err) } else if !bytes.Equal(rErr.PusheeTxn.ID, pushee.ID) { t.Errorf("expected txn to match pushee %q; got %s", pushee.ID, rErr) } // Trying again should fail again. if err = store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}); err == nil { t.Errorf("expected another error on latent write intent but succeeded") } } } }
// createRangeData creates sample range data in all possible areas of // the key space. Returns a slice of the encoded keys of all created // data. func createRangeData(r *Replica, t *testing.T) []roachpb.EncodedKey { ts0 := roachpb.ZeroTimestamp ts := roachpb.Timestamp{WallTime: 1} keyTSs := []struct { key roachpb.Key ts roachpb.Timestamp }{ {keys.ResponseCacheKey(r.Desc().RangeID, &roachpb.ClientCmdID{WallTime: 1, Random: 1}), ts0}, {keys.ResponseCacheKey(r.Desc().RangeID, &roachpb.ClientCmdID{WallTime: 2, Random: 2}), ts0}, {keys.RaftHardStateKey(r.Desc().RangeID), ts0}, {keys.RaftLogKey(r.Desc().RangeID, 1), ts0}, {keys.RaftLogKey(r.Desc().RangeID, 2), ts0}, {keys.RangeGCMetadataKey(r.Desc().RangeID), ts0}, {keys.RangeLastVerificationTimestampKey(r.Desc().RangeID), ts0}, {keys.RangeStatsKey(r.Desc().RangeID), ts0}, {keys.RangeDescriptorKey(r.Desc().StartKey), ts}, {keys.TransactionKey(roachpb.Key(r.Desc().StartKey), []byte("1234")), ts0}, {keys.TransactionKey(roachpb.Key(r.Desc().StartKey.Next()), []byte("5678")), ts0}, {keys.TransactionKey(fakePrevKey(r.Desc().EndKey), []byte("2468")), ts0}, // TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space. // Once we have resolved https://github.com/cockroachdb/cockroach/issues/437, // replace this with something that reliably generates the first valid key in the range. //{r.Desc().StartKey.Next(), ts}, // The following line is similar to StartKey.Next() but adds more to the key to // avoid falling into the system-local space. {append(append([]byte{}, r.Desc().StartKey...), '\x01'), ts}, {fakePrevKey(r.Desc().EndKey), ts}, } keys := []roachpb.EncodedKey{} for _, keyTS := range keyTSs { if err := engine.MVCCPut(r.store.Engine(), nil, keyTS.key, keyTS.ts, roachpb.MakeValueFromString("value"), nil); err != nil { t.Fatal(err) } keys = append(keys, engine.MVCCEncodeKey(keyTS.key)) if !keyTS.ts.Equal(ts0) { keys = append(keys, engine.MVCCEncodeVersionKey(keyTS.key, keyTS.ts)) } } return keys }
// processTransactionTable scans the transaction table and updates txnMap with // those transactions which are old and either PENDING or with intents // registered. In the first case we want to push the transaction so that it is // aborted, and in the second case we may have to resolve the intents successfully // before GCing the entry. The transaction records which can be gc'ed are // returned separately and are not added to txnMap nor intentSpanMap. func (gcq *gcQueue) processTransactionTable(r *Replica, txnMap map[uuid.UUID]*roachpb.Transaction, cutoff roachpb.Timestamp) ([]roachpb.GCRequest_GCKey, error) { snap := r.store.Engine().NewSnapshot() defer snap.Close() var numResolveAttempts, numQueuedPushes int var gcKeys []roachpb.GCRequest_GCKey defer func() { gcq.eventLog.Infof(true, "attempted to resolve %d intents of %d gc'able transactions; queued %d txns for push", numResolveAttempts, len(gcKeys), numQueuedPushes) }() handleOne := func(kv roachpb.KeyValue) error { var txn roachpb.Transaction if err := kv.Value.GetProto(&txn); err != nil { return err } ts := txn.Timestamp if heartbeatTS := txn.LastHeartbeat; heartbeatTS != nil { ts.Forward(*heartbeatTS) } if !ts.Less(cutoff) { return nil } txnID := *txn.ID // The transaction record should be considered for removal. switch txn.Status { case roachpb.PENDING: // Marked as running, so we need to push it to abort it but won't // try to GC it in this cycle (for convenience). // TODO(tschottdorf): refactor so that we can GC PENDING entries // in the same cycle, but keeping the calls to pushTxn in a central // location (keeping it easy to batch them up in the future). numQueuedPushes++ txnMap[txnID] = &txn return nil case roachpb.ABORTED: // If we remove this transaction, it effectively still counts as // ABORTED (by design). So this can be GC'ed even if we can't // resolve the intents. // Note: Most aborted transactions weren't aborted by their client, // but instead by the coordinator - those will not have any intents // persisted, though they still might exist in the system. numResolveAttempts += len(txn.Intents) if err := r.store.intentResolver.resolveIntents(r.context(), r, roachpb.AsIntents(txn.Intents, &txn), true /* wait */, false /* !poison */); err != nil { log.Warningf("failed to resolve intents of aborted txn on gc: %s", err) } case roachpb.COMMITTED: // It's committed, so it doesn't need a push but we can only // GC it after its intents are resolved. numResolveAttempts += len(txn.Intents) if err := r.store.intentResolver.resolveIntents(r.context(), r, roachpb.AsIntents(txn.Intents, &txn), true /* wait */, false /* !poison */); err != nil { log.Warningf("unable to resolve intents of committed txn on gc: %s", err) // Returning the error here would abort the whole GC run, and // we don't want that. Instead, we simply don't GC this entry. return nil } default: panic(fmt.Sprintf("invalid transaction state: %s", txn)) } gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: kv.Key}) // zero timestamp return nil } startKey := keys.TransactionKey(roachpb.KeyMin, uuid.EmptyUUID) endKey := keys.TransactionKey(roachpb.KeyMax, uuid.EmptyUUID) _, err := engine.MVCCIterate(snap, startKey, endKey, roachpb.ZeroTimestamp, true /* consistent */, nil /* txn */, false /* !reverse */, func(kv roachpb.KeyValue) (bool, error) { return false, handleOne(kv) }) return gcKeys, err }
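// A simplified sketch of the per-status GC policy that processTransactionTable implements:
// a record is only considered once its last activity (record timestamp or heartbeat,
// whichever is later) falls below the cutoff; PENDING records are queued for a push, while
// ABORTED and COMMITTED records have their intents resolved and are then eligible for GC.
// Types and names here are illustrative stand-ins, not the real roachpb API.
package main

import "fmt"

type txnStatus int

const (
	statusPending txnStatus = iota
	statusAborted
	statusCommitted
)

type txnEntry struct {
	Status        txnStatus
	Timestamp     int64 // wall time of the txn record
	LastHeartbeat int64 // zero if the txn never heartbeat
}

// lastActive mirrors the ts.Forward(*heartbeatTS) step above.
func lastActive(e txnEntry) int64 {
	if e.LastHeartbeat > e.Timestamp {
		return e.LastHeartbeat
	}
	return e.Timestamp
}

type gcAction int

const (
	actionSkip         gcAction = iota // too young, leave alone
	actionPush                         // PENDING and abandoned: push to abort
	actionResolveAndGC                 // ABORTED/COMMITTED: resolve intents, then GC
)

func decide(e txnEntry, cutoff int64) gcAction {
	if lastActive(e) >= cutoff {
		return actionSkip
	}
	if e.Status == statusPending {
		return actionPush
	}
	return actionResolveAndGC
}

func main() {
	fmt.Println(decide(txnEntry{Status: statusPending, Timestamp: 100}, 50))                     // 0: actionSkip
	fmt.Println(decide(txnEntry{Status: statusPending, Timestamp: 10}, 50))                      // 1: actionPush
	fmt.Println(decide(txnEntry{Status: statusCommitted, Timestamp: 10, LastHeartbeat: 20}, 50)) // 2: actionResolveAndGC
}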
// TestStoreVerifyKeys checks that key length is enforced and // that end keys must sort >= start. func TestStoreVerifyKeys(t *testing.T) { defer leaktest.AfterTest(t) store, _, stopper := createTestStore(t) defer stopper.Stop() tooLongKey := proto.Key(strings.Repeat("x", proto.KeyMaxLength+1)) // Start with a too-long key on a get. gArgs := getArgs(tooLongKey, 1, store.StoreID()) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &gArgs, Reply: gArgs.CreateReply()}); err == nil { t.Fatal("expected error for key too long") } // Try a start key == KeyMax. gArgs.Key = proto.KeyMax if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &gArgs, Reply: gArgs.CreateReply()}); err == nil { t.Fatal("expected error for start key == KeyMax") } // Try a get with an end key specified (get requires only a start key and should fail). gArgs.EndKey = proto.KeyMax if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &gArgs, Reply: gArgs.CreateReply()}); err == nil { t.Fatal("expected error for end key specified on a non-range-based operation") } // Try a scan with too-long EndKey. sArgs := scanArgs(proto.KeyMin, tooLongKey, 1, store.StoreID()) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &sArgs, Reply: sArgs.CreateReply()}); err == nil { t.Fatal("expected error for end key too long") } // Try a scan with end key < start key. sArgs.Key = []byte("b") sArgs.EndKey = []byte("a") if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &sArgs, Reply: sArgs.CreateReply()}); err == nil { t.Fatal("expected error for end key < start") } // Try a scan with start key == end key. sArgs.Key = []byte("a") sArgs.EndKey = sArgs.Key if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &sArgs, Reply: sArgs.CreateReply()}); err == nil { t.Fatal("expected error for start == end key") } // Try a put to meta2 key which would otherwise exceed maximum key // length, but is accepted because of the meta prefix. meta2KeyMax := keys.MakeKey(keys.Meta2Prefix, proto.KeyMax) pArgs := putArgs(meta2KeyMax, []byte("value"), 1, store.StoreID()) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}); err != nil { t.Fatalf("unexpected error on put to meta2 value: %s", err) } // Try to put a range descriptor record for a start key which is // maximum length. key := append([]byte{}, proto.KeyMax...) key[len(key)-1] = 0x01 pArgs = putArgs(keys.RangeDescriptorKey(key), []byte("value"), 1, store.StoreID()) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}); err != nil { t.Fatalf("unexpected error on put to range descriptor for KeyMax value: %s", err) } // Try a put to txn record for a meta2 key (note that this doesn't // actually happen in practice, as txn records are not put directly, // but are instead manipulated only through txn methods). pArgs = putArgs(keys.TransactionKey(meta2KeyMax, []byte(uuid.NewUUID4())), []byte("value"), 1, store.StoreID()) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &pArgs, Reply: pArgs.CreateReply()}); err != nil { t.Fatalf("unexpected error on put to txn meta2 value: %s", err) } }
// TestStoreResolveWriteIntentNoTxn verifies that reads and writes // which are not part of a transaction can push intents. func TestStoreResolveWriteIntentNoTxn(t *testing.T) { defer leaktest.AfterTest(t) store, _, stopper := createTestStore(t) defer stopper.Stop() key := proto.Key("a") pushee := newTransaction("test", key, 1, proto.SERIALIZABLE, store.ctx.Clock) pushee.Priority = 0 // pushee should lose all conflicts // First, lay down intent from pushee. args := putArgs(key, []byte("value1"), 1, store.StoreID()) reply := args.CreateReply() args.Timestamp = pushee.Timestamp args.Txn = pushee if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &args, Reply: reply}); err != nil { t.Fatal(err) } // Now, try to read outside a transaction. gArgs := getArgs(key, 1, store.StoreID()) gReply := gArgs.CreateReply().(*proto.GetResponse) gArgs.Timestamp = store.ctx.Clock.Now() gArgs.UserPriority = gogoproto.Int32(math.MaxInt32) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &gArgs, Reply: gReply}); err != nil { t.Errorf("expected read to succeed: %s", err) } else if gReply.Value != nil { t.Errorf("expected value to be nil, got %+v", gReply.Value) } // Next, try to write outside of a transaction. We will succeed in pushing txn. args.Timestamp = store.ctx.Clock.Now() args.Value.Bytes = []byte("value2") args.Txn = nil args.UserPriority = gogoproto.Int32(math.MaxInt32) if err := store.ExecuteCmd(context.Background(), proto.Call{Args: &args, Reply: reply}); err != nil { t.Errorf("expected success aborting pushee's txn; got %s", err) } // Read pushee's txn. txnKey := keys.TransactionKey(pushee.Key, pushee.ID) var txn proto.Transaction if ok, err := engine.MVCCGetProto(store.Engine(), txnKey, proto.ZeroTimestamp, true, nil, &txn); !ok || err != nil { t.Fatalf("not found or err: %s", err) } if txn.Status != proto.ABORTED { t.Errorf("expected pushee to be aborted; got %s", txn.Status) } // Verify that the pushee's timestamp was moved forward on // former read, since we have it available in write intent error. expTS := gArgs.Timestamp expTS.Logical++ if !txn.Timestamp.Equal(expTS) { t.Errorf("expected pushee timestamp pushed to %s; got %s", expTS, txn.Timestamp) } // Similarly, verify that pushee's priority was moved from 0 // to math.MaxInt32-1 during push. if txn.Priority != math.MaxInt32-1 { t.Errorf("expected pushee priority to be pushed to %d; got %d", math.MaxInt32-1, txn.Priority) } // Finally, try to end the pushee's transaction; it should have // been aborted. etArgs := endTxnArgs(pushee, true, 1, store.StoreID()) etArgs.Timestamp = pushee.Timestamp err := store.ExecuteCmd(context.Background(), proto.Call{Args: &etArgs, Reply: etArgs.CreateReply()}) if err == nil { t.Errorf("unexpected success committing transaction") } if _, ok := err.(*proto.TransactionAbortedError); !ok { t.Errorf("expected transaction aborted error; got %s", err) } }
func TestGCQueueTransactionTable(t *testing.T) { defer leaktest.AfterTest(t) const now time.Duration = 3 * 24 * time.Hour const tTxnThreshold = now - txnCleanupThreshold type spec struct { status roachpb.TransactionStatus ts time.Duration heartbeatTS time.Duration newStatus roachpb.TransactionStatus // -1 for GCed failResolve bool // do we want to fail resolves in this trial? expResolve bool // expect attempt at removing txn-persisted intents? expSeqGC bool // expect sequence cache entries removed? } // Describes the state of the Txn table before the test. testCases := map[string]spec{ // Too young, should not touch. "a": {roachpb.PENDING, tTxnThreshold + 1, 0, roachpb.PENDING, false, false, false}, // Old and pending, but still heartbeat (so no Push attempted; it would succeed). // No GC. "b": {roachpb.PENDING, 0, tTxnThreshold + 1, roachpb.PENDING, false, false, false}, // Old, pending and abandoned. Should push and abort it successfully, // but not GC it just yet (this is an artifact of the implementation). // The sequence cache gets cleaned up though. "c": {roachpb.PENDING, tTxnThreshold - 1, 0, roachpb.ABORTED, false, false, true}, // Old and aborted, should delete. "d": {roachpb.ABORTED, tTxnThreshold - 1, 0, -1, false, true, true}, // Committed and fresh, so no action. "e": {roachpb.COMMITTED, tTxnThreshold + 1, 0, roachpb.COMMITTED, false, false, false}, // Committed and old. It has an intent (like all tests here), which is // resolvable and hence we can GC. "f": {roachpb.COMMITTED, tTxnThreshold - 1, 0, -1, false, true, true}, // Same as the previous one, but we've rigged things so that the intent // resolution here will fail and consequently no GC is expected. "g": {roachpb.COMMITTED, tTxnThreshold - 1, 0, roachpb.COMMITTED, true, true, true}, } resolved := map[string][]roachpb.Span{} TestingCommandFilter = func(_ roachpb.StoreID, req roachpb.Request, _ roachpb.Header) error { if resArgs, ok := req.(*roachpb.ResolveIntentRequest); ok { id := string(resArgs.IntentTxn.Key) resolved[id] = append(resolved[id], roachpb.Span{ Key: resArgs.Key, EndKey: resArgs.EndKey, }) // We've special cased one test case. Note that the intent is still // counted in `resolved`. if testCases[id].failResolve { return util.Errorf("boom") } } return nil } defer func() { TestingCommandFilter = nil }() tc := testContext{} tc.Start(t) defer tc.Stop() tc.manualClock.Set(int64(now)) testIntents := []roachpb.Span{{Key: roachpb.Key("intent")}} txns := map[string]roachpb.Transaction{} var epo uint32 for strKey, test := range testCases { epo++ baseKey := roachpb.Key(strKey) txnClock := hlc.NewClock(hlc.NewManualClock(int64(test.ts)).UnixNano) txn := newTransaction("txn1", baseKey, 1, roachpb.SERIALIZABLE, txnClock) txn.Status = test.status txn.Intents = testIntents txn.LastHeartbeat = &roachpb.Timestamp{WallTime: int64(test.heartbeatTS)} txns[strKey] = *txn key := keys.TransactionKey(baseKey, txn.ID) if err := engine.MVCCPutProto(tc.engine, nil, key, roachpb.ZeroTimestamp, nil, txn); err != nil { t.Fatal(err) } seqTS := txn.Timestamp seqTS.Forward(*txn.LastHeartbeat) if err := tc.rng.sequence.Put(tc.engine, txn.ID, epo, 2*epo, txn.Key, seqTS, nil /* err */); err != nil { t.Fatal(err) } } // Run GC. 
gcQ := newGCQueue(tc.gossip) cfg := tc.gossip.GetSystemConfig() if cfg == nil { t.Fatal("nil config") } if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil { t.Fatal(err) } util.SucceedsWithin(t, time.Second, func() error { for strKey, sp := range testCases { txn := &roachpb.Transaction{} key := keys.TransactionKey(roachpb.Key(strKey), txns[strKey].ID) ok, err := engine.MVCCGetProto(tc.engine, key, roachpb.ZeroTimestamp, true, nil, txn) if err != nil { return err } if expGC := (sp.newStatus == -1); expGC { if expGC != !ok { return fmt.Errorf("%s: expected gc: %t, but found %s\n%s", strKey, expGC, txn, roachpb.Key(strKey)) } } else if sp.newStatus != txn.Status { return fmt.Errorf("%s: expected status %s, but found %s", strKey, sp.newStatus, txn.Status) } var expIntents []roachpb.Span if sp.expResolve { expIntents = testIntents } if !reflect.DeepEqual(resolved[strKey], expIntents) { return fmt.Errorf("%s: unexpected intent resolutions:\nexpected: %s\nobserved: %s", strKey, expIntents, resolved[strKey]) } if kvs, err := tc.rng.sequence.GetAllTransactionID(tc.store.Engine(), txns[strKey].ID); err != nil { t.Fatal(err) } else if (len(kvs) != 0) == sp.expSeqGC { return fmt.Errorf("%s: expected sequence cache gc: %t, found %+v", strKey, sp.expSeqGC, kvs) } } return nil }) }
// EndTransaction either commits or aborts (rolls back) an extant // transaction according to the args.Commit parameter. func (r *Range) EndTransaction(batch engine.Engine, ms *engine.MVCCStats, args *proto.EndTransactionRequest, reply *proto.EndTransactionResponse) { if args.Txn == nil { reply.SetGoError(util.Errorf("no transaction specified to EndTransaction")) return } key := keys.TransactionKey(args.Txn.Key, args.Txn.ID) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true, nil, existTxn) if err != nil { reply.SetGoError(err) return } // If the transaction record already exists, verify that we can either // commit it or abort it (according to args.Commit), and also that the // Timestamp and Epoch have not suffered regression. if ok { // Use the persisted transaction record as final transaction. reply.Txn = gogoproto.Clone(existTxn).(*proto.Transaction) if existTxn.Status == proto.COMMITTED { reply.SetGoError(proto.NewTransactionStatusError(existTxn, "already committed")) return } else if existTxn.Status == proto.ABORTED { reply.SetGoError(proto.NewTransactionAbortedError(existTxn)) return } else if args.Txn.Epoch < existTxn.Epoch { reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("epoch regression: %d", args.Txn.Epoch))) return } else if args.Txn.Epoch == existTxn.Epoch && existTxn.Timestamp.Less(args.Txn.OrigTimestamp) { // The transaction record can only ever be pushed forward, so it's an // error if somehow the transaction record has an earlier timestamp // than the original transaction timestamp. reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("timestamp regression: %s", args.Txn.OrigTimestamp))) return } // Take max of requested epoch and existing epoch. The requester // may have incremented the epoch on retries. if reply.Txn.Epoch < args.Txn.Epoch { reply.Txn.Epoch = args.Txn.Epoch } // Take max of requested priority and existing priority. This isn't // terribly useful, but we do it for completeness. if reply.Txn.Priority < args.Txn.Priority { reply.Txn.Priority = args.Txn.Priority } } else { // The transaction doesn't exist yet on disk; use the supplied version. reply.Txn = gogoproto.Clone(args.Txn).(*proto.Transaction) } // Take max of requested timestamp and possibly "pushed" txn // record timestamp as the final commit timestamp. if reply.Txn.Timestamp.Less(args.Timestamp) { reply.Txn.Timestamp = args.Timestamp } // Set transaction status to COMMITTED or ABORTED as per the // args.Commit parameter. if args.Commit { // If the isolation level is SERIALIZABLE, return a transaction // retry error if the commit timestamp isn't equal to the txn // timestamp. if args.Txn.Isolation == proto.SERIALIZABLE && !reply.Txn.Timestamp.Equal(args.Txn.OrigTimestamp) { reply.SetGoError(proto.NewTransactionRetryError(reply.Txn)) return } reply.Txn.Status = proto.COMMITTED } else { reply.Txn.Status = proto.ABORTED } // Persist the transaction record with updated status (& possibly timestamp). if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, reply.Txn); err != nil { reply.SetGoError(err) return } // Run triggers if successfully committed. Any failures running // triggers will set an error and prevent the batch from committing. if ct := args.InternalCommitTrigger; ct != nil { // Resolve any explicit intents. 
for _, key := range ct.Intents { if log.V(1) { log.Infof("resolving intent at %s on end transaction [%s]", key, reply.Txn.Status) } if err := engine.MVCCResolveWriteIntent(batch, ms, key, reply.Txn.Timestamp, reply.Txn); err != nil { reply.SetGoError(err) return } reply.Resolved = append(reply.Resolved, key) } // Run appropriate trigger. if reply.Txn.Status == proto.COMMITTED { if ct.SplitTrigger != nil { *ms = engine.MVCCStats{} // clear stats, as split will recompute from scratch. reply.SetGoError(r.splitTrigger(batch, ct.SplitTrigger)) } else if ct.MergeTrigger != nil { *ms = engine.MVCCStats{} // clear stats, as merge will recompute from scratch. reply.SetGoError(r.mergeTrigger(batch, ct.MergeTrigger)) } else if ct.ChangeReplicasTrigger != nil { reply.SetGoError(r.changeReplicasTrigger(ct.ChangeReplicasTrigger)) } } } }
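// A compact sketch of the commit-time check described in EndTransaction above: the commit
// timestamp is the maximum of the request timestamp and the (possibly pushed) persisted
// record timestamp, and a SERIALIZABLE transaction may only commit if that final timestamp
// still equals its original timestamp. All names here are simplified placeholders, not the
// real proto types.
package main

import (
	"errors"
	"fmt"
)

type isolation int

const (
	serializable isolation = iota
	snapshot
)

var errRetry = errors.New("transaction retry required: commit timestamp was pushed")

// commitTimestamp returns the timestamp at which the transaction would commit, or errRetry
// if a serializable transaction was pushed past its original timestamp.
func commitTimestamp(iso isolation, orig, requested, persisted int64) (int64, error) {
	final := persisted
	if requested > final {
		final = requested
	}
	if iso == serializable && final != orig {
		return 0, errRetry
	}
	return final, nil
}

func main() {
	if ts, err := commitTimestamp(serializable, 10, 10, 10); err == nil {
		fmt.Println("committed at", ts) // committed at 10
	}
	if _, err := commitTimestamp(serializable, 10, 10, 12); err != nil {
		fmt.Println(err) // pushed record timestamp forces a retry
	}
	if ts, err := commitTimestamp(snapshot, 10, 10, 12); err == nil {
		fmt.Println("committed at", ts) // snapshot isolation tolerates the push: 12
	}
}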
// processTransactionTable scans the transaction table and updates txnMap with // those transactions which are old and either PENDING or with intents // registered. In the first case we want to push the transaction so that it is // aborted, and in the second case we may have to resolve the intents successfully // before GCing the entry. The transaction records which can be gc'ed are // returned separately and are not added to txnMap nor intentSpanMap. func processTransactionTable( ctx context.Context, snap engine.Engine, desc *roachpb.RangeDescriptor, txnMap map[uuid.UUID]*roachpb.Transaction, cutoff roachpb.Timestamp, infoMu *lockableGCInfo, resolveIntents resolveFunc, ) ([]roachpb.GCRequest_GCKey, error) { infoMu.Lock() defer infoMu.Unlock() var gcKeys []roachpb.GCRequest_GCKey handleOne := func(kv roachpb.KeyValue) error { var txn roachpb.Transaction if err := kv.Value.GetProto(&txn); err != nil { return err } infoMu.TransactionSpanTotal++ if !txn.LastActive().Less(cutoff) { return nil } txnID := *txn.ID // The transaction record should be considered for removal. switch txn.Status { case roachpb.PENDING: // Marked as running, so we need to push it to abort it but won't // try to GC it in this cycle (for convenience). // TODO(tschottdorf): refactor so that we can GC PENDING entries // in the same cycle, but keeping the calls to pushTxn in a central // location (keeping it easy to batch them up in the future). infoMu.TransactionSpanGCPending++ txnMap[txnID] = &txn return nil case roachpb.ABORTED: // If we remove this transaction, it effectively still counts as // ABORTED (by design). So this can be GC'ed even if we can't // resolve the intents. // Note: Most aborted transactions weren't aborted by their client, // but instead by the coordinator - those will not have any intents // persisted, though they still might exist in the system. infoMu.TransactionSpanGCAborted++ func() { infoMu.Unlock() // intentional defer infoMu.Lock() if err := resolveIntents(roachpb.AsIntents(txn.Intents, &txn), true /* wait */, false /* !poison */); err != nil { log.Warningf("failed to resolve intents of aborted txn on gc: %s", err) } }() case roachpb.COMMITTED: // It's committed, so it doesn't need a push but we can only // GC it after its intents are resolved. if err := func() error { infoMu.Unlock() // intentional defer infoMu.Lock() return resolveIntents(roachpb.AsIntents(txn.Intents, &txn), true /* wait */, false /* !poison */) }(); err != nil { log.Warningf("unable to resolve intents of committed txn on gc: %s", err) // Returning the error here would abort the whole GC run, and // we don't want that. Instead, we simply don't GC this entry. return nil } infoMu.TransactionSpanGCCommitted++ default: panic(fmt.Sprintf("invalid transaction state: %s", txn)) } gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{Key: kv.Key}) // zero timestamp return nil } startKey := keys.TransactionKey(desc.StartKey.AsRawKey(), uuid.EmptyUUID) endKey := keys.TransactionKey(desc.EndKey.AsRawKey(), uuid.EmptyUUID) _, err := engine.MVCCIterate(ctx, snap, startKey, endKey, roachpb.ZeroTimestamp, true /* consistent */, nil, /* txn */ false /* !reverse */, func(kv roachpb.KeyValue) (bool, error) { return false, handleOne(kv) }) return gcKeys, err }
// TestStoreVerifyKeys checks that key length is enforced and // that end keys must sort >= start. func TestStoreVerifyKeys(t *testing.T) { defer leaktest.AfterTest(t) store, _, stopper := createTestStore(t) defer stopper.Stop() tooLongKey := roachpb.Key(strings.Repeat("x", roachpb.KeyMaxLength+1)) // Start with a too-long key on a get. gArgs := getArgs(tooLongKey, 1, store.StoreID()) if _, err := client.SendWrapped(store, nil, &gArgs); !testutils.IsError(err, "exceeded") { t.Fatalf("unexpected error for key too long: %v", err) } // Try a start key == KeyMax. gArgs.Key = roachpb.KeyMax if _, err := client.SendWrapped(store, nil, &gArgs); !testutils.IsError(err, "must be less than KeyMax") { t.Fatalf("expected error for start key == KeyMax: %v", err) } // Try a get with an end key specified (get requires only a start key and should fail). gArgs.EndKey = roachpb.KeyMax if _, err := client.SendWrapped(store, nil, &gArgs); !testutils.IsError(err, "must be less than KeyMax") { t.Fatalf("unexpected error for end key specified on a non-range-based operation: %v", err) } // Try a scan with too-long EndKey. sArgs := scanArgs(roachpb.KeyMin, tooLongKey, 1, store.StoreID()) if _, err := client.SendWrapped(store, nil, &sArgs); !testutils.IsError(err, "length exceeded") { t.Fatalf("unexpected error for end key too long: %v", err) } // Try a scan with end key < start key. sArgs.Key = []byte("b") sArgs.EndKey = []byte("a") if _, err := client.SendWrapped(store, nil, &sArgs); !testutils.IsError(err, "must be greater than") { t.Fatalf("unexpected error for end key < start: %v", err) } // Try a scan with start key == end key. sArgs.Key = []byte("a") sArgs.EndKey = sArgs.Key if _, err := client.SendWrapped(store, nil, &sArgs); !testutils.IsError(err, "must be greater than") { t.Fatalf("unexpected error for start == end key: %v", err) } // Try a scan with range-local start key, but "regular" end key. sArgs.Key = keys.MakeRangeKey([]byte("test"), []byte("sffx"), nil) sArgs.EndKey = []byte("z") if _, err := client.SendWrapped(store, nil, &sArgs); !testutils.IsError(err, "range-local") { t.Fatalf("unexpected error for local start, non-local end key: %v", err) } // Try a put to meta2 key which would otherwise exceed maximum key // length, but is accepted because of the meta prefix. meta2KeyMax := keys.MakeKey(keys.Meta2Prefix, roachpb.KeyMax) pArgs := putArgs(meta2KeyMax, []byte("value"), 1, store.StoreID()) if _, err := client.SendWrapped(store, nil, &pArgs); err != nil { t.Fatalf("unexpected error on put to meta2 value: %s", err) } // Try to put a range descriptor record for a start key which is // maximum length. key := append([]byte{}, roachpb.KeyMax...) key[len(key)-1] = 0x01 pArgs = putArgs(keys.RangeDescriptorKey(key), []byte("value"), 1, store.StoreID()) if _, err := client.SendWrapped(store, nil, &pArgs); err != nil { t.Fatalf("unexpected error on put to range descriptor for KeyMax value: %s", err) } // Try a put to txn record for a meta2 key (note that this doesn't // actually happen in practice, as txn records are not put directly, // but are instead manipulated only through txn methods). pArgs = putArgs(keys.TransactionKey(meta2KeyMax, []byte(uuid.NewUUID4())), []byte("value"), 1, store.StoreID()) if _, err := client.SendWrapped(store, nil, &pArgs); err != nil { t.Fatalf("unexpected error on put to txn meta2 value: %s", err) } }
// processIntentsAsync asynchronously processes intents which were // encountered during another command but did not interfere with the // execution of that command. This occurs in two cases: inconsistent // reads and EndTransaction (which queues its own external intents for // processing via this method). The two cases are handled somewhat // differently and would be better served by different entry points, // but combining them simplifies the plumbing necessary in Replica. func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) { if len(intents) == 0 { return } now := r.store.Clock().Now() ctx := context.TODO() stopper := r.store.Stopper() for _, item := range intents { if item.args.Method() != roachpb.EndTransaction { if err := stopper.RunLimitedAsyncTask(ir.sem, func() { // Everything here is best effort; give up rather than waiting // too long (helps avoid deadlocks during test shutdown, // although this is imperfect due to the use of an // uninterruptible WaitGroup.Wait in beginCmds). ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() h := roachpb.Header{Timestamp: now} resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout, item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */) // resolveIntents with poison=true because we're resolving // intents outside of the context of an EndTransaction. // // Naively, it doesn't seem like we need to poison the abort // cache since we're pushing with PUSH_TOUCH - meaning that // the primary way our Push leads to aborting intents is that // of the transaction having timed out (and thus presumably no // client being around any more, though at the time of writing // we don't guarantee that). But there's another path in which // the Push comes back successful, namely that of the // transaction already having been aborted by someone else, in // which case the client may still be running. Thus, we must // poison. if err := ir.resolveIntents(ctxWithTimeout, resolveIntents, true /* wait */, true /* poison */); err != nil { log.Warningf(context.TODO(), "%s: failed to resolve intents: %s", r, err) return } if pushErr != nil { log.Warningf(context.TODO(), "%s: failed to push during intent resolution: %s", r, pushErr) return } }); err != nil { log.Warningf(context.TODO(), "failed to resolve intents: %s", err) return } } else { // EndTransaction if err := stopper.RunLimitedAsyncTask(ir.sem, func() { ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() // For EndTransaction, we know the transaction is finalized so // we can skip the push and go straight to the resolve. // // This mechanism assumes that when an EndTransaction fails, // the client makes no assumptions about the result. For // example, an attempt to explicitly rollback the transaction // may succeed (triggering this code path), but the result may // not make it back to the client. if err := ir.resolveIntents(ctxWithTimeout, item.intents, true /* wait */, false /* !poison */); err != nil { log.Warningf(context.TODO(), "%s: failed to resolve intents: %s", r, err) return } // We successfully resolved the intents, so we're able to GC from // the txn span directly. b := &client.Batch{} txn := item.intents[0].Txn txnKey := keys.TransactionKey(txn.Key, txn.ID) // This is pretty tricky. Transaction keys are range-local and // so they are encoded specially. The key range addressed by // (txnKey, txnKey.Next()) might be empty (since Next() does // not imply monotonicity on the address side). 
Instead, we // send this request to a range determined using the resolved // transaction anchor, i.e. if the txn is anchored on // /Local/RangeDescriptor/"a"/uuid, the key range below would // be ["a", "a\x00"). However, the first range is special again // because the above procedure results in KeyMin, but we need // at least KeyLocalMax. // // #7880 will address this by making GCRequest less special and // thus obviating the need to cook up an artificial range here. var gcArgs roachpb.GCRequest { key := keys.MustAddr(txn.Key) if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) { key = localMax } endKey := key.Next() gcArgs.Span = roachpb.Span{ Key: key.AsRawKey(), EndKey: endKey.AsRawKey(), } } gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{ Key: txnKey, }) b.AddRawRequest(&gcArgs) if err := ir.store.db.Run(b); err != nil { log.Warningf( context.TODO(), "could not GC completed transaction anchored at %s: %s", roachpb.Key(txn.Key), err, ) return } }); err != nil { log.Warningf(context.TODO(), "failed to resolve intents: %s", err) return } } } }
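// A standalone sketch of the key-range cooking described in the comment above: clamp the
// transaction anchor key to the first non-range-local address and address the single-key
// span [key, key.Next()). This is a bytes-only stand-in; the real code goes through
// keys.MustAddr / roachpb.RKey, and the concrete value of LocalMax below is an assumption
// for illustration only.
package main

import (
	"bytes"
	"fmt"
)

// localMax is a stand-in for keys.LocalMax, the first key after the reserved
// range-local keyspace (assumed here to be the single byte 0x02).
var localMax = []byte{0x02}

// next returns the immediate successor key, as roachpb.Key.Next does: the key
// with a zero byte appended.
func next(k []byte) []byte {
	return append(append([]byte(nil), k...), 0)
}

// gcSpanForTxn returns the [start, end) span used to address a GCRequest for a
// transaction anchored at key.
func gcSpanForTxn(key []byte) (start, end []byte) {
	if bytes.Compare(key, localMax) < 0 {
		key = localMax
	}
	return key, next(key)
}

func main() {
	s, e := gcSpanForTxn([]byte("a"))
	fmt.Printf("[%q, %q)\n", s, e) // ["a", "a\x00")

	s, e = gcSpanForTxn([]byte{}) // a txn anchored at KeyMin clamps to LocalMax
	fmt.Printf("[% x, % x)\n", s, e)
}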
// processIntentsAsync asynchronously processes intents which were // encountered during another command but did not interfere with the // execution of that command. This occurs in two cases: inconsistent // reads and EndTransaction (which queues its own external intents for // processing via this method). The two cases are handled somewhat // differently and would be better served by different entry points, // but combining them simplifies the plumbing necessary in Replica. func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) { if len(intents) == 0 { return } now := r.store.Clock().Now() ctx := r.context(context.TODO()) stopper := r.store.Stopper() for _, item := range intents { if item.args.Method() != roachpb.EndTransaction { stopper.RunLimitedAsyncTask(ir.sem, func() { // Everything here is best effort; give up rather than waiting // too long (helps avoid deadlocks during test shutdown, // although this is imperfect due to the use of an // uninterruptible WaitGroup.Wait in beginCmds). ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() h := roachpb.Header{Timestamp: now} resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout, item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */) // resolveIntents with poison=true because we're resolving // intents outside of the context of an EndTransaction. // // Naively, it doesn't seem like we need to poison the abort // cache since we're pushing with PUSH_TOUCH - meaning that // the primary way our Push leads to aborting intents is that // of the transaction having timed out (and thus presumably no // client being around any more, though at the time of writing // we don't guarantee that). But there's another path in which // the Push comes back successful, namely that of the // transaction already having been aborted by someone else, in // which case the client may still be running. Thus, we must // poison. if err := ir.resolveIntents(ctxWithTimeout, r, resolveIntents, true /* wait */, true /* poison */); err != nil { log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", err) return } if pushErr != nil { log.Warningc(ctxWithTimeout, "failed to push during intent resolution: %s", pushErr) return } }) } else { // EndTransaction stopper.RunLimitedAsyncTask(ir.sem, func() { ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() // For EndTransaction, we know the transaction is finalized so // we can skip the push and go straight to the resolve. // // This mechanism assumes that when an EndTransaction fails, // the client makes no assumptions about the result. For // example, an attempt to explicitly rollback the transaction // may succeed (triggering this code path), but the result may // not make it back to the client. if err := ir.resolveIntents(ctxWithTimeout, r, item.intents, true /* wait */, false /* !poison */); err != nil { log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", err) return } // We successfully resolved the intents, so we're able to GC from // the txn span directly. 
var ba roachpb.BatchRequest ba.Timestamp = now txn := item.intents[0].Txn gcArgs := roachpb.GCRequest{ Span: roachpb.Span{ Key: r.Desc().StartKey.AsRawKey(), EndKey: r.Desc().EndKey.AsRawKey(), }, } gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{ Key: keys.TransactionKey(txn.Key, txn.ID), }) ba.Add(&gcArgs) if _, pErr := r.addWriteCmd(ctxWithTimeout, ba, nil /* nil */); pErr != nil { log.Warningf("could not GC completed transaction: %s", pErr) } }) } } }
// InternalPushTxn resolves conflicts between concurrent txns (or // between a non-transactional reader or writer and a txn) in several // ways depending on the statuses and priorities of the conflicting // transactions. The InternalPushTxn operation is invoked by a // "pusher" (the writer trying to abort a conflicting txn or the // reader trying to push a conflicting txn's commit timestamp // forward), who attempts to resolve a conflict with a "pushee" // (args.PushTxn -- the pushee txn whose intent(s) caused the // conflict). // // Txn already committed/aborted: If pushee txn is committed or // aborted return success. // // Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat // timestamp isn't set, use PushTxn.Timestamp as LastHeartbeat. If // current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then // the pushee txn should be either pushed forward, aborted, or // confirmed not pending, depending on value of Request.PushType. // // Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than // PushTxn.Epoch, return success, as older epoch may be removed. // // Lower Txn Priority: If pushee txn has a lower priority than pusher, // adjust pushee's persisted txn depending on value of // args.PushType. If args.PushType is ABORT_TXN, set txn.Status to // ABORTED, and priority to one less than the pusher's priority and // return success. If args.PushType is PUSH_TIMESTAMP, set // txn.Timestamp to pusher's Timestamp + 1 (note that we use the // pusher's Args.Timestamp, not Txn.Timestamp because the args // timestamp can advance during the txn). // // Higher Txn Priority: If pushee txn has a higher priority than // pusher, return TransactionPushError. Transaction will be retried // with priority one less than the pushee's higher priority. func (r *Range) InternalPushTxn(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalPushTxnRequest, reply *proto.InternalPushTxnResponse) { if !bytes.Equal(args.Key, args.PusheeTxn.Key) { reply.SetGoError(util.Errorf("request key %s should match pushee's txn key %s", args.Key, args.PusheeTxn.Key)) return } key := keys.TransactionKey(args.PusheeTxn.Key, args.PusheeTxn.ID) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true /* consistent */, nil /* txn */, existTxn) if err != nil { reply.SetGoError(err) return } if ok { // Start with the persisted transaction record as final transaction. reply.PusheeTxn = gogoproto.Clone(existTxn).(*proto.Transaction) // Upgrade the epoch, timestamp and priority as necessary. if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch { reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch } reply.PusheeTxn.Timestamp.Forward(args.PusheeTxn.Timestamp) if reply.PusheeTxn.Priority < args.PusheeTxn.Priority { reply.PusheeTxn.Priority = args.PusheeTxn.Priority } } else { // Some sanity checks for case where we don't find a transaction record. if args.PusheeTxn.LastHeartbeat != nil { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, "no txn persisted, yet intent has heartbeat")) return } else if args.PusheeTxn.Status != proto.PENDING { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, fmt.Sprintf("no txn persisted, yet intent has status %s", args.PusheeTxn.Status))) return } // The transaction doesn't exist yet on disk; use the supplied version. reply.PusheeTxn = gogoproto.Clone(&args.PusheeTxn).(*proto.Transaction) } // If already committed or aborted, return success. 
if reply.PusheeTxn.Status != proto.PENDING { // Trivial noop. return } // If we're trying to move the timestamp forward, and it's already // far enough forward, return success. if args.PushType == proto.PUSH_TIMESTAMP && args.Timestamp.Less(reply.PusheeTxn.Timestamp) { // Trivial noop. return } // pusherWins bool is true in the event the pusher prevails. var pusherWins bool // If there's no incoming transaction, the pusher is non-transactional. // We make a random priority, biased by specified // args.Header().UserPriority in this case. var priority int32 if args.Txn != nil { priority = args.Txn.Priority } else { // Make sure we have a deterministic random number when generating // a priority for this txn-less request, so all replicas see same priority. randGen := rand.New(rand.NewSource(int64(reply.PusheeTxn.Priority) ^ args.Timestamp.WallTime)) priority = proto.MakePriority(randGen, args.GetUserPriority()) } // Check for txn timeout. if reply.PusheeTxn.LastHeartbeat == nil { reply.PusheeTxn.LastHeartbeat = &reply.PusheeTxn.Timestamp } if args.Now.Equal(proto.ZeroTimestamp) { reply.SetGoError(util.Error("the field Now must be provided")) return } // Compute heartbeat expiration (all replicas must see the same result). expiry := args.Now expiry.Forward(args.Timestamp) // if Timestamp is ahead, use that expiry.WallTime -= 2 * DefaultHeartbeatInterval.Nanoseconds() if reply.PusheeTxn.LastHeartbeat.Less(expiry) { if log.V(1) { log.Infof("pushing expired txn %s", reply.PusheeTxn) } pusherWins = true } else if reply.PusheeTxn.Isolation == proto.SNAPSHOT && args.PushType == proto.PUSH_TIMESTAMP { if log.V(1) { log.Infof("pushing timestamp for snapshot isolation txn") } pusherWins = true } else if args.PushType == proto.CLEANUP_TXN { // If just attempting to cleanup old or already-committed txns, don't push. pusherWins = false } else if reply.PusheeTxn.Priority < priority || (reply.PusheeTxn.Priority == priority && args.Txn != nil && args.Txn.Timestamp.Less(reply.PusheeTxn.Timestamp)) { // Pusher wins based on priority; if priorities are equal, order // by lower txn timestamp. if log.V(1) { log.Infof("pushing intent from txn with lower priority %s vs %d", reply.PusheeTxn, priority) } pusherWins = true } if !pusherWins { err := proto.NewTransactionPushError(args.Txn, reply.PusheeTxn) if log.V(1) { log.Info(err) } reply.SetGoError(err) return } // Upgrade priority of pushed transaction to one less than pusher's. reply.PusheeTxn.UpgradePriority(priority - 1) // If aborting transaction, set new status and return success. if args.PushType == proto.ABORT_TXN { reply.PusheeTxn.Status = proto.ABORTED } else if args.PushType == proto.PUSH_TIMESTAMP { // Otherwise, update timestamp to be one greater than the request's timestamp. reply.PusheeTxn.Timestamp = args.Timestamp reply.PusheeTxn.Timestamp.Logical++ } // Persist the pushed transaction using zero timestamp for inline value. if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, reply.PusheeTxn); err != nil { reply.SetGoError(err) return } }
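// A simplified sketch of the abandonment check InternalPushTxn performs above: the pushee
// counts as expired when its last heartbeat is older than max(now, args.Timestamp) minus
// twice the heartbeat interval. Plain int64 wall-clock nanos stand in for proto.Timestamp,
// and the concrete value of DefaultHeartbeatInterval is an assumption for this sketch.
package main

import (
	"fmt"
	"time"
)

// defaultHeartbeatInterval mirrors the constant referenced above; the value here is assumed.
const defaultHeartbeatInterval = 5 * time.Second

// pusheeExpired reports whether the pushee's transaction record is considered abandoned and
// may therefore be pushed regardless of priority.
func pusheeExpired(lastHeartbeat, now, reqTimestamp int64) bool {
	expiry := now
	if reqTimestamp > expiry { // expiry.Forward(args.Timestamp)
		expiry = reqTimestamp
	}
	expiry -= 2 * defaultHeartbeatInterval.Nanoseconds()
	return lastHeartbeat < expiry
}

func main() {
	now := int64(100 * time.Second)
	fmt.Println(pusheeExpired(int64(95*time.Second), now, now)) // false: heartbeat 5s ago
	fmt.Println(pusheeExpired(int64(80*time.Second), now, now)) // true: heartbeat 20s ago
}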
func TestGCQueueTransactionTable(t *testing.T) { defer leaktest.AfterTest(t)() const now time.Duration = 3 * 24 * time.Hour const gcTxnAndAC = now - txnCleanupThreshold const gcACOnly = now - abortCacheAgeThreshold if gcTxnAndAC >= gcACOnly { t.Fatalf("test assumption violated due to changing constants; needs adjustment") } type spec struct { status roachpb.TransactionStatus orig time.Duration hb time.Duration // last heartbeat (none if ZeroTimestamp) newStatus roachpb.TransactionStatus // -1 for GCed failResolve bool // do we want to fail resolves in this trial? expResolve bool // expect attempt at removing txn-persisted intents? expAbortGC bool // expect abort cache entries removed? } // Describes the state of the Txn table before the test. // Many of the abort cache entries deleted wouldn't even be there, so don't // be confused by that. testCases := map[string]spec{ // Too young, should not touch. "aa": { status: roachpb.PENDING, orig: gcACOnly + 1, newStatus: roachpb.PENDING, }, // A little older, so the AbortCache gets cleaned up. "ab": { status: roachpb.PENDING, orig: gcTxnAndAC + 1, newStatus: roachpb.PENDING, expAbortGC: true, }, // Old and pending, but still heartbeat (so no Push attempted; it would succeed). // It's old enough to delete the abort cache entry though. "ba": { status: roachpb.PENDING, hb: gcTxnAndAC + 1, newStatus: roachpb.PENDING, expAbortGC: true, }, // Not old enough for Txn GC, but old enough to remove the abort cache entry. "bb": { status: roachpb.ABORTED, orig: gcACOnly - 1, newStatus: roachpb.ABORTED, expAbortGC: true, }, // Old, pending and abandoned. Should push and abort it successfully, // but not GC it just yet (this is an artifact of the implementation). // The abort cache gets cleaned up though. "c": { status: roachpb.PENDING, orig: gcTxnAndAC - 1, newStatus: roachpb.ABORTED, expAbortGC: true, }, // Old and aborted, should delete. "d": { status: roachpb.ABORTED, orig: gcTxnAndAC - 1, newStatus: -1, expResolve: true, expAbortGC: true, }, // Committed and fresh, so no action. But the abort cache entry is old // enough to be discarded. "e": { status: roachpb.COMMITTED, orig: gcTxnAndAC + 1, newStatus: roachpb.COMMITTED, expAbortGC: true, }, // Committed and old. It has an intent (like all tests here), which is // resolvable and hence we can GC. "f": { status: roachpb.COMMITTED, orig: gcTxnAndAC - 1, newStatus: -1, expResolve: true, expAbortGC: true, }, // Same as the previous one, but we've rigged things so that the intent // resolution here will fail and consequently no GC is expected. "g": { status: roachpb.COMMITTED, orig: gcTxnAndAC - 1, newStatus: roachpb.COMMITTED, failResolve: true, expResolve: true, expAbortGC: true, }, } resolved := map[string][]roachpb.Span{} tc := testContext{} tsc := TestStoreContext() tsc.TestingKnobs.TestingCommandFilter = func(filterArgs storagebase.FilterArgs) *roachpb.Error { if resArgs, ok := filterArgs.Req.(*roachpb.ResolveIntentRequest); ok { id := string(resArgs.IntentTxn.Key) resolved[id] = append(resolved[id], roachpb.Span{ Key: resArgs.Key, EndKey: resArgs.EndKey, }) // We've special cased one test case. Note that the intent is still // counted in `resolved`. 
if testCases[id].failResolve { return roachpb.NewErrorWithTxn(util.Errorf("boom"), filterArgs.Hdr.Txn) } } return nil } tc.StartWithStoreContext(t, tsc) defer tc.Stop() tc.manualClock.Set(int64(now)) outsideKey := tc.rng.Desc().EndKey.Next().AsRawKey() testIntents := []roachpb.Span{{Key: roachpb.Key("intent")}} txns := map[string]roachpb.Transaction{} for strKey, test := range testCases { baseKey := roachpb.Key(strKey) txnClock := hlc.NewClock(hlc.NewManualClock(int64(test.orig)).UnixNano) txn := newTransaction("txn1", baseKey, 1, enginepb.SERIALIZABLE, txnClock) txn.Status = test.status txn.Intents = testIntents if test.hb > 0 { txn.LastHeartbeat = &hlc.Timestamp{WallTime: int64(test.hb)} } // Set a high Timestamp to make sure it does not matter. Only // OrigTimestamp (and heartbeat) are used for GC decisions. txn.Timestamp.Forward(hlc.MaxTimestamp) txns[strKey] = *txn for _, addrKey := range []roachpb.Key{baseKey, outsideKey} { key := keys.TransactionKey(addrKey, txn.ID) if err := engine.MVCCPutProto(context.Background(), tc.engine, nil, key, hlc.ZeroTimestamp, nil, txn); err != nil { t.Fatal(err) } } entry := roachpb.AbortCacheEntry{Key: txn.Key, Timestamp: txn.LastActive()} if err := tc.rng.abortCache.Put(context.Background(), tc.engine, nil, txn.ID, &entry); err != nil { t.Fatal(err) } } // Run GC. gcQ := newGCQueue(tc.gossip) cfg, ok := tc.gossip.GetSystemConfig() if !ok { t.Fatal("config not set") } if err := gcQ.process(tc.clock.Now(), tc.rng, cfg); err != nil { t.Fatal(err) } util.SucceedsSoon(t, func() error { for strKey, sp := range testCases { txn := &roachpb.Transaction{} key := keys.TransactionKey(roachpb.Key(strKey), txns[strKey].ID) ok, err := engine.MVCCGetProto(context.Background(), tc.engine, key, hlc.ZeroTimestamp, true, nil, txn) if err != nil { return err } if expGC := (sp.newStatus == -1); expGC { if expGC != !ok { return fmt.Errorf("%s: expected gc: %t, but found %s\n%s", strKey, expGC, txn, roachpb.Key(strKey)) } } else if sp.newStatus != txn.Status { return fmt.Errorf("%s: expected status %s, but found %s", strKey, sp.newStatus, txn.Status) } var expIntents []roachpb.Span if sp.expResolve { expIntents = testIntents } if !reflect.DeepEqual(resolved[strKey], expIntents) { return fmt.Errorf("%s: unexpected intent resolutions:\nexpected: %s\nobserved: %s", strKey, expIntents, resolved[strKey]) } entry := &roachpb.AbortCacheEntry{} abortExists, err := tc.rng.abortCache.Get(context.Background(), tc.store.Engine(), txns[strKey].ID, entry) if err != nil { t.Fatal(err) } if (abortExists == false) != sp.expAbortGC { return fmt.Errorf("%s: expected abort cache gc: %t, found %+v", strKey, sp.expAbortGC, entry) } } return nil }) outsideTxnPrefix := keys.TransactionKey(outsideKey, uuid.EmptyUUID) outsideTxnPrefixEnd := keys.TransactionKey(outsideKey.Next(), uuid.EmptyUUID) var count int if _, err := engine.MVCCIterate(context.Background(), tc.store.Engine(), outsideTxnPrefix, outsideTxnPrefixEnd, hlc.ZeroTimestamp, true, nil, false, func(roachpb.KeyValue) (bool, error) { count++ return false, nil }); err != nil { t.Fatal(err) } if exp := len(testCases); exp != count { t.Fatalf("expected the %d external transaction entries to remain untouched, "+ "but only %d are left", exp, count) } }
// processIntentsAsync asynchronously processes intents which were // encountered during another command but did not interfere with the // execution of that command. This occurs in two cases: inconsistent // reads and EndTransaction (which queues its own external intents for // processing via this method). The two cases are handled somewhat // differently and would be better served by different entry points, // but combining them simplifies the plumbing necessary in Replica. func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) { if len(intents) == 0 { return } now := r.store.Clock().Now() ctx := r.context() stopper := r.store.Stopper() for _, item := range intents { if item.args.Method() != roachpb.EndTransaction { stopper.RunLimitedAsyncTask(ir.sem, func() { // Everything here is best effort; give up rather than waiting // too long (helps avoid deadlocks during test shutdown, // although this is imperfect due to the use of an // uninterruptible WaitGroup.Wait in beginCmds). ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() h := roachpb.Header{Timestamp: now} resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout, item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */) if pErr := ir.resolveIntents(ctxWithTimeout, r, resolveIntents, true /* wait */, false /* TODO(tschottdorf): #5088 */); pErr != nil { log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", pErr) return } if pushErr != nil { log.Warningc(ctxWithTimeout, "failed to push during intent resolution: %s", pushErr) return } }) } else { // EndTransaction stopper.RunLimitedAsyncTask(ir.sem, func() { ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() // For EndTransaction, we know the transaction is finalized so // we can skip the push and go straight to the resolve. if pErr := ir.resolveIntents(ctxWithTimeout, r, item.intents, true /* wait */, false /* TODO(tschottdorf): #5088 */); pErr != nil { log.Warningc(ctxWithTimeout, "failed to resolve intents: %s", pErr) return } // We successfully resolved the intents, so we're able to GC from // the txn span directly. Note that the sequence cache was cleared // out synchronously with EndTransaction (see comments within for // an explanation of why that is kosher). // // Note that we poisoned the sequence caches on the external ranges // above. This may seem counter-intuitive, but it's actually // necessary: Assume a transaction has committed here, with two // external intents, and assume that we did not poison. Normally, // these two intents would be resolved in the same batch, but that // is not guaranteed (for example, if DistSender has a stale // descriptor after a Merge). When resolved separately, the first // ResolveIntent would clear out the sequence cache; an individual // write on the second (still present) intent could then be // replayed and would resolve to a real value (at least for a // window of time unless we delete the local txn entry). That's not // OK for non-idempotent commands such as Increment. // TODO(tschottdorf): We should have another side effect on // MVCCResolveIntent (on commit/abort): If it were able to remove // the txn from its corresponding entries in the timestamp cache, // no more replays at the same timestamp would be possible. This // appears to be a useful performance optimization; we could then // not poison on EndTransaction. 
In fact, the above mechanism // could be an effective alternative to sequence-cache based // poisoning (or the whole sequence cache?) itself. // // TODO(tschottdorf): down the road, can probably unclog the system // here by batching up a bunch of those GCRequests before proposing. var ba roachpb.BatchRequest txn := item.intents[0].Txn gcArgs := roachpb.GCRequest{ Span: roachpb.Span{ Key: r.Desc().StartKey.AsRawKey(), EndKey: r.Desc().EndKey.AsRawKey(), }, } gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{ Key: keys.TransactionKey(txn.Key, txn.ID), }) ba.Add(&gcArgs) if _, pErr := r.addWriteCmd(ctxWithTimeout, ba, nil /* nil */); pErr != nil { log.Warningf("could not GC completed transaction: %s", pErr) } }) } } }