// getInternal implements the actual logic of the get function.
// The values of multiple versions for the given key should
// be organized as follows:
// ...
// keyA : keyMetadata of keyA
// keyA_Timestamp_n : value of version_n
// keyA_Timestamp_n-1 : value of version_n-1
// ...
// keyA_Timestamp_0 : value of version_0
// keyB : keyMetadata of keyB
// ...
func (mvcc *MVCC) getInternal(key Key, timestamp hlc.Timestamp, txnID string) ([]byte, hlc.Timestamp, string, error) {
	keyMetadata := &keyMetadata{}
	ok, err := GetI(mvcc.engine, key, keyMetadata)
	if err != nil || !ok {
		return nil, hlc.Timestamp{}, "", err
	}
	// If the read timestamp is greater than the latest one, we can just
	// fetch the value without a scan.
	if !timestamp.Less(keyMetadata.Timestamp) {
		if len(keyMetadata.TxnID) > 0 && (len(txnID) == 0 || keyMetadata.TxnID != txnID) {
			return nil, hlc.Timestamp{}, "", &writeIntentError{TxnID: keyMetadata.TxnID}
		}
		latestKey := mvccEncodeKey(key, keyMetadata.Timestamp)
		val, err := mvcc.engine.Get(latestKey)
		return val, keyMetadata.Timestamp, keyMetadata.TxnID, err
	}
	nextKey := mvccEncodeKey(key, timestamp)
	// We use PrefixEndKey(key) as the upper bound for the scan.
	// If there is no other version after nextKey, it won't return
	// the value of the next key.
	kvs, err := mvcc.engine.Scan(nextKey, PrefixEndKey(key), 1)
	if len(kvs) > 0 {
		_, ts := mvccDecodeKey(kvs[0].Key)
		return kvs[0].Value, ts, "", err
	}
	return nil, hlc.Timestamp{}, "", err
}
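// Illustrative sketch (not part of the source): given the key layout above,
// a read at timestamp T returns the newest version with timestamp <= T. The
// function name and surrounding setup here are assumptions for illustration.
func exampleHistoricalRead(mvcc *MVCC, key Key, readTS hlc.Timestamp) ([]byte, error) {
	// An empty txnID means a non-transactional read; a write intent laid
	// down by another transaction surfaces as a *writeIntentError.
	val, ts, _, err := mvcc.getInternal(key, readTS, "")
	if err != nil {
		return nil, err
	}
	_ = ts // ts is the timestamp of the version actually read (<= readTS).
	return val, nil
}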
// UpdateDeadlineMaybe sets the transaction's deadline to the lower of the
// current one (if any) and the passed value.
func (txn *Txn) UpdateDeadlineMaybe(deadline hlc.Timestamp) bool {
	if txn.deadline == nil || deadline.Less(*txn.deadline) {
		txn.deadline = &deadline
		return true
	}
	return false
}
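// A minimal sketch (assumed usage, not from the source): repeated calls only
// ratchet the deadline downward, so the earliest requested deadline wins.
// Timestamp values are illustrative.
func exampleDeadlines(txn *Txn) {
	_ = txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: 200}) // true: no deadline yet
	_ = txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: 100}) // true: lower than 200
	_ = txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: 150}) // false: higher than 100
}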
func replicaGCShouldQueueImpl(
	now, lastCheck, lastActivity hlc.Timestamp, isCandidate bool,
) (bool, float64) {
	timeout := ReplicaGCQueueInactivityThreshold
	var priority float64

	if isCandidate {
		// If the range is a candidate (which happens if its former replica set
		// ignores it), let it expire much earlier.
		timeout = ReplicaGCQueueCandidateTimeout
		priority++
	} else if now.Less(lastCheck.Add(ReplicaGCQueueInactivityThreshold.Nanoseconds(), 0)) {
		// Return false immediately if the previous check was less than the
		// check interval in the past. Note that we don't do this if the
		// replica is in candidate state, in which case we want to be more
		// aggressive - a failed rebalance attempt could have checked this
		// range, and candidate state suggests that a retry succeeded. See
		// #7489.
		return false, 0
	}

	shouldQ := lastActivity.Add(timeout.Nanoseconds(), 0).Less(now)
	if !shouldQ {
		return false, 0
	}
	return shouldQ, priority
}
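// Hedged sketch (not from the source) of the two decision paths: a candidate
// replica uses the much shorter ReplicaGCQueueCandidateTimeout and gets a
// priority bump, while a non-candidate is throttled by the time since the
// last check. Inputs are illustrative.
func exampleShouldQueue(now, lastCheck, lastActivity hlc.Timestamp) {
	// Candidate: queued as soon as inactivity exceeds the candidate timeout;
	// priority is 1 when queued.
	shouldQ, prio := replicaGCShouldQueueImpl(now, lastCheck, lastActivity, true)
	_, _ = shouldQ, prio

	// Non-candidate: returns (false, 0) if the previous check happened within
	// the inactivity threshold, regardless of activity.
	shouldQ, prio = replicaGCShouldQueueImpl(now, lastCheck, lastActivity, false)
	_, _ = shouldQ, prio
}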
// UpdateObservedTimestamp stores a timestamp off a node's clock for future
// operations in the transaction. When multiple calls are made for a single
// nodeID, the lowest timestamp prevails.
func (t *Transaction) UpdateObservedTimestamp(nodeID NodeID, maxTS hlc.Timestamp) {
	if t.ObservedTimestamps == nil {
		t.ObservedTimestamps = make(map[NodeID]hlc.Timestamp)
	}
	if ts, ok := t.ObservedTimestamps[nodeID]; !ok || maxTS.Less(ts) {
		t.ObservedTimestamps[nodeID] = maxTS
	}
}
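// Sketch (assumed, not from the source) of the "lowest timestamp prevails"
// rule; the NodeID and timestamp values are illustrative.
func exampleObservedTimestamps(t *Transaction) {
	t.UpdateObservedTimestamp(1, hlc.Timestamp{WallTime: 10}) // first observation: stored
	t.UpdateObservedTimestamp(1, hlc.Timestamp{WallTime: 5})  // lower: replaces 10
	t.UpdateObservedTimestamp(1, hlc.Timestamp{WallTime: 8})  // higher: ignored
	// t.ObservedTimestamps[1].WallTime == 5
}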
// isAsOf analyzes a select statement to bypass the logic in newPlan(),
// since that requires the transaction to be started already. If the returned
// timestamp is not nil, it is the timestamp to which a transaction should
// be set.
func isAsOf(planMaker *planner, stmt parser.Statement, max hlc.Timestamp) (*hlc.Timestamp, error) {
	s, ok := stmt.(*parser.Select)
	if !ok {
		return nil, nil
	}
	sc, ok := s.Select.(*parser.SelectClause)
	if !ok {
		return nil, nil
	}
	if len(sc.From) != 1 {
		return nil, nil
	}
	ate, ok := sc.From[0].(*parser.AliasedTableExpr)
	if !ok {
		return nil, nil
	}
	if ate.AsOf.Expr == nil {
		return nil, nil
	}

	te, err := ate.AsOf.Expr.TypeCheck(nil, parser.TypeString)
	if err != nil {
		return nil, err
	}
	d, err := te.Eval(&planMaker.evalCtx)
	if err != nil {
		return nil, err
	}
	ds, ok := d.(*parser.DString)
	if !ok {
		return nil, fmt.Errorf("AS OF SYSTEM TIME expected string, got %s", d.Type())
	}
	// Allow nanosecond precision because the timestamp is only used by the
	// system and won't be returned to the user over pgwire.
	dt, err := parser.ParseDTimestamp(string(*ds), planMaker.session.Location, time.Nanosecond)
	if err != nil {
		return nil, err
	}

	ts := hlc.Timestamp{
		WallTime: dt.Time.UnixNano(),
	}
	if max.Less(ts) {
		return nil, fmt.Errorf("cannot specify timestamp in the future")
	}
	return &ts, nil
}
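// Hypothetical sketch (names assumed, not from the source): a statement like
//
//   SELECT * FROM t AS OF SYSTEM TIME '2016-01-01 00:00:00'
//
// parses to a single *parser.AliasedTableExpr with a non-nil AsOf.Expr, which
// is the only shape isAsOf matches; joins, multiple FROM items, or a missing
// AS OF clause make it return (nil, nil) and fall through to normal planning.
func examplePinTimestamp(p *planner, stmt parser.Statement, now hlc.Timestamp) (*hlc.Timestamp, error) {
	ts, err := isAsOf(p, stmt, now)
	if err != nil {
		return nil, err // e.g. a future timestamp or a non-string expression
	}
	// ts == nil: no AS OF SYSTEM TIME clause; use the transaction's normal timestamp.
	return ts, nil
}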
// Covers returns true if the given timestamp can be served by the Lease.
// This is the case if the timestamp precedes the Lease's stasis period.
// Note that the fact that a lease covers a timestamp is not enough for the
// holder of the lease to be able to serve a read with that timestamp;
// pendingLeaderLeaseRequest.TransferInProgress() should also be consulted to
// account for possible lease transfers.
func (l Lease) Covers(timestamp hlc.Timestamp) bool {
	return timestamp.Less(l.StartStasis)
}
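// Minimal sketch (values illustrative, struct literal assumed): Covers is a
// strict comparison against the start of the stasis period, so a timestamp
// exactly at StartStasis is not covered.
func exampleCovers() {
	l := Lease{StartStasis: hlc.Timestamp{WallTime: 100}}
	_ = l.Covers(hlc.Timestamp{WallTime: 99})  // true: strictly precedes stasis
	_ = l.Covers(hlc.Timestamp{WallTime: 100}) // false: not strictly less
}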
// add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp; false to update the write timestamp cache.
func (tc *timestampCache) add(
	start, end roachpb.Key, timestamp hlc.Timestamp, txnID *uuid.UUID, readTSCache bool,
) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	tc.latest.Forward(timestamp)
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		tcache := tc.wCache
		if readTSCache {
			tcache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := tcache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			tcache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, entry := range tcache.GetOverlaps(r.Start, r.End) {
			cv := entry.Value.(*cacheValue)
			key := entry.Key.(*cache.IntervalKey)
			sCmp := r.Start.Compare(key.Start)
			eCmp := r.End.Compare(key.End)
			if !timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp less than or equal to the new interval.
				// Compare interval ranges to determine how to modify existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					tcache.MoveToEnd(entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or  ----------    or    ----------
					//
					// Old:
					tcache.DelEntry(entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// Old: ----    ----
					oldEnd := key.End
					key.End = r.Start

					key := tcache.MakeKey(r.End, oldEnd)
					newEntry := makeCacheEntry(key, *cv)
					tcache.AddEntryAfter(newEntry, entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// Old: ----              ----
					key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// Old:         ----              ----
					key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else {
				// The existing interval has a timestamp greater than the new interval.
				// Compare interval ranges to determine how to modify new interval before
				// adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up new into two. We can add the left piece
					// immediately because it is guaranteed to be before the rest of the
					// overlaps.
					//
					// Old:     ------
					// New: ------------
					//
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: key.Start}
					addRange(lr)

					r.Start = key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// New:         ----              ----
					r.Start = key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// New: ----              ----
					r.End = key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
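// Hedged walkthrough (not from the source) of the overlap rules above; keys
// and timestamps are illustrative, and the cache is assumed to be constructed
// elsewhere in the package.
func exampleCacheAdd(tc *timestampCache) {
	ts1 := hlc.Timestamp{WallTime: 1}
	ts2 := hlc.Timestamp{WallTime: 2}

	// Existing entry: ["a","e") at ts1 in the read timestamp cache.
	tc.add(roachpb.Key("a"), roachpb.Key("e"), ts1, nil, true)

	// Newer, narrower entry: ["b","d") at ts2. Because ts2 >= ts1, the old
	// entry is split into ["a","b") and ["d","e") around the new one.
	tc.add(roachpb.Key("b"), roachpb.Key("d"), ts2, nil, true)

	// An older entry over the same span is a no-op: it is fully contained
	// in an interval with a greater timestamp.
	tc.add(roachpb.Key("b"), roachpb.Key("d"), ts1, nil, true)
}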
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the
// transaction record.
func TestTxnCoordSenderHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()
	defer teardownHeartbeats(sender)

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	initialTxn := client.NewTxn(context.Background(), *s.DB)
	if err := initialTxn.Put(roachpb.Key("a"), []byte("value")); err != nil {
		t.Fatal(err)
	}

	// Verify 3 heartbeats.
	var heartbeatTS hlc.Timestamp
	for i := 0; i < 3; i++ {
		util.SucceedsSoon(t, func() error {
			txn, pErr := getTxn(sender, &initialTxn.Proto)
			if pErr != nil {
				t.Fatal(pErr)
			}
			// Advance clock by 1ns.
			// Locking the TxnCoordSender to prevent a data race.
			sender.Lock()
			s.Manual.Increment(1)
			sender.Unlock()
			if txn.LastHeartbeat != nil && heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return nil
			}
			return errors.Errorf("expected heartbeat")
		})
	}

	// Sneakily send an ABORT right to DistSender (bypassing TxnCoordSender).
	{
		var ba roachpb.BatchRequest
		ba.Add(&roachpb.EndTransactionRequest{
			Commit: false,
			Span:   roachpb.Span{Key: initialTxn.Proto.Key},
		})
		ba.Txn = &initialTxn.Proto
		if _, pErr := sender.wrapped.Send(context.Background(), ba); pErr != nil {
			t.Fatal(pErr)
		}
	}

	util.SucceedsSoon(t, func() error {
		sender.Lock()
		defer sender.Unlock()
		if txnMeta, ok := sender.txns[*initialTxn.Proto.ID]; !ok {
			t.Fatal("transaction unregistered prematurely")
		} else if txnMeta.txn.Status != roachpb.ABORTED {
			return fmt.Errorf("transaction is not aborted")
		}
		return nil
	})

	// Trying to do something else should give us a TransactionAbortedError.
	_, err := initialTxn.Get("a")
	assertTransactionAbortedError(t, err)
}