func replicaGCShouldQueueImpl(
	now, lastCheck, lastActivity hlc.Timestamp, isCandidate bool,
) (bool, float64) {
	timeout := ReplicaGCQueueInactivityThreshold
	priority := replicaGCPriorityDefault

	if isCandidate {
		// If the range is a candidate (which happens if its former replica set
		// ignores it), let it expire much earlier.
		timeout = ReplicaGCQueueCandidateTimeout
		priority = replicaGCPriorityCandidate
	} else if now.Less(lastCheck.Add(ReplicaGCQueueInactivityThreshold.Nanoseconds(), 0)) {
		// Return false immediately if the previous check was less than the
		// check interval in the past. Note that we don't do this if the
		// replica is in candidate state, in which case we want to be more
		// aggressive - a failed rebalance attempt could have checked this
		// range, and candidate state suggests that a retry succeeded. See
		// #7489.
		return false, 0
	}

	shouldQ := lastActivity.Add(timeout.Nanoseconds(), 0).Less(now)
	if !shouldQ {
		return false, 0
	}

	return shouldQ, priority
}
// UpdateDeadlineMaybe sets the transaction's deadline to the lower of the
// current one (if any) and the passed value.
func (txn *Txn) UpdateDeadlineMaybe(deadline hlc.Timestamp) bool {
	if txn.deadline == nil || deadline.Less(*txn.deadline) {
		txn.deadline = &deadline
		return true
	}
	return false
}
// UpdateObservedTimestamp stores a timestamp off a node's clock for future
// operations in the transaction. When multiple calls are made for a single
// nodeID, the lowest timestamp prevails.
func (t *Transaction) UpdateObservedTimestamp(nodeID NodeID, maxTS hlc.Timestamp) {
	if t.ObservedTimestamps == nil {
		t.ObservedTimestamps = make(map[NodeID]hlc.Timestamp)
	}
	if ts, ok := t.ObservedTimestamps[nodeID]; !ok || maxTS.Less(ts) {
		t.ObservedTimestamps[nodeID] = maxTS
	}
}
// leaseStatus returns a lease status. If the lease is epoch-based,
// the liveness field will be set to the liveness used to compute
// its state, unless state == leaseError.
//
// - The lease is considered valid if the timestamp is covered by the
//   supplied lease. This is determined differently depending on the
//   lease properties. For expiration-based leases, the timestamp is
//   covered if it's less than the expiration (minus the maximum
//   clock offset). For epoch-based "node liveness" leases, the lease
//   epoch must match the owner node's liveness epoch -AND- the
//   timestamp must be within the node's liveness expiration (also
//   minus the maximum clock offset).
//
//   To be valid, a lease which contains a valid ProposedTS must have
//   a proposed timestamp greater than the minimum proposed timestamp,
//   which prevents a restarted process from serving commands, since
//   the command queue has been wiped through the restart.
//
// - The lease is considered in stasis if the timestamp is within the
//   maximum clock offset window of the lease expiration.
//
// - The lease is considered expired in all other cases.
//
// The maximum clock offset must always be taken into consideration to
// avoid a failure of linearizability on a single register during
// lease changes. Without that stasis period, the following could
// occur:
//
// * a range lease gets committed on the new lease holder (but not the old).
// * client proposes and commits a write on new lease holder (with a
//   timestamp just greater than the expiration of the old lease).
// * client tries to read what it wrote, but hits a slow coordinator
//   (which assigns a timestamp covered by the old lease).
// * the read is served by the old lease holder (which has not
//   processed the change in lease holdership).
// * the client fails to read their own write.
func (r *Replica) leaseStatus(
	lease *roachpb.Lease, timestamp, minProposedTS hlc.Timestamp,
) LeaseStatus {
	status := LeaseStatus{timestamp: timestamp, lease: lease}
	if lease == nil {
		status.state = leaseExpired
		return status
	}
	var expiration hlc.Timestamp
	if lease.Type() == roachpb.LeaseExpiration {
		expiration = lease.Expiration
	} else {
		var err error
		status.liveness, err = r.store.cfg.NodeLiveness.GetLiveness(lease.Replica.NodeID)
		if err != nil || status.liveness.Epoch < *lease.Epoch {
			// If lease validity can't be determined (e.g. gossip is down
			// and liveness info isn't available for owner), we can neither
			// use the lease nor do we want to attempt to acquire it.
			status.state = leaseError
			return status
		}
		if status.liveness.Epoch > *lease.Epoch {
			status.state = leaseExpired
			return status
		}
		expiration = status.liveness.Expiration
	}
	stasis := expiration.Add(-int64(r.store.Clock().MaxOffset()), 0)
	if timestamp.Less(stasis) {
		status.state = leaseValid
		// If the replica owns the lease, additionally verify that the lease's
		// proposed timestamp is not earlier than the min proposed timestamp.
		if lease.Replica.StoreID == r.store.StoreID() &&
			lease.ProposedTS != nil && lease.ProposedTS.Less(minProposedTS) {
			status.state = leaseProscribed
		}
	} else if timestamp.Less(expiration) {
		status.state = leaseStasis
	} else {
		status.state = leaseExpired
	}
	return status
}
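To make the three-way classification above concrete, here is a minimal standalone sketch (not taken from the repository) that reproduces the valid/stasis/expired decision using plain nanosecond wall times instead of hlc.Timestamp; the classify helper and the state constants are illustrative names only.

package main

import (
	"fmt"
	"time"
)

// state is an illustrative stand-in for the lease state constants above.
type state string

const (
	stateValid   state = "valid"
	stateStasis  state = "stasis"
	stateExpired state = "expired"
)

// classify mirrors the timestamp comparisons in leaseStatus: timestamps below
// expiration-maxOffset are valid, timestamps inside the
// [expiration-maxOffset, expiration) window are in stasis, and everything
// else is expired.
func classify(timestamp, expiration int64, maxOffset time.Duration) state {
	stasis := expiration - maxOffset.Nanoseconds()
	switch {
	case timestamp < stasis:
		return stateValid
	case timestamp < expiration:
		return stateStasis
	default:
		return stateExpired
	}
}

func main() {
	expiration := int64(10 * time.Second) // lease expires at t=10s
	maxOffset := 500 * time.Millisecond   // maximum clock offset

	for _, ts := range []int64{
		int64(9 * time.Second),                      // well before the stasis window: valid
		int64(9*time.Second + 700*time.Millisecond), // inside the stasis window: stasis
		int64(11 * time.Second),                     // past expiration: expired
	} {
		fmt.Printf("t=%v -> %s\n", time.Duration(ts), classify(ts, expiration, maxOffset))
	}
}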
// add the specified timestamp to the cache as covering the range of
// keys from start to end. If end is nil, the range covers the start
// key only. txnID is nil for no transaction. readTSCache specifies
// whether the command adding this timestamp should update the read
// timestamp; false to update the write timestamp cache.
func (tc *timestampCache) add(
	start, end roachpb.Key, timestamp hlc.Timestamp, txnID *uuid.UUID, readTSCache bool,
) {
	// This gives us a memory-efficient end key if end is empty.
	if len(end) == 0 {
		end = start.Next()
		start = end[:len(start)]
	}
	tc.latest.Forward(timestamp)
	// Only add to the cache if the timestamp is more recent than the
	// low water mark.
	if tc.lowWater.Less(timestamp) {
		tcache := tc.wCache
		if readTSCache {
			tcache = tc.rCache
		}

		addRange := func(r interval.Range) {
			value := cacheValue{timestamp: timestamp, txnID: txnID}
			key := tcache.MakeKey(r.Start, r.End)
			entry := makeCacheEntry(key, value)
			tcache.AddEntry(entry)
		}
		r := interval.Range{
			Start: interval.Comparable(start),
			End:   interval.Comparable(end),
		}

		// Check existing, overlapping entries and truncate/split/remove if
		// superseded and in the past. If existing entries are in the future,
		// subtract from the range/ranges that need to be added to cache.
		for _, entry := range tcache.GetOverlaps(r.Start, r.End) {
			cv := entry.Value.(*cacheValue)
			key := entry.Key.(*cache.IntervalKey)
			sCmp := r.Start.Compare(key.Start)
			eCmp := r.End.Compare(key.End)
			if cv.timestamp.Less(timestamp) {
				// The existing interval has a timestamp less than the new
				// interval. Compare interval ranges to determine how to
				// modify existing interval.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal; replace old with new and avoid the
					// need to insert new.
					//
					// New: ------------
					// Old: ------------
					//
					// New: ------------
					// Old:
					*cv = cacheValue{timestamp: timestamp, txnID: txnID}
					tcache.MoveToEnd(entry)
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains or is equal to old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// New: ------------      ------------      ------------
					// Old:
					tcache.DelEntry(entry)
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two.
					//
					// New:     ----
					// Old: ------------
					//
					// New:     ----
					// Old: ----    ----
					oldEnd := key.End
					key.End = r.Start

					newKey := tcache.MakeKey(r.End, oldEnd)
					newEntry := makeCacheEntry(newKey, *cv)
					tcache.AddEntryAfter(newEntry, entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// New:     --------          --------
					// Old: ----              ----
					key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// New: --------          --------
					// Old:         ----              ----
					key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else if timestamp.Less(cv.timestamp) {
				// The existing interval has a timestamp greater than the new
				// interval. Compare interval ranges to determine how to modify
				// new interval before adding it to the timestamp cache.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// Old: -----------      -----------      -----------      -----------
					// New:    -----     or  -----------  or  --------     or     --------
					//
					// Old: -----------      -----------      -----------      -----------
					// New:
					return
				case sCmp < 0 && eCmp > 0:
					// New contains old; split up old into two. We can add the left
					// piece immediately because it is guaranteed to be before the
					// rest of the overlaps.
					//
					// Old:    ------
					// New: ------------
					//
					// Old:    ------
					// New: ---      ---
					lr := interval.Range{Start: r.Start, End: key.Start}
					addRange(lr)

					r.Start = key.End
				case eCmp > 0:
					// Left partial overlap; truncate new start.
					//
					// Old: --------          --------
					// New:     --------  or  ------------
					//
					// Old: --------          --------
					// New:         ----              ----
					r.Start = key.End
				case sCmp < 0:
					// Right partial overlap; truncate new end.
					//
					// Old:     --------          --------
					// New: --------      or  ------------
					//
					// Old:     --------          --------
					// New: ----              ----
					r.End = key.Start
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else if (cv.txnID == nil && txnID == nil) ||
				(cv.txnID != nil && txnID != nil && *cv.txnID == *txnID) {
				// The existing interval has a timestamp equal to the new
				// interval, and the same transaction ID.
				switch {
				case sCmp >= 0 && eCmp <= 0:
					// Old contains or is equal to new; no need to add.
					//
					// New:    -----     or  -----------  or  --------     or     --------
					// Old: -----------      -----------      -----------      -----------
					//
					// New:
					// Old: -----------      -----------      -----------      -----------
					return
				case sCmp <= 0 && eCmp >= 0:
					// New contains old; delete old.
					//
					// New: ------------      ------------      ------------
					// Old:   --------    or    ----------  or  ----------
					//
					// New: ------------      ------------      ------------
					// Old:
					tcache.DelEntry(entry)
				case eCmp >= 0:
					// Left partial overlap; truncate old end.
					//
					// New:     --------          --------
					// Old: --------      or  ------------
					//
					// New:     --------          --------
					// Old: ----              ----
					key.End = r.Start
				case sCmp <= 0:
					// Right partial overlap; truncate old start.
					//
					// New: --------          --------
					// Old:     --------  or  ------------
					//
					// New: --------          --------
					// Old:         ----              ----
					key.Start = r.End
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			} else {
				// The existing interval has a timestamp equal to the new
				// interval and a different transaction ID.
				switch {
				case sCmp == 0 && eCmp == 0:
					// New and old are equal. Segment is no longer owned by any
					// transaction.
					//
					// New: ------------
					// Old: ------------
					//
					// New:
					// Nil: ============
					// Old:
					cv.txnID = nil
					return
				case sCmp == 0 && eCmp > 0:
					// New contains old, left-aligned. Clear ownership of the
					// existing segment and truncate new.
					//
					// New: ------------
					// Old: ----------
					//
					// New:           --
					// Nil: ==========
					// Old:
					cv.txnID = nil
					r.Start = key.End
				case sCmp < 0 && eCmp == 0:
					// New contains old, right-aligned. Clear ownership of the
					// existing segment and truncate new.
					//
					// New: ------------
					// Old:   ----------
					//
					// New: --
					// Nil:   ==========
					// Old:
					cv.txnID = nil
					r.End = key.Start
				case sCmp < 0 && eCmp > 0:
					// New contains old; split into three segments with the
					// overlap owned by no txn.
					//
					// New: ------------
					// Old:   --------
					//
					// New: --        --
					// Nil:   ========
					// Old:
					cv.txnID = nil

					newKey := tcache.MakeKey(r.Start, key.Start)
					newEntry := makeCacheEntry(newKey, cacheValue{timestamp: timestamp, txnID: txnID})
					tcache.AddEntryAfter(newEntry, entry)
					r.Start = key.End
				case sCmp > 0 && eCmp < 0:
					// Old contains new; split up old into two. New segment is
					// owned by no txn.
					//
					// New:     ----
					// Old: ------------
					//
					// New:
					// Nil:     ====
					// Old: ----    ----
					txnID = nil
					oldEnd := key.End
					key.End = r.Start

					newKey := tcache.MakeKey(r.End, oldEnd)
					newEntry := makeCacheEntry(newKey, *cv)
					tcache.AddEntryAfter(newEntry, entry)
				case eCmp == 0:
					// Old contains new, right-aligned; truncate old end and clear
					// ownership of new segment.
					//
					// New:     --------
					// Old: ------------
					//
					// New:
					// Nil:     ========
					// Old: ----
					txnID = nil
					key.End = r.Start
				case sCmp == 0:
					// Old contains new, left-aligned; truncate old start and
					// clear ownership of new segment.
					//
					// New: --------
					// Old: ------------
					//
					// New:
					// Nil: ========
					// Old:         ----
					txnID = nil
					key.Start = r.End
				case eCmp > 0:
					// Left partial overlap; truncate old end and split new into
					// segments owned by no txn (the overlap) and the new txn.
					//
					// New:     --------
					// Old: --------
					//
					// New:         ----
					// Nil:     ====
					// Old: ----
					key.End, r.Start = r.Start, key.End
					newKey := tcache.MakeKey(key.End, r.Start)
					newCV := cacheValue{timestamp: cv.timestamp, txnID: nil}
					newEntry := makeCacheEntry(newKey, newCV)
					tcache.AddEntryAfter(newEntry, entry)
				case sCmp < 0:
					// Right partial overlap; truncate old start and split new into
					// segments owned by no txn (the overlap) and the new txn.
					//
					// New: --------
					// Old:     --------
					//
					// New: ----
					// Nil:     ====
					// Old:         ----
					key.Start, r.End = r.End, key.Start
					newKey := tcache.MakeKey(r.End, key.Start)
					newCV := cacheValue{timestamp: cv.timestamp, txnID: nil}
					newEntry := makeCacheEntry(newKey, newCV)
					tcache.AddEntryAfter(newEntry, entry)
				default:
					panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r))
				}
			}
		}
		addRange(r)
	}
}
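The long chain of cases above is driven entirely by two comparisons: the new range's start and end against the existing entry's start and end. The following standalone sketch (not repository code; span and describeOverlap are illustrative names) shows how those two results distinguish equality, containment in either direction, and the two partial-overlap cases.

package main

import (
	"bytes"
	"fmt"
)

// span is a stand-in for the interval.Range used above; Start is inclusive
// and End is exclusive.
type span struct {
	Start, End []byte
}

// describeOverlap mirrors the sCmp/eCmp classification in timestampCache.add:
// comparing only the endpoints of the new range against an existing entry is
// enough to pick the truncate/split/delete action.
func describeOverlap(newR, oldR span) string {
	sCmp := bytes.Compare(newR.Start, oldR.Start)
	eCmp := bytes.Compare(newR.End, oldR.End)
	switch {
	case sCmp == 0 && eCmp == 0:
		return "new and old are equal"
	case sCmp <= 0 && eCmp >= 0:
		return "new contains old"
	case sCmp > 0 && eCmp < 0:
		return "old contains new"
	case eCmp >= 0:
		return "left partial overlap (new extends past old's end)"
	case sCmp <= 0:
		return "right partial overlap (new ends inside old)"
	default:
		return "no overlap"
	}
}

func main() {
	old := span{Start: []byte("c"), End: []byte("f")}
	for _, n := range []span{
		{[]byte("c"), []byte("f")},
		{[]byte("a"), []byte("z")},
		{[]byte("d"), []byte("e")},
		{[]byte("d"), []byte("z")},
		{[]byte("a"), []byte("e")},
	} {
		fmt.Printf("new [%s,%s) vs old [%s,%s): %s\n",
			n.Start, n.End, old.Start, old.End, describeOverlap(n, old))
	}
}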
// Covers returns true if the given timestamp can be served by the Lease.
// This is the case if the timestamp precedes the Lease's stasis period.
// Note that the fact that a lease covers a timestamp is not enough for the
// holder of the lease to be able to serve a read with that timestamp;
// pendingLeaderLeaseRequest.TransferInProgress() should also be consulted to
// account for possible lease transfers.
func (l Lease) Covers(timestamp hlc.Timestamp) bool {
	return timestamp.Less(l.StartStasis)
}
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the
// transaction record.
func TestTxnCoordSenderHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()
	defer teardownHeartbeats(sender)

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	initialTxn := client.NewTxn(context.Background(), *s.DB)
	if err := initialTxn.Put(roachpb.Key("a"), []byte("value")); err != nil {
		t.Fatal(err)
	}

	// Verify 3 heartbeats.
	var heartbeatTS hlc.Timestamp
	for i := 0; i < 3; i++ {
		util.SucceedsSoon(t, func() error {
			txn, pErr := getTxn(sender, &initialTxn.Proto)
			if pErr != nil {
				t.Fatal(pErr)
			}
			// Advance clock by 1ns.
			// Locking the TxnCoordSender to prevent a data race.
			sender.Lock()
			s.Manual.Increment(1)
			sender.Unlock()
			if txn.LastHeartbeat != nil && heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return nil
			}
			return errors.Errorf("expected heartbeat")
		})
	}

	// Sneakily send an ABORT right to DistSender (bypassing TxnCoordSender).
	{
		var ba roachpb.BatchRequest
		ba.Add(&roachpb.EndTransactionRequest{
			Commit: false,
			Span:   roachpb.Span{Key: initialTxn.Proto.Key},
		})
		ba.Txn = &initialTxn.Proto
		if _, pErr := sender.wrapped.Send(context.Background(), ba); pErr != nil {
			t.Fatal(pErr)
		}
	}

	util.SucceedsSoon(t, func() error {
		sender.Lock()
		defer sender.Unlock()
		if txnMeta, ok := sender.txns[*initialTxn.Proto.ID]; !ok {
			t.Fatal("transaction unregistered prematurely")
		} else if txnMeta.txn.Status != roachpb.ABORTED {
			return fmt.Errorf("transaction is not aborted")
		}
		return nil
	})

	// Trying to do something else should give us a TransactionAbortedError.
	_, err := initialTxn.Get("a")
	assertTransactionAbortedError(t, err)
}
// isAsOf analyzes a select statement to bypass the logic in newPlan(),
// since that requires the transaction to be started already. If the returned
// timestamp is not nil, it is the timestamp to which a transaction should
// be set.
//
// max is a lower bound on what the transaction's timestamp will be. Used to
// check that the user didn't specify a timestamp in the future.
func isAsOf(planMaker *planner, stmt parser.Statement, max hlc.Timestamp) (*hlc.Timestamp, error) {
	s, ok := stmt.(*parser.Select)
	if !ok {
		return nil, nil
	}
	sc, ok := s.Select.(*parser.SelectClause)
	if !ok {
		return nil, nil
	}
	if sc.From == nil || sc.From.AsOf.Expr == nil {
		return nil, nil
	}

	te, err := sc.From.AsOf.Expr.TypeCheck(nil, parser.TypeString)
	if err != nil {
		return nil, err
	}
	d, err := te.Eval(&planMaker.evalCtx)
	if err != nil {
		return nil, err
	}
	var ts hlc.Timestamp
	switch d := d.(type) {
	case *parser.DString:
		// Allow nanosecond precision because the timestamp is only used by the
		// system and won't be returned to the user over pgwire.
		dt, err := parser.ParseDTimestamp(string(*d), time.Nanosecond)
		if err != nil {
			return nil, err
		}
		ts.WallTime = dt.Time.UnixNano()
	case *parser.DInt:
		ts.WallTime = int64(*d)
	case *parser.DDecimal:
		// Format the decimal into a string and split on `.` to extract the
		// nanosecond walltime and logical tick parts.
		s := d.String()
		parts := strings.SplitN(s, ".", 2)
		nanos, err := strconv.ParseInt(parts[0], 10, 64)
		if err != nil {
			return nil, errors.Wrap(err, "parse AS OF SYSTEM TIME argument")
		}
		var logical int64
		if len(parts) > 1 {
			// logicalLength is the number of decimal digits expected in the
			// logical part to the right of the decimal. See the implementation
			// of cluster_logical_timestamp().
			const logicalLength = 10
			p := parts[1]
			if lp := len(p); lp > logicalLength {
				return nil, errors.Errorf("bad AS OF SYSTEM TIME argument: logical part has too many digits")
			} else if lp < logicalLength {
				p += strings.Repeat("0", logicalLength-lp)
			}
			logical, err = strconv.ParseInt(p, 10, 32)
			if err != nil {
				return nil, errors.Wrap(err, "parse AS OF SYSTEM TIME argument")
			}
		}
		ts.WallTime = nanos
		ts.Logical = int32(logical)
	default:
		return nil, fmt.Errorf("unexpected AS OF SYSTEM TIME argument: %s (%T)", d.ResolvedType(), d)
	}
	if max.Less(ts) {
		return nil, fmt.Errorf("cannot specify timestamp in the future")
	}
	return &ts, nil
}
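The DDecimal branch is the least obvious part: the integer digits are the wall time in nanoseconds, and the fractional digits, right-padded to ten places, are the logical tick. The following standalone sketch (decodeDecimalTimestamp is an illustrative helper, not part of the planner) shows the same padding rule on a worked example.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// decodeDecimalTimestamp is an illustrative stand-in for the DDecimal branch
// above: the integer part is the wall time in nanoseconds and the fractional
// part, right-padded to ten digits, is the logical tick count.
func decodeDecimalTimestamp(s string) (wallTime int64, logical int32, err error) {
	const logicalLength = 10
	parts := strings.SplitN(s, ".", 2)
	wallTime, err = strconv.ParseInt(parts[0], 10, 64)
	if err != nil {
		return 0, 0, err
	}
	if len(parts) > 1 {
		p := parts[1]
		if len(p) > logicalLength {
			return 0, 0, fmt.Errorf("logical part has too many digits")
		}
		p += strings.Repeat("0", logicalLength-len(p))
		l, err := strconv.ParseInt(p, 10, 32)
		if err != nil {
			return 0, 0, err
		}
		logical = int32(l)
	}
	return wallTime, logical, nil
}

func main() {
	// "1475780232000000000.0000000005" decodes to wall time
	// 1475780232000000000 ns and logical tick 5.
	wall, logical, err := decodeDecimalTimestamp("1475780232000000000.0000000005")
	if err != nil {
		panic(err)
	}
	fmt.Println(wall, logical) // 1475780232000000000 5
}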
// isLive returns whether the liveness record is considered live at the given
// time, after discounting the maximum clock offset from the expiration.
func (l *Liveness) isLive(now hlc.Timestamp, maxOffset time.Duration) bool {
	expiration := l.Expiration.Add(-maxOffset.Nanoseconds(), 0)
	return now.Less(expiration)
}