// InternalPushTxn resolves conflicts between concurrent txns (or // between a non-transactional reader or writer and a txn) in several // ways depending on the statuses and priorities of the conflicting // transactions. The InternalPushTxn operation is invoked by a // "pusher" (the writer trying to abort a conflicting txn or the // reader trying to push a conflicting txn's commit timestamp // forward), who attempts to resolve a conflict with a "pushee" // (args.PushTxn -- the pushee txn whose intent(s) caused the // conflict). // // Txn already committed/aborted: If pushee txn is committed or // aborted return success. // // Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat // timestamp isn't set, use PushTxn.Timestamp as LastHeartbeat. If // current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then // the pushee txn should be either pushed forward, aborted, or // confirmed not pending, depending on value of Request.PushType. // // Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than // PushTxn.Epoch, return success, as older epoch may be removed. // // Lower Txn Priority: If pushee txn has a lower priority than pusher, // adjust pushee's persisted txn depending on value of // args.PushType. If args.PushType is ABORT_TXN, set txn.Status to // ABORTED, and priority to one less than the pusher's priority and // return success. If args.PushType is PUSH_TIMESTAMP, set // txn.Timestamp to pusher's Timestamp + 1 (note that we use the // pusher's Args.Timestamp, not Txn.Timestamp because the args // timestamp can advance during the txn). // // Higher Txn Priority: If pushee txn has a higher priority than // pusher, return TransactionPushError. Transaction will be retried // with priority one less than the pushee's higher priority. func (r *Range) InternalPushTxn(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalPushTxnRequest, reply *proto.InternalPushTxnResponse) { if !bytes.Equal(args.Key, args.PusheeTxn.Key) { reply.SetGoError(util.Errorf("request key %s should match pushee's txn key %s", args.Key, args.PusheeTxn.Key)) return } key := keys.TransactionKey(args.PusheeTxn.Key, args.PusheeTxn.ID) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true /* consistent */, nil /* txn */, existTxn) if err != nil { reply.SetGoError(err) return } if ok { // Start with the persisted transaction record as final transaction. reply.PusheeTxn = gogoproto.Clone(existTxn).(*proto.Transaction) // Upgrade the epoch, timestamp and priority as necessary. if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch { reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch } reply.PusheeTxn.Timestamp.Forward(args.PusheeTxn.Timestamp) if reply.PusheeTxn.Priority < args.PusheeTxn.Priority { reply.PusheeTxn.Priority = args.PusheeTxn.Priority } } else { // Some sanity checks for case where we don't find a transaction record. if args.PusheeTxn.LastHeartbeat != nil { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, "no txn persisted, yet intent has heartbeat")) return } else if args.PusheeTxn.Status != proto.PENDING { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, fmt.Sprintf("no txn persisted, yet intent has status %s", args.PusheeTxn.Status))) return } // The transaction doesn't exist yet on disk; use the supplied version. reply.PusheeTxn = gogoproto.Clone(&args.PusheeTxn).(*proto.Transaction) } // If already committed or aborted, return success. if reply.PusheeTxn.Status != proto.PENDING { // Trivial noop. return } // If we're trying to move the timestamp forward, and it's already // far enough forward, return success. if args.PushType == proto.PUSH_TIMESTAMP && args.Timestamp.Less(reply.PusheeTxn.Timestamp) { // Trivial noop. return } // pusherWins bool is true in the event the pusher prevails. var pusherWins bool // If there's no incoming transaction, the pusher is non-transactional. // We make a random priority, biased by specified // args.Header().UserPriority in this case. var priority int32 if args.Txn != nil { priority = args.Txn.Priority } else { // Make sure we have a deterministic random number when generating // a priority for this txn-less request, so all replicas see same priority. randGen := rand.New(rand.NewSource(int64(reply.PusheeTxn.Priority) ^ args.Timestamp.WallTime)) priority = proto.MakePriority(randGen, args.GetUserPriority()) } // Check for txn timeout. if reply.PusheeTxn.LastHeartbeat == nil { reply.PusheeTxn.LastHeartbeat = &reply.PusheeTxn.Timestamp } if args.Now.Equal(proto.ZeroTimestamp) { reply.SetGoError(util.Error("the field Now must be provided")) return } // Compute heartbeat expiration (all replicas must see the same result). expiry := args.Now expiry.Forward(args.Timestamp) // if Timestamp is ahead, use that expiry.WallTime -= 2 * DefaultHeartbeatInterval.Nanoseconds() if reply.PusheeTxn.LastHeartbeat.Less(expiry) { if log.V(1) { log.Infof("pushing expired txn %s", reply.PusheeTxn) } pusherWins = true } else if reply.PusheeTxn.Isolation == proto.SNAPSHOT && args.PushType == proto.PUSH_TIMESTAMP { if log.V(1) { log.Infof("pushing timestamp for snapshot isolation txn") } pusherWins = true } else if args.PushType == proto.CLEANUP_TXN { // If just attempting to cleanup old or already-committed txns, don't push. pusherWins = false } else if reply.PusheeTxn.Priority < priority || (reply.PusheeTxn.Priority == priority && args.Txn != nil && args.Txn.Timestamp.Less(reply.PusheeTxn.Timestamp)) { // Pusher wins based on priority; if priorities are equal, order // by lower txn timestamp. if log.V(1) { log.Infof("pushing intent from txn with lower priority %s vs %d", reply.PusheeTxn, priority) } pusherWins = true } if !pusherWins { err := proto.NewTransactionPushError(args.Txn, reply.PusheeTxn) if log.V(1) { log.Info(err) } reply.SetGoError(err) return } // Upgrade priority of pushed transaction to one less than pusher's. reply.PusheeTxn.UpgradePriority(priority - 1) // If aborting transaction, set new status and return success. if args.PushType == proto.ABORT_TXN { reply.PusheeTxn.Status = proto.ABORTED } else if args.PushType == proto.PUSH_TIMESTAMP { // Otherwise, update timestamp to be one greater than the request's timestamp. reply.PusheeTxn.Timestamp = args.Timestamp reply.PusheeTxn.Timestamp.Logical++ } // Persist the pushed transaction using zero timestamp for inline value. if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, reply.PusheeTxn); err != nil { reply.SetGoError(err) return } }
// InternalPushTxn resolves conflicts between concurrent txns (or // between a non-transactional reader or writer and a txn) in several // ways depending on the statuses and priorities of the conflicting // transactions. The InternalPushTxn operation is invoked by a // "pusher" (the writer trying to abort a conflicting txn or the // reader trying to push a conflicting txn's commit timestamp // forward), who attempts to resolve a conflict with a "pushee" // (args.PushTxn -- the pushee txn whose intent(s) caused the // conflict). // // Txn already committed/aborted: If pushee txn is committed or // aborted return success. // // Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat // timestamp isn't set, use PushTxn.Timestamp as LastHeartbeat. If // current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then // the pushee txn should be either pushed forward or aborted, // depending on value of Request.Abort. // // Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than // PushTxn.Epoch, return success, as older epoch may be removed. // // Lower Txn Priority: If pushee txn has a lower priority than pusher, // adjust pushee's persisted txn depending on value of args.Abort. If // args.Abort is true, set txn.Status to ABORTED, and priority to one // less than the pusher's priority and return success. If args.Abort // is false, set txn.Timestamp to pusher's txn.Timestamp + 1. // // Higher Txn Priority: If pushee txn has a higher priority than // pusher, return TransactionRetryError. Transaction will be retried // with priority one less than the pushee's higher priority. func (r *Range) InternalPushTxn(args *proto.InternalPushTxnRequest, reply *proto.InternalPushTxnResponse) { if !bytes.Equal(args.Key, args.PusheeTxn.ID) { reply.SetGoError(util.Errorf("request key %q should match pushee's txn ID %q", args.Key, args.PusheeTxn.ID)) return } // Create the actual key to the system-local transaction table. key := engine.MakeKey(engine.KeyLocalTransactionPrefix, args.Key) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.GetProto(r.engine, key, existTxn) if err != nil { reply.SetGoError(err) return } if ok { // Start with the persisted transaction record as final transaction. reply.PusheeTxn = gogoproto.Clone(existTxn).(*proto.Transaction) // Upgrade the epoch and timestamp as necessary. if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch { reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch } if reply.PusheeTxn.Timestamp.Less(args.PusheeTxn.Timestamp) { reply.PusheeTxn.Timestamp = args.PusheeTxn.Timestamp } } else { // Some sanity checks for case where we don't find a transaction record. if args.PusheeTxn.LastHeartbeat != nil { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, "no txn persisted, yet intent has heartbeat")) return } else if args.PusheeTxn.Status != proto.PENDING { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, fmt.Sprintf("no txn persisted, yet intent has status %s", args.PusheeTxn.Status))) return } // The transaction doesn't exist yet on disk; use the supplied version. reply.PusheeTxn = gogoproto.Clone(&args.PusheeTxn).(*proto.Transaction) } // If already committed or aborted, return success. if reply.PusheeTxn.Status != proto.PENDING { // Trivial noop. return } // If we're trying to move the timestamp forward, and it's already // far enough forward, return success. if !args.Abort && args.Timestamp.Less(reply.PusheeTxn.Timestamp) { // Trivial noop. return } // pusherWins bool is true in the event the pusher prevails. var pusherWins bool // Check for txn timeout. if reply.PusheeTxn.LastHeartbeat == nil { reply.PusheeTxn.LastHeartbeat = &reply.PusheeTxn.Timestamp } // Compute heartbeat expiration. expiry := r.clock.Now() expiry.WallTime -= 2 * DefaultHeartbeatInterval.Nanoseconds() if reply.PusheeTxn.LastHeartbeat.Less(expiry) { log.V(1).Infof("pushing expired txn %+v", reply.PusheeTxn) pusherWins = true } else if args.PusheeTxn.Epoch < reply.PusheeTxn.Epoch { // Check for an intent from a prior epoch. log.V(1).Infof("pushing intent from previous epoch for txn %+v", reply.PusheeTxn) pusherWins = true } else if reply.PusheeTxn.Priority < args.Txn.Priority || (reply.PusheeTxn.Priority == args.Txn.Priority && args.Txn.Timestamp.Less(reply.PusheeTxn.Timestamp)) { // Finally, choose based on priority; if priorities are equal, order by lower txn timestamp. log.V(1).Infof("pushing intent from txn with lower priority %+v vs %+v", reply.PusheeTxn, args.Txn) pusherWins = true } if !pusherWins { log.V(1).Infof("failed to push intent %+v vs %+v", reply.PusheeTxn, args.Txn) reply.SetGoError(proto.NewTransactionRetryError(reply.PusheeTxn)) return } // If aborting transaction, set new status and return success. if args.Abort { reply.PusheeTxn.Status = proto.ABORTED } else { // Otherwise, update timestamp to be one greater than the request's timestamp. reply.PusheeTxn.Timestamp = args.Timestamp reply.PusheeTxn.Timestamp.Logical++ } // Persist the pushed transaction. if err := engine.PutProto(r.engine, key, reply.PusheeTxn); err != nil { reply.SetGoError(err) return } }