// InternalPushTxn resolves conflicts between concurrent txns (or // between a non-transactional reader or writer and a txn) in several // ways depending on the statuses and priorities of the conflicting // transactions. The InternalPushTxn operation is invoked by a // "pusher" (the writer trying to abort a conflicting txn or the // reader trying to push a conflicting txn's commit timestamp // forward), who attempts to resolve a conflict with a "pushee" // (args.PushTxn -- the pushee txn whose intent(s) caused the // conflict). // // Txn already committed/aborted: If pushee txn is committed or // aborted return success. // // Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat // timestamp isn't set, use PushTxn.Timestamp as LastHeartbeat. If // current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then // the pushee txn should be either pushed forward, aborted, or // confirmed not pending, depending on value of Request.PushType. // // Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than // PushTxn.Epoch, return success, as older epoch may be removed. // // Lower Txn Priority: If pushee txn has a lower priority than pusher, // adjust pushee's persisted txn depending on value of // args.PushType. If args.PushType is ABORT_TXN, set txn.Status to // ABORTED, and priority to one less than the pusher's priority and // return success. If args.PushType is PUSH_TIMESTAMP, set // txn.Timestamp to pusher's Timestamp + 1 (note that we use the // pusher's Args.Timestamp, not Txn.Timestamp because the args // timestamp can advance during the txn). // // Higher Txn Priority: If pushee txn has a higher priority than // pusher, return TransactionPushError. Transaction will be retried // with priority one less than the pushee's higher priority. func (r *Range) InternalPushTxn(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalPushTxnRequest, reply *proto.InternalPushTxnResponse) { if !bytes.Equal(args.Key, args.PusheeTxn.Key) { reply.SetGoError(util.Errorf("request key %s should match pushee's txn key %s", args.Key, args.PusheeTxn.Key)) return } key := keys.TransactionKey(args.PusheeTxn.Key, args.PusheeTxn.ID) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true /* consistent */, nil /* txn */, existTxn) if err != nil { reply.SetGoError(err) return } if ok { // Start with the persisted transaction record as final transaction. reply.PusheeTxn = gogoproto.Clone(existTxn).(*proto.Transaction) // Upgrade the epoch, timestamp and priority as necessary. if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch { reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch } reply.PusheeTxn.Timestamp.Forward(args.PusheeTxn.Timestamp) if reply.PusheeTxn.Priority < args.PusheeTxn.Priority { reply.PusheeTxn.Priority = args.PusheeTxn.Priority } } else { // Some sanity checks for case where we don't find a transaction record. if args.PusheeTxn.LastHeartbeat != nil { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, "no txn persisted, yet intent has heartbeat")) return } else if args.PusheeTxn.Status != proto.PENDING { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, fmt.Sprintf("no txn persisted, yet intent has status %s", args.PusheeTxn.Status))) return } // The transaction doesn't exist yet on disk; use the supplied version. reply.PusheeTxn = gogoproto.Clone(&args.PusheeTxn).(*proto.Transaction) } // If already committed or aborted, return success. if reply.PusheeTxn.Status != proto.PENDING { // Trivial noop. return } // If we're trying to move the timestamp forward, and it's already // far enough forward, return success. if args.PushType == proto.PUSH_TIMESTAMP && args.Timestamp.Less(reply.PusheeTxn.Timestamp) { // Trivial noop. return } // pusherWins bool is true in the event the pusher prevails. var pusherWins bool // If there's no incoming transaction, the pusher is non-transactional. // We make a random priority, biased by specified // args.Header().UserPriority in this case. var priority int32 if args.Txn != nil { priority = args.Txn.Priority } else { // Make sure we have a deterministic random number when generating // a priority for this txn-less request, so all replicas see same priority. randGen := rand.New(rand.NewSource(int64(reply.PusheeTxn.Priority) ^ args.Timestamp.WallTime)) priority = proto.MakePriority(randGen, args.GetUserPriority()) } // Check for txn timeout. if reply.PusheeTxn.LastHeartbeat == nil { reply.PusheeTxn.LastHeartbeat = &reply.PusheeTxn.Timestamp } if args.Now.Equal(proto.ZeroTimestamp) { reply.SetGoError(util.Error("the field Now must be provided")) return } // Compute heartbeat expiration (all replicas must see the same result). expiry := args.Now expiry.Forward(args.Timestamp) // if Timestamp is ahead, use that expiry.WallTime -= 2 * DefaultHeartbeatInterval.Nanoseconds() if reply.PusheeTxn.LastHeartbeat.Less(expiry) { if log.V(1) { log.Infof("pushing expired txn %s", reply.PusheeTxn) } pusherWins = true } else if reply.PusheeTxn.Isolation == proto.SNAPSHOT && args.PushType == proto.PUSH_TIMESTAMP { if log.V(1) { log.Infof("pushing timestamp for snapshot isolation txn") } pusherWins = true } else if args.PushType == proto.CLEANUP_TXN { // If just attempting to cleanup old or already-committed txns, don't push. pusherWins = false } else if reply.PusheeTxn.Priority < priority || (reply.PusheeTxn.Priority == priority && args.Txn != nil && args.Txn.Timestamp.Less(reply.PusheeTxn.Timestamp)) { // Pusher wins based on priority; if priorities are equal, order // by lower txn timestamp. if log.V(1) { log.Infof("pushing intent from txn with lower priority %s vs %d", reply.PusheeTxn, priority) } pusherWins = true } if !pusherWins { err := proto.NewTransactionPushError(args.Txn, reply.PusheeTxn) if log.V(1) { log.Info(err) } reply.SetGoError(err) return } // Upgrade priority of pushed transaction to one less than pusher's. reply.PusheeTxn.UpgradePriority(priority - 1) // If aborting transaction, set new status and return success. if args.PushType == proto.ABORT_TXN { reply.PusheeTxn.Status = proto.ABORTED } else if args.PushType == proto.PUSH_TIMESTAMP { // Otherwise, update timestamp to be one greater than the request's timestamp. reply.PusheeTxn.Timestamp = args.Timestamp reply.PusheeTxn.Timestamp.Logical++ } // Persist the pushed transaction using zero timestamp for inline value. if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, reply.PusheeTxn); err != nil { reply.SetGoError(err) return } }
// EndTransaction either commits or aborts (rolls back) an extant // transaction according to the args.Commit parameter. func (r *Range) EndTransaction(args *proto.EndTransactionRequest, reply *proto.EndTransactionResponse) { // Create the actual key to the system-local transaction table. key := engine.MakeKey(engine.KeyLocalTransactionPrefix, args.Key) // Start with supplied transaction, then possibly load from txn record. reply.Txn = gogoproto.Clone(args.Txn).(*proto.Transaction) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.GetProto(r.engine, key, existTxn) if err != nil { reply.SetGoError(err) return } // If the transaction record already exists, verify that we can either // commit it or abort it (according to args.Commit), and also that the // Timestamp and Epoch have not suffered regression. if ok { if existTxn.Status == proto.COMMITTED { reply.SetGoError(proto.NewTransactionStatusError(existTxn, "already committed")) return } else if existTxn.Status == proto.ABORTED { reply.SetGoError(proto.NewTransactionStatusError(existTxn, "already aborted")) return } else if args.Txn.Epoch < existTxn.Epoch { reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("epoch regression: %d", args.Txn.Epoch))) return } else if existTxn.Timestamp.Less(args.Txn.Timestamp) { // The transaction record can only ever be pushed forward, so it's an // error if somehow the transaction record has an earlier timestamp // than the transaction timestamp. reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("timestamp regression: %+v", args.Txn.Timestamp))) return } // Use the persisted transaction record as final transaction. gogoproto.Merge(reply.Txn, existTxn) } // Take max of requested timestamp and possibly "pushed" txn // record timestamp as the final commit timestamp. if reply.Txn.Timestamp.Less(args.Timestamp) { reply.Txn.Timestamp = args.Timestamp } // Set transaction status to COMMITTED or ABORTED as per the // args.Commit parameter. if args.Commit { // If the isolation level is SERIALIZABLE, return a transaction // retry error if the commit timestamp isn't equal to the txn // timestamp. if args.Txn.Isolation == proto.SERIALIZABLE && !reply.Txn.Timestamp.Equal(args.Txn.Timestamp) { reply.SetGoError(proto.NewTransactionRetryError(reply.Txn)) return } reply.Txn.Status = proto.COMMITTED } else { reply.Txn.Status = proto.ABORTED } // Persist the transaction record with updated status (& possibly timestmap). if err := engine.PutProto(r.engine, key, reply.Txn); err != nil { reply.SetGoError(err) return } }
// EndTransaction either commits or aborts (rolls back) an extant // transaction according to the args.Commit parameter. func (r *Range) EndTransaction(batch engine.Engine, ms *engine.MVCCStats, args *proto.EndTransactionRequest, reply *proto.EndTransactionResponse) { if args.Txn == nil { reply.SetGoError(util.Errorf("no transaction specified to EndTransaction")) return } key := keys.TransactionKey(args.Txn.Key, args.Txn.ID) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.MVCCGetProto(batch, key, proto.ZeroTimestamp, true, nil, existTxn) if err != nil { reply.SetGoError(err) return } // If the transaction record already exists, verify that we can either // commit it or abort it (according to args.Commit), and also that the // Timestamp and Epoch have not suffered regression. if ok { // Use the persisted transaction record as final transaction. reply.Txn = gogoproto.Clone(existTxn).(*proto.Transaction) if existTxn.Status == proto.COMMITTED { reply.SetGoError(proto.NewTransactionStatusError(existTxn, "already committed")) return } else if existTxn.Status == proto.ABORTED { reply.SetGoError(proto.NewTransactionAbortedError(existTxn)) return } else if args.Txn.Epoch < existTxn.Epoch { reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("epoch regression: %d", args.Txn.Epoch))) return } else if args.Txn.Epoch == existTxn.Epoch && existTxn.Timestamp.Less(args.Txn.OrigTimestamp) { // The transaction record can only ever be pushed forward, so it's an // error if somehow the transaction record has an earlier timestamp // than the original transaction timestamp. reply.SetGoError(proto.NewTransactionStatusError(existTxn, fmt.Sprintf("timestamp regression: %s", args.Txn.OrigTimestamp))) return } // Take max of requested epoch and existing epoch. The requester // may have incremented the epoch on retries. if reply.Txn.Epoch < args.Txn.Epoch { reply.Txn.Epoch = args.Txn.Epoch } // Take max of requested priority and existing priority. This isn't // terribly useful, but we do it for completeness. if reply.Txn.Priority < args.Txn.Priority { reply.Txn.Priority = args.Txn.Priority } } else { // The transaction doesn't exist yet on disk; use the supplied version. reply.Txn = gogoproto.Clone(args.Txn).(*proto.Transaction) } // Take max of requested timestamp and possibly "pushed" txn // record timestamp as the final commit timestamp. if reply.Txn.Timestamp.Less(args.Timestamp) { reply.Txn.Timestamp = args.Timestamp } // Set transaction status to COMMITTED or ABORTED as per the // args.Commit parameter. if args.Commit { // If the isolation level is SERIALIZABLE, return a transaction // retry error if the commit timestamp isn't equal to the txn // timestamp. if args.Txn.Isolation == proto.SERIALIZABLE && !reply.Txn.Timestamp.Equal(args.Txn.OrigTimestamp) { reply.SetGoError(proto.NewTransactionRetryError(reply.Txn)) return } reply.Txn.Status = proto.COMMITTED } else { reply.Txn.Status = proto.ABORTED } // Persist the transaction record with updated status (& possibly timestamp). if err := engine.MVCCPutProto(batch, ms, key, proto.ZeroTimestamp, nil, reply.Txn); err != nil { reply.SetGoError(err) return } // Run triggers if successfully committed. Any failures running // triggers will set an error and prevent the batch from committing. if ct := args.InternalCommitTrigger; ct != nil { // Resolve any explicit intents. for _, key := range ct.Intents { if log.V(1) { log.Infof("resolving intent at %s on end transaction [%s]", key, reply.Txn.Status) } if err := engine.MVCCResolveWriteIntent(batch, ms, key, reply.Txn.Timestamp, reply.Txn); err != nil { reply.SetGoError(err) return } reply.Resolved = append(reply.Resolved, key) } // Run appropriate trigger. if reply.Txn.Status == proto.COMMITTED { if ct.SplitTrigger != nil { *ms = engine.MVCCStats{} // clear stats, as split will recompute from scratch. reply.SetGoError(r.splitTrigger(batch, ct.SplitTrigger)) } else if ct.MergeTrigger != nil { *ms = engine.MVCCStats{} // clear stats, as merge will recompute from scratch. reply.SetGoError(r.mergeTrigger(batch, ct.MergeTrigger)) } else if ct.ChangeReplicasTrigger != nil { reply.SetGoError(r.changeReplicasTrigger(ct.ChangeReplicasTrigger)) } } } }
// InternalPushTxn resolves conflicts between concurrent txns (or // between a non-transactional reader or writer and a txn) in several // ways depending on the statuses and priorities of the conflicting // transactions. The InternalPushTxn operation is invoked by a // "pusher" (the writer trying to abort a conflicting txn or the // reader trying to push a conflicting txn's commit timestamp // forward), who attempts to resolve a conflict with a "pushee" // (args.PushTxn -- the pushee txn whose intent(s) caused the // conflict). // // Txn already committed/aborted: If pushee txn is committed or // aborted return success. // // Txn Timeout: If pushee txn entry isn't present or its LastHeartbeat // timestamp isn't set, use PushTxn.Timestamp as LastHeartbeat. If // current time - LastHeartbeat > 2 * DefaultHeartbeatInterval, then // the pushee txn should be either pushed forward or aborted, // depending on value of Request.Abort. // // Old Txn Epoch: If persisted pushee txn entry has a newer Epoch than // PushTxn.Epoch, return success, as older epoch may be removed. // // Lower Txn Priority: If pushee txn has a lower priority than pusher, // adjust pushee's persisted txn depending on value of args.Abort. If // args.Abort is true, set txn.Status to ABORTED, and priority to one // less than the pusher's priority and return success. If args.Abort // is false, set txn.Timestamp to pusher's txn.Timestamp + 1. // // Higher Txn Priority: If pushee txn has a higher priority than // pusher, return TransactionRetryError. Transaction will be retried // with priority one less than the pushee's higher priority. func (r *Range) InternalPushTxn(args *proto.InternalPushTxnRequest, reply *proto.InternalPushTxnResponse) { if !bytes.Equal(args.Key, args.PusheeTxn.ID) { reply.SetGoError(util.Errorf("request key %q should match pushee's txn ID %q", args.Key, args.PusheeTxn.ID)) return } // Create the actual key to the system-local transaction table. key := engine.MakeKey(engine.KeyLocalTransactionPrefix, args.Key) // Fetch existing transaction if possible. existTxn := &proto.Transaction{} ok, err := engine.GetProto(r.engine, key, existTxn) if err != nil { reply.SetGoError(err) return } if ok { // Start with the persisted transaction record as final transaction. reply.PusheeTxn = gogoproto.Clone(existTxn).(*proto.Transaction) // Upgrade the epoch and timestamp as necessary. if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch { reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch } if reply.PusheeTxn.Timestamp.Less(args.PusheeTxn.Timestamp) { reply.PusheeTxn.Timestamp = args.PusheeTxn.Timestamp } } else { // Some sanity checks for case where we don't find a transaction record. if args.PusheeTxn.LastHeartbeat != nil { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, "no txn persisted, yet intent has heartbeat")) return } else if args.PusheeTxn.Status != proto.PENDING { reply.SetGoError(proto.NewTransactionStatusError(&args.PusheeTxn, fmt.Sprintf("no txn persisted, yet intent has status %s", args.PusheeTxn.Status))) return } // The transaction doesn't exist yet on disk; use the supplied version. reply.PusheeTxn = gogoproto.Clone(&args.PusheeTxn).(*proto.Transaction) } // If already committed or aborted, return success. if reply.PusheeTxn.Status != proto.PENDING { // Trivial noop. return } // If we're trying to move the timestamp forward, and it's already // far enough forward, return success. if !args.Abort && args.Timestamp.Less(reply.PusheeTxn.Timestamp) { // Trivial noop. return } // pusherWins bool is true in the event the pusher prevails. var pusherWins bool // Check for txn timeout. if reply.PusheeTxn.LastHeartbeat == nil { reply.PusheeTxn.LastHeartbeat = &reply.PusheeTxn.Timestamp } // Compute heartbeat expiration. expiry := r.clock.Now() expiry.WallTime -= 2 * DefaultHeartbeatInterval.Nanoseconds() if reply.PusheeTxn.LastHeartbeat.Less(expiry) { log.V(1).Infof("pushing expired txn %+v", reply.PusheeTxn) pusherWins = true } else if args.PusheeTxn.Epoch < reply.PusheeTxn.Epoch { // Check for an intent from a prior epoch. log.V(1).Infof("pushing intent from previous epoch for txn %+v", reply.PusheeTxn) pusherWins = true } else if reply.PusheeTxn.Priority < args.Txn.Priority || (reply.PusheeTxn.Priority == args.Txn.Priority && args.Txn.Timestamp.Less(reply.PusheeTxn.Timestamp)) { // Finally, choose based on priority; if priorities are equal, order by lower txn timestamp. log.V(1).Infof("pushing intent from txn with lower priority %+v vs %+v", reply.PusheeTxn, args.Txn) pusherWins = true } if !pusherWins { log.V(1).Infof("failed to push intent %+v vs %+v", reply.PusheeTxn, args.Txn) reply.SetGoError(proto.NewTransactionRetryError(reply.PusheeTxn)) return } // If aborting transaction, set new status and return success. if args.Abort { reply.PusheeTxn.Status = proto.ABORTED } else { // Otherwise, update timestamp to be one greater than the request's timestamp. reply.PusheeTxn.Timestamp = args.Timestamp reply.PusheeTxn.Timestamp.Logical++ } // Persist the pushed transaction. if err := engine.PutProto(r.engine, key, reply.PusheeTxn); err != nil { reply.SetGoError(err) return } }