// SendBatch implements the batch.Sender interface. It subdivides // the Batch into batches admissible for sending (preventing certain // illegal mixtures of requests), executes each individual part // (which may span multiple ranges), and recombines the response. func (ds *DistSender) SendBatch(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, error) { // In the event that timestamp isn't set and read consistency isn't // required, set the timestamp using the local clock. // TODO(tschottdorf): right place for this? if ba.ReadConsistency == proto.INCONSISTENT && ba.Timestamp.Equal(proto.ZeroTimestamp) { // Make sure that after the call, args hasn't changed. defer func(timestamp proto.Timestamp) { ba.Timestamp = timestamp }(ba.Timestamp) ba.Timestamp = ds.clock.Now() } // TODO(tschottdorf): provisional instantiation. return newChunkingSender(ds.sendChunk).SendBatch(ctx, ba) }
func testPut() proto.BatchRequest { var ba proto.BatchRequest ba.Timestamp = testTS put := &proto.PutRequest{} put.Key = testKey ba.Add(put) return ba }
func (tc *TxnCoordSender) heartbeat(id string, trace *tracer.Trace, ctx context.Context) bool { tc.Lock() proceed := true txnMeta := tc.txns[id] // Before we send a heartbeat, determine whether this transaction // should be considered abandoned. If so, exit heartbeat. if txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow()) { // TODO(tschottdorf): should we be more proactive here? // The client might be continuing the transaction // through another coordinator, but in the most likely // case it's just gone and the open transaction record // could block concurrent operations. if log.V(1) { log.Infof("transaction %s abandoned; stopping heartbeat", txnMeta.txn) } proceed = false } // txnMeta.txn is possibly replaced concurrently, // so grab a copy before unlocking. txn := txnMeta.txn tc.Unlock() if !proceed { return false } hb := &proto.HeartbeatTxnRequest{} hb.Key = txn.Key ba := proto.BatchRequest{} ba.Timestamp = tc.clock.Now() ba.Key = txn.Key ba.Txn = &txn ba.Add(hb) epochEnds := trace.Epoch("heartbeat") _, err := tc.wrapped.Send(ctx, ba) epochEnds() // If the transaction is not in pending state, then we can stop // the heartbeat. It's either aborted or committed, and we resolve // write intents accordingly. if err != nil { log.Warningf("heartbeat to %s failed: %s", txn, err) } // TODO(bdarnell): once we have gotten a heartbeat response with // Status != PENDING, future heartbeats are useless. However, we // need to continue the heartbeatLoop until the client either // commits or abandons the transaction. We could save a little // pointless work by restructuring this loop to stop sending // heartbeats between the time that the transaction is aborted and // the client finds out. Furthermore, we could use this information // to send TransactionAbortedErrors to the client so it can restart // immediately instead of running until its EndTransaction. return true }
// Send implements the batch.Sender interface. If the request is part of a // transaction, the TxnCoordSender adds the transaction to a map of active // transactions and begins heartbeating it. Every subsequent request for the // same transaction updates the lastUpdate timestamp to prevent live // transactions from being considered abandoned and garbage collected. // Read/write mutating requests have their key or key range added to the // transaction's interval tree of key ranges for eventual cleanup via resolved // write intents; they're tagged to an outgoing EndTransaction request, with // the receiving replica in charge of resolving them. func (tc *TxnCoordSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) { tc.maybeBeginTxn(&ba) ba.CmdID = ba.GetOrCreateCmdID(tc.clock.PhysicalNow()) var startNS int64 // This is the earliest point at which the request has a ClientCmdID and/or // TxnID (if applicable). Begin a Trace which follows this request. trace := tc.tracer.NewTrace(&ba) defer trace.Finalize() // TODO(tschottdorf): always "Batch" defer trace.Epoch(fmt.Sprintf("sending %s", ba.Method()))() ctx = tracer.ToCtx(ctx, trace) // TODO(tschottdorf): No looping through the batch will be necessary once // we've eliminated all the redundancies. for _, arg := range ba.Requests { trace.Event(fmt.Sprintf("%T", arg.GetValue())) if err := updateForBatch(arg.GetInner(), ba.RequestHeader); err != nil { return nil, proto.NewError(err) } } var id string // optional transaction ID if ba.Txn != nil { // If this request is part of a transaction... id = string(ba.Txn.ID) // Verify that if this Transaction is not read-only, we have it on // file. If not, refuse writes - the client must have issued a write on // another coordinator previously. if ba.Txn.Writing && ba.IsTransactionWrite() { tc.Lock() _, ok := tc.txns[id] tc.Unlock() if !ok { return nil, proto.NewError(util.Errorf("transaction must not write on multiple coordinators")) } } // Set the timestamp to the original timestamp for read-only // commands and to the transaction timestamp for read/write // commands. if ba.IsReadOnly() { ba.Timestamp = ba.Txn.OrigTimestamp } else { ba.Timestamp = ba.Txn.Timestamp } if rArgs, ok := ba.GetArg(proto.EndTransaction); ok { et := rArgs.(*proto.EndTransactionRequest) // Remember when EndTransaction started in case we want to // be linearizable. startNS = tc.clock.PhysicalNow() if len(et.Intents) > 0 { // TODO(tschottdorf): it may be useful to allow this later. // That would be part of a possible plan to allow txns which // write on multiple coordinators. return nil, proto.NewError(util.Errorf("client must not pass intents to EndTransaction")) } if len(et.Key) != 0 { return nil, proto.NewError(util.Errorf("EndTransaction must not have a Key set")) } et.Key = ba.Txn.Key tc.Lock() txnMeta, metaOK := tc.txns[id] if id != "" && metaOK { et.Intents = txnMeta.intents() } tc.Unlock() if intents := ba.GetIntents(); len(intents) > 0 { // Writes in Batch, so EndTransaction is fine. Should add // outstanding intents to EndTransaction, though. // TODO(tschottdorf): possible issues when the batch fails, // but the intents have been added anyways. // TODO(tschottdorf): some of these intents may be covered // by others, for example {[a,b), a}). This can lead to // some extra requests when those are non-local to the txn // record. But it doesn't seem worth optimizing now. et.Intents = append(et.Intents, intents...) } else if !metaOK { // If we don't have the transaction, then this must be a retry // by the client. We can no longer reconstruct a correct // request so we must fail. // // TODO(bdarnell): if we had a GetTransactionStatus API then // we could lookup the transaction and return either nil or // TransactionAbortedError instead of this ambivalent error. return nil, proto.NewError(util.Errorf("transaction is already committed or aborted")) } if len(et.Intents) == 0 { // If there aren't any intents, then there's factually no // transaction to end. Read-only txns have all of their state in // the client. return nil, proto.NewError(util.Errorf("cannot commit a read-only transaction")) } // TODO(tschottdorf): V(1) for _, intent := range et.Intents { trace.Event(fmt.Sprintf("intent: [%s,%s)", intent.Key, intent.EndKey)) } } } // Send the command through wrapped sender, taking appropriate measures // on error. var br *proto.BatchResponse { var pErr *proto.Error br, pErr = tc.wrapped.Send(ctx, ba) if _, ok := pErr.GoError().(*proto.OpRequiresTxnError); ok { br, pErr = tc.resendWithTxn(ba) } if pErr := tc.updateState(ctx, ba, br, pErr); pErr != nil { return nil, pErr } } if br.Txn == nil { return br, nil } if _, ok := ba.GetArg(proto.EndTransaction); !ok { return br, nil } // If the --linearizable flag is set, we want to make sure that // all the clocks in the system are past the commit timestamp // of the transaction. This is guaranteed if either // - the commit timestamp is MaxOffset behind startNS // - MaxOffset ns were spent in this function // when returning to the client. Below we choose the option // that involves less waiting, which is likely the first one // unless a transaction commits with an odd timestamp. if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS { startNS = tsNS } sleepNS := tc.clock.MaxOffset() - time.Duration(tc.clock.PhysicalNow()-startNS) if tc.linearizable && sleepNS > 0 { defer func() { if log.V(1) { log.Infof("%v: waiting %s on EndTransaction for linearizability", br.Txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond)) } time.Sleep(sleepNS) }() } if br.Txn.Status != proto.PENDING { tc.cleanupTxn(trace, *br.Txn) } return br, nil }