// send runs the specified calls synchronously in a single batch and returns
// any errors. Returns a nil response for empty input (no requests).
func (db *DB) send(maxScanResults int64, reqs ...roachpb.Request) (
	*roachpb.BatchResponse, *roachpb.Error) {
	if len(reqs) == 0 {
		return nil, nil
	}

	ba := roachpb.BatchRequest{}
	ba.Add(reqs...)

	ba.MaxScanResults = maxScanResults
	if db.userPriority != 1 {
		ba.UserPriority = db.userPriority
	}

	tracing.AnnotateTrace()

	br, pErr := db.sender.Send(context.TODO(), ba)
	if pErr != nil {
		if log.V(1) {
			log.Infof("failed batch: %s", pErr)
		}
		return nil, pErr
	}
	return br, nil
}
func (ts *txnSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	// Send call through wrapped sender.
	ba.Txn = &ts.Proto
	ba.SetNewRequest()
	br, pErr := ts.wrapped.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(ts.wrapped, br))
	}

	// TODO(tschottdorf): see about using only the top-level *roachpb.Error
	// information for this restart logic (includes adding the Txn).
	err := pErr.GoError()
	// Only successful requests can carry an updated Txn in their response
	// header. Any error (e.g. a restart) can have a Txn attached to them as
	// well; those update our local state in the same way for the next attempt.
	// The exception is if our transaction was aborted and needs to restart
	// from scratch, in which case we do just that.
	if err == nil {
		ts.Proto.Update(br.Txn)
		return br, nil
	} else if abrtErr, ok := err.(*roachpb.TransactionAbortedError); ok {
		// On Abort, reset the transaction so we start anew on restart.
		ts.Proto = roachpb.Transaction{
			Name:      ts.Proto.Name,
			Isolation: ts.Proto.Isolation,
		}
		if abrtTxn := abrtErr.Transaction(); abrtTxn != nil {
			// Acts as a minimum priority on restart.
			ts.Proto.Priority = abrtTxn.Priority
		}
	} else if txnErr, ok := err.(roachpb.TransactionRestartError); ok {
		ts.Proto.Update(txnErr.Transaction())
	}
	return nil, pErr
}
func (ts *txnSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	// Send call through wrapped sender.
	ba.Txn = &ts.Proto
	ba.SetNewRequest()
	br, pErr := ts.wrapped.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(ts.wrapped, br))
	}

	// Only successful requests can carry an updated Txn in their response
	// header. Any error (e.g. a restart) can have a Txn attached to them as
	// well; those update our local state in the same way for the next attempt.
	// The exception is if our transaction was aborted and needs to restart
	// from scratch, in which case we do just that.
	if pErr == nil {
		ts.Proto.Update(br.Txn)
		return br, nil
	} else if _, ok := pErr.GoError().(*roachpb.TransactionAbortedError); ok {
		// On Abort, reset the transaction so we start anew on restart.
		ts.Proto = roachpb.Transaction{
			Name:      ts.Proto.Name,
			Isolation: ts.Proto.Isolation,
		}
		// Acts as a minimum priority on restart.
		if pErr.GetTxn() != nil {
			ts.Proto.Priority = pErr.GetTxn().Priority
		}
	} else if pErr.TransactionRestart != roachpb.TransactionRestart_ABORT {
		ts.Proto.Update(pErr.GetTxn())
	}
	return nil, pErr
}
// sendAndFill is a helper which sends the given batch and fills its results,
// returning the appropriate error which is either from the first failing call,
// or an "internal" error.
func sendAndFill(
	send func(roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error),
	b *Batch,
) error {
	// Errors here will be attached to the results, so we will get them from
	// the call to fillResults in the regular case in which an individual call
	// fails. But send() also returns its own errors, so there's some dancing
	// here to do because we want to run fillResults() so that the individual
	// result gets initialized with an error from the corresponding call.
	var ba roachpb.BatchRequest
	// TODO(tschottdorf): this nonsensical copy is required since (at least at
	// the time of writing, the chunking and masking in DistSender operates on
	// the original data (as attested to by a whole bunch of test failures).
	ba.Requests = append([]roachpb.RequestUnion(nil), b.reqs...)
	ba.Header = b.Header
	b.response, b.pErr = send(ba)
	if b.pErr != nil {
		// Discard errors from fillResults.
		_ = b.fillResults()
		return b.pErr.GoError()
	}
	if err := b.fillResults(); err != nil {
		b.pErr = roachpb.NewError(err)
		return err
	}
	return nil
}
// RangeLookup dispatches a RangeLookup request for the given
// metadata key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: considerIntents,
		Reverse:         useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	trace := ds.Tracer.StartSpan("range lookup")
	defer trace.Finish()
	// TODO(tschottdorf): Ideally we would use the trace of the request which
	// caused this lookup instead of a new one.
	br, err := ds.sendRPC(trace, desc.RangeID, replicas, orderRandom, ba)
	if err != nil {
		return nil, err
	}
	if br.Error != nil {
		return nil, br.Error
	}
	return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil
}
// Send implements the batch.Sender interface. It subdivides
// the Batch into batches admissible for sending (preventing certain
// illegal mixtures of requests), executes each individual part
// (which may span multiple ranges), and recombines the response.
func (ds *DistSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	// TODO(tschottdorf): right place for this?
	if ba.ReadConsistency == roachpb.INCONSISTENT && ba.Timestamp.Equal(roachpb.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp roachpb.Timestamp) {
			ba.Timestamp = timestamp
		}(ba.Timestamp)
		ba.Timestamp = ds.clock.Now()
	}

	if ba.Txn != nil && len(ba.Txn.CertainNodes.Nodes) == 0 {
		// Ensure the local NodeID is marked as free from clock offset;
		// the transaction's timestamp was taken off the local clock.
		if nDesc := ds.getNodeDescriptor(); nDesc != nil {
			// TODO(tschottdorf): bad style to assume that ba.Txn is ours.
			// No race here, but should have a better way of doing this.
			// TODO(tschottdorf): future refactoring should move this to txn
			// creation in TxnCoordSender, which is currently unaware of the
			// NodeID (and wraps *DistSender through client.Sender since it
			// also needs test compatibility with *LocalSender).
			ba.Txn.CertainNodes.Add(nDesc.NodeID)
		}
	}

	// TODO(tschottdorf): provisional instantiation.
	return newChunkingSender(ds.sendChunk).Send(ctx, ba)
}
// RangeLookup implements the RangeDescriptorDB interface.
// RangeLookup dispatches a RangeLookup request for the given metadata
// key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(
	key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool,
) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: considerIntents,
		Reverse:         useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	replicas.Shuffle()
	// TODO(tschottdorf): Ideally we would use the trace of the request which
	// caused this lookup.
	_ = context.TODO()
	br, err := ds.sendRPC(ds.Ctx, desc.RangeID, replicas, ba)
	if err != nil {
		return nil, nil, roachpb.NewError(err)
	}
	if br.Error != nil {
		return nil, nil, br.Error
	}
	resp := br.Responses[0].GetInner().(*roachpb.RangeLookupResponse)
	return resp.Ranges, resp.PrefetchedRanges, nil
}
// RangeLookup dispatches a RangeLookup request for the given
// metadata key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that RangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) RangeLookup(key roachpb.RKey, desc *roachpb.RangeDescriptor, considerIntents, useReverseScan bool) ([]roachpb.RangeDescriptor, *roachpb.Error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			// We can interpret the RKey as a Key here since it's a metadata
			// lookup; those are never local.
			Key: key.AsRawKey(),
		},
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: considerIntents,
		Reverse:         useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	// TODO(tschottdorf) consider a Trace here, potentially that of the request
	// that had the cache miss and waits for the result.
	br, err := ds.sendRPC(nil /* Trace */, desc.RangeID, replicas, rpc.OrderRandom, ba)
	if err != nil {
		return nil, err
	}
	if br.Error != nil {
		return nil, br.Error
	}
	return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil
}
// rangeLookup dispatches a RangeLookup request for the given
// metadata key to the replicas of the given range. Note that we allow
// inconsistent reads when doing range lookups for efficiency. Getting
// stale data is not a correctness problem but instead may
// infrequently result in additional latency as additional range
// lookups may be required. Note also that rangeLookup bypasses the
// DistSender's Send() method, so there is no error inspection and
// retry logic here; this is not an issue since the lookup performs a
// single inconsistent read only.
func (ds *DistSender) rangeLookup(key roachpb.Key, options lookupOptions, desc *roachpb.RangeDescriptor) ([]roachpb.RangeDescriptor, error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Add(&roachpb.RangeLookupRequest{
		RequestHeader: roachpb.RequestHeader{
			Key:             key,
			ReadConsistency: roachpb.INCONSISTENT,
		},
		MaxRanges:       ds.rangeLookupMaxRanges,
		ConsiderIntents: options.considerIntents,
		Reverse:         options.useReverseScan,
	})
	replicas := newReplicaSlice(ds.gossip, desc)
	// TODO(tschottdorf) consider a Trace here, potentially that of the request
	// that had the cache miss and waits for the result.
	br, err := ds.sendRPC(nil /* Trace */, desc.RangeID, replicas, rpc.OrderRandom, ba)
	if err != nil {
		return nil, err
	}
	if err := br.GoError(); err != nil {
		return nil, err
	}
	return br.Responses[0].GetInner().(*roachpb.RangeLookupResponse).Ranges, nil
}
// Send forwards the call to the single store. This is a poor man's
// version of kv.TxnCoordSender, but it serves the purposes of
// supporting tests in this package. Transactions are not supported.
// Since kv/ depends on storage/, we can't get access to a
// TxnCoordSender from here.
// TODO(tschottdorf): {kv->storage}.LocalSender
func (db *testSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if et, ok := ba.GetArg(roachpb.EndTransaction); ok {
		return nil, roachpb.NewError(util.Errorf("%s method not supported", et.Method()))
	}
	// Lookup range and direct request.
	key, endKey := keys.Range(ba)
	rng := db.store.LookupReplica(key, endKey)
	if rng == nil {
		return nil, roachpb.NewError(roachpb.NewRangeKeyMismatchError(key, endKey, nil))
	}
	ba.RangeID = rng.Desc().RangeID
	replica := rng.GetReplica()
	if replica == nil {
		return nil, roachpb.NewError(util.Errorf("own replica missing in range"))
	}
	ba.Replica = *replica
	br, pErr := db.store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(db.store, br))
	}
	if pErr != nil {
		return nil, pErr
	}
	return br, nil
}
// Send implements Sender.
// TODO(tschottdorf): We actually don't want to chop EndTransaction off for
// single-range requests (but that happens now since EndTransaction has the
// isAlone flag). Whether it is one or not is unknown right now (you can only
// find out after you've sent to the Range/looked up a descriptor that suggests
// that you're multi-range. In those cases, the wrapped sender should return an
// error so that we split and retry once the chunk which contains
// EndTransaction (i.e. the last one).
func (cs *chunkingSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if len(ba.Requests) < 1 {
		panic("empty batch")
	}

	parts := ba.Split()
	var rplChunks []*roachpb.BatchResponse
	for _, part := range parts {
		ba.Requests = part
		// Increase the sequence counter to account for the fact that while
		// chunking, we're likely sending multiple requests to the same Replica.
		ba.SetNewRequest()
		rpl, err := cs.f(ctx, ba)
		if err != nil {
			return nil, err
		}
		// Propagate transaction from last reply to next request. The final
		// update is taken and put into the response's main header.
		ba.Txn.Update(rpl.Header().Txn)
		rplChunks = append(rplChunks, rpl)
	}

	reply := rplChunks[0]
	for _, rpl := range rplChunks[1:] {
		reply.Responses = append(reply.Responses, rpl.Responses...)
	}
	lastHeader := rplChunks[len(rplChunks)-1].BatchResponse_Header
	reply.Error = lastHeader.Error
	reply.Timestamp = lastHeader.Timestamp
	reply.Txn = ba.Txn
	return reply, nil
}
// sendAttempt gathers and rearranges the replicas, and makes an RPC call.
func (ds *DistSender) sendAttempt(trace *tracer.Trace, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor) (*roachpb.BatchResponse, *roachpb.Error) {
	defer trace.Epoch("sending RPC")()

	leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID))

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those replicas with long common
	// prefix of attributes end up first. If there's no prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) && leader.StoreID > 0 {
		if i := replicas.FindReplica(leader.StoreID); i >= 0 {
			replicas.MoveToFront(i)
			order = rpc.OrderStable
		}
	}

	br, err := ds.sendRPC(trace, desc.RangeID, replicas, order, ba)
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	// Untangle the error from the received response.
	pErr := br.Error
	br.Error = nil // scrub the response error
	return br, pErr
}
func testPut() roachpb.BatchRequest {
	var ba roachpb.BatchRequest
	ba.Timestamp = testTS
	put := &roachpb.PutRequest{}
	put.Key = testKey
	ba.Add(put)
	return ba
}
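// A minimal usage sketch of a helper like testPut in a test: build the batch,
// send it through a Sender under test, and unwrap the single response. The
// names senderUnderTest and t are assumptions for illustration and are not
// part of the original snippet.
//
//	ba := testPut()
//	br, pErr := senderUnderTest.Send(context.Background(), ba)
//	if pErr != nil {
//		t.Fatal(pErr)
//	}
//	if _, ok := br.Responses[0].GetInner().(*roachpb.PutResponse); !ok {
//		t.Fatalf("expected a PutResponse, got %T", br.Responses[0].GetInner())
//	}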
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	var store *Store
	var err error

	// If we aren't given a Replica, then a little bending over backwards is
	// required here. This case applies exclusively to unittests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, err = ls.lookupReplica(rs.Key, rs.EndKey)
		if err == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx, log.RangeID, ba.RangeID)

	if err == nil {
		store, err = ls.GetStore(ba.Replica.StoreID)
	}

	if err != nil {
		return nil, roachpb.NewError(err)
	}

	sp, cleanupSp := tracing.SpanFromContext(opStores, store.Tracer(), ctx)
	defer cleanupSp()

	if ba.Txn != nil {
		// For calls that read data within a txn, we keep track of timestamps
		// observed from the various participating nodes' HLC clocks. If we have
		// a timestamp on file for this Node which is smaller than MaxTimestamp,
		// we can lower MaxTimestamp accordingly. If MaxTimestamp drops below
		// OrigTimestamp, we effectively can't see uncertainty restarts any
		// more.
		// Note that it's not an issue if MaxTimestamp propagates back out to
		// the client via a returned Transaction update - when updating a Txn
		// from another, the larger MaxTimestamp wins.
		if maxTS, ok := ba.Txn.GetObservedTimestamp(ba.Replica.NodeID); ok && maxTS.Less(ba.Txn.MaxTimestamp) {
			// Copy-on-write to protect others we might be sharing the Txn with.
			shallowTxn := *ba.Txn
			// The uncertainty window is [OrigTimestamp, maxTS), so if that window
			// is empty, there won't be any uncertainty restarts.
			if !ba.Txn.OrigTimestamp.Less(maxTS) {
				sp.LogEvent("read has no clock uncertainty")
			}
			shallowTxn.MaxTimestamp.Backward(maxTS)
			ba.Txn = &shallowTxn
		}
	}
	br, pErr := store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	sp := tracing.SpanFromContext(ctx)
	var store *Store
	var pErr *roachpb.Error

	// If we aren't given a Replica, then a little bending over backwards is
	// required here. This case applies exclusively to unittests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, pErr = ls.lookupReplica(rs.Key, rs.EndKey)
		if pErr == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx, log.RangeID, ba.RangeID)

	if pErr == nil {
		store, pErr = ls.GetStore(ba.Replica.StoreID)
	}

	var br *roachpb.BatchResponse
	if pErr != nil {
		return nil, pErr
	}
	// For calls that read data within a txn, we can avoid uncertainty
	// related retries in certain situations. If the node is in
	// "CertainNodes", we need not worry about uncertain reads any
	// more. Setting MaxTimestamp=OrigTimestamp for the operation
	// accomplishes that. See roachpb.Transaction.CertainNodes for details.
	if ba.Txn != nil && ba.Txn.CertainNodes.Contains(ba.Replica.NodeID) {
		// MaxTimestamp = Timestamp corresponds to no clock uncertainty.
		sp.LogEvent("read has no clock uncertainty")
		// Copy-on-write to protect others we might be sharing the Txn with.
		shallowTxn := *ba.Txn
		// We set to OrigTimestamp because that works for both SNAPSHOT and
		// SERIALIZABLE: If we used Timestamp instead, we could run into
		// unnecessary retries at SNAPSHOT. For example, a SNAPSHOT txn at
		// OrigTimestamp = 1000.0, Timestamp = 2000.0, MaxTimestamp = 3000.0
		// will always read at 1000, so a MaxTimestamp of 2000 will still let
		// it restart with uncertainty when it finds a value in (1000, 2000).
		shallowTxn.MaxTimestamp = ba.Txn.OrigTimestamp
		ba.Txn = &shallowTxn
	}
	br, pErr = store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// TestBatchPrevNext tests batch.{Prev,Next}.
func TestBatchPrevNext(t *testing.T) {
	defer leaktest.AfterTest(t)()
	loc := func(s string) string {
		return string(keys.RangeDescriptorKey(roachpb.RKey(s)))
	}
	span := func(strs ...string) []roachpb.Span {
		var r []roachpb.Span
		for i, str := range strs {
			if i%2 == 0 {
				r = append(r, roachpb.Span{Key: roachpb.Key(str)})
			} else {
				r[len(r)-1].EndKey = roachpb.Key(str)
			}
		}
		return r
	}
	max, min := string(roachpb.RKeyMax), string(roachpb.RKeyMin)
	abc := span("a", "", "b", "", "c", "")
	testCases := []struct {
		spans             []roachpb.Span
		key, expFW, expBW string
	}{
		{spans: span("a", "c", "b", ""), key: "b", expFW: "b", expBW: "b"},
		{spans: span("a", "c", "b", ""), key: "a", expFW: "a", expBW: "a"},
		{spans: span("a", "c", "d", ""), key: "c", expFW: "d", expBW: "c"},
		{spans: span("a", "c\x00", "d", ""), key: "c", expFW: "c", expBW: "c"},
		{spans: abc, key: "b", expFW: "b", expBW: "b"},
		{spans: abc, key: "b\x00", expFW: "c", expBW: "b\x00"},
		{spans: abc, key: "bb", expFW: "c", expBW: "b"},
		{spans: span(), key: "whatevs", expFW: max, expBW: min},
		{spans: span(loc("a"), loc("c")), key: "c", expFW: "c", expBW: "c"},
		{spans: span(loc("a"), loc("c")), key: "c\x00", expFW: max, expBW: "c\x00"},
	}

	for i, test := range testCases {
		var ba roachpb.BatchRequest
		for _, span := range test.spans {
			args := &roachpb.ScanRequest{}
			args.Key, args.EndKey = span.Key, span.EndKey
			ba.Add(args)
		}
		if next, err := next(ba, roachpb.RKey(test.key)); err != nil {
			t.Errorf("%d: %v", i, err)
		} else if !bytes.Equal(next, roachpb.Key(test.expFW)) {
			t.Errorf("%d: next: expected %q, got %q", i, test.expFW, next)
		}
		if prev, err := prev(ba, roachpb.RKey(test.key)); err != nil {
			t.Errorf("%d: %v", i, err)
		} else if !bytes.Equal(prev, roachpb.Key(test.expBW)) {
			t.Errorf("%d: prev: expected %q, got %q", i, test.expBW, prev)
		}
	}
}
func (tc *TxnCoordSender) heartbeat(id string, trace *tracer.Trace, ctx context.Context) bool {
	tc.Lock()
	proceed := true
	txnMeta := tc.txns[id]
	// Before we send a heartbeat, determine whether this transaction
	// should be considered abandoned. If so, exit heartbeat.
	if txnMeta.hasClientAbandonedCoord(tc.clock.PhysicalNow()) {
		// TODO(tschottdorf): should we be more proactive here?
		// The client might be continuing the transaction
		// through another coordinator, but in the most likely
		// case it's just gone and the open transaction record
		// could block concurrent operations.
		if log.V(1) {
			log.Infof("transaction %s abandoned; stopping heartbeat", txnMeta.txn)
		}
		proceed = false
	}
	// txnMeta.txn is possibly replaced concurrently,
	// so grab a copy before unlocking.
	txn := txnMeta.txn
	tc.Unlock()
	if !proceed {
		return false
	}

	hb := &roachpb.HeartbeatTxnRequest{}
	hb.Key = txn.Key
	ba := roachpb.BatchRequest{}
	ba.Timestamp = tc.clock.Now()
	ba.Txn = txn.Clone()
	ba.Add(hb)

	epochEnds := trace.Epoch("heartbeat")
	_, err := tc.wrapped.Send(ctx, ba)
	epochEnds()
	// If the transaction is not in pending state, then we can stop
	// the heartbeat. It's either aborted or committed, and we resolve
	// write intents accordingly.
	if err != nil {
		log.Warningf("heartbeat to %s failed: %s", txn, err)
	}
	// TODO(bdarnell): once we have gotten a heartbeat response with
	// Status != PENDING, future heartbeats are useless. However, we
	// need to continue the heartbeatLoop until the client either
	// commits or abandons the transaction. We could save a little
	// pointless work by restructuring this loop to stop sending
	// heartbeats between the time that the transaction is aborted and
	// the client finds out. Furthermore, we could use this information
	// to send TransactionAbortedErrors to the client so it can restart
	// immediately instead of running until its EndTransaction.
	return true
}
// sendSingleRange gathers and rearranges the replicas, and makes an RPC call.
func (ds *DistSender) sendSingleRange(
	ctx context.Context, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor,
) (*roachpb.BatchResponse, *roachpb.Error) {
	// Hack: avoid formatting the message passed to Span.LogEvent for
	// opentracing.noopSpans. We can't actually tell if we have a noopSpan, but
	// we can see if the span has a NoopTracer. Note that this particular
	// invocation is expensive because we're pretty-printing keys.
	//
	// TODO(tschottdorf): This hack can go away when something like
	// Span.LogEventf is added.
	sp := opentracing.SpanFromContext(ctx)
	if sp != nil && sp.Tracer() != (opentracing.NoopTracer{}) {
		sp.LogEvent(fmt.Sprintf("sending RPC to [%s, %s)", desc.StartKey, desc.EndKey))
	}

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those replicas with long common
	// prefix of attributes end up first. If there's no prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(ba.IsReadOnly() && ba.ReadConsistency == roachpb.INCONSISTENT) {
		if leader := ds.leaderCache.Lookup(roachpb.RangeID(desc.RangeID)); leader.StoreID > 0 {
			if i := replicas.FindReplica(leader.StoreID); i >= 0 {
				replicas.MoveToFront(i)
				order = orderStable
			}
		}
	}

	// TODO(tschottdorf): should serialize the trace here, not higher up.
	br, pErr := ds.sendRPC(ctx, desc.RangeID, replicas, order, ba)
	if pErr != nil {
		return nil, pErr
	}
	// If the reply contains a timestamp, update the local HLC with it.
	if br.Error != nil && br.Error.Now != roachpb.ZeroTimestamp {
		ds.clock.Update(br.Error.Now)
	} else if br.Now != roachpb.ZeroTimestamp {
		ds.clock.Update(br.Now)
	}

	// Untangle the error from the received response.
	pErr = br.Error
	br.Error = nil // scrub the response error
	return br, pErr
}
// process iterates through all keys in a replica's range, calling the garbage
// collector for each key and associated set of values. GC'd keys are batched
// into GC calls. Extant intents are resolved if they are older than
// intentAgeThreshold. The transaction and abort cache records are also
// scanned and old entries evicted. During normal operation, both of these
// records are cleaned up when their respective transaction finishes, so the
// amount of work done here is expected to be small.
//
// Some care needs to be taken to avoid cyclic recreation of entries during GC:
// * a Push initiated due to an intent may recreate a transaction entry
// * resolving an intent may write a new abort cache entry
// * obtaining the transaction for an abort cache entry requires a Push
//
// The following order is taken below:
// 1) collect all intents with sufficiently old txn record
// 2) collect these intents' transactions
// 3) scan the transaction table, collecting abandoned or completed txns
// 4) push all of these transactions (possibly recreating entries)
// 5) resolve all intents (unless the txn is still PENDING), which will recreate
//    abort cache entries (but with the txn timestamp; i.e. likely gc'able)
// 6) scan the abort cache table for old entries
// 7) push these transactions (again, recreating txn entries).
// 8) send a GCRequest.
func (gcq *gcQueue) process(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) error {
	snap := repl.store.Engine().NewSnapshot()
	desc := repl.Desc()
	defer snap.Close()

	// Lookup the GC policy for the zone containing this key range.
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("could not find zone config for range %s: %s", repl, err)
	}

	gcKeys, info, err := RunGC(ctx, desc, snap, now, zone.GC,
		func(now hlc.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType) {
			pushTxn(gcq.store.DB(), now, txn, typ)
		},
		func(intents []roachpb.Intent, poison bool, wait bool) error {
			return repl.store.intentResolver.resolveIntents(ctx, intents, poison, wait)
		})
	if err != nil {
		return err
	}

	gcq.eventLog.VInfof(true, "completed with stats %+v", info)

	var ba roachpb.BatchRequest
	var gcArgs roachpb.GCRequest
	// TODO(tschottdorf): This is one of these instances in which we want
	// to be more careful that the request ends up on the correct Replica,
	// and we might have to worry about mixing range-local and global keys
	// in a batch which might end up spanning Ranges by the time it executes.
	gcArgs.Key = desc.StartKey.AsRawKey()
	gcArgs.EndKey = desc.EndKey.AsRawKey()
	gcArgs.Keys = gcKeys
	gcArgs.Threshold = info.Threshold

	// Technically not needed since we're talking directly to the Range.
	ba.RangeID = desc.RangeID
	ba.Timestamp = now
	ba.Add(&gcArgs)
	if _, pErr := repl.Send(ctx, ba); pErr != nil {
		return pErr.GoError()
	}
	return nil
}
// TestTxnCoordSenderErrorWithIntent validates that if a transactional request
// returns an error but also indicates a Writing transaction, the coordinator
// tracks it just like a successful request.
func TestTxnCoordSenderErrorWithIntent(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		txn := ba.Txn.Clone()
		txn.Writing = true
		pErr := roachpb.NewError(roachpb.NewTransactionRetryError())
		pErr.SetTxn(txn)
		return nil, pErr
	}), clock, false, nil, stopper)
	defer stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	if _, pErr := ts.Send(context.Background(), ba); !testutils.IsPError(pErr, "retry txn") {
		t.Fatalf("unexpected error: %v", pErr)
	}

	defer teardownHeartbeats(ts)
	ts.Lock()
	defer ts.Unlock()
	if len(ts.txns) != 1 {
		t.Fatalf("expected transaction to be tracked")
	}
}
// TestTxnCoordSenderSingleRoundtripTxn checks that a batch which completely
// holds the writing portion of a Txn (including EndTransaction) does not
// launch a heartbeat goroutine at all.
func TestTxnCoordSenderSingleRoundtripTxn(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	manual := hlc.NewManualClock(0)
	clock := hlc.NewClock(manual.UnixNano)
	clock.SetMaxOffset(20)

	ts := NewTxnCoordSender(senderFn(func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
		br := ba.CreateReply()
		br.Txn = ba.Txn.Clone()
		br.Txn.Writing = true
		return br, nil
	}), clock, false, nil, stopper)

	// Stop the stopper manually, prior to trying the transaction. This has the
	// effect of returning a NodeUnavailableError for any attempts at launching
	// a heartbeat goroutine.
	stopper.Stop()

	var ba roachpb.BatchRequest
	key := roachpb.Key("test")
	ba.Add(&roachpb.BeginTransactionRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.PutRequest{Span: roachpb.Span{Key: key}})
	ba.Add(&roachpb.EndTransactionRequest{})
	ba.Txn = &roachpb.Transaction{Name: "test"}
	_, pErr := ts.Send(context.Background(), ba)
	if pErr != nil {
		t.Fatal(pErr)
	}
}
func (ts *txnSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	// Send call through wrapped sender.
	ba.Txn = &ts.Proto
	if ts.UserPriority > 0 {
		ba.UserPriority = ts.UserPriority
	}

	ctx = opentracing.ContextWithSpan(ctx, ts.Trace)

	ba.SetNewRequest()
	br, pErr := ts.wrapped.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(ts.wrapped, br))
	}

	if br != nil {
		for _, encSp := range br.CollectedSpans {
			var newSp basictracer.RawSpan
			if err := tracing.DecodeRawSpan(encSp, &newSp); err != nil {
				return nil, roachpb.NewError(err)
			}
			ts.CollectedSpans = append(ts.CollectedSpans, newSp)
		}
	}

	// Only successful requests can carry an updated Txn in their response
	// header. Any error (e.g. a restart) can have a Txn attached to them as
	// well; those update our local state in the same way for the next attempt.
	// The exception is if our transaction was aborted and needs to restart
	// from scratch, in which case we do just that.
	if pErr == nil {
		ts.Proto.Update(br.Txn)
		return br, nil
	} else if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); ok {
		// On Abort, reset the transaction so we start anew on restart.
		ts.Proto = roachpb.Transaction{
			TxnMeta: roachpb.TxnMeta{
				Isolation: ts.Proto.Isolation,
			},
			Name: ts.Proto.Name,
		}
		// Acts as a minimum priority on restart.
		if pErr.GetTxn() != nil {
			ts.Proto.Priority = pErr.GetTxn().Priority
		}
	} else if pErr.TransactionRestart != roachpb.TransactionRestart_ABORT {
		ts.Proto.Update(pErr.GetTxn())
	}
	return nil, pErr
}
// Send implements Sender.
// TODO(tschottdorf): We actually don't want to chop EndTransaction off for
// single-range requests (but that happens now since EndTransaction has the
// isAlone flag). Whether it is one or not is unknown right now (you can only
// find out after you've sent to the Range/looked up a descriptor that suggests
// that you're multi-range. In those cases, the wrapped sender should return an
// error so that we split and retry once the chunk which contains
// EndTransaction (i.e. the last one).
func (cs *chunkingSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if len(ba.Requests) < 1 {
		panic("empty batch")
	}

	// Deterministically create ClientCmdIDs for all parts of the batch if
	// a CmdID is already set (otherwise, leave them empty).
	var nextID func() roachpb.ClientCmdID
	empty := roachpb.ClientCmdID{}
	if empty == ba.CmdID {
		nextID = func() roachpb.ClientCmdID {
			return empty
		}
	} else {
		rng := rand.New(rand.NewSource(ba.CmdID.Random))
		id := ba.CmdID
		nextID = func() roachpb.ClientCmdID {
			curID := id             // copy
			id.Random = rng.Int63() // adjust for next call
			return curID
		}
	}

	parts := ba.Split()
	var rplChunks []*roachpb.BatchResponse
	for _, part := range parts {
		ba.Requests = part
		ba.CmdID = nextID()
		rpl, err := cs.f(ctx, ba)
		if err != nil {
			return nil, err
		}
		// Propagate transaction from last reply to next request. The final
		// update is taken and put into the response's main header.
		ba.Txn.Update(rpl.Header().Txn)
		rplChunks = append(rplChunks, rpl)
	}

	reply := rplChunks[0]
	for _, rpl := range rplChunks[1:] {
		reply.Responses = append(reply.Responses, rpl.Responses...)
	}
	lastHeader := rplChunks[len(rplChunks)-1].BatchResponse_Header
	reply.Error = lastHeader.Error
	reply.Timestamp = lastHeader.Timestamp
	reply.Txn = ba.Txn
	return reply, nil
}
// Send implements the batch.Sender interface. It subdivides
// the Batch into batches admissible for sending (preventing certain
// illegal mixtures of requests), executes each individual part
// (which may span multiple ranges), and recombines the response.
func (ds *DistSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	// TODO(tschottdorf): right place for this?
	if ba.ReadConsistency == roachpb.INCONSISTENT && ba.Timestamp.Equal(roachpb.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp roachpb.Timestamp) {
			ba.Timestamp = timestamp
		}(ba.Timestamp)
		ba.Timestamp = ds.clock.Now()
	}

	// TODO(tschottdorf): provisional instantiation.
	return newChunkingSender(ds.sendChunk).Send(ctx, ba)
}
// Send implements the client.Sender interface. The store is looked up from the
// store map if specified by the request; otherwise, the command is being
// executed locally, and the replica is determined via lookup through each
// store's LookupRange method. The latter path is taken only by unit tests.
func (ls *Stores) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	sp := tracing.SpanFromContext(ctx)
	var store *Store
	var pErr *roachpb.Error

	// If we aren't given a Replica, then a little bending over backwards is
	// required here. This case applies exclusively to unittests.
	if ba.RangeID == 0 || ba.Replica.StoreID == 0 {
		var repl *roachpb.ReplicaDescriptor
		var rangeID roachpb.RangeID
		rs := keys.Range(ba)
		rangeID, repl, pErr = ls.lookupReplica(rs.Key, rs.EndKey)
		if pErr == nil {
			ba.RangeID = rangeID
			ba.Replica = *repl
		}
	}

	ctx = log.Add(ctx, log.RangeID, ba.RangeID)

	if pErr == nil {
		store, pErr = ls.GetStore(ba.Replica.StoreID)
	}

	var br *roachpb.BatchResponse
	if pErr != nil {
		return nil, pErr
	}
	// For calls that read data within a txn, we can avoid uncertainty
	// related retries in certain situations. If the node is in
	// "CertainNodes", we need not worry about uncertain reads any
	// more. Setting MaxTimestamp=Timestamp for the operation
	// accomplishes that. See roachpb.Transaction.CertainNodes for details.
	if ba.Txn != nil && ba.Txn.CertainNodes.Contains(ba.Replica.NodeID) {
		// MaxTimestamp = Timestamp corresponds to no clock uncertainty.
		sp.LogEvent("read has no clock uncertainty")
		// Copy-on-write to protect others we might be sharing the Txn with.
		shallowTxn := *ba.Txn
		shallowTxn.MaxTimestamp = ba.Txn.Timestamp
		ba.Txn = &shallowTxn
	}
	br, pErr = store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
// sendRPC sends one or more RPCs to replicas from the supplied roachpb.Replica
// slice. Returns an RPC error if the request could not be sent. Note
// that the reply may contain a higher level error and must be checked in
// addition to the RPC error.
// The replicas are assumed to have been ordered by preference, with closer
// ones (if any) at the front.
func (ds *DistSender) sendRPC(
	ctx context.Context, rangeID roachpb.RangeID, replicas ReplicaSlice, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	if len(replicas) == 0 {
		return nil, noNodeAddrsAvailError{}
	}

	// TODO(pmattis): This needs to be tested. If it isn't set we'll
	// still route the request appropriately by key, but won't receive
	// RangeNotFoundErrors.
	ba.RangeID = rangeID

	// Set RPC opts with the stipulation that one of N RPCs must succeed.
	rpcOpts := SendOptions{
		SendNextTimeout:  ds.sendNextTimeout,
		Timeout:          base.NetworkTimeout,
		Context:          ctx,
		transportFactory: ds.transportFactory,
	}
	tracing.AnnotateTrace()
	defer tracing.AnnotateTrace()

	reply, err := ds.sendToReplicas(rpcOpts, rangeID, replicas, ba, ds.rpcContext)
	if err != nil {
		return nil, err
	}
	return reply, nil
}
// SendWrappedWith is a convenience function which wraps the request in a batch
// and sends it via the provided Sender with the given header. It returns the
// unwrapped response or an error. It's valid to pass a `nil` context;
// context.Background() is used in that case.
func SendWrappedWith(sender Sender, ctx context.Context, h roachpb.Header, args roachpb.Request) (roachpb.Response, error) {
	if ctx == nil {
		ctx = context.Background()
	}

	ba := roachpb.BatchRequest{}
	ba.Header = h
	ba.Add(args)

	br, pErr := sender.Send(ctx, ba)
	if err := pErr.GoError(); err != nil {
		return nil, err
	}
	unwrappedReply := br.Responses[0].GetInner()
	unwrappedReply.Header().Txn = br.Txn
	return unwrappedReply, nil
}
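// A minimal usage sketch of SendWrappedWith: issue a single inconsistent Get
// through a Sender, then type-assert the unwrapped response. The identifiers
// sender and key are assumed to be in scope; they are illustrative only and
// not defined in the original snippet.
//
//	reply, err := SendWrappedWith(sender, nil /* ctx */, roachpb.Header{
//		ReadConsistency: roachpb.INCONSISTENT,
//	}, &roachpb.GetRequest{Span: roachpb.Span{Key: key}})
//	if err != nil {
//		return err
//	}
//	gr := reply.(*roachpb.GetResponse)
//	_ = gr.Value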
// send runs the specified calls synchronously in a single batch and returns
// any errors. Returns (nil, nil) for an empty batch.
func (db *DB) send(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if len(ba.Requests) == 0 {
		return nil, nil
	}

	if ba.ReadConsistency == roachpb.INCONSISTENT {
		for _, ru := range ba.Requests {
			req := ru.GetInner()
			if req.Method() != roachpb.Get && req.Method() != roachpb.Scan &&
				req.Method() != roachpb.ReverseScan {
				return nil, roachpb.NewErrorf("method %s not allowed with INCONSISTENT batch", req.Method())
			}
		}
	}

	if db.ctx.UserPriority != 1 {
		ba.UserPriority = db.ctx.UserPriority
	}

	tracing.AnnotateTrace()

	br, pErr := db.sender.Send(context.TODO(), ba)
	if pErr != nil {
		if log.V(1) {
			log.Infof("failed batch: %s", pErr)
		}
		return nil, pErr
	}
	return br, nil
}
// sendRPC sends one or more RPCs to replicas from the supplied roachpb.Replica
// slice. First, replicas which have gossiped addresses are corralled (and
// rearranged depending on proximity and whether the request needs to go to a
// leader) and then sent via Send, with the requirement that one RPC to a
// server must succeed. Returns an RPC error if the request could not be sent.
// Note that the reply may contain a higher level error and must be checked in
// addition to the RPC error.
func (ds *DistSender) sendRPC(trace opentracing.Span, rangeID roachpb.RangeID, replicas ReplicaSlice,
	order orderingPolicy, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if len(replicas) == 0 {
		return nil, roachpb.NewError(noNodeAddrsAvailError{})
	}

	// TODO(pmattis): This needs to be tested. If it isn't set we'll
	// still route the request appropriately by key, but won't receive
	// RangeNotFoundErrors.
	ba.RangeID = rangeID

	// Set RPC opts with the stipulation that one of N RPCs must succeed.
	rpcOpts := SendOptions{
		Ordering:        order,
		SendNextTimeout: defaultSendNextTimeout,
		Timeout:         rpc.DefaultRPCTimeout,
		Trace:           trace,
	}
	tracing.AnnotateTrace()
	defer tracing.AnnotateTrace()

	reply, err := ds.rpcSend(rpcOpts, replicas, ba, ds.rpcContext)
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	return reply.(*roachpb.BatchResponse), nil
}
func (tc *TxnCoordSender) clientHasAbandoned(txnID uuid.UUID) {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	var intentSpans []roachpb.Span

	// TODO(tschottdorf): should we be more proactive here?
	// The client might be continuing the transaction
	// through another coordinator, but in the most likely
	// case it's just gone and the open transaction record
	// could block concurrent operations.
	if log.V(1) {
		log.Infof("transaction %s abandoned; stopping heartbeat", txnMeta.txn)
	}
	// Grab the intents here to avoid potential race.
	intentSpans = collectIntentSpans(txnMeta.keys)
	txnMeta.keys.Clear()

	// txnMeta.txn is possibly replaced concurrently,
	// so grab a copy before unlocking.
	txn := txnMeta.txn.Clone()
	tc.Unlock()

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	// Actively abort the transaction and its intents since we assume it's abandoned.
	et := &roachpb.EndTransactionRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Commit:      false,
		IntentSpans: intentSpans,
	}
	ba.Add(et)
	tc.stopper.RunAsyncTask(func() {
		// Use the wrapped sender since the normal Sender
		// does not allow clients to specify intents.
		// TODO(tschottdorf): not using the existing context here since that
		// leads to use-after-finish of the contained trace. Should fork off
		// before the goroutine.
		if _, pErr := tc.wrapped.Send(context.Background(), ba); pErr != nil {
			if log.V(1) {
				log.Warningf("abort due to inactivity failed for %s: %s ", txn, pErr)
			}
		}
	})
}