Example #1
// Batch sends a request to Cockroach via RPC. Errors which are retryable are
// retried with backoff in a loop using the default retry options. Other errors
// sending the request are retried indefinitely using the same client command
// ID to avoid reporting failure when in fact the command may have gone through
// and been executed successfully. We retry here to eventually get through with
// the same client command ID and be given the cached response.
func (s *rpcSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
	var err error
	var br proto.BatchResponse
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, &ba, &br); err != nil {
			br.Reset() // don't trust anyone.
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visibility that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with the retry loop.
		break
	}
	if err != nil {
		return nil, proto.NewError(err)
	}
	pErr := br.Error
	br.Error = nil
	return &br, pErr
}
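
The pattern above (health check, retry loop, reuse of the same attempt until it gets through) can be reduced to a small standalone sketch. The following is a minimal, self-contained illustration of retrying a send with capped exponential backoff; sendOnce, the backoff constants, and the jitter are hypothetical stand-ins for illustration, not the CockroachDB retry package.

package main

import (
	"errors"
	"fmt"
	"math/rand"
	"time"
)

// sendOnce is a hypothetical stand-in for a single RPC attempt.
func sendOnce(attempt int) error {
	if attempt < 3 {
		return errors.New("transient network error") // pretend the first attempts fail
	}
	return nil
}

// sendWithBackoff retries sendOnce with capped exponential backoff,
// mirroring the shape of the retry loop in the Send method above.
func sendWithBackoff(maxAttempts int) error {
	backoff := 50 * time.Millisecond
	const maxBackoff = 2 * time.Second
	var err error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		if err = sendOnce(attempt); err == nil {
			return nil // success: leave the retry loop
		}
		fmt.Printf("attempt %d failed: %v; retrying in %s\n", attempt, err, backoff)
		// Sleep with a little jitter, then double the backoff up to the cap.
		time.Sleep(backoff + time.Duration(rand.Int63n(int64(backoff/2))))
		if backoff *= 2; backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
	return err
}

func main() {
	if err := sendWithBackoff(5); err != nil {
		fmt.Println("giving up:", err)
	}
}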
Example #2
func newTestSender(pre, post func(proto.BatchRequest) (*proto.BatchResponse, *proto.Error)) SenderFunc {
	txnKey := proto.Key("test-txn")
	txnID := []byte(uuid.NewUUID4())

	return func(_ context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
		ba.UserPriority = gogoproto.Int32(-1)
		if ba.Txn != nil && len(ba.Txn.ID) == 0 {
			ba.Txn.Key = txnKey
			ba.Txn.ID = txnID
		}

		var br *proto.BatchResponse
		var pErr *proto.Error
		if pre != nil {
			br, pErr = pre(ba)
		} else {
			br = &proto.BatchResponse{}
		}
		if pErr != nil {
			return nil, pErr
		}
		var writing bool
		status := proto.PENDING
		if _, ok := ba.GetArg(proto.Put); ok {
			br.Add(gogoproto.Clone(testPutResp).(proto.Response))
			writing = true
		}
		if args, ok := ba.GetArg(proto.EndTransaction); ok {
			et := args.(*proto.EndTransactionRequest)
			writing = true
			if et.Commit {
				status = proto.COMMITTED
			} else {
				status = proto.ABORTED
			}
		}
		br.Txn = gogoproto.Clone(ba.Txn).(*proto.Transaction)
		if br.Txn != nil && pErr == nil {
			br.Txn.Writing = writing
			br.Txn.Status = status
		}

		if post != nil {
			br, pErr = post(ba)
		}
		return br, pErr
	}
}
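
For readers unfamiliar with the SenderFunc idiom used above, the sketch below shows the general pattern: a function type satisfying a one-method interface, built by a constructor that accepts optional pre/post hooks. The Sender interface, Request/Response types, and hook signatures here are simplified assumptions for illustration, not the actual client package.

package main

import "fmt"

// Request and Response are simplified stand-ins for proto.BatchRequest / BatchResponse.
type Request struct{ Name string }
type Response struct{ Msg string }

// Sender is a one-method interface, analogous to the client-side Sender.
type Sender interface {
	Send(Request) Response
}

// SenderFunc lets a plain function satisfy Sender, the same trick
// used by newTestSender above.
type SenderFunc func(Request) Response

func (f SenderFunc) Send(r Request) Response { return f(r) }

// newStubSender builds a SenderFunc with optional pre/post hooks,
// mirroring the pre/post parameters of newTestSender.
func newStubSender(pre, post func(Request) *Response) SenderFunc {
	return func(r Request) Response {
		if pre != nil {
			if resp := pre(r); resp != nil {
				return *resp // pre hook short-circuits the default behavior
			}
		}
		resp := Response{Msg: "handled " + r.Name}
		if post != nil {
			if p := post(r); p != nil {
				resp = *p // post hook may override the reply
			}
		}
		return resp
	}
}

func main() {
	var s Sender = newStubSender(nil, nil)
	fmt.Println(s.Send(Request{Name: "put"}).Msg)
}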
Example #3
// sendBatch unrolls a batched command and sends each constituent
// command in parallel.
// TODO(tschottdorf): modify sendBatch so that it sends truly parallel requests
// when outside of a Transaction. This can then be used to address the TODO in
// (*TxnCoordSender).resolve().
func (tc *TxnCoordSender) sendBatch(ctx context.Context, batchArgs *proto.BatchRequest, batchReply *proto.BatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		if err := updateForBatch(args, batchArgs.RequestHeader); err != nil {
			batchReply.Header().SetGoError(err)
			return
		}
		call := proto.Call{Args: args}
		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			call.Reply = args.CreateReply()
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(ctx, call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
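
Stripped of the proto types, the control flow of sendBatch is: iterate over the requests, send each one, merge per-request state into the batch reply, and stop at the first error. A minimal sketch of that shape, with hypothetical request/reply types rather than the real ones:

package main

import (
	"errors"
	"fmt"
)

// req and reply are hypothetical stand-ins for the batch's constituent
// requests and responses.
type req struct{ key string }
type reply struct {
	val string
	err error
}

// sendOne is a stand-in for the per-request send (TxnCoordSender.sendOne).
func sendOne(r req) reply {
	if r.key == "bad" {
		return reply{err: errors.New("boom")}
	}
	return reply{val: "value-for-" + r.key}
}

// sendBatch unrolls the batch, sends each request in turn, and
// propagates the first error, mirroring the loop above.
func sendBatch(reqs []req) ([]reply, error) {
	replies := make([]reply, 0, len(reqs))
	for _, r := range reqs {
		rep := sendOne(r)
		replies = append(replies, rep)
		if rep.err != nil {
			// First error wins; remaining requests are not sent.
			return replies, rep.err
		}
	}
	return replies, nil
}

func main() {
	replies, err := sendBatch([]req{{"a"}, {"bad"}, {"c"}})
	fmt.Println(len(replies), err) // 2 boom: "c" was never sent
}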
Example #4
// TODO(tschottdorf): this method is somewhat awkward but unless we want to
// give this error back to the client, our options are limited. We'll have to
// run the whole thing for them, or any restart will still end up at the client
// which will not be prepared to be handed a Txn.
func (tc *TxnCoordSender) resendWithTxn(ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
	// Run a one-off transaction with that single command.
	if log.V(1) {
		log.Infof("%s: auto-wrapping in txn and re-executing: ", ba)
	}
	tmpDB := client.NewDBWithPriority(tc, ba.GetUserPriority())
	var br *proto.BatchResponse
	err := tmpDB.Txn(func(txn *client.Txn) error {
		txn.SetDebugName("auto-wrap", 0)
		b := &client.Batch{}
		for _, arg := range ba.Requests {
			req := arg.GetInner()
			b.InternalAddRequest(req)
		}
		var err error
		br, err = txn.CommitInBatchWithResponse(b)
		return err
	})
	if err != nil {
		return nil, proto.NewError(err)
	}
	br.Txn = nil // hide the evidence
	return br, nil
}
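
The auto-wrap above relies on the client's retryable-closure idiom: the body of the transaction is a function that may be run more than once, so all per-attempt state must live inside the closure and only the final result should escape. A rough sketch of that idiom with a hypothetical runTxn helper (not the real client.DB.Txn API):

package main

import (
	"errors"
	"fmt"
)

// retryableError marks errors that should cause the closure to be re-run.
type retryableError struct{ error }

// runTxn is a hypothetical stand-in for client.DB.Txn: it runs fn,
// retrying it while it returns a retryable error.
func runTxn(fn func(attempt int) error) error {
	for attempt := 1; ; attempt++ {
		err := fn(attempt)
		if err == nil {
			return nil
		}
		var r retryableError
		if !errors.As(err, &r) {
			return err // permanent failure
		}
		// Retryable: loop and run the closure again with fresh state.
	}
}

func main() {
	var result string
	err := runTxn(func(attempt int) error {
		// Only the final result escapes the closure, mirroring how br is
		// captured in resendWithTxn above.
		if attempt < 2 {
			return retryableError{errors.New("serialization conflict")}
		}
		result = fmt.Sprintf("committed on attempt %d", attempt)
		return nil
	})
	fmt.Println(result, err)
}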
Example #5
// sendBatch unrolls a batched command and sends each constituent
// command in parallel.
func (tc *TxnCoordSender) sendBatch(batchArgs *proto.BatchRequest, batchReply *proto.BatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		// Initialize args header values where appropriate.
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		method, err := proto.MethodForRequest(args)
		call := &client.Call{Method: method, Args: args}
		if err != nil {
			batchReply.SetGoError(err)
			return
		}
		if args.Header().User == "" {
			args.Header().User = batchArgs.User
		}
		if args.Header().UserPriority == nil {
			args.Header().UserPriority = batchArgs.UserPriority
		}
		args.Header().Txn = batchArgs.Txn

		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			if call.Reply, err = proto.CreateReply(method); err != nil {
				batchReply.SetGoError(util.Errorf("unsupported method in batch: %s", method))
				return
			}
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
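
One detail worth pulling out of this variant of sendBatch is how batch-level header fields (user, priority, transaction) are inherited by each constituent request only when the request does not set them itself. A small sketch of that defaulting rule with simplified header structs; the field names are assumptions for illustration, not the proto definitions:

package main

import "fmt"

// header holds the fields a batch can default into its requests.
type header struct {
	User         string
	UserPriority *int32
	Txn          string
}

// applyBatchDefaults copies batch-level values into a request header,
// but only where the request hasn't set its own, mirroring the
// User/UserPriority/Txn handling in sendBatch above.
func applyBatchDefaults(req *header, batch header) {
	if req.User == "" {
		req.User = batch.User
	}
	if req.UserPriority == nil {
		req.UserPriority = batch.UserPriority
	}
	// The transaction always comes from the batch.
	req.Txn = batch.Txn
}

func main() {
	prio := int32(5)
	batch := header{User: "root", UserPriority: &prio, Txn: "txn-1"}
	req := header{User: "alice"} // keeps its own user, inherits the rest
	applyBatchDefaults(&req, batch)
	fmt.Println(req.User, *req.UserPriority, req.Txn) // alice 5 txn-1
}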
Example #6
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba proto.BatchRequest, br *proto.BatchResponse, pErr *proto.Error) *proto.Error {
	trace := tracer.FromCtx(ctx)
	newTxn := &proto.Transaction{}
	newTxn.Update(ba.GetTxn())
	err := pErr.GoError()
	switch t := err.(type) {
	case nil:
		newTxn.Update(br.GetTxn())
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(trace, t.Txn)
	case *proto.OpRequiresTxnError:
		// TODO(tschottdorf): range-spanning autowrap currently broken.
		panic("TODO(tschottdorf): disabled")
	case *proto.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.GetUserPriority(), newTxn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case *proto.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(t.Txn.Timestamp)
		newTxn.Priority = t.Txn.Priority
		t.Txn = *newTxn
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(trace, t.Txn)
	case *proto.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.GetUserPriority(), t.PusheeTxn.Priority-1, newTxn.Timestamp)
		t.Txn = newTxn
	case *proto.TransactionRetryError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(t.Txn.Timestamp)
		newTxn.Restart(ba.GetUserPriority(), t.Txn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case proto.TransactionRestartError:
		// Assertion: The above cases should exhaust all ErrorDetails which
		// carry a Transaction.
		if pErr.Detail != nil {
			panic(fmt.Sprintf("unhandled TransactionRestartError %T", err))
		}
	}

	return func() *proto.Error {
		if len(newTxn.ID) <= 0 {
			return pErr
		}
		id := string(newTxn.ID)
		tc.Lock()
		defer tc.Unlock()
		txnMeta := tc.txns[id]
		// For successful transactional requests, keep the written intents and
		// the updated transaction record to be sent along with the reply.
		// The transaction metadata is created with the first writing operation
		// TODO(tschottdorf): already computed the intents prior to sending,
		// consider re-using those.
		if intents := ba.GetIntents(); len(intents) > 0 && err == nil {
			if txnMeta == nil {
				newTxn.Writing = true
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[id] = txnMeta
				// If the transaction is already over, there's no point in
				// launching a one-off coordinator which will shut down right
				// away.
				if _, isEnding := ba.GetArg(proto.EndTransaction); !isEnding {
					trace.Event("coordinator spawns")
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						tc.unregisterTxnLocked(id)
						return proto.NewError(&proto.NodeUnavailableError{})
					}
				}
			}
			for _, intent := range intents {
				txnMeta.addKeyRange(intent.Key, intent.EndKey)
			}
		}
		// Update our record of this transaction, even on error.
		if txnMeta != nil {
			txnMeta.txn.Update(newTxn) // better to replace after #2300
			if !txnMeta.txn.Writing {
				panic("tracking a non-writing txn")
			}
			txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		}
		if err == nil {
			// For successful transactional requests, always send the updated txn
			// record back.
			if br.Txn == nil {
				br.Txn = &proto.Transaction{}
			}
			*br.Txn = *newTxn
		}
		return pErr
	}()
}
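
The timestamp arithmetic in the ReadWithinUncertaintyIntervalError case is easy to miss: the transaction timestamp is pushed just past the conflicting write, but never beyond MaxTimestamp. The sketch below replays that with plain integer timestamps and hypothetical forward/backward helpers; the real code uses proto.Timestamp values with wall and logical components.

package main

import "fmt"

// ts is a toy scalar timestamp; the real type has wall and logical parts.
type ts int64

// forward moves t up to at least other (like Timestamp.Forward).
func (t *ts) forward(other ts) {
	if other > *t {
		*t = other
	}
}

// backward moves t down to at most other (like Timestamp.Backward).
func (t *ts) backward(other ts) {
	if other < *t {
		*t = other
	}
}

func main() {
	txnTS := ts(10)         // current transaction timestamp
	maxTS := ts(25)         // transaction's MaxTimestamp (uncertainty limit)
	existingWrite := ts(18) // newer write seen inside the uncertainty interval

	// Mirror of the updateState logic: the candidate starts at MaxTimestamp,
	// is pulled back to just past the conflicting write, and the txn
	// timestamp is then pushed forward to the candidate.
	candidate := maxTS
	candidate.backward(existingWrite + 1)
	txnTS.forward(candidate)

	fmt.Println(txnTS) // 19: just past the write, still <= MaxTimestamp
}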
Example #7
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc).
func (ds *DistSender) sendChunk(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, error) {
	// TODO(tschottdorf): prepare for removing Key and EndKey from BatchRequest,
	// making sure that anything that relies on them goes bust.
	ba.Key, ba.EndKey = nil, nil

	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	from, to := keys.Range(ba)
	var br *proto.BatchResponse
	// Send the request to one range per iteration.
	for {
		options := lookupOptions{
			useReverseScan: isReverse,
		}

		var curReply *proto.BatchResponse
		var desc *proto.RangeDescriptor
		var needAnother bool
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()

			desc, needAnother, evictDesc, err = ds.getDescriptors(from, to, options)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(retry.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}

			// If there's no transaction and op spans ranges, possibly
			// re-run as part of a transaction for consistency. The
			// case where we don't need to re-run is if the read
			// consistency is not required.
			if needAnother && ba.Txn == nil && ba.IsRange() &&
				ba.ReadConsistency != proto.INCONSISTENT {
				return nil, &proto.OpRequiresTxnError{}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, to)) || (!isReverse && !desc.ContainsKeyRange(from, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, err = func() (*proto.BatchResponse, error) {
				// Truncate the request to our current key range.
				untruncate, numActive, trErr := truncate(&ba, desc, from, to)
				if numActive == 0 {
					untruncate()
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, util.Errorf("truncation resulted in empty batch on [%s,%s): %s",
						from, to, ba)
				}
				defer untruncate()
				if trErr != nil {
					return nil, trErr
				}
				// TODO(tschottdorf): make key range on batch redundant. The
				// requests within dictate it anyways.
				ba.Key, ba.EndKey = keys.Range(ba)
				reply, err := ds.sendAttempt(trace, ba, desc)
				ba.Key, ba.EndKey = nil, nil

				if err != nil {
					if log.V(0 /* TODO(tschottdorf): 1 */) {
						log.Warningf("failed to invoke %s: %s", ba, err)
					}
				}
				return reply, err
			}()
			// If sending succeeded, break this loop.
			if err == nil {
				break
			}

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := err.(type) {
			case *rpc.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(err)
				}
				// For the remainder of this call, we'll assume that intents
				// are fair game. This replaces more complex logic based on
				// the type of request.
				options.considerIntents = true
				continue
			case *proto.NotLeaderError:
				trace.Event(fmt.Sprintf("reply error: %T", err))
				newLeader := tErr.GetLeader()
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &proto.Replica{}
				}
				ds.updateLeaderCache(proto.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(err)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					trace.Event(fmt.Sprintf("reply error: %T", err))
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if err != nil {
			return nil, err
		}

		first := br == nil
		if first {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				panic(err)
				// TODO(tschottdorf): return nil, err
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if first {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]proto.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetValue()
				if _, ok := args.(*proto.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(proto.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetValue().(proto.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&proto.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			to = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			from = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}
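
The bookkeeping at the end of the loop (decrement each bounded request's remaining limit by the rows the current range returned, and stop once it is saturated) can be shown in isolation. The sketch below uses a hypothetical scanRange function and a single scalar bound; it is not the DistSender API, just the shape of the bound accounting across ranges.

package main

import "fmt"

// scanRange is a stand-in for querying one range: it returns up to
// limit rows for the given range index.
func scanRange(rangeIdx int, limit int64) int64 {
	rowsInRange := int64(4) // pretend every range holds 4 matching rows
	if limit < rowsInRange {
		return limit
	}
	return rowsInRange
}

// boundedScan walks ranges in order, decrementing the remaining bound
// after each range and stopping once the bound is saturated or the
// ranges are exhausted, mirroring the needAnother bookkeeping above.
func boundedScan(numRanges int, maxResults int64) int64 {
	var total int64
	remaining := maxResults
	for r := 0; r < numRanges; r++ {
		got := scanRange(r, remaining)
		total += got
		remaining -= got
		if remaining <= 0 {
			// Saturated: no need to query further ranges.
			break
		}
	}
	return total
}

func main() {
	fmt.Println(boundedScan(5, 10)) // 10: stops partway through the third range
}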