コード例 #1
0
// sendBatch unrolls a batched command and sends each constituent
// command in parallel.
// TODO(tschottdorf): modify sendBatch so that it sends truly parallel requests
// when outside of a Transaction. This can then be used to address the TODO in
// (*TxnCoordSender).resolve().
func (tc *TxnCoordSender) sendBatch(ctx context.Context, batchArgs *proto.BatchRequest, batchReply *proto.BatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		if err := updateForBatch(args, batchArgs.RequestHeader); err != nil {
			batchReply.Header().SetGoError(err)
			return
		}
		call := proto.Call{Args: args}
		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			call.Reply = args.CreateReply()
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(ctx, call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
コード例 #2
0
// sendBatch unrolls a batched command and sends each constituent
// command in parallel.
func (tc *TxnCoordSender) sendBatch(batchArgs *proto.InternalBatchRequest, batchReply *proto.InternalBatchResponse) {
	// Prepare the calls by unrolling the batch. If the batchReply is
	// pre-initialized with replies, use those; otherwise create replies
	// as needed.
	// TODO(spencer): send calls in parallel.
	batchReply.Txn = batchArgs.Txn
	for i := range batchArgs.Requests {
		args := batchArgs.Requests[i].GetValue().(proto.Request)
		call := proto.Call{Args: args}
		// Disallow transaction, user and priority on individual calls, unless
		// equal.
		if args.Header().User != "" && args.Header().User != batchArgs.User {
			batchReply.Header().SetGoError(util.Error("cannot have individual user on call in batch"))
			return
		}
		args.Header().User = batchArgs.User
		if args.Header().UserPriority != nil && args.Header().GetUserPriority() != batchArgs.GetUserPriority() {
			batchReply.Header().SetGoError(util.Error("cannot have individual user priority on call in batch"))
			return
		}
		args.Header().UserPriority = batchArgs.UserPriority
		if txn := args.Header().Txn; txn != nil && !txn.Equal(batchArgs.Txn) {
			batchReply.Header().SetGoError(util.Error("cannot have individual transactional call in batch"))
			return
		}
		// Propagate batch Txn to each call.
		args.Header().Txn = batchArgs.Txn

		// Create a reply from the method type and add to batch response.
		if i >= len(batchReply.Responses) {
			call.Reply = args.CreateReply()
			batchReply.Add(call.Reply)
		} else {
			call.Reply = batchReply.Responses[i].GetValue().(proto.Response)
		}
		tc.sendOne(call)
		// Amalgamate transaction updates and propagate first error, if applicable.
		if batchReply.Txn != nil {
			batchReply.Txn.Update(call.Reply.Header().Txn)
		}
		if call.Reply.Header().Error != nil {
			batchReply.Error = call.Reply.Header().Error
			return
		}
	}
}
コード例 #3
0
ファイル: provisional.go プロジェクト: harryyeh/cockroach
// MaybeWrapCall returns a new call which wraps the original Args and Reply
// in a batch, if necessary.
// TODO(tschottdorf): will go when proto.Call does.
func MaybeWrapCall(call proto.Call) (proto.Call, func(proto.Call) proto.Call) {
	var unwrap func(proto.Response) proto.Response
	call.Args, unwrap = MaybeWrap(call.Args)
	newUnwrap := func(origReply proto.Response) func(proto.Call) proto.Call {
		return func(newCall proto.Call) proto.Call {
			origReply.Reset()
			gogoproto.Merge(origReply, unwrap(newCall.Reply))
			*origReply.Header() = *newCall.Reply.Header()
			newCall.Reply = origReply
			return newCall
		}
	}(call.Reply)
	call.Reply = call.Args.CreateReply()
	return call, newUnwrap
}
コード例 #4
0
ファイル: dist_sender.go プロジェクト: chinnitv/cockroach
// Send implements the client.Sender interface. It verifies
// permissions and looks up the appropriate range based on the
// supplied key and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(_ context.Context, call proto.Call) {
	args := call.Args
	finalReply := call.Reply

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. This undoes that when we return.
	boundedArgs, argsBounded := args.(proto.Bounded)

	if argsBounded {
		defer func(bound int64) {
			boundedArgs.SetBound(bound)
		}(boundedArgs.GetBound())
	}

	defer func(key proto.Key) {
		args.Header().Key = key
	}(args.Header().Key)

	// Retry logic for lookup of range by key and RPCs to range replicas.
	curReply := finalReply
	for {
		call.Reply = curReply
		curReply.Header().Reset()

		var desc, descNext *proto.RangeDescriptor
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors).
			// sendAttempt below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			desc, descNext, err = ds.getDescriptors(call)
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}
			err = func() error {
				// Truncate the request to our current range, making sure not to
				// touch it unless we have to (it is illegal to send EndKey on
				// commands which do not operate on ranges).
				if descNext != nil {
					defer func(endKey proto.Key) {
						args.Header().EndKey = endKey
					}(args.Header().EndKey)
					args.Header().EndKey = desc.EndKey
				}
				leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

				// Try to send the call.
				replicas := newReplicaSlice(ds.gossip, desc)

				// Rearrange the replicas so that those replicas with long common
				// prefix of attributes end up first. If there's no prefix, this is a
				// no-op.
				order := ds.optimizeReplicaOrder(replicas)

				// If this request needs to go to a leader and we know who that is, move
				// it to the front.
				if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
					leader.StoreID > 0 {
					if i := replicas.FindReplica(leader.StoreID); i >= 0 {
						replicas.MoveToFront(i)
						order = rpc.OrderStable
					}
				}

				return ds.sendRPC(desc.RaftID, replicas, order, args, curReply)
			}()
			if err != nil {
				// For an RPC error to occur, we must've been unable to contact any
				// replicas. In this case, likely all nodes are down (or not getting back
				// to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date replicas, so
				// clearing the descriptor here should be a good idea.
				// TODO(tschottdorf): If a replica group goes dead, this will cause clients
				// to put high read pressure on the first range, so there should be some
				// rate limiting here.
				ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
			} else {
				err = curReply.Header().GoError()
			}

			if err != nil {
				if log.V(1) {
					log.Warningf("failed to invoke %s: %s", call.Method(), err)
				}

				// If retryable, allow retry. For range not found or range
				// key mismatch errors, we don't backoff on the retry,
				// but reset the backoff loop so we can retry immediately.
				switch tErr := err.(type) {
				case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
					// Range descriptor might be out of date - evict it.
					ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
					// On addressing errors, don't backoff; retry immediately.
					r.Reset()
					if log.V(1) {
						log.Warning(err)
					}
					continue
				case *proto.NotLeaderError:
					newLeader := tErr.GetLeader()
					// Verify that leader is a known replica according to the
					// descriptor. If not, we've got a stale replica; evict cache.
					// Next, cache the new leader.
					if newLeader != nil {
						if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
							if log.V(1) {
								log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
							}
							ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
						}
					} else {
						newLeader = &proto.Replica{}
					}
					ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader)
					if log.V(1) {
						log.Warning(err)
					}
					r.Reset()
					continue
				case util.Retryable:
					if tErr.CanRetry() {
						if log.V(1) {
							log.Warning(err)
						}
						continue
					}
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if finalReply != curReply {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cFinalReply, ok := finalReply.(proto.Combinable); ok {
				cFinalReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if argsBounded {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		// In next iteration, query next range.
		// It's important that we use the EndKey of the current descriptor
		// as opposed to the StartKey of the next one: if the former is stale,
		// it's possible that the next range has since merged the subsequent
		// one, and unless both descriptors are stale, the next descriptor's
		// StartKey would move us to the beginning of the current range,
		// resulting in a duplicate scan.
		args.Header().Key = desc.EndKey

		// This is a multi-range request, make a new reply object for
		// subsequent iterations of the loop.
		curReply = args.CreateReply()
	}
	call.Reply = finalReply
}
コード例 #5
0
ファイル: dist_sender.go プロジェクト: Hellblazer/cockroach
// Send implements the client.Sender interface. It verifies
// permissions and looks up the appropriate range based on the
// supplied key and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(_ context.Context, call proto.Call) {
	args := call.Args
	finalReply := call.Reply
	endKey := args.Header().EndKey

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. This undoes that when we return.
	boundedArgs, _ := args.(proto.Bounded)

	if boundedArgs != nil {
		defer func(n int64) {
			boundedArgs.SetBound(n)
		}(boundedArgs.GetBound())
	}

	// Retry logic for lookup of range by key and RPCs to range replicas.
	retryOpts := ds.rpcRetryOptions
	retryOpts.Tag = "routing " + call.Method().String() + " rpc"

	curReply := finalReply
	for {
		call.Reply = curReply
		curReply.Header().Reset()

		var desc, descNext *proto.RangeDescriptor
		err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
			var err error
			// Get range descriptor (or, when spanning range, descriptors).
			// sendAttempt below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			desc, descNext, err = ds.getDescriptors(call)
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() {
					return retry.Continue, err
				}
				return retry.Break, err
			}
			// Truncate the request to our current range, making sure not to
			// touch it unless we have to (it is illegal to send EndKey on
			// commands which do not operate on ranges).
			if descNext != nil {
				args.Header().EndKey = desc.EndKey
				defer func() {
					// "Untruncate" EndKey to original.
					args.Header().EndKey = endKey
				}()
			}
			return ds.sendAttempt(desc, call)
		})

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if finalReply != curReply {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cFinalReply, ok := finalReply.(proto.Combinable); ok {
				cFinalReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if boundedArgs != nil {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		if curReply == finalReply {
			// This is the end of the first iteration in a multi-range query,
			// so it's a convenient place to clean up changes to the args in
			// the case of multi-range requests.
			// Reset original start key (the EndKey is taken care of without
			// defer above).
			defer func(k proto.Key) {
				args.Header().Key = k
			}(args.Header().Key)
		}

		// In next iteration, query next range.
		args.Header().Key = descNext.StartKey

		// This is a multi-range request, make a new reply object for
		// subsequent iterations of the loop.
		curReply = args.CreateReply()
	}
	call.Reply = finalReply
}
コード例 #6
0
// close sends resolve intent commands for all key ranges this
// transaction has covered, clears the keys cache and closes the
// metadata heartbeat. Any keys listed in the resolved slice have
// already been resolved and do not receive resolve intent commands.
func (tm *txnMetadata) close(txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *util.Stopper) {
	close(tm.txnEnd) // stop heartbeat
	if tm.keys.Len() > 0 {
		if log.V(2) {
			log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn)
		}
	}
	for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) {
		// If the op was range based, end key != start key: resolve a range.
		var call proto.Call
		key := o.Key.Start().(proto.Key)
		endKey := o.Key.End().(proto.Key)
		if !key.Next().Equal(endKey) {
			call.Args = &proto.InternalResolveIntentRangeRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					EndKey:    endKey,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentRangeResponse{}
		} else {
			// Check if the key has already been resolved; skip if yes.
			found := false
			for _, k := range resolved {
				if key.Equal(k) {
					found = true
				}
			}
			if found {
				continue
			}
			call.Args = &proto.InternalResolveIntentRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentResponse{}
		}
		// We don't care about the reply channel; these are best
		// effort. We simply fire and forget, each in its own goroutine.
		if stopper.StartTask() {
			go func() {
				if log.V(2) {
					log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn)
				}
				sender.Send(context.TODO(), call)
				if call.Reply.Header().Error != nil {
					log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError())
				}
				stopper.FinishTask()
			}()
		}
	}
	tm.keys.Clear()
}
コード例 #7
0
// close sends resolve intent commands for all key ranges this
// transaction has covered, clears the keys cache and closes the
// metadata heartbeat. Any keys listed in the resolved slice have
// already been resolved and do not receive resolve intent commands.
func (tm *txnMetadata) close(trace *tracer.Trace, txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *stop.Stopper) {
	close(tm.txnEnd) // stop heartbeat
	trace.Event("coordinator stops")
	if tm.keys.Len() > 0 {
		if log.V(2) {
			log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn)
		}
	}
	// TODO(tschottdorf): Should create a Batch here.
	for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) {
		// If the op was range based, end key != start key: resolve a range.
		var call proto.Call
		key := o.Key.Start().(proto.Key)
		endKey := o.Key.End().(proto.Key)
		if !key.Next().Equal(endKey) {
			call.Args = &proto.InternalResolveIntentRangeRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					EndKey:    endKey,
					User:      security.RootUser,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentRangeResponse{}
		} else {
			// Check if the key has already been resolved; skip if yes.
			found := false
			for _, k := range resolved {
				if key.Equal(k) {
					found = true
				}
			}
			if found {
				continue
			}
			call.Args = &proto.InternalResolveIntentRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					User:      security.RootUser,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentResponse{}
		}
		// We don't care about the reply channel; these are best
		// effort. We simply fire and forget, each in its own goroutine.
		ctx := tracer.ToCtx(context.Background(), trace.Fork())
		stopper.RunAsyncTask(func() {
			if log.V(2) {
				log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn)
			}
			sender.Send(ctx, call)
			if call.Reply.Header().Error != nil {
				log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError())
			}
		})
	}
	tm.keys.Clear()
}
コード例 #8
0
// resolve sends resolve intent commands for all key ranges this transaction
// has covered. Any keys listed in the resolved slice have already been
// resolved and are skipped.
func (tm *txnMetadata) resolve(trace *tracer.Trace, resolved []proto.Key, sender client.Sender) {
	txn := &tm.txn
	if tm.keys.Len() > 0 {
		if log.V(2) {
			log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn)
		}
	}
	// TODO(tschottdorf): Should create a Batch here. However, we're resolving
	// intents and if those are on meta records, there may be a certain order
	// in which they need to be resolved so that they can get routed to the
	// correct range. Since a batch runs its commands one by one and we don't
	// know the correct order, we prefer to fire them off in parallel.
	var wg sync.WaitGroup
	for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) {
		// If the op was range based, end key != start key: resolve a range.
		var call proto.Call
		key := o.Key.Start().(proto.Key)
		endKey := o.Key.End().(proto.Key)
		if !key.Next().Equal(endKey) {
			call.Args = &proto.InternalResolveIntentRangeRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					EndKey:    endKey,
					User:      security.RootUser,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentRangeResponse{}
		} else {
			// Check if the key has already been resolved; skip if yes.
			found := false
			for _, k := range resolved {
				if key.Equal(k) {
					if log.V(2) {
						log.Warningf("skipping previously resolved intent at %q", k)
					}
					found = true
				}
			}
			if found {
				continue
			}
			call.Args = &proto.InternalResolveIntentRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					User:      security.RootUser,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentResponse{}
		}
		ctx := tracer.ToCtx(context.Background(), trace.Fork())
		if log.V(2) {
			log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn)
		}
		// Each operation gets their own goroutine. We only want to return to
		// the caller after the operations have finished.
		wg.Add(1)
		go func() {
			sender.Send(ctx, call)
			wg.Done()
			if call.Reply.Header().Error != nil {
				log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError())
			}
		}()
	}
	defer trace.Epoch("waiting for intent resolution")()
	wg.Wait()
	tm.keys.Clear()
}