Example #1
// sendAttempt is invoked by Send. It temporarily truncates the arguments to
// match the descriptor's EndKey (if necessary) and gathers and rearranges the
// replicas before making a single attempt at sending the request. It returns
// the result of sending the RPC; a potential error contained in the reply has
// to be handled separately by the caller.
func (ds *DistSender) sendAttempt(trace *tracer.Trace, args proto.Request, reply proto.Response, desc *proto.RangeDescriptor) error {
	defer trace.Epoch("sending RPC")()
	// Truncate the request to our current range, making sure not to
	// touch it unless we have to (it is illegal to send EndKey on
	// commands which do not operate on ranges).
	if endKey := args.Header().EndKey; endKey != nil && !endKey.Less(desc.EndKey) {
		defer func(k proto.Key) { args.Header().EndKey = k }(endKey)
		args.Header().EndKey = desc.EndKey
	}
	leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those with a long common prefix of
	// attributes end up first. If there's no common prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
		leader.StoreID > 0 {
		if i := replicas.FindReplica(leader.StoreID); i >= 0 {
			replicas.MoveToFront(i)
			order = rpc.OrderStable
		}
	}

	return ds.sendRPC(trace, desc.RaftID, replicas, order, args, reply)
}
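The truncation at the top of sendAttempt leans on a small Go idiom: the original value is passed as an argument to a deferred closure, so it is restored no matter how the function returns, and the argument is evaluated at defer time, before the field is overwritten. Below is a minimal, self-contained sketch of the same pattern; the Header type and the withTruncatedEndKey helper are illustrative stand-ins, not the actual proto types.

package main

import "fmt"

// Header stands in for a request header whose EndKey may need clamping.
type Header struct {
	Key, EndKey string
}

// withTruncatedEndKey temporarily clamps h.EndKey to limit, runs fn, and
// restores the original EndKey afterwards via the deferred closure.
func withTruncatedEndKey(h *Header, limit string, fn func()) {
	if h.EndKey > limit {
		// Capture the original value now; restore it on return.
		defer func(orig string) { h.EndKey = orig }(h.EndKey)
		h.EndKey = limit
	}
	fn()
}

func main() {
	h := &Header{Key: "a", EndKey: "z"}
	withTruncatedEndKey(h, "m", func() { fmt.Println(h.EndKey) }) // "m"
	fmt.Println(h.EndKey)                                         // "z" again
}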
Example #2
// Send implements the client.Sender interface. It verifies
// permissions, looks up the appropriate range based on the supplied
// key, and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(_ context.Context, call proto.Call) {
	args := call.Args
	finalReply := call.Reply

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	// In the event that the timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. The deferred function below undoes that when we return.
	boundedArgs, argsBounded := args.(proto.Bounded)

	if argsBounded {
		defer func(bound int64) {
			boundedArgs.SetBound(bound)
		}(boundedArgs.GetBound())
	}

	defer func(key proto.Key) {
		args.Header().Key = key
	}(args.Header().Key)

	// Retry logic for lookup of range by key and RPCs to range replicas.
	curReply := finalReply
	for {
		call.Reply = curReply
		curReply.Header().Reset()

		var desc, descNext *proto.RangeDescriptor
		var err error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get the range descriptor (or, when spanning ranges, descriptors).
			// The send attempt below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			desc, descNext, err = ds.getDescriptors(call)
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() {
					if log.V(1) {
						log.Warning(err)
					}
					continue
				}
				break
			}
			err = func() error {
				// Truncate the request to our current range, making sure not to
				// touch it unless we have to (it is illegal to send EndKey on
				// commands which do not operate on ranges).
				if descNext != nil {
					defer func(endKey proto.Key) {
						args.Header().EndKey = endKey
					}(args.Header().EndKey)
					args.Header().EndKey = desc.EndKey
				}
				leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

				// Try to send the call.
				replicas := newReplicaSlice(ds.gossip, desc)

				// Rearrange the replicas so that those with a long common prefix of
				// attributes end up first. If there's no common prefix, this is a
				// no-op.
				order := ds.optimizeReplicaOrder(replicas)

				// If this request needs to go to a leader and we know who that is, move
				// it to the front.
				if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
					leader.StoreID > 0 {
					if i := replicas.FindReplica(leader.StoreID); i >= 0 {
						replicas.MoveToFront(i)
						order = rpc.OrderStable
					}
				}

				return ds.sendRPC(desc.RaftID, replicas, order, args, curReply)
			}()
			if err != nil {
				// For an RPC error to occur, we must've been unable to contact any
				// replicas. In this case, likely all nodes are down (or not getting back
				// to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date replicas, so
				// clearing the descriptor here should be a good idea.
				// TODO(tschottdorf): If a replica group goes dead, this will cause clients
				// to put high read pressure on the first range, so there should be some
				// rate limiting here.
				ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
			} else {
				err = curReply.Header().GoError()
			}

			if err != nil {
				if log.V(1) {
					log.Warningf("failed to invoke %s: %s", call.Method(), err)
				}

				// If retryable, allow retry. For range not found or range
				// key mismatch errors, we don't back off on the retry,
				// but reset the backoff loop so we can retry immediately.
				switch tErr := err.(type) {
				case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
					// Range descriptor might be out of date - evict it.
					ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
					// On addressing errors, don't backoff; retry immediately.
					r.Reset()
					if log.V(1) {
						log.Warning(err)
					}
					continue
				case *proto.NotLeaderError:
					newLeader := tErr.GetLeader()
					// Verify that the leader is a known replica according to the
					// descriptor. If not, we've got a stale descriptor; evict the
					// cache entry. Next, cache the new leader.
					if newLeader != nil {
						if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
							if log.V(1) {
								log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
							}
							ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
						}
					} else {
						newLeader = &proto.Replica{}
					}
					ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader)
					if log.V(1) {
						log.Warning(err)
					}
					r.Reset()
					continue
				case util.Retryable:
					if tErr.CanRetry() {
						if log.V(1) {
							log.Warning(err)
						}
						continue
					}
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if finalReply != curReply {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cFinalReply, ok := finalReply.(proto.Combinable); ok {
				cFinalReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if argsBounded {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		// In next iteration, query next range.
		// It's important that we use the EndKey of the current descriptor
		// as opposed to the StartKey of the next one: if the former is stale,
		// it's possible that the next range has since merged the subsequent
		// one, and unless both descriptors are stale, the next descriptor's
		// StartKey would move us to the beginning of the current range,
		// resulting in a duplicate scan.
		args.Header().Key = desc.EndKey

		// This is a multi-range request; make a new reply object for
		// subsequent iterations of the loop.
		curReply = args.CreateReply()
	}
	call.Reply = finalReply
}
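For bounded requests such as a Scan with a maximum result count, the loop above subtracts each range's returned row count from the remaining bound and stops as soon as nothing is left, while the earlier defer restores the original bound on return. The following toy sketch shows just that bookkeeping with made-up numbers; it does not use the proto.Bounded or proto.Countable interfaces.

package main

import "fmt"

func main() {
	// bound plays the role of e.g. a scan's maximum result count;
	// perRangeCounts are row counts hypothetically returned by successive ranges.
	bound := int64(10)
	perRangeCounts := []int64{4, 3, 5}
	for i, n := range perRangeCounts {
		bound -= n
		if bound <= 0 {
			fmt.Printf("stopping after range %d; bound exhausted\n", i+1)
			break
		}
		fmt.Printf("range %d returned %d rows; %d more wanted\n", i+1, n, bound)
	}
}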
Example #3
// verifyPermissions verifies that the requesting user (header.User)
// has permission to read/write (capabilities depend on method
// name). In the event that multiple permission configs apply to the
// key range implicated by the command, the lowest common denominator
// of permissions applies: for example, if a scan crosses two
// permission configs, both configs must allow read permissions or
// the entire scan will fail.
func (ds *DistSender) verifyPermissions(args proto.Request) error {
	// The root user can always proceed.
	header := args.Header()
	if header.User == storage.UserRoot {
		return nil
	}
	// Check for admin methods.
	if proto.IsAdmin(args) {
		if header.User != storage.UserRoot {
			return util.Errorf("user %q cannot invoke admin command %s", header.User, args.Method())
		}
		return nil
	}
	// Get permissions map from gossip.
	configMap, err := ds.gossip.GetInfo(gossip.KeyConfigPermission)
	if err != nil {
		return util.Errorf("permissions not available via gossip")
	}
	if configMap == nil {
		return util.Errorf("perm configs not available; cannot execute %s", args.Method())
	}
	permMap := configMap.(storage.PrefixConfigMap)
	headerEnd := header.EndKey
	if len(headerEnd) == 0 {
		headerEnd = header.Key
	}
	// Visit PermConfig(s) which apply to the method's key range.
	//   - For each perm config which the range covers, verify read or writes
	//     are allowed as method requires.
	//   - Verify the permissions hierarchically; that is, if permissions aren't
	//     granted at the longest prefix, try next longest, then next, etc., up
	//     to and including the default prefix.
	//
	// TODO(spencer): it might make sense to visit prefixes from the
	//   shortest to longest instead for performance. Keep an eye on profiling
	//   for this code path as permission sets grow large.
	return permMap.VisitPrefixes(header.Key, headerEnd,
		func(start, end proto.Key, config interface{}) (bool, error) {
			hasPerm := false
			if err := permMap.VisitPrefixesHierarchically(start, func(start, end proto.Key, config interface{}) (bool, error) {
				perm := config.(*proto.PermConfig)
				if proto.IsRead(args) && !perm.CanRead(header.User) {
					return false, nil
				}
				if proto.IsWrite(args) && !perm.CanWrite(header.User) {
					return false, nil
				}
				// Return done = true, as permissions have been granted by this config.
				hasPerm = true
				return true, nil
			}); err != nil {
				return false, err
			}
			if !hasPerm {
				if len(header.EndKey) == 0 {
					return false, util.Errorf("user %q cannot invoke %s at %q", header.User, args.Method(), start)
				}
				return false, util.Errorf("user %q cannot invoke %s at %q-%q", header.User, args.Method(), start, end)
			}
			return false, nil
		})
}
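VisitPrefixesHierarchically asks the longest matching prefix first and falls back to successively shorter prefixes, down to the default (empty) prefix, until one of them grants the permission. The sketch below illustrates that lookup order with a plain map keyed by prefix and a single read capability; it is an illustration of the idea, not the storage.PrefixConfigMap API.

package main

import "fmt"

// canRead walks the prefixes of key from longest to shortest and returns
// true as soon as some prefix entry lists the user. The empty prefix acts
// as the default config.
func canRead(perms map[string][]string, key, user string) bool {
	for p := key; ; p = p[:len(p)-1] {
		if users, ok := perms[p]; ok {
			for _, u := range users {
				if u == user {
					return true
				}
			}
		}
		if p == "" {
			return false
		}
	}
}

func main() {
	perms := map[string][]string{
		"":  {"root"},         // default prefix
		"a": {"read1", "rw1"}, // config for keys starting with "a"
	}
	fmt.Println(canRead(perms, "a1", "read1")) // true: granted at prefix "a"
	fmt.Println(canRead(perms, "b1", "read1")) // false: only the default applies
	fmt.Println(canRead(perms, "b1", "root"))  // true: granted by the default
}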
Example #4
// sendAttempt is invoked by Send and handles retry logic and cache eviction
// for a call sent to a single range. It returns a retry status, which is
// Break on success and one of Break, Continue or Reset on error, depending
// on the error condition. This method is expected to be invoked from within
// a backoff / retry loop to retry the send repeatedly (e.g. to continue
// processing after a critical node becomes available again after downtime or
// the range descriptor is refreshed via lookup).
func (ds *DistSender) sendAttempt(desc *proto.RangeDescriptor, call proto.Call) (retry.Status, error) {
	leader := ds.leaderCache.Lookup(proto.RaftID(desc.RaftID))

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those with a long common prefix of
	// attributes end up first. If there's no common prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	args := call.Args
	reply := call.Reply

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(proto.IsRead(args) && args.Header().ReadConsistency == proto.INCONSISTENT) &&
		leader.StoreID > 0 {
		if i := replicas.FindReplica(leader.StoreID); i >= 0 {
			replicas.MoveToFront(i)
			order = rpc.OrderStable
		}
	}

	err := ds.sendRPC(desc.RaftID, replicas, order, args, reply)
	if err != nil {
		// For an RPC error to occur, we must've been unable to contact any
		// replicas. In this case, likely all nodes are down (or not getting back
		// to us within a reasonable amount of time).
		// We may simply not be trying to talk to the up-to-date replicas, so
		// clearing the descriptor here should be a good idea.
		// TODO(tschottdorf): If a replica group goes dead, this will cause clients
		// to put high read pressure on the first range, so there should be some
		// rate limiting here.
		ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
	} else {
		err = reply.Header().GoError()
	}

	if err != nil {
		if log.V(1) {
			log.Warningf("failed to invoke %s: %s", call.Method(), err)
		}

		// If retryable, allow retry. For range not found or range
		// key mismatch errors, we don't back off on the retry,
		// but reset the backoff loop so we can retry immediately.
		switch tErr := err.(type) {
		case *proto.RangeNotFoundError, *proto.RangeKeyMismatchError:
			// Range descriptor might be out of date - evict it.
			ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
			// On addressing errors, don't backoff; retry immediately.
			return retry.Reset, err
		case *proto.NotLeaderError:
			newLeader := tErr.GetLeader()
			// Verify that the leader is a known replica according to the
			// descriptor. If not, we've got a stale descriptor; evict the
			// cache entry. Next, cache the new leader.
			if newLeader != nil {
				if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
					if log.V(1) {
						log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
					}
					ds.rangeCache.EvictCachedRangeDescriptor(args.Header().Key, desc)
				}
			} else {
				newLeader = &proto.Replica{}
			}
			ds.updateLeaderCache(proto.RaftID(desc.RaftID), *newLeader)
			return retry.Reset, err
		case util.Retryable:
			if tErr.CanRetry() {
				return retry.Continue, err
			}
		}
		return retry.Break, err
	}
	return retry.Break, nil
}
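With sendAttempt reporting a retry status, the caller's loop in Send reduces to a switch on that status: Break ends the loop (success or a non-retryable error), Reset restarts the backoff so the next attempt happens immediately, and Continue backs off before retrying. The generic sketch below shows such a consumer; the Status type and runWithRetry helper are stand-ins, not the actual retry package (whose Start/Next/Reset calls appear in Example #2), and the backoff itself is elided.

package main

import "fmt"

// Status mirrors the three retry outcomes used above; the names are illustrative.
type Status int

const (
	Break Status = iota
	Continue
	Reset
)

// runWithRetry drives an attempt function that reports a Status, retrying
// immediately on Reset, backing off on Continue, and stopping on Break.
func runWithRetry(attempt func() (Status, error), maxAttempts int) error {
	var err error
	for i := 0; i < maxAttempts; i++ {
		var status Status
		status, err = attempt()
		switch status {
		case Break:
			return err // nil on success, otherwise a non-retryable error
		case Reset:
			fmt.Println("addressing error; retrying immediately:", err)
			// a real caller would reset its backoff here
		case Continue:
			fmt.Println("retryable error; backing off:", err)
			// a real caller would sleep before the next attempt
		}
	}
	return err
}

func main() {
	calls := 0
	err := runWithRetry(func() (Status, error) {
		calls++
		if calls < 3 {
			return Continue, fmt.Errorf("transient failure %d", calls)
		}
		return Break, nil
	}, 5)
	fmt.Println("result:", err, "after", calls, "attempts")
}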
// TestVerifyPermissions verifies that permissions are checked within a
// single permission config and across multiple configs. It also verifies
// that permissions are checked hierarchically.
func TestVerifyPermissions(t *testing.T) {
	defer leaktest.AfterTest(t)
	n := simulation.NewNetwork(1, "tcp", gossip.TestInterval)
	ds := NewDistSender(nil, n.Nodes[0].Gossip)
	config1 := &proto.PermConfig{
		Read:  []string{"read1", "readAll", "rw1", "rwAll"},
		Write: []string{"write1", "writeAll", "rw1", "rwAll"}}
	config2 := &proto.PermConfig{
		Read:  []string{"read2", "readAll", "rw2", "rwAll"},
		Write: []string{"write2", "writeAll", "rw2", "rwAll"}}
	configs := []*storage.PrefixConfig{
		{proto.KeyMin, nil, config1},
		{proto.Key("a"), nil, config2},
	}
	configMap, err := storage.NewPrefixConfigMap(configs)
	if err != nil {
		t.Fatalf("failed to make prefix config map, err: %s", err.Error())
	}
	if err := ds.gossip.AddInfo(gossip.KeyConfigPermission, configMap, time.Hour); err != nil {
		t.Fatal(err)
	}

	allRequestTypes := []proto.Request{
		&proto.GetRequest{},
		&proto.PutRequest{},
		&proto.ConditionalPutRequest{},
		&proto.IncrementRequest{},
		&proto.DeleteRequest{},
		&proto.DeleteRangeRequest{},
		&proto.ScanRequest{},
		&proto.EndTransactionRequest{},
		&proto.BatchRequest{},
		&proto.AdminSplitRequest{},
		&proto.AdminMergeRequest{},
		&proto.InternalHeartbeatTxnRequest{},
		&proto.InternalGCRequest{},
		&proto.InternalPushTxnRequest{},
		&proto.InternalRangeLookupRequest{},
		&proto.InternalResolveIntentRequest{},
		&proto.InternalResolveIntentRangeRequest{},
		&proto.InternalMergeRequest{},
		&proto.InternalTruncateLogRequest{},
		&proto.InternalLeaderLeaseRequest{},
		&proto.InternalBatchRequest{},
	}

	var readOnlyRequests []proto.Request
	var writeOnlyRequests []proto.Request
	var readWriteRequests []proto.Request

	for _, r := range allRequestTypes {
		if proto.IsRead(r) && !proto.IsWrite(r) {
			readOnlyRequests = append(readOnlyRequests, r)
		}
		if proto.IsWrite(r) && !proto.IsRead(r) {
			writeOnlyRequests = append(writeOnlyRequests, r)
		}
		if proto.IsRead(r) && proto.IsWrite(r) {
			readWriteRequests = append(readWriteRequests, r)
		}
	}

	testData := []struct {
		// Permission-based db methods from the storage package.
		requests         []proto.Request
		user             string
		startKey, endKey proto.Key
		hasPermission    bool
	}{
		// Test permissions within a single range
		{readOnlyRequests, "read1", proto.KeyMin, proto.KeyMin, true},
		{readOnlyRequests, "rw1", proto.KeyMin, proto.KeyMin, true},
		{readOnlyRequests, "write1", proto.KeyMin, proto.KeyMin, false},
		{readOnlyRequests, "random", proto.KeyMin, proto.KeyMin, false},
		{readWriteRequests, "rw1", proto.KeyMin, proto.KeyMin, true},
		{readWriteRequests, "read1", proto.KeyMin, proto.KeyMin, false},
		{readWriteRequests, "write1", proto.KeyMin, proto.KeyMin, false},
		{writeOnlyRequests, "write1", proto.KeyMin, proto.KeyMin, true},
		{writeOnlyRequests, "rw1", proto.KeyMin, proto.KeyMin, true},
		{writeOnlyRequests, "read1", proto.KeyMin, proto.KeyMin, false},
		{writeOnlyRequests, "random", proto.KeyMin, proto.KeyMin, false},
		// Test permissions hierarchically.
		{readOnlyRequests, "read1", proto.Key("a"), proto.Key("a1"), true},
		{readWriteRequests, "rw1", proto.Key("a"), proto.Key("a1"), true},
		{writeOnlyRequests, "write1", proto.Key("a"), proto.Key("a1"), true},
		// Test permissions across both ranges.
		{readOnlyRequests, "readAll", proto.KeyMin, proto.Key("b"), true},
		{readOnlyRequests, "read1", proto.KeyMin, proto.Key("b"), true},
		{readOnlyRequests, "read2", proto.KeyMin, proto.Key("b"), false},
		{readOnlyRequests, "random", proto.KeyMin, proto.Key("b"), false},
		{readWriteRequests, "rwAll", proto.KeyMin, proto.Key("b"), true},
		{readWriteRequests, "rw1", proto.KeyMin, proto.Key("b"), true},
		{readWriteRequests, "random", proto.KeyMin, proto.Key("b"), false},
		{writeOnlyRequests, "writeAll", proto.KeyMin, proto.Key("b"), true},
		{writeOnlyRequests, "write1", proto.KeyMin, proto.Key("b"), true},
		{writeOnlyRequests, "write2", proto.KeyMin, proto.Key("b"), false},
		{writeOnlyRequests, "random", proto.KeyMin, proto.Key("b"), false},
		// Test permissions within and around the boundaries of a range,
		// representatively using rw methods.
		{readWriteRequests, "rw2", proto.Key("a"), proto.Key("b"), true},
		{readWriteRequests, "rwAll", proto.Key("a"), proto.Key("b"), true},
		{readWriteRequests, "rw2", proto.Key("a"), proto.Key("a"), true},
		{readWriteRequests, "rw2", proto.Key("a"), proto.Key("a1"), true},
		{readWriteRequests, "rw2", proto.Key("a"), proto.Key("b1"), false},
		{readWriteRequests, "rw2", proto.Key("a3"), proto.Key("a4"), true},
		{readWriteRequests, "rw2", proto.Key("a3"), proto.Key("b1"), false},
	}

	for i, test := range testData {
		for _, r := range test.requests {
			*r.Header() = proto.RequestHeader{
				User:   test.user,
				Key:    test.startKey,
				EndKey: test.endKey,
			}
			err := ds.verifyPermissions(r)
			if err != nil && test.hasPermission {
				t.Errorf("test %d: user %s should have had permission to %s, err: %s",
					i, test.user, r.Method(), err.Error())
				break
			} else if err == nil && !test.hasPermission {
				t.Errorf("test %d: user %s should not have had permission to %s",
					i, test.user, r.Method())
				break
			}
		}
	}
	n.Stop()
}