예제 #1
0
파일: range.go 프로젝트: maijia/cockroach
// AddCmd adds a command for execution on this range. The command's
// affected keys are verified to be contained within the range and the
// range's leadership is confirmed. The command is then dispatched
// either along the read-only execution path or the read-write Raft
// command queue.
func (r *Range) AddCmd(ctx context.Context, call proto.Call) error {
	args := call.Args
	// TODO(tschottdorf) Some (internal) requests go here directly, so they
	// won't be traced.
	trace := tracer.FromCtx(ctx)
	// Differentiate between admin, read-only and read-write.
	var reply proto.Response
	var err error
	if proto.IsAdmin(args) {
		defer trace.Epoch("admin path")()
		reply, err = r.addAdminCmd(ctx, args)
	} else if proto.IsReadOnly(args) {
		defer trace.Epoch("read-only path")()
		reply, err = r.addReadOnlyCmd(ctx, args)
	} else if proto.IsWrite(args) {
		defer trace.Epoch("read-write path")()
		reply, err = r.addWriteCmd(ctx, args, nil)
	} else {
		panic(fmt.Sprintf("don't know how to handle command %T", args))
	}

	if reply != nil {
		gogoproto.Merge(call.Reply, reply)
	}

	if err != nil {
		replyHeader := call.Reply.Header()
		if replyHeader.Error != nil {
			panic("the world is on fire")
		}
		replyHeader.SetGoError(err)
	}

	return err
}
예제 #2
0
// sendAttempt gathers and rearranges the replicas, and makes an RPC call.
func (ds *DistSender) sendAttempt(trace *tracer.Trace, ba proto.BatchRequest, desc *proto.RangeDescriptor) (*proto.BatchResponse, error) {
	defer trace.Epoch("sending RPC")()

	leader := ds.leaderCache.Lookup(proto.RangeID(desc.RangeID))

	// Try to send the call.
	replicas := newReplicaSlice(ds.gossip, desc)

	// Rearrange the replicas so that those replicas with long common
	// prefix of attributes end up first. If there's no prefix, this is a
	// no-op.
	order := ds.optimizeReplicaOrder(replicas)

	// If this request needs to go to a leader and we know who that is, move
	// it to the front.
	if !(proto.IsReadOnly(&ba) && ba.ReadConsistency == proto.INCONSISTENT) &&
		leader.StoreID > 0 {
		if i := replicas.FindReplica(leader.StoreID); i >= 0 {
			replicas.MoveToFront(i)
			order = rpc.OrderStable
		}
	}

	// TODO(tschottdorf) &ba -> ba
	resp, err := ds.sendRPC(trace, desc.RangeID, replicas, order, &ba)
	if err != nil {
		return nil, err
	}
	// Untangle the error from the received response.
	br := resp.(*proto.BatchResponse)
	err = br.GoError()
	br.Error = nil
	return br, err
}
예제 #3
0
// AddCmd adds a command for execution on this range. The command's
// affected keys are verified to be contained within the range and the
// range's leadership is confirmed. The command is then dispatched
// either along the read-only execution path or the read-write Raft
// command queue.
func (r *Range) AddCmd(ctx context.Context, call proto.Call) error {
	args, reply := call.Args, call.Reply

	// Differentiate between admin, read-only and read-write.
	if proto.IsAdmin(args) {
		return r.addAdminCmd(ctx, args, reply)
	} else if proto.IsReadOnly(args) {
		return r.addReadOnlyCmd(ctx, args, reply)
	}
	return r.addWriteCmd(ctx, args, reply, nil)
}
예제 #4
0
// AddCmd adds a command for execution on this range. The command's
// affected keys are verified to be contained within the range and the
// range's leadership is confirmed. The command is then dispatched
// either along the read-only execution path or the read-write Raft
// command queue.
func (r *Range) AddCmd(ctx context.Context, call proto.Call) error {
	args, reply := call.Args, call.Reply
	// TODO(tschottdorf) Some (internal) requests go here directly, so they
	// won't be traced.
	trace := tracer.FromCtx(ctx)
	// Differentiate between admin, read-only and read-write.
	if proto.IsAdmin(args) {
		defer trace.Epoch("admin path")()
		return r.addAdminCmd(ctx, args, reply)
	} else if proto.IsReadOnly(args) {
		defer trace.Epoch("read path")()
		return r.addReadOnlyCmd(ctx, args, reply)
	}
	return r.addWriteCmd(ctx, args, reply, nil)
}
예제 #5
0
파일: range.go 프로젝트: huaxling/cockroach
// AddCmd adds a command for execution on this range. The command's
// affected keys are verified to be contained within the range and the
// range's leadership is confirmed. The command is then dispatched
// either along the read-only execution path or the read-write Raft
// command queue. If wait is false, read-write commands are added to
// Raft without waiting for their completion.
func (r *Range) AddCmd(ctx context.Context, call proto.Call, wait bool) error {
	args, reply := call.Args, call.Reply
	header := args.Header()
	if !r.ContainsKeyRange(header.Key, header.EndKey) {
		err := proto.NewRangeKeyMismatchError(header.Key, header.EndKey, r.Desc())
		reply.Header().SetGoError(err)
		return err
	}

	// Differentiate between admin, read-only and read-write.
	if proto.IsAdmin(args) {
		return r.addAdminCmd(ctx, args, reply)
	} else if proto.IsReadOnly(args) {
		return r.addReadOnlyCmd(ctx, args, reply)
	}
	return r.addWriteCmd(ctx, args, reply, wait)
}
예제 #6
0
// AddCmd adds a command for execution on this range. The command's
// affected keys are verified to be contained within the range and the
// range's leadership is confirmed. The command is then dispatched
// either along the read-only execution path or the read-write Raft
// command queue.
func (r *Replica) AddCmd(ctx context.Context, args proto.Request) (proto.Response, error) {
	// TODO(tschottdorf) Some (internal) requests go here directly, so they
	// won't be traced.
	trace := tracer.FromCtx(ctx)
	// Differentiate between admin, read-only and read-write.
	var reply proto.Response
	var err error
	if proto.IsAdmin(args) {
		defer trace.Epoch("admin path")()
		reply, err = r.addAdminCmd(ctx, args)
	} else if proto.IsReadOnly(args) {
		defer trace.Epoch("read-only path")()
		reply, err = r.addReadOnlyCmd(ctx, args)
	} else if proto.IsWrite(args) {
		defer trace.Epoch("read-write path")()
		reply, err = r.addWriteCmd(ctx, args, nil)
	} else {
		panic(fmt.Sprintf("don't know how to handle command %T", args))
	}

	return reply, err
}
예제 #7
0
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(ctx context.Context, call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	trace := tracer.FromCtx(ctx)
	var id string // optional transaction ID
	if header.Txn != nil {
		// If this call is part of a transaction...
		id = string(header.Txn.ID)
		// Verify that if this Transaction is not read-only, we have it on
		// file. If not, refuse writes - the client must have issued a write on
		// another coordinator previously.
		if header.Txn.Writing && proto.IsTransactionWrite(call.Args) {
			tc.Lock()
			_, ok := tc.txns[id]
			tc.Unlock()
			if !ok {
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction must not write on multiple coordinators"))
				return
			}
		}

		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}

		if args, ok := call.Args.(*proto.EndTransactionRequest); ok {
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
			// EndTransaction must have its key set to that of the txn.
			header.Key = header.Txn.Key
			if len(args.Intents) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				call.Reply.Header().SetGoError(util.Errorf(
					"client must not pass intents to EndTransaction"))
				return
			}
			tc.Lock()
			txnMeta, metaOK := tc.txns[id]
			if id != "" && metaOK {
				args.Intents = txnMeta.intents()
			}
			tc.Unlock()

			if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could lookup the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				call.Reply.Header().SetGoError(util.Errorf(
					"transaction is already committed or aborted"))
				return
			} else if len(args.Intents) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state in
				// the client.
				call.Reply.Header().SetGoError(util.Errorf(
					"cannot commit a read-only transaction"))
				return
			}
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(ctx, call)

	// For transactional calls, need to track & update the transaction.
	if header.Txn != nil {
		respHeader := call.Reply.Header()
		if respHeader.Txn == nil {
			// When empty, simply use the request's transaction.
			// This is expected: the Range doesn't bother copying unless the
			// object changes.
			respHeader.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, respHeader)
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		if !header.Txn.Equal(txn) {
			panic("transaction ID changed")
		}
		tc.Lock()
		txnMeta := tc.txns[id]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txn.Writing = true
					trace.Event("coordinator spawns")
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					tc.txns[id] = txnMeta
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						call.Reply.Header().SetGoError(&proto.NodeUnavailableError{})
						tc.Unlock()
						tc.unregisterTxn(id)
						return
					}
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(trace, t.Txn)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		// TODO(tschottdorf): this part is awkward. Consider resending here
		// without starting a new call, which is hard to trace. Plus, the
		// below depends on default configuration.
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap", 0)
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.CommitInBatch(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(trace, *txn)
				}

			}
		}
	}
}
예제 #8
0
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdate
// timestamp to prevent live transactions from being considered
// abandoned and garbage collected. Read/write mutating requests have
// their key or key range added to the transaction's interval tree of
// key ranges for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(call proto.Call) {
	var startNS int64
	header := call.Args.Header()
	// If this call is part of a transaction...
	if header.Txn != nil {
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Args) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}
		// EndTransaction must have its key set to that of the txn.
		if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
			header.Key = header.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(context.TODO(), call)

	if header.Txn != nil {
		// If not already set, copy the request txn.
		if call.Reply.Header().Txn == nil {
			call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, call.Reply.Header())
	}

	if txn := call.Reply.Header().Txn; txn != nil {
		tc.Lock()
		txnMeta := tc.txns[string(txn.ID)]
		// If this transactional command leaves transactional intents, add the key
		// or key range to the intents map. If the transaction metadata doesn't yet
		// exist, create it.
		if call.Reply.Header().GoError() == nil {
			if proto.IsTransactionWrite(call.Args) {
				if txnMeta == nil {
					txnMeta = &txnMetadata{
						txn:              *txn,
						keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
						firstUpdateNanos: tc.clock.PhysicalNow(),
						lastUpdateNanos:  tc.clock.PhysicalNow(),
						timeoutDuration:  tc.clientTimeout,
						txnEnd:           make(chan struct{}),
					}
					id := string(txn.ID)
					tc.txns[id] = txnMeta
					tc.heartbeat(id)
				}
				txnMeta.addKeyRange(header.Key, header.EndKey)
			}
			// Update our record of this transaction.
			if txnMeta != nil {
				txnMeta.txn = *txn
				txnMeta.setLastUpdate(tc.clock.PhysicalNow())
			}
		}
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider it dead.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(t.Txn, nil)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		if log.V(1) {
			log.Infof("%s: auto-wrapping in txn and re-executing", call.Method())
		}
		tmpDB, err := client.Open(
			fmt.Sprintf("//%s?priority=%d",
				call.Args.Header().User, call.Args.Header().GetUserPriority()),
			client.SenderOpt(tc))
		if err != nil {
			log.Warning(err)
			return
		}
		call.Reply.Reset()
		if err := tmpDB.Txn(func(txn *client.Txn) error {
			txn.SetDebugName("auto-wrap")
			b := &client.Batch{}
			b.InternalAddCall(call)
			return txn.Commit(b)
		}); err != nil {
			log.Warning(err)
		}
	case nil:
		var resolved []proto.Key
		if txn := call.Reply.Header().Txn; txn != nil {
			if _, ok := call.Args.(*proto.EndTransactionRequest); ok {
				// If the --linearizable flag is set, we want to make sure that
				// all the clocks in the system are past the commit timestamp
				// of the transaction. This is guaranteed if either
				// - the commit timestamp is MaxOffset behind startNS
				// - MaxOffset ns were spent in this function
				// when returning to the client. Below we choose the option
				// that involves less waiting, which is likely the first one
				// unless a transaction commits with an odd timestamp.
				if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
					startNS = tsNS
				}
				sleepNS := tc.clock.MaxOffset() -
					time.Duration(tc.clock.PhysicalNow()-startNS)
				if tc.linearizable && sleepNS > 0 {
					defer func() {
						if log.V(1) {
							log.Infof("%v: waiting %s on EndTransaction for linearizability", txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
						}
						time.Sleep(sleepNS)
					}()
				}
				resolved = call.Reply.(*proto.EndTransactionResponse).Resolved
				if txn.Status != proto.PENDING {
					tc.cleanupTxn(*txn, resolved)
				}

			}
		}
	}
}
예제 #9
0
// TestVerifyPermissions verifies permissions are checked for single
// zones and across multiple zones. It also verifies that permissions
// are checked hierarchically.
func TestVerifyPermissions(t *testing.T) {
	n := simulation.NewNetwork(1, "unix", gossip.TestInterval, gossip.TestBootstrap)
	ds := NewDistSender(n.Nodes[0].Gossip)
	config1 := &proto.PermConfig{
		Read:  []string{"read1", "readAll", "rw1", "rwAll"},
		Write: []string{"write1", "writeAll", "rw1", "rwAll"}}
	config2 := &proto.PermConfig{
		Read:  []string{"read2", "readAll", "rw2", "rwAll"},
		Write: []string{"write2", "writeAll", "rw2", "rwAll"}}
	configs := []*storage.PrefixConfig{
		{engine.KeyMin, nil, config1},
		{proto.Key("a"), nil, config2},
	}
	configMap, err := storage.NewPrefixConfigMap(configs)
	if err != nil {
		t.Fatalf("failed to make prefix config map, err: %s", err.Error())
	}
	ds.gossip.AddInfo(gossip.KeyConfigPermission, configMap, time.Hour)

	readOnlyMethods := make([]string, 0, len(proto.ReadMethods))
	writeOnlyMethods := make([]string, 0, len(proto.WriteMethods))
	readWriteMethods := make([]string, 0, len(proto.ReadMethods)+len(proto.WriteMethods))
	for readM := range proto.ReadMethods {
		if proto.IsReadOnly(readM) {
			readOnlyMethods = append(readOnlyMethods, readM)
		} else {
			readWriteMethods = append(readWriteMethods, readM)
		}
	}
	for writeM := range proto.WriteMethods {
		if !proto.NeedReadPerm(writeM) {
			writeOnlyMethods = append(writeOnlyMethods, writeM)
		}
	}

	testData := []struct {
		// Permission-based db methods from the storage package.
		methods          []string
		user             string
		startKey, endKey proto.Key
		hasPermission    bool
	}{
		// Test permissions within a single range
		{readOnlyMethods, "read1", engine.KeyMin, engine.KeyMin, true},
		{readOnlyMethods, "rw1", engine.KeyMin, engine.KeyMin, true},
		{readOnlyMethods, "write1", engine.KeyMin, engine.KeyMin, false},
		{readOnlyMethods, "random", engine.KeyMin, engine.KeyMin, false},
		{readWriteMethods, "rw1", engine.KeyMin, engine.KeyMin, true},
		{readWriteMethods, "read1", engine.KeyMin, engine.KeyMin, false},
		{readWriteMethods, "write1", engine.KeyMin, engine.KeyMin, false},
		{writeOnlyMethods, "write1", engine.KeyMin, engine.KeyMin, true},
		{writeOnlyMethods, "rw1", engine.KeyMin, engine.KeyMin, true},
		{writeOnlyMethods, "read1", engine.KeyMin, engine.KeyMin, false},
		{writeOnlyMethods, "random", engine.KeyMin, engine.KeyMin, false},
		// Test permissions hierarchically.
		{readOnlyMethods, "read1", proto.Key("a"), proto.Key("a1"), true},
		{readWriteMethods, "rw1", proto.Key("a"), proto.Key("a1"), true},
		{writeOnlyMethods, "write1", proto.Key("a"), proto.Key("a1"), true},
		// Test permissions across both ranges.
		{readOnlyMethods, "readAll", engine.KeyMin, proto.Key("b"), true},
		{readOnlyMethods, "read1", engine.KeyMin, proto.Key("b"), true},
		{readOnlyMethods, "read2", engine.KeyMin, proto.Key("b"), false},
		{readOnlyMethods, "random", engine.KeyMin, proto.Key("b"), false},
		{readWriteMethods, "rwAll", engine.KeyMin, proto.Key("b"), true},
		{readWriteMethods, "rw1", engine.KeyMin, proto.Key("b"), true},
		{readWriteMethods, "random", engine.KeyMin, proto.Key("b"), false},
		{writeOnlyMethods, "writeAll", engine.KeyMin, proto.Key("b"), true},
		{writeOnlyMethods, "write1", engine.KeyMin, proto.Key("b"), true},
		{writeOnlyMethods, "write2", engine.KeyMin, proto.Key("b"), false},
		{writeOnlyMethods, "random", engine.KeyMin, proto.Key("b"), false},
		// Test permissions within and around the boundaries of a range,
		// representatively using rw methods.
		{readWriteMethods, "rw2", proto.Key("a"), proto.Key("b"), true},
		{readWriteMethods, "rwAll", proto.Key("a"), proto.Key("b"), true},
		{readWriteMethods, "rw2", proto.Key("a"), proto.Key("a"), true},
		{readWriteMethods, "rw2", proto.Key("a"), proto.Key("a1"), true},
		{readWriteMethods, "rw2", proto.Key("a"), proto.Key("b1"), false},
		{readWriteMethods, "rw2", proto.Key("a3"), proto.Key("a4"), true},
		{readWriteMethods, "rw2", proto.Key("a3"), proto.Key("b1"), false},
	}

	for i, test := range testData {
		for _, method := range test.methods {
			err := ds.verifyPermissions(
				method,
				&proto.RequestHeader{
					User: test.user, Key: test.startKey, EndKey: test.endKey})
			if err != nil && test.hasPermission {
				t.Errorf("test %d: user %s should have had permission to %s, err: %s",
					i, test.user, method, err.Error())
				break
			} else if err == nil && !test.hasPermission {
				t.Errorf("test %d: user %s should not have had permission to %s",
					i, test.user, method)
				break
			}
		}
	}
	n.Stop()
}
예제 #10
0
// sendOne sends a single call via the wrapped sender. If the call is
// part of a transaction, the TxnCoordSender adds the transaction to a
// map of active transactions and begins heartbeating it. Every
// subsequent call for the same transaction updates the lastUpdateTS
// to prevent live transactions from being considered abandoned and
// garbage collected. Read/write mutating requests have their key or
// key range added to the transaction's interval tree of key ranges
// for eventual cleanup via resolved write intents.
//
// On success, and if the call is part of a transaction, the affected
// key range is recorded as live intents for eventual cleanup upon
// transaction commit. Upon successful txn commit, initiates cleanup
// of intents.
func (tc *TxnCoordSender) sendOne(call *client.Call) {
	var startNS int64
	header := call.Args.Header()
	// If this call is part of a transaction...
	if header.Txn != nil {
		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if proto.IsReadOnly(call.Method) {
			header.Timestamp = header.Txn.OrigTimestamp
		} else {
			header.Timestamp = header.Txn.Timestamp
		}
		// End transaction must have its key set to the txn ID.
		if call.Method == proto.EndTransaction {
			header.Key = header.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
		}
	}

	// Send the command through wrapped sender.
	tc.wrapped.Send(call)

	if header.Txn != nil {
		// If not already set, copy the request txn.
		if call.Reply.Header().Txn == nil {
			call.Reply.Header().Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
		}
		tc.updateResponseTxn(header, call.Reply.Header())
	}

	// If successful, we're in a transaction, and the command leaves
	// transactional intents, add the key or key range to the intents map.
	// If the transaction metadata doesn't yet exist, create it.
	if call.Reply.Header().GoError() == nil && header.Txn != nil && proto.IsTransactional(call.Method) {
		tc.Lock()
		var ok bool
		var txnMeta *txnMetadata
		if txnMeta, ok = tc.txns[string(header.Txn.ID)]; !ok {
			txnMeta = &txnMetadata{
				txn:             *header.Txn,
				keys:            util.NewIntervalCache(util.CacheConfig{Policy: util.CacheNone}),
				lastUpdateTS:    tc.clock.Now(),
				timeoutDuration: tc.clientTimeout,
				closer:          make(chan struct{}),
			}
			tc.txns[string(header.Txn.ID)] = txnMeta

			// TODO(jiajia): Reevaluate this logic of creating a goroutine
			// for each active transaction. Spencer suggests a heap
			// containing next heartbeat timeouts which is processed by a
			// single goroutine.
			go tc.heartbeat(header.Txn, txnMeta.closer)
		}
		txnMeta.lastUpdateTS = tc.clock.Now()
		txnMeta.addKeyRange(header.Key, header.EndKey)
		tc.Unlock()
	}

	// Cleanup intents and transaction map if end of transaction.
	switch t := call.Reply.Header().GoError().(type) {
	case *proto.TransactionAbortedError:
		// If already aborted, cleanup the txn on this TxnCoordSender.
		tc.cleanupTxn(&t.Txn)
	case *proto.OpRequiresTxnError:
		// Run a one-off transaction with that single command.
		log.Infof("%s: auto-wrapping in txn and re-executing", call.Method)
		txnOpts := &client.TransactionOptions{
			Name: "auto-wrap",
		}
		// Must not call Close() on this KV - that would call
		// tc.Close().
		tmpKV := client.NewKV(tc, nil)
		tmpKV.User = call.Args.Header().User
		tmpKV.UserPriority = call.Args.Header().GetUserPriority()
		call.Reply.Reset()
		tmpKV.RunTransaction(txnOpts, func(txn *client.KV) error {
			return txn.Call(call.Method, call.Args, call.Reply)
		})
	case nil:
		var txn *proto.Transaction
		if call.Method == proto.EndTransaction {
			txn = call.Reply.Header().Txn
			// If the -linearizable flag is set, we want to make sure that
			// all the clocks in the system are past the commit timestamp
			// of the transaction. This is guaranteed if either
			// - the commit timestamp is MaxOffset behind startNS
			// - MaxOffset ns were spent in this function
			// when returning to the client. Below we choose the option
			// that involves less waiting, which is likely the first one
			// unless a transaction commits with an odd timestamp.
			if tsNS := txn.Timestamp.WallTime; startNS > tsNS {
				startNS = tsNS
			}
			sleepNS := tc.clock.MaxOffset() -
				time.Duration(tc.clock.PhysicalNow()-startNS)
			if tc.linearizable && sleepNS > 0 {
				defer func() {
					log.V(1).Infof("%v: waiting %dms on EndTransaction for linearizability", txn.ID, sleepNS/1000000)
					time.Sleep(sleepNS)
				}()
			}
		}
		if txn != nil && txn.Status != proto.PENDING {
			tc.cleanupTxn(txn)
		}
	}
}