Exemple #1
0
// AddReplicas adds replicas for a range on a set of stores.
// It's illegal to have multiple replicas of the same range on stores of a single
// node.
// The method blocks until a snapshot of the range has been copied to all the
// new replicas and the new replicas become part of the Raft group.
//
// startKey is converted to an addressing key with keys.MustAddr and is used
// both for the replica change and for the subsequent per-store lookup; targets
// lists the stores that should receive a replica. On success the descriptor
// produced by the replica change is returned. An error is returned if the
// change itself fails or if the post-change check (run through
// util.RetryForDuration with a five-second duration) still reports a failure.
func (tc *TestCluster) AddReplicas(
	startKey roachpb.Key, targets ...ReplicationTarget,
) (*roachpb.RangeDescriptor, error) {
	rKey := keys.MustAddr(startKey)
	rangeDesc, err := tc.changeReplicas(
		roachpb.ADD_REPLICA, rKey, targets...,
	)
	if err != nil {
		return nil, err
	}

	// Wait for the replication to complete on all destination nodes.
	if err := util.RetryForDuration(time.Second*5, func() error {
		for _, target := range targets {
			// Use LookupReplica(keys) instead of GetRange(rangeID) to ensure that the
			// snapshot has been transferred and the descriptor initialized.
			store, err := tc.findMemberStore(target.StoreID)
			if err != nil {
				log.Errorf(context.TODO(), "unexpected error: %s", err)
				return err
			}
			if store.LookupReplica(rKey, nil) == nil {
				// Report the store ID itself; formatting the whole target with %d
				// would print the struct rather than the store the message names.
				return errors.Errorf("range not found on store %d", target.StoreID)
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	return rangeDesc, nil
}
Exemple #2
0
// LookupRange returns the descriptor of the range containing key.
//
// It sends a RangeLookupRequest for the meta key derived from key through the
// server's DistSender, asking for at most one range. An error is returned if
// the request fails or if the response carries no range descriptors.
func (ts *TestServer) LookupRange(key roachpb.Key) (roachpb.RangeDescriptor, error) {
	rangeLookupReq := roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			Key: keys.RangeMetaKey(keys.MustAddr(key)),
		},
		MaxRanges: 1,
	}
	resp, pErr := client.SendWrapped(context.Background(), ts.DistSender(), &rangeLookupReq)
	if pErr != nil {
		return roachpb.RangeDescriptor{}, errors.Errorf(
			"%q: lookup range unexpected error: %s", key, pErr)
	}
	// Check the length before indexing so that an empty response surfaces as an
	// error to the caller instead of an index-out-of-range panic.
	ranges := resp.(*roachpb.RangeLookupResponse).Ranges
	if len(ranges) == 0 {
		return roachpb.RangeDescriptor{}, errors.Errorf(
			"%q: lookup range returned no range descriptors", key)
	}
	return ranges[0], nil
}
Exemple #3
0
// lookupRange returns a pointer to the descriptor of the range containing key.
//
// The lookup is issued as a RangeLookupRequest (at most one range) through the
// sender obtained from c.Clients[nodeIdx]. An error is returned if the request
// fails or if the response carries no range descriptors.
func (c *Cluster) lookupRange(nodeIdx int, key roachpb.Key) (*roachpb.RangeDescriptor, error) {
	req := &roachpb.RangeLookupRequest{
		Span: roachpb.Span{
			Key: keys.RangeMetaKey(keys.MustAddr(key)),
		},
		MaxRanges: 1,
	}
	sender := c.Clients[nodeIdx].GetSender()
	resp, pErr := client.SendWrapped(context.Background(), sender, req)
	if pErr != nil {
		return nil, errors.Errorf("%s: lookup range: %s", key, pErr)
	}
	// Check the length before indexing so that an empty response surfaces as an
	// error to the caller instead of an index-out-of-range panic.
	ranges := resp.(*roachpb.RangeLookupResponse).Ranges
	if len(ranges) == 0 {
		return nil, errors.Errorf("%s: lookup range: no range descriptors returned", key)
	}
	return &ranges[0], nil
}
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
//
// Each element of intents pairs the request that encountered the intents
// (item.args) with the intents themselves (item.intents). One task per element
// is handed to the store's stopper via RunLimitedAsyncTask, limited by ir.sem.
// Failures are logged as warnings rather than returned; if a task cannot be
// started, the remaining elements are not processed.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	now := r.store.Clock().Now()
	ctx := context.TODO()
	stopper := r.store.Stopper()

	for _, item := range intents {
		// Take a per-iteration copy: the closures below are handed to the
		// stopper as async tasks, and with pre-Go 1.22 loop semantics they
		// would otherwise share a single variable that the loop overwrites
		// on its next iteration.
		item := item
		if item.args.Method() != roachpb.EndTransaction {
			if err := stopper.RunLimitedAsyncTask(
				ctx, ir.sem, true /* wait */, func(ctx context.Context) {
					// Everything here is best effort; give up rather than waiting
					// too long (helps avoid deadlocks during test shutdown,
					// although this is imperfect due to the use of an
					// uninterruptible WaitGroup.Wait in beginCmds).
					ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
					defer cancel()
					h := roachpb.Header{Timestamp: now}
					resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout,
						item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */)

					// resolveIntents with poison=true because we're resolving
					// intents outside of the context of an EndTransaction.
					//
					// Naively, it doesn't seem like we need to poison the abort
					// cache since we're pushing with PUSH_TOUCH - meaning that
					// the primary way our Push leads to aborting intents is that
					// of the transaction having timed out (and thus presumably no
					// client being around any more, though at the time of writing
					// we don't guarantee that). But there are other paths in which
					// the Push comes back successful while the coordinating client
					// may still be active. Examples of this are when:
					//
					// - the transaction was aborted by someone else, but the
					//   coordinating client may still be running.
					// - the transaction entry wasn't written yet, which at the
					//   time of writing has our push abort it, leading to the
					//   same situation as above.
					//
					// Thus, we must poison.
					if err := ir.resolveIntents(ctxWithTimeout, resolveIntents,
						true /* wait */, true /* poison */); err != nil {
						log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err)
						return
					}
					// The push error is only reported after the resolve attempt,
					// so whatever the push did return is still resolved first.
					if pushErr != nil {
						log.Warningf(ctx, "%s: failed to push during intent resolution: %s", r, pushErr)
						return
					}
				}); err != nil {
				log.Warningf(ctx, "failed to resolve intents: %s", err)
				return
			}
		} else { // EndTransaction
			if err := stopper.RunLimitedAsyncTask(
				ctx, ir.sem, true /* wait */, func(ctx context.Context) {
					ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout)
					defer cancel()

					// For EndTransaction, we know the transaction is finalized so
					// we can skip the push and go straight to the resolve.
					//
					// This mechanism assumes that when an EndTransaction fails,
					// the client makes no assumptions about the result. For
					// example, an attempt to explicitly rollback the transaction
					// may succeed (triggering this code path), but the result may
					// not make it back to the client.
					if err := ir.resolveIntents(ctxWithTimeout, item.intents,
						true /* wait */, false /* !poison */); err != nil {
						log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err)
						return
					}

					// We successfully resolved the intents, so we're able to GC from
					// the txn span directly.
					b := &client.Batch{}
					txn := item.intents[0].Txn
					txnKey := keys.TransactionKey(txn.Key, *txn.ID)

					// This is pretty tricky. Transaction keys are range-local and
					// so they are encoded specially. The key range addressed by
					// (txnKey, txnKey.Next()) might be empty (since Next() does
					// not imply monotonicity on the address side). Instead, we
					// send this request to a range determined using the resolved
					// transaction anchor, i.e. if the txn is anchored on
					// /Local/RangeDescriptor/"a"/uuid, the key range below would
					// be ["a", "a\x00"). However, the first range is special again
					// because the above procedure results in KeyMin, but we need
					// at least KeyLocalMax.
					//
					// #7880 will address this by making GCRequest less special and
					// thus obviating the need to cook up an artificial range here.
					var gcArgs roachpb.GCRequest
					{
						key := keys.MustAddr(txn.Key)
						if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) {
							key = localMax
						}
						endKey := key.Next()

						gcArgs.Span = roachpb.Span{
							Key:    key.AsRawKey(),
							EndKey: endKey.AsRawKey(),
						}
					}

					gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{
						Key: txnKey,
					})
					b.AddRawRequest(&gcArgs)
					if err := ir.store.db.Run(ctx, b); err != nil {
						log.Warningf(ctx, "could not GC completed transaction anchored at %s: %s",
							roachpb.Key(txn.Key), err)
						return
					}
				}); err != nil {
				log.Warningf(ctx, "failed to resolve intents: %s", err)
				return
			}
		}
	}
}
Exemple #5
0
// RemoveReplicas removes one or more replicas from a range. The range is
// addressed by startKey and targets names the replicas to drop; the result of
// the underlying changeReplicas call is returned unchanged.
func (tc *TestCluster) RemoveReplicas(
	startKey roachpb.Key, targets ...ReplicationTarget,
) (*roachpb.RangeDescriptor, error) {
	rangeAddr := keys.MustAddr(startKey)
	return tc.changeReplicas(roachpb.REMOVE_REPLICA, rangeAddr, targets...)
}