// AddReplicas adds replicas for a range on a set of stores. // It's illegal to have multiple replicas of the same range on stores of a single // node. // The method blocks until a snapshot of the range has been copied to all the // new replicas and the new replicas become part of the Raft group. func (tc *TestCluster) AddReplicas( startKey roachpb.Key, targets ...ReplicationTarget, ) (*roachpb.RangeDescriptor, error) { rKey := keys.MustAddr(startKey) rangeDesc, err := tc.changeReplicas( roachpb.ADD_REPLICA, rKey, targets..., ) if err != nil { return nil, err } // Wait for the replication to complete on all destination nodes. if err := util.RetryForDuration(time.Second*5, func() error { for _, target := range targets { // Use LookupReplica(keys) instead of GetRange(rangeID) to ensure that the // snapshot has been transferred and the descriptor initialized. store, err := tc.findMemberStore(target.StoreID) if err != nil { log.Errorf(context.TODO(), "unexpected error: %s", err) return err } if store.LookupReplica(rKey, nil) == nil { return errors.Errorf("range not found on store %d", target) } } return nil }); err != nil { return nil, err } return rangeDesc, nil }
// LookupRange returns the descriptor of the range containing key. func (ts *TestServer) LookupRange(key roachpb.Key) (roachpb.RangeDescriptor, error) { rangeLookupReq := roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(keys.MustAddr(key)), }, MaxRanges: 1, } resp, pErr := client.SendWrapped(context.Background(), ts.DistSender(), &rangeLookupReq) if pErr != nil { return roachpb.RangeDescriptor{}, errors.Errorf( "%q: lookup range unexpected error: %s", key, pErr) } return resp.(*roachpb.RangeLookupResponse).Ranges[0], nil }
func (c *Cluster) lookupRange(nodeIdx int, key roachpb.Key) (*roachpb.RangeDescriptor, error) { req := &roachpb.RangeLookupRequest{ Span: roachpb.Span{ Key: keys.RangeMetaKey(keys.MustAddr(key)), }, MaxRanges: 1, } sender := c.Clients[nodeIdx].GetSender() resp, pErr := client.SendWrapped(context.Background(), sender, req) if pErr != nil { return nil, errors.Errorf("%s: lookup range: %s", key, pErr) } return &resp.(*roachpb.RangeLookupResponse).Ranges[0], nil }
// processIntentsAsync asynchronously processes intents which were // encountered during another command but did not interfere with the // execution of that command. This occurs in two cases: inconsistent // reads and EndTransaction (which queues its own external intents for // processing via this method). The two cases are handled somewhat // differently and would be better served by different entry points, // but combining them simplifies the plumbing necessary in Replica. func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) { now := r.store.Clock().Now() ctx := context.TODO() stopper := r.store.Stopper() for _, item := range intents { if item.args.Method() != roachpb.EndTransaction { if err := stopper.RunLimitedAsyncTask( ctx, ir.sem, true /* wait */, func(ctx context.Context) { // Everything here is best effort; give up rather than waiting // too long (helps avoid deadlocks during test shutdown, // although this is imperfect due to the use of an // uninterruptible WaitGroup.Wait in beginCmds). ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() h := roachpb.Header{Timestamp: now} resolveIntents, pushErr := ir.maybePushTransactions(ctxWithTimeout, item.intents, h, roachpb.PUSH_TOUCH, true /* skipInFlight */) // resolveIntents with poison=true because we're resolving // intents outside of the context of an EndTransaction. // // Naively, it doesn't seem like we need to poison the abort // cache since we're pushing with PUSH_TOUCH - meaning that // the primary way our Push leads to aborting intents is that // of the transaction having timed out (and thus presumably no // client being around any more, though at the time of writing // we don't guarantee that). But there are other paths in which // the Push comes back successful while the coordinating client // may still be active. Examples of this are when: // // - the transaction was aborted by someone else, but the // coordinating client may still be running. // - the transaction entry wasn't written yet, which at the // time of writing has our push abort it, leading to the // same situation as above. // // Thus, we must poison. if err := ir.resolveIntents(ctxWithTimeout, resolveIntents, true /* wait */, true /* poison */); err != nil { log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err) return } if pushErr != nil { log.Warningf(ctx, "%s: failed to push during intent resolution: %s", r, pushErr) return } }); err != nil { log.Warningf(ctx, "failed to resolve intents: %s", err) return } } else { // EndTransaction if err := stopper.RunLimitedAsyncTask( ctx, ir.sem, true /* wait */, func(ctx context.Context) { ctxWithTimeout, cancel := context.WithTimeout(ctx, base.NetworkTimeout) defer cancel() // For EndTransaction, we know the transaction is finalized so // we can skip the push and go straight to the resolve. // // This mechanism assumes that when an EndTransaction fails, // the client makes no assumptions about the result. For // example, an attempt to explicitly rollback the transaction // may succeed (triggering this code path), but the result may // not make it back to the client. if err := ir.resolveIntents(ctxWithTimeout, item.intents, true /* wait */, false /* !poison */); err != nil { log.Warningf(ctx, "%s: failed to resolve intents: %s", r, err) return } // We successfully resolved the intents, so we're able to GC from // the txn span directly. b := &client.Batch{} txn := item.intents[0].Txn txnKey := keys.TransactionKey(txn.Key, *txn.ID) // This is pretty tricky. Transaction keys are range-local and // so they are encoded specially. The key range addressed by // (txnKey, txnKey.Next()) might be empty (since Next() does // not imply monotonicity on the address side). Instead, we // send this request to a range determined using the resolved // transaction anchor, i.e. if the txn is anchored on // /Local/RangeDescriptor/"a"/uuid, the key range below would // be ["a", "a\x00"). However, the first range is special again // because the above procedure results in KeyMin, but we need // at least KeyLocalMax. // // #7880 will address this by making GCRequest less special and // thus obviating the need to cook up an artificial range here. var gcArgs roachpb.GCRequest { key := keys.MustAddr(txn.Key) if localMax := keys.MustAddr(keys.LocalMax); key.Less(localMax) { key = localMax } endKey := key.Next() gcArgs.Span = roachpb.Span{ Key: key.AsRawKey(), EndKey: endKey.AsRawKey(), } } gcArgs.Keys = append(gcArgs.Keys, roachpb.GCRequest_GCKey{ Key: txnKey, }) b.AddRawRequest(&gcArgs) if err := ir.store.db.Run(ctx, b); err != nil { log.Warningf(ctx, "could not GC completed transaction anchored at %s: %s", roachpb.Key(txn.Key), err) return } }); err != nil { log.Warningf(ctx, "failed to resolve intents: %s", err) return } } } }
// RemoveReplicas removes one or more replicas from a range. func (tc *TestCluster) RemoveReplicas( startKey roachpb.Key, targets ...ReplicationTarget, ) (*roachpb.RangeDescriptor, error) { return tc.changeReplicas(roachpb.REMOVE_REPLICA, keys.MustAddr(startKey), targets...) }