// withRetry gets available connections and executes the action. If there are retryable errors, // it retries retryCount times before failing. It does not retry if the connection is in // the middle of a transaction. While returning the error check if it maybe a result of // a resharding event, and set the re-resolve bit and let the upper layers // re-resolve and retry. func (dg *discoveryGateway) withRetry(ctx context.Context, keyspace, shard string, tabletType topodatapb.TabletType, action func(conn tabletconn.TabletConn, target *querypb.Target) error, transactionID int64, isStreaming bool) error { var tabletLastUsed *topodatapb.Tablet var err error inTransaction := (transactionID != 0) invalidTablets := make(map[string]bool) for i := 0; i < dg.retryCount+1; i++ { tablets := dg.getTablets(keyspace, shard, tabletType) if len(tablets) == 0 { // fail fast if there is no tablet err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no valid tablet")) break } shuffleTablets(tablets) // skip tablets we tried before var ts *discovery.TabletStats for _, t := range tablets { if _, ok := invalidTablets[discovery.TabletToMapKey(t.Tablet)]; !ok { ts = t break } } if ts == nil { if err == nil { // do not override error from last attempt. err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no available connection")) } break } // execute tabletLastUsed = ts.Tablet conn := dg.hc.GetConnection(ts.Tablet) if conn == nil { err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no connection for %+v", ts.Tablet)) invalidTablets[discovery.TabletToMapKey(ts.Tablet)] = true continue } // Potentially buffer this request. if bufferErr := masterbuffer.FakeBuffer(keyspace, shard, tabletType, inTransaction, i); bufferErr != nil { return bufferErr } err = action(conn, ts.Target) if dg.canRetry(ctx, err, transactionID, isStreaming) { invalidTablets[discovery.TabletToMapKey(ts.Tablet)] = true continue } break } return NewShardError(err, keyspace, shard, tabletType, tabletLastUsed, inTransaction) }
// tabletStats creates fake tablet health data. func tabletStats(uid, lag uint32) discovery.TabletStats { typ := topodatapb.TabletType_REPLICA if uid == rdonly1 || uid == rdonly2 { typ = topodatapb.TabletType_RDONLY } tablet := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{Cell: "cell1", Uid: uid}, Keyspace: "ks1", Shard: "-80", Type: typ, PortMap: map[string]int32{"vt": int32(uid)}, } return discovery.TabletStats{ Tablet: tablet, Key: discovery.TabletToMapKey(tablet), Target: &querypb.Target{ Keyspace: "ks1", Shard: "-80", TabletType: typ, }, Up: true, Serving: true, Stats: &querypb.RealtimeStats{ SecondsBehindMaster: lag, }, TabletExternallyReparentedTimestamp: 22, LastError: nil, } }
// startWaitingOnUnhealthyTablet registers the tablet as being waited on in a way that // doesn't race with StatsUpdate(). If the tablet is already healthy then the function // will return nil as the channel and nil as the error. If the tablet is unhealthy now // then function will return the channel that will be closed once tablet becomes healthy // and caught up with replication. Note that the channel is returned so that the caller // could wait on it without necessity to lock allTabletsLock. func (shardSwap *shardSchemaSwap) startWaitingOnUnhealthyTablet(tablet *topodatapb.Tablet) (*chan interface{}, error) { shardSwap.allTabletsLock.Lock() defer shardSwap.allTabletsLock.Unlock() tabletKey := discovery.TabletToMapKey(tablet) tabletStats, tabletFound := shardSwap.allTablets[tabletKey] if !tabletFound { return nil, fmt.Errorf("Tablet %v has disappeared while doing schema swap", tablet.Alias) } if isTabletHealthy(tabletStats) { return nil, nil } waitingChannel := make(chan interface{}) shardSwap.healthWaitingChannel = &waitingChannel shardSwap.healthWaitingTablet = tabletKey return shardSwap.healthWaitingChannel, nil }