// getNewConn creates a new tablet connection with a separate per conn timeout. // It limits the overall timeout to connTimeoutTotal by checking elapsed time after each blocking call. func (sdc *ShardConn) getNewConn(ctx context.Context) (conn tabletconn.TabletConn, endPoint *topodatapb.EndPoint, isTimeout bool, err error) { startTime := time.Now() endPoints, err := sdc.balancer.Get() if err != nil { // Error when getting endpoint return nil, nil, false, err } if len(endPoints) == 0 { // No valid endpoint return nil, nil, false, vterrors.FromError( vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no valid endpoint"), ) } if time.Now().Sub(startTime) >= sdc.connTimeoutTotal { return nil, nil, true, vterrors.FromError( vtrpcpb.ErrorCode_DEADLINE_EXCEEDED, fmt.Errorf("timeout when getting endpoints"), ) } // Iterate through all endpoints to create a connection perConnTimeout := sdc.getConnTimeoutPerConn(len(endPoints)) allErrors := new(concurrency.AllErrorRecorder) for _, endPoint := range endPoints { perConnStartTime := time.Now() conn, err = tabletconn.GetDialer()(ctx, endPoint, sdc.keyspace, sdc.shard, topodatapb.TabletType_UNKNOWN, perConnTimeout) if err == nil { sdc.connectTimings.Record([]string{sdc.keyspace, sdc.shard, strings.ToLower(sdc.tabletType.String())}, perConnStartTime) sdc.mu.Lock() defer sdc.mu.Unlock() sdc.conn = conn return conn, endPoint, false, nil } // Markdown the endpoint if it failed to connect sdc.balancer.MarkDown(endPoint.Uid, err.Error()) vtErr := vterrors.NewVitessError( // TODO(aaijazi): what about OperationalErrors here? vterrors.RecoverVtErrorCode(err), err, "%v %+v", err, endPoint, ) allErrors.RecordError(vtErr) if time.Now().Sub(startTime) >= sdc.connTimeoutTotal { err = vterrors.FromError( vtrpcpb.ErrorCode_DEADLINE_EXCEEDED, fmt.Errorf("timeout when connecting to %+v", endPoint), ) allErrors.RecordError(err) return nil, nil, true, allErrors.AggrError(AggregateVtGateErrors) } } return nil, nil, false, allErrors.Error() }
// ExecuteBatch executes a batch of non-streaming queries on the specified shards. func (stc *ScatterConn) ExecuteBatch( ctx context.Context, batchRequest *scatterBatchRequest, tabletType topodatapb.TabletType, asTransaction bool, session *SafeSession) (qrs []sqltypes.Result, err error) { allErrors := new(concurrency.AllErrorRecorder) results := make([]sqltypes.Result, batchRequest.Length) var resMutex sync.Mutex var wg sync.WaitGroup for _, req := range batchRequest.Requests { wg.Add(1) go func(req *shardBatchRequest) { defer wg.Done() startTime, statsKey, transactionID, err := stc.startAction(ctx, "ExecuteBatch", req.Keyspace, req.Shard, tabletType, session, false, allErrors) defer stc.endAction(startTime, allErrors, statsKey, &err) if err != nil { return } var innerqrs []sqltypes.Result innerqrs, err = stc.gateway.ExecuteBatch(ctx, req.Keyspace, req.Shard, tabletType, req.Queries, asTransaction, transactionID) if err != nil { return } resMutex.Lock() defer resMutex.Unlock() for i, result := range innerqrs { appendResult(&results[req.ResultIndexes[i]], &result) } }(req) } wg.Wait() // If we want to rollback, we have to do it before closing results // so that the session is updated to be not InTransaction. if allErrors.HasErrors() { stc.rollbackIfNeeded(ctx, allErrors, session) return nil, allErrors.AggrError(stc.aggregateErrors) } return results, nil }
// InitializeConnections pre-initializes connections for all shards. // It also populates topology cache by accessing it. // It is not necessary to call this function before serving queries, // but it would reduce connection overhead when serving. func (sg *shardGateway) InitializeConnections(ctx context.Context) error { ksNames, err := sg.toposerv.GetSrvKeyspaceNames(ctx, sg.cell) if err != nil { return err } var wg sync.WaitGroup var errRecorder concurrency.AllErrorRecorder for _, ksName := range ksNames { wg.Add(1) go func(keyspace string) { defer wg.Done() // get SrvKeyspace for cell/keyspace ks, err := sg.toposerv.GetSrvKeyspace(ctx, sg.cell, keyspace) if err != nil { errRecorder.RecordError(err) return } // work on all shards of all serving tablet types for _, ksPartition := range ks.Partitions { tt := ksPartition.ServedType for _, shard := range ksPartition.ShardReferences { wg.Add(1) go func(shardName string, tabletType topodatapb.TabletType) { defer wg.Done() err = sg.getConnection(ctx, keyspace, shardName, tabletType).Dial(ctx) if err != nil { errRecorder.RecordError(err) return } }(shard.Name, tt) } } }(ksName) } wg.Wait() if errRecorder.HasErrors() { return errRecorder.AggrError(AggregateVtGateErrors) } return nil }
// ExecuteBatch executes a batch of non-streaming queries on the specified shards. func (stc *ScatterConn) ExecuteBatch( ctx context.Context, batchRequest *scatterBatchRequest, tabletType topodatapb.TabletType, asTransaction bool, session *SafeSession) (qrs []sqltypes.Result, err error) { allErrors := new(concurrency.AllErrorRecorder) results := make([]sqltypes.Result, batchRequest.Length) var resMutex sync.Mutex var wg sync.WaitGroup for _, req := range batchRequest.Requests { wg.Add(1) go func(req *shardBatchRequest) { statsKey := []string{"ExecuteBatch", req.Keyspace, req.Shard, strings.ToLower(tabletType.String())} defer wg.Done() startTime := time.Now() defer stc.timings.Record(statsKey, startTime) transactionID, err := stc.updateSession(ctx, req.Keyspace, req.Shard, tabletType, session, false) if err != nil { allErrors.RecordError(err) stc.tabletCallErrorCount.Add(statsKey, 1) return } innerqrs, err := stc.gateway.ExecuteBatch(ctx, req.Keyspace, req.Shard, tabletType, req.Queries, asTransaction, transactionID) if err != nil { allErrors.RecordError(err) // Don't increment the error counter for duplicate keys, as those errors // are caused by client queries and are not VTGate's fault. // TODO(aaijazi): get rid of this string parsing, and handle all cases of invalid input if !strings.Contains(err.Error(), errDupKey) && !strings.Contains(err.Error(), errOutOfRange) { stc.tabletCallErrorCount.Add(statsKey, 1) } return } // Encapsulate in a function for safe mutex operation. func() { resMutex.Lock() defer resMutex.Unlock() for i, result := range innerqrs { appendResult(&results[req.ResultIndexes[i]], &result) } }() }(req) } wg.Wait() // If we want to rollback, we have to do it before closing results // so that the session is updated to be not InTransaction. if allErrors.HasErrors() { if session.InTransaction() { errstr := allErrors.Error().Error() // We cannot recover from these errors // TODO(aaijazi): get rid of this string parsing if strings.Contains(errstr, "tx_pool_full") || strings.Contains(errstr, "not_in_tx") { stc.Rollback(ctx, session) } } return nil, allErrors.AggrError(stc.aggregateErrors) } return results, nil }
// ExecuteBatch executes a batch of non-streaming queries on the specified shards. func (stc *ScatterConn) ExecuteBatch( ctx context.Context, batchRequest *scatterBatchRequest, tabletType topodatapb.TabletType, asTransaction bool, session *SafeSession, options *querypb.ExecuteOptions) (qrs []sqltypes.Result, err error) { allErrors := new(concurrency.AllErrorRecorder) results := make([]sqltypes.Result, batchRequest.Length) var resMutex sync.Mutex var wg sync.WaitGroup for _, req := range batchRequest.Requests { wg.Add(1) go func(req *shardBatchRequest) { defer wg.Done() target := &querypb.Target{ Keyspace: req.Keyspace, Shard: req.Shard, TabletType: tabletType, } var err error startTime, statsKey := stc.startAction("ExecuteBatch", target) defer stc.endAction(startTime, allErrors, statsKey, &err) shouldBegin, transactionID := transactionInfo(target, session, false) var innerqrs []sqltypes.Result if shouldBegin { innerqrs, transactionID, err = stc.gateway.BeginExecuteBatch(ctx, target, req.Queries, asTransaction, options) if transactionID != 0 { session.Append(&vtgatepb.Session_ShardSession{ Target: target, TransactionId: transactionID, }) } if err != nil { return } } else { innerqrs, err = stc.gateway.ExecuteBatch(ctx, target, req.Queries, asTransaction, transactionID, options) if err != nil { return } } resMutex.Lock() defer resMutex.Unlock() for i, result := range innerqrs { appendResult(&results[req.ResultIndexes[i]], &result) } }(req) } wg.Wait() // If we want to rollback, we have to do it before closing results // so that the session is updated to be not InTransaction. if allErrors.HasErrors() { err := allErrors.AggrError(stc.aggregateErrors) stc.rollbackIfNeeded(ctx, err, session) return nil, err } return results, nil }