Exemple #1
0
// trimmedRequestToError returns an error for a trimmed request by looking at the
// requested error type. It assumes that prefix checking has already been done.
// If the received string doesn't match a known error, returns an unknown error.
func trimmedRequestToError(received string) error {
	switch received {
	case "integrity error":
		return vterrors.FromError(
			vtrpc.ErrorCode_INTEGRITY_ERROR,
			errors.New("vtgate test client forced error: integrity error (errno 1062)"),
		)
	// request backlog and general throttling type errors
	case "transient error":
		return vterrors.FromError(
			vtrpc.ErrorCode_TRANSIENT_ERROR,
			errors.New("request_backlog: too many requests in flight: vtgate test client forced error"),
		)
	case "unknown error":
		return vterrors.FromError(
			vtrpc.ErrorCode_UNKNOWN_ERROR,
			errors.New("vtgate test client forced error: unknown error"),
		)
	default:
		return vterrors.FromError(
			vtrpc.ErrorCode_UNKNOWN_ERROR,
			fmt.Errorf("vtgate test client error request unrecognized: %v", received),
		)
	}
}
Exemple #2
0
// Commit commits the current transaction. There are no retries on this operation.
func (stc *ScatterConn) Commit(ctx context.Context, session *SafeSession) (err error) {
	if session == nil {
		return vterrors.FromError(
			vtrpcpb.ErrorCode_BAD_INPUT,
			fmt.Errorf("cannot commit: empty session"),
		)
	}
	if !session.InTransaction() {
		return vterrors.FromError(
			vtrpcpb.ErrorCode_NOT_IN_TX,
			fmt.Errorf("cannot commit: not in transaction"),
		)
	}
	committing := true
	for _, shardSession := range session.ShardSessions {
		if !committing {
			stc.gateway.Rollback(ctx, shardSession.Target.Keyspace, shardSession.Target.Shard, shardSession.Target.TabletType, shardSession.TransactionId)
			continue
		}
		if err = stc.gateway.Commit(ctx, shardSession.Target.Keyspace, shardSession.Target.Shard, shardSession.Target.TabletType, shardSession.TransactionId); err != nil {
			committing = false
		}
	}
	session.Reset()
	return err
}
Exemple #3
0
// trimmedRequestToError returns an error for a trimmed request by looking at the
// requested error type. It assumes that prefix checking has already been done.
// If the received string doesn't match a known error, returns an unknown error.
func trimmedRequestToError(received string) error {
	switch received {
	case "bad input":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_BAD_INPUT,
			errors.New("vtgate test client forced error: bad input"),
		)
	case "deadline exceeded":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_DEADLINE_EXCEEDED,
			errors.New("vtgate test client forced error: deadline exceeded"),
		)
	case "integrity error":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_INTEGRITY_ERROR,
			errors.New("vtgate test client forced error: integrity error (errno 1062) (sqlstate 23000)"),
		)
	// request backlog and general throttling type errors
	case "transient error":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_TRANSIENT_ERROR,
			errors.New("request_backlog: too many requests in flight: vtgate test client forced error: transient error"),
		)
	case "throttled error":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_TRANSIENT_ERROR,
			errors.New("request_backlog: exceeded XXX quota, rate limiting: vtgate test client forced error: transient error"),
		)
	case "unauthenticated":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_UNAUTHENTICATED,
			errors.New("vtgate test client forced error: unauthenticated"),
		)
	case "aborted":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_NOT_IN_TX,
			errors.New("vtgate test client forced error: aborted"),
		)
	case "query not served":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_QUERY_NOT_SERVED,
			errors.New("vtgate test client forced error: query not served"),
		)
	case "unknown error":
		return vterrors.FromError(
			vtrpcpb.ErrorCode_UNKNOWN_ERROR,
			errors.New("vtgate test client forced error: unknown error"),
		)
	default:
		return vterrors.FromError(
			vtrpcpb.ErrorCode_UNKNOWN_ERROR,
			fmt.Errorf("vtgate test client error request unrecognized: %v", received),
		)
	}
}
Exemple #4
0
// withRetry gets available connections and executes the action. If there are retryable errors,
// it retries retryCount times before failing. It does not retry if the connection is in
// the middle of a transaction. While returning the error check if it maybe a result of
// a resharding event, and set the re-resolve bit and let the upper layers
// re-resolve and retry.
func (dg *discoveryGateway) withRetry(ctx context.Context, keyspace, shard string, tabletType topodatapb.TabletType, action func(conn tabletconn.TabletConn, target *querypb.Target) error, transactionID int64, isStreaming bool) error {
	var tabletLastUsed *topodatapb.Tablet
	var err error
	inTransaction := (transactionID != 0)
	invalidTablets := make(map[string]bool)

	for i := 0; i < dg.retryCount+1; i++ {
		tablets := dg.getTablets(keyspace, shard, tabletType)
		if len(tablets) == 0 {
			// fail fast if there is no tablet
			err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no valid tablet"))
			break
		}
		shuffleTablets(tablets)

		// skip tablets we tried before
		var ts *discovery.TabletStats
		for _, t := range tablets {
			if _, ok := invalidTablets[discovery.TabletToMapKey(t.Tablet)]; !ok {
				ts = t
				break
			}
		}
		if ts == nil {
			if err == nil {
				// do not override error from last attempt.
				err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no available connection"))
			}
			break
		}

		// execute
		tabletLastUsed = ts.Tablet
		conn := dg.hc.GetConnection(ts.Tablet)
		if conn == nil {
			err = vterrors.FromError(vtrpcpb.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no connection for %+v", ts.Tablet))
			invalidTablets[discovery.TabletToMapKey(ts.Tablet)] = true
			continue
		}

		// Potentially buffer this request.
		if bufferErr := masterbuffer.FakeBuffer(keyspace, shard, tabletType, inTransaction, i); bufferErr != nil {
			return bufferErr
		}

		err = action(conn, ts.Target)
		if dg.canRetry(ctx, err, transactionID, isStreaming) {
			invalidTablets[discovery.TabletToMapKey(ts.Tablet)] = true
			continue
		}
		break
	}
	return NewShardError(err, keyspace, shard, tabletType, tabletLastUsed, inTransaction)
}
Exemple #5
0
// getNewConn creates a new tablet connection with a separate per conn timeout.
// It limits the overall timeout to connTimeoutTotal by checking elapsed time after each blocking call.
func (sdc *ShardConn) getNewConn(ctx context.Context) (conn tabletconn.TabletConn, endPoint *topodatapb.EndPoint, isTimeout bool, err error) {
	startTime := time.Now()

	endPoints, err := sdc.balancer.Get()
	if err != nil {
		// Error when getting endpoint
		return nil, nil, false, err
	}
	if len(endPoints) == 0 {
		// No valid endpoint
		return nil, nil, false, vterrors.FromError(
			vtrpcpb.ErrorCode_INTERNAL_ERROR,
			fmt.Errorf("no valid endpoint"),
		)
	}
	if time.Now().Sub(startTime) >= sdc.connTimeoutTotal {
		return nil, nil, true, vterrors.FromError(
			vtrpcpb.ErrorCode_DEADLINE_EXCEEDED,
			fmt.Errorf("timeout when getting endpoints"),
		)
	}

	// Iterate through all endpoints to create a connection
	perConnTimeout := sdc.getConnTimeoutPerConn(len(endPoints))
	allErrors := new(concurrency.AllErrorRecorder)
	for _, endPoint := range endPoints {
		perConnStartTime := time.Now()
		conn, err = tabletconn.GetDialer()(ctx, endPoint, sdc.keyspace, sdc.shard, topodatapb.TabletType_UNKNOWN, perConnTimeout)
		if err == nil {
			sdc.connectTimings.Record([]string{sdc.keyspace, sdc.shard, strings.ToLower(sdc.tabletType.String())}, perConnStartTime)
			sdc.mu.Lock()
			defer sdc.mu.Unlock()
			sdc.conn = conn
			return conn, endPoint, false, nil
		}
		// Markdown the endpoint if it failed to connect
		sdc.balancer.MarkDown(endPoint.Uid, err.Error())
		vtErr := vterrors.NewVitessError(
			// TODO(aaijazi): what about OperationalErrors here?
			vterrors.RecoverVtErrorCode(err), err,
			"%v %+v", err, endPoint,
		)
		allErrors.RecordError(vtErr)
		if time.Now().Sub(startTime) >= sdc.connTimeoutTotal {
			err = vterrors.FromError(
				vtrpcpb.ErrorCode_DEADLINE_EXCEEDED,
				fmt.Errorf("timeout when connecting to %+v", endPoint),
			)
			allErrors.RecordError(err)
			return nil, nil, true, allErrors.AggrError(AggregateVtGateErrors)
		}
	}
	return nil, nil, false, allErrors.Error()
}
Exemple #6
0
// withRetry gets available connections and executes the action. If there are retryable errors,
// it retries retryCount times before failing. It does not retry if the connection is in
// the middle of a transaction. While returning the error check if it maybe a result of
// a resharding event, and set the re-resolve bit and let the upper layers
// re-resolve and retry.
func (dg *discoveryGateway) withRetry(ctx context.Context, keyspace, shard string, tabletType pbt.TabletType, action func(conn tabletconn.TabletConn) error, transactionID int64, isStreaming bool) error {
	var endPointLastUsed *pbt.EndPoint
	var err error
	inTransaction := (transactionID != 0)
	invalidEndPoints := make(map[string]bool)

	for i := 0; i < dg.retryCount+1; i++ {
		var endPoint *pbt.EndPoint
		endPoints := dg.getEndPoints(keyspace, shard, tabletType)
		if len(endPoints) == 0 {
			// fail fast if there is no endpoint
			err = vterrors.FromError(vtrpc.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no valid endpoint"))
			break
		}
		shuffleEndPoints(endPoints)

		// skip endpoints we tried before
		for _, ep := range endPoints {
			if _, ok := invalidEndPoints[discovery.EndPointToMapKey(ep)]; !ok {
				endPoint = ep
				break
			}
		}
		if endPoint == nil {
			if err == nil {
				// do not override error from last attempt.
				err = vterrors.FromError(vtrpc.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no available connection"))
			}
			break
		}

		// execute
		endPointLastUsed = endPoint
		conn := dg.hc.GetConnection(endPoint)
		if conn == nil {
			err = vterrors.FromError(vtrpc.ErrorCode_INTERNAL_ERROR, fmt.Errorf("no connection for %+v", endPoint))
			invalidEndPoints[discovery.EndPointToMapKey(endPoint)] = true
			continue
		}
		err = action(conn)
		if dg.canRetry(ctx, err, transactionID, isStreaming) {
			invalidEndPoints[discovery.EndPointToMapKey(endPoint)] = true
			continue
		}
		break
	}
	return WrapError(err, keyspace, shard, tabletType, endPointLastUsed, inTransaction)
}
Exemple #7
0
func getShardForKeyspaceID(allShards []*pb.ShardReference, keyspaceID []byte) (string, error) {
	if len(allShards) == 0 {
		return "", vterrors.FromError(vtrpc.ErrorCode_BAD_INPUT,
			fmt.Errorf("No shards found for this tabletType"),
		)
	}

	for _, shardReference := range allShards {
		if key.KeyRangeContains(shardReference.KeyRange, keyspaceID) {
			return shardReference.Name, nil
		}
	}
	return "", vterrors.FromError(vtrpc.ErrorCode_BAD_INPUT,
		fmt.Errorf("KeyspaceId %v didn't match any shards %+v", hex.EncodeToString(keyspaceID), allShards),
	)
}
Exemple #8
0
func TestAggregateVtGateErrors(t *testing.T) {
	var testcases = []struct {
		input    []error
		expected error
	}{
		{
			input:    nil,
			expected: nil,
		},
		{
			input: []error{
				errFromCode(vtrpc.ErrorCode_SUCCESS),
				errFromCode(vtrpc.ErrorCode_TRANSIENT_ERROR),
				errFromCode(vtrpc.ErrorCode_BAD_INPUT),
			},
			expected: vterrors.FromError(
				vtrpc.ErrorCode_BAD_INPUT,
				vterrors.ConcatenateErrors([]error{errGeneric, errGeneric, errGeneric}),
			),
		},
	}
	for _, tc := range testcases {
		out := aggregateVtGateErrors(tc.input)
		if !reflect.DeepEqual(out, tc.expected) {
			t.Errorf("aggregateVtGateErrors(%+v) = %+v \nwant: %+v",
				tc.input, out, tc.expected)
		}
	}
}
Exemple #9
0
func aggregateVtGateErrors(errors []error) error {
	if len(errors) == 0 {
		return nil
	}
	return vterrors.FromError(
		aggregateVtGateErrorCodes(errors),
		vterrors.ConcatenateErrors(errors),
	)
}
Exemple #10
0
func getAnyShard(ctx context.Context, topoServ topo.SrvTopoServer, cell, keyspace string, tabletType topodatapb.TabletType) (ks, shard string, err error) {
	keyspace, _, allShards, err := getKeyspaceShards(ctx, topoServ, cell, keyspace, tabletType)
	if err != nil {
		return "", "", err
	}
	if len(allShards) == 0 {
		return "", "", vterrors.FromError(vtrpcpb.ErrorCode_BAD_INPUT,
			fmt.Errorf("No shards found for this tabletType"),
		)
	}
	return keyspace, allShards[0].Name, nil
}
Exemple #11
0
// ExecuteKeyspaceIds executes a non-streaming query based on KeyspaceIds.
// It retries query if new keyspace/shards are re-resolved after a retryable error.
// This throws an error if a dml spans multiple keyspace_ids. Resharding depends
// on being able to uniquely route a write.
func (res *Resolver) ExecuteKeyspaceIds(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, keyspaceIds [][]byte, tabletType pb.TabletType, session *proto.Session, notInTransaction bool) (*mproto.QueryResult, error) {
	if isDml(sql) && len(keyspaceIds) > 1 {
		return nil, vterrors.FromError(
			vtrpc.ErrorCode_BAD_INPUT,
			fmt.Errorf("DML should not span multiple keyspace_ids"),
		)
	}
	mapToShards := func(k string) (string, []string, error) {
		return mapKeyspaceIdsToShards(
			ctx,
			res.toposerv,
			res.cell,
			k,
			tabletType,
			keyspaceIds)
	}
	return res.Execute(ctx, sql, bindVariables, keyspace, tabletType, session, mapToShards, notInTransaction)
}
Exemple #12
0
// ExecuteKeyspaceIds executes a non-streaming query based on KeyspaceIds.
// It retries query if new keyspace/shards are re-resolved after a retryable error.
// This throws an error if a dml spans multiple keyspace_ids. Resharding depends
// on being able to uniquely route a write.
func (res *Resolver) ExecuteKeyspaceIds(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, keyspaceIds [][]byte, tabletType topodatapb.TabletType, session *vtgatepb.Session, notInTransaction bool, options *querypb.ExecuteOptions) (*sqltypes.Result, error) {
	if sqlannotation.IsDML(sql) && len(keyspaceIds) > 1 {
		return nil, vterrors.FromError(
			vtrpcpb.ErrorCode_BAD_INPUT,
			fmt.Errorf("DML should not span multiple keyspace_ids"),
		)
	}
	mapToShards := func(k string) (string, []string, error) {
		return mapKeyspaceIdsToShards(
			ctx,
			res.toposerv,
			res.cell,
			k,
			tabletType,
			keyspaceIds)
	}
	return res.Execute(ctx, sql, bindVariables, keyspace, tabletType, session, mapToShards, notInTransaction, options)
}
Exemple #13
0
func (blc *Balancer) refresh() error {
	endPoints, err := blc.getEndPoints()
	if err != nil {
		return err
	}
	// Add new addressNodes
	if endPoints != nil {
		for _, endPoint := range endPoints.Entries {
			if index := findAddrNode(blc.addressNodes, endPoint.Uid); index == -1 {
				addrNode := &addressStatus{
					endPoint: endPoint,
					balancer: blc,
				}
				blc.addressNodes = append(blc.addressNodes, addrNode)
			} else {
				blc.addressNodes[index].endPoint = endPoint
			}
		}
	}
	// Remove those that went away
	i := 0
	for i < len(blc.addressNodes) {
		if index := findAddress(endPoints, blc.addressNodes[i].endPoint.Uid); index == -1 {
			blc.addressNodes = delAddrNode(blc.addressNodes, i)
			continue
		}
		i++
	}
	if len(blc.addressNodes) == 0 {
		return vterrors.FromError(
			vtrpc.ErrorCode_INTERNAL_ERROR,
			fmt.Errorf("no available addresses"),
		)
	}
	// Sort endpoints by timeRetry (from ZERO to largest)
	sort.Sort(AddressList(blc.addressNodes))
	// Randomize endpoints with ZERO timeRetry
	shuffle(blc.addressNodes, findFirstAddrNodeNonZeroTimeRetry(blc.addressNodes))
	return nil
}
Exemple #14
0
// mapExactShards maps a keyrange to shards only if there's a complete
// match. If there's any partial match the function returns no match.
func mapExactShards(ctx context.Context, topoServ SrvTopoServer, cell, keyspace string, tabletType pb.TabletType, kr *pb.KeyRange) (newkeyspace string, shards []string, err error) {
	keyspace, _, allShards, err := getKeyspaceShards(ctx, topoServ, cell, keyspace, tabletType)
	if err != nil {
		return "", nil, err
	}
	shardnum := 0
	for shardnum < len(allShards) {
		if bytes.Compare(kr.Start, []byte(allShards[shardnum].KeyRange.Start)) == 0 {
			break
		}
		shardnum++
	}
	for shardnum < len(allShards) {
		shards = append(shards, allShards[shardnum].Name)
		if bytes.Compare(kr.End, []byte(allShards[shardnum].KeyRange.End)) == 0 {
			return keyspace, shards, nil
		}
		shardnum++
	}
	return keyspace, nil, vterrors.FromError(vtrpc.ErrorCode_BAD_INPUT,
		fmt.Errorf("keyrange %v does not exactly match shards", key.KeyRangeString(kr)),
	)
}
Exemple #15
0
const errOutOfRange = "errno 1264"
const errTxPoolFull = "tx_pool_full"

var (
	rpcVTGate *VTGate

	qpsByOperation *stats.Rates
	qpsByKeyspace  *stats.Rates
	qpsByDbType    *stats.Rates

	errorsByOperation *stats.Rates
	errorsByKeyspace  *stats.Rates
	errorsByDbType    *stats.Rates

	errTooManyInFlight = vterrors.FromError(
		vtrpc.ErrorCode_TRANSIENT_ERROR,
		errors.New("request_backlog: too many requests in flight"),
	)

	// Error counters should be global so they can be set from anywhere
	normalErrors   *stats.MultiCounters
	infoErrors     *stats.Counters
	internalErrors *stats.Counters
)

// VTGate is the rpc interface to vtgate. Only one instance
// can be created. It implements vtgateservice.VTGateService
type VTGate struct {
	resolver     *Resolver
	router       *Router
	timings      *stats.MultiTimings
	rowsReturned *stats.MultiCounters
Exemple #16
0
func errFromCode(c vtrpc.ErrorCode) error {
	return vterrors.FromError(c, errGeneric)
}
Exemple #17
0
	maxBufferSize          = flag.Int("max_buffer_size", 10, "The maximum number of master requests to buffer at a time.")
	fakeBufferDelay        = flag.Duration("fake_buffer_delay", 1*time.Second, "The amount of time that we should delay all master requests for, to fake a buffer.")

	bufferedRequestsAttempted  = stats.NewInt("BufferedRequestsAttempted")
	bufferedRequestsSuccessful = stats.NewInt("BufferedRequestsSuccessful")
	// Use this lock when adding to the number of currently buffered requests.
	bufferMu         sync.Mutex
	bufferedRequests = stats.NewInt("BufferedRequests")
)

// timeSleep can be mocked out in unit tests
var timeSleep = time.Sleep

// errBufferFull is the error returned a buffer request is rejected because the buffer is full.
var errBufferFull = vterrors.FromError(
	vtrpcpb.ErrorCode_TRANSIENT_ERROR,
	errors.New("master request buffer full, rejecting request"),
)

// FakeBuffer will pretend to buffer master requests in VTGate.
// Requests *will NOT actually be buffered*, they will just be delayed.
// This can be useful to understand what the impact of master request buffering will be
// on upstream callers. Once the impact is measured, it can be used to tweak parameter values
// for the best behavior.
// FakeBuffer should be called before a potential VtTablet Begin, otherwise it will increase transaction times.
func FakeBuffer(keyspace, shard string, tabletType topodatapb.TabletType, inTransaction bool, attemptNumber int) error {
	if !*enableFakeMasterBuffer {
		return nil
	}
	// Don't buffer non-master traffic, requests that are inside transactions, or retries.
	if tabletType != topodatapb.TabletType_MASTER || inTransaction || attemptNumber != 0 {
		return nil
Exemple #18
0
// setAndStartWorker will set the current worker.
// We always log to both memory logger (for display on the web) and
// console logger (for records / display of command line worker).
func (wi *Instance) setAndStartWorker(wrk Worker, wr *wrangler.Wrangler) (chan struct{}, error) {
	wi.currentWorkerMutex.Lock()
	defer wi.currentWorkerMutex.Unlock()

	if wi.currentContext != nil {
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
			fmt.Errorf("A worker job is already in progress: %v", wi.currentWorker))
	}

	if wi.currentWorker != nil {
		// During the grace period, we answer with a retryable error.
		const gracePeriod = 1 * time.Minute
		gracePeriodEnd := time.Now().Add(gracePeriod)
		if wi.lastRunStopTime.Before(gracePeriodEnd) {
			return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
				fmt.Errorf("A worker job was recently stopped (%f seconds ago): %v",
					time.Now().Sub(wi.lastRunStopTime).Seconds(),
					wi.currentWorker))
		}

		// QUERY_NOT_SERVED = FailedPrecondition => manual resolution required.
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED,
			fmt.Errorf("The worker job was stopped %.1f minutes ago, but not reset. You have to reset it manually. Job: %v",
				time.Now().Sub(wi.lastRunStopTime).Minutes(),
				wi.currentWorker))
	}

	wi.currentWorker = wrk
	wi.currentMemoryLogger = logutil.NewMemoryLogger()
	wi.currentContext, wi.currentCancelFunc = context.WithCancel(wi.backgroundContext)
	wi.lastRunError = nil
	wi.lastRunStopTime = time.Unix(0, 0)
	done := make(chan struct{})
	wranglerLogger := wr.Logger()
	if wr == wi.wr {
		// If it's the default wrangler, do not reuse its logger because it may have been set before.
		// Resuing it would result into an endless recursion.
		wranglerLogger = logutil.NewConsoleLogger()
	}
	wr.SetLogger(logutil.NewTeeLogger(wi.currentMemoryLogger, wranglerLogger))

	// one go function runs the worker, changes state when done
	go func() {
		log.Infof("Starting worker...")
		var err error

		// Catch all panics and always save the execution state at the end.
		defer func() {
			// The recovery code is a copy of servenv.HandlePanic().
			if x := recover(); x != nil {
				log.Errorf("uncaught vtworker panic: %v\n%s", x, tb.Stack(4))
				err = fmt.Errorf("uncaught vtworker panic: %v", x)
			}

			wi.currentWorkerMutex.Lock()
			wi.currentContext = nil
			wi.currentCancelFunc = nil
			wi.lastRunError = err
			wi.lastRunStopTime = time.Now()
			wi.currentWorkerMutex.Unlock()
			close(done)
		}()

		// run will take a long time
		err = wrk.Run(wi.currentContext)
	}()

	return done, nil
}