Example #1
0
// isRetryable reports whether err carries the TRANSIENT_ERROR code, which is
// the only code this helper currently treats as retryable.
// TODO(mberlin): Discuss with the team if it should go to the vterrors package.
// TODO(mberlin): Add other error codes here as well?
func isRetryable(err error) bool {
	return vterrors.RecoverVtErrorCode(err) == vtrpcpb.ErrorCode_TRANSIENT_ERROR
}
Example #2
0
// rollbackIfNeeded rolls the session back when it is in a transaction and err
// carries a code the transaction cannot survive (RESOURCE_EXHAUSTED or
// NOT_IN_TX). In every other situation it does nothing.
func (stc *ScatterConn) rollbackIfNeeded(ctx context.Context, err error, session *SafeSession) {
	if !session.InTransaction() {
		return
	}
	switch vterrors.RecoverVtErrorCode(err) {
	case vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED, vtrpcpb.ErrorCode_NOT_IN_TX:
		// We cannot recover from these errors
		stc.Rollback(ctx, session)
	}
}
Example #3
0
// rpcErrFromVtGateError translates an error from VTGate into an
// *mproto.RPCError. The code is the one recovered from err by
// vterrors.RecoverVtErrorCode and the message is err.Error().
// A nil err yields a nil result.
func rpcErrFromVtGateError(err error) *mproto.RPCError {
	if err == nil {
		return nil
	}
	rpcErr := new(mproto.RPCError)
	rpcErr.Code = int64(vterrors.RecoverVtErrorCode(err))
	rpcErr.Message = err.Error()
	return rpcErr
}
Example #4
0
// aggregateVtGateErrorCodes reduces a list of errors to one error code: the
// code whose entry in errorPriorities is the largest. Ties keep the code seen
// first, and an empty list yields ErrorCode_SUCCESS.
func aggregateVtGateErrorCodes(errors []error) vtrpc.ErrorCode {
	best := vtrpc.ErrorCode_SUCCESS
	bestPriority := errorPriorities[best]
	for i := range errors {
		candidate := vterrors.RecoverVtErrorCode(errors[i])
		if p := errorPriorities[candidate]; p > bestPriority {
			best, bestPriority = candidate, p
		}
	}
	return best
}
Example #5
0
// verifyError checks that err is non-nil and carries expectedCode, then hands
// it to verifyErrorExceptServerCode for the remaining checks. method is used
// only to label failure messages.
func verifyError(t *testing.T, err error, method string) {
	if err == nil {
		t.Errorf("%s was expecting an error, didn't get one", method)
		return
	}
	if got := vterrors.RecoverVtErrorCode(err); got != expectedCode {
		t.Errorf("Unexpected server code from %s: got %v, wanted %v", method, got, expectedCode)
	}
	verifyErrorExceptServerCode(t, err, method)
}
Example #6
0
// getNewConn dials the endpoints returned by the balancer one after another
// until one of them accepts the connection. Each dial gets its own per-conn
// timeout, and the elapsed time is compared against connTimeoutTotal after
// every blocking call so the whole attempt stays bounded.
//
// On success the connection is stored in sdc.conn (under sdc.mu) and returned
// together with the endpoint it belongs to. On failure conn and endPoint are
// nil; isTimeout is true only when the overall deadline was hit.
func (sdc *ShardConn) getNewConn(ctx context.Context) (conn tabletconn.TabletConn, endPoint *topodatapb.EndPoint, isTimeout bool, err error) {
	begin := time.Now()

	candidates, err := sdc.balancer.Get()
	switch {
	case err != nil:
		// Error when getting endpoint
		return nil, nil, false, err
	case len(candidates) == 0:
		// No valid endpoint
		return nil, nil, false, vterrors.FromError(
			vtrpcpb.ErrorCode_INTERNAL_ERROR,
			fmt.Errorf("no valid endpoint"),
		)
	case time.Since(begin) >= sdc.connTimeoutTotal:
		return nil, nil, true, vterrors.FromError(
			vtrpcpb.ErrorCode_DEADLINE_EXCEEDED,
			fmt.Errorf("timeout when getting endpoints"),
		)
	}

	// Try every candidate in order; the first successful dial wins.
	dialTimeout := sdc.getConnTimeoutPerConn(len(candidates))
	recorder := new(concurrency.AllErrorRecorder)
	for _, candidate := range candidates {
		dialStart := time.Now()
		conn, err = tabletconn.GetDialer()(ctx, candidate, sdc.keyspace, sdc.shard, topodatapb.TabletType_UNKNOWN, dialTimeout)
		if err != nil {
			// Mark the endpoint down since it failed to connect.
			sdc.balancer.MarkDown(candidate.Uid, err.Error())
			// TODO(aaijazi): what about OperationalErrors here?
			recorder.RecordError(vterrors.NewVitessError(
				vterrors.RecoverVtErrorCode(err), err,
				"%v %+v", err, candidate,
			))
			if time.Since(begin) < sdc.connTimeoutTotal {
				// Still within budget: move on to the next candidate.
				continue
			}
			err = vterrors.FromError(
				vtrpcpb.ErrorCode_DEADLINE_EXCEEDED,
				fmt.Errorf("timeout when connecting to %+v", candidate),
			)
			recorder.RecordError(err)
			return nil, nil, true, recorder.AggrError(AggregateVtGateErrors)
		}

		sdc.connectTimings.Record([]string{sdc.keyspace, sdc.shard, strings.ToLower(sdc.tabletType.String())}, dialStart)
		sdc.mu.Lock()
		sdc.conn = conn
		sdc.mu.Unlock()
		return conn, candidate, false, nil
	}
	return nil, nil, false, recorder.Error()
}
Example #7
0
// verifyShardConnError reports a test failure for each of the following that
// does not hold: err is non-nil with message wantErr, err has dynamic type
// *ShardConnError, and the code recovered from err equals wantCode.
func verifyShardConnError(t *testing.T, err error, wantErr string, wantCode vtrpcpb.ErrorCode) {
	if !(err != nil && err.Error() == wantErr) {
		t.Errorf("wanted error: %s, got error: %v", wantErr, err)
	}
	switch err.(type) {
	case *ShardConnError:
		// Expected type, nothing to report.
	default:
		t.Errorf("wanted error type *ShardConnError, got error type: %v", reflect.TypeOf(err))
	}
	if got := vterrors.RecoverVtErrorCode(err); got != wantCode {
		t.Errorf("wanted error code: %s, got: %v", wantCode, got)
	}
}
Example #8
0
// endAction finishes the bookkeeping for one call: it bumps the tablet error
// counter for statsKey when *err is set (with two exceptions, see below) and
// always records the call duration measured from startTime.
func (l *L2VTGate) endAction(startTime time.Time, statsKey []string, err *error) {
	if failure := *err; failure != nil {
		switch vterrors.RecoverVtErrorCode(failure) {
		case vtrpcpb.ErrorCode_INTEGRITY_ERROR, vtrpcpb.ErrorCode_BAD_INPUT:
			// Duplicate keys and bad queries are caused by client
			// queries and are not VTGate's fault, so they are not
			// counted as errors.
		default:
			l.tabletCallErrorCount.Add(statsKey, 1)
		}
	}
	l.timings.Record(statsKey, startTime)
}
Example #9
0
// testErrorHelper injects one TabletError of each interesting kind into the
// fake query service, invokes ef, and checks that the error coming back
// carries the same vtrpc code, has the expected Go type (unless the fake is
// used through a gateway) and still contains the server-side text.
// The injected error is cleared again once all cases have run.
func testErrorHelper(t *testing.T, f *FakeQueryService, name string, ef func(context.Context) error) {
	cases := []*tabletserver.TabletError{
		// A few generic errors
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_BAD_INPUT, "generic error"),
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_UNKNOWN_ERROR, "uncaught panic"),
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_UNAUTHENTICATED, "missing caller id"),
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_PERMISSION_DENIED, "table acl error: nil acl"),

		// Client will retry on this specific error
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED, "Query disallowed due to rule: %v", "cool rule"),

		// Client may retry on another server on this specific error
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_INTERNAL_ERROR, "Could not verify strict mode"),

		// This is usually transaction pool full
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED, "Transaction pool connection limit exceeded"),

		// Transaction expired or was unknown
		tabletserver.NewTabletError(vtrpcpb.ErrorCode_NOT_IN_TX, "Transaction 12"),
	}
	for _, want := range cases {
		f.TabletError = want
		got := ef(context.Background())
		if got == nil {
			t.Errorf("error wasn't returned for %v?", name)
			continue
		}

		// The recoverable vtrpc code must survive the round trip.
		if gotCode := vterrors.RecoverVtErrorCode(got); gotCode != want.ErrorCode {
			t.Errorf("unexpected server code from %v: got %v, wanted %v", name, gotCode, want.ErrorCode)
		}

		// Outside of gateway testing the error should be a ServerError,
		// although we don't really care that much.
		if _, isServerErr := got.(*tabletconn.ServerError); !f.TestingGateway && !isServerErr {
			t.Errorf("error wasn't a tabletconn.ServerError for %v?", name)
			continue
		}

		// Finally, the text must be preserved, with the right prefix.
		wantText := want.Prefix() + want.Message
		if gotText := got.Error(); !strings.Contains(gotText, wantText) {
			t.Errorf("client error message '%v' for %v doesn't contain expected server text message '%v'", gotText, name, wantText)
		}
	}
	f.TabletError = nil
}
Example #10
0
// VtGateErrorToVtRPCError converts a vtgate error into a vtrpc error.
// It returns nil when err is nil and errString is empty. Otherwise the
// message is err.Error() if err is set, else errString, and the code is
// whatever vterrors.RecoverVtErrorCode reports for err.
// TODO(aaijazi): rename this guy, and correct the usage of it everywhere. As it's currently used,
// it will almost never return the correct error code, as it's only getting executeErr and reply.Error.
// It should actually just use reply.Err.
func VtGateErrorToVtRPCError(err error, errString string) *vtrpc.RPCError {
	var message string
	switch {
	case err != nil:
		message = err.Error()
	case errString != "":
		message = errString
	default:
		return nil
	}
	rpcErr := new(vtrpc.RPCError)
	rpcErr.Code = vterrors.RecoverVtErrorCode(err)
	rpcErr.Message = message
	return rpcErr
}
Example #11
0
// NewShardError wraps in into a *ShardError. The wrapper keeps the original
// error and the code recovered from it, records whether the call happened
// inside a transaction (used to decide whether the keyspace/shard has to be
// re-resolved after a potential sharding event), and identifies the target as
// "keyspace.shard.type", followed by the tablet when one is given.
// A nil in yields a nil error.
func NewShardError(in error, keyspace, shard string, tabletType topodatapb.TabletType, tablet *topodatapb.Tablet, inTransaction bool) error {
	if in == nil {
		return nil
	}

	id := fmt.Sprintf("%s.%s.%s", keyspace, shard, topoproto.TabletTypeLString(tabletType))
	if tablet != nil {
		id += fmt.Sprintf(", %+v", tablet)
	}

	return &ShardError{
		ShardIdentifier: id,
		InTransaction:   inTransaction,
		Err:             in,
		ErrorCode:       vterrors.RecoverVtErrorCode(in),
	}
}
Example #12
0
// WrapError wraps in into a *ShardConnError. The wrapper keeps the original
// error and the vtrpc code recovered from it, adds the connection context
// (keyspace, shard, lower-cased tablet type and endpoint), and records
// whether the call was inside a transaction so callers can decide whether the
// keyspace/shard must be re-resolved after a potential sharding event.
// Code is taken from in when it is a *tabletconn.ServerError and is
// tabletconn.ERR_NORMAL otherwise. A nil in yields a nil error.
func WrapError(in error, keyspace, shard string, tabletType pbt.TabletType, endPoint *pbt.EndPoint, inTransaction bool) (wrapped error) {
	if in == nil {
		return nil
	}

	connCode := tabletconn.ERR_NORMAL
	if se, isServerErr := in.(*tabletconn.ServerError); isServerErr {
		connCode = se.Code
	}

	return &ShardConnError{
		Code:            connCode,
		ShardIdentifier: fmt.Sprintf("%s.%s.%s, %+v", keyspace, shard, strings.ToLower(tabletType.String()), endPoint),
		InTransaction:   inTransaction,
		Err:             in,
		EndPointCode:    vterrors.RecoverVtErrorCode(in),
	}
}
Example #13
0
// handleExecuteError updates the error counters for a failed execute call,
// logs the error unless its code marks it as not worth logging, and returns
// err with this vtgate's listening URL appended as a suffix.
func handleExecuteError(err error, statsKey []string, query map[string]interface{}, logger *logutil.ThrottledLogger) error {
	// Count the error in the right category first.
	code := vterrors.RecoverVtErrorCode(err)
	if code == vtrpcpb.ErrorCode_INTEGRITY_ERROR {
		// Duplicate key error, no need to log.
		infoErrors.Add("DupKey", 1)
	} else {
		normalErrors.Add(statsKey, 1)
		// Tx pool full and bad input are counted but not logged;
		// everything else is a regular error and gets logged.
		if code != vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED && code != vtrpcpb.ErrorCode_BAD_INPUT {
			logError(err, query, logger)
		}
	}

	// Then tag the error with our own address.
	return vterrors.WithSuffix(err, fmt.Sprintf(", vtgate: %v", servenv.ListeningURL.String()))
}
Example #14
0
// verifyErrorCode fails the test unless err is non-nil and the code recovered
// from it equals wantCode.
func verifyErrorCode(t *testing.T, err error, wantCode vtrpc.ErrorCode) {
	got := vterrors.RecoverVtErrorCode(err)
	if err != nil && got == wantCode {
		return
	}
	t.Errorf("vterrors.RecoverVtErrorCode(%v) => %v, want %v", err, got, wantCode)
}
Example #15
0
// commandErrorsBecauseBusy tests that concurrent commands are rejected with
// TRANSIENT_ERROR while a command is already running.
// It also tests the correct propagation of the CANCELED error code.
//
// The "Block" command runs in a helper goroutine. That goroutine never calls
// t.Fatal itself (FailNow must only be called from the goroutine running the
// test); early failures are handed to the test goroutine over a channel so
// that the test fails promptly instead of hanging.
//
// When serverSideCancelation is true the "Block" command is stopped by sending
// vtworker a "Cancel" command; otherwise only the client context is canceled.
func commandErrorsBecauseBusy(t *testing.T, client vtworkerclient.Client, serverSideCancelation bool) {
	// Run the vtworker "Block" command which blocks until we cancel the context.
	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())
	// Cancel on every exit path (including t.Fatal) so the helper goroutine
	// is never left blocked on the stream. Canceling more than once is a no-op.
	defer cancel()
	// blockCommandStarted will be closed after we're sure that vtworker is
	// running the "Block" command.
	blockCommandStarted := make(chan struct{})
	// blockCommandFailed receives an error when "Block" fails before it was
	// confirmed to be running. It is buffered so the sender never blocks.
	blockCommandFailed := make(chan error, 1)
	var errorCodeCheck error
	wg.Add(1)
	go func() {
		defer wg.Done()

		stream, err := client.ExecuteVtworkerCommand(ctx, []string{"Block"})
		if err != nil {
			blockCommandFailed <- fmt.Errorf("Block command should not have failed: %v", err)
			return
		}

		firstLineReceived := false
		for {
			if _, err := stream.Recv(); err != nil {
				if !firstLineReceived {
					// The test goroutine is still waiting for the start
					// signal; wake it up with the failure instead.
					blockCommandFailed <- fmt.Errorf("Block command stream ended before its first log line: %v", err)
					return
				}
				// We see CANCELED from the RPC client (client side cancelation) or
				// from vtworker itself (server side cancelation).
				if vterrors.RecoverVtErrorCode(err) != vtrpcpb.ErrorCode_CANCELLED {
					errorCodeCheck = fmt.Errorf("Block command should only error due to canceled context: %v", err)
				}
				// Stream has finished.
				return
			}

			if !firstLineReceived {
				firstLineReceived = true
				// The first log line will come from the "Block" command, so we are sure
				// now that vtworker is actually executing it.
				close(blockCommandStarted)
			}
		}
	}()

	// Wait until "Block" is confirmed running, or fail if it never got there.
	select {
	case <-blockCommandStarted:
	case err := <-blockCommandFailed:
		t.Fatal(err)
	}

	// Try to run a second, concurrent vtworker command.
	// vtworker should send an error back that it's busy and we should retry later.
	gotErr := runVtworkerCommand(client, []string{"Ping", "Are you busy?"})
	wantCode := vtrpcpb.ErrorCode_TRANSIENT_ERROR
	if gotCode := vterrors.RecoverVtErrorCode(gotErr); gotCode != wantCode {
		t.Fatalf("wrong error code for second cmd: got = %v, want = %v, err: %v", gotCode, wantCode, gotErr)
	}

	// Cancel running "Block" command.
	if serverSideCancelation {
		if err := runVtworkerCommand(client, []string{"Cancel"}); err != nil {
			t.Fatal(err)
		}
	}
	// Always cancel the context to not leak it (regardless of client or server
	// side cancelation).
	cancel()

	// errorCodeCheck is only safe to read once the goroutine has finished.
	wg.Wait()
	if errorCodeCheck != nil {
		t.Fatalf("Block command did not return the CANCELED error code: %v", errorCodeCheck)
	}

	// vtworker is now in a special state where the current job is already
	// canceled but not reset yet. New commands are still failing with a
	// retryable error.
	gotErr2 := runVtworkerCommand(client, []string{"Ping", "canceled and still busy?"})
	wantCode2 := vtrpcpb.ErrorCode_TRANSIENT_ERROR
	if gotCode2 := vterrors.RecoverVtErrorCode(gotErr2); gotCode2 != wantCode2 {
		t.Fatalf("wrong error code for second cmd before reset: got = %v, want = %v, err: %v", gotCode2, wantCode2, gotErr2)
	}

	// Reset vtworker for the next test function.
	if err := resetVtworker(t, client); err != nil {
		t.Fatal(err)
	}

	// Second vtworker command should succeed now after the first has finished.
	if err := runVtworkerCommand(client, []string{"Ping", "You should not be busy anymore!"}); err != nil {
		t.Fatalf("second cmd should not have failed: %v", err)
	}

	// Reset vtworker for the next test function.
	if err := runVtworkerCommand(client, []string{"Reset"}); err != nil {
		t.Fatal(err)
	}
}