Example #1
0
func handleError(err *error, logStats *SQLQueryStats, queryServiceStats *QueryServiceStats) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			*err = NewTabletError(ErrFail, "%v: uncaught panic", x)
			queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats(queryServiceStats)
		if terr.ErrorType == ErrRetry { // Retry errors are too spammy
			return
		}
		if terr.ErrorType == ErrTxPoolFull {
			logTxPoolFull.Errorf("%v", terr)
		} else {
			log.Errorf("%v", terr)
		}
	}
	if logStats != nil {
		logStats.Error = *err
		logStats.Send()
	}
}
Example #2
0
// HandlePanic should be called using 'defer' in the RPC code that executes the command.
func HandlePanic(component string, err *error) {
	if x := recover(); x != nil {
		// gRPC 0.13 chokes when you return a streaming error that contains newlines.
		*err = fmt.Errorf("uncaught %v panic: %v, %s", component, x,
			strings.Replace(string(tb.Stack(4)), "\n", ";", -1))
	}
}
Example #3
0
func (rci *RowcacheInvalidator) run() {
	for {
		// We wrap this code in a func so we can catch all panics.
		// If an error is returned, we log it, wait 1 second, and retry.
		// This loop can only be stopped by calling Close.
		err := func() (inner error) {
			defer func() {
				if x := recover(); x != nil {
					inner = fmt.Errorf("%v: uncaught panic:\n%s", x, tb.Stack(4))
				}
			}()
			rp, err := rci.mysqld.BinlogInfo(rci.GroupId.Get())
			if err != nil {
				return err
			}
			return rci.evs.Stream(rp.MasterLogFile, int64(rp.MasterLogPosition), func(reply *blproto.StreamEvent) error {
				rci.processEvent(reply)
				return nil
			})
		}()
		if err == nil {
			break
		}
		log.Errorf("binlog.ServeUpdateStream returned err '%v', retrying in 1 second.", err.Error())
		internalErrors.Add("Invalidation", 1)
		time.Sleep(1 * time.Second)
	}
	log.Infof("Rowcache invalidator stopped")
}
Example #4
0
func handlePanic(err *error) {
	if x := recover(); x != nil {
		log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
		*err = fmt.Errorf("uncaught panic: %v", x)
		internalErrors.Add("Panic", 1)
	}
}
Example #5
0
// handleExecError handles panics during query execution and sets
// the supplied error return value.
func handleExecError(query *proto.Query, err *error, logStats *SQLQueryStats) {
	if logStats != nil {
		logStats.Send()
	}
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %v:\n%v\n%s", query, x, tb.Stack(4))
			*err = NewTabletError(FAIL, "%v: uncaught panic for %v", x, query)
			internalErrors.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats()
		// suppress these errors in logs
		if terr.ErrorType == RETRY || terr.ErrorType == TX_POOL_FULL || terr.SqlError == mysql.DUP_ENTRY {
			return
		}
		if terr.ErrorType == FATAL {
			log.Errorf("%v: %v", terr, query)
		} else {
			log.Warningf("%v: %v", terr, query)
		}
	}
}
Example #6
0
// handleExecError handles panics during query execution and sets
// the supplied error return value.
func (sq *SqlQuery) handleExecError(query *proto.Query, err *error, logStats *SQLQueryStats) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %v:\n%v\n%s", query, x, tb.Stack(4))
			*err = NewTabletError(ErrFail, "%v: uncaught panic for %v", x, query)
			sq.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		if sq.config.TerseErrors && terr.SqlError != 0 {
			*err = fmt.Errorf("%s(errno %d) during query: %s", terr.Prefix(), terr.SqlError, query.Sql)
		} else {
			*err = terr
		}
		terr.RecordStats(sq.qe.queryServiceStats)
		// suppress these errors in logs
		if terr.ErrorType == ErrRetry || terr.ErrorType == ErrTxPoolFull || terr.SqlError == mysql.ErrDupEntry {
			return
		}
		if terr.ErrorType == ErrFatal {
			log.Errorf("%v: %v", terr, query)
		} else {
			log.Warningf("%v: %v", terr, query)
		}
	}
	if logStats != nil {
		logStats.Error = *err
		logStats.Send()
	}
}
Example #7
0
// HandlePanic recovers from panics, and logs / increment counters
func (vtg *VTGate) HandlePanic(err *error) {
	if x := recover(); x != nil {
		log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
		*err = fmt.Errorf("uncaught panic: %v, vtgate: %v", x, servenv.ListeningURL.String())
		internalErrors.Add("Panic", 1)
	}
}
Example #8
0
func handleError(err *error, logStats *SQLQueryStats, queryServiceStats *QueryServiceStats) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			*err = NewTabletError(ErrFail, vtrpc.ErrorCode_UNKNOWN_ERROR, "%v: uncaught panic", x)
			queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats(queryServiceStats)
		switch terr.ErrorType {
		case ErrRetry: // Retry errors are too spammy
			return
		case ErrTxPoolFull:
			logTxPoolFull.Errorf("%v", terr)
		default:
			switch terr.SqlError {
			// MySQL deadlock errors are (usually) due to client behavior, not server
			// behavior, and therefore logged at the INFO level.
			case mysql.ErrLockWaitTimeout, mysql.ErrLockDeadlock:
				log.Infof("%v", terr)
			default:
				log.Errorf("%v", terr)
			}
		}
	}
	if logStats != nil {
		logStats.Error = *err
		logStats.Send()
	}
}
Example #9
0
func (rci *RowcacheInvalidator) run(ctx *sync2.ServiceContext) error {
	for {
		evs := binlog.NewEventStreamer(rci.dbname, rci.mysqld, rci.Position(), rci.processEvent)
		// We wrap this code in a func so we can catch all panics.
		// If an error is returned, we log it, wait 1 second, and retry.
		// This loop can only be stopped by calling Close.
		err := func() (inner error) {
			defer func() {
				if x := recover(); x != nil {
					inner = fmt.Errorf("%v: uncaught panic:\n%s", x, tb.Stack(4))
				}
			}()
			return evs.Stream(ctx)
		}()
		if err == nil || !ctx.IsRunning() {
			break
		}
		if IsConnErr(err) {
			rci.checker.CheckMySQL()
		}
		log.Errorf("binlog.ServeUpdateStream returned err '%v', retrying in 1 second.", err.Error())
		rci.qe.queryServiceStats.InternalErrors.Add("Invalidation", 1)
		time.Sleep(1 * time.Second)
	}
	log.Infof("Rowcache invalidator stopped")
	return nil
}
Example #10
0
func logError() {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			internalErrors.Add("Panic", 1)
			return
		}
		log.Errorf("%v", terr)
	}
}
Example #11
0
func handleInvalidationError(event *blproto.StreamEvent) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %+v:\n%v\n%s", event, x, tb.Stack(4))
			internalErrors.Add("Panic", 1)
			return
		}
		log.Errorf("%v: %+v", terr, event)
		internalErrors.Add("Invalidation", 1)
	}
}
Example #12
0
func handleInvalidationError(request interface{}) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %v:\n%v\n%s", request, x, tb.Stack(4))
			internalErrors.Add("Panic", 1)
			return
		}
		log.Errorf("%s: %v", terr.Message, request)
		internalErrors.Add("Invalidation", 1)
	}
}
Example #13
0
func (rci *RowcacheInvalidator) handleInvalidationError(event *binlogdatapb.StreamEvent) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %+v:\n%v\n%s", event, x, tb.Stack(4))
			rci.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		log.Errorf("%v: %+v", terr, event)
		rci.qe.queryServiceStats.InternalErrors.Add("Invalidation", 1)
	}
}
Example #14
0
func (tsv *TabletServer) handleExecErrorNoPanic(query *proto.Query, err interface{}, logStats *LogStats) error {
	var terr *TabletError
	defer func() {
		if logStats != nil {
			logStats.Error = terr
		}
	}()
	terr, ok := err.(*TabletError)
	if !ok {
		log.Errorf("Uncaught panic for %v:\n%v\n%s", query, err, tb.Stack(4))
		tsv.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
		terr = NewTabletError(ErrFail, vtrpc.ErrorCode_UNKNOWN_ERROR, "%v: uncaught panic for %v", err, query)
		return terr
	}
	var myError error
	if tsv.config.TerseErrors && terr.SQLError != 0 && len(query.BindVariables) != 0 {
		myError = &TabletError{
			ErrorType: terr.ErrorType,
			SQLError:  terr.SQLError,
			ErrorCode: terr.ErrorCode,
			Message:   fmt.Sprintf("(errno %d) during query: %s", terr.SQLError, query.Sql),
		}
	} else {
		myError = terr
	}
	terr.RecordStats(tsv.qe.queryServiceStats)

	logMethod := log.Warningf
	// Suppress or demote some errors in logs
	switch terr.ErrorType {
	case ErrRetry, ErrTxPoolFull:
		return myError
	case ErrFatal:
		logMethod = log.Errorf
	}
	// We want to suppress/demote some MySQL error codes (regardless of the ErrorType)
	switch terr.SQLError {
	case mysql.ErrDupEntry:
		return myError
	case mysql.ErrLockWaitTimeout, mysql.ErrLockDeadlock, mysql.ErrDataTooLong,
		mysql.ErrDataOutOfRange, mysql.ErrBadNullError:
		logMethod = log.Infof
	case 0:
		if strings.Contains(terr.Error(), "Row count exceeded") {
			logMethod = log.Infof
		}
	}
	logMethod("%v: %v", terr, query)
	return myError
}
Example #15
0
func logError(queryServiceStats *QueryServiceStats) {
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		if terr.ErrorCode == vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED {
			logTxPoolFull.Errorf("%v", terr)
		} else {
			log.Errorf("%v", terr)
		}
	}
}
Example #16
0
func handleExecError(query *proto.Query, err *error, logStats *sqlQueryStats) {
	if logStats != nil {
		logStats.Send()
	}
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic for %v:\n%v\n%s", query, x, tb.Stack(4))
			*err = NewTabletError(FAIL, "%v: uncaught panic for %v", x, query)
			errorStats.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats()
		if terr.ErrorType == RETRY || terr.SqlError == DUPLICATE_KEY { // suppress these errors in logs
			return
		}
		log.Errorf("%s: %v", terr.Message, query)
	}
}
Example #17
0
func handleError(err *error, logStats *sqlQueryStats) {
	if logStats != nil {
		logStats.Send()
	}
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			*err = NewTabletError(FAIL, "%v: uncaught panic", x)
			internalErrors.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats()
		if terr.ErrorType == RETRY { // Retry errors are too spammy
			return
		}
		log.Errorf("%s", terr.Message)
	}
}
Example #18
0
func handleError(err *error, logStats *LogStats, queryServiceStats *QueryServiceStats) {
	var terr *TabletError
	defer func() {
		if logStats != nil {
			logStats.Error = terr
			logStats.Send()
		}
	}()
	if x := recover(); x != nil {
		terr, ok := x.(*TabletError)
		if !ok {
			log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
			terr = NewTabletError(vtrpcpb.ErrorCode_UNKNOWN_ERROR, "%v: uncaught panic", x)
			*err = terr
			queryServiceStats.InternalErrors.Add("Panic", 1)
			return
		}
		*err = terr
		terr.RecordStats(queryServiceStats)
		switch terr.ErrorCode {
		case vtrpcpb.ErrorCode_QUERY_NOT_SERVED:
			// Retry errors are too spammy
			return
		case vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED:
			logTxPoolFull.Errorf("%v", terr)
		default:
			switch terr.SQLError {
			// MySQL deadlock errors are (usually) due to client behavior, not server
			// behavior, and therefore logged at the INFO level.
			case mysql.ErrLockWaitTimeout, mysql.ErrLockDeadlock, mysql.ErrDataTooLong,
				mysql.ErrDataOutOfRange, mysql.ErrBadNullError:
				log.Infof("%v", terr)
			default:
				log.Errorf("%v", terr)
			}
		}
	}
}
Example #19
0
func (sq *SqlQuery) handleExecErrorNoPanic(query *proto.Query, err interface{}, logStats *SQLQueryStats) error {
	terr, ok := err.(*TabletError)
	if !ok {
		log.Errorf("Uncaught panic for %v:\n%v\n%s", query, err, tb.Stack(4))
		sq.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
		return NewTabletError(ErrFail, "%v: uncaught panic for %v", err, query)
	}
	var myError error
	if sq.config.TerseErrors && terr.SqlError != 0 && len(query.BindVariables) != 0 {
		myError = fmt.Errorf("%s(errno %d) during query: %s", terr.Prefix(), terr.SqlError, query.Sql)
	} else {
		myError = terr
	}
	terr.RecordStats(sq.qe.queryServiceStats)

	logMethod := log.Warningf
	// Suppress or demote some errors in logs
	switch terr.ErrorType {
	case ErrRetry, ErrTxPoolFull:
		return myError
	case ErrFatal:
		logMethod = log.Errorf
	}
	// We want to suppress/demote some MySQL error codes (regardless of the ErrorType)
	switch terr.SqlError {
	case mysql.ErrDupEntry:
		return myError
	case mysql.ErrLockWaitTimeout, mysql.ErrLockDeadlock, mysql.ErrDataTooLong:
		logMethod = log.Infof
	case 0:
		if strings.Contains(terr.Error(), "Row count exceeded") {
			logMethod = log.Infof
		}
	}
	logMethod("%v: %v", terr, query)
	return myError
}
Example #20
0
func (sq *SqlQuery) handleExecErrorNoPanic(query *proto.Query, err interface{}, logStats *SQLQueryStats) error {
	terr, ok := err.(*TabletError)
	if !ok {
		log.Errorf("Uncaught panic for %v:\n%v\n%s", query, err, tb.Stack(4))
		sq.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
		return NewTabletError(ErrFail, "%v: uncaught panic for %v", err, query)
	}
	var myError error
	if sq.config.TerseErrors && terr.SqlError != 0 {
		myError = fmt.Errorf("%s(errno %d) during query: %s", terr.Prefix(), terr.SqlError, query.Sql)
	} else {
		myError = terr
	}
	terr.RecordStats(sq.qe.queryServiceStats)
	// suppress these errors in logs
	if terr.ErrorType == ErrRetry || terr.ErrorType == ErrTxPoolFull || terr.SqlError == mysql.ErrDupEntry {
		return myError
	}
	if terr.ErrorType == ErrFatal {
		log.Errorf("%v: %v", terr, query)
	}
	log.Warningf("%v: %v", terr, query)
	return myError
}
Example #21
0
// Iteration is a single iteration for the player: get the current status,
// try to play, and plays until interrupted, or until an error occurs.
func (bpc *BinlogPlayerController) Iteration() (err error) {
	defer func() {
		if x := recover(); x != nil {
			log.Errorf("%v: caught panic: %v\n%s", bpc, x, tb.Stack(4))
			err = fmt.Errorf("panic: %v", x)
		}
	}()

	// Apply any special settings necessary for playback of binlogs.
	// We do it on every iteration to be sure, in case MySQL was restarted.
	if err := bpc.mysqld.EnableBinlogPlayback(); err != nil {
		// We failed to apply the required settings, so we shouldn't keep going.
		return err
	}

	// create the db connection, connect it
	vtClient := bpc.vtClientFactory()
	if err := vtClient.Connect(); err != nil {
		return fmt.Errorf("can't connect to database: %v", err)
	}
	defer vtClient.Close()

	// Read the start position
	startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid)
	if err != nil {
		return fmt.Errorf("can't read startPosition: %v", err)
	}

	// if we shouldn't start, we just error out and try again later
	if strings.Index(flags, binlogplayer.BlpFlagDontStart) != -1 {
		return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BlpFlagDontStart)
	}

	// wait for the endpoint set (usefull for the first run at least, fast for next runs)
	if err := discovery.WaitForEndPoints(bpc.healthCheck, bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, []topodatapb.TabletType{topodatapb.TabletType_REPLICA}); err != nil {
		return fmt.Errorf("error waiting for endpoints for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topodatapb.TabletType_REPLICA, err)
	}

	// Find the server list from the health check
	addrs := bpc.healthCheck.GetEndPointStatsFromTarget(bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topodatapb.TabletType_REPLICA)
	if len(addrs) == 0 {
		return fmt.Errorf("can't find any source tablet for %v %v %v", bpc.cell, bpc.sourceShard.String(), topodatapb.TabletType_REPLICA)
	}
	newServerIndex := rand.Intn(len(addrs))
	endPoint := addrs[newServerIndex].EndPoint

	// save our current server
	bpc.playerMutex.Lock()
	bpc.sourceTablet = &topodatapb.TabletAlias{
		Cell: bpc.cell,
		Uid:  endPoint.Uid,
	}
	bpc.lastError = nil
	bpc.playerMutex.Unlock()

	// check which kind of replication we're doing, tables or keyrange
	if len(bpc.sourceShard.Tables) > 0 {
		// tables, first resolve wildcards
		tables, err := mysqlctl.ResolveTables(bpc.mysqld, bpc.dbName, bpc.sourceShard.Tables)
		if err != nil {
			return fmt.Errorf("failed to resolve table names: %v", err)
		}

		// tables, just get them
		player, err := binlogplayer.NewBinlogPlayerTables(vtClient, endPoint, tables, bpc.sourceShard.Uid, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
		if err != nil {
			return fmt.Errorf("NewBinlogPlayerTables failed: %v", err)
		}
		return player.ApplyBinlogEvents(bpc.ctx)
	}
	// the data we have to replicate is the intersection of the
	// source keyrange and our keyrange
	overlap, err := key.KeyRangesOverlap(bpc.sourceShard.KeyRange, bpc.keyRange)
	if err != nil {
		return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange)
	}

	player, err := binlogplayer.NewBinlogPlayerKeyRange(vtClient, endPoint, overlap, bpc.sourceShard.Uid, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
	if err != nil {
		return fmt.Errorf("NewBinlogPlayerKeyRange failed: %v", err)
	}
	return player.ApplyBinlogEvents(bpc.ctx)
}
Example #22
0
// setAndStartWorker will set the current worker.
// We always log to both memory logger (for display on the web) and
// console logger (for records / display of command line worker).
func (wi *Instance) setAndStartWorker(wrk Worker, wr *wrangler.Wrangler) (chan struct{}, error) {
	wi.currentWorkerMutex.Lock()
	defer wi.currentWorkerMutex.Unlock()

	if wi.currentContext != nil {
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
			fmt.Errorf("A worker job is already in progress: %v", wi.currentWorker))
	}

	if wi.currentWorker != nil {
		// During the grace period, we answer with a retryable error.
		const gracePeriod = 1 * time.Minute
		gracePeriodEnd := time.Now().Add(gracePeriod)
		if wi.lastRunStopTime.Before(gracePeriodEnd) {
			return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
				fmt.Errorf("A worker job was recently stopped (%f seconds ago): %v",
					time.Now().Sub(wi.lastRunStopTime).Seconds(),
					wi.currentWorker))
		}

		// QUERY_NOT_SERVED = FailedPrecondition => manual resolution required.
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED,
			fmt.Errorf("The worker job was stopped %.1f minutes ago, but not reset. You have to reset it manually. Job: %v",
				time.Now().Sub(wi.lastRunStopTime).Minutes(),
				wi.currentWorker))
	}

	wi.currentWorker = wrk
	wi.currentMemoryLogger = logutil.NewMemoryLogger()
	wi.currentContext, wi.currentCancelFunc = context.WithCancel(wi.backgroundContext)
	wi.lastRunError = nil
	wi.lastRunStopTime = time.Unix(0, 0)
	done := make(chan struct{})
	wranglerLogger := wr.Logger()
	if wr == wi.wr {
		// If it's the default wrangler, do not reuse its logger because it may have been set before.
		// Resuing it would result into an endless recursion.
		wranglerLogger = logutil.NewConsoleLogger()
	}
	wr.SetLogger(logutil.NewTeeLogger(wi.currentMemoryLogger, wranglerLogger))

	// one go function runs the worker, changes state when done
	go func() {
		log.Infof("Starting worker...")
		var err error

		// Catch all panics and always save the execution state at the end.
		defer func() {
			// The recovery code is a copy of servenv.HandlePanic().
			if x := recover(); x != nil {
				log.Errorf("uncaught vtworker panic: %v\n%s", x, tb.Stack(4))
				err = fmt.Errorf("uncaught vtworker panic: %v", x)
			}

			wi.currentWorkerMutex.Lock()
			wi.currentContext = nil
			wi.currentCancelFunc = nil
			wi.lastRunError = err
			wi.lastRunStopTime = time.Now()
			wi.currentWorkerMutex.Unlock()
			close(done)
		}()

		// run will take a long time
		err = wrk.Run(wi.currentContext)
	}()

	return done, nil
}
Example #23
0
// HandlePanic is part of the UpdateStream interface
func (updateStream *UpdateStreamImpl) HandlePanic(err *error) {
	if x := recover(); x != nil {
		log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
		*err = fmt.Errorf("uncaught panic: %v", x)
	}
}
Example #24
0
func logError() {
	if x := recover(); x != nil {
		log.Errorf("%s at\n%s", x.(error).Error(), tb.Stack(4))
	}
}
Example #25
0
func (c *errorClient) HandlePanic(err *error) {
	if x := recover(); x != nil {
		log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4))
		*err = fmt.Errorf("uncaught panic: %v", x)
	}
}
Example #26
0
func (tsv *TabletServer) handleExecErrorNoPanic(sql string, bindVariables map[string]interface{}, err interface{}, logStats *LogStats) error {
	var terr *TabletError
	defer func() {
		if logStats != nil {
			logStats.Error = terr
		}
	}()
	terr, ok := err.(*TabletError)
	if !ok {
		log.Errorf("Uncaught panic for %v:\n%v\n%s", querytypes.QueryAsString(sql, bindVariables), err, tb.Stack(4))
		tsv.qe.queryServiceStats.InternalErrors.Add("Panic", 1)
		terr = NewTabletError(vtrpcpb.ErrorCode_UNKNOWN_ERROR, "%v: uncaught panic for %v", err, querytypes.QueryAsString(sql, bindVariables))
		return terr
	}
	var myError error
	if tsv.config.TerseErrors && terr.SQLError != 0 && len(bindVariables) != 0 {
		myError = &TabletError{
			SQLError:  terr.SQLError,
			SQLState:  terr.SQLState,
			ErrorCode: terr.ErrorCode,
			Message:   fmt.Sprintf("(errno %d) (sqlstate %s) during query: %s", terr.SQLError, terr.SQLState, sql),
		}
	} else {
		myError = terr
	}
	terr.RecordStats(tsv.qe.queryServiceStats)

	logMethod := log.Warningf
	// Suppress or demote some errors in logs
	switch terr.ErrorCode {
	case vtrpcpb.ErrorCode_QUERY_NOT_SERVED, vtrpcpb.ErrorCode_RESOURCE_EXHAUSTED:
		return myError
	case vtrpcpb.ErrorCode_INTERNAL_ERROR:
		logMethod = log.Errorf
	case vtrpcpb.ErrorCode_NOT_IN_TX:
		// keep as warning
	default:
		// default is when we think the query itself is
		// problematic. This doesn't indicate a system or
		// component wide degradation, so we log to INFO.
		logMethod = log.Infof
	}
	// We want to suppress/demote some MySQL error codes
	// (regardless of the ErrorType)
	switch terr.SQLError {
	case mysql.ErrDupEntry:
		return myError
	case mysql.ErrLockWaitTimeout, mysql.ErrLockDeadlock, mysql.ErrDataTooLong,
		mysql.ErrDataOutOfRange, mysql.ErrBadNullError:
		logMethod = log.Infof
	case 0:
		if strings.Contains(terr.Error(), "Row count exceeded") {
			logMethod = log.Infof
		}
	}
	logMethod("%v: %v", terr, querytypes.QueryAsString(sql, bindVariables))
	return myError
}
Example #27
0
// rpcWrapper handles all the logic for rpc calls.
func (agent *ActionAgent) rpcWrapper(ctx context.Context, name string, args, reply interface{}, verbose bool, f func() error, lock, runAfterAction bool) (err error) {
	defer func() {
		if x := recover(); x != nil {
			log.Errorf("TabletManager.%v(%v) on %v panic: %v\n%s", name, args, topo.TabletAliasString(agent.TabletAlias), x, tb.Stack(4))
			err = fmt.Errorf("caught panic during %v: %v", name, x)
		}
	}()

	from := ""
	ci, ok := callinfo.FromContext(ctx)
	if ok {
		from = ci.Text()
	}

	if lock {
		beforeLock := time.Now()
		agent.actionMutex.Lock()
		defer agent.actionMutex.Unlock()
		if time.Now().Sub(beforeLock) > rpcTimeout {
			return fmt.Errorf("server timeout for " + name)
		}
	}

	if err = f(); err != nil {
		log.Warningf("TabletManager.%v(%v)(on %v from %v) error: %v", name, args, topo.TabletAliasString(agent.TabletAlias), from, err.Error())
		return fmt.Errorf("TabletManager.%v on %v error: %v", name, topo.TabletAliasString(agent.TabletAlias), err)
	}
	if verbose {
		log.Infof("TabletManager.%v(%v)(on %v from %v): %#v", name, args, topo.TabletAliasString(agent.TabletAlias), from, reply)
	}
	if runAfterAction {
		err = agent.refreshTablet(ctx, "RPC("+name+")")
	}
	return
}
Example #28
0
// Iteration is a single iteration for the player: get the current status,
// try to play, and plays until interrupted, or until an error occurs.
func (bpc *BinlogPlayerController) Iteration() (err error) {
	defer func() {
		if x := recover(); x != nil {
			log.Errorf("%v: caught panic: %v\n%s", bpc, x, tb.Stack(4))
			err = fmt.Errorf("panic: %v", x)
		}
	}()

	// Apply any special settings necessary for playback of binlogs.
	// We do it on every iteration to be sure, in case MySQL was restarted.
	if err := bpc.mysqld.EnableBinlogPlayback(); err != nil {
		// We failed to apply the required settings, so we shouldn't keep going.
		return err
	}

	// create the db connection, connect it
	vtClient := bpc.vtClientFactory()
	if err := vtClient.Connect(); err != nil {
		return fmt.Errorf("can't connect to database: %v", err)
	}
	defer vtClient.Close()

	// Read the start position
	startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid)
	if err != nil {
		return fmt.Errorf("can't read startPosition: %v", err)
	}

	// if we shouldn't start, we just error out and try again later
	if strings.Index(flags, binlogplayer.BlpFlagDontStart) != -1 {
		return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BlpFlagDontStart)
	}

	// Find the server list for the source shard in our cell
	addrs, _, err := bpc.ts.GetEndPoints(bpc.ctx, bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, pb.TabletType_REPLICA)
	if err != nil {
		// If this calls fails because the context was canceled,
		// we need to return nil.
		select {
		case <-bpc.ctx.Done():
			if bpc.ctx.Err() == context.Canceled {
				return nil
			}
		default:
		}
		return fmt.Errorf("can't find any source tablet for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA, err)
	}
	if len(addrs.Entries) == 0 {
		return fmt.Errorf("empty source tablet list for %v %v %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA)
	}
	newServerIndex := rand.Intn(len(addrs.Entries))
	endPoint := addrs.Entries[newServerIndex]

	// save our current server
	bpc.playerMutex.Lock()
	bpc.sourceTablet = &pb.TabletAlias{
		Cell: bpc.cell,
		Uid:  addrs.Entries[newServerIndex].Uid,
	}
	bpc.lastError = nil
	bpc.playerMutex.Unlock()

	// check which kind of replication we're doing, tables or keyrange
	if len(bpc.sourceShard.Tables) > 0 {
		// tables, first resolve wildcards
		tables, err := mysqlctl.ResolveTables(bpc.mysqld, bpc.dbName, bpc.sourceShard.Tables)
		if err != nil {
			return fmt.Errorf("failed to resolve table names: %v", err)
		}

		// tables, just get them
		player := binlogplayer.NewBinlogPlayerTables(vtClient, endPoint, tables, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
		return player.ApplyBinlogEvents(bpc.ctx)
	}
	// the data we have to replicate is the intersection of the
	// source keyrange and our keyrange
	overlap, err := key.KeyRangesOverlap(bpc.sourceShard.KeyRange, bpc.keyRange)
	if err != nil {
		return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange)
	}

	player := binlogplayer.NewBinlogPlayerKeyRange(vtClient, endPoint, bpc.keyspaceIDType, overlap, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
	return player.ApplyBinlogEvents(bpc.ctx)
}
Example #29
0
// HandlePanic should be called using 'defer' in the RPC code that executes the command.
func HandlePanic(component string, err *error) {
	if x := recover(); x != nil {
		*err = fmt.Errorf("uncaught %v panic: %v\n%s", component, x, tb.Stack(4))
	}
}
Example #30
0
// Iteration is a single iteration for the player: get the current status,
// try to play, and plays until interrupted, or until an error occurs.
func (bpc *BinlogPlayerController) Iteration() (err error) {
	defer func() {
		if x := recover(); x != nil {
			log.Errorf("%v: caught panic: %v\n%s", bpc, x, tb.Stack(4))
			err = fmt.Errorf("panic: %v", x)
		}
	}()

	// Check if the context is still good.
	select {
	case <-bpc.ctx.Done():
		if bpc.ctx.Err() == context.Canceled {
			// We were stopped. Break out of Loop().
			return nil
		}
		return fmt.Errorf("giving up since the context is done: %v", bpc.ctx.Err())
	default:
	}

	// Apply any special settings necessary for playback of binlogs.
	// We do it on every iteration to be sure, in case MySQL was restarted.
	if err := bpc.mysqld.EnableBinlogPlayback(); err != nil {
		// We failed to apply the required settings, so we shouldn't keep going.
		return err
	}

	// create the db connection, connect it
	vtClient := bpc.vtClientFactory()
	if err := vtClient.Connect(); err != nil {
		return fmt.Errorf("can't connect to database: %v", err)
	}
	defer vtClient.Close()

	// Read the start position
	startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid)
	if err != nil {
		return fmt.Errorf("can't read startPosition: %v", err)
	}

	// if we shouldn't start, we just error out and try again later
	if strings.Index(flags, binlogplayer.BlpFlagDontStart) != -1 {
		return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BlpFlagDontStart)
	}

	// wait for the tablet set (usefull for the first run at least, fast for next runs)
	if err := bpc.tabletStatsCache.WaitForTablets(bpc.ctx, bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, []topodatapb.TabletType{topodatapb.TabletType_REPLICA}); err != nil {
		return fmt.Errorf("error waiting for tablets for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topodatapb.TabletType_REPLICA, err)
	}

	// Find the server list from the health check.
	// Note: We cannot use tsc.GetHealthyTabletStats() here because it does
	// not return non-serving tablets. We must include non-serving tablets because
	// REPLICA source tablets may not be serving anymore because their traffic was
	// already migrated to the destination shards.
	addrs := discovery.RemoveUnhealthyTablets(bpc.tabletStatsCache.GetTabletStats(bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topodatapb.TabletType_REPLICA))
	if len(addrs) == 0 {
		return fmt.Errorf("can't find any healthy source tablet for %v %v %v", bpc.cell, bpc.sourceShard.String(), topodatapb.TabletType_REPLICA)
	}
	newServerIndex := rand.Intn(len(addrs))
	tablet := addrs[newServerIndex].Tablet

	// save our current server
	bpc.playerMutex.Lock()
	bpc.sourceTablet = tablet.Alias
	bpc.lastError = nil
	bpc.playerMutex.Unlock()

	// check which kind of replication we're doing, tables or keyrange
	if len(bpc.sourceShard.Tables) > 0 {
		// tables, first resolve wildcards
		tables, err := mysqlctl.ResolveTables(bpc.mysqld, bpc.dbName, bpc.sourceShard.Tables)
		if err != nil {
			return fmt.Errorf("failed to resolve table names: %v", err)
		}

		// tables, just get them
		player, err := binlogplayer.NewBinlogPlayerTables(vtClient, tablet, tables, bpc.sourceShard.Uid, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
		if err != nil {
			return fmt.Errorf("NewBinlogPlayerTables failed: %v", err)
		}
		return player.ApplyBinlogEvents(bpc.ctx)
	}
	// the data we have to replicate is the intersection of the
	// source keyrange and our keyrange
	overlap, err := key.KeyRangesOverlap(bpc.sourceShard.KeyRange, bpc.keyRange)
	if err != nil {
		return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange)
	}

	player, err := binlogplayer.NewBinlogPlayerKeyRange(vtClient, tablet, overlap, bpc.sourceShard.Uid, startPosition, bpc.stopPosition, bpc.binlogPlayerStats)
	if err != nil {
		return fmt.Errorf("NewBinlogPlayerKeyRange failed: %v", err)
	}
	return player.ApplyBinlogEvents(bpc.ctx)
}