func (bpc *BinlogPlayerController) Iteration() (err error) { defer func() { if x := recover(); x != nil { log.Errorf("%v: caught panic: %v", bpc, x) err = fmt.Errorf("panic: %v", x) } }() // Enable any user to set the timestamp. // We do it on every iteration to be sure, in case mysql was // restarted. bpc.DisableSuperToSetTimestamp() // create the db connection, connect it vtClient := binlogplayer.NewDbClient(bpc.dbConfig) if err := vtClient.Connect(); err != nil { return fmt.Errorf("can't connect to database: %v", err) } defer vtClient.Close() // Read the start position startPosition, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid) if err != nil { return fmt.Errorf("can't read startPosition: %v", err) } // Find the server list for the source shard in our cell addrs, err := bpc.ts.GetEndPoints(bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topo.TYPE_REPLICA) if err != nil { return fmt.Errorf("can't find any source tablet for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA, err) } if len(addrs.Entries) == 0 { return fmt.Errorf("empty source tablet list for %v %v %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA) } newServerIndex := rand.Intn(len(addrs.Entries)) addr := fmt.Sprintf("%v:%v", addrs.Entries[newServerIndex].Host, addrs.Entries[newServerIndex].NamedPortMap["_vtocc"]) // check which kind of replication we're doing, tables or keyrange if len(bpc.sourceShard.Tables) > 0 { // tables, just get them player := binlogplayer.NewBinlogPlayerTables(vtClient, addr, bpc.sourceShard.Tables, startPosition, bpc.stopAtGroupId) return player.ApplyBinlogEvents(bpc.interrupted) } else { // the data we have to replicate is the intersection of the // source keyrange and our keyrange overlap, err := key.KeyRangesOverlap(bpc.sourceShard.KeyRange, bpc.keyRange) if err != nil { return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange) } player := binlogplayer.NewBinlogPlayerKeyRange(vtClient, addr, overlap, startPosition, bpc.stopAtGroupId) return player.ApplyBinlogEvents(bpc.interrupted) } }
// BlpPositionList returns the current position of all the players. func (blm *BinlogPlayerMap) BlpPositionList() (*blproto.BlpPositionList, error) { // create a db connection for this purpose vtClient := binlogplayer.NewDbClient(blm.dbConfig) if err := vtClient.Connect(); err != nil { return nil, fmt.Errorf("can't connect to database: %v", err) } defer vtClient.Close() result := &blproto.BlpPositionList{} blm.mu.Lock() defer blm.mu.Unlock() for _, bpc := range blm.players { blp, _, err := bpc.BlpPosition(vtClient) if err != nil { return nil, fmt.Errorf("can't read current position for %v: %v", bpc, err) } result.Entries = append(result.Entries, *blp) } return result, nil }
// NewActionAgent creates a new ActionAgent and registers all the // associated services. // // batchCtx is the context that the agent will use for any background tasks // it spawns. func NewActionAgent( batchCtx context.Context, mysqld mysqlctl.MysqlDaemon, queryServiceControl tabletserver.Controller, tabletAlias *topodatapb.TabletAlias, dbcfgs dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, gRPCPort int32, overridesFile string, ) (agent *ActionAgent, err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() agent = &ActionAgent{ QueryServiceControl: queryServiceControl, HealthReporter: health.DefaultAggregator, batchCtx: batchCtx, TopoServer: topoServer, TabletAlias: tabletAlias, MysqlDaemon: mysqld, DBConfigs: dbcfgs, SchemaOverrides: schemaOverrides, History: history.New(historyLength), lastHealthMapCount: stats.NewInt("LastHealthMapCount"), _healthy: fmt.Errorf("healthcheck not run yet"), } agent.registerQueryRuleSources() // try to initialize the tablet if we have to if err := agent.InitTablet(port, gRPCPort); err != nil { return nil, fmt.Errorf("agent.InitTablet failed: %v", err) } // Publish and set the TargetTabletType. Not a global var // since it should never be changed. statsTabletType := stats.NewString("TargetTabletType") statsTabletType.Set(*targetTabletType) // Create the TabletType stats agent.exportStats = true agent.statsTabletType = stats.NewString("TabletType") // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient { return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered) }) // Stop all binlog players upon entering lameduck. servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset) RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // try to figure out the mysql port mysqlPort := mycnf.MysqlPort if mysqlPort == 0 { // we don't know the port, try to get it from mysqld var err error mysqlPort, err = mysqld.GetMysqlPort() if err != nil { log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err) } } // Start will get the tablet info, and update our state from it if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil { return nil, err } // register the RPC services from the agent agent.registerQueryService() // two cases then: // - restoreFromBackup is set: we restore, then initHealthCheck, all // in the background // - restoreFromBackup is not set: we initHealthCheck right away if *restoreFromBackup { go func() { // restoreFromBackup wil just be a regular action // (same as if it was triggered remotely) if err := agent.RestoreFromBackup(batchCtx); err != nil { println(fmt.Sprintf("RestoreFromBackup failed: %v", err)) log.Fatalf("RestoreFromBackup failed: %v", err) } // after the restore is done, start health check agent.initHealthCheck() }() } else { // synchronously start health check if needed agent.initHealthCheck() } return agent, nil }
// Iteration is a single iteration for the player: get the current status, // try to play, and plays until interrupted, or until an error occurs. func (bpc *BinlogPlayerController) Iteration() (err error) { defer func() { if x := recover(); x != nil { log.Errorf("%v: caught panic: %v", bpc, x) err = fmt.Errorf("panic: %v", x) } }() // Apply any special settings necessary for playback of binlogs. // We do it on every iteration to be sure, in case MySQL was restarted. if err := bpc.mysqld.EnableBinlogPlayback(); err != nil { // We failed to apply the required settings, so we shouldn't keep going. return err } // create the db connection, connect it vtClient := binlogplayer.NewDbClient(bpc.dbConfig) if err := vtClient.Connect(); err != nil { return fmt.Errorf("can't connect to database: %v", err) } defer vtClient.Close() // Read the start position startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid) if err != nil { return fmt.Errorf("can't read startPosition: %v", err) } // if we shouldn't start, we just error out and try again later if strings.Index(flags, binlogplayer.BlpFlagDontStart) != -1 { return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BlpFlagDontStart) } // Find the server list for the source shard in our cell addrs, _, err := bpc.ts.GetEndPoints(bpc.ctx, bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topo.TYPE_REPLICA) if err != nil { // If this calls fails because the context was canceled, // we need to return nil. select { case <-bpc.ctx.Done(): if bpc.ctx.Err() == context.Canceled { return nil } default: } return fmt.Errorf("can't find any source tablet for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA, err) } if len(addrs.Entries) == 0 { return fmt.Errorf("empty source tablet list for %v %v %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA) } newServerIndex := rand.Intn(len(addrs.Entries)) endPoint := addrs.Entries[newServerIndex] // save our current server bpc.playerMutex.Lock() bpc.sourceTablet = topo.TabletAlias{ Cell: bpc.cell, Uid: addrs.Entries[newServerIndex].Uid, } bpc.lastError = nil bpc.playerMutex.Unlock() // check which kind of replication we're doing, tables or keyrange if len(bpc.sourceShard.Tables) > 0 { // tables, first resolve wildcards tables, err := mysqlctl.ResolveTables(bpc.mysqld, bpc.dbName, bpc.sourceShard.Tables) if err != nil { return fmt.Errorf("failed to resolve table names: %v", err) } // tables, just get them player := binlogplayer.NewBinlogPlayerTables(vtClient, endPoint, tables, startPosition, bpc.stopPosition, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.ctx) } // the data we have to replicate is the intersection of the // source keyrange and our keyrange overlap, err := key.KeyRangesOverlap3(bpc.sourceShard.KeyRange, bpc.keyRange) if err != nil { return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange) } player := binlogplayer.NewBinlogPlayerKeyRange(vtClient, endPoint, bpc.keyspaceIDType, overlap, startPosition, bpc.stopPosition, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.ctx) }
// NewActionAgent creates a new ActionAgent and registers all the // associated services. // // batchCtx is the context that the agent will use for any background tasks // it spawns. func NewActionAgent( batchCtx context.Context, mysqld mysqlctl.MysqlDaemon, queryServiceControl tabletserver.Controller, tabletAlias *topodatapb.TabletAlias, dbcfgs dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, gRPCPort int32, ) (agent *ActionAgent, err error) { topoServer := topo.GetServer() orc, err := newOrcClient() if err != nil { return nil, err } agent = &ActionAgent{ QueryServiceControl: queryServiceControl, HealthReporter: health.DefaultAggregator, batchCtx: batchCtx, TopoServer: topoServer, TabletAlias: tabletAlias, MysqlDaemon: mysqld, DBConfigs: dbcfgs, History: history.New(historyLength), _healthy: fmt.Errorf("healthcheck not run yet"), orc: orc, } agent.registerQueryRuleSources() // try to initialize the tablet if we have to if err := agent.InitTablet(port, gRPCPort); err != nil { return nil, fmt.Errorf("agent.InitTablet failed: %v", err) } // Create the TabletType stats agent.exportStats = true agent.statsTabletType = stats.NewString("TabletType") // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient { return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered) }) // Stop all binlog players upon entering lameduck. servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset) RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // try to figure out the mysql port mysqlPort := mycnf.MysqlPort if mysqlPort == 0 { // we don't know the port, try to get it from mysqld var err error mysqlPort, err = mysqld.GetMysqlPort() if err != nil { log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err) } } // Start will get the tablet info, and update our state from it if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil { return nil, err } // register the RPC services from the agent servenv.OnRun(func() { agent.registerQueryService() }) // two cases then: // - restoreFromBackup is set: we restore, then initHealthCheck, all // in the background // - restoreFromBackup is not set: we initHealthCheck right away if *restoreFromBackup { go func() { // restoreFromBackup wil just be a regular action // (same as if it was triggered remotely) if err := agent.RestoreData(batchCtx, logutil.NewConsoleLogger(), false /* deleteBeforeRestore */); err != nil { println(fmt.Sprintf("RestoreFromBackup failed: %v", err)) log.Fatalf("RestoreFromBackup failed: %v", err) } // after the restore is done, start health check agent.initHealthCheck() }() } else { // update our state if err := agent.refreshTablet(batchCtx, "Start"); err != nil { return nil, err } // synchronously start health check if needed agent.initHealthCheck() } // Start periodic Orchestrator self-registration, if configured. if agent.orc != nil { go agent.orc.DiscoverLoop(agent) } return agent, nil }
// Iteration is a single iteration for the player: get the current status, // try to play, and plays until interrupted, or until an error occurs. func (bpc *BinlogPlayerController) Iteration() (err error) { defer func() { if x := recover(); x != nil { log.Errorf("%v: caught panic: %v", bpc, x) err = fmt.Errorf("panic: %v", x) } }() // Enable any user to set the timestamp. // We do it on every iteration to be sure, in case mysql was // restarted. bpc.DisableSuperToSetTimestamp() // create the db connection, connect it vtClient := binlogplayer.NewDbClient(bpc.dbConfig) if err := vtClient.Connect(); err != nil { return fmt.Errorf("can't connect to database: %v", err) } defer vtClient.Close() // Read the start position startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid) if err != nil { return fmt.Errorf("can't read startPosition: %v", err) } // if we shouldn't start, we just error out and try again later if strings.Index(flags, binlogplayer.BLP_FLAG_DONT_START) != -1 { return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BLP_FLAG_DONT_START) } // Find the server list for the source shard in our cell addrs, err := bpc.ts.GetEndPoints(bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topo.TYPE_REPLICA) if err != nil { return fmt.Errorf("can't find any source tablet for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA, err) } if len(addrs.Entries) == 0 { return fmt.Errorf("empty source tablet list for %v %v %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA) } newServerIndex := rand.Intn(len(addrs.Entries)) addr := fmt.Sprintf("%v:%v", addrs.Entries[newServerIndex].Host, addrs.Entries[newServerIndex].NamedPortMap["_vtocc"]) // save our current server bpc.playerMutex.Lock() bpc.sourceTablet = topo.TabletAlias{ Cell: bpc.cell, Uid: addrs.Entries[newServerIndex].Uid, } bpc.lastError = nil bpc.playerMutex.Unlock() // check which kind of replication we're doing, tables or keyrange if len(bpc.sourceShard.Tables) > 0 { // tables, first resolve wildcards tables, err := bpc.mysqld.ResolveTables(bpc.dbName, bpc.sourceShard.Tables) if err != nil { return fmt.Errorf("failed to resolve table names: %v", err) } // tables, just get them player := binlogplayer.NewBinlogPlayerTables(vtClient, addr, tables, startPosition, bpc.stopAtGTID, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.interrupted) } else { // the data we have to replicate is the intersection of the // source keyrange and our keyrange overlap, err := key.KeyRangesOverlap(bpc.sourceShard.KeyRange, bpc.keyRange) if err != nil { return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange) } player := binlogplayer.NewBinlogPlayerKeyRange(vtClient, addr, bpc.keyspaceIdType, overlap, startPosition, bpc.stopAtGTID, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.interrupted) } }