func NewSplitStrategy(logger logutil.Logger, argsStr string) (*SplitStrategy, error) { var args []string if argsStr != "" { args = strings.Split(argsStr, " ") } flagSet := flag.NewFlagSet("strategy", flag.ContinueOnError) flagSet.SetOutput(logutil.NewLoggerWriter(logger)) flagSet.Usage = func() { logger.Printf("Strategy flag has the following options:\n") flagSet.PrintDefaults() } populateBlpCheckpoint := flagSet.Bool("populate_blp_checkpoint", false, "populates the blp checkpoint table") dontStartBinlogPlayer := flagSet.Bool("dont_start_binlog_player", false, "do not start the binlog player after restore is complete") skipSetSourceShards := flagSet.Bool("skip_set_source_shards", false, "do not set the SourceShar field on destination shards") if err := flagSet.Parse(args); err != nil { return nil, fmt.Errorf("cannot parse strategy: %v", err) } if flagSet.NArg() > 0 { return nil, fmt.Errorf("strategy doesn't have positional arguments") } return &SplitStrategy{ PopulateBlpCheckpoint: *populateBlpCheckpoint, DontStartBinlogPlayer: *dontStartBinlogPlayer, SkipSetSourceShards: *skipSetSourceShards, }, nil }
// RebuildShard updates the SrvShard objects and underlying serving graph. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function will start each cell over from the beginning on ErrBadVersion, // so it doesn't need a lock on the shard. func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, lockTimeout time.Duration) (*topo.ShardInfo, error) { log.Infof("RebuildShard %v/%v", keyspace, shard) span := trace.NewSpanFromContext(ctx) span.StartLocal("topotools.RebuildShard") defer span.Finish() ctx = trace.NewContext(ctx, span) // read the existing shard info. It has to exist. shardInfo, err := ts.GetShard(ctx, keyspace, shard) if err != nil { return nil, err } // rebuild all cells in parallel wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, cell := range shardInfo.Cells { // skip this cell if we shouldn't rebuild it if !topo.InCellList(cell, cells) { continue } wg.Add(1) go func(cell string) { defer wg.Done() rec.RecordError(rebuildCellSrvShard(ctx, log, ts, shardInfo, cell)) }(cell) } wg.Wait() return shardInfo, rec.Error() }
// NewRestartableResultReader creates a new RestartableResultReader for // the provided tablet and chunk. // It will automatically create the necessary query to read all rows within // the chunk. // NOTE: We assume that the Columns field in "td" was ordered by a preceding // call to reorderColumnsPrimaryKeyFirst(). func NewRestartableResultReader(ctx context.Context, logger logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, td *tabletmanagerdatapb.TableDefinition, chunk chunk) (*RestartableResultReader, error) { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) tablet, err := ts.GetTablet(shortCtx, tabletAlias) cancel() if err != nil { return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to resolve tablet alias: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err) } conn, err := tabletconn.GetDialer()(tablet.Tablet, *remoteActionsTimeout) if err != nil { return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to get dialer for tablet: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err) } r := &RestartableResultReader{ ctx: ctx, logger: logger, tablet: tablet.Tablet, td: td, chunk: chunk, conn: conn, } if err := r.startStream(); err != nil { return nil, err } logger.Infof("tablet=%v table=%v chunk=%v: Starting to stream rows using query '%v'.", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, r.query) return r, nil }
// replaceError replaces original with recent if recent is not nil, // logging original if it wasn't nil. This should be used in deferred // cleanup functions if they change the returned error. func replaceError(logger logutil.Logger, original, recent error) error { if recent == nil { return original } if original != nil { logger.Errorf("One of multiple error: %v", original) } return recent }
// MakeSplitCreateTableSql returns a table creation statement // that is modified to be faster, and the associated optional // 'alter table' to modify the table at the end. // - If the strategy contains the string 'skipAutoIncrement(NNN)' then // we do not re-add the auto_increment on that table. // - If the strategy contains the string 'delaySecondaryIndexes', // then non-primary key indexes will be added afterwards. // - If the strategy contains the string 'useMyIsam' we load // the data into a myisam table and we then convert to innodb // - If the strategy contains the string 'delayPrimaryKey', // then the primary key index will be added afterwards (use with useMyIsam) func MakeSplitCreateTableSql(logger logutil.Logger, schema, databaseName, tableName string, strategy string) (string, string, error) { alters := make([]string, 0, 5) lines := strings.Split(schema, "\n") delayPrimaryKey := strings.Contains(strategy, "delayPrimaryKey") delaySecondaryIndexes := strings.Contains(strategy, "delaySecondaryIndexes") useMyIsam := strings.Contains(strategy, "useMyIsam") for i, line := range lines { if strings.HasPrefix(line, "CREATE TABLE `") { lines[i] = strings.Replace(line, "CREATE TABLE `", "CREATE TABLE `"+databaseName+"`.`", 1) continue } if strings.Contains(line, " AUTO_INCREMENT") { // only add to the final ALTER TABLE if we're not // dropping the AUTO_INCREMENT on the table if strings.Contains(strategy, "skipAutoIncrement("+tableName+")") { logger.Infof("Will not add AUTO_INCREMENT back on table %v", tableName) } else { alters = append(alters, "MODIFY "+line[:len(line)-1]) } lines[i] = strings.Replace(line, " AUTO_INCREMENT", "", 1) continue } isPrimaryKey := strings.Contains(line, " PRIMARY KEY") isSecondaryIndex := !isPrimaryKey && strings.Contains(line, " KEY") if (isPrimaryKey && delayPrimaryKey) || (isSecondaryIndex && delaySecondaryIndexes) { // remove the comma at the end of the previous line, lines[i-1] = lines[i-1][:len(lines[i-1])-1] // keep our comma if 
any (so the next index // might remove it) // also add the key definition to the alters if strings.HasSuffix(line, ",") { lines[i] = "," alters = append(alters, "ADD "+line[:len(line)-1]) } else { lines[i] = "" alters = append(alters, "ADD "+line) } } if useMyIsam && strings.Contains(line, " ENGINE=InnoDB") { lines[i] = strings.Replace(line, " ENGINE=InnoDB", " ENGINE=MyISAM", 1) alters = append(alters, "ENGINE=InnoDB") } } alter := "" if len(alters) > 0 { alter = "ALTER TABLE `" + databaseName + "`.`" + tableName + "` " + strings.Join(alters, ", ") } return strings.Join(lines, "\n"), alter, nil }
func printUpdatedThrottlers(logger logutil.Logger, server string, names []string) { table := tablewriter.NewWriter(loggerWriter{logger}) table.SetAutoFormatHeaders(false) table.SetHeader([]string{"Name"}) for _, name := range names { table.Append([]string{name}) } table.Render() logger.Printf("%d active throttler(s) on server '%v' were updated.\n", len(names), server) }
// dumpTableFull will dump the contents of a full table, and then // chunk it up in multiple compressed files. func (mysqld *Mysqld) dumpTableFull(logger logutil.Logger, td *proto.TableDefinition, dbName, mainCloneSourcePath string, cloneSourcePath string, maximumFilesize uint64) ([]SnapshotFile, error) { filename := path.Join(mainCloneSourcePath, td.Name+".csv") selectIntoOutfile := `SELECT {{.Columns}} INTO OUTFILE "{{.TableOutputPath}}" CHARACTER SET binary FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' FROM {{.TableName}}` queryParams := map[string]string{ "TableName": dbName + "." + td.Name, "Columns": strings.Join(td.Columns, ", "), "TableOutputPath": filename, } sio, err := fillStringTemplate(selectIntoOutfile, queryParams) if err != nil { return nil, err } if err := mysqld.ExecuteSuperQuery(sio); err != nil { return nil, err } file, err := os.Open(filename) if err != nil { return nil, err } defer func() { file.Close() if e := os.Remove(filename); e != nil { logger.Errorf("Cannot remove %v: %v", filename, e) } }() filenamePattern := path.Join(cloneSourcePath, td.Name+".%v.csv.gz") hasherWriter, err := newCompressedNamedHasherWriter(filenamePattern, mysqld.SnapshotDir, td.Name, maximumFilesize) if err != nil { return nil, err } splitter := csvsplitter.NewCSVReader(file, ',') for { line, err := splitter.ReadRecord() if err == io.EOF { break } if err != nil { return nil, err } _, err = hasherWriter.Write(line) if err != nil { return nil, err } } return hasherWriter.SnapshotFiles() }
// FixShardReplication will fix the first problem it encounters within // a ShardReplication object func FixShardReplication(ctx context.Context, ts Server, logger logutil.Logger, cell, keyspace, shard string) error { sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard) if err != nil { return err } for _, node := range sri.Nodes { ti, err := ts.GetTablet(ctx, node.TabletAlias) if err == ErrNoNode { logger.Warningf("Tablet %v is in the replication graph, but does not exist, removing it", node.TabletAlias) return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias) } if err != nil { // unknown error, we probably don't want to continue return err } if ti.Type == pb.TabletType_SCRAP { logger.Warningf("Tablet %v is in the replication graph, but is scrapped, removing it", node.TabletAlias) return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias) } logger.Infof("Keeping tablet %v in the replication graph", node.TabletAlias) } logger.Infof("All entries in replication graph are valid") return nil }
// TableScanByKeyRange returns a QueryResultReader that gets all the // rows from a table that match the supplied KeyRange, ordered by // Primary Key. The returned columns are ordered with the Primary Key // columns in front. func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, shardingColumnName string, shardingColumnType topodatapb.KeyspaceIdType) (*QueryResultReader, error) { where := "" // TODO(aaijazi): this currently only works with V2 style sharding keys. // We should add FilteredQueryResultReader that will do a full table scan, with client-side // filtering to remove rows that don't map to the keyrange that we want. if keyRange != nil { switch shardingColumnType { case topodatapb.KeyspaceIdType_UINT64: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE %v >= %v AND %v < %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.Start), shardingColumnName, uint64FromKeyspaceID(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE %v >= %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE %v < %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.End)) } } case topodatapb.KeyspaceIdType_BYTES: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE HEX(%v) >= '%v' AND HEX(%v) < '%v' ", shardingColumnName, hex.EncodeToString(keyRange.Start), shardingColumnName, hex.EncodeToString(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE HEX(%v) >= '%v' ", shardingColumnName, hex.EncodeToString(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE HEX(%v) < '%v' ", shardingColumnName, hex.EncodeToString(keyRange.End)) } } default: return nil, 
fmt.Errorf("Unsupported ShardingColumnType: %v", shardingColumnType) } } sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", ")) log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql) return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql) }
// TableScanByKeyRange returns a QueryResultReader that gets all the // rows from a table that match the supplied KeyRange, ordered by // Primary Key. The returned columns are ordered with the Primary Key // columns in front. func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, keyspaceIDType topodatapb.KeyspaceIdType) (*QueryResultReader, error) { where := "" if keyRange != nil { switch keyspaceIDType { case topodatapb.KeyspaceIdType_UINT64: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ", uint64FromKeyspaceID(keyRange.Start), uint64FromKeyspaceID(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE keyspace_id >= %v ", uint64FromKeyspaceID(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE keyspace_id < %v ", uint64FromKeyspaceID(keyRange.End)) } } case topodatapb.KeyspaceIdType_BYTES: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' AND HEX(keyspace_id) < '%v' ", hex.EncodeToString(keyRange.Start), hex.EncodeToString(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' ", hex.EncodeToString(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE HEX(keyspace_id) < '%v' ", hex.EncodeToString(keyRange.End)) } } default: return nil, fmt.Errorf("Unsupported KeyspaceIdType: %v", keyspaceIDType) } } sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", ")) log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql) return 
NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql) }
// Backup is the main entry point for a backup: // - uses the BackupStorage service to store a new backup // - shuts down Mysqld during the backup // - remember if we were replicating, restore the exact same state func Backup(ctx context.Context, mysqld MysqlDaemon, logger logutil.Logger, dir, name string, backupConcurrency int, hookExtraEnv map[string]string) error { // start the backup with the BackupStorage bs, err := backupstorage.GetBackupStorage() if err != nil { return err } bh, err := bs.StartBackup(dir, name) if err != nil { return fmt.Errorf("StartBackup failed: %v", err) } if err = backup(ctx, mysqld, logger, bh, backupConcurrency, hookExtraEnv); err != nil { if abortErr := bh.AbortBackup(); abortErr != nil { logger.Errorf("failed to abort backup: %v", abortErr) } return err } return bh.EndBackup() }
// TableScanByKeyRange returns a QueryResultReader that gets all the // rows from a table that match the supplied KeyRange, ordered by // Primary Key. The returned columns are ordered with the Primary Key // columns in front. func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition, keyRange key.KeyRange, keyspaceIdType key.KeyspaceIdType) (*QueryResultReader, error) { where := "" switch keyspaceIdType { case key.KIT_UINT64: if keyRange.Start != key.MinKey { if keyRange.End != key.MaxKey { // have start & end where = fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ", uint64FromKeyspaceId(keyRange.Start), uint64FromKeyspaceId(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE keyspace_id >= %v ", uint64FromKeyspaceId(keyRange.Start)) } } else { if keyRange.End != key.MaxKey { // have end only where = fmt.Sprintf("WHERE keyspace_id < %v ", uint64FromKeyspaceId(keyRange.End)) } } case key.KIT_BYTES: if keyRange.Start != key.MinKey { if keyRange.End != key.MaxKey { // have start & end where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' AND HEX(keyspace_id) < '%v' ", keyRange.Start.Hex(), keyRange.End.Hex()) } else { // have start only where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' ", keyRange.Start.Hex()) } } else { if keyRange.End != key.MaxKey { // have end only where = fmt.Sprintf("WHERE HEX(keyspace_id) < '%v' ", keyRange.End.Hex()) } } default: return nil, fmt.Errorf("Unsupported KeyspaceIdType: %v", keyspaceIdType) } sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", ")) log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql) return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql) }
// This piece runs on the presumably empty machine acting as the target in the // create replica action. // // validate target (self) // shutdown_mysql() // create temp data directory /vt/target/vt_<keyspace> // copy compressed data files via HTTP // verify hash of compressed files // uncompress into /vt/vt_<target-uid>/data/vt_<keyspace> // start_mysql() // clean up compressed files func (mysqld *Mysqld) RestoreFromSnapshot(logger logutil.Logger, snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error { if snapshotManifest == nil { return errors.New("RestoreFromSnapshot: nil snapshotManifest") } logger.Infof("ValidateCloneTarget") if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil { return err } logger.Infof("Shutdown mysqld") if err := mysqld.Shutdown(true, MysqlWaitTime); err != nil { return err } logger.Infof("Fetch snapshot") if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil { return err } logger.Infof("Restart mysqld") if err := mysqld.Start(MysqlWaitTime); err != nil { return err } cmdList, err := mysqld.StartReplicationCommands(snapshotManifest.ReplicationStatus) if err != nil { return err } if err := mysqld.ExecuteSuperQueryList(cmdList); err != nil { return err } if !dontWaitForSlaveStart { if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil { return err } } h := hook.NewSimpleHook("postflight_restore") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return nil }
func (mysqld *Mysqld) restoreAfterSnapshot(logger logutil.Logger, slaveStartRequired, readOnly bool, hookExtraEnv map[string]string, connToRelease dbconnpool.PoolConnection) (err error) { // Try to fix mysqld regardless of snapshot success.. logger.Infof("exec UNLOCK TABLES") _, err = connToRelease.ExecuteFetch("UNLOCK TABLES", 10000, false) connToRelease.Recycle() if err != nil { return fmt.Errorf("failed to UNLOCK TABLES: %v", err) } // restore original mysqld state that we saved above if slaveStartRequired { if err = mysqld.StartSlave(hookExtraEnv); err != nil { return } // this should be quick, but we might as well just wait if err = mysqld.WaitForSlaveStart(5); err != nil { return } } if err = mysqld.SetReadOnly(readOnly); err != nil { return } return nil }
func (mysqld *Mysqld) prepareToSnapshot(logger logutil.Logger, allowHierarchicalReplication bool, hookExtraEnv map[string]string) (slaveStartRequired, readOnly bool, replicationPosition, myMasterPosition proto.ReplicationPosition, masterAddr string, connToRelease dbconnpool.PoolConnection, err error) { // save initial state so we can restore on Start() if slaveStatus, slaveErr := mysqld.SlaveStatus(); slaveErr == nil { slaveStartRequired = slaveStatus.SlaveRunning() } // For masters, set read-only so we don't write anything during snapshot readOnly = true if readOnly, err = mysqld.IsReadOnly(); err != nil { return } logger.Infof("Set Read Only") if !readOnly { mysqld.SetReadOnly(true) } logger.Infof("Stop Slave") if err = mysqld.StopSlave(hookExtraEnv); err != nil { return } // Get the replication position and master addr replicationPosition, masterAddr, err = mysqld.getReplicationPositionForClones(allowHierarchicalReplication) if err != nil { return } // get our master position, some targets may use it myMasterPosition, err = mysqld.MasterPosition() if err != nil && err != ErrNotMaster { // this is a real error return } logger.Infof("Flush tables") if connToRelease, err = mysqld.dbaPool.Get(0); err != nil { return } logger.Infof("exec FLUSH TABLES WITH READ LOCK") if _, err = connToRelease.ExecuteFetch("FLUSH TABLES WITH READ LOCK", 10000, false); err != nil { connToRelease.Recycle() return } return }
// PrintAllCommands prints a help text for all registered commands to the given Logger. func PrintAllCommands(logger logutil.Logger) { for _, group := range commands { if group.Name == "Debugging" { continue } logger.Printf("%v: %v\n", group.Name, group.Description) for _, cmd := range group.Commands { logger.Printf(" %v %v\n", cmd.Name, cmd.Params) } logger.Printf("\n") } }
// RestartSlavesExternal will tell all the slaves in the provided list // that they have a new master, and also tell all the masters. The // masters will be scrapped if they don't answer. // We execute all the actions in parallel. func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTabletAlias topo.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *actionnode.SlaveWasRestartedArgs) error) { wg := sync.WaitGroup{} swrd := actionnode.SlaveWasRestartedArgs{ Parent: masterElectTabletAlias, } log.Infof("Updating individual tablets with the right master...") // do all the slaves for _, ti := range slaveTabletMap { wg.Add(1) go func(ti *topo.TabletInfo) { if err := slaveWasRestarted(ti, &swrd); err != nil { log.Warningf("Slave %v had an error: %v", ti.Alias, err) } wg.Done() }(ti) } // and do the old master and any straggler, if possible. for _, ti := range masterTabletMap { wg.Add(1) go func(ti *topo.TabletInfo) { err := slaveWasRestarted(ti, &swrd) if err != nil { // the old master can be annoying if left // around in the replication graph, so if we // can't restart it, we just scrap it. // We don't rebuild the Shard just yet though. log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err) ti.Type = topo.TYPE_SPARE ti.Parent = masterElectTabletAlias if err := topo.UpdateTablet(ts, ti); err != nil { log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err) } } wg.Done() }(ti) } wg.Wait() }
// RestartSlavesExternal will tell all the slaves in the provided list // that they have a new master, and also tell all the masters. The // masters will be scrapped if they don't answer. // We execute all the actions in parallel. func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, masterElectTabletAlias *topodatapb.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *topodatapb.TabletAlias) error) { wg := sync.WaitGroup{} log.Infof("Updating individual tablets with the right master...") // do all the slaves for _, ti := range slaveTabletMap { wg.Add(1) go func(ti *topo.TabletInfo) { if err := slaveWasRestarted(ti, masterElectTabletAlias); err != nil { log.Warningf("Slave %v had an error: %v", ti.Alias, err) } wg.Done() }(ti) } // and do the old master and any straggler, if possible. for _, ti := range masterTabletMap { wg.Add(1) go func(ti *topo.TabletInfo) { err := slaveWasRestarted(ti, masterElectTabletAlias) if err != nil { // the old master can be annoying if left // around in the replication graph, so if we // can't restart it, we just make it spare. log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err) ti.Type = topodatapb.TabletType_SPARE if err := ts.UpdateTablet(context.TODO(), ti); err != nil { log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err) } } wg.Done() }(ti) } wg.Wait() }
// rebuildKeyspace should only be used with an action lock on the keyspace // - otherwise the consistency of the serving graph data can't be // guaranteed. // // Take data from the global keyspace and rebuild the local serving // copies in each cell. func rebuildKeyspace(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace string, cells []string, rebuildSrvShards bool) error { log.Infof("rebuildKeyspace %v", keyspace) ki, err := ts.GetKeyspace(ctx, keyspace) if err != nil { return err } var shardCache map[string]*topo.ShardInfo if rebuildSrvShards { shards, err := ts.GetShardNames(ctx, keyspace) if err != nil { return nil } // Rebuild all shards in parallel, save the shards shardCache = make(map[string]*topo.ShardInfo) wg := sync.WaitGroup{} mu := sync.Mutex{} rec := concurrency.FirstErrorRecorder{} for _, shard := range shards { wg.Add(1) go func(shard string) { if shardInfo, err := RebuildShard(ctx, log, ts, keyspace, shard, cells); err != nil { rec.RecordError(fmt.Errorf("RebuildShard failed: %v/%v %v", keyspace, shard, err)) } else { mu.Lock() shardCache[shard] = shardInfo mu.Unlock() } wg.Done() }(shard) } wg.Wait() if rec.HasErrors() { return rec.Error() } } else { shardCache, err = ts.FindAllShardsInKeyspace(ctx, keyspace) if err != nil { return err } } // Build the list of cells to work on: we get the union // of all the Cells of all the Shards, limited to the provided cells. // // srvKeyspaceMap is a map: // key: cell // value: topo.SrvKeyspace object being built srvKeyspaceMap := make(map[string]*topodatapb.SrvKeyspace) findCellsForRebuild(ki, shardCache, cells, srvKeyspaceMap) // Then we add the cells from the keyspaces we might be 'ServedFrom'. 
for _, ksf := range ki.ServedFroms { servedFromShards, err := ts.FindAllShardsInKeyspace(ctx, ksf.Keyspace) if err != nil { return err } findCellsForRebuild(ki, servedFromShards, cells, srvKeyspaceMap) } // for each entry in the srvKeyspaceMap map, we do the following: // - read the SrvShard structures for each shard / cell // - if not present, build an empty one from global Shard // - compute the union of the db types (replica, master, ...) // - sort the shards in the list by range // - check the ranges are compatible (no hole, covers everything) for cell, srvKeyspace := range srvKeyspaceMap { for _, si := range shardCache { servedTypes := si.GetServedTypesPerCell(cell) // for each type this shard is supposed to serve, // add it to srvKeyspace.Partitions for _, tabletType := range servedTypes { partition := topoproto.SrvKeyspaceGetPartition(srvKeyspace, tabletType) if partition == nil { partition = &topodatapb.SrvKeyspace_KeyspacePartition{ ServedType: tabletType, } srvKeyspace.Partitions = append(srvKeyspace.Partitions, partition) } partition.ShardReferences = append(partition.ShardReferences, &topodatapb.ShardReference{ Name: si.ShardName(), KeyRange: si.KeyRange, }) } } if err := orderAndCheckPartitions(cell, srvKeyspace); err != nil { return err } } // and then finally save the keyspace objects for cell, srvKeyspace := range srvKeyspaceMap { log.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace) if err := ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace); err != nil { return fmt.Errorf("writing serving data failed: %v", err) } } return nil }
// MultiRestore is the main entry point for multi restore. // // We will either: // - read from the network if sourceAddrs != nil // - read from a disk snapshot if fromStoragePaths != nil // // The strategy is used as follows: // - If it contains the string 'writeBinLogs' then we will also write // to the binary logs. // - If it contains the command 'populateBlpCheckpoint' then we will // populate the blp_checkpoint table with master positions to start from // - If is also contains the command 'dontStartBinlogPlayer' we won't // start binlog replication on the destination (but it will be configured) func (mysqld *Mysqld) MultiRestore(logger logutil.Logger, destinationDbName string, keyRanges []key.KeyRange, sourceAddrs []*url.URL, fromStoragePaths []string, snapshotConcurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) (err error) { writeBinLogs := strings.Contains(strategy, "writeBinLogs") var manifests []*SplitSnapshotManifest if sourceAddrs != nil { // get the manifests from the network manifests = make([]*SplitSnapshotManifest, len(sourceAddrs)) rc := concurrency.NewResourceConstraint(fetchConcurrency) for i, sourceAddr := range sourceAddrs { rc.Add(1) go func(sourceAddr *url.URL, i int) { rc.Acquire() defer rc.ReleaseAndDone() if rc.HasErrors() { return } var sourceDbName string if len(sourceAddr.Path) < 2 { // "" or "/" sourceDbName = destinationDbName } else { sourceDbName = sourceAddr.Path[1:] } ssm, e := fetchSnapshotManifestWithRetry("http://"+sourceAddr.Host, sourceDbName, keyRanges[i], fetchRetryCount) manifests[i] = ssm rc.RecordError(e) }(sourceAddr, i) } if err = rc.Wait(); err != nil { return } } else { // get the manifests from the local snapshots manifests = make([]*SplitSnapshotManifest, len(fromStoragePaths)) for i, fromStoragePath := range fromStoragePaths { var err error manifests[i], err = readSnapshotManifest(fromStoragePath) if err != nil { return err } } } if e := SanityCheckManifests(manifests); e != 
nil { return e } tempStoragePath := path.Join(mysqld.SnapshotDir, "multirestore", destinationDbName) // Start fresh if err = os.RemoveAll(tempStoragePath); err != nil { return } if err = os.MkdirAll(tempStoragePath, 0775); err != nil { return err } defer func() { if e := os.RemoveAll(tempStoragePath); e != nil { logger.Errorf("error removing %v: %v", tempStoragePath, e) } }() // Handle our concurrency: // - fetchConcurrency tasks for network / decompress from disk // - insertTableConcurrency for table inserts from a file // into an innodb table // - snapshotConcurrency tasks for table inserts / modify tables sems := make(map[string]*sync2.Semaphore, len(manifests[0].SchemaDefinition.TableDefinitions)+2) sems["net"] = sync2.NewSemaphore(fetchConcurrency, 0) sems["db"] = sync2.NewSemaphore(snapshotConcurrency, 0) // Store the alter table statements for after restore, // and how many jobs we're running on each table // TODO(alainjobart) the jobCount map is a bit weird. replace it // with a map of WaitGroups, initialized to the number of files // per table. Have extra go routines for the tables with auto_increment // to wait on the waitgroup, and apply the modify_table. postSql := make(map[string]string, len(manifests[0].SchemaDefinition.TableDefinitions)) jobCount := make(map[string]*sync2.AtomicInt32) // Create the database (it's a good check to know if we're running // multirestore a second time too!) 
manifest := manifests[0] // I am assuming they all match createDatabase, e := fillStringTemplate(manifest.SchemaDefinition.DatabaseSchema, map[string]string{"DatabaseName": destinationDbName}) if e != nil { return e } if createDatabase == "" { return fmt.Errorf("Empty create database statement") } createDbCmds := make([]string, 0, len(manifest.SchemaDefinition.TableDefinitions)+2) if !writeBinLogs { createDbCmds = append(createDbCmds, "SET sql_log_bin = OFF") } createDbCmds = append(createDbCmds, createDatabase) createDbCmds = append(createDbCmds, "USE `"+destinationDbName+"`") createViewCmds := make([]string, 0, 16) for _, td := range manifest.SchemaDefinition.TableDefinitions { if td.Type == proto.TABLE_BASE_TABLE { createDbCmd, alterTable, err := MakeSplitCreateTableSql(logger, td.Schema, destinationDbName, td.Name, strategy) if err != nil { return err } if alterTable != "" { postSql[td.Name] = alterTable } jobCount[td.Name] = new(sync2.AtomicInt32) createDbCmds = append(createDbCmds, createDbCmd) sems["table-"+td.Name] = sync2.NewSemaphore(insertTableConcurrency, 0) } else { // views are just created with the right db name // and no data will ever go in them. We create them // after all tables are created, as they will // probably depend on real tables. createViewCmd, err := fillStringTemplate(td.Schema, map[string]string{"DatabaseName": destinationDbName}) if err != nil { return err } createViewCmds = append(createViewCmds, createViewCmd) } } createDbCmds = append(createDbCmds, createViewCmds...) 
if err = mysqld.ExecuteSuperQueryList(createDbCmds); err != nil { return } // compute how many jobs we will have for _, manifest := range manifests { for _, file := range manifest.Source.Files { jobCount[file.TableName].Add(1) } } loadDataInfile := `LOAD DATA INFILE '{{.TableInputPath}}' INTO TABLE {{.TableName}} CHARACTER SET binary FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' ({{.Columns}})` // fetch all the csv files, and apply them one at a time. Note // this might start many go routines, and they'll all be // waiting on the resource semaphores. mrc := concurrency.NewMultiResourceConstraint(sems) for manifestIndex, manifest := range manifests { if err = os.Mkdir(path.Join(tempStoragePath, manifest.Source.Addr), 0775); err != nil { return err } for i := range manifest.Source.Files { lsf := localSnapshotFile{manifest: manifest, file: &manifest.Source.Files[i], basePath: tempStoragePath} mrc.Add(1) go func(manifestIndex, i int) { defer mrc.Done() // compute a few things now, so if we can't we // don't take resources: // - get the schema td, ok := manifest.SchemaDefinition.GetTable(lsf.tableName()) if !ok { mrc.RecordError(fmt.Errorf("No table named %v in schema", lsf.tableName())) return } // - get the load data statement queryParams := map[string]string{ "TableInputPath": lsf.filename(), "TableName": lsf.tableName(), "Columns": strings.Join(td.Columns, ", "), } loadStatement, e := fillStringTemplate(loadDataInfile, queryParams) if e != nil { mrc.RecordError(e) return } // get the file, using the 'net' resource mrc.Acquire("net") if mrc.HasErrors() { mrc.Release("net") return } if sourceAddrs == nil { e = uncompressLocalFile(path.Join(fromStoragePaths[manifestIndex], path.Base(lsf.file.Path)), lsf.file.Hash, lsf.filename()) } else { e = fetchFileWithRetry(lsf.url(), lsf.file.Hash, lsf.filename(), fetchRetryCount) } mrc.Release("net") if e != nil { mrc.RecordError(e) return } defer os.Remove(lsf.filename()) // acquire 
the table lock (we do this first // so we maximize access to db. Otherwise // if 8 threads had gotten the db lock but // were writing to the same table, only one // load would go at once) tableLockName := "table-" + lsf.tableName() mrc.Acquire(tableLockName) defer func() { mrc.Release(tableLockName) }() if mrc.HasErrors() { return } // acquire the db lock mrc.Acquire("db") defer func() { mrc.Release("db") }() if mrc.HasErrors() { return } // load the data in queries := buildQueryList(destinationDbName, loadStatement, writeBinLogs) e = mysqld.ExecuteSuperQueryList(queries) if e != nil { mrc.RecordError(e) return } // if we're running the last insert, // potentially re-add the auto-increments remainingInserts := jobCount[lsf.tableName()].Add(-1) if remainingInserts == 0 && postSql[lsf.tableName()] != "" { queries = buildQueryList(destinationDbName, postSql[lsf.tableName()], writeBinLogs) e = mysqld.ExecuteSuperQueryList(queries) if e != nil { mrc.RecordError(e) return } } }(manifestIndex, i) } } if err = mrc.Wait(); err != nil { return err } // populate blp_checkpoint table if we want to if strings.Index(strategy, "populateBlpCheckpoint") != -1 { queries := make([]string, 0, 4) if !writeBinLogs { queries = append(queries, "SET sql_log_bin = OFF") queries = append(queries, "SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED") } queries = append(queries, binlogplayer.CreateBlpCheckpoint()...) flags := "" if strings.Index(strategy, "dontStartBinlogPlayer") != -1 { flags = binlogplayer.BLP_FLAG_DONT_START } for manifestIndex, manifest := range manifests { queries = append(queries, binlogplayer.PopulateBlpCheckpoint(uint32(manifestIndex), manifest.Source.MasterPosition, time.Now().Unix(), flags)) } if err = mysqld.ExecuteSuperQueryList(queries); err != nil { return err } } return nil }
// CreateMultiSnapshot create snapshots of the data. // - for a resharding snapshot, keyRanges+keyName+keyType are set, // and tables is empty. This action will create multiple snapshots, // one per keyRange. // - for a vertical split, tables is set, keyRanges = [KeyRange{}] and // keyName+keyType are empty. It will create a single snapshot of // the contents of the tables. // Note combinations of table subset and keyranges are not supported. func (mysqld *Mysqld) CreateMultiSnapshot(logger logutil.Logger, keyRanges []key.KeyRange, dbName, keyName string, keyType key.KeyspaceIdType, sourceAddr string, allowHierarchicalReplication bool, snapshotConcurrency int, tables, excludeTables []string, skipSlaveRestart bool, maximumFilesize uint64, hookExtraEnv map[string]string) (snapshotManifestFilenames []string, err error) { if dbName == "" { err = fmt.Errorf("no database name provided") return } if len(tables) > 0 { if len(keyRanges) != 1 || keyRanges[0].IsPartial() { return nil, fmt.Errorf("With tables specified, can only have one full KeyRange") } } // same logic applies here logger.Infof("validateCloneSource") if err = mysqld.validateCloneSource(false, hookExtraEnv); err != nil { return } // clean out and start fresh cloneSourcePaths := make(map[key.KeyRange]string) for _, keyRange := range keyRanges { cloneSourcePaths[keyRange] = path.Join(mysqld.SnapshotDir, dataDir, dbName+"-"+string(keyRange.Start.Hex())+","+string(keyRange.End.Hex())) } for _, _path := range cloneSourcePaths { if err = os.RemoveAll(_path); err != nil { return } if err = os.MkdirAll(_path, 0775); err != nil { return } } mainCloneSourcePath := path.Join(mysqld.SnapshotDir, dataDir, dbName+"-all") if err = os.RemoveAll(mainCloneSourcePath); err != nil { return } if err = os.MkdirAll(mainCloneSourcePath, 0775); err != nil { return } // get the schema for each table sd, fetchErr := mysqld.GetSchema(dbName, tables, excludeTables, true) if fetchErr != nil { return []string{}, fetchErr } if 
len(sd.TableDefinitions) == 0 { return []string{}, fmt.Errorf("empty table list for %v", dbName) } sd.SortByReverseDataLength() // prepareToSnapshot will get the tablet in the rigth state, // and return the current mysql status. slaveStartRequired, readOnly, replicationPosition, myMasterPosition, masterAddr, conn, err := mysqld.prepareToSnapshot(logger, allowHierarchicalReplication, hookExtraEnv) if err != nil { return } if skipSlaveRestart { if slaveStartRequired { logger.Infof("Overriding slaveStartRequired to false") } slaveStartRequired = false } defer func() { err = replaceError(logger, err, mysqld.restoreAfterSnapshot(logger, slaveStartRequired, readOnly, hookExtraEnv, conn)) }() // dump the files in parallel with a pre-defined concurrency datafiles := make([]map[key.KeyRange][]SnapshotFile, len(sd.TableDefinitions)) dumpTableWorker := func(i int) (err error) { table := sd.TableDefinitions[i] if table.Type != proto.TABLE_BASE_TABLE { // we just skip views here return nil } if len(tables) > 0 { sfs, err := mysqld.dumpTableFull(logger, table, dbName, mainCloneSourcePath, cloneSourcePaths[key.KeyRange{}], maximumFilesize) if err != nil { return err } datafiles[i] = map[key.KeyRange][]SnapshotFile{ key.KeyRange{}: sfs, } } else { datafiles[i], err = mysqld.dumpTableSplit(logger, table, dbName, keyName, keyType, mainCloneSourcePath, cloneSourcePaths, maximumFilesize) } return } if err = ConcurrentMap(snapshotConcurrency, len(sd.TableDefinitions), dumpTableWorker); err != nil { return } if e := os.Remove(mainCloneSourcePath); e != nil { logger.Errorf("Cannot remove %v: %v", mainCloneSourcePath, e) } // Check the replication position after snapshot is done // hasn't changed, to be sure we haven't inserted any data newReplicationPosition, _, err := mysqld.getReplicationPositionForClones(allowHierarchicalReplication) if err != nil { return } if !newReplicationPosition.Equal(replicationPosition) { return nil, fmt.Errorf("replicationPosition position changed during 
snapshot, from %v to %v", replicationPosition, newReplicationPosition) } // Write all the manifest files ssmFiles := make([]string, len(keyRanges)) for i, kr := range keyRanges { krDatafiles := make([]SnapshotFile, 0, len(datafiles)) for _, m := range datafiles { krDatafiles = append(krDatafiles, m[kr]...) } ssm, err := NewSplitSnapshotManifest(sourceAddr, mysqld.IpAddr(), masterAddr, dbName, krDatafiles, replicationPosition, myMasterPosition, kr, sd) if err != nil { return nil, err } ssmFiles[i] = path.Join(cloneSourcePaths[kr], partialSnapshotManifestFile) if err = writeJson(ssmFiles[i], ssm); err != nil { return nil, err } } // Call the (optional) hook to send the files somewhere else wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, kr := range keyRanges { wg.Add(1) go func(kr key.KeyRange) { defer wg.Done() h := hook.NewSimpleHook("copy_snapshot_to_storage") h.ExtraEnv = make(map[string]string) for k, v := range hookExtraEnv { h.ExtraEnv[k] = v } h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", kr.Start.Hex(), kr.End.Hex()) h.ExtraEnv["SNAPSHOT_PATH"] = cloneSourcePaths[kr] rec.RecordError(h.ExecuteOptional()) }(kr) } wg.Wait() if rec.HasErrors() { return nil, err } // Return all the URLs for the MANIFESTs snapshotURLPaths := make([]string, len(keyRanges)) for i := 0; i < len(keyRanges); i++ { relative, err := filepath.Rel(mysqld.SnapshotDir, ssmFiles[i]) if err != nil { return nil, err } snapshotURLPaths[i] = path.Join(SnapshotURLPath, relative) } return snapshotURLPaths, nil }
func backup(ctx context.Context, mysqld MysqlDaemon, logger logutil.Logger, bh backupstorage.BackupHandle, backupConcurrency int, hookExtraEnv map[string]string) error { // save initial state so we can restore slaveStartRequired := false sourceIsMaster := false readOnly := true var replicationPosition replication.Position semiSyncMaster, semiSyncSlave := mysqld.SemiSyncEnabled() // see if we need to restart replication after backup logger.Infof("getting current replication status") slaveStatus, err := mysqld.SlaveStatus() switch err { case nil: slaveStartRequired = slaveStatus.SlaveRunning() case ErrNotSlave: // keep going if we're the master, might be a degenerate case sourceIsMaster = true default: return fmt.Errorf("can't get slave status: %v", err) } // get the read-only flag readOnly, err = mysqld.IsReadOnly() if err != nil { return fmt.Errorf("can't get read-only status: %v", err) } // get the replication position if sourceIsMaster { if !readOnly { logger.Infof("turning master read-only before backup") if err = mysqld.SetReadOnly(true); err != nil { return fmt.Errorf("can't set read-only status: %v", err) } } replicationPosition, err = mysqld.MasterPosition() if err != nil { return fmt.Errorf("can't get master position: %v", err) } } else { if err = StopSlave(mysqld, hookExtraEnv); err != nil { return fmt.Errorf("can't stop slave: %v", err) } var slaveStatus replication.Status slaveStatus, err = mysqld.SlaveStatus() if err != nil { return fmt.Errorf("can't get slave status: %v", err) } replicationPosition = slaveStatus.Position } logger.Infof("using replication position: %v", replicationPosition) // shutdown mysqld err = mysqld.Shutdown(ctx, true) if err != nil { return fmt.Errorf("can't shutdown mysqld: %v", err) } // get the files to backup fes, err := findFilesTobackup(mysqld.Cnf()) if err != nil { return fmt.Errorf("can't find files to backup: %v", err) } logger.Infof("found %v files to backup", len(fes)) // backup everything if err := backupFiles(mysqld, 
logger, bh, fes, replicationPosition, backupConcurrency); err != nil { return fmt.Errorf("can't backup files: %v", err) } // Try to restart mysqld err = mysqld.Start(ctx) if err != nil { return fmt.Errorf("can't restart mysqld: %v", err) } // Restore original mysqld state that we saved above. if semiSyncMaster || semiSyncSlave { // Only do this if one of them was on, since both being off could mean // the plugin isn't even loaded, and the server variables don't exist. logger.Infof("restoring semi-sync settings from before backup: master=%v, slave=%v", semiSyncMaster, semiSyncSlave) err := mysqld.SetSemiSyncEnabled(semiSyncMaster, semiSyncSlave) if err != nil { return err } } if slaveStartRequired { logger.Infof("restarting mysql replication") if err := StartSlave(mysqld, hookExtraEnv); err != nil { return fmt.Errorf("cannot restart slave: %v", err) } // this should be quick, but we might as well just wait if err := WaitForSlaveStart(mysqld, slaveStartDeadline); err != nil { return fmt.Errorf("slave is not restarting: %v", err) } } // And set read-only mode logger.Infof("resetting mysqld read-only to %v", readOnly) if err := mysqld.SetReadOnly(readOnly); err != nil { return err } return nil }
// Restore is the main entry point for backup restore. If there is no // appropriate backup on the BackupStorage, Restore logs an error // and returns ErrNoBackup. Any other error is returned. func Restore( ctx context.Context, mysqld MysqlDaemon, dir string, restoreConcurrency int, hookExtraEnv map[string]string, localMetadata map[string]string, logger logutil.Logger, deleteBeforeRestore bool) (replication.Position, error) { // find the right backup handle: most recent one, with a MANIFEST logger.Infof("Restore: looking for a suitable backup to restore") bs, err := backupstorage.GetBackupStorage() if err != nil { return replication.Position{}, err } defer bs.Close() bhs, err := bs.ListBackups(dir) if err != nil { return replication.Position{}, fmt.Errorf("ListBackups failed: %v", err) } var bh backupstorage.BackupHandle var bm BackupManifest var toRestore int for toRestore = len(bhs) - 1; toRestore >= 0; toRestore-- { bh = bhs[toRestore] rc, err := bh.ReadFile(backupManifest) if err != nil { log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage: can't read MANIFEST: %v)", bh.Name(), dir, err) continue } err = json.NewDecoder(rc).Decode(&bm) rc.Close() if err != nil { log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (cannot JSON decode MANIFEST: %v)", bh.Name(), dir, err) continue } logger.Infof("Restore: found backup %v %v to restore with %v files", bh.Directory(), bh.Name(), len(bm.FileEntries)) break } if toRestore < 0 { logger.Errorf("No backup to restore on BackupStorage for directory %v. Starting up empty.", dir) if err = populateLocalMetadata(mysqld, localMetadata); err == nil { err = ErrNoBackup } return replication.Position{}, err } if !deleteBeforeRestore { logger.Infof("Restore: checking no existing data is present") ok, err := checkNoDB(ctx, mysqld) if err != nil { return replication.Position{}, err } if !ok { logger.Infof("Auto-restore is enabled, but mysqld already contains data. 
Assuming vttablet was just restarted.") if err = populateLocalMetadata(mysqld, localMetadata); err == nil { err = ErrExistingDB } return replication.Position{}, err } } logger.Infof("Restore: shutdown mysqld") err = mysqld.Shutdown(ctx, true) if err != nil { return replication.Position{}, err } logger.Infof("Restore: deleting existing files") if err := removeExistingFiles(mysqld.Cnf()); err != nil { return replication.Position{}, err } logger.Infof("Restore: reinit config file") err = mysqld.ReinitConfig(ctx) if err != nil { return replication.Position{}, err } logger.Infof("Restore: copying all files") if err := restoreFiles(mysqld.Cnf(), bh, bm.FileEntries, restoreConcurrency); err != nil { return replication.Position{}, err } // mysqld needs to be running in order for mysql_upgrade to work. // If we've just restored from a backup from previous MySQL version then mysqld // may fail to start due to a different structure of mysql.* tables. The flag // --skip-grant-tables ensures that these tables are not read until mysql_upgrade // is executed. And since with --skip-grant-tables anyone can connect to MySQL // without password, we are passing --skip-networking to greatly reduce the set // of those who can connect. logger.Infof("Restore: starting mysqld for mysql_upgrade") err = mysqld.Start(ctx, "--skip-grant-tables", "--skip-networking") if err != nil { return replication.Position{}, err } logger.Infof("Restore: running mysql_upgrade") if err := mysqld.RunMysqlUpgrade(); err != nil { return replication.Position{}, fmt.Errorf("mysql_upgrade failed: %v", err) } // Populate local_metadata before starting without --skip-networking, // so it's there before we start announcing ourselves. 
logger.Infof("Restore: populating local_metadata") err = populateLocalMetadata(mysqld, localMetadata) if err != nil { return replication.Position{}, err } // The MySQL manual recommends restarting mysqld after running mysql_upgrade, // so that any changes made to system tables take effect. logger.Infof("Restore: restarting mysqld after mysql_upgrade") err = mysqld.Shutdown(ctx, true) if err != nil { return replication.Position{}, err } err = mysqld.Start(ctx) if err != nil { return replication.Position{}, err } return bm.Position, nil }
// TableScan returns a QueryResultReader that gets all the rows from a // table, ordered by Primary Key. The returned columns are ordered // with the Primary Key columns in front. func TableScan(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition) (*QueryResultReader, error) { sql := fmt.Sprintf("SELECT %v FROM %v ORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, strings.Join(tableDefinition.PrimaryKeyColumns, ", ")) log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql) return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql) }
// Go runs the diff. If there is no error, it will drain both sides. // If an error occurs, it will just return it and stop. func (rd *RowSubsetDiffer) Go(log logutil.Logger) (dr DiffReport, err error) { dr.startingTime = time.Now() defer dr.ComputeQPS() var superset []sqltypes.Value var subset []sqltypes.Value advanceSuperset := true advanceSubset := true for { if advanceSuperset { superset, err = rd.superset.Next() if err != nil { return } advanceSuperset = false } if advanceSubset { subset, err = rd.subset.Next() if err != nil { return } advanceSubset = false } dr.processedRows++ if superset == nil { // no more rows from the superset if subset == nil { // no more rows from subset either, we're done return } // drain subset, update count if count, err := rd.subset.Drain(); err != nil { return dr, err } else { dr.extraRowsRight += 1 + count } return } if subset == nil { // no more rows from the subset // we know we have rows from superset, drain if _, err := rd.superset.Drain(); err != nil { return dr, err } return } // we have both superset and subset, compare f := RowsEqual(superset, subset) if f == -1 { // rows are the same, next dr.matchingRows++ advanceSuperset = true advanceSubset = true continue } if f >= rd.pkFieldCount { // rows have the same primary key, only content is different if dr.mismatchedRows < 10 { log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset) } dr.mismatchedRows++ advanceSuperset = true advanceSubset = true continue } // have to find the 'smallest' raw and advance it c, err := CompareRows(rd.superset.Fields(), rd.pkFieldCount, superset, subset) if err != nil { return dr, err } if c < 0 { advanceSuperset = true continue } else if c > 0 { if dr.extraRowsRight < 10 { log.Errorf("Extra row %v on subset: %v", dr.extraRowsRight, subset) } dr.extraRowsRight++ advanceSubset = true continue } // After looking at primary keys more carefully, // they're the same. 
Logging a regular difference // then, and advancing both. if dr.mismatchedRows < 10 { log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset) } dr.mismatchedRows++ advanceSuperset = true advanceSubset = true } }
// rebuildCellSrvShard computes and writes the serving graph data to a // single cell func rebuildCellSrvShard(log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error { log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell) // Get all existing db types so they can be removed if nothing // had been edited. existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { if err != topo.ErrNoNode { return err } } // Update db type addresses in the serving graph // // locationAddrsMap is a map: // key: tabletType // value: EndPoints (list of server records) locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints) for _, tablet := range tablets { if !tablet.IsInReplicationGraph() { // only valid case is a scrapped master in the // catastrophic reparent case if tablet.Parent.Uid != topo.NO_TABLET { log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias) } continue } // Check IsInServingGraph, we don't want to add tablets that // are not serving if !tablet.IsInServingGraph() { continue } // Check the Keyspace and Shard for the tablet are right if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() { return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard) } // Add the tablet to the list addrs, ok := locationAddrsMap[tablet.Type] if !ok { addrs = topo.NewEndPoints() locationAddrsMap[tablet.Type] = addrs } entry, err := tablet.Tablet.EndPoint() if err != nil { log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err) continue } addrs.Entries = append(addrs.Entries, *entry) } // we're gonna parallelize a lot here: // - writing all 
the tabletTypes records // - removing the unused records // - writing SrvShard rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} // write all the EndPoints nodes everywhere we want them for tabletType, addrs := range locationAddrsMap { wg.Add(1) go func(tabletType topo.TabletType, addrs *topo.EndPoints) { log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType) if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil { rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err)) } wg.Done() }(tabletType, addrs) } // Delete any pre-existing paths that were not updated by this process. // That's the existingTabletTypes - locationAddrsMap for _, tabletType := range existingTabletTypes { if _, ok := locationAddrsMap[tabletType]; !ok { wg.Add(1) go func(tabletType topo.TabletType) { log.Infof("removing stale db type from serving graph: %v", tabletType) if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil { log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err) } wg.Done() }(tabletType) } } // Update srvShard object wg.Add(1) go func() { log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName()) srvShard := &topo.SrvShard{ Name: shardInfo.ShardName(), KeyRange: shardInfo.KeyRange, ServedTypes: shardInfo.ServedTypes, MasterCell: shardInfo.MasterAlias.Cell, TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)), } for tabletType := range locationAddrsMap { srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType) } if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil { rec.RecordError(fmt.Errorf("writing serving data in cell %v 
for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err)) } wg.Done() }() wg.Wait() return rec.Error() }
// Update shard file with new master, replicas, etc. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function locks individual SvrShard paths, so it doesn't need a lock // on the shard. func RebuildShard(log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) error { log.Infof("RebuildShard %v/%v", keyspace, shard) // read the existing shard info. It has to exist. shardInfo, err := ts.GetShard(keyspace, shard) if err != nil { return err } // rebuild all cells in parallel wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, cell := range shardInfo.Cells { // skip this cell if we shouldn't rebuild it if !topo.InCellList(cell, cells) { continue } // start with the master if it's in the current cell tabletsAsMap := make(map[topo.TabletAlias]bool) if shardInfo.MasterAlias.Cell == cell { tabletsAsMap[shardInfo.MasterAlias] = true } wg.Add(1) go func(cell string) { defer wg.Done() // read the ShardReplication object to find tablets sri, err := ts.GetShardReplication(cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)) return } // add all relevant tablets to the map for _, rl := range sri.ReplicationLinks { tabletsAsMap[rl.TabletAlias] = true if rl.Parent.Cell == cell { tabletsAsMap[rl.Parent] = true } } // convert the map to a list aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap)) for a := range tabletsAsMap { aliases = append(aliases, a) } // read all the Tablet records tablets, err := topo.GetTabletMap(ts, aliases) switch err { case nil: // keep going, we're good case topo.ErrPartialResult: log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell) default: rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err)) return } // Lock the 
SrvShard so we write a consistent data set. actionNode := actionnode.RebuildSrvShard() lockPath, err := actionNode.LockSrvShard(ts, cell, keyspace, shard, timeout, interrupted) if err != nil { rec.RecordError(err) return } // write the data we need to rebuildErr := rebuildCellSrvShard(log, ts, shardInfo, cell, tablets) // and unlock if err := actionNode.UnlockSrvShard(ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil { rec.RecordError(err) } }(cell) } wg.Wait() return rec.Error() }
// rebuildCellSrvShard computes and writes the serving graph data to a // single cell func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) { log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell) for { select { case <-ctx.Done(): return ctx.Err() default: } // Read existing EndPoints node versions, so we know if any // changes sneak in after we read the tablets. versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName()) // Get all tablets in this cell/shard. tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell}) if err != nil { if err != topo.ErrPartialResult { return err } log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell) } // Build up the serving graph from scratch. serving := make(map[pb.TabletType]*pb.EndPoints) for _, tablet := range tablets { // Only add serving types. if !tablet.IsInServingGraph() { continue } // Check the Keyspace and Shard for the tablet are right. if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() { return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard) } // Add the tablet to the list. endpoints, ok := serving[tablet.Type] if !ok { endpoints = topo.NewEndPoints() serving[tablet.Type] = endpoints } entry, err := topo.TabletEndPoint(tablet.Tablet) if err != nil { log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err) continue } endpoints.Entries = append(endpoints.Entries, entry) } wg := sync.WaitGroup{} fatalErrs := concurrency.AllErrorRecorder{} retryErrs := concurrency.AllErrorRecorder{} // Write nodes that should exist. 
for tabletType, endpoints := range serving { wg.Add(1) go func(tabletType pb.TabletType, endpoints *pb.EndPoints) { defer wg.Done() log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType) version, ok := versions[tabletType] if !ok { // This type didn't exist when we first checked. // Try to create, but only if it still doesn't exist. if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil { log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrNodeExists: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } return } // Update only if the version matches. if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil { log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrBadVersion, topo.ErrNoNode: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } }(tabletType, endpoints) } // Delete nodes that shouldn't exist. for tabletType, version := range versions { if _, ok := serving[tabletType]; !ok { wg.Add(1) go func(tabletType pb.TabletType, version int64) { defer wg.Done() log.Infof("removing stale db type from serving graph: %v", tabletType) if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil && err != topo.ErrNoNode { log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err) switch err { case topo.ErrNoNode: // Someone else deleted it, which is fine. 
case topo.ErrBadVersion: retryErrs.RecordError(err) default: fatalErrs.RecordError(err) } } }(tabletType, version) } } // Update srvShard object wg.Add(1) go func() { defer wg.Done() log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName()) if err := UpdateSrvShard(ctx, ts, cell, si); err != nil { fatalErrs.RecordError(err) log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err) } }() wg.Wait() // If there are any fatal errors, give up. if fatalErrs.HasErrors() { return fatalErrs.Error() } // If there are any retry errors, try again. if retryErrs.HasErrors() { continue } // Otherwise, success! return nil } }
// logStuff writes testLogString to logger at Info level count times.
// Nothing is logged when count is zero or negative.
func logStuff(logger logutil.Logger, count int) {
	for remaining := count; remaining > 0; remaining-- {
		logger.Infof(testLogString)
	}
}
// TableScanByKeyRange returns a QueryResultReader that gets all the // rows from a table that match the supplied KeyRange, ordered by // Primary Key. The returned columns are ordered with the Primary Key // columns in front. // If keyspaceSchema is passed in, we go into v3 mode, and we ask for all // source data, and filter here. Otherwise we stick with v2 mode, where we can // ask the source tablet to do the filtering. func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, keyspaceSchema *vindexes.KeyspaceSchema, shardingColumnName string, shardingColumnType topodatapb.KeyspaceIdType) (*QueryResultReader, error) { if keyspaceSchema != nil { // switch to v3 mode. keyResolver, err := newV3ResolverFromColumnList(keyspaceSchema, tableDefinition.Name, orderedColumns(tableDefinition)) if err != nil { return nil, fmt.Errorf("cannot resolve v3 sharding keys for table %v: %v", tableDefinition.Name, err) } // full table scan scan, err := TableScan(ctx, log, ts, tabletAlias, tableDefinition) if err != nil { return nil, err } // with extra filter scan.output = &v3KeyRangeFilter{ input: scan.output, resolver: keyResolver.(*v3Resolver), keyRange: keyRange, } return scan, nil } // in v2 mode, we can do the filtering at the source where := "" switch shardingColumnType { case topodatapb.KeyspaceIdType_UINT64: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE %v >= %v AND %v < %v ", escape(shardingColumnName), uint64FromKeyspaceID(keyRange.Start), escape(shardingColumnName), uint64FromKeyspaceID(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE %v >= %v ", escape(shardingColumnName), uint64FromKeyspaceID(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE %v < %v ", escape(shardingColumnName), 
uint64FromKeyspaceID(keyRange.End)) } } case topodatapb.KeyspaceIdType_BYTES: if len(keyRange.Start) > 0 { if len(keyRange.End) > 0 { // have start & end where = fmt.Sprintf("WHERE HEX(%v) >= '%v' AND HEX(%v) < '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.Start), escape(shardingColumnName), hex.EncodeToString(keyRange.End)) } else { // have start only where = fmt.Sprintf("WHERE HEX(%v) >= '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.Start)) } } else { if len(keyRange.End) > 0 { // have end only where = fmt.Sprintf("WHERE HEX(%v) < '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.End)) } } default: return nil, fmt.Errorf("Unsupported ShardingColumnType: %v", shardingColumnType) } sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(escapeAll(orderedColumns(tableDefinition)), ", "), escape(tableDefinition.Name), where, strings.Join(escapeAll(tableDefinition.PrimaryKeyColumns), ", ")) log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql) return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql) }