// FixShardReplication will fix the first problem it encounters within
// a ShardReplication object
func FixShardReplication(ctx context.Context, ts Server, logger logutil.Logger, cell, keyspace, shard string) error {
	sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		return err
	}

	for _, node := range sri.Nodes {
		ti, err := ts.GetTablet(ctx, node.TabletAlias)
		if err == ErrNoNode {
			logger.Warningf("Tablet %v is in the replication graph, but does not exist, removing it", node.TabletAlias)
			return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias)
		}
		if err != nil {
			// unknown error, we probably don't want to continue
			return err
		}

		if ti.Type == pb.TabletType_SCRAP {
			logger.Warningf("Tablet %v is in the replication graph, but is scrapped, removing it", node.TabletAlias)
			return RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias)
		}

		logger.Infof("Keeping tablet %v in the replication graph", node.TabletAlias)
	}

	logger.Infof("All entries in replication graph are valid")
	return nil
}
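// Usage sketch (not part of the original source): FixShardReplication only
// repairs the first problem it finds in one cell, so a caller that wants to
// cover a whole shard can simply run it once per cell. The "cells" list and
// the topo handle are assumed to come from the caller's setup.
func fixShardReplicationInCells(ctx context.Context, ts Server, logger logutil.Logger, cells []string, keyspace, shard string) error {
	for _, cell := range cells {
		if err := FixShardReplication(ctx, ts, logger, cell, keyspace, shard); err != nil {
			return fmt.Errorf("FixShardReplication(%v, %v/%v) failed: %v", cell, keyspace, shard, err)
		}
	}
	return nil
}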
// NewRestartableResultReader creates a new RestartableResultReader for
// the provided tablet and chunk.
// It will automatically create the necessary query to read all rows within
// the chunk.
// NOTE: We assume that the Columns field in "td" was ordered by a preceding
// call to reorderColumnsPrimaryKeyFirst().
func NewRestartableResultReader(ctx context.Context, logger logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, td *tabletmanagerdatapb.TableDefinition, chunk chunk) (*RestartableResultReader, error) {
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	tablet, err := ts.GetTablet(shortCtx, tabletAlias)
	cancel()
	if err != nil {
		return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to resolve tablet alias: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err)
	}

	conn, err := tabletconn.GetDialer()(tablet.Tablet, *remoteActionsTimeout)
	if err != nil {
		return nil, fmt.Errorf("tablet=%v table=%v chunk=%v: Failed to get dialer for tablet: %v", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, err)
	}

	r := &RestartableResultReader{
		ctx:    ctx,
		logger: logger,
		tablet: tablet.Tablet,
		td:     td,
		chunk:  chunk,
		conn:   conn,
	}

	if err := r.startStream(); err != nil {
		return nil, err
	}
	logger.Infof("tablet=%v table=%v chunk=%v: Starting to stream rows using query '%v'.", topoproto.TabletAliasString(tabletAlias), td.Name, chunk, r.query)
	return r, nil
}
// RebuildShard updates the SrvShard objects and underlying serving graph.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function will start each cell over from the beginning on ErrBadVersion,
// so it doesn't need a lock on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, lockTimeout time.Duration) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			rec.RecordError(rebuildCellSrvShard(ctx, log, ts, shardInfo, cell))
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
// MakeSplitCreateTableSql returns a table creation statement
// that is modified to be faster, and the associated optional
// 'alter table' to modify the table at the end.
// - If the strategy contains the string 'skipAutoIncrement(NNN)' then
//   we do not re-add the auto_increment on that table.
// - If the strategy contains the string 'delaySecondaryIndexes',
//   then non-primary key indexes will be added afterwards.
// - If the strategy contains the string 'useMyIsam' we load
//   the data into a myisam table and we then convert to innodb
// - If the strategy contains the string 'delayPrimaryKey',
//   then the primary key index will be added afterwards (use with useMyIsam)
func MakeSplitCreateTableSql(logger logutil.Logger, schema, databaseName, tableName string, strategy string) (string, string, error) {
	alters := make([]string, 0, 5)
	lines := strings.Split(schema, "\n")
	delayPrimaryKey := strings.Contains(strategy, "delayPrimaryKey")
	delaySecondaryIndexes := strings.Contains(strategy, "delaySecondaryIndexes")
	useMyIsam := strings.Contains(strategy, "useMyIsam")

	for i, line := range lines {
		if strings.HasPrefix(line, "CREATE TABLE `") {
			lines[i] = strings.Replace(line, "CREATE TABLE `", "CREATE TABLE `"+databaseName+"`.`", 1)
			continue
		}

		if strings.Contains(line, " AUTO_INCREMENT") {
			// only add to the final ALTER TABLE if we're not
			// dropping the AUTO_INCREMENT on the table
			if strings.Contains(strategy, "skipAutoIncrement("+tableName+")") {
				logger.Infof("Will not add AUTO_INCREMENT back on table %v", tableName)
			} else {
				alters = append(alters, "MODIFY "+line[:len(line)-1])
			}
			lines[i] = strings.Replace(line, " AUTO_INCREMENT", "", 1)
			continue
		}

		isPrimaryKey := strings.Contains(line, " PRIMARY KEY")
		isSecondaryIndex := !isPrimaryKey && strings.Contains(line, " KEY")
		if (isPrimaryKey && delayPrimaryKey) || (isSecondaryIndex && delaySecondaryIndexes) {
			// remove the comma at the end of the previous line,
			lines[i-1] = lines[i-1][:len(lines[i-1])-1]

			// keep our comma if any (so the next index
			// might remove it)
			// also add the key definition to the alters
			if strings.HasSuffix(line, ",") {
				lines[i] = ","
				alters = append(alters, "ADD "+line[:len(line)-1])
			} else {
				lines[i] = ""
				alters = append(alters, "ADD "+line)
			}
		}

		if useMyIsam && strings.Contains(line, " ENGINE=InnoDB") {
			lines[i] = strings.Replace(line, " ENGINE=InnoDB", " ENGINE=MyISAM", 1)
			alters = append(alters, "ENGINE=InnoDB")
		}
	}

	alter := ""
	if len(alters) > 0 {
		alter = "ALTER TABLE `" + databaseName + "`.`" + tableName + "` " + strings.Join(alters, ", ")
	}
	return strings.Join(lines, "\n"), alter, nil
}
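// Sketch only (not from the original source): the effect of the
// "delaySecondaryIndexes" strategy on a toy schema. The schema literal is
// made up for illustration; the logger is assumed from the caller's setup.
func exampleSplitSchema(logger logutil.Logger) error {
	schema := "CREATE TABLE `t` (\n" +
		"  `id` bigint NOT NULL AUTO_INCREMENT,\n" +
		"  `msg` varchar(64),\n" +
		"  PRIMARY KEY (`id`),\n" +
		"  KEY `msg_idx` (`msg`)\n" +
		") ENGINE=InnoDB"
	create, alter, err := MakeSplitCreateTableSql(logger, schema, "vt_test", "t", "delaySecondaryIndexes")
	if err != nil {
		return err
	}
	// "create" is rewritten to target `vt_test`.`t`, with AUTO_INCREMENT
	// stripped and the secondary KEY removed; "alter" re-adds both afterwards.
	logger.Infof("create: %v", create)
	logger.Infof("alter: %v", alter)
	return nil
}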
// TableScanByKeyRange returns a QueryResultReader that gets all the
// rows from a table that match the supplied KeyRange, ordered by
// Primary Key. The returned columns are ordered with the Primary Key
// columns in front.
func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, shardingColumnName string, shardingColumnType topodatapb.KeyspaceIdType) (*QueryResultReader, error) {
	where := ""
	// TODO(aaijazi): this currently only works with V2 style sharding keys.
	// We should add FilteredQueryResultReader that will do a full table scan, with client-side
	// filtering to remove rows that don't map to the keyrange that we want.
	if keyRange != nil {
		switch shardingColumnType {
		case topodatapb.KeyspaceIdType_UINT64:
			if len(keyRange.Start) > 0 {
				if len(keyRange.End) > 0 {
					// have start & end
					where = fmt.Sprintf("WHERE %v >= %v AND %v < %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.Start), shardingColumnName, uint64FromKeyspaceID(keyRange.End))
				} else {
					// have start only
					where = fmt.Sprintf("WHERE %v >= %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.Start))
				}
			} else {
				if len(keyRange.End) > 0 {
					// have end only
					where = fmt.Sprintf("WHERE %v < %v ", shardingColumnName, uint64FromKeyspaceID(keyRange.End))
				}
			}
		case topodatapb.KeyspaceIdType_BYTES:
			if len(keyRange.Start) > 0 {
				if len(keyRange.End) > 0 {
					// have start & end
					where = fmt.Sprintf("WHERE HEX(%v) >= '%v' AND HEX(%v) < '%v' ", shardingColumnName, hex.EncodeToString(keyRange.Start), shardingColumnName, hex.EncodeToString(keyRange.End))
				} else {
					// have start only
					where = fmt.Sprintf("WHERE HEX(%v) >= '%v' ", shardingColumnName, hex.EncodeToString(keyRange.Start))
				}
			} else {
				if len(keyRange.End) > 0 {
					// have end only
					where = fmt.Sprintf("WHERE HEX(%v) < '%v' ", shardingColumnName, hex.EncodeToString(keyRange.End))
				}
			}
		default:
			return nil, fmt.Errorf("Unsupported ShardingColumnType: %v", shardingColumnType)
		}
	}

	sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql)
}
// TableScanByKeyRange returns a QueryResultReader that gets all the
// rows from a table that match the supplied KeyRange, ordered by
// Primary Key. The returned columns are ordered with the Primary Key
// columns in front.
func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, keyspaceIDType topodatapb.KeyspaceIdType) (*QueryResultReader, error) {
	where := ""
	if keyRange != nil {
		switch keyspaceIDType {
		case topodatapb.KeyspaceIdType_UINT64:
			if len(keyRange.Start) > 0 {
				if len(keyRange.End) > 0 {
					// have start & end
					where = fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ", uint64FromKeyspaceID(keyRange.Start), uint64FromKeyspaceID(keyRange.End))
				} else {
					// have start only
					where = fmt.Sprintf("WHERE keyspace_id >= %v ", uint64FromKeyspaceID(keyRange.Start))
				}
			} else {
				if len(keyRange.End) > 0 {
					// have end only
					where = fmt.Sprintf("WHERE keyspace_id < %v ", uint64FromKeyspaceID(keyRange.End))
				}
			}
		case topodatapb.KeyspaceIdType_BYTES:
			if len(keyRange.Start) > 0 {
				if len(keyRange.End) > 0 {
					// have start & end
					where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' AND HEX(keyspace_id) < '%v' ", hex.EncodeToString(keyRange.Start), hex.EncodeToString(keyRange.End))
				} else {
					// have start only
					where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' ", hex.EncodeToString(keyRange.Start))
				}
			} else {
				if len(keyRange.End) > 0 {
					// have end only
					where = fmt.Sprintf("WHERE HEX(keyspace_id) < '%v' ", hex.EncodeToString(keyRange.End))
				}
			}
		default:
			return nil, fmt.Errorf("Unsupported KeyspaceIdType: %v", keyspaceIDType)
		}
	}

	sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql)
}
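// Standalone sketch of the UINT64 branch above. uint64FromKeyspaceID is not
// shown in this section, so this example supplies a hypothetical version that
// right-pads the keyspace ID to 8 bytes and decodes it big-endian.
package main

import (
	"encoding/binary"
	"fmt"
)

func uint64FromKeyspaceID(keyspaceID []byte) uint64 {
	bits := make([]byte, 8)
	copy(bits, keyspaceID) // right-pad short IDs with zero bytes
	return binary.BigEndian.Uint64(bits)
}

func main() {
	start := []byte{0x40} // keyrange "40-80"
	end := []byte{0x80}
	where := fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ",
		uint64FromKeyspaceID(start), uint64FromKeyspaceID(end))
	fmt.Println(where)
	// Output: WHERE keyspace_id >= 4611686018427387904 AND keyspace_id < 9223372036854775808
}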
// RestartSlavesExternal will tell all the slaves in the provided list
// that they have a new master, and also tell all the masters. The
// masters will be forced to spare if they don't answer.
// We execute all the actions in parallel.
func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTabletAlias topo.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *actionnode.SlaveWasRestartedArgs) error) {
	wg := sync.WaitGroup{}

	swrd := actionnode.SlaveWasRestartedArgs{
		Parent: masterElectTabletAlias,
	}

	log.Infof("Updating individual tablets with the right master...")

	// do all the slaves
	for _, ti := range slaveTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := slaveWasRestarted(ti, &swrd); err != nil {
				log.Warningf("Slave %v had an error: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}

	// and do the old master and any straggler, if possible.
	for _, ti := range masterTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			err := slaveWasRestarted(ti, &swrd)
			if err != nil {
				// the old master can be annoying if left
				// around in the replication graph, so if we
				// can't restart it, we just force it to spare.
				// We don't rebuild the Shard just yet though.
				log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err)
				ti.Type = topo.TYPE_SPARE
				ti.Parent = masterElectTabletAlias
				if err := topo.UpdateTablet(ts, ti); err != nil {
					log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err)
				}
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()
}
// TableScanByKeyRange returns a QueryResultReader that gets all the
// rows from a table that match the supplied KeyRange, ordered by
// Primary Key. The returned columns are ordered with the Primary Key
// columns in front.
func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition, keyRange key.KeyRange, keyspaceIdType key.KeyspaceIdType) (*QueryResultReader, error) {
	where := ""
	switch keyspaceIdType {
	case key.KIT_UINT64:
		if keyRange.Start != key.MinKey {
			if keyRange.End != key.MaxKey {
				// have start & end
				where = fmt.Sprintf("WHERE keyspace_id >= %v AND keyspace_id < %v ", uint64FromKeyspaceId(keyRange.Start), uint64FromKeyspaceId(keyRange.End))
			} else {
				// have start only
				where = fmt.Sprintf("WHERE keyspace_id >= %v ", uint64FromKeyspaceId(keyRange.Start))
			}
		} else {
			if keyRange.End != key.MaxKey {
				// have end only
				where = fmt.Sprintf("WHERE keyspace_id < %v ", uint64FromKeyspaceId(keyRange.End))
			}
		}
	case key.KIT_BYTES:
		if keyRange.Start != key.MinKey {
			if keyRange.End != key.MaxKey {
				// have start & end
				where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' AND HEX(keyspace_id) < '%v' ", keyRange.Start.Hex(), keyRange.End.Hex())
			} else {
				// have start only
				where = fmt.Sprintf("WHERE HEX(keyspace_id) >= '%v' ", keyRange.Start.Hex())
			}
		} else {
			if keyRange.End != key.MaxKey {
				// have end only
				where = fmt.Sprintf("WHERE HEX(keyspace_id) < '%v' ", keyRange.End.Hex())
			}
		}
	default:
		return nil, fmt.Errorf("Unsupported KeyspaceIdType: %v", keyspaceIdType)
	}

	sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, where, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql)
}
// This piece runs on the presumably empty machine acting as the target in the
// create replica action.
//
// validate target (self)
// shutdown_mysql()
// create temp data directory /vt/target/vt_<keyspace>
// copy compressed data files via HTTP
// verify hash of compressed files
// uncompress into /vt/vt_<target-uid>/data/vt_<keyspace>
// start_mysql()
// clean up compressed files
func (mysqld *Mysqld) RestoreFromSnapshot(logger logutil.Logger, snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error {
	if snapshotManifest == nil {
		return errors.New("RestoreFromSnapshot: nil snapshotManifest")
	}

	logger.Infof("ValidateCloneTarget")
	if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil {
		return err
	}

	logger.Infof("Shutdown mysqld")
	if err := mysqld.Shutdown(true, MysqlWaitTime); err != nil {
		return err
	}

	logger.Infof("Fetch snapshot")
	if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil {
		return err
	}

	logger.Infof("Restart mysqld")
	if err := mysqld.Start(MysqlWaitTime); err != nil {
		return err
	}

	cmdList, err := mysqld.StartReplicationCommands(snapshotManifest.ReplicationStatus)
	if err != nil {
		return err
	}
	if err := mysqld.ExecuteSuperQueryList(cmdList); err != nil {
		return err
	}

	if !dontWaitForSlaveStart {
		if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil {
			return err
		}
	}

	h := hook.NewSimpleHook("postflight_restore")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}
	return nil
}
// RestartSlavesExternal will tell all the slaves in the provided list
// that they have a new master, and also tell all the masters. The
// masters will be forced to spare if they don't answer.
// We execute all the actions in parallel.
func RestartSlavesExternal(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, masterElectTabletAlias *topodatapb.TabletAlias, slaveWasRestarted func(*topo.TabletInfo, *topodatapb.TabletAlias) error) {
	wg := sync.WaitGroup{}

	log.Infof("Updating individual tablets with the right master...")

	// do all the slaves
	for _, ti := range slaveTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			if err := slaveWasRestarted(ti, masterElectTabletAlias); err != nil {
				log.Warningf("Slave %v had an error: %v", ti.Alias, err)
			}
			wg.Done()
		}(ti)
	}

	// and do the old master and any straggler, if possible.
	for _, ti := range masterTabletMap {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			err := slaveWasRestarted(ti, masterElectTabletAlias)
			if err != nil {
				// the old master can be annoying if left
				// around in the replication graph, so if we
				// can't restart it, we just make it spare.
				log.Warningf("Old master %v is not restarting in time, forcing it to spare: %v", ti.Alias, err)
				ti.Type = topodatapb.TabletType_SPARE
				if err := ts.UpdateTablet(context.TODO(), ti); err != nil {
					log.Warningf("Failed to change old master %v to spare: %v", ti.Alias, err)
				}
			}
			wg.Done()
		}(ti)
	}
	wg.Wait()
}
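// Usage sketch (not from the original source): wiring a callback into
// RestartSlavesExternal. A real callback would issue the tablet-manager RPC
// that tells the tablet about its new master; this stub only logs. The maps
// and the master-elect alias are assumed to come from the caller's setup.
func exampleRestartSlaves(ts topo.Server, log logutil.Logger, slaveTabletMap, masterTabletMap map[topodatapb.TabletAlias]*topo.TabletInfo, masterElectTabletAlias *topodatapb.TabletAlias) {
	restart := func(ti *topo.TabletInfo, masterElect *topodatapb.TabletAlias) error {
		log.Infof("would notify %v of new master %v", ti.Alias, masterElect)
		return nil
	}
	RestartSlavesExternal(ts, log, slaveTabletMap, masterTabletMap, masterElectTabletAlias, restart)
}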
func (mysqld *Mysqld) restoreAfterSnapshot(logger logutil.Logger, slaveStartRequired, readOnly bool, hookExtraEnv map[string]string, connToRelease dbconnpool.PoolConnection) (err error) {
	// Try to fix mysqld regardless of snapshot success.
	logger.Infof("exec UNLOCK TABLES")
	_, err = connToRelease.ExecuteFetch("UNLOCK TABLES", 10000, false)
	connToRelease.Recycle()
	if err != nil {
		return fmt.Errorf("failed to UNLOCK TABLES: %v", err)
	}

	// restore original mysqld state that we saved above
	if slaveStartRequired {
		if err = mysqld.StartSlave(hookExtraEnv); err != nil {
			return
		}
		// this should be quick, but we might as well just wait
		if err = mysqld.WaitForSlaveStart(5); err != nil {
			return
		}
	}
	if err = mysqld.SetReadOnly(readOnly); err != nil {
		return
	}
	return nil
}
func (mysqld *Mysqld) prepareToSnapshot(logger logutil.Logger, allowHierarchicalReplication bool, hookExtraEnv map[string]string) (slaveStartRequired, readOnly bool, replicationPosition, myMasterPosition proto.ReplicationPosition, masterAddr string, connToRelease dbconnpool.PoolConnection, err error) {
	// save initial state so we can restore on Start()
	if slaveStatus, slaveErr := mysqld.SlaveStatus(); slaveErr == nil {
		slaveStartRequired = slaveStatus.SlaveRunning()
	}

	// For masters, set read-only so we don't write anything during snapshot
	readOnly = true
	if readOnly, err = mysqld.IsReadOnly(); err != nil {
		return
	}

	logger.Infof("Set Read Only")
	if !readOnly {
		if err = mysqld.SetReadOnly(true); err != nil {
			return
		}
	}
	logger.Infof("Stop Slave")
	if err = mysqld.StopSlave(hookExtraEnv); err != nil {
		return
	}

	// Get the replication position and master addr
	replicationPosition, masterAddr, err = mysqld.getReplicationPositionForClones(allowHierarchicalReplication)
	if err != nil {
		return
	}

	// get our master position, some targets may use it
	myMasterPosition, err = mysqld.MasterPosition()
	if err != nil && err != ErrNotMaster {
		// this is a real error
		return
	}

	logger.Infof("Flush tables")
	if connToRelease, err = mysqld.dbaPool.Get(0); err != nil {
		return
	}
	logger.Infof("exec FLUSH TABLES WITH READ LOCK")
	if _, err = connToRelease.ExecuteFetch("FLUSH TABLES WITH READ LOCK", 10000, false); err != nil {
		connToRelease.Recycle()
		return
	}

	return
}
// Update shard file with new master, replicas, etc.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function locks individual SrvShard paths, so it doesn't need a lock
// on the shard.
func RebuildShard(log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) error {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		// start with the master if it's in the current cell
		tabletsAsMap := make(map[topo.TabletAlias]bool)
		if shardInfo.MasterAlias.Cell == cell {
			tabletsAsMap[shardInfo.MasterAlias] = true
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()

			// read the ShardReplication object to find tablets
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			// add all relevant tablets to the map
			for _, rl := range sri.ReplicationLinks {
				tabletsAsMap[rl.TabletAlias] = true
				if rl.Parent.Cell == cell {
					tabletsAsMap[rl.Parent] = true
				}
			}

			// convert the map to a list
			aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap))
			for a := range tabletsAsMap {
				aliases = append(aliases, a)
			}

			// read all the Tablet records
			tablets, err := topo.GetTabletMap(ts, aliases)
			switch err {
			case nil:
				// keep going, we're good
			case topo.ErrPartialResult:
				log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell)
			default:
				rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err))
				return
			}

			// Lock the SrvShard so we write a consistent data set.
			actionNode := actionnode.RebuildSrvShard()
			lockPath, err := actionNode.LockSrvShard(ts, cell, keyspace, shard, timeout, interrupted)
			if err != nil {
				rec.RecordError(err)
				return
			}

			// write the data we need to
			rebuildErr := rebuildCellSrvShard(log, ts, shardInfo, cell, tablets)

			// and unlock
			if err := actionNode.UnlockSrvShard(ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil {
				rec.RecordError(err)
			}
		}(cell)
	}
	wg.Wait()

	return rec.Error()
}
func (mysqld *Mysqld) createSnapshot(logger logutil.Logger, concurrency int, serverMode bool) ([]SnapshotFile, error) {
	sources := make([]string, 0, 128)
	destinations := make([]string, 0, 128)

	// clean out and start fresh
	logger.Infof("removing previous snapshots: %v", mysqld.SnapshotDir)
	if err := os.RemoveAll(mysqld.SnapshotDir); err != nil {
		return nil, err
	}

	// FIXME(msolomon) innodb paths must match patterns in mycnf -
	// probably belongs as a derived path.
	type snapPair struct{ srcDir, dstDir string }
	dps := []snapPair{
		{mysqld.config.InnodbDataHomeDir, path.Join(mysqld.SnapshotDir, innodbDataSubdir)},
		{mysqld.config.InnodbLogGroupHomeDir, path.Join(mysqld.SnapshotDir, innodbLogSubdir)},
	}

	dataDirEntries, err := ioutil.ReadDir(mysqld.config.DataDir)
	if err != nil {
		return nil, err
	}

	for _, de := range dataDirEntries {
		dbDirPath := path.Join(mysqld.config.DataDir, de.Name())
		// If this is not a directory, try to eval it as a symlink.
		if !de.IsDir() {
			dbDirPath, err = filepath.EvalSymlinks(dbDirPath)
			if err != nil {
				return nil, err
			}
			de, err = os.Stat(dbDirPath)
			if err != nil {
				return nil, err
			}
		}
		if de.IsDir() {
			// Copy anything that defines a db.opt file - that includes empty databases.
			_, err := os.Stat(path.Join(dbDirPath, "db.opt"))
			if err == nil {
				dps = append(dps, snapPair{dbDirPath, path.Join(mysqld.SnapshotDir, dataDir, de.Name())})
			} else {
				// Look for at least one .frm file
				dbDirEntries, err := ioutil.ReadDir(dbDirPath)
				if err == nil {
					for _, dbEntry := range dbDirEntries {
						if strings.HasSuffix(dbEntry.Name(), ".frm") {
							dps = append(dps, snapPair{dbDirPath, path.Join(mysqld.SnapshotDir, dataDir, de.Name())})
							break
						}
					}
				} else {
					return nil, err
				}
			}
		}
	}

	for _, dp := range dps {
		if err := os.MkdirAll(dp.dstDir, 0775); err != nil {
			return nil, err
		}
		if s, d, err := findFilesToServe(dp.srcDir, dp.dstDir, !serverMode); err != nil {
			return nil, err
		} else {
			sources = append(sources, s...)
			destinations = append(destinations, d...)
		}
	}

	return newSnapshotFiles(sources, destinations, mysqld.SnapshotDir, concurrency, !serverMode)
}
// This function runs on the machine acting as the source for the clone.
//
// Check master/slave status and determine restore needs.
// If this instance is a slave, stop replication, otherwise place in read-only mode.
// Record replication position.
// Shutdown mysql
// Check paths for storing data
//
// Depending on the serverMode flag, we do the following:
// serverMode = false:
//   Compress /vt/vt_[0-9a-f]+/data/vt_.+
//   Compute hash (of compressed files, as we serve .gz files here)
//   Place in /vt/clone_src where they will be served by http server (not rpc)
//   Restart mysql
// serverMode = true:
//   Make symlinks for /vt/vt_[0-9a-f]+/data/vt_.+ to innodb files
//   Compute hash (of uncompressed files, as we serve uncompressed files)
//   Place symlinks in /vt/clone_src where they will be served by http server
//   Leave mysql stopped, return slaveStartRequired, readOnly
func (mysqld *Mysqld) CreateSnapshot(logger logutil.Logger, dbName, sourceAddr string, allowHierarchicalReplication bool, concurrency int, serverMode bool, hookExtraEnv map[string]string) (snapshotManifestUrlPath string, slaveStartRequired, readOnly bool, err error) {
	if dbName == "" {
		return "", false, false, errors.New("CreateSnapshot failed: no database name provided")
	}

	if err = mysqld.validateCloneSource(serverMode, hookExtraEnv); err != nil {
		return
	}

	// save initial state so we can restore on Start()
	slaveStartRequired = false
	sourceIsMaster := false
	readOnly = true

	slaveStatus, err := mysqld.SlaveStatus()
	if err == nil {
		slaveStartRequired = slaveStatus.SlaveRunning()
	} else if err == ErrNotSlave {
		sourceIsMaster = true
	} else {
		// If we can't get any data, just fail.
		return
	}

	readOnly, err = mysqld.IsReadOnly()
	if err != nil {
		return
	}

	// Stop sources of writes so we can get a consistent replication position.
	// If the source is a slave use the master replication position
	// unless we are allowing hierarchical replicas.
	masterAddr := ""
	var replicationPosition proto.ReplicationPosition
	if sourceIsMaster {
		if err = mysqld.SetReadOnly(true); err != nil {
			return
		}
		replicationPosition, err = mysqld.MasterPosition()
		if err != nil {
			return
		}
		masterAddr = mysqld.IpAddr()
	} else {
		if err = mysqld.StopSlave(hookExtraEnv); err != nil {
			return
		}
		var slaveStatus *proto.ReplicationStatus
		slaveStatus, err = mysqld.SlaveStatus()
		if err != nil {
			return
		}
		replicationPosition = slaveStatus.Position

		// We are a slave, check our replication strategy before
		// choosing the master address.
		if allowHierarchicalReplication {
			masterAddr = mysqld.IpAddr()
		} else {
			masterAddr, err = mysqld.GetMasterAddr()
			if err != nil {
				return
			}
		}
	}

	if err = mysqld.Shutdown(true, MysqlWaitTime); err != nil {
		return
	}

	var smFile string
	dataFiles, snapshotErr := mysqld.createSnapshot(logger, concurrency, serverMode)
	if snapshotErr != nil {
		logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
	} else {
		var sm *SnapshotManifest
		sm, snapshotErr = newSnapshotManifest(sourceAddr, mysqld.IpAddr(), masterAddr, dbName, dataFiles, replicationPosition, proto.ReplicationPosition{})
		if snapshotErr != nil {
			logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
		} else {
			smFile = path.Join(mysqld.SnapshotDir, SnapshotManifestFile)
			if snapshotErr = writeJson(smFile, sm); snapshotErr != nil {
				logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
			}
		}
	}

	// restore our state if required
	if serverMode && snapshotErr == nil {
		logger.Infof("server mode snapshot worked, not restarting mysql")
	} else {
		if err = mysqld.SnapshotSourceEnd(slaveStartRequired, readOnly, false /*deleteSnapshot*/, hookExtraEnv); err != nil {
			return
		}
	}

	if snapshotErr != nil {
		return "", slaveStartRequired, readOnly, snapshotErr
	}

	relative, err := filepath.Rel(mysqld.SnapshotDir, smFile)
	if err != nil {
		return "", slaveStartRequired, readOnly, nil
	}
	return path.Join(SnapshotURLPath, relative), slaveStartRequired, readOnly, nil
}
// Restore is the main entry point for backup restore. If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(
	ctx context.Context,
	mysqld MysqlDaemon,
	dir string,
	restoreConcurrency int,
	hookExtraEnv map[string]string,
	localMetadata map[string]string,
	logger logutil.Logger,
	deleteBeforeRestore bool) (replication.Position, error) {

	// find the right backup handle: most recent one, with a MANIFEST
	logger.Infof("Restore: looking for a suitable backup to restore")
	bs, err := backupstorage.GetBackupStorage()
	if err != nil {
		return replication.Position{}, err
	}
	defer bs.Close()
	bhs, err := bs.ListBackups(dir)
	if err != nil {
		return replication.Position{}, fmt.Errorf("ListBackups failed: %v", err)
	}

	var bh backupstorage.BackupHandle
	var bm BackupManifest
	var toRestore int
	for toRestore = len(bhs) - 1; toRestore >= 0; toRestore-- {
		bh = bhs[toRestore]
		rc, err := bh.ReadFile(backupManifest)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (can't read MANIFEST: %v)", bh.Name(), dir, err)
			continue
		}

		err = json.NewDecoder(rc).Decode(&bm)
		rc.Close()
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (cannot JSON decode MANIFEST: %v)", bh.Name(), dir, err)
			continue
		}

		logger.Infof("Restore: found backup %v %v to restore with %v files", bh.Directory(), bh.Name(), len(bm.FileEntries))
		break
	}
	if toRestore < 0 {
		logger.Errorf("No backup to restore on BackupStorage for directory %v. Starting up empty.", dir)
		if err = populateLocalMetadata(mysqld, localMetadata); err == nil {
			err = ErrNoBackup
		}
		return replication.Position{}, err
	}

	if !deleteBeforeRestore {
		logger.Infof("Restore: checking no existing data is present")
		ok, err := checkNoDB(ctx, mysqld)
		if err != nil {
			return replication.Position{}, err
		}
		if !ok {
			logger.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.")
			if err = populateLocalMetadata(mysqld, localMetadata); err == nil {
				err = ErrExistingDB
			}
			return replication.Position{}, err
		}
	}

	logger.Infof("Restore: shutdown mysqld")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: deleting existing files")
	if err := removeExistingFiles(mysqld.Cnf()); err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: reinit config file")
	err = mysqld.ReinitConfig(ctx)
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: copying all files")
	if err := restoreFiles(mysqld.Cnf(), bh, bm.FileEntries, restoreConcurrency); err != nil {
		return replication.Position{}, err
	}

	// mysqld needs to be running in order for mysql_upgrade to work.
	// If we've just restored from a backup from previous MySQL version then mysqld
	// may fail to start due to a different structure of mysql.* tables. The flag
	// --skip-grant-tables ensures that these tables are not read until mysql_upgrade
	// is executed. And since with --skip-grant-tables anyone can connect to MySQL
	// without password, we are passing --skip-networking to greatly reduce the set
	// of those who can connect.
	logger.Infof("Restore: starting mysqld for mysql_upgrade")
	err = mysqld.Start(ctx, "--skip-grant-tables", "--skip-networking")
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: running mysql_upgrade")
	if err := mysqld.RunMysqlUpgrade(); err != nil {
		return replication.Position{}, fmt.Errorf("mysql_upgrade failed: %v", err)
	}

	// Populate local_metadata before starting without --skip-networking,
	// so it's there before we start announcing ourselves.
	logger.Infof("Restore: populating local_metadata")
	err = populateLocalMetadata(mysqld, localMetadata)
	if err != nil {
		return replication.Position{}, err
	}

	// The MySQL manual recommends restarting mysqld after running mysql_upgrade,
	// so that any changes made to system tables take effect.
	logger.Infof("Restore: restarting mysqld after mysql_upgrade")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	}
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err
	}

	return bm.Position, nil
}
func backup(ctx context.Context, mysqld MysqlDaemon, logger logutil.Logger, bh backupstorage.BackupHandle, backupConcurrency int, hookExtraEnv map[string]string) error {
	// save initial state so we can restore
	slaveStartRequired := false
	sourceIsMaster := false
	readOnly := true
	var replicationPosition replication.Position
	semiSyncMaster, semiSyncSlave := mysqld.SemiSyncEnabled()

	// see if we need to restart replication after backup
	logger.Infof("getting current replication status")
	slaveStatus, err := mysqld.SlaveStatus()
	switch err {
	case nil:
		slaveStartRequired = slaveStatus.SlaveRunning()
	case ErrNotSlave:
		// keep going if we're the master, might be a degenerate case
		sourceIsMaster = true
	default:
		return fmt.Errorf("can't get slave status: %v", err)
	}

	// get the read-only flag
	readOnly, err = mysqld.IsReadOnly()
	if err != nil {
		return fmt.Errorf("can't get read-only status: %v", err)
	}

	// get the replication position
	if sourceIsMaster {
		if !readOnly {
			logger.Infof("turning master read-only before backup")
			if err = mysqld.SetReadOnly(true); err != nil {
				return fmt.Errorf("can't set read-only status: %v", err)
			}
		}
		replicationPosition, err = mysqld.MasterPosition()
		if err != nil {
			return fmt.Errorf("can't get master position: %v", err)
		}
	} else {
		if err = StopSlave(mysqld, hookExtraEnv); err != nil {
			return fmt.Errorf("can't stop slave: %v", err)
		}
		var slaveStatus replication.Status
		slaveStatus, err = mysqld.SlaveStatus()
		if err != nil {
			return fmt.Errorf("can't get slave status: %v", err)
		}
		replicationPosition = slaveStatus.Position
	}
	logger.Infof("using replication position: %v", replicationPosition)

	// shutdown mysqld
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return fmt.Errorf("can't shutdown mysqld: %v", err)
	}

	// get the files to backup
	fes, err := findFilesTobackup(mysqld.Cnf())
	if err != nil {
		return fmt.Errorf("can't find files to backup: %v", err)
	}
	logger.Infof("found %v files to backup", len(fes))

	// backup everything
	if err := backupFiles(mysqld, logger, bh, fes, replicationPosition, backupConcurrency); err != nil {
		return fmt.Errorf("can't backup files: %v", err)
	}

	// Try to restart mysqld
	err = mysqld.Start(ctx)
	if err != nil {
		return fmt.Errorf("can't restart mysqld: %v", err)
	}

	// Restore original mysqld state that we saved above.
	if semiSyncMaster || semiSyncSlave {
		// Only do this if one of them was on, since both being off could mean
		// the plugin isn't even loaded, and the server variables don't exist.
		logger.Infof("restoring semi-sync settings from before backup: master=%v, slave=%v", semiSyncMaster, semiSyncSlave)
		err := mysqld.SetSemiSyncEnabled(semiSyncMaster, semiSyncSlave)
		if err != nil {
			return err
		}
	}
	if slaveStartRequired {
		logger.Infof("restarting mysql replication")
		if err := StartSlave(mysqld, hookExtraEnv); err != nil {
			return fmt.Errorf("cannot restart slave: %v", err)
		}

		// this should be quick, but we might as well just wait
		if err := WaitForSlaveStart(mysqld, slaveStartDeadline); err != nil {
			return fmt.Errorf("slave is not restarting: %v", err)
		}
	}

	// And set read-only mode
	logger.Infof("resetting mysqld read-only to %v", readOnly)
	if err := mysqld.SetReadOnly(readOnly); err != nil {
		return err
	}

	return nil
}
// TableScan returns a QueryResultReader that gets all the rows from a
// table, ordered by Primary Key. The returned columns are ordered
// with the Primary Key columns in front.
func TableScan(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias topo.TabletAlias, tableDefinition *myproto.TableDefinition) (*QueryResultReader, error) {
	sql := fmt.Sprintf("SELECT %v FROM %v ORDER BY %v", strings.Join(orderedColumns(tableDefinition), ", "), tableDefinition.Name, strings.Join(tableDefinition.PrimaryKeyColumns, ", "))
	log.Infof("SQL query for %v/%v: %v", tabletAlias, tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql)
}
// CreateMultiSnapshot creates snapshots of the data.
// - for a resharding snapshot, keyRanges+keyName+keyType are set,
//   and tables is empty. This action will create multiple snapshots,
//   one per keyRange.
// - for a vertical split, tables is set, keyRanges = [KeyRange{}] and
//   keyName+keyType are empty. It will create a single snapshot of
//   the contents of the tables.
// Note combinations of table subset and keyranges are not supported.
func (mysqld *Mysqld) CreateMultiSnapshot(logger logutil.Logger, keyRanges []key.KeyRange, dbName, keyName string, keyType key.KeyspaceIdType, sourceAddr string, allowHierarchicalReplication bool, snapshotConcurrency int, tables, excludeTables []string, skipSlaveRestart bool, maximumFilesize uint64, hookExtraEnv map[string]string) (snapshotManifestFilenames []string, err error) {
	if dbName == "" {
		err = fmt.Errorf("no database name provided")
		return
	}
	if len(tables) > 0 {
		if len(keyRanges) != 1 || keyRanges[0].IsPartial() {
			return nil, fmt.Errorf("With tables specified, can only have one full KeyRange")
		}
	}

	// same logic applies here
	logger.Infof("validateCloneSource")
	if err = mysqld.validateCloneSource(false, hookExtraEnv); err != nil {
		return
	}

	// clean out and start fresh
	cloneSourcePaths := make(map[key.KeyRange]string)
	for _, keyRange := range keyRanges {
		cloneSourcePaths[keyRange] = path.Join(mysqld.SnapshotDir, dataDir, dbName+"-"+string(keyRange.Start.Hex())+","+string(keyRange.End.Hex()))
	}
	for _, _path := range cloneSourcePaths {
		if err = os.RemoveAll(_path); err != nil {
			return
		}
		if err = os.MkdirAll(_path, 0775); err != nil {
			return
		}
	}

	mainCloneSourcePath := path.Join(mysqld.SnapshotDir, dataDir, dbName+"-all")
	if err = os.RemoveAll(mainCloneSourcePath); err != nil {
		return
	}
	if err = os.MkdirAll(mainCloneSourcePath, 0775); err != nil {
		return
	}

	// get the schema for each table
	sd, fetchErr := mysqld.GetSchema(dbName, tables, excludeTables, true)
	if fetchErr != nil {
		return []string{}, fetchErr
	}
	if len(sd.TableDefinitions) == 0 {
		return []string{}, fmt.Errorf("empty table list for %v", dbName)
	}
	sd.SortByReverseDataLength()

	// prepareToSnapshot will get the tablet in the right state,
	// and return the current mysql status.
	slaveStartRequired, readOnly, replicationPosition, myMasterPosition, masterAddr, conn, err := mysqld.prepareToSnapshot(logger, allowHierarchicalReplication, hookExtraEnv)
	if err != nil {
		return
	}
	if skipSlaveRestart {
		if slaveStartRequired {
			logger.Infof("Overriding slaveStartRequired to false")
		}
		slaveStartRequired = false
	}
	defer func() {
		err = replaceError(logger, err, mysqld.restoreAfterSnapshot(logger, slaveStartRequired, readOnly, hookExtraEnv, conn))
	}()

	// dump the files in parallel with a pre-defined concurrency
	datafiles := make([]map[key.KeyRange][]SnapshotFile, len(sd.TableDefinitions))
	dumpTableWorker := func(i int) (err error) {
		table := sd.TableDefinitions[i]
		if table.Type != proto.TABLE_BASE_TABLE {
			// we just skip views here
			return nil
		}
		if len(tables) > 0 {
			sfs, err := mysqld.dumpTableFull(logger, table, dbName, mainCloneSourcePath, cloneSourcePaths[key.KeyRange{}], maximumFilesize)
			if err != nil {
				return err
			}
			datafiles[i] = map[key.KeyRange][]SnapshotFile{
				key.KeyRange{}: sfs,
			}
		} else {
			datafiles[i], err = mysqld.dumpTableSplit(logger, table, dbName, keyName, keyType, mainCloneSourcePath, cloneSourcePaths, maximumFilesize)
		}
		return
	}
	if err = ConcurrentMap(snapshotConcurrency, len(sd.TableDefinitions), dumpTableWorker); err != nil {
		return
	}

	if e := os.Remove(mainCloneSourcePath); e != nil {
		logger.Errorf("Cannot remove %v: %v", mainCloneSourcePath, e)
	}

	// Check the replication position after snapshot is done
	// hasn't changed, to be sure we haven't inserted any data
	newReplicationPosition, _, err := mysqld.getReplicationPositionForClones(allowHierarchicalReplication)
	if err != nil {
		return
	}
	if !newReplicationPosition.Equal(replicationPosition) {
		return nil, fmt.Errorf("replicationPosition position changed during snapshot, from %v to %v", replicationPosition, newReplicationPosition)
	}

	// Write all the manifest files
	ssmFiles := make([]string, len(keyRanges))
	for i, kr := range keyRanges {
		krDatafiles := make([]SnapshotFile, 0, len(datafiles))
		for _, m := range datafiles {
			krDatafiles = append(krDatafiles, m[kr]...)
		}
		ssm, err := NewSplitSnapshotManifest(sourceAddr, mysqld.IpAddr(), masterAddr, dbName, krDatafiles, replicationPosition, myMasterPosition, kr, sd)
		if err != nil {
			return nil, err
		}
		ssmFiles[i] = path.Join(cloneSourcePaths[kr], partialSnapshotManifestFile)
		if err = writeJson(ssmFiles[i], ssm); err != nil {
			return nil, err
		}
	}

	// Call the (optional) hook to send the files somewhere else
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, kr := range keyRanges {
		wg.Add(1)
		go func(kr key.KeyRange) {
			defer wg.Done()
			h := hook.NewSimpleHook("copy_snapshot_to_storage")
			h.ExtraEnv = make(map[string]string)
			for k, v := range hookExtraEnv {
				h.ExtraEnv[k] = v
			}
			h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", kr.Start.Hex(), kr.End.Hex())
			h.ExtraEnv["SNAPSHOT_PATH"] = cloneSourcePaths[kr]
			rec.RecordError(h.ExecuteOptional())
		}(kr)
	}
	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}

	// Return all the URLs for the MANIFESTs
	snapshotURLPaths := make([]string, len(keyRanges))
	for i := 0; i < len(keyRanges); i++ {
		relative, err := filepath.Rel(mysqld.SnapshotDir, ssmFiles[i])
		if err != nil {
			return nil, err
		}
		snapshotURLPaths[i] = path.Join(SnapshotURLPath, relative)
	}
	return snapshotURLPaths, nil
}
// TableScanByKeyRange returns a QueryResultReader that gets all the
// rows from a table that match the supplied KeyRange, ordered by
// Primary Key. The returned columns are ordered with the Primary Key
// columns in front.
// If keyspaceSchema is passed in, we go into v3 mode, and we ask for all
// source data, and filter here. Otherwise we stick with v2 mode, where we can
// ask the source tablet to do the filtering.
func TableScanByKeyRange(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, tableDefinition *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, keyspaceSchema *vindexes.KeyspaceSchema, shardingColumnName string, shardingColumnType topodatapb.KeyspaceIdType) (*QueryResultReader, error) {
	if keyspaceSchema != nil {
		// switch to v3 mode.
		keyResolver, err := newV3ResolverFromColumnList(keyspaceSchema, tableDefinition.Name, orderedColumns(tableDefinition))
		if err != nil {
			return nil, fmt.Errorf("cannot resolve v3 sharding keys for table %v: %v", tableDefinition.Name, err)
		}

		// full table scan
		scan, err := TableScan(ctx, log, ts, tabletAlias, tableDefinition)
		if err != nil {
			return nil, err
		}

		// with extra filter
		scan.output = &v3KeyRangeFilter{
			input:    scan.output,
			resolver: keyResolver.(*v3Resolver),
			keyRange: keyRange,
		}
		return scan, nil
	}

	// in v2 mode, we can do the filtering at the source
	where := ""
	switch shardingColumnType {
	case topodatapb.KeyspaceIdType_UINT64:
		if len(keyRange.Start) > 0 {
			if len(keyRange.End) > 0 {
				// have start & end
				where = fmt.Sprintf("WHERE %v >= %v AND %v < %v ", escape(shardingColumnName), uint64FromKeyspaceID(keyRange.Start), escape(shardingColumnName), uint64FromKeyspaceID(keyRange.End))
			} else {
				// have start only
				where = fmt.Sprintf("WHERE %v >= %v ", escape(shardingColumnName), uint64FromKeyspaceID(keyRange.Start))
			}
		} else {
			if len(keyRange.End) > 0 {
				// have end only
				where = fmt.Sprintf("WHERE %v < %v ", escape(shardingColumnName), uint64FromKeyspaceID(keyRange.End))
			}
		}
	case topodatapb.KeyspaceIdType_BYTES:
		if len(keyRange.Start) > 0 {
			if len(keyRange.End) > 0 {
				// have start & end
				where = fmt.Sprintf("WHERE HEX(%v) >= '%v' AND HEX(%v) < '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.Start), escape(shardingColumnName), hex.EncodeToString(keyRange.End))
			} else {
				// have start only
				where = fmt.Sprintf("WHERE HEX(%v) >= '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.Start))
			}
		} else {
			if len(keyRange.End) > 0 {
				// have end only
				where = fmt.Sprintf("WHERE HEX(%v) < '%v' ", escape(shardingColumnName), hex.EncodeToString(keyRange.End))
			}
		}
	default:
		return nil, fmt.Errorf("Unsupported ShardingColumnType: %v", shardingColumnType)
	}

	sql := fmt.Sprintf("SELECT %v FROM %v %vORDER BY %v", strings.Join(escapeAll(orderedColumns(tableDefinition)), ", "), escape(tableDefinition.Name), where, strings.Join(escapeAll(tableDefinition.PrimaryKeyColumns), ", "))
	log.Infof("SQL query for %v/%v: %v", topoproto.TabletAliasString(tabletAlias), tableDefinition.Name, sql)
	return NewQueryResultReaderForTablet(ctx, ts, tabletAlias, sql)
}
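// Usage sketch (not from the original source): the same scan in v2 and v3
// mode, differing only in the keyspaceSchema argument. In v3 mode the
// sharding column arguments are ignored, so the zero enum value is passed;
// all other variables are assumed from the caller's setup.
func exampleKeyRangeScans(ctx context.Context, log logutil.Logger, ts topo.Server, tabletAlias *topodatapb.TabletAlias, td *tabletmanagerdatapb.TableDefinition, keyRange *topodatapb.KeyRange, keyspaceSchema *vindexes.KeyspaceSchema) error {
	// v2: the source tablet filters rows via the generated WHERE clause.
	v2Reader, err := TableScanByKeyRange(ctx, log, ts, tabletAlias, td, keyRange, nil, "keyspace_id", topodatapb.KeyspaceIdType_UINT64)
	if err != nil {
		return err
	}
	_ = v2Reader

	// v3: full table scan; rows outside keyRange are dropped client-side.
	v3Reader, err := TableScanByKeyRange(ctx, log, ts, tabletAlias, td, keyRange, keyspaceSchema, "", topodatapb.KeyspaceIdType_UNSET)
	if err != nil {
		return err
	}
	_ = v3Reader
	return nil
}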
// Restore is the main entry point for backup restore. If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(ctx context.Context, mysqld MysqlDaemon, dir string, restoreConcurrency int, hookExtraEnv map[string]string, logger logutil.Logger, deleteBeforeRestore bool) (replication.Position, error) {
	// find the right backup handle: most recent one, with a MANIFEST
	logger.Infof("Restore: looking for a suitable backup to restore")
	bs, err := backupstorage.GetBackupStorage()
	if err != nil {
		return replication.Position{}, err
	}
	defer bs.Close()
	bhs, err := bs.ListBackups(dir)
	if err != nil {
		return replication.Position{}, fmt.Errorf("ListBackups failed: %v", err)
	}

	var bh backupstorage.BackupHandle
	var bm BackupManifest
	var toRestore int
	for toRestore = len(bhs) - 1; toRestore >= 0; toRestore-- {
		bh = bhs[toRestore]
		rc, err := bh.ReadFile(backupManifest)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (can't read MANIFEST: %v)", bh.Name(), dir, err)
			continue
		}

		err = json.NewDecoder(rc).Decode(&bm)
		rc.Close()
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (cannot JSON decode MANIFEST: %v)", bh.Name(), dir, err)
			continue
		}

		logger.Infof("Restore: found backup %v %v to restore with %v files", bh.Directory(), bh.Name(), len(bm.FileEntries))
		break
	}
	if toRestore < 0 {
		logger.Errorf("No backup to restore on BackupStorage for directory %v", dir)
		return replication.Position{}, ErrNoBackup
	}

	if !deleteBeforeRestore {
		logger.Infof("Restore: checking no existing data is present")
		ok, err := checkNoDB(ctx, mysqld)
		if err != nil {
			return replication.Position{}, err
		}
		if !ok {
			return replication.Position{}, ErrExistingDB
		}
	}

	logger.Infof("Restore: shutdown mysqld")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: deleting existing files")
	if err := removeExistingFiles(mysqld.Cnf()); err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: reinit config file")
	err = mysqld.ReinitConfig(ctx)
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: copying all files")
	if err := restoreFiles(mysqld.Cnf(), bh, bm.FileEntries, restoreConcurrency); err != nil {
		return replication.Position{}, err
	}

	// mysqld needs to be running in order for mysql_upgrade to work.
	logger.Infof("Restore: starting mysqld for mysql_upgrade")
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err
	}

	logger.Infof("Restore: running mysql_upgrade")
	if err := mysqld.RunMysqlUpgrade(); err != nil {
		return replication.Position{}, fmt.Errorf("mysql_upgrade failed: %v", err)
	}

	// The MySQL manual recommends restarting mysqld after running mysql_upgrade,
	// so that any changes made to system tables take effect.
	logger.Infof("Restore: restarting mysqld after mysql_upgrade")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	}
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err
	}

	return bm.Position, nil
}
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Read existing EndPoints node versions, so we know if any
		// changes sneak in after we read the tablets.
		versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName())
		if err != nil {
			return err
		}

		// Get all tablets in this cell/shard.
		tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell})
		if err != nil {
			if err != topo.ErrPartialResult {
				return err
			}
			log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell)
		}

		// Build up the serving graph from scratch.
		serving := make(map[pb.TabletType]*pb.EndPoints)
		for _, tablet := range tablets {
			// Only add serving types.
			if !tablet.IsInServingGraph() {
				continue
			}

			// Check the Keyspace and Shard for the tablet are right.
			if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() {
				return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard)
			}

			// Add the tablet to the list.
			endpoints, ok := serving[tablet.Type]
			if !ok {
				endpoints = topo.NewEndPoints()
				serving[tablet.Type] = endpoints
			}
			entry, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
				continue
			}
			endpoints.Entries = append(endpoints.Entries, entry)
		}

		wg := sync.WaitGroup{}
		fatalErrs := concurrency.AllErrorRecorder{}
		retryErrs := concurrency.AllErrorRecorder{}

		// Write nodes that should exist.
		for tabletType, endpoints := range serving {
			wg.Add(1)
			go func(tabletType pb.TabletType, endpoints *pb.EndPoints) {
				defer wg.Done()
				log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType)

				version, ok := versions[tabletType]
				if !ok {
					// This type didn't exist when we first checked.
					// Try to create, but only if it still doesn't exist.
					if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil {
						log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNodeExists:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
					return
				}

				// Update only if the version matches.
				if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil {
					log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
					switch err {
					case topo.ErrBadVersion, topo.ErrNoNode:
						retryErrs.RecordError(err)
					default:
						fatalErrs.RecordError(err)
					}
				}
			}(tabletType, endpoints)
		}

		// Delete nodes that shouldn't exist.
		for tabletType, version := range versions {
			if _, ok := serving[tabletType]; !ok {
				wg.Add(1)
				go func(tabletType pb.TabletType, version int64) {
					defer wg.Done()
					log.Infof("removing stale db type from serving graph: %v", tabletType)
					if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil {
						log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNoNode:
							// Someone else deleted it, which is fine.
						case topo.ErrBadVersion:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
				}(tabletType, version)
			}
		}

		// Update srvShard object
		wg.Add(1)
		go func() {
			defer wg.Done()
			log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName())
			if err := UpdateSrvShard(ctx, ts, cell, si); err != nil {
				fatalErrs.RecordError(err)
				log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err)
			}
		}()

		wg.Wait()

		// If there are any fatal errors, give up.
		if fatalErrs.HasErrors() {
			return fatalErrs.Error()
		}

		// If there are any retry errors, try again.
		if retryErrs.HasErrors() {
			continue
		}

		// Otherwise, success!
		return nil
	}
}
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so they can be removed if nothing
	// had been edited.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.ServedTypes,
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}

		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}
func logStuff(logger logutil.Logger, count int) {
	for i := 0; i < count; i++ {
		logger.Infof(testLogString)
	}
}
// rebuildKeyspace should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func rebuildKeyspace(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace string, cells []string, rebuildSrvShards bool) error {
	log.Infof("rebuildKeyspace %v", keyspace)

	ki, err := ts.GetKeyspace(ctx, keyspace)
	if err != nil {
		return err
	}

	var shardCache map[string]*topo.ShardInfo
	if rebuildSrvShards {
		shards, err := ts.GetShardNames(ctx, keyspace)
		if err != nil {
			return err
		}

		// Rebuild all shards in parallel, save the shards
		shardCache = make(map[string]*topo.ShardInfo)
		wg := sync.WaitGroup{}
		mu := sync.Mutex{}
		rec := concurrency.FirstErrorRecorder{}
		for _, shard := range shards {
			wg.Add(1)
			go func(shard string) {
				if shardInfo, err := RebuildShard(ctx, log, ts, keyspace, shard, cells); err != nil {
					rec.RecordError(fmt.Errorf("RebuildShard failed: %v/%v %v", keyspace, shard, err))
				} else {
					mu.Lock()
					shardCache[shard] = shardInfo
					mu.Unlock()
				}
				wg.Done()
			}(shard)
		}
		wg.Wait()
		if rec.HasErrors() {
			return rec.Error()
		}
	} else {
		shardCache, err = ts.FindAllShardsInKeyspace(ctx, keyspace)
		if err != nil {
			return err
		}
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topodatapb.SrvKeyspace)
	findCellsForRebuild(ki, shardCache, cells, srvKeyspaceMap)

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, ksf := range ki.ServedFroms {
		servedFromShards, err := ts.FindAllShardsInKeyspace(ctx, ksf.Keyspace)
		if err != nil {
			return err
		}
		findCellsForRebuild(ki, servedFromShards, cells, srvKeyspaceMap)
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for cell, srvKeyspace := range srvKeyspaceMap {
		for _, si := range shardCache {
			servedTypes := si.GetServedTypesPerCell(cell)

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range servedTypes {
				partition := topoproto.SrvKeyspaceGetPartition(srvKeyspace, tabletType)
				if partition == nil {
					partition = &topodatapb.SrvKeyspace_KeyspacePartition{
						ServedType: tabletType,
					}
					srvKeyspace.Partitions = append(srvKeyspace.Partitions, partition)
				}
				partition.ShardReferences = append(partition.ShardReferences, &topodatapb.ShardReference{
					Name:     si.ShardName(),
					KeyRange: si.KeyRange,
				})
			}
		}

		if err := orderAndCheckPartitions(cell, srvKeyspace); err != nil {
			return err
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		log.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := ts.UpdateSrvKeyspace(ctx, cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}