Esempio n. 1
// replaceError replaces original with recent if recent is not nil,
// logging original if it wasn't nil. This should be used in deferred
// cleanup functions if they change the returned error.
func replaceError(logger logutil.Logger, original, recent error) error {
	if recent == nil {
		return original
	if original != nil {
		logger.Errorf("One of multiple error: %v", original)
	return recent
Esempio n. 2
// dumpTableFull will dump the contents of a full table, and then
// chunk it up in multiple compressed files.
func (mysqld *Mysqld) dumpTableFull(logger logutil.Logger, td *proto.TableDefinition, dbName, mainCloneSourcePath string, cloneSourcePath string, maximumFilesize uint64) ([]SnapshotFile, error) {
	filename := path.Join(mainCloneSourcePath, td.Name+".csv")
	selectIntoOutfile := `SELECT {{.Columns}} INTO OUTFILE "{{.TableOutputPath}}" CHARACTER SET binary FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' FROM {{.TableName}}`
	queryParams := map[string]string{
		"TableName":       dbName + "." + td.Name,
		"Columns":         strings.Join(td.Columns, ", "),
		"TableOutputPath": filename,
	sio, err := fillStringTemplate(selectIntoOutfile, queryParams)
	if err != nil {
		return nil, err
	if err := mysqld.ExecuteSuperQuery(sio); err != nil {
		return nil, err

	file, err := os.Open(filename)
	if err != nil {
		return nil, err

	defer func() {
		if e := os.Remove(filename); e != nil {
			logger.Errorf("Cannot remove %v: %v", filename, e)

	filenamePattern := path.Join(cloneSourcePath, td.Name+".%v.csv.gz")
	hasherWriter, err := newCompressedNamedHasherWriter(filenamePattern, mysqld.SnapshotDir, td.Name, maximumFilesize)
	if err != nil {
		return nil, err

	splitter := csvsplitter.NewCSVReader(file, ',')
	for {
		line, err := splitter.ReadRecord()
		if err == io.EOF {
		if err != nil {
			return nil, err
		_, err = hasherWriter.Write(line)
		if err != nil {
			return nil, err

	return hasherWriter.SnapshotFiles()
Esempio n. 3
// Backup is the main entry point for a backup:
// - uses the BackupStorage service to store a new backup
// - shuts down Mysqld during the backup
// - remember if we were replicating, restore the exact same state
func Backup(ctx context.Context, mysqld MysqlDaemon, logger logutil.Logger, dir, name string, backupConcurrency int, hookExtraEnv map[string]string) error {

	// start the backup with the BackupStorage
	bs, err := backupstorage.GetBackupStorage()
	if err != nil {
		return err
	bh, err := bs.StartBackup(dir, name)
	if err != nil {
		return fmt.Errorf("StartBackup failed: %v", err)

	if err = backup(ctx, mysqld, logger, bh, backupConcurrency, hookExtraEnv); err != nil {
		if abortErr := bh.AbortBackup(); abortErr != nil {
			logger.Errorf("failed to abort backup: %v", abortErr)
		return err
	return bh.EndBackup()
Esempio n. 4
// Restore is the main entry point for backup restore.  If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(
	ctx context.Context,
	mysqld MysqlDaemon,
	dir string,
	restoreConcurrency int,
	hookExtraEnv map[string]string,
	localMetadata map[string]string,
	logger logutil.Logger,
	deleteBeforeRestore bool) (replication.Position, error) {

	// find the right backup handle: most recent one, with a MANIFEST
	logger.Infof("Restore: looking for a suitable backup to restore")
	bs, err := backupstorage.GetBackupStorage()
	if err != nil {
		return replication.Position{}, err
	defer bs.Close()
	bhs, err := bs.ListBackups(dir)
	if err != nil {
		return replication.Position{}, fmt.Errorf("ListBackups failed: %v", err)
	var bh backupstorage.BackupHandle
	var bm BackupManifest
	var toRestore int
	for toRestore = len(bhs) - 1; toRestore >= 0; toRestore-- {
		bh = bhs[toRestore]
		rc, err := bh.ReadFile(backupManifest)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage: can't read MANIFEST: %v)", bh.Name(), dir, err)

		err = json.NewDecoder(rc).Decode(&bm)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (cannot JSON decode MANIFEST: %v)", bh.Name(), dir, err)

		logger.Infof("Restore: found backup %v %v to restore with %v files", bh.Directory(), bh.Name(), len(bm.FileEntries))
	if toRestore < 0 {
		logger.Errorf("No backup to restore on BackupStorage for directory %v. Starting up empty.", dir)
		if err = populateLocalMetadata(mysqld, localMetadata); err == nil {
			err = ErrNoBackup
		return replication.Position{}, err

	if !deleteBeforeRestore {
		logger.Infof("Restore: checking no existing data is present")
		ok, err := checkNoDB(ctx, mysqld)
		if err != nil {
			return replication.Position{}, err
		if !ok {
			logger.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.")
			if err = populateLocalMetadata(mysqld, localMetadata); err == nil {
				err = ErrExistingDB
			return replication.Position{}, err

	logger.Infof("Restore: shutdown mysqld")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: deleting existing files")
	if err := removeExistingFiles(mysqld.Cnf()); err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: reinit config file")
	err = mysqld.ReinitConfig(ctx)
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: copying all files")
	if err := restoreFiles(mysqld.Cnf(), bh, bm.FileEntries, restoreConcurrency); err != nil {
		return replication.Position{}, err

	// mysqld needs to be running in order for mysql_upgrade to work.
	// If we've just restored from a backup from previous MySQL version then mysqld
	// may fail to start due to a different structure of mysql.* tables. The flag
	// --skip-grant-tables ensures that these tables are not read until mysql_upgrade
	// is executed. And since with --skip-grant-tables anyone can connect to MySQL
	// without password, we are passing --skip-networking to greatly reduce the set
	// of those who can connect.
	logger.Infof("Restore: starting mysqld for mysql_upgrade")
	err = mysqld.Start(ctx, "--skip-grant-tables", "--skip-networking")
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: running mysql_upgrade")
	if err := mysqld.RunMysqlUpgrade(); err != nil {
		return replication.Position{}, fmt.Errorf("mysql_upgrade failed: %v", err)

	// Populate local_metadata before starting without --skip-networking,
	// so it's there before we start announcing ourselves.
	logger.Infof("Restore: populating local_metadata")
	err = populateLocalMetadata(mysqld, localMetadata)
	if err != nil {
		return replication.Position{}, err

	// The MySQL manual recommends restarting mysqld after running mysql_upgrade,
	// so that any changes made to system tables take effect.
	logger.Infof("Restore: restarting mysqld after mysql_upgrade")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err

	return bm.Position, nil
Esempio n. 5
// Go runs the diff. If there is no error, it will drain both sides.
// If an error occurs, it will just return it and stop.
//
// Rows are pulled from rd.superset and rd.subset with Next(); a side is only
// advanced when its advance flag is set. The start time is recorded in the
// report and dr.ComputeQPS is deferred so it runs on every exit path.
//
// NOTE(review): in this copy the function has no closing braces and several
// branches are empty (for instance the error checks right after Next() and
// the "we're done" case). Statements appear to have been lost from the
// listing; confirm against the original before relying on the flow below.
func (rd *RowSubsetDiffer) Go(log logutil.Logger) (dr DiffReport, err error) {

	dr.startingTime = time.Now()
	defer dr.ComputeQPS()

	// Current row of each side; nil is treated below as "side exhausted".
	var superset []sqltypes.Value
	var subset []sqltypes.Value
	advanceSuperset := true
	advanceSubset := true
	for {
		if advanceSuperset {
			superset, err = rd.superset.Next()
			if err != nil {
			advanceSuperset = false
		if advanceSubset {
			subset, err = rd.subset.Next()
			if err != nil {
			advanceSubset = false
		if superset == nil {
			// no more rows from the superset
			if subset == nil {
				// no more rows from subset either, we're done

			// drain subset, update count
			// (the +1 accounts for the subset row already held in hand)
			if count, err := rd.subset.Drain(); err != nil {
				return dr, err
			} else {
				dr.extraRowsRight += 1 + count
		if subset == nil {
			// no more rows from the subset
			// we know we have rows from superset, drain
			// (extra superset rows are expected, so the count is ignored)
			if _, err := rd.superset.Drain(); err != nil {
				return dr, err

		// we have both superset and subset, compare
		f := RowsEqual(superset, subset)
		if f == -1 {
			// rows are the same, next
			advanceSuperset = true
			advanceSubset = true

		if f >= rd.pkFieldCount {
			// rows have the same primary key, only content is different
			// (only the first few mismatches are logged)
			if dr.mismatchedRows < 10 {
				log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset)
			advanceSuperset = true
			advanceSubset = true

		// have to find the 'smallest' row and advance it
		c, err := CompareRows(rd.superset.Fields(), rd.pkFieldCount, superset, subset)
		if err != nil {
			return dr, err
		if c < 0 {
			// superset row sorts first: it has no counterpart in the subset,
			// which is not reported as a difference here.
			advanceSuperset = true
		} else if c > 0 {
			if dr.extraRowsRight < 10 {
				log.Errorf("Extra row %v on subset: %v", dr.extraRowsRight, subset)
			advanceSubset = true

		// After looking at primary keys more carefully,
		// they're the same. Logging a regular difference
		// then, and advancing both.
		if dr.mismatchedRows < 10 {
			log.Errorf("Different content %v in same PK: %v != %v", dr.mismatchedRows, superset, subset)
		advanceSuperset = true
		advanceSubset = true
Esempio n. 6
// MultiRestore is the main entry point for multi restore.
// We will either:
// - read from the network if sourceAddrs != nil
// - read from a disk snapshot if fromStoragePaths != nil
// The strategy is used as follows:
// - If it contains the string 'writeBinLogs' then we will also write
//   to the binary logs.
// - If it contains the command 'populateBlpCheckpoint' then we will
//   populate the blp_checkpoint table with master positions to start from
//   - If is also contains the command 'dontStartBinlogPlayer' we won't
//   start binlog replication on the destination (but it will be configured)
//
// Overall flow visible below: collect one manifest per source, sanity-check
// them, recreate a scratch directory under SnapshotDir/multirestore/<db>,
// create the database, tables and views from the first manifest, then fetch
// and LOAD DATA every snapshot file concurrently, and finally (optionally)
// fill in blp_checkpoint.
//
// NOTE(review): in this copy the function has no closing braces and many
// branches are empty (error checks in the goroutines, the job-count loop,
// the deferred lock releases). Statements appear to have been lost from the
// listing; confirm against the original before relying on the flow below.
func (mysqld *Mysqld) MultiRestore(logger logutil.Logger, destinationDbName string, keyRanges []key.KeyRange, sourceAddrs []*url.URL, fromStoragePaths []string, snapshotConcurrency, fetchConcurrency, insertTableConcurrency, fetchRetryCount int, strategy string) (err error) {
	writeBinLogs := strings.Contains(strategy, "writeBinLogs")

	var manifests []*SplitSnapshotManifest
	if sourceAddrs != nil {
		// get the manifests from the network
		manifests = make([]*SplitSnapshotManifest, len(sourceAddrs))
		rc := concurrency.NewResourceConstraint(fetchConcurrency)
		for i, sourceAddr := range sourceAddrs {
			go func(sourceAddr *url.URL, i int) {
				defer rc.ReleaseAndDone()
				if rc.HasErrors() {

				// The URL path, when present, names the source database;
				// otherwise the destination name is reused.
				var sourceDbName string
				if len(sourceAddr.Path) < 2 { // "" or "/"
					sourceDbName = destinationDbName
				} else {
					sourceDbName = sourceAddr.Path[1:]
				ssm, e := fetchSnapshotManifestWithRetry("http://"+sourceAddr.Host, sourceDbName, keyRanges[i], fetchRetryCount)
				manifests[i] = ssm
			}(sourceAddr, i)
		if err = rc.Wait(); err != nil {
	} else {
		// get the manifests from the local snapshots
		manifests = make([]*SplitSnapshotManifest, len(fromStoragePaths))
		for i, fromStoragePath := range fromStoragePaths {
			var err error
			manifests[i], err = readSnapshotManifest(fromStoragePath)
			if err != nil {
				return err

	if e := SanityCheckManifests(manifests); e != nil {
		return e

	tempStoragePath := path.Join(mysqld.SnapshotDir, "multirestore", destinationDbName)

	// Start fresh
	if err = os.RemoveAll(tempStoragePath); err != nil {

	if err = os.MkdirAll(tempStoragePath, 0775); err != nil {
		return err

	// Remove the scratch directory on exit; a failure is only logged.
	defer func() {
		if e := os.RemoveAll(tempStoragePath); e != nil {
			logger.Errorf("error removing %v: %v", tempStoragePath, e)


	// Handle our concurrency:
	// - fetchConcurrency tasks for network / decompress from disk
	// - insertTableConcurrency for table inserts from a file
	//   into an innodb table
	// - snapshotConcurrency tasks for table inserts / modify tables
	sems := make(map[string]*sync2.Semaphore, len(manifests[0].SchemaDefinition.TableDefinitions)+2)
	sems["net"] = sync2.NewSemaphore(fetchConcurrency, 0)
	sems["db"] = sync2.NewSemaphore(snapshotConcurrency, 0)

	// Store the alter table statements for after restore,
	// and how many jobs we're running on each table
	// TODO(alainjobart) the jobCount map is a bit weird. replace it
	// with a map of WaitGroups, initialized to the number of files
	// per table. Have extra go routines for the tables with auto_increment
	// to wait on the waitgroup, and apply the modify_table.
	postSql := make(map[string]string, len(manifests[0].SchemaDefinition.TableDefinitions))
	jobCount := make(map[string]*sync2.AtomicInt32)

	// Create the database (it's a good check to know if we're running
	// multirestore a second time too!)
	manifest := manifests[0] // I am assuming they all match
	createDatabase, e := fillStringTemplate(manifest.SchemaDefinition.DatabaseSchema, map[string]string{"DatabaseName": destinationDbName})
	if e != nil {
		return e
	if createDatabase == "" {
		return fmt.Errorf("Empty create database statement")

	createDbCmds := make([]string, 0, len(manifest.SchemaDefinition.TableDefinitions)+2)
	if !writeBinLogs {
		createDbCmds = append(createDbCmds, "SET sql_log_bin = OFF")
	createDbCmds = append(createDbCmds, createDatabase)
	createDbCmds = append(createDbCmds, "USE `"+destinationDbName+"`")
	createViewCmds := make([]string, 0, 16)
	for _, td := range manifest.SchemaDefinition.TableDefinitions {
		if td.Type == proto.TABLE_BASE_TABLE {
			// Base tables get a per-table job counter and semaphore; any
			// returned alter statement is kept for after the data load.
			createDbCmd, alterTable, err := MakeSplitCreateTableSql(logger, td.Schema, destinationDbName, td.Name, strategy)
			if err != nil {
				return err
			if alterTable != "" {
				postSql[td.Name] = alterTable
			jobCount[td.Name] = new(sync2.AtomicInt32)
			createDbCmds = append(createDbCmds, createDbCmd)
			sems["table-"+td.Name] = sync2.NewSemaphore(insertTableConcurrency, 0)
		} else {
			// views are just created with the right db name
			// and no data will ever go in them. We create them
			// after all tables are created, as they will
			// probably depend on real tables.
			createViewCmd, err := fillStringTemplate(td.Schema, map[string]string{"DatabaseName": destinationDbName})
			if err != nil {
				return err
			createViewCmds = append(createViewCmds, createViewCmd)
	createDbCmds = append(createDbCmds, createViewCmds...)
	if err = mysqld.ExecuteSuperQueryList(createDbCmds); err != nil {

	// compute how many jobs we will have
	for _, manifest := range manifests {
		for _, file := range manifest.Source.Files {

	loadDataInfile := `LOAD DATA INFILE '{{.TableInputPath}}' INTO TABLE {{.TableName}} CHARACTER SET binary FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' ({{.Columns}})`

	// fetch all the csv files, and apply them one at a time. Note
	// this might start many go routines, and they'll all be
	// waiting on the resource semaphores.
	mrc := concurrency.NewMultiResourceConstraint(sems)
	for manifestIndex, manifest := range manifests {
		if err = os.Mkdir(path.Join(tempStoragePath, manifest.Source.Addr), 0775); err != nil {
			return err

		for i := range manifest.Source.Files {
			lsf := localSnapshotFile{manifest: manifest, file: &manifest.Source.Files[i], basePath: tempStoragePath}
			go func(manifestIndex, i int) {
				defer mrc.Done()

				// compute a few things now, so if we can't we
				// don't take resources:
				// - get the schema
				td, ok := manifest.SchemaDefinition.GetTable(lsf.tableName())
				if !ok {
					mrc.RecordError(fmt.Errorf("No table named %v in schema", lsf.tableName()))

				// - get the load data statement
				queryParams := map[string]string{
					"TableInputPath": lsf.filename(),
					"TableName":      lsf.tableName(),
					"Columns":        strings.Join(td.Columns, ", "),
				loadStatement, e := fillStringTemplate(loadDataInfile, queryParams)
				if e != nil {

				// get the file, using the 'net' resource
				if mrc.HasErrors() {
				if sourceAddrs == nil {
					e = uncompressLocalFile(path.Join(fromStoragePaths[manifestIndex], path.Base(lsf.file.Path)), lsf.file.Hash, lsf.filename())
				} else {
					e = fetchFileWithRetry(lsf.url(), lsf.file.Hash, lsf.filename(), fetchRetryCount)
				if e != nil {
				defer os.Remove(lsf.filename())

				// acquire the table lock (we do this first
				// so we maximize access to db. Otherwise
				// if 8 threads had gotten the db lock but
				// were writing to the same table, only one
				// load would go at once)
				tableLockName := "table-" + lsf.tableName()
				defer func() {
				if mrc.HasErrors() {

				// acquire the db lock
				defer func() {
				if mrc.HasErrors() {

				// load the data in
				queries := buildQueryList(destinationDbName, loadStatement, writeBinLogs)
				e = mysqld.ExecuteSuperQueryList(queries)
				if e != nil {

				// if we're running the last insert,
				// potentially re-add the auto-increments
				remainingInserts := jobCount[lsf.tableName()].Add(-1)
				if remainingInserts == 0 && postSql[lsf.tableName()] != "" {
					queries = buildQueryList(destinationDbName, postSql[lsf.tableName()], writeBinLogs)
					e = mysqld.ExecuteSuperQueryList(queries)
					if e != nil {
			}(manifestIndex, i)

	if err = mrc.Wait(); err != nil {
		return err

	// populate blp_checkpoint table if we want to
	if strings.Index(strategy, "populateBlpCheckpoint") != -1 {
		queries := make([]string, 0, 4)
		if !writeBinLogs {
			queries = append(queries, "SET sql_log_bin = OFF")
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if strings.Index(strategy, "dontStartBinlogPlayer") != -1 {
			flags = binlogplayer.BLP_FLAG_DONT_START
		// One checkpoint row per source, keyed by the manifest index.
		for manifestIndex, manifest := range manifests {
			queries = append(queries, binlogplayer.PopulateBlpCheckpoint(uint32(manifestIndex), manifest.Source.MasterPosition, time.Now().Unix(), flags))
		if err = mysqld.ExecuteSuperQueryList(queries); err != nil {
			return err
	return nil
Esempio n. 7
// CreateMultiSnapshot create snapshots of the data.
// - for a resharding snapshot, keyRanges+keyName+keyType are set,
//   and tables is empty. This action will create multiple snapshots,
//   one per keyRange.
// - for a vertical split, tables is set, keyRanges = [KeyRange{}] and
//   keyName+keyType are empty. It will create a single snapshot of
//   the contents of the tables.
// Note combinations of table subset and keyranges are not supported.
//
// Overall flow visible below: validate the inputs and the clone source,
// recreate one output directory per key range plus a shared "<db>-all"
// directory, fetch the schema, put the server in snapshot state, dump every
// base table concurrently, verify the replication position did not move,
// write one manifest per key range, run the copy_snapshot_to_storage hook
// per key range, and return the manifest URL paths.
//
// NOTE(review): in this copy the function has no closing braces and many
// error branches are empty; the hook goroutine also ends before anything is
// executed or waited for. Statements appear to have been lost from the
// listing; confirm against the original before relying on the flow below.
func (mysqld *Mysqld) CreateMultiSnapshot(logger logutil.Logger, keyRanges []key.KeyRange, dbName, keyName string, keyType key.KeyspaceIdType, sourceAddr string, allowHierarchicalReplication bool, snapshotConcurrency int, tables, excludeTables []string, skipSlaveRestart bool, maximumFilesize uint64, hookExtraEnv map[string]string) (snapshotManifestFilenames []string, err error) {
	if dbName == "" {
		err = fmt.Errorf("no database name provided")
	if len(tables) > 0 {
		if len(keyRanges) != 1 || keyRanges[0].IsPartial() {
			return nil, fmt.Errorf("With tables specified, can only have one full KeyRange")

	// same logic applies here
	if err = mysqld.validateCloneSource(false, hookExtraEnv); err != nil {

	// clean out and start fresh
	cloneSourcePaths := make(map[key.KeyRange]string)
	for _, keyRange := range keyRanges {
		cloneSourcePaths[keyRange] = path.Join(mysqld.SnapshotDir, dataDir, dbName+"-"+string(keyRange.Start.Hex())+","+string(keyRange.End.Hex()))
	for _, _path := range cloneSourcePaths {
		if err = os.RemoveAll(_path); err != nil {
		if err = os.MkdirAll(_path, 0775); err != nil {

	// Shared directory receiving the raw per-table dumps before splitting.
	mainCloneSourcePath := path.Join(mysqld.SnapshotDir, dataDir, dbName+"-all")
	if err = os.RemoveAll(mainCloneSourcePath); err != nil {
	if err = os.MkdirAll(mainCloneSourcePath, 0775); err != nil {

	// get the schema for each table
	sd, fetchErr := mysqld.GetSchema(dbName, tables, excludeTables, true)
	if fetchErr != nil {
		return []string{}, fetchErr
	if len(sd.TableDefinitions) == 0 {
		return []string{}, fmt.Errorf("empty table list for %v", dbName)

	// prepareToSnapshot will get the tablet in the right state,
	// and return the current mysql status.
	slaveStartRequired, readOnly, replicationPosition, myMasterPosition, masterAddr, conn, err := mysqld.prepareToSnapshot(logger, allowHierarchicalReplication, hookExtraEnv)
	if err != nil {
	if skipSlaveRestart {
		if slaveStartRequired {
			logger.Infof("Overriding slaveStartRequired to false")
		slaveStartRequired = false
	// Undo the snapshot state on exit; a failure there replaces the returned
	// error through replaceError.
	defer func() {
		err = replaceError(logger, err, mysqld.restoreAfterSnapshot(logger, slaveStartRequired, readOnly, hookExtraEnv, conn))

	// dump the files in parallel with a pre-defined concurrency
	datafiles := make([]map[key.KeyRange][]SnapshotFile, len(sd.TableDefinitions))
	dumpTableWorker := func(i int) (err error) {
		table := sd.TableDefinitions[i]
		if table.Type != proto.TABLE_BASE_TABLE {
			// we just skip views here
			return nil
		if len(tables) > 0 {
			// Table subset requested: dump whole tables under the single
			// zero-value key range.
			sfs, err := mysqld.dumpTableFull(logger, table, dbName, mainCloneSourcePath, cloneSourcePaths[key.KeyRange{}], maximumFilesize)
			if err != nil {
				return err
			datafiles[i] = map[key.KeyRange][]SnapshotFile{
				key.KeyRange{}: sfs,
		} else {
			datafiles[i], err = mysqld.dumpTableSplit(logger, table, dbName, keyName, keyType, mainCloneSourcePath, cloneSourcePaths, maximumFilesize)
	if err = ConcurrentMap(snapshotConcurrency, len(sd.TableDefinitions), dumpTableWorker); err != nil {

	if e := os.Remove(mainCloneSourcePath); e != nil {
		logger.Errorf("Cannot remove %v: %v", mainCloneSourcePath, e)

	// Check the replication position after snapshot is done
	// hasn't changed, to be sure we haven't inserted any data
	newReplicationPosition, _, err := mysqld.getReplicationPositionForClones(allowHierarchicalReplication)
	if err != nil {
	if !newReplicationPosition.Equal(replicationPosition) {
		return nil, fmt.Errorf("replicationPosition position changed during snapshot, from %v to %v", replicationPosition, newReplicationPosition)

	// Write all the manifest files
	ssmFiles := make([]string, len(keyRanges))
	for i, kr := range keyRanges {
		// Gather this key range's files across all tables.
		krDatafiles := make([]SnapshotFile, 0, len(datafiles))
		for _, m := range datafiles {
			krDatafiles = append(krDatafiles, m[kr]...)
		ssm, err := NewSplitSnapshotManifest(sourceAddr, mysqld.IpAddr(),
			masterAddr, dbName, krDatafiles, replicationPosition,
			myMasterPosition, kr, sd)
		if err != nil {
			return nil, err
		ssmFiles[i] = path.Join(cloneSourcePaths[kr], partialSnapshotManifestFile)
		if err = writeJson(ssmFiles[i], ssm); err != nil {
			return nil, err

	// Call the (optional) hook to send the files somewhere else
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, kr := range keyRanges {
		go func(kr key.KeyRange) {
			defer wg.Done()
			h := hook.NewSimpleHook("copy_snapshot_to_storage")
			// Copy the caller's environment so each hook gets its own map.
			h.ExtraEnv = make(map[string]string)
			for k, v := range hookExtraEnv {
				h.ExtraEnv[k] = v
			h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", kr.Start.Hex(), kr.End.Hex())
			h.ExtraEnv["SNAPSHOT_PATH"] = cloneSourcePaths[kr]
	// NOTE(review): this returns err rather than an error taken from rec —
	// confirm that is intended.
	if rec.HasErrors() {
		return nil, err

	// Return all the URLs for the MANIFESTs
	snapshotURLPaths := make([]string, len(keyRanges))
	for i := 0; i < len(keyRanges); i++ {
		relative, err := filepath.Rel(mysqld.SnapshotDir, ssmFiles[i])
		if err != nil {
			return nil, err
		snapshotURLPaths[i] = path.Join(SnapshotURLPath, relative)
	return snapshotURLPaths, nil
Esempio n. 8
// dumpTableSplit will dump a table, and then split it according to keyspace_id
// into multiple files.
func (mysqld *Mysqld) dumpTableSplit(logger logutil.Logger, td *proto.TableDefinition, dbName, keyName string, keyType key.KeyspaceIdType, mainCloneSourcePath string, cloneSourcePaths map[key.KeyRange]string, maximumFilesize uint64) (map[key.KeyRange][]SnapshotFile, error) {
	filename := path.Join(mainCloneSourcePath, td.Name+".csv")
	selectIntoOutfile := `SELECT {{.KeyspaceIdColumnName}}, {{.Columns}} INTO OUTFILE "{{.TableOutputPath}}" CHARACTER SET binary FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' FROM {{.TableName}}`
	queryParams := map[string]string{
		"TableName":            dbName + "." + td.Name,
		"Columns":              strings.Join(td.Columns, ", "),
		"KeyspaceIdColumnName": keyName,
		"TableOutputPath":      filename,
	numberColumn := true
	if keyType == key.KIT_BYTES {
		numberColumn = false
		queryParams["KeyspaceIdColumnName"] = "HEX(" + keyName + ")"
	sio, err := fillStringTemplate(selectIntoOutfile, queryParams)
	if err != nil {
		return nil, fmt.Errorf("fillStringTemplate for %v: %v", td.Name, err)
	if err := mysqld.ExecuteSuperQuery(sio); err != nil {
		return nil, fmt.Errorf("ExecuteSuperQuery failed for %v with query %v: %v", td.Name, sio, err)

	file, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("Cannot open file %v for table %v: %v", filename, td.Name, err)

	defer func() {
		if e := os.Remove(filename); e != nil {
			logger.Errorf("Cannot remove %v: %v", filename, e)

	hasherWriters := make(map[key.KeyRange]*namedHasherWriter)

	for kr, cloneSourcePath := range cloneSourcePaths {
		filenamePattern := path.Join(cloneSourcePath, td.Name+".%v.csv.gz")
		w, err := newCompressedNamedHasherWriter(filenamePattern, mysqld.SnapshotDir, td.Name, maximumFilesize)
		if err != nil {
			return nil, fmt.Errorf("newCompressedNamedHasherWriter failed for %v: %v", td.Name, err)
		hasherWriters[kr] = w

	splitter := csvsplitter.NewKeyspaceCSVReader(file, ',', numberColumn)
	for {
		keyspaceId, line, err := splitter.ReadRecord()
		if err == io.EOF {
		if err != nil {
			return nil, fmt.Errorf("ReadRecord failed for table %v: %v", td.Name, err)
		for kr, w := range hasherWriters {
			if kr.Contains(keyspaceId) {
				_, err = w.Write(line)
				if err != nil {
					return nil, fmt.Errorf("Write failed for %v: %v", td.Name, err)

	snapshotFiles := make(map[key.KeyRange][]SnapshotFile)
	for i, hw := range hasherWriters {
		if snapshotFiles[i], err = hw.SnapshotFiles(); err != nil {
			return nil, fmt.Errorf("SnapshotFiles failed for %v: %v", td.Name, err)

	return snapshotFiles, nil
Esempio n. 9
// RebuildVSchema rebuilds the SrvVSchema for the provided cell list
// (or all cells if cell list is empty).
func RebuildVSchema(ctx context.Context, log logutil.Logger, ts topo.Server, cells []string) error {
	// get the actual list of cells
	if len(cells) == 0 {
		var err error
		cells, err = ts.GetKnownCells(ctx)
		if err != nil {
			return fmt.Errorf("GetKnownCells failed: %v", err)

	// get the keyspaces
	keyspaces, err := ts.GetKeyspaces(ctx)
	if err != nil {
		return fmt.Errorf("GetKeyspaces failed: %v", err)

	// build the SrvVSchema in parallel, protected by mu
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	var finalErr error
	srvVSchema := &vschemapb.SrvVSchema{
		Keyspaces: map[string]*vschemapb.Keyspace{},
	for _, keyspace := range keyspaces {
		go func(keyspace string) {
			defer wg.Done()

			k, err := ts.GetVSchema(ctx, keyspace)
			if err == topo.ErrNoNode {
				err = nil
				k = &vschemapb.Keyspace{}

			defer mu.Unlock()
			if err != nil {
				log.Errorf("GetVSchema(%v) failed: %v", keyspace, err)
				finalErr = err
			srvVSchema.Keyspaces[keyspace] = k
	if finalErr != nil {
		return finalErr

	// now save the SrvVSchema in all cells in parallel
	for _, cell := range cells {
		go func(cell string) {
			defer wg.Done()
			if err := ts.UpdateSrvVSchema(ctx, cell, srvVSchema); err != nil {
				log.Errorf("UpdateSrvVSchema(%v) failed: %v", cell, err)
				finalErr = err

	return finalErr
Esempio n. 10
// CreateSnapshot creates a snapshot of this instance for cloning.
//
// This function runs on the machine acting as the source for the clone.
// Check master/slave status and determine restore needs.
// If this instance is a slave, stop replication, otherwise place in read-only mode.
// Record replication position.
// Shutdown mysql
// Check paths for storing data
// Depending on the serverMode flag, we do the following:
// serverMode = false:
//   Compress /vt/vt_[0-9a-f]+/data/vt_.+
//   Compute hash (of compressed files, as we serve .gz files here)
//   Place in /vt/clone_src where they will be served by http server (not rpc)
//   Restart mysql
// serverMode = true:
//   Make symlinks for /vt/vt_[0-9a-f]+/data/vt_.+ to innodb files
//   Compute hash (of uncompressed files, as we serve uncompressed files)
//   Place symlinks in /vt/clone_src where they will be served by http server
//   Leave mysql stopped, return slaveStartRequired, readOnly
//
// On success it returns the URL path of the snapshot manifest together with
// the slaveStartRequired and readOnly values captured before the snapshot.
//
// NOTE(review): in this copy the function has no closing braces and almost
// every error branch is empty. Statements appear to have been lost from the
// listing; confirm against the original before relying on the flow below.
func (mysqld *Mysqld) CreateSnapshot(logger logutil.Logger, dbName, sourceAddr string, allowHierarchicalReplication bool, concurrency int, serverMode bool, hookExtraEnv map[string]string) (snapshotManifestUrlPath string, slaveStartRequired, readOnly bool, err error) {
	if dbName == "" {
		return "", false, false, errors.New("CreateSnapshot failed: no database name provided")

	if err = mysqld.validateCloneSource(serverMode, hookExtraEnv); err != nil {

	// save initial state so we can restore on Start()
	slaveStartRequired = false
	sourceIsMaster := false
	readOnly = true

	// ErrNotSlave from SlaveStatus is how a master is recognised here.
	slaveStatus, err := mysqld.SlaveStatus()
	if err == nil {
		slaveStartRequired = slaveStatus.SlaveRunning()
	} else if err == ErrNotSlave {
		sourceIsMaster = true
	} else {
		// If we can't get any data, just fail.

	readOnly, err = mysqld.IsReadOnly()
	if err != nil {

	// Stop sources of writes so we can get a consistent replication position.
	// If the source is a slave use the master replication position
	// unless we are allowing hierarchical replicas.
	masterAddr := ""
	var replicationPosition proto.ReplicationPosition
	if sourceIsMaster {
		if err = mysqld.SetReadOnly(true); err != nil {
		replicationPosition, err = mysqld.MasterPosition()
		if err != nil {
		masterAddr = mysqld.IpAddr()
	} else {
		if err = mysqld.StopSlave(hookExtraEnv); err != nil {
		var slaveStatus *proto.ReplicationStatus
		slaveStatus, err = mysqld.SlaveStatus()
		if err != nil {
		replicationPosition = slaveStatus.Position

		// We are a slave, check our replication strategy before
		// choosing the master address.
		if allowHierarchicalReplication {
			masterAddr = mysqld.IpAddr()
		} else {
			masterAddr, err = mysqld.GetMasterAddr()
			if err != nil {

	if err = mysqld.Shutdown(true, MysqlWaitTime); err != nil {

	// Snapshot failures are kept in snapshotErr (and logged) rather than
	// returned right away, so the state restoration below still runs.
	var smFile string
	dataFiles, snapshotErr := mysqld.createSnapshot(logger, concurrency, serverMode)
	if snapshotErr != nil {
		logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
	} else {
		var sm *SnapshotManifest
		sm, snapshotErr = newSnapshotManifest(sourceAddr, mysqld.IpAddr(),
			masterAddr, dbName, dataFiles, replicationPosition, proto.ReplicationPosition{})
		if snapshotErr != nil {
			logger.Errorf("CreateSnapshot failed: %v", snapshotErr)
		} else {
			smFile = path.Join(mysqld.SnapshotDir, SnapshotManifestFile)
			if snapshotErr = writeJson(smFile, sm); snapshotErr != nil {
				logger.Errorf("CreateSnapshot failed: %v", snapshotErr)

	// restore our state if required
	if serverMode && snapshotErr == nil {
		logger.Infof("server mode snapshot worked, not restarting mysql")
	} else {
		if err = mysqld.SnapshotSourceEnd(slaveStartRequired, readOnly, false /*deleteSnapshot*/, hookExtraEnv); err != nil {

	if snapshotErr != nil {
		return "", slaveStartRequired, readOnly, snapshotErr
	relative, err := filepath.Rel(mysqld.SnapshotDir, smFile)
	// NOTE(review): a filepath.Rel failure is returned with a nil error and
	// an empty path — confirm that is intended.
	if err != nil {
		return "", slaveStartRequired, readOnly, nil
	return path.Join(SnapshotURLPath, relative), slaveStartRequired, readOnly, nil
Esempio n. 11
// Restore is the main entry point for backup restore.  If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(ctx context.Context, mysqld MysqlDaemon, dir string, restoreConcurrency int, hookExtraEnv map[string]string, logger logutil.Logger, deleteBeforeRestore bool) (replication.Position, error) {
	// find the right backup handle: most recent one, with a MANIFEST
	logger.Infof("Restore: looking for a suitable backup to restore")
	bs, err := backupstorage.GetBackupStorage()
	if err != nil {
		return replication.Position{}, err
	defer bs.Close()
	bhs, err := bs.ListBackups(dir)
	if err != nil {
		return replication.Position{}, fmt.Errorf("ListBackups failed: %v", err)
	var bh backupstorage.BackupHandle
	var bm BackupManifest
	var toRestore int
	for toRestore = len(bhs) - 1; toRestore >= 0; toRestore-- {
		bh = bhs[toRestore]
		rc, err := bh.ReadFile(backupManifest)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage: can't read MANIFEST: %v)", bh.Name(), dir, err)

		err = json.NewDecoder(rc).Decode(&bm)
		if err != nil {
			log.Warningf("Possibly incomplete backup %v in directory %v on BackupStorage (cannot JSON decode MANIFEST: %v)", bh.Name(), dir, err)

		logger.Infof("Restore: found backup %v %v to restore with %v files", bh.Directory(), bh.Name(), len(bm.FileEntries))
	if toRestore < 0 {
		logger.Errorf("No backup to restore on BackupStorage for directory %v", dir)
		return replication.Position{}, ErrNoBackup

	if !deleteBeforeRestore {
		logger.Infof("Restore: checking no existing data is present")
		ok, err := checkNoDB(ctx, mysqld)
		if err != nil {
			return replication.Position{}, err
		if !ok {
			return replication.Position{}, ErrExistingDB

	logger.Infof("Restore: shutdown mysqld")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: deleting existing files")
	if err := removeExistingFiles(mysqld.Cnf()); err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: reinit config file")
	err = mysqld.ReinitConfig(ctx)
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: copying all files")
	if err := restoreFiles(mysqld.Cnf(), bh, bm.FileEntries, restoreConcurrency); err != nil {
		return replication.Position{}, err

	// mysqld needs to be running in order for mysql_upgrade to work.
	logger.Infof("Restore: starting mysqld for mysql_upgrade")
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err

	logger.Infof("Restore: running mysql_upgrade")
	if err := mysqld.RunMysqlUpgrade(); err != nil {
		return replication.Position{}, fmt.Errorf("mysql_upgrade failed: %v", err)

	// The MySQL manual recommends restarting mysqld after running mysql_upgrade,
	// so that any changes made to system tables take effect.
	logger.Infof("Restore: restarting mysqld after mysql_upgrade")
	err = mysqld.Shutdown(ctx, true)
	if err != nil {
		return replication.Position{}, err
	err = mysqld.Start(ctx)
	if err != nil {
		return replication.Position{}, err

	return bm.Position, nil