Ejemplo n.º 1
0
func Start(mt *Mysqld, mysqlWaitTime time.Duration) error {
	var name string

	// try the mysqld start hook, if any
	h := hook.NewSimpleHook("mysqld_start")
	hr := h.Execute()
	switch hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
		name = "mysqld_start hook"
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, run mysqld_safe ourselves
		log.Infof("No mysqld_start hook, running mysqld_safe directly")
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name = path.Join(dir, "bin/mysqld_safe")
		arg := []string{
			"--defaults-file=" + mt.config.path}
		env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")}

		cmd := exec.Command(name, arg...)
		cmd.Dir = dir
		cmd.Env = env
		log.Infof("mysqlctl.Start mysqlWaitTime:%v %#v", mysqlWaitTime, cmd)
		_, err = cmd.StderrPipe()
		if err != nil {
			return nil
		}
		err = cmd.Start()
		if err != nil {
			return nil
		}

		// wait so we don't get a bunch of defunct processes
		go cmd.Wait()
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_start hook failed: %v", hr.String())
	}

	// give it some time to succeed - usually by the time the socket emerges
	// we are in good shape
	for i := mysqlWaitTime; i >= 0; i -= time.Second {
		_, statErr := os.Stat(mt.config.SocketFile)
		if statErr == nil {
			// Make sure the socket file isn't stale.
			conn, connErr := mt.createConnection()
			if connErr == nil {
				conn.Close()
				return nil
			}
		} else if !os.IsNotExist(statErr) {
			return statErr
		}
		time.Sleep(time.Second)
	}
	return errors.New(name + ": deadline exceeded waiting for " + mt.config.SocketFile)
}
Ejemplo n.º 2
0
func (mysqld *Mysqld) ValidateCloneTarget(hookExtraEnv map[string]string) error {
	// run a hook to check local things
	h := hook.NewSimpleHook("preflight_restore")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}

	qr, err := mysqld.fetchSuperQuery("SHOW DATABASES")
	if err != nil {
		return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err)
	}

	for _, row := range qr.Rows {
		if strings.HasPrefix(row[0].String(), "vt_") {
			dbName := row[0].String()
			tableQr, err := mysqld.fetchSuperQuery("SHOW TABLES FROM " + dbName)
			if err != nil {
				return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err)
			} else if len(tableQr.Rows) == 0 {
				// no tables == empty db, all is well
				continue
			}
			return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, found active db %v", dbName)
		}
	}

	return nil
}
Ejemplo n.º 3
0
func (mysqld *Mysqld) initConfig(root string) error {
	var err error
	var configData string

	switch hr := hook.NewSimpleHook("make_mycnf").Execute(); hr.ExitStatus {
	case hook.HOOK_DOES_NOT_EXIST:
		log.Infof("make_mycnf hook doesn't exist, reading default template files")
		cnfTemplatePaths := []string{
			path.Join(root, "config/mycnf/default.cnf"),
			path.Join(root, "config/mycnf/master.cnf"),
			path.Join(root, "config/mycnf/replica.cnf"),
		}

		if extraCnf := os.Getenv("EXTRA_MY_CNF"); extraCnf != "" {
			parts := strings.Split(extraCnf, ":")
			cnfTemplatePaths = append(cnfTemplatePaths, parts...)
		}

		configData, err = mysqld.config.makeMycnf(cnfTemplatePaths)
	case hook.HOOK_SUCCESS:
		configData, err = mysqld.config.fillMycnfTemplate(hr.Stdout)
	default:
		return fmt.Errorf("make_mycnf hook failed(%v): %v", hr.ExitStatus, hr.Stderr)
	}
	if err != nil {
		return err
	}

	return ioutil.WriteFile(mysqld.config.path, []byte(configData), 0664)
}
Ejemplo n.º 4
0
func (wr *Wrangler) checkMasterElect(ti *topo.TabletInfo) error {
	// Check the master-elect is fit for duty - call out for hardware checks.
	// if the server was already serving live traffic, it's probably good
	if ti.IsInServingGraph() {
		return nil
	}
	return wr.ExecuteOptionalTabletInfoHook(ti, hook.NewSimpleHook("preflight_serving_type"))
}
Ejemplo n.º 5
0
// Make this external, since these transitions need to be forced from time to time.
func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	if runHooks {
		// Only run the preflight_serving_type hook when
		// transitioning from non-serving to serving.
		if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) {
			if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil {
				return err
			}
		}
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		if tablet.Parent.IsZero() {
			si, err := ts.GetShard(tablet.Keyspace, tablet.Shard)
			if err != nil {
				return err
			}
			rec := concurrency.AllErrorRecorder{}
			wg := sync.WaitGroup{}
			for _, cell := range si.Cells {
				wg.Add(1)
				go func(cell string) {
					defer wg.Done()
					sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard)
					if err != nil {
						log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell)
						return
					}
					for _, rl := range sri.ReplicationLinks {
						if rl.Parent == tabletAlias {
							rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell))
						}
					}
				}(cell)
			}
			wg.Wait()
			if rec.HasErrors() {
				return rec.Error()
			}
		}
		tablet.Parent = topo.TabletAlias{}
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
	}
	return topo.UpdateTablet(ts, tablet)
}
Ejemplo n.º 6
0
// StopSlave stops a slave on the provided MysqldDaemon
func StopSlave(md MysqlDaemon, hookExtraEnv map[string]string) error {
	h := hook.NewSimpleHook("preflight_stop_slave")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}

	return md.ExecuteSuperQueryList([]string{SQLStopSlave})
}
Ejemplo n.º 7
0
// StartSlave starts a slave on the provided MysqldDaemon
func StartSlave(md MysqlDaemon, hookExtraEnv map[string]string) error {
	if err := md.ExecuteSuperQueryList([]string{SQLStartSlave}); err != nil {
		return err
	}

	h := hook.NewSimpleHook("postflight_start_slave")
	h.ExtraEnv = hookExtraEnv
	return h.ExecuteOptional()
}
Ejemplo n.º 8
0
func (mysqld *Mysqld) StopSlave(hookExtraEnv map[string]string) error {
	h := hook.NewSimpleHook("preflight_stop_slave")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}

	return mysqld.executeSuperQuery("SLAVE STOP")
}
Ejemplo n.º 9
0
func (mysqld *Mysqld) StartSlave(hookExtraEnv map[string]string) error {
	if err := mysqld.executeSuperQuery("SLAVE START"); err != nil {
		return err
	}

	h := hook.NewSimpleHook("postflight_start_slave")
	h.ExtraEnv = hookExtraEnv
	return h.ExecuteOptional()
}
Ejemplo n.º 10
0
// Scrap will update the tablet type to 'Scrap', and remove it from
// the serving graph.
//
// 'force' means we are not on the tablet being scrapped, so it is
// probably dead. So if 'force' is true, we will also remove pending
// remote actions.  And if 'force' is false, we also run an optional
// hook.
func Scrap(ts topo.Server, tabletAlias topo.TabletAlias, force bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// If you are already scrap, skip updating replication data. It won't
	// be there anyway.
	wasAssigned := tablet.IsAssigned()
	tablet.Type = topo.TYPE_SCRAP
	tablet.Parent = topo.TabletAlias{}
	// Update the tablet first, since that is canonical.
	err = topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}

	// Remove any pending actions. Presumably forcing a scrap
	// means you don't want the agent doing anything and the
	// machine requires manual attention.
	if force {
		err := ts.PurgeTabletActions(tabletAlias, actionnode.ActionNodeCanBePurged)
		if err != nil {
			log.Warningf("purge actions failed: %v", err)
		}
	}

	if wasAssigned {
		err = topo.DeleteTabletReplicationData(ts, tablet.Tablet)
		if err != nil {
			if err == topo.ErrNoNode {
				log.V(6).Infof("no ShardReplication object for cell %v", tablet.Alias.Cell)
				err = nil
			}
			if err != nil {
				log.Warningf("remove replication data for %v failed: %v", tablet.Alias, err)
			}
		}
	}

	// run a hook for final cleanup, only in non-force mode.
	// (force mode executes on the vtctl side, not on the vttablet side)
	if !force {
		hk := hook.NewSimpleHook("postflight_scrap")
		ConfigureTabletHook(hk, tablet.Alias)
		if hookErr := hk.ExecuteOptional(); hookErr != nil {
			// we don't want to return an error, the server
			// is already in bad shape probably.
			log.Warningf("Scrap: postflight_scrap failed: %v", hookErr)
		}
	}

	return nil
}
Ejemplo n.º 11
0
// Shutdown will stop the mysqld daemon that is running in the background.
//
// waitForMysqld: should the function block until mysqld has stopped?
// This can actually take a *long* time if the buffer cache needs to be fully
// flushed - on the order of 20-30 minutes.
func (mysqld *Mysqld) Shutdown(waitForMysqld bool, mysqlWaitTime time.Duration) error {
	log.Infof("mysqlctl.Shutdown")
	// possibly mysql is already shutdown, check for a few files first
	_, socketPathErr := os.Stat(mysqld.config.SocketFile)
	_, pidPathErr := os.Stat(mysqld.config.PidFile)
	if socketPathErr != nil && pidPathErr != nil {
		log.Warningf("assuming shutdown - no socket, no pid file")
		return nil
	}

	// try the mysqld shutdown hook, if any
	h := hook.NewSimpleHook("mysqld_shutdown")
	hr := h.Execute()
	switch hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, try mysqladmin
		log.Infof("No mysqld_shutdown hook, running mysqladmin directly")
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name := path.Join(dir, "bin/mysqladmin")
		arg := []string{
			"-u", "vt_dba", "-S", mysqld.config.SocketFile,
			"shutdown"}
		env := []string{
			os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql"),
		}
		_, err = execCmd(name, arg, env, dir)
		if err != nil {
			return err
		}
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_shutdown hook failed: %v", hr.String())
	}

	// wait for mysqld to really stop. use the sock file as a proxy for that since
	// we can't call wait() in a process we didn't start.
	if waitForMysqld {
		for i := mysqlWaitTime; i >= 0; i -= time.Second {
			_, statErr := os.Stat(mysqld.config.SocketFile)
			if statErr != nil && os.IsNotExist(statErr) {
				return nil
			}
			log.Infof("Mysqld.Shutdown: sleeping for 1s waiting for socket file %v", mysqld.config.SocketFile)
			time.Sleep(time.Second)
		}
		return errors.New("gave up waiting for mysqld to stop")
	}
	return nil
}
Ejemplo n.º 12
0
// This piece runs on the presumably empty machine acting as the target in the
// create replica action.
//
// validate target (self)
// shutdown_mysql()
// create temp data directory /vt/target/vt_<keyspace>
// copy compressed data files via HTTP
// verify hash of compressed files
// uncompress into /vt/vt_<target-uid>/data/vt_<keyspace>
// start_mysql()
// clean up compressed files
func (mysqld *Mysqld) RestoreFromSnapshot(snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error {
	if snapshotManifest == nil {
		return errors.New("RestoreFromSnapshot: nil snapshotManifest")
	}

	relog.Debug("ValidateCloneTarget")
	if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil {
		return err
	}

	relog.Debug("Shutdown mysqld")
	if err := Shutdown(mysqld, true, MysqlWaitTime); err != nil {
		return err
	}

	relog.Debug("Fetch snapshot")
	if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil {
		return err
	}

	relog.Debug("Restart mysqld")
	if err := Start(mysqld, MysqlWaitTime); err != nil {
		return err
	}

	cmdList, err := StartReplicationCommands(mysqld, snapshotManifest.ReplicationState)
	if err != nil {
		return err
	}
	if err := mysqld.executeSuperQueryList(cmdList); err != nil {
		return err
	}

	if !dontWaitForSlaveStart {
		if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil {
			return err
		}
	}

	h := hook.NewSimpleHook("postflight_restore")
	h.ExtraEnv = hookExtraEnv
	if err := h.ExecuteOptional(); err != nil {
		return err
	}

	return nil
}
Ejemplo n.º 13
0
// Scrap will update the tablet type to 'Scrap', and remove it from
// the serving graph.
//
// 'force' means we are not on the tablet being scrapped, so it is
// probably dead. So if 'force' is true, we will also remove pending
// remote actions.  And if 'force' is false, we also run an optional
// hook.
func Scrap(ctx context.Context, ts topo.Server, tabletAlias topo.TabletAlias, force bool) error {
	tablet, err := ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}

	// If you are already scrap, skip updating replication data. It won't
	// be there anyway.
	wasAssigned := tablet.IsAssigned()
	tablet.Type = topo.TYPE_SCRAP
	// Update the tablet first, since that is canonical.
	err = topo.UpdateTablet(ctx, ts, tablet)
	if err != nil {
		return err
	}

	if wasAssigned {
		err = topo.DeleteTabletReplicationData(ctx, ts, tablet.Tablet)
		if err != nil {
			if err == topo.ErrNoNode {
				log.V(6).Infof("no ShardReplication object for cell %v", tablet.Alias.Cell)
				err = nil
			}
			if err != nil {
				log.Warningf("remove replication data for %v failed: %v", tablet.Alias, err)
			}
		}
	}

	// run a hook for final cleanup, only in non-force mode.
	// (force mode executes on the vtctl side, not on the vttablet side)
	if !force {
		hk := hook.NewSimpleHook("postflight_scrap")
		ConfigureTabletHook(hk, tablet.Alias)
		if hookErr := hk.ExecuteOptional(); hookErr != nil {
			// we don't want to return an error, the server
			// is already in bad shape probably.
			log.Warningf("Scrap: postflight_scrap failed: %v", hookErr)
		}
	}

	return nil
}
Ejemplo n.º 14
0
// Make this external, since these transitions need to be forced from time to time.
func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	if runHooks {
		// Only run the preflight_serving_type hook when
		// transitioning from non-serving to serving.
		if !topo.IsServingType(tablet.Type) && topo.IsServingType(newType) {
			if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil {
				return err
			}
		}
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		if tablet.Parent.Uid == topo.NO_TABLET {
			// With a master the node cannot be set to idle unless we have already removed all of
			// the derived paths. The global replication path is a good indication that this has
			// been resolved.
			children, err := ts.GetReplicationPaths(tablet.Keyspace, tablet.Shard, tablet.ReplicationPath())
			if err != nil && err != topo.ErrNoNode {
				return err
			}
			if err == nil && len(children) > 0 {
				return fmt.Errorf("cannot change tablet type %v -> %v - reparent action has not finished %v", tablet.Type, newType, tabletAlias)
			}
		}
		tablet.Parent = topo.TabletAlias{}
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
	}
	return topo.UpdateTablet(ts, tablet)
}
Ejemplo n.º 15
0
// change a tablet type to RESTORE and set all the other arguments.
// from now on, we can go to:
// - back to IDLE if we don't use the tablet at all (after for instance
//   a successful ReserveForRestore but a failed Snapshot)
// - to SCRAP if something in the process on the target host fails
// - to SPARE if the clone works
func (ta *TabletActor) changeTypeToRestore(tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error {
	// run the optional preflight_assigned hook
	hk := hook.NewSimpleHook("preflight_assigned")
	configureTabletHook(hk, ta.tabletAlias)
	if err := hk.ExecuteOptional(); err != nil {
		return err
	}

	// change the type
	tablet.Parent = parentAlias
	tablet.Keyspace = sourceTablet.Keyspace
	tablet.Shard = sourceTablet.Shard
	tablet.Type = topo.TYPE_RESTORE
	tablet.KeyRange = keyRange
	tablet.DbNameOverride = sourceTablet.DbNameOverride
	if err := topo.UpdateTablet(ta.ts, tablet); err != nil {
		return err
	}

	// and create the replication graph items
	return topo.CreateTabletReplicationPaths(ta.ts, tablet.Tablet)
}
Ejemplo n.º 16
0
// Make this external, since in needs to be forced from time to time.
func Scrap(ts topo.Server, tabletAlias topo.TabletAlias, force bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// If you are already scrap, skip deleting the path. It won't
	// be correct since the Parent will be cleared already.
	wasAssigned := tablet.IsAssigned()
	replicationPath := ""
	if wasAssigned {
		replicationPath = tablet.ReplicationPath()
	}

	tablet.Type = topo.TYPE_SCRAP
	tablet.Parent = topo.TabletAlias{}
	// Update the tablet first, since that is canonical.
	err = topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}

	// Remove any pending actions. Presumably forcing a scrap means you don't
	// want the agent doing anything and the machine requires manual attention.
	if force {
		err := ts.PurgeTabletActions(tabletAlias, ActionNodeCanBePurged)
		if err != nil {
			log.Warningf("purge actions failed: %v", err)
		}
	}

	if wasAssigned {
		err = ts.DeleteReplicationPath(tablet.Keyspace, tablet.Shard, replicationPath)
		if err != nil {
			switch err {
			case topo.ErrNoNode:
				log.V(6).Infof("no replication path: %v", replicationPath)
				err = nil
			case topo.ErrNotEmpty:
				// If you are forcing the scrapping of a master, you can't update the
				// replication graph yet, since other nodes are still under the impression
				// they are slaved to this tablet.
				// If the node was not empty, we can't do anything about it - the replication
				// graph needs to be fixed by reparenting. If the action was forced, assume
				// the user knows best and squelch the error.
				if tablet.Parent.Uid == topo.NO_TABLET && force {
					err = nil
				}
			}
			if err != nil {
				log.Warningf("remove replication path failed: %v %v", replicationPath, err)
			}
		}
	}

	// run a hook for final cleanup, only in non-force mode.
	// (force mode executes on the vtctl side, not on the vttablet side)
	if !force {
		hk := hook.NewSimpleHook("postflight_scrap")
		configureTabletHook(hk, tablet.Alias())
		if hookErr := hk.ExecuteOptional(); hookErr != nil {
			// we don't want to return an error, the server
			// is already in bad shape probably.
			log.Warningf("Scrap: postflight_scrap failed: %v", hookErr)
		}
	}

	return nil
}
Ejemplo n.º 17
0
// CreateMultiSnapshot create snapshots of the data.
// - for a resharding snapshot, keyRanges+keyName+keyType are set,
//   and tables is empty. This action will create multiple snapshots,
//   one per keyRange.
// - for a vertical split, tables is set, keyRanges = [KeyRange{}] and
//   keyName+keyType are empty. It will create a single snapshot of
//   the contents of the tables.
// Note combinations of table subset and keyranges are not supported.
func (mysqld *Mysqld) CreateMultiSnapshot(keyRanges []key.KeyRange, dbName, keyName string, keyType key.KeyspaceIdType, sourceAddr string, allowHierarchicalReplication bool, snapshotConcurrency int, tables []string, skipSlaveRestart bool, maximumFilesize uint64, hookExtraEnv map[string]string) (snapshotManifestFilenames []string, err error) {
	if dbName == "" {
		err = fmt.Errorf("no database name provided")
		return
	}
	if len(tables) > 0 {
		if len(keyRanges) != 1 || keyRanges[0].IsPartial() {
			return nil, fmt.Errorf("With tables specified, can only have one full KeyRange")
		}
	}

	// same logic applies here
	log.Infof("validateCloneSource")
	if err = mysqld.validateCloneSource(false, hookExtraEnv); err != nil {
		return
	}

	// clean out and start fresh
	cloneSourcePaths := make(map[key.KeyRange]string)
	for _, keyRange := range keyRanges {
		cloneSourcePaths[keyRange] = path.Join(mysqld.SnapshotDir, dataDir, dbName+"-"+string(keyRange.Start.Hex())+","+string(keyRange.End.Hex()))
	}
	for _, _path := range cloneSourcePaths {
		if err = os.RemoveAll(_path); err != nil {
			return
		}
		if err = os.MkdirAll(_path, 0775); err != nil {
			return
		}
	}

	mainCloneSourcePath := path.Join(mysqld.SnapshotDir, dataDir, dbName+"-all")
	if err = os.RemoveAll(mainCloneSourcePath); err != nil {
		return
	}
	if err = os.MkdirAll(mainCloneSourcePath, 0775); err != nil {
		return
	}

	// get the schema for each table
	sd, fetchErr := mysqld.GetSchema(dbName, tables, true)
	if fetchErr != nil {
		return []string{}, fetchErr
	}
	if len(sd.TableDefinitions) == 0 {
		return []string{}, fmt.Errorf("empty table list for %v", dbName)
	}
	sd.SortByReverseDataLength()

	// prepareToSnapshot will get the tablet in the rigth state,
	// and return the current mysql status.
	slaveStartRequired, readOnly, replicationPosition, myMasterPosition, masterAddr, err := mysqld.prepareToSnapshot(allowHierarchicalReplication, hookExtraEnv)
	if err != nil {
		return
	}
	if skipSlaveRestart {
		if slaveStartRequired {
			log.Infof("Overriding slaveStartRequired to false")
		}
		slaveStartRequired = false
	}
	defer func() {
		err = replaceError(err, mysqld.restoreAfterSnapshot(slaveStartRequired, readOnly, hookExtraEnv))
	}()

	// dump the files in parallel with a pre-defined concurrency
	datafiles := make([]map[key.KeyRange][]SnapshotFile, len(sd.TableDefinitions))
	dumpTableWorker := func(i int) (err error) {
		table := sd.TableDefinitions[i]
		if table.Type != proto.TABLE_BASE_TABLE {
			// we just skip views here
			return nil
		}
		if len(tables) > 0 {
			sfs, err := mysqld.dumpTableFull(table, dbName, mainCloneSourcePath, cloneSourcePaths[key.KeyRange{}], maximumFilesize)
			if err != nil {
				return err
			}
			datafiles[i] = map[key.KeyRange][]SnapshotFile{
				key.KeyRange{}: sfs,
			}
		} else {
			datafiles[i], err = mysqld.dumpTableSplit(table, dbName, keyName, keyType, mainCloneSourcePath, cloneSourcePaths, maximumFilesize)
		}
		return
	}
	if err = ConcurrentMap(snapshotConcurrency, len(sd.TableDefinitions), dumpTableWorker); err != nil {
		return
	}

	if e := os.Remove(mainCloneSourcePath); e != nil {
		log.Errorf("Cannot remove %v: %v", mainCloneSourcePath, e)
	}

	// Check the replication position after snapshot is done
	// hasn't changed, to be sure we haven't inserted any data
	var newReplicationPosition *proto.ReplicationPosition
	newReplicationPosition, _, err = mysqld.getReplicationPositionForClones(allowHierarchicalReplication)
	if err != nil {
		return
	}
	if newReplicationPosition.MasterLogGroupId != replicationPosition.MasterLogGroupId {
		return nil, fmt.Errorf("replicationPosition position changed during snapshot, from %u to %v", replicationPosition.MasterLogGroupId, newReplicationPosition.MasterLogGroupId)
	}

	// Write all the manifest files
	ssmFiles := make([]string, len(keyRanges))
	for i, kr := range keyRanges {
		krDatafiles := make([]SnapshotFile, 0, len(datafiles))
		for _, m := range datafiles {
			krDatafiles = append(krDatafiles, m[kr]...)
		}
		ssm, err := NewSplitSnapshotManifest(sourceAddr, mysqld.IpAddr(),
			masterAddr, dbName, krDatafiles, replicationPosition,
			myMasterPosition, kr, sd)
		if err != nil {
			return nil, err
		}
		ssmFiles[i] = path.Join(cloneSourcePaths[kr], partialSnapshotManifestFile)
		if err = writeJson(ssmFiles[i], ssm); err != nil {
			return nil, err
		}
	}

	// Call the (optional) hook to send the files somewhere else
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, kr := range keyRanges {
		wg.Add(1)
		go func(kr key.KeyRange) {
			defer wg.Done()
			h := hook.NewSimpleHook("copy_snapshot_to_storage")
			h.ExtraEnv = make(map[string]string)
			for k, v := range hookExtraEnv {
				h.ExtraEnv[k] = v
			}
			h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", kr.Start.Hex(), kr.End.Hex())
			h.ExtraEnv["SNAPSHOT_PATH"] = cloneSourcePaths[kr]
			rec.RecordError(h.ExecuteOptional())
		}(kr)
	}
	wg.Wait()
	if rec.HasErrors() {
		return nil, err
	}

	// Return all the URLs for the MANIFESTs
	snapshotURLPaths := make([]string, len(keyRanges))
	for i := 0; i < len(keyRanges); i++ {
		relative, err := filepath.Rel(mysqld.SnapshotDir, ssmFiles[i])
		if err != nil {
			return nil, err
		}
		snapshotURLPaths[i] = path.Join(SnapshotURLPath, relative)
	}
	return snapshotURLPaths, nil
}
Ejemplo n.º 18
0
// Shutdown will stop the mysqld daemon that is running in the background.
//
// waitForMysqld: should the function block until mysqld has stopped?
// This can actually take a *long* time if the buffer cache needs to be fully
// flushed - on the order of 20-30 minutes.
//
// If a mysqlctld address is provided in a flag, Shutdown will run remotely.
func (mysqld *Mysqld) Shutdown(ctx context.Context, waitForMysqld bool) error {
	log.Infof("Mysqld.Shutdown")

	// Execute as remote action on mysqlctld if requested.
	if *socketFile != "" {
		log.Infof("executing Mysqld.Shutdown() remotely via mysqlctld server: %v", *socketFile)
		client, err := mysqlctlclient.New("unix", *socketFile)
		if err != nil {
			return fmt.Errorf("can't dial mysqlctld: %v", err)
		}
		defer client.Close()
		return client.Shutdown(ctx, waitForMysqld)
	}

	// We're shutting down on purpose. We no longer want to be notified when
	// mysqld terminates.
	mysqld.mutex.Lock()
	if mysqld.cancelWaitCmd != nil {
		close(mysqld.cancelWaitCmd)
		mysqld.cancelWaitCmd = nil
	}
	mysqld.mutex.Unlock()

	// possibly mysql is already shutdown, check for a few files first
	_, socketPathErr := os.Stat(mysqld.config.SocketFile)
	_, pidPathErr := os.Stat(mysqld.config.PidFile)
	if socketPathErr != nil && pidPathErr != nil {
		log.Warningf("assuming mysqld already shut down - no socket, no pid file found")
		return nil
	}

	// try the mysqld shutdown hook, if any
	h := hook.NewSimpleHook("mysqld_shutdown")
	hr := h.Execute()
	switch hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, try mysqladmin
		log.Infof("No mysqld_shutdown hook, running mysqladmin directly")
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name := path.Join(dir, "bin/mysqladmin")
		arg := []string{
			"-u", "vt_dba", "-S", mysqld.config.SocketFile,
			"shutdown"}
		env := []string{
			os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql"),
		}
		_, err = execCmd(name, arg, env, dir)
		if err != nil {
			return err
		}
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_shutdown hook failed: %v", hr.String())
	}

	// Wait for mysqld to really stop. Use the sock file as a
	// proxy for that since we can't call wait() in a process we
	// didn't start.
	if waitForMysqld {
		for {
			select {
			case <-ctx.Done():
				return errors.New("gave up waiting for mysqld to stop")
			default:
			}

			_, statErr := os.Stat(mysqld.config.SocketFile)
			if statErr != nil && os.IsNotExist(statErr) {
				return nil
			}
			log.Infof("Mysqld.Shutdown: sleeping for 1s waiting for socket file %v", mysqld.config.SocketFile)
			time.Sleep(time.Second)
		}
	}
	return nil
}
Ejemplo n.º 19
0
// Start will start the mysql daemon, either by running the 'mysqld_start'
// hook, or by running mysqld_safe in the background.
// If a mysqlctld address is provided in a flag, Start will run remotely.
func (mysqld *Mysqld) Start(ctx context.Context) error {
	// Execute as remote action on mysqlctld if requested.
	if *socketFile != "" {
		log.Infof("executing Mysqld.Start() remotely via mysqlctld server: %v", *socketFile)
		client, err := mysqlctlclient.New("unix", *socketFile)
		if err != nil {
			return fmt.Errorf("can't dial mysqlctld: %v", err)
		}
		defer client.Close()
		return client.Start(ctx)
	}

	var name string
	ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix())

	// try the mysqld start hook, if any
	switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
		name = "mysqld_start hook"
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, run mysqld_safe ourselves
		log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts)
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name = path.Join(dir, "bin/mysqld_safe")
		arg := []string{
			"--defaults-file=" + mysqld.config.path}
		env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")}

		cmd := exec.Command(name, arg...)
		cmd.Dir = dir
		cmd.Env = env
		log.Infof("%v %#v", ts, cmd)
		stderr, err := cmd.StderrPipe()
		if err != nil {
			return nil
		}
		stdout, err := cmd.StdoutPipe()
		if err != nil {
			return nil
		}
		go func() {
			scanner := bufio.NewScanner(stderr)
			for scanner.Scan() {
				log.Infof("%v stderr: %v", ts, scanner.Text())
			}
		}()
		go func() {
			scanner := bufio.NewScanner(stdout)
			for scanner.Scan() {
				log.Infof("%v stdout: %v", ts, scanner.Text())
			}
		}()
		err = cmd.Start()
		if err != nil {
			return nil
		}

		mysqld.mutex.Lock()
		mysqld.cancelWaitCmd = make(chan struct{})
		go func(cancel <-chan struct{}) {
			// Wait regardless of cancel, so we don't generate defunct processes.
			err := cmd.Wait()
			log.Infof("%v exit: %v", ts, err)

			// The process exited. Trigger OnTerm callbacks, unless we were cancelled.
			select {
			case <-cancel:
			default:
				mysqld.mutex.Lock()
				for _, callback := range mysqld.onTermFuncs {
					go callback()
				}
				mysqld.mutex.Unlock()
			}
		}(mysqld.cancelWaitCmd)
		mysqld.mutex.Unlock()
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_start hook failed: %v", hr.String())
	}

	// give it some time to succeed - usually by the time the socket emerges
	// we are in good shape
	for {
		select {
		case <-ctx.Done():
			return errors.New(name + ": deadline exceeded waiting for " + mysqld.config.SocketFile)
		default:
		}

		_, statErr := os.Stat(mysqld.config.SocketFile)
		if statErr == nil {
			// Make sure the socket file isn't stale.
			conn, connErr := mysqld.dbaPool.Get(0)
			if connErr == nil {
				conn.Recycle()
				return nil
			}
		} else if !os.IsNotExist(statErr) {
			return statErr
		}
		log.Infof("%v: sleeping for 1s waiting for socket file %v", ts, mysqld.config.SocketFile)
		time.Sleep(time.Second)
	}
}
Ejemplo n.º 20
0
func (ta *TabletActor) multiRestore(actionNode *actionnode.ActionNode) (err error) {
	args := actionNode.Args.(*actionnode.MultiRestoreArgs)

	// read our current tablet, verify its state
	// we only support restoring to the master or active replicas
	tablet, err := ta.ts.GetTablet(ta.tabletAlias)
	if err != nil {
		return err
	}
	if tablet.Type != topo.TYPE_MASTER && !topo.IsSlaveType(tablet.Type) {
		return fmt.Errorf("expected master, or slave type, not %v: %v", tablet.Type, ta.tabletAlias)
	}

	// get source tablets addresses
	sourceAddrs := make([]*url.URL, len(args.SrcTabletAliases))
	keyRanges := make([]key.KeyRange, len(args.SrcTabletAliases))
	fromStoragePaths := make([]string, len(args.SrcTabletAliases))
	for i, alias := range args.SrcTabletAliases {
		t, e := ta.ts.GetTablet(alias)
		if e != nil {
			return e
		}
		sourceAddrs[i] = &url.URL{
			Host: t.Addr(),
			Path: "/" + t.DbName(),
		}
		keyRanges[i], e = key.KeyRangesOverlap(tablet.KeyRange, t.KeyRange)
		if e != nil {
			return e
		}
		fromStoragePaths[i] = path.Join(ta.mysqld.SnapshotDir, "from-storage", fmt.Sprintf("from-%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex()))
	}

	// change type to restore, no change to replication graph
	originalType := tablet.Type
	tablet.Type = topo.TYPE_RESTORE
	err = topo.UpdateTablet(ta.ts, tablet)
	if err != nil {
		return err
	}

	// first try to get the data from a remote storage
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for i, alias := range args.SrcTabletAliases {
		wg.Add(1)
		go func(i int, alias topo.TabletAlias) {
			defer wg.Done()
			h := hook.NewSimpleHook("copy_snapshot_from_storage")
			h.ExtraEnv = make(map[string]string)
			for k, v := range ta.hookExtraEnv() {
				h.ExtraEnv[k] = v
			}
			h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex())
			h.ExtraEnv["SNAPSHOT_PATH"] = fromStoragePaths[i]
			h.ExtraEnv["SOURCE_TABLET_ALIAS"] = alias.String()
			hr := h.Execute()
			if hr.ExitStatus != hook.HOOK_SUCCESS {
				rec.RecordError(fmt.Errorf("%v hook failed(%v): %v", h.Name, hr.ExitStatus, hr.Stderr))
			}
		}(i, alias)
	}
	wg.Wait()

	// run the action, scrap if it fails
	if rec.HasErrors() {
		log.Infof("Got errors trying to get snapshots from storage, trying to get them from original tablets: %v", rec.Error())
		err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, sourceAddrs, nil, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy)
	} else {
		log.Infof("Got snapshots from storage, reading them from disk directly")
		err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, nil, fromStoragePaths, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy)
	}
	if err != nil {
		if e := topotools.Scrap(ta.ts, ta.tabletAlias, false); e != nil {
			log.Errorf("Failed to Scrap after failed RestoreFromMultiSnapshot: %v", e)
		}
		return err
	}

	// restore type back
	tablet.Type = originalType
	return topo.UpdateTablet(ta.ts, tablet)
}
Ejemplo n.º 21
0
// Start will start the mysql daemon, either by running the 'mysqld_start'
// hook, or by running mysqld_safe in the background.
func (mysqld *Mysqld) Start(mysqlWaitTime time.Duration) error {
	var name string
	ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix())

	// try the mysqld start hook, if any
	switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
		name = "mysqld_start hook"
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, run mysqld_safe ourselves
		log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts)
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name = path.Join(dir, "bin/mysqld_safe")
		arg := []string{
			"--defaults-file=" + mysqld.config.path}
		env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")}

		cmd := exec.Command(name, arg...)
		cmd.Dir = dir
		cmd.Env = env
		log.Infof("%v mysqlWaitTime:%v %#v", ts, mysqlWaitTime, cmd)
		stderr, err := cmd.StderrPipe()
		if err != nil {
			return nil
		}
		stdout, err := cmd.StdoutPipe()
		if err != nil {
			return nil
		}
		go func() {
			scanner := bufio.NewScanner(stderr)
			for scanner.Scan() {
				log.Infof("%v stderr: %v", ts, scanner.Text())
			}
		}()
		go func() {
			scanner := bufio.NewScanner(stdout)
			for scanner.Scan() {
				log.Infof("%v stdout: %v", ts, scanner.Text())
			}
		}()
		err = cmd.Start()
		if err != nil {
			return nil
		}

		mysqld.done = make(chan struct{})
		go func(done chan<- struct{}) {
			// wait so we don't get a bunch of defunct processes
			err := cmd.Wait()
			log.Infof("%v exit: %v", ts, err)
			close(done)
		}(mysqld.done)
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_start hook failed: %v", hr.String())
	}

	// give it some time to succeed - usually by the time the socket emerges
	// we are in good shape
	for i := mysqlWaitTime; i >= 0; i -= time.Second {
		_, statErr := os.Stat(mysqld.config.SocketFile)
		if statErr == nil {
			// Make sure the socket file isn't stale.
			conn, connErr := mysqld.dbaPool.Get(0)
			if connErr == nil {
				conn.Recycle()
				return nil
			}
		} else if !os.IsNotExist(statErr) {
			return statErr
		}
		log.Infof("%v: sleeping for 1s waiting for socket file %v", ts, mysqld.config.SocketFile)
		time.Sleep(time.Second)
	}
	return errors.New(name + ": deadline exceeded waiting for " + mysqld.config.SocketFile)
}
Ejemplo n.º 22
0
// Start will start the mysql daemon, either by running the 'mysqld_start'
// hook, or by running mysqld_safe in the background.
// If a mysqlctld address is provided in a flag, Start will run remotely.
func (mysqld *Mysqld) Start(ctx context.Context) error {
	// Execute as remote action on mysqlctld if requested.
	if *socketFile != "" {
		log.Infof("executing Mysqld.Start() remotely via mysqlctld server: %v", *socketFile)
		client, err := mysqlctlclient.New("unix", *socketFile)
		if err != nil {
			return fmt.Errorf("can't dial mysqlctld: %v", err)
		}
		defer client.Close()
		return client.Start(ctx)
	}

	var name string
	ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix())

	// try the mysqld start hook, if any
	switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus {
	case hook.HOOK_SUCCESS:
		// hook exists and worked, we can keep going
		name = "mysqld_start hook"
	case hook.HOOK_DOES_NOT_EXIST:
		// hook doesn't exist, run mysqld_safe ourselves
		log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts)
		dir, err := vtenv.VtMysqlRoot()
		if err != nil {
			return err
		}
		name, err = binaryPath(dir, "mysqld_safe")
		if err != nil {
			return err
		}
		arg := []string{
			"--defaults-file=" + mysqld.config.path}
		env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")}

		cmd := exec.Command(name, arg...)
		cmd.Dir = dir
		cmd.Env = env
		log.Infof("%v %#v", ts, cmd)
		stderr, err := cmd.StderrPipe()
		if err != nil {
			return nil
		}
		stdout, err := cmd.StdoutPipe()
		if err != nil {
			return nil
		}
		go func() {
			scanner := bufio.NewScanner(stderr)
			for scanner.Scan() {
				log.Infof("%v stderr: %v", ts, scanner.Text())
			}
		}()
		go func() {
			scanner := bufio.NewScanner(stdout)
			for scanner.Scan() {
				log.Infof("%v stdout: %v", ts, scanner.Text())
			}
		}()
		err = cmd.Start()
		if err != nil {
			return nil
		}

		mysqld.mutex.Lock()
		mysqld.cancelWaitCmd = make(chan struct{})
		go func(cancel <-chan struct{}) {
			// Wait regardless of cancel, so we don't generate defunct processes.
			err := cmd.Wait()
			log.Infof("%v exit: %v", ts, err)

			// The process exited. Trigger OnTerm callbacks, unless we were cancelled.
			select {
			case <-cancel:
			default:
				mysqld.mutex.Lock()
				for _, callback := range mysqld.onTermFuncs {
					go callback()
				}
				mysqld.mutex.Unlock()
			}
		}(mysqld.cancelWaitCmd)
		mysqld.mutex.Unlock()
	default:
		// hook failed, we report error
		return fmt.Errorf("mysqld_start hook failed: %v", hr.String())
	}

	return mysqld.Wait(ctx)
}
Ejemplo n.º 23
0
func Init(mt *Mysqld, mysqlWaitTime time.Duration) error {
	log.Infof("mysqlctl.Init")
	err := mt.createDirs()
	if err != nil {
		log.Errorf("%s", err.Error())
		return err
	}
	root, err := vtenv.VtRoot()
	if err != nil {
		log.Errorf("%s", err.Error())
		return err
	}

	hr := hook.NewSimpleHook("make_mycnf").Execute()

	configData := ""
	if hr.ExitStatus == hook.HOOK_DOES_NOT_EXIST {
		log.Infof("make_mycnf hook doesn't exist")
		cnfTemplatePaths := []string{
			path.Join(root, "config/mycnf/default.cnf"),
			path.Join(root, "config/mycnf/master.cnf"),
			path.Join(root, "config/mycnf/replica.cnf"),
		}

		if extraCnf := os.Getenv("EXTRA_MY_CNF"); extraCnf != "" {
			parts := strings.Split(extraCnf, ":")
			cnfTemplatePaths = append(cnfTemplatePaths, parts...)
		}

		configData, err = MakeMycnf(mt.config, cnfTemplatePaths)
	} else if hr.ExitStatus == hook.HOOK_SUCCESS {
		configData, err = fillMycnfTemplate(mt.config, hr.Stdout)
	} else {
		err = fmt.Errorf("make_mycnf hook failed(%v): %v", hr.ExitStatus, hr.Stderr)
	}

	if err == nil {
		err = ioutil.WriteFile(mt.config.path, []byte(configData), 0664)
	}
	if err != nil {
		log.Errorf("failed creating %v: %v", mt.config.path, err)
		return err
	}

	dbTbzPath := path.Join(root, "data/bootstrap/mysql-db-dir.tbz")
	log.Infof("decompress bootstrap db %v", dbTbzPath)
	args := []string{"-xj", "-C", mt.config.DataDir, "-f", dbTbzPath}
	_, tarErr := execCmd("tar", args, []string{}, "")
	if tarErr != nil {
		log.Errorf("failed unpacking %v: %v", dbTbzPath, tarErr)
		return tarErr
	}
	if err = Start(mt, mysqlWaitTime); err != nil {
		log.Errorf("failed starting, check %v", mt.config.ErrorLogPath)
		return err
	}
	schemaPath := path.Join(root, "data/bootstrap/_vt_schema.sql")
	schema, err := ioutil.ReadFile(schemaPath)
	if err != nil {
		return err
	}

	sqlCmds := make([]string, 0, 10)
	log.Infof("initial schema: %v", string(schema))
	for _, cmd := range strings.Split(string(schema), ";") {
		cmd = strings.TrimSpace(cmd)
		if cmd == "" {
			continue
		}
		sqlCmds = append(sqlCmds, cmd)
	}

	return mt.executeSuperQueryList(sqlCmds)
}